[med-svn] [fasta3] 08/10: New upstream version 36.3.8f

Andreas Tille tille at debian.org
Tue Dec 5 16:21:48 UTC 2017


This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository fasta3.

commit 2ee98997730aa71d5298843512acf174ad04f950
Author: Andreas Tille <tille at debian.org>
Date:   Tue Dec 5 17:20:06 2017 +0100

    New upstream version 36.3.8f
---
 COPYRIGHT                         |   19 +
 FASTA_LIST                        |   17 +
 LICENSE                           |  202 ++
 README                            |   77 +
 README.md                         |   77 +
 bin/README                        |    1 +
 conf/README                       |   33 +
 conf/fast_libs_e.www              |   23 +
 conf/fast_new                     |   38 +
 conf/fastlibs                     |   42 +
 data/VTML_10.mat                  |   35 +
 data/VTML_120.mat                 |   34 +
 data/VTML_160.mat                 |   34 +
 data/VTML_20.mat                  |   34 +
 data/VTML_200.mat                 |   34 +
 data/VTML_40.mat                  |   34 +
 data/VTML_80.mat                  |   34 +
 data/blosum45.mat                 |   30 +
 data/blosum50.mat                 |   29 +
 data/blosum62.mat                 |   30 +
 data/blosum80.mat                 |   30 +
 data/dna.mat                      |   19 +
 data/idn_aa.mat                   |   24 +
 data/md_10.mat                    |   24 +
 data/md_20.mat                    |   24 +
 data/md_40.mat                    |   24 +
 data/pam120.mat                   |   34 +
 data/pam250.mat                   |   34 +
 data/rna.mat                      |   19 +
 data/vtml160.mat                  |   38 +
 debian/changelog                  |    5 -
 debian/compat                     |    1 -
 debian/control                    |   75 -
 debian/copyright                  |   51 -
 debian/docs                       |    5 -
 debian/fasta3-doc.doc-base        |   15 -
 debian/fasta3.install             |    1 -
 debian/fasta3.manpages            |    6 -
 debian/patches/Makefile.patch     |   47 -
 debian/patches/series             |    1 -
 debian/rules                      |   35 -
 debian/source/format              |    1 -
 debian/upstream/metadata          |   20 -
 debian/watch                      |    6 -
 doc/INSTALL                       |   17 +
 doc/README.versions               |   48 +
 doc/README_v36.3.8d.md            |   37 +
 doc/changes_v34.html              |  351 +++
 doc/changes_v35.html              |  212 ++
 doc/changes_v36.html              |  467 ++++
 doc/fasta.defaults                |   17 +
 doc/fasta.history.tex             |  180 ++
 doc/fasta.options                 |   55 +
 doc/fasta36.1                     |  467 ++++
 doc/fasta_func.doc                |  300 +++
 doc/fasta_guide.bib               |  265 +++
 doc/fasta_guide.fg1.tex           |   60 +
 doc/fasta_guide.fg2.tex           |   25 +
 doc/fasta_guide.pdf               |  Bin 0 -> 265293 bytes
 doc/fasta_guide.tex               | 2115 ++++++++++++++++++
 doc/fasta_versions.html           |  101 +
 doc/fastf3.1                      |  176 ++
 doc/fasts3.1                      |  169 ++
 doc/map_db.1                      |   45 +
 doc/prss3.1                       |  170 ++
 doc/ps_lav.1                      |   20 +
 doc/readme.v30                    |   38 +
 doc/readme.v30t6                  |   74 +
 doc/readme.v30t7                  |  175 ++
 doc/readme.v31t0                  |  160 ++
 doc/readme.v31t1                  |  113 +
 doc/readme.v32t0                  |  407 ++++
 doc/readme.v33t0                  | 1268 +++++++++++
 doc/readme.v34t0                  | 1683 +++++++++++++++
 doc/readme.v35                    |  535 +++++
 doc/readme.v36                    | 2213 +++++++++++++++++++
 doc/readme.w32                    |   67 +
 make/Makefile                     |   53 +
 make/Makefile.NetBSD              |   40 +
 make/Makefile.cray_pvp            |   41 +
 make/Makefile.fcom                |  344 +++
 make/Makefile.freebsd             |   72 +
 make/Makefile.hpux_it             |   56 +
 make/Makefile.ibm                 |   37 +
 make/Makefile.linux               |    1 +
 make/Makefile.linux32             |   63 +
 make/Makefile.linux32_sse2        |   68 +
 make/Makefile.linux64             |    1 +
 make/Makefile.linux64_sse2        |   65 +
 make/Makefile.linux_icc           |   58 +
 make/Makefile.linux_icc_sse2      |   55 +
 make/Makefile.linux_mysql         |   57 +
 make/Makefile.linux_pgsql         |   58 +
 make/Makefile.linux_sql           |   58 +
 make/Makefile.linux_sse2          |    1 +
 make/Makefile.mp_com2             |  116 +
 make/Makefile.mpi_icc_sse2        |   55 +
 make/Makefile.nm_fcom             |  304 +++
 make/Makefile.nm_pcom             |  217 ++
 make/Makefile.nmk_icl             |   35 +
 make/Makefile.os_x                |   69 +
 make/Makefile.os_x86              |   61 +
 make/Makefile.os_x86_64           |   61 +
 make/Makefile.os_x86_clang        |   59 +
 make/Makefile.os_x86_icc          |   61 +
 make/Makefile.pLinux              |   83 +
 make/Makefile.pLinux_sql          |   81 +
 make/Makefile.pcom                |  229 ++
 make/Makefile.pcom_s              |  162 ++
 make/Makefile.pcom_t              |  184 ++
 make/Makefile.sgi                 |   58 +
 make/Makefile.sse_alt             |   23 +
 make/Makefile.sun                 |   52 +
 make/Makefile.sun_x86             |   51 +
 make/Makefile35.common            |   45 +
 make/Makefile35.common_sql        |   50 +
 make/Makefile35.nmk_com           |   30 +
 make/Makefile35m.common_mysql     |   49 +
 make/Makefile35m.common_pgsql     |   49 +
 make/Makefile35m.common_sql       |   48 +
 make/Makefile36.nmk_com           |   30 +
 make/Makefile36m.common           |   51 +
 make/Makefile36mpi.common         |   43 +
 make/Makefile36t.common           |   43 +
 make/README                       |   44 +
 make/make_osx_univ.sh             |   31 +
 misc/README                       |   14 +
 misc/parse_m9.pl                  |  139 ++
 misc/res2R.pl                     |   22 +
 misc/shuffle_embed.pl             |  148 ++
 psisearch2/README.md              |   92 +
 psisearch2/m89_btop_msa2.pl       |  927 ++++++++
 psisearch2/psisearch2_msa.pl      |  453 ++++
 psisearch2/psisearch2_msa.py      |  368 ++++
 scripts/README                    |  108 +
 scripts/README.scripts            |   84 +
 scripts/acc_examples              |    5 +
 scripts/ann_exons_ens.pl          |  287 +++
 scripts/ann_exons_ncbi.pl         |  243 +++
 scripts/ann_exons_up_www.pl       |  239 ++
 scripts/ann_feats2ipr.pl          |  526 +++++
 scripts/ann_feats2ipr_e.pl        |  544 +++++
 scripts/ann_feats_up_sql.pl       |  463 ++++
 scripts/ann_feats_up_www2.pl      |  455 ++++
 scripts/ann_ipr_www.pl            |  467 ++++
 scripts/ann_pdb_cath.pl           |  345 +++
 scripts/ann_pdb_vast.pl           |  320 +++
 scripts/ann_pfam27.pl             |  656 ++++++
 scripts/ann_pfam28.pl             |  782 +++++++
 scripts/ann_pfam30.pl             |  859 ++++++++
 scripts/ann_pfam30_tmptbl.pl      |  875 ++++++++
 scripts/ann_pfam_www.pl           |  687 ++++++
 scripts/ann_script_list           |    9 +
 scripts/ann_upfeats_pfam_www_e.pl |  801 +++++++
 scripts/annot_blast_btop2.pl      | 1306 +++++++++++
 scripts/blastp_cmd.sh             |   31 +
 scripts/color_defs.pl             |  170 ++
 scripts/exp_up_ensg.pl            |  145 ++
 scripts/expand_links.pl           |  100 +
 scripts/expand_uniref50.pl        |   83 +
 scripts/lav2plt.pl                |  349 +++
 scripts/lavplt_ps.pl              |  540 +++++
 scripts/lavplt_svg.pl             |  461 ++++
 scripts/links2sql.pl              |   61 +
 scripts/m8_btop_msa.pl            |  412 ++++
 scripts/m9B_btop_msa.pl           |  654 ++++++
 scripts/plot_domain2t.cgi         |  667 ++++++
 scripts/summ_domain_ident.pl      |   97 +
 scripts/test_ann_scripts.sh       |   30 +
 seq/bovgh.seq                     |   38 +
 seq/bovprl.seq                    |   17 +
 seq/dna_test_s.nlib               |   47 +
 seq/dyr_human.aa                  |    4 +
 seq/egmsmg.aa                     |   19 +
 seq/grou_drome.pseg               |   14 +
 seq/gst.nlib                      |  284 +++
 seq/gst.seq                       |   20 +
 seq/gstm1_human.vaa               |    2 +
 seq/gstm1b_human.nt               |   17 +
 seq/gstm1b_human_fs.nt            |   17 +
 seq/gstt1_drome.aa                |    4 +
 seq/gstt1_pssm.asn1               |  Bin 0 -> 56931 bytes
 seq/gtm1_human.aa                 |    4 +
 seq/gtt1_drome.aa                 |    4 +
 seq/h10_human.aa                  |    4 +
 seq/hahu.aa                       |    4 +
 seq/hsgstm1b.gcg                  |  214 ++
 seq/hsgstm1b.seq                  |   40 +
 seq/humgstd.seq                   |   20 +
 seq/lcbo.aa                       |    5 +
 seq/m1r.aa                        |    5 +
 seq/m2.aa                         |    5 +
 seq/mchu.aa                       |    3 +
 seq/mgstm1.3nt                    |   60 +
 seq/mgstm1.aa                     |    5 +
 seq/mgstm1.aaa                    |    8 +
 seq/mgstm1.e05                    |   20 +
 seq/mgstm1.eeq                    |   20 +
 seq/mgstm1.esq                    |   20 +
 seq/mgstm1.gcg                    |   13 +
 seq/mgstm1.lc                     |    8 +
 seq/mgstm1.nt                     |   12 +
 seq/mgstm1.nt1                    |   12 +
 seq/mgstm1.nt12r                  |   26 +
 seq/mgstm1.nt13                   |   36 +
 seq/mgstm1.nt13r                  |   35 +
 seq/mgstm1.nt1r                   |   13 +
 seq/mgstm1.nts                    |    9 +
 seq/mgstm1.raa                    |    5 +
 seq/mgstm1.rev                    |   16 +
 seq/mgstm1.seq                    |   20 +
 seq/mgstm1_genclone.seq           | 2088 ++++++++++++++++++
 seq/mgtt2_x.seq                   |   12 +
 seq/ms1.aa                        |    6 +
 seq/mu.lib                        |   50 +
 seq/musplfm.aa                    |    9 +
 seq/mwkw.aa                       |   31 +
 seq/mwrtc1.aa                     |    8 +
 seq/myosin_bp.aa                  |   20 +
 seq/n0.aa                         |    4 +
 seq/n1.aa                         |    5 +
 seq/n2.aa                         |   28 +
 seq/n2_fs.lib                     |   84 +
 seq/n2s.aa                        |    8 +
 seq/n2t.aa                        |   16 +
 seq/n_fs.lib                      |   20 +
 seq/ngt.aa                        |   20 +
 seq/ngts.aa                       |    7 +
 seq/oohu.aa                       |    6 +
 seq/oohu.raa                      |    7 +
 seq/prio_atepa.aa                 |    5 +
 seq/prot_test.lib                 |   51 +
 seq/prot_test.lseg                |   66 +
 seq/prot_test_s.lseg              |   25 +
 seq/qrhuld.aa                     |   15 +
 seq/titin_hum.aa                  |  431 ++++
 seq/titin_hum.seq                 | 1174 ++++++++++
 seq/xurt8c.aa                     |    5 +
 seq/xurt8c.lc                     |    5 +
 seq/xurtg.aa                      |    5 +
 sql/README                        |   26 +
 sql/create_seq_demo.sql           |   30 +
 sql/join_up50.pl                  |   99 +
 sql/mysql_demo1.sql               |    6 +
 sql/mysql_demo_pv.sql             |    6 +
 sql/nr_to_sql.pl                  |  103 +
 sql/pirpsd.sql                    |    8 +
 sql/psql_demo.sql                 |    7 +
 sql/psql_demo1.sql                |    6 +
 sql/psql_demo_pv.sql              |    7 +
 src/a_mark.h                      |   51 +
 src/aamap.h                       |   17 +
 src/ag_stats.c                    |  129 ++
 src/aln_structs.h                 |   61 +
 src/alt_parms.h                   |  399 ++++
 src/altlib.h                      |  142 ++
 src/apam.c                        |  502 +++++
 src/best_stats.h                  |   52 +
 src/build_ares.c                  |  248 +++
 src/c_dispn.c                     |  573 +++++
 src/cal_cons.c                    | 1226 +++++++++++
 src/cal_cons2.c                   | 1164 ++++++++++
 src/cal_consf.c                   |  591 +++++
 src/comp_lib9.c                   | 3052 ++++++++++++++++++++++++++
 src/compacc2.c                    | 4119 +++++++++++++++++++++++++++++++++++
 src/compacc2e.c                   | 4316 +++++++++++++++++++++++++++++++++++++
 src/dec_pthr_subs.c               |  246 +++
 src/dec_pthr_subs.h               |   42 +
 src/defs.h                        |  171 ++
 src/doinit.c                      |  975 +++++++++
 src/drop_func.h                   |  185 ++
 src/dropff2.c                     | 1394 ++++++++++++
 src/dropfs2.c                     | 1681 +++++++++++++++
 src/dropfx.c                      | 4072 ++++++++++++++++++++++++++++++++++
 src/dropfx2.c                     | 3892 +++++++++++++++++++++++++++++++++
 src/dropfz2.c                     | 3969 ++++++++++++++++++++++++++++++++++
 src/dropfz3.c                     | 3864 +++++++++++++++++++++++++++++++++
 src/dropgsw2.c                    | 1128 ++++++++++
 src/dropgsw2.h                    |   46 +
 src/dropnfa.c                     | 2250 +++++++++++++++++++
 src/dropnfa.h                     |   84 +
 src/dropnnw2.c                    |  900 ++++++++
 src/dropnsw.c                     |  424 ++++
 src/dyn_string.h                  |   30 +
 src/faatran.c                     |  445 ++++
 src/getenv.c                      |   56 +
 src/getopt.c                      |   64 +
 src/getseq.c                      |  313 +++
 src/global_sse2.c                 |  547 +++++
 src/global_sse2.h                 |   41 +
 src/glocal_sse2.c                 |  596 +++++
 src/glocal_sse2.h                 |   41 +
 src/h_altlib.h                    |   28 +
 src/htime.c                       |   43 +
 src/initfa.c                      | 3183 +++++++++++++++++++++++++++
 src/karlin.c                      |  519 +++++
 src/last_tat.c                    |  155 ++
 src/last_thresh.c                 |   62 +
 src/lav_defs.h                    |   44 +
 src/lib_sel.c                     |  341 +++
 src/list_db.c                     |  245 +++
 src/llgetaa.c                     |  497 +++++
 src/lsim4.c                       |  998 +++++++++
 src/lsim4.h                       |  145 ++
 src/map_db.c                      |  600 ++++++
 src/mm_file.h                     |  153 ++
 src/mmgetaa.c                     | 1116 ++++++++++
 src/mrandom.c                     |   97 +
 src/msg.h                         |   57 +
 src/mshowalign2.c                 |  999 +++++++++
 src/mshowbest.c                   |  665 ++++++
 src/mw.h                          |   49 +
 src/mysql_lib.c                   |  636 ++++++
 src/ncbl2_head.h                  |   35 +
 src/ncbl2_mlib.c                  | 2442 +++++++++++++++++++++
 src/ncbl_head.h                   |   33 +
 src/ncbl_lib.c                    |  491 +++++
 src/nmgetlib.c                    | 2254 +++++++++++++++++++
 src/param.h                       |  251 +++
 src/pcomp_bufs.h                  |   33 +
 src/pcomp_subs2.c                 |  686 ++++++
 src/pgsql_lib.c                   |  631 ++++++
 src/print_pssm.c                  |  793 +++++++
 src/pssm_asn_subs.c               | 1756 +++++++++++++++
 src/pthr_subs.h                   |   49 +
 src/pthr_subs2.c                  |  377 ++++
 src/randtest.c                    |   38 +
 src/re_getlib.c                   |  146 ++
 src/res_stats.c                   |  703 ++++++
 src/rstruct.h                     |   16 +
 src/sc_to_e.c                     |   71 +
 src/scaleswn.c                    | 3136 +++++++++++++++++++++++++++
 src/scaleswt.c                    | 1566 ++++++++++++++
 src/showrss.c                     |   82 +
 src/smith_waterman_altivec.c      | 3086 ++++++++++++++++++++++++++
 src/smith_waterman_altivec.h      |   26 +
 src/smith_waterman_sse2.c         |  432 ++++
 src/smith_waterman_sse2.h         |   43 +
 src/structs.h                     |  196 ++
 src/tatstats.c                    |  583 +++++
 src/tatstats.h                    |  160 ++
 src/thr_buf_structs.h             |  119 +
 src/thr_bufs2.h                   |   41 +
 src/uascii.h                      |   59 +
 src/upam.h                        |  872 ++++++++
 src/url_subs.c                    |  383 ++++
 src/uthr_subs.h                   |   52 +
 src/wm_align.c                    |  581 +++++
 src/work_thr2.c                   |  492 +++++
 test/results/README               |    1 +
 test/test.bat                     |   73 +
 test/test.sh                      |   79 +
 test/test2.sh                     |   53 +
 test/test2G.sh                    |   79 +
 test/test2V.sh                    |   28 +
 test/test_mpi.pbs                 |   59 +
 test/test_mpi1.pbs                |   27 +
 test/test_mpi2.pbs                |   33 +
 test/test_s.sh                    |   47 +
 test/test_z.sh                    |   22 +
 360 files changed, 116366 insertions(+), 269 deletions(-)

diff --git a/COPYRIGHT b/COPYRIGHT
new file mode 100644
index 0000000..0c5a7b4
--- /dev/null
+++ b/COPYRIGHT
@@ -0,0 +1,19 @@
+   Copyright (c) 1996, 1997, 1998, 1999, 2002, 2014, 2015 by William R. Pearson
+   and The Rector & Visitors of the University of Virginia
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under this License is distributed on an "AS
+   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+   express or implied.  See the License for the specific language
+   governing permissions and limitations under the License. 
+
+   Code in the smith_waterman_sse2.c and smith_waterman_sse2.h files
+   is copyright (c) 2006 by Michael Farrar.  Code in the
+   global_sse2.c, global_sse2.h, glocal_sse2.c, and glocal_sse2.h
+   files is copyright (c) 2010 by Michael Farrar.
diff --git a/FASTA_LIST b/FASTA_LIST
new file mode 100644
index 0000000..a7b6bf7
--- /dev/null
+++ b/FASTA_LIST
@@ -0,0 +1,17 @@
+
+4 Aug 2010
+
+If you regularly install the latest version of the FASTA package from
+http://faculty.virginia.edu/wrpearson/fasta, you may want to join the
+fasta_list SYMPA mailing list.  I use this list to announce new
+releases and solicit bug reports.
+
+To join the mailing list, go to the WWW page at:
+
+   lists.virginia.edu/sympa/info/fasta_list
+
+Select the "Subscribe" option on the lower left, and at the linked
+page, enter your email address, and click "submit".  You will be asked
+to confirm your membership in the mailing list.
+
+Bill Pearson
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..e06d208
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,202 @@
+Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "{}"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright {yyyy} {name of copyright owner}
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
diff --git a/README b/README
new file mode 100644
index 0000000..950ae70
--- /dev/null
+++ b/README
@@ -0,0 +1,77 @@
+
+July, 2015
+
+This version of the FASTA programs is fasta-36.3.8.  Since March, 2011
+(fasta-36.3.4), the FASTA programs are no longer interactive.  Typing
+bin/fasta36 (or any of the other programs) provides a help message.
+The "classic" interactive mode is available by typing "fasta36 -I".
+In addition, there is only one version of the programs, "fasta36",
+"ssearch36", etc., which is threaded by default on Unix/Linux/MacOSX.
+
+As of November, 2014, the FASTA program code is available under the
+Apache 2.0 open source license.
+
+Up to date release notes are available in the file doc/readme.v36
+
+Documentation on the fasta3 version programs is available in the files:
+
+	doc/fasta36.1	(unix man page)
+
+	doc/changes_v36.html      (short descriptions of enhancements to
+				   FASTA programs)
+
+	doc/readme.v36	(text descriptions of bug fixes and version history)
+
+	doc/fasta_guide.tex 	(Latex file which describes fasta-36,
+				 and provides an introduction to the FASTA programs,
+				 their use and installation.)
+
+	doc/fasta_guide.pdf	(printable/viewable description of fasta-36)
+
+The latter two files provide background information on installing the
+fasta programs (in particular, the FASTLIBS file), that new users of
+the fasta3 package may find useful.
+
+================================================================
+
+The FASTA distribution directory (this directory) has been
+substantially re-organized to make it easier to find things.  However,
+some documentation has not yet been completely updated to reflect the
+re-organization, so some things may not make sense.
+
+Files can now be found in several sub-directories
+      bin/	(pre-compiled binaries for some architectures)
+      conf/	example fastlibs files
+      data/	scoring matrices
+      doc/	documentation files
+      make/	make files
+      misc/	perl scripts to reformat -m 9 output, convert -R search.res files for 'R', and embed domains in shuffled sequences
+      scripts/	perl scripts for -V (annotate alignments) and -E (expand library) options
+      seq/ 	test sequences
+      src/	source code
+      sql/	sql files and scripts for using the sql database access
+      test/	test scripts
+
+For some binary distributions, only the doc/, data/, seq/, and bin/
+directories are provided.
+
+================
+
+To make the standard FASTA programs:
+
+   cd src
+   make -f ../make/Makefile.linux_sse2 all
+
+where "../make/Makefile.linux_sse2" is the appropriate file for your system. 
+
+The executable programs will then be found in ../bin
+(e.g. ../bin/fasta36, etc.)
+
+For a simple test of a program, try (from the src directory)
+
+   ../bin/fasta36 -q ../seq/mgstm1.aa ../seq/prot_test.lseg     
+
+================================================================
+
+Bill Pearson
+wrp at virginia.edu
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..30b64a3
--- /dev/null
+++ b/README.md
@@ -0,0 +1,77 @@
+
+## The FASTA package - protein and DNA sequence similarity searching and alignment programs
+
+The **FASTA** (pronounced FAST-Aye, not FAST-Ah) programs are a
+comprehensive set of similarity searching and alignment programs for
+searching protein and DNA sequence databases.  Like the **BLAST** programs `blastp` and `blastn`, the `fasta` program itself uses a rapid heuristic strategy for finding similar regions in protein and DNA sequences.  But in
+addition to heuristic similarity searching, the FASTA package provides
+programs for rigorous local (`ssearch`) and global (`ggsearch`)
+similarity searching, as well as a program for finding non-overlapping
+sequence similarities (`lalign`).  Like BLAST, the FASTA package also
+includes programs for aligning translated DNA sequences against
+proteins (`fastx`, `fasty` are equivalent to `blastx`, `tfastx`,
+`tfasty` are similar to `tblastn`).
+
+#### September, 2016
+
+The current FASTA version is fasta-36.3.8e.
+
+The fasta-36.3.8e version includes a new directory, `psisearch2`, with
+scripts to run iterative PSSM (PSI-BLAST or SSEARCH36) searches using
+an improved strategy for reducing PSSM contamination due to alignment
+over-extension.
+
+As of November, 2014, the FASTA program code is available under the
+Apache 2.0 open source license.
+
+Up-to-date release notes are available in the file `doc/readme.v36`.
+
+Documentation on the FASTA programs is available in the files:
+
+dir/file | description
+----------|------------
+`doc/fasta36.1` | (unix man page)
+`doc/changes_v36.html` | (short descriptions of enhancements to FASTA programs)
+`doc/readme.v36` | (text descriptions of bug fixes and version history)
+`doc/fasta_guide.tex` | (Latex file which describes fasta36, and provides an introduction to the FASTA programs, their use and installation.)
+`doc/fasta_guide.pdf` | (printable/viewable description of fasta-36)
+
+`fasta_guide.pdf` provides background information on installing the
+fasta programs (in particular, the `FASTLIBS` file), that new users of
+the fasta3 package may find useful.
+
+Parts of the FASTA package are distributed across several sub-directories
+
+dir | description
+----|------------
+`bin/` | (pre-compiled binaries for some architectures)
+`conf/` | example `FASTLIBS` files (files for finding libraries)
+`data/` | scoring matrices
+`doc/` | documentation files
+`make/` | make files
+`misc/` | perl scripts to reformat -m 9 output, convert -R search.res files for 'R', and embed domains in shuffled sequences
+`psisearch2/` | perl/python scripts implementing the new `psisearch2_msa` iterative PSSM search
+`scripts/` |  perl scripts for -V (annotate alignments) and -E (expand library) options
+`seq/` | test sequences
+`src/` | source code
+`sql/` | sql files and scripts for using the sql database access
+`test/` | test scripts
+
+For some binary distributions, only the `doc/`, `data/`, `seq/`, and `bin/`
+directories are provided.
+
+To make the standard FASTA programs:
+```
+   cd src
+   make -f ../make/Makefile.linux_sse2 all
+```
+where `../make/Makefile.linux_sse2` is the appropriate Makefile for your system. 
+
+The executable programs will then be found in `../bin`
+(e.g. `../bin/fasta36`, etc.)
+
+For a simple test of a program, try (from the src directory)
+```
+   ../bin/fasta36 -q ../seq/mgstm1.aa ../seq/prot_test.lseg
+```
+
diff --git a/bin/README b/bin/README
new file mode 100644
index 0000000..da233d3
--- /dev/null
+++ b/bin/README
@@ -0,0 +1 @@
+Placeholder file to create destination for program binaries.
diff --git a/conf/README b/conf/README
new file mode 100644
index 0000000..5758016
--- /dev/null
+++ b/conf/README
@@ -0,0 +1,33 @@
+
+22-Jan-2014
+
+fasta36/conf
+
+================
+
+Files that allow FASTA programs to find libraries using abbreviations.
+
+For example, if the fast_libs_e.www has the line:
+
+Swissprot (NCBI)$0Q${SLIB2}/fa_dbs/swissprot.lseg
+
+and export SLIB2=/slib2
+
+then:
+
+fasta36 ../seq/mgstm1.aa q
+
+is equivalent to:
+
+fasta36 ../seq/mgstm1.aa /slib2/fa_dbs/swissprot.lseg
+
+================
+
+fastlibs -- the original library abbreviation file
+
+fast_new -- allows abbreviations longer than one letter by using "+abbrev+"
+  NBRF PIR1 Annotated Protein Database (rel 56)$0+pir1+/slib2/fa_dbs/pir1.lseg
+
+fast_libs_e.www -- use environment variables in library file name
+
+(+long+ abbreviations and ${SLIB2} environment variables can be combined)
diff --git a/conf/fast_libs_e.www b/conf/fast_libs_e.www
new file mode 100644
index 0000000..d615eaa
--- /dev/null
+++ b/conf/fast_libs_e.www
@@ -0,0 +1,23 @@
+PIR1 Annotated (rel. 66) $0A${SLIB2}/fa_dbs/pir1.lseg
+Swissprot (NCBI)$0Q${SLIB2}/fa_dbs/swissprot.lseg
+NCBI Refseq NP only$0P${SLIB2}/fa_dbs/refseq_np.lseg
+NCBI Refseq proteins$0S${SLIB2}/fa_dbs/refseq_protein.lseg
+NCBI PDB structures$0D${SLIB2}/fa_dbs/pdbaa.lseg
+NCBI NR non-redundant$0N${SLIB2}/fa_dbs/nr.lseg
+Human/Refseq proteins$0H${SLIB2}/genomes/hum_refseq.lseg
+Mouse/Refseq proteins$0M${SLIB2}/genomes/mus_refseq.lseg
+Rat/Refseq proteins$0R${SLIB2}/genomes/rat_refseq.lseg
+Drosophila/RefSeq proteins$0F${SLIB2}/genomes/d_melanogaster.lseg
+C. elegans/RefSeq proteins$0W${SLIB2}/genomes/c_elegans.lseg
+Arabidopsis/RefSeq proteins$0L${SLIB2}/genomes/a_thaliana.lseg
+Yeast (S. cerevisiae)$0Y${SLIB2}/genomes/s_cerevisiae.lseg
+E. coli proteins$0E${SLIB2}/genomes/ecoli_k12.lseg
+GB170.0 Primate$1P@${RDLIB2}/gb_asn/gbpri.nam
+GB170.0 Rodent$1R@${RDLIB2}/gb_asn/gbrod.nam
+GB170.0 other Mammal$1M@${RDLIB2}/gb_asn/gbmam.nam
+GB170.0 verteBrates$1B@${RDLIB2}/gb_asn/gbvrt.nam
+GB170.0 Invertebrates$1I@${RDLIB2}/gb_asn/gbinv.nam
+GB170.0 Bacteria$1T@${RDLIB2}/gb_asn/gbbct.nam
+GB170.0 pLants$1L@${RDLIB2}/gb_asn/gbpln.nam
+GB171.0 Viral$1V@${RDLIB2}/gb_asn/gbvrl.nam
+GB171.0 Phage$1G@${RDLIB2}/gb_asn/gbphg.nam
diff --git a/conf/fast_new b/conf/fast_new
new file mode 100644
index 0000000..f5871a8
--- /dev/null
+++ b/conf/fast_new
@@ -0,0 +1,38 @@
+NBRF PIR1 Annotated Protein Database (rel 56)$0+pir1+/slib2/fa_dbs/pir1.lseg
+NBRF Protein database (complete)$0+nbrf+@/seqlib/lib/NBRF.nam
+NRL_3d structure database$0D/seqlib/lib/nrl_3d.seq 5
+NCBI/Blast non-redundant proteins$0+nr+/slib2/fa_dbs/nr.lseg
+NCBI/Blast Swissprot$0+sp+/slib2/fa_dbs/swissprot.lseg
+GENPEPT Translated Protein Database (rel 106.0)$0G/slib2/fa_dbs/genpept.fsa
+Swiss-Prot Release 34$0S/slib0/lib/swiss.seq 5
+Yeast proteins$0Y/slib0/genomes/yeast_nr.pep
+C. elegans blast server$0W/slib2/fa_dbs/C.elegans_blast.fa
+E. coli proteome$0E/slib0/genomes/ecoli.npep
+H. influenzae proteome$0I/slib0/genomes/hinf.npep
+H. pylori proteome$0L/slib0/genomes/hpyl.npep
+NCBI Entrez Human proteins$0H/slib2/fa_dbs/human.aa
+M. pneumococcus proteome$0M/slib0/genomes/mpneu.npep
+M. jannaschii proteome$0J/slib0/genomes/mjan.npep
+Synechocystis proteome$0C/slib0/genomes/synecho.npep
+GB108.0 Invertebrates$1I/seqlib2/gcggenbank/gb_in.seq 6
+GB108.0 Bacteria$1T@/slib0/lib/gb_ba.nam 6
+GB108.0 Primate$1P@/slib0/lib/gb_pri.nam
+GB108.0 Rodent$1R/seqlib2/gcggenbank/gb_ro.seq 6
+GB108.0 other Mammal$1M/seqlib2/gcggenbank/gb_om.seq 6
+GB108.0 verteBrates$1B/seqlib2/gcggenbank/gb_ov.seq 6
+GB108.0 Expressed Seq. Tags$1E@/slib0/lib/gb_est.nam
+GB108.0 High throughput genomic$1h/seqlib2/gcggenbank/gb_htg.seq 6
+GB108.0 pLants$1L@/slib0/lib/gb_pl.nam 6
+GB108.0 genome Survey sequences$1S@/slib0/lib/gb_gss.nam 6
+GB108.0 Viral$1V/seqlib2/gcggenbank/gb_vi.seq 6
+GB108.0 Phage$1G/seqlib2/gcggenbank/gb_ph.seq 6
+GB108.0 Unannotated$1D/seqlib2/gcggenbank/gb_un.seq 6
+GB108.0 New$1u/seqlib2/gcggenbank/gb_new.seq 6
+GB108.0 All sequences (long)$1A@/slib0/lib/genbank.nam
+Yeast genome$1Y@/seqlib/yeast/yeast_chr.nam
+E. coli genome$1D/slib0/genomes/ecoli.gbk 1
+Blast Human ESTs$1F/slib2/fa_dbs/est_human
+TIGR Human Gene Index$1K/slib2/fa_dbs/HGI.nr.031898
+Blast Mouse ESTs$1C/slib2/fa_dbs/est_mouse
+TIGR Mouse Gene Index$1J/slib2/fa_dbs/MGI.nr.022498
+NCBI/BLAST NR DNA$1n/slib2/fa_dbs/nt
diff --git a/conf/fastlibs b/conf/fastlibs
new file mode 100644
index 0000000..6b94a66
--- /dev/null
+++ b/conf/fastlibs
@@ -0,0 +1,42 @@
+NBRF PIR1 Annotated Protein Database (rel 56)$0A/seqlib/lib/pir1.seq 5
+NBRF PIR1 Annotated (seg) (rel 56)$0B/slib2/fa_dbs/pir1.seg
+NBRF Protein database (complete)$0P@/seqlib/lib/NBRF.nam
+NRL_3d structure database$0D/seqlib/lib/nrl_3d.seq 5
+NCBI/Blast non-redundant proteins$0N/slib2/fa_dbs/nr
+NCBI/Blast non-redundant proteins (seg)$0K/slib2/fa_dbs/nr.seg
+NCBI/Blast Swissprot$0Q/slib2/fa_dbs/swissprot
+NCBI/Blast Swissprot (seg)$0R/slib2/fa_dbs/swissprot.seg
+OWL 30.1 non-redundant protein database$0O/slib2/OWL/owl.seq 5
+GENPEPT Translated Protein Database (rel 106.0)$0G/slib2/fa_dbs/genpept.fsa
+Swiss-Prot Release 34$0S/slib0/lib/swiss.seq 5
+Yeast proteins$0Y/slib0/genomes/yeast_nr.pep
+C. elegans blast server$0W/slib2/fa_dbs/C.elegans_blast.fa
+E. coli proteome$0E/slib0/genomes/ecoli.npep
+H. influenzae proteome$0I/slib0/genomes/hinf.npep
+H. pylori proteome$0L/slib0/genomes/hpyl.npep
+NCBI Entrez Human proteins$0H/slib2/fa_dbs/human.aa
+M. pneumococcus proteome$0M/slib0/genomes/mpneu.npep
+M. jannaschii proteome$0J/slib0/genomes/mjan.npep
+Synechocystis proteome$0C/slib0/genomes/synecho.npep
+GB108.0 Invertebrates$1I/seqlib2/gcggenbank/gb_in.seq 6
+GB108.0 Bacteria$1T@/slib0/lib/gb_ba.nam 6
+GB108.0 Primate$1P@/slib0/lib/gb_pri.nam
+GB108.0 Rodent$1R/seqlib2/gcggenbank/gb_ro.seq 6
+GB108.0 other Mammal$1M/seqlib2/gcggenbank/gb_om.seq 6
+GB108.0 verteBrates$1B/seqlib2/gcggenbank/gb_ov.seq 6
+GB108.0 Expressed Seq. Tags$1E@/slib0/lib/gb_est.nam
+GB108.0 High throughput genomic$1h/seqlib2/gcggenbank/gb_htg.seq 6
+GB108.0 pLants$1L@/slib0/lib/gb_pl.nam 6
+GB108.0 genome Survey sequences$1S@/slib0/lib/gb_gss.nam 6
+GB108.0 Viral$1V/seqlib2/gcggenbank/gb_vi.seq 6
+GB108.0 Phage$1G/seqlib2/gcggenbank/gb_ph.seq 6
+GB108.0 Unannotated$1D/seqlib2/gcggenbank/gb_un.seq 6
+GB108.0 New$1u/seqlib2/gcggenbank/gb_new.seq 6
+GB108.0 All sequences (long)$1A@/slib0/lib/genbank.nam
+Yeast genome$1Y@/seqlib/yeast/yeast_chr.nam
+E. coli genome$1D/slib0/genomes/ecoli.gbk 1
+Blast Human ESTs$1F/slib2/fa_dbs/est_human
+TIGR Human Gene Index$1K/slib2/fa_dbs/HGI.nr.031898
+Blast Mouse ESTs$1C/slib2/fa_dbs/est_mouse
+TIGR Mouse Gene Index$1J/slib2/fa_dbs/MGI.nr.022498
+NCBI/BLAST NR DNA$1n/slib2/fa_dbs/nt
diff --git a/data/VTML_10.mat b/data/VTML_10.mat
new file mode 100644
index 0000000..9646f64
--- /dev/null
+++ b/data/VTML_10.mat
@@ -0,0 +1,35 @@
+#
+#  VTML_10
+#
+# This matrix was produced from: vtml_10qij.mat using vtml_P.mat background frequencies
+#
+# VTML_10 substitution matrix, Units = bits/2.0
+# Expected score = -3.896435 bits; Entropy = 3.467957 bits
+# Target fraction identity = 0.9105
+# Lowest Score = -20, Highest Score= 12
+#
+    A   R   N   D   C   Q   E   G   H   I   L   K   M   F   P   S   T   W   Y   V   B   Z   X   * 
+A   7  -8  -8  -8  -5  -7  -7  -6  -9  -9  -9  -8  -7 -10  -6  -4  -5 -11 -10  -5  -8  -7   0  -7 
+R  -8   8  -7 -16  -9  -4 -14  -9  -5 -10 -10  -2  -8 -12  -9  -8  -8 -10  -9 -11 -11  -9   0  -7 
+N  -8  -7   9  -3 -10  -5  -7  -7  -4 -11 -11  -5  -9 -12 -10  -4  -5 -12  -8 -11   3  -6   0  -7 
+D  -8 -16  -3   8 -18  -6  -3  -8  -6 -15 -19  -7 -11 -20  -8  -7  -8 -12 -17 -11   2  -4   0  -7 
+C  -5  -9 -10 -18  12 -17 -18  -9  -8  -7 -16 -17  -6 -17 -11  -5  -7 -19  -6  -5 -14 -17   0  -7 
+Q  -7  -4  -5  -6 -17   9  -3 -10  -3 -12  -8  -4  -6 -10  -7  -6  -7 -19 -16  -9  -5   3   0  -7 
+E  -7 -14  -7  -3 -18  -3   8  -8  -8 -12 -10  -4 -10 -18  -8  -6  -7 -20  -9  -9  -5   2   0  -7 
+G  -6  -9  -7  -8  -9 -10  -8   7  -9 -19 -13  -9 -12 -13 -10  -6 -10 -11 -12 -12  -7  -9   0  -7 
+H  -9  -5  -4  -6  -8  -3  -8  -9  10 -11  -9  -7 -16  -7  -8  -6  -7  -8  -3 -10  -5  -5   0  -7 
+I  -9 -10 -11 -15  -7 -12 -12 -19 -11   8  -3 -11  -3  -7 -13 -11  -7  -8 -10  -1 -13 -12   0  -7 
+L  -9 -10 -11 -19 -16  -8 -10 -13  -9  -3   7 -10  -2  -5  -9 -10  -9  -8  -8  -5 -15  -9   0  -7 
+K  -8  -2  -5  -7 -17  -4  -4  -9  -7 -11 -10   8  -7 -18  -8  -7  -6 -10 -10 -10  -6  -4   0  -7 
+M  -7  -8  -9 -11  -6  -6 -10 -12 -16  -3  -2  -7  10  -4 -12 -10  -6 -16 -15  -5 -10  -8   0  -7 
+F -10 -12 -12 -20 -17 -10 -18 -13  -7  -7  -5 -18  -4   9 -11  -9 -10  -5  -1  -8 -16 -14   0  -7 
+P  -6  -9 -10  -8 -11  -7  -8 -10  -8 -13  -9  -8 -12 -11   9  -6  -8 -11 -19  -9  -9  -7   0  -7 
+S  -4  -8  -4  -7  -5  -6  -6  -6  -6 -11 -10  -7 -10  -9  -6   8  -3 -10  -8 -10  -5  -6   0  -7 
+T  -5  -8  -5  -8  -7  -7  -7 -10  -7  -7  -9  -6  -6 -10  -8  -3   8 -19 -10  -6  -6  -7   0  -7 
+W -11 -10 -12 -12 -19 -19 -20 -11  -8  -8  -8 -10 -16  -5 -11 -10 -19  12  -4 -17 -12 -19   0  -7 
+Y -10  -9  -8 -17  -6 -16  -9 -12  -3 -10  -8 -10 -15  -1 -19  -8 -10  -4  10 -10 -12 -12   0  -7 
+V  -5 -11 -11 -11  -5  -9  -9 -12 -10  -1  -5 -10  -5  -8  -9 -10  -6 -17 -10   7 -11  -9   0  -7 
+B  -8 -11   3   2 -14  -5  -5  -7  -5 -13 -15  -6 -10 -16  -9  -5  -6 -12 -12 -11   8  -4   0  -7 
+Z  -7  -9  -6  -4 -17   3   2  -9  -5 -12  -9  -4  -8 -14  -7  -6  -7 -19 -12  -9  -4   8   0  -7 
+X   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1  -7 
+*  -7  -7  -7  -7  -7  -7  -7  -7  -7  -7  -7  -7  -7  -7  -7  -7  -7  -7  -7  -7  -7  -7  -7   1 
\ No newline at end of file
diff --git a/data/VTML_120.mat b/data/VTML_120.mat
new file mode 100644
index 0000000..61fbf77
--- /dev/null
+++ b/data/VTML_120.mat
@@ -0,0 +1,34 @@
+#
+#  VTML_120
+#
+# This matrix was produced from: vtml_120qij.mat using vtml_P.mat background frequencies
+#
+# VTML_120 substitution matrix, Units = bits/2.0
+# Expected score = -0.712191 bits; Entropy = 0.933608 bits
+# Target fraction identity = 0.3740
+# Lowest Score = -7, Highest Score= 11
+#
+    A   R   N   D   C   Q   E   G   H   I   L   K   M   F   P   S   T   W   Y   V   B   Z   X
+A   4  -2  -1  -1   0  -1  -1   0  -2  -2  -2  -1  -1  -3  -1   1   0  -4  -3   0  -1  -1   0
+R  -2   6  -1  -3  -3   1  -2  -3   0  -4  -3   3  -2  -4  -2  -1  -2  -3  -3  -3  -2   0   0
+N  -1  -1   6   2  -3   0   0  -1   1  -4  -4   0  -3  -4  -2   1   0  -5  -2  -3   4   0   0
+D  -1  -3   2   6  -5   0   2  -1  -1  -5  -6  -1  -4  -7  -2  -1  -1  -6  -5  -4   4   1   0
+C   0  -3  -3  -5  10  -4  -5  -2  -2  -1  -4  -4  -1  -4  -3   0  -1  -6  -1   0  -4  -4   0
+Q  -1   1   0   0  -4   5   2  -2   1  -3  -2   1  -1  -3  -1  -1  -1  -6  -4  -2   0   3   0
+E  -1  -2   0   2  -5   2   5  -2  -1  -4  -4   1  -3  -5  -2  -1  -1  -6  -3  -3   1   3   0
+G   0  -3  -1  -1  -2  -2  -2   6  -2  -6  -5  -2  -4  -5  -3  -1  -2  -4  -5  -4  -1  -2   0
+H  -2   0   1  -1  -2   1  -1  -2   7  -3  -2  -1  -3  -1  -2  -1  -1  -2   2  -3   0   0   0
+I  -2  -4  -4  -5  -1  -3  -4  -6  -3   5   2  -3   2   0  -4  -3  -1  -2  -2   3  -4  -3   0
+L  -2  -3  -4  -6  -4  -2  -4  -5  -2   2   5  -3   2   1  -3  -3  -2  -2  -1   1  -5  -3   0
+K  -1   3   0  -1  -4   1   1  -2  -1  -3  -3   5  -2  -5  -1  -1  -1  -4  -3  -3   0   1   0
+M  -1  -2  -3  -4  -1  -1  -3  -4  -3   2   2  -2   7   1  -4  -3  -1  -4  -3   1  -3  -2   0
+F  -3  -4  -4  -7  -4  -3  -5  -5  -1   0   1  -5   1   7  -4  -3  -3   1   4  -1  -5  -4   0
+P  -1  -2  -2  -2  -3  -1  -2  -3  -2  -4  -3  -1  -4  -4   7   0  -1  -4  -5  -3  -2  -1   0
+S   1  -1   1  -1   0  -1  -1  -1  -1  -3  -3  -1  -3  -3   0   4   2  -3  -2  -2   0  -1   0
+T   0  -2   0  -1  -1  -1  -1  -2  -1  -1  -2  -1  -1  -3  -1   2   5  -6  -3   0   0  -1   0
+W  -4  -3  -5  -6  -6  -6  -6  -4  -2  -2  -2  -4  -4   1  -4  -3  -6  11   2  -4  -5  -6   0
+Y  -3  -3  -2  -5  -1  -4  -3  -5   2  -2  -1  -3  -3   4  -5  -2  -3   2   7  -3  -3  -3   0
+V   0  -3  -3  -4   0  -2  -3  -4  -3   3   1  -3   1  -1  -3  -2   0  -4  -3   4  -3  -2   0
+B  -1  -2   4   4  -4   0   1  -1   0  -4  -5   0  -3  -5  -2   0   0  -5  -3  -3   6   1   0
+Z  -1   0   0   1  -4   3   3  -2   0  -3  -3   1  -2  -4  -1  -1  -1  -6  -3  -2   1   5   0
+X   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1
diff --git a/data/VTML_160.mat b/data/VTML_160.mat
new file mode 100644
index 0000000..6f155ae
--- /dev/null
+++ b/data/VTML_160.mat
@@ -0,0 +1,34 @@
+#
+#  VTML_160
+#
+# This matrix was produced from: vtml_160qij.mat using vtml_P.mat background frequencies
+#
+# VTML_160 substitution matrix, Units = bits/3.0
+# Expected score = -0.493659 bits; Entropy = 0.617215 bits
+# Target fraction identity = 0.2884
+# Lowest Score = -8, Highest Score= 16
+#
+    A   R   N   D   C   Q   E   G   H   I   L   K   M   F   P   S   T   W   Y   V   B   Z   X
+A   5  -2  -1  -1   1  -1  -1   0  -2  -2  -2  -1  -1  -3   0   1   1  -5  -4   0  -1  -1   0
+R  -2   8  -1  -3  -3   2  -2  -3   1  -4  -4   4  -2  -5  -2  -1  -2  -4  -3  -4  -2   0   0
+N  -1  -1   7   3  -3   0   0   0   1  -5  -5   0  -3  -5  -2   1   0  -6  -2  -4   5   0   0
+D  -1  -3   3   8  -6   0   3  -1   0  -6  -7   0  -5  -8  -2   0  -1  -7  -6  -5   5   1   0
+C   1  -3  -3  -6  13  -5  -5  -3  -2  -1  -4  -5  -1  -4  -4   1  -1  -7  -1   1  -4  -5   0
+Q  -1   2   0   0  -5   6   3  -3   2  -4  -3   2  -1  -4  -1   0  -1  -7  -4  -3   0   4   0
+E  -1  -2   0   3  -5   3   6  -2  -1  -5  -4   1  -4  -6  -1   0  -1  -8  -4  -3   1   4   0
+G   0  -3   0  -1  -3  -3  -2   8  -3  -7  -7  -3  -5  -6  -3   0  -3  -5  -6  -5   0  -2   0
+H  -2   1   1   0  -2   2  -1  -3  10  -4  -3   0  -4   0  -2  -1  -1  -2   3  -4   0   0   0
+I  -2  -4  -5  -6  -1  -4  -5  -7  -4   6   3  -4   2   0  -5  -4  -1  -2  -2   4  -5  -4   0
+L  -2  -4  -5  -7  -4  -3  -4  -7  -3   3   6  -4   4   2  -3  -4  -2  -2  -1   2  -6  -3   0
+K  -1   4   0   0  -5   2   1  -3   0  -4  -4   6  -2  -6  -1  -1  -1  -5  -4  -3   0   1   0
+M  -1  -2  -3  -5  -1  -1  -4  -5  -4   2   4  -2   8   1  -4  -3  -1  -4  -3   1  -4  -2   0
+F  -3  -5  -5  -8  -4  -4  -6  -6   0   0   2  -6   1   9  -5  -3  -3   3   6  -1  -6  -5   0
+P   0  -2  -2  -2  -4  -1  -1  -3  -2  -5  -3  -1  -4  -5  10   0  -1  -5  -6  -3  -2  -1   0
+S   1  -1   1   0   1   0   0   0  -1  -4  -4  -1  -3  -3   0   5   2  -4  -2  -2   0   0   0
+T   1  -2   0  -1  -1  -1  -1  -3  -1  -1  -2  -1  -1  -3  -1   2   6  -7  -3   0   0  -1   0
+W  -5  -4  -6  -7  -7  -7  -8  -5  -2  -2  -2  -5  -4   3  -5  -4  -7  16   4  -5  -6  -7   0
+Y  -4  -3  -2  -6  -1  -4  -4  -6   3  -2  -1  -4  -3   6  -6  -2  -3   4  10  -3  -4  -4   0
+V   0  -4  -4  -5   1  -3  -3  -5  -4   4   2  -3   1  -1  -3  -2   0  -5  -3   5  -4  -3   0
+B  -1  -2   5   5  -4   0   1   0   0  -5  -6   0  -4  -6  -2   0   0  -6  -4  -4   7   1   0
+Z  -1   0   0   1  -5   4   4  -2   0  -4  -3   1  -2  -5  -1   0  -1  -7  -4  -3   1   6   0
+X   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1
diff --git a/data/VTML_20.mat b/data/VTML_20.mat
new file mode 100644
index 0000000..4ad6d45
--- /dev/null
+++ b/data/VTML_20.mat
@@ -0,0 +1,34 @@
+#
+#  VTML_20
+#
+# This matrix was produced from: vtml_20qij.mat using vtml_P.mat background frequencies
+#
+# VTML_20 substitution matrix, Units = bits/2.0
+# Expected score = -2.916179 bits; Entropy = 2.912514 bits
+# Target fraction identity = 0.8307
+# Lowest Score = -16, Highest Score= 12
+#
+    A   R   N   D   C   Q   E   G   H   I   L   K   M   F   P   S   T   W   Y   V   B   Z   X
+A   7  -7  -6  -6  -3  -5  -5  -4  -7  -7  -7  -6  -5  -8  -4  -2  -3  -9  -8  -3  -6  -5   0
+R  -7   8  -5 -12  -7  -2 -10  -7  -3  -8  -8   0  -6 -10  -7  -6  -6  -8  -7  -9  -8  -6   0
+N  -6  -5   8  -1  -8  -4  -5  -5  -3  -9  -9  -3  -7 -10  -8  -2  -4 -10  -6  -9   3  -4   0
+D  -6 -12  -1   8 -14  -4  -1  -6  -4 -12 -15  -5  -9 -16  -6  -5  -6 -10 -14  -9   3  -2   0
+C  -3  -7  -8 -14  12 -13 -14  -7  -6  -5 -12 -13  -4 -13  -9  -3  -5 -15  -4  -3 -11 -13   0
+Q  -5  -2  -4  -4 -13   9  -1  -8  -2  -9  -6  -2  -4  -8  -5  -4  -5 -15 -12  -7  -4   4   0
+E  -5 -10  -5  -1 -14  -1   7  -6  -6 -10  -8  -2  -8 -14  -6  -5  -6 -16  -7  -7  -3   3   0
+G  -4  -7  -5  -6  -7  -8  -6   7  -7 -15 -11  -7 -10 -11  -8  -4  -8  -9 -10 -10  -5  -7   0
+H  -7  -3  -3  -4  -6  -2  -6  -7  10  -9  -7  -5 -12  -5  -6  -5  -5  -6  -1  -8  -3  -4   0
+I  -7  -8  -9 -12  -5  -9 -10 -15  -9   7  -2  -9  -2  -5 -10  -9  -5  -6  -8   1 -10  -9   0
+L  -7  -8  -9 -15 -12  -6  -8 -11  -7  -2   6  -8   0  -3  -7  -8  -7  -6  -6  -3 -12  -7   0
+K  -6   0  -3  -5 -13  -2  -2  -7  -5  -9  -8   7  -5 -14  -6  -5  -4  -9  -8  -8  -4  -2   0
+M  -5  -6  -7  -9  -4  -4  -8 -10 -12  -2   0  -5  10  -3 -10  -8  -4 -13 -11  -3  -8  -6   0
+F  -8 -10 -10 -16 -13  -8 -14 -11  -5  -5  -3 -14  -3   9  -9  -7  -8  -3   0  -6 -13 -11   0
+P  -4  -7  -8  -6  -9  -5  -6  -8  -6 -10  -7  -6 -10  -9   9  -4  -6  -9 -15  -7  -7  -5   0
+S  -2  -6  -2  -5  -3  -4  -5  -4  -5  -9  -8  -5  -8  -7  -4   7  -1  -8  -6  -8  -3  -4   0
+T  -3  -6  -4  -6  -5  -5  -6  -8  -5  -5  -7  -4  -4  -8  -6  -1   8 -15  -8  -4  -5  -5   0
+W  -9  -8 -10 -10 -15 -15 -16  -9  -6  -6  -6  -9 -13  -3  -9  -8 -15  12  -2 -13 -10 -15   0
+Y  -8  -7  -6 -14  -4 -12  -7 -10  -1  -8  -6  -8 -11   0 -15  -6  -8  -2   9  -8 -10  -9   0
+V  -3  -9  -9  -9  -3  -7  -7 -10  -8   1  -3  -8  -3  -6  -7  -8  -4 -13  -8   7  -9  -7   0
+B  -6  -8   3   3 -11  -4  -3  -5  -3 -10 -12  -4  -8 -13  -7  -3  -5 -10 -10  -9   8  -2   0
+Z  -5  -6  -4  -2 -13   4   3  -7  -4  -9  -7  -2  -6 -11  -5  -4  -5 -15  -9  -7  -2   8   0
+X   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1
diff --git a/data/VTML_200.mat b/data/VTML_200.mat
new file mode 100644
index 0000000..25a8f4a
--- /dev/null
+++ b/data/VTML_200.mat
@@ -0,0 +1,34 @@
+#
+#  VTML_200
+#
+# This matrix was produced from: vtml_200qij.mat using vtml_P.mat background frequencies
+#
+# VTML_200 substitution matrix, Units = bits/3.0
+# Expected score = -0.358430 bits; Entropy = 0.412084 bits
+# Target fraction identity = 0.2295
+# Lowest Score = -6, Highest Score= 15
+#
+    A   R   N   D   C   Q   E   G   H   I   L   K   M   F   P   S   T   W   Y   V   B   Z   X
+A   4  -2  -1  -1   1  -1  -1   0  -2  -1  -2  -1  -1  -3   0   1   1  -4  -3   0  -1  -1   0
+R  -2   7   0  -2  -3   2  -1  -2   1  -3  -3   4  -2  -4  -1  -1  -1  -3  -2  -3  -1   0   0
+N  -1   0   6   3  -2   1   1   0   1  -4  -4   1  -3  -4  -2   1   0  -5  -2  -3   4   1   0
+D  -1  -2   3   6  -4   1   3  -1   0  -5  -5   0  -4  -6  -1   0  -1  -6  -4  -4   4   2   0
+C   1  -3  -2  -4  12  -3  -4  -2  -2   0  -3  -4  -1  -3  -3   1   0  -6   0   1  -3  -3   0
+Q  -1   2   1   1  -3   5   2  -2   2  -3  -2   2  -1  -3  -1   0   0  -6  -3  -2   1   3   0
+E  -1  -1   1   3  -4   2   5  -1   0  -4  -4   1  -3  -5  -1   0  -1  -6  -3  -3   2   3   0
+G   0  -2   0  -1  -2  -2  -1   8  -2  -6  -5  -2  -4  -5  -2   0  -2  -5  -5  -4   0  -1   0
+H  -2   1   1   0  -2   2   0  -2   8  -3  -2   0  -3   0  -2   0  -1  -1   3  -3   0   1   0
+I  -1  -3  -4  -5   0  -3  -4  -6  -3   5   3  -3   2   0  -4  -3  -1  -2  -2   4  -4  -3   0
+L  -2  -3  -4  -5  -3  -2  -4  -5  -2   3   5  -3   3   2  -3  -3  -2  -1  -1   2  -4  -3   0
+K  -1   4   1   0  -4   2   1  -2   0  -3  -3   5  -2  -5  -1   0   0  -4  -3  -3   0   1   0
+M  -1  -2  -3  -4  -1  -1  -3  -4  -3   2   3  -2   6   1  -3  -2  -1  -3  -2   2  -3  -2   0
+F  -3  -4  -4  -6  -3  -3  -5  -5   0   0   2  -5   1   8  -4  -3  -3   3   5  -1  -5  -4   0
+P   0  -1  -2  -1  -3  -1  -1  -2  -2  -4  -3  -1  -3  -4   9   0  -1  -4  -5  -3  -1  -1   0
+S   1  -1   1   0   1   0   0   0   0  -3  -3   0  -2  -3   0   4   2  -4  -2  -2   0   0   0
+T   1  -1   0  -1   0   0  -1  -2  -1  -1  -2   0  -1  -3  -1   2   4  -5  -3   0   0   0   0
+W  -4  -3  -5  -6  -6  -6  -6  -5  -1  -2  -1  -4  -3   3  -4  -4  -5  15   4  -4  -5  -6   0
+Y  -3  -2  -2  -4   0  -3  -3  -5   3  -2  -1  -3  -2   5  -5  -2  -3   4   9  -2  -3  -3   0
+V   0  -3  -3  -4   1  -2  -3  -4  -3   4   2  -3   2  -1  -3  -2   0  -4  -2   4  -3  -2   0
+B  -1  -1   4   4  -3   1   2   0   0  -4  -4   0  -3  -5  -1   0   0  -5  -3  -3   6   2   0
+Z  -1   0   1   2  -3   3   3  -1   1  -3  -3   1  -2  -4  -1   0   0  -6  -3  -2   2   5   0
+X   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1
diff --git a/data/VTML_40.mat b/data/VTML_40.mat
new file mode 100644
index 0000000..0a9e637
--- /dev/null
+++ b/data/VTML_40.mat
@@ -0,0 +1,34 @@
+#
+#  VTML_40
+#
+# This matrix was produced from: vtml_40qij.mat using vtml_P.mat background frequencies
+#
+# VTML_40 substitution matrix, Units = bits/2.0
+# Expected score = -1.991667 bits; Entropy = 2.267456 bits
+# Target fraction identity = 0.6960
+# Lowest Score = -12, Highest Score= 12
+#
+    A   R   N   D   C   Q   E   G   H   I   L   K   M   F   P   S   T   W   Y   V   B   Z   X
+A   6  -5  -4  -4  -1  -3  -3  -2  -5  -5  -5  -4  -4  -6  -3   0  -1  -7  -6  -1  -4  -3   0
+R  -5   8  -3  -8  -5  -1  -7  -5  -2  -7  -6   2  -4  -8  -5  -4  -4  -6  -5  -7  -5  -4   0
+N  -4  -3   8   0  -6  -2  -3  -3  -1  -7  -7  -2  -5  -8  -6  -1  -2  -8  -4  -7   4  -2   0
+D  -4  -8   0   8 -10  -3   1  -4  -3 -10 -11  -3  -7 -12  -4  -3  -4  -9 -10  -7   4  -1   0
+C  -1  -5  -6 -10  11  -9 -10  -5  -5  -3  -9  -9  -3  -9  -7  -2  -3 -12  -3  -2  -8  -9   0
+Q  -3  -1  -2  -3  -9   8   1  -6   0  -7  -4   0  -3  -6  -3  -2  -3 -11  -8  -5  -2   4   0
+E  -3  -7  -3   1 -10   1   7  -5  -4  -8  -7  -1  -6 -11  -4  -3  -4 -12  -5  -6  -1   4   0
+G  -2  -5  -3  -4  -5  -6  -5   7  -5 -12  -9  -5  -8  -9  -6  -3  -6  -7  -8  -8  -3  -5   0
+H  -5  -2  -1  -3  -5   0  -4  -5  10  -7  -5  -3  -8  -3  -4  -3  -4  -4   0  -6  -2  -2   0
+I  -5  -7  -7 -10  -3  -7  -8 -12  -7   7   0  -7   0  -3  -8  -7  -3  -4  -5   2  -8  -7   0
+L  -5  -6  -7 -11  -9  -4  -7  -9  -5   0   6  -6   1  -1  -5  -6  -5  -4  -4  -1  -9  -5   0
+K  -4   2  -2  -3  -9   0  -1  -5  -3  -7  -6   7  -4 -10  -4  -3  -3  -7  -6  -6  -2   0   0
+M  -4  -4  -5  -7  -3  -3  -6  -8  -8   0   1  -4   9  -1  -8  -6  -3  -9  -8  -2  -6  -4   0
+F  -6  -8  -8 -12  -9  -6 -11  -9  -3  -3  -1 -10  -1   8  -7  -5  -6  -1   2  -4 -10  -8   0
+P  -3  -5  -6  -4  -7  -3  -4  -6  -4  -8  -5  -4  -8  -7   8  -2  -4  -7 -11  -6  -5  -3   0
+S   0  -4  -1  -3  -2  -2  -3  -3  -3  -7  -6  -3  -6  -5  -2   7   1  -6  -4  -6  -2  -2   0
+T  -1  -4  -2  -4  -3  -3  -4  -6  -4  -3  -5  -3  -3  -6  -4   1   7 -11  -6  -2  -3  -3   0
+W  -7  -6  -8  -9 -12 -11 -12  -7  -4  -4  -4  -7  -9  -1  -7  -6 -11  12   0 -10  -8 -11   0
+Y  -6  -5  -4 -10  -3  -8  -5  -8   0  -5  -4  -6  -8   2 -11  -4  -6   0   9  -6  -7  -6   0
+V  -1  -7  -7  -7  -2  -5  -6  -8  -6   2  -1  -6  -2  -4  -6  -6  -2 -10  -6   6  -7  -5   0
+B  -4  -5   4   4  -8  -2  -1  -3  -2  -8  -9  -2  -6 -10  -5  -2  -3  -8  -7  -7   8   0   0
+Z  -3  -4  -2  -1  -9   4   4  -5  -2  -7  -5   0  -4  -8  -3  -2  -3 -11  -6  -5   0   7   0
+X   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1
diff --git a/data/VTML_80.mat b/data/VTML_80.mat
new file mode 100644
index 0000000..c4202ff
--- /dev/null
+++ b/data/VTML_80.mat
@@ -0,0 +1,34 @@
+#
+#  VTML_80
+#
+# This matrix was produced from: vtml_80qij.mat using vtml_P.mat background frequencies
+#
+# VTML_80 substitution matrix, Units = bits/2.0
+# Expected score = -1.134601 bits; Entropy = 1.427882 bits
+# Target fraction identity = 0.5015
+# Lowest Score = -9, Highest Score= 11
+#
+    A   R   N   D   C   Q   E   G   H   I   L   K   M   F   P   S   T   W   Y   V   B   Z   X
+A   5  -3  -2  -2   0  -2  -2  -1  -3  -3  -3  -2  -2  -4  -1   1   0  -5  -4   0  -2  -2   0
+R  -3   7  -2  -5  -4   1  -3  -3   0  -5  -4   3  -3  -6  -3  -2  -3  -4  -3  -5  -3  -1   0
+N  -2  -2   7   1  -4  -1  -1  -1   0  -5  -5   0  -4  -5  -4   1  -1  -6  -2  -5   4  -1   0
+D  -2  -5   1   7  -7  -1   2  -2  -1  -7  -8  -2  -5  -9  -2  -1  -2  -7  -7  -5   4   0   0
+C   0  -4  -4  -7  10  -6  -7  -3  -3  -2  -5  -6  -1  -6  -4   0  -2  -8  -1   0  -5  -6   0
+Q  -2   1  -1  -1  -6   7   2  -4   1  -5  -3   1  -2  -4  -2  -1  -2  -8  -5  -3  -1   4   0
+E  -2  -3  -1   2  -7   2   6  -3  -2  -5  -5   0  -4  -7  -2  -1  -2  -8  -4  -4   0   4   0
+G  -1  -3  -1  -2  -3  -4  -3   7  -3  -8  -7  -3  -6  -6  -4  -1  -4  -5  -6  -5  -1  -3   0
+H  -3   0   0  -1  -3   1  -2  -3   9  -5  -3  -1  -5  -1  -3  -1  -2  -2   1  -4   0   0   0
+I  -3  -5  -5  -7  -2  -5  -5  -8  -5   6   1  -5   1  -1  -6  -5  -2  -3  -3   3  -6  -5   0
+L  -3  -4  -5  -8  -5  -3  -5  -7  -3   1   5  -4   2   0  -4  -4  -3  -2  -2   0  -6  -4   0
+K  -2   3   0  -2  -6   1   0  -3  -1  -5  -4   6  -2  -7  -2  -2  -1  -5  -4  -4  -1   0   0
+M  -2  -3  -4  -5  -1  -2  -4  -6  -5   1   2  -2   8   0  -5  -4  -1  -6  -4   0  -4  -3   0
+F  -4  -6  -5  -9  -6  -4  -7  -6  -1  -1   0  -7   0   8  -5  -3  -4   1   3  -2  -7  -5   0
+P  -1  -3  -4  -2  -4  -2  -2  -4  -3  -6  -4  -2  -5  -5   8  -1  -2  -5  -7  -4  -3  -2   0
+S   1  -2   1  -1   0  -1  -1  -1  -1  -5  -4  -2  -4  -3  -1   5   1  -4  -3  -3   0  -1   0
+T   0  -3  -1  -2  -2  -2  -2  -4  -2  -2  -3  -1  -1  -4  -2   1   6  -7  -4  -1  -1  -2   0
+W  -5  -4  -6  -7  -8  -8  -8  -5  -2  -3  -2  -5  -6   1  -5  -4  -7  11   1  -6  -6  -8   0
+Y  -4  -3  -2  -7  -1  -5  -4  -6   1  -3  -2  -4  -4   3  -7  -3  -4   1   8  -4  -4  -4   0
+V   0  -5  -5  -5   0  -3  -4  -5  -4   3   0  -4   0  -2  -4  -3  -1  -6  -4   5  -5  -3   0
+B  -2  -3   4   4  -5  -1   0  -1   0  -6  -6  -1  -4  -7  -3   0  -1  -6  -4  -5   7   0   0
+Z  -2  -1  -1   0  -6   4   4  -3   0  -5  -4   0  -3  -5  -2  -1  -2  -8  -4  -3   0   6   0
+X   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1
diff --git a/data/blosum45.mat b/data/blosum45.mat
new file mode 100644
index 0000000..07b8f7a
--- /dev/null
+++ b/data/blosum45.mat
@@ -0,0 +1,30 @@
+#  Matrix made by matblas from blosum45.iij
+#  BLOSUM Clustered Scoring Matrix in 1/3 Bit Units
+#  Blocks Database = /data/blocks_5.0/blocks.dat
+#  Cluster Percentage: >= 45
+#  Entropy =   0.3795, Expected =  -0.2789
+   A  R  N  D  C  Q  E  G  H  I  L  K  M  F  P  S  T  W  Y  V  B  Z  X
+A  5 -2 -1 -2 -1 -1 -1  0 -2 -1 -1 -1 -1 -2 -1  1  0 -2 -2  0 -1 -1  0
+R -2  7  0 -1 -3  1  0 -2  0 -3 -2  3 -1 -2 -2 -1 -1 -2 -1 -2 -1  0 -1
+N -1  0  6  2 -2  0  0  0  1 -2 -3  0 -2 -2 -2  1  0 -4 -2 -3  4  0 -1
+D -2 -1  2  7 -3  0  2 -1  0 -4 -3  0 -3 -4 -1  0 -1 -4 -2 -3  5  1 -1
+C -1 -3 -2 -3 12 -3 -3 -3 -3 -3 -2 -3 -2 -2 -4 -1 -1 -5 -3 -1 -2 -3 -2
+Q -1  1  0  0 -3  6  2 -2  1 -2 -2  1  0 -4 -1  0 -1 -2 -1 -3  0  4 -1
+E -1  0  0  2 -3  2  6 -2  0 -3 -2  1 -2 -3  0  0 -1 -3 -2 -3  1  4 -1
+G  0 -2  0 -1 -3 -2 -2  7 -2 -4 -3 -2 -2 -3 -2  0 -2 -2 -3 -3 -1 -2 -1
+H -2  0  1  0 -3  1  0 -2 10 -3 -2 -1  0 -2 -2 -1 -2 -3  2 -3  0  0 -1
+I -1 -3 -2 -4 -3 -2 -3 -4 -3  5  2 -3  2  0 -2 -2 -1 -2  0  3 -3 -3 -1
+L -1 -2 -3 -3 -2 -2 -2 -3 -2  2  5 -3  2  1 -3 -3 -1 -2  0  1 -3 -2 -1
+K -1  3  0  0 -3  1  1 -2 -1 -3 -3  5 -1 -3 -1 -1 -1 -2 -1 -2  0  1 -1
+M -1 -1 -2 -3 -2  0 -2 -2  0  2  2 -1  6  0 -2 -2 -1 -2  0  1 -2 -1 -1
+F -2 -2 -2 -4 -2 -4 -3 -3 -2  0  1 -3  0  8 -3 -2 -1  1  3  0 -3 -3 -1
+P -1 -2 -2 -1 -4 -1  0 -2 -2 -2 -3 -1 -2 -3  9 -1 -1 -3 -3 -3 -2 -1 -1
+S  1 -1  1  0 -1  0  0  0 -1 -2 -3 -1 -2 -2 -1  4  2 -4 -2 -1  0  0  0
+T  0 -1  0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -1 -1  2  5 -3 -1  0  0 -1  0
+W -2 -2 -4 -4 -5 -2 -3 -2 -3 -2 -2 -2 -2  1 -3 -4 -3 15  3 -3 -4 -2 -2
+Y -2 -1 -2 -2 -3 -1 -2 -3  2  0  0 -1  0  3 -3 -2 -1  3  8 -1 -2 -2 -1
+V  0 -2 -3 -3 -1 -3 -3 -3 -3  3  1 -2  1  0 -3 -1  0 -3 -1  5 -3 -3 -1
+B -1 -1  4  5 -2  0  1 -1  0 -3 -3  0 -2 -3 -2  0  0 -4 -2 -3  4  2 -1
+Z -1  0  0  1 -3  4  4 -2  0 -3 -2  1 -1 -3 -1  0 -1 -2 -2 -3  2  4 -1
+X  0 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1  0  0 -2 -1 -1 -1 -1 -1
+
diff --git a/data/blosum50.mat b/data/blosum50.mat
new file mode 100644
index 0000000..513e4f2
--- /dev/null
+++ b/data/blosum50.mat
@@ -0,0 +1,29 @@
+#  Matrix made by matblas from blosum50.iij
+#  BLOSUM Clustered Scoring Matrix in 1/3 Bit Units
+#  Blocks Database = /data/blocks_5.0/blocks.dat
+#  Cluster Percentage: >= 50
+#  Entropy =   0.4808, Expected =  -0.3573
+   A  R  N  D  C  Q  E  G  H  I  L  K  M  F  P  S  T  W  Y  V  B  Z  X
+A  5 -2 -1 -2 -1 -1 -1  0 -2 -1 -2 -1 -1 -3 -1  1  0 -3 -2  0 -2 -1 -1
+R -2  7 -1 -2 -4  1  0 -3  0 -4 -3  3 -2 -3 -3 -1 -1 -3 -1 -3 -1  0 -1
+N -1 -1  7  2 -2  0  0  0  1 -3 -4  0 -2 -4 -2  1  0 -4 -2 -3  4  0 -1
+D -2 -2  2  8 -4  0  2 -1 -1 -4 -4 -1 -4 -5 -1  0 -1 -5 -3 -4  5  1 -1
+C -1 -4 -2 -4 13 -3 -3 -3 -3 -2 -2 -3 -2 -2 -4 -1 -1 -5 -3 -1 -3 -3 -2
+Q -1  1  0  0 -3  7  2 -2  1 -3 -2  2  0 -4 -1  0 -1 -1 -1 -3  0  4 -1
+E -1  0  0  2 -3  2  6 -3  0 -4 -3  1 -2 -3 -1 -1 -1 -3 -2 -3  1  5 -1
+G  0 -3  0 -1 -3 -2 -3  8 -2 -4 -4 -2 -3 -4 -2  0 -2 -3 -3 -4 -1 -2 -2
+H -2  0  1 -1 -3  1  0 -2 10 -4 -3  0 -1 -1 -2 -1 -2 -3  2 -4  0  0 -1
+I -1 -4 -3 -4 -2 -3 -4 -4 -4  5  2 -3  2  0 -3 -3 -1 -3 -1  4 -4 -3 -1
+L -2 -3 -4 -4 -2 -2 -3 -4 -3  2  5 -3  3  1 -4 -3 -1 -2 -1  1 -4 -3 -1
+K -1  3  0 -1 -3  2  1 -2  0 -3 -3  6 -2 -4 -1  0 -1 -3 -2 -3  0  1 -1
+M -1 -2 -2 -4 -2  0 -2 -3 -1  2  3 -2  7  0 -3 -2 -1 -1  0  1 -3 -1 -1
+F -3 -3 -4 -5 -2 -4 -3 -4 -1  0  1 -4  0  8 -4 -3 -2  1  4 -1 -4 -4 -2
+P -1 -3 -2 -1 -4 -1 -1 -2 -2 -3 -4 -1 -3 -4 10 -1 -1 -4 -3 -3 -2 -1 -2
+S  1 -1  1  0 -1  0 -1  0 -1 -3 -3  0 -2 -3 -1  5  2 -4 -2 -2  0  0 -1
+T  0 -1  0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1  2  5 -3 -2  0  0 -1  0
+W -3 -3 -4 -5 -5 -1 -3 -3 -3 -3 -2 -3 -1  1 -4 -4 -3 15  2 -3 -5 -2 -3
+Y -2 -1 -2 -3 -3 -1 -2 -3  2 -1 -1 -2  0  4 -3 -2 -2  2  8 -1 -3 -2 -1
+V  0 -3 -3 -4 -1 -3 -3 -4 -4  4  1 -3  1 -1 -3 -2  0 -3 -1  5 -4 -3 -1
+B -2 -1  4  5 -3  0  1 -1  0 -4 -4  0 -3 -4 -2  0  0 -5 -3 -4  5  2 -1
+Z -1  0  0  1 -3  4  5 -2  0 -3 -3  1 -1 -4 -1  0 -1 -2 -2 -3  2  5 -1
+X -1 -1 -1 -1 -2 -1 -1 -2 -1 -1 -1 -1 -1 -2 -2 -1  0 -3 -1 -1 -1 -1 -1
diff --git a/data/blosum62.mat b/data/blosum62.mat
new file mode 100644
index 0000000..6174a66
--- /dev/null
+++ b/data/blosum62.mat
@@ -0,0 +1,30 @@
+#  Matrix made by matblas from blosum62.iij
+#  BLOSUM Clustered Scoring Matrix in 1/2 Bit Units
+#  Blocks Database = /data/blocks_5.0/blocks.dat
+#  Cluster Percentage: >= 62
+#  Entropy =   0.6979, Expected =  -0.5209
+   A  R  N  D  C  Q  E  G  H  I  L  K  M  F  P  S  T  W  Y  V  B  Z  X
+A  4 -1 -2 -2  0 -1 -1  0 -2 -1 -1 -1 -1 -2 -1  1  0 -3 -2  0 -2 -1  0
+R -1  5  0 -2 -3  1  0 -2  0 -3 -2  2 -1 -3 -2 -1 -1 -3 -2 -3 -1  0 -1
+N -2  0  6  1 -3  0  0  0  1 -3 -3  0 -2 -3 -2  1  0 -4 -2 -3  3  0 -1
+D -2 -2  1  6 -3  0  2 -1 -1 -3 -4 -1 -3 -3 -1  0 -1 -4 -3 -3  4  1 -1
+C  0 -3 -3 -3  9 -3 -4 -3 -3 -1 -1 -3 -1 -2 -3 -1 -1 -2 -2 -1 -3 -3 -2
+Q -1  1  0  0 -3  5  2 -2  0 -3 -2  1  0 -3 -1  0 -1 -2 -1 -2  0  3 -1
+E -1  0  0  2 -4  2  5 -2  0 -3 -3  1 -2 -3 -1  0 -1 -3 -2 -2  1  4 -1
+G  0 -2  0 -1 -3 -2 -2  6 -2 -4 -4 -2 -3 -3 -2  0 -2 -2 -3 -3 -1 -2 -1
+H -2  0  1 -1 -3  0  0 -2  8 -3 -3 -1 -2 -1 -2 -1 -2 -2  2 -3  0  0 -1
+I -1 -3 -3 -3 -1 -3 -3 -4 -3  4  2 -3  1  0 -3 -2 -1 -3 -1  3 -3 -3 -1
+L -1 -2 -3 -4 -1 -2 -3 -4 -3  2  4 -2  2  0 -3 -2 -1 -2 -1  1 -4 -3 -1
+K -1  2  0 -1 -3  1  1 -2 -1 -3 -2  5 -1 -3 -1  0 -1 -3 -2 -2  0  1 -1
+M -1 -1 -2 -3 -1  0 -2 -3 -2  1  2 -1  5  0 -2 -1 -1 -1 -1  1 -3 -1 -1
+F -2 -3 -3 -3 -2 -3 -3 -3 -1  0  0 -3  0  6 -4 -2 -2  1  3 -1 -3 -3 -1
+P -1 -2 -2 -1 -3 -1 -1 -2 -2 -3 -3 -1 -2 -4  7 -1 -1 -4 -3 -2 -2 -1 -2
+S  1 -1  1  0 -1  0  0  0 -1 -2 -2  0 -1 -2 -1  4  1 -3 -2 -2  0  0  0
+T  0 -1  0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1  1  5 -2 -2  0 -1 -1  0
+W -3 -3 -4 -4 -2 -2 -3 -2 -2 -3 -2 -3 -1  1 -4 -3 -2 11  2 -3 -4 -3 -2
+Y -2 -2 -2 -3 -2 -1 -2 -3  2 -1 -1 -2 -1  3 -3 -2 -2  2  7 -1 -3 -2 -1
+V  0 -3 -3 -3 -1 -2 -2 -3 -3  3  1 -2  1 -1 -2 -2  0 -3 -1  4 -3 -2 -1
+B -2 -1  3  4 -3  0  1 -1  0 -3 -4  0 -3 -3 -2  0 -1 -4 -3 -3  4  1 -1
+Z -1  0  0  1 -3  3  4 -2  0 -3 -3  1 -1 -3 -1  0 -1 -3 -2 -2  1  4 -1
+X  0 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2  0  0 -2 -1 -1 -1 -1 -1
+
diff --git a/data/blosum80.mat b/data/blosum80.mat
new file mode 100644
index 0000000..23191a3
--- /dev/null
+++ b/data/blosum80.mat
@@ -0,0 +1,30 @@
+#  Matrix made by matblas from blosum80_3.iij
+#  BLOSUM Clustered Scoring Matrix in 1/3 Bit Units
+#  Blocks Database = /data/blocks_5.0/blocks.dat
+#  Cluster Percentage: >= 80
+#  Entropy =   0.9868, Expected =  -0.7442
+   A  R  N  D  C  Q  E  G  H  I  L  K  M  F  P  S  T  W  Y  V  B  Z  X
+A  7 -3 -3 -3 -1 -2 -2  0 -3 -3 -3 -1 -2 -4 -1  2  0 -5 -4 -1 -3 -2 -1
+R -3  9 -1 -3 -6  1 -1 -4  0 -5 -4  3 -3 -5 -3 -2 -2 -5 -4 -4 -2  0 -2
+N -3 -1  9  2 -5  0 -1 -1  1 -6 -6  0 -4 -6 -4  1  0 -7 -4 -5  5 -1 -2
+D -3 -3  2 10 -7 -1  2 -3 -2 -7 -7 -2 -6 -6 -3 -1 -2 -8 -6 -6  6  1 -3
+C -1 -6 -5 -7 13 -5 -7 -6 -7 -2 -3 -6 -3 -4 -6 -2 -2 -5 -5 -2 -6 -7 -4
+Q -2  1  0 -1 -5  9  3 -4  1 -5 -4  2 -1 -5 -3 -1 -1 -4 -3 -4 -1  5 -2
+E -2 -1 -1  2 -7  3  8 -4  0 -6 -6  1 -4 -6 -2 -1 -2 -6 -5 -4  1  6 -2
+G  0 -4 -1 -3 -6 -4 -4  9 -4 -7 -7 -3 -5 -6 -5 -1 -3 -6 -6 -6 -2 -4 -3
+H -3  0  1 -2 -7  1  0 -4 12 -6 -5 -1 -4 -2 -4 -2 -3 -4  3 -5 -1  0 -2
+I -3 -5 -6 -7 -2 -5 -6 -7 -6  7  2 -5  2 -1 -5 -4 -2 -5 -3  4 -6 -6 -2
+L -3 -4 -6 -7 -3 -4 -6 -7 -5  2  6 -4  3  0 -5 -4 -3 -4 -2  1 -7 -5 -2
+K -1  3  0 -2 -6  2  1 -3 -1 -5 -4  8 -3 -5 -2 -1 -1 -6 -4 -4 -1  1 -2
+M -2 -3 -4 -6 -3 -1 -4 -5 -4  2  3 -3  9  0 -4 -3 -1 -3 -3  1 -5 -3 -2
+F -4 -5 -6 -6 -4 -5 -6 -6 -2 -1  0 -5  0 10 -6 -4 -4  0  4 -2 -6 -6 -3
+P -1 -3 -4 -3 -6 -3 -2 -5 -4 -5 -5 -2 -4 -6 12 -2 -3 -7 -6 -4 -4 -2 -3
+S  2 -2  1 -1 -2 -1 -1 -1 -2 -4 -4 -1 -3 -4 -2  7  2 -6 -3 -3  0 -1 -1
+T  0 -2  0 -2 -2 -1 -2 -3 -3 -2 -3 -1 -1 -4 -3  2  8 -5 -3  0 -1 -2 -1
+W -5 -5 -7 -8 -5 -4 -6 -6 -4 -5 -4 -6 -3  0 -7 -6 -5 16  3 -5 -8 -5 -5
+Y -4 -4 -4 -6 -5 -3 -5 -6  3 -3 -2 -4 -3  4 -6 -3 -3  3 11 -3 -5 -4 -3
+V -1 -4 -5 -6 -2 -4 -4 -6 -5  4  1 -4  1 -2 -4 -3  0 -5 -3  7 -6 -4 -2
+B -3 -2  5  6 -6 -1  1 -2 -1 -6 -7 -1 -5 -6 -4  0 -1 -8 -5 -6  6  0 -3
+Z -2  0 -1  1 -7  5  6 -4  0 -6 -5  1 -3 -6 -2 -1 -2 -5 -4 -4  0  6 -1
+X -1 -2 -2 -3 -4 -2 -2 -3 -2 -2 -2 -2 -2 -3 -3 -1 -1 -5 -3 -2 -3 -1 -2
+
diff --git a/data/dna.mat b/data/dna.mat
new file mode 100644
index 0000000..914cac4
--- /dev/null
+++ b/data/dna.mat
@@ -0,0 +1,19 @@
+#  Sample dna matrix
+   A  C  G  T  U  R  Y  M  W  S  K  D  H  V  B  N  X 
+A  5 -4 -4 -4 -4  2 -1  2  2 -1 -1  1  1  1 -2 -1 -1
+C -4  5 -4 -4 -4 -1  2  2 -1  2 -1 -2  1  1  1 -1 -1
+G -4 -4  5 -4 -4  2 -1 -1 -1  2  2  1 -2  1  1 -1 -1
+T -4 -4 -4  5  5 -1  2 -1  2 -1  2  1  1 -2  1 -1 -1
+U -4 -4 -4  5  5 -1  2 -1  2 -1  2  1  1 -2  1 -1 -1
+R  2 -1  2 -1 -1  2 -2 -1  1  1  1  1 -1  1 -1 -1 -1
+Y -1  2 -1  2  2 -2  2 -1  1  1  1 -1  1 -1  1 -1 -1
+M  2  2 -1 -1 -1 -1 -1  2  1  1 -1 -1  1  1 -1 -1 -1
+W  2 -1 -1  2  2  1  1  1  2 -1  1  1  1 -1 -1 -1 -1
+S -1  2  2 -1 -1  1  1  1 -1  2  1 -1 -1  1  1 -1 -1
+K -1 -1  2  2  2  1  1 -1  1  1  2  1 -1 -1  1 -1 -1
+D  1 -2  1  1  1  1 -1 -1  1 -1  1  1 -1 -1 -1 -1 -1
+H  1  1 -2  1  1 -1  1  1  1 -1 -1 -1  1 -1 -1 -1 -1
+V  1  1  1 -2 -2  1 -1  1 -1  1 -1 -1 -1  1 -1 -1 -1
+B -2  1  1  1  1 -1  1 -1 -1  1  1 -1 -1 -1  1 -1 -1
+N -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
+X -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
diff --git a/data/idn_aa.mat b/data/idn_aa.mat
new file mode 100644
index 0000000..f972612
--- /dev/null
+++ b/data/idn_aa.mat
@@ -0,0 +1,24 @@
+   A    R   N   D   C   Q   E   G   H   I   L   K   M   F   P   S   T   W   Y   V   B   Z   X
+A  4  -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10
+R -10   4 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10
+N -10 -10   4 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10
+D -10 -10 -10   4 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10
+C -10 -10 -10 -10   4 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10
+Q -10 -10 -10 -10 -10   4 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10
+E -10 -10 -10 -10 -10 -10   4 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10
+G -10 -10 -10 -10 -10 -10 -10   4 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10
+H -10 -10 -10 -10 -10 -10 -10 -10   4 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10
+I -10 -10 -10 -10 -10 -10 -10 -10 -10   4 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10
+L -10 -10 -10 -10 -10 -10 -10 -10 -10 -10   4 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10
+K -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10   4 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10
+M -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10   4 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10
+F -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10   4 -10 -10 -10 -10 -10 -10 -10 -10 -10
+P -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10   4 -10 -10 -10 -10 -10 -10 -10 -10
+S -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10   4 -10 -10 -10 -10 -10 -10 -10
+T -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10   4 -10 -10 -10 -10 -10 -10
+W -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10   4 -10 -10 -10 -10 -10
+Y -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10   4 -10 -10 -10 -10
+V -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10   4 -10 -10 -10
+B -10 -10   2   2 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10   4 -10 -10
+Z -10 -10 -10 -10 -10   2   2 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10   4 -10
+X -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10   0
diff --git a/data/md_10.mat b/data/md_10.mat
new file mode 100644
index 0000000..918e604
--- /dev/null
+++ b/data/md_10.mat
@@ -0,0 +1,24 @@
+   A   R   N   D   C   Q   E   G   H   I   L   K   M   F   P   S   T   W   Y   V   B   Z   X
+A  11 -13 -12 -11 -13 -13 -10  -8 -15 -13 -15 -14 -13 -18  -7  -5  -4 -20 -19  -6 -12 -11  -1
+R -12  12 -13 -18 -10  -5 -15  -9  -5 -17 -14  -2 -14 -22 -11 -10 -12  -9 -17 -17 -15 -10  -1
+N -12 -13  13  -3 -14 -11 -12 -11  -5 -13 -19  -6 -15 -20 -17  -4  -7 -21 -12 -17   5 -11  -1
+D -11 -18  -3  12 -20 -13  -2  -9 -10 -19 -21 -15 -18 -23 -18 -12 -14 -24 -13 -15   5  -7  -1
+C -13 -10 -14 -20  17 -19 -22 -12 -12 -18 -16 -21 -15 -11 -18  -7 -14  -9  -7 -12 -17 -21  -1
+Q -13  -5 -11 -13 -19  13  -5 -15  -3 -19 -12  -6 -14 -22  -8 -13 -13 -17 -16 -17 -12   4  -1
+E -10 -15 -12  -2 -22  -5  12  -9 -15 -19 -20  -8 -17 -23 -17 -15 -15 -20 -21 -14  -7   3  -1
+G  -8  -9 -11  -9 -12 -16  -9  11 -16 -21 -21 -15 -18 -22 -16  -7 -14 -13 -21 -13 -10 -13  -1
+H -16  -5  -5 -10 -12  -3 -15 -16  16 -17 -13 -13 -15 -14 -10 -11 -13 -20  -3 -19  -7  -9  -1
+I -13 -17 -14 -19 -17 -20 -19 -21 -18  12  -7 -17  -4 -11 -19 -14  -7 -20 -15  -1 -16 -19  -1
+L -15 -14 -19 -21 -16 -12 -20 -21 -13  -7  10 -18  -4  -6 -10 -13 -15 -13 -16  -8 -20 -16  -1
+K -14  -2  -6 -15 -21  -6  -8 -15 -13 -17 -18  12 -12 -24 -17 -13 -10 -19 -20 -18 -11  -7  -1
+M -13 -14 -15 -18 -15 -14 -18 -19 -15  -4  -4 -12  16 -14 -17 -15  -7 -16 -18  -5 -16 -16  -1
+F -18 -22 -19 -22 -11 -22 -23 -22 -14 -11  -6 -23 -14  14 -17 -11 -18 -13  -3 -12 -21 -22  -1
+P  -7 -12 -17 -18 -18  -8 -17 -16 -10 -19 -10 -16 -17 -17  13  -6  -9 -22 -20 -16 -17 -13  -1
+S  -5 -10  -4 -12  -7 -13 -15  -7 -11 -14 -13 -13 -15 -11  -6  11  -4 -15 -12 -14  -8 -14  -1
+T  -4 -12  -7 -14 -14 -13 -15 -14 -13  -7 -16 -10  -7 -19  -9  -4  12 -19 -17 -10 -10 -14  -1
+W -21  -9 -21 -21 -10 -17 -21 -13 -21 -21 -13 -21 -17 -13 -21 -15 -18  18 -12 -16 -21 -19  -1
+Y -20 -17 -12 -13  -7 -16 -21 -20  -3 -15 -16 -20 -17  -3 -20 -12 -17 -12  15 -18 -13 -19  -1
+V  -6 -17 -17 -15 -12 -17 -14 -13 -19  -1  -8 -18  -5 -12 -16 -14 -10 -16 -18  11 -16 -15  -1
+B -12 -15   5   5 -17 -12  -7 -10  -7 -16 -20 -11 -17 -21 -17  -8 -10 -22 -13 -16  13  -9  -1
+Z -16 -18 -17  -8 -32   1   9 -17 -17 -29 -26 -11 -24 -34 -21 -21 -21 -29 -29 -22  -9  13  -1
+X  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1
diff --git a/data/md_20.mat b/data/md_20.mat
new file mode 100644
index 0000000..d4f7ab9
--- /dev/null
+++ b/data/md_20.mat
@@ -0,0 +1,24 @@
+    A   R   N   D   C   Q   E   G   H   I   L   K   M   F   P   S   T   W   Y   V   B   Z   X
+A  10 -10  -9  -8 -10 -10  -7  -5 -12 -10 -12 -11  -9 -15  -5  -2  -1 -17 -16  -3  -9  -8  -1
+R -10  12 -10 -14  -7  -3 -11  -6  -3 -14 -12   0 -11 -18  -9  -7  -9  -6 -14 -14 -12  -7  -1
+N  -9 -10  13  -1 -11  -8  -9  -8  -2 -11 -15  -4 -12 -16 -13  -1  -4 -18  -9 -14   6  -8  -1
+D  -8 -14  -1  12 -16  -9   1  -6  -7 -16 -18 -11 -15 -20 -15  -9 -11 -20 -11 -12   6  -4  -1
+C -10  -7 -11 -16  17 -16 -19  -9  -9 -14 -13 -17 -12  -8 -14  -4 -11  -7  -4 -10 -14 -17  -1
+Q -10  -3  -8  -9 -16  13  -3 -12   0 -16  -9  -3 -11 -18  -5 -10 -10 -14 -12 -14  -9   5  -1
+E  -7 -11  -9   1 -19  -3  11  -7 -12 -16 -17  -5 -14 -20 -14 -12 -12 -17 -18 -11  -4   4  -1
+G  -5  -6  -8  -6  -9 -12  -7  11 -13 -17 -18 -12 -15 -19 -12  -5 -11 -10 -17 -11  -7  -9  -1
+H -12  -3  -2  -7  -9   0 -12 -13  15 -14 -10  -9 -12 -11  -7  -8 -10 -16   0 -15  -4  -6  -1
+I -10 -14 -11 -16 -14 -16 -16 -17 -14  12  -4 -14  -1  -8 -15 -11  -4 -16 -12   2 -13 -16  -1
+L -12 -11 -15 -18 -13  -9 -17 -18 -10  -4  10 -15  -2  -4  -7 -10 -12 -10 -13  -5 -17 -13  -1
+K -11   0  -4 -12 -17  -3  -5 -12  -9 -14 -15  12  -9 -21 -13 -10  -7 -16 -17 -15  -8  -4  -1
+M  -9 -11 -12 -15 -12 -11 -15 -16 -12  -1  -2  -9  15 -10 -14 -12  -4 -13 -14  -3 -13 -13  -1
+F -15 -19 -16 -19  -8 -18 -20 -19 -11  -8  -4 -19 -10  13 -14  -8 -15 -10   0  -9 -17 -19  -1
+P  -5  -9 -13 -15 -14  -5 -14 -12  -7 -15  -7 -13 -14 -14  12  -3  -7 -18 -16 -13 -14 -10  -1
+S  -2  -8  -1  -9  -4 -10 -12  -5  -8 -11 -10 -10 -12  -8  -3  10  -1 -12  -9 -11  -5 -11  -1
+T  -1  -9  -4 -11 -10 -10 -12 -11 -10  -4 -12  -7  -4 -15  -7  -1  11 -16 -14  -7  -7 -11  -1
+W -17  -6 -18 -18  -7 -14 -18 -10 -17 -17 -10 -17 -14 -10 -18 -12 -15  18  -9 -13 -18 -16  -1
+Y -16 -14  -9 -11  -4 -12 -18 -17   0 -12 -12 -17 -14   0 -16  -9 -13  -9  14 -15 -10 -15  -1
+V  -3 -14 -14 -12  -9 -14 -11 -11 -15   2  -5 -15  -2  -9 -13 -11  -7 -13 -14  11 -13 -12  -1
+B  -9 -12   6   6 -14  -9  -4  -7  -4 -13 -17  -8 -13 -18 -14  -5  -7 -19 -10 -13  12  -6  -1
+Z -12 -13 -13  -4 -27   4  10 -13 -12 -24 -21  -6 -20 -29 -17 -17 -17 -24 -24 -18  -6  12  -1
+X  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1
diff --git a/data/md_40.mat b/data/md_40.mat
new file mode 100644
index 0000000..ff34bd3
--- /dev/null
+++ b/data/md_40.mat
@@ -0,0 +1,24 @@
+   A   R   N   D   C   Q   E   G   H   I   L   K   M   F   P   S   T   W   Y   V   B   Z   X
+A   9  -7  -6  -6  -7  -7  -5  -3 -10  -6  -9  -8  -7 -11  -2   0   1 -13 -12  -1  -6  -6  -1
+R  -7  11  -6 -10  -5   0  -8  -4   0 -10  -9   3  -8 -14  -6  -5  -6  -4 -10 -11  -8  -4  -1
+N  -6  -6  12   2  -8  -5  -5  -5   0  -8 -12  -1  -9 -13  -9   1  -2 -16  -6 -10   7  -5  -1
+D  -6 -10   2  11 -13  -6   3  -4  -5 -12 -15  -8 -11 -16 -11  -6  -7 -15  -8  -9   6  -1  -1
+C  -6  -5  -8 -13  16 -12 -15  -7  -6 -11 -11 -13  -9  -6 -11  -2  -7  -4  -2  -7 -11 -13  -1
+Q  -7   0  -5  -6 -12  12   0  -9   2 -13  -6   0  -8 -14  -3  -7  -7 -11  -9 -11  -6   6  -1
+E  -5  -8  -5   3 -15   0  10  -4  -8 -12 -13  -3 -11 -16 -10  -8  -8 -13 -14  -8  -1   5  -1
+G  -3  -4  -5  -4  -7  -9  -4  10 -10 -13 -14  -9 -12 -15  -9  -2  -8  -7 -15  -8  -5  -7  -1
+H -10   0   0  -5  -6   2  -8 -10  14 -11  -7  -6  -9  -7  -4  -6  -7 -12   2 -12  -2  -3  -1
+I  -6 -10  -8 -12 -11 -13 -12 -13 -11  11  -1 -11   1  -6 -11  -8  -2 -12  -9   4 -10 -12  -1
+L  -9  -9 -12 -14 -11  -6 -13 -14  -7  -1   9 -12   1  -1  -5  -7  -9  -7  -9  -2 -13 -10  -1
+K  -8   3  -1  -8 -13   0  -3  -9  -6 -11 -12  11  -7 -18 -10  -7  -5 -12 -13 -12  -5  -2  -1
+M  -7  -8  -9 -11  -8  -8 -11 -12  -9   1   1  -7  14  -7 -10  -8  -2 -11 -11   0 -10 -10  -1
+F -11 -14 -12 -16  -6 -14 -16 -15  -7  -6  -1 -17  -7  13 -11  -5 -11  -7   2  -6 -14 -15  -1
+P  -2  -6  -9 -12 -11  -3 -10  -9  -4 -11  -5 -10 -10 -11  12  -1  -4 -14 -12  -9 -11  -7  -1
+S   0  -5   1  -6  -2  -7  -8  -2  -6  -8  -7  -7  -8  -5  -1   9   1 -10  -7  -7  -3  -8  -1
+T   1  -6  -2  -7  -7  -7  -8  -8  -7  -2  -9  -5  -2 -11  -4   1  10 -14 -10  -4  -5  -8  -1
+W -14  -4 -17 -15  -4 -12 -13  -7 -11 -12  -7 -13 -11  -7 -14 -10 -14  18  -6 -11 -16 -12  -1
+Y -12  -9  -6  -8  -2  -9 -14 -14   2  -9  -9 -13 -11   2 -12  -7 -11  -6  14 -11  -7 -11  -1
+V  -1 -11 -10  -9  -7 -11  -8  -8 -12   4  -2 -12   0  -6 -10  -7  -4 -10 -11  10 -10  -9  -1
+B  -6  -8   7   6 -11  -6  -1  -5  -2 -10 -13  -5 -10 -14 -10  -3  -5 -16  -7 -10  11  -3  -1
+Z  -8  -8  -8   0 -21   6  10  -9  -7 -18 -16  -3 -15 -23 -12 -12 -12 -19 -18 -14  -3  11  -1
+X  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1
diff --git a/data/pam120.mat b/data/pam120.mat
new file mode 100644
index 0000000..322256a
--- /dev/null
+++ b/data/pam120.mat
@@ -0,0 +1,34 @@
+#
+# This matrix was produced by "pam" Version 1.0.6 [28-Jul-93]
+#
+# PAM 120 substitution matrix, scale = ln(2)/2 = 0.346574
+#
+# Expected score = -1.64, Entropy = 0.979 bits
+#
+# Lowest score = -8, Highest score = 12
+#
+   A  R  N  D  C  Q  E  G  H  I  L  K  M  F  P  S  T  W  Y  V  B  Z  X
+A  3 -3 -1  0 -3 -1  0  1 -3 -1 -3 -2 -2 -4  1  1  1 -7 -4  0  0 -1 -1
+R -3  6 -1 -3 -4  1 -3 -4  1 -2 -4  2 -1 -5 -1 -1 -2  1 -5 -3 -2 -1 -2
+N -1 -1  4  2 -5  0  1  0  2 -2 -4  1 -3 -4 -2  1  0 -4 -2 -3  3  0 -1
+D  0 -3  2  5 -7  1  3  0  0 -3 -5 -1 -4 -7 -3  0 -1 -8 -5 -3  4  3 -2
+C -3 -4 -5 -7  9 -7 -7 -4 -4 -3 -7 -7 -6 -6 -4  0 -3 -8 -1 -3 -6 -7 -4
+Q -1  1  0  1 -7  6  2 -3  3 -3 -2  0 -1 -6  0 -2 -2 -6 -5 -3  0  4 -1
+E  0 -3  1  3 -7  2  5 -1 -1 -3 -4 -1 -3 -7 -2 -1 -2 -8 -5 -3  3  4 -1
+G  1 -4  0  0 -4 -3 -1  5 -4 -4 -5 -3 -4 -5 -2  1 -1 -8 -6 -2  0 -2 -2
+H -3  1  2  0 -4  3 -1 -4  7 -4 -3 -2 -4 -3 -1 -2 -3 -3 -1 -3  1  1 -2
+I -1 -2 -2 -3 -3 -3 -3 -4 -4  6  1 -3  1  0 -3 -2  0 -6 -2  3 -3 -3 -1
+L -3 -4 -4 -5 -7 -2 -4 -5 -3  1  5 -4  3  0 -3 -4 -3 -3 -2  1 -4 -3 -2
+K -2  2  1 -1 -7  0 -1 -3 -2 -3 -4  5  0 -7 -2 -1 -1 -5 -5 -4  0 -1 -2
+M -2 -1 -3 -4 -6 -1 -3 -4 -4  1  3  0  8 -1 -3 -2 -1 -6 -4  1 -4 -2 -2
+F -4 -5 -4 -7 -6 -6 -7 -5 -3  0  0 -7 -1  8 -5 -3 -4 -1  4 -3 -5 -6 -3
+P  1 -1 -2 -3 -4  0 -2 -2 -1 -3 -3 -2 -3 -5  6  1 -1 -7 -6 -2 -2 -1 -2
+S  1 -1  1  0  0 -2 -1  1 -2 -2 -4 -1 -2 -3  1  3  2 -2 -3 -2  0 -1 -1
+T  1 -2  0 -1 -3 -2 -2 -1 -3  0 -3 -1 -1 -4 -1  2  4 -6 -3  0  0 -2 -1
+W -7  1 -4 -8 -8 -6 -8 -8 -3 -6 -3 -5 -6 -1 -7 -2 -6 12 -2 -8 -6 -7 -5
+Y -4 -5 -2 -5 -1 -5 -5 -6 -1 -2 -2 -5 -4  4 -6 -3 -3 -2  8 -3 -3 -5 -3
+V  0 -3 -3 -3 -3 -3 -3 -2 -3  3  1 -4  1 -3 -2 -2  0 -8 -3  5 -3 -3 -1
+B  0 -2  3  4 -6  0  3  0  1 -3 -4  0 -4 -5 -2  0  0 -6 -3 -3  4  2 -1
+Z -1 -1  0  3 -7  4  4 -2  1 -3 -3 -1 -2 -6 -1 -1 -2 -7 -5 -3  2  4 -1
+X -1 -2 -1 -2 -4 -1 -1 -2 -2 -1 -2 -2 -2 -3 -2 -1 -1 -5 -3 -1 -1 -1 -2
+
diff --git a/data/pam250.mat b/data/pam250.mat
new file mode 100644
index 0000000..9c79415
--- /dev/null
+++ b/data/pam250.mat
@@ -0,0 +1,34 @@
+#
+# This matrix was produced by "pam" Version 1.0.6 [28-Jul-93]
+#
+# PAM 250 substitution matrix, scale = ln(2)/3 = 0.231049
+#
+# Expected score = -0.844, Entropy = 0.354 bits
+#
+# Lowest score = -8, Highest score = 17
+#
+   A  R  N  D  C  Q  E  G  H  I  L  K  M  F  P  S  T  W  Y  V  B  Z  X
+A  2 -2  0  0 -2  0  0  1 -1 -1 -2 -1 -1 -3  1  1  1 -6 -3  0  0  0  0
+R -2  6  0 -1 -4  1 -1 -3  2 -2 -3  3  0 -4  0  0 -1  2 -4 -2 -1  0 -1
+N  0  0  2  2 -4  1  1  0  2 -2 -3  1 -2 -3  0  1  0 -4 -2 -2  2  1  0
+D  0 -1  2  4 -5  2  3  1  1 -2 -4  0 -3 -6 -1  0  0 -7 -4 -2  3  3 -1
+C -2 -4 -4 -5 12 -5 -5 -3 -3 -2 -6 -5 -5 -4 -3  0 -2 -8  0 -2 -4 -5 -3
+Q  0  1  1  2 -5  4  2 -1  3 -2 -2  1 -1 -5  0 -1 -1 -5 -4 -2  1  3 -1
+E  0 -1  1  3 -5  2  4  0  1 -2 -3  0 -2 -5 -1  0  0 -7 -4 -2  3  3 -1
+G  1 -3  0  1 -3 -1  0  5 -2 -3 -4 -2 -3 -5  0  1  0 -7 -5 -1  0  0 -1
+H -1  2  2  1 -3  3  1 -2  6 -2 -2  0 -2 -2  0 -1 -1 -3  0 -2  1  2 -1
+I -1 -2 -2 -2 -2 -2 -2 -3 -2  5  2 -2  2  1 -2 -1  0 -5 -1  4 -2 -2 -1
+L -2 -3 -3 -4 -6 -2 -3 -4 -2  2  6 -3  4  2 -3 -3 -2 -2 -1  2 -3 -3 -1
+K -1  3  1  0 -5  1  0 -2  0 -2 -3  5  0 -5 -1  0  0 -3 -4 -2  1  0 -1
+M -1  0 -2 -3 -5 -1 -2 -3 -2  2  4  0  6  0 -2 -2 -1 -4 -2  2 -2 -2 -1
+F -3 -4 -3 -6 -4 -5 -5 -5 -2  1  2 -5  0  9 -5 -3 -3  0  7 -1 -4 -5 -2
+P  1  0  0 -1 -3  0 -1  0  0 -2 -3 -1 -2 -5  6  1  0 -6 -5 -1 -1  0 -1
+S  1  0  1  0  0 -1  0  1 -1 -1 -3  0 -2 -3  1  2  1 -2 -3 -1  0  0  0
+T  1 -1  0  0 -2 -1  0  0 -1  0 -2  0 -1 -3  0  1  3 -5 -3  0  0 -1  0
+W -6  2 -4 -7 -8 -5 -7 -7 -3 -5 -2 -3 -4  0 -6 -2 -5 17  0 -6 -5 -6 -4
+Y -3 -4 -2 -4  0 -4 -4 -5  0 -1 -1 -4 -2  7 -5 -3 -3  0 10 -2 -3 -4 -2
+V  0 -2 -2 -2 -2 -2 -2 -1 -2  4  2 -2  2 -1 -1 -1  0 -6 -2  4 -2 -2 -1
+B  0 -1  2  3 -4  1  3  0  1 -2 -3  1 -2 -4 -1  0  0 -5 -3 -2  3  2 -1
+Z  0  0  1  3 -5  3  3  0  2 -2 -3  0 -2 -5  0  0 -1 -6 -4 -2  2  3 -1
+X  0 -1  0 -1 -3 -1 -1 -1 -1 -1 -1 -1 -1 -2 -1  0  0 -4 -2 -1 -1 -1 -1
+
diff --git a/data/rna.mat b/data/rna.mat
new file mode 100644
index 0000000..b759092
--- /dev/null
+++ b/data/rna.mat
@@ -0,0 +1,19 @@
+#  Sample rna matrix with +2 for G:A, TU:C
+   A  C  G  T  U  R  Y  M  W  S  K  D  H  V  B  N  X 
+A  5 -4  2 -4 -4  2 -1  1  1 -1 -1  1  1  1 -2 -1 -1
+C -4  5 -4  2  2 -1  1  1 -1  1 -1 -2  1  1  1 -1 -1
+G -4 -4  5 -4 -4  1 -1 -1 -1  1  1  1 -2  1  1 -1 -1
+T -4 -4 -4  5  5 -1  2 -1  1 -1  1  1  1 -2  1 -1 -1
+U -4 -4 -4  5  5 -1  2 -1  1 -1  1  1  1 -2  1 -1 -1
+R  2 -1  2 -1 -1  2 -2 -1  1  1  1  1 -1  1 -1 -1 -1
+Y -1  2 -1  2  2 -2  2 -1  1  1  1 -1  1 -1  1 -1 -1
+M  1  1 -1 -1 -1 -1 -1  2  1  1 -1 -1  1  1 -1 -1 -1
+W  1 -1 -1  1  1  1  1  1  1 -1  1  1  1 -1 -1 -1 -1
+S -1  1  1 -1 -1  1  1  1 -1  2  1 -1 -1  1  1 -1 -1
+K -1 -1  1  1  1  1  1 -1  1  1  2  1 -1 -1  1 -1 -1
+D  1 -2  1  1  1  1 -1 -1  1 -1  1  1 -1 -1 -1 -1 -1
+H  1  1 -2  1  1 -1  1  1  1 -1 -1 -1  1 -1 -1 -1 -1
+V  1  1  1 -2 -2  1 -1  1 -1  1 -1 -1 -1  1 -1 -1 -1
+B -2  1  1  1  1 -1  1 -1 -1  1  1 -1 -1 -1  1 -1 -1
+N -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
+X -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
diff --git a/data/vtml160.mat b/data/vtml160.mat
new file mode 100644
index 0000000..e525883
--- /dev/null
+++ b/data/vtml160.mat
@@ -0,0 +1,38 @@
+#
+#     VTML160
+#
+# This matrix was produced with scripts written by
+# Tobias Mueller and Sven Rahmann [June-2001]. 
+#
+# VTML160 substitution matrix, Units = Third-Bits
+# Expected Score = -1.297840 Third-Bits
+# Lowest Score = -7, Highest Score = 16
+#
+# Entropy H = 0.562489 Bits
+#
+# 30-Jun-2001 
+    A   R   N   D   C   Q   E   G   H   I   L   K   M   F   P   S   T   W   Y   V   B   Z   X   * 
+A   5  -2  -1  -1   1  -1  -1   0  -2  -1  -2  -1  -1  -3   0   1   1  -5  -3   0  -1  -1   0  -7 
+R  -2   7   0  -3  -3   2  -1  -3   1  -4  -3   4  -2  -5  -2  -1  -1  -4  -3  -4  -2   0   0  -7 
+N  -1   0   7   3  -3   0   0   0   1  -4  -4   0  -3  -5  -2   1   0  -5  -2  -4   5   0   0  -7 
+D  -1  -3   3   7  -5   1   3  -1   0  -6  -6   0  -5  -7  -1   0  -1  -7  -5  -4   6   3   0  -7 
+C   1  -3  -3  -5  13  -4  -5  -2  -2  -1  -4  -4  -1  -4  -3   1   0  -7  -1   1  -4  -5   0  -7 
+Q  -1   2   0   1  -4   6   2  -3   2  -4  -2   2  -1  -4  -1   0  -1  -6  -4  -3   0   4   0  -7 
+E  -1  -1   0   3  -5   2   6  -2  -1  -5  -4   1  -3  -6  -1   0  -1  -7  -3  -3   2   5   0  -7 
+G   0  -3   0  -1  -2  -3  -2   8  -3  -7  -6  -2  -5  -6  -3   0  -2  -5  -5  -5  -1  -2   0  -7 
+H  -2   1   1   0  -2   2  -1  -3   9  -4  -3   0  -3   0  -2  -1  -1  -1   3  -3   0   0   0  -7 
+I  -1  -4  -4  -6  -1  -4  -5  -7  -4   6   3  -4   2   0  -4  -3  -1  -2  -2   4  -5  -4   0  -7 
+L  -2  -3  -4  -6  -4  -2  -4  -6  -3   3   6  -3   4   2  -3  -3  -2  -1  -1   2  -5  -3   0  -7 
+K  -1   4   0   0  -4   2   1  -2   0  -4  -3   5  -2  -5  -1  -1  -1  -5  -3  -3   0   2   0  -7 
+M  -1  -2  -3  -5  -1  -1  -3  -5  -3   2   4  -2   8   1  -4  -3  -1  -4  -2   1  -4  -3   0  -7 
+F  -3  -5  -5  -7  -4  -4  -6  -6   0   0   2  -5   1   9  -5  -3  -3   3   6  -1  -6  -5   0  -7 
+P   0  -2  -2  -1  -3  -1  -1  -3  -2  -4  -3  -1  -4  -5   9   0  -1  -5  -6  -3  -2  -1   0  -7 
+S   1  -1   1   0   1   0   0   0  -1  -3  -3  -1  -3  -3   0   4   2  -4  -2  -2   1   0   0  -7 
+T   1  -1   0  -1   0  -1  -1  -2  -1  -1  -2  -1  -1  -3  -1   2   5  -6  -3   0   0  -1   0  -7 
+W  -5  -4  -5  -7  -7  -6  -7  -5  -1  -2  -1  -5  -4   3  -5  -4  -6  16   4  -5  -6  -7   0  -7 
+Y  -3  -3  -2  -5  -1  -4  -3  -5   3  -2  -1  -3  -2   6  -6  -2  -3   4  10  -3  -3  -4   0  -7 
+V   0  -4  -4  -4   1  -3  -3  -5  -3   4   2  -3   1  -1  -3  -2   0  -5  -3   5  -4  -3   0  -7 
+B  -1  -2   5   6  -4   0   2  -1   0  -5  -5   0  -4  -6  -2   1   0  -6  -3  -4   5   2   0  -7 
+Z  -1   0   0   3  -5   4   5  -2   0  -4  -3   2  -3  -5  -1   0  -1  -7  -4  -3   2   5   0  -7 
+X   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0  -7 
+*  -7  -7  -7  -7  -7  -7  -7  -7  -7  -7  -7  -7  -7  -7  -7  -7  -7  -7  -7  -7  -7  -7  -7   1 
diff --git a/debian/changelog b/debian/changelog
deleted file mode 100644
index 9dfbcc0..0000000
--- a/debian/changelog
+++ /dev/null
@@ -1,5 +0,0 @@
-fasta3 (36.3.8f-1) UNRELEASED; urgency=low
-
-  * Initial release (Closes: #coming)
-
- -- Steffen Moeller <moeller at debian.org>  Tue, 05 Dec 2017 17:19:26 +0100
diff --git a/debian/compat b/debian/compat
deleted file mode 100644
index ec63514..0000000
--- a/debian/compat
+++ /dev/null
@@ -1 +0,0 @@
-9
diff --git a/debian/control b/debian/control
deleted file mode 100644
index ca445e7..0000000
--- a/debian/control
+++ /dev/null
@@ -1,75 +0,0 @@
-Source: fasta3
-Section: non-free/science
-Priority: optional
-Maintainer: Debian Med Packaging Team <debian-med-packaging at lists.alioth.debian.org>
-Uploaders: Steffen Moeller <moeller at debian.org>
-Build-Depends: debhelper (>= 9)
-Standards-Version: 3.9.6
-Vcs-Browser: http://anonscm.debian.org/viewvc/debian-med/trunk/packages/fasta3/trunk/
-Vcs-Svn: svn://anonscm.debian.org/debian-med/trunk/packages/fasta3/trunk/
-Homepage: http://fasta.bioch.virginia.edu
-
-Package: fasta3
-Architecture: any
-Depends: ${shlibs:Depends}, ${misc:Depends}
-Description: tools for searching collections of biological sequences
- The FASTA programs find regions of local or global similarity between
- Protein or DNA sequences, either by searching Protein or DNA databases,
- or by identifying local duplications within a sequence. Other
- programs provide information on the statistical significance of an
- alignment. Like BLAST, FASTA can be used to infer functional and
- evolutionary relationships between sequences as well as help identify
- members of gene families.
- .
- * Protein
- .
-  - Protein-protein FASTA
-  - Protein-protein Smith-Waterman (ssearch)
-  - Global Protein-protein (Needleman-Wunsch) (ggsearch)
-  - Global/Local protein-protein (glsearch)
-  - Protein-protein with unordered peptides (fasts)
-  - Protein-protein with mixed peptide sequences (fastf)
- .	
- * Nucleotide
- .
-  - Nucleotide-Nucleotide (DNA/RNA fasta)
-  - Ordered Nucleotides vs Nucleotide (fastm)
-  - Un-ordered Nucleotides vs Nucleotide (fasts)
- .
- * Translated
- .
-  - Translated DNA (with frameshifts, e.g. ESTs)
-    vs Proteins (fastx/fasty)
-  - Protein vs Translated DNA (with frameshifts)
-    (tfastx/tfasty)
-  - Peptides vs Translated DNA (tfasts)
- . 	
- * Statistical Significance
- .
-  - Protein vs Protein shuffle (prss)
-  - DNA vs DNA shuffle (prss)
-  - Translated DNA vs Protein shuffle (prfx)
- .
- * Local Duplications
- .
-  - Local Protein alignments (lalign)
-  - Plot Protein alignment "dot-plot" (plalign)
-  - Local DNA alignments (lalign)
-  - Plot DNA alignment "dot-plot" (plalign)
-
-Package: fasta3-doc
-Architecture: all
-Depends: ${shlibs:Depends}, ${misc:Depends}
-Description: user guide for FASTA tools
- The FASTA programs find regions of local or global similarity between
- Protein or DNA sequences, either by searching Protein or DNA databases,
- or by identifying local duplications within a sequence. Other
- programs provide information on the statistical significance of an
- alignment. Like BLAST, FASTA can be used to infer functional and
- evolutionary relationships between sequences as well as help identify
- members of gene families.
- .
- The use of the package's many binaries and the equally representated
- conceptual approaches towards sequence analyses are summarised in
- this PDF.
- 
diff --git a/debian/copyright b/debian/copyright
deleted file mode 100644
index 08c2bf4..0000000
--- a/debian/copyright
+++ /dev/null
@@ -1,51 +0,0 @@
-Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
-Upstream-Name: FASTA
-Source: http://faculty.virginia.edu/wrpearson/fasta/fasta3/
-
-Files: *
-Copyright: 1996, 1997, 1998, 1999, 2014 by William R. Pearson and
-   The Rector & Visitors of the University of Virginia
-License: Apache-2.0
-
-Files: src/wm_align.c
-Copyright: William R. Pearson, University of Virginia
-           Webb Miller, Penn State University 
-License: Apache-2.0
-
-Files: src/karlin.c
-Copyright: 1990, 1993 Stephen Altschul, NCBI
-License: Apache-2.0
-
-Files: src/smith_waterman_altivec.c
-Copyright: 2004, Erik Lindahl <lindahl at sbc.su.se>
-    Stockholm Bioinformatics Center, 2004
-License: Apache-2.0
-
-Files: src/glo[bc]al_sse2.[ch]
-       src/smith_waterman_sse2.c
-Copyright: 2006,2010 Michael Farrar <farrar.michael at gmail.com>
-License: Academics-only
- This program may not be sold or incorporated into a commercial product,
- in whole or in part, without written consent of Michael Farrar.  For
- further information regarding permission for use or reproduction, please
- contact: Michael Farrar at farrar.michael at gmail.com.
-
-Files: debian/*
-Copyright: 2015 Steffen Moeller <moeller at debian.org>
-License: Apache-2.0
-
-License: Apache-2.0
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
- .
- http://www.apache.org/licenses/LICENSE-2.0
- .
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- .
- On Debian systems, the complete text of the Apache version 2.0 license
- can be found in "/usr/share/common-licenses/Apache-2.0".
diff --git a/debian/docs b/debian/docs
deleted file mode 100644
index ab8aa33..0000000
--- a/debian/docs
+++ /dev/null
@@ -1,5 +0,0 @@
-README
-doc/readme*
-doc/README*
-doc/changes*
-doc/fasta_guide.pdf
diff --git a/debian/fasta3-doc.doc-base b/debian/fasta3-doc.doc-base
deleted file mode 100644
index 80cb0da..0000000
--- a/debian/fasta3-doc.doc-base
+++ /dev/null
@@ -1,15 +0,0 @@
-Document: fasta3
-Title: The FASTA program package
-Author: William R. Pearson
-Abstract: This documentation describes the version 36 of the FASTA program
- package (see W. R. Pearson and D. J. Lipman (1988), "Improved Tools for
- Biological Sequence Analysis", PNAS 85:2444-2448.  W. R. Pearson (1996)
- "Effective protein sequence comparison" Meth. Enzymol. 266:227-258;
- and Pearson et. al. (1997) Genomics 46:24-36 [18]. Version 3 of the
- FASTA packages contains many programs for searching DNA and protein
- databases and for evaluating statistical significance from randomly
- shuffled sequences.
-Section: Science/Biology
-
-Format: PDF
-Files: /usr/share/doc/fasta3/fasta_guide.pdf
diff --git a/debian/fasta3.install b/debian/fasta3.install
deleted file mode 100644
index eb6cf92..0000000
--- a/debian/fasta3.install
+++ /dev/null
@@ -1 +0,0 @@
-bin/[a-z]* usr/bin
diff --git a/debian/fasta3.manpages b/debian/fasta3.manpages
deleted file mode 100644
index d71b3a8..0000000
--- a/debian/fasta3.manpages
+++ /dev/null
@@ -1,6 +0,0 @@
-doc/fasta36.1
-doc/fastf3.1
-doc/fasts3.1
-doc/map_db.1
-doc/prss3.1
-doc/ps_lav.1
diff --git a/debian/patches/Makefile.patch b/debian/patches/Makefile.patch
deleted file mode 100644
index ac20e6b..0000000
--- a/debian/patches/Makefile.patch
+++ /dev/null
@@ -1,47 +0,0 @@
-Index: fasta3-36.3.7a/make/Makefile
-===================================================================
---- fasta3-36.3.7a.orig/make/Makefile
-+++ fasta3-36.3.7a/make/Makefile
-@@ -34,6 +34,7 @@
- THR_LIBS = -lpthread
- THR_CC =
- 
-+BIN = ../bin
- XDIR = /seqprg/bin
- 
- DROPGSW_NA_O = dropgsw2.o wm_align.o calcons_sw.o
-Index: fasta3-36.3.7a/make/Makefile.linux64_sse2
-===================================================================
---- fasta3-36.3.7a.orig/make/Makefile.linux64_sse2
-+++ fasta3-36.3.7a/make/Makefile.linux64_sse2
-@@ -12,7 +12,7 @@
- 
- SHELL=/bin/bash
- 
--CC = gcc -g -O -msse2
-+CC = gcc -g -O -msse2 $(CPPFLAGS)
- #CC= gcc -pg -g -O -msse2 -ffast-math
- #CC = gcc -g -DDEBUG -msse2
- 
-@@ -25,7 +25,7 @@
- 
- # standard options
- 
--CFLAGS= -DSHOW_HELP -DSHOWSIM -DUNIX -DTIMES -DHZ=100 -DMAX_WORKERS=8 -DTHR_EXIT=pthread_exit -DPROGRESS -DM10_CONS -DFASTA_HOST='"your_fasta_host_here"' -D_REENTRANT -DHAS_INTTYPES -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 -DUSE_FSEEKO -DSAMP_STATS -DPGM_DOC -DUSE_MMAP  -D_LARGEFILE64_SOURCE  -DBIG_LIB64
-+CFLAGS+= -DSHOW_HELP -DSHOWSIM -DUNIX -DTIMES -DHZ=100 -DMAX_WORKERS=8 -DTHR_EXIT=pthread_exit -DPROGRESS -DM10_CONS -DFASTA_HOST='"your_fasta_host_here"' -D_REENTRANT -DHAS_INTTYPES -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 -DUSE_FSEEKO -DSAMP_STATS -DPGM_DOC -DUSE_MMAP  -D_LARGEFILE64_SOURCE  -DBIG_LIB64
- # -I/usr/include/mysql -DMYSQL_DB
- # -DSUPERFAMNUM -DSFCHAR="'|'" 
- 
-Index: fasta3-36.3.7a/make/Makefile36m.common
-===================================================================
---- fasta3-36.3.7a.orig/make/Makefile36m.common
-+++ fasta3-36.3.7a/make/Makefile36m.common
-@@ -34,7 +34,7 @@
- # and "-L/usr/lib64/mysql -lmysqlclient -lz" in LIB_M
- # some systems may also require a LD_LIBRARY_PATH change
- 
--LIB_M= -lm -lz
-+LIB_M= $(LDFLAGS) -lm -lz
- #LIB_M= -L/usr/lib64/mysql -lmysqlclient -lz -lm
- NCBL_LIB=ncbl2_mlib.o
- #NCBL_LIB=ncbl2_mlib.o mysql_lib.o
diff --git a/debian/patches/series b/debian/patches/series
deleted file mode 100644
index 5b1c0a4..0000000
--- a/debian/patches/series
+++ /dev/null
@@ -1 +0,0 @@
-Makefile.patch
diff --git a/debian/rules b/debian/rules
deleted file mode 100755
index a60608a..0000000
--- a/debian/rules
+++ /dev/null
@@ -1,35 +0,0 @@
-#!/usr/bin/make -f
-#DH_VERBOSE = 1
-
-DPKG_EXPORT_BUILDFLAGS = 1
-include /usr/share/dpkg/default.mk
-
-ARCH=`dpkg-architecture -qDEB_TARGET_GNU_CPU`
-
-#MAKEFILE="../make/Makefile"
-#
-#ifeq (x86_64,$(ARCH))
-MAKEFILE="../make/Makefile.linux64"
-#endif
-
-export DEB_BUILD_MAINT_OPTIONS = hardening=+all
-CFLAGS+=-flto
-LDFLAGS+=-flto
-
-# package maintainers to append CFLAGS
-#export DEB_CFLAGS_MAINT_APPEND  = -Wall -pedantic
-# package maintainers to append LDFLAGS
-#export DEB_LDFLAGS_MAINT_APPEND = -Wl,--as-needed
-
-%:
-	dh $@ 
-
-override_dh_auto_build:
-	cd src && $(MAKE) -f $(MAKEFILE)
-
-override_dh_auto_clean:
-	if [ -d src ]; then cd src && $(MAKE) -f $(MAKEFILE) clean-up; fi
-
-override_dh_compress:
-	dh_compress --exclude=.pdf
-
diff --git a/debian/source/format b/debian/source/format
deleted file mode 100644
index 163aaf8..0000000
--- a/debian/source/format
+++ /dev/null
@@ -1 +0,0 @@
-3.0 (quilt)
diff --git a/debian/upstream/metadata b/debian/upstream/metadata
deleted file mode 100644
index be6be7b..0000000
--- a/debian/upstream/metadata
+++ /dev/null
@@ -1,20 +0,0 @@
-Reference:
- - Author: William R. Pearson and D. J. Lipman
-   Title: "Improved tools for biological sequence comparison"
-   Journal: Proc Natl Acad Sci U S A
-   Year: 1988
-   Volume: 85
-   Number: 8
-   Pages: 2444-8
-   PMID: 3162770
-   URL: http://www.ncbi.nlm.nih.gov/pmc/articles/PMC280013/
-   eprint: http://www.ncbi.nlm.nih.gov/pmc/articles/PMC280013/pdf/pnas00260-0036.pdf
- - Author: William R. Pearson
-   Title: "Effective protein sequence comparison"
-   Journal: Methods Enzymol.
-   Year: 1996
-   Volume: 266
-   Pages: 227-58
-   DOI: 10.1016/S0076-6879(96)66017-0
-   PMID: 8743688
-   URL: http://www.sciencedirect.com/science/article/pii/S0076687996660170
diff --git a/debian/watch b/debian/watch
deleted file mode 100644
index 6fdeb49..0000000
--- a/debian/watch
+++ /dev/null
@@ -1,6 +0,0 @@
-version=3
-http://faculty.virginia.edu/wrpearson/fasta/fasta3/fasta-(3.*)\.tar\.gz
-
-# This is also at Github but
-#    https://github.com/wrpearson/fasta36/releases
-# is lagging begind a bit
\ No newline at end of file
diff --git a/doc/INSTALL b/doc/INSTALL
new file mode 100644
index 0000000..285bbcd
--- /dev/null
+++ b/doc/INSTALL
@@ -0,0 +1,17 @@
+
+22-Jan-2014
+
+fasta36/doc/INSTALL
+
+To compile the FASTA programs, change to the fasta36/src directory:
+
+  cd ~/fasta36/src
+
+and type:
+
+  make -f ../make/Makefile.linux64_sse2 all
+
+The file fasta36/make/README gives more information about the
+different Makefiles available. The code is routinely compiled and
+tested under Linux (64-bit, sse2) and MacOS X (64-bit, sse2).
+
diff --git a/doc/README.versions b/doc/README.versions
new file mode 100644
index 0000000..e7a7ae1
--- /dev/null
+++ b/doc/README.versions
@@ -0,0 +1,48 @@
+
+ $Id: README.versions 120 2010-01-31 19:42:09Z wrp $
+ $Revision: 210 $
+
+January, 2010
+
+This directory contains the newest version of FASTA, version 36.
+FASTA36 is a major update to FASTA35 that provides the ability to
+display multiple significant alignments to a query sequence.  Previous
+versions of FASTA displayed only the best alignment between the query
+and library sequence; if the library sequence was long, with multiple
+similar regions, only the best was shown.  This contrasts with BLAST,
+which has always displayed multiple "HSPs" when they are present.
+
+FASTA36 provides some additional improvements; like BLAST, it now uses
+statistical estimates to set thresholds for band optimization, which
+can increase search speed as much as 2-fold, and it provides much more
+flexibility in specifying the files that are searched (indirect files
+of filenames can include additional indirection).  But the main
+improvement is the display of multiple HSPs.
+
+All of the traditional alignment programs: ssearch36, fasta36,
+[t]fast[xy]36 and glsearch36 display multiple HSPs.  The peptide and
+mixed peptide alignment programs ([t]fasts36, fastf36, fastm36) still
+show a single HSP.
+
+Currently, the PVM/MPI parallel versions of the programs still display
+a single HSP.
+
+As of late 2007, there is almost no reason to use the fasta2 programs;
+the major programs present in fasta2 that were not present in fasta3
+(version 34) -- align (global alignments) and lalign (non-overlapping
+local alignments) are now available in fasta version 36.
+
+For more information about the programs in the current FASTA v36
+package, see the "changes_v36.html" and "readme.v36" files.
+
+There are still a very few programs in the fasta2 package that are not
+available in the fasta3 package - programs for global alignments
+without end-gap penalties, the "grease" Kyte-Doolittle plot, and
+"garnier" and "chofas" for classic (but inaccurate) secondary
+structure prediction. You should not use the fasta2 programs for
+library searching; the fasta3 programs are more sensitive and have
+better statistics.
+
+Precompiled versions of the programs for Windows and MacOS are available in the
+executables directory.
+
diff --git a/doc/README_v36.3.8d.md b/doc/README_v36.3.8d.md
new file mode 100644
index 0000000..474cea8
--- /dev/null
+++ b/doc/README_v36.3.8d.md
@@ -0,0 +1,37 @@
+
+
+## The FASTA package - protein and DNA sequence similarity searching and alignment programs
+
+Changes in **fasta-36.3.8d** released 13-April-2016:
+
+1. Various bug fixes to `pssm_asn_subs.c` that avoid coredumps when
+   reading NCBI PSSM ASN.1 binary files.  `pssm_asn_subs.c` can now read
+   IUPACAA sequences.
+
+2. default gap penalties for VT40 (from -14/-2 to -13/-1), VT80 (from
+   -14/-2 to -11/-1), and VT120 (from -10/-1 to -11/-1) have changed
+   slightly.
+
+3. Introduction of `scripts/m9B_btop_msa.pl` and
+   `scripts/m8_btop_msa.pl`, which use the BTOP (`-m 9B` or `-m 8CB`)
+   encoded alignment strings to produce a query-driven multiple
+   sequence alignment (MSA) in ClustalW format.  This MSA can be used
+   as input to `psiblast` to produce an ASN.1 PSSM.
+
+4. The `scripts/annot_blast_btop2.pl` script replaces
+   `scripts/annot_blast_btop.pl` and allows annotation of both the query
+   and subject sequences.
+
+5. Various domain annotation scripts have been renamed for clarity.
+   For example, `ann_feats_up_sql.pl` uses an SQL implementation of
+   Uniprot features tables to annotate domains.  Likewise,
+   `ann_pfam_www.pl` gets domain information from the Pfam web site,
+   while `ann_pfam27.pl` gets the information from the downloaded
+   Pfam27 mySQL tables, and `ann_pfam28.pl` uses the Pfam28 mySQL
+   tables.
+
+6. percent identity in sub-alignment scores is calculated like a BLAST
+   percent identity -- gaps are not included in the denominator.
+
+For more detailed information, see `doc/readme.v36`.
+
diff --git a/doc/changes_v34.html b/doc/changes_v34.html
new file mode 100644
index 0000000..54a820a
--- /dev/null
+++ b/doc/changes_v34.html
@@ -0,0 +1,351 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+<title>ChangeLog - FASTA v34</title>
+<style type="text/css">
+body {  margin-left: 6px; }
+.sidebar { 
+font-size: 12px; font-family: sans-serif; text-decoration:none; background-color: #FFFFCC; }
+.fasta { font-family: sans-serif; }
+.fasta h3 { font-size: 14px; color: #000000 }
+.fasta td {background-color: #FFFFCC }
+.fasta a { text-decoration: none; }
+.fasta li {  font-size: 12px; margin-left:-1em }
+</style>
+<head>
+<body>
+<div class=fasta>
+<h3>ChangeLog - FASTA v34</h3>
+<pre>
+ $Id: changes_v34.html 120 2010-01-31 19:42:09Z wrp $
+ $Revision: 210 $
+</pre>
+<h3>May 28, 2007</h3>
+
+Small modification for GCG ASCII (libtype=5) header line.
+
+<hr>
+<h3>October 6, 2006	CVS fa34t26b3</h3>
+
+New Windows programs available using Intel C++ compiler.  First
+threaded programs for Windows; first SSE2 acceleration of SSEARCH for
+Windows.
+
+<h3>July 18, 2006	CVS fa34t26b2</h3>
+
+More powerful environment variable substitutions for FASTLIBS files.
+
+The library file name parsing programs now provide the option for
+environment variable substitutions.  For example, SLIB2=/slib2 as an
+environment variable (e.g. export SLIB2=/slib2 for ksh and bash), then
+<pre>
+fasta34 -q query.aa '${SLIB2}/swissprot.fa'  expands as expected.
+</pre>
+While this is not important for command lines, where the Unix shell
+would expand things anyway, it is very helpful for various
+configuration files, such as files of file names, where:
+<pre>
+<${SLIB2}/blast
+swissprot.fa
+</pre>
+now expands properly, and in FASTLIBS files the line:
+<pre>
+NCBI/Blast Swissprot$0S${SLIB2}/blast/swissprot.fa
+</pre>
+expands properly.  Currently, Environment variable expansion only
+takes place for library file names, and the <directory in a file of
+file names.
+
+<h3>July 2, 2006  fa34t26b0</h3>
+
+This release provides an extremely efficient SSE2 implementation of
+the Smith-Waterman algorithm for the SSE2 vector instructions written
+by Michael Farrar (farrar.michael at gmail.com).  The SSE code speeds up
+Smith-Waterman 8 - 10-fold in my tests, making it comparable to Erik
+Lindahl's Altivec code for the Apple/IBM G4/G5 architecture.
+
+<h3>May 24, 2006	fa34t25d8</h3>
+
+In addition, support for ASN.1 PSSM:2 files provided by the NCBI
+PSI-BLAST WWW site is included.  This code will not work with
+iteration 0 PSSM's (which have no PSSM information).  For ASN.1
+PSSM's, which provide the matrix name (and in some cases the gap
+penalties), the scoring matrix and gap penalties are set appropriately
+if they were not specified on the command line. ASN.1 PSSM's are type 2:
+<pre>
+ssearch34 -P "pssm.asn1 2" .....
+</pre>
+
+<h3>May 18, 2006</h3>
+
+Support for NCBI Blast formatdb databases has been expanded.  The
+FASTA programs can now read some NCBI *.pal and *.nal files, which are
+used to specify subsets of databases.  Specifically, the
+swissprot.00.pal and pdbaa.00.pal files are supported.  FASTA supports
+files that refer to *.msk files (i.e. swissprot.00.pal refers to
+swissprot.00.msk); it does not currently support .pal files that
+simply list other .pal or database files (e.g. FASTA does not support
+nr.pal or swissprot.pal).
+
+<hr>
+<h3>Nov 20, 2005</h3>
+
+Changes to support asymmetric matrices - a scoring matrix read in from
+a file can be asymmetric.  Default matrices are all symmetric.
+
+<h3>Sept 2, 2005</h3>
+
+The prss34 program has been modified to use the same display routines
+as the other search programs.  To be more consistent with the other
+programs, the old "-w shuffle-window-size" is now "-v window-size".
+
+<tt><b>prss34/prfx34</b></tt> will also show the optimal alignment for which the
+significance is calculated by using the "-A" option. 
+
+Since the new program reports results exactly like other
+<tt><b>fasta/ssearch/fastxy34</b></tt> programs, parsing for statistical significance
+is considerably different.  The old format program can be made using
+"make prss34o".
+
+<h3>May 5, 2005 CVS fa34t25d1</h3>
+
+Modification to the -x option, so that both an "X:X" match score and
+an "X:not-X" mismatch score can be specified. (This score is also used
+to give a positive score to a "*:*" match - the end of a reading frame,
+while giving a negative score to "*:not-*".)
+
+<h3>Jan 24, 2005</h3>
+
+Include a new program, "print_pssm", which reads a blastpgp binary
+checkpoint file and writes out the frequency values as text.  These
+values can be used with a new option with ssearch34(_t) and prss34,
+which provides the ability to read a text PSSM file.  To specify a
+text PSSM, use the option -P "query.ckpt 1" where the "1" indicates a
+text, rather than a binary checkpoint file.  "initfa.c" has also been
+modified to work with PSSM files with zeros in the frequency
+table.  Presumably these positions (at the ends) do not provide
+information. (Jan 26, 2005) blastpgp actually uses BLOSUM62 values
+when zero frequencies are provided, so read_pssm() has been modified
+to use scoring matrix values for zero frequencies as well.
+
+<hr>
+<h3>Nov 4-8, 2004</h3>
+
+Incorporation of Erik Lindahl "anti-diagonal" Altivec code for
+Smith-Waterman, only.  Altivec SSEARCH is now faster than FASTA for
+
+<h3>Aug 25,26, 2004  CVS fa34t24b3</h3>
+
+Small change in output format for <tt>p34comp*</tt> programs in
+">>>query_file#1 string" line before alignments.  This line is not present
+in the non-parallel versions - it would be better for them to be consistent.
+
+<hr>
+<h3>Dec 10, 2003  CVS fa34t23b3</h3>
+
+Cause default ktup to drop for short query sequences.  For protein queries < 50, <i>ktup=1</i>;
+for DNA queries < 20, 50, 100 <i>ktup</i> = 1, 2, 3, respectively.
+
+<h3>Dec 7, 2003</h3>
+
+A new option, "-U" is available for RNA sequence comparison.  "-U"
+functions like "-n", indicating that the query is an RNA sequence.  In
+addition, to account for "G:U" base pairs, "-U" modifies the scoring
+matrices so that a "G:A" match has the same score as a "G:G" match,
+and "T:C" match has the same score as a "T:T" match.
+
+<h3>Nov 2, 2003</h3>
+
+Support for more sophisticated display options.  Previously, one could
+have only one "-m #" option, even though several of the options were
+orthogonal (-m 9c is independent of -m 1 and -m2, which is independent
+of -m 6 (HTML)).  In particular -m 9c can be combined with -m 6, which
+can be very helpful for runs that need HTML output but can also
+exploit the encoding provided by -m 9c.
+
+The "-m 9" option now also allows "-m 9i", which shows the standard
+best score information, plus percent identity and alignment length.
+
+<h3>Sept 25, 2003</h3>
+
+A new option is available for annotating alignments.  -V '@#?!'
+can be used to annotate sites in a sequence, e.g:
+<pre>
+>GTM1_HUMAN ...
+PMILGYWDIRGLAHAIRLLLEYTDS<b>@</b>S<b>?</b>YEEKKYT@MG
+DAPDYDRS@QWLNEKFKLGLDFPNLPYLIDGAHKIT
+</pre>
+might mark known and expected (S,T) phosphorylation sites.  These
+symbols are then displayed on the query coordinate line:
+<pre>
+               10        20    <b>@?</b>  30  @     40  @     50        60
+GTM1_H PMILGYWDIRGLAHAIRLLLEYTDSSYEEKKYTMGDAPDYDRSQWLNEKFKLGLDFPNLP
+       ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
+gtm1_h PMILGYWDIRGLAHAIRLLLEYTDSSYEEKKYTMGDAPDYDRSQWLNEKFKLGLDFPNLP
+               10        20        30        40        50        60
+</pre>
+This annotation is mostly designed to display post-translational
+modifications detected by MassSpec with FASTS, but is also available
+with FASTA and SSEARCH.
+
+<h3>June 16, 2003	version: fasta34t22</h3>
+
+ssearch34 now supports PSI-BLAST PSSM/profiles.  Currently, it only
+supports the "checkpoint" file produced by blastall, and only on
+certain architectures where byte-reordering is unnecessary.  It has not
+been tested extensively with the -S option.
+<pre>
+ssearch34 -P blast.ckpt -f -11 -g -1 -s BL62 query.aa library
+</pre>
+Will use the frequency information in the blast.ckpt file to do a
+position specific scoring matrix (PSSM) search using the
+Smith-Waterman algorithm.  Because ssearch34 calculates scores for
+each of the sequences in the database, we anticipate that PSSM
+ssearch34 statistics will be more reliable than PSI-Blast statistics.
+
+The Blast checkpoint file is mostly double precision frequency
+numbers, which are represented in a machine specific way.  Thus, you
+must generate the checkpoint file on the same machine that you run
+ssearch34 or prss34 -P query.ckpt.  To generate a checkpoint file,
+run:
+<pre>
+blastpgp -j 2 -h 1e-6 -i query.fa -d swissprot -C query.ckpt -o /dev/null
+</pre>
+(This searches swissprot for 2 iterations ("-j 2") using an E()
+threshold 1e-6 saving the resulting position specific frequencies in
+query.ckpt.  Note that the original query.fa and query.ckpt must
+match.)
+
+<h3>Apr 11, 2003  CVS fa34t21b3</h3>
+
+Fixes for "-E" and "-F" with ssearch34, which were inadvertently disabled.
+<p>
+A new option, "-t t", is available to specify that all the protein
+sequences have implicit termination codons "*" at the end.  Thus, all
+protein sequences are one residue longer, and full length matches are
+extended one extra residue and get a higher score.  For
+fastx34/tfastx34, this helps extend alignments to the very end in
+cases where there may be a mismatch at the C-terminal residues.</p>
+<p>
+<tt>-m 9c</tt> has also been modified to indicate locations of termination
+codons ( *1).</p>
+
+<h3>Mar 17, 2003  CVS fa34t21b2</h3>
+
+A new option on scoring matrices "-MS" (e.g. "BL50-MS") can be used to
+turn the I/L, K/Q identities on or off.  Thus, to make "fastm34" use
+the isobaric identities, use "-s M20-MS".  To turn them off for "fasts34",
+use "-s M20".
+
+<h3>Jan 25, 2003</h3>
+
+Add option "-J start:stop" to pv34comp*/mp34comp*.  "-J x" used to
+allow one to start at query sequence "x"; now both start and stop can
+be specified.
+
+<hr>
+<h3>Nov 14-22, 2002  CVS fa34t20b6</h3>
+
+Include compile-time define (-DPGM_DOC) that causes all the fasta
+programs to provide the same command line echo that is provided by the
+PVM and MPI parallel programs.  Thus, if you run the program:
+<pre>
+fasta34_t -q -S gtt1_drome.aa /slib/swissprot 12
+</pre>
+the first lines of output from FASTA will be:
+<pre>
+# fasta34_t -q gtt1_drome.aa /slib/swissprot
+ FASTA searches a protein or DNA sequence data bank
+  version 3.4t20 Nov 10, 2002
+ Please cite:
+   W.R. Pearson & D.J. Lipman PNAS (1988) 85:2444-2448
+</pre>
+This has been turned on by default in most FASTA Makefiles.  
+
+<h3>Aug 27, 2002</h3>
+
+Modifications to mshowbest.c and drop*.c (and p2_workcomp.c,
+compacc.c, doinit.c, etc.) to provide more information about the
+alignment with the -m 9 option.  There is now a "-m 9c" option, which
+displays an encoded alignment after the -m 9 alignment information.
+The encoding is a string of the form: "=#mat+#ins=#mat-#del=#mat".
+Thus, an alignment over 218 amino acids with no gaps (not necessarily
+100% identical) would be =218.  The alignment:
+<pre>
+       10        20        30        40        50          60         70  
+GT8.7  NVRGLTHPIRMLLEYTDSSYDEKRYTMGDAPDFDRSQWLNEKFKL--GLDFPNLPYL-IDGSHKITQ
+       :.::  . :: ::  .   .:::         : .:    ::.:   .: : ..:.. :::  :..:
+XURTG  NARGRMECIRWLLAAAGVEFDEK---------FIQSPEDLEKLKKDGNLMFDQVPMVEIDG-MKLAQ
+               20        30                 40        50        60        
+</pre>
+would be encoded: "=23+9=13-2=10-1=3+1=5".  The alignment encoding is
+with respect to the beginning of the alignment, not the beginning of
+either sequence.  The beginning of the alignment in either sequence is
+given by the an0/an1 values. This capability is particularly useful
+for [t]fast[xy], where it can be used to indicate frameshift positions
+"/#\#" compactly.  If "-m 9c" is used, the "The best scores" title
+line includes "aln_code".
+
+<h3>Aug 14, 2002	CVS tag fa34t20</h3>
+
+Changes to nmgetlib.c to allow multiple query searches coming from
+STDIN, either through pipes or input redirection.  Thus, the command
+<pre>
+cat prot_test.lseg | fasta34 -q -S @ /seqlib/swissprot
+</pre>
+produces 11 searches.  If you use the multiple query functions, the
+query subset applies only to the first sequence.
+
+Unfortunately, it is not possible to search against a STDIN library,
+because the FASTA programs do not keep the entire library in memory
+and need to be able to re-read high-scoring library sequences.  Since
+it is not possible to fseek() against STDIN, searching against a STDIN
+library is not possible.
+
+<h3>Aug 5, 2002</h3>
+
+<tt><b>fasts34(_t)</b></tt> and <tt><b>fastm34(_t)</b></tt> have been modified to allow searches with
+DNA sequences.  This gives a new capability to search for DNA motifs,
+or to search for ordered or unordered DNA sequences spaced at
+arbitrary distances.
+
+<h3>June 25, 2002</h3>
+
+Modify the statistical estimation strategy to sample all the sequences
+in the database, not just the first 60,000.  The histogram is still
+based only on the first 60,000 scores and lengths, though all scores
+and lengths are shown.  The fit to the data may be better than the
+histogram indicates, but it should not be worse.
+
+<h3>June 19, 2002</h3>
+
+Added "-C #" option, where 6 <= # <= MAX_UID (20), to specify the
+length of the sequence name display on the alignment labels.  Until
+now, only 6 characters were ever displayed.  Now, up to MAX_UID
+characters are available.
+
+<h3>Mar 16, 2002</h3>
+
+Added create_seq_demo.sql, nt_to_sql.pl to show how to build an SQL
+protein sequence database that can be used with the mySQL
+versions of the fasta34 programs.  Once the mySQL seq_demo database
+has been installed, it can be searched using the command:
+<pre>
+fasta34 -q mgstm1.aa "seq_demo.sql 16"
+</pre>
+mysql_lib.c has been modified to remove the restriction that mySQL
+protein sequence unique identifiers be integers.  This allows the
+program to be used with the PIRPSD database.  The RANLIB() function
+call has been changed to include "libstr", to support SQL text keys.
+Due to the size of libstr[], unique ID's must be < MAX_UID (20)
+characters.
+<p>
+A "pirpsd.sql" file is available for searching the mySQL distribution
+of the PIRPSD database.  PIRPSD is available from
+ftp://nbrfa.georgetown.edu/pir_databases/psd/mysql.
+
+</div>
+</body>
+</html>
diff --git a/doc/changes_v35.html b/doc/changes_v35.html
new file mode 100644
index 0000000..5feaa93
--- /dev/null
+++ b/doc/changes_v35.html
@@ -0,0 +1,212 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+<title>ChangeLog - FASTA v35</title>
+<style type="text/css">
+body {  margin-left: 6px; }
+.sidebar { 
+font-size: 12px; font-family: sans-serif; text-decoration:none; background-color: #FFFFCC; }
+.fasta { font-family: sans-serif; }
+.fasta h2 { font-size: 16px; color: #000000 }
+.fasta h3 { font-size: 14px; color: #000000 }
+.fasta td {background-color: #FFFFCC }
+.fasta a { text-decoration: none; }
+.fasta li { margin-left:-1em }
+</style>
+<head>
+<body>
+<div class=fasta>
+<h2>ChangeLog - FASTA v35</h2>
+<pre><small>
+ $Id: changes_v35.html 120 2010-01-31 19:42:09Z wrp $
+ $Revision: 210 $
+</small>
+</pre>
+<hr>
+<h2>Summary - Major Changes in FASTA version 35 (August, 2007)</h2>
+<ol>
+<li>Accurate shuffle based statistics for searches of small libraries (or pairwise comparisons).
+<p/>
+<li>
+Inclusion of <b>lalign35</b> (SIM) into FASTA3.  Accurate statistics for
+<b>lalign35</b> alignments.  <b>plalign</b> has been replaced by
+<b>lalign35</b> and <b>lav2ps</b>.
+</li>
+<p/>
+<li>
+Two new global alignment programs: <b>ggsearch35</b> and <b>glsearch35</b>.
+</li>
+</ol>
+<hr>
+<h3>February 7, 2008</h3>Allow annotations in library, as well as
+query sequences.  Currently, annotations are only available within
+sequences (i.e., they are not read from the feature table), but they
+should be available in FASTA format, or any of the other ascii text
+formats (EMBL/Swissprot, Genbank, PIR/GCG).  If annotations are
+present in a library and the annotation characters includes '*', then
+the -V '*' option MUST be used.  However, special characters other
+than '*' are ignored, so annotations of '@', '%', or '@' should be
+transparent.
+<p>
+In translated sequence comparisons, annotations are only available for
+the protein sequence.
+<p>
+<h3>January 25, 2008</h3> Support protein queries and sequence
+libraries that contain 'O' (pyrrolysine) and 'U' (selenocysteine).
+('J' was supported already). Currently, 'O' is mapped automatically to
+'K' and 'U' to 'C'.
+<p />
+<h3>Dec. 13, 2007 CVS fa35_03_02m</h3>
+<p>
+Add ability to search a subset of a library using a file name and a
+list of accession/gi numbers. This version introduces a new filetype,
+10, which consists of a first line with a target filename, format, and
+accession number format-type, and optionally the accession number
+format in the database, followed by a list of accession numbers.  For
+example:
+<pre>
+	  </slib2/blast/swissprot.lseg 0:2 4|
+	  3121763
+	  51701705
+	  7404340
+	  74735515
+	  ...
+</pre>
+Tells the program that the target database is swissprot.lseg, which is
+in FASTA (library type 0) format.
+<p>
+The accession format comes after the ":".  Currently, there are four
+accession formats, two that require ordered accessions (:1, :2), and
+two that hash the accessions (:3, :4) so they do not need to be
+ordered.  The number and character after the accession format
+(e.g. "4|") indicate the offset of the beginning of the accession and
+the character that terminates the accession.  Thus, in the typical
+NCBI Fasta definition line:
+<pre>
+ >gi|1170095|sp|P46419|GSTM1_DERPT Glutathione S-transferase (GST class-mu)
+</pre>
+The offset is 4 and the termination character is '|'.  For databases
+distributed in FASTA format from the European Bioinformatics
+Institute, the offset depends on the name of the database, e.g.
+<pre>
+ >SW:104K_THEAN Q4U9M9 104 kDa microneme/rhoptry antigen precursor (p104).
+</pre>
+and the delimiter is ' ' (space, the default).
+<p>
+Accession formats 1 and 3 expect strings; accession formats 2 and 4
+work with integers (e.g. gi numbers).
+<p />
+<h3>December 10, 2007</h3>
+Provide encoded annotation information with
+-m 9c alignment summaries.  The encoded alignment information makes it
+much simpler to highlight changes in critical residues.
+<p />
+<h3>August 22, 2007</h3> <a name="lav2svg" /> A new program is
+available, <tt>lav2svg</tt>, which creates SVG (Scalable Vector
+Graphics) output.  In addition, <a href="#ps_lav"><tt>ps_lav</tt></a>,
+which was introduced May 30, 2007, has been replaced
+by <tt>lav2ps</tt>.  SVG files are more easily edited with Adobe
+Illustrator than postscript (<tt>lav2ps</tt>) files.
+<p>
+<h3>July 25, 2007  CVS fa35_02_02</h3>
+Change default gap penalties for OPTIMA5 matrix to -20/-2 from -24/-4.
+<p>
+<h3>July 23, 2007</h3>
+Add code to support sub-sequence ranges for "library"
+sequences - necessary for fully functional prss (ssearch35) and
+lalign35.  For all programs, it is now possible to specify a subset of
+both the query and the library, e.g.
+<pre>
+lalign35 -q mchu.aa:1-74 mchu.aa:75-148
+</pre>
+Note, however, that the subset range applied to the library will be
+applied to every sequence in the library - not just the first - and
+that the same subset range is applied to each sequence.  This probably
+makes sense only if the library contains a single sequence (this is
+also true for the query sequence file).
+<p>
+<h3>July 3, 2007  CVS fa35_02_01</h3>
+
+Merge of previous <tt><b>fasta34</b></tt> with development version <tt><b>fasta35</b></tt>.
+
+<h3>June 26, 2007</h3>
+
+Add amino-acid 'J' for 'I' or 'L'.
+<p>
+Add Mueller and Vingron (2000) J. Comp. Biol. 7:761-776 VT160 matrix,
+"-s VT160", and OPTIMA_5 (Kann et al. (2000) Proteins 41:498-503).
+<h3>June 7, 2007</h3>
+
+<tt><b>ggsearch35(_t)</b></tt>, <tt><b>glsearch35(_t)</b></tt> can now use PSSMs.
+
+<h3>May 30, 2007  CVS fa35_01_04</h3>
+
+<a name="ps_lav" /> Addition of <tt><b>ps_lav</b></tt>
+(now <a href="#lav2svg">lav2ps</a> or <a href="#lav2svg">lav2svg</a>) -- which can be used to plot the lav
+output of
+<tt><b>lalign35 -m 11</b></tt>.
+<pre>lalign35 -m 11 | lav2ps</pre> replaces <tt><b>plalign</b></tt>
+(from <tt><b>FASTA2</b></tt>).
+
+<h3>May 2, 2007</h3>
+
+The labels on the alignment scores are much more informative (and more
+diverse).   In the past, alignment scores looked like:
+<pre>
+>>gi|121716|sp|P10649|GSTM1_MOUSE Glutathione S-transfer  (218 aa)
+ s-w opt: 1497  Z-score: 1857.5  bits: 350.8 E(): 8.3e-97
+Smith-Waterman score: 1497; 100.0% identity (100.0% similar) in 218 aa overlap (1-218:1-218)
+^^^^^^^^^^^^^^
+</pre>
+where the highlighted text was either: "Smith-Waterman" or "banded
+Smith-Waterman". In fact, scores were calculated in other ways,
+including global/local for <tt><b>fasts</b></tt> and <tt><b>fastf</b></tt>.  With the addition of
+<tt><b>ggsearch35,</b></tt> <tt><b>glsearch35,</b></tt> and <tt><b>lalign35,</b></tt> there are many more ways to
+calculate alignments: "Smith-Waterman" (ssearch and protein fasta),
+"banded Smith-Waterman" (DNA fasta), "Waterman-Eggert",
+"trans. Smith-Waterman", "global/local", "trans. global/local",
+"global/global (N-W)".  The last option is a global global alignment,
+but with the affine gap penalties used in the Smith-Waterman
+algorithm.
+
+<h3>April 19, 2007	CVS fa34t27br_lal_3</h3>
+
+Two new programs, <tt><b>ggsearch35(_t)</b></tt> and <tt><b>glsearch35(_t)</b></tt> are now available.
+<tt><b>ggsearch35(_t)</b></tt> calculates an alignment score that is global in the
+query and global in the library; <tt><b>glsearch35(_t)</b></tt> calculates an alignment
+that is global in the query and local in the library
+sequence.  The latter program is designed for global alignments to domains.
+
+Both programs assume that scores are normally distributed.  This
+appears to be an excellent approximation for ggsearch35 scores, but
+the distribution is somewhat skewed for global/local (glsearch)
+scores.  <tt><b>ggsearch35(_t)</b></tt> only compares the query to library sequences
+that are between 80% and 125% of the length of the query; glsearch
+limits comparisons to library sequences that are longer than 80% of
+the query.  Initial results suggest that there is relatively little
+length dependence of scores over this range (scores go down
+dramatically outside these ranges).
+
+<h3>March 29, 2007     CVS fa34t27br_lal_1</h3>
+
+At last, the <tt><b>lalign</b></tt> (SIM) algorithm has been moved from <b>FASTA21</b> to
+<b>FASTA35</b>.  A <tt><b><a href="#ps_lav">plalign</a></b></tt>
+equivalent is also available using <tt>lalign -m 11 | lav2ps</tt>
+or <tt>| lav2svg</tt>.
+
+The statistical estimates for <tt>lalign35</tt> should be much more accurate
+than those from the earlier lalign, because lambda and K are estimated
+from shuffles.
+
+In addition, all programs can now generate accurate statistical
+estimates with shuffles if the library has fewer than 500 sequences.
+If the library contains more than 500 sequences and the sequences are
+related, then the -z 11 option should be used.
+<hr>
+<a href="changes_v34.html">FASTA v34 Change Log</a>
+<p> </p>
+</div>
+</body>
+</html>
diff --git a/doc/changes_v36.html b/doc/changes_v36.html
new file mode 100644
index 0000000..e45529a
--- /dev/null
+++ b/doc/changes_v36.html
@@ -0,0 +1,467 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head><title>ChangeLog - FASTA v36</title>
+<style type="text/css">
+body {  margin-left: 6px; }
+.sidebar { 
+font-size: 12px; font-family: sans-serif; text-decoration:none; background-color: #FFFFCC; }
+.fasta { font-family: sans-serif; }
+.fasta h2 { font-size: 16px; color: #000000 }
+.fasta h3 { font-size: 14px; color: #000000 }
+.fasta td {background-color: #FFFFCC }
+.fasta a { text-decoration: none; }
+.fasta li { margin-left:-1em }
+</style>
+</head>
+<body>
+<div class=fasta>
+<h2>ChangeLog - FASTA v36</h2>
+<pre><small>
+ $Id: changes_v36.html $
+</small>
+</pre>
+<hr>
+<h2>Latest Updates - FASTA version 36.3.8d (April, 2016)</h2>
+<ol>
+<li>
+The <tt>fasta-36.3.8d/scripts/</tt> directory now provides a
+script, <tt>annot_blast_btop2.pl</tt> that allows annotations and
+sub-alignment scoring on BLAST alignments that use the tabular format
+with BTOP alignment encoding.
+<p>
+  <li>
+    Bug fixes for overlapping domain scoring.  v36.3.7 was not thread-safe.
+  <li>
+    Annotation scripts accessing the Pfam domain database can now use
+    the <tt>--vdoms</tt> option to highlight missing parts of a Pfam
+    domain model. In addition, domains from clans are labeled as clans
+    unless <tt>--no-clans</tt> is specified.
+ </ol>
+<h2>Updates - FASTA version 36.3.7 (November, 2014)</h2>
+<ol>
+  <li>The FASTA programs have been released under the Apache2.0 Open
+  Source License.  The COPYRIGHT file, and copyright notices in
+  program files, have been updated to reflect this change.
+    <p>
+<li>Alignment sub-scoring scripts have been extended to allow
+overlapping domains.  This requires a modified annotation file format.
+  The "classic" format placed the beginning and end of a domain on different lines:
+<pre>
+    1   [    -     GST_N
+   88   ]    -
+   90   [    -     GST_C
+  208   ]    - 
+</pre>
+Since the closing "]" was associated with the previous "[", domains could not overlap.
+<p>
+The new format is:
+<pre>
+    1   -    88     GST_N
+   90   -   208    GST_C
+</pre>
+which allows annotations of the form:
+<pre>
+    1   -    88    GST_N
+   75   -   123    GST-middle
+   90   -   208    GST_C
+</pre>
+<p>
+  <li> New annotation scripts are available in
+  the <tt>fasta-36.3.7/scripts</tt> directory,
+  e.g. <tt>ann_pfam_www_e.pl</tt> (Pfam) and <tt>ann_up_www2_e.pl</tt>
+  (Uniprot) to support this new format.  If the domain annotations
+  provided by Pfam or Uniprot overlap, then overlapping domains are
+  provided.  The new <tt>_e.pl</tt> scripts can be directed to provide
+  non-overlapping domains, using the boundary averaging strategy in
+  the older scripts, by specifying the <tt>--no-over</tt> option.
+</ol>
+      
+<h2>Updates - FASTA version 36.3.6f (August, 2014)</h2>
+<p>
+FASTA version 36.3.6f extends previous versions in several ways:
+<ol>
+<li>
+There is a new command line option, <tt>-XI</tt>, that causes the
+alignment programs to report 100% identity only when there are no
+mismatches.  In previous versions, one mismatch in 10,000 would round
+up to 100.0% identity; with <tt>-XI</tt>, the identity will be
+reported as 99.9%.
+<li>
+The option to provide alignment encodings (-m 9c, or -m 9C for CIGAR
+strings) has been extended to provide mis-match information in the
+alignment encoding using the -m 9d (classic FASTA alignment encoding)
+or -m 9D (CIGAR string).  For protein alignments, which are often &lt;
+40% identity, enabling mismatch encoding produces very long CIGAR
+strings.
+<li>
+Provide more scripts for annotating proteins using either UniProt or
+Pfam web resources.
+</ol>
+<p>
+Additional bug fixes are documented in <tt>fasta-36.3.6f/doc/readme.v36</tt>
+<p>
+<h2>Updates - FASTA version 36.3.6 (July, 2013)</h2>
+<p>
+FASTA version 36.3.6 provides two new features:
+<ol>
+<li>
+A new script-based strategy for including annotation information.
+<li>
+Domain annotation information can be used to partition the
+alignment, and partition the scores of the alignment (sub-alignment
+scores).  Sub-alignment scores can be used to identify regions of
+alignment over-extension, where a homologous domain aligns, but the
+alignment extends beyond the homologous region into an adjacent
+non-homologous domain.
+</ol>
+Several scripts are provided (e.g. scripts/ann_feats_up_www.pl) that
+can be used to add Uniprot feature and domain annotations to searches
+of SwissProt and Uniprot.
+
+<p><i>(fasta-36.3.5 January 2013)</i>
+The NCBI's transition from BLAST to BLAST+ several years ago broke the
+ability of <tt>ssearch36</tt> to use PSSMs, because <tt>psiblast</tt>
+did not produce the binary ASN.1 PSSMs that <tt>ssearch36</tt> could
+parse. With the January 2013 <tt>fasta-36.3.5f</tt>
+release, <tt>ssearch36</tt> can read binary ASN.1 PSSM files produced
+by the NCBI <tt>datatool</tt> utility.
+See <a href='fasta_guide.pdf'>fasta_guide.pdf</a> for more information
+(look for the <tt>-P</tt> option).
+<hr>
+<h2>Summary - Major Changes in FASTA version 36.3.5 (May, 2011)</h2>
+<ol>
+<li>
+  By default, the FASTA36 programs are no longer interactive.  Typing
+<tt>fasta36</tt> presents a short help message, and
+<tt>fasta36 -help</tt> presents a complete list of options.  To see the interactive prompts, use
+<tt>fasta36 -I</tt>.
+<p>Likewise, the score histogram is no longer shown by default; use
+  the <tt>-H</tt> option to show the histogram (or compile with
+  -DSHOW_HIST for previous behavior).
+<p>
+The <tt>_t</tt> (<tt>fasta36_t</tt>) versions of the programs are
+built automatically on Linux/MacOSX machines and
+named <tt>fasta36</tt>, etc. (the programs are threaded by default,
+and only one program version is built).
+<p>
+Documentation has been significantly revised and updated.
+See <tt>doc/fasta_guide.pdf</tt> for a description of the programs and options.
+<p>
+<li>
+  Display of all significant alignments between query and library
+  sequence.  BLAST has always displayed multiple high-scoring
+  alignments (HSPs) between the query and library sequence; previous
+  versions of the FASTA programs displayed only the best alignment,
+  even when other high-scoring alignments were present.  This is the
+  major change in FASTA36. For most programs
+  (<tt>fasta36</tt>, <tt>ssearch36</tt>,
+  <tt>[t]fast[xy]36</tt>), if the library sequence contains additional
+  significant alignments, they will be displayed with the alignment
+  output, and as part of <tt>-m 9</tt> output (the initial list of high
+  scores).
+<p>
+  By default, the statistical threshold for alternate alignments
+  (HSPs) is the E()-threshold / 10.0.  For proteins, the default
+  expect threshold is E() &lt; 10.0; the secondary threshold for showing
+  alternate alignments is thus E() &lt; 1.0.  For translated
+  comparisons, the E()-thresholds are 5.0/0.5; for DNA:DNA 2.0/0.2.
+<p>
+  Both the primary and secondary E()-thresholds are set with the
+  -E "prim sec" command line option.  If the secondary
+  value is between zero and 1.0, it is taken as the actual
+  threshold. If it is > 1.0, it is taken as a divisor for the primary
+  threshold. If it is negative, alternative alignments are disabled
+  and only the best alignment is shown.
+<p>
+<li>
+  New statistical options, <tt>-z 21, 22, 26</tt>, provide a second E()-value
+  estimate based on shuffles of the highest scoring sequences.
+<p>
+<li>
+New output options.  <tt>-m 8</tt> provides the same output format as
+tabular BLAST; <tt>-m 8C</tt> mimics tabular blast with comment
+lines.  <tt>-m 9C</tt> provides CIGAR encoded alignments.
+<p>
+(fasta-36.3.4) Alignment option <tt>-m B</tt> provides BLAST-like alignments (no context, coordinates at the beginning and end of the alignment line, <tt>Query/Sbjct</tt>).
+<p>
+<li>
+ Improved performance using statistics based thresholds for
+ gap-joining and band-optimization in the heuristic FASTA local
+ alignment programs (<tt>fasta36</tt>, <tt>[t]fast[xy]36</tt>). By
+ default (fasta36.3) <tt>fasta36</tt>, <tt>[t]fast[xy]36</tt> can use
+ a similar strategy to BLAST to set the thresholds for combining
+ ungapped regions and performing band alignments.  This dramatically
+ reduces the number of band alignments performed, for a speed increase
+ of 2 - 3X.  The original statistical thresholds can be enabled with
+ the <tt>-c O</tt> (upper-case letter 'O') command line option.
+ Protein and translated protein alignment programs can also use ktup=3
+ for increased speed, though ktup=2 is still the default.
+<p>
+ Statistical thresholds can dramatically reduce the number of
+ "optimized" scores, from which statistical estimates are calculated.
+ To address this problem, the statistical estimation procedure has
+ been adjusted to correct for the fraction of scores that were
+ optimized. This process can dramatically improve statistical accuracy
+ for some matrices and gap penalties, e.g. BLOSUM62 -11/-1.
+<p>
+ With the new joining thresholds, the
+<tt>-c "E-opt E-join"</tt> options have expanded meanings.  <tt>-c "E-opt E-join"</tt>
+ calculates a threshold designed (but not guaranteed) to do band
+ optimization and joining for that fraction of sequences.  Thus, <tt>-c
+ "0.02 0.1"</tt> seeks to do band optimization (E-opt) on 2% of alignments,
+ and joining on 10% of alignments.  <tt>-c "40 10"</tt> sets the gap
+ threshold as in earlier versions.
+<p>
+<li>
+A new option (<tt>-e expand_script.sh</tt>) is available that allows
+the set of sequences that are aligned to be larger than the set of
+sequences searched. When the <tt>-e expand_script.sh</tt> option is
+used, the <tt>expand_script.sh</tt> script is run with an input
+argument that is a file of accession numbers and E()-values; this
+information can be used to produce a fasta-formatted list of
+additional sequences, which will then be compared and aligned (if they
+are significant), and included in the list of high scoring sequences
+and the alignments.  The expanded set of sequences does not change the
+database size or statistical parameters; it simply expands the set of
+high-scoring sequences.
+<p>
+<li>
+The <tt>-m F</tt> option can be used to produce multiple output formats in different files from the same search.  For example, <tt>-m "F9c,10 m9c10.output" -m "FBB blastBB.output"</tt> produces two output files in addition to the normally formatted output sent to <tt>stdout</tt>. The <tt>m9c10.output</tt> file contains <tt>-m 9c</tt> score descriptions and <tt>-m 10</tt> alignments, while <tt>blastBB.output</tt> contains BLAST-like output (<tt>-m BB</tt>).
+<p>
+<li>
+ Scoring matrices can vary with query sequence length. In large-scale
+ searches with metagenomics reads, some reads may be too short to
+ produce statistically significant scores against comprehensive
+ databases (e.g. a DNA read of 90 nt is translated into 30 aa, which
+ would require a scoring matrix with at least 1.3 bits/position to
+ produce a 40 bit score).  fasta-36.3.* includes the option to specify
+ a "variable" scoring matrix by including '?' as the first letter of
+ the scoring matrix abbreviation, e.g. fasta36_t -q -s '?BP62' would
+ use BP62 for sequences long enough to produce significant alignment
+ scores, but would use scoring matrices with more information content
+ for shorter sequences.  The FASTA programs include BLOSUM50 (0.49
+ bits/pos) and BLOSUM62 (0.58 bits/pos) but can range to MD10 (3.44
+ bits/position). The variable scoring matrix option searches down the
+ list of scoring matrices to find one with information content high
+ enough to produce a 40 bit alignment score. (Several bugs in the
+ process are fixed in fasta-36.3.2.)
+<p>
+
+<li>
+Several less-used options
+(<tt>-1</tt>, <tt>-B</tt>, <tt>-o</tt>, <tt>-x</tt>, <tt>-y</tt>) have
+become <i>extended</i> options, available via the <tt>-X</tt> (upper case X) option.
+The old <tt>-X off1,off2</tt> option is now <tt>-o off1,off2</tt>.
+<p>
+By default, the program will read up to 2 GB (32-bit systems) or 12 GB
+(64-bit systems) of the database into memory for multi-query searches.
+The amount of memory available for databases can be set with
+the <tt>-XM4G</tt> option.
+<p>
+<li>
+ Much greater flexibility in specifying combinations of library files
+ and subsets of libraries.  It has always been possible to search a
+ list of libraries specified by an indirect (@) file; the FASTA36
+ programs can include indirect files of library names inside of
+ indirect files of library names.
+<p>
+<li>
+ <tt>fasta-36.3.2</tt> <b>ggsearch36</b> (global/global)
+ and <b>glsearch36</b> now incorporate SSE2 accelerated global
+ alignment, developed by Michael Farrar.  These programs are now about
+ 20-fold faster.
+<p>
+<li>
+<tt>fasta-36.2.1</tt> (and later versions) are fully threaded, both for
+searches, and for alignments.  The programs routinely run 12 - 15X
+faster on dual quad-core machines with "hyperthreading".
+</ol>
+<hr>
+<h2>Summary - Major Changes in FASTA version 35 (August, 2007)</h2>
+<ol>
+<li>Accurate shuffle based statistics for searches of small libraries (or pairwise comparisons).
+<p/>
+<li>
+Inclusion of <b>lalign35</b> (SIM) into FASTA3.  Accurate statistics for
+<b>lalign35</b> alignments.  <b>plalign</b> has been replaced by
+<b>lalign35</b> and <b>lav2ps</b>.
+</li>
+<p/>
+<li>
+Two new global alignment programs: <b>ggsearch35</b> and <b>glsearch35</b>.
+</li>
+</ol>
+<hr>
+<h3>February 7, 2008</h3>Allow annotations in library, as well as
+query sequences.  Currently, annotations are only available within
+sequences (i.e., they are not read from the feature table), but they
+should be available in FASTA format, or any of the other ascii text
+formats (EMBL/Swissprot, Genbank, PIR/GCG).  If annotations are
+present in a library and the annotation characters include '*', then
+the -V '*' option MUST be used.  However, special characters other
+than '*' are ignored, so annotations of '@', '%', or '@' should be
+transparent.
+<p>
+In translated sequence comparisons, annotations are only available for
+the protein sequence.
+<p>
+<h3>January 25, 2008</h3> Support protein queries and sequence
+libraries that contain 'O' (pyrrolysine) and 'U' (selenocysteine).
+('J' was supported already). Currently, 'O' is mapped automatically to
+'K' and 'U' to 'C'.
+<p />
+<h3>Dec. 13, 2007 CVS fa35_03_02m</h3>
+<p>
+Add ability to search a subset of a library using a file name and a
+list of accession/gi numbers. This version introduces a new filetype,
+10, which consists of a first line with a target filename, format, and
+accession number format-type, and optionally the accession number
+format in the database, followed by a list of accession numbers.  For
+example:
+<pre>
+	  </slib2/blast/swissprot.lseg 0:2 4|
+	  3121763
+	  51701705
+	  7404340
+	  74735515
+	  ...
+</pre>
+Tells the program that the target database is swissprot.lseg, which is
+in FASTA (library type 0) format.
+<p>
+The accession format comes after the ":".  Currently, there are four
+accession formats, two that require ordered accessions (:1, :2), and
+two that hash the accessions (:3, :4) so they do not need to be
+ordered.  The number and character after the accession format
+(e.g. "4|") indicate the offset of the beginning of the accession and
+the character that terminates the accession.  Thus, in the typical
+NCBI Fasta definition line:
+<pre>
+ >gi|1170095|sp|P46419|GSTM1_DERPT Glutathione S-transferase (GST class-mu)
+</pre>
+The offset is 4 and the termination character is '|'.  For databases
+distributed in FASTA format from the European Bioinformatics
+Institute, the offset depends on the name of the database, e.g.
+<pre>
+ >SW:104K_THEAN Q4U9M9 104 kDa microneme/rhoptry antigen precursor (p104).
+</pre>
+and the delimiter is ' ' (space, the default).
+<p>
+Accession formats 1 and 3 expect strings; accession formats 2 and 4
+work with integers (e.g. gi numbers).
+<p />
+<h3>December 10, 2007</h3>
+Provide encoded annotation information with
+-m 9c alignment summaries.  The encoded alignment information makes it
+much simpler to highlight changes in critical residues.
+<p />
+<h3>August 22, 2007</h3> <a name="lav2svg" /> A new program is
+available, <tt>lav2svg</tt>, which creates SVG (Scalable Vector
+Graphics) output.  In addition, <a href="#ps_lav"><tt>ps_lav</tt></a>,
+which was introduced May 30, 2007, has been replaced
+by <tt>lav2ps</tt>.  SVG files are more easily edited with Adobe
+Illustrator than postscript (<tt>lav2ps</tt>) files.
+<p>
+<h3>July 25, 2007  CVS fa35_02_02</h3>
+Change default gap penalties for OPTIMA5 matrix to -20/-2 from -24/-4.
+<p>
+<h3>July 23, 2007</h3>
+Add code to support sub-sequence ranges for "library"
+sequences - necessary for fully functional prss (ssearch35) and
+lalign35.  For all programs, it is now possible to specify a subset of
+both the query and the library, e.g.
+<pre>
+lalign35 -q mchu.aa:1-74 mchu.aa:75-148
+</pre>
+Note, however, that the subset range applied to the library will be
+applied to every sequence in the library - not just the first - and
+that the same subset range is applied to each sequence.  This probably
+makes sense only if the library contains a single sequence (this is
+also true for the query sequence file).
+<p>
+<h3>July 3, 2007  CVS fa35_02_01</h3>
+
+Merge of previous <tt><b>fasta34</b></tt> with development version <tt><b>fasta35</b></tt>.
+
+<h3>June 26, 2007</h3>
+
+Add amino-acid 'J' for 'I' or 'L'.
+<p>
+Add Mueller and Vingron (2000) J. Comp. Biol. 7:761-776 VT160 matrix,
+"-s VT160", and OPTIMA_5 (Kann et al. (2000) Proteins 41:498-503).
+<h3>June 7, 2007</h3>
+
+<tt><b>ggsearch35(_t)</b></tt>, <tt><b>glsearch35(_t)</b></tt> can now use PSSMs.
+
+<h3>May 30, 2007  CVS fa35_01_04</h3>
+
+<a name="ps_lav" /> Addition of <tt><b>ps_lav</b></tt>
+(now <a href="#lav2svg">lav2ps</a> or <a href="#lav2svg">lav2svg</a>) -- which can be used to plot the lav
+output of
+<tt><b>lalign35 -m 11</b></tt>.
+<pre>lalign35 -m 11 | lav2ps</pre> replaces <tt><b>plalign</b></tt>
+(from <tt><b>FASTA2</b></tt>).
+
+<h3>May 2, 2007</h3>
+
+The labels on the alignment scores are much more informative (and more
+diverse).   In the past, alignment scores looked like:
+<pre>
+>>gi|121716|sp|P10649|GSTM1_MOUSE Glutathione S-transfer  (218 aa)
+ s-w opt: 1497  Z-score: 1857.5  bits: 350.8 E(): 8.3e-97
+Smith-Waterman score: 1497; 100.0% identity (100.0% similar) in 218 aa overlap (1-218:1-218)
+^^^^^^^^^^^^^^
+</pre>
+where the highlighted text was either: "Smith-Waterman" or "banded
+Smith-Waterman". In fact, scores were calculated in other ways,
+including global/local for <tt><b>fasts</b></tt> and <tt><b>fastf</b></tt>.  With the addition of
+<tt><b>ggsearch35,</b></tt> <tt><b>glsearch35,</b></tt> and <tt><b>lalign35,</b></tt> there are many more ways to
+calculate alignments: "Smith-Waterman" (ssearch and protein fasta),
+"banded Smith-Waterman" (DNA fasta), "Waterman-Eggert",
+"trans. Smith-Waterman", "global/local", "trans. global/local",
+"global/global (N-W)".  The last option is a global/global alignment,
+but with the affine gap penalties used in the Smith-Waterman
+algorithm.
+
+<h3>April 19, 2007	CVS fa34t27br_lal_3</h3>
+
+Two new programs, <tt><b>ggsearch35(_t)</b></tt> and <tt><b>glsearch35(_t)</b></tt> are now available.
+<tt><b>ggsearch35(_t)</b></tt> calculates an alignment score that is global in the
+query and global in the library; <tt><b>glsearch35(_t)</b></tt> calculates an alignment
+that is global in the query and local in the library
+sequence.  The latter program is designed for global alignments to domains.
+
+Both programs assume that scores are normally distributed.  This
+appears to be an excellent approximation for ggsearch35 scores, but
+the distribution is somewhat skewed for global/local (glsearch)
+scores.  <tt><b>ggsearch35(_t)</b></tt> only compares the query to library sequences
+that are between 80% and 125% of the length of the query; glsearch
+limits comparisons to library sequences that are longer than 80% of
+the query.  Initial results suggest that there is relatively little
+length dependence of scores over this range (scores go down
+dramatically outside these ranges).
+
+<h3>March 29, 2007     CVS fa34t27br_lal_1</h3>
+
+At last, the <tt><b>lalign</b></tt> (SIM) algorithm has been moved from <b>FASTA21</b> to
+<b>FASTA35</b>.  A <tt><b><a href="#ps_lav">plalign</a></b></tt>
+equivalent is also available using <tt>lalign -m 11 | lav2ps</tt>
+or <tt>| lav2svg</tt>.
+
+The statistical estimates for <tt>lalign35</tt> should be much more accurate
+than those from the earlier lalign, because lambda and K are estimated
+from shuffles.
+
+In addition, all programs can now generate accurate statistical
+estimates with shuffles if the library has fewer than 500 sequences.
+If the library contains more than 500 sequences and the sequences are
+related, then the -z 11 option should be used.
+<hr>
+<a href="changes_v34.html">FASTA v34 Change Log</a>
+<p> </p>
+</div>
+</body>
+</html>
diff --git a/doc/fasta.defaults b/doc/fasta.defaults
new file mode 100644
index 0000000..7404c57
--- /dev/null
+++ b/doc/fasta.defaults
@@ -0,0 +1,17 @@
+#pgm	mol	matrix	g_open	g_ext	fr_shft	e_cut	ktup
+#	-n/-p	-s	-f	-g	-h/-j	-E	argv[3]
+fasta	prot	BL50	-10	-2	-	10.0	2
+fasta	dna	+5/-4	-14	-4	-	2.0	6
+ssearch	prot	bl50	-10	-2	-	10.0	-
+ssearch	dna	+5/-4	-14	-4	-	2.0	-
+fastx	prot	BL50	-12	-2	-20	5.0	2
+fasty	prot	BL50	-12	-2	-20/-24	5.0	2
+tfastx	dna	BL50	-14	-2	-20	5.0	2
+tfasty	dna	BL50	-14	-2	-20/-24	5.0	2
+fasts	prot	MD20-MS	-	-	-	5.0	-
+tfasts	prot	MD10-MS	-	-	-	2.0	-
+fastf	prot	MD20	-	-	-	5.0	-
+tfastf	prot	MD10	-	-	-	2.0	-
+fastm	prot	MD20	-	-	-	5.0	-
+tfastm	prot	MD10	-	-	-	2.0	-
+lalign	prot	BL50	-12	-2		10.0	-
diff --git a/doc/fasta.history.tex b/doc/fasta.history.tex
new file mode 100644
index 0000000..015f37c
--- /dev/null
+++ b/doc/fasta.history.tex
@@ -0,0 +1,180 @@
+\begin{longtable}{p{0.75 in}p{5.25 in}}
+\multicolumn{2}{c}{\textbf{FASTA version history (cont.)}} \\
+\hline\\[-1.0ex]
+% \textbf{Date} & {\bf Improvements} \\[0.5ex] \hline \\[-1.5ex]
+\endhead
+\multicolumn{2}{l}{{\Large {\bf B FASTA version history}}} \\[2 ex]
+\hline\\[-1.0ex]
+% {\bf Date} & {\bf Improvements} \\[0.5ex] \hline \\[-1.5ex]
+\endfirsthead
+\hline\\
+& \\
+\endfoot
+\hline\\
+& \\
+\endlastfoot
+
+\multicolumn{2}{c}{ \FASTA v33, Oct, 1999 -- Dec, 2000 } \\[1 ex]
+\hline \\[-0.5 ex]
+
+Oct 1999 & Add support for NCBI Blast2.0 formatted libraries, and
+memory mapped databases.  \FASTA now reads both \texttt{BLAST1.4} and
+\texttt{BLAST2.0} formatted databases. (version 3.2t08)\\ & Include
+Maximum Likelihood Estimates for Lambda and K ( -z 2) \\
+
+ & Include a new strategy for searching with low
+complexity regions.  The \texttt{pseg} program can produce libraries
+with low complexity regions as lower case characters, which can be
+ignored during the initial \texttt{FASTA}/\texttt{SSEARCH} scan, but are considered when
+producing the final alignments. (3.3t01)\\
+
+ & Change output to report bit scores, which are also  used by BLAST. \\
+
+Mar 2000 & Another new statistics option, -z 6, uses Mott's
+approach \cite{mot921} for calculating a
+composition dependent Lambda for each sequence. (3.3t05) \\
+
+Dec 2000 & Automatically change the gap penalties when alternate
+(known) scoring matrices are used using Reese and Pearson gap
+penalties \cite{wrp022}. First implementation to read from MySQL
+databases. \\ May 2001 & change all \FASTA gap penalties from
+first-residue, additional residue to the gap-open, gap-extend values
+used by BLAST. \\[0.5ex]
+
+\hline \\[-0.5 ex]
+\multicolumn{2}{c}{ \FASTA v34, Jan, 2001 -- Jan, 2007 } \\[1 ex]
+\hline \\[-0.5 ex]
+
+Jun 2002 & Modify statistical estimation strategy to sample all the
+sequences in the database, not just the first 60,000. (3.4t11) \\
+
+Jan 2003 & Implementation of vector-accelerated (Altivec) code for
+Smith-Waterman ({\tt SSEARCH}) and banded Smith-Waterman (\FASTA)
+using the Rognes and Seeberg \cite{rog003} algorithm.  This code was
+removed in Sept, 2003, because of possible conflict with a patent
+application, but was restored using a different algorithm in
+Nov. 2004. \\
+
+Jun 2003 & Provide \texttt{PSI-SEARCH} --- an implementation of
+\texttt{SSEARCH} that can search with \texttt{PSI-BLAST} PSSM profile
+files.  \texttt{PSI-SEARCH} estimates statistical significance from
+the distribution of actual alignment scores; thus the estimates are
+much more reliable than \texttt{PSI-BLAST} estimates.  Also, change
+the similarity display to work with profiles. (3.4t22) \\
+
+July 2003 & Provide ASN.1 definition line parsing for \texttt{BLAST}
+{\tt formatdb} v.4 libraries.  Restructure the programs to use a table-driven
+approach to parameter setting.   Two tables now define the algorithm,
+query sequence type, library type, scoring matrix, and gap penalties for
+all programs.  \\
+
+Sept 2003 & A new option {\tt -V} for annotating alignments
+provided. Designed for highlighting post-translational modifications
+with {\tt fasts}, it can also be used to highlight active sites and
+other conserved residues. (3.4t23) \\
+
+Dec 2003 & Addition of {\tt -U} option for RNA sequence
+comparison. {\tt G:A} matches score like {\tt G:G} matches to account
+for {\tt G:U} basepairs.  Change default {\it ktup} for short query
+sequences.  Increase band-width for DNA banded final alignments. \\
+
+July 2004 & Allow searching of \texttt{Postgres}, as well as
+\texttt{MySQL} database queries. \\
+
+Nov 2004 & (\texttt{fa34t24}) Incorporation of Erik Lindahl "anti-diagonal" Altivec
+implementation of \cite{woz974} for Smith-Waterman only.  Altivec
+{\tt ssearch34} is now faster than {\tt fasta34} for query sequences $<$ 250 amino acids. \\
+
+Jan 2005 & Change {\tt FASTS} to accommodate very large numbers of
+peptides ($>$100) for full coverage on long proteins \\
+
+Jun. 2006 & (\texttt{fa34t26}) Incorporation of Smith-Waterman
+algorithm for the SSE2 vector instructions written by Michael Farrar
+\cite{farrar2007}.  The SSE code speeds up Smith-Waterman 8 --
+16-fold. \\[1.0 ex]
+
+\hline \\[-0.5 ex]
+\multicolumn{2}{c}{ \FASTA v35, March, 2007 -- March, 2010 } \\[1 ex]
+\hline \\[-0.5 ex]
+
+Mar. 2007 & fasta v35 -- Accurate shuffle-based $E()$-values for all searches and alignments; statistics from searches against small libraries are supplemented with shuffled alignments.\\[1 ex]
+
+ & More efficient threading strategies on multi-core computers, for 12X speedup on 16-core machines.\\[1 ex]
+
+ & Inclusion of \texttt{lalign} (\texttt{SIM}) local domain alignments. \texttt{lalign} alignments now have accurate shuffle-based $E()$-values.\\[1 ex]
+
+Apr. 2007 & Introduction of \texttt{ggsearch}, for global alignment searches, and \texttt{glsearch}, for searches with scores that are global in the query and local in the library.  \texttt{ggsearch} and \texttt{glsearch} calculate $E()$-values using the normal distribution.  Both programs can search with \texttt{PSI-BLAST} PSSMs.\\[1 ex]
+
+Dec. 2007 & Efficient strategy for searching subsets of databases (lists of GI or accession numbers) \\[1 ex]
+
+Feb. 2008 & Annotations in either query or library sequences can be highlighted in the alignment, and the state of annotated residues is compactly summarized with \texttt{-m 9c}. \\[1 ex]
+
+Oct. 2008 & Modification of \texttt{lsim4.c} (\texttt{lalign35}) provided by Xiaoqiu Huang to ensure
+that self-alignments do not cross the identity diagonal. \\[1ex]
+%\pagebreak
+\hline \\[-0.5 ex]
+\multicolumn{2}{c}{ \FASTA v36, March, 2010 -- } \\[1 ex]
+\hline \\[-0.5 ex]
+
+Mar. 2010 & \FASTA v36 displays all significant alignments between
+query and library sequence.  BLAST has always displayed multiple
+high-scoring alignments (HSPs) between the query and library sequence;
+previous versions of the FASTA programs displayed only the best
+alignment, even when other high-scoring alignments were present.\\[1
+  ex]
+
+&  New statistical options, \texttt{-z 21, 22, 26}, provide a second $E2()$-value
+estimate based on shuffles of the highest scoring sequences. \\[1 ex]
+
+  &  Improved performance using statistics-based thresholds for
+  gap-joining and band-optimization in the heuristic FASTA local
+  alignment programs, increasing speed 2 - 3X. \\[1 ex]
+
+  &  Greater flexibility in specifying combinations of library files
+  and subsets of libraries.  \FASTA v36
+  programs can include indirect files of library names inside of
+  indirect files of library names. \\[1 ex]
+
+  & \FASTA36 programs are fully threaded, both for
+  searches, and for alignments.  The programs routinely run 12 - 15X
+  faster on 8-core machines with "hyperthreading" (effectively 16 cores).
+  \\[1 ex]
+
+ & \texttt{-z 21} .. \texttt{26} E2() statistical estimates from
+  shuffled best scores.\\[1.0ex]
+
+Sep. 2010 & \texttt{-m 8}, \texttt{-m 8C} BLAST tabular output. \\[1.0ex]
+
+Nov, 2010 & Variable scoring matrices (\texttt{-s ?BP62}).\\[1.0ex]
+
+Dec, 2010 & (\texttt{fasta-36.3.1}) SSE2 vectorized \texttt{ggsearch36}, \texttt{glsearch36} (Michael Farrar).\\[1.0ex]
+
+Jan, 2011 & (\texttt{fasta-36.3.2}) MPI versions implemented and tested.\\[1ex]
+
+Feb, 2011 & Introduce \texttt{-m B}, \texttt{-m BB} BLAST-like output.\\[1.0ex]
+
+Mar, 2011 & (\texttt{fasta-36.3.4}) Program is no longer interactive by
+default. \texttt{fasta36 -h} and \texttt{fasta36 -help} provide
+common/complete options, with many defaults. \texttt{doc/fasta\_guide.pdf} available.\\[1.0ex]
+
+May, 2011 & (\texttt{fasta-36.3.5}) Introduce (1) \texttt{-e
+  expand.sh} scripts to extend the effective size of the database
+searched, based on significant hits; (2) \texttt{-m "F\# output.file"}
+to send different output formats to different files; and (3)
+\texttt{-X} expanded options, \texttt{-o} replaces the old \texttt{-X}
+and \texttt{-Xo} replaces \texttt{-o}. \\[1.0ex]
+
+Jan, 2012 & Include \texttt{.fastq} files as library type 7 \\[1.0ex]
+
+May, 2012 & allow reverse-complement alignments with \texttt{ggsearch} and \texttt{glsearch} \\[1.0ex]
+
+Jun, 2012 & Introduce \texttt{-V !script.pl} driven alignments, and variant scoring.\\[1.0ex]
+
+Aug, 2012 & Introduce \texttt{-V !ann\_feats.pl} sub-alignment (region-based) scoring.\\[1.0ex]
+
+Apr, 2013 & Extend \texttt{ENV} options to introduce a domain-plotting option for FASTA web sites.\\[1.0ex]
+
+Nov, 2014 & (\texttt{fasta-36.3.7}) Allow overlapping domains in annotation scripts.\\[1.0ex]
+
+\hline
+\end{longtable}
diff --git a/doc/fasta.options b/doc/fasta.options
new file mode 100644
index 0000000..bfc7b17
--- /dev/null
+++ b/doc/fasta.options
@@ -0,0 +1,55 @@
+doinit.c
+  case 'B': m_msg->z_bits = 0;
+  case 'C': m_msg->nmlen
+  case 'D': ppst->debug_lib = 1;
+  case 'F': m_msg->e_low
+  case 'H':  m_msg->nohist = 0
+  case 'i':  m_msg->revcomp = 1
+  case 'l':  m_msg->flstr
+  case 'L':  m_msg->long_info = 1
+  case 'm':  m_msg->markx
+  case 'N':  m_msg->maxn
+  case 'O':  m_msg->outfile
+  case 'q':
+  case 'Q':  m_msg->quiet = 1;
+  case 'R':  m_msg->dfile
+  case 'T':  max_workers
+             PCOMPLIB: worker_1,worker_n
+  case 'v':  ppst->zs_win
+  case 'w':  m_msg->aln.llen
+  case 'W':  m_msg->aln.llcntx
+  case 'X':  m_msg->sq0off,&m_msg->sq1off
+  case 'z':  ppst->zsflag
+  case 'v':  ppst->zs_win
+  case 'V':  m_msg->ann_arr
+  case 'Z':  ppst->zdb_size
+
+initfa.c
+  case '1': ppst->param_u.fa.iniflag=1;
+  case '3': m_msg->nframe = 3; /* TFASTA */
+	    m_msg->nframe = 1;	/* for TFASTXY */
+	    m_msg->qframe = 1;  /* for FASTA, FASTX */
+  case 'a': m_msg->aln.showall = 1;
+  case 'A': ppst->sw_flag= 1;
+  case 'b': m_msg->mshow
+  case 'c': ppst->param_u.fa.optcut
+  case 'd': m_msg->ashow;
+  case 'E': m_msg->e_cut, m_msg->e_cut_r
+  case 'f': ppst->gdelval
+  case 'g': ppst->ggapval
+  case 'h': help /ppst->gshift (-USHOW_HELP)
+  case 'I': m_msg->self = 1
+  case 'j': ppst->gshift, ppst->gsubs
+  case 'k': m_msg->shuff_max
+  case 'K': ppst->max_repeat
+  case 'M': m_msg->n1_low,&m_msg->n1_high
+  case 'n': m_msg->qdnaseq = SEQT_DNA (1)
+  case 'o': ppst->param_u.fa.optflag = 0;
+  case 'p': m_msg->qdnaseq = SEQT_PROT (0);
+  case 'r': ppst->p_d_mat,&ppst->p_d_mis
+  case 's': standard_pam(smstr); ppst->pamoff=atoi(bp+1);
+  case 'S': ppst->ext_sq_set = 1;
+  case 't': ppst->tr_type
+  case 'x': ppst->pam_x
+  case 'y': ppst->param_u.fa.optwid
+  case 'z': ppst->zsflag
diff --git a/doc/fasta36.1 b/doc/fasta36.1
new file mode 100644
index 0000000..4f4d52f
--- /dev/null
+++ b/doc/fasta36.1
@@ -0,0 +1,467 @@
+.TH fasta36/ssearch36/[t]fast[x,y]36/lalign36	1 local
+.SH NAME
+fasta36 \- scan a protein or DNA sequence library for similar
+sequences
+
+fastx36 \- compare a DNA sequence to a protein sequence
+database, comparing the translated DNA sequence in forward and
+reverse frames.
+
+tfastx36 \- compare a protein sequence to a DNA sequence
+database, calculating similarities with frameshifts to the forward and
+reverse orientations.
+
+fasty36 \- compare a DNA sequence to a protein sequence
+database, comparing the translated DNA sequence in forward and reverse
+frames.
+
+tfasty36 \- compare a protein sequence to a DNA sequence
+database, calculating similarities with frameshifts to the forward and
+reverse orientations.
+
+fasts36 \- compare unordered peptides to a protein sequence database
+
+fastm36 \- compare ordered peptides (or short DNA sequences)
+to a protein (DNA) sequence database
+
+tfasts36 \- compare unordered peptides to a translated DNA
+sequence database
+
+fastf36 \- compare mixed peptides to a protein sequence database
+
+tfastf36 \- compare mixed peptides to a translated DNA
+sequence database
+
+ssearch36 \- compare a protein or DNA sequence to a
+sequence database using the Smith-Waterman algorithm.
+
+ggsearch36 \- compare a protein or DNA sequence to a
+sequence database using a global alignment (Needleman-Wunsch)
+
+glsearch36 \- compare a protein or DNA sequence to a
+sequence database with alignments that are global in the query and
+local in the database sequence (global-local).
+
+lalign36 \- produce multiple non-overlapping alignments for protein
+and DNA sequences using the Huang and Miller sim algorithm for the
+Waterman-Eggert algorithm.
+
+prss36, prfx36 \- discontinued; all the FASTA programs will estimate
+statistical significance using 500 shuffled sequence scores if two
+sequences are compared.
+
+.SH DESCRIPTION
+
+Release 3.6 of the FASTA package provides a modular set of sequence
+comparison programs that can run on conventional single processor
+computers or in parallel on multiprocessor computers. More than a
+dozen programs \- fasta36, fastx36/tfastx36, fasty36/tfasty36,
+fasts36/tfasts36, fastm36, fastf36/tfastf36, ssearch36, ggsearch36,
+and glsearch36 \- are currently available.
+
+All the comparison programs share a set of basic command line options;
+additional options are available for individual comparison functions.
+
+Threaded versions of the FASTA programs (built by default under
+Unix/Linux/MacOSX) run in parallel on modern Linux and Unix multi-core
+or multi-processor computers.  Accelerated versions of the
+Smith-Waterman algorithm are available for processors with the
+Intel SSE2 or Altivec PowerPC instruction sets, which can speed up
+Smith-Waterman calculations 10 - 20-fold.
+
+In addition to the serial and threaded versions of the FASTA programs,
+MPI parallel versions are available as \fCfasta36_mpi\fP,
+\fCssearch36_mpi\fP, \fCfastx36_mpi\fP, etc. The MPI parallel versions
+use the same command line options as the serial and threaded versions.
+
+.SH Running the FASTA programs
+.LP
+By default, the FASTA programs are no longer interactive; they are run
+from the command line by specifying the program, query.file, and
+library.file.  Program options \fImust\fP precede the
+query.file and library.file arguments:
+.sp
+.ti 0.5i
+\fCfasta36 -option1 -option2 -option3 query.file library.file > fasta.output\fP
+.sp
+The "classic" interactive mode, which prompts for a query.file and
+library.file, is available with the \fC-I\fP option.  Typing a program
+name without any arguments (\fCssearch36\fP) provides a short help
+message; \fCprogram_name -help\fP provides a complete set of program
+options.
+.LP
+Program options \fIMUST\fP precede the query.file and library.file arguments.
+
+.SH FASTA program options
+.LP
+The default scoring matrix and gap penalties used by each of the
+programs have been selected for high sensitivity searches with the
+various algorithms.  The default program behavior can be modified by
+providing command line options \fIbefore\fP the query.file and
+library.file arguments.  Command line options can also be used in
+interactive mode.
+
+Command line arguments come in several classes.
+
+(1) Commands that specify the comparison type. FASTA, FASTS, FASTM,
+SSEARCH, GGSEARCH, and GLSEARCH can compare either protein or DNA
+sequences, and attempt to recognize the comparison type by looking at the
+residue composition. \fC-n\fP, \fC-p\fP specify DNA (nucleotide) or
+protein comparison, respectively. \fC-U\fP specifies RNA comparison.
+
+(2) Commands that limit the set of sequences compared: \fC-1\fP,
+\fC-3\fP, \fC-M\fP.
+
+(3) Commands that modify the scoring parameters: \fC-f gap-open penalty\fP, \fC-g
+gap-extend penalty\fP, \fC-j inter-codon frame-shift, within-codon frameshift\fP,
+\fC-s scoring-matrix\fP, \fC-r
+match/mismatch score\fP, \fC-x X:X score\fP.
+
+(4) Commands that modify the algorithm (mostly FASTA and [T]FASTX/Y):
+\fC-c\fP, \fC-w\fP, \fC-y\fP, \fC-o\fP. The \fC-S\fP option can be used to
+ignore lower-case (low complexity) residues during the initial score
+calculation.
+
+(5) Commands that modify the output: \fC-A\fP, \fC-b number\fP, \fC-C
+width\fP, \fC-d number\fP, \fC-L\fP, \fC-m 0-11,B\fP, \fC-w
+line-width\fP, \fC-W context-width\fP, \fC-o offset1,offset2\fP
+
+(6) Commands that affect statistical estimates: \fC-Z\fP, \fC-k\fP.
+.SH Option summary:
+.TP
+\-1
+Sort by "init1" score (obsolete)
+.TP
+\-3
+([t]fast[x,y] only) use only forward frame translations
+.TP
+\-a
+Displays the full length (including unaligned regions) of both
+sequences with fasta36, ssearch36, glsearch36, and fasts36.
+.TP
+\-A
+(fasta36 only) For DNA:DNA, force Smith-Waterman alignment for
+output.  Smith-Waterman is the default for FASTA protein alignment and
+[t]fast[x,y], but not for DNA comparisons with FASTA.  For
+protein:protein, use band-alignment algorithm.
+.TP
+\-b #
+number of best scores/descriptions to show (must be <
+expectation cutoff if -E is given).  By default, this option is no
+longer used; all scores better than the expectation (E()) cutoff are
+listed. To guarantee the display of # descriptions/scores, use \fC-b
+=#\fP, i.e. \fC-b =100\fP ensures that 100 descriptions/scores will be
+displayed.  To guarantee at least 1 description, but possibly many
+more (limited by \fC-E e_cut\fP), use \fC-b >1\fP.
+.TP
+\-c "E-opt E-join"
+threshold for gap joining (E-join) and band optimization (E-opt) in
+FASTA and [T]FASTX/Y.  FASTA36 now uses BLAST-like statistical
+thresholds for joining and band optimization.  The default statistical
+thresholds for protein and translated comparisons are E-opt=0.2,
+E-join=0.5; for DNA, E-join = 0.1 and E-opt= 0.02. The actual number
+of joins and optimizations is reported after the E-join and E-opt
+scoring parameters.  Statistical thresholds improve search speed 2 -
+3X, and provide much more accurate statistical estimates for matrices
+other than BLOSUM50. The "classic" joining/optimization thresholds
+that were the default in fasta35 and earlier programs are available
+using -c O (upper case O), possibly followed by a value > 1.0 to set
+the optcut optimization threshold.
+.TP
+\-C #
+length of name abbreviation in alignments, default = 6.  Must be less
+than 20.
+.TP
+\-d #
+number of best alignments to show ( must be < expectation (-E) cutoff
+and <= the -b description limit).
+.TP
+\-D
+turn on debugging mode.  Enables checks on sequence alphabet that
+cause problems with tfastx36, tfasty36 (only available after compile
+time option).  Also preserves temp files with -e expand_script.sh option.
+.TP
+\-e expand_script.sh
+Run a script to expand the set of sequences displayed/aligned based on
+the results of the initial search.  When the -e expand_script.sh
+option is used, after the initial scan and statistics calculation, but
+before the "Best scores" are shown, expand_script.sh is run with a single
+argument, the name of a file that contains the accession information
+(the text on the fasta description line between > and the first space)
+and the E()-value for the sequence.  expand_script.sh then uses this
+information to send a library of additional sequences to stdout. These
+additional sequences are included in the list of high-scoring
+sequences (if their scores are significant) and aligned. The
+additional sequences do not change the statistics or database size.
+.TP
+\-E e_cut e_cut_r
+expectation value upper limit for score and alignment display.
+Defaults are 10.0 for FASTA36 and SSEARCH36 protein searches, 5.0 for
+translated DNA/protein comparisons, and 2.0 for DNA/DNA
+searches. FASTA version 36 now reports additional alignments between
+the query and the library sequence, the second value sets the
+threshold for the subsequent alignments.  If not given, the threshold
+is e_cut/10.0.  If given and value > 1.0, e_cut_r = e_cut / value; for
+value < 1.0, e_cut_r = value;  If e_cut_r < 0, then the additional
+alignment option is disabled.
+.TP
+\-f #
+penalty for opening a gap.
+.TP
+\-F #
+expectation value lower limit for score and alignment display.
+-F 1e-6 prevents library sequences with E()-values lower than 1e-6
+from being displayed. This allows the user to focus on more distant
+relationships.
+.TP
+\-g #
+penalty for additional residues in a gap
+.TP
+\-h
+Show short help message.
+.TP
+\-help
+Show long help message, with all options.
+.TP
+\-H
+show histogram (with fasta-36.3.4, the histogram is not shown by default).
+.TP
+\-i
+(fasta DNA, [t]fast[x,y]) compare against
+only the reverse complement of the library sequence.
+.TP
+\-I
+interactive mode; prompt for query filename, library.
+.TP
+\-j # #
+([t]fast[x,y] only) penalty for a frameshift between two codons, 
+([t]fasty only) penalty for a frameshift within a codon.
+.TP
+\-J
+(lalign36 only) show identity alignment.
+.TP
+\-k
+specify number of shuffles for statistical parameter estimation (default=500).
+.TP
+\-l str
+specify FASTLIBS file
+.TP
+\-L
+report long sequence description in alignments (up to 200 characters).
+.TP
+\-m 0,1,2,3,4,5,6,8,9,10,11,B,BB,"F# out.file" alignment display
+options.  \fC-m 0, 1, 2, 3\fP display different types of alignments.
+\fC-m 4\fP provides an alignment "map" on the query. \fC-m 5\fP
+combines the alignment map and a \fC-m 0\fP alignment.  \fC-m 6\fP
+provides an HTML output.
+.TP
+\fC-m 8\fP seeks to mimic BLAST -m 8 tabular output.  Only query and
+library sequence names, and identity, mismatch, starts/stops,
+E()-values, and bit scores are displayed.  \fC-m 8C\fP mimics BLAST
+tabular format with comment lines.  \fC-m 8\fP formats do not show
+alignments.
+.TP
+\fC-m 9\fP does not change the alignment output, but provides
+alignment coordinate and percent identity information with the best
+scores report.  \fC-m 9c\fP adds encoded alignment information to the
+\fC-m 9\fP; \fC-m 9C\fP adds encoded alignment information as a CIGAR
+formatted string. To accommodate frameshifts, the CIGAR format has been
+supplemented with F (forward) and R (reverse).  \fC-m 9i\fP provides
+only percent identity and alignment length information with the best
+scores.  With current versions of the FASTA programs, independent
+\fC-m\fP options can be combined; e.g. \fC-m 1 -m 9c -m 6\fP.
+.TP
+\-m 11 provides \fClav\fP format output from lalign36.  It does not
+currently affect other alignment algorithms.  The \fClav2ps\fP and
+\fClav2svg\fP programs can be used to convert \fClav\fP format output
+to postscript/SVG alignment "dot-plots".
+.TP
+\-m B provides \fCBLAST\fP-like alignments.  Alignments are labeled as
+"Query" and "Sbjct", with coordinates on the same line as the
+sequences, and \fCBLAST\fP-like symbols for matches and
+mismatches. \fC-m BB\fP extends BLAST similarity to all the output,
+providing an output that closely mimics BLAST output.
+.TP
+\-m "F# out.file" allows one search to write different alignment
+formats to different files.  The 'F' indicates separate file output;
+the '#' is the output format (1-6,8,9,10,11,B,BB, multiple compatible
+formats can be combined separated by commas -',').
+.TP
+\-M #-#
+molecular weight (residue) cutoffs.  -M "101-200" examines only
+library sequences that are 101-200 residues long.
+.TP
+\-n
+force query to nucleotide sequence
+.TP
+\-N #
+break long library sequences into blocks of # residues.  Useful for
+bacterial genomes, which have only one sequence entry.  -N 2000 works
+well for bacterial genomes. (This option was required when
+FASTA only provided one alignment between the query and library
+sequence.  It is not as useful, now that multiple alignments are
+available.)
+.TP
+\-o "#,#"
+offsets query, library sequence for numbering alignments
+.TP
+\-O file
+send output to file.
+.TP
+\-p
+force query to protein alphabet.
+.TP
+\-P pssm_file
+(ssearch36, ggsearch36, glsearch36 only).  Provide blastpgp checkpoint
+file as the PSSM for searching. Two PSSM file formats are available,
+which must be provided with the filename. 'pssm_file 0' uses a binary
+format that is machine specific; 'pssm_file 1' uses the "blastpgp -u 1
+-C pssm_file" ASN.1 binary format (preferred).
+.TP
+\-q/-Q
+quiet option; do not prompt for input (on by default)
+.TP
+\-r "+n/-m" 
+(DNA only) values for match/mismatch for DNA comparisons. \fC+n\fP is
+used for the maximum positive value and \fC-m\fP is used for the
+maximum negative value. Values between max and min, are rescaled, but
+residue pairs having the value -1 continue to be -1.
+.TP 
+\-R file
+save all scores to statistics file (previously -r file)
+.TP
+\-s name
+specify substitution matrix.  BLOSUM50 is used by default; PAM250,
+PAM120, and BLOSUM62 can be specified by setting -s P120, P250, or
+BL62.  Additional scoring matrices include: BLOSUM80 (BL80), and
+MDM10, MDM20, MDM40 (Jones, Taylor, and Thornton, 1992 CABIOS
+8:275-282; specified as -s MD10, -s MD20, -s MD40), OPTIMA5 (-s OPT5,
+Kann and Goldstein, (2002) Proteins 48:367-376), and VTML160 (-s
+VT160, Mueller and Vingron (2002) J. Comp. Biol. 19:8-13).  Each
+scoring matrix has associated default gap penalties.  The BLOSUM62
+scoring matrix and -11/-1 gap penalties can be specified with -s BP62.
+.IP
+Alternatively, a BLASTP format scoring matrix file can be specified,
+e.g. -s matrix.filename.  DNA scoring matrices can also be specified
+with the "-r" option.
+.IP
+With fasta36.3, variable scoring matrices can
+be specified by preceding the scoring matrix abbreviation with '?',
+e.g. -s '?BP62'. Variable scoring matrices allow the FASTA programs to
+choose an alternative scoring matrix with higher information content
+(bit score/position) when short queries are used.  For example, a 90
+nucleotide FASTX query can produce only a 30 amino-acid alignment, so
+a scoring matrix with 1.33 bits/position is required to produce a 40
+bit score. The FASTA programs include BLOSUM50 (0.49 bits/pos) and
+BLOSUM62 (0.58 bits/pos) but can range to MD10 (3.44
+bits/position). The variable scoring matrix option searches down the
+list of scoring matrices to find one with information content high
+enough to produce a 40 bit alignment score.
+.TP
+\-S
+treat lower case letters in the query or database as low complexity
+regions that are equivalent to 'X' during the initial database scan,
+but are treated as normal residues for the final alignment display.
+Statistical estimates are based on the 'X'ed out sequence used during
+the initial search. Protein databases (and query sequences) can be
+generated in the appropriate format using John Wootton's "pseg"
+program, available from ftp://ftp.ncbi.nih.gov/pub/seg/pseg.  Once you
+have compiled the "pseg" program, use the command:
+.IP
+\fCpseg database.fasta -z 1 -q  > database.lc_seg\fP
+.TP
+\-t #
+Translation table - [t]fastx36 and [t]fasty36 support the BLAST
+translation tables.  See
+\fChttp://www.ncbi.nih.gov/htbin-post/Taxonomy/wprintgc?mode=c/\fP.
+.TP
+\-T #
+(threaded, parallel only) number of threads or workers to use (on
+Linux/MacOS/Unix, the default is to use as many processors as are
+available; on Windows systems, 2 processors are used).
+.TP
+\-U
+Do RNA sequence comparisons: treat 'T' as 'U', allow G:U base pairs (by 
+scoring "G-A" and "T-C" as score(G:G)-3).  Search only one strand.
+.TP
+\-V "?$%*"
+Allow special annotation characters in query sequence.  These characters
+will be displayed in the alignments on the coordinate number line.
+.TP
+\-w #
+line width for similarity score, sequence alignment, output.
+.TP
+\-W #
+context length (default is 1/2 of line width -w) for programs,
+like fasta and ssearch, that provide additional sequence context.
+.TP
+\-X extended options.  Less used options. Other options include
+\fC-XB\fP, \fC-XM4G\fP, \fC-Xo\fP, \fC-Xx\fP, and \fC-Xy\fP; see
+\fBfasta_guide.pdf\fP.
+.TP
+\-z 1, 2, 3, 4, 5, 6
+Specify the statistical calculation. Default is -z 1 for local
+similarity searches, which uses regression against the length of the
+library sequence. -z -1 disables statistics.  -z 0 estimates
+significance without normalizing for sequence length. -z 2 provides
+maximum likelihood estimates for lambda and K, censoring the 250
+lowest and 250 highest scores. -z 3 uses Altschul and Gish's
+statistical estimates for specific protein BLOSUM scoring matrices and
+gap penalties. -z 4,5: an alternate regression method.  \-z 6 uses a
+composition based maximum likelihood estimate based on the method of
+Mott (1992) Bull. Math. Biol. 54:59-75.
+.TP
+\-z 11,12,14,15,16
+compute the regression against scores of randomly
+shuffled copies of the library sequences.  Twice as many comparisons
+are performed, but accurate estimates can be generated from databases
+of related sequences. -z 11 uses the -z 1 regression strategy, etc.
+.TP
+\-z 21, 22, 24, 25, 26
+compute two E()-values.  The standard (library-based) E()-value is
+calculated in the standard way (-z 1, 2, etc), but a second E2()
+value is calculated by shuffling the high-scoring sequences (those
+with E()-values less than the threshold).  For "average" composition
+proteins, these two estimates will be similar (though the
+best-shuffle estimates are always more conservative).  For biased
+composition proteins, the two estimates may differ by 100-fold or
+more.  A second -z option, e.g. -z "21 2", specifies the estimation
+method for the best-shuffle E2()-values. Best-shuffle E2()-values
+approximate the estimates given by PRSS (or in a pairwise SSEARCH).
+.TP
+\-Z db_size
+Set the apparent database size used for expectation value calculations
+(used for protein/protein FASTA and SSEARCH, and for [T]FASTX/Y).
+.SH Reading sequences from STDIN
+.LP
+The FASTA programs can accept a query sequence from
+the unix "stdin" data stream.  This makes it much easier to use
+fasta36 and its relatives as part of a WWW page. To indicate that
+stdin is to be used, use "@" as the query sequence file name.  "@" can
+also be used to specify a subset of the query sequence to be used,
+e.g:
+.sp
+.ti 0.5i
+cat query.aa | fasta36 @:50-150 s
+.sp
+would search the 's' database with residues 50-150 of query.aa.  FASTA
+cannot automatically detect the sequence type (protein vs DNA) when
+"stdin" is used and assumes protein comparisons by default; the '-n'
+option is required for DNA for STDIN queries.
+.SH Environment variables:
+.TP
+FASTLIBS
+location of library choice file (-l FASTLIBS)
+.TP
+SRCH_URL1, SRCH_URL2
+format strings used to define options to re-search the
+database.
+.TP
+REF_URL
+the format string used to define the option to lookup the library
+sequence in entrez, or some other database.
+
+.SH AUTHOR
+Bill Pearson
+.br
+wrp at virginia.EDU
+
+Version: $ Id: $
+Revision: $Revision: 210 $
diff --git a/doc/fasta_func.doc b/doc/fasta_func.doc
new file mode 100644
index 0000000..b9330e5
--- /dev/null
+++ b/doc/fasta_func.doc
@@ -0,0 +1,300 @@
+Overall structure of the fasta3 program.  (Some functions
+are different for translated comparisons FASTX, FASTY, TFASTX, TFASTY.)
+
+main() {	/* complib.c structure */
+
+  /* get command line arguments, set up initial parameter values */
+  initenv (argc, argv, &m_msg, &pst,&aa0[0],outtty);
+
+  /* allocate space for sequence arrays */
+  /* get the query file name if not on command line */
+  /* get query */
+  m_msg.n0 = getseq (m_msg.tname,aa0[0], MAXTOT, m_msg.libstr,&pst.dnaseq,
+		     &m_msg.sq0off);
+
+  /* reset some parameters if DNA */		     
+  resetp (aa0[0], m_msg.n0, &m_msg, &pst);
+
+  /* get a library name if not on command line */
+  libchoice(m_msg.lname,sizeof(m_msg.lname),&m_msg);
+  /* use library name to build list of library files */
+  libselect(m_msg.lname, &m_msg);
+
+  /* get additional options (ktup, prss-window) if not specified */
+  query_parm (&m_msg, &pst);
+
+  /* do final parameter initializations */
+  last_init(&m_msg, &pst);
+
+  /* set up structures for saved scores[20000], statistics[50000] */
+  nbest = 0;
+  
+  /* initialize the comparison function */
+  init_work (aa0[0], m_msg.n0, &pst, &f_str[0]);
+
+  /* open the library */
+  for (iln = 0; iln < m_msg.nln; iln++) {
+    if (openlib(m_msg.lbnames[iln],m_msg)!=1) {continue;}
+  }
+
+  /* get the library sequence and do the comparison */
+  while ((n1=GETLIB(aa1ptr,maxt,libstr,&lmark,&lcont))>0) {
+    do_work (aa0[itt], m_msg.n0, aa1, n1, itt, &pst, f_str[itt], &rst);
+
+  /* save the scores */
+  /* save the scores for statistics */
+  }
+
+  /* all done with all libraries */
+  process_hist(stats,nstats,pst);
+
+  /* sort the scores by z-value */
+  sortbestz (bptr, nbest);
+
+  /* sort the scores by E-value */
+  sortbeste (bptr, nbest);
+
+  /* print the histogram */
+  prhist (stdout,m_msg,pst,gstring2);
+
+  /* show the high scoring sequences */
+  showbest (stdout, aa0, aa1, maxn, bptr, nbest, qlib, &m_msg, pst,
+            f_str, gstring2);
+
+  /* show the high-scoring alignments */
+  showalign(outfd, aa0, aa1, maxn, bptr, nbest, qlib, m_msg, pst,
+	    f_str, gstring2);
+
+  /* thats all folks !!! */
+}
+

+================
+complib.c	/* version set as mp_verstr */
+
+main()
+printsum()	/* prints summary of run (residues, entries, time) */
+void fsigint() 	/* sets up interrupt handler for HUP not used */
+
+================
+compacc.c
+
+void selectbest() /* select best 15000/20000 based on raw score */
+void selectbestz() /* select best 15000/20000 based on z-score */
+void sortbest()  /* sort based on raw score */
+void sortbestz() /* sort based on z-score */
+void sortbeste() /* sort based on E() score - different from z-score for DNA */
+
+prhist()	/* print histogram */
+
+shuffle()	/* shuffle sequence (prss) */
+wshuffle()	/* window shuffle */
+
+================
+showbest.c
+
+void showbest()	/* present list of high scoring sequences */
+
+================
+showalign.c
+
+void showalign() /* show list of high-scoring alignments */
+void do_show()	/* show an individual alignment */
+void initseq()	/* setup seqc0/seqc1 which contain alignment characters */
+void freeseq()	/* free them up */
+
+================
+htime.c
+
+time_t s_time()	/* get the time in usecs */
+void ptime()	/* print elapsed time */
+
+================
+apam.c
+
+initpam ()	/* read in PAM matrix or change default array */
+void mk_n_pam()	/* make DNA pam from +5/-3 values */
+================
+doinit.c
+
+void initenv()	/* read environment variables, general options */
+================
+initfa.c	/* version set as "verstr" */
+
+alloc_pam()	/* allocate 2D pam array */
+initpam2()	/* fill it up from 1D pam triangle */
+f_initenv()	/* function-specific environment variables */
+f_getopt()	/* function-specific options */
+f_getarg()	/* function specific argument - ktup */
+resetp()	/* reset scoring matrix, optional parameters for DNA-DNA */
+reseta()	/* reset scoring matrix, optional parameters for prot-DNA */
+query_parm()	/* ask for additional program arguments (ktup) */
+last_init()	/* last chance to set up parameters based on query,lib,parms */
+f_initpam()	/* not used - could set parameters from pam matrix */
+
+================
+scaleswn.c
+
+process_hist()	/* do statistics calculations */
+
+  proc_hist_r()	/* regression fit z=1, also used by z=5 */
+    float find_z() /* gives z-score for score, length, mu, rho, var */
+    float find_zr() /* gives z-score for score, length, mu, rho, var */
+    fit_llen()	/* first estimate of mu, rho, var */
+    fit_llens()	/* second estimate of mu, rho, var, mu2, rho2 */
+
+  proc_hist_r2()  /* regression_i fit z=4 */
+    float find_zr2() /* gives z-score for score, length, mu, rho, mu2, rho2 */
+    fit_llen2()	/* iterative estimate of mu, rho, var */
+
+  proc_hist_ln()  /* ln()-scaled z=2 */ /* no longer used */
+    float find_zl() /* gives z-score from ln()-scaled scores */
+
+  proc_hist_ml() /* estimate lambda, K using Maximum Likelihood */
+    float find_ze() /* z-score from lambda, K */
+
+  proc_hist_n()   /* no length-scaling z=0 */
+    float find_zn() /* gives z-score from mu, var (no scaling) */
+
+  proc_hist_a()   /* Altschul-Gish params z= 3 */
+    ag_parm()	/* match pst.pamfile name, look_p() */
+    look_p()	/* lookup Lambda, K, H given param struct */
+    float find_za()
+
+eq_s()	/* returns (double)score (available for length correction) */
+ln_s()	/* returns (double)score * ln(200)/ln(length) */
+
+proc_hist_r()	/* regression fit z=1, also used by z=5 */
+alloc_hist()	/* set up arrays for score vs length  */
+free_hist()	/* free them */
+inithist()	/* calls alloc_hist(), sets some other globals */
+addhist()	/* update score vs length hist */ 
+inithistz()	/* initialize displayed (z-score) histogram hist[]*/
+addhistz()	/* add to hist[], increment num_db_entries */
+addhistzp()	/* add to hist[], don't change num_db_entries */
+prune_hist()	/* remove scores from score vs length */
+update_db_size() /* num_db_entries = nlib - ntrimmed */
+set_db_size()	/* -Z db_size; set nlib */
+
+double z_to_E()	/* z-value to E() (extreme value distribution) */
+double zs_to_E() /* z-score (mu=50, sigma=10) to E() */
+double zs_to_bit() /* z-score to BLAST2 bit score */
+
+float E_to_zs()	/* E() to z-score */
+double zs_to_Ec() /* z-score to num_db_entries*(1 - P(zs)) */
+
+summ_stats()	/* put stat summary in string */
+vsort()		/* not used, does shell sort */
+calc_ks()	/* does Kolmogorov-Smirnov calculation for histogram */
+================
+dropnfa.c	/* contains worker comparison functions */
+
+init_work()	/* set up struct f_struct fstr - hash query */
+get_param()	/* actually prints parameters to string */
+close_work()	/* clean up fstr */
+do_work()	/* do a comparison */
+  do_fasta()	/* use the fasta() function */
+    savemax()	/* save the best region during scan */
+    spam()	/* rescan the best regions */
+    sconn()	/* try to connect the best regions for initn */
+    kssort()	/* sort by score */
+    kpsort()	/* sort by left end pos */
+    shscore()	/* best self-score */
+    dmatch()	/* do band alignment for opt score */
+      FLOCAL_ALIGN()	/* fast band score-only */
+
+do_opt()	/* do an "optimized" comparison */
+
+do_walign()	/* put an alignment into res[] for calcons() */
+  sw_walign()	/* SW alignment driver - find boundaries */
+    ALIGN()	/* actual alignment driver */
+      nw_align()	/* recursive global alignment */
+    CHECK_SCORE()	/* double check */
+    DISPLAY()	/* Miller's display routine */
+
+  bd_walign()	/* band alignment driver for DNA */
+    LOCAL_ALIGN()	/* find boundaries in band */
+    B_ALIGN()		/* produce band alignment  */
+      bg_align()	/* recursively produce band alignment */
+    BCHECK_SCORE()	/* double check */
+
+calcons()	/* calculate ascii alignment  seqc0,seqc1 from res[]*/
+calc_id()	/* calculate % identity with no alignment */
+================
+nxgetaa.c
+
+getseq()	/* get a query (prot or DNA) */
+getntseq()	/* get a nt query (for fastx, fasty) */
+gettitle()	/* get a description */
+
+int openlib()	/* open a library */
+closelib()	/* close it */
+GETLIB()	/* get a fasta-format next library entry */
+RANLIB()	/* jump back in, get description, position for getlib() */
+
+lgetlib()	/* get a Genbank flat-file format next library entry */
+lranlib()	/* jump back in, get description, position for lgetlib() */
+
+pgetlib()	/* get CODATA format next library entry */
+pranlib()	/* jump back in, get description, position for pgetlib() */
+
+egetlib()	/* get EMBL format next library entry */
+eranlib()	/* jump back in, get description, position for egetlib() */
+
+igetlib()	/* get Intelligenetics format next library entry */
+iranlib()	/* jump back in, get description, position for igetlib() */
+
+vgetlib()	/* get PIR/VMS/GCG format next library entry */
+vranlib()	/* jump back in, get description, position for vgetlib() */
+
+gcg_getlib()	/* get GCG binary format next library entry */
+gcg_ranlib()	/* jump back in, get description, position for gcg_getlib() */
+
+int scanseq()	/* find %ACGT */
+
+revcomp()	/* do reverse complement */
+sf_sort()	/* sort superfamily numbers */
+================
+c_dispn.c
+
+discons()	/* display alignment from seqc0, seqc1 */
+disgraph()	/* display graphical representation, -m 4,5 */
+aancpy()	/* copy a binary sequence to ascii */
+r_memcpy()
+l_memcpy()
+iidex()		/* lookup ascii-encoding of residue */
+cal_coord()	/* calculate coordinates of alignment ends */
+
+================
+ncbl_lib.c
+
+ncbl_openlib()
+ncbl_closelib()
+ncbl_getliba()
+ncbl_getlibn()
+ncbl_ranlib()
+src_ulong_read()
+src_long_read()
+src_char_read()
+src_fstr_read()
+newname()
+
+================
+lib_sel.c
+
+getlnames()
+libchoice()
+libselect()
+addfile()
+ulindex()
+
+================
+nrand48.c
+
+irand(time)	/* initialize random number generator */
+nrand(n)	/* get a number 0 - n */
+
+================
+url_subs.c
+
+void do_url1()	/* setup search links */
+
diff --git a/doc/fasta_guide.bib b/doc/fasta_guide.bib
new file mode 100644
index 0000000..4d722e6
--- /dev/null
+++ b/doc/fasta_guide.bib
@@ -0,0 +1,265 @@
+
+@article( WRP881, 
+  author =	{W. R. Pearson
+		and D. J. Lipman}, 
+  title =	{Improved tools for biological sequence comparison}, 
+  year =	1988, 
+  journal =	{Proc. Natl. Acad. Sci. USA}, 
+  volume =	85, 
+  pages =	{2444-2448}, 
+  annote =	88190088 )
+
+@incollection( day787, 
+  author =	{M. Dayhoff
+		and R. M. Schwartz
+		and B. C. Orcutt}, 
+  title =	{A model of evolutionary change in proteins}, 
+  year =	1978, 
+  volume =	{5, supplement 3}, 
+  booktitle =	{Atlas of Protein Sequence and Structure}, 
+  editor =	{M. Dayhoff}, 
+  publisher =	{National Biomedical Research Foundation}, 
+  pages =	{345-352}, 
+  address =	{Silver Spring, MD} )
+
+@article( WRP960, 
+  author =	{W. R. Pearson}, 
+  title =	{Effective protein sequence comparison}, 
+  year =	1996, 
+  journal =	{Methods Enzymol.}, 
+  volume =	266, 
+  pages =	{227-258}, 
+  annote =	97422296 )
+
+@article( wrp971, 
+  author =	{Z. Zhang
+		and W. R. Pearson
+		and W. Miller}, 
+  title =	{Aligning a {DNA} sequence with a protein sequence}, 
+  year =	1997, 
+  journal =	{J. Computational Biology}, 
+  volume =	4, 
+  pages =	{339-349}, 
+  annote =	97422296 )
+
+@article( wrp973, 
+  author =	{W. R. Pearson
+		and T. C. Wood
+		and Z. Zhang
+		and W. Miller}, 
+  title =	{Comparison of {DNA} sequences with protein sequences}, 
+  year =	1997, 
+  journal =	{Genomics}, 
+  volume =	46, 
+  pages =	{24-36}, 
+  annote =	98066759 )
+
+@article( wrp951, 
+  author =	{W. R. Pearson}, 
+  title =	{
+Comparison of methods for searching protein sequence databases}, 
+  year =	1995, 
+  journal =	{Prot. Sci.}, 
+  volume =	4, 
+  pages =	{1145-1160}, 
+  annote =	97422296 )
+
+@article( wrp981, 
+  author =	{W. R. Pearson}, 
+  title =	{
+Empirical statistical estimates for sequence similarity searches}, 
+  year =	1998, 
+  journal =	{J. Mol. Biol.}, 
+  volume =	276, 
+  pages =	{71-84}, 
+  annote =	98179551 )
+
+@article( tay925, 
+  author =	{D. T. Jones
+		and W. R. Taylor
+		and J. M. Thornton}, 
+  title =	{
+The rapid generation of mutation data matrices from protein sequences}, 
+  year =	1992, 
+  journal =	{Comp. Appl. Biosci.}, 
+  volume =	8, 
+  pages =	{275-282} )
+
+@article( woo935, 
+  author =	{J. C. Wootton
+		and S. Federhen}, 
+  title =	{
+Statistics of local complexity in amino acid sequences and sequence databases}, 
+  year =	1993, 
+  journal =	{Comput. Chem.}, 
+  volume =	17, 
+  pages =	{149-163} )
+
+@article( alt960, 
+  author =	{S. F. Altschul
+		and W. Gish}, 
+  title =	{Local alignment statistics}, 
+  year =	1996, 
+  journal =	{Methods Enzymol.}, 
+  volume =	266, 
+  pages =	{460-480} )
+
+@article( alt915, 
+  author =	{S. F. Altschul}, 
+  title =	{
+Amino acid substitution matrices from an information theoretic
+perspective}, 
+  year =	1991, 
+  journal =	{J. Mol. Biol.}, 
+  volume =	219, 
+  pages =	{555-65} )
+
+@article( WAT815,
+  author =	{T. F. Smith
+		and M. S. Waterman}, 
+  title =	{Identification of common molecular subsequences}, 
+  year =	1981, 
+  journal =	{J. Mol. Biol.}, 
+  volume =	147, 
+  pages =	{195-197}, 
+  annote =	81267385 )
+
+@article( wrp021, 
+  author =	{A. J. Mackey
+		and T. A. J. Haystead
+		and W. R. Pearson}, 
+  title =	{
+Getting more From Less: Algorithms for Rapid Protein Identification
+with Multiple Short Peptide Sequences}, 
+  year =	2002, 
+  journal =	{Mol. Cell. Proteomics}, 
+  volume =	1, 
+  pages =	{139-147} )
+
+@article( farrar2007, 
+  author =	{M. Farrar}, 
+  title =	{
+Striped {S}mith-{W}aterman speeds database searches six times over
+   other SIMD implementations}, 
+  year =	2007, 
+  journal =	{Bioinformatics}, 
+  volume =	23, 
+  pages =	{156-161}, 
+  annote =	17110365 )
+
+@article{kan023,
+author = {Maricel G Kann and Richard A Goldstein}, 
+journal = {Proteins},
+title = {Performance evaluation of a new algorithm for the detection of remote homologs with sequence comparison},
+pages = {367--76},
+volume = {48},
+year = {2002},
+month = {Aug},
+pmid = {12112703}
+}
+
+@article{Muller2002,
+author = {Tobias Muller and Rainer Spang and Martin Vingron}, 
+journal = {Mol Biol Evol},
+title = {Estimating amino acid substitution models: a comparison of Dayhoff's estimator, the resolvent approach and a maximum likelihood method},
+pages = {8--13},
+volume = {19},
+year = {2002},
+date-added = {2011-03-14 22:15:08 -0400},
+date-modified = {2011-03-14 22:15:08 -0400},
+pmid = {11752185},
+URL = {http://mbe.oxfordjournals.org/content/19/1/8.long}
+}
+
+@article( hen929, 
+  author =	{S. Henikoff
+		and J. G. Henikoff}, 
+  title =	{Amino acid substitution matrices from protein blocks}, 
+  year =	1992, 
+  journal =	{Proc. Natl. Acad. Sci. USA}, 
+  volume =	89, 
+  pages =	{10915-10919} )
+
+@article( WAT875, 
+  author =	{M. S. Waterman
+		and M. Eggert}, 
+  title =	{
+A new algorithm for best subsequence alignments with application to
+t{RNA}-r{RNA} comparisons}, 
+  year =	1987, 
+  journal =	{J. Mol. Biol.}, 
+  volume =	197, 
+  pages =	{723-728} )
+
+@article( mil908, 
+  author =	{X. Huang
+		and R. C. Hardison
+		and W. Miller}, 
+  title =	{A space-efficient algorithm for local similarities}, 
+  year =	1990, 
+  journal =	{Comp. Appl. Biosci.}, 
+  volume =	6, 
+  pages =	{373-381} )
+
+
+@article( uniprot11, 
+  author =	{UniProt Consortium}, 
+  title =	{
+Ongoing and future developments at the Universal Protein Resource.}, 
+  year =	2011, 
+  journal =	{Nucleic Acids Res}, 
+  volume =	39, 
+  pages =	{D214-D219}, 
+  annote =	21051339 )
+
+@article( wrp022, 
+  author =	{J. T. Reese
+		and W. R. Pearson}, 
+  title =	{
+Empirical determination of effective gap penalties for sequence
+comparison}, 
+  year =	2002, 
+  journal =	{Bioinformatics}, 
+  volume =	18, 
+  pages =	{1500-1507}, 
+  annote =	22310732 )
+
+@article( rog003, 
+  author =	{T. Rognes
+		and E. Seeberg}, 
+  title =	{
+Six-fold speed-up of Smith-Waterman sequence database searches using
+parallel processing on common microprocessors}, 
+  year =	2000, 
+  journal =	{Bioinformatics}, 
+  volume =	16, 
+  pages =	{699-706}, 
+  annote =	20551510 )
+
+@article( mot921, 
+  author =	{R. Mott}, 
+  title =	{
+Maximum-likelihood estimation of the statistical distribution of
+Smith-Waterman local sequence similarity scores}, 
+  year =	1992, 
+  journal =	{Bull. Math. Biol.}, 
+  volume =	54, 
+  pages =	{59-75} )
+
+@article( woz974, 
+  author =	{A. Wozniak}, 
+  title =	{
+Using video-oriented instructions to speed up sequence comparison}, 
+  year =	1997, 
+  journal =	{Comput Appl Biosci}, 
+  volume =	13, 
+  pages =	{145-150}, 
+  annote =	97292450 )
+
+@article{wrp136,
+	Author = {L. J. Mills and W. R. Pearson},
+	Journal = {Bioinformatics},
+	Pages = {3007-3013},
+	Title = {Adjusting scoring matrices to correct overextended alignments.},
+	Volume = 29,
+	Year = 2013}
diff --git a/doc/fasta_guide.fg1.tex b/doc/fasta_guide.fg1.tex
new file mode 100644
index 0000000..bb223d9
--- /dev/null
+++ b/doc/fasta_guide.fg1.tex
@@ -0,0 +1,60 @@
+\begin{footnotesize}
+\begin{quote}
+\begin{verbatim}
+# ../bin/ssearch36 -q -w 80 ../seq/mgstm1.aa a
+SSEARCH performs a Smith-Waterman search
+ version 36.3.6 June, 2013(preload9)
+Please cite:
+ T. F. Smith and M. S. Waterman, (1981) J. Mol. Biol. 147:195-197; 
+ W.R. Pearson (1991) Genomics 11:635-650
+Query: ../seq/mgstm1.aa
+  1>>>mGSTM1 mouse glutathione transferase M1 - 218 aa
+Library: PIR1 Annotated (rel. 66) 
+  5121825 residues in 13143 sequences
+
+Statistics:  Expectation_n fit: rho(ln(x))= 7.4729+/-0.000484; mu= 2.0282+/- 0.027
+ mean_var=56.9651+/-10.957, 0's: 9 Z-trim(119.4): 17  B-trim: 67 in 1/62
+ Lambda= 0.169930
+ statistics sampled from 13135 (13143) to 13135 sequences
+Algorithm: Smith-Waterman (SSE2, Michael Farrar 2006) (7.2 Nov 2010)
+Parameters: BL50 matrix (15:-5), open/ext: -10/-2
+ Scan time:  3.820
+The best scores are:                                                    s-w bits E(13143)
+sp|P08010|GSTM2_RAT Glutathione S-transferase Mu 2; GST 4-4; GT  ( 218) 1248 312.0 7.7e-86
+sp|P04906|GSTP1_RAT Glutathione S-transferase P; Chain 7; GST -  ( 210)  344 90.4 3.8e-19
+sp|P00502|GSTA1_RAT Glutathione S-transferase alpha-1; GST 1-1   ( 222)  237 64.1 3.2e-11
+sp|P14942|GSTA4_RAT Glutathione S-transferase alpha-4; GST 8-8   ( 222)  179 49.9 6.1e-07
+sp|P12653|GSTF1_MAIZE Glutathione S-transferase 1; GST class-pi  ( 214)  120 35.4   0.013
+sp|P04907|GSTF3_MAIZE Glutathione S-transferase 3; GST class-pi  ( 222)  115 34.2   0.032
+sp|P20432|GSTT1_DROME Glutathione S-transferase 1-1; DDT-dehydr  ( 209)  100 30.5    0.38
+sp|P11277|SPTB1_HUMAN Spectrin beta chain, erythrocytic; Beta-   (2137)  108 31.6     1.9
+... (alignments deleted) ...
+>>sp|P14942|GSTA4_RAT Glutathione S-transferase alpha-4; GST 8-8;             (222 aa)
+ s-w opt: 179  Z-score: 231.0  bits: 49.9 E(13143): 6.1e-07
+Smith-Waterman score: 179; 25.6% identity (54.5% similar) in 211 aa overlap (5-207:7-207)
+                 10        20        30        40        50          60         70     
+mGSTM    MPMILGYWNVRGLTHPIRMLLEYTDSSYDEKRYTMGDAPDFDRSQWLNEKF-KLG-LDFPNLPYL-IDGSHKITQSNA
+             : :.. ::  . :: ::  .   ..:         .: ...   ::. : : : : ..: . :::   .::. :
+sp|P14 MEVKPKLYYFQGRGRMESIRWLLATAGVEFEE---------EFLETREQYEKLQKDGCLLFGQVPLVEIDG-MLLTQTRA
+               10        20        30                 40        50        60         70
+          80        90       100       110       120       130       140            150
+mGSTM  ILRYLARKHHLDGETEEERIRADIVENQVMDTRMQLIMLCYNPDFEKQKPEFLKTIPEKMKLYSEF--LGK---RPWFAG
+       :: ::: :..: :.  .::.: :.  . ..:  :..:   ..   ::..   : .   : . .  :  . :   . ...:
+sp|P14 ILSYLAAKYNLYGKDLKERVRIDMYADGTQDLMMMIIGAPFKAPQEKEESLALAVKRAKNRYFPVFEKILKDHGEAFLVG
+               80        90       100       110       120       130       140       150
+              160       170       180       190       200       210            
+mGSTM  DKVTYVDFLAYDILDQYRMFEPKCLDAFPNLRDFLARFEGLKKISAYMKSSRYIATPIFSKMAHWSNK    
+       ......:.   . . . .      :. :: :. : .:. ..  :. ... .     :               
+sp|P14 NQLSWADIQLLEAILMVEEVSAPVLSDFPLLQAFKTRISNIPTIKKFLQPGSQRKPPPDGHYVDVVRTVLKF
+              160       170       180       190       200       210       220  
+... (alignments deleted) ...
+218 residues in 1 query   sequences
+5121825 residues in 13143 library sequences
+ Tcomplib [36.3.6 May, 2013(preload9)] (4 proc in memory [0G])
+ start: Thu Jun  6 11:23:28 2013 done: Thu Jun  6 11:23:30 2013
+ Total Scan time:  3.820 Total Display time:  0.130
+Function used was SSEARCH [36.3.6 May, 2013(preload9)]
+\end{verbatim}
+\end{quote}
+\end{footnotesize}
+\vspace{-4.0ex}
diff --git a/doc/fasta_guide.fg2.tex b/doc/fasta_guide.fg2.tex
new file mode 100644
index 0000000..d636b7d
--- /dev/null
+++ b/doc/fasta_guide.fg2.tex
@@ -0,0 +1,25 @@
+\begin{footnotesize}
+\begin{verbatim}
+>>GST26_SCHMA Glutathione S-transferase class-mu  (218 aa)
+ initn: 422 init1: 359 opt: 407  Z-score: 836.8  bits: 162.0 E(437847): 3.7e-39
+Smith-Waterman score: 451; 42.4% identity (73.4% similar) in 203 aa overlap (6-208:6-203)
+
+               10        20        30        40        50        60        70        80
+mGSTM1 MPMILGYWNVRGLTHPIRMLLEYTDSSYDEKRYTMGDAPDFDRSQWLNEKFKLGLDFPNLPYLIDGSHKITQSNAILRYL
+            :::.:.::..: :.:::. ...:.:. :   :  ..:   : :.::::::.:::::: :::. :.::: ::.::.
+GST26_ MAPKFGYWKVKGLVQPTRlllehleetyeeRAY---DRNEIDA--WSNDKFKLGLEFPNLPYYIDGDFKLTQSMAIIRYI
+               10        20        30           40          50        60        70     
+
+               90       100       110       120       130       140       150       160
+mGSTM1 ARKHHLDGETEEERIRADIVENQVMDTRMQLIMLCYNPDFEKQKPEFLKTIPEKMKLYSEFLGKRPWFAGDKVTYVDFLA
+       : ::.. :   .:: . ...:. :.: :: .. ..:: ..:  : .::. .: ..:.... :... .. :. ::. ::. 
+GST26_ ADKHNMLGACPKERAEISMLEGAVLDIRMGVLRIAYNKEYETLKVDFLNKLPGRLKMFEDRLSNKTYLNGNCVTHPDFML
+          80        90       100       110       120       130       140       150     
+
+              170       180       190       200       210             
+mGSTM1 YDILDQYRMFEPKCLDAFPNLRDFLARFEGLKKISAYMKSSRYIATPIFSKMAHWSNK     
+       :: ::    .. .::. ::.: .:   .: : .:. :..:::::  :.               
+GST26_ YDALDVVLYMDSQCLNEFPKLVSFKKCIEDLPQIKNYLNSSRYIKWPLQGWDATFGGGDTPPK
+         160       170       180       190       200       210        
+\end{verbatim}
+\end{footnotesize}
diff --git a/doc/fasta_guide.pdf b/doc/fasta_guide.pdf
new file mode 100644
index 0000000..0e04c65
Binary files /dev/null and b/doc/fasta_guide.pdf differ
diff --git a/doc/fasta_guide.tex b/doc/fasta_guide.tex
new file mode 100644
index 0000000..b88568d
--- /dev/null
+++ b/doc/fasta_guide.tex
@@ -0,0 +1,2115 @@
+\documentclass[11pt]{article}
+\RequirePackage{helvet}
+\newcommand{\CURRENT}{fasta-36.3.8}
+\renewcommand{\familydefault}{\sfdefault}
+\usepackage{cite}
+\usepackage{url}
+\usepackage{fancyhdr}
+\usepackage{url}
+\usepackage{needspace}
+\usepackage{longtable}
+\addtolength{\oddsidemargin}{-0.75in}
+\addtolength{\evensidemargin}{-0.75in}
+\addtolength{\textwidth}{1.5in}
+\addtolength{\topmargin}{-0.75in}
+\addtolength{\textheight}{1.5in}
+
+\lhead{\CURRENT}
+\rhead{\today}
+\cfoot{\thepage}
+\pagestyle{fancy}
+\newcommand{\FASTA}{\texttt{FASTA }}
+\hyphenation{Swiss-Prot}
+
+\parskip 0.5ex
+
+\begin{document}
+%% .he 'FASTA3.DOC''Release 3.6, March, 2011'
+
+\section*{\Large{The FASTA program package}}
+
+%% \begin{quote}
+%% \emph{This document is undergoing extensive revision. 
+%% Some parts of it are old; while still accurate, they are less
+%% relevant.  Recent improvements to the FASTA programs are not well
+%% documented, particularly with respect to options for selecting
+%% databases and database sequences.}
+%% \end{quote}
+
+\section*{Introduction}
+
+This documentation describes version 36 of the FASTA program
+package (see W. R. Pearson and D. J. Lipman (1988), ``Improved Tools
+for Biological Sequence Comparison'', PNAS 85:2444-2448 \cite{wrp881};
+W. R. Pearson (1996) ``Effective protein sequence comparison''
+Meth. Enzymol. 266:227-258 \cite{wrp960}; and Pearson et al. (1997)
+Genomics 46:24-36 \cite{wrp973}).  Version 3 of the FASTA packages
+contains many programs for searching DNA and protein databases and for
+evaluating statistical significance from randomly shuffled sequences.
+
+This document is divided into four sections: (1) A summary overview of
+the programs in the FASTA3 package; (2) A guide to using the FASTA
+programs; (3) A guide to installing the programs and
+databases. Section (4) provides answers to some Frequently Asked
+Questions (FAQs).  In addition to this document, the
+\texttt{changes\_v36.html}, \texttt{changes\_v35.html} and
+\texttt{changes\_v34.html} files list functional changes to the programs.
+The \texttt{readme.v30..v36} files provide a more complete revision
+history of the programs, including bug fixes.
+
+The programs are easy to use; if you are using them on a machine that
+is administered by someone else, you can focus on sections (1) and (2)
+to learn how to use the programs.  If you are installing the programs
+on your own machine, you will need to read section (3) carefully.
+
+\emph{FASTA and BLAST} -- FASTA and BLAST have the same goal: to
+identify statistically significant sequence similarity that can be
+used to infer homology.  The FASTA programs offer several advantages
+over BLAST:
+\begin{enumerate}
+\item
+Rigorous algorithms unavailable in BLAST (Table I).  Smith-Waterman
+(\texttt{ssearch36}), global: global (\texttt{ggsearch36}), and
+global:local (\texttt{glsearch36}) programs are available, and these
+programs can be used with \texttt{psiblast} PSSM profiles.
+\item
+Better translated alignments. \texttt{fastx36}, \texttt{fasty36},
+\texttt{tfastx36}, and \texttt{tfasty36} allow frame-shifts in
+alignments; frame-shifts are treated like gap-penalties, so alignments
+tend to be longer in error-prone reads.
+\item
+Better statistics. BLAST calculates very accurate statistics for
+protein:protein alignments, but its model-based strategy is less
+robust for translated-DNA:protein and DNA:DNA scores.  FASTA uses an
+empirical estimation strategy, and now provides both search-based, and
+high-scoring shuffle-based statistics (\texttt{-z 21}).
+\item
+More flexible library sequence formats.  The FASTA programs can read
+FASTA, NCBI/ \texttt{formatdb}, and several other sequence formats, and can
+directly query MySQL and Postgres databases. The programs offer
+several strategies for specifying subsets of databases.
+\item
+A very efficient threaded implementation.  The FASTA programs are
+fully threaded; both similarity scores and alignments can be
+calculated in parallel on multi-core hardware.  On multi-core
+machines, FASTA can be faster than BLAST while producing better
+alignments with more accurate statistical estimates.
+\item
+  A powerful annotation facility.  The FASTA programs can incorporate
+  functional site annotations, site variation, and domain-based
+  sub-alignment scoring using annotations from sequence libraries.
+  Scripts are available to download site and domain information from
+  Uniprot, and domain information from Pfam and CATH.  Domain
+  information can be used to sub-divide alignment scores to ensure
+  that the aligned domain is homologous.
+
+\end{enumerate}
+
+In addition, the FASTA programs from \texttt{fasta-36.3.4} on provide
+an option to produce very BLAST-like output (\texttt{-m BB}), so that
+analysis pipelines require minimal modification.
+
+\section{An overview of the \texttt{FASTA} programs}
+
+\begin{table}
+\caption{\label{table1} Comparison programs in the FASTA36 package}
+\vspace{0.5ex}
+\begin{tabular}{ p{0.8in} p{0.6in} p{4.6 in}}
+\hline \\[-1.0ex]
+FASTA \mbox{program} & BLAST equiv. & Description \\[1.2ex]
+\hline \\[-1.0ex]
+\texttt{fasta36} & \texttt{blastp}/ \texttt{blastn} &
+Compare a protein sequence to a protein sequence 
+database or a DNA sequence to a DNA sequence database using the FASTA 
+algorithm \cite{wrp881,wrp960}.  Search speed and selectivity are 
+controlled with the \emph{ktup}(wordsize) parameter.  For protein 
+comparisons, \emph{ktup} = 2 by default; \emph{ktup} =1 is more sensitive 
+but slower.  For DNA comparisons, \emph{ktup}=6 by default; \emph{ktup}=3 or 
+\emph{ktup}=4 provides higher sensitivity.\\[1 ex]
+
+\texttt{ssearch36} &  & Compare a protein sequence to a protein sequence 
+database or a DNA sequence to a DNA sequence database using the 
+Smith-Waterman algorithm \cite{wat815}. \texttt{ssearch36} uses SSE2
+acceleration, and is only 2 - 5X slower than \texttt{fasta36} \cite{farrar2007}. \\[1 ex]
+
+\texttt{ggsearch36}/ \texttt{glsearch36} &  & Compare a protein sequence to a protein sequence 
+database or a DNA sequence to a DNA sequence database using
+an optimal global:global (\texttt{ggsearch36}) or global:local
+(\texttt{glsearch36}) algorithm.\\[1 ex]
+
+\texttt{fastx36}/ \texttt{fasty36} & \texttt{blastx} &
+Compare a DNA sequence to a protein
+sequence database, by comparing the translated DNA sequence in three
+frames and allowing gaps and frameshifts.  \texttt{fastx36} uses a
+simpler, faster algorithm for alignments that allows frameshifts only
+between codons; \texttt{fasty36} is slower but can produce better alignments
+because frameshifts are allowed within codons \cite{wrp971}.\\[1 ex]
+
+\texttt{tfastx36}/ \texttt{tfasty36}& \texttt{tblastn} &
+Compare a protein sequence to a DNA sequence
+database, calculating similarities with frameshifts to the forward and
+reverse orientations  \cite{wrp971}.\\[1 ex]
+
+\texttt{fastf36/ tfastf36} &  &
+Compares an ordered peptide mixture, as would be obtained by
+Edman degradation of a CNBr cleavage of a protein, against a protein
+(\texttt{fastf}) or DNA (\texttt{tfastf}) database \cite{wrp021}.\\[1 ex]
+
+\texttt{fasts36/ tfasts36} &  &
+Compares a set of short peptide fragments, as would be obtained
+from mass-spec. analysis of a protein, against a
+protein (\texttt{fasts}) or DNA (\texttt{tfasts}) database \cite{wrp021}.\\[1 ex]
+
+\texttt{lalign36} & & Calculate multiple, non-intersecting alignments
+using the sim2 implementation of the Waterman-Eggert
+algorithm\cite{wat875} developed by Xiaoqiu Huang and Webb
+Miller\cite{mil908}.  Statistical estimates are calculated from
+Smith-Waterman scores of shuffled sequences. \\[1 ex]
+
+\hline \\
+\end{tabular}
+\end{table}
+
+Although there are a large number of programs in this package, they
+belong to three groups: (1) Traditional similarity searching programs:
+\texttt{fasta36}, \texttt{fastx36}, \texttt{fasty36},
+\texttt{tfastx36}, \texttt{tfasty36}, \texttt{ssearch36},
+\texttt{ggsearch36}, and \texttt{glsearch36}; (2) Programs for
+searching with short fragments: \texttt{fasts36}, \texttt{fastf36},
+\texttt{tfasts36}, \texttt{tfastf36}, and \texttt{fastm36}; (3) A
+program for finding non-overlapping local alignments: \texttt{lalign36}.
+Programs that start with \texttt{fast} search protein databases, while
+\texttt{tfast} programs search translated DNA databases.  Table I
+gives a brief description of the programs.
+
+In addition, there are several programs included. \texttt{map\_db} is
+used to index FASTA format sequence databases for more efficient
+scanning. \texttt{scripts/lav2plt.pl} can plot the \texttt{.lav}
+files produced by \texttt{lalign -m 11} as postscript
+(\texttt{lav2plt.pl --dev ps}) or SVG (\texttt{lav2plt.pl --dev svg}) output.
+
+\section{Using the FASTA Package}
+\subsection{Introduction/Overview}
+
+All the FASTA sequence comparison programs use similar command line
+options and arguments.  The simplest command line arguments are (in
+order): the name of a query sequence file, a library file, and
+(possibly) the \emph{ktup} parameter.  If command line options are
+provided, they \emph{must} precede the standard query-file and
+library-file arguments. Thus:
+\begin{quote}
+\texttt{fasta36 -s BP62 query.file library.file}
+\end{quote}
+will compare the sequences in \texttt{query.file} with those in
+\texttt{library.file} using the \texttt{BLOSUM62} scoring matrix with
+BLASTP gap penalties (\texttt{-11/-1}).
+
+The program can also be run by typing:
+\begin{quote}
+\texttt{fasta36 -I}
+\end{quote}
+which presents the ``classic'' interactive mode (this was
+the default behavior before version \texttt{36.3.4}).
+In interactive mode,
+you will be prompted for: (1) the name of the test sequence file; (2)
+the name of the library file; (3) whether you want ktup = 1 or 2. (
+1 -- 6 for DNA sequences).
+
+Current versions of the FASTA programs expect a query file and library; if you simply type a program name without arguments (e.g. ``\texttt{ssearch36}''), you will see a short help message:
+\begin{footnotesize}
+\begin{quote}
+\begin{verbatim}
+% ssearch36
+USAGE
+ ssearch36 [-options] query_file library_file
+ ssearch36 -help for a complete option list
+
+DESCRIPTION
+ SSEARCH performs a Smith-Waterman search
+ version: 36.3.4 Mar, 2011
+
+COMMON OPTIONS (options must precede query_file library_file)
+ -s:  [BL50] scoring matrix;
+ -f:  [-10] gap-open penalty;
+ -g:  [-2] gap-extension penalty;
+ -S   filter lowercase (seg) residues;
+ -b:  high scores reported (limited by -E by default);
+ -d:  number of alignments shown (limited by -E by default);
+ -I   interactive mode;
+\end{verbatim}
+\end{quote}
+\end{footnotesize}
+``\texttt{fasta36 -help}'' (or any of the other program names in Table
+I) provides a complete listing of the options available for the program
+and their default values.
+
+The package includes several test files.  To check to make certain
+that everything is working, you can try:
+\begin{quote}
+\begin{verbatim}
+fasta36  ../seq/musplfm.aa ../seq/prot_test.lib
+or
+tfastx36 ../seq/mgstm1.aa ../seq/gst.nlib
+\end{verbatim}
+\end{quote}
+
+\subsection{Sequence files}
+
+The \texttt{fasta36} programs can read query and library files in many
+standard formats (Section \ref{fastlibs}). The default file format for query and library files
+-- the format that will be used if no additional file format
+information is provided -- is \texttt{FASTA} format. Like
+\texttt{BLAST}, version 36 can compare a query file with multiple
+query sequences to a sequence database, performing an independent
+search with each sequence in the query file.
+
+FASTA format files consist of a description line, beginning
+with a '$>$' character, followed by the sequence itself:
+\begin{quote}
+\begin{verbatim}
+>sequence name and description 1
+A F A S Y T .... actual sequence.
+F S S       .... second line of sequence.
+>sequence name and description 2
+PMILTYV ... sequence 2
+\end{verbatim}
+\end{quote}
+All of the characters of the description line are read, and special
+characters can be used to indicate additional information about the
+sequence. In general, non-amino-acid/non-nucleotide characters in the
+sequence lines are ignored.
+
+FASTA format files from major sequence distributors, like the NCBI and
+EBI, have specially formatted description lines, e.g.:\\
+\indent
+\texttt{
+>gi|54321|ref|np\_12345| example NCBI refseq sequence\\
+}
+or\\
+\indent
+\texttt{
+>sw:gstm1\_human P01234 glutathione transferase GSTM1 - human\\
+}
+
+Several sample test files are included with the FASTA distribution:
+\texttt{seq/*.aa} and \texttt{seq/*.seq}, as well as two small sequence
+libraries, \texttt{seq/prot\_test.lib} and \texttt{seq/gst.nlib}.
+
+You can build your own library by concatenating several sequence
+files.  Just be sure that each sequence is preceded by a line
+beginning with a '\texttt{>}' followed by a sequence name/description.  Sequences
+entered with word processors should use a ``text'' mode, e.g. ``Save as
+text'' with MS-WORD, with end of line characters and no special
+formatting characters in the file.  The FASTA program cannot read
+Microsoft Word .DOC files, or rich text (.RTF) files; query and
+library sequence files should contain only sequence descriptions,
+sequences, and end-of-line characters.
+
+\subsection{Running the programs}
+As mentioned earlier, the FASTA programs can be run either
+interactively, by typing the name of a FASTA program (and possibly
+command line options), followed by \texttt{-I} (\texttt{fasta36 -I})
+or from the command line, entering command line options, and the
+query and library file names. For searches of large databases that
+may take several minutes (or longer), it is more convenient
+to run searches from the command line, e.g.:
+\begin{quote}
+\begin{verbatim}
+fasta36 query.file library.file > output.file
+\end{verbatim}
+\end{quote}
+The command line shown above could be typed in a Unix or MacOSX
+terminal window, or from the MS-Windows command line interface
+(command.exe).  The command line syntax shown above works for all
+the FASTA programs, e.g.:
+\begin{quote}
+\begin{verbatim}
+lalign36 mchu.aa mchu.aa > mchu.laln
+fastx36 mgstm1.seq prot_test.lseg > mgstm1.fx_out
+ssearch36 mgstm1.aa xurtg.aa > mgstm1_xurtg.ss
+\end{verbatim}
+\end{quote}
+
+\emph{Command line options} -- The FASTA programs provide a variety of
+command line options that modify the default scoring matrix
+(\texttt{-sBL62}) and gap penalties (\texttt{-f -11}, \texttt{-g -1}), other
+algorithm parameters, the output options (\texttt{-E 0.1}, \texttt{-d 20},
+\texttt{-m 9i}), and statistical procedures (\texttt{-z 2}).  A complete
+list of command line options is shown near the end of this document.
+Unlike the \texttt{BLAST} programs, all \texttt{FASTA} command line options
+must precede the query file name and library file name (and there are
+no command line options available to specify the query and library
+file names).  Thus, you should type:
+\begin{quote}
+\begin{verbatim}
+ssearch36 -s BL62 -f -11 -g -1 query.file library.file > output.file
+\end{verbatim}
+\end{quote}
+If you include \texttt{-I} as one of the options, you can provide
+command line options (e.g. to change the scoring matrix or gap
+penalties) without a query file or library file, and the program will
+use the options but prompt for the necessary files.
+
+\subsection{Interpreting the results}
+
+Fig. \ref{ssearch_run} shows the output from a typical FASTA program
+(\texttt{ssearch36}).  The output file can be
+viewed as four parts: (a) the initial command line and description of
+the query sequence used (mgstm1.aa, 218 aa) and library (PIR1, 13,143
+entries); (b) a description of the search statistics, algorithm
+(Smith-Waterman, SSE2 accelerated), and search parameters (BLOSUM50
+matrix, gap penalties: -10 to open a gap, -2 for each residue in a
+gap); (c) a list of high scoring library sequences, descriptions, similarity scores, and statistical significance; (d) the alignments that produced the scores.
+
+\begin{figure}
+\include{fasta_guide.fg1}
+\caption{\label{ssearch_run}\texttt{ssearch36} results}
+\vspace{1.0ex}
+Comparison of \texttt{seq/mgstm1.aa} against a small protein database
+(\texttt{pir1.lseg}). Some high-scoring sequences and all but one
+alignment were removed to reduce the output size.
+\end{figure}
+
+\subsubsection{Identifying homologs}
+In the description section (which starts: \texttt{The best scores
+  are:}), four numbers after the description of each library sequence
+are shown: (i) (in parentheses) the length of the library sequence;
+(ii) the raw Smith-Waterman score for the alignment (\texttt{s-w}; for
+the \texttt{fasta36}, \texttt{[t]fast[x,y]36} programs, this column
+would be labeled \texttt{opt}, for the \emph{opt}imized -- banded
+Smith-Waterman -- score), (iii) the \emph{bit} score, and (iv) the
+expectation (E()), or statistical significance, of the alignment
+score.  The E()-value depends on the size of the database searched, in
+this case, 13,143 sequences, so the database size is given at the top
+of the list.
+
+The bit score is equivalent to a BLAST bit score; together with the
+length of query and library sequences, it can be used to calculate the
+significance of the alignment.\footnote{$E(D) = D m n 2^{-b}$,
+where $D$ is the number of sequences in the database, $m, n$ are the
+lengths of the two sequences, and $b$ is the bit score.}  Bit scores
+are convenient because they provide a matrix independent score that
+can be compared with other searches performed with other matrices and
+gap penalties against other databases.  However, the E()-value, or
+expectation, provides the most direct measure of the statistical
+significance of the match.
+
+In this example, the \texttt{GSTP1\_RAT}, \texttt{GSTA1\_RAT}, and
+\texttt{GSTA4\_RAT} proteins share strong significant similarity
+($E() \le 6.1 \times 10^{-7}$), while the
+\texttt{GSTF1\_MAIZE}, \texttt{GSTF3\_MAIZE}, and
+\texttt{GSTT1\_DROME} sequences do not share significant similarity
+($E() > 0.001$).  However, \texttt{GSTF1\_MAIZE},
+\texttt{GSTF3\_MAIZE}, and \texttt{GSTT1\_DROME} are all glutathione
+transferase homologs; they simply do not share statistically
+significant similarity with this particular \texttt{mGSTM1} query.
+Statistically significant sequence similarity scores \emph{can} be
+used to infer \emph{homology} (common ancestry), but non-significant
+scores \emph{cannot} be used to infer \emph{non-homology}.
+
+While percent identity is often used to characterize the quality of an
+alignment and the likelihood that it reflects homology, the E()-value
+is a much more reliable value for homology inference (once homology is
+established, the percent identity is much more useful for estimating
+evolutionary distance).  Often sequences that share less than 30\%
+identity will share very significant similarity (in the example above
+\texttt{mgstm1.aa} and \texttt{GSTA4\_RAT}, with E() $<$ 6.1E-07 are
+25.6\% identical).  The expectation value captures information about
+conservative replacements, identities, and alignment length to provide
+a \emph{single} value that captures the significance of the alignment.
+
+For protein searches, library sequences with E()-values $<$ 0.001 for
+searches of a 10,000 entry protein database are almost always
+homologous. Some sequences with E()-values from 1--10 may also be
+related, but unrelated sequences (1--10 per search) will have scores
+in this range as well.
+
+E()-values $<$ 0.001 can reliably be used to infer homology, assuming
+that the statistical estimates are accurate.  The two most common
+causes of statistical problems are low-complexity regions and
+amino-acid composition bias.  Low-complexity regions can be
+identified using the \texttt{pseg} program \cite{woo935}, and filtered
+out using the \texttt{-S} option. Composition bias rarely produces
+highly-significant E()-values, but can cause unrelated sequences to
+have E()-values between 0.01 and 0.001. The FASTA programs offer two
+shuffle-based strategies for evaluating composition bias; calculating
+similarity scores for random sequences with the same length and amino
+acid composition (\texttt{-z 11} $..$ \texttt{16}),\footnote{Random
+  shuffles are performed for pairwise alignments and \texttt{lalign36}
+  by default.} and calculating statistical estimates derived from
+shuffles of the high-scoring sequences (\texttt{-z 21, 22, 24, 25,
+  26}).
+
+When \texttt{-z 21 .. 26} shuffles are performed, the FASTA36 programs
+present two E()-values in the list of high scoring sequences and the
+alignments; the traditional one based on the library search, and a
+second \texttt{E2()} value, based on the shuffles of the high scoring
+sequences.  \texttt{-z 21 .. 26} shuffles are most useful for
+evaluating the significance of translated-DNA:protein searches like
+\texttt{fastx36}.  Out-of-frame translations can produce cryptic low
+complexity regions, which are most apparent when the high-scoring
+sequences are shuffled.  \texttt{-z 21} shuffles are more efficient
+than individual sequence shuffles, because the set of high scoring
+sequences is shuffled 500 -- 1,000 times, rather than 500 shuffles for
+each of 50 -- 100 high scoring library sequences.  It is almost as
+effective, because homologous sequences share similar amino-acid
+composition.
+
+The statistical routines assume that the library contains a large
+sample of unrelated sequences.  If the library contains fewer than
+500 sequences (\texttt{MAX\_RSTATS}), then the library sequences are
+shuffled to produce 500 random scores, from which lambda and K
+statistical parameters are estimated. If the library contains a large
+number of \emph{related} sequences, then the statistical parameters
+should be estimated by using the \texttt{-z 11-15} options.
+\texttt{-z} options greater than 10 calculate a shuffled similarity
+score for each library sequence, in addition to the unshuffled score,
+and estimate the statistical parameters from the scores of the
+shuffled sequences.
+
+\subsubsection{Looking at alignments}
+
+The description section described above contains the critical
+information for inferring homology, the \texttt{E()}-value.  The
+alignment section shows the actual alignments that produced the
+similarity score and statistical estimates.  In
+Fig. \ref{ssearch_run}, the alignment display reports the percent
+identity, percent similarity (number of aligned residues with BLOSUM50
+values $\ge$ 0), and the boundaries of the alignment.  Note that for
+the \texttt{ssearch36} and \texttt{fasta36} programs, the alignment shown can
+include residues that are not part of the best local alignment
+(e.g. residues 1--5 and 207--218 in \texttt{mGSTM1} in
+Fig. \ref{ssearch_run}).  The amount of additional sequence context
+shown is the alignment line length (60 residues, set by \texttt{-w
+  len}) divided by 2 by default, but can be adjusted with the
+\texttt{-W context} option.
+
+\begin{figure}
+\include{fasta_guide.fg2}
+\vspace{-3.0ex}
+\caption{\label{seg-aln}Alignment with \texttt{-S} filtered sequence}
+\end{figure}
+
+Fig. \ref{seg-aln} shows an example of a \texttt{fasta36} alignment
+produced using the \texttt{-S} option to filter out lower-case (low
+complexity) residues.  Here, additional scores (\texttt{initn},
+\texttt{init1}) are shown, in addition to the \texttt{opt} score which
+is used to rank the sequences and calculate statistical significance.
+The \texttt{init1} score is the highest scoring alignment without
+gaps; \texttt{initn} is a score that combines consistent
+(non-overlapping) runs without gaps, and \texttt{opt} is the score of
+a banded Smith-Waterman of width 16 for \emph{ktup=2} that is applied
+to sequences with \texttt{initn} scores over the optimization
+threshold. In Fig. \ref{seg-aln},  the \texttt{init1} score is
+based on the long, un-gapped region from residues 46--208 in
+\texttt{mGSTM1}, while the \texttt{initn} and \texttt{opt} scores
+include the other regions joined by gaps.  The \texttt{initn} score is
+higher than the \texttt{opt} score, because it uses a simpler,
+length-independent, gap penalty.
+
+The \texttt{init1} and \texttt{initn} scores are shown for
+historical reasons, and can be used to illustrate the FASTA algorithm.
+But the \texttt{opt} score is the most reliable and sensitive score
+for inferring homology; the others can be ignored.
+
+For \texttt{fasta36} with proteins, the final alignment and score is
+calculated with the Smith-Waterman algorithm. For DNA sequences, a
+banded Smith-Waterman is used. (The \texttt{-A} option produces banded
+Smith-Waterman alignments for proteins, and full Smith-Waterman for
+DNA.)  In Fig. \ref{seg-aln}, the \texttt{opt} score and
+\texttt{Smith-Waterman} scores are calculated on exactly the same
+alignment, but the \texttt{opt} score excludes the contribution from
+the ``low-complexity'' region between 19--30 in \texttt{GST26\_SCHMA}.
+
+\subsubsection{Results without alignments}
+
+While sequence alignments are very informative, it is often not
+practical to examine all the statistically significant alignments in
+large-scale searches. The \texttt{-m 9} and \texttt{-m 8} options
+present summaries of each alignment (alignment boundaries, percent
+identity, and other information) in a much more compact
+form. \texttt{-m 9i} adds three columns to the summary report line,
+the fraction identical, fraction similar, and alignment length (in
+addition, variants are reported if they affect identity). \texttt{-m
+  9I} is similar to \texttt{-m 9i}, but also reports domain content if
+annotations are used.  \texttt{-m 9c} or \texttt{-m 9C} (see options
+below) provide a detailed encoding of the alignment, that allows it to
+be reconstructed. For large-scale searches, we routinely use
+\texttt{-m 8} with the \texttt{-d 0} option, which sets the number of
+alignments shown to 0 (thus none are shown).  Alternatively, the
+\texttt{-m 8} and \texttt{-m 8C} output options produce BLAST-format
+tabular results summaries (\texttt{-m 8C} provides commented tabular
+results).  \texttt{-m 8CC} adds an alignment CIGAR string and
+annotation string to the BLAST tabular format.  \texttt{-m BB}
+produces an output that mimics BLAST output (with alignments).
+
+\subsubsection{Alignments with annotations}
+
+The command line \texttt{-V} option (described below) causes the FASTA
+programs to ``decorate'' their sequence alignments with annotation
+information, such as functional sites, variants, and domain-based
+sub-alignment scores.  For example, a comparison of the
+\texttt{seqs/gstm1\_human.vaa} sequence with SwissProt using the
+\texttt{scripts/ann\_feats\_up\_www2.pl} script:
+\begin{footnotesize}
+\begin{quote}
+\begin{verbatim}
+ssearch36 -m 9i -V \!../scripts/ann_feats_up_www2.pl ../seq/gstm1_human.vaa /slib/swissprot.fa
+\end{verbatim}
+\end{quote}
+\end{footnotesize}
+Produces the following additional output:
+\begin{footnotesize}
+\begin{verbatim}
+Annotation symbols:
+ = : Active site
+ * : Modified
+ # : Substrate binding
+ ^ : Metal binding
+ @ : Site
+
+The best scores are:                       s-w bits E(458668) %_id  %_sim  alen
+sp|P09488.3|GSTM1_HUMAN Glutathione ( 218) 1500 375.2 2.4e-103 1.000 1.000  218 |Var: K173N;S210T;
+sp|Q03013.3|GSTM4_HUMAN Glutathione ( 218) 1375 344.5 4.4e-94 0.904 0.963  218 |Var: S2P;A160V;L208V;Y209F;...
+sp|Q5R8E8.3|GSTM2_PONAB Glutathione ( 218) 1310 328.5 2.9e-89 0.862 0.959  218
+sp|Q9TSM5.3|GSTM1_MACFA Glutathione ( 218) 1308 328.0   4e-89 0.858 0.954  218
+sp|Q9TSM4.3|GSTM2_MACFA Glutathione ( 218) 1307 327.7 4.8e-89 0.862 0.954  218
+sp|P28161.2|GSTM2_HUMAN Glutathione ( 218) 1306 327.5 5.7e-89 0.853 0.959  218 |Var: S173N;
+sp|P46439.3|GSTM5_HUMAN Glutathione ( 218) 1305 327.2 6.7e-89 0.876 0.954  218 |Var: L179P;
+\end{verbatim}
+\end{footnotesize}
+This summary of high scoring hits shows one of the effects of
+annotation---substitution of variant residues to increase the score.
+In this case, the query sequence \texttt{gstm1\_human.vaa} is a known
+variant of the canonical \texttt{GSTM1\_HUMAN}/\texttt{P09488} UniProt
+sequence.  Without the \texttt{-V} annotation option,
+\texttt{gstm1\_human.vaa} would be 99\% identical to
+\texttt{GSTM1\_HUMAN}, but because UniProt documents the variant
+residues in the feature table, the \texttt{K173N} and \texttt{S210T}
+substitutions are made in the library (subject) sequence, producing a
+perfect match.
+
+In addition to the variant substitution shown above, the alignments
+provide a more complete view of the annotations available on the
+library (subject) proteins.  Below is the report for the
+\texttt{GSTM4\_HUMAN} alignment:
+\begin{footnotesize}
+\begin{quote}
+\begin{verbatim}
+>>sp|Q03013.3|GSTM4_HUMAN Glutathione S-trans             (218 aa)
+ Variant: 2P=2P : S2P : UniProtKB FT ID: VAR_033979
+ Site:@ : 7Y=7Y : Site: Glutathione binding
+ Site:@ : 46W=46W : Site: Glutathione binding
+ Site:@ : 59N=59N : Site: Glutathione binding
+ Site:@ : 72Q=72Q : Site: Glutathione binding
+ Region: 2-88:2-88 : score=599; bits=150.1; Id=0.989; Q=415.2 :  GST N-terminal :1
+ Site:# : 116Y=116Y : Substrate binding: Substrate
+ Variant: 160V=160V : A160V : UniProtKB FT ID: VAR_033980
+ Variant: 208V=208V : L208V : UniProtKB FT ID: VAR_049487
+ Region: 90-208:90-208 : score=699; bits=175.1; Id=0.833; Q=489.3 :  GST C-terminal :2
+ Variant: 209F=209F : Y209F : UniProtKB FT ID: VAR_049488
+ Variant: 211K=211K : R211K : UniProtKB FT ID: VAR_049489
+ Variant: 212M=212M : V212M : UniProtKB FT ID: VAR_049490
+ s-w opt: 1375  Z-score: 1823.2  bits: 344.5 E(458668): 4.4e-94
+Smith-Waterman score: 1375; 90.4% identity (96.3% similar) in 218 aa overlap (1-218:1-218)
+\end{verbatim}
+\end{quote}
+\end{footnotesize}
+This report can be broken into three parts: (1) information on
+variants, described above, (2) information on annotated sites, and (3)
+information on annotated domains.  For each annotated site, the
+coordinate and amino-acid residue in the query and library (subject)
+sequence is shown, as well as the conservation state (\texttt{=} in
+all these examples).  For annotated domains, the overall alignment
+score is broken into pieces, based on the boundaries of the domains.
+In this case, the full alignment extends from residues 1--218,
+producing a raw Smith-Waterman score of 1375 and a bit score of
+344.5.  The GST N-terminal domain is annotated from residue 2--88 on
+\texttt{GSTM4\_HUMAN}, and the 2--88 region of the alignment produces
+a score of 599 and bit score of 150.1.  This region is 98.9\%
+identical, and the probability of that similarity score is
+$10^{-41.52}$ (the Qvalue score is $-10 \log_{10} P$).  The
+alignment associated with GST C-terminal domain is slightly less well
+conserved (83.3\% identical), but longer, so it produces a higher
+Smith-Waterman score (699), bit score (175.1) and Q-value.
+
+Sub-alignment scores can be used to identify cases of alignment
+over-extension \cite{wrp136}, where an alignment extends well beyond
+the homologous domain.  In this case, the homologous region will
+produce the vast majority of the score, and the non-homologous
+over-extension will produce very little score.  For example, when
+\texttt{SRC8\_HUMAN} aligns with \texttt{LASP1\_MOUSE}, the alignment
+spans 200 residues, but only about 49 of those residues, an
+\texttt{SH3} domain, are homologous:
+
+\begin{footnotesize}
+\begin{quote}
+\begin{verbatim}
+>>sp|Q61792.1|LASP1_MOUSE LIM and SH3 domain protein 1;  LASP-1;              (263 aa)
+ Region: 369-398:66-95 : score=20; bits=13.7; Id=0.200; Q=0.0 :  Nebulin_repeat  InterPro
+ Region: 400-434:97-131 : score=-8; bits=8.7; Id=0.150; Q=0.0 :  Nebulin_repeat  InterPro
+ Region: 435-499:132-203 : score=13; bits=11.4; Id=0.197; Q=0.0 :  NODOM :0
+ Region: 499-547:204-261 : score=124; bits=47.8; Id=0.474; Q=92.1 :  SH3  InterPro
+ s-w opt: 148  Z-score: 253.4  bits: 55.6 E(459565): 1.2e-06
+Smith-Waterman score: 159; 26.3% identity (55.6% similar) in 205 aa overlap (369-547:66-261)
+\end{verbatim}
+\end{quote}
+\end{footnotesize}
+The 150 aligned residues outside the \texttt{SH3} homology produce
+less than 20\% of the alignment score, while spanning two
+non-homologous Nebulin repeat domains.
+
+\subsection{Program Options}
+
+Command line options are available to change the scoring parameters
+and output display. Unlike the NCBI BLAST programs, command line
+options \emph{must} precede the query file name and library file name
+arguments.  To see the command-line options for a program and their
+defaults, type \texttt{program\_name -help}, e.g. \texttt{fasta36
+  -help} or \texttt{ssearch36 -help}. For a quick list of the most
+common options, just type the program name without any options
+(e.g. \texttt{fasta36$<$ret$>$}).
+
+\subsubsection{Command line options}
+\begin{description}
+\item[\texttt{-a}] (\texttt{fasta36}, \texttt{ssearch36},
+  \texttt{glsearch36}, \texttt{fasts36}) show both sequences in their
+  entirety.
+\item[\texttt{-A}] force Smith-Waterman alignments for
+  \texttt{fasta36} DNA sequences.  By default, only \texttt{fasta36}
+  protein sequence comparisons use Smith-Waterman alignments.
+  Likewise, for proteins, use band alignments (Smith-Waterman is used
+  by default).
+\item[\texttt{-b \#}] Number of sequence scores to be shown on output.
+  In the absence of this option, \texttt{fasta36} (and
+  \texttt{ssearch36}) display all library sequences obtaining
+  similarity scores with expectations less than the expectation (-E)
+  threshold, 10.0 for proteins, and 2.0 for DNA:DNA and
+  protein:translated DNA.  The \texttt{-b \#} option can limit the
+  display further.  There are two ``sub-modes'' of \texttt{-b}.
+  \texttt{-b =100} will force 100 high scores to be displayed,
+  regardless of the expectation (\texttt{-E}) threshold, and
+  \texttt{-b >1} will show at least \texttt{1}, but is otherwise
+  limited by \texttt{-E}.  Thus, \texttt{-b 10} will show \emph{no
+    more than} 10 results, limited by \texttt{-E}; \texttt{-b =10}
+  will always show \emph{exactly} \texttt{10} results, and \texttt{-b
+    >5} will show \emph{at least} \texttt{5} results, but could show
+  many more if more results have E()-values $\le$ \texttt{-E e\_cut}.
+\item[\texttt{-c \#,\#}] (\texttt{fasta36}, \texttt{[t]fast[x,y]36}
+  only) Fraction of alignments optimized (second value is fraction of
+  sequences joined). FASTA36 uses a statistical threshold strategy
+  that joins and optimizes only the fraction of the alignments with an
+  \texttt{initn} score expected \texttt{-c} times.  Thus, \texttt{-c
+    0.05} should optimize about 5\% of sequences.  The actual number
+  of sequences optimized (and joined) is displayed in the scoring
+  parameters line.  Thus:
+\begin{quote}
+\begin{verbatim}
+Parameters: BL50 matrix (15:-5), open/ext: -10/-2
+ ktup: 2, E-join: 1 (0.687), E-opt: 0.2 (0.294), width:  16
+\end{verbatim}
+\end{quote}
+reports that 20\% of the sequences in the database should have been
+band-optimized, and 29.4\% were. Reducing the \texttt{-c opt} fraction
+improves performance, but dropping the fraction below 0.02 can
+reduce the accuracy of the statistical estimates.
+
+\texttt{-c O} (letter 'O') sets the joining/optimization
+thresholds as they were prior to \texttt{fasta-36.3.3} (original thresholds). Positive
+values set the thresholds to specific score values, as was the case
+in older versions of \texttt{fasta}.
+
+\item[\texttt{-C}]
+length of the sequence name printed at the beginning of alignment
+lines (default 6 characters).
+\item[\texttt{-d \#}]
+Maximum number of alignments to be displayed (must be \texttt{<=} to the number of descriptions, \texttt{-b \#})
+\item[\texttt{-D}]
+  Provide some debugging output.  Used in conjunction
+  with the \texttt{-e expand\_script.sh}, the \texttt{link\_acc\_file}
+  and \texttt{link\_lib\_file} are not deleted during the run; so that
+  \texttt{expand\_script.sh} scripts can be tested.
+
+\item[\texttt{-e expand\_script.sh}]
+
+  Expand the set of sequences that
+  are aligned to beyond the set of sequences searched. When the
+  \texttt{-e expand\_script.sh} option is used, the
+  \texttt{expand\_script.sh} script is run after the initial search
+  scan but before the list of high-scoring sequences is displayed.
+  \texttt{expand\_script.sh} is given a single argument, the name of a
+  file that contains a list of accession strings (the text between the
+  \texttt{>} and the first space ('\textvisiblespace') character)
+  followed by the E()-value for the sequence (separated by a
+  \texttt{<tab>} character), e.g.:
+\begin{quote}
+\begin{verbatim}
+gi|121719|sp|P08010|GSTM2_RAT<tab>2.69e-86
+gi|121746|sp|P09211|GSTP1_HUMAN<tab>1.51e-20
+gi|121749|sp|P04906|GSTP1_RAT<tab>1.16e-19
+gi|62822551|sp|P00502|GSTA1_RAT<tab>9.5e-12
+\end{verbatim}
+\end{quote}
+The script should produce a fasta-formatted list of additional
+sequences printed to \texttt{stdout}.  The script is run with the command:
+\begin{quote}
+\begin{verbatim}
+expand_script.sh link_acc.tmp_file > link_lib.tmp_file
+\end{verbatim}
+\end{quote}
+The sequences in \texttt{link\_lib.tmp\_file} (a temporary file name is
+actually used, and the file is deleted unless the \texttt{-D} option
+is used) are then compared and, if they are significant, included in
+the list of high scoring sequences and the alignments.  The expanded
+set of sequences does not change the database size or statistical
+parameters, it simply expands the set of high-scoring sequences.
+
+The \texttt{fasta36/misc} directory contains
+\texttt{expand\_uniref50.pl} that uses a mySQL table based on the
+\texttt{uniref50} clusters.  Using the script and the
+\texttt{uniref50} cluster information, one can search
+\texttt{uniref50.fasta}, but then expand the hits so that
+\texttt{uniprot} appears to be searched.
+
+\item[\texttt{-E e\_cut [e\_cut\_r]}] Limit the number of scores and
+  alignments shown based on the expected number of scores.  Used to
+  override the expectation value of 10.0 (protein:protein; 5.0
+  translated-DNA:protein; 2.0 DNA:DNA) used by default.  \texttt{-E
+    2.0} will show all library sequences with scores with an
+  expectation value $\le$ 2.0.  With \texttt{fasta-36}, a second
+  value, \texttt{e\_cut\_r} is available to limit the E()-values of
+  additional sequence alignments between the query and library
+  sequences.  If not given, the threshold is \texttt{e\_cut}/10.0.  If
+  given with a value $>$ 1.0, \texttt{e\_cut\_r} = \texttt{e\_cut} /
+  value; for a value $<$ 1.0, \texttt{e\_cut\_r} = value; If
+  \texttt{e\_cut\_r} $<$ 0, then the additional alignment option is
+  disabled.
+\item[\texttt{-f \#}]
+Gap open penalty (-10 by default for proteins,
+-12 for DNA, -12 for \texttt{[t]fast[xy]}).
+\item[\texttt{-F \#}]
+Limit the number of scores and alignments shown based on the expected
+number of scores. \texttt{-E \#} sets the highest E()-value shown; \texttt{-F \#} sets
+the lowest E()-value displayed. Thus, \mbox{\texttt{-F 0.0001}} will not show any matches or
+alignments with E() $<$ 0.0001.  This allows one to skip over close
+relationships to search for more distant relationships.
+\item[\texttt{-g \#}]
+Penalty per residue in a gap (-2 by default for proteins,
+-4 for DNA, -2 for \texttt{[t]fast[xy]}).  A single residue gap costs \texttt{f} $+$ \texttt{g}.
+\item[\texttt{-h}]
+Short help message. Help options with \texttt{':'}, e.g. \texttt{-s:},
+require an argument (\texttt{-s BP62}).  Defaults are shown in square
+brackets, e.g.: \texttt{-s:\ [BL50]}.
+\item[\texttt{-help}]
+Long help message
+\item[\texttt{-H}]
+Show histogram.
+\item[\texttt{-i}]
+DNA queries - search with reverse complement.  For 
+\texttt{tfastx36/y36}, search the reverse complement of the library sequence
+only (complement of \texttt{-3} option).
+\item[\texttt{-I}]
+Interactive mode (the default for versions older than \texttt{fasta-36.3.4}).
+\item[\texttt{-j \#}]
+Penalty for frameshift between codons (\texttt{[t]fastx36}, \texttt{[t]fasty36}) and within a codon (\texttt{fasty36}/ \texttt{tfasty36} only).
+\item[\texttt{-J}]
+(\texttt{lalign36} only) show the identity alignment (normally
+  suppressed, \texttt{-I} in versions before \texttt{fasta-36.3.4}).
+\item[\texttt{-k \#}]
+number of shuffles for statistical estimates from shuffling.
+\item[\texttt{-l file}]
+Location of library menu file (FASTLIBS).
+\item[\texttt{-L}]
+Display longer library sequence description.
+\item[\texttt{-M low-high}]
+Range of amino acid sequence lengths to be included in the search.
+\item[\texttt{-m \#}]
+Specify alignment type: 0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, B, BB, ``F\# out\_file''
+\begin{small}
+\begin{verbatim}
+    -m 0        -m 1          -m 2          -m 3        -m 4
+MWRTCGPPYT   MWRTCGPPYT    MWRTCGPPYT                 MWRTCGPPYT
+::..:: :::     xx  X       ..KS..Y...    MWKSCGYPYT   ----------
+MWKSCGYPYT   MWKSCGYPYT
+\end{verbatim}
+\end{small}
+If the \texttt{-V '*@\%'} annotation option has been used, 
+annotations can be included in either the coordinate line (the default) or the
+middle alignment line (\texttt{-m 0M}, \texttt{-m 1M}), or both
+(\texttt{-m 0B}, \texttt{-m 1B}).  See \texttt{-V} for more details.
+
+\indent \texttt{-m 5}: a combination of \texttt{-m 4} and \texttt{-m
+  0}. \texttt{-m 6} provides \texttt{-m 5} plus HTML formatting.  In
+addition, independent \texttt{-m} options can be combined. Thus, one
+can use \texttt{-m 1 -m 6 -m 9}.
+
+\item[\texttt{-m 8}] provides BLAST tabular format output (a tab
+  delimited line with the query name, library name, percent identity,
+  and other alignment information). ``\texttt{-m 8C}'' provides the
+  additional information provided by the BLAST tabular format with
+  comment lines.  BLAST tabular format has been extended to include
+  either a CIGAR string alignment encoding (\texttt{-m 8CC} with BLAST
+  comments, \texttt{-m 8XC} without comments) and, if available, an
+  annotation encoding matching FASTA \texttt{-m 9C} output. All the
+  \texttt{-m 9c/C/d/D} encodings are available with BLAST tabular
+  output using \texttt{-m 8C[c/C/d/D]}.
+
+\item[\texttt{-m 9}] display alignment coordinates and scores with the
+  best score information.  \texttt{-m 9i} provides alignment length,
+  percent identity, and percent similarity only. \texttt{-m 9i} also
+  provides variation information if it improves the score. \texttt{-m
+    9I} provides both identity and domain information on the summary
+  line.
+
+ \texttt{ -m 9, -m 9c} and \texttt{-m 9C} extend the normal best score information:
+\begin{footnotesize}
+\begin{verbatim}
+The best scores are:                                      opt bits E(14548)
+XURTG4 glutathione transferase (EC 2.5.1.18) 4 -   ( 219) 1248 291.7 1.1e-79
+\end{verbatim}
+\end{footnotesize}
+
+to include the additional information (on the same line, separated by
+$<$tab$>$ characters):
+\begin{footnotesize}
+\begin{verbatim}
+%_id  %_gid   sw  alen  an0  ax0  pn0  px0  an1  ax1 pn1 px1 gapq gapl  fs
+0.771 0.771 1248  218    1  218    1  218    1  218    1  219   0   0   0
+\end{verbatim}
+\end{footnotesize}
+
+The first two values are fraction identical and fraction similar
+(score $\ge 0$), followed by the Smith-Waterman alignment score (\texttt{sw}), the
+alignment length (\texttt{alen}), and the coordinates of the beginning
+and end of the alignment in the query and target (library) sequences
+(\texttt{an0} beginning, \texttt{ax0} end in query; \texttt{an1}
+beginning, \texttt{ax1} end in target/library), and the coordinate
+system for the beginning and end of the query and target/library
+sequence (\texttt{pn0} is the displayed coordinate of the first
+residue of the query sequence, \texttt{px0} is the displayed
+coordinate of the last residue, \texttt{pn1},\texttt{px1} provide the
+coordinates for the target/library sequence). \texttt{gapq},
+\texttt{gapl} report the number of gaps in the query and library
+sequence; \texttt{fs} reports the number of frameshifts.
+
+\texttt{ -m 9c} provides additional information: an encoded alignment string.  For example, the alignment:
+\begin{footnotesize}
+\begin{verbatim}
+       10        20        30        40        50          60         70  
+GT8.7  NVRGLTHPIRMLLEYTDSSYDEKRYTMGDAPDFDRSQWLNEKFKL--GLDFPNLPYL-IDGSHKITQ
+       :.::  . :: ::  .   .:::         : .:    ::.:   .: : ..:.. :::  :..:
+XURTG  NARGRMECIRWLLAAAGVEFDEK---------FIQSPEDLEKLKKDGNLMFDQVPMVEIDG-MKLAQ
+               20        30                 40        50        60        
+\end{verbatim}
+\end{footnotesize}
+would be encoded:
+\begin{footnotesize}
+\texttt{=23+9=13-2=10-1=3+1=5}
+\end{footnotesize}.
+The numbers in the alignment encoding are with respect to the beginning
+of the alignment, not the sequences.  The beginning coordinate of the
+alignment is given earlier in the \texttt{-m 9c} line.  \texttt{-m 9C}
+provides the alignment encoding in CIGAR format:
+\begin{footnotesize}
+\texttt{28M9D13M2I10M1I3M1D5M}
+\end{footnotesize}.
+
+(June, 2014) The \texttt{-m 9c/C} option has been extended to
+\texttt{-m 9d/D}, which encodes the positions of mismatches as well as
+insertions and deletions.  For the example above, the \texttt{-m 9d}
+encoding would be:
+\begin{footnotesize}
+\texttt{=1x1=2x4=2x1=2x7=3-9=1x2=1x4=2x1=1x1+2x1=1x1=1x3=1x2+1=3-1x1=1x2=1}
+\end{footnotesize}
+while \texttt{-m 9D} would be:
+\begin{footnotesize}
+\texttt{1M1X2M4X2M1X2M7X3M9D1M2X1M4X2M1X1M1X2I1X1M1X1M3X1M2X1I3M1D1X1M2X1M}
+\end{footnotesize}
+\item[\texttt{-m 10}]
+a parseable format for use with other programs.
+\item[\texttt{-m 11}]
+Provide \texttt{lav}-like output (used by \texttt{lalign}) for graphical output.
+\begin{quote}
+\texttt{lalign36 -m 11 mchu.aa mchu.aa | lav2plt.pl --dev ps  > mchu\_laln.ps}
+\end{quote}
+Produces a postscript plot of the local alignments.  Likewise,
+\texttt{lav2plt.pl --dev svg} produces SVG output.
+
+\item[\texttt{-m BB}] Format output to mimic BLAST format.  \texttt{-m
+  B} formats alignments to look like BLAST alignments (Query/Sbjct),
+  but is FASTA output otherwise. \texttt{-mBB} imitates BLAST as much
+  as possible, and cannot be used with other \texttt{-m} options.
+
+\item[\texttt{-m "F\# out.file"}] Send an alternate result format to \texttt{out.file}.
+Normally, the \texttt{-m out\_fmt} option applies to the default output
+file, which is either \texttt{stdout}, or specified with \texttt{-O out\_file} (or within
+the program in interactive mode). With \texttt{-m F}, an output format can be
+associated with a separate output file, which will contain a complete
+FASTA program output.  Thus,
+\begin{quote}
+\begin{small}
+\begin{verbatim}
+  ssearch36 -m 9c -m "FBB blast.out" -m "F9c,10 m9c_10.out" query library
+\end{verbatim}
+\end{small}
+\end{quote}
+Sends the \texttt{-m 9c} output to \texttt{stdout}, but will also send
+\texttt{-m BB} output to the \texttt{blast.out} file, and \texttt{-m 9c -m
+  10} output to \texttt{m9c\_10.out}.  Consistent \texttt{-m out\_fmt}
+commands can be set to the same file by separating them with ','.
+Producing alternative format alignments in different files has little
+additional computational cost.
+
+Because a space (\textvisiblespace) is used to separate the output
+format (\texttt{-m}) values from the file name, the \texttt{-m F}
+argument must typically be surrounded by quotation marks (\texttt{"}).
+
+One of the shortcomings of this approach is that it affects only the
+output format, not the other options that modify the amount of output.
+Thus, if you specify \texttt{-E 0.001}; that expect threshold will be
+used for all the output files.  When a \texttt{-m} option does modify
+the output (e.g. \texttt{-m 8} sets \texttt{-d 0}), that modification
+is specific to the output file.
+
+\item[\texttt{-M low-high}]
+Include library sequences with lengths between low and
+high.
+\item[\texttt{-n}]
+Force the query sequence to be treated as a DNA sequence.  
+Useful when query sequences contain a large number of
+ambiguous residues, e.g. transcription factor binding sites.
+\item[\texttt{-N \#}]
+break long library sequences into blocks of \# residues.  Useful for
+bacterial genomes, which have only one sequence entry.  -N 2000 works
+well for bacterial genomes. (This option was required when
+FASTA only provided one alignment between the query and library
+sequence.  It is not as useful, now that multiple alignments are
+available.)
+
+\item[\texttt{-o off1,off2}]
+(Previously \texttt{-X}.) Specifies offsets for the beginning of the query and library sequence.
+For example, if you are comparing upstream regions for two genes, and
+the first sequence contains 500 nt of upstream sequence while the
+second contains 300 nt of upstream sequence, you might try:
+\begin{quote}
+\texttt{fasta -o "-500 -300" seq1.nt seq2.nt}
+\end{quote}
+If the \texttt{-o} option is not used, FASTA assumes numbering starts with 1.
+(You should double check to be certain the negative numbering works
+properly.)
+
+\item[\texttt{-O}] Send a copy of results to \texttt{filename}.
+  Helpful for environments without STDOUT, but should be avoided (use
+  \texttt{> filename} instead).
+
+\item[\texttt{-p}]
+Force query to be treated as protein sequence.
+
+\item[\texttt{-P PSSM\_file}]
+Specify a PSI-BLAST format PSSM (Position Specific Scoring Matrix)
+file.  \texttt{ssearch36}, \texttt{ggsearch36}, and
+\texttt{glsearch36} can use a PSSM file to improve the sensitivity of
+a search. The FASTA programs accept two PSSM file formats:\\[2ex]
+\begin{tabular}{l l l}
+\hline\\[-1.5ex]
+format & \texttt{blastpgp} & option \\[0.5ex]
+\hline\\[-1.5ex]
+0 & \texttt{blastpgp -C pssm.chk -u 0} & byte-encoded \\
+2 & \texttt{blastpgp -C pssm.asnb -u 2} & binary ASN.1 \\
+%  & \texttt{psiblast -out\_pssm\_text} \\[1.5ex]
+\hline\\[-0.5ex]
+\end{tabular}\\
+which can be specified after the file name, e.g.:
+\begin{quote}
+\texttt{ssearch36 -P 'pssm.asnb 2' pssm\_query.aa +sp+}
+\end{quote}
+Searches with a PSI-BLAST PSSM still require a query sequence
+file, and the query sequence file must match the PSSM seed sequence.
+The format 0 byte-encoded PSSM is machine dependent; it must be
+created by \texttt{blastpgp} on the same architecture as
+\texttt{ssearch36}.  In general, you should use the binary ASN.1 (format 2) file.
+
+With the release of \texttt{NCBI-BLAST+}, \texttt{psiblast} replaces
+\texttt{blastpgp}, and \texttt{psiblast} does not produce the binary
+ASN.1 PSSM checkpoint data.  However, the text ASN.1 PSSM checkpoint
+file (produced with the \texttt{psiblast} option \texttt{-out\_pssm})
+can be converted to a binary ASN.1 format that \texttt{ssearch36} can
+read using the NCBI \texttt{datatool} program (available from
+\url{ftp://ftp.ncbi.nlm.nih.gov/toolbox/ncbi_tools++/BIN/CURRENT/datatool})
+together with
+\url{http://www.ncbi.nlm.nih.gov/data_specs/asn/NCBI_all.asn}. More
+information about \texttt{datatool} is available from
+\url{http://www.ncbi.nlm.nih.gov/data_specs/NCBI_data_conversion.html}.
+The NCBI BLASTP/PSI-BLAST website provides the same PSSM text ASN.1
+file with the downloads link.  A text ASN.1 PSSM file can be converted
+to a binary ASN.1 file using the command:
+\begin{quote}
+\texttt{datatool -m NCBI\_all.asn -v pssm.asn\_txt -e pssm.asnb}
+\end{quote}
+The \texttt{pssm.asnb} can then be used with
+\texttt{ssearch36} with the \texttt{-P 'pssm.asnb 2'}
+option shown above.
+
+\item[\texttt{-Q,-q}]
+Quiet - does not prompt for any input.  Writes scores and alignments
+to the terminal or standard output file (on by default, turned off
+with \texttt{-I}).
+\item[\texttt{-r +n/-m}]
+Specify match/mismatch scores for DNA comparisons.  The default is
+\texttt{+5/-4}. \texttt{+3/-2} can perform better in some cases.
+\item[\texttt{-R file}]
+Save a results summary line for every sequence in the sequence
+library.  The summary line includes the sequence identifier,
+superfamily number (if available), position
+in the library, and the similarity scores calculated.  This option can
+be used to evaluate the sensitivity and selectivity of different
+search strategies \cite{wrp951,wrp981}.
+\item[\texttt{-s file}] Specify the scoring matrix file.
+  \texttt{fasta36} uses the same scoring matrix format as Blast.
+  Several scoring matrix files are included in the standard
+  distribution in the \texttt{data/} directory.  For protein
+  sequences: \texttt{codaa.mat} - based on minimum mutation matrix;
+  \texttt{idnaa.mat} - identity matrix; \texttt{pam250.mat} - the
+  PAM250 matrix \cite{day787} (\texttt{-s P250}); and
+  \texttt{pam120.mat} - a PAM120 matrix (\texttt{-s P120}).  The
+  default scoring matrix is BLOSUM50 (\texttt{-s BL50}). Other
+  matrices include a series of modern PAM-based matrices
+  \cite{tay925}: MDM40/\texttt{-s MD40}, MDM20/\texttt{-s MD20}, and
+  MDM10/\texttt{-s MD10}, and a selection from the BLOSUM series
+  \cite{hen929} BLOSUM50, 62, and 80/\texttt{-s BL50}, \texttt{-s
+    BL62}, \texttt{-s BL80}.  \texttt{-s BP62} sets the scoring matrix
+  to BLOSUM62 and the gap penalties to -11/-1, identical to
+  \texttt{BLASTP}.  In addition, the VTML160 matrix (\texttt{-s
+    VT160}) \cite{muller2002} and OPTIMA\_5 (\texttt{-s OPT5})
+  \cite{kan023} are available.
+
+If the scoring matrix is prefaced by a question mark,
+e.g. \texttt{?BP62}, then the scoring matrix is adjusted for each
+query to ensure that a 100\% identical match can produce a score of at
+least 40 bits.  This is designed for \texttt{fastx36} searches with
+potentially short DNA queries; a 120 nt DNA query can only produce a
+40 amino-acid alignment, which, with BLOSUM62 -11/-1, cannot produce
+more than 23 bits of score. A scoring matrix with a higher information
+content is required; in the set available by default, MD40, with 2.22
+bits/position, would be used.  For more information about alignment
+length and information content, see \cite{alt915}.
+
+\item[\texttt{-S}] Filter out lower-case characters in the query or
+  library sequences for the initial score calculation (used to filter
+  low-complexity -- \texttt{seg}-ed -- residues).  The \texttt{pseg}
+  program \cite{woo935} can be used to lower-case mask low complexity
+  regions in protein sequences. With the \texttt{-S} option, lower
+  case characters in the query or database sequences are treated as
+  \texttt{X}'s during the initial scan, but are treated as normal
+  residues during the final alignment display.  Since statistical
+  significance is calculated from the similarity score calculated
+  during the library search, the lower case residues do not contribute
+  to the score.  However, if a significant alignment contains low
+  complexity regions, the residues are shown (as lower
+  case characters, Fig. \ref{seg-aln}).
+
+The \texttt{pseg} program can be used to produce databases (or query
+sequences) with lower case residues indicating low complexity regions
+using the command:
+\begin{verbatim}
+pseg ./swissprot.fasta -z 1 -q > swissprot.lseg
+\end{verbatim}
+
+The \texttt{-S} option should always be used with \texttt{FASTX/Y} and
+\texttt{TFASTX/Y} because out-of-frame translations often generate
+low-complexity protein sequences.  However, only lower case characters
+in the protein sequence (or protein database) are masked; lower case
+DNA sequences are translated into upper case protein sequences, and
+not treated as low complexity by the translated alignment
+programs. (There is an option in the \texttt{Makefile},
+\texttt{-DDNALIB\_LC}, to enable preserving case in DNA sequences.)
+
+\item[\texttt{-t \#}]
+Translation table - fastx36, tfastx36, fasty36, and
+tfasty36 now support the BLAST translation tables.  See
+\url{http://www.ncbi.nih.gov/Taxonomy/Utils/wprintgc.cgi}.
+
+\texttt{-t t} or \texttt{-t t\#} enables the addition of
+an implicit termination codon to a protein:translated DNA match.  That
+is, each protein sequence implicitly ends with \texttt{*}, which
+matches the termination codes for the appropriate genetic code.
+\texttt{-t t\#} sets implicit termination and a different genetic
+code.
+\item[\texttt{-T \#}]
+set number of threads/workers.  Normally on a multi-core machine, the maximum
+number of processors/cores is used.
+\item[\texttt{-U}]
+Treat the query sequence as an RNA sequence.  In addition to selecting a
+DNA/RNA alphabet, this option causes changes to the scoring matrix so
+that \texttt{G:A} , \texttt{T:C} or \texttt{U:C} are scored as \mbox{\texttt{G:G -3}}.
+\item[\texttt{-v \#}]
+Do window shuffles with the window size specified.
+\item[\texttt{-V str}] Specify annotation characters that can be
+  included (and will be ignored), in the query sequence file, but are
+  displayed in the alignments.  If a query file contains
+  \texttt{"ACVS*ITRLFT?"}, where \texttt{"*"} and \texttt{"?"}  are
+  used to indicate phosphorylation, giving the option \mbox{\texttt{-V
+      '*?'}}, the annotated characters in the query (\texttt{S*},
+  \texttt{F?}) will be highlighted in the alignment (on the number
+  line). A \texttt{fasts36} alignment of \texttt{seq/ngts.aa} compared
+  to \texttt{seq/mgstm1.aa} with \texttt{-V '*?'} produces:
+\begin{footnotesize}
+\begin{verbatim}
+             *                10??                                 
+GT8.7     ILGYWN------------EYTDSSYDEKR----------------------------
+          ::::::            :::::::::::                            
+GT8.7  MPMILGYWNVRGLTHPIRMLLEYTDSSYDEKRYTMGDAPDFDRSQWLNEKFKLGLDFPNL
+               10        20        30        40        50        60
+\end{verbatim}
+\end{footnotesize}
+In addition to showing the alignments of post-translationally modified
+sites, the \texttt{-V} option can be used to highlight active sites in
+library sequences. In the \texttt{-m 9c} output, the state of the
+annotated sites is summarized when \texttt{-V} is used.
+
+(fasta-36.3.6 June 2012) The \texttt{-V} option has been extended to:
+(1) allow feature descriptions to be specified in a file,
+e.g. \texttt{-V =annot.defs} where \texttt{annot.defs} contains:
+\begin{footnotesize}
+\begin{verbatim}
+*:phosphorylation
+@:active site
+^:binding site
+\end{verbatim}
+\end{footnotesize}
+The annotation character is left of the  ':', the definition is on the
+right.  The \texttt{annot.defs} file can also be specified by setting
+the \texttt{FA\_ANNOT\_DEF} environment variable to the file name;
+
+(2) to include an optional annotation file, e.g. \texttt{-V
+  '<features.annot'}, or script, e.g. \texttt{-V '!features.pl'} for
+library annotations and \texttt{-V 'q!features.pl'} for query annotations. (Some shells require \texttt{\textbackslash!features.pl}.) Similar to the library expansion script, the
+\texttt{features.pl} script is run against a temporary file containing
+the list of high scoring sequence accessions (the text before the
+first space), e.g.
+\begin{footnotesize}
+\begin{verbatim}
+gi|121735|sp|P09488.3|GSTM1_HUMAN
+gi|1170096|sp|Q03013.3|GSTM4_HUMAN
+gi|67461004|sp|Q5R8E8.3|GSTM2_PONAB
+...
+\end{verbatim}
+\end{footnotesize}
+The \texttt{features.pl} script then produces a file of annotations on
+those sequences, in the format:
+\begin{verbatim}
+>accession1
+position label value
+>accession2
+...
+\end{verbatim}
+For example:
+\begin{footnotesize}
+\begin{verbatim}
+>gi|121735|sp|P09488.3|GSTM1_HUMAN
+23	*	
+33	*	
+34	*	
+116	^	
+173	V	N
+210	V	T
+>gi|1170096|sp|Q03013.3|GSTM4_HUMAN
+2	V	P
+116	^	
+160	V	V
+208	V	V
+209	V	F
+211	V	K
+212	V	M
+>gi|67461004|sp|Q5R8E8.3|GSTM2_PONAB
+...
+\end{verbatim}
+\end{footnotesize}
+The same format is used for the \texttt{-V '<feature.annot'} file.
+
+The \texttt{V} label is special; it indicates that the feature is a
+variant residue and specifies the alternative residue in the value
+field.  Thus, \texttt{GSTM4\_HUMAN} can have a \texttt{P} at position
+2.  Unlike modification or active site annotations, variant residues
+can change the sequence of the library sequence if replacing the
+canonical library residue with the variant residue improves the score.
+Thus, without the \texttt{-V '!feature.pl'} script, the human
+\texttt{GSTM1B} variant with dbSNP:rs449856 would align to
+\texttt{GSTM1\_HUMAN} (\texttt{P09488}) like this (the \texttt{-m 1}
+format option was used to highlight differences) :
+\begin{footnotesize}
+\begin{verbatim}
+The best scores are:                                                          opt bits E(1)
+sp|P09488.3|GSTM1_HUMAN Glutathione S-transferase Mu 1; GST HB subuni  ( 218) 1490 335.9 3.7e-97
+
+>>sp|P09488.3|GSTM1_HUMAN Glutathione S-transferase Mu 1; GST HB s            (218 aa)
+ initn: 1490 init1: 1490 opt: 1490  Z-score: 1776.8  bits: 335.9 E(1): 3.7e-97
+Smith-Waterman score: 1490; 99.1% identity (100.0% similar) in 218 aa overlap (1-218:1-218)
+
+...
+              170       180       190       200       210        
+gtm1_h YDVLDLHRIFEPNCLDAFPNLKDFISRFEGLEKISAYMKSSRFLPRPVFTKMAVWGNK
+                   x                                    x        
+sp|P09 YDVLDLHRIFEPKCLDAFPNLKDFISRFEGLEKISAYMKSSRFLPRPVFSKMAVWGNK
+              170       180       190       200       210        
+\end{verbatim}
+\end{footnotesize}
+
+With a \texttt{-V '!feature.pl'} script to annotate the variants,
+the alignment becomes:
+\begin{footnotesize}
+\begin{verbatim}
+The best scores are:                                                          opt bits E(1)
+sp|P09488.3|GSTM1_HUMAN Glutathione S-transferase Mu 1; GST HB s       ( 218) 1500 338.1   8e-98
+
+>>sp|P09488.3|GSTM1_HUMAN Glutathione S-transferase Mu 1; GST HB s            (218 aa)
+ Variant: K173N;S210T;
+ initn: 1500 init1: 1500 opt: 1500  Z-score: 1788.7  bits: 338.1 E(1): 8e-98
+Smith-Waterman score: 1500; 100.0% identity (100.0% similar) in 218 aa overlap (1-218:1-218)
+...
+              170       180       190       200       210        
+gtm1_h YDVLDLHRIFEPNCLDAFPNLKDFISRFEGLEKISAYMKSSRFLPRPVFTKMAVWGNK
+                                                                 
+sp|P09 YDVLDLHRIFEPNCLDAFPNLKDFISRFEGLEKISAYMKSSRFLPRPVFTKMAVWGNK
+              170  V    180       190       200       21V        
+\end{verbatim}
+\end{footnotesize}
+In addition to removing the two differences at residues 173 and 210,
+which produces a 100\% identical alignment, inclusion of the variant
+library sequence also improves the raw similarity score and $E()$-value.
+An example script (\texttt{misc/up\_feats.pl}) that extracts
+annotations from a mysql database of Uniprot features is provided.
+
+If the annotation script produces lines beginning with '=', then these
+lines are taken as annotation definitions, similar to the
+\texttt{annot.defs} file described above.  Thus:
+\begin{footnotesize}
+\begin{verbatim}
+=*:phosphorylation
+=@:active site
+=^:binding site
+>gi|121735|sp|P09488.3|GSTM1_HUMAN
+23	*	
+33	*	
+34	*	
+116	^	
+173	V	N
+210	V	T
+\end{verbatim}
+\end{footnotesize}
+will produce the same annotation descriptions as the
+\texttt{annot.defs} file.
+
+Scripts to produce annotations are available in the \texttt{scripts/}
+directory as \texttt{scripts/ann\_feats*.pl}. Scripts with
+\texttt{www} in the name,
+e.g. \texttt{scripts/ann\_feats\_up\_www2.pl} and
+\texttt{scripts/ann\_pfam\_www.pl} download annotation information
+from Uniprot or Pfam web services, respectively.  Scripts lacking
+\texttt{www} require a MySQL database that associates features
+or domains with sequence identifiers (accessions).  With \CURRENT,
+domain annotations are allowed to overlap each other (which often
+happens in Pfam and UniProt); FASTA 36.3.6 did not support overlapping
+domains.  Scripts that can produce overlapping domain annotations have
+\texttt{\_e} in their names, but will produce non-overlapping domain
+annotations with the \texttt{--no-over} option. Thus:
+\texttt{scripts/ann\_pfam\_www\_e.pl --acc sp|P43553|ALR2\_YEAST}
+produces:
+\begin{quote}
+\begin{verbatim}
+>sp|P43553|ALR2_YEAST
+451  -  683   PF01544 :1
+667  -  799   PF01544 :1
+\end{verbatim}
+\end{quote}
+While \texttt{scripts/ann\_pfam\_www\_e.pl --acc --no-over
+  sp|P43553|ALR2\_YEAST} produces:
+\begin{quote}
+\begin{verbatim}
+>sp|P43553|ALR2_YEAST
+451  -  675  PF01544 :1
+676  -  799  PF01544 :1
+\end{verbatim}
+\end{quote}
+
+\item[\texttt{-w \#}] 
+  Display width value ($<$200). Sets the approximate width of the
+  high-score descriptions and the length of residue
+  alignments. \texttt{-w 60} by default.
+
+\item[\texttt{-W \#}] context length (default is 1/2 of line width -w)
+  for alignment, for programs like \texttt{fasta36} and
+  \texttt{ssearch36}, that provide additional sequence context.
+
+\item[\texttt{-X extended\_option}]
+A number of rarely used options are now only available as extended options:
+
+\begin{description}
+
+\item[\texttt{X1}] sort output by \texttt{init1} score (for
+  compatibility with FASTP; obsolete).
+
+\item[\texttt{XB}] (Previously \texttt{-B}.)  Show the z-score, rather
+  than the bit-score in the list of best scores (rarely used, provided
+  for backward compatibility).
+
+\item[\texttt{XI}] Modify rounding used in percent identity/percent
+  similarity display to ensure that sequences that have a mismatch are
+  not shown as 100.0\% identical.  Without this option, a single
+  mismatch in a 10,000 residue alignment would be shown as 100.0\%
+  identical; with this option, it would be shown as 99.9\%
+  identical.
+
+\item[\texttt{Xo}] (\texttt{fasta36}, \texttt{[t]fast[x/y]36} only)
+  (Previously \texttt{-o}.) Turn off the default \texttt{opt} score
+  calculation and sort results by \texttt{initn} scores (reduces
+  sensitivity and statistical accuracy, obsolete).
+
+\item[\texttt{XM}] The maximum amount of memory available for storing
+  the library in multi-sequence searches. The value is specified in
+  MBytes (\texttt{-XM16}) or GBytes (\texttt{-XM4G}) and can also be
+  set using the \texttt{LIB\_MEMK} environment variable
+  (\texttt{LIB\_MEMK=4G}).  Negative values remove the memory
+  restriction. By default (set as a compile-time option,
+  \texttt{-DMAX\_MEMK=2}), set to 2 GBytes in 32-bit environments and
+  12 GBytes in 64-bit environments.
+
+\item[\texttt{XN/XX}] Alter the treatment of N:N (DNA) or X:X
+  (protein) alignments for counts of identities and similarities. By
+  default the \texttt{FASTA} programs count N:N or X:X as identical,
+  but not similar, because their alignment scores are typically
+  negative. \texttt{-XNS}, \texttt{-XN+}, \texttt{-XXS}, and
+  \texttt{-XX+} treat N:N and X:X alignments as ``similar'' , even
+  though their alignment scores are negative, when calculating percent
+  similarity. \texttt{-XND}, \texttt{-XN-}, \texttt{-XXD}, and
+  \texttt{-XX-} treat N:N and X:X alignments as non-identical for
+  calculating percent identity.
+
+\item[\texttt{Xx}] (Previously \texttt{-x}) Specify the penalty for a
+  match to an \texttt{X}, and mismatch to \texttt{X}, independently of
+  the PAM matrix.  Particularly useful for \texttt{fastx36/fasty36},
+  where termination codons are encoded as \texttt{X}.  For example,
+  \texttt{-Xx=0,-1} scores an \texttt{X:X} match as 0, and
+  \texttt{X:not-X} as -1.
+
+\item[\texttt{Xy}] (Previously \texttt{-y}.) Set the width of the band
+  used for calculating "optimized" scores.  For proteins and ktup=2,
+  the width is 16.  For proteins with ktup=1, the width is 32 by
+  default.  For DNA the width is 16.
+
+\end{description}
+
+\item[\texttt{-z -1,0,1,2,3,4,5,6}]\hfill\\
+\texttt{-z -1} turns off statistical calculations. \texttt{-z 0} estimates
+the significance of the match from the mean and standard deviation of
+the library scores, without correcting for library sequence length.
+\texttt{-z 1} (the default) uses a weighted regression of average score
+vs library sequence length; \texttt{-z 2} uses maximum likelihood
+estimates of $\lambda$
+and $K$; \texttt{-z 3} uses Altschul-Gish parameters \cite{alt960};
+\texttt{-z 4 - 5} uses two variations on the \texttt{-z 1}
+strategy. \texttt{-z 1} and \texttt{-z 2} are the best methods, in
+general.
+\item[\texttt{-z 11,12,14,15,16}]\hfill\\
+estimate the statistical parameters from shuffled copies of each
+library sequence.  This allows accurate statistics to be estimated for libraries comprised of a single protein family.
+
+\item[\texttt{-z 21,22,24,25,26}]\hfill\\
+estimate the statistical parameters from shuffled copies of the
+highest scoring sequences reported in the search.
+This shuffling strategy is much more like
+\texttt{prss}, since the sequences shuffled share compositional
+similarity to the query.
+\item[\texttt{-Z db\_size}]
+sets the apparent size of the database to be used when calculating
+expectation E()-values.  If you searched a database with 1,000
+sequences, but would like to have the E()-values calculated in the
+context of a 100,000 sequence database, use \texttt{-Z 100000}.
+\item[\texttt{-3}]
+translate only three forward frames or search with only the forward
+strand (complement of \texttt{-i}).
+\end{description}
+
+Thus, to tell \texttt{fasta36} to align \texttt{seq1.aa} with \texttt{seq2.aa} showing the entirety of both sequences, with 80 characters per line, one would type:
+\begin{verbatim}
+fasta36 -w 80 -s BP62 -a seq1.aa seq2.aa
+\end{verbatim}
+The \texttt{-w 80} and \texttt{-a} options must precede the file
+names.  If you just enter the options on the command line followed by
+\texttt{-I}, the program will prompt for the file names.
+
+In addition, the FASTA programs can accept query sequence data from
+\texttt{STDIN}.  To specify that stdin be used as the query or library
+file, the file name should be specified as \texttt{@}.  Thus:
+\begin{quote}
+\texttt{cat query.aa | fasta36 @:25-75 /slib/swissprot }
+\end{quote}
+would take residues 25-75 from \texttt{query.aa} and search the
+\texttt{/slib/swissprot}.
+
+\subsubsection{Environment variables}
+
+FASTA allows virtually every option to be set on the command line
+(except the \emph{ktup}, which must be set as the third command line
+argument), but it is often convenient to set the \texttt{FASTLIBS}
+environment variable to specify the location of the \texttt{fastlibs}
+database description file.
+
+\texttt{FASTLIBS} -- \texttt{FASTLIBS}
+specifies the location of the file that contains the list of library
+descriptions, locations, and library types (see section on finding
+library files).
+
+\texttt{LIB\_MEMK} -- Set the maximum amount of memory (MBytes) to be
+available for library buffering (equivalent to \texttt{-XM\#}, see
+above).  By default, \texttt{2GB} is available on 32-bit systems
+(\texttt{LIB\_MEMK=2G}); \texttt{8GB} on 64-bit systems.
+
+\texttt{REF\_URL}, \texttt{SRCH\_URL} and \texttt{SRCH\_URL1} -- These
+environment variables are used in HTML mode (\texttt{-m 6}) to provide
+links from the sequence alignment (see the links at
+\url{http://fasta.bioch.virginia.edu/fasta_www2/}). \texttt{REF\_URL}
+is associated with the \texttt{Entrez Lookup} link; \texttt{SRCH\_URL}
+with the \texttt{Re-search database} link, and \texttt{SRCH\_URL1}
+with the \texttt{General re-search} link.  In each case, the text
+corresponds to a HTML URL, but with positions containing the
+\texttt{\%s} or \texttt{\%ld} (for numbers) part of a 'C'
+\texttt{sprintf()} call for specific variables. \texttt{REF\_URL} uses
+the database (\texttt{protein} or \texttt{nucleotide}), together with
+a query term (typically the \texttt{gi} number). \texttt{SRCH\_URL}
+and \texttt{SRCH\_URL1} use \texttt{db}, \texttt{query} (\texttt{gi}),
+\texttt{pgm} (\texttt{fa}, \texttt{ss}, \texttt{fx}, etc.), and
+\texttt{start}, \texttt{stop}, and \texttt{n1} (library sequence
+length), where \texttt{start} and \texttt{stop} are the boundaries of
+the alignment, for sub-sequence searches.  The values of these
+environment variables are used with \texttt{sprintf} to build a new
+URL that is linked in the output.
+
+\texttt{TMP\_DIR} -- Location (if defined) of the temporary files used
+by the \texttt{-e expand\_script.sh} option.
+
+In addition, environment variables can be used inside both the
+\texttt{fastlibs} file and in the \texttt{@db.nam} files of file
+names. The \texttt{fasta36/conf/fast\_libs\_e.www} file, included with
+the distribution, shows an example, as do the descriptions of files of
+file names shown below. Whenever a word of the form
+\texttt{\$\{WORD\}} is found in \texttt{fastlibs} or a file of file
+names, the \texttt{\$\{WORD\}} environment variable is expanded and
+inserted in the string.  Thus, if \texttt{<\$\{SLIB\}/blast\_dbs/}
+describes where a list of files will be found and \texttt{\$\{SLIB\}}
+is \texttt{"/seqdata"}, then the resulting substitution yields:
+\texttt{</seqdata/blast\_dbs/}.
+
+\section{Installing FASTA and the sequence databases}
+
+\subsection{Obtaining/preparing the sequence libraries}
+
+The FASTA program package does not include any protein or DNA sequence
+libraries.  Protein and DNA sequence databases are available via
+anonymous FTP from the NCBI (\url{ftp://ftp.ncbi.nih.gov/blast/db}),
+UniProt
+(\url{ftp://ftp.uniprot.org/pub/databases/uniprot}), and the EBI
+(\texttt{ftp.ebi.ac.uk/pub/databases}).
+
+\emph{Protein Sequence Databases} -- Protein sequence databases are
+available from the NCBI, UniProt, and the EBI.  The NCBI provides a
+``raw'' database, \texttt{nr}, and a well-curated, less redundant
+database, \texttt{refseq\_protein}, and a copy of the very well
+annotated \texttt{swissprot} database. Protein sequence databases can
+also be downloaded from UniProt and the EBI; both sites provide the
+same UniProt\cite{uniprot11} database.
+
+Protein libraries, particularly those used for translated-DNA:protein
+comparisons with \texttt{fastx36} or \texttt{fasty36}, should be scanned
+to remove low-complexity regions.  Matches between low complexity
+regions can violate the composition assumptions used by the FASTA
+statistical estimates. The \texttt{pseg} program (\cite{woo935},
+\url{ftp://ftp.ncbi.nih.gov/pub/seg/pseg}) can be used to lower-case
+low complexity regions, which then can be ignored during the initial
+database search by using the \texttt{-S} option.  To lower-case low
+complexity regions, run the \texttt{pseg} program against the protein sequence database:
+\begin{quote}
+\begin{verbatim}
+pseg /seqdata/swissprot.fa -z 1 -q > /seqdata/swissprot.lseg
+\end{verbatim}
+\end{quote}
+And then you can run most FASTA programs with \texttt{-S}:
+\begin{quote}
+\begin{verbatim}
+ssearch36 -S mgstm1.aa /seqdata/swissprot.lseg
+\end{verbatim}
+\end{quote}
+
+Fig. \ref{seg-aln} shows the effect of including the \texttt{-S}
+option with lower-cased low-complexity sequences.  The \texttt{opt}
+score (407), which is used to sort the results and calculate
+statistics, is lower than the Smith-Waterman score (451), even though
+exactly the same residues are aligned for each score. The \texttt{opt}
+score excludes residues 19-30, because they were marked as
+low-complexity by \texttt{pseg}; thus they are shown as lower-case.
+The Smith-Waterman score includes the contribution from that part of
+the alignment.
+
+Out-of-frame translated DNA sequences often produce low-complexity
+regions \cite{wrp973}, so it is particularly important to avoid
+low-complexity alignments when using \texttt{fastx36} and
+\texttt{fasty36}.
+
+\subsection{Searching taxonomic subsets}
+
+Because increasing database size reduces search sensitivity (an
+alignment with an $E()$-value of $0.001$ in a search of a 100,000
+entry database will have an $E()$-value of 0.1, not significant, if
+found in a database of 10,000,000 sequences), it is much more
+effective to search smaller, less redundant databases (you can always
+search the larger database later).  Thus, the \texttt{refseq\_protein}
+database from the NCBI is preferred over \texttt{nr}; even better are
+databases that reflect a limited phylogenetic range
+(e.g. \texttt{refseq\_human} for vertebrate sequences).
+
+While the NCBI provides organism-specific \texttt{refseq} subsets on
+their FTP site, they can be difficult to find.  Alternatively, you can
+use the NCBI \texttt{Entrez} web site to download a list of
+\texttt{gi} numbers specific to a particular organism or taxonomic
+range. The FASTA programs can search a subset of a large sequence
+database that is specified by a list of \texttt{gi} numbers by using
+library format 10.  For example, given a list of \texttt{gi} numbers
+for the human proteins in \texttt{swissprot.lseg}, the file
+\texttt{sp\_human.db}, with the content:
+\begin{quote}
+\begin{verbatim}
+<${SLIB}/swissprot.lseg 0:2 4|
+3121763
+51701705
+7404340
+205831112
+74735515
+...
+\end{verbatim}
+\end{quote}
+could be used to search the human subset of
+\texttt{swissprot.lseg}. The \texttt{gi} numbers for the SwissProt
+entries begin with the second line. The first line specifies the
+location of the file where the sequences containing the \texttt{gi}
+numbers can be found (\texttt{\$\{SLIB\}/swissprot.lseg}), the
+\texttt{libtype} of that file (\texttt{0:fasta}), the character offset
+to the beginning of the sequence identifier in that file (\texttt{2}),
+the identifier type (\texttt{4}), and the character
+that separates the fields in the FASTA descriptor (\texttt{|}).  The
+identifier type can take four formats:
+
+\begin{tabular}{l l}
+\hline\\[-1.5ex]
+1 & ordered accession strings  (letters or numbers)\\
+2 & ordered numbers (digits only) \\
+3 & un-ordered accession strings \\
+4 & un-ordered numbers \\
+\hline\\
+\end{tabular}\\
+(Ordered accession strings/numbers are ordered in both the library and the subset file.)
+
+Thus, given the \texttt{0:2 4|} specification above, the line:
+\begin{quote}
+\texttt{>gi|3121763|sp|O15143.3|ARC1B\_HUMAN Actin-related protein 2/3 ...}
+\end{quote}
+would be parsed, looking for a number starting at column 4 (the first
+column is numbered 0), and ending with \texttt{|}. The order of
+sequences in the library does not have to correspond to the order in the
+\texttt{sp\_human.db} file (un-ordered). Given the
+\texttt{sp\_human.db} file and a file \texttt{swissprot.lseg} in the
+directory specified by the environment variable \texttt{\$\{SLIB\}},
+a command of the form:
+\begin{quote}
+\texttt{fasta36 -S mgstm1.aa 'sp\_human.db 10'}
+\end{quote}
+would use the \texttt{sp\_human.db} file to search the subset of
+\texttt{swissprot.lseg} that contained the specified \texttt{gi}
+numbers.
+
+\subsection{DNA sequence libraries}
+
+Because of the large size of DNA databases, you will probably want to
+keep DNA databases in only one format.  The FASTA3 programs that
+search DNA databases --- \texttt{fasta36}, \texttt{fastm36}, and
+\texttt{tfastx/y36} --- can read DNA databases in Genbank flatfile (not
+ASN.1), FASTA, and BLAST2.0 (\texttt{formatdb}) formats, as well as
+EMBL format.  BLAST2.0 format is preferred for DNA sequence libraries,
+because the files are considerably more compact than GenBank format.
+The NCBI does not provide software for converting from Genbank flat
+files to Blast2.0 DNA databases, but you can use the Blast
+\texttt{formatdb} program to convert ASN.1 formatted Genbank files,
+which are available from the NCBI \texttt{ftp} site.
+
+The NCBI also provides the comprehensive \texttt{nt} DNA database, and
+several EST databases in Blast2.0/\texttt{formatdb} format from
+\texttt{ftp://ncbi.nih.gov/blast/db}.
+
+
+\subsection{Finding the library files}
+
+All the FASTA comparison programs have the command line syntax:
+\begin{quote}
+\texttt{fasta36 query.file /seqdata/library}
+\end{quote}
+However, in addition to simply specifying the location of the database
+to be searched
+(\texttt{/seqdata/library}), the FASTA programs
+provide several methods for referring to sequence databases without specifying a specific file.  These methods can be used to provide abbreviations for sequence libraries, e.g.:
+\begin{quote}
+\texttt{fasta36 query.file s} 
+or
+\texttt{fasta36 query.file +sp+}
+\end{quote}
+To use abbreviations like \texttt{'s'} or \texttt{'+sp+'} to reference a
+sequence database, a \texttt{FASTLIBS} file must be used, see section
+\ref{fastlibs}.
+
+Large DNA and protein databases are often distributed across several
+files.  For example, the NCBI \texttt{nr} protein database is found in
+5 files, \texttt{nr.00} ... \texttt{nr.04}.  To search databases in
+multiple files, the names of the files are specified in a file of
+filenames, \texttt{nr.nam}:
+\begin{quote}
+\begin{verbatim}
+<${SLIB}/blast_dbs/
+nr.00 12
+nr.01 12
+nr.02 12
+nr.03 12
+nr.04 12
+\end{verbatim}
+\end{quote}
+In this file, the first line \texttt{<\$\{SLIB\}/blast\_dbs/},
+beginning with \texttt{<}, specifies the location and format (Blast2.0
+\texttt{formatdb}) of the data files. Text of the form
+\texttt{\$\{SLIB\}} refers to Unix/MacOSX/Windows environment
+variables; the value of \texttt{\$\{SLIB\}} is set by a Unix/MacOSX
+shell environment command. Thus, if the value of \texttt{\$\{SLIB\}}
+is \texttt{/seqdata}, then the first sequence library file to be read
+will be \texttt{/seqdata/blast\_dbs/nr.00}, in format 12 (Blast2.0
+\texttt{formatdb}).
+
+To refer to the \texttt{nr.nam} file as a file of file names, it must
+be prefixed by a \texttt{@} character, e.g.
+\begin{quote}
+\texttt{fasta36 query.file \textbf{@}nr.nam}
+\end{quote}
+Files of file names can contain references to other files of file names:
+\begin{quote}
+\begin{verbatim}
+<${SLIB}/fasta_dbs/
+@pdb.nam
+@swissprot.nam
+\end{verbatim}
+\end{quote}
+The FASTA file of file names is similar to the NCBI
+\texttt{prot\_db.pal} and \texttt{dna\_db.nal} files, but
+unfortunately they are different, and currently FASTA cannot read NCBI
+\texttt{.pal} or \texttt{.nal} files that contain a \texttt{DBLIST}
+line. FASTA can read NCBI \texttt{.pal} or \texttt{.nal} files that do
+not contain a \texttt{DBLIST} line.
+
+FASTA version \texttt{fasta-36.3.6} provides an alternative way to
+generate a database to be searched: the \texttt{!script.sh} file.
+Like the \texttt{-e expand\_file.sh} script, a shell script or program
+can be used to produce a database to a temporary file, which is then
+searched.  For example, if the file \texttt{cat\_db.sh} contains the
+command \texttt{echo /seqdb/swissprot.lseg}, the command:
+\begin{quote}
+\begin{verbatim}
+fasta36 query.aa \!@cat_db.sh
+\end{verbatim}
+\end{quote}
+will cause \texttt{cat\_db.sh} to produce a temporary file with the
+line \texttt{swissprot.lseg}, which is interpreted as an indirect file
+of filenames; thus, because of the \texttt{@}, the file will be
+interpreted as an indirect file, and the \texttt{swissprot.lseg} file
+will be searched.  Note that on Unix systems, the \texttt{'!'} must be
+preceded by a \texttt{'\textbackslash'} so that it is not interpreted by the
+shell, as shown above.
+
+\subsection{\texttt{FASTLIBS}}
+\label{fastlibs}
+
+All the search programs in the FASTA3 package can use the environment
+variable \texttt{FASTLIBS} to find the protein and DNA sequence
+libraries. (Alternatively, you can specify the \texttt{FASTLIBS} file
+with the \texttt{-l fastlibs.file} option.) The \texttt{FASTLIBS}
+variable contains the name of a file that has the actual filenames of
+the libraries.  The \texttt{fastlibs} file included with the
+distribution is an example of a file that can be referred to by
+FASTLIBS. To use the \texttt{fastlibs} file, type:
+\begin{quote}
+\texttt{setenv FASTLIBS /seqdata/info/fastgbs} (csh/tcsh)\\
+ or\\
+\texttt{export FASTLIBS=/seqdata/info/fastgbs} (bash/ksh)
+\end{quote}
+ Then edit the \texttt{fastlibs} file to indicate the location of the
+ protein and DNA sequence libraries.  If the protein sequence library is
+ kept in the file \texttt{/seqdata/aa/swissprot.lseg} and your Genbank
+ DNA sequence library is kept in the directory:
+ \texttt{/seqdata/genbank}, then the \texttt{fastlibs} file might
+ contain:
+%%\pagebreak
+\begin{verbatim}
+SwissProt$0P/seqdata/aa/swissprot.lseg 0
+UniProt$0+uniprot+@/seqdata/aa/uniprot.nam
+GB Primate$1P@/seqdata/genbank/gpri.nam
+GB Rodent$1R@/seqdata/genbank/grod.nam 
+GB Mammal$1M@/seqdata/genbank/gmammal.nam
+^   1    ^^^^       4                ^ ^
+          23                           (5)
+\end{verbatim}
+The first line of this file says that there is a copy of the SwissProt
+sequence database (a protein database) that can be selected by typing
+"P" on the command line or when the database menu is presented in
+interactive mode.
+
+Note that there are 4 (or 5) fields in the lines in the
+\texttt{fastlibs} file.  The first field describes the library and is
+displayed by the FASTA program; it ends with the '\$'.  The second field
+(1 character), is a 0 if the library is a protein library and 1 if it
+is a DNA library.  The third field can either be a single character
+(\texttt{P}) or a word surrounded by the \texttt{+} symbol
+(\texttt{+uniprot+}), and can be used to specify the library on the command line or in interactive mode.
+
+The fourth field is the name of the library file.  In the example
+above, the \texttt{/seqdata/aa/swissprot.lseg} file contains the
+entire protein sequence library.  Alternatively,
+\texttt{/seqdata/aa/uniprot.nam} is a file of file names, which
+contains a list of one or more library files.  Likewise, the DNA
+library files are files of file names.
+
+In addition, an optional fifth field can be used to specify the format
+of the library file.  Alternatively, you can specify the library
+format in a file of file names.  This field must be separated from the
+file name by a space character ('\ ').  FASTA can
+read the libraries in the following formats:\\
+
+\begin{tabular}{r l}
+0 & FASTA (\texttt{>SEQID} - comment/sequence) \\
+1 & Uncompressed Genbank (LOCUS/DEFINITION/ORIGIN)\\
+2 & NBRF CODATA (ENTRY/SEQUENCE) (obsolete)\\
+3 & EMBL/SWISS-PROT (ID/DE/SQ)\\
+4 & Intelligenetics (;comment/SEQID/sequence) (obsolete)\\
+5 & NBRF/PIR VMS (\texttt{>P1;SEQID}/comment/sequence) (obsolete)\\
+6 & GCG (version 8.0) Unix Protein and DNA (compressed)\\
+7 & FASTQ (sequence only, quality ignored)\\
+10 & subset format (</slib2/swissprot.lseg 0:2 4|) \\
+11 & NCBI Blast1.3.2 format  (unix only) (obsolete)\\
+12 & NCBI Blast2.0 format\\
+16 & MySQL (requires special compilation) \\
+17 & Postgres (requires special compilation) \\
+\end{tabular}
+
+Today, the most popular formats are \texttt{FASTA}, type \texttt{'0'},
+the default, and the NCBI Blast2.0 \texttt{formatdb} formats (type
+\texttt{'12'}).  The FASTA programs cannot read NCBI ASN.1 formatted databases.
+If a library format is not specified, for example, because
+you are just comparing two sequences, FASTA (format 0) is used by
+default. To specify a library type on the command line, add it to the
+library filename and surround the filename and library type in quotes:
+\begin{quote}
+\begin{verbatim}
+fasta36 query.file "/seqdb/genbank/gbmam 12"
+\end{verbatim}
+\end{quote}
+NCBI \texttt{formatdb} databases are built from multiple files,
+e.g. \texttt{gbmam.nsq}, \texttt{gbmam.nhr}, \texttt{gbmam.nin}; to
+refer to the complete set of files, simply use the name before the
+suffixes, e.g. \texttt{gbmam}.  When NCBI databases are distributed across
+several files, e.g. \texttt{gbbct.00}, \texttt{gbbct.01}, etc, those
+files must be included in a \texttt{gbbct.nam} file of file names.
+
+FASTA subset format ({\tt 10}) allows users to search a subset of a
+sequence database, by specifying a list of {\tt gi} numbers or accessions in
+a larger database.  The format begins with a line naming the
+sequence file followed by information about how to
+extract the {\tt gi} number or accession. Thus, the line has the form:
+\begin{quote}
+\texttt{<library\_file lib\_fmt:id\_fmt id\_loc}
+\end{quote}
+where {\tt lib\_fmt} is the library format (0), {\tt id\_fmt} is the
+format of the sequence identifier (:1, :2 - ordered strings or
+numbers; :3, :4 - unordered strings or numbers),
+and {\tt id\_loc} is the location of the sequence identifier. For example,
+\begin{quote}
+\begin{verbatim}
+</slib2/blast/swissprot.lseg 0:2 4|
+3121763
+51701705
+7404340
+74735515
+...
+\end{verbatim}
+\end{quote}
+specifies the file containing all the sequences and the file is in
+FASTA format ('0:'), the sequence identifier is a number (':2'),
+and the identifier starts at character 4 and ends with the \texttt{'|'} symbol. 
+
+The major problem that most new users of the FASTA package have is in
+setting up the program to find the databases and their library type.
+In general, if you cannot get \texttt{fasta36} to read a sequence
+database, there is probably something wrong with the \texttt{FASTLIBS}
+file.  A common problem is that the database file is found, but either
+no sequences are read, or an incorrect number of entries is read.
+This is almost always because the library format (\texttt{libtype}) is
+incorrect.
+
+Test the setup by running FASTA.  Enter the sequence
+file '\texttt{mgstm1.aa}' when the program requests it (this file is
+included with the programs).  The program should then ask you to
+select a protein sequence library.  Alternatively, if you run the
+\texttt{tfastx36 -I} program and use the mgstm1.aa query sequence, the program
+should show you a selection of DNA sequence libraries.
+Once the \texttt{fastlibs} file has been set up correctly, you can
+set FASTLIBS=fastgbs in your AUTOEXEC.BAT file, and you will not need to
+remember where the libraries are kept or how they are named.
+
+%%\pagebreak
+\section{Frequently Asked Questions (FAQs)}
+
+{\noindent}\textbf{Where can I get FASTA?} --
+\url{http://faculty.virginia.edu/wrpearson/fasta} has the latest
+versions of the FASTA programs.  This document describes
+\texttt{\CURRENT}, which is available from
+\url{http://faculty.virginia.edu/wrpearson/fasta/fasta3.tar.gz}.
+In addition, pre-compiled versions of the programs are available for
+MacOSX and Windows.
+
+\needspace{4\baselineskip}
+{\noindent}\textbf{Which program should I use?} -- See Table I, also:\\
+
+\begin{tabular}{l l l l l }
+\hline \\[-1.0ex]
+Query & Library & FASTA pgm. & BLAST pgm. & \\[1.2ex]
+\hline \\[-1.0ex]
+Prot. & Prot. & \texttt{fasta36} & \texttt{blastp} & heuristic local similarity \\
+ &  & \texttt{ssearch36} &  & optimal local sim.\\
+ &  & \texttt{ggsearch36} &  & global:global sim. \\
+ &  & \texttt{glsearch36} &  & global:local sim.\\
+DNA & DNA & \texttt{fasta36}$^*$ & \texttt{blastn} & \\[1.2ex]
+\hline \\[-1.0ex]
+Prot. & Prot. & \texttt{lalign36} & & multiple non-intersecting \\
+DNA & DNA & & & alignments \\[1.2ex]
+\hline \\[-1.0ex]
+DNA & Prot. & \texttt{fastx36} & \texttt{blastx} & trans. DNA:protein sim. \\ 
+ &  & \texttt{fasty36} & & \\[1.2ex]
+\hline \\[-1.0ex]
+Prot. & DNA & \texttt{tfastx36} & \texttt{tblastn} & protein:trans. DNA \\
+ &  & \texttt{tfasty36} & & \\[1.2ex]
+\hline \\[-1.0ex]
+Prot. & Prot. & \texttt{fasts36} & & Unordered peptides \\
+Prot. & DNA & \texttt{tfasts36} & & Unordered peptides \\
+DNA & DNA & \texttt{fasts36} & & Unordered oligonucleotides \\
+Prot. & Prot. & \texttt{fastm36} & & Ordered peptides \\
+DNA & DNA & \texttt{fastm36} & & Ordered oligos \\[1.2 ex]
+\hline \\[-1.0ex]
+\multicolumn{5}{l}{$^*$\texttt{ssearch36} can also be used for DNA:DNA, but is much slower and no more sensitive.}\\[0.2ex]
+\hline \\
+\end{tabular}
+
+\needspace{4 ex}
+{\noindent}\textbf{How do I make FASTA act/look like BLAST}? --
+\vspace{-0.5ex}
+\begin{quote}
+\texttt{fasta36 -s BP62 -m BB query.file library.file}
+\end{quote}
+\vspace{-0.5ex}
+\texttt{-s BP62} sets the same scoring matrix (BLOSUM62) and
+gap-penalties (-11/-1) as BLAST (FASTA uses BLOSUM50 by
+default). \texttt{-m BB} produces very BLAST-like output.
+
+In addition, the \texttt{-m 8} and \texttt{-m 8C} options provide
+BLAST tabular output, optionally with comments (\texttt{-m 8C}). This
+compact output is effective for analysis pipelines.  In addition,
+\texttt{-m 8XC} (no comments) or \texttt{-m 8CC} provides two
+additional blast-tabular fields, a CIGAR alignment string and, if
+available, an annotation string.
+
+{\noindent}\textbf{When I search Genbank - the program reports:} \texttt{0 residues in 0
+sequences}?  This typically happens because the program does not
+know that you are searching a Genbank flatfile database and is looking
+for a FASTA format database.  Be certain to specify the library type
+("1" for Genbank flatfile) with the database name.
+
+{\noindent}\textbf{The search seemed to work, but I do not see any results.} -- In
+command line mode (the default), all the FASTA programs limit the
+number of high scoring sequences shown using an expectation value
+cutoff ($E()<10$ for proteins; $E()<2$ for DNA).  Sometimes, a search
+will complete successfully (you see the message \texttt{XXXX residues
+  in YYY sequences}) but display the message: \texttt{!! No sequences with E()
+  < 10} instead of \texttt{The best scores are:}.  Typically, this
+happens because of a problem with the statistical estimation process;
+in particular, if the library contains only related sequences and
+\texttt{-z 11} was not used, none of the hits may be ``significant''.
+To troubleshoot this problem, you can search with \texttt{-z -1},
+which turns off all the statistical estimation procedures, and will
+show the 20 highest scoring sequences (\texttt{-b \#} sets the default
+number of sequences shown).
+
+{\noindent}\textbf{What is the difference between} \texttt{fastx3} and
+\texttt{fasty3}? (or \texttt{tfastx3} and \texttt{tfasty3})? --
+\texttt{[t]fastx3} uses a simpler codon based model for alignments
+that does not allow frameshifts in some codon positions (see
+ref. \cite{wrp971}).  \texttt{fastx3} is about 30\% faster, but
+\texttt{fasty3} can produce higher quality alignments in some cases.
+
+\vspace{0.5ex}
+{\noindent}\textbf{What is ktup}? -- All of the programs with \texttt{fast} in their
+name use a computer science method called a lookup table to speed the
+search.  For proteins with \emph{ktup}=2, this means that the program
+does not look at any sequence alignment that does not involve matching
+two identical residues in both sequences.  Likewise with DNA and
+\emph{ktup} = 6, the initial alignment of the sequences looks for 6
+identical adjacent nucleotides in both sequences.  Because it is less
+likely that two identical amino-acids will line up by chance in two
+unrelated proteins, this speeds up the comparison.  But very distantly
+related sequences may never have two identical residues in a row but
+will have single aligned identities.  In this case, \emph{ktup} = 1 may
+find alignments that \emph{ktup}=2 misses.
+
+\vspace{0.5ex} {\noindent}\textbf{How do I turn off statistics}? --
+The FASTA programs are designed to identify homologs based on
+statistically significant similarity; to infer homology you need
+accurate statistical estimates.  Sometimes, however, you know the
+sequences are related, and searching against libraries of related
+sequences can confuse FASTA if you do not use \texttt{-z 11}. If all
+you want are scores and alignments, use \texttt{-z -1} to turn off
+statistical estimates.
+
+\vspace{0.5ex}
+{\noindent}\textbf{Where are} \texttt{prss} {\noindent}\textbf{and} \texttt{prfx}? -- Earlier FASTA3
+releases included \texttt{prss3} and \texttt{prfx3}. With FASTA
+version 35 and 36, these programs have been incorporated into
+\texttt{ssearch36} and \texttt{fastx36}.  FASTA version 35 and 36
+programs now automatically estimate statistical parameters by
+shuffling - the function of \texttt{prss} and \texttt{prfx}, when
+searching libraries with fewer than 500 members.
+
+\vspace{0.5ex}
+{\noindent}\textbf{Where is} \texttt{tfasta}? -- Although it is possible to make
+\texttt{tfasta36}, it is not compiled by default.  \texttt{tfastx36}
+and \texttt{tfasty36} allow frame-shifts to be joined into a single
+alignment; \texttt{tfasta} did not. \texttt{tfastx36} produces better
+alignments with better statistics.
+
+\vspace{0.5ex}
+{\noindent}\textbf{Can I run the FASTA programs on a cluster}? -- With version
+36.3.4, almost all of the FASTA programs can be run on clusters of
+computers using MPI (Message Passing Interface).  The programs can
+be compiled using \texttt{make -f ../make/Makefile.mpi\_sse2} from the
+\texttt{fasta36/src} directory.  Except for \texttt{lalign36}, all the
+programs in Table I are available as \texttt{fasta36\_mpi},
+\texttt{ssearch36\_mpi}, etc.
+
+Unfortunately, the current MPI implementation involves substantially
+more communications overhead than the threaded versions.  The FASTA
+programs are very efficient on threaded machines; if the preload
+option is used (edit \texttt{make/Makefile36m.common} to use
+\texttt{comp\_lib8.c}), the FASTA programs can obtain more than 40-fold speedup on a 48-core machine (the largest I have tested). 
+
+\vspace{0.5ex}
+{\noindent}\textbf{Sometimes, in the list of best scores, the same sequence is
+  shown twice with exactly the same score.  Sometimes, the sequence is
+  there twice, but the scores are slightly different}? -- When any of
+the FASTA programs searches a long sequence, it breaks the sequence up
+into \emph{overlapping} pieces.  If the highest scoring alignment is
+at the end of one piece, it will be scored again at the beginning of
+the next piece.  If the alignment is not completely included in the
+overlap region, one of the pieces will give a higher score than the
+other.  These duplications can be detected by looking at the
+coordinates of the alignment.  If either the beginning or end
+coordinate is identical in two alignments, the alignments are at least
+partially duplicates.
+
+\vspace{2ex}
+As always, please inform me of bugs as soon as possible.
+
+\begin{quote}
+William R. Pearson\\
+Department of Biochemistry\\
+Jordan Hall Box 800733\\
+U. of Virginia\\
+Charlottesville, VA\\
+wrp at virginia.EDU
+\end{quote}
+
+\bibliographystyle{plain}
+\bibliography{fasta_guide}
+
+\appendix
+\section*{Appendix}
+
+\section{FASTA Makefile compile time options}
+
+\begin{table}
+\caption{\label{make-defs}FASTA \texttt{Makefile} compile time \texttt{\#defines}}
+\vspace{1.0ex}
+\begin{tabular}{l l p{1.00 in} p{3.0 in}}
+\hline\\[-1.2ex]
+\texttt{\#define} & Status$^*$ & Target file(s) & Function \\[1.0ex]
+\hline\\[-1.5ex]
+\texttt{ALLOCN0} & obs & \texttt{dropnfa.c}, \texttt{dropfx.c}, \texttt{dropfz2.c} & allows FASTA algorithm to use memory $\sim$ query length (n0), not query $+$ library (n0+n1). \\
+\texttt{DNALIB\_LC} & undef & \texttt{initfa.c} & enable lower case masking for DNA libraries \\
+\texttt{HTML\_HEAD} & undef & \texttt{comp\_lib5e.c}, \texttt{comp\_lib8.c} & wrap \texttt{-m 6} HTML output with \texttt{<html> <body> </body> </html>} \\
+\texttt{M10\_CONS} & def & \texttt{c\_dispn.c} & show consensus line (\texttt{:. }) with \texttt{-m 10} output. \\
+\texttt{OLD\_FASTA\_GAP} & undef & \texttt{drop*.c} & use first-residue/additional residue penalties, not open/extend. \\
+\texttt{PGM\_DOC} & def & \texttt{comp\_lib5e.c}, \texttt{comp\_lib8.c}  & provide \texttt{\#pgm\_name -opt1 -opt2 query file} copy of command line \\
+\texttt{PROGRESS} & def & \texttt{comp\_lib5e.c}, \texttt{comp\_lib8.c}  & provide progress symbols in interactive mode \\
+\texttt{SAMP\_STATS} & def & \texttt{comp\_lib5e.c}, \texttt{comp\_lib8.c}  & scores are sampled for statistical estimates \\
+\texttt{SAMP\_STATS\_LESS} & def & \texttt{compacc.c} & a slower sampling strategy is used \\
+\texttt{SHOW\_ALIGN\_SCORE} & undef & \texttt{wm\_align.c} & print score, cumulative score, during alignment (for teaching) \\
+\texttt{SHOW\_HELP} & def & \texttt{comp\_lib5e.c}, \texttt{comp\_lib8.c}, \texttt{initfa.c}, \texttt{doinit.c} & print out help information with '-help', or no arguments given.  Undef \texttt{SHOW\_HELP} reverts to pre-\texttt{fasta-35.4.4}.\\
+\texttt{SHOW\_HIST} & undef & \texttt{doinit.c} & inverts current meaning of \texttt{-H} (shows by default for non-PCOMPLIB (MPI) programs). \\
+\texttt{SHOWSIM} & def & \texttt{mshowbest.c} \texttt{mshowalign2.c} & display percent similarity \\
+\texttt{USE\_LNSTATS} & obs & \texttt{scaleswn.c} & use $ln()$-scaling for scores, removed in \texttt{fasta2.0}.\\
+\hline \\
+\end{tabular}
+$^*$Status: def: \#defined in standard \texttt{Makefiles}; undef: undefined; obs: obsolete, provided for backwards compatibility with FASTA2.0 or earlier.
+\end{table}
+
+The \texttt{fasta-36/make} directory includes \texttt{Makefile}s
+appropriate for a broad range of environments, including Linux/Unix,
+BSD, MacOSX, and Windows.  Makefiles are regularly tested against
+MacOSX, Linux, and Windows. Table \ref{make-defs} summarizes the
+Makefile options that can be modified.
+
+As distributed, the \texttt{Makefiles} in \texttt{fasta36/make}, build
+a version of the FASTA programs that is optimized for single searches
+against arbitrary sized databases, using bit scores, efficient sampled
+statistics, and gap-open/extend penalties.  The default compilation
+configuration can be changed either by changing the compile time
+defines (Table \ref{make-defs}) in the main \texttt{Makefile},
+e.g. \texttt{make/Makefile.linux64\_sse2}, or by editing
+\texttt{make/Makefile36m.common}.
+
+\emph{High-performance searches with many queries} -- By default, the
+\texttt{comp\_lib5e.c} program specified in
+\texttt{Makefile36m.common} builds FASTA programs that re-read the
+library sequence database for every query sequence.  This has the
+advantage that sequence comparison begins almost immediately, but if
+thousands of searches are being performed, the database is re-read
+thousands of times.  \texttt{Makefile36m.common} can be edited to use
+\texttt{comp\_lib8.c} in place of \texttt{comp\_lib5e.c} and the
+database is read only once, then held in memory for additional
+searches.  Of course, if \texttt{comp\_lib8.c} is used, the computer
+must have enough memory to store the complete database.  Keeping the
+database in memory allows the FASTA programs to very efficiently use
+large, multicore computers.
+
+\needspace{5\baselineskip}
+\emph{Parallel searches with MPI} -- By default under
+Unix/Linux/MacOSX, the FASTA programs are threaded; they will spawn as
+many threads as CPU cores are available (this can be limited with the
+\texttt{-t n-threads} option).  Using \texttt{comp\_lib8.c}, we see
+almost 48-fold speedup on a 48-core machine.  The FASTA programs can
+also be run in parallel in the MPI environment on clusters of
+computers.  To build the MPI versions of the programs, use
+\texttt{make -f ../make/Makefile.mpi\_sse2 ssearch36\_mpi},
+\texttt{fastx36\_mpi}, etc.  The MPI programs currently have substantially
+more communications overhead than the threaded versions, so they may
+not scale as well to large clusters.
+
+\include{fasta.history}
+\end{document}
diff --git a/doc/fasta_versions.html b/doc/fasta_versions.html
new file mode 100644
index 0000000..14b186f
--- /dev/null
+++ b/doc/fasta_versions.html
@@ -0,0 +1,101 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head><title>Major FASTA versions</title>
+<style type="text/css">
+body {  margin-left: 6px; }
+.sidebar { 
+font-size: 12px; font-family: sans-serif; text-decoration:none; background-color: #FFFFCC; }
+.fasta { font-family: sans-serif; }
+.fasta h2 { font-size: 16px; color: #000000 }
+.fasta h3 { font-size: 14px; color: #000000 }
+.fasta td {background-color: #FFFFCC }
+.fasta a { text-decoration: none; }
+.fasta li { margin-left:-1em }
+</style>
+</head>
+<body>
+<div class=fasta>
+<h2>FASTA version summary</h2>
+<code> $Id: fasta_versions.html 256 2010-03-29 13:41:48Z wrp $</code>
+<hr>
+<b>March, 2010</b>
+<p />
+Currently, there are five different versions of the FASTA programs that can be downloaded from the <a href="http://faculty.virginia.edu/wrpearson/fasta">FASTA Software WWW site</a>.  I recommend that you use either <a href="fasta36/">fasta-36</a>, the very latest version, or <a href="CURRENT/">fasta-35</a>, the "classic" version that has been available since 2007.
+<p />
+Here I try to explain the major differences.
+<p />
+<hr />
+<table>
+<tr><th>Version</th><th>New Programs</th><th>Other changes</th></tr>
+<tr>
+<td><a href="fasta36/">fasta-36</a></td><td> None </td><td>
+The major improvement in FASTA v36 is the ability to calculate and
+display multiple significant alignments (multiple HSP's) between a
+query sequence and a library sequence.  Previous FASTA versions had
+the serious shortcoming of only showing the best
+alignment. A <tt>tfastx36</tt> alignment between a protein and its
+exon-containing gene will show all of the exon alignments that are
+long enough to be significant.
+<p>
+FASTA v36 also provides more flexible strategies for searching lists
+of database files, and versions after fasta-36.2 are fully threaded, so
+both searches and alignments can be distributed among multiple
+processors with efficient speedup.
+<p>
+PVM/MPI parallel versions of FASTA v36 are under development.
+</td>
+</tr>
+<tr><td colspan=3><hr /></td></tr>
+<tr>
+<td><a href="fasta3/">fasta-35</a></td>
+<td><code>+lalign35</code><br /><code>+ggsearch35</code><br /><code>+glsearch35</code><br /><code>-prss3</code><br /><code>-prfx3</code>
+</td>
+<td>FASTA v35 provides significant improvements in statistical accuracy and program efficiency in threaded (multi-CPU, multi-core) environments. The program now automatically produces 500 random shuffles when small libraries are searched.  Thus, <code>prss</code> and <code>prfx</code> are no longer required; <code>ssearch35</code> and <code>fastx35</code> provide the same function.</td>
+</tr>
+<tr><td colspan=3><hr /></td></tr>
+<tr>
+<td><a href="fasta3/">fasta34</a></td>
+<td><code>+ssearch3</code><br /><code>+fasts3/tfasts3</code><br /><code>+fastf3/tfastf3</code><br /><code>-lfasta</code><br /><code>-lalign</code><br /><code>-tfasta</code>
+</td>
+<td>FASTA v34 is the last version of the FASTA3 series before the
+  significant changes introduced with FASTA v35.  The FASTA v3
+  programs were first introduced in 1996, with threaded code
+  for multi-processors, more accurate statistical estimates, and
+  optimal Smith-Waterman alignments with <code>ssearch</code>.  More
+  recent versions (v34) provided accelerated Smith-Waterman searches
+  using Altivec and SSE2 vector processors. FASTA v34 is no longer
+  updated for bug fixes. </td>
+</tr>
+<tr><td colspan=3><hr /></td></tr>
+<tr>
+<td><a href="fasta2/">fasta2</a></td>
+<td><code>+lfasta</code><br /><code>+tfasta</code>
+</td>
+<td>FASTA v2 was introduced in 1995, improving sensitivity by
+  calculating gapped alignment scores. Modern (V34 and V35) versions of
+  FASTA have significantly more robust statistical estimates, threaded
+  code, and vectorized Smith-Waterman, so FASTA v2 should not be used
+  for database searching.  However, until <code>lalign</code> was added
+  to FASTA v35 in March, 2007, FASTA v2 was the only source for
+  the <code>lalign</code> program.  Today, the only programs provided in
+  FASTA v2 that are not provided in FASTA v35 are the Kyte-Doolittle
+  hydropathy plotter <code>grease</code> and the classic (though very
+  inaccurate) secondary structure prediction programs <code>chofas</code>
+  and <code>garnier</code></td>
+</tr>
+</table>
+<p />
+The current stable version of the FASTA programs is version 35,
+and older releases of version 35
+are <a href="fasta3/">available</a>
+to make it easier for software packagers to work with a consistent
+version of the software.  However,
+the <a href="CURRENT/">CURRENT</a>
+version of FASTA should be used whenever possible.  Many bug reports
+reflect older versions of the software.
+<hr /> <p /> Description of minor changes: <a href="changes_v35.html">changes_v35.html</a> <p />
+<hr />
+</div></body></html>
diff --git a/doc/fastf3.1 b/doc/fastf3.1
new file mode 100644
index 0000000..91bd7ff
--- /dev/null
+++ b/doc/fastf3.1
@@ -0,0 +1,176 @@
+.TH FASTF/TFASTFv3 1 local
+.SH NAME
+fastf3, fastf3_t \- compare a mixed peptide sequence against a protein
+database using a modified fasta algorithm.
+
+tfastf3, tfastf3_t \- compare a mixed peptide sequence against a
+translated DNA database.
+
+.SH DESCRIPTION
+
+.B fastf3
+and
+.B tfastf3
+are designed to compare a sequence of mixed peptides to a protein
+(fastf3) or translated DNA (tfastf3) database.  Unlike the traditional
+.B fasta3
+search, which uses a protein or DNA sequence,
+.B fastf3
+and
+.B tfastf3
+work with a query sequence of the form:
+.in +5
+.nf
+>testf from mgstm1
+MGCEN,
+MIDYP,
+MLLAY,
+MLLGY
+.fi
+.in 0
+This sequence indicates that a mixture of four peptides has been
+found, with 'M' in the first position of each one (as from a CNBr
+cleavage), in the second position 'G', 'I', or 'L' (twice), at the
+third position 'C', 'D', or 'L' (twice), at the fourth position 'E',
+'Y', 'A', or 'G', etc.  When this sequence is compared against mgstm1.aa 
+(included with the distribution), the mixture is deconvolved to form:
+.nf
+.ft C
+.in +5
+testf    MILGY-----------MLLEY-----------MGDAP-----------
+         :::::           :::::           :::::           
+GT8.7  MPMILGYWNVRGLTHPIRMLLEYTDSSYDEKRYTMGDAPDFDRSQWLNEK
+               10        20        30        40        50
+
+testf  --------------------------------------------------
+                                                         
+GT8.7  FKLGLDFPNLPYLIDGSHKITQSNAILRYLARKHHLDGETEEERIRADIV
+               60        70        80        90       100
+
+                      20                                 
+testf  ------------MLCYN                                 
+                   :::::                                 
+GT8.7  ENQVMDTRMQLIMLCYNPDFEKQKPEFLKTIPEKMKLYSEFLGKRPWFAG
+              110       120       130       140       150
+.in 0
+.ft P
+.fi
+.SH Options
+.LP
+.B fastf3
+and
+.B tfastf3
+can accept a query sequence from the unix "stdin" data stream.  This makes it much
+easier to use fasta3 and its relatives as part of a WWW page. To
+indicate that stdin is to be used, use "-" or "@" as the query
+sequence file name.
+.TP
+\-b #
+number of best scores to show (must be < -E cutoff)
+.TP
+\-d #
+number of best alignments to show ( must be < -E cutoff)
+.TP
+\-D
+turn on debugging mode.  Enables checks on sequence alphabet that
+cause problems with tfastx3, tfasty3, tfasta3.
+.TP
+\-E #
+Expectation value limit for displaying scores and
+alignments.  Expectation values for
+.B fastf3
+and
+.B tfastf3
+are not as accurate as those for the other 
+.B fasta3
+programs.
+.TP
+\-H
+turn off histogram display
+.TP
+\-i
+compare against only the reverse complement of the library sequence.
+.TP
+\-L
+report long sequence description in alignments
+.TP
+\-m 0,1,2,3,4,5,6,10
+alignment display options
+.TP
+\-n
+force query to nucleotide sequence
+.TP
+\-N #
+break long library sequences into blocks of # residues.  Useful for
+bacterial genomes, which have only one sequence entry.  -N 2000 works
+well for bacterial genomes.
+.TP
+\-O file
+send output to file
+.TP
+\-q/-Q
+quiet option; do not prompt for input
+.TP 
+\-R file
+save all scores to statistics file
+.TP
+\-S #
+offset substitution matrix values by  a constant #
+.TP
+\-s name
+specify substitution matrix.  BLOSUM50 is used by default;
+PAM250, PAM120, and BLOSUM62 can be specified by setting -s P120,
+P250, or BL62.  With this version, many more scoring matrices are
+available, including BLOSUM80 (BL80), and MDM_10, MDM_20, MDM_40 (M10,
+M20, M40). Alternatively, BLASTP1.4 format scoring matrix files can be
+specified.
+.TP
+\-T #
+(threaded, parallel only) number of threads or workers to use (set by
+default to 4 at compile time).
+.TP
+\-t #
+Translation table - tfastf3 can use the BLAST translation tables.  See
+\fChttp://www.ncbi.nih.gov/htbin-post/Taxonomy/wprintgc?mode=c/\fP.
+.TP
+\-w #
+line width for similarity score, sequence alignment, output.
+.TP
+\-x "#,#"
+offsets query, library sequence for numbering alignments
+.TP
+\-z #
+Specify statistical calculation. Default is -z 1, which uses
+regression against the length of the library sequence. -z 0 disables
+statistics.  -z 2 uses the ln() length correction. -z 3 uses Altschul
+and Gish's statistical estimates for specific protein BLOSUM scoring
+matrices and gap penalties. -z 4: an alternate regression method.
+.TP
+\-Z db_size
+Set the apparent database size used for expectation value calculations.
+.TP
+\-1
+Sort by "init1" score.
+.TP
+\-3
+(TFASTF3 only) use only forward frame translations
+.SH Environment variables:
+.TP
+FASTLIBS
+location of library choice file (-l FASTLIBS)
+.TP
+SMATRIX
+default scoring matrix (-s SMATRIX)
+.TP
+SRCH_URL
+the format string used to define the option to re-search the
+database.
+.TP
+REF_URL
+the format string used to define the option to lookup the library
+sequence in entrez, or some other database.
+
+.SH AUTHOR
+Bill Pearson
+.br
+wrp at virginia.EDU
diff --git a/doc/fasts3.1 b/doc/fasts3.1
new file mode 100644
index 0000000..74af8f2
--- /dev/null
+++ b/doc/fasts3.1
@@ -0,0 +1,169 @@
+.TH FASTS/TFASTSv3 1 local
+.SH NAME
+fasts3, fasts3_t \- compare several short peptide sequences against a protein
+database using a modified fasta algorithm.
+
+tfasts3, tfasts3_t \- compare short peptides against a
+translated DNA database.
+
+.SH DESCRIPTION
+
+.B fasts3
+and
+.B tfasts3
+are designed to compare a set of (presumably non-contiguous) peptides to
+a protein (fasts3) or translated DNA (tfasts3) database.
+fasts3/tfasts3 are designed particularly for short peptide data from
+mass-spec analysis of protein digests.  Unlike the traditional
+.B fasta3
+search, which uses a protein or DNA sequence,
+.B fasts3
+and
+.B tfasts3
+work with a query sequence of the form:
+.in +5
+.nf
+>tests from mgstm1
+MLLE,
+MILGYW,
+MGDAP,
+MLCYNP
+.fi
+.in 0
+This sequence indicates that four peptides are to be used.  When this
+sequence is compared against mgstm1.aa (included with the
+distribution), the result is:
+.nf
+.ft C
+.in +5
+testf    MILGYW----------MLLE------------MGDAP-----------
+         ::::::          ::::            :::::           
+GT8.7  MPMILGYWNVRGLTHPIRMLLEYTDSSYDEKRYTMGDAPDFDRSQWLNEK
+               10        20        30        40        50
+
+testf  --------------------------------------------------
+                                                         
+GT8.7  FKLGLDFPNLPYLIDGSHKITQSNAILRYLARKHHLDGETEEERIRADIV
+               60        70        80        90       100
+
+                      20                                 
+testf  ------------MLCYNP
+                   ::::::
+GT8.7  ENQVMDTRMQLIMLCYNPDFEKQKPEFLKTIPEKMKLYSEFLGKRPWFAG
+              110       120       130       140       150
+.in 0
+.ft P
+.fi
+.SH Options
+.LP
+.B fasts3
+and
+.B tfasts3
+can accept a query sequence from the unix "stdin" data stream.  This makes it much
+easier to use fasta3 and its relatives as part of a WWW page. To
+indicate that stdin is to be used, use "-" or "@" as the query
+sequence file name.
+.TP
+\-b #
+number of best scores to show (must be < -E cutoff)
+.TP
+\-d #
+number of best alignments to show ( must be < -E cutoff)
+.TP
+\-D
+turn on debugging mode.  Enables checks on sequence alphabet that
+cause problems with tfastx3, tfasty3, tfasta3.
+.TP
+\-E #
+Expectation value limit for displaying scores and
+alignments.  Expectation values for
+.B fasts3
+and
+.B tfasts3
+are not as accurate as those for the other 
+.B fasta3
+programs.
+.TP
+\-H
+turn off histogram display
+.TP
+\-i
+compare against only the reverse complement of the library sequence.
+.TP
+\-L
+report long sequence description in alignments
+.TP
+\-m 0,1,2,3,4,5,6,9,10
+alignment display options
+.TP
+\-N #
+break long library sequences into blocks of # residues.  Useful for
+bacterial genomes, which have only one sequence entry.  -N 2000 works
+well for bacterial genomes.
+.TP
+\-O file
+send output to file
+.TP
+\-q/-Q
+quiet option; do not prompt for input
+.TP 
+\-R file
+save all scores to statistics file
+.TP
+\-S #
+offset substitution matrix values by  a constant #
+.TP
+\-s name
+specify substitution matrix.  BLOSUM50 is used by default;
+PAM250, PAM120, and BLOSUM62 can be specified by setting -s P250,
+P120, or BL62.  With this version, many more scoring matrices are
+available, including BLOSUM80 (BL80), and MDM_10, MDM_20, MDM_40 (M10,
+M20, M40). Alternatively, BLASTP1.4 format scoring matrix files can be
+specified.
+.TP
+\-T #
+(threaded, parallel only) number of threads or workers to use (set by
+default to 4 at compile time).
+.TP
+\-t #
+Translation table - tfasts3 can use the BLAST translation tables.  See
+\fChttp://www.ncbi.nih.gov/htbin-post/Taxonomy/wprintgc?mode=c/\fP.
+.TP
+\-w #
+line width for similarity score, sequence alignment, output.
+.TP
+\-x "#,#"
+offsets query, library sequence for numbering alignments
+.TP
+\-z #
+Specify statistical calculation. Default is -z 1, which uses
+regression against the length of the library sequence. -z 0 disables
+statistics.  -z 2 uses the ln() length correction. -z 3 uses Altschul
+and Gish's statistical estimates for specific protein BLOSUM scoring
+matrices and gap penalties. -z 4: an alternate regression method.
+.TP
+\-Z db_size
+Set the apparent database size used for expectation value calculations.
+.TP
+\-3
+(TFASTS3 only) use only forward frame translations
+.SH Environment variables:
+.TP
+FASTLIBS
+location of library choice file (-l FASTLIBS)
+.TP
+SMATRIX
+default scoring matrix (-s SMATRIX)
+.TP
+SRCH_URL
+the format string used to define the option to re-search the
+database.
+.TP
+REF_URL
+the format string used to define the option to lookup the library
+sequence in entrez, or some other database.
+
+.SH AUTHOR
+Bill Pearson
+.br
+wrp at virginia.EDU
diff --git a/doc/map_db.1 b/doc/map_db.1
new file mode 100644
index 0000000..173f119
--- /dev/null
+++ b/doc/map_db.1
@@ -0,0 +1,45 @@
+.TH MAP_DB "September, 1999"
+.SH NAME
+.B map_db
+\- read a FASTA (0), GENBANK flat file (1), PIR/VMS (5), or GCG binary
+(6) sequence database and produce the offsets necessary for efficient
+memory mapping.
+.SH SYNOPSIS
+.B map_db
+[-n] filename | "filename libtype"
+.SH DESCRIPTION
+.B map_db
+.I filename
+reads the sequence database in
+.I filename
+and produces a new file
+.I filename.xin
+with the offset information necessary for efficient memory mapping.
+.LP
+The programs in fasta version 32t08 can use memory mapped i/o to load
+sequence database files and read them efficiently.  Memory mapping is
+used only if a "\c
+.I .xin\c
+\&" file is available.  The "\c
+.I .xin\c
+\&" file is created by
+.B map_db\c
+\&.
+.LP
+In addition to
+.B map_db\c
+\&,
+.B list_db
+is available to display the database size, etc, and set of offsets calculated
+by
+.B map_db\c
+\&.
+.SH OPTIONS
+.TP
+\-n 
+Read file as DNA database.
+.SH BUGS
+.SH AUTHOR
+Bill Pearson
+.br
+wrp at virginia.EDU
diff --git a/doc/prss3.1 b/doc/prss3.1
new file mode 100644
index 0000000..dd407ce
--- /dev/null
+++ b/doc/prss3.1
@@ -0,0 +1,170 @@
+.TH PRSS3 1 local
+.SH NAME
+prss \- test a protein sequence similarity for significance
+.SH SYNOPSIS
+.B prss34
+\&[-Q -A -f # -g # -H -O file -s SMATRIX -w # -Z #
+.I -k # -v #
+]
+sequence-file-1 sequence-file-2
+[
+.I #-of-shuffles
+]
+
+.B prfx34
+\&[-Q -A -f # -g # -H -O file -s SMATRIX -w # -z 1,3 -Z #
+.I -k # -v #
+]
+sequence-file-1 sequence-file-2
+[
+.I ktup
+]
+[
+.I #-of-shuffles
+]
+
+.B prss34(_t)/prfx34(_t)
+[-AfghksvwzZ]
+\- interactive mode
+
+.SH DESCRIPTION
+.B prss34
+and
+.B prfx34
+are used to evaluate the significance of a protein:protein, DNA:DNA
+(
+.B prss34
+), or translated-DNA:protein (
+.B prfx34
+) sequence similarity score
+by comparing two sequences and calculating optimal similarity scores,
+and then repeatedly shuffling the second sequence, and calculating
+optimal similarity scores using the Smith-Waterman algorithm. An
+extreme value distribution is then fit to the shuffled-sequence
+scores.  The characteristic parameters of the extreme value
+distribution are then used to estimate the probability that each of
+the unshuffled sequence scores would be obtained by chance in one
+sequence, or in a number of sequences equal to the number of shuffles.
+This program is derived from
+.B rdf2\c
+\&, described by Pearson and Lipman, PNAS (1988) 85:2444-2448, and
+Pearson (Meth. Enz.  183:63-98).  Use of the extreme value
+distribution for estimating the probabilities of similarity scores was
+described by Altschul and Karlin, PNAS (1990) 87:2264-2268.  The
+'z-values' calculated by rdf2 are not as informative as the P-values
+and expectations calculated by prdf.
+.B prss34
+calculates optimal scores using the same rigorous Smith-Waterman
+algorithm (Smith and Waterman, J. Mol. Biol. (1983) 147:195-197) used by the
+.B ssearch34
+program.
+.B prfx34
+calculates scores using the FASTX algorithm (Pearson et al. (1997) Genomics 46:24-36).
+.PP
+.B prss34
+and 
+.B prfx34
+also allow a more sophisticated shuffling method: residues can be shuffled
+within a local window, so that the order of residues 1-10, 11-20, etc,
+is destroyed but a residue in the first 10 is never swapped with a residue
+outside the first ten, and so on for each local window.
+.SH EXAMPLES
+.TP
+(1)
+.B prss34
+\& -v 10 musplfm.aa lcbo.aa
+.PP
+Compare the amino acid sequence in the file musplfm.aa with that
+in lcbo.aa, then shuffle lcbo.aa 200 times using a local shuffle with
+a window of 10.  Report the significance of the
+unshuffled musplfm/lcbo comparison scores with respect to the shuffled
+scores.
+.TP
+(2)
+.B prss34
+musplfm.aa lcbo.aa 1000
+.PP
+Compare the amino acid sequence in the file musplfm.aa with the sequences
+in the file lcbo.aa, shuffling \fClcbo.aa\fP 1000 times.  Shuffles can also be specified with the -k # option.
+.TP
+(3)
+.B prfx34
+mgstm1.esq xurt8c.aa 2 1000
+.PP
+Translate the DNA sequence in the \fCmgstm1.esq\fP file in all six
+frames and compare it to the amino acid sequence in the file
+\fCxurt8c.aa\fP, using ktup=2 and shuffling \fCxurt8c.aa\fP 1000
+times.  Each comparison considers the best forward or reverse
+alignment with frameshifts, using the fastx algorithm (Pearson et al
+(1997) Genomics 46:24-36).
+.TP
+(4)
+.B prss34/prfx34
+.PP
+Run prss in interactive mode.  The program will prompt for the file
+name of the two query sequence files and the number of shuffles to be
+used.
+.SH OPTIONS
+.PP
+.B prss34/prfx34
+can be directed to change the scoring matrix, gap penalties, and
+shuffle parameters by entering options on the command line (preceded
+by a `\-'). All of the options should precede the file names and number of
+shuffles.
+.TP
+\-A
+Show unshuffled alignment.
+.TP
+\-f #
+Penalty for opening a gap (-10 by default for proteins).
+.TP
+\-g #
+Penalty for additional residues in a gap (-2 by default) for proteins.
+.TP
+\-H
+Do not display histogram of similarity scores.
+.TP
+\-k #
+Number of shuffles (200 is the default)
+.TP
+\-Q -q
+"quiet" - do not prompt for filename.
+.TP
+\-O filename
+send copy of results to "filename."
+.TP
+\-s str
+specify the scoring matrix.  BLOSUM50 is used by default for proteins;
++5/-4 is used by default for DNA. 
+.B prss34
+recognizes the same scoring matrices as fasta34, ssearch34, fastx34, etc;
+e.g. BL50, P250, BL62, BL80, MD10, MD20, and other matrices in BLAST1.4
+matrix format.
+.TP
+\-v #
+Use a local window shuffle with a window size of #.
+.TP
+\-z #
+Calculate statistical significance using the mean/variance
+(moments) approach used by fasta34/ssearch or from maximum likelihood
+estimates of lambda and K.
+.TP
+\-Z #
+Present statistical significance as if a '#' entry database had
+been searched (e.g. "-Z 50000" presents statistical significance as if
+50,000 sequences had been compared).
+.SH ENVIRONMENT VARIABLES
+.PP
+.B (SMATRIX)
+the filename of an alternative scoring matrix file.  For protein
+sequences, BLOSUM50 is used by default; PAM250 can be used with the
+command line option
+.B -s P250\c
+(or with -s pam250.mat).  BLOSUM62 (-s BL62) and PAM120 (-s P120) are also available.
+.SH "SEE ALSO"
+ssearch3(1), fasta3(1).
+.SH AUTHOR
+Bill Pearson
+.br
+wrp at virginia.EDU
+
diff --git a/doc/ps_lav.1 b/doc/ps_lav.1
new file mode 100644
index 0000000..c3bd02f
--- /dev/null
+++ b/doc/ps_lav.1
@@ -0,0 +1,20 @@
+.TH PS_LAV 1 local
+.SH NAME
+ps_lav [-B] [-Z db_size] \- plot an "lav" file in postscript.
+.SH DESCRIPTION
+ps_lav is a simple program to take "lav" format output from "lalign35"
+and produce postscript alignment plots (which look like "dot-plots").
+It was designed to work with the output of the \fClalign35\fP program,
+but should work with other \fClav\fP output as well.
+.TP
+\-B
+color alignment lines using the "bit" score.
+.TP
+\-Z db_size
+set the effective database size for an E()-value calculation to db_size,
+and color alignment lines using E()-values.  Requires bit scores in
+the \fClav\fP output.
+.SH AUTHOR
+Bill Pearson
+.br
+wrp at virginia.EDU
diff --git a/doc/readme.v30 b/doc/readme.v30
new file mode 100644
index 0000000..f445ec0
--- /dev/null
+++ b/doc/readme.v30
@@ -0,0 +1,38 @@
+
+Because of interdependencies in the Makefile, sometimes you must
+type "make" a second time to get everything built.
+
+June 12, 1996 - fasta30t1
+
+	Fixed bug in reading blast-format DNA sequence files.
+	Fixed core-dump for some large libraries on some machines.
+
+June 19, 1996 - fasta30t2
+
+	Fixed a serious bug in the Smith-Waterman alignment routines used
+	by both fasta3 (dropnfa.c) and ssearch3 (dropgsw.c) that caused
+	the amount of memory required to depend on the library sequence
+	size, rather than the query sequence size.
+
+	Fixed some memory-overwrite errors in showalign.c
+
+June 27, 1996 - fasta30t3
+
+	Found and fixed bugs in comp_thr.c and nxgetaa.c that caused core
+	dumps when reading DNA libraries with long sequences in fasta
+	format.
+
+July 6, 1996  - fasta30t4
+
+	ibm_pthread_subs.c available, Makefile.ibm for multiprocessor
+	IBM RS/6000 AIX systems.
+
+	Finally (?) fixed the previous bug that caused core dumps when
+	reading DNA libraries in fasta format.
+
+	Corrections to the fastx algorithm.
+
+July 10, 1996
+
+	Fixed reading of compressed GCG DNA format.
+
diff --git a/doc/readme.v30t6 b/doc/readme.v30t6
new file mode 100644
index 0000000..bb35f83
--- /dev/null
+++ b/doc/readme.v30t6
@@ -0,0 +1,74 @@
+
+>>August 24, 1996
+
+New programs - tfastx3, tfastx3_t, compare a protein sequence to
+forward and reverse translations of a DNA sequence database.  An excellent
+replacement for tfasta3.
+
+Sun multiprocessing - change in thr_create() to use all CPU's if available.
+
+GCG formats - now can search with simple GCG-format query sequences and
+results with GCG format Swissprot and Genpept are more readable.
+
+>>August 26, 1996
+
+Fixed bugs in tfastx3(_t) and fastx3(_t) including an ancient problem
+with aatran().  Less redundancy in gcg_ranlib().
+
+
+>>August 31, 1996
+
+Included support for BLOSUM62 (-s BL62) as per documentation.
+
+Rearranged Makefile's so that they would make everything in one pass.
+
+>>September 6, 1996
+
+Corrected yet another problem with the fastx/tfastx code.
+
+Noticed that searching without optimized scores gave no optimized
+scores on the final list of scores - fixed this.
+
+The pvm version now does alignments - not thoroughly tested.
+
+>>September 13, 1996
+
+Fixed display of best scores to stdout.
+
+Fixed problem with alignments when -o flag used.
+
+pvcompfa/pvcompsw have now been tested on DEC Alpha, Solaris X86, and
+SGI PVM implementations.  Several bugs were corrected.
+
+>>September 18, 1996
+
+Fixed bug in selectbestz() that caused core dumps in pvcomplib.c
+(changes to pvcomplib.c, comp_thr.c, complib.c).
+
+>>September 23, 1996
+
+Corrected showalign.c/pvm_showalign.c addressing bug found and fixed
+by Erik Wallin. (erikw at biokemi.su.se).
+
+>>October 15, 1996
+
+Corrected bug so alternative scoring matrices are used.
+
+>>October 22, 1996
+
+Remove singularities from regression routine.
+
+-z 0 now means no statistics (same as -z -1).
+
+No longer show alignment for 0 score.
+
+>>October 26, 1996
+
+Fix problem with -b, -d when Z-values disabled.
+
+>>November 1, 1996
+
+Altschul-Gish statistical estimates (-z 3) now work properly.
+
+Fix problem with mean_var==0.0.
+
diff --git a/doc/readme.v30t7 b/doc/readme.v30t7
new file mode 100644
index 0000000..42682d1
--- /dev/null
+++ b/doc/readme.v30t7
@@ -0,0 +1,175 @@
+>> October 30, 1996
+
+A new program, sc_to_e, can be used to calculate expectation values
+from the regression coefficients reported from a search.  The
+expectation value is based on similarity score, sequence length, and
+database size.
+
+>> November 8, 1996
+
+fasta30t7 differs from fasta30t6 in the amount of information provided
+with the -m 10 option.
+
+(1) The query and library sequence identifiers are no longer abbreviated.
+
+(2) New information about the program and program version are provided:
+
+The new information provided is:
+
+	mp_name: program name (actually argv[0])
+	mp_ver: main program version (can be different from function version)
+	mp_argv: command line arguments (duplicates argv[0])
+
+    Some statistical information is provided as well:
+	mp_extrap: XXXX YYY - statistics extrapolated from XXX to YYY
+	mp_stats: indicates type of statistics used for E() value
+	mp_KS: Kolmogorov-Smirnov statistic
+
+The "mp_" (main program) information is function independent, while the "pg_"
+information is produced by a particular comparison function (ssearch,
+fastx, fasta, etc).  "pg_" should probably be called "fn_", and "mp_"
+called "pg_", but I remain backwards compatible.
+
+(3) The end of the "parseable" records is denoted with:
+
+	>>><<<
+
+(4) There is now a compile-time option -DM10_CONS, that allows you to
+display a final alignment summary:
+
+;al_cons:
+     .::.:-   .:: ..  :.    .:.---:   :  .--.:. : 
+..  .---  ..: :: ... :..: .::.:. .  .---.  .   .: 
+ : .  . . :    ..   .    :..: .--. . : .:. .. :  .
+ .:.:::  ..:. :
+
+or, if M10_CONS_L is defined (in addition to M10_CONS), the output is:
+;al_cons:
+     p==p=-mmmp==mpzmm=pmmmmz=p---=mmm=mmp--p=zm=m
+pzmmp---mmzp=m==mzzzm=zp=mz==z=pmzmmz---pmmpmmmp=m
+m=mzmmzmpm=mmmmppmmmpmmmm=pp=mp--pmpm=mp=pmzzm=mmp
+mp=z===mmpz=zm=
+
+where '=' indicates identical residues, '-' a gap in one or the other
+sequence, 'p' indicates a positive pam value, 'm' indicates a negative
+pam value, and 'z' indicates a zero pam value.
+
+A typical run now looks like:
+
+>>>gtm1_mouse.aa, 217 aa vs s library
+; mp_name: fasta3_t
+; mp_ver: version 3.0t7 November, 1996
+; mp_argv: fasta3_t -q -m 10 gtm1_mouse.aa s
+; pg_name: FASTA
+; pg_ver: 3.06 Sept, 1996
+; pg_matrix: BL50
+; pg_gap-pen: -12 -2
+; pg_ktup: 2
+; pg_optcut: 24
+; pg_cgap: 36
+; mp_extrap: 50000 51933
+; mp_stats: Expectation fit: rho(ln(x))= 5.8855+/-0.000527; mu= 1.5386+/- 0.029;  mean_var=73.0398+/-15.283
+; mp_KS: 0.0133 (N=29) at  42
+>>GTM1_MOUSE GLUTATHIONE S-TRANSFERASE GT8.7 (EC 2.5.1.18) (GST 1-1) (CLASS-MU).
+; fa_initn: 1490
+; fa_init1: 1490
+; fa_opt: 1490
+; fa_z-score: 1754.6
+; fa_expect:      0
+; sw_score: 1490
+; sw_ident: 1.000
+; sw_overlap: 217
+>GTM1_MOUSE ..
+; sq_len: 217
+; sq_type: p
+; al_start: 1
+; al_stop: 217
+; al_display_start: 1
+PMILGYWNVRGLTHPIRMLLEYTDSSYDEKRYTMGDAPDFDRSQWLNEKF
+KLGLDFPNLPYLIDGSHKITQSNAILRYLARKHHLDGETEEERIRADIVE
+NQVMDTRMQLIMLCYNPDFEKQKPEFLKTIPEKMKLYSEFLGKRPWFAGD
+KVTYVDFLAYDILDQYRMFEPKCLDAFPNLRDFLARFEGLKKISAYMKSS
+RYIATPIFSKMAHWSNK
+>GTM1_MOUSE ..
+; sq_len: 217
+; sq_type: p
+; al_start: 1
+; al_stop: 217
+; al_display_start: 1
+PMILGYWNVRGLTHPIRMLLEYTDSSYDEKRYTMGDAPDFDRSQWLNEKF
+KLGLDFPNLPYLIDGSHKITQSNAILRYLARKHHLDGETEEERIRADIVE
+NQVMDTRMQLIMLCYNPDFEKQKPEFLKTIPEKMKLYSEFLGKRPWFAGD
+KVTYVDFLAYDILDQYRMFEPKCLDAFPNLRDFLARFEGLKKISAYMKSS
+RYIATPIFSKMAHWSNK
+>>GTM1_RAT GLUTATHIONE S-TRANSFERASE YB1 (EC 2.5.1.18) (CHAIN 3) (CLASS-MU).
+; fa_initn: 1406
+; fa_init1: 1406
+; fa_opt: 1406
+; fa_z-score: 1656.3
+; fa_expect:      0
+; sw_score: 1406
+; sw_ident: 0.931
+; sw_overlap: 217
+>GTM1_MOUSE ..
+; sq_len: 217
+; sq_type: p
+; al_start: 1
+; al_stop: 217
+; al_display_start: 1
+PMILGYWNVRGLTHPIRMLLEYTDSSYDEKRYTMGDAPDFDRSQWLNEKF
+KLGLDFPNLPYLIDGSHKITQSNAILRYLARKHHLDGETEEERIRADIVE
+NQVMDTRMQLIMLCYNPDFEKQKPEFLKTIPEKMKLYSEFLGKRPWFAGD
+KVTYVDFLAYDILDQYRMFEPKCLDAFPNLRDFLARFEGLKKISAYMKSS
+RYIATPIFSKMAHWSNK
+>GTM1_RAT ..
+; sq_len: 217
+; sq_type: p
+; al_start: 1
+; al_stop: 217
+; al_display_start: 1
+PMILGYWNVRGLTHPIRLLLEYTDSSYEEKRYAMGDAPDYDRSQWLNEKF
+KLGLDFPNLPYLIDGSRKITQSNAIMRYLARKHHLCGETEEERIRADIVE
+NQVMDNRMQLIMLCYNPDFEKQKPEFLKTIPEKMKLYSEFLGKRPWFAGD
+KVTYVDFLAYDILDQYHIFEPKCLDAFPNLKDFLARFEGLKKISAYMKSS
+RYLSTPIFSKLAQWSNK
+;al_cons:
+:::::::::::::::::.:::::::::.::::.::::::.::::::::::
+::::::::::::::::.::::::::.::::::::: ::::::::::::::
+:::::.::::::::::::::::::::::::::::::::::::::::::::
+::::::::::::::::..::::::::::::.:::::::::::::::::::
+::..::::::.:.::::
+>>><<<
+
+
+217 residues in 1 query   sequences
+18531385 residues in 52205 library sequences
+ Tcomplib (4 proc)[version 3.0t7 November, 1996]
+ start: Fri Nov  8 18:20:26 1996 done: Fri Nov  8 18:20:41 1996
+ Scan time: 38.434 Display time:  2.166
+
+Function used was  FASTA 
+
+================================================================
+
+>> November 11, 1996
+
+ --> v30t71
+
+Made changes to complib.c, comp_thr.c, nxgetaa.c to allow scoring
+matrix to be modified in fastx3, fastx3_t.
+
+================================================================
+
+>> November 15, 1996
+
+ --> v30t72
+
+nxgetaa.c now accepts query sequences from "stdin" by using "-" as the
+input file name.  If DNA sequences are read in this mode, the "-n"
+option must be used.
+
+>> November 23, 1996
+
+Included code in nxgetaa.c and Makefile.sgi to get around a bug in SGI's
+sscanf() that prevented compressed GCG databases from being read properly.
+
diff --git a/doc/readme.v31t0 b/doc/readme.v31t0
new file mode 100644
index 0000000..0018ded
--- /dev/null
+++ b/doc/readme.v31t0
@@ -0,0 +1,160 @@
+
+>>November 1, 1997
+
+ --> v31t0
+
+version 31t of the fasta program package uses a more modular
+structure for comparison functions.  In addition to modular functions
+to initialize, calculate and align sequences, v31 provides a modular
+function for creating the alignment display.  This was required for
+fasty and fastf, which have very different alignment strategies from
+the other search programs.
+
+>>February 13, 1998
+
+modified nascii[] so that 0, 1, 2 are no longer end of sequence
+characters.
+
+prss3 added.  Unlike prss, prss3 uses -d # to specify the number of
+shuffles.
+
+>>March 18, 1998
+
+First public release.  Corrected problems with dropfz.c (which is
+used in fasty3, tfasty3).  Makefile is well tested, but other Makefile's
+are not.  PVM versions not tested.
+
+>>March 19, 1998
+
+Problem with unthreaded tfastx3, tfasty3 caused by bug in complib.c
+fixed. All Makefiles (Makefile.alpha Makefile.sun, Makefile.sgi,
+Makefile.linux) have been tested and work properly. Threaded versions
+do not work on linux (yet).  Function labeling problems with fasty3,
+tfasty3 corrected.
+
+>>March 20, 1998
+
+ --> v31t02
+
+Fixed problem with inconsistent openlib() calls that broke BLAST databases
+on some platforms.
+
+>>March 27, 1998
+
+ --> v31t04
+
+Fixed a long standing problem with fastx/tfastx and fasty/tfasty that
+caused various memory allocation problems and core dumps.
+
+The PVM version works again, but cannot produce alignments.  The
+change in the location of the modular display functions will require
+significant changes in the pvm display functions.  For the moment,
+showalign() has been commented out.
+
+Code tested on Macintosh without changes.
+
+Added some additional information in the results file.
+
+
+Please report bugs to wrp at virginia.edu
+
+>>April 3, 1998
+
+Removed some debugging code in faatran.c now that fastx/fasty bugs
+seem corrected.
+
+  FASTA --> v3.14
+
+Corrected uninitialized array elements in dropnfa.c.
+
+>>April 10, 1998
+
+Added facility for specifying SRCH_URL (the URL string that will be
+used to re-search the database) and REF_URL (the URL string that
+will be used to lookup the sequence) in url_subs.c.  This allows perl
+scripts to provide different databases for re-searching dynamically.
+
+>>April 16, 1998
+
+ --> v31t05
+
+Corrected problem with ignoring ','s in databases (','s are found in
+PIR).
+
+>>April 18, 1998
+
+Corrected some problems with sequence names for Entrez lookups and
+re-searching databases.
+
+Made minor modifications to nxgetaa.c and compacc.c for compatibility
+with Borland 'C' compiler for Win32 systems.  Including makefile.tc
+fasta.rsp, prss.rsp, and test.bat for Borland 'C'/win32.
+
+>>April 24, 1998
+
+ --> v31t06
+
+Fixed another bug in fasty3/tfasty3 alignment routines.
+
+Added additional information to the do_url1() (url_subs.c) function.
+The re-search URL can now reference the start, stop, and length of the
+library sequence to be re-searched with.  For DNA library sequences,
+these values are always in nucleotides, even with tfasta/x/y.
+
+
+>>May 12, 1998
+
+(no version change as v31t06 was not released prior to this)
+
+Correct nxgetaa.c GETLIB to deal correctly with BLAST NR database
+sequences with exceptionally long title lines.
+
+Fix bug with long -O results files.
+
+>>May 18, 1998
+
+ --> v31t07
+
+Corrected some bugs in information string lengths (e.g. gstring1,
+stat_str), disabling statistics with -z 0, translation of 'X' by
+saatran() (faatran.c) that caused problems with FASTX.
+
+A serious bug has been fixed in the FASTX alignment routines.
+For some pathological sequences, % identity increases from < 10%
+to 40%. The version number of the main program has not changed,
+but the version number of the fastx function has changed to 3.2.
+
+>>June 19, 1998
+
+ --> v31t08
+
+Corrected some problems with alignments with -m 10.
+
+Added -Z db_size option to modify apparent database size for
+expectation value calculation (used only for protein/protein FASTA and
+SSEARCH, FASTX, FASTY, TFASTX, and TFASTY).
+
+>>July 1, 1998
+
+ (no version change)
+
+Corrected size of lbnames[], lb_size[] in structs.h to accommodate MAX_LF
+files.
+
+>>July 13, 1998
+
+ --> v31t09
+
+Corrected problem in nxgetaa.c encountered when reading long sequences
+(that must be split) in fasta format.
+
+Corrected problem in statistics calculation encountered with a small number
+of very long DNA sequences.
+
+>>July 17, 1998
+
+ (no version change, date change for ssearch3)
+
+Corrected default expectation cutoff (it was 10, now it is 2.0) for
+DNA with ssearch3.
+
diff --git a/doc/readme.v31t1 b/doc/readme.v31t1
new file mode 100644
index 0000000..dd9fc7d
--- /dev/null
+++ b/doc/readme.v31t1
@@ -0,0 +1,113 @@
+>>July 22, 1998
+
+ --> v31t10
+
+Corrected problem with histogram when unscaled statistics used (e.g. prss3).
+
+Corrected problems with prss3 shuffled sequence prompt.  Provided option
+to enter number of shuffles, window size, for prss3.  Number of shuffles
+for prss3 can be entered as an option (-d #) or as the third argument
+on the command line (prss3 query lib 1000).
+
+Modified nrand.c, nrand48.c to use time to set random number.
+
+Corrected problems reading GCG formatted files with prss3.
+
+Corrected various problems with pvcomp* programs, but they still do
+not produce alignments with version 3.1.
+
+Two new programs, fastf3(_t) and tfastf3(_t) are available.  These
+programs compare a set of mixed peptide sequences from an Edman
+sequencer to a protein (fastf3) or DNA (tfastf3) database, using 
+the database sequences to de-convolve the peptide mixture.
+
+See fastf3.1
+
+>>August 11, 1998
+
+(no version change)
+
+Modified initfa.c so that using '-n' on the fastx/fasty command line
+would not cause problems.
+
+Changed labeling of query sequence length for fastx/fasty from 'aa' to 'nt'.
+
+>>August 18, 1998
+
+(no version change)
+
+Modified complib.c, comp_thr.c scaleswn.c, to report E()-value for only
+one related sequence if -z 3 is used.
+
+>>August 23, 1998
+
+ -->v31t11
+
+Some serious problems with prss3 have been corrected:
+
+(1) use dropnsw.c rather than dropgsw.c for more accurate low scores
+
+(2) modify estimation program; use scaleswe.c rather than scaleswn.c.
+    scaleswe.c has some improvements for estimation by moments and can
+    use MLE as well as mu/var (-z 3).
+
+(3) add p() estimate.
+
+(4) correct bugs in nrand48, which caused bad sequences for llgetaa.c
+
+(5) -Z number works properly for prss3 and other programs (fixed histogram).
+
+(6) a new program, ssearch3e, is available that uses the same scaling
+    routines as prss3 (scaleswe.c). prss3 will save the random
+    sequences it generates when the -r file option is given; the
+    sequences are in file_rlib.  ssearch3e (or ssearch3 or fasta) can
+    then do a search on exactly the same sequences that were used by prss3.
+
+A bug reading GCG format compressed DNA databases was fixed.
+
+Fixed a bug that caused query sequence not to be displayed with -m 10.
+
+Simple optimization in dropnfa.c improves performance 10%.
+
+>>Sept. 1, 1998
+
+(no version change)
+
+Modified nxgetaa.c to recognize "ACGTX" as nucleotides.
+
+>>Sept. 7, 1998
+
+ --> v31t12
+
+Added -z 11 - 15, which use shuffled sequences, rather than real
+sequences to calculate statistical estimates.  Because a shuffled
+sequence score is calculated for each sequence score, the search
+process takes twice as long.  In this first version, codons are not
+preserved during shuffles, so tfasta/x/y shuffles may not be as
+informative as they should be.
+
+Also fix a problem with prss3 shuffles.
+
+>>Sept. 14, 1998
+
+ (no version change; previous version not released)
+
+Corrected bugs in tfastx3/tfasty3 caused by using the -3 option with
+or without -i.  With the bug fixes; "-3" and "-3 -i" work as expected;
+"-3" gives the forward three frames, while "-3 -i" gives the reverse
+three frames.
+
+In addition, tfasta3/tfasta3_t was upgraded to perform the same way
+that tfastx/y3 does - i.e. a search with "-i -3" searches only frames
+4,5, and 6, while "-3" searches only frames 1, 2, and 3.
+
+>>Sept. 29, 1998
+
+ --> v31t13
+
+Corrected bugs in dropfx.c that were corrected in fasta30 last May,
+but lingered in fasta31.  Also included code to ensure that tfastx/y
+alignments against long introns would not overrun the alignment
+buffer.  Instead of overrunning the buffer, the message: ***aligment
+truncated *** is displayed.
+
diff --git a/doc/readme.v32t0 b/doc/readme.v32t0
new file mode 100644
index 0000000..4535a89
--- /dev/null
+++ b/doc/readme.v32t0
@@ -0,0 +1,407 @@
+
+FASTX/Y and FASTA (DNA) are now half as fast, because the programs now
+search both the forward and reverse strands by default.
+
+The documentation in fasta3x.me/fasta3x.doc has been substantially
+revised.
+
+>>October 9, 1999
+ --> v32t08 (no version number change)
+
+Added "-M low-high" option, where low and high are inclusion limits
+for library sequences.  If a library sequence is shorter than "low" or
+longer than "high", it will not be considered in the search.  Thus,
+"-M 200-250" limits the database search to proteins between 200 and
+250 residues in length.  This should be particularly useful for fasts3
+and fastf3.  This limit applies only to protein sequences.
+
+Modified scaleswn.c to fall back to maximum likelihood estimates of
+lambda, K rather than mean/variance estimates. (This allows MLE
+estimation to be used instead of proc_hist_n when a limited range of
+scores is examined.)
+
+>>October 20, 1999
+(no version change)
+
+Modify nxgetaa.c/nmgetaa.c to recognize 'N' as a possible DNA character.
+
+>>October 9, 1999
+ --> v32t08 (no version number change)
+
+Added "-M low-high" option, where low and high are inclusion limits
+for library sequences.  If a library sequence is shorter than "low" or
+longer than "high", it will not be considered in the search.  Thus,
+"-M 200-250" limits the database search to proteins between 200 and
+250 residues in length.  This should be particularly useful for fasts3
+and fastf3.  -M -500 searches library sequences < 500; -M 200 -
+searches sequences > 200. This limit applies only to protein
+sequences.
+
+Modified scaleswn.c to fall back to maximum likelihood estimates of
+lambda, K rather than mean/variance estimates. (This allows MLE
+estimation to be used instead of proc_hist_n when a limited range of
+scores is examined.)
+
+>>October 2, 1999
+ --> v32t08
+
+Many changes:
+
+(1) memory mapped (mmap()ed) database reading - other database reading fixes
+(2) BLAST2 databases supported
+(3) true maximum likelihood estimates for Lambda, K
+(4) Misc. minor fixes
+
+(1) (Sept. 26 - Oct. 2, 1999) Memory mapped database access.
+It is now possible to use mmap()ed access to FASTA format databases,
+if the "map_db" program has been used to produce an ".xin" file.  If
+USE_MMAP is defined at compile time and a ".xin" file is present, the
+".xin" will be used to access sequences directly after the file is
+mmap()ed.  On my 4-processor Alpha, this can reduce elapsed time by
+50%. It is not quite as efficient as BLAST2 format, but it is close.
+
+Currently, memory mapping is supported for type 0 (FASTA), 5
+(PIR/GCG ascii), and 6 (GCG binary).  Memory mapping is used if a
+".xin" file is present. ".xin" files are created by the new program
+"map_db".  The syntax for "map_db" is:
+
+	map_db [-n] "/dir/database.fa"
+
+which creates the file /dir/database.fa.xin.  Library types can be
+included in the filename; thus:
+
+	map_db -n "/gcggenbank/gb_om.seq 6"
+
+would be used for a type 6 GCG binary file. 
+
+The ".xin" file must be updated each time the database file changes.
+map_db writes the size of the database file into the ".xin" file, so
+that if the database file changes, making the ".xin" offset
+information invalid, the ".xin" file is not used. "list_db" is
+provided to print out the offset information in the ".xin" file.
+
+(Oct 2, 1999) The memory mapping routines have been changed to
+allow several files to be memory mapped simultaneously. Indeed, once a
+database has been memory mapped, it will not be unmap()ed until the
+program finishes.  This fixes a problem under Digital Unix, and should
+make re-access to mmap()ed files (as when displaying high scores and
+alignments) much more efficient.  If no more memory is available for
+mmap()ing, the file will be read using conventional fread/fgets.
+
+(Oct 2, 1999) The names of the database reading functions have been
+changed to allow both Blast1.4 and Blast2.0 databases to be read.  In
+addition, Makefile.common now includes an option to link both
+ncbl_lib.o and ncbl2_lib.o, which provides support for both libraries.
+However, Blast1.4 support has not been tested.
+
+The Makefile structure has been improved.  Each architecture specific
+Makefile (Makefile.alpha, Makefile.linux, etc) now includes
+Makefile.common.  Thus, changes to the program structure should be
+correct for all platforms.  "map_db" and "list_db" are not made with
+"make all".
+
+The database reading functions in nxgetaa.c can now return a database
+length of 0, which indicates that no residues were read.  Previously,
+0-length sequences returned a length of 1, which were ignored.
+Complib.c and comp_thr.c have changed to accommodate this
+modification.  This change was made to ensure that each residue,
+including the last, of each sequence is read.
+
+Corrected bug in nxgetaa.c with FASTA format files with very long
+(>512 char) definition lines.
+
+(2) (September 20, 1999) BLAST2 format databases supported
+
+This release supports NCBI Blast2.0 format databases, using either
+conventional file reading or memory mapped files.  The Blast2.0 format
+can be read very efficiently, so there is only a modest improvement in
+performance with memory mapping.  The decision to use mmap()'ed files
+is made at compile time, by defining USE_MMAP.  My thanks to Eamonn
+O'Toole of DEC/Compaq, and Daryl Madura of Sun Microsystems, for
+providing mmap()'ed modifications to fasta3.  On my machines, Blast2.0
+format reduces search time by about 30%.  At the moment, ambiguous DNA
+sequences are not decoded properly.
+
+(3) (September 30, 1999) A new statistical estimation option is
+available.  -z 2 has been changed from ln()-scaling, which never
+should have been used, to scaling using Maximum Likelihood Estimates
+(MLEs) of Lambda and K.  The MLE estimation routines were written by
+Aaron Mackey, based on a discussion of MLE estimates of Lambda and K
+written by Sean Eddy.  The MLE estimation examines the middle 95% of
+scores, if there are fewer than 10000 sequences in the database;
+otherwise it excludes (censors) the top 250 scores and the bottom 250
+scores.  This approach seems to effectively prevent related sequences
+from contaminating the estimation process.  As with -z 1, -z 12 causes
+the program to generate a shuffled sequence score for each of the
+library sequences; in this case, no censoring is done.  If the
+estimation process is reliable, Lambda and K should not vary much with
+different queries or query lengths.  Lambda appears not to vary much
+with the comparison algorithm, although K does.
+
+(4) Minor changes include fixes to some of the alignment display routines,
+individual copies of the pstruct structure for each thread, and some
+changes to ensure that every last residue in a library is available
+for matching (sometimes the last residue could be ignored).  This
+version has undergone extensive testing with high-throughput sequences
+to confirm that long sequences are read properly.  Problems with
+fastf3/fasts3 alignment display have also been addressed.
+
+>>August 26, 1999 (no version change - not released)
+
+Corrected problem in "apam.c" that prevented scoring matrices from
+being imported for [t]fasts3/[t]fastf3.
+
+>>August 17, 1999
+ --> v32t07
+
+Corrected problem with opt_cut initialization that only appeared
+with pvcomp* programs.
+
+Improved calculation of FASTA optcut threshold for DNA sequence
+comparison for match scores much less than +5 (e.g. +3).  The previous
+optcut threshold was too high when the match penalty was < 4 and
+ktup=6; it is now scaled more appropriately.
+
+Optcut thresholds have also been raised slightly for
+fastx/y3/tfastx/y3.  This should improve performance with minimal
+effects on sensitivity.
+
+>>July 29, 1999
+(no version change - date change)
+
+Corrected various uninitialized variables and buffer overruns
+detected.
+
+>>July 26, 1999 - new distribution
+(no version change - v32t06, previous version not released)
+
+Changed the location of "(reverse complement)" label in tfasta/x/y/s/f
+programs.
+
+Statistical calculations for tfasta/x/y in unthreaded version
+corrected.  Statistical estimates for threaded and unthreaded versions
+of the tfasta/x/y/s/f programs should be much more consistent.
+
+Substantial modifications in alignment coordinate calculation/
+presentation.  Minor error in fastx/y/tfastx/y end of alignment
+corrected.  Major problems with tfasta alignment coordinates
+corrected.  tfasta and tfastx/y coordinates should now be consistent.
+
+Corrected problem with -N 5000 in tfasta/x/y3(_t) searches encountered
+with long query sequences.
+
+Updated pthr_subs.c/Makefile.linux to increase the pthreads stacksize
+to try to avoid "cannot allocate diagonal arrays" error message.
+Pthreads stacksize can be changed with RedHat 6.0, but not RedHat 5.2,
+so Makefile.linux uses -DLINUX5 for RedHat5.* (no pthreads stack size).
+I am still getting this message, so it has not been completely
+successful.  Makefile.linux now uses -DALLOCN0 to avoid this problem,
+at some cost in speed.
+
+The pvcomp* programs have been updated to work properly with
+forward/reverse DNA searches.  See readme.pvm_3.2.
+
+>>July 7, 1999 - not released
+ --> v32t06
+
+Corrected bug in complib.c (fasta3, fastx3, etc) that caused core
+dumps with "-o" option.
+
+Corrected a subtle bug in fastx/y/tfastx/y alignment display.
+
+>>June 30, 1999 - new distribution
+(no version change)
+
+Corrected doinit.c to allow DNA substitution matrices with -s matrix
+option.
+
+Changed ".gbl" files to ".h" files.
+
+>>June 2 - 9, 1999 - new distribution
+(no version change)
+
+Added additional DNA lambda/K/H to alt_parms.h.  Corrected some
+other problems with those tables for the case where (inf,inf)
+gap penalties were not included.
+
+Fixed complib.c/comp_thr.c error message to properly report filename
+when library file is not found.
+
+Included approximate Lambda/K/H for BL80 in alt_parms.h.
+BL80 scoring matrix changed from 1/3 bit to 1/2 bit units.
+
+Included some additional perl files for searchfa.cgi, searchnn.cgi
+in the distribution (my-cgi.pl, cgi-lib.pl).
+
+>>May 30, 1999, June 2, 1999 - new distribution
+(no version number change)
+
+Added Makefile.NetBSD, if !defined(__NetBSD__) for values.h.  Changed
+zs_to_E() and z_to_E() in scaleswn.c to correctly calculate E() value
+when only one sequence is compared and -z 3 is used.
+
+>>May 27, 1999
+(no version number change)
+
+Corrected bug in alignment numbering on the % identity line
+	27.4% identity in 234 aa (101-234:110-243)
+for reverse complements with offset coordinates (test.aa:101-250)
+
+>>May 23, 1999
+(no version number change)
+
+Correction to Makefile.linux (tgetaa.o : failed to -DTFAST). 
+
+>>May 19, 1999
+(no version number change)
+
+Minor changes to pvm_showalign.c to allow #define FIRSTNODE 1.
+Changes to showsum.c to change off-end reporting.  (Neither of these
+changes is likely to affect anyone outside my research group.)
+
+>>May 12, 1999
+ --> v32t05
+
+Fixed a serious bug in the fastx3/tfastx3 alignment display which
+caused t/fastx3 to produce incorrect alignments (and incorrectly low
+percent identities).  The scores were correct, but the alignment
+percent identities were too low and the alignments were wrong.
+
+Numbering errors were also corrected in fastx3/tfastx3 and
+fasty3/tfasty3 and when partial query sequences were used.
+
+>>May 7, 1999
+
+Fixed a subtle bug in dropgsw.c that caused do_work() to calculate
+incorrect Smith-Waterman scores after do_walign() had been called.
+This affected only pvcompsw searches with the "-m 9" option.
+
+>>May 5, 1999
+
+Modified showalign.c to provide improved alignment information that
+includes explicitly the boundaries of the alignment.  Default
+alignments now say:
+
+Smith-Waterman score: 175;  24.645% identity in 211 aa overlap (5:207-7:207)
+
+>>May 3, 1999
+
+Modified nxgetaa.c, showsum.c, showbest.c, manshowun.c to allow a
+"not" superfamily annotation for the query sequence only.  The
+goal is to be able to specify that certain superfamily numbers be
+ignored in some of the search summaries.  Thus, a description line
+of the form:
+
+>GT8.7 | 40001 ! 90043 | transl. of pa875.con, 19 to 675
+
+says that GT8.7 belongs to superfamily 40001, but any library
+sequences with superfamily number 90043 should be ignored in any
+listing or summary of best scores.
+
+In addition, it is now possible to make a fasta3r/prcompfa, which is
+the converse of fasta3u/pucompfa. fasta3u reports the highest scoring
+unrelated sequences in a search using the superfamily annotation.
+fasta3r shows only the scores of related sequences.  This might be
+used in combination with the -F e_val option to show the scores
+obtained by the most distantly related members of a family.
+
+>>April 25, 1999
+
+ -->v32t04 (not distributed)
+
+Modified nxgetaa.c to remove the dependence of tgetaa.o on TFASTA
+(necessary for a more rational Makefile structure).  No code changes.
+
+>>April 19, 1999
+
+Fixed a bug in showalign.c that displayed incorrect alignment coordinates.
+(no version number change).
+
+>>April 17, 1999
+
+ --> v32t03
+
+A serious bug in DNA alignments when the sequence has been broken into
+multiple segments that was introduced in version fasta32 has been
+fixed.  In addition, several minor problems with -z 3 statistics on
+DNA sequences were fixed.
+
+Added -m 9 option, which unfortunately does different things in
+pvcompfa/sw and fasta3/ssearch3.  In both programs, -m 9 provides the
+id's of the two sequences, length, E(), %_ident, and start and end of
+the alignment in both sequences.  pvcompfa/sw provides this
+information with the list of high scoring sequences.  fasta3/ssearch3
+provides the information in lieu of an alignment.
+
+>>March 18, 1999
+
+ --> v32t02
+
+Added information on the algorithm/parameter description line to
+report the range of the pam matrices.  Useful for matrices like
+MD_10, _20, and _40 which require much higher gap penalties.
+
+>>March 13, 1999 (not distributed)
+
+ --> v32t01 
+
+ -r results.file  has been changed to -R results.file to accommodate
+ DNA match/mismatch penalties of the form: -r "+1/-3".
+
+>>February 10, 1999
+
+Modify functions in scalesw*.c to prevent underflow after exp() on
+Alpha Linux machines.  The Alpha/LINUX gcc compiler is buggy and
+doesn't behave properly with "denormalized" numbers, so "gcc -g -m
+ieee" is recommended.
+
+Add "Display alignments also (y/n)[n] "
+
+pvcomplib.c again provides alignments!!  In addition, there is a
+new "-m 9" option, which reports alignments as:
+
+>>>/home/wrp/slib/hlibs/hum0.aa#5>HS5 gi:1280326 T-cell receptor beta chain 30 aa, 30 aa vs /home/wrp/slib/hlibs/hum0.seg library
+HS5         	  30	HS5         	  30	1.873e-11	1.000	  30	   1	  30	   1	  30
+HS5         	  30	HS2249      	  40	1.061e-07	0.774	  31	   1	  30	   7	  37
+HS5         	  30	HS2221      	  38	1.207e-07	0.833	  30	   1	  30	   7	  35
+HS5         	  30	HS2283      	  40	1.455e-07	0.774	  31	   1	  30	   7	  37
+HS5         	  30	HS2239      	  38	1.939e-07	0.800	  30	   1	  30	   7	  35
+
+where the columns are:
+
+query-name      q-len   lib-name      lib-len   E()             %id    align-len  q-start q-end   l-start l-end
+
+>>February 9, 1999
+
+Corrected bug in showalign.c that offset reverse complement alignments
+by one.
+
+>>February 2, 1999
+
+Changed the formatting slightly in showbest.c to have columns line up better.
+
+>>January 11, 1999
+
+Corrected some bugs introduced into fastf3(_t) in the previous version.
+
+>>December 28, 1998
+
+Corrected various problems in dropfz.c affecting alignment scores
+and coordinates.
+
+Introduced a new program, fasts3(_t), for searching with peptide
+sequences.
+
+>>November 11, 1998
+
+  --> v32t0
+
+Added code to correct problems with coordinate number in long library
+sequences with tfastx/tfasty.  With this release, sequences should be
+numbered properly, and sequence numbers count down with reverse
+complement library sequences.
+
+In addition, with this release, fastx/y and tfastx/y translated
+protein alignments are numbered as nucleotides (increasing by 3,
+labels every 30 nucleotides) rather than codons.
+
diff --git a/doc/readme.v33t0 b/doc/readme.v33t0
new file mode 100644
index 0000000..013453b
--- /dev/null
+++ b/doc/readme.v33t0
@@ -0,0 +1,1268 @@
+
+  $Id: readme.v33t0 342 2010-06-28 19:57:56Z wrp $
+  $Revision: $
+
+================ readme.v33t0 ================
+
+This release includes an MPI implementation of the parallel
+library-vs-library comparison code.  See readme.mpi_3.3 and
+readme.pvm_3.3 for more information.
+
+=====
+>>July 9, 2001
+
+Considerable changes to support no-global library functions. 
+
+(1) Separate ascii/sequence mapping arrays are used by the
+    query-reading (qascii), library-reading (lascii), and sequence
+    comparison function (pascii) routines.  As a result, there is no
+    longer a need for tgetlib.o/lgetlib.o - lgetlib.o can serve both
+    functions.
+
+(2) This also allows us to remove all #ifdef TFAST/FASTX conditionals
+    from complib.c/comp_thr.c/p2_complib.c.  We no longer need
+    tcomp_thr.o, comp_thrx.o, etc.  We still have a variety of
+    p2_complib.o variations to support the different c34.work* files.
+
+(3) Because non-global openlib/getlib functions are available, exactly
+    the same open/get functions are available for reading both the
+    query and reference libraries in pv34comp* programs.  The
+    host-specific openlib/getlib functions in hxgetaa.c are now
+    provided by nmgetlib.c, etc. This has two effects:
+
+    (a) it is now possible to compare a query database generated by an
+        SQL query to a library database generated by a different SQL
+        query.
+
+    (b) pv34comp* has lost (at least in this version) the ability to
+        automatically detect the query sequence type. To search with a
+        DNA query, you MUST use "-n".
+
+(4) the resetp() function is now responsible for almost all of the
+    function specific (TFAST/FASTX/etc) initializations.  All of the
+    function specific code has been removed from complib.c/comp_thr.c
+    and most of it has been moved to initfa.c/resetp().
+
+(5) manageacc.c has been merged into compacc.c (mostly prhist()).
+
+(6) Although it may reflect a subtle bug in my code, it is not
+    possible to reliably run threaded/memory mapped versions of the
+    fasta34_t code.  I have spent considerable time tracking down the
+    problem, and have determined that, in threaded code, something
+    happens during the thread initialization to corrupt the
+    description offset information used when files are memory mapped.
+    This never occurs when the unthreaded versions of the code are
+    used.  And it does not occur under MacOSX, Compaq Tru64Unix, Sun
+    Solaris/Sparc, or SGI IRIX.
+
+    Thus, I cannot recommend using the threaded code versions (_t)
+    under Linux (RH6.2 or 7.1).
+
+=====
+>>June 1, 2001
+
+Many changes to accommodate a new - no global variable - strategy for
+reading sequence databases.  Every time a file is opened, a struct
+lmf_str is allocated which can be used for memory mapped files, ncbl2
+files, and mysql files.
+
+In addition, an open'ed file has a default sequence type: DNA or
+protein, or one can open a file in a mode that will allow the sequence
+type to be changed.
+
+=====
+>>May 18, 2001		CVS: fa33t09d0
+
+A new compile time parameter - -DGAP_OPEN, is available to change the
+definition of the "-f gap-open" parameter from the penalty for the
+first residue in a gap to a true gap-open penalty, as is used in BLAST
+and many other comparison algorithms.  This will probably become the
+default for fasta in version 3.4.
+
+Fixes to conflicts between "-S" and "-s matrix".  When a scoring
+matrix file was specified, lower-case alignments were not displayed
+with -S (although the scores were calculated properly).
+
+More extensive testing of mysql_lib.c (mySQL query-libraries) with
+the pv4comp* and mp4comp* programs.
+
+=====
+>>April 5, 2001		CVS: fa33t08d4b3
+
+Changes in nmgetlib.c and ncbl2_mlib.c to return long sequence
+descriptions for PCOMPLIB (pv4/mp3comp*).  Also fix p2_complib.c to
+request DNA library for translated comparisons.
+
+Fix for prss33(_t) to read both sequences from stdin.
+
+=====
+>>March 27, 2001	CVS: fa33t08d4
+ --> fa33t08d4
+
+Problems in ncbl2_mlib.c found searching NCBI non-redundant nucleotide
+database "nt" were fixed.  Testing revealed a minor memory leak, which
+was fixed by modifying showbest.c, showalign.c, comp_thr.c, complib.c,
+and p2_complib.c to remember the last opened database file more
+effectively.
+
+Modifications to allow 64-bit fseek/ftell on machines like Sun,
+Linux/Intel, that support -D_FILE_OFFSET_BITS=64, -D_LARGE_FILE_SOURCE
+off_t, and fseeko(), ftello() with the option -DUSE_FSEEKO.  Machines
+with 64-bit long's do not need this option.  Machines with 32-bit
+longs that allow files >2 Gb can do so with 64-bit file access
+functions, including fseeko() and ftello(), which work with off_t file
+offsets instead of long's.
+
+=====
+>>March 3, 2001		CVS: fa33t08d2
+
+Corrected problems in nmgetaa.c and mysql_lib.c with parallel
+programs, and one serious problem with alternate DNA scoring matrices
+(initfa.c, initsw.c) not being set properly.  A subtle problem with
+the merge of scaleswn.c and scaleswg.c is fixed.
+
+>>February 17, 2001
+
+Modified mysql_lib.c to use "#", rather than "%ld", to indicate the
+position of the GID.  This change was made because sprintf() cannot be
+used reliably to generate an SQL string, as '"' and '%' are used in 
+such strings.
+
+=====
+>>January 17, 2001
+(no version change, date change)
+
+Minor fixes to initfa.c, initsw.c to deal with DNA scoring matrices
+properly. "-n -s dna.mat" is required for the sequence/matrix to be
+recognized as DNA.
+
+>>January 16, 2001
+-->v34t00
+
+Merge of the main CVS trunk - fa33t06 with the latest release branch,
+fa33t08.
+
+In addition, PCOMPLIB mods have been made to mysql_lib.c.  Because
+p2_complib.c gets sequence description information during the first
+read of the database, the mysql_query must be changed to return:
+result[0]=GID, result[1]=description, result[2]=sequence.  In the
+PCOMPLIB case, the other SQL queries (for GID description, sequence)
+are not necessary but must still be provided.
+
+=====
+>>January 16, 2001
+(no version change, previous version not released)
+
+changes to p2_complib.c to correct openlib() incompatibility.
+
+changes to nmgetaa.c, ncbl2_lib.c to incorporate PCOMPLIB.  nxgetaa.c
+removed.
+
+=====
+>>January 12, 2001
+(no version change, previous version not released)
+
+Change to initfa.c to move ktup check from query_parm() to last_init().
+
+=====
+>>January 10, 2001
+--> v33t08
+
+Fixes to complib.c, comp_thr.c to deal properly with long query
+protein sequences when a short library chunk (e.g. -N 5000) was given.
+In the case where the chunk size is too short, it will be reset to a
+length which allows the search to proceed, by including an amount of
+new sequence that is equal to the amount of overlap sequence.
+
+scaleswn.c and scaleswg.c have been merged.
+
+v33t08 includes the initial implementation for mySQL described below
+for v33t07x.
+
+======
+>>Dec. 20, 2000
+--> v33t07x
+
+Initial implementation of a syntax for mySQL database queries.  A new
+file, mysql_lib.c has been added, and changes have been made to
+nmgetaa.c (which should now replace nxgetaa.c) and altlib.h.  A mySQL
+database search needs a file with 4 parts:
+
+(1) description of the database, user, password
+(2) a select statement that generates the set of protein sequences
+    as: UID, sequence
+(3) a select statement that generates a UID, description given a UID
+(4) a select statement that generates a single UID, sequence given a UID
+    	
+Each of the four parts should be separated by ';'.  For example, in
+the database that we are using for testing, a file "demo.sql" that
+contains:
+
+================
+localhost taxonomy username secret;
+SELECT proteins.gid, proteins.sequence FROM proteins,swissprot WHERE proteins.gid=swissprot.gid AND swissprot.spid IS NOT NULL;
+select proteins.gid, concat(swissprot.spid," ",proteins.description) from proteins,swissprot where proteins.gid=%ld AND swissprot.gid=proteins.gid;
+select gid, sequence from proteins where gid=%ld;
+================
+
+will find all the proteins in the BLAST "nr" database that also have
+SwissProt ID's when given the command line:
+
+	fasta33 -q query.aa "demo.sql 16"
+
+At least for simple queries, there is surprisingly little overhead for the
+search.  For more complex queries involving several tables, the overhead
+can be significant.
+
+At the moment, libraries that need the functions in mysql_lib.c will
+use library type 16.  We may also use file type 17 for SQL queries
+that return binary sequences.
+
+This implementation of mysql_lib.c was written to require a minimal
+amount of change to the other programs.  Only nmgetaa.c and altlib.h
+needed to be changed to incorporate this new capability.  One result
+of this limitation is that one cannot mix mySQL databases queries with
+other databases in the same search.  Eventually, I would like to make
+a mySQL database like any other, so that several mysql database
+queries could be searched in the same run, and mysql databases could
+be mixed with other (flat file) databases, but this will require some
+changes in the function calls throughout the code.  (Right now, the
+various programs do not distinguish between an openlib() that is made
+before searching a large database, and one before retrieving a single
+sequence.  This must be changed for a database query like mySQL to
+behave like other databases.)
+
+Several mySQL demo files have been provided: mysql_demo*.sql.
+
+(10 January 2001) The mySQL code has been tested on Intel Linux and
+Compaq/Alpha/Tru64 Unix.
+
+>>Dec. 9, 2000
+
+Changes to apam.c to tie different default gap penalties to
+alternate scoring matrices.  In addition, changes to apam.c, to deal
+with user-specified matrices with or without '*'.
+
+>>Nov. 5, 2000 (date updated)
+
+pst.dnaseq can now have 3 values, -1, or 0-> protein, 1->DNA, and 2->other.
+This becomes important for things like init_karlin_a, which needs a
+background frequency of residues.
+
+>>Nov. 1, 2000
+
+Significant bug fixes for the -z 6/-z 16 option.  An uninitialized
+variable was fixed in karlin.c, and comp_thr.c did not pass the
+correct composition argument type in find_zp().  The -z 6/16 option
+has now been tested and works correctly on Alphas, Linux x86, SGI, Sun
+and Mac OSX. Another problem was fixed in scaleswn.c (simplex()) that
+prevented the code from being reused by the pv4/mp4 complib programs.
+
+>>Oct. 9, 2000
+
+Several changes made to accommodate Mac OSX.  Longer lists of superfamily
+numbers now supported in p[su]4comp/m[su]4comp programs.
+
+>>Sept 25, 2000
+
+All global variables have been removed from scaleswn.c. The last to
+go, db_struct db, required many edits, because until now, the fasta
+programs have kept two versions of the db_struct data (entries,
+length). One version was kept by the main program, which updated entry
+number and db length as sequences were read; a second copy of this
+information was kept by the statistical estimation routines.  Now
+there is only one copy, which means that the E() values will be a
+function of the complete database, not the database with some high
+scoring sequences removed.
+
+>>Sept 23, 2000
+
+Continued removal of global variables from scaleswn.c.  Only one
+global is left, db_struct db, which contains the number of entries in
+the database and the number of residues.  It will be the next to go
+(changing all the zs_to_*() functions) and scaleswn.c will be free
+of globals.  scaleswg.c is gone - scaleswn.c compiles to scaleswg.c
+with -DNORMAL_DIST.
+
+>>Sept 20, 2000
+
+Removal of histogram globals required changes in p2_complib.c as well.
+p_complib.c has not been updated.  scaleswg.c has been modified to
+reflect the new histogram strategy.
+
+>>Sept 19, 2000
+
+Substantial changes to remove globals for printing histogram.  m_msg
+now contains a hist_str, which keeps histogram information.
+
+>>Sept. 19, 2000
+(no version change, previous version not released)
+
+Correct bug introduced into scaleswn.c (inithist()) by changing
+score2_sums[], score_sums[] from int to double.
+
+Reporting of version numbers is more consistent between fasta33,
+fasta33_t, and pv4compfa/mp4compfa.  The programs now report the same
+numbers/dates in similar places.
+
+>>Sept. 15, 2000
+--> v33t07
+
+Changes to fix problems with statistical estimates when a large
+fraction (but not all) of the database is related.  Several users
+reported problems when searching with rRNA genes with version 33t06.
+In some cases, a 100% identical match over 1500 nt would not be
+statistically significant against a search of the bacterial division
+of Genbank.  This problem was not seen with some releases of v33t05.
+
+The cause of the problem was a change between v33t05 and v33t06 to
+allow scoring matrices with unusual scaling to be used.  In v33t05,
+there was a line that excluded all scores > 300 from the statistical
+estimation procedure.  While 300 is a high score with any "normal"
+scoring matrix, some investigators were using matrices scaled 10X, so
+that a score of 300 was really a score of 30 with a conventional
+matrix, and should not be excluded.  Unfortunately, removing the test
+to exclude scores > 300 meant that when a rRNA sequence was used to
+search the bacterial division, tens of thousands of high scoring
+related sequences were treated as if they were unrelated, with the
+result that the variance estimates were much too high, and thus high
+real scores had low z-scores, and thus were not statistically
+significant.  (There appear to be more than 20,000 rRNA sequences in
+the bacterial division of Genbank, almost 25% of all sequences).
+
+The solution to the problem is a substantial enhancement in the
+strategies used to exclude high-scoring, related sequences, the -z 1,
+4, and 5 parameter estimation strategies.  The programs now estimate
+the expected high scoring sequence by calculating an ungapped Lambda
+and K, and then use a relatively conservative threshold for excluding
+scores that are higher than would be expected 0.01 times by chance.
+By calculating Lambda and K, we can scale the cutoff thresholds to
+allow scoring matrices with unusual scales.  For "normal" searches,
+there should be little change, but there should be an improvement for
+searches with large numbers of related sequences in the database.
+
+As a result of testing for this change, a bug in the karlin() function
+used with -z 6 was found and corrected.
+
+=======
+>>Sept. 9, 2000
+
+Changes to manshowbest.c to include correct display coordinates.
+
+Significant changes to structs.h, param.h, p2_complib.c,
+p2_workcomp.c, to store and use a reliable a_struct for alignment
+coordinates.
+
+Other cosmetic changes.
+
+>>Sept. 7, 2000
+
+Minor changes to complib.c, showrss.c, so that prss33 -q uses 200
+shuffles and prss33 provides bit scores, rather than z-scores.
+(no version number change).
+
+Modifications to p2_complib.c to include superfamily numbers for
+ps4comp* ms4comp*.
+
+>>Aug 22, 2000
+
+Changes to mmgetaa.c, ncbl2_mlib.c, dropfs.c to accommodate AIX.
+00README.1st updated to reflect the current version and correct
+outdated information on threads.
+
+>>Aug. 3, 2000
+
+Modifications to initpam2() in initsw.c to correct a problem with pam_x
+when the -S option is used.
+
+Modifications to compacc.c, scaleswn.c to ensure that residue numbers
+are calculated properly when more than 2 Gb of sequence is searched.
+
+>>July 12, 2000
+
+Modifications to dropnfa.c so that DNA matches to 'N' will be included
+in the "ungapped %identity".  Thus, a sequence that is 100% identical
+for 100 nt on either side of a 100 nt region that has been masked to
+'NNNNN' will be reported as: "67% identical (100% ungapped)".  This
+has been added to deal with masked BAC-end databases.  It would be
+better if masking changed the letters to lowercase, but the mouse
+BAC-end sequences at TIGR use 'NNNNN'.  This is currently available
+only for the fasta function, not [t]fast[x/y], etc, and only for DNA
+sequences.
+
+mk_n_pam() in apam.c modified to ensure that mismatch scores of -1
+remain -1.
+
+>>June 25, 2000
+
+Modification to nxgetaa.c, nmgetaa.c, mmgetaa.c to return Genbank Accession
+number as part of the descriptive string.
+
+>>June 11, 2000
+
+(no version change - not yet released)
+
+Modifications to calcons(), calc_id(), showbest(), p_workcomp.c to
+provide ngap_q (number of alignment gaps in query) , ngap_l (number
+of gaps in library) information for -m 9 output.
+
+>>June 6, 2000
+
+(no version change - not yet released)
+
+Modified scaleswn.c to provide better support for unconventional
+scoring matrices, in particular, scoring matrices where every
+value is 50-times higher.  Previous versions of the MLE estimator (-z
+2) started with lambda = 0.2, which is too high for a scoring matrix
+going from -500:+1500. The initial estimate for lambda is now
+calculated using the formula: lambda = pi/sqrt(6*variance).  For the
+default -z 1, a restriction to limit scores to a maximum of 300 for
+the statistical analysis was removed.
+
+>>June 3, 2000
+
+Modified alignment output, and -m 9 and -m10, to report an "ungapped"
+identity as well as the traditional "gapped" identity.  The
+traditional "gapped" identity reports the number of identities divided
+by the overall length of the alignment, including gaps.  The
+"ungapped" identity does not include gaps in the length of the
+alignment.  This new value is included for alignments that include
+introns; thus, a tfastx33 search might find the 100% identical genomic
+sequence but report the gapped percent identity if a short intron were
+included in the alignment (the alignment probably would not span a
+long exon) as 66%.  The "ungapped" identity would remain 100%.  The
+ungapped identity value is also shown in the "-m 9" output line after
+the "gapped" fraction identical.
+
+>>June 1, 2000
+
+Modified -m 9 output to provide fraction identical, alignment boundary
+information with the initial list of high scoring sequences, just as
+the pv3comp and mp_comp versions do.  The -m 9 option now shows the
+same alignment display as -m 0, but the width of the alignment is
+increased by 40.  Thus, by default, -m 9 will show the list of best
+hits, with percent identity, Smith-Waterman score, and alignment
+boundaries initially, and then show standard (-m 0)
+alignments with 100 residues/line.
+
+>>May 29, 2000
+
+Correct some problems with reading data files with <CR>'s under unix.
+
+nmgetaa.c/nxgetaa.c/mmgetaa.c have been modified to convert <TAB>
+('\t') to <SPC> (' ') in descriptive lines.
+
+=======
+
+>>May 3, 2000
+
+  Corrected problem with very low mean_var in fit_llen() in scaleswn.c.
+
+>>May 2, 2000
+  (no version number change - previous version not released)
+
+  Merged fasta33t05d2 with fasta33t06.  Also removed restriction on
+"-M size-range" to proteins - the size range now can be applied to DNA
+as well.
+
+>>May 1, 2000
+ (changes to v33t05d merged into v33t06) 
+
+Introduced changes to include '*' as a valid sequence character, which
+indicates termination.  Thus, 'TGA', 'TAG', and 'TAA' are now
+translated to '*' rather than 'X', and the protein PAM matrices have
+been modified to provide a match score of approximately 1/2 the max
+identity score for a '*:*' match.  Otherwise, '*' is the same as 'X'.
+This change only affects query sequences that include a '*' to
+indicate an end of sequence, the '*' is not there by default.
+
+The inclusion of '*' broke some things in tfasts33, tfastf33, fasty33,
+and tfasty33, which were fixed today.
+
+>>March 28, 2000/April 24, 2000
+ --> v33t06
+
+(a) -z 6 statistics that factor in composition
+(b) -smatrix-offset pam-offset parameter
+
+(a) This release provides a new statistics option, -z 6, which
+provides a more sophisticated model that accounts for sequence
+composition.  When -z 6 is used (only for fasta33(_t) and
+ssearch33(_t)), the program calculates a composition parameter
+comp=1/lambda using a modified version of the Karlin-Altschul karlin()
+function.  As a result, every sequence in the database has an
+associated length (n1) and composition (comp).
+
+The length n1 and composition comp are used in the maximum likelihood
+estimation described by Mott (1992) Bull. Math. Biol. 54:59-75.  Four
+parameters are estimated, a0, a1, a2, and b1, and the probability of
+obtaining a score is then:
+
+p(s >= x) = 1-exp(-exp(-( a0 + a1*comp + a2*comp*log(n0*n1) + x)/(b1*comp)))
+
+The maximum likelihood estimates of a0, a1, a2, and b1 are calculated
+using the Nelder-Mead simplex search strategy.
+
+The average Lambda is reported for the search using Lambda =
+1/(b1*ave_comp).  Where ave_comp is the geometric mean of the comp values
+calculated during the statistical estimates.
+
+The "lambda/comp" calculation can fail for sequences with very biased
+amino acid composition.  When this occurs, 'comp' is set to -1.0 (as
+is 'H', the information content parameter) and the 'ave_comp' value is
+used to calculate statistical significance.  (But obviously 'ave_comp'
+is not really appropriate, since if the sequence had an average 'comp'
+value, it would have been calculated.)  When -z 6 is used, the
+alignment display shows the 'comp' and 'H' values for that library
+sequence.
+
+(b) Scoring matrix offsets - The main reason that the "lambda/comp"
+calculation fails is that, for the particular query/library sequence
+pair, the expected score is not < 0, instead, Sum {p_ij S_ij} >= 0.0.
+This problem is reported to 'stderr' when it occurs.  The simplest
+solution to the problem is to provide an offset to the scoring matrix;
+for example, to use Blosum62 - 1, which ranges from +10 to -5, rather
+than the standard +11 to -4.  This option used to be available with
+the -S offset option, but -S is now used to specify a lower-case
+seg-ed database.  The offset can now be specified as part of the
+scoring matrix name.  Thus, "-s BL62-1" uses Blosum62 reduced by 1 at
+each entry.  The '-' character is used to indicate an offset, so
+scoring matrix files must not have a '-' in their name.
+Alternatively, "-s BL80+1" or "-s BL80--1" would add one to each value.
+
+nxgetaa.c, nmgetaa.c, and mmgetaa.c have been edited to avoid string
+run-off problems after strncpy().
+
+Fixed problem where positive gap extension penalties in ssearch33
+were not converted to negative values.
+
+>>April 8, 2000
+
+Fixed problem in calculating corrected sequence lengths for
+Altschul-Gish probabilities.
+
+>>March 30, 2000
+  (no version change, date updated to March 30, 2000)
+
+Corrected problem with -m 9 option.
+
+The '*' character is now available to allow translated alignments to
+extend through the termination codon. Thus, if a protein sequence ends
+with a '*', and matches in to a translated termination codon, the
+score will be increased.  The *:* match score is set to 1/2 the max
+positive score for the matrix (see upam.h).  This strategy can also be
+used to upweight a match that extends all the way to the end of a
+full-length sequence by putting '*' at the end of both the query and
+library protein sequences.  Recognition of '*' will probably become a
+command line option.
+
+>>March 21, 2000
+  (no version change, previous version not distributed)
+
+Changes to map_db.c, list_db.c, and mmgetaa.c to accommodate large
+sequence files.  Long (64-bit on some systems) variables are now used
+to specify file and memory position for the memory mapped functions.
+As a result, there are now two *.xin (memory mapped index) file
+formats: MP0, which uses 32-bit longs, and MP1, which uses 64-bit
+longs. On 64-bit machines, MP0 32-bit indices are read properly, but
+limit the database size to 2 or 4 Gb; MP1 64-bit indices allow very
+large databases.  Blast2.0 formatdb databases are still limited to
+4Gb.  To compile map_db.c to generate 64-bit index files, include the
+compile time option -DBIG_LIB64 in the Makefile.  (Currently this
+option has been tested only on the DEC Alpha and SGI platforms, and
+will work only with Unix versions that provide 64-bit longs and 64-bit
+ftell()'s.)
+
+The -R results file now uses sfn_cmp() to report a matching
+superfamily number, if one exists, and '0' otherwise.
+
+>>March 12, 2000
+  (no version change, previous version not distributed)
+
+Provide new strategy for specifying library abbreviations.  In
+addition to:
+
+	fasta33 query.aa %anr
+
+one can also specify:
+
+	fasta33 query.aa %pir1+sp+nr
+or
+	fasta33 query.aa +pir1+sp+nr
+or 
+	fasta33 query.aa %+pir1+sp+nr
+
+where the + anywhere in the library name string indicates that
+variable length library names, separated by '+', are being used (the
+last '+' is optional).  The FASTLIBS file then becomes:
+
+================
+PIR1 Annotated Protein Database (rel 56)$0+pir1+/slib2/blast/pir1.lseg
+NBRF Protein database (complete)$0+nbrf+@/seqlib/lib/NBRF.nam
+NRL_3d structure database$0D/seqlib/lib/nrl_3d.seq 5
+NCBI/Blast non-redundant proteins$0+nr+/slib2/blast/nr.lseg
+NCBI/Blast Swissprot$0+sp+/slib2/blast/swissprot.lseg
+================
+
+The two abbreviation types, single letter and +word+, cannot be
+intermixed, and at least initially, +word+ specifiers are
+case-sensitive (single letter abbreviations are not) and will not be
+available interactively, only on the command line.
+
+Removed 'K' estimate for Expectation_n, Expectation_i fits to the
+distribution of unrelated similarity scores.  'K' cannot be calculated
+from the data available.  'Lambda' can be calculated, it is
+1.28255/sqrt(mean_var), and is still available.
+
+>>March 3, 2000 
+ (no version change)
+
+changed Makefile33.common, Makefile.common, to incorporate $(NRAND)
+rather than "rand48".  Provide nrandom.c which uses random(), as
+replacement for nrand.c, which uses rand48().
+
+>>February 8, 2000
+  --> v33t05
+
+Fixes to scaleswn.c (proc_hist_ml) to set num_db_entries properly.
+Scaleswn.c also provides Lambda estimates for -z 1/11 (Expectation_n),
+and -z 1/14 (Expectation_i) statistical estimates.
+
+Modifications to calc_id() to correct bug in counting identities.
+Modified showalign() to use calc_id() with -m 9, for simpler
+debugging.
+
+Additional modifications to dropfa*.c files to deal properly with 'n's
+and 'x's.
+
+Added new option: -x #, which allows one to override the penalty for a
+match against 'x' (or 'N') provided by the scoring matrix.  This
+option is particularly useful in fast[x/y] searches, where out of
+frame low complexity regions can generate high scores.
+
+The old function of '-x' - to specify an alternate coordinate system,
+is now available as '-X # #'.
+
+Updated scaleswn.c to provide window shuffle information for -z 12.
+
+Updated compacc.c, workacc.c, to fix serious bug in wshuffle()
+that destroyed aa1[n1]=0.
+
+>>January 25, 2000
+  --> v33t04
+
+  A serious bug in all of the fasta related programs has been
+corrected.  The new code in fasta33 which ignores certain residues
+failed to initialize one of the arrays properly.  As a result, in
+pathological situations, a very strong match could be missed.
+
+  Corrected minor bug in initsw.c that caused a misplaced "ktup" command
+line argument, which should be ignored by ssearch, to be read as -d
+ktup.
+
+  Improved error message for 0 length query sequence.
+
+>>January 17, 2000
+  --> no external version number change
+
+Modified mmgetaa.c, map_db.c, and nmgetaa.c to provide memory mapping
+of genbank flatfile (format=1) files.  This format could be read much
+more efficiently, however.
+
+>>January 12, 2000
+  --> no external version number change
+
+Changed the behavior of the options that set the number of high scores
+(-b) and alignments (-d) that are displayed.  Previously, fasta33 -E
+10.0 -d 10 would show 50 best scores, rather than all the scores with
+E() < 10.0.  To get the -E threshold to limit, -E 10.0 -b 10000 -d 10
+was required. This is now fixed. Setting "-d 10" does not affect the
+number of best scores shown.
+
+Minor change in mw.h to remove unused defines.
+
+fasta3x.me (fasta3x.doc) updated.
+
+>>January 6, 2000
+  --> v33t03
+
+Corrected bug in memory mapped reads of gcg_binary format files
+that potentially caused the last 63 residues to be read improperly.
+
+Changes to comp_thr.c, pthr_subs.c, uthr_subs.c, ibm_pthr_subs.c to
+ensure that each thread has its own work_info structure. This solves
+some minor race conditions that sometimes caused some parameters
+not to be reported properly.
+
+Changes to most of the drop*.c files to correct some minor problems
+with sequence alphabets. Code in mmgetaa.c (memory mapped code for
+FASTA, GCG compressed files) reordered to prevent files from being
+memory mapped if appropriate index files are not available.
+
+See readme.pvm_3.3 for updates to the pvm programs.
+
+>>December 10, 1999
+  (no version change - modifications largely affect ps3comp*)
+
+Modifications to showsum.c to deal with 2 scores/sequence.  Modifications
+to mmgetaa.c for superfamily numbers.
+
+>>December 7, 1999
+ (no version change, previous version not released)
+
+Corrected problem in mmgetaa.c that caused searches on a memory mapped
+single long sequence (e.g. Chr22) to fail.  Corrected bug in map_db.c
+that caused it to crash on some architectures if a filename was not
+specified.  Corrected off-by-three error in fasty/tfasty.  Corrected
+indexing error in dropfz2.c.
+
+>>December 5, 1999
+ --> v33t02 
+ 
+corrected some bugs in initfa.c/initsw.c/doinit.c that caused
+abbreviated function names to be lost.
+
+modify showbest.c, showalign.c to include information on position in
+library sequence (bbp->cont) to distinguish subsegment of very long
+sequences.  Currently, the new label is available only with -m 6.
+
+>>November 29, 1999
+ [t]fastz33 uses v33t02 of fasty function.
+
+Replace dropfz.c with dropfz2.c.  Dropfz2.c interprets any codons
+that include the nucleotide 'N' as the amino acid 'X'. Previously, 'N' was
+treated as 'A', so 'NNN' ended up 'K'.  This modification, together
+with the -S option and lower-case pseg'ed databases, should ensure
+that DNA queries with large numbers of 'N's do not match low
+complexity regions.
+
+>>November 20, 1999
+ (no version change, previous version not released)
+
+Modify initfa.c to display initn, init1 scores for [t]fast[fs].
+Include "-B" option to show previous z-scores.
+
+>>November 17, 1999
+ (no version change, previous version not released)
+ 
+Modify dropfx.c to use saatran(), rather than aatran().  saatran
+translates any 'N' containing codon as 'X'.  aatran() treats 'N' as
+an 'A'.  Although more steps are required for translation, the program
+appears to run just as fast.
+
+>>November 7, 1999
+ --> v33t01
+
+Substantial changes to the output format in showbest.c (the list of
+high scoring sequences) and showalign.c (the alignments).  The classic
+list of best scores:
+
+The best scores are:                             initn init1 opt z-sc E(82014)
+gi|121716|sp|P10649|GTM1_MOUSE GLUTATHIO  ( 218) 1497 1497 1497 1761.1 2.3e-91
+gi|121717|sp|P04905|GTM1_RAT GLUTATHIONE  ( 218) 1413 1413 1413 1662.9 6.7e-86
+
+has been replaced by:
+
+The best scores are:                                       opt bits E(82138)
+gi|121716|sp|P10649|GTM1_MOUSE GLUTATHIONE S-TRAN  ( 218) 1497 354 7.6e-98
+gi|121717|sp|P04905|GTM1_RAT GLUTATHIONE S-TRANSF  ( 218) 1413 335 5.3e-92
+
+This display provides more information and removes the outdated initn
+and init1 scores, which are no longer used. The "bit" score is
+comparable to the blast2 bit score.  It is calculated as: (lambda*S -
+ln K)/ln 2, where S is the raw similarity score, lambda and K are
+statistical parameters estimated from the distribution of unrelated
+sequence similarity scores.  All of the similarity scores, including
+init1, initn, and z-scores are reported with the alignment data.
+Z-scores are displayed instead of bit scores in the list of high
+scores if the command line option "-B" is specified.
+
+In addition, the alignment score line has changed from:
+
+>>gi|2506495|sp|P20136|GTM2_CHICK GLUTATHIONE S-TRANSFER  (220 aa)
+ initn: 954 init1: 954 opt: 958 Z-score: 1130.9 expect() 1.1e-56
+Smith-Waterman score: 958;  61.927% identity in 218 aa overlap (1-218:1-218)
+
+to:
+
+>>gi|2506495|sp|P20136|GTM2_CHICK GLUTATHIONE S-TRANSFER  (220 aa)
+ initn: 954 init1: 954 opt: 958  Z-score: 1130.9  bits: 216.4 E(): 2.8e-56
+Smith-Waterman score: 958;  61.927% identity in 218 aa overlap (1-218:1-218)
+
+In addition to the addition of the "bits:" score, the "expect()" label
+has changed to "E()" to save some space.
+
+>>November 4,12, 1999
+(no version change)
+
+Fixed serious bug in -z 2 lambda/K calculation in scaleswn.c
+
+Fixed bugs in llgetaa.c (openlib()) and definition of superfamily
+numbers.
+
+>>October 21, 1999
+(no version change)
+
+Begin using CVS for version control. Correct faulty error message in
+dropfs.c.  Corrected bad "goto loopl;" in dropfz.c.  Corrected prss3.rsp
+for Makefile.tc (Win32 version).
+
+>>October 18, 1999
+ --> v33t0
+
+Corrected some serious bugs with the various fasta/x/y programs when
+the -DALLOCN0 was used to save memory.  Improvements to fasta3x.me/.doc
+documentation.
+
+>>October 12, 1999
+ --> v33tx
+
+For this initial release of version 33 of the FASTA programs, the
+Makefile's have been modified to make "fasta33(_t)", "fastx33(_t)",
+etc, so that you can test fasta33 while retaining fasta3 (from release
+v32t08).  The FASTA33 programs are somewhat slower than previous
+releases, but I believe the ability to handle low complexity regions
+without 'X'ing them out outweighs the slowdown.  By (temporarily)
+changing the names of the programs slightly, it will be easier for you
+to judge the relative cost and benefit.  To "make" the programs as
+"fasta3(_t)", etc, simply replace "Makefile33.common" with
+"Makefile.common" in the "Makefile" that you use.
+
+>>September 30, 1999
+
+ssearch3/fasta3/fastx3/fasty3 have been modified to search databases
+containing both upper and lower case letters, where lower case letters
+indicate low-complexity regions.  With the modified programs, lower
+case letters are treated as 'X's' in the initial scan, but are then
+treated normally in the final alignment.  In addition, alignments can
+contain lower case letters.  Lower case letters are treated as
+low-complexity regions during the search phase of the program, but as
+"conventional" residues during the alignment phase, with the "-S"
+option.  Currently, lower case letters are mapped to 'X's during the
+scan of the entire library.  In the future, alternate weights will be
+available. This is a substantial improvement for very large scale
+comparison, where one seeks both accurate statistical estimates and
+accurate %identities and alignments, and for translated DNA:protein
+comparisons, like "fastx3" and "fasty3", where out-of-frame
+translations tend to match low complexity regions (see Pearson et
+al. (1997) Genomics 46:24-36).
+
+Protein databases (and query sequences) can be generated in the
+appropriate format using John Wootton's "pseg" program, available from
+ftp://ftp.ncbi.nih.gov/pub/seg/pseg.  Once you have compiled the "pseg"
+program, use the command:
+
+	pseg database.fasta -z 1 -q  > database.lc_seg
+
+Once you have database.lc_seg, run the command "map_db" to generate
+a ".xin" file that can be used to efficiently memory map the database.
+
+You can then search database.lc_seg with or without the "-S" option.
+Without "-S", the database is treated as any other FASTA format file -
+all the residues are present.  With "-S", lower case residues will be
+treated as 'x's' during the initial scan but as normal residues when
+final alignments are displayed.
+
+When the -S option is used, the matrix information line is changed
+from: "BL50 matrix (15:-5)" to "BL50 matrix (15:-5)xS".  The "-S"
+option is no longer available to provide a scoring matrix offset.
+
+Unfortunately, Blast2.0 format files cannot contain lower case
+letters.  We have addressed this problem by providing efficient memory
+mapped access to Fasta and GCG/PIR, and GCG/compressed-binary files in
+the last release of fasta32t08. The memory mapped file I/O
+improvements are provided in fasta33 as well.
+
+================ readme.v32 ================
+
+FASTX/Y and FASTA (DNA) are now half as fast, because the programs now
+search both the forward and reverse strands by default.
+
+The documentation in fasta3x.me/fasta3x.doc has been substantially
+revised.
+
+>>October 20, 1999
+(no version change)
+
+Modify nxgetaa.c/nmgetaa.c to recognize 'N' as a possible DNA character.
+
+>>October 9, 1999
+ --> v32t08 (no version number change)
+
+Added "-M low-high" option, where low and high are inclusion limits
+for library sequences.  If a library sequence is shorter than "low" or
+longer than "high", it will not be considered in the search.  Thus,
+"-M 200-250" limits the database search to proteins between 200 and
+250 residues in length.  This should be particularly useful for fasts3
+and fastf3.  -M -500 searches library sequences < 500; -M 200 -
+searches sequences > 200.  This limit applies only to protein
+sequences.
+
+Modified scaleswn.c to fall back to maximum likelihood estimates of
+lambda, K rather than mean/variance estimates. (This allows MLE
+estimation to be used instead of proc_hist_n when a limited range of
+scores is examined.)
+
+>>October 2, 1999
+ --> v32t08
+
+Many changes:
+
+(1) memory mapped (mmap()ed) database reading - other database reading fixes
+(2) BLAST2 databases supported
+(3) true maximum likelihood estimates for Lambda, K
+(4) Misc. minor fixes
+
+(1) (Sept. 26 - Oct. 2, 1999) Memory mapped database access.
+It is now possible to use mmap()ed access to FASTA format databases,
+if the "map_db" program has been used to produce an ".xin" file.  If
+USE_MMAP is defined at compile time and a ".xin" file is present, the
+".xin" will be used to access sequences directly after the file is
+mmap()ed.  On my 4-processor Alpha, this can reduce elapsed time by
+50%. It is not quite as efficient as BLAST2 format, but it is close.
+
+Currently, memory mapping is supported for type 0 (FASTA), 5
+(PIR/GCG ascii), and 6 (GCG binary).  Memory mapping is used if a
+".xin" file is present. ".xin" files are created by the new program
+"map_db".  The syntax for "map_db" is:
+
+	map_db [-n] "/dir/database.fa"
+
+which creates the file /dir/database.fa.xin.  Library types can be
+included in the filename; thus:
+
+	map_db -n "/gcggenbank/gb_om.seq 6"
+
+would be used for a type 6 GCG binary file. 
+
+The ".xin" file must be updated each time the database file changes.
+map_db writes the size of the database file into the ".xin" file, so
+that if the database file changes, making the ".xin" offset
+information invalid, the ".xin" file is not used. "list_db" is
+provided to print out the offset information in the ".xin" file.
+
+(Oct 2, 1999) The memory mapping routines have been changed to
+allow several files to be memory mapped simultaneously. Indeed, once a
+database has been memory mapped, it will not be unmap()ed until the
+program finishes.  This fixes a problem under Digital Unix, and should
+make re-access to mmap()ed files (as when displaying high scores and
+alignments) much more efficient.  If no more memory is available for
+mmap()ing, the file will be read using conventional fread/fgets.
+
+(Oct 2, 1999) The names of the database reading functions have been
+changed to allow both Blast1.4 and Blast2.0 databases to be read.  In
+addition, Makefile.common now includes an option to link both
+ncbl_lib.o and ncbl2_lib.o, which provides support for both libraries.
+However, Blast1.4 support has not been tested.
+
+The Makefile structure has been improved.  Each architecture specific
+Makefile (Makefile.alpha, Makefile.linux, etc) now includes
+Makefile.common.  Thus, changes to the program structure should be
+correct for all platforms.  "map_db" and "list_db" are not made with
+"make all".
+
+The database reading functions in nxgetaa.c can now return a database
+length of 0, which indicates that no residues were read.  Previously,
+0-length sequences returned a length of 1, which were ignored.
+Complib.c and comp_thr.c have changed to accommodate this
+modification.  This change was made to ensure that each residue,
+including the last, of each sequence is read.
+
+Corrected bug in nxgetaa.c with FASTA format files with very long
+(>512 char) definition lines.
+
+(2) (September 20, 1999) BLAST2 format databases supported
+
+This release supports NCBI Blast2.0 format databases, using either
+conventional file reading or memory mapped files.  The Blast2.0 format
+can be read very efficiently, so there is only a modest improvement in
+performance with memory mapping.  The decision to use mmap()'ed files
+is made at compile time, by defining USE_MMAP.  My thanks to Eamonn
+O'Toole of DEC/Compaq, and Daryl Madura of Sun Microsystems, for
+providing mmap()'ed modifications to fasta3.  On my machines, Blast2.0
+format reduces search time by about 30%.  At the moment, ambiguous DNA
+sequences are not decoded properly.
+
+(3) (September 30, 1999) A new statistical estimation option is
+available.  -z 2 has been changed from ln()-scaling, which never
+should have been used, to scaling using Maximum Likelihood Estimates
+(MLEs) of Lambda and K.  The MLE estimation routines were written by
+Aaron Mackey, based on a discussion of MLE estimates of Lambda and K
+written by Sean Eddy.  The MLE estimation examines the middle 95% of
+scores, if there are fewer than 10000 sequences in the database;
+otherwise it excludes (censors) the top 250 scores and the bottom 250
+scores.  This approach seems to effectively prevent related sequences
+from contaminating the estimation process.  As with -z 1, -z 12 causes
+the program to generate a shuffled sequence score for each of the
+library sequences; in this case, no censoring is done.  If the
+estimation process is reliable, Lambda and K should not vary much with
+different queries or query lengths.  Lambda appears not to vary much
+with the comparison algorithm, although K does.
+
+(4) Minor changes include fixes to some of the alignment display routines,
+individual copies of the pstruct structure for each thread, and some
+changes to ensure that every last residue in a library is available
+for matching (sometimes the last residue could be ignored).  This
+version has undergone extensive testing with high-throughput sequences
+to confirm that long sequences are read properly.  Problems with
+fastf3/fasts3 alignment display have also been addressed.
+
+>>August 26, 1999 (no version change - not released)
+
+Corrected problem in "apam.c" that prevented scoring matrices from
+being imported for [t]fasts3/[t]fastf3.
+
+>>August 17, 1999
+ --> v32t07
+
+Corrected problem with opt_cut initialization that only appeared
+with pvcomp* programs.
+
+Improved calculation of FASTA optcut threshold for DNA sequence
+comparison for match scores much less than +5 (e.g. +3).  The previous
+optcut threshold was too high when the match penalty was < 4 and
+ktup=6; it is now scaled more appropriately.
+
+Optcut thresholds have also been raised slightly for
+fastx/y3/tfastx/y3.  This should improve performance with minimal
+effects on sensitivity.
+
+>>July 29, 1999
+(no version change - date change)
+
+Corrected various uninitialized variables and buffer overruns
+detected.
+
+>>July 26, 1999 - new distribution
+(no version change - v32t06, previous version not released)
+
+Changed the location of "(reverse complement)" label in tfasta/x/y/s/f
+programs.
+
+Statistical calculations for tfasta/x/y in unthreaded version
+corrected.  Statistical estimates for threaded and unthreaded versions
+of the tfasta/x/y/s/f programs should be much more consistent.
+
+Substantial modifications in alignment coordinate calculation/
+presentation.  Minor error in fastx/y/tfastx/y end of alignment
+corrected.  Major problems with tfasta alignment coordinates
+corrected.  tfasta and tfastx/y coordinates should now be consistent.
+
+Corrected problem with -N 5000 in tfasta/x/y3(_t) searches encountered
+with long query sequences.
+
+Updated pthr_subs.c/Makefile.linux to increase the pthreads stacksize
+to try to avoid "cannot allocate diagonal arrays" error message.
+Pthreads stacksize can be changed with RedHat 6.0, but not RedHat 5.2,
+so Makefile.linux uses -DLINUX5 for RedHat5.* (no pthreads stack size).
+I am still getting this message, so it has not been completely
+successful.  Makefile.linux now uses -DALLOCN0 to avoid this problem,
+at some cost in speed.
+
+The pvcomp* programs have been updated to work properly with
+forward/reverse DNA searches.  See readme.pvm_3.2.
+
+>>July 7, 1999 - not released
+ --> v32t06
+
+Corrected bug in complib.c (fasta3, fastx3, etc) that caused core
+dumps with "-o" option.
+
+Corrected a subtle bug in fastx/y/tfastx/y alignment display.
+
+>>June 30, 1999 - new distribution
+(no version change)
+
+Corrected doinit.c to allow DNA substitution matrices with -s matrix
+option.
+
+Changed ".gbl" files to ".h" files.
+
+>>June 2 - 9, 1999 - new distribution
+(no version change)
+
+Added additional DNA lambda/K/H to alt_param.h.  Corrected some
+other problems with those tables for the case where (inf,inf)
+gap penalties were not included.
+
+Fixed complib.c/comp_thr.c error message to properly report filename
+when library file is not found.
+
+Included approximate Lambda/K/H for BL80 in alt_parms.h.
+BL80 scoring matrix changed from 1/3 bit to 1/2 bit units.
+
+Included some additional perl files for searchfa.cgi, searchnn.cgi
+in the distribution (my-cgi.pl, cgi-lib.pl).
+
+>>May 30, 1999, June 2, 1999 - new distribution
+(no version number change)
+
+Added Makefile.NetBSD, if !defined(__NetBSD__) for values.h.  Changed
+zs_to_E() and z_to_E() in scaleswn.c to correctly calculate E() value
+when only one sequence is compared and -z 3 is used.
+
+>>May 27, 1999
+(no version number change)
+
+Corrected bug in alignment numbering on the % identity line
+	27.4% identity in 234 aa (101-234:110-243)
+for reverse complements with offset coordinates (test.aa:101-250)
+
+>>May 23, 1999
+(no version number change)
+
+Correction to Makefile.linux (tgetaa.o : failed to -DTFAST). 
+
+>>May 19, 1999
+(no version number change)
+
+Minor changes to pvm_showalign.c to allow #define FIRSTNODE 1.
+Changes to showsum.c to change off-end reporting.  (Neither of these
+changes is likely to affect anyone outside my research group.)
+
+>>May 12, 1999
+ --> v32t05
+
+Fixed a serious bug in the fastx3/tfastx3 alignment display which
+caused t/fastx3 to produce incorrect alignments (and incorrectly low
+percent identities).  The scores were correct, but the alignment
+percent identities were too low and the alignments were wrong.
+
+Numbering errors were also corrected in fastx3/tfastx3 and
+fasty3/tfasty3 and when partial query sequences were used.
+
+>>May 7, 1999
+
+Fixed a subtle bug in dropgsw.c that caused do_work() to calculate
+incorrect Smith-Waterman scores after do_walign() had been called.
+This affected only pvcompsw searches with the "-m 9" option.
+
+>>May 5, 1999
+
+Modified showalign.c to provide improved alignment information that
+includes explicitly the boundaries of the alignment.  Default
+alignments now say:
+
+Smith-Waterman score: 175;  24.645% identity in 211 aa overlap (5:207-7:207)
+
+>>May 3, 1999
+
+Modified nxgetaa.c, showsum.c, showbest.c, manshowun.c to allow a
+"not" superfamily annotation for the query sequence only.  The
+goal is to be able to specify that certain superfamily numbers be
+ignored in some of the search summaries.  Thus, a description line
+of the form:
+
+>GT8.7 | 40001 ! 90043 | transl. of pa875.con, 19 to 675
+
+says that GT8.7 belongs to superfamily 40001, but any library
+sequences with superfamily number 90043 should be ignored in any
+listing or summary of best scores.
+
+In addition, it is now possible to make a fasta3r/prcompfa, which is
+the converse of fasta3u/pucompfa. fasta3u reports the highest scoring
+unrelated sequences in a search using the superfamily annotation.
+fasta3r shows only the scores of related sequences.  This might be
+used in combination with the -F e_val option to show the scores
+obtained by the most distantly related members of a family.
+
+>>April 25, 1999
+
+ -->v32t04 (not distributed)
+
+Modified nxgetaa.c to remove the dependence of tgetaa.o on TFASTA
+(necessary for a more rational Makefile structure).  No code changes.
+
+>>April 19, 1999
+
+Fixed a bug in showalign.c that displayed incorrect alignment coordinates.
+(no version number change).
+
+>>April 17, 1999
+
+ --> v32t03
+
+A serious bug in DNA alignments when the sequence has been broken into
+multiple segments that was introduced in version fasta32 has been
+fixed.  In addition, several minor problems with -z 3 statistics on
+DNA sequences were fixed.
+
+Added -m 9 option, which unfortunately does different things in
+pvcompfa/sw and fasta3/ssearch3.  In both programs, -m 9 provides the
+id's of the two sequences, length, E(), %_ident, and start and end of
+the alignment in both sequences.  pvcompfa/sw provides this
+information with the list of high scoring sequences.  fasta3/ssearch3
+provides the information in lieu of an alignment.
+
+>>March 18, 1999
+
+ --> v32t02
+
+Added information on the algorithm/parameter description line to
+report the range of the pam matrices.  Useful for matrices like
+MD_10, _20, and _40 which require much higher gap penalties.
+
+>>March 13, 1999 (not distributed)
+
+ --> v32t01 
+
+ -r results.file  has been changed to -R results.file to accommodate
+ DNA match/mismatch penalties of the form: -r "+1/-3".
+
+>>February 10, 1999
+
+Modify functions in scalesw*.c to prevent underflow after exp() on
+Alpha Linux machines.  The Alpha/LINUX gcc compiler is buggy and
+doesn't behave properly with "denormalized" numbers, so "gcc -g -m
+ieee" is recommended.
+
+Add "Display alignments also (y/n)[n] "
+
+pvcomplib.c again provides alignments!!  In addition, there is a
+new "-m 9" option, which reports alignments as:
+
+>>>/home/wrp/slib/hlibs/hum0.aa#5>HS5 gi:1280326 T-cell receptor beta chain 30 aa, 30 aa vs /home/wrp/slib/hlibs/hum0.seg library
+HS5         	  30	HS5         	  30	1.873e-11	1.000	  30	   1	  30	   1	  30
+HS5         	  30	HS2249      	  40	1.061e-07	0.774	  31	   1	  30	   7	  37
+HS5         	  30	HS2221      	  38	1.207e-07	0.833	  30	   1	  30	   7	  35
+HS5         	  30	HS2283      	  40	1.455e-07	0.774	  31	   1	  30	   7	  37
+HS5         	  30	HS2239      	  38	1.939e-07	0.800	  30	   1	  30	   7	  35
+
+where the columns are:
+
+query-name      q-len   lib-name      lib-len   E()             %id    align-len  q-start q-end   l-start l-end
+
+>>February 9, 1999
+
+Corrected bug in showalign.c that offset reverse complement alignments
+by one.
+
+>>February 2, 1999
+
+Changed the formatting slightly in showbest.c to have columns line up better.
+
+>>January 11, 1999
+
+Corrected some bugs introduced into fastf3(_t) in the previous version.
+
+>>December 28, 1998
+
+Corrected various problems in dropfz.c affecting alignment scores
+and coordinates.
+
+Introduced a new program, fasts3(_t), for searching with peptide
+sequences.
+
+>>November 11, 1998
+
+  --> v32t0
+
+Added code to correct problems with coordinate number in long library
+sequences with tfastx/tfasty.  With this release, sequences should be
+numbered properly, and sequence numbers count down with reverse
+complement library sequences.
+
+In addition, with this release, fastx/y and tfastx/y translated
+protein alignments are numbered as nucleotides (increasing by 3,
+labels every 30 nucleotides) rather than codons.
+
diff --git a/doc/readme.v34t0 b/doc/readme.v34t0
new file mode 100644
index 0000000..49db859
--- /dev/null
+++ b/doc/readme.v34t0
@@ -0,0 +1,1683 @@
+
+  $Id: readme.v34t0 348 2010-07-20 21:33:22Z wrp $
+  $Revision: $
+
+>>May 28, 2007
+
+Small modification for GCG ASCII (libtype=5) header line.
+
+>>January 12, 2007	fasta-34_26_2
+
+Fix a problem with pssm_asn_subs.c reading strings (sequences) longer
+than 1024 bytes.
+
+Remove searchfa.cgi, searchnn.cgi, cgi-lib.pl, my-cgi.pl - this code
+was used for an ancient FASTA WWW implementation and has been replaced
+by the FASTA_WWW package.
+
+FASTA Version numbers are being modified to make releases easier to
+track, thus fa34t26b5 has become fasta-34_26_1.  I would prefer to use
+decimal versions, but CVS does not allow '.' in tags.
+
+>>January 4, 2007	fasta-34_26_1
+
+Include scripts for building Mac OS X Universal binaries on a PPC
+machine.  Programs are compiled first with Makefile.os_x (gcc-3.3 for
+PPC) and then installed into ./ppc/.  Programs are next compiled with
+Makefile.os_x86 for i386, and the resulting executables installed into
+./i386/.  Finally, the "make_osx_univ.sh" script is run to build the
+universal binaries from the two executables using "lipo".
+
+>>December 12, 2006
+
+Fix some problems with p2_workcomp.c: (1) no longer initialize pad
+characters for non-existent sequences. (2) deal with small libraries
+consistently with the serial versions.
+
+>>November 17, 2006	fa34t26b5
+
+Fixed a problem reading ASN.1 format 2 PSSM's.  It is now possible to
+download a PSI-BLAST PSSM RID and search properly.  Next, the query
+sequence from the PSSM should be used instead of the provided query
+sequence, so that the query sequence is ignored.
+
+>>October 19, 2006	fa34t26b4
+
+Fixed problem with SSE2 code when PSSM's are used.
+
+>>October 6, 2006	fa34t26b3
+
+A new set of WIN32 programs is now available that use the Intel C++
+9.1 compiler, rather than the much older Borland Turbo-C compiler. All
+of the unthreaded programs that are part of the Unix and MacOSX FASTA
+distributions are now available.  Threaded (multiprocessor) versions
+of the program are available as well, as are sse2 accelerated versions
+of ssearch34 (ssearch34sse2.exe, ssearch34sse2_t.exe).
+
+The new WIN32 code also uses Microsoft's "nmake" program to build the
+programs, which allows much greater consistency between the Unix and
+Windows versions.
+
+
+>>September 18, 2006
+
+Static global alignment variables removed from dropnfa.c, dropfx.c,
+dropfz2.c.  dropnfa.c, dropfx.c and dropfz2.c should be thread safe.
+Together with the earlier changes, all the FASTA functions should now
+be thread safe during the alignment process.
+
+>>August 17, 2006
+
+Begin removal of static variables from Smith-Waterman alignment
+functions.  These variables kept the functions from being thread-safe.
+Now dropgsw.c and dropnsw.c are thread-safe.
+
+>>August 15, 2006	fa34t26b2
+
+Fixed a problem with pv34compfx/mp34compfx (and fy) producing
+improperly labeled alignments and de-allocating memory for the reverse
+complement.
+
+>>July 18, 2006
+
+The library file name parsing programs now provide the option for
+environment variable substitutions.  For example, with SLIB2=/slib2 as an
+environment variable (e.g. export SLIB2=/slib2 for ksh and bash), then
+
+	fasta34 -q query.aa '${SLIB2}/swissprot.fa'  expands as expected.
+
+While this is not important for command lines, where the Unix shell
+would expand things anyway, it is very helpful for various
+configuration files, such as files of file names, where:
+
+	<${SLIB2}/blast
+	swissprot.fa
+
+now expands properly, and in FASTLIBS files the line:
+
+	NCBI/Blast Swissprot$0S${SLIB2}/blast/swissprot.fa
+
+expands properly.  Currently, Environment variable expansion only
+takes place for library file names, and the <directory in a file of
+file names.
+
+>>July 14, 2006	  fa34t26b1
+
+Updated Farrar smith_waterman_sse2.c code to address possible bug
+(code from Michael Farrar).  Include <sunmedia_intrin.h> for
+compilation with Sun compiler with Makefile.sun_x86.
+
+>>July 2, 2006	  fa34t26b0
+
+This release provides an extremely efficient SSE2 implementation of
+the Smith-Waterman algorithm for the SSE2 vector instructions written
+by Michael Farrar (farrar.michael at gmail.com).  The SSE code speeds up
+Smith-Waterman 8 - 10-fold in my tests, making it comparable to Erik
+Lindahl's Altivec code for the Apple/IBM G4/G5 architecture.
+
+The Farrar code is largely confined to smith_waterman_sse2.c and
+smith_waterman_sse2.h, which are copyright (2006) by Michael Farrar,
+and cannot be redistributed without his permission.  Mr. Farrar has
+agreed to provide his code under the same policy used by FASTA -
+e.g. the code can be used without permission, but not redistributed.
+
+The Farrar code uses GCC version 4.0 SSE2 intrinsic functions to avoid
+assembly language code.  Unfortunately, in my hands, "gcc -O3" causes
+"out of memory" errors, and other problems, so "gcc -O" is used instead.
+
+>>June 23, 2006   fa34t25d10
+
+Modifications to comp_lib.c, compacc.c, and other files to ensure that
+function-specific MAXTOT values are used properly.  MAXTOT is now
+available as m_msg.max_tot, which is set in initfa.c (m_msg.max_tot =
+MAXTOT) to ensure that functions that need very large MAXTOT values
+(e.g. TFASTX) can get them.  tfastx can now search successfully with
+titin, a 27,000 residue protein.
+
+Other changes have been made to accommodate long query sequences.
+
+A serious bug was found in fastx34(_t) that caused alignment
+coordinates to be calculated improperly when the DNA sequence was much
+longer than the protein sequence.
+
+>>May 31, 2006	fa34t25d9
+
+Fixed some problems with fasts/fastf alignments when -m 9 options were
+used.  Unlike the other algorithms, the a_res structure does not
+capture all the information to re-produce an alignment, so do_walign
+now sets bptr->have_ares to indicate whether the a_res structure is
+valid.
+
+Various problems with bad library names, and short query titles were
+also fixed.
+
+Updated version number/date on all drop*.c functions.
+
+>>May 24, 2006	fa34t25d8
+
+Revised code for NCBI *.pal/*.nal databases has been tested on all
+architectures, including Windows.
+
+In addition, support for ASN.1 PSSM:2 files provided by the NCBI
+PSI-BLAST WWW site is included.  This code will not work with
+iteration 0 PSSM's (which have no PSSM information).  For ASN.1
+PSSM's, which provide the matrix name (and in some cases the gap
+penalties), the scoring matrix and gap penalties are set appropriately
+if they were not specified on the command line. ASN.1 PSSM's are type 2:
+	ssearch34 -P "pssm.asn1 2" .....
+
+>>May 18, 2006
+
+Support for NCBI Blast formatdb databases has been expanded.  The
+FASTA programs can now read some NCBI *.pal and *.nal files, which are
+used to specify subsets of databases.  Specifically, the
+swissprot.00.pal and pdbaa.00.pal files are supported.  FASTA supports
+files that refer to *.msk files (i.e. swissprot.00.pal refers to
+swissprot.00.msk); it does not currently support .pal files that
+simply list other .pal or database files (e.g. FASTA does not support
+nr.pal or swissprot.pal).
+
+In the process of providing this support, the routines used to read
+ASN.1 binary formatdb files were substantially improved.  It is now
+possible to see multiple description lines for a single sequence.
+
+IS_BIG_ENDIAN has been removed from all of the Makefiles.  The code
+now looks for the definition of __BIG_ENDIAN__ or _BIG_ENDIAN to
+decide whether the architecture IS_BIG_ENDIAN.  If, for some reason,
+one of these macros is not defined on a BIG_ENDIAN architecture, then
+-DIS_BIG_ENDIAN is required.
+
+>>May 12, 2006	CVS fa34t25d7
+
+Corrected serious problem with coordinate display calculation for
+fasta34 and ssearch34 - in some cases the coordinates and alignment
+symbols were off by the length of the context (typically 30 residues).
+
+Added capability to read ASN.1 binary PSSM information.  This
+information is provided (in an encoded form) from the NCBI PSI-BLAST
+WWW site.  (What is actually provided from the WWW site is a bzip2-ed
+binary file that is converted to ASCII HEX.  The ASCII HEX file must
+be converted to binary, and then bunzip'ed. This bunzip-ed file is
+binary ASN.1.)  These files can also be generated by 
+
+ blastpgp -J T -C pssm.asn1_bin -u 2
+
+I am parsing the ASN.1 binary manually, not using the NCBI toolkit, so
+there may be some files that are not parsed properly - if so, let me
+know.
+
+(May 12, 2006 - The NCBI changed the format of the psi-blast ASN.1
+PSSM - and has not yet provided documentation of the new structure, so
+this code does not work. It does work with blastpgp v 2.2.13, but not
+with the web site version 2.2.14.  A fix was provided 24-May-2006)
+
+>>April 18, 2006
+
+Small modification in mshowbest.c to provide more consistent display
+widths with -m 9i in list of best hits.
+
+>>April 11, 2006 CVS fa34t25d6
+
+Corrected a problem introduced with the new, more efficient method for
+displaying alignments.  For the tfast* programs, which must translate
+the library sequence, translations were not done when alignments were
+re-displayed.
+
+Corrected an older problem with tfastx34 against very long sequence
+databases - the code to more efficiently do the display alignment did
+not use the correct sequence coordinates.
+
+Modifications to dropfs2.c to ensure that exact peptide matches are
+captured more frequently.
+
+>>March 16, 2006 CVS fa34t25d5
+
+Change to initfa.c to allow lower case DNA libraries using the
+-DDNALIB_LC compile time option.
+
+Modify p2_complib.c, p2_worklib.c (and doinit.c, msg.h) to allow the
+-V annotation option for the parallel programs.  Also modify to allow
+specification of the query range (but only for the first query, like
+fasta34) for the parallel programs.
+
+Modification of p2_workcomp.c to correct some problems presenting
+percent similarity.  Also correct unreleased bugs in the alignment
+routines that allow more efficient alignment re-calculation.
+
+>>Nov 20, 2005
+
+Changes to support asymmetric matrices - a scoring matrix read in from
+a file can be asymmetric.  Default matrices are all symmetric.
+
+>>Oct 24, 2005
+
+Modifications extended to p2_complib.c/p2_workcomp.c.  Incorporation
+of drop_func.h into p2_workcomp.c greatly simplifies things.  No
+changes in communication - struct a_res_str is internal to
+p2_workcomp.c.
+
+Additional changes to do_walign() so that aln_func_vals() must be
+called to set llfact, qlfact, etc in a_struct aln before or after
+do_walign is called.  do_walign produces a_res_str a_res, which has
+all the information necessary to produce a calcons() or calc_code()
+alignment.
+
+>>Oct 19, 2005 CVS fa34t26b0
+
+Modifications to drop*.c and c_dispn.c to separate (and simplify) some
+of the alignment coordinate calculations.  Before, the "a_struct" had
+the coordinates of the alignment used in the display (seqc0, seqc1)
+AND in the original sequences (aa0, aa1), as well as other information
+used to calculate alignment coordinates.  In the new version, a_struct
+coordinates always refer to seqc0,1, while a new structure, a_res_str,
+has coordinates for aa0, aa1 as well as the alignment encoding in res[nres].
+Eventually, this should make it possible to display multiple local
+alignments from the same two sequences.
+
+In addition, the file "drop_func.h" has been added to the project, and
+is included by many of the files (all the drop*.c functions,
+mshowbest.c, mshowalign.c) to ensure that the various functions are
+declared and used consistently.
+
+>>Sept 19, 2005	CVS fa34t25d4
+
+Changes to support Mac OS 10.4 - Tiger (include sys/types.h in more
+files).  Documentation update for prss34/prfx34. Modifications to
+comp_lib.c to support prss34_t/prfx34_t.  Shuffle numbers for
+prss/prfx can now be specified by "-k #".
+
+>>Sept 2, 2005
+
+The prss34 program has been modified to use the same display routines
+as the other search programs.  To be more consistent with the other
+programs, the old "-w shuffle-window-size" is now "-v window-size".
+
+prss34/prfx34 will also show the optimal alignment for which the
+significance is calculated by using the "-A" option. 
+
+Since the new program reports results exactly like other
+fasta/ssearch/fastxy34 programs, parsing for statistical significance
+is considerably different.  The old format program can be made using
+"make prss34o".
+
+>>Aug 26, 2005
+
+Modifications to save_best() in comp_lib.c to support prss34_t.  It
+did not work before.
+
+>>July 25, 2005
+
+Modify mshowbest.c to suppress gi|12345 in HTML mode.
+
+>>July 18, 2005	CVS fa34t25d3
+
+Modifications to Makefile.tc to support NCBI formatdb formats under
+Windows.
+
+>>May 19, 2005  CVS fa34t25d2
+
+Modifications to dropfs2.c to fix an obscure bug that occurred when
+correctly ordered peptides aligned one residue apart.
+
+>>May 5, 2005 CVS fa34t25d1
+
+Modification to the -x option, so that both an "X:X" match score and
+an "X:not-X" mismatch score can be specified. (This score is also used
+to give a positive score to a "*:*" match - the end of a reading frame,
+while giving a negative score to "*:not-*".)
+
+>>March 14, 2005  CVS fa34t25b4
+
+Fixed some problems caused by padding characters required for
+Smith-Waterman ALTIVEC in the parallel (p2_complib.c, p2_workcomp.c)
+versions.
+
+>>Feb 24, 2005	CVS fa34t25b3
+
+Changes to comp_lib.c (and Makefile.pcom) to support prss34_t.
+
+>>Feb 12, 2005
+
+Modify dropfs.c to dynamically allocate space for alignments, so that
+queries with a large number of fragments can still place all the
+fragments on the alignment.  Also fix a problem produced by removing
+-DBIGMEM from most of the Makefile's, but not fixing defs.h to use
+BIGMEM sizes by default.
+
+>>Jan 24, 2005
+
+Include a new program, "print_pssm", which reads a blastpgp binary
+checkpoint file and writes out the frequency values as text.  These
+values can be used with a new option with ssearch34(_t) and prss34,
+which provides the ability to read a text PSSM file.  To specify a
+text PSSM, use the option -P "query.ckpt 1" where the "1" indicates a
+text, rather than a binary checkpoint file.  "initfa.c" has also been
+modified to work with PSSM files with zeros in the frequency
+table.  Presumably these positions (at the ends) do not provide
+information. (Jan 26, 2005) blastpgp actually uses BLOSUM62 values
+when zero frequencies are provided, so read_pssm() has been modified
+to use scoring matrix values for zero frequencies as well.
+
+>>Jan 13, 2005
+
+Change to initfa.c to have fasts34 do a protein comparison by default,
+rather than an unknown sequence type.  Automatic checking for fasts34
+does not work reliably, because queries can be very short.  Likewise
+for fastm34.  [Jan 26, 2005] Undo this change, which broke DNA
+comparison when "-n" was specified.
+
+>>Jan 7, 2005
+
+Changes to tatstats.h, dropfs2.c to allow larger numbers of peptides
+to match when fasts is used to show coverage on a proteomics
+experiment.  Previously fasts could match no more than 30 peptides,
+that has been increased to 50.  In addition, ktup=2 can be used
+to increase the likelihood that short exact matches trump longer
+mismatched regions.  
+
+>>Nov 11, 2004	   CVS fa34t25
+
+Finished merge of earlier fa34t24 branch with HEAD.  Correct
+labeling of TFASTM.
+
+>>Nov 4-8, 2004	   
+
+Incorporation of Erik Lindahl "anti-diagonal" Altivec code for
+Smith-Waterman, only.  Altivec SSEARCH is now faster than FASTA for
+query sequences < 250 amino acids.
+
+Small modifications to output score display to ensure that the correct
+scores are shown, and that they are correctly labeled.
+
+>>Aug 25,26, 2004  CVS fa34t24b3
+
+Small change in output format for p34comp* programs in
+">>>query_file#1 string" line before alignments.  This line is not present
+in the non-parallel versions - it would be better for them to be consistent.
+
+Change in last_stats.c to properly label fasts statistics with -z != 1.
+
+Change in dropfs2.c to ensure that tatprobs are not precalculated with -z 4.
+
+Modify -m 9i output option to show in HTML output.
+
+Add "#ifdef NOOVERHANG" to dropfs2.c that causes overlapping
+alignments to score a 0, rather than the partial overlap score.
+Useful for SAGE alignments, because "fasts" requires global alignments
+(except for overhangs, unless NOOVERHANG is defined).
+
+>>Aug 23, 2004
+
+Fix problem with very long definition lines with formatdb version 4
+ASN databases.  Fix mshowalign.c to re-enable "-L" option.
+
+>>July 28, 2004 
+
+Fix to re-enable -w window shuffle for PRSS.  Modify comp_lib.c
+for PRSS to ensure that the unshuffled score and probability
+are shown, even for very high probability alignments.
+
+>>July 21, 2004
+
+Modifications to support PostgreSQL databases with the same commands
+as MySQL databases.  MySQL database libraries are type 16, PostgreSQL
+are type 17.  Makefile.linux_sql and Makefile.pvm4_sql support both
+database types simultaneously.
+
+>>June 23, 2004 CVS fa34t24b2
+
+Additional fixes to enable -n or -p with fasts34 and
+fastm34. Makefile.pcom was fixed for fastm34_t.  A new file,
+mgstm1.nts, of DNA fragments from mgstm1.seq, is included for testing
+fasts34 and fastm34.
+
+>>May 4, 2004  
+
+Fixes to initfa.c to allow DNA:DNA for FASTS, FASTM.  This change
+introduced a bug that broke FASTS completely, but was fixed June 18,
+2004 (and retagged fa34t24b2).
+
+>>April 23, 2004 CVS fa34t24b1
+
+Fix bug in initfa.c that caused tfasts/tfastf not to examine all six
+frames.
+
+>>May 4, 2004
+
+Fixes to initfa.c to allow DNA:DNA for FASTS, FASTM.
+
+>>March 19, 2004 CVS fa34t24b0
+
+Modify all the drop*.c files, plus mshowbest.c and mshowalign.c, to
+display percent similarity, rather than percent ungapped.  An
+alignment is counted as similar if the score is greater than or equal
+to zero (the same criterion used for placing ".").  To disable this
+change, remove -DSHOWSIM from the appropriate Makefile.*.
+
+>>March 18, 2004 CVS fa34t23b8
+
+Fix bug in initfa.c tables that caused prss to generally compare
+proteins.
+
+>>March 15, 2004 
+
+Fix bug in calls to revcomp(); make revcomp() guarantee NULL termination.
+
+>>March 2, 2004	CVS fa34t23b7
+
+Fix a very embarrassing and surprising bug that caused insertions
+in fasta alignments to appear in the wrong sequence.
+
+>>Feb 7, 2004	CVS fa34t23b6
+
+Change initfa.c to allow "-i" (reverse complement) and "-i -3" with
+"fastx34" and "prfx34".  In addition, "prfx34" now examines both query
+DNA strands in calculating the shuffled statistical significance.
+
+>>Feb 5, 2004
+
+Reverse assignments for G:U base pairing in initfa.c.
+
+Fix memory allocation error caused by doubling DNA alignment width.
+
+>>Jan 7, 2004	CVS fa34t23b5
+
+Change in do_walign() in dropnfa.c to make final DNA alignments use a
+band that is 2X as large as the search band width.
+
+>>Dec 22, 2003	CVS fa34t23b4
+
+Fix typo in p2_complib.c that prevented compilation.  Fix problem
+with karlin.c for asymmetrical matrices, such as used with -U.
+
+>>Dec 10, 2003  CVS fa34t23b3
+
+Fix problem in resetp()/initfa.c that disabled banded Smith-Waterman
+DNA alignments.
+
+Allow spam() to do extended alignments for DNA if one of the sequences
+is < 50 nt.
+
+Cause default ktup to drop for short sequences.  For protein < 50, ktup=1;
+for DNA < 20, 50, 100 ktup = 1, 2, 3, respectively.
+
+>>Dec 7, 2003
+
+A new option, "-U" is available for RNA sequence comparison.  "-U"
+functions like "-n", indicating that the query is an RNA sequence.  In
+addition, to account for "G:U" base pairs, "-U" modifies the scoring
+matrices so that a "G:A" match has the same score as "G:G" match,
+and "T:C" match has the same score as a "T:T" match. (Corrected
+13-July-2010 -- the G:A/T:C scores are score(G:G)-3.) The asymmetric
+matrix required changes in dropnfa.c that were similar to the changes
+in dropgsw.c required for profiles.  In addition, m_msg.qdnaseq and pst.dnaseq
+ can now be SEQT_DNA, SEQT_RNA, SEQT_PROT, SEQT_UNK, or SEQT_OTHER.
+m_msg.ldnaseq does not use SEQT_RNA, only SEQT_DNA.  A new member of
+struct pstruct: int nt_align, is used to indicate nucleotide
+alignments.
+
+>>Nov 19, 2003
+
+Changes to Makefile's to distinguish between tatstats_fs.o and
+tatstats_ff.o.
+
+>>Nov 2, 2003
+
+Substantial changes to comp_lib.c, p2_complib.c, mshowbest.c, and
+mshowalign.c to support more sophisticated display options.
+Previously, one could have only one "-m #" option, even though several
+of the options were orthogonal (-m 9c is independent of -m 1 and -m2,
+which is independent of -m 6 (HTML)).  The programs now use a bitmask
+that allows independent options to be combined.  In particular -m 9c
+can be combined with -m 6, which can be very helpful for runs that
+need HTML output but can also exploit the encoding provided by -m 9c.
+
+The "-m 9" option now also allows "-m 9i", which shows the standard
+best score information, plus percent identity and alignment length.
+
+>>Oct 26, 2003	CVS fa34t23b1
+
+Additional fixes to Makefiles to enable tfastf34(_t).  Changes to
+support ossearch34 (a non-Phil Green optimized Smith-Waterman).
+
+>>Oct 8, 2003	CVS fa34t23b0
+
+Fixes to get DNA queries working in both directions, and to fix PCOMPLIB
+programs for "-V" option.  Currently, the parallel programs cannot use
+the "-V" option.
+
+>>Sept 25, 2003
+
+A new option is available for annotating alignments.  -V '@#?!'
+can be used to annotate sites in a sequence, e.g:
+	>GTM1_HUMAN ...
+	PMILGYWDIRGLAHAIRLLLEYTDS@S?YEEKKYT@MG
+	DAPDYDRS@QWLNEKFKLGLDFPNLPYLIDGAHKIT
+might mark known and expected (S,T) phosphorylation sites.  These
+symbols are then displayed on the query coordinate line:
+
+               10        20    @?  30  @     40  @     50        60
+GTM1_H PMILGYWDIRGLAHAIRLLLEYTDSSYEEKKYTMGDAPDYDRSQWLNEKFKLGLDFPNLP
+       ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
+gtm1_h PMILGYWDIRGLAHAIRLLLEYTDSSYEEKKYTMGDAPDYDRSQWLNEKFKLGLDFPNLP
+               10        20        30        40        50        60
+
+This annotation is mostly designed to display post-translational
+modifications detected by MassSpec with FASTS, but is also available
+with FASTA and SSEARCH.
+
+>>Sept 22, 2003	  CVS fa34t22b5
+
+The Altivec Smith-Waterman code has been removed.
+
+>>Sept 17, 2003	  CVS fa34t22b4
+
+A variety of different bugs have been fixed.  (1) All the functions in
+the old initsw.c are now in initfa.c; initsw.c will be removed.
+Specifically, the Profile/PSSM code is now in initfa.c.  initfa.c is
+now fully table driven. (2) various problems with prss34 and prfx34
+have been fixed in initfa.c.  (3) An additional ncbl2_mlib.c buffer
+overrun has been fixed. (4) fastf34 is now available in this package.
+Its performance is very similar to, but not identical to, fastf33.  I
+am tracking down the differences.  In general, the raw scores
+calculated by both programs are the same, but the statistical analysis
+seems to be slightly different.
+
+>>July 30, 2003   CVS fa34t22b3
+
+Fix bug in ncbl2_mlib.c that caused buffer overrun with blast/formatdb
+v3 description lines.
+
+>>July 28, 2003
+
+The initfa.c file has been substantially re-structured to use a
+table-driven approach to parameter setting, rather than the previous
+confusing combinations of #ifdef's.  Two tables of parameters are
+used, pgm_def_arr[] and msg_def_arr[], which specify values like the
+program name, reference, scoring matrix, default gap penalties, etc.
+msg_def_arr[] has the sequence types for the query, library, and
+algorithm, as well as other parameters (qframe, nframe, nrelv, etc),
+which greatly simplifies the sequence recognition logic.  ppst->pgm_id
+can be used to identify the program that is running.  Eventually,
+almost all of the program specific #ifdef's will be removed from
+initfa.c.  initfa.c now provides initsw.c functionality, so that
+initsw.c is no longer needed.
+
+>>July 25, 2003
+
+A new file is included - fasta.defaults - that lists the scoring
+matrix, gap penalty, and other defaults for all of the fasta34
+programs.  This file will be used soon to simplify parameter setting
+for the FASTA programs, and should also be used by Javascript WWW
+interfaces to the FASTA programs.
+
+>>July 22, 2003    CVS fa34t22b2
+
+Fixes to dropfs2.c, tatprobs.c to ensure that negative probabilities
+cannot occur.  Negative probabilities were never seen with standard
+matrices, but did occur with BL50.  Another optimization in dropfs.c
+considerably improves fasts34 performance in some cases.
+
+Fix a problem with formatdb v4 ASN.1 format files.
+
+>>July 12, 2003
+
+Fix a bug that prevented "-L" (long sequence descriptions) from
+working.
+
+>>July 9, 2003
+
+Fix reverse complement (M:K) error.  Fix off-by-one error for FASTA
+DNA alignments that caused the first aligned residue pair to be
+missed.
+
+>>July 4 - 8, 2003
+
+Incorporate blast-def-line ASN.1 parsing so that NCBI formatdb version
+4 files can be read.
+
+>>June 26, 2003
+
+The strategy for displaying the match/mismatch line (" .:" for -m 0)
+has been changed dramatically to accommodate more sophisticated
+strategies for indicating conservative replacements, e.g. because of
+PSSM's.  In addition to seqc0 and seqc1, which hold the aligned
+sequences for display, there is also seqca, which holds the alignment
+symbol.  calcons(), do_show(), and discons() have all changed to
+include seqca.  calcons() is somewhat more complex; discons() is much
+simpler.  (June 29, 2003 - dropgsw.c calcons() now displays profile
+similarity accurately - it is very very illuminating.)
+
+>>June 16, 2003	version: fasta34t22
+
+ssearch34 now supports PSI-BLAST PSSM/profiles.  Currently, it only
+supports the "checkpoint" file produced by blastpgp, and only on
+certain architectures where byte-reordering is unnecessary.  It has not
+been tested extensively with the -S option.
+
+	ssearch34 -P blast.ckpt -f -11 -g -1 -s BL62 query.aa library
+
+Will use the frequency information in the blast.ckpt file to do a
+position specific scoring matrix (PSSM) search using the
+Smith-Waterman algorithm.  Because ssearch34 calculates scores for
+each of the sequences in the database, we anticipate that PSSM
+ssearch34 statistics will be more reliable than PSI-Blast statistics.
+
+The Blast checkpoint file is mostly double precision frequency
+numbers, which are represented in a machine specific way.  Thus, you
+must generate the checkpoint file on the same machine that you run
+ssearch34 or prss34 -P query.ckpt.  To generate a checkpoint file,
+run:
+
+blastpgp -j 2 -h 1e-6 -i query.fa -d swissprot -C query.ckpt -o /dev/null
+
+(This searches swissprot for 2 iterations ("-j 2") using an E()
+threshold of 1e-6, saving the resulting position specific frequencies in
+query.ckpt.  Note that the original query.fa and query.ckpt must
+match.)
+
+>>June 5, 2003
+
+Fix to mshowbest.c to get -m 9 coordinates correct on reverse strand
+with pv34comp*.  Some additional fixes for prfx34.
+
+>>May 22, 2003
+
+Changes to llgetaa.c, getseq.c, comp_lib.c to provide a different
+library residue lookup table (sascii) for queries and libraries.  This
+allows one to make a prfx34 (like prss34, but using the fastx
+algorithm).  prfx34 is now available.
+
+>>May 13,14 2003
+
+Fixes to most of the drop*.c files, and mshowbest.c, to ensure that
+coordinates displayed with -m 9(c) and the final alignment are
+consistent.  They were consistent for fasta34/ssearch34/fasts34, but
+not for fastx34/fasty34.  The alignment coordinate system has been
+revised for consistency in all the drop*.c programs (coordinates
+used to be off-by-one for some, but not other functions).
+
+Fixes to -m 9c for fasty34/pv34compfy.  In addition, a problem was
+fixed with fastx34/fasty34 that appeared when a protein sequence was
+considerably longer than the DNA query, e.g. an EST vs titin (26K
+residues).  This problem only appeared on pv34compfx/fy on Xserve's
+under OS_X; but it should improve fastx34/fasty34 performance with
+very long protein sequences on all platforms.
+
+>>May 7,8 2003
+
+Changes to p2_workcomp.c, compacc.c, and p_mw.h to fix persistent
+bugs in the -m 9c display.  Previous pv34comp* programs would not
+return the correct coded alignment if more than 100 alignments came
+from the same node, or if an encoding was longer than 127 chars.
+
+Also, fixes to p2_complib.c, comp_lib.c, to allow long query sequences
+to be segmented.  Previously, only the first 20,000 residues were
+used.  The segmented queries are not overlapped; segmented library
+sequences are.
+
+>>May 5, 2003
+
+Changes to last_tat.c, scaleswt.c to ensure that all fasts alignments
+that are likely to have significant scores are displayed.  In previous
+implementations, if the query had more than 10 fragments, only the 100
+best scores were shown.  Now, we rescore up to 2500 alignments.  The
+new approach allows large mixtures to be used for searches, where some
+of the fragments from the mixture match too many proteins
+(e.g. actins).  Some differences between the fasts34 and pv34compfs
+implementations have been fixed.  The two programs typically will not
+give exactly the same results, because of small differences in the
+sampling procedures, but the results are essentially equivalent.
+
+>>Apr 11, 2003  CVS fa34t21b3
+
+Fixes for "-E" and "-F" with ssearch34, which were inadvertently disabled.
+
+A new option, "-t t", is available to specify that all the protein
+sequences have implicit termination codons "*" at the end.  Thus, all
+protein sequences are one residue longer, and full length matches are
+extended one extra residue and get a higher score.  For
+fastx34/tfastx34, this helps extend alignments to the very end in
+cases where there may be a mismatch at the C-terminal residues.
+
+-m 9c has also been modified to indicate locations of termination
+codons ( *1).
+
+>>Mar 17, 2003  CVS fa34t21b2
+
+A new option on scoring matrices "-MS" (e.g. "BL50-MS") can be used to
+turn the I/L, K/Q identities on or off.  Thus, to make "fastm34" use
+the isobaric identities, use "-s M20-MS".  To turn them off for "fasts34",
+use "-s M20".
+
+More fixes for correct alignment coordinates.  There was a conflict between
+-m 9 and -m 9c and subsequent alignment displays.
+
+>>Mar 13, 2003	
+
+Various fixes to produce correct fastm34 alignments.  Changes to all
+functions to correct potential problem with -m 9 alignment coordinates
+when both -m 9 and actual alignments are shown.
+
+>>Feb 25,27, 2003
+
+Modifications to re-activate showsum.c, which included corrections to
+the showbest() call in p2_complib.c.
+
+>>Feb 13, 2003	CVS fa34t21b1
+
+Modifications to dropfx.c to dramatically improve alignment speed for
+cases where the DNA sequence is considerably longer than the protein
+sequence.  Previously, a 200 aa vs 5000 nt comparison would do a full
+200 x 5000 Smith-Waterman alignment; with this modification, no more
+than a 200 x 1200 (2x3x200) alignment is done.  This optimization has
+not (yet) been applied to dropfz2.c (fasty/tfasty).
+
+>>Feb 11, 2003
+
+Small modifications to comp_lib.c, p2_complib.c, and nmgetlib.c to
+pass openlib() a possibly old lmf_str.  This allows openlib() to
+re-use memory mapped files.  closelib() no longer releases memory
+mapped file buffers.  Under Linux, memory mapped file buffers were not
+really released, so when comparing a set of sequences against nr, the
+program could not mmap() the database after several searches.  This
+will also speed up memory mapped multiple sequence searches.
+
+>>Jan 28-31, 2003  CVS fa34t21b0
+
+Fix another bug (all of v34t20) involved with overlapping long
+sequences.  And another bug that occurred when using sampled
+statistics, but appeared only on the SGI platform - thanks to Dmitri
+Mikhailov.  Several other issues have been addressed based on more
+instrumented runtime testing.
+
+Fix an old (all v34) bug that caused problems with -z 11-16 (shuffled
+sequence array was not allocated properly).  Fixed another bug with -z
+6/16 when using threaded (_t) searches in fasta34_t.
+
+Restructure statistical analysis functions (scaleswn.c, scaleswt.c) to
+return the "final" statistical estimation routine done in pst.zsflag_f.
+This allows the program to cope with searches against a single sequence
+correctly.
+
+Corrected an error for DNA sequences needing Altschul-Gish statistics.
+
+>>Jan 25, 2003
+
+Add option "-J start:stop" to pv34comp*/mp34comp*.  "-J x" used to
+allow one to start at query sequence "x"; now both start and stop can
+be specified.
+
+>>Jan 14, 2003
+
+Changes to apam.c to provide an error message on stderr when a scoring
+matrix cannot be found.
+
+Changes to dropfs2.c, initsw.c, initfa.c to provide -m9c information
+for fasts34 searches.  Modify the alignment algorithm to use
+probabilistic scores properly.
+
+>>Dec 22, 2002
+
+Change to compacc.c (sortbeste()) to do a second sort on zscore when
+several sequences have E() == 0.
+
+>>Nov 27, 2002
+
+Change FSEEK_T to fseek_t to keep Borland BCC5 happy. 
+
+>>Nov 14-22, 2002  CVS fa34t20b6
+
+Include compile-time define (-DPGM_DOC) that causes all the fasta
+programs to provide the same command line echo that is provided by the
+PVM and MPI parallel programs.  Thus, if you run the program:
+
+    fasta34_t -q -S gtt1_drome.aa /slib/swissprot 12
+
+the first lines of output from FASTA will be:
+
+    # fasta34_t -q gtt1_drome.aa /slib/swissprot
+     FASTA searches a protein or DNA sequence data bank
+     version 3.4t20 Nov 10, 2002
+    Please cite:
+     W.R. Pearson & D.J. Lipman PNAS (1988) 85:2444-2448
+
+This has been turned on by default in most FASTA Makefiles.  
+
+Fix p2_complib.c so that qstats[] is always allocated before it is used.
+
+Fix serious bug in non-threaded comp_lib.c that caused some high
+scoring sequences to be missed by fasts34.  New tests are included in
+test.sh to detect this problem in the future.
+
+The shell sort algorithm in sortbeste(), sortbestz(), and sortbesto()
+has been modified to use an improved algorithm that will not go
+quadratic in pathological cases.
+
+nmgetlib.c and mmgetaa.c have been modified to remove "^A" in libstr
+when used with p2_complib.c.
+
+Fix problem with MAXSEG in tatstats.h with IBM/AIX.
+
+Changes to most Makefiles to use -DSAMP_STATS; fixes to p2_complib.c
+for SAMP_STATS.
+
+>>Oct 22, Nov 3, Nov 9, 2002   CVS tag fa34t20b5
+
+Fix problem in comp_lib.c that caused the query sequence length to be
+counted twice.
+
+Fixed problem with prss34 (updated find_zp in showrss.c).
+
+Correct shuffling function in several places.
+
+Add jitter back to addhistz() - improves appearance with prss34.
+
+Changes to fix problems with aln_code using -m 9c.
+
+Fix to serious bug in scaleswt.c (fasts34, etc) that caused sorts on
+the high scores to take much too long.  The program is now 10X faster,
+and scales well on PVM/MPI.
+
+Fix to llgetaa.c to work with new getseq() API with automatic alphabet
+recognition.
+
+>>Oct 12, 2002 CVS tag fa34t20b4
+
+Several very obscure (and sometimes old) bugs that appeared in certain
+MPI environments have been fixed.  This occurred because the pst.sq[]
+array did not always have a '\0' at the end.  In addition,
+mshowalign.c/p2_workcomp.c sometimes failed to put the '\0' at the end
+of seqc0/seqc1.  Correct bug introduced in fa34t20b3 for fasts34(_t).
+
+>>Oct 9, 2002 CVS tag fa34t20b3
+
+Fix to apam.c build_xascii() to not zero-out qascii[0].  Fix
+Makefile.pvm4.  Fix problem with -m 9c with compacc.c.
+
+>>Sept 28, 2002 
+
+Additional fixes to -m 9c in p2_complib.c/compacc.c/mshowbest.c.
+Remove restriction in fasts34(_t) to less than 30 peptides (though no
+more than 30 peptides can be aligned currently).
+
+>>Sept 24, 2002
+
+Fix p2_workcomp.c so that e_scores are delivered correctly when
+last_calc flag is set, and -m 9c provides alignments when only one
+best hit is present.
+
+Fix comp_lib.c to use different maxn and overlap for each different
+query sequence.  fasta34 and fasta34_t now have identical results when
+a long sequence is searched.
+
+Add '@C:101' support to memory mapped FASTA format files.
+
+Fix mshowalign.c so that coordinates returned by cal_coord() use
+loffset+l_off.
+
+>>Sept 14, 2002	CVS tag fa34t20b2
+
+Changes to p2_complib.c, compacc.c to fix statistics problems with
+pv34compfs on query sequences with more than 10 fragments.
+
+>>Aug 27, 2002
+
+Modifications to mshowbest.c and drop*.c (and p2_workcomp.c,
+compacc.c, doinit.c, etc.) to provide more information about the
+alignment with the -m 9 option.  There is now a "-m 9c" option, which
+displays an encoded alignment after the -m 9 alignment information.
+The encoding is a string of the form: "=#mat+#ins=#mat-#del=#mat".
+Thus, an alignment over 218 amino acids with no gaps (not necessarily
+100% identical) would be =218.  The alignment:
+
+       10        20        30        40        50          60         70  
+GT8.7  NVRGLTHPIRMLLEYTDSSYDEKRYTMGDAPDFDRSQWLNEKFKL--GLDFPNLPYL-IDGSHKITQ
+       :.::  . :: ::  .   .:::         : .:    ::.:   .: : ..:.. :::  :..:
+XURTG  NARGRMECIRWLLAAAGVEFDEK---------FIQSPEDLEKLKKDGNLMFDQVPMVEIDG-MKLAQ
+               20        30                 40        50        60        
+
+would be encoded: "=23+9=13-2=10-1=3+1=5".  The alignment encoding is
+with respect to the beginning of the alignment, not the beginning of
+either sequence.  The beginning of the alignment in either sequence is
+given by the an0/an1 values. This capability is particularly useful
+for [t]fast[xy], where it can be used to indicate frameshift positions
+"/#\#" compactly.  If "-m 9c" is used, the "The best scores" title
+line includes "aln_code".
+
+>>Aug 14, 2002	CVS tag fa34t20
+
+Changes to nmgetlib.c to allow multiple query searches coming from
+STDIN, either through pipes or input redirection.  Thus, the command
+
+       cat prot_test.lseg | fasta34 -q -S @ /seqlib/swissprot
+
+produces 11 searches.  If you use the multiple query functions, the
+query subset applies only to the first sequence.
+
+Unfortunately, it is not possible to search against a STDIN library,
+because the FASTA programs do not keep the entire library in memory
+and need to be able to re-read high-scoring library sequences.  Since
+it is not possible to fseek() against STDIN, searching against a STDIN
+library is not possible.
+
+>>Aug 5, 2002
+
+fasts34(_t) and fastm34(_t) have been modified to allow searches with
+DNA sequences.  This gives a new capability to search for DNA motifs,
+or to search for ordered or unordered DNA sequences spaced at
+arbitrary distances.
+
+>>Aug 4, 2002
+
+comp_lib.c has been modified to provide comp_mlib.c function.
+comp_mlib.c is no longer used.  comp_lib.c with the "mlib" function
+can now recognize protein or DNA sequences automatically, and reads
+from stdin can now detect DNA/protein sequence types automatically.
+Changes to compacc.c, getseq.c, doinit.c initfa.c, initsw.c, and
+nmgetlib.c to support automatic sequence type detection.
+
+>>July 28-31, 2002
+
+(1) The various Makefile's have been "normalized".  The fast*34[_t]
+    (Makefile.34m.common[_sql]), Makefile.pvm4[_sql], and
+    Makefile.mpi4[_sql] make files all use a common set of filenames,
+    described in Makefile.fcom.  This greatly simplifies adding
+    programs, but requires that all *.o files be deleted when moving
+    from fast*34* to pv34comp* to mp34comp*.
+
+(2) showalign.c/p_showalign.c have been merged into mshowalign.c
+    showbest.c/manshowbest.c have been merged into mshowbest.c.  Some
+    of the related files (showun.c, manshowun.c, have not been merged
+    or tested).
+
+(3) Code for ranking scores with valid e_value's incorporated.
+
+(4) Bug fixes in p2_complib.c, so that fasts34/fasts34_t/pvcompfs
+    provide identical statistics.
+
+>>July 26, 2002
+
+Makefile.pvm4_sql and Makefile.pvm4 have been substantially simplified
+by providing the worker program name from the h_init() function in the
+initfa.c/initsw.c files.
+
+>>July 24, 2002
+
+Substantial modifications to param.h, structs.h to ensure that no
+sequence specific information is kept in struct pstruct.  This
+structure now holds the pam[] matrix, and other scoring parameters,
+but nothing that is dependent on aa0.  The aa0 dependent stuff (nm0,
+Lambda, K, etc) is now stored in struct mngmsg.  This was mostly done
+to support the pv34comp* programs, which have separate mngmsg
+structures but the same pstructs.
+
+The fasts34, fasts34_t, and pv34compfs/c34.workfs have all been tested
+successfully.
+
+>>July 19, 2002
+
+Fix an old bug in the calculation of E()-values in DNA databases
+longer than 2147483647 residues on machines with 32-bit longs.
+
+
+>>July 28-31, 2002
+
+(1) The various Makefile's have been "normalized".  The fast*34[_t]
+    (Makefile.34m.common[_sql]), Makefile.pvm4[_sql], and
+    Makefile.mpi4[_sql] make files all use a common set of filenames,
+    described in Makefile.fcom.  This greatly simplifies adding
+    programs, but requires that all *.o files be deleted when moving
+    from fast*34* to pv34comp* to mp34comp*.
+
+(2) showalign.c/p_showalign.c have been merged into mshowalign.c
+    showbest.c/manshowbest.c have been merged into mshowbest.c.  Some
+    of the related files (showun.c, manshowun.c, have not been merged
+    or tested).
+
+(3) Code for ranking scores with valid e_value's incorporated.
+
+(4) Bug fixes in p2_complib.c, so that fasts34/fasts34_t/pvcompfs
+    provide identical statistics.
+
+>>July 26, 2002
+
+Makefile.pvm4_sql and Makefile.pvm4 have been substantially simplified
+by providing the worker program name from the h_init() function in the
+initfa.c/initsw.c files.
+
+>>July 24, 2002
+
+Substantial modifications to param.h, structs.h to ensure that no
+sequence specific information is kept in struct pstruct.  This
+structure now holds the pam[] matrix, and other scoring parameters,
+but nothing that is dependent on aa0.  The aa0 dependent stuff (nm0,
+Lambda, K, etc) is now stored in struct mngmsg.  This was mostly done
+to support the pv34comp* programs, which have separate mngmsg
+structures but the same pstructs.
+
+The fasts34, fasts34_t, and pv34compfs/c34.workfs have all been tested
+successfully.
+
+>>July 8, 2002
+
+Modifications to comp_lib.c, initfa.c and new scaleswt.c, tatstats.c
+to support FASTS with Tatusov statistics.
+
+last_params() has been introduced to allow aa0 dependent changes in m_msg/pstr.
+
+sortbest() has been moved into initfa.c/initsw.c to make it function specific.
+
+find_z() takes an additional parameter, escore.
+
+The do_work() results structure, beststr, and stat_str all accommodate
+escores as well as integer scores (stat_str also saves segn and segl
+but doesn't need them).
+
+In scaleswt.c, process_hist() now knows much more about Tatusov statistics.
+
+last_stats() provided to accommodate rank-based statistical corrections.
+
+scale_scores() is the last function to modify the beststr scores
+(final calculation of E-value).
+
+Some sortbest*() calls and some bptr[i]->zscore=find_zp() loops have
+been moved into scale_scores();
+
+>>July 3,5, 2002
+
+Modifications to allow mySQL comments (--) in "library.sql 16" files.
+Thus, a first line of:
+
+	--host seqdb user password;
+
+is read by FASTA as the login information to a mySQL server, but is
+ignored by mySQL.  "DO" commands in FASTA mySQL files can also be
+rendered invisible to mySQL in this way.  See "do.sql".
+
+Modifications to mysql_lib.c to allow very long SQL statements.  The
+buffer is now dynamically reallocated in 4Kb chunks.
+
+The fasta3.1 man page has been updated and re-organized.
+
+>>June 26, 2002
+
+Minor modifications to nmgetaa.c (openlib()) to use the same arguments
+for searching and PRSS.  PRSS needs access to all of m_msg, but
+searches do not.  Other small fixes to comp_mlib.c, towards the goal
+of merging comp_mlib.c and comp_lib.c.
+
+>>June 25, 2002
+
+Modify the statistical estimation strategy to sample all the sequences
+in the database, not just the first 60,000.  The histogram is still
+based only on the first 60,000 scores and lengths, though all scores
+and lengths are shown.  The fit to the data may be better than the
+histogram indicates, but it should not be worse.
+
+Currently, this modification is available only if the -DSAMP_STATS
+option is defined.
+
+>>June 23, 2002	CVS fa34t11d4
+
+Fix a very long-standing bug in fasty/tfasty that caused 'NNN' to be
+translated as 'S', rather than 'X'.  fastx/tfastx has done this
+correctly for many years, but the fasty/tfasty code that I received
+from Zheng Zhang was not implemented correctly (my fault, his code was
+fine).
+
+>>June 19, 2002
+
+Added "-C #" option, where 6 <= # <= MAX_UID (20), to specify the
+length of the sequence name display on the alignment labels.  Until
+now, only 6 characters were ever displayed.  Now, up to MAX_UID
+characters are available.
+
+>>May 30, 2002	CVS fa34t11d3
+
+Fixed problem with programs using the default -E cutoff when -b was
+provided.  With this implementation, -E can override -b, but -b
+overrides the default -E.
+
+Fixed problem with 64-bit file offsets in param.h (change USE_FSEEK0
+-> USE_FSEEKO, include -D_LARGEFILE_SOURCE and -D_LARGEFILE64_SOURCE
+in Makefile.linux_sql).  Put limits on alignment display length (200
+chars).  More checks for null returns from SQL queries.
+
+>>Apr 17, 2002	CVS fa34t11d2
+
+Fixed bug in mm_file.h/ncbl2_mlib.c that caused the SGI version to be
+unable to read blast2 format files.
+
+Changed "mp_*" tags to "pg_*" for -m 10 option.
+
+>>Mar 30, 2002
+
+Fix embarrassing bug in revcomp() (getseq.c) that failed to complement
+the central nucleotide in a sequence with an odd number of residues.
+
+Small changes to dropfs.c for more segments.
+
+>>Mar 16, 2002
+
+Added create_seq_demo.sql, nt_to_sql.pl to show how to build an SQL
+protein sequence database that can be used with the mySQL
+versions of the fasta34 programs.  Once the mySQL seq_demo database
+has been installed, it can be searched using the command:
+
+	fasta34 -q mgstm1.aa "seq_demo.sql 16"
+
+mysql_lib.c has been modified to remove the restriction that mySQL
+protein sequence unique identifiers be integers.  This allows the
+program to be used with the PIRPSD database.  The RANLIB() function
+call has been changed to include "libstr", to support SQL text keys.
+Due to the size of libstr[], unique ID's must be < MAX_UID (20)
+characters.
+
+A "pirpsd.sql" file is available for searching the mySQL distribution
+of the PIRPSD database.  PIRPSD is available from
+ftp://nbrfa.georgetown.edu/pir_databases/psd/mysql.
+
+>>Mar 6, 2002
+
+Fix showbest.c showbest() to report pst.zdb_size as database size.
+Fix dropnfa.c spam() to address off-by-one on end of run, and double
+counting on backwards scan.  Fix dropnfa.c do_fasta() to fix another
+problem introduced by -S.  Changes to comp_lib.c to ensure that both
+the beginning and end of the query and library sequence have '\0'
+present.  Changes to initfa.c, initsw.c to ensure that a match to a
+lower-case letter with -S gets exactly the same score as a match to an
+'X'.  Changes to mmgetlib.c to work with 64-bit longs in *.xin files.
+
+>>Feb 26, 2002
+
+Fixes to doinit.c, initfa.c, initsw.c to allow DNA matrices using the
+"-s dna.mat" option.  A new matrix, "d50ry.mat" is available that
+scores +5 for a match, -2 for a transition, and -5 for a
+transversion. "d50ry.mat" corresponds to DNA PAM50 with transitions
+twice as common as transversions.  When "-s dna.mat" is used, "-n"
+MUST be used as well.
+
+Query sequence names ("aa", "nt") should be more accurate.
+
+>>Feb 22, 2002
+
+Fix to getseq.c to allow "plain" sequence files.
+
+>>Feb 12, 2002
+
+Minor fix to res_stats.c.
+
+>>Jan 28, 2002
+
+Fixes to resurrect res_stats.c.  res_stats (cc -o res_stats
+res_stats.c scaleswn.c -lm) takes the output from a current "-R
+file.res" file and calculates statistical significance - this allows
+one to take exactly the same set of scores (and lengths) and calculate
+statistical estimates using different strategies.
+
+>>Jan 24, 2002
+
+modifications to mmgetlib.c, ncbl2_mlib.c to more robustly read memory
+mapped files (*.xin, map_db) on machines lacking "native" 64-bit
+longs.  If the machine provides some definition for a 64-bit long
+(e.g. "long long", "int64_t"), things should work. 64-bit offsets into
+memory mapped files work properly on Alpha, SGI, i386 Linux, and
+MacOSX.  The current implementation depends either on 64 bit longs
+(Compaq Alpha's pre 4.0G) or the <sys/inttype.h> file.  Makefile,
+Makefile.alpha, and Makefile.linux have been modified.
+
+Modifications to nmgetlib.c, mmgetlib.c to provide GI numbers and
+Accession versions for Genbank searches.  If the GI:123456 number is
+available, it will be used and the description line will be formatted:
+
+	gi|123456|gb|ACC1234.1|LOCUS description
+
+This should help FAST_PAN runs, where the version of a sequence
+changes frequently.
+
+>>Jan 10, 2002
+
+Modifications to p2_complib.c, p2_workcomp.c to more reliably allocate
+space for library sequence descriptions on the master and workers.
+
+>>Jan 2-3, 2002		CVS fa34t10c/fa34t10d3
+
+Fixes to comp_lib.c to support Macintosh and Windows/Turbo-C
+compilation.  New Makefile.tc.  Macintosh version supports both
+"Classic" and "Carbon" environments.
+
+"<values.h>" has been replaced with the more modern "<limits.h>"
+
+Fixes to p2_complib.c to support n_libstr (libstr length) in GETLIB().
+
+comp_thr.c, complib.c removed.
+
+>>Dec 16, 2001
+
+Complete integration of comp_mlib.c with both the unthreaded and
+threaded programs.  Comp_mlib allows fasta34 and fasta34_t to compare
+a database with a second database, just as pv34compfa does.  Using
+multiple queries with fasta34_t is not as efficient as pv34compfa (and
+it cannot use networks of Unix workstations), but it is much easier to
+use and install.
+
+With the comp_mlib.c option, fasta34 cannot automatically recognize
+DNA sequences, just as pv34compfa no longer recognizes DNA sequences.
+You must use the "-n" option to search with DNA sequences.  The other
+programs (fastx34, tfastx34, etc) "know" the type of the query and
+database sequences, so "-n" is only required for fasta34(_t).
+
+>>Dec 14, 2001		CVS tag fa34t10b
+
+Fix problems reading DNA databases in blast2 format.
+
+>>Dec 11, 2001
+
+Changes to spam() in dropnfa.c so that, for DNA sequences, the
+boundaries of a local alignment region are found using the same
+algorithm as previous versions of fasta.  For
+protein sequences, the algorithm will extend the local region beyond
+the "ktup" boundaries if a better score can be found.  For DNA
+sequences, this raises the noise rather than increasing sensitivity,
+so it is turned off and "ktup" boundaries are respected.  The old,
+"ktup" boundary algorithm is available with -DNOSPAM_EXT.
+
+This version also includes a working res_stats.c, which can be used to
+test various statistical estimates on exactly the same set of scores.
+
+Fixed problems with -m 9 percent identity for fastx/fasty/tfastx/tfasty.
+These errors have been present since -m 9 was implemented.
+
+>>Dec 10, 2001
+
+Fix to map_db.c to work correctly with files > 2 Gb when 64-bit longs
+are available.  It is not yet designed to work with ftello() and other
+offset types.
+
+>>Nov 11,21, 2001	CVS tag fa34t10a, fa34t10d1
+
+Substantial changes to revcomp(), getseq(), and other functions to
+correct problems with -S on DNA sequences.  Sequences with lower case
+nucleotides were not recognized or reverse complemented properly.
+
+Fix to dropnfa.c (v34t07, Nov 21, 2001) bg_align() to re-initialize
+static globals - this fixes a problem encountered with pv34compfa.  A
+new main program, comp_mlib.c has been added to the CVS archive,
+although it is not referenced in any of the Makefiles.  comp_mlib.c
+works like p2_complib.c and compares a library against another
+library.
+
+>>Nov 4, 2001
+
+Change to dropnfa.c spam () while(1) -> while(lpos <= dmax->stop).
+This fixes a problem with ktup=1 on Suns only, so far.
+
+>>Oct 4, 2001		CVS tag fa34t10
+
+Add comp_lib.c file, which merges complib.c (unthreaded) and
+comp_thr.c (threaded) code into one file.
+
+Modifications to nmgetlib.c, mmgetaa.c to allow Genbank flatfile
+format without DESCRIPTION or ACCESSION lines.
+
+Additional fix for -S with ktup=1.
+
+>>Sept. 24, 2001
+
+Fix to have correct gap-penalties for short scoring matrices with
+tfastx/fastx.
+
+>>Sept. 10, 2001	CVS tag fa34t05d6
+
+Fix a bug introduced by -S fix in fa34t05d5.  Also, try to remove
+changes in p34compfa compared to pv4compfa output.
+
+>>Sept. 6, 2001		CVS tag fa34t05d5
+
+Fix the -S dropnfa/fx/fz2 bug that was not actually fixed in
+fa34t05d4.  Incorporate the correct scaleswn.c referred to in
+fa34t05d4.
+
+>>Sept. 5, 2001		CVS tag fa34t05d4
+
+Fix problem with m_msg.quiet that prevented interactive prompts for
+ktup, file name, etc with threaded programs.
+
+Fix serious bug in dropnfa.c/dropfx.c/dropfz2.c that caused -S to work
+improperly on sequences with effective length of 3 or less.
+
+Change to scaleswn.c to make mle_cen(), mle_cen2() more robust to cases
+where the top and bottom scores are the same.
+
+Change p2_complib.c to avoid compiler complaints with (void *)wstage2p=NULL
+on some platforms.
+
+>>Aug. 30, 2001		CVS tag fa34t05d3
+
+Fixed problem with uthr_subs.c for Suns, but changed Makefile.sun to
+use pthreads rather than Sun Unix threads.  Removed SQL stuff from
+Makefile.mpi4/pvm4 and added Makefile.mpi4_sql/pvm4_sql.  
+
+fa34t05d2 - fix to map_db.c to provide *sascii.
+
+fa34t05d1 - fixes to ibm_pthr_subs.c and Makefile.ibm from IBM.
+
+>>Aug. 20, 2001		CVS tag fa34t05d0
+
+The pvm/mpi complib programs have been substantially updated with
+release 3.4.  See readme.v34t0 for more information.  With version
+3.4, the MPI programs are mp34comp*, mu34comp*, etc.
+
+A major effect of this change is to disable automatic sequence type
+(protein/DNA) recognition with pv34compfa/mp34compfa.  By default,
+protein libraries are assumed.  Thus, pv34compfa/mp34compfa require
+the "-n" command line option when running pv34compfa/mp34compfa on DNA
+sequence libraries.  This issue does not occur with the other
+programs, which will recognize the appropriate sequence type, because
+it is determined by the program (e.g. pv34compfx requires
+DNA:protein).
+
+Fixed substantial problem with 64-bit file offsets for Linux in
+complib.c/comp_thr.c, p2_complib.c.  This problem, solved by Doug
+Blair, was preventing the threaded versions from working properly in
+memory mapped mode.
+
+In all earlier versions of fasta, when very long sequences were
+searched, the sequence length reported was that of the "chunk" that
+was actually searched (typically 80,000-query_length) rather than the
+actual library sequence length.  This peculiar behavior has now changed,
+and the full length of the library sequence, not the sequence chunk,
+is reported as the library sequence length.  Note that chunks are
+still used, however, which can cause the same alignment to be shown
+twice.  In addition, the "-m 9" output format has changed to report
+the coordinates of the query and library sequence (see below), which
+may be different from 1-sequence_length because the query and
+library sequences may have been extracted from larger sequences.  Four
+additional fields have been added, "pn0", "px0", "pn1", "px1", that are
+the positions of the beginning (pn0/1) and end (px0/1) of the
+query/library sequence.  pn0/1 would typically be changed with the
+"@C:#" directive, described below.
+
+Changes to doinit.c/initfa.c/initsw.c to provide a new function -
+f_lastenv() - that allows function-specific adjustments to parameters
+after the command line options have been read but before the first
+sequence is read.  This change solved problems with "mp/pv34compfx -S".
+
+fasts34/tfasts34 now recognize that 'I/L' are the same, as are 'Q/K'
+(which are apparently indistinguishable by Mass-Spec).  The latter
+identity is on by default, but can be turned off with "-h 0".
+
+The MPI/PVM versions of the programs have been tested extensively with
+compfa, compfx, and comptfx.  Makefile.mpi4 now works properly.
+Changes to p2complib.c to support the PVM option "-T 1-4", which
+allows one to run on nodes 1-4 of a (presumably larger) PVM virtual
+machine.  This option has no effect on the mp34comp* programs.  The
+old "-T 4" to run on 4 nodes, is also available.  If each node has 2
+cpu's, as indicated in the "pvmd hostfile", both CPU's will be used
+for a total, in this example, of 8 processes. This allows one to
+specify a large PVM machine and use separate parts of it
+independently.
+
+Changes to nmgetlib.c to fix problems with longer dates in GCG files
+(Y2K).  Fixes to faatran.c for extended alphabets and 'X's.  Various
+code clean-ups to make "gcc -Wall" a little bit (not much) happier.
+
+This is the first distributed fasta34 version.
+
+================
+>>Aug 9, 2001		CVS tag fa34t05
+
+Corrections to initfa.c to allow -S to work with tfastx/y.
+Fix to manshowbest.c for query position with -m 9.
+
+>>July 18, 2001		CVS tag fa34t04
+
+Various changes to complib.c, comp_thr.c, p2_complib.c, showbest.c,
+showalign.c to deal with overlapping alignments in long sequences that
+have been segmented.  When long sequences are segmented (lcont>0), the
+eventual total length (n1tot_v) is saved at beststr->n1tot_p.  If
+there was no lcont, then beststr->n1tot_p = NULL, and beststr->n1
+should be used as the sequence length.  This has the advantage of
+requiring space only when long sequences are encountered, and
+requiring only one integer for several segments.
+
+m_msg.noshow has been removed.
+
+The -m 9 format has been changed - 5 fields have been added, 4
+(pmn0/pmx0/pmn1/pmx1) provide the beginning and end coordinates of the
+query and library sequence; the last (fs) reports the number of
+frameshifts.  The names of the alignment boundaries have been changed
+from min0/max0/min1/max1 to amn0/amx0/amn1/amx1 (Alignment miN/maX).
+
+The SQL format has been extended to provide for statements that do
+things but do not generate results, such as creating and selecting into a temporary table, e.g.:
+================
+    do
+    create temporary table seq_pos (
+    id int unsigned not null auto_increment primary key,
+    prot_id int unsigned not null default 0,
+    start int unsigned not null default 0,
+    length int unsigned not null default 0,
+    )
+    ;
+    do
+    insert into seq_pos (prot_id, start, length)
+      select id, 11, len-10
+      from protein, annot
+      where len > 100
+      and annot.protein_id = protein.id
+      and annot.pref=1
+    ;
+    select seq_pos.id,
+       substring(protein.seq, start, length),
+       concat("@C:", start, " ", descr)
+    from protein, seq_pos, annot
+    where protein.id = annot.protein_id
+      and protein.id = seq_pos.prot_id
+      and annot.pref = 1
+    ;
+    select prot_id,
+       concat("@C:", start, " ", descr)
+    from seq_pos, annot
+    where annot.protein_id = seq_pos.prot_id
+      and seq_pos.id = #
+      and annot.pref = 1
+     ;
+================
+
+  In the current implementation, these statements must start with "DO"
+as the first two characters on the line, and come immediately after a
+line ending with ';'.  The text from "DO" to the next ";", excluding
+the "DO", is executed when the database connection is made.
+
+===== >>July 12, 2001
+
+The allocation of the work_info data structure used to send
+information to the worker threads has been changed.  The old method
+worked, possibly by accident.
+
+A bug in p2_complib.c that caused E()-values to be calculated
+improperly for the first query sequence has been fixed.
+
+>>July 11, 2001	--> fa34t02
+
+It is now possible to specify output coordinates in library sequences
+by including the string: "@C:number" on the description line, e.g.
+
+   >gtm1_human gi|12345 human glutathione transferase M1 @C:21
+
+would label the first residue in the library sequence "21" rather than
+"1".  This capability has been included to provide accurate
+coordinates for searches done against subsequences generated by an SQL
+query.  For example, one could use a query of the form:
+
+ SELECT protein.id, substring(protein.seq,11,length(protein.seq)-20),
+	concat(protein.name," @C:11 ",protein.descr)
+ FROM protein;
+
+to generate a sequence set with each sequence starting with residue
+11.  Without the "@C:11" option on the description line, the program
+would number the alignment positions starting at 1, even though the
+first residue of the sequence really started at 11.  "@C:11" allows
+one to correct the coordinate system.
+
+Currently, "@C:offset" is available only with library type 1 (fasta
+format) and 16 (mySQL).
+
+The SQL-generated database with "@C:offset" can be used with both the
+fast*34(_t) programs and with pv34comp*.  However, the SQL syntax is
+used differently in the fasta34 and pv34compfa programs.  fast*34(_t)
+requires three SQL statements during a search: (1) a statement to
+generate a large set of library sequences; (2) a statement to generate
+a description of a single sequence, given a unique identifier provided
+by (1); and (3) a statement to generate a single sequence given a
+unique identifier provided by (1).  For fast*34 searches, the third
+(3) SQL statement must provide the "@C:offset" information in the
+third results field for the offset to be used.  It is optional in (1)
+and (2).
+
+The pv34comp* programs only require one SQL statement, statement (1)
+above, which must provide three fields, a unique identifier, the
+sequence, and a complete description that must include "@C:offset" if
+substrings are used.  If SQL queries (2) and (3) are provided, they
+are  ignored.  Thus, the same files can be used by both programs, but
+the "@C:offset" is required in different SQL queries by the fast*34
+and pv34comp* programs.
+
+Other changes:
+
+Re-incorporation of GAP_OPEN option; fix to Altschul-Gish stats when
+GAP_OPEN is used.
+
+Re-incorporation of A. Mackey's spam() improvement in dropnfa.
+
+Fixes to include file ordering to allow fast*34(_t) pv34comp* programs
+to compile.
+
+Fix to lascii[] for SQL database queries.
+
+Fix to an old bug in comp_thr.c to send individual worker_info
+structures to threads (does not fix LINUX threads problems, however).
+
+=====
+>>July 9, 2001
+
+Considerable changes to support non-global library functions.
+
+(1) Separate ascii/sequence mapping arrays are used by the
+    query-reading (qascii), library-reading (lascii), and sequence
+    comparison function (pascii) routines.  As a result, there is no
+    longer a need for tgetlib.o/lgetlib.o - lgetlib.o can serve both
+    functions.
+
+(2) This also allows us to remove all #ifdef TFAST/FASTX conditionals
+    from complib.c/comp_thr.c/p2_complib.c.  We no longer need
+    tcomp_thr.o, comp_thrx.o, etc.  We still have a variety of
+    p2_complib.o variations to support the different c34.work* files.
+
+(3) Because non-global openlib/getlib functions are available, exactly
+    the same open/get functions are available for reading both the
+    query and reference libraries in pv34comp* programs.  The
+    host-specific openlib/getlib functions in hxgetaa.c are now
+    provided by nmgetlib.c, etc. This has two effects:
+
+    (a) it is now possible to compare a query database generated by an
+        SQL query to a library database generated by a different SQL
+        query.
+
+    (b) pv34comp* has lost (at least in this version) the ability to
+        automatically detect the query sequence type. To search with a
+        DNA query, you MUST use "-n".
+
+(4) the resetp() function is now responsible for almost all of the
+    function specific (TFAST/FASTX/etc) initializations.  All of the
+    function specific code has been removed from complib.c/comp_thr.c
+    and most of it has been moved to initfa.c/resetp().
+
+(5) manageacc.c has been merged into compacc.c (mostly prhist()).
+
+=====
+>>June 1, 2001
+
+Many changes to accommodate a new - no global variable - strategy for
+reading sequence databases.  Every time a file is opened, a struct
+lmf_str is allocated which can be used for memory mapped files, ncbl2
+files, and mysql files.
+
+In addition, an open'ed file has a default sequence type: DNA or
+protein, or one can open a file in a mode that will allow the sequence
+type to be changed.
+
+=====
+>>May 18, 2001		CVS: fa33t09d0
+
+A new compile time parameter - -DGAP_OPEN, is available to change the
+definition of the "-f gap-open" parameter from the penalty for the
+first residue in a gap to a true gap-open penalty, as is used in BLAST
+and many other comparison algorithms.  This will probably become the
+default for fasta in version 3.4.
+
+Fixes to conflicts between "-S" and "-s matrix".  When a scoring
+matrix file was specified, lower-case alignments were not displayed
+with -S (although the scores were calculated properly).
+
+More extensive testing of mysql_lib.c (mySQL query-libraries) with
+the pv4comp* and mp4comp* programs.
+
+=====
+>>April 5, 2001		CVS: fa33t08d4b3
+
+Changes in nmgetlib.c and ncbl2_mlib.c to return long sequence
+descriptions for PCOMPLIB (pv4/mp3comp*).  Also fix p2_complib.c to
+request DNA library for translated comparisons.
+
+Fix for prss33(_t) to read both sequences from stdin.
+
+=====
+>>March 27, 2001	CVS: fa33t08d4
+
+Modifications to allow 64-bit fseek/ftell on machines like Sun,
+Linux/Intel, that support -D_FILE_OFFSET_BITS=64, -D_LARGE_FILE_SOURCE
+off_t, and fseeko(), ftello() with the option -DUSE_FSEEKO.  Machines
+with 64-bit long's do not need this option.  Machines with 32-bit
+longs that allow files >2 Gb can do so with 64-bit file access
+functions, including fseeko() and ftello(), which work with off_t file
+offsets instead of long's.
+
+=====
+>>March 3, 2001		CVS: fa33t08d2
+
+Corrected problems in nmgetaa.c and mysql_lib.c with parallel
+programs, and one serious problem with alternate DNA scoring matrices
+(initfa.c, initsw.c) not being set properly.  A subtle problem with
+the merge of scaleswn.c and scaleswg.c is fixed.
+
+>>February 17, 2001
+
+Modified mysql_lib.c to use "#", rather than "%ld", to indicate the
+position of the GID.  This change was made because sprintf() cannot be
+used reliably to generate an SQL string, as '"' and '%' are used in 
+such strings.
+
+=====
+>>January 17, 2001
+(no version change, date change)
+
+Minor fixes to initfa.c, initsw.c to deal with DNA scoring matrices
+properly. "-n -s dna.mat" is required for the sequence/matrix to be
+recognized as DNA.
+
+>>January 16, 2001
+-->v34t00
+
+Merge of the main CVS trunk - fa33t06 with the latest release branch,
+fa33t08.
+
+In addition, PCOMPLIB mods have been made to mysql_lib.c.  Because
+p2_complib.c gets sequence description information during the first
+read of the database, the mysql_query must be changed to return:
+result[0]=GID, result[1]=description, result[2]=sequence.  In the
+PCOMPLIB case, the other SQL queries (for GID description, sequence)
+are not necessary but must still be provided.
diff --git a/doc/readme.v35 b/doc/readme.v35
new file mode 100644
index 0000000..25c52c4
--- /dev/null
+++ b/doc/readme.v35
@@ -0,0 +1,535 @@
+
+  $Id: readme.v35 120 2010-01-31 19:42:09Z wrp $
+  $Revision: 55 $
+
+>>Sep. 10, 2008
+
+Fix problem in init_ascii() call for p2_complib2.c.
+
+>>Sep. 9, 2008
+
+Fix bug in display of library name when written to an output file
+(rather than stdout).
+
+>>Aug. 28, 2008		fa35_04_02	SVN Revision: 45
+
+Fix serious bug in alignment generation that only occurred when large
+libraries were used as a query with [t]fast[x/y].  This bug often
+resulted in a core dump.
+
+Address some other issues with uninitialized variables with -m 9c.
+
+>>Jul. 15, 2008		fa35_04_01	SVN Revision: 38
+
+Correct problems with Makefiles.  Add information on compiling to README.
+Address issue with mp_KS for -m 10 when searching small libraries.
+
+>>Jul. 7, 2008		fa35_04_01	SVN Revision: 35
+
+Fix problems that occurred when statistics are disabled with -z -1,
+both for a normal library search, and for searches of a small library.
+
+>>Jul. 3, 2008	  	  	SVN Revision: 33
+
+Continue to fix an issue with 'J' and -S.
+
+>>Jun. 29, 2008     	 	SVN Revision: 29, 31
+
+Fix additional problems with Makefiles, some issues uncovered with
+Solaris 'C' compiler (Rev. 30).
+
+Discover serious bug when searching long, overlapping sequences, such
+as genomes.  The length of the library sequence was not updated to
+reflect the length of the new region plus the overlap.
+
+Fix inconsistency in the value of 'J' between uascii.h/aascii[] and
+pascii[].  Add code to ensure that lascii[], qascii[], never return a
+value outside pam2[][] (all <= pst.nsq) (particularly for 'O' and 'U'
+amino-acids).
+
+exit(0) returns for map_db, list_db.
+
+>>Jun. 11, 2008
+
+Correct bug in scaleswn.c that prevented exact matches to queries < 10
+residues from being scored and displayed.
+
+>>Jun. 1, 2008
+
+Address various cosmetic issues in FASTA output:
+
+(1) Modify comp_lib2.c so that -O outfile works when multiple queries are
+compared in one run.
+
+(2) remove the duplicated query sequence length in the 1>>>query line.
+
+(3) in -m 10 output, the tags "pg_name" and "pg_ver" were duplicated, e.g.
+
+>>>K1HUAG, 109 aa vs a library
+; pg_name: fasta35_t
+; pg_ver: 35.03
+; pg_argv: fasta35_t -q -b 10 -d 5 -m 10 ../seq/prot_test.lseg a
+; pg_name: FASTA
+; pg_ver: 3.5 Sept 2006
+
+The ; pg_name and ; pg_ver produced by the get_param() functions in
+drop*.c have been renamed ; pg_name_alg and ; pg_ver_rel.
+
+>>>K1HUAG, 109 aa vs a library
+; pg_name: fasta35_t
+; pg_ver: 35.03
+; pg_argv: fasta35_t -q -b 10 -d 5 -m 10 ../seq/prot_test.lseg a
+; pg_name_alg: FASTA
+; pg_ver_rel: 3.5 Sept 2006
+
+Modify mshowbest.c, mshowalign.c to highlight E() values (<font
+color="dark red"></font>) in HTML output.
+
+>>Apr. 16, 2008     fa35_03_07
+
+Merge fa35_ann1_br, which allows annotations in library sequences.
+
+The PVM/MPI parallel version now supports query sequence annotations
+and -m 9c annotation encoding.  It does not yet support library
+annotations.  Tested with both PVM and MPI.
+
+>>Apr. 2, 2008	    fa35_03_06
+
+Ensure that code in last_init() to modify ktup never increases ktup value.
+
+Add fasta_versions.html to more explicitly describe programs available.
+
+>>Mar. 4, 2008
+
+Fix parsing of parameters (matrix, gap open, gap ext) in ASN.1 PSSM
+files produced by blastpgp.
+
+>>Feb. 18, 2008	  fa35_03_05
+
+Re-implement -M low-high sequence range options.  Sequence range
+restriction has probably been missing since the introduction of
+ggsearch and glsearch, which use a new approach to limiting the
+sequence range.
+
+>>Feb. 7, 2008	  fa35_ann1_br
+
+Add annotations to library sequences (they were already available in
+query sequences).  Currently, annotations are only available within
+sequences, but they should be available in FASTA format, or any of the
+other ascii text formats (EMBL/Swissprot, Genbank, PIR/GCG).  If
+annotations are present in a library and the annotation characters
+includes '*', then the -V '*' option MUST be used.  However, special
+characters other than '*' are ignored, so annotations of '@', '%', or
+'#' should be transparent.
+
+In translated sequence comparisons, annotations are only available for
+the protein sequence.
+
+The format for encoded annotations has changed to support annotations
+in both the query and library sequence.  If the -m 9c flag is provided
+and annotations are present, then an annotated position in the
+alignment will be encoded as:
+
+ '|'q-pos':'l-pos':'q-symbol'l-symbol':'match-symbol'q-residue'l-residue'
+
+For example:
+
+    |7:7:@@:=YY|14:14:##:=TT
+
+In cases where the query or library sequence does not have an
+annotation, then the q-symbol or l-symbol will be 'X' (which is not a
+valid annotation symbol).
+
+>>Jan. 25, 2008	   fa35_03_04
+
+Map 'O' (pyrrolysine) to 'K', 'U' (seleno-cysteine) to 'C' in uascii.h
+('J' is already recognized and mapped to the average of 'I' and 'L').
+Thus, 'J' will appear in alignments, but 'O' and 'U' are transformed
+to 'K' and 'C'.
+
+Because "Oo" and "Uo" are not (currently) part of aax[] ("Uu" is in
+ntx[]), apam.c/build_xascii() was extended to add characters from
+othx[] - "oth" for "other" so that they are not lost.
+
+Double check, and fix, some mappings for 'J/j' and 'Z/z'.
+
+>>Jan. 11, 2008	   fa35_03_03
+
+Clean up some issues with -m 10 output; put "; mp_Algorithm", ";
+mp_Parameters" down with other -m 10 ";" lines.  Also provide ";
+al_code" and "; al_code_ann" if -m 9c is specified.  Remove duplicate
+">>>query" line.
+
+Add "; aln_code" and "; ann_code" to -m 10 -m 9c output.  The
+alignment/annotation encoding is only produced once (in showbest()),
+and is then saved for -m 10 alignment.
+
+>>Dec. 13, 2007	  fa35_03_02m (merge of fa35_03_02 and fa35_02_08_br)
+
+Add ability to search a subset of a library using a file name and a
+list of accession/gi numbers. This version introduces a new filetype,
+10, which consists of a first line with a target filename, format, and
+accession number format-type, and optionally the accession number
+format in the database, followed by a list of accession numbers.  For
+example:
+
+	  </slib2/blast/swissprot.lseg 0:2 4|
+	  3121763
+	  51701705
+	  7404340
+	  74735515
+	  ...
+
+Tells the program that the target database is swissprot.lseg, which is
+in FASTA (library type 0) format.
+
+The accession format comes after the ":".  Currently, there are four
+accession formats, two that require ordered accessions (:1, :2), and
+two that hash the accessions (:3, :4) so they do not need to be
+ordered.  The number and character after the accession format
+(e.g. "4|") indicate the offset of the beginning of the accession and
+the character that terminates the accession.  Thus, in the typical
+NCBI Fasta definition line:
+
+ >gi|1170095|sp|P46419|GSTM1_DERPT Glutathione S-transferase (GST class-mu)
+
+The offset is 4 and the termination character is '|'.  For databases
+distributed in FASTA format from the European Bioinformatics
+Institute, the offset depends on the name of the database, e.g.
+	   
+ >SW:104K_THEAN Q4U9M9 104 kDa microneme/rhoptry antigen precursor (p104).
+
+and the delimiter is ' ' (space, the default).
+
+Accession formats 1 and 3 expect strings; accession formats 2 and 4
+work with integers (e.g. gi numbers).
+
+>>Dec. 12, 2007  fa35_02_08
+
+Correct bug in ssearch35 gapped scores that only occurred in
+non-accelerated code.  This bug has been present since fa35_02_06.
+Modified the Makefiles so that accelerated (ssearch35(_t)) and
+non-accelerated (ssearch35s(_t)) are available. Edited Makefiles to
+provide accelerated ssearch35 more specifically.
+
+Modifications to provide information about annotated residues in the
+-m9c coded output. Previously, -m 9c output added a field:
+
+    =26+9=15-2=9-1=3+1=74-2=3-3=63
+
+after the standard -m 9 output information.  With the new version, an
+annotated query sequence ( -V '*#' ) adds the field:
+
+    |14:16:#<TM|24:26:#>TA|44:37:*>ST|71:66:#=TT
+
+which indicates that residue 14 in the query sequence aligns with
+residue 16 in the target (library) with annotation symbol '#', the
+alignment score is '<' less than zero, and the residues are 'T'
+(query) and 'M' (library). (The '|' is used to separate each
+annotation entry.)
+
+>>Nov. 10, 2007
+
+Parts of p2_complib.c and p2_workcomp.c, and the pvm/mpi Makefiles,
+have been updated to be consistent with name changes in the param.h
+and structs.h directories.
+
+>>Nov. 20, 2007   fa35_02_08
+
+Parts of p2_complib.c and p2_workcomp.c, and the pvm/mpi Makefiles,
+have been updated to be consistent with name changes in the param.h
+and structs.h directories.
+
+>>Nov. 6, 2007	  fa35_02_07
+
+Correct problems with asymmetric RNA matrices in initfa.c and rna.mat.
+
+>>Oct. 18, 2007
+
+Correct problem parsing ASN1 FastaDefLines when the database is local.
+
+Recovering from a misplaced cvs commit of code that was supposed to be
+on a branch, code has been recovered from earlier versions (fa35_02_05
+because fa35_02_06 has some branch contamination).
+
+>>Oct. 4, 2007	  fa35_02_06
+
+Correct error in gap penalties in dropnnw.c.  Due to an unfortunate
+inconsistency, the gap parameter in FLOCAL_ALIGN (in dropgsw2.c) had a
+different meaning than that in almost all the other programs (it was
+the sum of gap_open and gap_ext).  The FLOCAL_ALIGN function call was
+copied for FGLOBAL_ALIGN, even though the FGLOBAL_ALIGN function
+used the more conventional gap_open, gap_ext parameters.  Thus,
+FGLOBAL_ALIGN was wrong and the subsequent do_walign() in dropnnw.c
+were wrong.  dropgsw2.c:FLOCAL_ALIGN has been modified to use the
+conventional gap_open parameter, and calls to dropnnw.c:
+FGLOBAL_ALIGN() and do_walign() have been fixed.
+
+>>Sept. 20, 2007
+
+Modify the logic used when saving a seq_record *seq_p into beststr
+*bbp to ensure that if the seq_record is replaced, it is replaced at
+all the places where it is referenced.  This involves adding a linked
+list into beststr (*bbp->bbp_link).  When making the link (and freeing
+it up), be certain that the linked seq_p is the same as the one being
+replaced.
+
+>>Sept. 18, 2007   fa35_02_05
+
+A relatively obscure problem was found on the SGI platform when
+searching a library smaller than 500 sequences (thus requiring some
+shuffles). Two bugs were found and corrected; one involved not
+allocating aa1shuff with COMP_THR and not doing a m_file_p->ranliba()
+before re_getlib().  The second involved destroying a pointer to the
+list of seq_records when a sequence was being shuffled.  The bugs were
+confirmed with Insure, and have been fixed.
+
+>>Sept. 7, 2007	fa35_02_04
+
+Revamp the offset handling code to provide better uniformity between
+query and library offsets and coordinate systems.
+
+Fix a problem with load_mmap() to load 64-bit sequence locations
+properly on machines with 32-bit integers.
+
+>>Sept. 4, 2007
+
+Modify ncbl2_mlib.c slightly to check to see whether the amino-acid
+mapping in blast databases is identical to the FASTA mapping (it
+should be).  If they are identical, do not re-map the blast amino acid
+sequences (potentially a small speed up).
+
+>>Aug. 22, 2007
+
+Change ps_lav.c to lav2ps.c, and add lav2svg.c.  It is now possible to
+generate a lalign35 HTML output that has both SVG (lav2svg) and PNG
+(lav2ps | gs ), graphics.
+
+>>Aug. 10, 2007	CVS fa35_02_03
+
+Fix faatran.c:aacmap() bug.
+
+>>Aug. 6, 2007
+
+Extensive restructuring of pssm_asn_subs.c to parse PSSM:2 ASN.1's
+downloaded from NCBI WWW PSI-BLAST more robustly.
+
+>>July 25, 2007	  CVS fa35_02_02
+
+Change default gap penalties for OPTIMA5 matrix to -20/-2 from -24/-4.
+
+>>July 24, 2007
+
+Correct bugs introduced by adding 'J' - 'J' was initially put before
+'X' and '*' in the alphabet, which led to problems because the
+one-dimensional lower-triangular pam[] matrices (abl50[], abl62[],
+etc) had entries for 'X', and '*', but not for 'J'.  By placing 'J'
+after the other characters, the problem is resolved.
+
+Modify tatstats.c to accommodate 'J'.
+
+'*' is back in the aascii[] matrix, so that it is present by default
+(like fasta34).
+
+>>July 23, 2007
+
+Changes to support sub-sequence ranges for "library" sequences -
+necessary for fully functional prss (ssearch35) and lalign35.  For all
+programs, it is now possible to specify a subset of both the query and
+the library, e.g.
+
+    lalign35 -q mchu.aa:1-74 mchu.aa:75-148
+
+Note, however, that the subset range applied to the library will be
+applied to every sequence in the library - not just the first - and
+that the same subset range is applied to each sequence.  This probably
+makes sense only if the library contains a single sequence (this is
+also true for the query file).
+
+Correct bugs in the functions that produce lav output from lalign35 -m
+11 to properly report the begin and end coordinates of both sequences.
+Previously, coordinates always began with "1".  Correct associated bug
+in ps_lav.c that assumed coordinates started with "1".
+
+>>June 29, 2007	CVS fa35_02_01
+
+Merge of HEAD with fasta35 branch.
+
+>>June 29, 2007	CVS fa35_01_06
+
+Add exit(0); to ps_lav.c for 0 return code.
+
+>>June 26, 2007
+
+Add amino-acid 'J' for 'I' or 'L'.
+
+Add Mueller and Vingron (2000) J. Comp. Biol. 7:761-776 VT160 matrix,
+"-s VT160", and OPTIMA_5 (Kann et al. (2000) Proteins 41:498-503).
+
+Changes to dropnnw.c documentation functions to remove #ifdef's from
+strncpy() - which apparently is a macro in some versions of gcc.
+
+>>June 7, 2007
+
+Modify initfa.c to allow ggsearch35(_t), glsearch35(_t) to use PSSMs.
+
+>>June 5, 2007  CVS fa35_01_05
+
+Modifications to p2_complib.c, p2_workcomp.c to support Intel C
+compiler.  Fixed bug in p2_workcomp.c - gstring[2][MAX_STR] required -
+[MAX_SSTR] too short.  mp35comp* programs now tested and working (as
+are pv35comp*, c35.work* programs).
+
+Fix problem with fasts/fastm/fastf last_tat.c with limited memory.
+
+Correct problem with lalign35.exe Makefile.nm_[fp]com.
+
+Add $(CFLAGS) to map_db to enable large file support.
+
+Address problem with PSSM's when '*' not defined (initfa.c:extend_pssm()).
+
+>>May 30, 2007	CVS fa35_01_04
+
+Complete work on ps_lav, which converts an lalign35 lav (-m 11) file
+into a postscript plot, which looks identical to the plots produced by
+plalign from fasta2.  (ps_lav has been replaced by lav2ps and lav2svg).
+
+>>May 25,29, 2007
+
+Changes to defs.h, doinit.c mshowalign.c for -m 11, which produces lav
+output only for lalign35.
+
+Changes to comp_lib2.c to add m_msg.std_output, which provides all the
+standard print lines.  This is turned off for -m 11 (lav) output.
+lalign35 -m 11 provides standard lav output, with the addition of
+#lalign35 -q ... .
+
+>>May 18, 2007
+
+Add m_msg.zsflag to preserve pst.zsflag when reset by global/global
+exclusion of many library sequences.
+
+>>May  9, 2007	CVS fa35_01_03
+
+Tested local database size determination with p2_complib2/p2_workcomp2.
+
+>>May 2, 2007	renamed fasta35, pv35comp, etc
+
+Separate thread buffer structures from param.h.
+
+Problems with incorrect alignments have been fixed by re-initializing the
+best_seqs and lib_buf2_list.buf2 structures after each query sequence.
+
+The labels on the alignment scores are much more informative (and more
+diverse).   In the past, alignment scores looked like:
+
+>>gi|121716|sp|P10649|GSTM1_MOUSE Glutathione S-transfer  (218 aa)
+ s-w opt: 1497  Z-score: 1857.5  bits: 350.8 E(): 8.3e-97
+Smith-Waterman score: 1497; 100.0% identity (100.0% similar) in 218 aa overlap (1-218:1-218)
+^^^^^^^^^^^^^^
+
+where the highlighted text was either: "Smith-Waterman" or "banded
+Smith-Waterman". In fact, scores were calculated in other ways,
+including global/local for fasts and fastf.  With the addition of
+ggsearch35, glsearch35, and lalign35, there are many more ways to
+calculate alignments: "Smith-Waterman" (ssearch and protein fasta),
+"banded Smith-Waterman" (DNA fasta), "Waterman-Eggert",
+"trans. Smith-Waterman", "global/local", "trans. global/local",
+"global/global (N-W)".  The last option is a global global alignment,
+but with the affine gap penalties used in the Smith-Waterman
+algorithm.
+
+>>April 24, 2007
+
+The new program structure has been migrated to the PVM and MPI
+versions.  In addition, the new global algorithms (pv35compgg,
+pv35compgl) have been moved, though the PVM/MPI versions do not
+(yet) do the appropriate size filtering.
+
+>>April 19, 2007
+
+Two new programs, ggsearch35(_t) and glsearch35_t are now available.
+ggsearch35(_t) calculates an alignment score that is global in the
+query and global in the library; glsearch35_t calculates an alignment
+that is global in the query and local in the library sequence.  The
+latter program is designed for global alignments to domains.
+
+Both programs assume that scores are normally distributed.  This
+appears to be an excellent approximation for ggsearch35 scores, but
+the distribution is somewhat skewed for global/local (glsearch)
+scores.  ggsearch35(_t) only compares the query to library sequences
+that are between 80% and 125% of the length of the query; glsearch
+limits comparisons to library sequences that are longer than 80% of
+the query.  Initial results suggest that there is relatively little
+length dependence of scores over this range (scores go down
+dramatically outside these ranges).
+
+A bug was found and fixed in showalign() and showbest() where the
+aa1save buffer was not preserved when some sequences needed to be
+re-read, while others were stored in the beststr.
+
+>>April 9, 2007
+
+Some of the drop*.c functions have been reconfigured to reduce the
+amount of duplicate code.  For example, dropgsw.c, dropnsw.c, and
+dropnfa.c all used exactly the same code to produce global alignments
+(NW_ALIGN() and nw_align()), this code is now in wm_align.c.
+Likewise, those same files, as well as dropgw2.c, use identical code
+to produce consensus alignments (calcons(), calcons_a(), calc_id(),
+calc_code()).  Rather than working with three or four copies of
+identical code, there is now one version.
+
+>>March 29, 2007
+
+At last, the lalign (SIM) algorithm has been moved from FASTA21 to
+FASTA35.  Currently, only lalign35 is available.  A plotting version
+will be available shortly (or perhaps a more general solution that
+produces lav output).
+
+The statistical estimates for lalign35 should be much more accurate
+than those from the earlier lalign, because lambda and K are estimated
+from shuffles.
+
+Many functions have been modified to reduce the number of times
+structures are passed as arguments, rather than pointers.
+
+>>February 23, 2007
+
+The threading strategy has been modified slightly to separate the end
+of the search phase (and a complete reading of all results buffers)
+from the termination phase.  This will allow future threading of
+subsequent phases, including the Smith-Waterman alignments in
+showbest() and showalign() (though care will be required to ensure
+that the results are presented in the correct order).
+
+>>February 20, 2007	fasta-34_27_0  (released as fasta-35_1)
+
+The FASTA programs have been restructured to reduce the differences
+between the threaded and unthreaded versions (and ultimately the
+parallel versions) and to make more efficient use of modern large
+memory systems.  This is the beginning of a move towards a more robust
+shuffling strategy when searching databases with modest numbers of
+related sequences.
+
+The major changes:
+
+  comp_lib.c -> comp_lib2.c  - comp_lib.c will be removed
+  work_thr.c -> work_thr2.c  - work_thr.c will be removed
+
+  mshowbest.c, mshowalign.c have been modified to remove aa1 as an
+    argument. They must allocate that space if they need it.
+
+  The system is set up to allocate a substantial amount of library
+  sequence memory, either to a single buffer (unthreaded) or to the
+  threaded buffer pool.  For smaller databases, the library sequences
+  are read once, and then subsequently read from memory (this could be
+  extended for RANLIB(bline) as well).
+
+Soon, these changes will allow the program to re-read the beststr[]
+sequences and shuffle them to produce accurate lambda/K estimates.
+
+================================================================
+
+See readme.v34t0 for earlier changes.
+
+================================================================
diff --git a/doc/readme.v36 b/doc/readme.v36
new file mode 100644
index 0000000..5826024
--- /dev/null
+++ b/doc/readme.v36
@@ -0,0 +1,2213 @@
+
+Version 3.6 of the FASTA programs is a significant update over version
+3.5.  It uses the same underlying structure as FASTA35 (specifically
+the strategies for ensuring accurate statistics), but it allows for
+multiple high-scoring alignments to be shown, rather than just one.
+This is the main functional difference between FASTA and BLAST -
+BLAST could show multiple HSPs, FASTA did not.
+
+>>May 23, 2017 [released as fasta-36.3.8f]
+[url_subs.c]
+A small, but major change in the output available to the $SRCH_URL and
+$SRCH_URL2 strings, which are used to enable re-searching, and now
+pairwise alignment. (It would be better to provide a json string of
+the information, rather than using fprintf().)  An additional value,
+the name of the query sequence, is provided to these urls so that
+pairwise alignment becomes possible.
+
+>>May 23, 2017
+[scripts/ann_feats2ipr.pl,ann_feats_up_www2.pl,test_ann_scripts.sh  src/defs.h]
+Changes to ensure that EBI format databases, which place the ID before
+the accession, e.g.  SP:GSTM1_HUMAN P09388, can be processed properly
+by annotation scripts.  This involved displaying more of the
+description line, so that the accession field is included, in the
+annot_XXXXX file.  
+
+>>May 8, 2017
+[compacc2e.c]
+Address problem where initial domain annotation similarity
+score/identity not properly reset.
+
+[scripts/annot_blast_btop2.pl]
+Fix various problems with domain scores, particularly in gaps, and
+domain coordinates.
+
+Modify version string to May, 2017
+
+>>April 18, 2017
+[cal_cons2.c]
+Address problem where identity count not correctly assigned to
+N-terminal domain at the end of a domain.
+
+>>April 14, 2017
+[src/compacc2e.c, scripts/ann_exons_up_www.pl]
+
+Provide a new script to annotate exon positions in Uniprot Proteins
+(scripts/ann_exons_up_www.pl) that uses the EBI proteins/api/coordinate service.
+
+Provide additional error checking on annotations to ensure that domain
+start is always <= domain end.
+
+>>Jan 17, 2017
+[scripts/ann_pfam30_tmptbl.pl]
+ann_pfam30_tmptbl.pl is a modification of ann_pfam30.pl that loads a
+temporary table of accessions to be annotated, rather than asking for
+one sequence at a time.
+
+>>Dec 14, 2016
+[initfa.c/scaleswn.c]
+Change required shuffle count (down to 100) and introduce a
+median/IQR strategy to robustly estimate mean and S.D. for ggsearch
+(normal) comparisons (-z 3, in place of Altschul-Gish statistics).
+
+Modify version string to Dec., 2016.
+
+>>Nov 18, 2016
+[build_ares.c]
+fix sequence encoding memory leak
+
+>>Sept 30, 2016 [released as fasta-36.3.8e]
+[psisearch2/]
+
+Added a new sub-directory, psisearch2/, which includes scripts and
+documentation for the new iterative psisearch2_msa.pl and
+psisearch2_msa.py programs.  These programs perform iterative PSIBLAST
+(or SSEARCH) searches, but with an option (--query_seed) that
+dramatically reduces false-positives.
+
+Modified most of the scripts/ann_*.pl files to work with new NCBI
+Swissprot accession format.  Modified scripts/ann_feats_up_www2.pl and
+scripts/ann_upfeats_pfam_e.pl to work with JSON format Uniprot
+descriptions.
+
+>>July 28, 2016
+[src/pssm_asn_subs.c]
+Fix another problem with binary ASN.1 file processing where the
+asnp->abp buffer was not refilled in time.
+
+>>July 12, 2016
+[src/mshowbest.c]
+Modified -m8/-m 8CB output to include "eval2" when a second E()-value
+is available (when -z > 20).  "eval2" is shown after the bit score,
+but before BTOP and annotations.
+
+>>May 25, 2016
+[scripts/ann_pfam28.pl]
+Implement --split_over command option, which takes overlapping domains
+and produces virtual like domains from the overlap region.
+
+>>Apr. 12, 2016  [released as fasta-36.3.8d]
+[src/pssm_asn_subs.c]
+Fix another problem with binary ASN.1 file processing where the
+asnp->abp buffer was not refilled in time.
+
+[initfa.c] - version date updated to Apr, 2016
+
+[upam.h] - changes to default gap penalties for VT40 (from -14/-2 to
+-13/-1), VT80 (from -14/-2 to -11/-1), and VT120 (from -10/-1 to -11/-1).
+
+>>Mar. 30, 2016
+[scripts/m9B_btop_msa.pl]
+Provide --bound_file_only, --bound_file_in, --bound_file_out.
+Ensure that alignments outside boundaries are NOT included in MSA.
+
+>>Mar. 22, 2016
+[scripts/m8_btop_msa.pl, m9B_btop_msa.pl]
+Ensure that full length query sequence is included in MSA.
+[pssm_asn_subs.c]
+Fixes to allow IUPACAA sequences in ASN.1 PSSM.  Other fixes to ensure
+that arrays not allocated are not freed when wfreqs2d[] is not available.
+
+>>Mar. 18, 2016
+[scripts/m8_btop_msa.pl, m9B_btop_msa.pl]
+scripts/m8_btop_msa.pl takes a fasta36 -m 8CB output file and produces
+a multiple sequence alignment that can be used with psi-blast.
+
+scripts/m9B_btop_msa.pl takes a fasta36 -m 9B output file and produces
+a multiple sequence alignment that can be used with psi-blast.
+
+>>Feb. 15, 2016
+[mshowbest.c, compacc2e.c, cal_cons2.c, dropfx2.c, dropfz3.c]
+Modify logic for calculating percent identity in sub-alignments to use
+the BLASTP strategy, which does not count gapped regions as part of
+the alignment length.  Fix the -m 8 display (BLAST tabular output) to
+use ungapped alignment length for percent identity (as -m BB does).
+
+[initfa.c] - version date updated to Feb, 2016
+
+>>Feb. 12, 2016
+[compacc2e.c, cal_cons2.c, dropfx2.c, dropfz3.c]
+Modify display_push_features() to use both the rst.score[score_ix],
+which is used to calculate the zscore and bitscore, and also sw_score,
+which is the correct divisor for sub-alignment scores.  Previously,
+only the rst.score[score_ix] was used, which caused some bit scores to
+be out of range, and produced erroneous Q-value scores for
+sub-alignments.
+
+>>Jan. 24, 2016
+[cal_cons2.c]
+Ensure left_domain_link[01] set to NULL before initialized.
+
+Rename ann_feats2l.pl to ann_feats_up_sql.pl for consistency with
+ann_feats_up_www2.pl.  ann_feats_up_www2.pl no longer works because of
+changes at the EBI.
+
+>>Dec. 15, 2015   [re-released as fasta-36.3.8c]
+[pssm_asn_subs.c]
+Fixed another problem parsing ASN.1 because of reading past the end of
+the buffer.
+[cal_cons2.c]
+Fix a serious bug that prevented display of annotated sites using -m9c/-m8CC
+
+>>Nov. 24, 2015   [re-released as fasta-36.3.8c]
+[mshowalign2.c]
+Correct first_line logic to display >>seqid description on first
+alignment line, but >- on remaining lines.
+
+>>Nov. 23, 2015   [released as fasta-36.3.8c]
+[cal_cons2.c, mshowalign2.c, scripts/annot_blast_btop.pl, scripts/ann*_e.pl]
+Fix the problem that lalign36 no longer displayed the library/subject
+accession/description.  Correct some problems introduced with BTOP
+alignment encoding.
+
+A new script, scripts/annot_blast_btop.pl, is available to provide -V
+type sub-alignment scoring to BLASTP BTOP alignments stored in tabular
+files.  In addition, the scripts/ann*.pl scripts were modified to work
+as part of a unix pipe, and the ann*_e.pl scripts replace the older
+non "_e.pl" scripts, and were renamed without the "_e" (thus,
+ann_pfam_www.pl was removed, and ann_pfam_www_e.pl was renamed
+ann_pfam_www.pl).
+
+
+>>Nov. 6, 2015
+[cal_cons2.c, initfa.c, mshowbest.c, dropfx2.c, dropfz3.c]
+Implement BLAST+ BTOP alignment format, available with -m 8CB or -m 9B.
+Convert previously static calc_code alignment strings to dynamic strings.
+
+>>Oct. 13, 2015	[released as fasta-36.3.8b]
+[initfa.c, pssm_asn_subs.c]
+Fix problems encountered when reading in binary ASN.1 file produced by
+datatool.  Previous versions did not use the final score data provided
+by the tool; this version now uses that information if it is
+available.  If it is not available, the PSSM integer values are
+calculated from the frequency data.
+
+>>Oct. 8, 2015
+[pssm_asn_subs.c]
+Fix a rare condition where the pssm_asn parser reads past the asn
+buffer.
+
+>>Sep. 28, 2015
+[comp_lib9.c, scaleswn.c, dropnfa.c, dropfx2.c dropfz3.c]
+(1) [scaleswn.c] -- changes to drop back to Altschul-Gish statistics
+when other strategies fail. (2) Fix to ensure that adler32() is
+calculated correctly for 1-residue library sequences; definition of
+adler32() added to drop*.c files.
+
+>>Sep. 7, 2015
+[Makefile.nmk_icl, Makefile.nm_pcomp, doinit.c, readme.win32]
+Automatic detection of thread/core number on windows. Changes to
+readme.win32 documentation. Windows programs no longer require sse2 in
+name (since all modern x86 processors have it).
+
+>>Sep. 4, 2015
+[comp_lib9.c, cal_cons2.c, dropfx2.c, dropfz3.c]
+(1) Fix bug with overlapping domains when a domain ends exactly where
+the alignment starts. (2) provide command line in -m 8CC output with -DPGM_DOC
+
+>>Aug. 31, 2015   [git v36.3.8_30Jul15]
+[cal_cons2.c, dropfx2.c, dropfz3.c, mshowbest.c, build_ares.c, doinit.c, comp_lib9.c]
+Modifications to enhance the independence of annotation output to
+different files.  Earlier, annotations could not be properly output to
+different files in different formats.  For example, -m 9c prevented -m
+"F8CC output.m8CC" -m "F9I output.m9I".  Annotation output formats
+are now more independent.  They are not fully independent, however.
+Thus, if CIGAR format is used for one output, it will be used in all
+other alignment encoding outputs.
+
+>>Aug. 21, 2015
+[cal_cons.c, dropfx2.c, dropfz3.c, mshowbest.c, build_ares.c, doinit.c]
+Add -m 9I to -m 9i.  -m 9i reports identity and variation (based on
+annotation scripts).  -m 9I also reports domain content on the initial
+summary line.
+
+>>Aug. 20, 2015 [fasta-36.3.8a]
+[mshowalign2.c]
+Fixed bug in lalign36 E()-value, bit score calculations for highest
+scoring non-identical alignment by reverting to older code.  This bug
+was introduced in fasta-36.3.6d in January, 2014.
+
+>>Jul. 21, 2015 [fasta-36.3.8]
+[compacc2e.c, cal_cons2.c, dropfx2.c dropfz3.c, param.h]
+Fixed a major bug in the annotation code that had been added to
+accommodate overlapping domains.  The original implementation was not
+thread-safe, because the array of annotations was modified during the
+scoring, but was also shared by threads.  The new version keeps
+independent scoring arrays.
+
+>>Jun. 23, 2015 [released as fasta-36.3.7b]
+[dropnnw2.c]
+Fix problem where glsearch reset (ignored) the -M sequence limit.
+
+>>Jun. 18, 2015
+[dropfx.c, dropgsw.c, dropfx.c, dropfx2.c, dropfz3.c]
+Fix problem in do_walign.c with comparison to score_thresh during
+recursive alignment.
+
+>>May. 21, 2015
+[compacc2e.c]
+Add additional checks to ensure that annotations are within the
+sequence boundaries.
+
+>>Jan. 26, 2015	[ re-released as fasta-36.3.7a]
+[compacc2e.c]
+Fix problem with domain boundary calculations for subsets of sequences.
+
+>>Jan. 21, 2015	[ released as fasta-36.3.7a]
+[calc_cons2.c, dropfx2.c, dropfy3.c]
+Fix problems with -m 9c / -m 9C alignment encodings in version
+36.3.7. Apparently, the Nov. 25, 2014 fix was not committed properly.
+In addition, make certain that the query sequence is ALWAYS the
+reference sequence, particularly in translated alignments. As a
+result, the insertion/deletion codes are now reversed for fast[xy]36
+and tfast[xy]36.
+
+>>Jan. 6, 2015
+[data/VTML_*.mat]
+Provided scoring matrix files for the VTML_10,20,40,80,120,160,200
+matrices available internally.
+
+>>Nov. 25, 2014	[ released as fasta-36.3.7]
+[cal_cons.c, dropfx2.c, dropfz3.c]
+Fix problem that prevented -m 9c and -m 8CC unless annotations were
+present.
+
+Added approved copyright notice and Apache 2.0 license to
+appropriate files.
+
+>>Nov. 19, 2014
+[mshowbest.c]
+Add alignment (CIGAR) string and annotation string to BLAST tabular
+(-m 8) alignments with -m 8C[cCdD].  To get alignment and annotation
+encoding without BLAST comments, use -m 8X[cCdD].
+
+>>Nov. 10, 2014
+[cal_cons2.c, dropfx2.c, dropfz3.c]
+Ensure that site annotations are shown when annotations are embedded
+in a sequence, not provided by a script.
+
+>>Oct. 27, 2014
+[cal_cons2.c]
+Fix a bug in the annotation alignment that put annotation symbols off
+by one (or more) in the coordinate lines. Add annotations that align
+in gaps.
+
+>>Oct. 6, 2014
+[most source files]
+The copyright notice for fasta-36.3.7 has been updated to include an
+open software license, Apache2.0, for redistribution.
+
+>>Sept. 28, 2014
+[url_subs.c]
+Substitute annot_p->s_annot_arr_p[] for annot_p->domain_arr_p[i] in
+display_domains(), encode_json_str().  Remove domain_arr_p from struct
+annot_entry.  With domain_arr_p gone, n_domains is less useful, but it
+is still available, and used for checking for domain graphics.
+encode_json_domains() also now uses annot_p->n_annots, and skips over
+non-domains.
+
+>>Sept. 19, 2014
+[dropfx2.c, dropfz3.c]
+Fixes to produce correct coordinates with forward and reverse
+complement [t]fast[x,y].
+
+>>Sept. 17, 2014 [new version, fasta-36.3.7]
+[compacc2e.c, cal_cons2.c, dropfx2.c, dropfz3.c]
+The annotation domain scoring/plotting strategy has been extended to
+allow overlapping domains.  To accommodate overlapping domain
+annotations, the annotation file format (e.g. gstm1_human.annot) has
+been extended to accept the form:
+
+>sp|P09388|GSTM1_HUMAN
+1	-	88	Glutathione_S-Trfase_N :1
+7	V	F	Mutagen: Reduces catalytic activity 100- fold.
+90	-	208	Glutathione-S-Trfase_C-like :2
+108	V	Q	Mutagen: Reduces catalytic activity by half.
+
+where a "-" in the second field indicates that the first and third
+fields specify the beginning and end of the domain. In previous
+versions, a '[' specified the beginning of a domain, and a ']' on a
+later line specified the end of the domain. '[' and ']' on separate
+lines required that domains not overlap (so that the '[' and ']' could
+be paired).  fasta-36.3.7 will still read this format, but the "start -
+stop" format is both simpler and more flexible.
+
+Four new annotation scripts are available that use the new domain
+notation: ann_feats2ipr_e.pl, ann_feats_up_www2_e.pl, ann_pfam_e.pl,
+and ann_pfam_www_e.pl.  All four scripts will report overlapping
+domains.
+
+Overlapping domains also allows domain annotations from different
+sources to be combined (e.g. InterPro Pfam, Panther, and Superfamily
+domain annotations), as well as domain annotations of different types,
+e.g. Uniprot domain and secondary structure annotations.
+
+>>Aug. 28, 2014	[re-released as fasta-36.3.6f]
+[ncbl2_mlib.c]
+The code used to parse blastfmtdb sequence description lines has not
+kept up with NCBI's use of ASN.1 in sequence descriptions.  This code
+has been updated, and now works properly with the protein and DNA
+sequence databases.
+
+[comp_lib9.c]
+Fixed a seg-fault that occurred when an open-file error occurred.
+
+>>Aug. 22, 2014	[released as fasta-36.3.6f]
+[mshowbest.c]
+Change alignment summary display for lalign to not show identical
+alignment score unless '-J' option used.  Add "The best non-identical
+alignments" when no "-J"
+
+[ann_pfam_www.pl] Fix bugs.
+
+[ncbl2_mlib.c]
+modified to read NCBI ambiguity codes in
+blastdbfmt/formatdb nucleotide databases.  Not extensively tested.
+
+>>Aug. 20, 2014
+[compacc2.c, cal_cons.c, dropfx.c, dropfz2.c]
+Modify sub-alignment score report to calculate bit-score by multiplying
+total alignment bit score by sub-alignment raw score divided by total
+alignment raw score.  This produces a bit score that is much more
+sensible than the previous strategy, which calculated a z-score from
+the sub-alignment.
+
+>>Aug. 18, 2014
+[compacc2.c, cal_cons.c]
+Undo removal of '[]' from aa0a/aa1a (they are required to visualize
+domain boundaries in alignment).  cal_cons.c now uses PSSMs when they
+are available.
+
+>>Aug. 8, 2014
+[comp_lib9.c, compacc2.c]
+Move the call to get query annotations via scripts out of compacc2.c
+and into comp_lib9.c.
+
+>>July 29,2014
+[comp_lib9.c, mshowbest.c, mshowalign2.c]
+Enable high scoring alignment display (like high scoring sequences)
+with lalign36, when -m 9 (-m 9c/d/C/D) option is provided, or with -m
+8.  This allows lalign36 to provide a compact, tabular list of
+non-overlapping local alignments.
+
+>>June 30, 2014
+[pssm_asn_subs.c]
+Update the code for parsing ASN.1 binary PSSM files produced by
+psiblast+. The new code reads more of the optional fields in
+pssm_intermediate_data().  The fields are not used, but broke the
+earlier parser.
+
+>>June 11, 2014
+[cal_cons.c, initfa.c, dropfx.c, dropfz2.c]
+Extend the match/mismatch encoding provided by -m 9c and -m 9C with -m
+9d and -m 9D.  The -m 9d/D options provide mismatch locations as well
+as insertion/deletion locations.  For -m 9d, the list of codes has
+expanded from '=\/*' to '=\/*x'; for -m 9D, 'MDIMX'.  Current
+implementation works for all programs except [t]fast[fms].  Updated
+version strings to June, 2014.
+
+>>May 28, 2014
+[mshowalign2.c, mshowbest.c, initfa.c, structs.h]
+Add the command line option -XI.  Changes the calculation of percent
+identity to ensure that a single mismatch in a long sequence with >
+99.9% identity is displayed as 99.9% (0.999) identity, rather than
+100.0% identity. Without this option, a single mismatch in 10,000
+residues displays 100% identity, with the option, 99.9% identity is
+displayed (even though the identity is 99.99%).
+
+[cal_consf.c]
+Fix the false error message "code begins with 0" in cal_consf.c.
+
+>>Feb. 12, 2014
+[compacc2.c]
+When providing "sequence length" to annotation scripts, add offsets.
+Also modify scripts to allow sequence lengths to increase.
+
+>>Jan. 28, 2014  (re-released as fasta-36.3.6d/Jan 2014)
+[dropfs2.c, calconsf.c, tatstats.c]
+The coordinate fix for fasts36/fastm36 (Dec 18, 2013) broke some
+fasts/fastm alignments. The alignment code has been reverted to the
+"classic" code that has been used for more than 10 years.  However,
+that code always marked the first aligned residue as 1, even when the
+first part of the query did not align.  The initial coordinate offset
+has been fixed; the coordinate is now the position in the first
+aligned fragment.  This may be confusing, because with fasts, the
+first aligned fragment may not be the first fragment in the query
+list.  The coordinate provided always provides the offset from the
+beginning of the first fragment in the alignment, not the first
+fragment in the list.  This fix required changes to the definition of
+calc_astruct(), which required changes to build_ares.c, mshowalign.c,
+calc_cons.c, dropfx.c, and dropfz2.c.
+
+>>Jan. 24, 2014
+[mshowalign2.c]
+Add checks to assumption that '>gi|12345' is an NCBI library entry.
+[nmgetlib.c]
+Fix for nmgetlib.c with -DMYSQL_DB
+
+Some cleanup of old Makefiles.
+
+>>Jan. 1, 2014
+[url_subs.c]
+Fix off by one in domain coordinates in display_domains().
+
+>>Dec. 18, 2013
+[dropfs2.c, cal_consf.c]
+Fix problem with alignment display when query sequence is much longer than library sequence.
+
+>>Dec. 11, 2013
+[compacc2.c]
+Modified save_best2() to correctly exclude sequences outside
+-M n1_low-n1_high limits.
+
+>>Nov. 8, 2013   (re-released as fasta-36.3.6d)
+[ncbl2_mlib.c]
+Fix problem with src_long8_read() where int/uint64_t seems to cause
+problems with Linux intel icc. Using int/unsigned int solves the problem.
+
+>>Nov. 1, 2013
+[apam.c, ncbl2_mlib.c, map_db.c]
+[apam.c ] Fix problem with query sequences and libraries that do not
+end in newline ('\n').  [ncbl2_mlib.c, map_db.c] provide grouping for
+shifts for byte extraction in src_int4/long8_read() to remove compiler
+warnings.  [map_db.c] Fix problem reading sequences for indexing that
+caused crash.
+
+>>Oct. 8, 2013	 (released as fasta-36.3.6d)
+[comp_lib9.c, initfa.c]
+Modify initfa.c/re_ascii() function to avoid qascii[] characters that
+had been remapped for annotations.
+
+>>Oct. 4, 2013
+[nmgetlib.c, ncbl2_mlib.c]
+Modify nmgetlib.c/re_openlib() to re-use memory mapped file arrays.
+This had been the intention for some time, but a check for libf != 0
+prevented the memory mapped arrays from being reused.  libf is no
+longer checked, just mm_flag.
+
+>>Sep. 26, 2013
+[ncbl2_mlib.c]
+Fix a bug in ncbl2_mlib.c/parse_fastadl_asn() that prevented
+accessions longer than 20 characters in description lines from BLAST
+formatted libraries.
+
+[compacc2.c]
+Fix a bug in compacc2.c/comment_var() that showed the wrong original
+sequence in qVariant changes.
+
+>>Sep. 2, 2013
+[dropfs2.c]
+Fix bug in dropfs2.c/init_work() that prevents correct tatusov
+statistics with -z >10.
+
+>>Aug. 21, 2013  (released as fasta-36.3.6c)
+[comp_lib9.c]
+Fix bug in comp_lib9.c/new_seqr_chain() that prevented memory from
+being allocated to the chain if a memory mapped database was followed
+by a non-memory mapped database.
+
+>>Aug. 9, 2013
+[scaleswn.c]
+Ensure shift to MLE_STATS if too many scores are excluded by trimming.
+
+>>July 31, 2013 (released as fasta-36.3.6b)
+[url_subs.c]
+Make JSON output for -m 6 (html) dependent on $ENV{JSON_HTML}. JSON
+output is not currently used.
+
+>>July 26, 2013
+[mshowalign2.c, scripts/lavplt_svg.pl]
+Correct offsets in -m 11 lav plots, and modify lav2plt.pl/
+lavplt_svg.pl/ lavplt_ps.pl to reflect the corrections.
+
+Move all perl scripts out of /src into /scripts.
+
+>>July 19, 2013 (released as fasta-36.3.6a)
+[compacc2.c, cal_cons.c, dropfx.c, dropfz2.c, build_ares.c]
+Provide dynamic string allocation/dyn_strcat for annotation string
+output. This fixes problems with long proteins with many domains or
+other annotations, which were too long for the fixed annotation output
+storage.
+
+Version date updated to July, 2013.
+Compiled and tested on Windows32.
+
+>>July 8, 2013
+[cal_cons.c, dropfx.c, dropfz2.c]
+Properly terminate annotations with offsets [cal_cons.c], and with
+domains beyond alignment [dropfx.c, dropfz2.c]
+
+>>July 5, 2013	 (released as fasta-36.3.6)
+[comp_lib9.c, doinit.c, dropfx.c, dropfz2.c]
+Fix conflict between -m 9 and -z -1; fix annotation display using
+non-script annotations. Stop using calc_last_set in dropfx/fz2.c.
+
+>>June 24, 2013
+[scripts/ann_feats_up_www2.pl]
+Add script (ann_feats_up_www2.pl) for annotating UniProt sequences using:
+"http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/uniprotkb".
+
+>>June 6, 2013
+[compacc2.c, cal_cons.c, initfa.c, dropfx.c, dropfz2.c]
+Provide the -XNS/-XXS/-XN+/-XX+ and -XND/-XXD/-XN-/-XX- options that
+specify how N:N and X:X alignments are counted for similarity and
+identity.  By default, N:N (DNA) and X:X (protein) alignments are
+considered identical, but not similar (because their scores are
+typically negative to address statistical issues).
+-XNS/-XXS/-XN+/-XX+ cause N:N/X:X alignments to be counted as similar,
+even though their alignment scores are negative.  Likewise,
+-XND/-XXD/-XN-/-XX- cause N:N and X:X alignments to be considered
+non-identical (and non-similar).
+
+>>May 28, 2013
+[url_subs.c]
+do_url1() has been modified to: (1) require env($REF_URL, $SRCH_URL,
+$SRCH_URL1) for these links to produce printout.  (2) Link text is
+surrounded by <!-- LINK_START "lname" --> <!-- LINK_STOP -->.  (3)
+do_url1() now produces <!-- JSON --> output automatically, which can
+be used to get all the information provided by earlier URL links.
+
+>>May 29, 2013
+[mshowalign2.c]
+Re-instate code in showalign() to ensure that original bbp->rst is
+used for first alignment, rather than that calculated by CHECK_SCORE
+(which is used for later sub-HSP's).  The CHECK_SCORE -S alignment
+score is based on the non-S alignment, and is then re-scored with the
+low-complexity -S matrix. But the best alignment excluding
+low-complexity can have a higher score than the best all-complexity
+alignment rescored with -S.
+
+>>May 27, 2013
+[mshowalign2.c, url_subs.c]
+The plot_domain.cgi SVG code has been expanded to allow the domain
+structure of the entire query and library sequence, not just the
+aligned regions, to be displayed.  Showing domains above the query or
+below the library takes an additional 18 px in each direction (36
+total); this size needs to be provided in the <object data=""
+width="660" height="54"> format string that is provided in
+$DOMAIN_PLOT_URL.
+
+Right now, the argument to $DOMAIN_PLOT_URL can get very long with
+lots of aligned domain (region), and query and library domain
+information.  It would be better to provide this in some separate way.
+YAML might also be a more efficient strategy.
+
+>>May 9, 2013
+[dropfx.c, dropfz2.c, compacc2.c, url_subs.c]
+The web infrastructure for domain plots has been completed --
+plot_domain2.cgi which generates SVG for domain plots now understands
+reverse-complement cDNA fastx/y alignments, and plots coordinates
+accordingly.  Testing with fastx36/fasty36 revealed some memory
+errors, which have been fixed.  In addition, dropfz2.c has been
+updated to properly treat some region/alignment-boundary conditions;
+dropfx.c and dropfz2.c provide equivalent sub-alignment scores.
+
+[../scripts, ../misc]
+A new directory, ./scripts, has been created to collect the scripts
+used for sequence library expansion and domain/feature annotation.
+../scripts/README.scripts provides more information.  Modify code to
+allow expansion scripts (-e) to start with '\!', like annotation
+scripts.
+
+>>Apr. 15, 2013
+(compacc2.c, cal_cons.c, dropfx.c, dropfz2.c, mshowalign2.c)
+Modifications to properly deal with sequence and coordinate offsets in
+annotation alignments.  compacc2.c/get_annot_list() has been modified
+to only print/read an annotation once (the same sequence may appear
+twice with fastx/fasty).  mshowalign2.c now includes <!--
+ANNOT_START/STOP --> and <!-- ALIGN_START/STOP --> in HTML mode.  These
+comments are not on their own line, to save output space, so the
+remainder of the line should be captured.
+
+>>Apr. 5, 2013
+(doinit.c)
+Add the ability to specify HTML output using the -m '0H' option.  This
+addresses the problem that -m "F6" does not fully specify the output
+format.  In addition, -m 6 should probably explicitly set -m 0 (if it
+has not been set), rather than simply 'or'ing it, but right now we do
+not know when it is set.
+
+>>Mar. 17, 2013
+(compacc2.c, url_subs.c, plot_domain.cgi, ann_feats2l.pl)
+Modifications to url_subs.c to support SVG domain maps in HTML output.
+
+A new environment variable has been defined, DOMAIN_PLOT_URL, which can
+be used to plot (using SVG or PNG) a map of the domains on the library
+sequence.  The argument to DOMAIN_PLOT_URL is the concatenated list of
+annotations provided by the -V options.  All annotations (including
+sites) are passed; non-alpha-numeric characters are URL encoded.
+plot_domain.cgi is an example of a script that can be passed as
+DOMAIN_PLOT_URL.  To use this script:
+
+$ENV{DOMAIN_PLOT_URL}="<object data=\"plot_domain.cgi?n0=%d&query=%s&db=%s&lib=%s&q_start=%ld&q_stop=%ld&l_start=%ld&l_stop=%ld&n1=%d&o_pgm=%s&doms=%s\" width=\"660\" height=\"72\"></object>\n";
+
+ann_feats2l.pl has been extended to allow the --neg
+(or --neg-dom) option, which puts a NODOM domain annotation
+between the domain annotations provided by the database.
+
+>>Mar. 7, 2013
+(cal_cons.c)
+Modify update code to properly begin global alignments that start with
+insertions or deletions.
+
+>>Feb. 20, 2013
+(compacc2.c)
+Annotation scripts (-V \!ann_feats.pl) were being inactivated if no
+annotations were returned, fixed.
+
+>>Feb. 2, 2013
+(comp_lib9.c)
+Prevent premature termination of query title in -m 9 mode (guarantees
+the full >accession text to first space is preserved).
+(compacc2.c)
+Provide domain information (;C=PF00016) in -m9 domain scoring.
+
+>>Jan 7-9, 2013
+(initfa.c, pssm_asn_subs.c)
+Modify pssm_asn_subs.c to properly parse binary PssmWithParameters
+produced by NCBI asntool from psiblast (blast+) text ASN.1 output.
+The text ASN.1 uses a binary encoded query sequence; get_lambda() in
+initfa.c was modified to work with a binary encoded query sequence
+(the query is used to find the p_i from rrcounts[query[i]]).
+
+Modify pssm_asn_subs.c to set query=NULL when PSSM does not include
+query sequence.  Modify read_asn_pssm() to set query=aa0 if query==NULL;
+
+>>Dec. 14, 2012
+(cal_cons.c, dropfx.c, dropfz2.c)
+Enable percent identity calculation on domains.  Merge
+cal_cons.c/calc_code() strategies into dropfx.c, dropfz2.c
+
+>>Dec. 6, 2012
+(comp_lib8.c, comp_lib9.c, nmgetlib.c)
+Fix code in close_lib_list() that did not properly re-initialize files
+for re-reading (not seen when library is in memory, or for single
+sequence search).
+
+>>Dec 2, 2012
+(wm_align.c, Makefiles)
+CHECK_SCORE() in wm_align.c must return different scores for local and
+global (#define GGSEARCH in wm_align.c).  Requires modified Makefiles.
+
+>>Sep 24, 2012
+(doinit.c, compacc2.c, cal_cons.c)
+Fix bugs introduced with next_annot_entry() strategy for reallocating
+annot_arr[]; find a bug in cal_cons.c where i1_annot was indexing
+annot0_arr_p[]; ensure that m_msg.ann_arr_def[] is appropriately initialized.
+
+>>Sep 17, 2012
+(lav2plt.pl, lavplt_ps.pl, lavplt_svg.pl, lav_defs.pl, l_feat_dom.pl)
+Convert the lav*.c programs to perl.  This simplifies adding the
+ability to script domain annotation.  The format for domain
+annotations for the lav2plt.pl programs differs slightly from the
+current up_feats_dom.pl program, because it requires a beginning and
+end for each domain, e.g.:
+
+>sp|Q14247.2|SRC8_HUMAN
+80	[]	116	Cortactin 1.
+117	[]	153	Cortactin 2.
+154	[]	190	Cortactin 3.
+191	[]	227	Cortactin 4.
+228	[]	264	Cortactin 5.
+265	[]	301	Cortactin 6.
+302	[]	324	Cort. 7; trunc.
+492	[]	550	SH3.
+
+and takes a single accession from the command line, e.g.:
+"l_annot_dom.pl sp|P09488" rather than reading a file.
+
+>>Sep 4, 2012
+(doinit.c, compacc2.c, fasta_guide.tex)
+Annotations can now be provided within a sequence (-V '%#!'), by a
+script (-V '\!up_feats.pl'), or from a file (-V '<annot.file
+q<annot.file').  Annotation files make particular sense for query
+annotations, where the user may know much more about the query than
+the database does.
+
+(doinit.c, compacc2.c, comp_lib9.c, structs.h)
+Ensure that calc_code() is called if any -m 'F9c file' requires it.
+
+>>Aug 31, 2012
+(cal_cons.c, compacc2.c, dropfx.c, dropfz2.c)
+The region score calculations have been corrected to include regions
+that overlap alignment boundaries, and regions that start in gaps.
+
+>>Aug 10, 2012
+(cal_cons.c, compacc2.c, dropfx.c, dropfz2.c)
+
+Introduce a second kind of annotation feature, the "Region" (denoted
+by '[' and ']'), that specifies a region that should be scored
+separately.  These regions cannot be nested, each residue can belong
+to only one region.  However, the scores in these regions can be
+calculated (perhaps percent identity and length later), and are
+displayed:
+
+>>sp|P09488|gstm1_human GLUTATHIONE S-TRANSFERASE MU 1 (  (218 aa)
+ Site:* : 23Y=23Y : MOD_RES: Phosphotyrosine (By similarity).
+ Site:* : 33Y=33Y : MOD_RES: Phosphotyrosine (By similarity).
+ Site:* : 34T=34T : MOD_RES: Phosphothreonine (By similarity).
+ Region : 3-82 : score=547; bits=146.4 :  GST_N
+ Site:^ : 116Y=116Y : BINDING: Substrate.
+ Region : 104-171 : score=465; bits=125.8 :  GST_C
+
+All information about the region should be provided with the '['
+(start) symbol.
+
+>>Aug 1, 2012
+(dropfx.c, dropfz2.c, c_dispn.c)
+Fix some very old bugs that caused errors in coordinate displays of
+reverse-complement fastx/fasty alignments.  Fix BLAST alignment
+display coordinates.  Enable variant calculations for FASTY
+(dropfz2.c), and simplify calculations for dropfx.c
+
+>>Jul 29,2012
+(doinit.c, compacc2.c, comp_lib9.c)
+Allow annotation descriptions to be delivered by annotation script,
+denoted by '=' in first line, e.g.:
+=*:phosphorylation
+=^:binding site
+=@:active site
+>gi|121735|sp|P09488.3|GSTM1_HUMAN
+7	V	F	Mutagen: Reduces catalytic activity 100- fold.
+23	*	-	MOD_RES: Phosphotyrosine (By similarity).
+33	*	-	MOD_RES: Phosphotyrosine (By similarity).
+34	*	-	MOD_RES: Phosphothreonine (By similarity).
+
+remove requirement for leading space before annotation script: e.g.:
+-V '\!up_feats_c.pl'
+
+>>Jul 27, 2012
+(compacc2.c, cal_cons.c, dropfx.c)
+
+(1) Allow comments/descriptions on features other than type 'V' (variant)
+to be displayed with alignment.  If a '@' SITE feature has a comment
+provided by the annotation script, the comment will be displayed in
+the alignment description, e.g.:
+
+>>sp|P28161.2|GSTM2_HUMAN Glutathione S-transf            (218 aa)
+ ^ :116Y=116Y: BINDING: Substrate (By similarity).
+ @ :210S+210T: SITE: Important for substrate specificity.
+ initn: 632 init1: 632 opt: 632  Z-score: 1414.3  bits: 268.8 E(450603): 2.6e-71
+Smith-Waterman score: 945; 75.2% identity (93.6% similar) in 218 aa overlap (1-218:1-218)
+
+If no comment is provided, the annotation will only appear in the
+coordinate line. This provides a way to show annotation locations in
+BLAST output.
+
+(2) Also add code to ensure that symbols returned by annotation scripts
+are displayed on the coordinate line.
+
+(3) Add environment variable substitution to =${TMP_D}/annot.defs and
+\!${TMP_D}/up_feats_c.pl parsing.
+
+>>Jul 24, 2012
+(uascii.h, map_db.c)
+Modify NANN, a value one more than the largest amino-acid encoding
+value, increasing it from 50 (too small for NCBIStdaa_ext_n) to 60;
+ESS changed to 59.
+
+>>Jul 20, 2012
+(mshowalign2.c, mshowbest.c, compacc2.c, comp_lib8.c)
+(transferred from fasta-36.3.5)
+(a) Fix bug in mshowalign2.c that occurred because of re-use of the
+"tmp_len" variable when adding '\n' to -L long descriptions.  This
+typically occurred with -m 10.  (b) Modify logic used to capture if an
+alignment had been calculated, reducing dramatically the number of
+re-alignments with multiple -m "F" output files.
+
+>>Jun 30, 2012
+(mshowbest.c)
+Ensure that opt score and E()-value are based on initial scan score,
+not later alignment score.  score_delta is used to increment initial
+scan score.  However, currently the E()-value of the alignment score
+is displayed in the alignment list, so the -m 9 and showalign()
+E()-values can be inconsistent.
+
+>>Jun 29, 2012 (from fasta-36.3.5c)
+(pssm_asn_subs.c)
+Add chk_asn_buf() before getting RPSPARAMS_MATRIX.
+
+>>Jun. 27, 2012 (from fasta-36.3.5c)
+(nmgetlib.c, compacc2.c)
+Fix bug that allocated unnecessary space for re-loading sequences in
+pre_load_best() (compacc2.c).  Ensure that closed/NULL memory mapped
+file descriptors are not returned.
+
+>>Jun. 18, 2012
+(compacc2.c)
+Modify pre_load_best() to allocate memory for sequences to be aligned
+only if the sequences are not already in memory.  (Searches against
+hg18 with repetitive queries caused very large amounts of memory to be
+allocated in duplicate.)
+
+>>Jun. 12, 2012
+(compacc2.c, doinit.c, dropfx.c, cal_consf.c)
+Implement variant scoring for fastx36.  Also address problems with
+annotation location when -m markx is not set.  Check function
+definitions for other drop functions where variant scoring is not yet
+implemented.
+
+>>Jun. 9, 2012
+(defs.h, doinit.c, c_dispn.c)
+Add 'M' and 'B' options to -m 0,1 to specify annotation location. For
+example, -m 0M (-m 1M) causes the annotation to be inserted in the
+"middle" alignment line, rather than in the coordinate line (making
+the sequence with the annotated feature ambiguous).  -m 0B, -m 1B
+puts the annotation in both the middle (alignment) line and the
+coordinate line.
+
+>>Jun. 8, 2012
+(doinit.c, compacc2.c, build_ares.c, mshowbest.c, mshowalign2.c,
+structs.h and others)
+
+Implement a script-driven strategy for feature annotation in
+alignments.  In addition to: fasta36 -V '*%^@', which extracts the
+annotation characters from the library sequences, we can also do:
+fasta36 -V '*%^@ \!feature_script.pl' which expects the same
+annotation characters ('*%^@'), but expects them from the script
+'feature_script.pl'.  This script gets the sequence description line,
+e.g: "gi|121746|sp|P09211|GSTP1_HUMAN Glutathione S-transferase P (GST
+class-pi) (GSTP1-1)", and is expected to return a tab-delimited file:
+====
+pos label value
+23	*
+33	*
+34	*
+116	^
+173	V	N
+210	V	T
+====
+
+Currently, the "value" is ignored unless the label is "V", for
+variant. If 'V' annotations are present, then the alternative
+amino-acid residue values are tested in alignments; if the variant
+residue improves the score, the score is updated and the variant
+sequence is displayed, and a 'V' indicates the variant in the
+coordinate line.  Currently, variant annotations can only affect
+library sequences.
+
+By default, annotation symbols are shown in the coordinate line for -m
+0 (default) and -m 1 (difference) alignments, sometimes overwriting
+the coordinate. Annotation symbols (from either sequence) can be shown
+in the middle alignment line by specifying -m 0M or -m 1M, or in both
+the middle alignment line and the coordinate line with -m 0B, -m 1B.
+
+>>May 5, 2012
+(dropnnw2.c)
+Enable rev-comp for ggsearch/glsearch.
+
+>>Mar. 13, 2012
+(defs.h)
+Increase default file name length to 256 from 120 to accommodate long
+file names at the EBI.  Also allow much longer command line arguments
+argv_line[MAX_LSTR=4096] to be reported.
+
+>>Jan. 30, 2012
+(nmgetlib.c, altlib.h)
+Read .fastq sequence libraries (ignoring quality information) as library type '7';
+
+>>Dec. 21, 2011 (released as fasta-36.3.5c)
+(nmgetlib.c)
+Fixed a problem reading multiple library files that produced
+segmentation faults because a data buffer was free()ed and then
+re-used.
+
+>>Nov. 17, 2011
+(initfa.c, mshowalign.c)  (from fasta-36.3.5b)
+Fix problem with ppst->e_cut_r for LALIGN DNA sequences (set
+improperly to 0.001).  Add ':' to s_bits: in -m 10 output.  Also
+remove "score" from "lsw_s-w opt" score description (not present in
+non-LALIGN -m 10).
+
+>>Nov. 9, 2011 (from fasta-36.3.5b)
+(lavplt_svn.c, lavplt_ps.c, ncbl2_mlib.c)
+Fix buffer overrun for lav legend.  Fix old problem re-opening NCBI
+blastdbfmt indirect OID files.
+
+>>Oct. 30, 2011
+(comp_lib9.c)
+Correct re-initialization bug that prevented the second query sequence
+from seeing the entire library.
+
+[from fasta-36.3.5a_svn]
+(comp_lib9.c, comp_lib8.c, ncbl2_mlib.c, nmgetlib.c)
+Address out-of-memory problems when searching memory mapped, and fix
+problem using fopen()/fread() rather than mmap for NCBI DNA databases.  On
+32-bit machines, NCBI database files cannot be left open, and are now
+more aggressively closed.  However, searches that produce very large
+numbers of alignments may still run out of memory on low-memory 32-bit
+machines.
+
+(compacc2.c, comp_lib8.c, comp_lib9.c, htime.c)
+Correct problems that produce negative scan times.
+
+>>Oct. 21, 2011
+(pcomp_subs2.c, work_thr2.c, mshowalign2.c, make/Makefile.mp_com2, Makefile.fcom)
+Fixes to re-enable MPI compilation and execution.
+
+>>Oct. 18, 2011
+(compacc2.c, mshowbest.c, comp_lib8.c, comp_lib9.c, initfa.c)
+Fix the logic for specifying the number of alignments displayed with
+the -b 123, -b '>123', -b '=123', -b '$' options, particularly when
+statistics are not used.
+
+>>September 21, 2011
+(initfa.c, apam.c, scaleswn.c, compacc2.c)
+Two major problems have been addressed (which also affect fasta-36.3.5
+and earlier versions): (a) specifying a -s dna.mat DNA matrix did not
+work properly; (b) too few shuffles, particularly with DNA sequences,
+were produced with pairwise comparisons.  The problem with scoring
+matrix files was exacerbated by the use of fixed library alphabets.
+initfa.c has been modified to recognize that when a DNA scoring matrix
+is specified, the "-n" option is set.  The shuffling problem appeared
+when, for pairwise DNA comparisons, fewer than 50 shuffles were
+reported. This occurred because the buffers used to communicate with
+threads no longer have a fixed amount of sequence buffer associated
+with them.
+
+>>August 23, 2011
+(tatstats.c, upam.h, apam.c)
+The remapping of the amino-acid encoding to NCBIstdaa broke some
+assumptions in tatstats.c, and elsewhere.  In addition to the simple
+mapping problem, which changed the counts[] assignment in
+tatstats.c/calc_priors(), the fact that NCBIstdaa does not have
+contiguous real amino acids (e.g. B is at position 2), broke the
+generate_tatprobs() function because of a very old bug where priorptr
+was not always incremented.
+
+Some of the drop*.c functions have been updated to ensure that the
+space allocated for rapid pam[][] score lookup includes space for
+lower-case characters, which can be present in pseg'ed "map_db -b"
+libraries.  In addition, binary format (currently all mmap'ed)
+libraries cannot include annotations, because common annotation values
+('*', '&') overlap the range of the NCBIstdaa_l (lowercase) mapping.
+
+>>August 1, 2011
+(map_db.c)
+map_db.c has been modified to provide a more efficient memory mapping
+for FASTA format files. map_db -b works like map_db, but, in addition
+to writing the .xin index file of descriptions and sequences in the
+FASTA library, it also produces a new protein_library.bsq file and
+protein_library.xin_b that contains binary encodings of the databases
+and an index for this file.  The binary encoding can be memory mapped,
+so that database searches can proceed directly from memory.  map_db -b
+.bsq files are very similar to the blastfmtdb files, except that they
+accommodate lower-case letters (masked) in the sequences.  The
+implementation of blastfmtdb lower-case masking prevents it from being
+used in directly memory mapped files.
+
+map_db.c introduces a new memory mapped format encoding, MP2.  I
+expect this format to be extended to allow not only directly memory
+mapped files, but also directly memory mapped lookup tables.  A
+database can be hashed, and the hash and link files written to a
+library file, which can then be used for searches without the need to
+re-calculate the hash/link tables.
+
+(comp_lib9.c, mmgetaa.c, ncbl2_mlib.c, initfa.c, dropfz.c)
+Modifications to allow memory mapped files to be read and processed
+directly.  Databases with lower-case characters can be memory mapped,
+which means that lower-case characters are coming into the alignment
+programs even when -S is not specified.  As a result, all the protein
+scoring matrices must be built-out to allow lower-case
+characters. Likewise, the dropfz2.c matrices built by init_weights()
+must always be set for lower-case characters.
+
+>>July 20, 2011
+(mshowbest.c, mshowalign2.c)
+gi|12345 numbers are no longer shown in the list of best hits unless
+-m 8 or -m 9 are used.  They are never shown in the alignments.
+(dropfz2.c)
+Modify MAX_UC, MAX_LC to be consistent with NCBIstdaa alphabet. Modify
+<= nsq for init_weights().
+
+>>July 16, 2011	 fasta-36.3.6
+(comp_lib9.c, drop*.c, cal_cons*.c)
+The internal encoding of amino-acids has changed to NCBIstdaa
+throughout the programs.  This allows the programs to use memory
+mapped NCBI blastdbfmt libraries directly, without re-encoding, but
+lower-case low-complexity mapping is not recognized.  This allows
+substantial speedup in single query searching.  However, to allow
+low-complexity searches, a new memory mapped format/encoding will be
+required.
+
+>>July 5, 2011	 fasta-36.3.6
+(compacc2.c)
+Modify save_best2() logic for identifying scores to be used for
+statistics.  An is_valid_stat is set for multi-frame results that
+specify which scores can be used for the stats[] and qstats[] arrays.
+Modifications to buf_do_work(), buf_shuf_work(), and buf_qshuf_work()
+to cause the calculation to be done in the thread, rather than the
+main program.  Fix some bugs in the qshuffle code to ensure that all
+valid shuffles up to maxshuff are saved.
+
+(complib5e.c, complib7e.c, complib8.c)
+Fix -m 9c/C core dump with -z -1.
+
+(cal_cons.c, cal_consf.c)
+Reverse 'I', 'D' with CIGAR string.
+
+>>June 26, 2011
+(comp_lib8.c, compacc2.c)
+
+Added the ability to search a library produced/specified by a script.
+Like the "-e expand_script.sh", searching against a library that
+begins with a '!', e.g. '!library_script.sh', causes the
+library_script.sh to be executed, producing a temporary file from
+stdout, which is then scanned as the database.  As with expansion
+files, all the standard library syntax can be included.  Thus, if
+cat_db.sh contains the command 'echo /seqdb/swissprot.lseg', the
+command:
+
+  fasta36 query.aa '\!@cat_db.sh'
+
+will cause cat_db.sh to produce a temporary file with the line
+"swissprot.lseg"; the temporary file will be interpreted as an
+indirect file of filenames; and swissprot.lseg will be searched.  Note
+that in Unix systems, the '!' must be preceded by a '\' as shown
+above, so that it is not interpreted by the shell.
+
+>>June 23,24 2011
+(compacc2.c, comp_lib8.c, mysql_lib.c)
+A new save_best2() function in compacc2.c has been designed to
+simplify the logic involved in saving best scores, with the goal of
+moving some of the save_best() calculations into individual threads.
+
+mysql_lib.c has a new command, close_tables, that allows a script to
+remove a table after it has been used. (It might make more sense to
+add this to the extension script option.)
+
+>>June 14, 2011 (released as fasta-36.3.5a June, 2011)
+(comp_lib7e.c, comp_lib8.c, compacc2.c)
+Fix a serious bug in next_sequence_p() that caused a portion of the library to
+be missed when long sequences filled the sequence buffer before the
+slots were filled.
+
+Make certain that thread buffers are cleared when running an expansion
+script.
+
+Return an extra '\n' before the final summary for consistency with
+earlier versions.
+
+>>June 2, 2011	 (released as fasta-36.3.5 June, 2011)
+(comp_lib8.c, comp_lib5e.c, comp_lib7e.c)
+Fix a bug that indicated that linked expanded sequences were
+pre-loaded for alignment when they were not.
+
+>>May 24, 2011 (released as fasta-36.3.5)
+(comp_lib8.c, comp_lib7e.c, comp_lib5e.c, mshowalign2.c, compacc2.c,
+initfa.c, param.h, scaleswn.c)
+
+The in-memory versions of the program are allocating much more memory
+than they actually use, causing the memory limits to cut in too soon.
+Fix this by using a smaller MAXLIB_P (36000) for searches against
+protein libraries, and expanding/contracting the aa1b_size more
+sensibly.  Also add lost_memK value to track lost memory.  For protein
+searches, lost memory is now around 15% of allocated memory (down from
+40%).
+
+Numerous fixes to improve formatting of HTML output.  Full statistics
+parameters are now available with the fdata output.
+
+Add fset_vars() to comp_lib8.c to set m_msg.max_memK properly.
+Parameters have been modified to ensure less memory waste (all buffers
+have 1000 sequences); Drop default 64-bit library memory limit to 8GB
+(-XM8G, LIB_MEMK=8G).
+
+>>May 25, 2011
+(comp_lib8.c, comp_lib7e.c, comp_lib5e.c, mshowbest.c)
+
+Add the '-b >1' option, guarantees that at least 1 result is shown,
+but otherwise limits by E()-value.  '-b =10' guarantees to show
+exactly 10 results (never more or less if the library is large
+enough), '-b 10' will show no more than 10 results, limited by -E
+e_cut, and '-b >1' will show at least 1 result, but is otherwise
+limited by -E e_cut.
+
+>>May 19, 2011
+(comp_lib8.c, compacc2.c, param.h)
+comp_lib8.c is a version of comp_lib7e.c that keeps sequences in
+memory over multiple searches, but returns seqr_chains of buffers of
+sequences as they are read, rather than waiting for everything to be
+read. comp_lib8.c will automatically allocate up to 2 GB (32-bit
+machines) or 8 GB (64-bit machines) to hold the sequence database in
+a multiple query search.  This number can be increased or decreased
+using the -XM# (megabytes) or -XM#G (gigabytes) option, or by setting
+the LIB_MEMK environment variable.  -XM4G (LIB_MEMK=4G) makes 4GB
+available for sequence libraries; -XM-1 makes all machine memory
+available.
+
+>>May 5 2011
+(mshowbest.c)
+Fix problems that prevented "-b align_number" from properly limiting output
+with "-z -1".  "-z -1" also broke multiple HSPs (since no threshold
+could be calculated); fixed.
+(dropnfa.c)
+Fix some offset arithmetic that prevented FASTA alignments from
+extending to full length in do_walign().
+
+>>May 4, 2011
+(scaleswn.c)
+Provide additional checks for division by low numbers in fit_llen2()
+and fit_llens().  The similarities between fit_llen(), fit_llens(),
+and fit_llen2() have been highlighted, and their differences
+documented.  scaleswn.c now provides pstat_info, which writes all the
+values required to re-calculate zscores or E()-values from raw scores.
+
+>>May 2, 2011
+(dropnfa.c)
+Fix a problem with the traditional cgap(join)/optcut(opt) thresholds
+(no longer used by default) caused by allowing ktup=3 for proteins.
+The ktup=3 modification increased the cgap/opt thresholds by 6.
+
+(comp_lib5e.c, comp_lib7e.c, comp_lib8.c)
+Confirm identity of -m # and -m "F3 file.out".  Small differences fixed.
+
+(mshowbest.c, mshowalign2.c)
+Remove gi|12345 information from -m B, -m BB blast-like output.  NCBI
+Blast does not display gi numbers.
+
+>>Apr. 22, 2011
+(doinit.c, initfa.c)
+Several of the less common options have been changed to expanded
+options, changing the meaning of -X (which now specifies expanded
+options), as well as -o, -1, -B, -x, and -y.  -o now provides the
+offset coordinates previously specified with -X; -B is now -XB, -o
+-Xo, -x -Xx1,-1, and -y -Xy, e.g. -Xy32.
+
+>>Apr. 19, 2011
+(comp_lib7e.c, comp_lib5e.c, doinit.c, mshowbest.c)
+Test latest version with -I interactive mode.  Modifications
+required to ensure that alignments go to outfd, not stdout, when
+filename is entered.  In addition, in interactive mode there can be
+more scores shown than e_cut, so bbp->repeat_thresh must be set in
+showbest() not main() program.
+
+>>Apr. 17, 2011
+(comp_lib7e.c, doinit.c, compacc.c)
+
+The FASTA programs now support multiple output files with different -m
+out_fmt types using the -m "F# out_file" or -m "F#,#,# out_file"
+option.  Normally, the -m out_fmt option applies to the default output
+file, which is either stdout, or specified with -O out_file (or within
+the program in interactive mode). With -m F, an output format can be
+associated with a separate output file, which will contain a complete
+FASTA program output.  Thus,
+
+  ssearch36 -m 9c -m "FBB blast.out_file" -m "F10 m10.out_file" query library
+
+Will send the -m 9c output to stdout, but will also send -m BB output
+to blast.out_file, and -m 10 output to m10.out_file.  Consistent -m
+out_fmt commands can be sent to the same file by separating them with
+','; e.g.:
+
+  ssearch36 -m 9c -m "F9c,10 m9c_10.out_file" query library.
+
+Producing alternative format alignments in different files has little
+additional computational cost.
+
+One of the shortcomings of this approach is that it affects only the
+output format, not the other options that modify the amount of output.
+Thus, if you specify -E 0.001, that expect threshold will be used for
+all the output files.  When a -m option can modify the output (e.g. -m
+8 sets -d 0), that modification persists only for that file.
+
+>>Apr. 14, 2011
+(initfa.c)
+Fix bugs in e_cut_r calculation that made it much too low for
+lalign36, and used the >1.0 divisor improperly for all programs
+(change from e_cut_r = e_cut_r/divisor to e_cut_r = e_cut/divisor).
+
+>>Apr. 11, 2011
+(comp_lib5e.c, comp_lib7e.c, compacc.c)
+
+The non-preload version of FASTA (comp_lib5.c) has been extended to
+allow script expansion (comp_lib5e.c). To do this, the central score
+calculation loops have been moved to getlib_buf_work(), just as
+seqr_chain_work() was created for comp_lib7e.c.  Moreover, the
+function used to build the link_file names, build_link_data(), is now
+in compacc.c.  Differences between comp_lib5e.c and comp_lib7e.c have
+been reduced.
+
+>>Apr. 5, 2011
+(comp_lib7e.c)
+Fix issue with closing unopened link_lib_list_p when no results are
+found. Remove no-sequence error message for link library file.
+
+>>Apr. 1, 2011
+(comp_lib7e.c)
+The -e script.sh has been generalized to have all the capabilities of
+a library file, in particular '@' specifies an indirect file, and
+"script.sh #" allows a library type to be specified.  Thus, the
+script.sh invoked by "@script.sh" should not produce a fasta file; it
+should produce a file that contains the name of a fasta file (or
+possibly some other format).  If '@' is used, the link_lib file
+written to stdout will be prepended with '@', and treated as an
+indirect file of file names.
+
+(comp_lib5.c, comp_lib7.c, comp_lib7e.c)
+Fix problem with null refstr (no Please cite:).
+
+>>Mar. 31, 2011
+(comp_lib7.c, comp_lib7e.c)
+close_lib() was being called after each query.  This is incorrect for
+versions (like comp_lib7) that keep the entire database in memory; the
+files must be kept open to allow ranlib() to get long descriptions
+(alternatively, a long description could be read initially).
+
+(comp_lib5.c, comp_lib7.c, comp_lib7e.c)
+Fix query offset coordinates for long queries that are broken up.
+Allow query library to have zero-length sequences without stopping
+(queries now stop when end-of-file is reached).
+
+(upam.h)
+Fix gap penalties for BLOSUM80 matrix (change from -14, -2 to -10, -2).
+
+>>Mar. 29, 2011
+(comp_lib7e.c, doinit.c)
+
+Add the ability to search an expanded set of sequences based on the
+accessions from the initial search using "-e expand.sh" option.
+If "-e expand_script.sh" is specified, the command:
+
+    expand.sh link_acc_file > link_lib_file
+
+is run by the program (fasta36, ssearch36, fastx36, etc), where
+link_acc_file and link_lib_file are temporary file names produced by
+the program. (The location of the temporary files can be specified
+with the $TMP_DIR environment variable.)  link_acc_file contains a
+list of accession strings for the statistically significant hits - the
+information in the description line to the first space, e.g.
+
+gi|121719|sp|P08010|GSTM2_RAT
+gi|121746|sp|P09211|GSTP1_HUMAN
+
+from a search against my pir1.lseg library.
+
+"expand.sh" then reads that file, extracts the accession information,
+expands the accessions to a new set of accessions, extracts the
+expanded set of accessions from a database and writes them to
+standard output (which is saved in the temporary link_lib_file
+name). The sequences in expanded link_lib_file are then added to the
+initial search, and included in the list of best scores (and
+alignments) if their scores are statistically significant. The
+additional sequences do not change the initial library size.
+
+To test the expansion capability, use an expand.sh script that simply
+cat's a file of homologs to stdout (which will go to link_lib_file and
+be read), e.g. expand.sh contains "cat ../seq/gst.lib".
+
+Building a program that can take an arbitrary list of accessions and
+produce a library of homologs is more complicated (and slower), but
+will allow a smaller database to be searched yet produce results
+similar to those found from a larger database.
+
+>>Mar. 24, 2011		(released as fasta-36.3.4)
+(comp_lib7.c, dropfx.c, dropfz2.c, doinit.c)
+Fix a bug in the new help display; identify and correct various memory
+leaks and references to uninitialized data.
+
+>>Mar. 15, 2011
+(doc/fasta3x.me, fasta3x.tex)
+The ancient, rarely updated, fasta3x.me has been replaced with
+fasta3x.tex, with the goal of producing a more up-to-date, accurate,
+and comprehensive document describing the capabilities of the FASTA
+programs.  In addition, fasta36.1 has been updated/corrected.
+
+(make/Makefile.os_x86_64)
+Mac OS X clang 2.0, distributed with Xcode4.0, does not properly
+optimize the smith_waterman_sse2_word() in smith_waterman_sse2.c when
+clang -O is used to compile.
+
+>>Mar. 4, 2011
+(doinit.c)
+Histograms are now turned off by default.  -H shows histograms for all
+programs, not just the *_mpi (PCOMPLIB) programs.
+
+>>Feb. 27, 2011
+(make/Makefile36m.common, Makefile.pcom_t, Makefile.pcom_s)
+
+The threaded programs are now the default, and the *_t versions of
+programs have been removed from the Unix and unix-like (Mac OS X)
+distributions.  Windows versions can have either threaded or
+non-threaded versions, since the threaded windows programs require an
+additional library. Serial versions of the programs can still be built
+by editing the make/Makefile36m.common file, and using
+include Makefile.pcom_s instead of include Makefile.pcom_t.
+
+The documentation has been edited to reflect these changes.
+
+>>Feb. 24, 2011 (comp_lib5.c, comp_lib7.c, doinit.c, initfa.c,
+structs.h) The FASTA programs have a much more informative help
+system.  If the -DSHOW_HELP option is included in the Makefile, the
+following changes occur: (1) the program is no longer interactive by
+default. To get interaction, use the -I option (-I previously meant
+showing the identity alignment in lalign; that option is now available
+with -J). (2) fasta36 and fasta36 -h present a short help message. (3)
+fasta36 -help provides a complete list of options with a more complete
+set of options.  The getopt() option strings are now built
+dynamically.
+
+>>Feb. 18-21, 2011
+(doinit.c)
+Fix missing -m 9i percent identity/alignment length.  Fix issues with
+short sequence description in -m 6 (html) mode.
+
+>>Feb. 17, 2011
+(comp_lib5.c, comp_lib7.c, doinit.c)
+Implementation of -m BB which provides completely BLAST-like output
+(not just alignments).
+
+Modification of the -b ### option.  Previously, -b 100 guaranteed 100
+alignments; now -b 100 limits to 100 alignments if more than 100
+alignments have E()-values less than the -E threshold. An '=' symbol
+before the number reverts to the previous behavior; e.g. -b =100
+guarantees 100 alignments, regardless of E()-value (-b =100 is
+equivalent to -b 100 -E 100000.0, and disables other setting of the
+E()-value threshold).
+
+>>Feb. 10, 2011
+(doinit.c, mshowalign2.c, c_dispn.c)
+The FASTA programs have a new alignment option, "-m B", which shows
+alignments in BLAST format (no context, coordinates on the same line,
+BLAST symbols for matches and mismatches.)  This version does not
+change the descriptions of the alignments, which are still FASTA like,
+but the alignments themselves should look just like BLAST alignments.
+Option -m BB makes output even more blast-like, showing not only the
+alignments, but the initial set of high scoring sequences, and other
+initial information, like BLAST+.
+
+>>Feb. 9, 2011 	released as fasta-36.3.3
+(dropfs2.c, initfa.c, comp_lib*.c)
+Modify fasts36/fastm36 to allow up to ktup=3 for proteins; ktup=6 for
+DNA (previously the max was ktup=2 for both).
+
+Modify version string to match release version number.
+
+>>Feb. 6, 2011
+(initfa.c)
+Fix bug that prevented fastm36 from working properly with DNA queries.
+
+>>Jan. 31, 2011
+(pcomp_subs2.c, work_thr2.c)
+Fixes to fasty36_mpi/tfastx36_mpi problem.  Only fasty needs pascii[]
+for alignments, but it wasn't being sent to workers. Fixed.  The MPI
+versions of the programs have now been tested much more thoroughly.
+
+>>Jan. 29, 2011
+(comp_lib5.c, comp_lib6.c, comp_lib7.c, work_thr2.c, initfa.c,
+param.h, dropfs2.c, scaleswt.c, dropfx.c)
+
+Translated DNA shuffles (tfastx36, tfasty36) now shuffle DNA as
+codons.  (1) Modify param.h pstruct to include shuffle_dna3,
+initialized in resetp() [initfa.c] (2) modify buf_shuf_work() to use
+ppst->zs_win and ppst->shuffle_dna3. (3) Add ppst->zs_off=0 to
+scaleswt.c/process_hist(). (4) Fix some memory leaks in dropfx.c.
+(5) Fix some other memory leaks in dropfs2.c.
+
+>>Jan. 28, 2011
+(initfa.c, scaleswn.c, mshowalign2.c)
+Address crashes that occurred when novel scoring matrices and gap
+penalties were specified, particularly for DNA.  Fix memory problem
+with long (-L) sequence descriptions.
+
+>>Jan. 23, 2011
+(comp_lib7.c)
+comp_lib7.c uses a more efficient strategy for reading chunks of
+sequences that ensures that sequence data is contiguous for *_mpi
+programs.  comp_lib7.c replaces comp_lib6.c, which will be removed.
+
+>>Jan. 22, 2011
+(many files)
+Replace "mw.h" with "best_stats.h", a much more informative name.
+
+(drop*.c, p_mw.h, w_mw.h)
+Remove p_mw.h, w_mw.h from code base and update_params() from
+drop*.c. These files are left over from the old p2_complib.c parallel
+programs.
+
+>>Jan. 21, 2011	released as fasta-36.3.2
+(comp_lib5.c, comp_lib6.c, pcomp_subs2.c)
+Fixes for MPI version of programs.  Earlier versions did not handle
+DNA/translated DNA comparisons properly, because duplicated sequences
+(forward/reverse strand) were not handled properly. The current code
+produces the correct scores and alignments, but probably is much less
+efficient than it should be.
+
+>>Jan. 11, 2011
+(initfa.c, scaleswn.c)
+Re-enable DNALIB_LC (read lower-case DNA sequences as lower case).
+
+Reset ktup to default after change for short query in multi-query
+searches.
+
+Address multiple issues associated with variable scoring matrices,
+i.e. -s '?BP62'.  Introduce pst->pam_name for the actual scoring
+matrix, to distinguish it from pst->pam_file, which can correspond to
+the std_pam->abbrev, for values like BP62 (which encodes both a matrix
+and a specific set of gap penalties).  Ensure that the new scoring
+matrix is initialized and extended correctly.  Fix some issues with
+scoring matrix names in scaleswn.c
+
+>>Jan. 5, 2011
+(dropnnw2.c, dropgsw2.h, global_sse2.c,h, glocal_sse2.c,h)
+Include SSE2 optimization for global/global and global/local alignments
+provided by Michael Farrar.  Global and glocal alignments are now 20X
+faster.
+
+>>Jan. 5, 2011	re-released as fasta-36.3.1
+(initfa.c, last_tat.c)
+Fix bug resetting pst.e_cut_r for DNA sequences.  Modify last_tat.c
+code to use pre-loaded sequence if available. Remove last_tat.c
+PCOMPLIB code.
+
+>>Jan. 3, 2011	 released as fasta-36.3.1
+(comp_lib5.c, comp_lib6.c)
+Add >>><<<, >>>/// to -m 9,10 output for separating multiple query
+searches.  Also clean up extra >>>query line before alignments when no
+alignments are shown.
+
+>>Dec. 16, 2010
+(dropgsw2.c, dropnnw2.c, dropnsw.c, comp_lib5.c, comp_lib6.c)
+Fix bug that caused ssearch to not invert coordinates for
+reverse-complement DNA alignments (I never imagined using ssearch for
+DNA) in dropgsw2.c, dropnnw2.c, and dropnsw.c.  Add SEQ_PAD to aa0[1]
+(rev-comp copy) in comp_lib5.c, comp_lib6.c.
+
+>>Dec. 14, 2010
+Modify CIGAR strings for frameshifts, including 1F and 1R for forward
+and reverse frameshifts.  Extensive documentation updates.
+doc/fasta36.1 is the most comprehensive and accurate description of
+FASTA options.
+
+>>Dec. 1, 2010
+(drop*.c, comp_lib5.c, comp_lib6.c)
+Correct problems with copying for recursive sub-alignments.  Correct
+bug in adler32_crc calculation that suggested a problem with continued
+library sequences that did not exist.
+
+(initfa.c, defs.h)
+Use MAXLIB, rather than MAXLIB+MAXTST for comp_lib6.c, which
+pre-allocates the sequence database.  Increase MAXLIB.
+
+>>Nov. 24, 2010
+(drop*.c, drop_func.h)
+Modify drop*.c functions that do recursive sub-alignments to avoid
+modifying the aa1[] sequence array, which conceivably could be in use
+by other threads. do_walign() now has const *aa0 AND const *aa1.  To
+prevent modification of aa1, sub-regions of aa1 are now copied into
+newly allocated arrays.
+
+>>Nov. 20, 2010
+(cal_cons.c, mshowbest.c, mshowalign2.c, doinit.c)
+The -m 9C option displays an alignment code in CIGAR format. (-m 9c
+shows the older alignment encoding.)
+
+>>Nov. 16, 2010		(beginning of fasta-36.3.*, verstr 36.07)
+(initfa.c, apam.c, upam.h, param.h)
+
+Provide the ability to adjust the scoring matrix based on the length
+of the query sequence for alignments using a protein alphabet (this
+could certainly be extended to DNA as well).  By including a '?'
+before the scoring matrix, e.g. -s '?BP62', a shallower matrix will be
+chosen if the entropy of the selected matrix (i.e. bit score per
+aligned position) times the length of the protein query is
+<=DEF_MIN_BITS (defs.h), currently 40 -- this value should be set
+based on the library size).  The FASTA programs include BLOSUM50 (0.49
+bits/pos) and BLOSUM62 (0.58 bits/pos) but can range to MD10 (3.44
+bits/position). The variable scoring matrix option searches down the
+list of scoring matrices to find one with information content high
+enough to produce a 40 bit alignment score.  This option is included
+primarily for metagenomics scans, which can include relatively short
+DNA reads, and correspondingly short protein translations.
+
+Also correct the short-query modification to ktup, so that it works
+properly with translated FASTX/FASTY searches (ktup is set to 1 when
+the query_length/3 <= 20).
+
+(dropnfa.c, dropfx.c, dropfz2.c)
+Shuffled sequence alignment scores are calculated identically to
+library alignment scores. Previously, optimized scores were calculated
+for all shuffled sequences for FASTA type alignments, even though
+typically 20 - 40% of library sequences were optimized.  Now the two
+sampling strategies are consistent, though this may cause problems
+when only a small fraction of sequences are optimized.
+
+Small changes to provide consistent dropnfa.c, dropfx.c, dropfz2.c
+parameter display, and fix display with -m 10.
+
+>>Nov. 15, 2010
+(initfa.c)
+Enable statistical thresholds by default (previously, they were
+enabled with -c -1 or -c 0.01 or anything < 1.0).  The "classical"
+join/opt threshold behavior can be restored with -c O (upper case
+letter O), or by providing an optimization threshold >
+1.0. Statistical thresholds dramatically speed up searches (typically
+2-fold), and provide more accurate statistical estimates.  The old
+join/optimization thresholds were optimized for BLOSUM50, and other
+1/3-bit scaled scoring matrices, and did not work well with BLOSUM62.
+Statistical thresholds have been tested extensively, particularly with
+-z 21, and produce much more reliable statistical estimates.
+
+>>Oct. 14, 2010
+(Makefile.fcom, cal_cons.c)
+Edits to re-enable compilation and successful execution of
+tfasta36(_t). tfasta36 has been superseded by tfastx36(_t), which is
+faster, and treats frameshifts as a different type of gap.
+
+>>Oct. 13, 2010
+(mshowbest.c)
+Make it more difficult to request more description/scores than are
+available.
+
+>>Sep. 30, 2010	 (released as fasta-36.2.7)
+(comp_lib5.c, comp_lib6.c, dropnfa.c, dropfx.c, dropfz2.c)
+Fix bugs in DEBUG versions with adler32_crc calculations on
+overlapping sequences.  Add more informative error messages when
+debugging.  Fix a problem with hist2.hist_a != NULL with some
+compilers. Fix formats for some debugging error messages in dropnfa.c,
+dropfx.c, and dropfz2.c.
+
+Also fix repeat_threshold calculation for very short sequences, to
+guarantee that all matches as good as the best match with the sequence
+are found.  Fix some problems that prevented FASTA from finding short
+repeats with short queries.
+
+This version of the FASTA36 package offers an alternate main program
+file, comp_lib6.c, which reads the entire database into memory before
+doing the search.  Using comp_lib6.c can dramatically speed up
+searches with multiple queries (there is no advantage with single
+query sequences) on large multi-core computers, as each search is done
+without re-reading the database.  On a 48-core processor, we see
+speedups greater than 40X with ssearch36_t and fastx36_t.  To enable
+comp_lib6.c, edit the make/Makefile36m.common file to comment out
+lines referring to comp_lib5.c and un-comment lines referring to
+comp_lib6.c.
+
+>>Sep. 29, 2010
+(comp_lib5.c, comp_lib6.c, mshowbest.c)
+Added -m 8C option, which mimics BLAST+ tabular with comment lines
+format.
+
+>>Sep. 17, 2010
+(dropfx.c)
+
+Fix a bug in dropfx.c/do_walign() that modified library sequences.
+(This only caused a problem with comp_lib6.c, which reads the entire
+database into memory and re-uses sequence buffers.)  Check sequence
+consistency with adler32 CRC calculation.
+
+>>Sep. 15, 2010
+(mshowbest.c, mshowalign2.c)
+Change the output format slightly.  E2() expect values (-z 21+) no
+longer contain the library size (which is always the same as the
+E(library_size) value), and the -m 9 +- line no longer contains the
+frame information, since it is redundant. (The redundant rev-comp
+remains on the >-- HSP lines.)
+
+>>Sep. 14, 2010
+(comp_lib5.c, mshowbest.c, drop*.c, cal_cons[f].c, etc.)
+Implement BLAST -m 8 tabular output.
+
+>>Sep. 9, 2010
+
+(compacc.c) Fix a bug in pre_load_best() that disabled
+-L long sequence descriptions.
+
+(doinit.c) Fix a bug that prevented non-overlapping alignments from
+being displayed when the -E threshold was changed.  Before -E 0.001
+would disable additional alignments.  Now, -E "0.001 0" is required to
+disable the additional alignments.
+
+(drop*.c) The display of search parameters has changed to ensure that
+gap penalties are displayed on the same line as the scoring
+matrix. Previously, the FASTA "Parameters:" section looked like:
+
+Parameters: BL50 matrix (15:-5)xS ktup: 2
+ join: 42 (0.0944), opt: 30 (0.601), open/ext: -10/-2, width:  16
+ Scan time:  0.450
+
+With fasta-36.2.7 (and later), the Parameters: section is:
+
+Parameters: BL50 matrix (15:-5), open/ext: -10/-2
+ ktup: 2, join: 42 (0.102), opt: 30 (0.574), width:  16
+
+The [T]FAST[X/Y] Parameters: section includes the frameshift/substitution penalties (tfasty36):
+
+Parameters: BL50 matrix (15:-5) open/ext: -12/ -2 shift: -20, subs: -24
+ ktup: 2, E-join: 0.5 (0.224), E-opt: 0.1 (0.0536),  width:  16
+
+>>Aug. 3, 2010	(released as fasta-36.2.6)
+(scaleswn.c)
+
+Modifications to calc_thresh(), proc_hist_ml(), to better accommodate
+search strategies (fast?? with statistical thresholds) that provide
+complete scores only for a high-scoring fraction of sequences.  For
+some query sequences, the E()-values from the database were sometimes
+much "worse" than E2()-values, an observation that is
+counter-intuitive (if parameters are estimated against shuffled
+related sequences, the E()-values should get worse, not better).  For
+some queries, the result was very dramatic (E() < 1E-80, E2() <
+1E-150).  This error appears to occur because the z-trim or mle_cen
+thresholds are including many related sequences.  -z 2 was modified to
+censor more sequences when only a subset are scored, and -z 1 was
+modified to adjust z-trim more carefully.  As a result, z-trim was
+reduced, excluding more sequences.  If too many sequences are excluded,
+then regression statistics do not work, and the program fails over to
+Altschul-Gish statistics.
+
+-z 21+ modified so that MLE statistics are used for shuffle E2()
+values if Altschul-Gish statistics are used for the library
+E()-values.
+
+>>July 30, 2010
+(comp_lib5.c, pcomp_subs2.c)
+
+Fix bug in buf_align_seq() that allowed buffer over-runs with long DNA
+sequences with MPI.  Checks on buffer over-runs are now included in
+pcomp_subs2.c/put_rbuf(),get_wbuf().  Aug. 1, 2010, fixed similar bug
+in buf_shuf_seq().  -z 21 now works with long DNA sequences.
+
+>>July 28, 2010
+(mshowalign2.c)
+Fix lalign36/showalign() to show best sub-optimal E()-value, not
+bptr[0] E()-value (often identical).
+
+>>July 19, 2010	(released as fasta-36.2.5)
+(wm_align.c, dropfx.c,dropfz2.c)
+Fix some off-by-one boundary calculations to ensure that every query
+that can fit into a library is aligned correctly.
+
+>>May 18, 2010
+Implement comp_lib5.c, which simplifies the structure of
+comp_lib4.c by moving some calculations into functions.
+
+>>May 10, 2010
+Fix problem setting nshow with small library in interactive mode.
+
+>>May 5, 2010  fasta-36.2.3
+Fix bug that prevented shuffled scores from being used properly for small
+databases (prss capability was lost).
+
+>>May 2, 2010  fasta-36.2.2
+Fix problem with tat_score values from fasts and fastm.  fasta35 did
+not re-calculate the z-score after last_stats().  fasta36 does, so it
+must ensure that the e-value (sometimes p-value) is used correctly.
+
+>>Apr. 29, 2010
+More extensive testing of the MPI-PCOMPLIB programs revealed some
+problems sending sequences when two (or more) frames for the same sequence
+were used.  This problem has been addressed, and large scale testing of
+fastx36_mpi (with 100K sequence queries in a run) works.
+
+>>Apr. 16,19, 2010
+(pcomp_subs2.c, comp_lib4.c, work_thr2.c)
+The MPI-PCOMPLIB parallel version of the FASTA36 programs is
+working. This PCOMPLIB version takes a very different approach from
+the older PVM/MPI parallel programs (p2_complib2.c/p2_workcomp2.c) -
+it works virtually identically to the threaded programs (sharing the
+same work_thr2.c code and get_rbuf/put_rbuf() (manager) and
+get_wbuf/put_wbuf() (worker/thread) functions).  As a result, in this
+initial version, the database is NOT distributed to the nodes.  During
+multiple searches, the library is re-read each time.  However, load is
+distributed to workers exactly the way it would be for the threaded
+system, so the workload should scale.
+
+To distinguish them from the earlier mp35compsw, mp35compfa, etc, the
+new versions are ssearch36_mpi, fasta36_mpi, etc.
+
+The programs work with multiple queries, produce multiple
+sub-alignments, and work with -m 9c encodings.
+
+>>Apr. 7, 2010
+(various Makefiles, comp_lib4.c, pcomp_subs2.c, thr_bufs2.h,
+thr_buf_structs.h)
+
+The MPI version of the threaded programs, ssearch36_mp, now compiles.
+pcomp_subs2.c replaces pthr_subs2.c, and thr_bufs.h ->
+thr_buf_structs.h, thr.h -> thr_bufs2.h, and pcomp_bufs2.h has been
+added as the equivalent of thr_bufs2.h for PCOMPLIB.
+
+>>Apr. 2, 2010
+(comp_lib4.c, work_thr2.c, compacc.c)
+Implement init_aa0(), which isolates code that calls init_work and
+sets up aa0s, aa1s, f_str[1] (reverse complement) and qf_str so that
+the same code is used by the serial, threaded, and (future) PCOMP
+versions.
+
+(work_thr2.c)
+work_thr2.c now contains code for either threaded or PCOMPLIB
+processes. Threaded processes get stuff from work_info; PCOMPLIB
+processes get the same information via messages sent from init_thr()
+called by main().
+
+>>Mar. 30, 2010
+(comp_lib4.c, work_thr2.c, thr_bufs.c +pcomp_subs2.c)
+
+The data buffers used to communicate between workers and threads
+have been restructured to separate the old buf2_str, which contained
+sequence, score results, and alignment results, into three buffers,
+buf2_data_s, buf2_res_s, and buf2_ares_s, separating sequence data
+from scores and alignments.  This was done to simplify communication
+in the MPI/PVM environment. Workers should be able to return results
+directly into the appropriate buffer.
+
+>>Mar. 25, 2010		fasta-36.2.1
+
+(dropfx.c, dropfz2.c)
+Found/removed two "static" declarations in small_global that caused problems
+with [t]fastx/y with threaded alignments.
+
+>>Mar. 24, 2010  (now version 36.06 with threaded alignments)
+(dropnfa.c)
+The DNA band aligner in dropnfa.c was not thread safe.  This has been
+fixed.
+
+>>Mar. 23, 2010
+Code for pre-loading/threaded-aligning sequences has been
+significantly cleaned up.  Checks are made before RANLIB() and
+re_getlib() in showbest() and showalign() that should be consistent
+with annotations AND functions that cannot encode alignments.
+
+Add mshowalign2.c (which does not do PCOMPLIB) to provide threaded
+alignments.  build_ares_code() and buf_do_align() modified to ignore
+MX_M9SUMM so that alignments are produced whenever demanded (still
+does not do alignment if a_res is available).
+
+>>Mar. 22, 2010
+(comp_lib4.c, work_thr2.c, thr_bufs.h)
+
+comp_lib4.c has been modified to thread the alignment encoding
+(build_ares) for -m 9c. If m_msg.quiet and alignments are required for
+showbest(), then the program identifies the number of alignments
+required, reads the sequences (and annotations) into a buffer, and
+sends them to the threads to be encoded.  Then, when showbest() is
+called, bbp->have_ares has been set, and the alignments are not
+re-calculated.  This should be extended to thread actual alignment
+production, and additional work is required to clean-up the sequence
+and bline(description) buffers before a second search.
+
+>>Mar. 17, 2010
+(comp_lib4.c, dropnfa,fx,fz2.c)
+Modifications to provide more sensible E2() statistical estimates with
+threshold-heuristic comparison functions and -z 21.  Also fixed bug
+that caused the wrong zs_off to be used with -z 21. dropnfa,fx,fz2.c
+now optimize all scores when shuff_flg is set.
+
+>>Mar. 16, 2010
+(comp_lib4.c, scaleswn.c, drop*.c)
+
+A new, relatively consistent, statistical estimation strategy has been
+introduced for the heuristic programs that optimize only a fraction of
+scores (fasta36, [t]fast[xy]36).  Statistics-based heuristic
+thresholds can increase search speed 2 - 4-fold by doing band
+optimization on only a small fraction of library sequences (with the
+-c -1 option, about 10% of alignments are band-optimized, compared
+with more than 50% with the classic thresholds).  However, optimizing
+only a small part of the library produces two classes of scores,
+optimized (10% or less) and non-optimized, with different statistical
+properties.  fasta36 addresses this problem by calculating statistical
+estimates only for the optimized scores, and then correcting the
+significance of the score by accounting for the frequency of
+optimization.  For example, sampling only 5% of scores increases the
+z-value (std. deviation above the mean) by -logE(0.05)*sqrt(6)/Pi =
+2.34 which offsets the z-score by 23.4.  This effect is only seen when
+the -c option is used to specify statistical thresholds, and is most
+apparent when looking at the histogram, which will be offset by the
+appropriate z-score.
+
+This strategy appears to produce more accurate statistics in general,
+but can produce less accurate statistics for the heuristic programs when
+the -z 21 option is used.
+
+>>Mar. 3, 2010
+
+(comp_lib4.c)
+Fix the new stats[] sampling strategy to sample >60K sequences
+more uniformly.  The old code massively over-sampled later sequences,
+because of several bugs. The new code works as expected.  The first
+60K sequences are represented about 30% more than the rest, but after
+60K, sequences are sampled moderately uniformly.  The older
+SAMP_STATS_MORE is uniform across all the scores.
+
+(build_ares.c)
+Move code to produce chains of alignments (a_res) produced by
+do_walign, followed by subsequent calls to calc_id, calc_code, into a
+new function, build_ares_code(), which is shared by the
+serial/threaded and parallel (p2_workcomp.c) programs.  This is a
+first step towards having the parallel programs produce multiple HSP
+alignments.
+
+>>Feb. 27, 2010
+
+(lib_sel.c)
+Fix problem with new chained library access that prevented more than
+two files from being searched.  Also, library name string has been
+lengthened to allow a list of libraries to be displayed.
+
+>>Feb. 26, 2010
+
+Parallel programs have been tested in both PVM and MPI versions, and
+some additional bugs have been fixed.  Currently, the PVM/MPI versions
+are fully functional, but only with FASTA35 capabilities.  The new
+multiple HSP alignments and best-shuffle E2() scores are not yet
+available.
+
+>>Feb. 24, 2010
+
+Fix some leaks, largely due to more complex alignment data structures
+for multiple alignments.  Currently, all the major leaks are in data
+structures allocated in main(), and which I don't bother to
+de-allocate (mostly library buffer memory).
+
+Change zsflag > 10 to zsflag >= 10 && zsflag < 20 in three places.
+Too many shuffles were being done with zsflag==21.
+
+>>Feb. 22, 2010
+
+Begin conversion of p2_complib2.c/p2_workcomp.c.  Very old code to
+allocate aln_d_base removed from v35 and v36. No code for best list
+shuffle, or multiple high-scoring alignments.  However, the code now
+works properly with statistical thresholds. (Changes made to
+p2_complib2.c, p2_workcomp.c to update pst struct after last_param.()).
+
+>>Feb. 19, 2010 fasta-36x6
+
+Fix issues with -z 26 statistics.  Add description of E2() statistics.
+
+Added option to specify statistics routine for best-shuffled
+statistics independently of library statistics by specifying a second
+-z option.  Thus, -z "21 2" uses regression scaled statistics for the
+library estimate, and MLE statistics for the best-shuffled estimates.
+
+>>Feb. 17, 2010	fasta-36x5
+
+Some of the simplifications dealing with threads in comp_lib4.c failed
+on some compilers and architectures. The code for terminating threads
+has been modified to allow sequence buffers with zero entries, to
+simplify the empty_buffer logic.  There is now an explicit option to
+terminate threads by setting lib_bhead_p->stop_thread.  However, this
+flag is never set, as rbuf_done() stops the threads instead.
+
+Also fix problem with stats_idx being associated with wrong buf2_p in
+two frame searches.
+
+>>Feb. 15, 2010	fasta-36x4
+
+fasta36 can now calculate both "search" (E()) and "shuffled" (E2())
+E()-values and display them in the best scores and
+alignments. If the -z option is greater than 20, then two evalues are
+calculated, one from the search (e.g. -z 1 uses regression scaled
+scores) and a second derived from shuffling the high scoring
+sequences.  The high-scoring sequence shuffled scores are
+approximately equivalent to doing a PRSS (pairwise shuffle), but more
+efficient.  High-scoring shuffled E()-values (labeled E2()) are
+typically 2 - 5-fold more conservative for average composition
+proteins, and 10 - 20X more conservative for biased composition
+proteins.
+
+Fix another bug in -S alignment scores vs opt scores in ssearch36 (see
+Feb. 8).
+
+>>February 12, 2010
+(prev. version 142)
+
+Create comp_lib4.c (from comp_lib3.c), which simplifies some of the
+processes for handling buffers of results (no more empty_reader_bufs)
+and enables shuffles of high-scoring sequences to evaluate significance.
+
+>>February 8, 2010
+
+Fix a problem with scores and E()-values for SSEARCH sub-alignments
+when the -S option is used.  When the -S option was used to ignore
+lower-case residues in query or library for the initial score, the
+final alignments include the lower-case masked residues.  The
+SSEARCH36 was using the non-masked alignment score, rather than the
+original score (FASTA36, and [T]FAST[XY]36 used the masked score).
+This was incorrect, as the statistics are calculated for masked
+sequences.  The corrected version calculates both a non-masked and a
+masked score, where the masked score (for subalignments) uses the
+non-masked alignment.
+
+[T]FAST[XY]36 had a related problem, which is that when multiple
+sequences are in the query with the same pam2p[0] (no -S) score, then
+the wrong alignment could be shown with the initial scores.  Fixing
+this requires that the alignment routine only work on the region
+specified from the initial band (fixed in dropnfa.c, dropfx.c, and
+dropfz2.c).
+
+>>February 4, 2010
+
+The more efficient statistical thresholds in fasta36 have been
+disabled by default.  They can be turned on with -c -1, or by setting
+thresholds (-c "0.05 0.2" would set E_band_opt to 0.05 - target 5% of
+sequences - and E_join at 20% target).
+
+My initial implementation produced very inaccurate statistics,
+presumably because only a small fraction of unrelated sequences were
+being band-optimized (fasta35 typically optimized about 60% of library
+sequences, fasta36 with statistical thresholds optimizes about 2%,
+which causes a 2 - 3X speed increase). The sampling strategy for
+fasta36, and [t]fast[xy]36 scores has been adjusted to provide
+relatively accurate scores for searches that optimize only a small
+fraction of sequences.  On the cases I have tested, statistical
+accuracy is comparable to, or better than, the version 35 programs,
+but probably not as robust as ssearch estimates.
+
+>>January 29, 2010
+
+The logic to predetermine where scores went for shuffling breaks when
+some scores are not calculated (e.g. -M 200 - 300).  Fix by using
+nstats as the index for nstats < MAX_STATS, and then use stats_idx
+afterwards.
+
+Provide more efficient score sampling logic.  The old method (left
+over from fasta34 or earlier) generated a random number for every
+sequence after MAX_STATS; if it was less than MAX_STATS, the sample
+was used. This logic is still available with -DSAMP_STATS_MORE.  The
+new logic samples every other sequence between MAX_STATS and
+2*MAX_STATS, every third between 2*MAX_STATS and 3*MAX_STATS, etc, and
+randomly replaces one of the stats scores.  For 430K SwissProt, this
+reduces the number of samples from 178K to about 145K, and reduces the
+number of calls to the random number generator from 430K to 85K.
+
+>>January 28, 2010
+
+(comp_lib3.c, mrandom.c) Tests of ssearch36 statistical accuracy
+suggest that the default statistical estimates (-z 1) are not as
+accurate as they should be with BLOSUM62, -11/-1.  Both -z 11 and -z 2
+work better.  In FASTA35, -z 11 - 15 caused a 2X-slowdown (actually
+more) because EVERY library sequence was shuffled, even though only a
+fraction of the sequences (for libraries > 60,000) would be used for
+the statistical calculation.  comp_lib3.c uses a more sophisticated
+strategy for sampling scores after 60,000 so that sequences are only
+shuffled and aligned if they will be used in the statistical
+calculation.  Doing this on SwissProt, with 430,000 sequences, means
+that ~180,000 additional shuffle alignments are done, not 430,000
+additional.
+
+However, using -z 11 with the threaded program was much more than
+2X-slower -- random() is not re-entrant, and is designed to provide a
+consistent set of random numbers over threads, so threads were waiting
+on the random number generator, with a big performance penalty.  Using
+code from WikiPedia, I implemented a random number generator
+(mrandom.c) that saves a local copy of state, so threaded -z 11 has
+the correct performance penalty.
+
+>>January 25, 2010 (initfa.c 36.04 January 2010)
+
+(dropfz2.c, aln_struct.h) At long last, tfasty36 correctly produces
+multiple alignments on the reverse strand. (Jan. 26, 2010) Fixed
+introduced bug in fasty36 that used wrong offset in recursion.
+
+>>January 17, 2010
+
+Extensive changes have been made to all the drop_* functions, so that
+multiple alignment results are properly sorted from highest to lowest
+sw_score. dropnfa.c, dropgsw2.c, dropfx.c and dropfz2.c now all use
+similar strategies to calculate non-overlapping alternative alignments.
+score_thresh thresholds are applied to rst.score[ppst->score_ix]
+appropriately for all recursive functions.
+
+>>August 24, 2009
+
+Statistical thresholds have been adjusted to produce
+approximately the correct number of joins/band optimizations.  The
+approximate fraction of joins/band optimizations is now shown in the
+results.
+
+>>August 21, 2009
+
+fasta/fastx/fasty/tfastx/tfasty now use statistically based thresholds
+for joining short segments and deciding to do a band optimization --
+similar to the threshold strategy used by BLAST.
+
+The statistical thresholds used are set with the
+-c option, which used to be used to set optcut.  The -c option now has three ranges:
+
+-c < 0       -- use the old FASTA thresholds, calculated in the same way
+0 < -c < 1.0 -- use the statistical thresholds and set E_opt_cut.
+-c >= 1.0    -- use the old FASTA threshold, and specify it.
+
+For 0 < -c < 1.0, a second argument can be supplied (-c "0.02 0.1")
+for the joining E()-threshold.  If this value is < 1.0, it is used as
+E_join; if it is > 1.0, E_opt_cut is multiplied by the value to get
+E_join.
+
+>>August 19, 2009
+
+Implement Lambda/K/H based c_gap, opt_cut in dropnfa.c, dropfx.c
+(fastx), and dropfz2.c (fasty).  Add ELK_to_s() to scaleswn.c.
+
+>>August 11, 2009
+
+Fix bug in dropfx.c that used the wrong variables for calculating
+offsets into a long DNA sequence for subset alignments.
+
+Stop putting sw_score in score[0] when no score[0] was calculated.
+Use 0 instead.
+
+>>July 31, 2009
+
+(dropgsw2.c) Fix problems with dropgsw2.c that allowed poor
+sub-alignments to be shown.  Consolidate merge_ares_acc() for all the
+functions.  Add pst.do_rep to disable multiple alignments.
+
+>>July 6, 2009
+
+(initfa.c, apam.c, complib2.c, p2_complib.c) move changes for
+validate_novel_aa() from fasta35.
+
+(initfa.c) Enable checks for unusual characters ('Uu' in proteins) for
+many more programs with the -p option.
+
+>>June 16, 2009
+
+Modify statistical sampling strategy to greatly simplify the
+calculation.
+
+>>May 15, 2009
+
+Fix bug in lav2ps.c, lav2svg.c that occurred when displaying very long
+sequence alignments (e.g. genome alignments).  The maximum coordinate
+is set properly now.
+
+>>May 5, 2009
+
+(initfa.c) Fix bug (int e_cut in pgm_def_arr[]) that prevented e_cut
+from being set properly for lalign for DNA.
+
+>>May 4, 2009
+
+The functions that return multiple sub-alignments (HSPs) after the
+best alignment have been modified to ensure that alignments are
+returned sorted by score, by merging the list of alignments found to
+the left and right of the best alignment.
+
+>>April 28, 2009
+
+(p2_complib2.c, p2_workcomp2.c, mshowbest.c, mshowalign.c) modified to
+support new coordinate system, preliminary work on multiple HSPs in
+parallel environment.
+
+>>April 14, 2009
+
+(comp_lib2.c, nmgetaa.c) Comprehensive restructuring of library file
+list from a fixed length array to a variable length linked list.  The
+linked list allows library files to insert additional files into the
+list, so that, for example, a file of accession numbers can refer to a
+list of files for the accessions.
+
+Eventually, this should allow FASTA to support .pal/.nal files from
+the NCBI, and to support files of file names most places file names
+are allowed.
+
+>>April 2, 2009		(from fasta35)
+
+(structs.h, comp_lib2.c, doinit.c, mshowbest.c, mshowalign.c) The code
+that selects the number of high scores to display has been reorganized
+to support the -F e_low option (which was not implemented properly if
+-b and -d were specified).  The code is simplified; m_msg.nshow is
+used to specify the number of best scores listed, and min(m_msg.nshow,
+m_msg.ashow) is used to specify the number of alignments shown.
+
+>>March 26, 2009	(from fasta35 - fa35_04_07)
+
+(initfa.c) Fix problems with 'U' recognition in DNA pam matrix,
+correct implementation of -r +mat/-mis.  Previous versions of fasta35
+may not have used the correct DNA matrix when the -r +mat/-mis option
+was specified.
+
+>>March 23, 2009	(initfa.c verstr -> 36.02)
+
+(mshowbest.c, aln_structs.h) Add loop for displaying multiple aligned
+regions with -m 9, -m 9i, and -m 9c in mshowbest.c.
+
+>>March 22, 2009
+
+(dropgsw2.c, dropnnw2.c, wm_align.c) Rearrange code in dropgsw2.c,
+dropnnw2.c (which replaces dropnnw.c) so that a single function,
+wm_align.c:nsw_malign() is responsible for recursive alignments for
+both dropgsw2.c (sw_walign) and dropnnw2.c (nw_walign).  The strategy
+for these (Smith-Waterman, Global-Local) alignments is
+identical. nsw_malign() uses a function pointer that calculates S-W or
+N-W that it gets from dropgsw2.c or dropnnw2.c
+
+It might make sense to use a similar strategy for the recursive
+translated alignments.
+
+>>March 19, 2009
+
+(map_db.c, mm_file.h) Fix another bug in map_db.c that appears for
+sequence files larger than 2Gb.  MM_OFF is now consistently used in
+more of the places where an int64_t might be required.
+
+>>March 17, 2009
+
+(list_db.c) Fix a bug in list_db that caused it to misread the maximum
+sequence length, and then be off by 4-bytes for all the offsets.
+Include list_db with map_db in the list of auxiliary programs.
+
+>>Mar. 8, 2009		fa35_04_06
+
+(comp_lib2.c, pthr_subs2.c, pthr_subs.h, doinit.c, dec_pthr_subs.c)
+Dynamically allocate pthread_t *fa_threads, rather than limit it to
+MAX_WORKERS.  MAX_WORKERS is no longer used in the Unix environment;
+it gets its value from sysconf(_SC_NPROCESSORS_CONF).  If sysconf() is
+not available, MAX_WORKERS is used.  The threaded programs should now
+automatically adjust the number of threads to the number of
+processors.  Moreover, the number of threads can be set to more than
+the number of processors with -T #threads.  Also, max_workers was
+renamed fa_max_workers, and pthread_t *threads is now *fa_threads.
+
+>>Mar. 6, 2009
+
+copied comp_lib2.c from v35 (fix for query offset coordinates)
+
+>>Oct. 22, 2008
+
+The programs that allow multiple alignments to be found include:
+
+    ssearch36(_t)
+    fasta36(_t)
+    fastx36(_t)
+    fasty36(_t)
+
+fasts and fastf will probably not be updated in this way, because of
+the difficulty in reconstructing alignments, but fastm may be.
+
+Right now, the pvm/mpi versions of the programs do not support
+multiple sub-alignments.
+
+>>Sep. 25, 2008
+
+Modify the syntax for the -E option to allow the repeat E()-value
+cutoff to be specified in either of two ways.
+
+       -E "e_cut e_rep"
+
+If the value of e_rep is less than one, it is taken as the absolute
+E()-value threshold for additional local domains, for example:
+
+       -E "1.0 0.05" says use 1.0 for the main E()-value threshold,
+        and 0.05 as the threshold for additional local alignments.
+
+Alternatively, if e_rep >= 1.0, it is taken as a divisor for the
+E()-value threshold, thus:
+
+	-E "1.0 10.0"
+
+Sets the E()-value threshold for additional local alignments to
+1.0/10.0 = 0.1.
+
+Finally, if e_rep <= 0.0, no multiple alignments are done (equivalent
+to previous versions of FASTA).
diff --git a/doc/readme.w32 b/doc/readme.w32
new file mode 100644
index 0000000..e98ace7
--- /dev/null
+++ b/doc/readme.w32
@@ -0,0 +1,67 @@
+September 7, 2015
+
+On windows machines, the threaded programs are now capable of
+automatically detecting the number of threads.  The pthreadVC2.dll is
+still required, but it is now included in the program directory
+(bin/).
+
+October 6, 2006, updated September 7, 2015
+
+The FASTA programs for Windows32 environments (Windows7 and later)
+have undergone a major upgrade, so that now all the programs in the
+Unix/MacOSX distribution are available to Windows users.  Moreover,
+Windows users with modern (SSE2 compatible) processors can run greatly
+accelerated versions of the Smith-Waterman ssearch program.
+
+Moreover, these programs work both with FASTA formatted files, and
+NCBI BLAST formatted files.
+
+The following programs are available:
+
+    fasta36.exe	       	protein-protein or DNA-DNA database searches
+    fastf36.exe
+    fastm36.exe
+    fasts36.exe
+    fastx36.exe		compare DNA query to protein library with frameshifts
+    fasty36.exe		compare DNA query to protein library with frameshifts
+    ssearch36.exe	Smith-Waterman for prot-prot or DNA-DNA searches,
+    			accelerated with SSE2 extensions
+    tfastf36.exe
+    tfastm36.exe
+    tfasts36.exe
+    tfastx36.exe	compare protein to DNA library with frameshifts
+    tfasty36.exe	compare protein to DNA library with frameshifts
+
+Each of these programs also has a "threaded" version, which can run on
+multiple processors (or multiple cores) if they are available.  However,
+they are built using the Unix pthreads API, so to use these programs,
+you must download the pthreadVC2.dll from:
+
+ftp://sources.redhat.com/pub/pthreads-win32/dll-latest/lib/pthreadVC2.dll
+
+see also http://sourceware.org/pthreads-win32/
+
+    fasta36_t.exe
+    fastf36_t.exe
+    fastm36_t.exe
+    fasts36_t.exe
+    fastx36_t.exe
+    fasty36_t.exe
+    ssearch36_t.exe
+    tfastf36_t.exe
+    tfasts36_t.exe
+    tfastx36_t.exe
+    tfasty36_t.exe
+
+Without that DLL, the threaded programs will not run at all. The
+current compilation supports two threads, and speeds up searches about
+2-fold on dual-core processors.
+
+The programs have been tested with protein and DNA databases in FASTA
+format, PIR/GCG-text format, and Genbank flatfile format.  The program
+does not work properly with GCG binary format databases, but it seems
+unlikely that Windows users would need these. 
+
+Please report bugs to:
+
+	wrp at virginia.edu
diff --git a/make/Makefile b/make/Makefile
new file mode 100644
index 0000000..940b482
--- /dev/null
+++ b/make/Makefile
@@ -0,0 +1,53 @@
+#
+# makefile for fasta3, fasta3_t.  Use makefile.pvm for pvcompxx.
+#
+#
+# Dec 8, 2005 - with gcc4.0.2 (or .1) under Redhat Linux Fedora FC4 -O3 breaks the alignment code
+#
+
+CC= gcc -g -O2
+
+#CC=gcc -Wall -pedantic -ansi -g -O
+#CC = gcc -g -DDEBUG
+#CC= /usr/local/parasoft/bin.linux2/insure -g -DDEBUG
+
+# EBI uses the following with pgcc, -O3 does not work:
+# CC= pgcc -O2 -pipe -mcpu=pentiumpro -march=pentiumpro -fomit-frame-pointer
+
+# this file works for x86 LINUX
+
+# standard options
+CFLAGS= -DSHOWSIM -DUNIX -DTIMES -DHZ=100 -DSFCHAR="':'" -DMAX_WORKERS=2 -DTHR_EXIT=pthread_exit -DPROGRESS  -DUSE_MMAP -D_REENTRANT -DHAS_INTTYPES -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -DUSE_FSEEKO -DSAMP_STATS -DPGM_DOC
+# -I/usr/local/include/mysql -DMYSQL_DB 
+#
+#(for mySQL databases)  (also requires change to Makefile34.common)
+
+LIB_M = -lm
+#LIB_M = -L/usr/local/lib/mysql -lmysqlclient -lm
+# for mySQL databases
+
+HFLAGS= -o
+NFLAGS= -o
+
+# for Linux
+THR_SUBS = pthr_subs2
+THR_LIBS = -lpthread
+THR_CC =
+
+XDIR = /seqprg/bin
+
+DROPGSW_NA_O = dropgsw2.o wm_align.o calcons_sw.o
+DROPGSW_SSE_O = dropgsw2_sse.o smith_waterman_sse2.o wm_align.o calcons_sw.o
+DROPGSW_ALT_O = dropgsw2_alt.o smith_waterman_altivec.o wm_align.o calcons_sw.o
+DROPGSW_O = $(DROPGSW_SSE_O)
+# lalign object sets: no acceleration (NA), SSE2 (SSE), Altivec (ALT)
+DROPLAL_NA_O = droplal2.o lsim4.o calcons_la.o
+DROPLAL_SSE_O = droplal2_sse.o smith_waterman_sse2.o lsim4.o calcons_la.o
+DROPLAL_ALT_O = droplal2_alt.o smith_waterman_altivec.o lsim4.o calcons_la.o
+DROPLAL_O = $(DROPLAL_SSE_O)
+
+# renamed (fasta36)  programs
+include ../make/Makefile36m.common
+# conventional (fasta3) names
+# include ../make/Makefile.common
+
diff --git a/make/Makefile.NetBSD b/make/Makefile.NetBSD
new file mode 100644
index 0000000..a9c08d5
--- /dev/null
+++ b/make/Makefile.NetBSD
@@ -0,0 +1,40 @@
+#
+# this file works for NetBSD
+#
+# provided by Marc Baudoin <babafou at babafou.eu.org>
+#
+
+CC= cc -O
+#CC= cc -g -DDEBUG
+#CC= gcc -g -Wall
+#
+# standard line for normal searching
+CFLAGS= -DM10_CONS -DUNIX -DTIMES -DHZ=60 -DMAX_WORKERS=4 -DTHR_EXIT=pthread_exit  -DFASTA_HOST='"your.host.here/fasta/cgi"' -DUSE_MMAP
+
+# special options for SUPERFAMILIES
+#CFLAGS= -DM10_CONS -DUNIX -DTIMES -DHZ=60 -DSFCHAR="'|'" -DMAX_WORKERS=4 -DTHR_EXIT=pthread_exit -DPROGRESS -DSUPERFAMNUM -DUSE_MMAP
+
+LIB_M= -lm
+HFLAGS= -o
+NFLAGS= -o
+
+# for NetBSD
+THR_SUBS = pthr_subs2
+THR_LIBS = -L/usr/pkg/pthreads/lib -lpthread
+THR_CC = -I/usr/pkg/pthreads/include
+
+BIN = ../bin
+XDIR = /seqprg/slib/bin
+
+# set up files for SSE2/Altivec acceleration
+#
+include ../make/Makefile.sse_alt
+
+# no acceleration
+#
+DROPGSW_O = $(DROPGSW_NA_O)
+DROPLAL_O = $(DROPLAL_NA_O)
+DROPGNW_O = $(DROPGNW_NA_O)
+DROPLNW_O = $(DROPLNW_NA_O)
+
+include ../make/Makefile36m.common
diff --git a/make/Makefile.cray_pvp b/make/Makefile.cray_pvp
new file mode 100644
index 0000000..0b8f185
--- /dev/null
+++ b/make/Makefile.cray_pvp
@@ -0,0 +1,41 @@
+#
+# makefile for fasta35
+#
+# for more information on FASTA on CRAY's, see:
+#
+#       http://home.cray.com/~cpsosa/ChemApps/BioInf/fasta/fasta.html
+#	provided by: Carlos P. Sosa, cpsosa at cray.com
+#
+
+CC= cc -h inline1,scalar3,task0,vector2
+
+HFLAGS= -o
+NFLAGS= -o
+
+LIB_M=
+#
+
+CFLAGS= -DUNIX -DTIMES -DSFCHAR="':'" -DMAX_WORKERS=4 -DTHR_EXIT=pthread_exit -DIS_BIG_ENDIAN
+
+THR_SUBS = pthr_subs
+THR_LIBS = -lpthread
+THR_CC =
+
+BIN = ../bin
+XDIR = /seqprg/slib/bin
+
+# set up files for SSE2/Altivec acceleration
+#
+include ../make/Makefile.sse_alt
+
+# no acceleration
+# (plain-C object lists; this target is built with -DIS_BIG_ENDIAN, not SSE2)
+DROPGSW_O = $(DROPGSW_NA_O)
+DROPLAL_O = $(DROPLAL_NA_O)
+DROPGNW_O = $(DROPGNW_NA_O)
+DROPLNW_O = $(DROPLNW_NA_O)
+
+# renamed (fasta35)  programs
+include ../make/Makefile33.nommap
+# conventional (fasta3) names
+# include ../make/Makefile.common
diff --git a/make/Makefile.fcom b/make/Makefile.fcom
new file mode 100644
index 0000000..75af885
--- /dev/null
+++ b/make/Makefile.fcom
@@ -0,0 +1,344 @@
+
+#================ common .o files 
+
+doinit.o : doinit.c defs.h param.h rstruct.h upam.h structs.h uascii.h aln_structs.h
+	$(CC) $(THR_CC) $(CFLAGS) -c doinit.c
+
+init_sw.o : initfa.c defs.h param.h rstruct.h upam.h structs.h aln_structs.h
+	$(CC) $(THR_CC) $(CFLAGS) -c -DSSEARCH initfa.c -o init_sw.o
+
+init_sw_sse.o : initfa.c defs.h param.h rstruct.h upam.h structs.h aln_structs.h
+	$(CC) $(THR_CC) $(CFLAGS) -c -DSW_SSE2 -DSSEARCH initfa.c -o init_sw_sse.o
+
+init_sw_alt.o : initfa.c defs.h param.h rstruct.h upam.h structs.h aln_structs.h
+	$(CC) $(THR_CC) $(CFLAGS) -c -DSW_ALTIVEC -DSSEARCH initfa.c -o init_sw_alt.o
+
+init_lal.o : initfa.c defs.h param.h rstruct.h upam.h structs.h aln_structs.h
+	$(CC) $(THR_CC) $(CFLAGS) -c -DLALIGN initfa.c -o init_lal.o
+
+init_lnw.o : initfa.c defs.h param.h rstruct.h upam.h structs.h aln_structs.h
+	$(CC) $(THR_CC) $(CFLAGS) -c -DGLSEARCH initfa.c -o init_lnw.o
+
+init_lnw_sse.o : initfa.c defs.h param.h rstruct.h upam.h structs.h aln_structs.h
+	$(CC) $(THR_CC) $(CFLAGS) -c -DSW_SSE2 -DGLSEARCH initfa.c -o init_lnw_sse.o
+
+init_gnw.o : initfa.c defs.h param.h rstruct.h upam.h structs.h aln_structs.h
+	$(CC) $(THR_CC) $(CFLAGS) -c -DGGSEARCH initfa.c -o init_gnw.o
+
+init_gnw_sse.o : initfa.c defs.h param.h rstruct.h upam.h structs.h aln_structs.h
+	$(CC) $(THR_CC) $(CFLAGS) -c -DSW_SSE2 -DGGSEARCH initfa.c -o init_gnw_sse.o
+
+init_rss.o : initfa.c defs.h param.h rstruct.h upam.h structs.h aln_structs.h
+	$(CC) $(THR_CC) $(CFLAGS) -c -DPRSS initfa.c -o init_rss.o
+
+init_rfx.o : initfa.c defs.h param.h rstruct.h upam.h structs.h aln_structs.h
+	$(CC) $(THR_CC) $(CFLAGS) -c -DPRSS -DFASTX initfa.c -o init_rfx.o
+
+init_fa.o : initfa.c defs.h param.h rstruct.h upam.h structs.h aln_structs.h
+	$(CC) $(THR_CC) $(CFLAGS) -c -DFASTA initfa.c -o init_fa.o
+
+init_ff.o : initfa.c defs.h param.h rstruct.h upam.h structs.h aln_structs.h
+	$(CC) $(THR_CC) $(CFLAGS) -c -DFASTF initfa.c -o init_ff.o
+
+init_tf.o : initfa.c defs.h param.h rstruct.h upam.h structs.h aln_structs.h
+	$(CC) $(THR_CC) $(CFLAGS) -c -DFASTF -DTFAST initfa.c -o init_tf.o
+
+init_fs.o : initfa.c defs.h param.h rstruct.h upam.h structs.h aln_structs.h
+	$(CC) $(THR_CC) $(CFLAGS) -c -DFASTS initfa.c -o init_fs.o
+
+init_fm.o : initfa.c defs.h param.h rstruct.h upam.h structs.h aln_structs.h
+	$(CC) $(THR_CC) $(CFLAGS) -c -DFASTM initfa.c -o init_fm.o
+
+init_tfs.o : initfa.c defs.h param.h rstruct.h upam.h structs.h aln_structs.h
+	$(CC) $(THR_CC) $(CFLAGS) -c -DFASTS -DTFAST  initfa.c -o init_tfs.o
+
+init_tfm.o : initfa.c defs.h param.h rstruct.h upam.h structs.h aln_structs.h
+	$(CC) $(THR_CC) $(CFLAGS) -c -DFASTM -DTFAST  initfa.c -o init_tfm.o
+
+init_tfa.o : initfa.c defs.h param.h rstruct.h upam.h structs.h aln_structs.h
+	$(CC) $(THR_CC) $(CFLAGS) -c -DFASTA -DTFAST initfa.c -o init_tfa.o
+
+init_fx.o : initfa.c defs.h param.h rstruct.h upam.h structs.h aln_structs.h
+	$(CC) $(THR_CC) $(CFLAGS) -c -DFASTX initfa.c -o init_fx.o
+
+init_tfx.o : initfa.c defs.h param.h rstruct.h upam.h structs.h aln_structs.h
+	$(CC) $(THR_CC) $(CFLAGS) -c -DFASTX -DTFAST initfa.c -o init_tfx.o
+
+init_fy.o : initfa.c defs.h param.h rstruct.h upam.h structs.h aln_structs.h
+	$(CC) $(THR_CC) $(CFLAGS) -c -DFASTY initfa.c -o init_fy.o
+
+init_tfy.o : initfa.c defs.h param.h rstruct.h upam.h structs.h aln_structs.h
+	$(CC) $(THR_CC) $(CFLAGS) -c -DFASTY -DTFAST initfa.c -o init_tfy.o
+
+#================ miscellaneous
+
+htime.o : htime.c
+	$(CC) $(THR_CC) $(CFLAGS) -c htime.c
+
+compacc2_t.o : compacc2e.c upam.h uascii.h param.h rstruct.h structs.h $(MWH) defs.h aln_structs.h drop_func.h dyn_string.h
+	$(CC) $(THR_CC) $(CFLAGS) -DCOMP_THR  -DCOMP_MLIB -c compacc2e.c -o compacc2_t.o
+
+compacc2_s.o : compacc2e.c upam.h uascii.h param.h rstruct.h structs.h $(MWH) defs.h aln_structs.h drop_func.h dyn_string.h
+	$(CC) $(THR_CC) $(CFLAGS) -DCOMP_MLIB -c compacc2e.c -o compacc2_s.o
+
+compacc2_p.o : compacc2e.c upam.h uascii.h param.h rstruct.h structs.h $(MWH) defs.h aln_structs.h drop_func.h dyn_string.h
+	$(CC) $(THR_CC) $(CFLAGS) -DMPI_SRCB -c compacc2e.c -o compacc2_p.o
+
+compacc.o : compacc.c upam.h uascii.h param.h rstruct.h structs.h $(MWH) defs.h aln_structs.h drop_func.h
+	$(CC) $(THR_CC) $(CFLAGS) -DCOMP_THR -c compacc.c -o compacc.o
+
+apam.o : apam.c defs.h param.h uascii.h upam.h
+	$(CC) $(THR_CC) $(CFLAGS) -c apam.c
+
+pssm_asn_subs.o : pssm_asn_subs.c defs.h
+	$(CC) $(THR_CC) $(CFLAGS) -c pssm_asn_subs.c
+
+#================ display list of best hits / alignments
+
+showbest.o : $(SHOWBESTC) $(MWH) defs.h param.h rstruct.h structs.h  aln_structs.h drop_func.h
+	$(CC) $(THR_CC) $(CFLAGS) -c $(SHOWBESTC) -o showbest.o
+
+build_ares.o : build_ares.c $(MWH) defs.h param.h rstruct.h structs.h  aln_structs.h drop_func.h dyn_string.h
+	$(CC) $(THR_CC) $(CFLAGS) -c build_ares.c -o build_ares.o
+
+$(SHOWALIGN_T).o : $(SHOWALIGN).c $(MWHP) defs.h structs.h param.h rstruct.h aln_structs.h drop_func.h dyn_string.h
+	$(CC) $(THR_CC) $(CFLAGS) -DCOMP_THR -c $(SHOWALIGN).c -o $(SHOWALIGN_T).o
+
+$(SHOWALIGN_P).o : $(SHOWALIGN).c $(MWHP) defs.h structs.h param.h rstruct.h aln_structs.h drop_func.h dyn_string.h
+	$(CC) $(THR_CC) $(CFLAGS) -DMPI_SRC -c $(SHOWALIGN).c -o $(SHOWALIGN_P).o
+
+$(SHOWALIGN_S).o : $(SHOWALIGN).c $(MWHP) defs.h structs.h param.h rstruct.h aln_structs.h drop_func.h dyn_string.h
+	$(CC) $(THR_CC) $(CFLAGS) -c $(SHOWALIGN).c -o $(SHOWALIGN_S).o
+
+$(LSHOWALIGN).o : $(SHOWALIGN).c $(MWHP) defs.h structs.h param.h rstruct.h aln_structs.h drop_func.h dyn_string.h
+	$(CC) $(THR_CC) $(CFLAGS) -DLALIGN -c $(SHOWALIGN).c -o $(LSHOWALIGN).o
+
+re_getlib.o : re_getlib.c mw.h mm_file.h
+	$(CC) $(THR_CC) $(CFLAGS) -c re_getlib.c
+
+lib_sel.o : lib_sel.c defs.h structs.h rstruct.h
+	$(CC) $(THR_CC) $(CFLAGS) -c lib_sel.c
+
+c_dispn.o : c_dispn.c defs.h structs.h param.h rstruct.h aln_structs.h
+	$(CC) $(THR_CC) $(CFLAGS) -c c_dispn.c
+
+#================ statistical functions
+
+karlin.o : karlin.c param.h rstruct.h
+	$(CC) $(THR_CC) $(CFLAGS) -c karlin.c
+
+scale_se.o : scaleswn.c defs.h param.h rstruct.h structs.h $(MWH) alt_parms.h
+	$(CC) $(THR_CC) $(CFLAGS) -DLOCAL_SCORE -c scaleswn.c -o scale_se.o
+
+scale_sn.o : scaleswn.c defs.h param.h rstruct.h structs.h $(MWH) alt_parms.h
+	$(CC) $(THR_CC) -DNORMAL_DIST $(CFLAGS) -c scaleswn.c -o scale_sn.o
+
+scaleswtf.o : scaleswt.c defs.h param.h rstruct.h structs.h $(MWH) alt_parms.h
+	$(CC) $(THR_CC) $(CFLAGS) -DFASTF -c scaleswt.c -o scaleswtf.o
+
+scaleswts.o : scaleswt.c defs.h param.h rstruct.h structs.h $(MWH) alt_parms.h
+	$(CC) $(THR_CC) $(CFLAGS) -c scaleswt.c -o scaleswts.o
+
+tatstats_fs.o : tatstats.c tatstats.h
+	$(CC) $(THR_CC) $(CFLAGS) -c -DFASTS tatstats.c -o tatstats_fs.o
+
+tatstats_ff.o : tatstats.c tatstats.h
+	$(CC) $(THR_CC) $(CFLAGS) -c -DFASTF tatstats.c -o tatstats_ff.o
+
+tatstats_fm.o : tatstats.c tatstats.h
+	$(CC) $(THR_CC) $(CFLAGS) -c -DFASTM tatstats.c -o tatstats_fm.o
+
+last_tat.o : last_tat.c defs.h mm_file.h structs.h param.h rstruct.h
+	$(CC) $(THR_CC) $(CFLAGS) -c last_tat.c
+
+last_thresh.o : last_thresh.c defs.h mm_file.h structs.h param.h rstruct.h
+	$(CC) $(THR_CC) $(CFLAGS) -c last_thresh.c
+
+#================ drop functions - actual scores/alignments
+
+drop_nfa.o : dropnfa.c dropnfa.h param.h rstruct.h defs.h drop_func.h dyn_string.h
+	$(CC) $(THR_CC) $(CFLAGS) -c dropnfa.c -o drop_nfa.o
+
+dropsbd.o : dropsbd.c dropnfa.h param.h rstruct.h defs.h drop_func.h dyn_string.h
+	$(CC) $(THR_CC) $(CFLAGS) -c dropsbd.c -o dropsbd.o
+
+# drop_ff, _fs, _fm must define FASTF, FASTS, and FASTM to ensure
+# that tatstats.h is built appropriately
+
+drop_ff2.o : dropff2.c param.h rstruct.h defs.h tatstats.h drop_func.h dyn_string.h
+	$(CC) $(THR_CC) $(CFLAGS) -c -DFASTF  dropff2.c -o drop_ff2.o
+
+drop_tff.o : dropff2.c param.h rstruct.h defs.h tatstats.h drop_func.h dyn_string.h
+	$(CC) $(THR_CC) $(CFLAGS) -c -DFASTF -DTFAST dropff2.c -o drop_tff.o
+
+drop_fs2.o : dropfs2.c param.h rstruct.h defs.h tatstats.h drop_func.h dyn_string.h
+	$(CC) $(THR_CC) $(CFLAGS) -DFASTS -c dropfs2.c -o drop_fs2.o
+# built from dropfs2.c like drop_fs2.o, so the same header prerequisites apply
+drop_tfs.o : dropfs2.c param.h rstruct.h defs.h tatstats.h drop_func.h dyn_string.h
+	$(CC) $(THR_CC) $(CFLAGS) -c -DTFAST -DFASTS dropfs2.c -o drop_tfs.o
+
+drop_fm.o : dropfs2.c param.h rstruct.h defs.h tatstats.h drop_func.h dyn_string.h
+	$(CC) $(THR_CC) $(CFLAGS) -c -DFASTM dropfs2.c -o drop_fm.o
+
+drop_tfm.o : dropfs2.c param.h rstruct.h defs.h tatstats.h drop_func.h dyn_string.h
+	$(CC) $(THR_CC) $(CFLAGS) -c -DTFAST -DFASTM dropfs2.c -o drop_tfm.o
+
+drop_tfa.o : dropnfa.c dropnfa.h upam.h param.h rstruct.h defs.h dyn_string.h
+	$(CC) $(THR_CC) $(CFLAGS) -c -DTFASTA dropnfa.c -o drop_tfa.o
+
+drop_fx.o : dropfx2.c upam.h param.h rstruct.h defs.h drop_func.h dyn_string.h
+	$(CC) $(THR_CC) $(CFLAGS) -c dropfx2.c -o drop_fx.o
+
+drop_tfx.o : dropfx2.c upam.h param.h rstruct.h defs.h drop_func.h dyn_string.h
+	$(CC) $(THR_CC) $(CFLAGS) -c -DTFAST dropfx2.c -o drop_tfx.o
+
+drop_fz.o : dropfz3.c upam.h param.h rstruct.h defs.h aamap.h drop_func.h dyn_string.h
+	$(CC) $(THR_CC) $(CFLAGS) -c dropfz3.c -o drop_fz.o
+
+drop_tfz.o : dropfz3.c upam.h param.h rstruct.h defs.h aamap.h drop_func.h dyn_string.h
+	$(CC) $(THR_CC) $(CFLAGS) -c -DTFAST dropfz3.c -o drop_tfz.o
+
+dropnsw.o : dropnsw.c upam.h param.h rstruct.h structs.h drop_func.h dyn_string.h
+	$(CC) $(THR_CC) $(CFLAGS) -c dropnsw.c
+
+#dropgsw.o : dropgsw.c dropgsw.h defs.h param.h rstruct.h drop_func.h a_mark.h dyn_string.h
+#	$(CC) $(THR_CC) $(CFLAGS) -c dropgsw.c -o dropgsw.o
+
+dropgsw2.o : dropgsw2.c dropgsw2.h defs.h param.h rstruct.h drop_func.h a_mark.h dyn_string.h
+	$(CC) $(THR_CC) $(CFLAGS) -c dropgsw2.c -o dropgsw2.o
+
+dropgsw2_sse.o : dropgsw2.c dropgsw2.h defs.h param.h rstruct.h drop_func.h a_mark.h dyn_string.h
+	$(CC) $(THR_CC) $(CFLAGS) -DSW_SSE2 -c dropgsw2.c -o dropgsw2_sse.o
+
+dropgsw2_alt.o : dropgsw2.c dropgsw2.h defs.h param.h rstruct.h drop_func.h a_mark.h dyn_string.h
+	$(CC) $(THR_CC) $(CFLAGS) -DSW_ALTIVEC -c dropgsw2.c -o dropgsw2_alt.o
+
+droplal2.o : dropgsw2.c dropgsw2.h defs.h param.h rstruct.h drop_func.h a_mark.h dyn_string.h
+	$(CC) $(THR_CC) -DLALIGN $(CFLAGS) -c dropgsw2.c -o droplal2.o
+
+droplal2_sse.o : dropgsw2.c dropgsw2.h defs.h param.h rstruct.h drop_func.h a_mark.h dyn_string.h
+	$(CC) $(THR_CC) -DLALIGN $(CFLAGS) -DSW_SSE2 -c dropgsw2.c -o droplal2_sse.o
+
+droplal2_alt.o : dropgsw2.c dropgsw2.h defs.h param.h rstruct.h drop_func.h a_mark.h dyn_string.h
+	$(CC) $(THR_CC) -DLALIGN $(CFLAGS) -DSW_ALTIVEC -c dropgsw2.c -o droplal2_alt.o
+
+lsim4.o : lsim4.c lsim4.h param.h rstruct.h defs.h
+	$(CC) $(THR_CC) $(CFLAGS) -c lsim4.c
+
+smith_waterman_altivec.o : smith_waterman_altivec.c smith_waterman_altivec.h dropgsw2.h defs.h param.h rstruct.h
+	$(CC) $(THR_CC) $(CFLAGS) -DSW_ALTIVEC -c smith_waterman_altivec.c
+
+smith_waterman_sse2.o : smith_waterman_sse2.c smith_waterman_sse2.h dropgsw2.h defs.h param.h rstruct.h
+	$(CC) $(THR_CC) $(CFLAGS) -DSW_SSE2 -c smith_waterman_sse2.c
+
+global_sse2.o : global_sse2.c global_sse2.h dropgsw2.h defs.h param.h rstruct.h
+	$(CC) $(THR_CC) $(CFLAGS) -DSW_SSE2 -c global_sse2.c
+
+glocal_sse2.o : glocal_sse2.c glocal_sse2.h dropgsw2.h defs.h param.h rstruct.h
+	$(CC) $(THR_CC) $(CFLAGS) -DSW_SSE2 -c glocal_sse2.c
+
+droplnw.o : dropnnw2.c upam.h param.h rstruct.h structs.h drop_func.h dyn_string.h
+	$(CC) $(THR_CC) $(CFLAGS) -c dropnnw2.c -o droplnw.o
+
+droplnw_sse.o : dropnnw2.c upam.h param.h rstruct.h structs.h drop_func.h dyn_string.h
+	$(CC) $(THR_CC) -DSW_SSE2 $(CFLAGS) -c dropnnw2.c -o droplnw_sse.o
+
+dropgnw.o : dropnnw2.c upam.h param.h rstruct.h structs.h drop_func.h dyn_string.h
+	$(CC) $(THR_CC) -DGLOBAL_GLOBAL $(CFLAGS) -c dropnnw2.c -o dropgnw.o
+
+dropgnw_sse.o : dropnnw2.c upam.h param.h rstruct.h structs.h drop_func.h dyn_string.h
+	$(CC) $(THR_CC) -DGLOBAL_GLOBAL -DSW_SSE2 $(CFLAGS) -c dropnnw2.c -o dropgnw_sse.o
+
+lwm_align.o : wm_align.c defs.h param.h rstruct.h a_mark.h
+	$(CC) $(THR_CC) $(CFLAGS) -c wm_align.c -o lwm_align.o
+
+gwm_align.o : wm_align.c defs.h param.h rstruct.h a_mark.h
+	$(CC) $(THR_CC) $(CFLAGS) -DGGSEARCH -c wm_align.c -o gwm_align.o
+
+calcons_fa.o : cal_cons2.c defs.h param.h rstruct.h a_mark.h
+	$(CC) $(THR_CC) $(CFLAGS) -DFASTA -c cal_cons2.c -o calcons_fa.o
+
+calcons_tfa.o : cal_cons2.c defs.h param.h rstruct.h a_mark.h
+	$(CC) $(THR_CC) $(CFLAGS) -DTFASTA -c cal_cons2.c -o calcons_tfa.o
+
+calcons_sw.o : cal_cons2.c defs.h param.h rstruct.h a_mark.h
+	$(CC) $(THR_CC) $(CFLAGS) -DSSEARCH -c cal_cons2.c -o calcons_sw.o
+
+calcons_la.o : cal_cons2.c defs.h param.h rstruct.h a_mark.h
+	$(CC) $(THR_CC) -DLALIGN -DLCAL_CONS $(CFLAGS) -c cal_cons2.c -o calcons_la.o
+
+calcons_ff.o : cal_consf.c defs.h param.h rstruct.h a_mark.h
+	$(CC) $(THR_CC) $(CFLAGS) -DFASTF -c cal_consf.c -o calcons_ff.o
+
+calcons_fs.o : cal_consf.c defs.h param.h rstruct.h a_mark.h
+	$(CC) $(THR_CC) $(CFLAGS) -DFASTS -c cal_consf.c -o calcons_fs.o
+
+calcons_fm.o : cal_consf.c defs.h param.h rstruct.h a_mark.h
+	$(CC) $(THR_CC) $(CFLAGS) -DFASTM -c cal_consf.c -o calcons_fm.o
+
+calcons_tff.o : cal_consf.c defs.h param.h rstruct.h a_mark.h
+	$(CC) $(THR_CC) $(CFLAGS) -DTFAST -DFASTF -c cal_consf.c -o calcons_tff.o
+
+calcons_tfs.o : cal_consf.c defs.h param.h rstruct.h a_mark.h
+	$(CC) $(THR_CC) $(CFLAGS) -DTFAST -DFASTS -c cal_consf.c -o calcons_tfs.o
+
+calcons_tfm.o : cal_consf.c defs.h param.h rstruct.h a_mark.h
+	$(CC) $(THR_CC) $(CFLAGS) -DTFAST -DFASTM -c cal_consf.c -o calcons_tfm.o
+
+#================ reading query, libraries
+
+getseq.o : getseq.c defs.h uascii.h structs.h rstruct.h upam.h mm_file.h
+	$(CC) $(THR_CC) $(CFLAGS) -c getseq.c
+
+llgetaa.o : llgetaa.c upam.h uascii.h mm_file.h
+	$(CC) $(THR_CC) $(CFLAGS) -c -DNOLIB llgetaa.c
+
+lgetlib.o : $(NGETLIB).c altlib.h upam.h uascii.h mm_file.h
+	$(CC) $(THR_CC) $(CFLAGS) -c $(NGETLIB).c -o lgetlib.o
+
+lgetaa_m.o : mmgetaa.c altlib.h ncbl2_head.h upam.h uascii.h mm_file.h
+	$(CC) $(THR_CC) $(CFLAGS) -c mmgetaa.c -o lgetaa_m.o
+
+ncbl_lib.o : ncbl_lib.c ncbl_head.h
+	$(CC) $(THR_CC) $(CFLAGS) -c ncbl_lib.c
+
+ncbl2_mlib.o : ncbl2_mlib.c ncbl2_head.h mm_file.h
+	$(CC) $(THR_CC) $(CFLAGS) -c ncbl2_mlib.c -o ncbl2_mlib.o
+
+mysql_lib.o : mysql_lib.c mm_file.h
+	$(CC) $(THR_CC) $(CFLAGS) -c mysql_lib.c
+
+pgsql_lib.o : pgsql_lib.c mm_file.h
+	$(CC) $(THR_CC) $(CFLAGS) -c pgsql_lib.c
+
+#================ threading functions
+
+pthr_subs2.o : pthr_subs2.c thr_bufs2.h pthr_subs.h
+	$(CC) $(THR_CC) $(CFLAGS) -c pthr_subs2.c
+
+uthr_subs.o : uthr_subs.c thr_bufs2.h uthr_subs.h 
+	$(CC) $(THR_CC) $(CFLAGS) -c uthr_subs.c
+
+#================ MPI worker function
+
+mpi_subs2.o : pcomp_subs2.c pcomp_bufs.h thr_buf_structs.h
+	$(CC) -DMPI_SRC $(CFLAGS) -c pcomp_subs2.c -o mpi_subs2.o
+
+#================ translation
+
+faatran.o : faatran.c upam.h uascii.h
+	$(CC) $(THR_CC) $(CFLAGS) -c faatran.c
+
+url_subs.o : url_subs.c structs.h param.h rstruct.h
+	$(CC) $(THR_CC) $(CFLAGS) -c url_subs.c
+
+#================ lav plotting functions
+
+lav2plt.o : lav2plt.c lav_defs.h
+	$(CC) $(CFLAGS) -c lav2plt.c
+
+lavplt_ps.o : lavplt_ps.c lav_defs.h
+	$(CC) $(CFLAGS) -c lavplt_ps.c
+
+lavplt_svg.o : lavplt_svg.c lav_defs.h
+	$(CC) $(CFLAGS) -c lavplt_svg.c
diff --git a/make/Makefile.freebsd b/make/Makefile.freebsd
new file mode 100644
index 0000000..e837696
--- /dev/null
+++ b/make/Makefile.freebsd
@@ -0,0 +1,72 @@
+#
+# Makefile for building fasta3 on FreeBSD
+#
+# Fernan Aguero - <fernan at iib.unsam.edu.ar>
+
+# we take care of doing variable assignment using the '?=' and '+='
+# operators to preserve the value of variables if they are already
+# defined. In FreeBSD this happens when fasta3 is built from the port or
+# when the user has set these variables -- most notably CC and/or CFLAGS
+# -- in /etc/make.conf
+
+# Compiler executable, and optional flags
+CC?=		gcc
+CFLAGS?=	-g -O2
+
+# your FASTA host
+FASTA_HOST?=	"your_fasta_host"
+
+# common CFLAGS. These are the set of CFLAGS that are always used
+COMMON_CFLAGS=	-DSHOWSIM -DUNIX -DTIMES -DHZ=100 -DMAX_WORKERS=2 \
+		-DTHR_EXIT=pthread_exit -DPROGRESS -DUSE_MMAP -D_REENTRANT \
+		-D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 -DUSE_FSEEKO \
+		-DHAS_INTTYPES -DSAMP_STATS
+
+# standard options, these will be added to the common CFLAGS if
+# selected below
+STANDARD_CFLAGS=	-DSFCHAR="':'" -DFASTA_HOST='${FASTA_HOST}' \
+			-D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -DPGM_DOC
+
+# options for superfamily validations, these will be added to the common
+# CFLAGS if selected below
+SUPERFAMILY_CFLAGS=	-DSFCHAR="'|'" -DSUPERFAMNUM -DBIG_LIB64
+
+# here we define CFLAGS to be the sum of common flags plus a subset of
+# optional flags that define our intended use.
+# The standard flags are selected by default, although the user
+# can override this if s/he wants
+CFLAGS+=	${COMMON_CFLAGS} ${STANDARD_CFLAGS}
+
+BIN = ../bin
+XDIR =		/usr/local/bin
+
+LIB_M+=		-lm
+
+HFLAGS+=	-o
+NFLAGS+=	-o
+
+# FreeBSD users BEWARE! Different threading models ahead!
+
+# The threading model has changed along the way from FreeBSD-4 to
+# FreeBSD-6. If you're building fasta3 on your own, you will need to
+# adjust this accordingly. The default works in FreeBSD-6x (currently
+# the recommended major version for use in production). Or better yet,
+# use the biology/fasta3 port from the ports collection, which will use
+# the correct threading library for your OSVERSION
+
+THR_SUBS?=	pthr_subs2
+THR_LIBS?=	-lpthread
+THR_CC?=	
+
+# set up files for SSE2/Altivec acceleration
+#
+include ../make/Makefile.sse_alt
+
+# no acceleration
+#
+DROPGSW_O = $(DROPGSW_NA_O)
+DROPLAL_O = $(DROPLAL_NA_O)
+DROPGNW_O = $(DROPGNW_NA_O)
+DROPLNW_O = $(DROPLNW_NA_O)
+
+include ../make/Makefile36m.common
diff --git a/make/Makefile.hpux_it b/make/Makefile.hpux_it
new file mode 100644
index 0000000..b05e07e
--- /dev/null
+++ b/make/Makefile.hpux_it
@@ -0,0 +1,56 @@
+#
+# makefile for fasta3, fasta3_t
+#
+# flags for HP-UX #
+
+CC= cc -g -O2 +Onolimit -Wl,+pi,1M -Wl,+pd,1M -Wl,+mergeseg
+#CC = gcc -g -DDEBUG
+
+#CC=gcc -Wall -pedantic -ansi -g -O
+#CC= /usr/local/parasoft/bin.linux2/insure -g -DDEBUG
+
+# EBI uses the following with pgcc, -O3 does not work:
+# CC= pgcc -O2 -pipe -mcpu=pentiumpro -march=pentiumpro -fomit-frame-pointer
+
+# this file works for HP-UX
+
+# use options below for superfamily validations
+#CFLAGS= -DSHOWSIM -DUNIX -DTIMES -DHZ=100 -DSFCHAR="'|'" -DMAX_WORKERS=2 -DTHR_EXIT=pthread_exit -DPROGRESS -DSUPERFAMNUM -DUSE_MMAP -DBIG_LIB64 -D_LARGE_FILE_SOURCE -DUSE_FSEEKO -D_FILE_OFFSET_BITS=64 -DHAS_INTTYPES -DSAMP_STATS
+
+# standard options
+CFLAGS= -DSHOWSIM -DUNIX -DTIMES -DHZ=100 -DSFCHAR="':'" -DMAX_WORKERS=2 -DTHR_EXIT=pthread_exit -DPROGRESS  -DUSE_MMAP -DHAS_INTTYPES -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -DUSE_FSEEKO -DSAMP_STATS -DPGM_DOC
+# -I/usr/local/include/mysql -DMYSQL_DB 
+#
+#(for mySQL databases)  (also requires change to Makefile35.common)
+
+LIB_M = -lm
+#LIB_M = -L/usr/local/lib/mysql -lmysqlclient -lm
+# for mySQL databases
+
+HFLAGS= -o
+NFLAGS= -o
+
+# for HP-UX
+THR_SUBS = pthr_subs2
+THR_LIBS = -lpthread
+THR_CC =
+
+BIN = ../bin
+XDIR = /seqprg/bin
+
+# set up files for SSE2/Altivec acceleration
+#
+include ../make/Makefile.sse_alt
+
+# no acceleration
+#
+DROPGSW_O = $(DROPGSW_NA_O)
+DROPLAL_O = $(DROPLAL_NA_O)
+DROPGNW_O = $(DROPGNW_NA_O)
+DROPLNW_O = $(DROPLNW_NA_O)
+
+# renamed (fasta36)  programs
+include ../make/Makefile36m.common
+# conventional (fasta3) names
+# include ../make/Makefile.common
+
diff --git a/make/Makefile.ibm b/make/Makefile.ibm
new file mode 100644
index 0000000..160b10b
--- /dev/null
+++ b/make/Makefile.ibm
@@ -0,0 +1,37 @@
+#
+# makefile for fasta3, fasta3_t.  Use makefile.pvm for pvcompxx.
+#
+
+CC= xlc_r -O3 -qarch=auto -qtune=auto -qcache=auto
+
+# for IBM with current pthreads
+CFLAGS= -DUNIX -DTIMES -DSFCHAR="':'" -DMAX_WORKERS=4 -DTHR_EXIT=pthread_exit -DPROGRESS -DIS_BIG_ENDIAN -DUSE_MMAP -DIBM_AIX -D_LARGE_FILES -DHAS_INTTYPES -D_LARGE_FILES -UMAXSEG -DSAMP_STATS -DPGM_DOC
+
+# consider -D_LARGE_FILE_API -D_LARGE_FILES for files > 2 GB
+
+LIB_M = -lm
+
+HFLAGS= -o
+NFLAGS= -o
+
+THR_SUBS = pthr_subs2
+THR_LIBS = -lpthreads
+THR_CC =
+
+BIN = ../bin
+XDIR = /seqprg/slib/bin
+
+# set up files for SSE2/Altivec acceleration
+#
+include ../make/Makefile.sse_alt
+
+# no acceleration
+#
+DROPGSW_O = $(DROPGSW_NA_O)
+DROPLAL_O = $(DROPLAL_NA_O)
+DROPGNW_O = $(DROPGNW_NA_O)
+DROPLNW_O = $(DROPLNW_NA_O)
+
+# renamed (fasta36)  programs
+include ../make/Makefile36m.common
+
diff --git a/make/Makefile.linux b/make/Makefile.linux
new file mode 120000
index 0000000..b1a4d6b
--- /dev/null
+++ b/make/Makefile.linux
@@ -0,0 +1 @@
+Makefile.linux64_sse2
\ No newline at end of file
diff --git a/make/Makefile.linux32 b/make/Makefile.linux32
new file mode 100644
index 0000000..82f96a9
--- /dev/null
+++ b/make/Makefile.linux32
@@ -0,0 +1,63 @@
+# $ Id: $
+#
+# makefile for fasta3, fasta3_t.  Use makefile.pvm for pvcompxx.
+#
+# This file is designed for 32-bit Linux systems using an X86
+# architecture without SSE2 extensions.
+#
+# To use on a 64-bit linux system, add -D_LARGEFILE64_SOURCE and -DBIG_LIB64
+# (or use Makefile.linux64)
+#
+
+SHELL=/bin/bash
+
+CC= gcc -g -O
+#CC = gcc -g -DDEBUG
+# debugging/checking alternatives (enable at most one CC line):
+#CC=gcc -Wall -pedantic -ansi -g -O
+#CC= /usr/local/parasoft/bin/insure -g -DDEBUG
+
+# EBI uses the following with pgcc, -O3 does not work:
+# CC= pgcc -O2 -pipe -mcpu=pentiumpro -march=pentiumpro -fomit-frame-pointer
+
+# this file works for x86 LINUX
+
+# standard options
+CFLAGS= -DSHOW_HELP -DSHOWSIM -DUNIX -DTIMES -DHZ=100 -DMAX_WORKERS=4 -DTHR_EXIT=pthread_exit -DPROGRESS  -DUSE_MMAP -D_REENTRANT -DHAS_INTTYPES -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 -DUSE_FSEEKO -DSAMP_STATS -DPGM_DOC
+# -DSFCHAR="'|'" -DSUPERFAMNUM
+
+# -I/usr/local/include/mysql -DMYSQL_DB 
+#
+#(for mySQL databases)  (also requires change to Makefile35.common)
+
+LIB_M = -lm
+#LIB_M = -L/usr/local/lib/mysql -lmysqlclient -lm
+# for mySQL databases
+
+HFLAGS= -o
+NFLAGS= -o
+
+# for Linux
+THR_SUBS = pthr_subs2
+THR_LIBS = -lpthread
+THR_CC =
+
+BIN = ../bin
+XDIR = /seqprg/bin
+
+# set up files for SSE2/Altivec acceleration
+#
+include ../make/Makefile.sse_alt
+
+# no acceleration
+#
+DROPGSW_O = $(DROPGSW_NA_O)
+DROPLAL_O = $(DROPLAL_NA_O)
+DROPGNW_O = $(DROPGNW_NA_O)
+DROPLNW_O = $(DROPLNW_NA_O)
+
+# renamed (fasta36)  programs
+include ../make/Makefile36m.common
+# conventional (fasta3) names
+# include ../make/Makefile.common
+
diff --git a/make/Makefile.linux32_sse2 b/make/Makefile.linux32_sse2
new file mode 100644
index 0000000..05d562c
--- /dev/null
+++ b/make/Makefile.linux32_sse2
@@ -0,0 +1,68 @@
+#
+# $Id: Makefile.linux32_sse2 479 2011-01-12 13:13:03Z wrp $
+#
+# makefile for fasta3, fasta3_t.  Use makefile.pvm for pvcompxx.
+#
+# This file is designed for 32-bit Linux systems using an X86
+# architecture with SSE2 extensions.  SSE2 is used for ssearch35(_t)
+#
+# To use on a 64-bit linux system, add -D_LARGEFILE64_SOURCE and -DBIG_LIB64
+# (or use Makefile.linux64_sse2)
+#
+
+SHELL=/bin/bash
+
+CC= gcc -g  -O -msse2 -ffast-math
+#CC = gcc -g -DDEBUG -msse2
+
+#CC= /usr/local/parasoft/bin/insure -g -DDEBUG
+
+#CC=gcc -Wall -pedantic -ansi -g -O
+
+# EBI uses the following with pgcc, -O3 does not work:
+# CC= pgcc -O2 -pipe -mcpu=pentiumpro -march=pentiumpro -fomit-frame-pointer
+
+# this file works for x86 LINUX
+
+# standard options
+
+CFLAGS= -DSHOWSIM -DUNIX -DTIMES -DHZ=100 -DMAX_WORKERS=8 -DTHR_EXIT=pthread_exit -DPROGRESS  -D_REENTRANT -DHAS_INTTYPES -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 -DUSE_FSEEKO -DSAMP_STATS -DPGM_DOC -DUSE_MMAP
+
+# -DSUPERFAMNUM -DSFCHAR="'|'" 
+
+# -I/usr/local/include/mysql -DMYSQL_DB 
+#
+#(for mySQL databases)  (also requires change to Makefile35.common)
+
+LIB_M = -lm
+#LIB_M = -L/usr/local/lib/mysql -lmysqlclient -lm
+# for mySQL databases
+
+HFLAGS= -o
+NFLAGS= -o
+
+# for Linux
+THR_SUBS = pthr_subs2
+THR_LIBS = -lpthread
+THR_CC =
+
+BIN = ../bin
+XDIR = /seqprg/bin
+#XDIR = ~/bin/LINUX
+
+# set up files for SSE2/Altivec acceleration
+#
+include ../make/Makefile.sse_alt
+
+# SSE2 acceleration
+#
+DROPGSW_O = $(DROPGSW_SSE_O)
+DROPLAL_O = $(DROPLAL_SSE_O)
+DROPGNW_O = $(DROPGNW_SSE_O)
+DROPLNW_O = $(DROPLNW_SSE_O)
+
+# renamed (fasta36)  programs
+include ../make/Makefile36m.common
+# conventional (fasta3) names
+# include ../make/Makefile.common
+
diff --git a/make/Makefile.linux64 b/make/Makefile.linux64
new file mode 120000
index 0000000..b1a4d6b
--- /dev/null
+++ b/make/Makefile.linux64
@@ -0,0 +1 @@
+Makefile.linux64_sse2
\ No newline at end of file
diff --git a/make/Makefile.linux64_sse2 b/make/Makefile.linux64_sse2
new file mode 100644
index 0000000..addc5ab
--- /dev/null
+++ b/make/Makefile.linux64_sse2
@@ -0,0 +1,65 @@
+# $ Id: $
+#
+# makefile for fasta3, fasta3_t Use Makefile.mpi for fasta36_mpi
+#
+# This file is designed for 64-bit Linux systems using an X86
+# architecture with SSE2 extensions.  -D_LARGEFILE64_SOURCE and
+# -DBIG_LIB64 require a 64-bit linux system.
+# SSE2 extensions are used for ssearch35(_t)
+#
+# Use Makefile.linux32_sse2 for 32-bit linux x86
+#
+
+SHELL=/bin/bash
+
+CC = gcc -g -O -msse2
+#CC= gcc -pg -g -O -msse2 -ffast-math
+#CC = gcc -g -DDEBUG -msse2
+#CC=gcc -Wall -pedantic -ansi -g -msse2 -DDEBUG
+
+# EBI uses the following with pgcc, -O3 does not work:
+# CC= pgcc -O2 -pipe -mcpu=pentiumpro -march=pentiumpro -fomit-frame-pointer
+
+# this file works for x86 LINUX
+
+# standard options
+
+CFLAGS= -DSHOW_HELP -DSHOWSIM -DUNIX -DTIMES -DHZ=100 -DMAX_WORKERS=8 -DTHR_EXIT=pthread_exit  -DM10_CONS  -D_REENTRANT -DHAS_INTTYPES -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 -DUSE_FSEEKO -DSAMP_STATS -DPGM_DOC -DUSE_MMAP  -D_LARGEFILE64_SOURCE  -DBIG_LIB64
+# -I/usr/include/mysql -DMYSQL_DB
+# -DSUPERFAMNUM -DSFCHAR="'|'" 
+
+#
+#(for mySQL databases)  (also requires change to Makefile36m.common or use of Makefile36m.common_mysql)
+# run 'mysql_config' to find locations of mySQL files
+
+LIB_M = -lm
+# for mySQL databases
+# LIB_M = -L/usr/lib64/mysql -lmysqlclient -lm
+
+HFLAGS= -o
+NFLAGS= -o
+
+# for Linux
+THR_SUBS = pthr_subs2
+THR_LIBS = -lpthread
+THR_CC =
+
+BIN = ../bin
+XDIR = /seqprg/bin
+#XDIR = ~/bin/LINUX
+
+# set up files for SSE2/Altivec acceleration
+#
+include ../make/Makefile.sse_alt
+
+# SSE2 acceleration
+#
+DROPGSW_O = $(DROPGSW_SSE_O)
+DROPLAL_O = $(DROPLAL_SSE_O)
+DROPGNW_O = $(DROPGNW_SSE_O)
+DROPLNW_O = $(DROPLNW_SSE_O)
+
+# renamed (fasta36)  programs
+include ../make/Makefile36m.common
+# conventional (fasta3) names
+# include ../make/Makefile.common
diff --git a/make/Makefile.linux_icc b/make/Makefile.linux_icc
new file mode 100644
index 0000000..438c264
--- /dev/null
+++ b/make/Makefile.linux_icc
@@ -0,0 +1,58 @@
+# $Id: Makefile.linux_icc 499 2011-01-28 10:20:04Z wrp $
+#
+# makefile for fasta3, fasta3_t using the Intel icc compiler
+#
+# This file is designed for 64-bit Linux systems.
+# -D_LARGEFILE64_SOURCE and -DBIG_LIB64 require a 64-bit linux system.
+
+SHELL=/bin/bash
+
+CC= icc -g -O3
+#CC = icc -g -DDEBUG
+
+#CC=gcc -Wall -pedantic -ansi -g -O
+#CC= /usr/local/parasoft/bin/insure -g -DDEBUG
+
+# EBI uses the following with pgcc, -O3 does not work:
+# CC= pgcc -O2 -pipe -mcpu=pentiumpro -march=pentiumpro -fomit-frame-pointer
+
+# this file works for x86 LINUX
+
+# standard options
+CFLAGS= -DSHOWSIM -DUNIX -DTIMES -DHZ=100 -DMAX_WORKERS=8 -DTHR_EXIT=pthread_exit  -DFASTA_HOST='"your_fasta_host_here"' -DUSE_MMAP -D_REENTRANT -DHAS_INTTYPES -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -DUSE_FSEEKO -DPGM_DOC -DBIG_LIB64 -DSAMP_STATS
+
+# -I/usr/local/include/mysql -DMYSQL_DB 
+#
+#(for mySQL databases)  (also requires change to Makefile36.common)
+
+LIB_M = -lm
+#LIB_M = -L/usr/local/lib/mysql -lmysqlclient -lm
+# for mySQL databases
+
+HFLAGS= -o
+NFLAGS= -o
+
+# for Linux
+THR_SUBS = pthr_subs2
+THR_LIBS = -lpthread
+THR_CC =
+
+BIN = ../bin
+XDIR = /seqprg/bin
+
+# set up files for SSE2/Altivec acceleration
+#
+include ../make/Makefile.sse_alt
+
+# no acceleration
+#
+DROPGSW_O = $(DROPGSW_NA_O)
+DROPLAL_O = $(DROPLAL_NA_O)
+DROPGNW_O = $(DROPGNW_NA_O)
+DROPLNW_O = $(DROPLNW_NA_O)
+
+# renamed (fasta36)  programs
+include ../make/Makefile36m.common
+# conventional (fasta3) names
+# include ../make/Makefile.common
+
diff --git a/make/Makefile.linux_icc_sse2 b/make/Makefile.linux_icc_sse2
new file mode 100644
index 0000000..72724ec
--- /dev/null
+++ b/make/Makefile.linux_icc_sse2
@@ -0,0 +1,55 @@
+# $Id: Makefile.linux_icc_sse2 1162 2013-05-27 16:48:11Z wrp $
+#
+# makefile for fasta3, fasta3_t using the Intel icc compiler
+#
+# This file is designed for 64-bit Linux systems.
+# -D_LARGEFILE64_SOURCE and -DBIG_LIB64 require a 64-bit linux system.
+#
+# uses SSE2 extensions for ssearch36(_t)
+
+SHELL=/bin/bash
+
+CC= icc -O3 -g
+#CC = icc -g -DDEBUG
+
+#CC=gcc -Wall -pedantic -ansi -g -O
+#CC= /usr/local/parasoft/bin/insure -g -DDEBUG
+
+# this file works for x86 LINUX
+
+# standard options
+CFLAGS= -DSHOW_HELP -DSHOWSIM -DUNIX -DTIMES -DHZ=100 -DMAX_WORKERS=8 -DTHR_EXIT=pthread_exit  -DUSE_MMAP -D_REENTRANT -DHAS_INTTYPES -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -DUSE_FSEEKO -DPGM_DOC -DBIG_LIB64 -DSAMP_STATS
+# -I/usr/include/mysql -DMYSQL_DB
+#(for mySQL databases)  (also requires change to Makefile36.common)
+
+LIB_M = -lm
+#LIB_M = -L/usr/local/lib/mysql -lmysqlclient -lm
+# for mySQL databases
+
+HFLAGS= -o
+NFLAGS= -o
+
+# for Linux
+THR_SUBS = pthr_subs2
+THR_LIBS = -lpthread
+THR_CC =
+
+BIN = ../bin
+XDIR = /seqprg/bin
+
+# set up files for SSE2/Altivec acceleration
+#
+include ../make/Makefile.sse_alt
+
+# SSE2 acceleration
+#
+DROPGSW_O = $(DROPGSW_SSE_O)
+DROPLAL_O = $(DROPLAL_SSE_O)
+DROPGNW_O = $(DROPGNW_SSE_O)
+DROPLNW_O = $(DROPLNW_SSE_O)
+
+# renamed (fasta36)  programs
+include ../make/Makefile36m.common
+# conventional (fasta3) names
+# include ../make/Makefile.common
+
diff --git a/make/Makefile.linux_mysql b/make/Makefile.linux_mysql
new file mode 100644
index 0000000..d7c7ea4
--- /dev/null
+++ b/make/Makefile.linux_mysql
@@ -0,0 +1,57 @@
+# $ Id: $
+# makefile for fasta3, fasta3_t.  Use makefile.pvm for pvcompxx.
+#
+# Includes files for reading mysql databases
+#
+# This file is designed for 64-bit Linux systems
+# -D_LARGEFILE64_SOURCE and  -DBIG_LIB64 require a 64-bit linux system.
+# This makefile does not use SSE2 extensions.
+#
+
+SHELL=/bin/bash
+
+CC= gcc -g -O2
+#CC= gcc -g -DDEBUG
+
+# this file works for x86 LINUX
+
+# standard options
+CFLAGS= -DSHOWSIM -DUNIX -DTIMES -DHZ=100 -DSFCHAR="':'" -DMAX_WORKERS=2 -DTHR_EXIT=pthread_exit  -DFASTA_HOST='"your_fasta_host_here"' -DUSE_MMAP -D_REENTRANT -I/usr/include/mysql -DMYSQL_DB -D_FILE_OFFSET_BITS=64 -DUSE_FSEEKO -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -DHAS_INTTYPES -DSAMP_STATS -DPGM_DOC -DM10_CONS -DBIG_LIB64
+
+# use options below for superfamily validations
+#CFLAGS= -DSHOWSIM -DUNIX -DTIMES -DHZ=100 -DSFCHAR="'|'" -DMAX_WORKERS=2 -DTHR_EXIT=pthread_exit -DPROGRESS -DSUPERFAMNUM -DUSE_MMAP -D_REENTRANT
+
+# -I/usr/local/include/mysql -DMYSQL_DB 
+#
+#(for mySQL databases)  (also requires change to Makefile35.common)
+
+#LIB_M = -lm
+#LIB_M = -L/usr/lib/mysql -lmysqlclient -lm
+# for mySQL databases
+
+HFLAGS= -o
+NFLAGS= -o
+
+# for Linux
+THR_SUBS = pthr_subs2
+THR_LIBS = -lpthread
+THR_CC =
+
+BIN = ../bin
+XDIR = /seqprg/bin
+
+# set up files for SSE2/Altivec acceleration
+#
+include ../make/Makefile.sse_alt
+
+# no acceleration
+#
+DROPGSW_O = $(DROPGSW_NA_O)
+DROPLAL_O = $(DROPLAL_NA_O)
+DROPGNW_O = $(DROPGNW_NA_O)
+DROPLNW_O = $(DROPLNW_NA_O)
+
+# renamed (fasta36)  programs
+include ../make/Makefile36m.common_mysql
+# conventional (fasta3) names
+# include ../make/Makefile.common
diff --git a/make/Makefile.linux_pgsql b/make/Makefile.linux_pgsql
new file mode 100644
index 0000000..da088d9
--- /dev/null
+++ b/make/Makefile.linux_pgsql
@@ -0,0 +1,58 @@
+# $ Id: $
+#
+# makefile for fasta3, fasta3_t.  Use makefile.pvm for pvcompxx.
+#
+# Includes files for reading postgres (pgsql) databases
+#
+# This file is designed for 64-bit Linux systems.
+# -D_LARGEFILE64_SOURCE and -DBIG_LIB64 require a 64-bit linux system.
+# 
+
+SHELL=/bin/bash
+
+CC= gcc -g -O
+#CC= gcc -g -DDEBUG
+#CC=/opt/parasoft/bin.linux2/insure -g -DDEBUG
+
+# this file works for x86 LINUX
+
+# standard options
+CFLAGS= -DSHOWSIM -DUNIX -DTIMES -DHZ=100 -DSFCHAR="':'" -DMAX_WORKERS=2 -DTHR_EXIT=pthread_exit  -DFASTA_HOST='"your_fasta_host_here"' -DUSE_MMAP -D_REENTRANT -I/usr/local/pgsql/include -DPGSQL_DB -D_FILE_OFFSET_BITS=64 -DUSE_FSEEKO -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -DHAS_INTTYPES -DSAMP_STATS -DPGM_DOC -DM10_CONS -DBIG_LIB64
+
+# use options below for superfamily validations
+#CFLAGS= -DSHOWSIM -DUNIX -DTIMES -DHZ=100 -DSFCHAR="'|'" -DMAX_WORKERS=2 -DTHR_EXIT=pthread_exit -DPROGRESS -DSUPERFAMNUM -DUSE_MMAP -D_REENTRANT
+
+# -I/usr/local/include/mysql -DMYSQL_DB 
+#
+#(for mySQL databases)  (also requires change to Makefile35.common)
+
+#LIB_M = -lm
+#LIB_M = -L/usr/lib/mysql -lmysqlclient -lm
+# for mySQL databases
+
+HFLAGS= -o
+NFLAGS= -o
+
+# for Linux
+THR_SUBS = pthr_subs2
+THR_LIBS = -lpthread
+THR_CC =
+
+BIN = ../bin
+XDIR = /seqprg/bin
+
+# set up files for SSE2/Altivec acceleration
+#
+include ../make/Makefile.sse_alt
+
+# no acceleration
+#
+DROPGSW_O = $(DROPGSW_NA_O)
+DROPLAL_O = $(DROPLAL_NA_O)
+DROPGNW_O = $(DROPGNW_NA_O)
+DROPLNW_O = $(DROPLNW_NA_O)
+
+# renamed (fasta36)  programs
+include ../make/Makefile36m.common_pgsql
+# conventional (fasta3) names
+# include ../make/Makefile.common
diff --git a/make/Makefile.linux_sql b/make/Makefile.linux_sql
new file mode 100644
index 0000000..57166bf
--- /dev/null
+++ b/make/Makefile.linux_sql
@@ -0,0 +1,58 @@
+# $ Id: $
+#
+# makefile for fasta3, fasta3_t.  Use makefile.pvm for pvcompxx.
+#
+# Includes files for reading MySQL and Postgres (pgsql) databases
+#
+# This file is designed for 64-bit Linux systems.
+# -D_LARGEFILE64_SOURCE and -DBIG_LIB64 require a 64-bit linux system.
+#
+
+SHELL=/bin/bash
+
+CC= gcc -g -O
+#CC= gcc -g -DDEBUG
+#CC=/opt/parasoft/bin.linux2/insure -g -DDEBUG
+
+# this file works for x86 LINUX
+
+# standard options
+CFLAGS= -DSHOWSIM -DUNIX -DTIMES -DHZ=100 -DSFCHAR="':'" -DMAX_WORKERS=2 -DTHR_EXIT=pthread_exit  -DFASTA_HOST='"your_fasta_host_here"' -DUSE_MMAP -D_REENTRANT -I/usr/local/pgsql/include -I/usr/include/mysql -DPGSQL_DB -DMYSQL_DB -D_FILE_OFFSET_BITS=64 -DUSE_FSEEKO -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -DHAS_INTTYPES -DSAMP_STATS -DPGM_DOC -DM10_CONS -DBIG_LIB64
+
+# use options below for superfamily validations
+#CFLAGS= -DSHOWSIM -DLINUX6 -DUNIX -DTIMES -DHZ=100 -DSFCHAR="'|'" -DMAX_WORKERS=2 -DTHR_EXIT=pthread_exit -DPROGRESS -DSUPERFAMNUM -DUSE_MMAP -D_REENTRANT
+
+# -I/usr/local/include/mysql -DMYSQL_DB 
+#
+#(for mySQL databases)  (also requires change to Makefile35.common)
+
+#LIB_M = -lm
+#LIB_M = -L/usr/lib/mysql -lmysqlclient -lm
+# for mySQL databases
+
+HFLAGS= -o
+NFLAGS= -o
+
+# for Linux
+THR_SUBS = pthr_subs2
+THR_LIBS = -lpthread
+THR_CC =
+
+BIN = ../bin
+XDIR = /seqprg/bin
+
+# set up files for SSE2/Altivec acceleration
+#
+include ../make/Makefile.sse_alt
+
+# no acceleration
+#
+DROPGSW_O = $(DROPGSW_NA_O)
+DROPLAL_O = $(DROPLAL_NA_O)
+DROPGNW_O = $(DROPGNW_NA_O)
+DROPLNW_O = $(DROPLNW_NA_O)
+
+# renamed (fasta36)  programs
+include ../make/Makefile36m.common_sql
+# conventional (fasta3) names
+# include ../make/Makefile.common
diff --git a/make/Makefile.linux_sse2 b/make/Makefile.linux_sse2
new file mode 120000
index 0000000..b1a4d6b
--- /dev/null
+++ b/make/Makefile.linux_sse2
@@ -0,0 +1 @@
+Makefile.linux64_sse2
\ No newline at end of file
diff --git a/make/Makefile.mp_com2 b/make/Makefile.mp_com2
new file mode 100644
index 0000000..d4a63e9
--- /dev/null
+++ b/make/Makefile.mp_com2
@@ -0,0 +1,116 @@
+
+# combinations of files for "composite" drop* functions
+#
+DROPLNW_O = droplnw.o wm_align.o calcons_sw.o
+DROPGNW_O = dropgnw.o wm_align.o calcons_sw.o
+DROPNSW_O = dropnsw.o  wm_align.o calcons_sw.o
+DROPNFA_O = drop_nfa.o wm_align.o calcons_fa.o
+DROPBD_O = dropsbd.o wm_align.o calcons_fa.o
+DROPTFA_O = drop_tfa.o
+DROPFF_O = drop_ff2.o calcons_ff.o
+DROPFS_O = drop_fs2.o calcons_fs.o
+DROPFM_O = drop_fm.o calcons_fm.o
+DROPTFF_O = drop_tff.o calcons_tff.o
+DROPTFS_O = drop_tfs.o calcons_tfs.o
+DROPTFM_O = drop_tfm.o calcons_tfm.o
+
+COMPACC_TO = compacc2_t.o  # used with comp_lib5e.c/comp_lib7e.c/comp_lib8.c
+COMPACC_SO = compacc2_s.o
+COMPACC_PO = compacc2_p.o
+
+SHOWBESTC = mshowbest.c
+SHOWBESTO = showbest.o build_ares.o
+
+SHOWALIGN = mshowalign2
+SHOWALIGN_P = mshowalign2_p
+SHOWALIGN_S = mshowalign2_s
+SHOWALIGN_T = mshowalign2_t
+LSHOWALIGN = lshowalign
+
+MWH = mw.h 
+MWHP = mw.h
+
+MP_PROGS = ssearch36_mpi fasta36_mpi  fasts36_mpi fastx36_mpi tfastx36_mpi fasty36_mpi tfasty36_mpi tfasts36_mpi fastm36_mpi fastf36_mpi tfastf36_mpi glsearch36_mpi ggsearch36_mpi
+
+PROGS = $(MP_PROGS)
+
+all: $(PROGS)
+
+clean-up:
+	rm -f *.o $(PROGS); 	rm -rf $(BIN)/*
+
+install: $(PROGS)
+	pushd $(BIN); cp $(PROGS)  $(XDIR); popd
+
+
+ssearch36_mpi : $(COMP_THRO) ${WORK_THR_O}  $(THR_SUBS).o ${COMPACC_PO} $(SHOWBESTO) re_getlib.o $(SHOWALIGN_P).o htime.o apam.o mpi_doinit.o scale_se.o karlin.o $(DROPGSW_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o pssm_asn_subs.o
+	$(CC) $(HFLAGS) $(BIN)/ssearch36_mpi $(COMP_THRO) ${WORK_THR_O}  $(THR_SUBS).o ${COMPACC_PO} $(SHOWBESTO) re_getlib.o $(SHOWALIGN_P).o htime.o apam.o mpi_doinit.o $(DROPGSW_O) scale_se.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o pssm_asn_subs.o $(LIB_M) $(THR_LIBS)
+
+ssearch36s_mpi : $(COMP_THRO) ${WORK_THR_O}  $(THR_SUBS).o ${COMPACC_PO} $(SHOWBESTO) re_getlib.o $(SHOWALIGN_P).o htime.o apam.o mpi_doinit.o scale_se.o karlin.o $(DROPGSW_NA_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o pssm_asn_subs.o
+	$(CC) $(HFLAGS) $(BIN)/ssearch36s_mpi $(COMP_THRO) ${WORK_THR_O}  $(THR_SUBS).o ${COMPACC_PO} $(SHOWBESTO) re_getlib.o $(SHOWALIGN_P).o htime.o apam.o mpi_doinit.o $(DROPGSW_NA_O) scale_se.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o pssm_asn_subs.o $(LIB_M) $(THR_LIBS)
+
+glsearch36_mpi : $(COMP_THRO) ${WORK_THR_O}  $(THR_SUBS).o ${COMPACC_PO} $(SHOWBESTO) re_getlib.o $(SHOWALIGN_P).o htime.o apam.o mpi_doinit.o init_lnw.o scale_sn.o karlin.o $(DROPLNW_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o pssm_asn_subs.o
+	$(CC) $(HFLAGS) $(BIN)/glsearch36_mpi $(COMP_THRO) ${WORK_THR_O}  $(THR_SUBS).o ${COMPACC_PO} $(SHOWBESTO) re_getlib.o $(SHOWALIGN_P).o htime.o apam.o mpi_doinit.o init_lnw.o $(DROPLNW_O) scale_sn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o pssm_asn_subs.o $(LIB_M) $(THR_LIBS)
+
+glsearch36s_mpi : $(COMP_THRO) ${WORK_THR_O}  $(THR_SUBS).o ${COMPACC_PO} showsum.o re_getlib.o $(SHOWALIGN_P).o htime.o apam.o mpi_doinit.o init_lnw.o scale_sn.o karlin.o $(DROPLNW_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o pssm_asn_subs.o
+	$(CC) $(HFLAGS) $(BIN)/glsearch36s_mpi $(COMP_THRO) ${WORK_THR_O}  $(THR_SUBS).o ${COMPACC_PO} showsum.o re_getlib.o $(SHOWALIGN_P).o htime.o apam.o mpi_doinit.o init_lnw.o $(DROPLNW_O) scale_sn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o pssm_asn_subs.o $(LIB_M) $(THR_LIBS)
+
+ggsearch36_mpi : $(COMP_THRO) ${WORK_THR_O}  $(THR_SUBS).o ${COMPACC_PO} $(SHOWBESTO) re_getlib.o $(SHOWALIGN_P).o htime.o apam.o mpi_doinit.o init_gnw.o scale_sn.o karlin.o $(DROPGNW_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o pssm_asn_subs.o
+	$(CC) $(HFLAGS) $(BIN)/ggsearch36_mpi $(COMP_THRO) ${WORK_THR_O}  $(THR_SUBS).o ${COMPACC_PO} $(SHOWBESTO) re_getlib.o $(SHOWALIGN_P).o htime.o apam.o mpi_doinit.o init_gnw.o $(DROPGNW_O) scale_sn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o pssm_asn_subs.o $(LIB_M) $(THR_LIBS)
+
+fasta36_mpi : $(COMP_THRO) $(WORK_THR_O) $(THR_SUBS).o ${COMPACC_PO} $(SHOWBESTO) re_getlib.o $(SHOWALIGN_P).o htime.o apam.o mpi_doinit.o init_fa.o scale_se.o karlin.o $(DROPNFA_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o
+	$(CC) $(HFLAGS) $(BIN)/fasta36_mpi $(COMP_THRO) $(WORK_THR_O) $(THR_SUBS).o ${COMPACC_PO} $(SHOWBESTO) re_getlib.o $(SHOWALIGN_P).o htime.o apam.o mpi_doinit.o init_fa.o $(DROPNFA_O) scale_se.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o $(LIB_M) $(THR_LIBS)
+
+fastf36_mpi : $(COMP_THRO) $(WORK_THR_O) $(THR_SUBS).o ${COMPACC_PO} $(SHOWBESTO) re_getlib.o $(SHOWALIGN_P).o htime.o apam.o mpi_doinit.o init_ff.o scaleswtf.o last_tat.o tatstats_ff.o karlin.o $(DROPFF_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o mrandom.o url_subs.o
+	$(CC) $(HFLAGS) $(BIN)/fastf36_mpi $(COMP_THRO) $(WORK_THR_O) $(THR_SUBS).o ${COMPACC_PO} $(SHOWBESTO) re_getlib.o $(SHOWALIGN_P).o htime.o apam.o mpi_doinit.o init_ff.o $(DROPFF_O) scaleswtf.o last_tat.o tatstats_ff.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o mrandom.o url_subs.o $(LIB_M) $(THR_LIBS)
+
+fastf36s_mpi : $(COMP_THRO) $(WORK_THR_O) $(THR_SUBS).o ${COMPACC_PO} showsum.o $(SHOWALIGN_P).o htime.o apam.o mpi_doinit.o init_ff.o scaleswtf.o karlin.o $(DROPFF_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o mrandom.o url_subs.o
+	$(CC) $(HFLAGS) $(BIN)/fastf36s_mpi $(COMP_THRO) $(WORK_THR_O) $(THR_SUBS).o ${COMPACC_PO} showsum.o $(SHOWALIGN_P).o htime.o apam.o mpi_doinit.o init_ff.o $(DROPFF_O) scaleswtf.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o mrandom.o url_subs.o $(LIB_M) $(THR_LIBS)
+
+fasts36_mpi : $(COMP_THRO) $(WORK_THR_O) $(THR_SUBS).o ${COMPACC_PO} $(SHOWBESTO) re_getlib.o $(SHOWALIGN_P).o htime.o apam.o mpi_doinit.o init_fs.o scaleswts.o last_tat.o tatstats_fs.o karlin.o $(DROPFS_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o mrandom.o url_subs.o
+	$(CC) $(HFLAGS) $(BIN)/fasts36_mpi $(COMP_THRO) $(WORK_THR_O) $(THR_SUBS).o ${COMPACC_PO} $(SHOWBESTO) re_getlib.o $(SHOWALIGN_P).o htime.o apam.o mpi_doinit.o init_fs.o $(DROPFS_O) scaleswts.o last_tat.o tatstats_fs.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o mrandom.o url_subs.o $(LIB_M) $(THR_LIBS)
+
+fastm36_mpi : $(COMP_THRO) $(WORK_THR_O) $(THR_SUBS).o ${COMPACC_PO} $(SHOWBESTO) re_getlib.o $(SHOWALIGN_P).o htime.o apam.o mpi_doinit.o init_fs.o scaleswts.o last_tat.o tatstats_fm.o karlin.o $(DROPFM_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o mrandom.o url_subs.o
+	$(CC) $(HFLAGS) $(BIN)/fastm36_mpi $(COMP_THRO) $(WORK_THR_O) $(THR_SUBS).o ${COMPACC_PO} $(SHOWBESTO) re_getlib.o $(SHOWALIGN_P).o htime.o apam.o mpi_doinit.o init_fs.o $(DROPFM_O) scaleswts.o last_tat.o tatstats_fm.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o mrandom.o url_subs.o $(LIB_M) $(THR_LIBS)
+
+fastx36_mpi : $(COMP_THRO) $(WORK_THRX_O) $(THR_SUBS).o ${COMPACC_PO} $(SHOWBESTO) re_getlib.o $(SHOWALIGN_P).o c_dispn.o htime.o apam.o mpi_doinit.o init_fx.o faatran.o scale_se.o karlin.o drop_fx.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o
+	$(CC) $(HFLAGS) $(BIN)/fastx36_mpi $(COMP_THRO) $(WORK_THRX_O) $(THR_SUBS).o ${COMPACC_PO} $(SHOWBESTO) re_getlib.o $(SHOWALIGN_P).o htime.o apam.o mpi_doinit.o init_fx.o drop_fx.o faatran.o scale_se.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o $(LIB_M) $(THR_LIBS)
+
+fasty36_mpi : $(COMP_THRO) $(WORK_THRX_O) $(THR_SUBS).o ${COMPACC_PO} $(SHOWBESTO) re_getlib.o $(SHOWALIGN_P).o c_dispn.o htime.o apam.o mpi_doinit.o init_fy.o faatran.o scale_se.o karlin.o drop_fz.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o
+	$(CC) $(HFLAGS) $(BIN)/fasty36_mpi $(COMP_THRO) $(WORK_THRX_O) $(THR_SUBS).o ${COMPACC_PO} $(SHOWBESTO) re_getlib.o $(SHOWALIGN_P).o htime.o apam.o mpi_doinit.o init_fy.o drop_fz.o faatran.o scale_se.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o $(LIB_M) $(THR_LIBS)
+
+tfastf36_mpi : $(COMP_THRO) $(WORK_THRX_O) $(THR_SUBS).o ${COMPACC_PO} $(SHOWBESTO) re_getlib.o $(SHOWALIGN_P).o c_dispn.o htime.o apam.o mpi_doinit.o init_tf.o  scaleswtf.o last_tat.o tatstats_ff.o karlin.o $(DROPTFF_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o mrandom.o url_subs.o
+	$(CC) $(HFLAGS) $(BIN)/tfastf36_mpi $(COMP_THRO) $(WORK_THRX_O) $(THR_SUBS).o ${COMPACC_PO} $(SHOWBESTO) re_getlib.o $(SHOWALIGN_P).o htime.o apam.o mpi_doinit.o init_tf.o $(DROPTFF_O) scaleswtf.o last_tat.o tatstats_ff.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o mrandom.o url_subs.o $(LIB_M) $(THR_LIBS)
+
+tfasts36_mpi : $(COMP_THRO) $(WORK_THRX_O) $(THR_SUBS).o ${COMPACC_PO} $(SHOWBESTO) re_getlib.o $(SHOWALIGN_P).o c_dispn.o htime.o apam.o mpi_doinit.o init_tfs.o scaleswts.o last_tat.o tatstats_fs.o karlin.o $(DROPTFS_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o mrandom.o url_subs.o
+	$(CC) $(HFLAGS) $(BIN)/tfasts36_mpi $(COMP_THRO) $(WORK_THRX_O) $(THR_SUBS).o ${COMPACC_PO} $(SHOWBESTO) re_getlib.o $(SHOWALIGN_P).o htime.o apam.o mpi_doinit.o init_tfs.o $(DROPTFS_O) scaleswts.o last_tat.o tatstats_fs.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o mrandom.o url_subs.o $(LIB_M) $(THR_LIBS)
+
+tfastx36_mpi : $(COMP_THRO) $(WORK_THRX_O) $(THR_SUBS).o ${COMPACC_PO} $(SHOWBESTO) re_getlib.o $(SHOWALIGN_P).o htime.o apam.o mpi_doinit.o init_tfx.o scale_se.o karlin.o drop_tfx.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o mrandom.o
+	$(CC) $(HFLAGS) $(BIN)/tfastx36_mpi $(COMP_THRO) $(WORK_THRX_O) $(THR_SUBS).o ${COMPACC_PO} $(SHOWBESTO) re_getlib.o $(SHOWALIGN_P).o htime.o apam.o mpi_doinit.o init_tfx.o drop_tfx.o scale_se.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o mrandom.o $(LIB_M) $(THR_LIBS)
+
+tfasty36_mpi : $(COMP_THRO) $(WORK_THRX_O) $(THR_SUBS).o ${COMPACC_PO} $(SHOWBESTO) re_getlib.o $(SHOWALIGN_P).o htime.o apam.o mpi_doinit.o init_tfy.o scale_se.o karlin.o drop_tfz.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o mrandom.o
+	$(CC) $(HFLAGS) $(BIN)/tfasty36_mpi $(COMP_THRO) $(WORK_THRX_O) $(THR_SUBS).o ${COMPACC_PO} $(SHOWBESTO) re_getlib.o $(SHOWALIGN_P).o htime.o apam.o mpi_doinit.o init_tfy.o drop_tfz.o scale_se.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o mrandom.o $(LIB_M) $(THR_LIBS)
+
+comp_mpi4.o : comp_lib4.c mw.h structs.h defs.h param.h pcomp_bufs.h thr_buf_structs.h
+	$(CC) $(CFLAGS) -DMPI_SRC -DCOMP_MLIB -c comp_lib4.c -o comp_mpi4.o
+
+comp_mpi5.o : comp_lib5.c mw.h structs.h defs.h param.h pcomp_bufs.h thr_buf_structs.h
+	$(CC) $(CFLAGS) -DMPI_SRC -DCOMP_MLIB -c comp_lib5.c -o comp_mpi5.o
+
+comp_mpi6.o : comp_lib6.c mw.h structs.h defs.h param.h pcomp_bufs.h thr_buf_structs.h
+	$(CC) $(CFLAGS) -DMPI_SRC -DCOMP_MLIB -c comp_lib6.c -o comp_mpi6.o
+
+comp_mpi7.o : comp_lib7.c mw.h structs.h defs.h param.h pcomp_bufs.h thr_buf_structs.h
+	$(CC) $(CFLAGS) -DMPI_SRC -DCOMP_MLIB -c comp_lib7.c -o comp_mpi7.o
+
+comp_mpi9.o : comp_lib9.c mw.h structs.h defs.h param.h pcomp_bufs.h thr_buf_structs.h
+	$(CC) $(CFLAGS) -DMPI_SRC -DCOMP_MLIB -c comp_lib9.c -o comp_mpi9.o
+
+work_mpi2.o : work_thr2.c mw.h structs.h defs.h param.h pcomp_bufs.h thr_buf_structs.h
+	$(CC) -DMPI_SRC $(CFLAGS) -c work_thr2.c -o work_mpi2.o
+
+work_mpi2x.o : work_thr2.c mw.h structs.h defs.h param.h pcomp_bufs.h thr_buf_structs.h
+	$(CC) -DMPI_SRC -DTFAST $(CFLAGS) -c work_thr2.c -o work_mpi2x.o
+
+mpi_doinit.o : doinit.c defs.h param.h rstruct.h upam.h structs.h uascii.h aln_structs.h
+	$(CC) -DMPI_SRC $(CFLAGS) -c doinit.c -o mpi_doinit.o
diff --git a/make/Makefile.mpi_icc_sse2 b/make/Makefile.mpi_icc_sse2
new file mode 100644
index 0000000..9b7bb56
--- /dev/null
+++ b/make/Makefile.mpi_icc_sse2
@@ -0,0 +1,55 @@
+# $Id: Makefile.mpi_icc_sse2 849 2011-10-21 20:09:55Z wrp $
+#
+# makefile for fasta3, fasta3_t using the Intel icc compiler
+#
+# This file is designed for 64-bit Linux systems.
+# -D_LARGEFILE64_SOURCE and -DBIG_LIB64 require a 64-bit linux system.
+#
+# uses SSE2 extensions for ssearch36(_t)
+
+SHELL=/bin/bash
+
+CC= mpicc
+#CC= mpicc-dbg -g -DDEBUG
+
+#CC=gcc -Wall -pedantic -ansi -g -O
+#CC= /usr/local/parasoft/bin/insure -g -DDEBUG
+
+# this file works for x86 LINUX
+
+# standard options
+CFLAGS= -DPCOMPLIB=MPI -DSAMP_STATS_MORE -DSHOWSIM -DUNIX -DTIMES -DHZ=100 -DMAX_WORKERS=8 -DTHR_EXIT=pthread_exit  -DFASTA_HOST='"your_fasta_host_here"' -DUSE_MMAP -D_REENTRANT -DHAS_INTTYPES -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -DUSE_FSEEKO -DPGM_DOC -DBIG_LIB64 -DSAMP_STATS 
+# -DSAMP_STATS_FAST -DSUPERFAMNUM -DSFCHAR="'|'" 
+
+# -I/usr/local/include/mysql -DMYSQL_DB 
+#
+#(for mySQL databases)  (also requires change to Makefile36mpi.common)
+
+LIB_M = -lm -lz
+#LIB_M = -L/usr/local/lib/mysql -lmysqlclient -lm
+# for mySQL databases
+
+HFLAGS= -o
+NFLAGS= -o
+
+# for Linux
+THR_SUBS = mpi_subs2
+THR_LIBS = 
+THR_CC =
+
+BIN = ../bin
+XDIR = /seqprg/bin
+
+# set up files for SSE2/Altivec acceleration
+#
+include ../make/Makefile.sse_alt
+
+# SSE2 acceleration
+#
+DROPGSW_O = $(DROPGSW_SSE_O)
+DROPLAL_O = $(DROPLAL_SSE_O)
+DROPGNW_O = $(DROPGNW_SSE_O)
+DROPLNW_O = $(DROPLNW_SSE_O)
+
+include ../make/Makefile36mpi.common
+
diff --git a/make/Makefile.nm_fcom b/make/Makefile.nm_fcom
new file mode 100755
index 0000000..646fc77
--- /dev/null
+++ b/make/Makefile.nm_fcom
@@ -0,0 +1,304 @@
+
+#================ common .obj files 
+
+doinit.obj : doinit.c defs.h param.h upam.h structs.h uascii.h
+	$(CC) $(CFLAGS) -c doinit.c
+
+init_sw.obj : initfa.c defs.h param.h upam.h structs.h
+	$(CC) $(CFLAGS) -c -DSSEARCH initfa.c /Foinit_sw.obj
+
+init_lal.obj : initfa.c defs.h param.h upam.h structs.h
+	$(CC) $(CFLAGS) -c -DLALIGN initfa.c /Foinit_lal.obj
+
+init_gnw.obj : initfa.c defs.h param.h upam.h structs.h
+	$(CC) $(CFLAGS) -c -DGGSEARCH initfa.c /Foinit_gnw.obj
+
+init_lnw.obj : initfa.c defs.h param.h upam.h structs.h
+	$(CC) $(CFLAGS) -c -DGLSEARCH initfa.c /Foinit_lnw.obj
+
+init_ssw.obj : initfa.c defs.h param.h upam.h structs.h
+	$(CC) $(CFLAGS) -c -DOSEARCH initfa.c /Foinit_ssw.obj
+
+init_rss.obj : initfa.c defs.h param.h upam.h structs.h
+	$(CC) $(CFLAGS) -c -DPRSS initfa.c /Foinit_rss.obj
+
+init_rfx.obj : initfa.c defs.h param.h upam.h structs.h
+	$(CC) $(CFLAGS) -c -DPRSS -DFASTX initfa.c /Foinit_rfx.obj
+
+init_fa.obj : initfa.c defs.h param.h upam.h structs.h
+	$(CC) $(CFLAGS) -c -DFASTA initfa.c /Foinit_fa.obj
+
+init_ff.obj : initfa.c defs.h param.h upam.h structs.h
+	$(CC) $(CFLAGS) -c -DFASTF initfa.c /Foinit_ff.obj
+
+init_tf.obj : initfa.c defs.h param.h upam.h structs.h
+	$(CC) $(CFLAGS) -c -DFASTF -DTFAST initfa.c /Foinit_tf.obj
+
+init_fs.obj : initfa.c defs.h param.h upam.h structs.h
+	$(CC) $(CFLAGS) -c -DFASTS initfa.c /Foinit_fs.obj
+
+init_fm.obj : initfa.c defs.h param.h upam.h structs.h
+	$(CC) $(CFLAGS) -c -DFASTM initfa.c /Foinit_fm.obj
+
+init_tfs.obj : initfa.c defs.h param.h upam.h structs.h
+	$(CC) $(CFLAGS) -c -DFASTS -DTFAST  initfa.c /Foinit_tfs.obj
+
+init_tfm.obj : initfa.c defs.h param.h upam.h structs.h
+	$(CC) $(CFLAGS) -c -DFASTM -DTFAST  initfa.c /Foinit_tfm.obj
+
+init_tfa.obj : initfa.c defs.h param.h upam.h structs.h
+	$(CC) $(CFLAGS) -c -DFASTA -DTFAST initfa.c /Foinit_tfa.obj
+
+init_fx.obj : initfa.c defs.h param.h upam.h structs.h
+	$(CC) $(CFLAGS) -c -DFASTX initfa.c /Foinit_fx.obj
+
+init_tfx.obj : initfa.c defs.h param.h upam.h structs.h
+	$(CC) $(CFLAGS) -c -DFASTX -DTFAST initfa.c /Foinit_tfx.obj
+
+init_fy.obj : initfa.c defs.h param.h upam.h structs.h
+	$(CC) $(CFLAGS) -c -DFASTY initfa.c /Foinit_fy.obj
+
+init_tfy.obj : initfa.c defs.h param.h upam.h structs.h
+	$(CC) $(CFLAGS) -c -DFASTY -DTFAST initfa.c /Foinit_tfy.obj
+
+#================ miscellaneous
+
+htime.obj : htime.c
+	$(CC) $(CFLAGS) -c htime.c
+
+compacc.obj : compacc.c upam.h uascii.h param.h structs.h $(MWH) defs.h
+	$(CC) $(CFLAGS) -c compacc.c
+
+compacc2_t.obj : compacc2e.c upam.h uascii.h param.h rstruct.h structs.h $(MWH) defs.h aln_structs.h drop_func.h
+	$(CC) $(THR_CC) $(CFLAGS) -DCOMP_THR -DCOMP_MLIB -c compacc2e.c /Focompacc2_t.obj
+
+compacc2_s.obj : compacc2e.c upam.h uascii.h param.h rstruct.h structs.h $(MWH) defs.h aln_structs.h drop_func.h
+	$(CC) $(THR_CC) $(CFLAGS) -c compacc2e.c /Focompacc2_s.obj
+
+pssm_asn_subs.obj : pssm_asn_subs.c defs.h
+	$(CC) $(CFLAGS) -c pssm_asn_subs.c
+
+#================ display list of best hits / alignments
+
+showbest.obj : $(SHOWBESTC) $(MWH) defs.h param.h structs.h  aln_structs.h drop_func.h
+	$(CC) $(CFLAGS) -c $(SHOWBESTC) /Foshowbest.obj
+
+showrss.obj : showrss.c $(MWH) defs.h param.h structs.h  aln_structs.h drop_func.h
+	$(CC) $(CFLAGS) -c showrss.c
+
+showun.obj : mshowbest.c $(MWH) defs.h aln_structs.h drop_func.h
+	$(CC) $(CFLAGS) -c -DSHOWUN mshowbest.c /Foshowun.obj
+
+showrel.obj : $(SHOWBESTC) $(MWH) defs.h aln_structs.h drop_func.h
+	$(CC) $(CFLAGS) -c -DSHOWREL $(SHOWBESTC) /Foshowrel.obj
+
+showsum.obj : showsum.c $(MWH) defs.h drop_func.h
+	$(CC) $(CFLAGS) -c showsum.c
+
+$(LSHOWALIGN).obj : $(SHOWALIGN).c $(MWHP) defs.h structs.h param.h aln_structs.h drop_func.h
+	$(CC) $(THR_CC) $(CFLAGS) -DLALIGN -c $(SHOWALIGN).c /Fo$(LSHOWALIGN).obj
+
+$(SHOWALIGN).obj : $(SHOWALIGN).c $(MWHP) defs.h structs.h param.h aln_structs.h drop_func.h
+	$(CC) $(CFLAGS) -c $(SHOWALIGN).c /Fo$(SHOWALIGN).obj
+
+$(SHOWALIGN)_u.obj : $(SHOWALIGN).c $(MWHP) defs.h structs.h param.h aln_structs.h drop_func.h
+	$(CC) $(CFLAGS) -DSHOWUN -c /Fo$(SHOWALIGN)_u.obj $(SHOWALIGN).c
+re_getlib.obj : re_getlib.c mw.h mm_file.h
+	$(CC) $(CFLAGS) -c re_getlib.c
+
+lib_sel.obj : lib_sel.c defs.h structs.h
+	$(CC) $(CFLAGS) -c lib_sel.c
+
+c_dispn.obj : c_dispn.c defs.h structs.h param.h 
+	$(CC) $(CFLAGS) -c c_dispn.c
+
+build_ares.obj : build_ares.c $(MWH) defs.h param.h rstruct.h structs.h  aln_structs.h drop_func.h
+	$(CC) $(THR_CC) $(CFLAGS) -c build_ares.c /Fobuild_ares.obj
+
+#================ statistical functions
+
+karlin.obj : karlin.c param.h
+	$(CC) $(CFLAGS) -c karlin.c
+
+scale_se.obj : scaleswn.c defs.h param.h structs.h $(MWH) alt_parms.h
+	$(CC) $(CFLAGS) -DLOCAL_SCORE -c scaleswn.c /Foscale_se.obj
+
+scale_sn.obj : scaleswn.c defs.h param.h structs.h $(MWH) alt_parms.h
+	$(CC) -DNORMAL_DIST $(CFLAGS) -c scaleswn.c /Foscale_sn.obj
+
+scaleswtf.obj : scaleswt.c defs.h param.h structs.h $(MWH) alt_parms.h
+	$(CC) $(CFLAGS) -DFASTF -c scaleswt.c /Foscaleswtf.obj
+
+scaleswts.obj : scaleswt.c defs.h param.h structs.h $(MWH) alt_parms.h
+	$(CC) $(CFLAGS) -c scaleswt.c /Foscaleswts.obj
+
+tatstats_fs.obj : tatstats.c tatstats.h
+	$(CC) $(CFLAGS) -c -DFASTS tatstats.c /Fotatstats_fs.obj
+
+tatstats_ff.obj : tatstats.c tatstats.h
+	$(CC) $(CFLAGS) -c -DFASTF tatstats.c /Fotatstats_ff.obj
+
+tatstats_fm.obj : tatstats.c tatstats.h
+	$(CC) $(CFLAGS) -c -DFASTM tatstats.c /Fotatstats_fm.obj
+
+last_tat.obj : last_tat.c defs.h mm_file.h structs.h param.h
+	$(CC) $(CFLAGS) -c last_tat.c
+
+last_thresh.obj : last_thresh.c defs.h mm_file.h structs.h param.h
+	$(CC) $(CFLAGS) -c last_thresh.c
+
+#================ drop functions - actual scores/alignments
+
+drop_nfa.obj : dropnfa.c dropnfa.h param.h defs.h drop_func.h
+	$(CC) $(CFLAGS) -c dropnfa.c /Fodrop_nfa.obj
+
+# drop_ff, _fs, _fm must define FASTF, FASTS, and FASTM to ensure
+# that tatstats.h is built appropriately
+
+drop_ff.obj : dropff2.c param.h defs.h tatstats.h drop_func.h
+	$(CC) $(CFLAGS) -DFASTF -c dropff2.c /Fodrop_ff.obj
+
+drop_tff.obj : dropff2.c param.h defs.h tatstats.h drop_func.h
+	$(CC) $(CFLAGS) -DFASTF -DTFAST -c dropff2.c /Fodrop_tff.obj
+
+drop_ff2.obj : dropff2.c param.h defs.h tatstats.h drop_func.h
+	$(CC) $(CFLAGS) -c -DFASTF  dropff2.c /Fodrop_ff2.obj
+
+drop_tff2.obj : dropff2.c param.h defs.h tatstats.h drop_func.h
+	$(CC) $(CFLAGS) -c -DFASTF -DTFAST dropff2.c /Fodrop_tff2.obj
+
+drop_fs2.obj : dropfs2.c param.h defs.h tatstats.h drop_func.h
+	$(CC) $(CFLAGS) -DFASTS -c dropfs2.c /Fodrop_fs2.obj
+
+drop_tfs.obj : dropfs2.c param.h defs.h drop_func.h
+	$(CC) $(CFLAGS) -c -DTFAST -DFASTS dropfs2.c /Fodrop_tfs.obj
+
+drop_fm.obj : dropfs2.c param.h defs.h drop_func.h
+	$(CC) $(CFLAGS) -c -DFASTM dropfs2.c /Fodrop_fm.obj
+
+drop_tfm.obj : dropfs2.c param.h defs.h drop_func.h
+	$(CC) $(CFLAGS) -c -DTFAST -DFASTM dropfs2.c /Fodrop_tfm.obj
+
+drop_tfa.obj : dropnfa.c dropnfa.h upam.h param.h defs.h
+	$(CC) $(CFLAGS) -c -DTFASTA dropnfa.c /Fodrop_tfa.obj
+
+drop_fx.obj : dropfx2.c upam.h param.h defs.h drop_func.h
+	$(CC) $(CFLAGS) -c dropfx2.c /Fodrop_fx.obj
+
+drop_tfx.obj : dropfx2.c upam.h param.h defs.h drop_func.h
+	$(CC) $(CFLAGS) -c -DTFAST dropfx2.c /Fodrop_tfx.obj
+
+drop_fz.obj : dropfz3.c upam.h param.h defs.h aamap.h drop_func.h
+	$(CC) $(CFLAGS) -c dropfz3.c /Fodrop_fz.obj
+
+drop_tfz.obj : dropfz3.c upam.h param.h defs.h aamap.h drop_func.h
+	$(CC) $(CFLAGS) -c -DTFAST dropfz3.c /Fodrop_tfz.obj
+
+dropnsw.obj : dropnsw.c upam.h param.h structs.h drop_func.h
+	$(CC) $(CFLAGS) -c dropnsw.c
+
+dropgsw2.obj : dropgsw2.c dropgsw2.h upam.h param.h structs.h drop_func.h
+	$(CC) $(CFLAGS) -c dropgsw2.c
+
+dropgnw2.obj : dropnnw2.c upam.h param.h structs.h drop_func.h
+	$(CC) $(CFLAGS) -c dropnnw2.c /Fodropgnw2.obj
+
+droplnw2.obj : dropnnw2.c upam.h param.h structs.h drop_func.h
+	$(CC) $(CFLAGS) -c dropnnw2.c /Fodroplnw2.obj
+
+droplal.obj : dropgsw2.c dropgsw2.h upam.h param.h drop_func.h a_mark.h
+	$(CC) $(CFLAGS) -DLALIGN -c dropgsw2.c /Fodroplal.obj
+
+droplal_sse2.obj : dropgsw2.c dropgsw2.h upam.h param.h drop_func.h a_mark.h
+	$(CC) $(CFLAGS) -DLALIGN -DSW_SSE2 -c dropgsw2.c /Fodroplal_sse2.obj
+
+dropgsw2_sse2.obj : dropgsw2.c dropgsw2.h upam.h param.h structs.h drop_func.h
+	$(CC) $(CFLAGS) -DSW_SSE2 -c dropgsw2.c /Fodropgsw2_sse2.obj
+
+smith_waterman_altivec.obj : smith_waterman_altivec.c smith_waterman_altivec.h dropgsw2.h defs.h param.h
+	$(CC) $(CFLAGS) -c smith_waterman_altivec.c
+
+smith_waterman_sse2.obj : smith_waterman_sse2.c smith_waterman_sse2.h dropgsw2.h defs.h param.h
+	$(CC) $(CFLAGS) -DSW_SSE2 -c smith_waterman_sse2.c
+
+dropnw.obj : dropnw.c upam.h param.h structs.h drop_func.h
+	$(CC) $(CFLAGS) -c dropnw.c
+
+wm_align.obj : wm_align.c defs.h param.h a_mark.h
+	$(CC) $(THR_CC) $(CFLAGS) -c wm_align.c
+
+calcons_fa.obj : cal_cons2.c defs.h param.h a_mark.h
+	$(CC) $(THR_CC) $(CFLAGS) -DFASTA -c cal_cons2.c /Focalcons_fa.obj
+
+calcons_sw.obj : cal_cons2.c defs.h param.h a_mark.h
+	$(CC) $(THR_CC) $(CFLAGS) -DSSEARCH -c cal_cons2.c /Focalcons_sw.obj
+
+calcons_la.obj : cal_cons2.c defs.h param.h a_mark.h
+	$(CC) $(THR_CC) -DLALIGN -DLCAL_CONS $(CFLAGS) -c cal_cons2.c /Focalcons_la.obj
+
+calcons_ff.obj : cal_consf.c defs.h param.h a_mark.h
+	$(CC) $(THR_CC) $(CFLAGS) -DFASTF -c cal_consf.c /Focalcons_ff.obj
+
+calcons_fs.obj : cal_consf.c defs.h param.h a_mark.h
+	$(CC) $(THR_CC) $(CFLAGS) -DFASTS -c cal_consf.c /Focalcons_fs.obj
+
+calcons_fm.obj : cal_consf.c defs.h param.h a_mark.h
+	$(CC) $(THR_CC) $(CFLAGS) -DFASTM -c cal_consf.c /Focalcons_fm.obj
+
+calcons_tff.obj : cal_consf.c defs.h param.h a_mark.h
+	$(CC) $(THR_CC) $(CFLAGS) -DTFAST -DFASTF -c cal_consf.c /Focalcons_tff.obj
+
+calcons_tfs.obj : cal_consf.c defs.h param.h a_mark.h
+	$(CC) $(THR_CC) $(CFLAGS) -DTFAST -DFASTS -c cal_consf.c /Focalcons_tfs.obj
+
+calcons_tfm.obj : cal_consf.c defs.h param.h a_mark.h
+	$(CC) $(THR_CC) $(CFLAGS) -DTFAST -DFASTM -c cal_consf.c /Focalcons_tfm.obj
+
+#================ reading query, libraries
+
+getseq.obj : getseq.c defs.h uascii.h structs.h upam.h 
+	$(CC) $(CFLAGS) -c getseq.c
+
+llgetaa.obj : llgetaa.c upam.h uascii.h
+	$(CC) $(CFLAGS) -c -DNOLIB llgetaa.c
+
+lgetlib.obj : $(NGETLIB).c altlib.h upam.h uascii.h mm_file.h
+	$(CC) $(CFLAGS) -c $(NGETLIB).c /Folgetlib.obj
+
+lgetaa_m.obj : mmgetaa.c altlib.h ncbl2_head.h upam.h uascii.h mm_file.h
+	$(CC) $(CFLAGS) -c mmgetaa.c /Folgetaa_m.obj
+
+ncbl_lib.obj : ncbl_lib.c ncbl_head.h
+	$(CC) $(CFLAGS) -c ncbl_lib.c
+
+ncbl2_mlib.obj : ncbl2_mlib.c ncbl2_head.h mm_file.h
+	$(CC) $(CFLAGS) -c ncbl2_mlib.c
+
+mysql_lib.obj : mysql_lib.c mm_file.h
+	$(CC) $(CFLAGS) -c mysql_lib.c
+
+pgsql_lib.obj : pgsql_lib.c mm_file.h
+	$(CC) $(CFLAGS) -c pgsql_lib.c
+
+#================ threading functions
+
+pthr_subs2.obj : pthr_subs2.c thr_bufs2.h pthr_subs.h
+	$(CC) $(CFLAGS) -c pthr_subs2.c
+
+uthr_subs.obj : uthr_subs.c thr_bufs2.h uthr_subs.h 
+	$(CC) $(CFLAGS) -c uthr_subs.c
+
+#================ translation
+
+faatran.obj : faatran.c upam.h uascii.h
+	$(CC) $(CFLAGS) -c faatran.c
+
+url_subs.obj : url_subs.c structs.h param.h
+	$(CC) $(CFLAGS) -c url_subs.c
+
+$(NRAND).obj : $(NRAND).c
+	$(CC) $(CFLAGS) -c $(NRAND).c
+#================ windows getopt()
+
+getopt.obj : getopt.c
+	$(CC) $(CFLAGS) -c getopt.c
diff --git a/make/Makefile.nm_pcom b/make/Makefile.nm_pcom
new file mode 100755
index 0000000..b1530ec
--- /dev/null
+++ b/make/Makefile.nm_pcom
@@ -0,0 +1,217 @@
+
+# combinations of files for "composite" drop* functions
+#
+DROPLNW_O = droplnw2.obj wm_align.obj calcons_sw.obj
+DROPGNW_O = dropgnw2.obj wm_align.obj calcons_sw.obj
+DROPNFA_O = drop_nfa.obj wm_align.obj calcons_fa.obj
+DROPTFA_O = drop_tfa.obj
+DROPFF_O = drop_ff2.obj calcons_ff.obj
+DROPFS_O = drop_fs2.obj calcons_fs.obj
+DROPFM_O = drop_fm.obj calcons_fm.obj
+DROPTFF_O = drop_tff.obj calcons_tff.obj
+DROPTFS_O = drop_tfs.obj calcons_tfs.obj
+DROPTFM_O = drop_tfm.obj calcons_tfm.obj
+
+COMPACC_TO = compacc2_t.obj  # used with comp_lib9.c
+COMPACC_SO = compacc2_s.obj
+
+SHOWBESTC = mshowbest.c
+SHOWBESTO = showbest.obj build_ares.obj
+SHOWALIGN = mshowalign2
+LSHOWALIGN = lshowalign2
+MWH = mw.h 
+MWHP = mw.h
+
+TPROGS = ssearch36_t.exe fasta36_t.exe fasts36_t.exe fastx36_t.exe tfastx36_t.exe fasty36_t.exe tfasty36_t.exe tfasts36_t.exe fastm36_t.exe fastf36_t.exe tfastf36_t.exe ggsearch36_t.exe glsearch36_t.exe
+
+SPROGS = fasta36.exe ssearch36.exe fasts36.exe fastx36.exe tfastx36.exe fasty36.exe tfasty36.exe tfasts36.exe fastm36.exe tfastm36.exe fastf36.exe tfastf36.exe lalign36.exe ggsearch36.exe glsearch36.exe
+
+MAPROGS = map_db.exe
+
+XTPROGS = fastx36_t.exe tfastx36_t.exe fasty36_t.exe tfasty36_t.exe
+XPROGS = fastx36.exe tfastx36.exe fasty36.exe tfasty36.exe  # same four programs as XTPROGS, without the _t suffix
+
+PROGS = $(SPROGS) $(TPROGS)
+
+all : $(PROGS)
+
+tall: $(TPROGS)
+
+sall: $(SPROGS)
+
+xall: $(XTPROGS) $(XPROGS) $(ZTPROGS) $(ZPROGS) 
+
+clean-up:
+	del *.obj $(PROGS)
+
+install: $(PROGS)
+	copy $(PROGS) $(XDIR)
+
+sinstall: $(SPROGS)
+	copy $(SPROGS) $(XDIR)
+
+tinstall: $(TPROGS)  # copies the TPROGS programs to XDIR; uses "copy" like install and sinstall
+	copy $(TPROGS) $(XDIR)
+
+fasta36.exe : $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fa.obj scale_se.obj karlin.obj $(DROPNFA_O) $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj
+	$(CL) /Fe..\bin\fasta36.exe $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fa.obj $(DROPNFA_O) scale_se.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj getopt.obj
+
+fastx36.exe : $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fx.obj scale_se.obj karlin.obj drop_fx.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj url_subs.obj $(NRAND).obj getopt.obj
+	$(CL) /Fe..\bin\fastx36.exe $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fx.obj drop_fx.obj scale_se.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj url_subs.obj $(NRAND).obj getopt.obj
+
+fasty36.exe : $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fy.obj scale_se.obj karlin.obj drop_fz.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj url_subs.obj $(NRAND).obj getopt.obj
+	$(CL) /Fe..\bin\fasty36.exe $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fy.obj drop_fz.obj scale_se.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj url_subs.obj $(NRAND).obj getopt.obj
+
+fastf36.exe : $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_ff.obj scaleswts.obj last_tat.obj tatstats_ff.obj karlin.obj $(DROPFF_O) $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj
+	$(CL) /Fe..\bin\fastf36.exe $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_ff.obj $(DROPFF_O) scaleswts.obj last_tat.obj tatstats_ff.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj
+
+fastf36u : $(COMP_LIBO) $(COMPACC_SO) showun.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_ff.obj scaleswtf.obj karlin.obj $(DROPFF_O) $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj
+	$(CL) /Fe..\bin\fastf36u.exe $(COMP_LIBO) $(COMPACC_SO) showun.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_ff.obj $(DROPFF_O) scaleswtf.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj
+
+fastf36s : $(COMP_LIBO) $(COMPACC_SO) showsum.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_ff.obj scaleswtf.obj karlin.obj $(DROPFF_O) $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj
+	$(CL) /Fe..\bin\fastf36s.exe $(COMP_LIBO) $(COMPACC_SO) showsum.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_ff.obj $(DROPFF_O) scaleswtf.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj
+
+fasts36.exe : $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fs.obj scaleswts.obj last_tat.obj tatstats_fs.obj karlin.obj $(DROPFS_O) $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj
+	$(CL) /Fe..\bin\fasts36.exe $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fs.obj $(DROPFS_O) scaleswts.obj last_tat.obj tatstats_fs.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj
+
+fastm36.exe : $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fm.obj scaleswts.obj last_tat.obj tatstats_fm.obj karlin.obj $(DROPFM_O) $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj
+	$(CL) /Fe..\bin\fastm36.exe $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fm.obj $(DROPFM_O) scaleswts.obj last_tat.obj tatstats_fm.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj
+
+tfastx36.exe : $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tfx.obj scale_se.obj karlin.obj drop_tfx.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj url_subs.obj $(NRAND).obj getopt.obj
+	$(CL) /Fe..\bin\tfastx36.exe $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tfx.obj drop_tfx.obj scale_se.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj url_subs.obj $(NRAND).obj getopt.obj
+
+tfasty36.exe : $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tfy.obj scale_se.obj karlin.obj drop_tfz.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj url_subs.obj $(NRAND).obj getopt.obj
+	$(CL) /Fe..\bin\tfasty36.exe $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tfy.obj drop_tfz.obj scale_se.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj url_subs.obj $(NRAND).obj getopt.obj
+
+tfastf36.exe : $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tf.obj scaleswtf.obj last_tat.obj tatstats_ff.obj karlin.obj $(DROPTFF_O) $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj $(NRAND).obj url_subs.obj getopt.obj
+	$(CL) /Fe..\bin\tfastf36.exe $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tf.obj $(DROPTFF_O) scaleswtf.obj last_tat.obj tatstats_ff.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj $(NRAND).obj url_subs.obj getopt.obj
+
+tfastf36s : $(COMP_LIBO) $(COMPACC_SO) showsum.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tf.obj scaleswtf.obj karlin.obj $(DROPTFF_O) $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj $(NRAND).obj url_subs.obj getopt.obj
+	$(CL) /Fe..\bin\tfastf36s.exe $(COMP_LIBO) $(COMPACC_SO) showsum.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tf.obj $(DROPTFF_O) scaleswtf.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj $(NRAND).obj url_subs.obj getopt.obj
+
+tfasts36.exe : $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tfs.obj scaleswts.obj tatstats_fs.obj last_tat.obj karlin.obj $(DROPTFS_O) $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj $(NRAND).obj url_subs.obj getopt.obj
+	$(CL) /Fe..\bin\tfasts36.exe $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tfs.obj $(DROPTFS_O) scaleswts.obj tatstats_fs.obj last_tat.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj $(NRAND).obj url_subs.obj getopt.obj
+
+tfastm36.exe : $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tfm.obj scaleswts.obj tatstats_fm.obj last_tat.obj karlin.obj $(DROPTFM_O) $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj $(NRAND).obj url_subs.obj getopt.obj
+	$(CL) /Fe..\bin\tfastm36.exe $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tfm.obj $(DROPTFM_O) scaleswts.obj tatstats_fm.obj last_tat.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj $(NRAND).obj url_subs.obj getopt.obj
+
+ssearch36nosse2.exe : $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_sw.obj scale_se.obj karlin.obj $(DROPGSW_O) $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj pssm_asn_subs.obj getopt.obj
+	$(CL) /Fe..\bin\ssearch36nosse2.exe $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_sw.obj $(DROPGSW_O) scale_se.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj pssm_asn_subs.obj getopt.obj
+
+ggsearch36.exe : $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_gnw.obj scale_sn.obj karlin.obj $(DROPGNW_O) $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj pssm_asn_subs.obj getopt.obj
+	$(CL) /Fe..\bin\ggsearch36.exe $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_gnw.obj $(DROPGNW_O) scale_sn.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj pssm_asn_subs.obj getopt.obj
+
+glsearch36.exe : $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_lnw.obj scale_sn.obj karlin.obj $(DROPLNW_O) $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj pssm_asn_subs.obj getopt.obj
+	$(CL) /Fe..\bin\glsearch36.exe $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_lnw.obj $(DROPLNW_O) scale_sn.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj pssm_asn_subs.obj getopt.obj
+
+lalign36.exe : $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.obj $(LSHOWALIGN).obj htime.obj apam.obj doinit.obj init_lal.obj scale_se.obj karlin.obj last_thresh.obj $(DROPLAL_O) $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj pssm_asn_subs.obj getopt.obj
+	$(CL) /Fe..\bin\lalign36.exe $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.obj $(LSHOWALIGN).obj htime.obj apam.obj doinit.obj init_lal.obj last_thresh.obj $(DROPLAL_O) scale_se.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj pssm_asn_subs.obj getopt.obj
+
+ssearch36.exe : $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_sw.obj scale_se.obj karlin.obj $(DROPGSW_SSE2_O) $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj pssm_asn_subs.obj getopt.obj
+	$(CL) /Fe..\bin\ssearch36.exe $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_sw.obj $(DROPGSW_SSE2_O) scale_se.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj pssm_asn_subs.obj getopt.obj
+
+osearch36.exe : $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_ssw.obj scale_se.obj karlin.obj dropnsw.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj getopt.obj
+	$(CL) /Fe..\bin\osearch36.exe $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_ssw.obj dropnsw.obj scale_se.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj getopt.obj
+
+usearch36.exe : $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_sw.obj scale_se.obj karlin.obj dropnsw.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj getopt.obj
+	$(CL) /Fe..\bin\usearch36.exe $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_sw.obj dropnsw.obj scale_se.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj getopt.obj
+
+ssearch36nosse2_t.exe : $(COMP_THRO) work_thr2.obj $(THR_SUBS).obj $(COMPACC_TO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_sw.obj scale_se.obj karlin.obj $(DROPGSW_O) $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj pssm_asn_subs.obj getopt.obj
+	$(CL) /Fe..\bin\ssearch36nosse2_t.exe $(COMP_THRO) work_thr2.obj $(THR_SUBS).obj $(COMPACC_TO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_sw.obj $(DROPGSW_O) scale_se.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj pssm_asn_subs.obj getopt.obj $(THR_LIBS)
+
+ssearch36_t.exe : $(COMP_THRO) work_thr2.obj $(THR_SUBS).obj $(COMPACC_TO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_sw.obj scale_se.obj karlin.obj $(DROPGSW_SSE2_O) $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj pssm_asn_subs.obj getopt.obj
+	$(CL) /Fe..\bin\ssearch36_t.exe $(COMP_THRO) work_thr2.obj $(THR_SUBS).obj $(COMPACC_TO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_sw.obj $(DROPGSW_SSE2_O) scale_se.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj pssm_asn_subs.obj getopt.obj $(THR_LIBS)
+
+osearch36_t.exe : $(COMP_THRO) work_thr2.obj $(THR_SUBS).obj $(COMPACC_TO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_ssw.obj scale_se.obj karlin.obj dropnsw.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj getopt.obj  # init_ssw.obj as in osearch36.exe; with init_sw.obj this rule duplicated usearch36_t.exe
+	$(CL) /Fe..\bin\osearch36_t.exe $(COMP_THRO) work_thr2.obj $(THR_SUBS).obj $(COMPACC_TO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_ssw.obj dropnsw.obj scale_se.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj getopt.obj $(THR_LIBS)
+
+usearch36_t.exe : $(COMP_THRO) work_thr2.obj $(THR_SUBS).obj $(COMPACC_TO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_sw.obj scale_se.obj karlin.obj dropnsw.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj getopt.obj
+	$(CL) /Fe..\bin\usearch36_t.exe $(COMP_THRO) work_thr2.obj $(THR_SUBS).obj $(COMPACC_TO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_sw.obj dropnsw.obj scale_se.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj getopt.obj $(THR_LIBS)
+
+ggsearch36_t.exe : $(COMP_THRO) work_thr2.obj $(THR_SUBS).obj $(COMPACC_TO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_gnw.obj scale_sn.obj karlin.obj $(DROPGNW_O) $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj pssm_asn_subs.obj getopt.obj
+	$(CL) /Fe..\bin\ggsearch36_t.exe $(COMP_THRO) work_thr2.obj $(THR_SUBS).obj $(COMPACC_TO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_gnw.obj $(DROPGNW_O) scale_sn.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj pssm_asn_subs.obj getopt.obj $(THR_LIBS)
+
+glsearch36_t.exe : $(COMP_THRO) work_thr2.obj $(THR_SUBS).obj $(COMPACC_TO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_lnw.obj scale_sn.obj karlin.obj $(DROPLNW_O) $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj pssm_asn_subs.obj getopt.obj  # the /Fe output name below must match this target name
+	$(CL) /Fe..\bin\glsearch36_t.exe $(COMP_THRO) work_thr2.obj $(THR_SUBS).obj $(COMPACC_TO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_lnw.obj $(DROPLNW_O) scale_sn.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj pssm_asn_subs.obj getopt.obj $(THR_LIBS)
+
+fasta36_t.exe : $(COMP_THRO) work_thr2.obj $(THR_SUBS).obj $(COMPACC_TO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fa.obj scale_se.obj karlin.obj $(DROPNFA_O) $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj getopt.obj
+	$(CL) /Fe..\bin\fasta36_t.exe $(COMP_THRO) work_thr2.obj $(THR_SUBS).obj $(COMPACC_TO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fa.obj $(DROPNFA_O) scale_se.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj getopt.obj $(THR_LIBS)
+
+fasta36s_t.exe : $(COMP_THRO) work_thr2.obj $(THR_SUBS).obj $(COMPACC_TO) showsum.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fa.obj scale_se.obj karlin.obj $(DROPNFA_O) $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj getopt.obj
+	$(CL) /Fe..\bin\fasta36s_t.exe $(COMP_THRO) work_thr2.obj $(THR_SUBS).obj $(COMPACC_TO) showsum.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fa.obj $(DROPNFA_O) scale_se.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj getopt.obj $(THR_LIBS)
+
+fasta36u_t.exe : $(COMP_THRO) work_thr2.obj $(THR_SUBS).obj $(COMPACC_TO) showun.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fa.obj scale_se.obj karlin.obj $(DROPNFA_O) $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj
+	$(CL) /Fe..\bin\fasta36u_t.exe $(COMP_THRO) work_thr2.obj $(THR_SUBS).obj $(COMPACC_TO) showun.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fa.obj $(DROPNFA_O) scale_se.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj $(THR_LIBS)
+
+fasta36r_t.exe : $(COMP_THRO) work_thr2.obj $(THR_SUBS).obj $(COMPACC_TO) showrel.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fa.obj scale_se.obj karlin.obj $(DROPNFA_O) $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj
+	$(CL) /Fe..\bin\fasta36r_t.exe $(COMP_THRO) work_thr2.obj $(THR_SUBS).obj $(COMPACC_TO) showrel.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fa.obj $(DROPNFA_O) scale_se.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj $(THR_LIBS)
+
+fastf36_t.exe : $(COMP_THRO) work_thr2.obj $(THR_SUBS).obj $(COMPACC_TO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_ff.obj scaleswtf.obj last_tat.obj tatstats_ff.obj karlin.obj $(DROPFF_O) $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj
+	$(CL) /Fe..\bin\fastf36_t.exe $(COMP_THRO) work_thr2.obj $(THR_SUBS).obj $(COMPACC_TO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_ff.obj $(DROPFF_O) scaleswtf.obj last_tat.obj tatstats_ff.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj $(THR_LIBS)
+
+fastf36s_t.exe : $(COMP_THRO) work_thr2.obj $(THR_SUBS).obj $(COMPACC_TO) showsum.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_ff.obj scaleswtf.obj karlin.obj $(DROPFF_O) $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj
+	$(CL) /Fe..\bin\fastf36s_t.exe $(COMP_THRO) work_thr2.obj $(THR_SUBS).obj $(COMPACC_TO) showsum.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_ff.obj $(DROPFF_O) scaleswtf.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj $(THR_LIBS)
+
+fasts36_t.exe : $(COMP_THRO) work_thr2.obj $(THR_SUBS).obj $(COMPACC_TO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fs.obj scaleswts.obj last_tat.obj tatstats_fs.obj karlin.obj $(DROPFS_O) $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj
+	$(CL) /Fe..\bin\fasts36_t.exe $(COMP_THRO) work_thr2.obj $(THR_SUBS).obj $(COMPACC_TO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fs.obj $(DROPFS_O) scaleswts.obj last_tat.obj tatstats_fs.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj $(THR_LIBS)
+
+fastm36_t.exe : $(COMP_THRO) work_thr2.obj $(THR_SUBS).obj $(COMPACC_TO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fm.obj scaleswts.obj last_tat.obj tatstats_fm.obj karlin.obj $(DROPFM_O) $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj  # init_fm.obj as in fastm36.exe (was init_fs.obj, the fasts init object)
+	$(CL) /Fe..\bin\fastm36_t.exe $(COMP_THRO) work_thr2.obj $(THR_SUBS).obj $(COMPACC_TO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fm.obj $(DROPFM_O) scaleswts.obj last_tat.obj tatstats_fm.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj $(THR_LIBS)
+
+fastx36_t.exe : $(COMP_THRO) work_thr2.obj $(THR_SUBS).obj $(COMPACC_TO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj c_dispn.obj htime.obj apam.obj doinit.obj init_fx.obj faatran.obj scale_se.obj karlin.obj drop_fx.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj getopt.obj
+	$(CL) /Fe..\bin\fastx36_t.exe $(COMP_THRO) work_thr2.obj $(THR_SUBS).obj $(COMPACC_TO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fx.obj drop_fx.obj faatran.obj scale_se.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj getopt.obj $(THR_LIBS)
+
+fasty36_t.exe : $(COMP_THRO) work_thr2.obj $(THR_SUBS).obj $(COMPACC_TO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj c_dispn.obj htime.obj apam.obj doinit.obj init_fy.obj faatran.obj scale_se.obj karlin.obj drop_fz.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj getopt.obj
+	$(CL) /Fe..\bin\fasty36_t.exe $(COMP_THRO) work_thr2.obj $(THR_SUBS).obj $(COMPACC_TO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fy.obj drop_fz.obj faatran.obj scale_se.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj getopt.obj $(THR_LIBS)
+
+tfasta36.exe : $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tfa.obj scale_se.obj karlin.obj $(DROPTFA_O) $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj url_subs.obj $(NRAND).obj getopt.obj  # COMPACC_SO, as in every other COMP_LIBO (non _t) program
+	$(CL) /Fe..\bin\tfasta36.exe $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tfa.obj $(DROPTFA_O) scale_se.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj url_subs.obj $(NRAND).obj getopt.obj
+
+tfasta36_t.exe : $(COMP_THRO) work_thr2.obj $(THR_SUBS).obj $(COMPACC_TO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj c_dispn.obj htime.obj apam.obj doinit.obj init_tfa.obj scale_se.obj karlin.obj $(DROPTFA_O) $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj url_subs.obj $(NRAND).obj getopt.obj
+	$(CL) /Fe..\bin\tfasta36_t.exe $(COMP_THRO) work_thr2.obj $(THR_SUBS).obj $(COMPACC_TO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tfa.obj $(DROPTFA_O) scale_se.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj url_subs.obj $(NRAND).obj getopt.obj $(THR_LIBS)
+
+tfastf36_t.exe : $(COMP_THRO) work_thr2.obj $(THR_SUBS).obj $(COMPACC_TO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj c_dispn.obj htime.obj apam.obj doinit.obj init_tf.obj  scaleswtf.obj last_tat.obj tatstats_ff.obj karlin.obj $(DROPTFF_O) $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj $(NRAND).obj url_subs.obj getopt.obj
+	$(CL) /Fe..\bin\tfastf36_t.exe $(COMP_THRO) work_thr2.obj $(THR_SUBS).obj $(COMPACC_TO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tf.obj $(DROPTFF_O) scaleswtf.obj last_tat.obj tatstats_ff.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj $(NRAND).obj url_subs.obj getopt.obj $(THR_LIBS)
+
+tfasts36_t.exe : $(COMP_THRO) work_thr2.obj $(THR_SUBS).obj $(COMPACC_TO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj c_dispn.obj htime.obj apam.obj doinit.obj init_tfs.obj scaleswts.obj last_tat.obj tatstats_fs.obj karlin.obj $(DROPTFS_O) $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj $(NRAND).obj url_subs.obj getopt.obj
+	$(CL) /Fe..\bin\tfasts36_t.exe $(COMP_THRO) work_thr2.obj $(THR_SUBS).obj $(COMPACC_TO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tfs.obj $(DROPTFS_O) scaleswts.obj last_tat.obj tatstats_fs.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj $(NRAND).obj url_subs.obj getopt.obj $(THR_LIBS)
+
+tfastx36_t.exe : $(COMP_THRO) work_thr2.obj $(THR_SUBS).obj $(COMPACC_TO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tfx.obj scale_se.obj karlin.obj drop_tfx.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj url_subs.obj $(NRAND).obj getopt.obj
+	$(CL) /Fe..\bin\tfastx36_t.exe $(COMP_THRO) work_thr2.obj $(THR_SUBS).obj $(COMPACC_TO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tfx.obj drop_tfx.obj scale_se.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj url_subs.obj $(NRAND).obj getopt.obj $(THR_LIBS)
+
+tfasty36_t.exe : $(COMP_THRO) work_thr2.obj $(THR_SUBS).obj $(COMPACC_TO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tfy.obj scale_se.obj karlin.obj drop_tfz.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj url_subs.obj $(NRAND).obj getopt.obj
+	$(CL) /Fe..\bin\tfasty36_t.exe $(COMP_THRO) work_thr2.obj $(THR_SUBS).obj $(COMPACC_TO) $(SHOWBESTO) re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tfy.obj drop_tfz.obj scale_se.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj url_subs.obj $(NRAND).obj getopt.obj $(THR_LIBS)
+
+comp_lib9.obj : comp_lib9.c mw.h structs.h defs.h param.h
+	$(CC) $(CFLAGS) -c comp_lib9.c
+
+comp_mlib9.obj : comp_lib9.c mw.h structs.h defs.h param.h
+	$(CC) $(CFLAGS) -DCOMP_MLIB -c comp_lib9.c /Focomp_mlib9.obj
+
+comp_mthr9.obj : comp_lib9.c mw.h structs.h defs.h param.h thr_bufs2.h
+	$(CC) $(CFLAGS) -DCOMP_THR -DCOMP_MLIB -c comp_lib9.c /Focomp_mthr9.obj
+
+comp_lib4.obj : comp_lib4.c mw.h structs.h defs.h param.h
+	$(CC) $(CFLAGS) -c comp_lib4.c
+
+comp_mlib4.obj : comp_lib4.c mw.h structs.h defs.h param.h
+	$(CC) $(CFLAGS) -DCOMP_MLIB -c comp_lib4.c /Focomp_mlib4.obj
+
+comp_thr2.obj : comp_lib4.c mw.h structs.h defs.h param.h thr_bufs2.h
+	$(CC) $(CFLAGS) -DCOMP_THR -c comp_lib4.c /Focomp_thr2.obj
+
+comp_mthr4.obj : comp_lib4.c mw.h structs.h defs.h param.h thr_bufs2.h
+	$(CC) $(CFLAGS) -DCOMP_THR -DCOMP_MLIB -c comp_lib4.c /Focomp_mthr4.obj
+
+work_thr2.obj : work_thr2.c mw.h structs.h defs.h param.h thr_bufs2.h
+	$(CC) $(CFLAGS) -c work_thr2.c
+
+print_pssm.exe : print_pssm.c getseq.c karlin.c apam.c
+	$(CC) /Fe..\bin\print_pssm.exe $(CFLAGS) print_pssm.c getseq.c karlin.c apam.c getopt.obj
+
+map_db.exe : map_db.c  uascii.h ncbl2_head.h
+	$(CC) /Fe..\bin\map_db.exe map_db.c
+
+list_db.exe : list_db.c
+	$(CC) /Fe..\bin\list_db.exe list_db.c
+
diff --git a/make/Makefile.nmk_icl b/make/Makefile.nmk_icl
new file mode 100755
index 0000000..35a706c
--- /dev/null
+++ b/make/Makefile.nmk_icl
@@ -0,0 +1,35 @@
+#
+# makefile for fasta3, fasta3_t.  Use makefile.pvm for pvcompxx.
+#
+# options for Intel C compiler (v9.1) 
+#
+# must be compiled/linked with /MT (or /MTd for debugging) to ensure
+# multi-threaded statically linked executables.  /MD uses dynamic
+# linking to DLLs, which may not be available on the user's machine
+
+CC= icl /O2 /MT /W1
+#CC= icl /Zi /MTd /W1
+CL= icl /O2 /MT
+#CL= icl /Zi /MTd
+
+# standard options
+CFLAGS= -DSHOW_HELP -DSHOWSIM -DWIN32 -DHZ=100  -DSAMP_STATS -DPGM_DOC -DTHR_EXIT=pthread_exit -D_CRT_SECURE_NO_WARNINGS=1 -DMAX_WORKERS=2
+
+BIN = ../bin
+XDIR = /seqprg/bin
+
+THR_SUBS = pthr_subs2
+THR_LIBS= pthreadVC2.lib
+
+DROPGSW_SSE2_O = dropgsw2_sse2.obj smith_waterman_sse2.obj wm_align.obj calcons_sw.obj
+DROPGSW_O = dropgsw2.obj wm_align.obj calcons_sw.obj
+DROPLAL_SSE2_O = droplal_sse2.obj smith_waterman_sse2.obj lsim4.obj calcons_la.obj
+DROPLAL_O = droplal.obj lsim4.obj calcons_la.obj
+
+# 
+
+# renamed (fasta36)  programs
+include ../make/Makefile36.nmk_com
+# conventional (fasta3) names
+# include ../make/Makefile.common
+
diff --git a/make/Makefile.os_x b/make/Makefile.os_x
new file mode 100644
index 0000000..f68f4e1
--- /dev/null
+++ b/make/Makefile.os_x
@@ -0,0 +1,69 @@
+# makefile for fasta3, fasta3_t.  Use makefile.pvm for pvcompxx.
+# this file works for Mac OS X (PPC)
+#
+# this file supports mmap()'ed databases in BLAST2 format use -DUSE_MMAP
+# for mmap()ed BLAST2 format.
+
+# the -DDEBUG option provides additional debugging information, particularly
+# with -D on the command line.
+
+# use -DBIG_LIB64 to generate 64-bit offsets in map_db .xin files
+
+SHELL=/bin/bash
+
+# in my hands, gcc-4.0 is about 40% slower than gcc-3.3 on the Altivec code
+CC= gcc -g -O3 -arch ppc -falign-loops=32 -O3 -maltivec -mpim-altivec -force_cpusubtype_ALL 
+# -pg -finstrument-functions  -lSaturn
+
+#CC= gcc-3.3 -g -falign-loops=32 -O3 -mcpu=7450 -faltivec
+#CC= gcc-3.3 -g -DDEBUG -mcpu=7450 -faltivec
+#CC= cc -g -Wall -pedantic -faltivec
+#
+# standard line for normal searching
+CFLAGS= -DSHOW_HELP -DSHOWSIM -DM10_CONS -DUNIX -DTIMES -DHZ=100 -DMAX_WORKERS=2 -DTHR_EXIT=pthread_exit  -DFASTA_HOST='"fasta.bioch.virginia.edu/fasta_www2"' -DUSE_MMAP -DUSE_FSEEKO -DHAS_INTTYPES -DSAMP_STATS -DPGM_DOC -DBIG_LIB64 -DMAX_MEMK=2*1024*1024
+
+# -DSUPERFAMNUM -DSFCHAR="'|'"
+
+# add for MySQL support
+# -I/usr/local/mysql/include -DMYSQL_DB
+
+HFLAGS= -o
+NFLAGS= -o
+
+THR_SUBS = pthr_subs2
+THR_LIBS =
+THR_CC =
+
+# for IBM with current pthreads
+#CC= xlc_r -v -g
+#THR_SUBS = ibm_pthr_subs2
+#THR_LIBS = -lpthreads
+#THR_CC =
+
+
+BIN = ../bin
+# directory for universal binary process
+UDIR = $(BIN)/ppc
+
+#XDIR = ${HOME}/bin
+#XDIR = /home/slib/bin/MACOSX/
+#XDIR = /Users/seqprg/bin
+XDIR = /seqprg/bin
+#XDIR = ./ppc
+
+# set up files for SSE2/Altivec acceleration
+#
+include ../make/Makefile.sse_alt
+
+# Altivec acceleration
+#
+DROPGSW_O = $(DROPGSW_ALT_O)
+DROPLAL_O = $(DROPLAL_ALT_O)
+DROPGNW_O = $(DROPGNW_NA_O)
+DROPLNW_O = $(DROPLNW_NA_O)
+
+# provide mysql function
+#include ../make/Makefile36m.common_sql
+
+# no mysql
+include ../make/Makefile36m.common
diff --git a/make/Makefile.os_x86 b/make/Makefile.os_x86
new file mode 100644
index 0000000..afe2e1b
--- /dev/null
+++ b/make/Makefile.os_x86
@@ -0,0 +1,61 @@
+# makefile for fasta3, fasta3_t.  Use makefile.pvm for pvcompxx.
+#
+#  $Name:  $ - $Id: Makefile.os_x86 750 2011-05-19 17:07:40Z wrp $
+#
+# 12-Dec-2007 - modified to allow compilation of both accelerated and
+# non-accelerated Smith-Waterman
+
+# the -DDEBUG option provides additional debugging information, particularly
+# with -D on the command line.
+
+# use -DBIG_LIB64 to generate 64-bit offsets in map_db .xin files
+
+SHELL=/bin/bash
+
+CC= gcc -g -O3 -arch i386 -msse2
+#CC= gcc -g -DDEBUG -arch i386 -msse2
+
+#CC= cc -g -Wall -pedantic
+#
+# standard line for normal searching
+CFLAGS= -DSHOW_HELP -DSHOWSIM -DM10_CONS -DUNIX -DTIMES -DHZ=100 -DMAX_WORKERS=4 -DTHR_EXIT=pthread_exit  -DFASTA_HOST='"your.fasta.host"' -DIS_LITTLE_ENDIAN -DUSE_MMAP -DUSE_FSEEKO -DHAS_INTTYPES -DSAMP_STATS -DPGM_DOC -DBIG_LIB64 -DMAX_MEMK=2*1024*1024
+# -I/usr/local/mysql/include -DMYSQL_DB  # add for MySQL support
+
+#CFLAGS= -DSHOWSIM -DM10_CONS -DUNIX -DTIMES -DHZ=60 -DMAX_WORKERS=4 -DTHR_EXIT=pthread_exit -DPROGRESS -DFASTA_HOST='"your.fasta.host"' -DIS_LITTLE_ENDIAN -DUSE_MMAP -DUSE_FSEEKO -DHAS_INTTYPES -DSAMP_STATS -DPGM_DOC -DSUPERFAMNUM -DSFCHAR="'|'"
+
+LDFLAGS= -arch i386
+
+HFLAGS= -o
+NFLAGS= -o
+
+#for Linux, MacOS, DEC Unix V4.0
+THR_SUBS = pthr_subs2
+THR_LIBS =
+THR_CC =
+
+BIN = ../bin
+#XDIR = ${HOME}/bin
+#XDIR = /home/slib/bin/MACOSX/
+#XDIR = /Users/seqprg/bin
+XDIR = /seqprg/bin
+#XDIR = ./i386
+
+# directory for universal binary process
+UDIR = $(BIN)/i386
+
+# set up files for SSE2/Altivec acceleration
+#
+include ../make/Makefile.sse_alt
+
+# SSE2 acceleration for DROPGSW/DROPLAL; DROPGNW/DROPLNW are not accelerated
+#
+DROPGSW_O = $(DROPGSW_SSE_O)
+DROPLAL_O = $(DROPLAL_SSE_O)
+DROPGNW_O = $(DROPGNW_NA_O)
+DROPLNW_O = $(DROPLNW_NA_O)
+
+# provide mysql function
+#include ../make/Makefile36m.common_mysql
+
+# no mysql
+include ../make/Makefile36m.common
diff --git a/make/Makefile.os_x86_64 b/make/Makefile.os_x86_64
new file mode 100644
index 0000000..8d3f7d3
--- /dev/null
+++ b/make/Makefile.os_x86_64
@@ -0,0 +1,61 @@
+# makefile for fasta3, fasta3_t.  Use makefile.pvm for pvcompxx.
+#
+#  $Name:  $ - $Id: Makefile.os_x86_64 1228 2013-09-26 19:46:29Z wrp $
+#
+# 12-Dec-2007 - modified to allow compilation of both accelerated and
+# non-accelerated Smith-Waterman
+
+# the -DDEBUG option provides additional debugging information, particularly
+# with -D on the command line.
+
+# use -DBIG_LIB64 to generate 64-bit offsets in map_db .xin files
+
+SHELL=/bin/bash
+
+CC= cc -O -g -arch x86_64 -msse2
+#CC= cc -g -DDEBUG -fsanitize=address -arch x86_64 -msse2
+
+#CC= cc -g -Wall -pedantic
+#
+# standard line for normal searching
+CFLAGS= -DSHOW_HELP -DSHOWSIM -DUNIX -DTIMES -DHZ=100 -DMAX_WORKERS=4 -DTHR_EXIT=pthread_exit  -DM10_CONS -DFASTA_HOST='"your.fasta.host.here"' -DIS_LITTLE_ENDIAN -DUSE_MMAP -DUSE_FSEEKO -DHAS_INTTYPES -DSAMP_STATS -DPGM_DOC -DBIG_LIB64 -DLIB_MEM=12
+
+#-DSHOW_ALIGN_SCORE
+# -I/usr/include/mysql -DMYSQL_DB  # add for MySQL support
+
+LDFLAGS= -arch x86_64
+
+HFLAGS= -o
+NFLAGS= -o
+
+#for Linux, MacOS, DEC Unix V4.0
+THR_SUBS = pthr_subs2
+THR_LIBS =
+THR_CC =
+
+BIN = ../bin
+#XDIR = ${HOME}/bin
+#XDIR = /home/slib/bin/MACOSX/
+#XDIR = /Users/seqprg/bin
+XDIR = /seqprg/bin
+#XDIR = ./x86_64
+
+# directory for universal binary process
+UDIR = $(BIN)/x86_64
+
+# set up files for SSE2/Altivec acceleration
+#
+include ../make/Makefile.sse_alt
+
+# SSE2 acceleration
+#
+DROPGSW_O = $(DROPGSW_SSE_O)
+DROPLAL_O = $(DROPLAL_SSE_O)
+DROPGNW_O = $(DROPGNW_SSE_O)
+DROPLNW_O = $(DROPLNW_SSE_O)
+
+# provide mysql function
+#include ../make/Makefile36m.common_mysql
+
+# no mysql
+include ../make/Makefile36m.common
diff --git a/make/Makefile.os_x86_clang b/make/Makefile.os_x86_clang
new file mode 100644
index 0000000..83397a8
--- /dev/null
+++ b/make/Makefile.os_x86_clang
@@ -0,0 +1,61 @@
+# makefile for fasta3, fasta3_t.  Use makefile.pvm for pvcompxx.
+#
+#  $Name:  $ - $Id: Makefile.os_x86_clang 581 2011-02-28 03:45:26Z wrp $
+#
+# 12-Dec-2007 - modified to allow compilation of both accelerated and
+# non-accelerated Smith-Waterman
+
+# the -DDEBUG option provides additional debugging information, particularly
+# with -D on the command line.
+
+# use -DBIG_LIB64 to generate 64-bit offsets in map_db .xin files
+
+SHELL=/bin/bash
+
+CC= clang -g -O -arch x86_64 -msse2
+#CC= clang -g -DDEBUG -arch x86_64 -msse2
+
+#CC= cc -g -Wall -pedantic
+#
+# standard line for normal searching
+CFLAGS= -DSHOWSIM -DM10_CONS -DUNIX -DTIMES -DHZ=100 -DMAX_WORKERS=4 -DTHR_EXIT=pthread_exit  -DFASTA_HOST='"xs00.achs.virginia.edu/fasta_www/cgi"' -DIS_LITTLE_ENDIAN -DUSE_MMAP -DUSE_FSEEKO -DHAS_INTTYPES -DSAMP_STATS -DPGM_DOC -DBIG_LIB64
+# -I/usr/local/mysql/include -DMYSQL_DB  # add for MySQL support
+
+#CFLAGS= -DSHOWSIM -DM10_CONS -DUNIX -DTIMES -DHZ=60 -DMAX_WORKERS=4 -DTHR_EXIT=pthread_exit -DPROGRESS -DFASTA_HOST='"xs00.achs.virginia.edu/fasta_www/cgi"' -DIS_LITTLE_ENDIAN -DUSE_MMAP -DUSE_FSEEKO -DHAS_INTTYPES -DSAMP_STATS -DPGM_DOC -DSUPERFAMNUM -DSFCHAR="'|'"
+
+LDFLAGS= -arch x86_64
+
+HFLAGS= -o
+NFLAGS= -o
+
+#for Linux, MacOS, DEC Unix V4.0
+THR_SUBS = pthr_subs2
+THR_LIBS =
+THR_CC =
+
+BIN = ../bin
+#XDIR = ${HOME}/bin
+#XDIR = /home/slib/bin/MACOSX/
+#XDIR = /Users/seqprg/bin
+XDIR = /seqprg/bin
+#XDIR = ./x86_64
+
+# directory for universal binary process
+UDIR = $(BIN)/x86_64
+
+# set up files for SSE2/Altivec acceleration
+#
+include ../make/Makefile.sse_alt
+
+# SSE2 acceleration.  DROPGNW_O/DROPLNW_O are set as in Makefile.os_x86_64
+# (the ggsearch36/glsearch36 link rules use them).
+DROPGSW_O = $(DROPGSW_SSE_O)
+DROPLAL_O = $(DROPLAL_SSE_O)
+DROPGNW_O = $(DROPGNW_SSE_O)
+DROPLNW_O = $(DROPLNW_SSE_O)
+
+# provide mysql function
+#include ../make/Makefile36m.common_mysql
+
+# no mysql
+include ../make/Makefile36m.common
diff --git a/make/Makefile.os_x86_icc b/make/Makefile.os_x86_icc
new file mode 100644
index 0000000..290093f
--- /dev/null
+++ b/make/Makefile.os_x86_icc
@@ -0,0 +1,61 @@
+# makefile for fasta3, fasta3_t.  Use makefile.pvm for pvcompxx.
+#
+#  $Name:  $ - $Id: Makefile.os_x86_icc 488 2011-01-21 17:38:53Z wrp $
+#
+# 12-Dec-2007 - modified to allow compilation of both accelerated and
+# non-accelerated Smith-Waterman
+
+# the -DDEBUG option provides additional debugging information, particularly
+# with -D on the command line.
+
+# use -DBIG_LIB64 to generate 64-bit offsets in map_db .xin files
+
+SHELL=/bin/bash
+
+CC= icc -g -O -m64	# intel icc compiler
+#CC= icc -g -DDEBUG -m64
+
+#CC= cc -g -Wall -pedantic
+#
+# standard line for normal searching
+CFLAGS= -DSHOWSIM -DM10_CONS -DUNIX -DTIMES -DHZ=100 -DMAX_WORKERS=4 -DTHR_EXIT=pthread_exit  -DFASTA_HOST='"xs00.achs.virginia.edu/fasta_www/cgi"' -DIS_LITTLE_ENDIAN -DUSE_MMAP -DUSE_FSEEKO -DHAS_INTTYPES -DSAMP_STATS -DPGM_DOC -DBIG_LIB64
+# -I/usr/local/mysql/include -DMYSQL_DB  # add for MySQL support
+
+#CFLAGS= -DSHOWSIM -DM10_CONS -DUNIX -DTIMES -DHZ=60 -DMAX_WORKERS=4 -DTHR_EXIT=pthread_exit -DPROGRESS -DFASTA_HOST='"xs00.achs.virginia.edu/fasta_www/cgi"' -DIS_LITTLE_ENDIAN -DUSE_MMAP -DUSE_FSEEKO -DHAS_INTTYPES -DSAMP_STATS -DPGM_DOC -DSUPERFAMNUM -DSFCHAR="'|'"
+
+LDFLAGS= -arch x86_64
+
+HFLAGS= -o
+NFLAGS= -o
+
+#for Linux, MacOS, DEC Unix V4.0
+THR_SUBS = pthr_subs2
+THR_LIBS =
+THR_CC =
+
+BIN = ../bin
+#XDIR = ${HOME}/bin
+#XDIR = /home/slib/bin/MACOSX/
+#XDIR = /Users/seqprg/bin
+XDIR = /seqprg/bin
+#XDIR = ./x86_64
+
+# directory for universal binary process
+UDIR = $(BIN)/x86_64
+
+# set up files for SSE2/Altivec acceleration
+#
+include ../make/Makefile.sse_alt
+
+# SSE2 acceleration
+#
+DROPGSW_O = $(DROPGSW_SSE_O)
+DROPLAL_O = $(DROPLAL_SSE_O)
+DROPGNW_O = $(DROPGNW_SSE_O)
+DROPLNW_O = $(DROPLNW_SSE_O)
+
+# provide mysql function
+#include ../make/Makefile36m.common_mysql
+
+# no mysql
+include ../make/Makefile36m.common
diff --git a/make/Makefile.pLinux b/make/Makefile.pLinux
new file mode 100644
index 0000000..bed1ec0
--- /dev/null
+++ b/make/Makefile.pLinux
@@ -0,0 +1,83 @@
+# $Name:  $ - $Id: Makefile.pLinux 488 2011-01-21 17:38:53Z wrp $
+#
+# makefile for fasta3, fasta3_t.  Use makefile.pvm for pvcompxx.
+# this file targets IBM Linux (pLinux) with the xlc_r compiler
+#
+# this file supports mmap()'ed databases in BLAST2 format; use -DUSE_MMAP
+# to enable mmap()ed BLAST2 format.
+
+# the -DDEBUG option provides additional debugging information, particularly
+# with -D on the command line.
+
+# use -DBIG_LIB64 to generate and use 64-bit offsets in map_db .xin
+# files
+
+# for Tru64 4.0F, no "<inttypes.h>" 4.0G has inttypes.h
+
+CC= xlc_r
+
+#CC= cc -g3 -O -std1
+#CC= insure -g -DDEBUG
+#CC= cc -g -DDEBUG -std1
+
+#CC= gcc -g -Wall
+#
+# standard line for normal searching
+CFLAGS= -O3 -qtune=auto -qarch=auto -DUNIX -DTIMES -DBIGMEM -DMAX_WORKERS=4 -DSFCHAR="':'" -DTHR_EXIT=pthread_exit  -DUSE_MMAP -DIS_BIG_ENDIAN -DSAMP_STATS -DPGM_DOC -D_LARGE_FILES -DHAS_INTTYPES -D__pLinux__ -DBIG_LIB64
+#
+#(-DMYSQL_DB for mySQL databases)  (also requires including Makefile36m.common_sql instead of Makefile36m.common)
+
+# special options for SUPERFAMILIES
+#CFLAGS= -DM10_CONS -DUNIX -DTIMES -DHZ=60 -DBIGMEM -DSFCHAR="'|'" -DMAX_WORKERS=4 -DTHR_EXIT=pthread_exit -DPROGRESS -DSUPERFAMNUM -DIS_LITTLE_ENDIAN -DUSE_MMAP -DMAXBEST=200000
+ 
+LIB_M = -lm
+#LIB_M = -L/usr/local/lib/mysql -lmysqlclient -lm
+# for mySQL databases
+
+HFLAGS= -o
+NFLAGS= -o
+
+#for DEC Unix V4.0 
+#THR_SUBS = pthr_subs2
+#THR_LIBS = -lpthreads
+#THR_CC =
+
+#for Sun
+#THR_SUBS = uthr_subs
+#THR_LIBS = -lthread
+#THR_CC =
+#
+# for SGI with current pthreads
+#THR_SUBS = pthr_subs
+#THR_LIBS = -lpthreads
+#THR_CC = 
+#
+# for IBM with current pthreads
+#CC= xlc_r -v -g
+#THR_SUBS = ibm_pthr_subs
+#THR_LIBS = -lpthreads
+#THR_CC =
+
+
+# for IBM Linux with current pthreads
+THR_SUBS = pthr_subs2
+THR_LIBS = -lpthread
+
+BIN = ../bin
+XDIR = /seqprg/slib/bin
+
+# set up files for SSE2/Altivec acceleration
+#
+include ../make/Makefile.sse_alt
+
+# no acceleration
+#
+DROPGSW_O = $(DROPGSW_NA_O)
+DROPLAL_O = $(DROPLAL_NA_O)
+DROPGNW_O = $(DROPGNW_NA_O)
+DROPLNW_O = $(DROPLNW_NA_O)
+
+# renamed (fasta34)  programs
+#include ../make/Makefile36m.common_sql
+include ../make/Makefile36m.common
+
diff --git a/make/Makefile.pLinux_sql b/make/Makefile.pLinux_sql
new file mode 100644
index 0000000..04ea521
--- /dev/null
+++ b/make/Makefile.pLinux_sql
@@ -0,0 +1,81 @@
+# $Name:  $ - $Id: Makefile.pLinux_sql 488 2011-01-21 17:38:53Z wrp $
+#
+# makefile for fasta3, fasta3_t.  Use makefile.pvm for pvcompxx.
+# this file targets IBM Linux (pLinux) with the xlc_r compiler and MySQL
+#
+# this file supports mmap()'ed databases in BLAST2 format; use -DUSE_MMAP
+# to enable mmap()ed BLAST2 format.
+
+# the -DDEBUG option provides additional debugging information, particularly
+# with -D on the command line.
+
+# use -DBIG_LIB64 to generate and use 64-bit offsets in map_db .xin
+# files
+
+# for Tru64 4.0F, no "<inttypes.h>" 4.0G has inttypes.h
+
+CC= xlc_r
+
+#CC= cc -g3 -O -std1
+#CC= insure -g -DDEBUG
+#CC= cc -g -DDEBUG -std1
+
+#CC= gcc -g -Wall
+#
+
+CFLAGS= -O3 -qtune=auto -qarch=auto -DUNIX -DTIMES -DBIGMEM -DMAX_WORKERS=4 -DSFCHAR="':'" -DTHR_EXIT=pthread_exit  -DUSE_MMAP -DIS_BIG_ENDIAN -DSAMP_STATS -DPGM_DOC -D_LARGE_FILES -DHAS_INTTYPES -D__pLinux__ -DFASTA_HOST='"fasta.bioch.virginia.edu/fasta/cgi"' -I/usr/include/mysql -DMYSQL_DB   
+#
+#(-DMYSQL_DB for mySQL databases)  (also requires Makefile36m.common_sql, included below)
+
+# special options for SUPERFAMILIES
+#CFLAGS= -DM10_CONS -DUNIX -DTIMES -DHZ=60 -DBIGMEM -DSFCHAR="'|'" -DMAX_WORKERS=4 -DTHR_EXIT=pthread_exit -DPROGRESS -DSUPERFAMNUM -DIS_LITTLE_ENDIAN -DUSE_MMAP -DMAXBEST=200000
+ 
+#LIB_M = -lm
+LIB_M = -L/usr/local/lib/mysql -lmysqlclient -lm
+# for mySQL databases
+
+HFLAGS= -o
+NFLAGS= -o
+
+#for DEC Unix V4.0 
+#THR_SUBS = pthr_subs2
+#THR_LIBS = -threads
+#THR_CC =
+
+#for Sun
+#THR_SUBS = uthr_subs
+#THR_LIBS = -lthread
+#THR_CC =
+#
+# for SGI with current pthreads
+#THR_SUBS = pthr_subs
+#THR_LIBS = -lpthreads
+#THR_CC = 
+#
+# for IBM with current pthreads
+#CC= xlc_r -v -g
+#THR_SUBS = ibm_pthr_subs
+#THR_LIBS = -lpthreads
+#THR_CC =
+
+# for IBM Linux with current pthreads
+THR_SUBS = pthr_subs2
+THR_LIBS = -lpthread
+
+BIN = ../bin
+XDIR = /seqprg/slib/bin
+
+# set up files for SSE2/Altivec acceleration
+#
+include ../make/Makefile.sse_alt
+
+# no acceleration
+#
+DROPGSW_O = $(DROPGSW_NA_O)
+DROPLAL_O = $(DROPLAL_NA_O)
+DROPGNW_O = $(DROPGNW_NA_O)
+DROPLNW_O = $(DROPLNW_NA_O)
+
+# renamed (fasta34)  programs
+include ../make/Makefile36m.common_sql
+
diff --git a/make/Makefile.pcom b/make/Makefile.pcom
new file mode 100644
index 0000000..ee00810
--- /dev/null
+++ b/make/Makefile.pcom
@@ -0,0 +1,229 @@
+
+# combinations of files for "composite" drop* functions
+#
+DROPNSW_O = dropnsw.o  lwm_align.o calcons_sw.o
+DROPNFA_O = drop_nfa.o lwm_align.o calcons_fa.o
+DROPGGL_O = drop_nfa.o gwm_align.o calcons_fa.o
+DROPBD_O = dropsbd.o wm_align.o calcons_fa.o
+DROPTFA_O = drop_tfa.o wm_align.o calcons_tfa.o
+DROPFF_O = drop_ff2.o calcons_ff.o
+DROPFS_O = drop_fs2.o calcons_fs.o
+DROPFM_O = drop_fm.o calcons_fm.o
+DROPTFF_O = drop_tff.o calcons_tff.o
+DROPTFS_O = drop_tfs.o calcons_tfs.o
+DROPTFM_O = drop_tfm.o calcons_tfm.o
+
+#COMPACC_TO = compacc.o	# used with comp_lib5.c/comp_lib7.c
+#COMPACC_SO = compacc.o
+COMPACC_TO = compacc2_t.o  # used with comp_lib5e.c/comp_lib7e.c/comp_lib8.c
+COMPACC_SO = compacc2_s.o
+
+SHOWBESTC = mshowbest.c
+SHOWBESTO = showbest.o build_ares.o
+SHOWALIGN = mshowalign2
+SHOWALIGN_T = mshowalign2_t
+SHOWALIGN_S = mshowalign2_s
+LSHOWALIGN = lshowalign
+MWH = mw.h 
+MWHP = mw.h
+
+TPROGS = ssearch36_t fasta36_t  fasts36_t fastx36_t tfastx36_t fasty36_t tfasty36_t tfasts36_t fastm36_t fastf36_t tfastf36_t glsearch36_t ggsearch36_t
+
+SPROGS = fasta36 ssearch36 lalign36 fasts36 fastx36 tfastx36 fasty36 tfasty36 tfasts36 fastm36 tfastm36 fastf36 tfastf36 glsearch36 ggsearch36
+
+APROGS = map_db
+
+XTPROGS = fastx36_t tfastx36_t fasty36_t tfasty36_t
+XPROGS = fastx36 tfastx36  fasty36 tfasty36
+
+PROGS = $(SPROGS) $(TPROGS) $(APROGS)
+
+all: $(PROGS)
+
+tall: $(TPROGS)
+
+sall: $(SPROGS)
+
+xall: $(XTPROGS) $(XPROGS) $(ZTPROGS) $(ZPROGS) 
+
+clean-up:
+	rm -f *.o $(PROGS) $(APROGS); 	rm -rf $(BIN)/*
+
+install: $(PROGS)	# copy all programs from $(BIN) to $(XDIR)
+	cd $(BIN) && cp $(PROGS) $(XDIR)
+
+uinstall: $(PROGS)	# copy all programs from $(BIN) to $(UDIR)
+	cd $(BIN) && cp $(PROGS) $(UDIR)
+
+sinstall: $(SPROGS)	# copy only the $(SPROGS) programs to $(XDIR)
+	cd $(BIN) && cp $(SPROGS) $(XDIR)
+
+tinstall: $(TPROGS)	# copy only the $(TPROGS) programs to $(XDIR)
+	cd $(BIN) && cp $(TPROGS) $(XDIR)
+
+fasta36 : $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o init_fa.o scale_se.o karlin.o $(DROPNFA_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o mrandom.o url_subs.o
+	$(CC) $(HFLAGS) $(BIN)/fasta36 $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o init_fa.o $(DROPNFA_O) scale_se.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o $(LIB_M)
+
+fastx36 : $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o init_fx.o scale_se.o karlin.o drop_fx.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o mrandom.o
+	$(CC) $(HFLAGS) $(BIN)/fastx36 $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o init_fx.o drop_fx.o scale_se.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o mrandom.o $(LIB_M)
+
+fasty36 : $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o init_fy.o scale_se.o karlin.o drop_fz.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o mrandom.o
+	$(CC) $(HFLAGS) $(BIN)/fasty36 $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o init_fy.o drop_fz.o scale_se.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o mrandom.o $(LIB_M)
+
+fastf36 : $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o init_ff.o scaleswts.o last_tat.o tatstats_ff.o karlin.o $(DROPFF_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o mrandom.o url_subs.o
+	$(CC) $(HFLAGS) $(BIN)/fastf36 $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o init_ff.o $(DROPFF_O) scaleswts.o last_tat.o tatstats_ff.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o mrandom.o url_subs.o $(LIB_M)
+
+fasts36 : $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o init_fs.o scaleswts.o last_tat.o tatstats_fs.o karlin.o $(DROPFS_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o mrandom.o url_subs.o
+	$(CC) $(HFLAGS) $(BIN)/fasts36 $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o init_fs.o $(DROPFS_O) scaleswts.o last_tat.o tatstats_fs.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o mrandom.o url_subs.o $(LIB_M)
+
+fastm36 : $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o init_fm.o scaleswts.o last_tat.o tatstats_fm.o karlin.o $(DROPFM_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o mrandom.o url_subs.o
+	$(CC) $(HFLAGS) $(BIN)/fastm36 $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o init_fm.o $(DROPFM_O) scaleswts.o last_tat.o tatstats_fm.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o mrandom.o url_subs.o $(LIB_M)
+
+tfastx36 : $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o init_tfx.o scale_se.o karlin.o drop_tfx.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o mrandom.o
+	$(CC) $(HFLAGS) $(BIN)/tfastx36 $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o init_tfx.o drop_tfx.o scale_se.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o mrandom.o $(LIB_M)
+
+tfasty36 : $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o init_tfy.o scale_se.o karlin.o drop_tfz.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o mrandom.o
+	$(CC) $(HFLAGS) $(BIN)/tfasty36 $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o init_tfy.o drop_tfz.o scale_se.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o mrandom.o $(LIB_M)
+
+tfastf36 : $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o init_tf.o scaleswtf.o last_tat.o tatstats_ff.o karlin.o $(DROPTFF_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o mrandom.o url_subs.o
+	$(CC) $(HFLAGS) $(BIN)/tfastf36 $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o init_tf.o $(DROPTFF_O) scaleswtf.o last_tat.o tatstats_ff.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o mrandom.o url_subs.o $(LIB_M)
+
+tfastf36s : $(COMP_LIBO) $(COMPACC_SO) showsum.o $(SHOWALIGN_S).o htime.o apam.o doinit.o init_tf.o scaleswtf.o karlin.o $(DROPTFF_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o mrandom.o url_subs.o
+	$(CC) $(HFLAGS) $(BIN)/tfastf36s $(COMP_LIBO) $(COMPACC_SO) showsum.o $(SHOWALIGN_S).o htime.o apam.o doinit.o init_tf.o $(DROPTFF_O) scaleswtf.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o mrandom.o url_subs.o $(LIB_M)
+
+tfasts36 : $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o init_tfs.o scaleswts.o tatstats_fs.o last_tat.o karlin.o $(DROPTFS_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o mrandom.o url_subs.o
+	$(CC) $(HFLAGS) $(BIN)/tfasts36 $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o init_tfs.o $(DROPTFS_O) scaleswts.o tatstats_fs.o last_tat.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o mrandom.o url_subs.o $(LIB_M)
+
+tfastm36 : $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o init_tfm.o scaleswts.o tatstats_fm.o last_tat.o karlin.o $(DROPTFM_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o mrandom.o url_subs.o
+	$(CC) $(HFLAGS) $(BIN)/tfastm36 $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o init_tfm.o $(DROPTFM_O) scaleswts.o tatstats_fm.o last_tat.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o mrandom.o url_subs.o $(LIB_M)
+
+ssearch36 : $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o scale_se.o karlin.o $(DROPGSW_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o pssm_asn_subs.o
+	$(CC) $(HFLAGS) $(BIN)/ssearch36 $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o $(DROPGSW_O) scale_se.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o pssm_asn_subs.o $(LIB_M)
+
+# do not use accelerated Smith-Waterman
+ssearch36s : $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o scale_se.o karlin.o $(DROPGSW_NA_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o pssm_asn_subs.o
+	$(CC) $(HFLAGS) $(BIN)/ssearch36s $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o $(DROPGSW_NA_O) scale_se.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o pssm_asn_subs.o $(LIB_M)
+
+lalign36 : $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(LSHOWALIGN).o htime.o apam.o doinit.o init_lal.o scale_se.o karlin.o last_thresh.o $(DROPLAL_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o pssm_asn_subs.o
+	$(CC) $(HFLAGS) $(BIN)/lalign36 $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(LSHOWALIGN).o htime.o apam.o doinit.o init_lal.o $(DROPLAL_O) scale_se.o karlin.o last_thresh.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o pssm_asn_subs.o $(LIB_M)
+
+osearch36 : $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o init_ssw.o scale_se.o karlin.o $(DROPNSW_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o
+	$(CC) $(HFLAGS) $(BIN)/osearch36 $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o init_ssw.o $(DROPNSW_O) scale_se.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o $(LIB_M)
+
+glsearch36 : $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o scale_sn.o karlin.o $(DROPLNW_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o pssm_asn_subs.o
+	$(CC) $(HFLAGS) $(BIN)/glsearch36 $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o $(DROPLNW_O) scale_sn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o pssm_asn_subs.o $(LIB_M)
+
+ggsearch36 : $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o scale_sn.o karlin.o $(DROPGNW_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o pssm_asn_subs.o
+	$(CC) $(HFLAGS) $(BIN)/ggsearch36 $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o $(DROPGNW_O) scale_sn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o pssm_asn_subs.o $(LIB_M)
+
+prss36 : ssearch36	# prss36 is a symlink to ssearch36, created next to it in $(BIN)
+	ln -sf ssearch36 $(BIN)/prss36
+
+ssearch36_t : $(COMP_THRO) ${WORK_THRO}  $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o scale_se.o karlin.o $(DROPGSW_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o pssm_asn_subs.o
+	$(CC) $(HFLAGS) $(BIN)/ssearch36_t $(COMP_THRO) ${WORK_THRO}  $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o $(DROPGSW_O) scale_se.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o pssm_asn_subs.o $(LIB_M) $(THR_LIBS)
+
+ssearch36s_t : $(COMP_THRO) ${WORK_THRO}  $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o scale_se.o karlin.o $(DROPGSW_NA_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o pssm_asn_subs.o
+	$(CC) $(HFLAGS) $(BIN)/ssearch36s_t $(COMP_THRO) ${WORK_THRO}  $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o $(DROPGSW_NA_O) scale_se.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o pssm_asn_subs.o $(LIB_M) $(THR_LIBS)
+
+glsearch36_t : $(COMP_THRO) ${WORK_THRO}  $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o scale_sn.o karlin.o $(DROPLNW_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o pssm_asn_subs.o
+	$(CC) $(HFLAGS) $(BIN)/glsearch36_t $(COMP_THRO) ${WORK_THRO}  $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o $(DROPLNW_O) scale_sn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o pssm_asn_subs.o $(LIB_M) $(THR_LIBS)
+
+glsearch36s_t : $(COMP_THRO) ${WORK_THRO}  $(THR_SUBS).o $(COMPACC_TO) showsum.o re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o scale_sn.o karlin.o $(DROPLNW_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o pssm_asn_subs.o
+	$(CC) $(HFLAGS) $(BIN)/glsearch36s_t $(COMP_THRO) ${WORK_THRO}  $(THR_SUBS).o $(COMPACC_TO) showsum.o re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o $(DROPLNW_O) scale_sn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o pssm_asn_subs.o $(LIB_M) $(THR_LIBS)
+
+ggsearch36_t : $(COMP_THRO) ${WORK_THRO}  $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o scale_sn.o karlin.o $(DROPGNW_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o pssm_asn_subs.o
+	$(CC) $(HFLAGS) $(BIN)/ggsearch36_t $(COMP_THRO) ${WORK_THRO}  $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o $(DROPGNW_O) scale_sn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o pssm_asn_subs.o $(LIB_M) $(THR_LIBS)
+
+ggsearch36s_t : $(COMP_THRO) ${WORK_THRO}  $(THR_SUBS).o $(COMPACC_TO) showsum.o re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o scale_sn.o karlin.o $(DROPGNW_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o pssm_asn_subs.o
+	$(CC) $(HFLAGS) $(BIN)/ggsearch36s_t $(COMP_THRO) ${WORK_THRO}  $(THR_SUBS).o $(COMPACC_TO) showsum.o re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o $(DROPGNW_O) scale_sn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o pssm_asn_subs.o $(LIB_M) $(THR_LIBS)
+
+fasta36_t : $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_fa.o scale_se.o karlin.o $(DROPNFA_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o
+	$(CC) $(HFLAGS) $(BIN)/fasta36_t $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_fa.o $(DROPNFA_O) scale_se.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o $(LIB_M) $(THR_LIBS)
+
+fasta36sum_t : $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) showsum.o re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_fa.o scale_se.o karlin.o $(DROPNFA_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o
+	$(CC) $(HFLAGS) $(BIN)/fasta36sum_t $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) showsum.o re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_fa.o $(DROPNFA_O) scale_se.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o $(LIB_M) $(THR_LIBS)
+
+fasta36u_t : $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) showun.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_fa.o scale_se.o karlin.o $(DROPNFA_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o mrandom.o url_subs.o
+	$(CC) $(HFLAGS) $(BIN)/fasta36u_t $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) showun.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_fa.o $(DROPNFA_O) scale_se.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o mrandom.o url_subs.o $(LIB_M) $(THR_LIBS)
+
+fasta36r_t : $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) showrel.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_fa.o scale_se.o karlin.o $(DROPNFA_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o mrandom.o url_subs.o
+	$(CC) $(HFLAGS) $(BIN)/fasta36r_t $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) showrel.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_fa.o $(DROPNFA_O) scale_se.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o mrandom.o url_subs.o $(LIB_M) $(THR_LIBS)
+
+fastf36_t : $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_ff.o scaleswtf.o last_tat.o tatstats_ff.o karlin.o $(DROPFF_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o mrandom.o url_subs.o
+	$(CC) $(HFLAGS) $(BIN)/fastf36_t $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_ff.o $(DROPFF_O) scaleswtf.o last_tat.o tatstats_ff.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o mrandom.o url_subs.o $(LIB_M) $(THR_LIBS)
+
+fastf36s_t : $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) showsum.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_ff.o scaleswtf.o karlin.o $(DROPFF_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o mrandom.o url_subs.o
+	$(CC) $(HFLAGS) $(BIN)/fastf36s_t $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) showsum.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_ff.o $(DROPFF_O) scaleswtf.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o mrandom.o url_subs.o $(LIB_M) $(THR_LIBS)
+
+fasts36_t : $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_fs.o scaleswts.o last_tat.o tatstats_fs.o karlin.o $(DROPFS_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o mrandom.o url_subs.o
+	$(CC) $(HFLAGS) $(BIN)/fasts36_t $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_fs.o $(DROPFS_O) scaleswts.o last_tat.o tatstats_fs.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o mrandom.o url_subs.o $(LIB_M) $(THR_LIBS)
+
+fastm36_t : $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_fm.o scaleswts.o last_tat.o tatstats_fm.o karlin.o $(DROPFM_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o mrandom.o url_subs.o
+	$(CC) $(HFLAGS) $(BIN)/fastm36_t $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_fm.o $(DROPFM_O) scaleswts.o last_tat.o tatstats_fm.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o mrandom.o url_subs.o $(LIB_M) $(THR_LIBS)
+
+fastx36_t : $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o c_dispn.o htime.o apam.o doinit.o init_fx.o faatran.o scale_se.o karlin.o drop_fx.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o
+	$(CC) $(HFLAGS) $(BIN)/fastx36_t $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_fx.o drop_fx.o faatran.o scale_se.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o $(LIB_M) $(THR_LIBS)
+
+fasty36_t : $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o c_dispn.o htime.o apam.o doinit.o init_fy.o faatran.o scale_se.o karlin.o drop_fz.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o
+	$(CC) $(HFLAGS) $(BIN)/fasty36_t $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_fy.o drop_fz.o faatran.o scale_se.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o $(LIB_M) $(THR_LIBS)
+
+tfasta36 : $(COMP_LIBO) compacc.o $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_tfa.o scale_se.o karlin.o $(DROPTFA_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o mrandom.o
+	$(CC) $(HFLAGS) $(BIN)/tfasta36 $(COMP_LIBO) compacc.o $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_tfa.o $(DROPTFA_O) scale_se.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o mrandom.o $(LIB_M)
+
+tfasta36_t : $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_tfa.o scale_se.o karlin.o $(DROPTFA_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o mrandom.o
+	$(CC) $(HFLAGS) $(BIN)/tfasta36_t $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_tfa.o $(DROPTFA_O) scale_se.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o mrandom.o $(LIB_M) $(THR_LIBS)
+
+tfastf36_t : $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_tf.o scaleswtf.o last_tat.o tatstats_ff.o karlin.o $(DROPTFF_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o mrandom.o url_subs.o
+	$(CC) $(HFLAGS) $(BIN)/tfastf36_t $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_tf.o $(DROPTFF_O) scaleswtf.o last_tat.o tatstats_ff.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o mrandom.o url_subs.o $(LIB_M) $(THR_LIBS)
+
+tfasts36_t : $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_tfs.o scaleswts.o last_tat.o tatstats_fs.o karlin.o $(DROPTFS_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o mrandom.o url_subs.o
+	$(CC) $(HFLAGS) $(BIN)/tfasts36_t $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_tfs.o $(DROPTFS_O) scaleswts.o last_tat.o tatstats_fs.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o mrandom.o url_subs.o $(LIB_M) $(THR_LIBS)
+
+tfastx36_t : $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_tfx.o scale_se.o karlin.o drop_tfx.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o mrandom.o
+	$(CC) $(HFLAGS) $(BIN)/$@ $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_tfx.o drop_tfx.o scale_se.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o mrandom.o $(LIB_M) $(THR_LIBS)
+
+tfasty36_t : $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_tfy.o scale_se.o karlin.o drop_tfz.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o mrandom.o
+	$(CC) $(HFLAGS) $(BIN)/$@ $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_tfy.o drop_tfz.o scale_se.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o mrandom.o $(LIB_M) $(THR_LIBS)
+
+comp_mlib5e.o : comp_lib5e.c mw.h structs.h defs.h param.h
+	$(CC) $(THR_CC) $(CFLAGS) -DCOMP_MLIB -c comp_lib5e.c -o $@
+
+comp_mthr5e.o : comp_lib5e.c mw.h structs.h defs.h param.h thr_bufs2.h thr_buf_structs.h
+	$(CC) $(THR_CC) $(CFLAGS) -DCOMP_THR -DCOMP_MLIB -c comp_lib5e.c -o $@
+
+comp_mlib7e.o : comp_lib7e.c mw.h structs.h defs.h param.h
+	$(CC) $(THR_CC) $(CFLAGS) -DCOMP_MLIB -c comp_lib7e.c -o $@
+
+comp_mthr7e.o : comp_lib7e.c mw.h structs.h defs.h param.h thr_bufs2.h thr_buf_structs.h
+	$(CC) $(THR_CC) $(CFLAGS) -DCOMP_THR -DCOMP_MLIB -c comp_lib7e.c -o $@
+
+comp_mlib8.o : comp_lib8.c mw.h structs.h defs.h param.h
+	$(CC) $(THR_CC) $(CFLAGS) -DCOMP_MLIB -c comp_lib8.c -o $@
+
+comp_mthr8.o : comp_lib8.c mw.h structs.h defs.h param.h thr_bufs2.h thr_buf_structs.h
+	$(CC) $(THR_CC) $(CFLAGS) -DCOMP_THR -DCOMP_MLIB -c comp_lib8.c -o $@
+
+comp_mlib9.o : comp_lib9.c mw.h structs.h defs.h param.h
+	$(CC) $(THR_CC) $(CFLAGS) -DCOMP_MLIB -c comp_lib9.c -o $@
+
+comp_mthr9.o : comp_lib9.c mw.h structs.h defs.h param.h thr_bufs2.h thr_buf_structs.h
+	$(CC) $(THR_CC) $(CFLAGS) -DCOMP_THR -DCOMP_MLIB -c comp_lib9.c -o $@
+
+work_thr2.o : work_thr2.c mw.h structs.h defs.h param.h thr_bufs2.h thr_buf_structs.h
+	$(CC) $(THR_CC) $(CFLAGS) -c work_thr2.c -o $@
+
+print_pssm : print_pssm.c getseq.c karlin.c apam.c pssm_asn_subs.c
+	$(CC) -o print_pssm $(CFLAGS) print_pssm.c getseq.c karlin.c apam.c pssm_asn_subs.c $(LIB_M)
+
+map_db : map_db.c uascii.h ncbl2_head.h
+	$(CC) $(CFLAGS) -o $(BIN)/$@ map_db.c
+
+list_db : list_db.c
+	$(CC) $(CFLAGS) -o $(BIN)/$@ list_db.c
+
+
+lav2ps : lav2plt.o lavplt_ps.o
+	$(CC) -DUNIX -o $(BIN)/$@ lav2plt.o lavplt_ps.o -lm
+
+lav2svg : lav2plt.o lavplt_svg.o
+	$(CC) -DUNIX -o $(BIN)/$@ lav2plt.o lavplt_svg.o -lm
diff --git a/make/Makefile.pcom_s b/make/Makefile.pcom_s
new file mode 100644
index 0000000..0cad72b
--- /dev/null
+++ b/make/Makefile.pcom_s
@@ -0,0 +1,162 @@
+
+# combinations of files for "composite" drop* functions
+#
+DROPNSW_O = dropnsw.o  wm_align.o calcons_sw.o
+DROPNFA_O = drop_nfa.o wm_align.o calcons_fa.o
+DROPBD_O = dropsbd.o wm_align.o calcons_fa.o
+DROPTFA_O = drop_tfa.o wm_align.o calcons_tfa.o
+DROPFF_O = drop_ff2.o calcons_ff.o
+DROPFS_O = drop_fs2.o calcons_fs.o
+DROPFM_O = drop_fm.o calcons_fm.o
+DROPTFF_O = drop_tff.o calcons_tff.o
+DROPTFS_O = drop_tfs.o calcons_tfs.o
+DROPTFM_O = drop_tfm.o calcons_tfm.o
+
+#COMPACC_TO = compacc.o	# used with comp_lib5.c/comp_lib7.c
+#COMPACC_SO = compacc.o
+COMPACC_TO = compacc2_t.o  # used with comp_lib5e.c/comp_lib7e.c/comp_lib8.c
+COMPACC_SO = compacc2_s.o
+
+SHOWBESTC = mshowbest.c
+SHOWBESTO = showbest.o build_ares.o
+SHOWALIGN = mshowalign2
+SHOWALIGN_T = mshowalign2_t
+SHOWALIGN_S = mshowalign2_s
+LSHOWALIGN = lshowalign
+MWH = mw.h 
+MWHP = mw.h
+
+SPROGS = fasta36 ssearch36 lalign36 fasts36 fastx36 tfastx36 fasty36 tfasty36 tfasts36 fastm36 tfastm36 fastf36 tfastf36 glsearch36 ggsearch36
+
+APROGS = map_db
+
+XPROGS = fastx36 tfastx36  fasty36 tfasty36
+
+PROGS = $(SPROGS) $(APROGS)
+
+all: $(PROGS)
+
+tall: $(TPROGS)
+
+sall: $(SPROGS)
+
+xall: $(XPROGS)
+
+clean-up:
+	rm -f *.o $(PROGS); 	test -z "$(BIN)" || rm -rf $(BIN)/*
+
+install: $(PROGS)
+	cd $(BIN) && cp $(PROGS)  $(XDIR)
+
+uinstall: $(PROGS)
+	cd $(BIN) && cp $(PROGS)  $(UDIR)
+
+sinstall: $(SPROGS)
+	cd $(BIN) && cp $(SPROGS) $(XDIR)
+
+tinstall: $(TPROGS)
+	cd $(BIN) && cp $(TPROGS) $(XDIR)
+
+fasta36 : $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o init_fa.o scale_se.o karlin.o $(DROPNFA_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o mrandom.o url_subs.o
+	$(CC) $(HFLAGS) $(BIN)/$@ $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o init_fa.o $(DROPNFA_O) scale_se.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o $(LIB_M)
+
+fastx36 : $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o init_fx.o scale_se.o karlin.o drop_fx.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o mrandom.o
+	$(CC) $(HFLAGS) $(BIN)/$@ $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o init_fx.o drop_fx.o scale_se.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o mrandom.o $(LIB_M)
+
+fasty36 : $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o init_fy.o scale_se.o karlin.o drop_fz.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o mrandom.o
+	$(CC) $(HFLAGS) $(BIN)/$@ $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o init_fy.o drop_fz.o scale_se.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o mrandom.o $(LIB_M)
+
+fastf36 : $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o init_ff.o scaleswtf.o last_tat.o tatstats_ff.o karlin.o $(DROPFF_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o mrandom.o url_subs.o
+	$(CC) $(HFLAGS) $(BIN)/fastf36 $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o init_ff.o $(DROPFF_O) scaleswtf.o last_tat.o tatstats_ff.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o mrandom.o url_subs.o $(LIB_M)
+
+fasts36 : $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o init_fs.o scaleswts.o last_tat.o tatstats_fs.o karlin.o $(DROPFS_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o mrandom.o url_subs.o
+	$(CC) $(HFLAGS) $(BIN)/$@ $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o init_fs.o $(DROPFS_O) scaleswts.o last_tat.o tatstats_fs.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o mrandom.o url_subs.o $(LIB_M)
+
+fastm36 : $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o init_fm.o scaleswts.o last_tat.o tatstats_fm.o karlin.o $(DROPFM_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o mrandom.o url_subs.o
+	$(CC) $(HFLAGS) $(BIN)/$@ $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o init_fm.o $(DROPFM_O) scaleswts.o last_tat.o tatstats_fm.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o mrandom.o url_subs.o $(LIB_M)
+
+tfastx36 : $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o init_tfx.o scale_se.o karlin.o drop_tfx.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o mrandom.o
+	$(CC) $(HFLAGS) $(BIN)/$@ $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o init_tfx.o drop_tfx.o scale_se.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o mrandom.o $(LIB_M)
+
+tfasty36 : $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o init_tfy.o scale_se.o karlin.o drop_tfz.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o mrandom.o
+	$(CC) $(HFLAGS) $(BIN)/$@ $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o init_tfy.o drop_tfz.o scale_se.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o mrandom.o $(LIB_M)
+
+tfasta36 : $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o init_tfa.o scale_se.o karlin.o $(DROPTFA_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o mrandom.o
+	$(CC) $(HFLAGS) $(BIN)/tfasta36 $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o init_tfa.o $(DROPTFA_O) scale_se.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o mrandom.o $(LIB_M)
+
+tfastf36 : $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o init_tf.o scaleswtf.o last_tat.o tatstats_ff.o karlin.o $(DROPTFF_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o mrandom.o url_subs.o
+	$(CC) $(HFLAGS) $(BIN)/$@ $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o init_tf.o $(DROPTFF_O) scaleswtf.o last_tat.o tatstats_ff.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o mrandom.o url_subs.o $(LIB_M)
+
+tfastf36s : $(COMP_LIBO) $(COMPACC_SO) showsum.o $(SHOWALIGN_S).o htime.o apam.o doinit.o init_tf.o scaleswtf.o karlin.o $(DROPTFF_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o mrandom.o url_subs.o
+	$(CC) $(HFLAGS) $(BIN)/$@ $(COMP_LIBO) $(COMPACC_SO) showsum.o $(SHOWALIGN_S).o htime.o apam.o doinit.o init_tf.o $(DROPTFF_O) scaleswtf.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o mrandom.o url_subs.o $(LIB_M)
+
+tfasts36 : $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o init_tfs.o scaleswts.o tatstats_fs.o last_tat.o karlin.o $(DROPTFS_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o mrandom.o url_subs.o
+	$(CC) $(HFLAGS) $(BIN)/$@ $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o init_tfs.o $(DROPTFS_O) scaleswts.o tatstats_fs.o last_tat.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o mrandom.o url_subs.o $(LIB_M)
+
+tfastm36 : $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o init_tfm.o scaleswts.o tatstats_fm.o last_tat.o karlin.o $(DROPTFM_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o mrandom.o url_subs.o
+	$(CC) $(HFLAGS) $(BIN)/$@ $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o init_tfm.o $(DROPTFM_O) scaleswts.o tatstats_fm.o last_tat.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o mrandom.o url_subs.o $(LIB_M)
+
+ssearch36 : $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o scale_se.o karlin.o $(DROPGSW_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o pssm_asn_subs.o
+	$(CC) $(HFLAGS) $(BIN)/$@ $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o $(DROPGSW_O) scale_se.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o pssm_asn_subs.o $(LIB_M)
+
+# do not use accelerated Smith-Waterman
+ssearch36s : $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o scale_se.o karlin.o $(DROPGSW_NA_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o pssm_asn_subs.o
+	$(CC) $(HFLAGS) $(BIN)/$@ $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o $(DROPGSW_NA_O) scale_se.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o pssm_asn_subs.o $(LIB_M)
+
+lalign36 : $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(LSHOWALIGN).o htime.o apam.o doinit.o init_lal.o scale_se.o karlin.o last_thresh.o $(DROPLAL_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o pssm_asn_subs.o
+	$(CC) $(HFLAGS) $(BIN)/$@ $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(LSHOWALIGN).o htime.o apam.o doinit.o init_lal.o $(DROPLAL_O) scale_se.o karlin.o last_thresh.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o pssm_asn_subs.o $(LIB_M)
+
+osearch36 : $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o init_ssw.o scale_se.o karlin.o $(DROPNSW_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o
+	$(CC) $(HFLAGS) $(BIN)/$@ $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o init_ssw.o $(DROPNSW_O) scale_se.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o $(LIB_M)
+
+glsearch36 : $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o scale_sn.o karlin.o $(DROPLNW_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o pssm_asn_subs.o
+	$(CC) $(HFLAGS) $(BIN)/$@ $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o $(DROPLNW_O) scale_sn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o pssm_asn_subs.o $(LIB_M)
+
+ggsearch36 : $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o scale_sn.o karlin.o $(DROPGNW_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o pssm_asn_subs.o
+	$(CC) $(HFLAGS) $(BIN)/$@ $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_S).o htime.o apam.o doinit.o $(DROPGNW_O) scale_sn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o pssm_asn_subs.o $(LIB_M)
+
+prss36 : ssearch36
+	ln -sf ssearch36 $(BIN)/prss36
+
+comp_mlib5e.o : comp_lib5e.c mw.h structs.h defs.h param.h
+	$(CC) $(THR_CC) $(CFLAGS) -DCOMP_MLIB -c comp_lib5e.c -o $@
+
+comp_mthr5e.o : comp_lib5e.c mw.h structs.h defs.h param.h thr_bufs2.h thr_buf_structs.h
+	$(CC) $(THR_CC) $(CFLAGS) -DCOMP_THR -DCOMP_MLIB -c comp_lib5e.c -o $@
+
+comp_mlib7e.o : comp_lib7e.c mw.h structs.h defs.h param.h
+	$(CC) $(THR_CC) $(CFLAGS) -DCOMP_MLIB -c comp_lib7e.c -o $@
+
+comp_mthr7e.o : comp_lib7e.c mw.h structs.h defs.h param.h thr_bufs2.h thr_buf_structs.h
+	$(CC) $(THR_CC) $(CFLAGS) -DCOMP_THR -DCOMP_MLIB -c comp_lib7e.c -o $@
+
+comp_mlib8.o : comp_lib8.c mw.h structs.h defs.h param.h
+	$(CC) $(THR_CC) $(CFLAGS) -DCOMP_MLIB -c comp_lib8.c -o $@
+
+comp_mthr8.o : comp_lib8.c mw.h structs.h defs.h param.h thr_bufs2.h thr_buf_structs.h
+	$(CC) $(THR_CC) $(CFLAGS) -DCOMP_THR -DCOMP_MLIB -c comp_lib8.c -o $@
+
+comp_mlib9.o : comp_lib9.c mw.h structs.h defs.h param.h
+	$(CC) $(THR_CC) $(CFLAGS) -DCOMP_MLIB -c comp_lib9.c -o $@
+
+comp_mthr9.o : comp_lib9.c mw.h structs.h defs.h param.h thr_bufs2.h thr_buf_structs.h
+	$(CC) $(THR_CC) $(CFLAGS) -DCOMP_THR -DCOMP_MLIB -c comp_lib9.c -o $@
+
+work_thr2.o : work_thr2.c mw.h structs.h defs.h param.h thr_bufs2.h thr_buf_structs.h
+	$(CC) $(THR_CC) $(CFLAGS) -c work_thr2.c -o $@
+
+print_pssm : print_pssm.c getseq.c karlin.c apam.c pssm_asn_subs.c
+	$(CC) -o $@ $(CFLAGS) print_pssm.c getseq.c karlin.c apam.c pssm_asn_subs.c $(LIB_M)
+
+map_db : map_db.c uascii.h ncbl2_head.h
+	$(CC) $(CFLAGS) -o $(BIN)/$@ map_db.c
+
+list_db : list_db.c
+	$(CC) $(CFLAGS) -o $(BIN)/$@ list_db.c
+
+
+lav2ps : lav2plt.o lavplt_ps.o
+	$(CC) -DUNIX -o $(BIN)/$@ lav2plt.o lavplt_ps.o -lm
+
+lav2svg : lav2plt.o lavplt_svg.o
+	$(CC) -DUNIX -o $(BIN)/$@ lav2plt.o lavplt_svg.o -lm
diff --git a/make/Makefile.pcom_t b/make/Makefile.pcom_t
new file mode 100644
index 0000000..ead70a1
--- /dev/null
+++ b/make/Makefile.pcom_t
@@ -0,0 +1,184 @@
+
+# combinations of files for "composite" drop* functions
+#
+DROPNSW_O = dropnsw.o  wm_align.o calcons_sw.o
+DROPNFA_O = drop_nfa.o wm_align.o calcons_fa.o
+DROPBD_O = dropsbd.o wm_align.o calcons_fa.o
+DROPTFA_O = drop_tfa.o wm_align.o calcons_tfa.o
+DROPFF_O = drop_ff2.o calcons_ff.o
+DROPFS_O = drop_fs2.o calcons_fs.o
+DROPFM_O = drop_fm.o calcons_fm.o
+DROPTFF_O = drop_tff.o calcons_tff.o
+DROPTFS_O = drop_tfs.o calcons_tfs.o
+DROPTFM_O = drop_tfm.o calcons_tfm.o
+# COMPACC_SO / COMPACC_TO: compacc object linked into the serial /
+# threaded programs below.  Keep exactly one pair of assignments active.
+
+#COMPACC_TO = compacc.o	# used with comp_lib5.c/comp_lib7.c
+#COMPACC_SO = compacc.o
+COMPACC_TO = compacc2_t.o  # used with comp_lib5e.c/comp_lib7e.c/comp_lib8.c
+COMPACC_SO = compacc2_s.o
+
+SHOWBESTC = mshowbest.c
+SHOWBESTO = showbest.o build_ares.o
+SHOWALIGN = mshowalign2
+SHOWALIGN_T = mshowalign2_t
+SHOWALIGN_S = mshowalign2_s
+LSHOWALIGN = lshowalign
+MWH = mw.h 
+MWHP = mw.h
+
+TPROGS = fasta36 ssearch36 lalign36 fasts36 fastx36 tfastx36 fasty36 tfasty36 tfasts36 fastm36 tfastm36 fastf36 tfastf36 glsearch36 ggsearch36
+
+APROGS = map_db
+
+PROGS = $(SPROGS) $(TPROGS) $(APROGS)
+
+all: $(PROGS)
+
+tall: $(TPROGS)
+
+clean-up:
+	rm -f *.o $(PROGS); 	test -z "$(BIN)" || rm -rf $(BIN)/*
+
+install: $(PROGS)
+	cd $(BIN) && cp $(PROGS)  $(XDIR)
+
+uinstall: $(PROGS)
+	cd $(BIN) && cp $(PROGS)  $(UDIR)
+
+sinstall: $(SPROGS)
+	cd $(BIN) && cp $(SPROGS) $(XDIR)
+
+tinstall: $(TPROGS)
+	cd $(BIN) && cp $(TPROGS) $(XDIR)
+
+lalign36 : $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(LSHOWALIGN).o htime.o apam.o doinit.o init_lal.o scale_se.o karlin.o last_thresh.o $(DROPLAL_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o pssm_asn_subs.o
+	$(CC) $(HFLAGS) $(BIN)/$@ $(COMP_LIBO) $(COMPACC_SO) $(SHOWBESTO) re_getlib.o $(LSHOWALIGN).o htime.o apam.o doinit.o init_lal.o $(DROPLAL_O) scale_se.o karlin.o last_thresh.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o pssm_asn_subs.o $(LIB_M)
+
+ssearch36 : $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o scale_se.o karlin.o $(DROPGSW_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o pssm_asn_subs.o
+	$(CC) $(HFLAGS) $(BIN)/$@ $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o $(DROPGSW_O) scale_se.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o pssm_asn_subs.o $(LIB_M) $(THR_LIBS)
+
+ssearch36s : $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o scale_se.o karlin.o $(DROPGSW_NA_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o pssm_asn_subs.o
+	$(CC) $(HFLAGS) $(BIN)/$@ $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o $(DROPGSW_NA_O) scale_se.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o pssm_asn_subs.o $(LIB_M) $(THR_LIBS)
+
+glsearch36 : $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o scale_sn.o karlin.o $(DROPLNW_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o pssm_asn_subs.o
+	$(CC) $(HFLAGS) $(BIN)/$@ $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o $(DROPLNW_O) scale_sn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o pssm_asn_subs.o $(LIB_M) $(THR_LIBS)
+
+glsearch36s : $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) showsum.o re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o scale_sn.o karlin.o $(DROPLNW_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o pssm_asn_subs.o
+	$(CC) $(HFLAGS) $(BIN)/$@ $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) showsum.o re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o $(DROPLNW_O) scale_sn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o pssm_asn_subs.o $(LIB_M) $(THR_LIBS)
+
+ggsearch36 : $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o scale_sn.o karlin.o $(DROPGNW_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o pssm_asn_subs.o
+	$(CC) $(HFLAGS) $(BIN)/$@ $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o $(DROPGNW_O) scale_sn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o pssm_asn_subs.o $(LIB_M) $(THR_LIBS)
+
+ggsearch36s : $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) showsum.o re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o scale_sn.o karlin.o $(DROPGNW_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o pssm_asn_subs.o
+	$(CC) $(HFLAGS) $(BIN)/$@ $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) showsum.o re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o $(DROPGNW_O) scale_sn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o pssm_asn_subs.o $(LIB_M) $(THR_LIBS)
+
+fasta36 : $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_fa.o scale_se.o karlin.o $(DROPNFA_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o
+	$(CC) $(HFLAGS) $(BIN)/$@ $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_fa.o $(DROPNFA_O) scale_se.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o $(LIB_M) $(THR_LIBS)
+
+fasta36sum : $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) showsum.o re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_fa.o scale_se.o karlin.o $(DROPNFA_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o
+	$(CC) $(HFLAGS) $(BIN)/$@ $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) showsum.o re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_fa.o $(DROPNFA_O) scale_se.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o $(LIB_M) $(THR_LIBS)
+
+fasta36u : $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) showun.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_fa.o scale_se.o karlin.o $(DROPNFA_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o mrandom.o url_subs.o
+	$(CC) $(HFLAGS) $(BIN)/$@ $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) showun.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_fa.o $(DROPNFA_O) scale_se.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o mrandom.o url_subs.o $(LIB_M) $(THR_LIBS)
+
+fasta36r : $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) showrel.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_fa.o scale_se.o karlin.o $(DROPNFA_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o mrandom.o url_subs.o
+	$(CC) $(HFLAGS) $(BIN)/$@ $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) showrel.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_fa.o $(DROPNFA_O) scale_se.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o mrandom.o url_subs.o $(LIB_M) $(THR_LIBS)
+
+fastf36 : $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_ff.o scaleswtf.o last_tat.o tatstats_ff.o karlin.o $(DROPFF_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o mrandom.o url_subs.o
+	$(CC) $(HFLAGS) $(BIN)/$@ $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_ff.o $(DROPFF_O) scaleswtf.o last_tat.o tatstats_ff.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o mrandom.o url_subs.o $(LIB_M) $(THR_LIBS)
+
+fastf36s : $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) showsum.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_ff.o scaleswtf.o karlin.o $(DROPFF_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o mrandom.o url_subs.o
+	$(CC) $(HFLAGS) $(BIN)/$@ $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) showsum.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_ff.o $(DROPFF_O) scaleswtf.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o mrandom.o url_subs.o $(LIB_M) $(THR_LIBS)
+
+fasts36 : $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_fs.o scaleswts.o last_tat.o tatstats_fs.o karlin.o $(DROPFS_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o mrandom.o url_subs.o
+	$(CC) $(HFLAGS) $(BIN)/$@ $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_fs.o $(DROPFS_O) scaleswts.o last_tat.o tatstats_fs.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o mrandom.o url_subs.o $(LIB_M) $(THR_LIBS)
+
+fastm36 : $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_fm.o scaleswts.o last_tat.o tatstats_fm.o karlin.o $(DROPFM_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o mrandom.o url_subs.o
+	$(CC) $(HFLAGS) $(BIN)/$@ $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_fm.o $(DROPFM_O) scaleswts.o last_tat.o tatstats_fm.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o mrandom.o url_subs.o $(LIB_M) $(THR_LIBS)
+
+fastx36 : $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_fx.o faatran.o scale_se.o karlin.o drop_fx.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o
+	$(CC) $(HFLAGS) $(BIN)/fastx36 $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_fx.o drop_fx.o faatran.o scale_se.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o $(LIB_M) $(THR_LIBS)
+
+fasty36 : $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_fy.o faatran.o scale_se.o karlin.o drop_fz.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o
+	$(CC) $(HFLAGS) $(BIN)/fasty36 $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_fy.o drop_fz.o faatran.o scale_se.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o mrandom.o $(LIB_M) $(THR_LIBS)
+
+tfasta36 : $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_tfa.o scale_se.o karlin.o $(DROPTFA_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o mrandom.o
+	$(CC) $(HFLAGS) $(BIN)/tfasta36 $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_tfa.o $(DROPTFA_O) scale_se.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o mrandom.o $(LIB_M) $(THR_LIBS)
+
+tfastf36 : $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_tf.o scaleswtf.o last_tat.o tatstats_ff.o karlin.o $(DROPTFF_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o mrandom.o url_subs.o
+	$(CC) $(HFLAGS) $(BIN)/tfastf36 $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_tf.o $(DROPTFF_O) scaleswtf.o last_tat.o tatstats_ff.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o mrandom.o url_subs.o $(LIB_M) $(THR_LIBS)
+
+tfasts36 : $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_tfs.o scaleswts.o last_tat.o tatstats_fs.o karlin.o $(DROPTFS_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o mrandom.o url_subs.o
+	$(CC) $(HFLAGS) $(BIN)/tfasts36 $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_tfs.o $(DROPTFS_O) scaleswts.o last_tat.o tatstats_fs.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o mrandom.o url_subs.o $(LIB_M) $(THR_LIBS)
+
+tfastm36 : $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_tfm.o scaleswts.o tatstats_fm.o last_tat.o karlin.o $(DROPTFM_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o mrandom.o url_subs.o
+	$(CC) $(HFLAGS) $(BIN)/tfastm36 $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_tfm.o $(DROPTFM_O) scaleswts.o tatstats_fm.o last_tat.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o mrandom.o url_subs.o $(LIB_M) $(THR_LIBS)
+
+tfastx36 : $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_tfx.o scale_se.o karlin.o drop_tfx.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o mrandom.o
+	$(CC) $(HFLAGS) $(BIN)/$@ $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_tfx.o drop_tfx.o scale_se.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o mrandom.o $(LIB_M) $(THR_LIBS)
+
+tfasty36 : $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_tfy.o scale_se.o karlin.o drop_tfz.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o mrandom.o
+	$(CC) $(HFLAGS) $(BIN)/$@ $(COMP_THRO) $(WORK_THRO) $(THR_SUBS).o $(COMPACC_TO) $(SHOWBESTO) re_getlib.o $(SHOWALIGN_T).o htime.o apam.o doinit.o init_tfy.o drop_tfz.o scale_se.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o mrandom.o $(LIB_M) $(THR_LIBS)
+
+comp_mlib4.o : comp_lib4.c mw.h structs.h defs.h param.h
+	$(CC) $(THR_CC) $(CFLAGS) -DCOMP_MLIB -c comp_lib4.c -o $@
+
+comp_mthr4.o : comp_lib4.c mw.h structs.h defs.h param.h thr_bufs2.h thr_buf_structs.h
+	$(CC) $(THR_CC) $(CFLAGS) -DCOMP_THR -DCOMP_MLIB -c comp_lib4.c -o $@
+
+comp_mlib5.o : comp_lib5.c mw.h structs.h defs.h param.h
+	$(CC) $(THR_CC) $(CFLAGS) -DCOMP_MLIB -c comp_lib5.c -o $@
+
+comp_mthr5.o : comp_lib5.c mw.h structs.h defs.h param.h thr_bufs2.h thr_buf_structs.h
+	$(CC) $(THR_CC) $(CFLAGS) -DCOMP_THR -DCOMP_MLIB -c comp_lib5.c -o $@
+
+comp_mlib5e.o : comp_lib5e.c mw.h structs.h defs.h param.h
+	$(CC) $(THR_CC) $(CFLAGS) -DCOMP_MLIB -c comp_lib5e.c -o $@
+
+comp_mthr5e.o : comp_lib5e.c mw.h structs.h defs.h param.h thr_bufs2.h thr_buf_structs.h
+	$(CC) $(THR_CC) $(CFLAGS) -DCOMP_THR -DCOMP_MLIB -c comp_lib5e.c -o $@
+
+comp_mlib7.o : comp_lib7.c mw.h structs.h defs.h param.h
+	$(CC) $(THR_CC) $(CFLAGS) -DCOMP_MLIB -c comp_lib7.c -o $@
+
+comp_mthr7.o : comp_lib7.c mw.h structs.h defs.h param.h thr_bufs2.h thr_buf_structs.h
+	$(CC) $(THR_CC) $(CFLAGS) -DCOMP_THR -DCOMP_MLIB -c comp_lib7.c -o $@
+
+comp_mlib7e.o : comp_lib7e.c mw.h structs.h defs.h param.h
+	$(CC) $(THR_CC) $(CFLAGS) -DCOMP_MLIB -c comp_lib7e.c -o $@
+
+comp_mthr7e.o : comp_lib7e.c mw.h structs.h defs.h param.h thr_bufs2.h thr_buf_structs.h
+	$(CC) $(THR_CC) $(CFLAGS) -DCOMP_THR -DCOMP_MLIB -c comp_lib7e.c -o $@
+
+comp_mlib8.o : comp_lib8.c mw.h structs.h defs.h param.h
+	$(CC) $(THR_CC) $(CFLAGS) -DCOMP_MLIB -c comp_lib8.c -o $@
+
+comp_mthr8.o : comp_lib8.c mw.h structs.h defs.h param.h thr_bufs2.h thr_buf_structs.h
+	$(CC) $(THR_CC) $(CFLAGS) -DCOMP_THR -DCOMP_MLIB -c comp_lib8.c -o $@
+
+comp_mlib9.o : comp_lib9.c mw.h structs.h defs.h param.h
+	$(CC) $(THR_CC) $(CFLAGS) -DCOMP_MLIB -c comp_lib9.c -o $@
+
+comp_mthr9.o : comp_lib9.c mw.h structs.h defs.h param.h thr_bufs2.h thr_buf_structs.h
+	$(CC) $(THR_CC) $(CFLAGS) -DCOMP_THR -DCOMP_MLIB -c comp_lib9.c -o $@
+
+work_thr2.o : work_thr2.c mw.h structs.h defs.h param.h thr_bufs2.h thr_buf_structs.h
+	$(CC) $(THR_CC) $(CFLAGS) -c work_thr2.c -o $@
+
+print_pssm : print_pssm.c getseq.c karlin.c apam.c pssm_asn_subs.c
+	$(CC) -o $@ $(CFLAGS) print_pssm.c getseq.c karlin.c apam.c pssm_asn_subs.c $(LIB_M)
+
+map_db : map_db.c uascii.h ncbl2_head.h
+	$(CC) $(CFLAGS) -o $(BIN)/$@ map_db.c
+
+list_db : list_db.c
+	$(CC) $(CFLAGS) -o $(BIN)/$@ list_db.c
+
+
+lav2ps : lav2plt.o lavplt_ps.o
+	$(CC) -DUNIX -o $(BIN)/$@ lav2plt.o lavplt_ps.o -lm
+
+lav2svg : lav2plt.o lavplt_svg.o
+	$(CC) -DUNIX -o $(BIN)/$@ lav2plt.o lavplt_svg.o -lm
diff --git a/make/Makefile.sgi b/make/Makefile.sgi
new file mode 100644
index 0000000..b0ce690
--- /dev/null
+++ b/make/Makefile.sgi
@@ -0,0 +1,58 @@
+#
+# makefile for fasta3, fasta3_t.  Use makefile.pvm for pvcompxx.
+#
+# for more information on FASTA on SGI's, see:
+#
+#	http://www.sgi.com/chembio/resources/fasta/index.html
+#
+# use -DBIG_LIB64 to generate 64-bit offsets in map_db .xin files.  This
+# only works on SGI's with the -64 option.
+
+CC= cc -w -64 -mips4 -O2 -TENV:X=3 -DSGI_BUG -Wl,-multigot -DIRIX
+#CC= cc -64 -mips4 -g -DSGI_BUG -DDEBUG -DIRIX
+
+HFLAGS= -64 -mips4 -o
+NFLAGS= -64 -mips4 -o
+
+#CC= cc -g
+#HFLAGS= -o
+#NFLAGS= -o
+
+LIB_M= -lm
+# For R2000/R3000 MIPS Processors, use -mips1
+#
+#CC= cc -mips1 -O2 
+#HFLAGS= -mips1 -o
+#NFLAGS= -mips1 -o
+#
+# For R4000 MIPS Processors, use -mips2:
+#
+#CC = cc -mips2 -O2
+#HFLAGS= -mips2 -o
+#NFLAGS= -mips2 -o
+#
+
+CFLAGS= -DSHOWSIM -DUNIX -DTIMES -DHZ=100 -DBIGMEM -DSFCHAR="':'" -DMAX_WORKERS=4 -DTHR_EXIT=pthread_exit  -DFASTA_HOST='"crick.med.virginia.edu/fasta/cgi"' -DIS_BIG_ENDIAN -DUSE_MMAP -DBIG_LIB64 -DHAS_INTTYPES -DSAMP_STATS -DPGM_DOC -DBIG_LIB64
+
+THR_SUBS = pthr_subs2
+THR_LIBS = -lpthread
+THR_CC =
+
+BIN = ../bin
+XDIR = /seqprg/slib/bin
+
+# set up files for SSE2/Altivec acceleration
+#
+include ../make/Makefile.sse_alt
+
+# no acceleration
+#
+DROPGSW_O = $(DROPGSW_NA_O)
+DROPLAL_O = $(DROPLAL_NA_O)
+DROPGNW_O = $(DROPGNW_NA_O)
+DROPLNW_O = $(DROPLNW_NA_O)
+
+# renamed (fasta34)  programs
+include ../make/Makefile36m.common
+# conventional (fasta3) names
+# include ../make/Makefile.common
diff --git a/make/Makefile.sse_alt b/make/Makefile.sse_alt
new file mode 100644
index 0000000..5bc1043
--- /dev/null
+++ b/make/Makefile.sse_alt
@@ -0,0 +1,23 @@
+#
+# $Name:  $ - $ Id: $
+#
+# sets of files for no (DROPGSW_NA_O), SSE2, or Altivec Smith-Waterman acceleration
+#
+DROPGSW_NA_O = init_sw.o dropgsw2.o lwm_align.o calcons_sw.o
+DROPGSW_SSE_O = init_sw_sse.o dropgsw2_sse.o smith_waterman_sse2.o lwm_align.o calcons_sw.o
+DROPGSW_ALT_O = init_sw_alt.o dropgsw2_alt.o smith_waterman_altivec.o lwm_align.o calcons_sw.o
+
+P_DROPNSW_NA_O = dropnsw.o lwm_align.o calcons_sw.o
+P_DROPGSW_NA_O = dropgsw2.o lwm_align.o calcons_sw.o
+P_DROPGSW_SSE_O = dropgsw2_sse.o smith_waterman_sse2.o lwm_align.o calcons_sw.o
+P_DROPGSW_ALT_O = dropgsw2_alt.o smith_waterman_altivec.o lwm_align.o calcons_sw.o
+
+DROPLAL_NA_O = droplal2.o lsim4.o calcons_la.o
+DROPLAL_SSE_O = droplal2_sse.o smith_waterman_sse2.o lsim4.o calcons_la.o
+DROPLAL_ALT_O = droplal2_alt.o smith_waterman_altivec.o lsim4.o calcons_la.o
+
+DROPGNW_NA_O = init_gnw.o dropgnw.o gwm_align.o calcons_sw.o
+DROPGNW_SSE_O = init_gnw_sse.o dropgnw_sse.o global_sse2.o gwm_align.o calcons_sw.o
+
+DROPLNW_NA_O = init_lnw.o droplnw.o gwm_align.o calcons_sw.o
+DROPLNW_SSE_O = init_lnw_sse.o droplnw_sse.o glocal_sse2.o gwm_align.o calcons_sw.o
diff --git a/make/Makefile.sun b/make/Makefile.sun
new file mode 100644
index 0000000..5492aa0
--- /dev/null
+++ b/make/Makefile.sun
@@ -0,0 +1,52 @@
+#
+# makefile for fasta3, fasta3_t.  Use makefile.pvm for pvcompxx.
+
+SHELL=/usr/bin/bash
+
+#CC= cc -g -xarch=v8plusa
+
+# switches for 64-bit addressing
+CC= cc -fast -xO4 -xarch=v9
+#CC= cc -g -xarch=v9
+
+# for SUNMP, use -DTHR_EXIT=thr_exit
+# HZ=100 for Solaris x86
+# -DIS_LITTLE_ENDIAN for Solaris x86
+
+CFLAGS= -DSHOWSIM -DUNIX -DTIMES -DHZ=100 -DBIGMEM -DSFCHAR="':'" -DMAX_WORKERS=2 -DTHR_EXIT=thr_exit  -DFASTA_setscope -DUSE_MMAP -DBIG_LIB64 -DHAS_INTTYPES -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -DUSE_FSEEKO -DM10_CONS -DSAMP_STATS -DPGM_DOC
+HFLAGS= -o
+NFLAGS= -o
+
+# use -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
+# for files > 2 GB
+
+#for Sun pthreads (preferred, pthreads used on all other platforms)
+THR_SUBS = pthr_subs2
+THR_LIBS = -lpthread
+THR_CC =
+
+#for Sun threads (no longer necessary as Sun supports pthreads)
+#THR_SUBS = uthr_subs2
+#THR_LIBS = -lthread
+#THR_CC =
+
+LIB_M= -lmopt
+
+BIN = ../bin
+XDIR = /seqprg/bin
+
+# set up files for SSE2/Altivec acceleration
+#
+include ../make/Makefile.sse_alt
+
+# no acceleration
+#
+DROPGSW_O = $(DROPGSW_NA_O)
+DROPLAL_O = $(DROPLAL_NA_O)
+DROPGNW_O = $(DROPGNW_NA_O)
+DROPLNW_O = $(DROPLNW_NA_O)
+
+# renamed (fasta34)  programs
+include ../make/Makefile36m.common
+# conventional (fasta3) names
+# include ../make/Makefile.common
diff --git a/make/Makefile.sun_x86 b/make/Makefile.sun_x86
new file mode 100644
index 0000000..2a8c8ec
--- /dev/null
+++ b/make/Makefile.sun_x86
@@ -0,0 +1,51 @@
+#
+# makefile for fasta3, fasta3_t.  Use makefile.pvm for pvcompxx.
+
+SHELL=/usr/bin/bash	# used for make install
+
+# switches for 64-bit addressing - AMD64
+CC= cc -g -fast -m64
+
+# debugging options
+#CC= cc -g -DDEBUG -xarch=amd64
+
+# for SUNMP, use -DTHR_EXIT=thr_exit
+# HZ=100 for Solaris x86
+# Solaris X86 is little endian - be certain IS_BIG_ENDIAN is not defined
+
+CFLAGS= -DSHOWSIM -DUNIX -DTIMES -DHZ=100 -DBIGMEM -DSFCHAR="':'" -DMAX_WORKERS=2 -DTHR_EXIT=thr_exit  -DFASTA_setscope -DUSE_MMAP -DBIG_LIB64 -DHAS_INTTYPES -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -DUSE_FSEEKO -DM10_CONS -DSAMP_STATS -DPGM_DOC
+HFLAGS= -o
+NFLAGS= -o
+
+# use -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
+# for files > 2 GB
+
+#for Sun pthreads (preferred, pthreads used on all other platforms)
+THR_SUBS = pthr_subs2
+THR_LIBS = -lpthread
+THR_CC =
+
+#for Sun threads (no longer necessary as Sun supports pthreads)
+#THR_SUBS = uthr_subs2
+#THR_LIBS = -lthread
+#THR_CC =
+
+LIB_M= -lmopt
+
+BIN = ../bin
+XDIR = /seqprg/bin
+
+# set up files for SSE2/Altivec acceleration
+#
+include ../make/Makefile.sse_alt
+
+#
+DROPGSW_O = $(DROPGSW_SSE_O)
+DROPLAL_O = $(DROPLAL_SSE_O)
+DROPGNW_O = $(DROPGNW_SSE_O)
+DROPLNW_O = $(DROPLNW_SSE_O)
+
+# renamed (fasta35)  programs
+include ../make/Makefile36m.common
+# conventional (fasta3) names
+# include ../make/Makefile.common
diff --git a/make/Makefile35.common b/make/Makefile35.common
new file mode 100644
index 0000000..b7b04bf
--- /dev/null
+++ b/make/Makefile35.common
@@ -0,0 +1,45 @@
+#
+# $Name:  $ - $Id: Makefile35.common 344 2010-06-29 18:20:22Z wrp $
+#
+# commands common to all architectures
+# if your architecture does not support "include", append at the end.
+#
+
+# use for multiple query sequences
+COMP_LIBO=comp_mlib2.o
+COMP_THRO=comp_mthr2.o
+WORK_THRO=work_thr2.o
+GETSEQO = 
+
+# standard nxgetaa, no memory mapping for 0 - 6
+LGETLIB=getseq.o lgetlib.o
+NGETLIB=nmgetlib
+
+# memory mapping for 0FASTA, 5PIRVMS, 6GCGBIN
+LGETLIB=getseq.o lgetlib.o lgetaa_m.o
+NGETLIB=nmgetlib
+
+NRAND=nrandom
+
+# use ncbl_lib.c for BLAST1.4 support instead of ncbl2_mlib.c
+#NCBL_LIB=ncbl_lib.o
+
+# this option should support both formats (BLAST1.4 not currently supported): 
+#NCBL_LIB=ncbl_lib.o ncbl2_mlib.o
+
+# normally use ncbl2_mlib.c
+#NCBL_LIB=ncbl2_mlib.o
+#LIB_M= -lm
+
+# this option supports NCBI BLAST2 and mySQL
+# it requires  "-I/usr/local/include/mysql -DMYSQL_DB" in CFLAGS
+# and "-L/usr/local/lib/mysql -lmysqlclient -lz" in LIB_M
+# some systems may also require a LD_LIBRARY_PATH change
+#LIB_M= -L/usr/local/lib/mysql -lmysqlclient -lz -lm
+LIB_M= -lm
+#NCBL_LIB=ncbl2_mlib.o mysql_lib.o
+NCBL_LIB=ncbl2_mlib.o
+
+include ../make/Makefile.pcom
+
+include ../make/Makefile.fcom
diff --git a/make/Makefile35.common_sql b/make/Makefile35.common_sql
new file mode 100644
index 0000000..3eda77c
--- /dev/null
+++ b/make/Makefile35.common_sql
@@ -0,0 +1,50 @@
+#
+# $Name:  $ - $Id: Makefile35.common_sql 344 2010-06-29 18:20:22Z wrp $
+#
+# commands common to all architectures
+# if your architecture does not support "include", append at the end.
+#
+
+# use for "normal" fasta34(_t) programs - only one query
+COMP_LIBO=comp_lib2.o
+COMP_THRO=comp_thr2.o
+WORK_THRO=work_thr2.o
+GETSEQO = getseq.o
+# use for multiple query sequences, requires "-n" for DNA fasta, does not
+# work with prss34 (yet)
+#COMP_LIB=comp_mlib.o
+#COMP_THRO=comp_mthr.o
+#
+# standard nxgetaa, no memory mapping for 0 - 6
+LGETLIB=getseq.o lgetlib.o lgetaa_m.o
+NGETLIB=nmgetlib
+
+# memory mapping for 0FASTA, 5PIRVMS, 6GCGBIN
+# LGETLIB=getseq.o lgetlib.o lgetaa_m.o
+# NGETLIB=nmgetlib
+
+NRAND=nrandom
+
+# use ncbl_lib.c for BLAST1.4 support instead of ncbl2_mlib.c
+#NCBL_LIB=ncbl_lib.o
+
+# this option should support both formats (BLAST1.4 not currently supported): 
+#NCBL_LIB=ncbl_lib.o ncbl2_mlib.o
+
+# normally use ncbl2_mlib.c
+#NCBL_LIB=ncbl2_mlib.o
+#LIB_M= -lm
+
+# this option supports NCBI BLAST2 and mySQL
+# it requires  "-I/usr/include/mysql -DMYSQL_DB" in CFLAGS
+# and "-L/usr/lib/mysql -lmysqlclient -lz" in LIB_M
+# some systems may also require a LD_LIBRARY_PATH change
+LIB_M= -L/usr/lib/mysql -lmysqlclient -lz -lm
+#LIB_M= -lm
+NCBL_LIB=ncbl2_mlib.o mysql_lib.o
+#NCBL_LIB=ncbl2_mlib.o
+
+include ../make/Makefile.pcom
+
+include ../make/Makefile.fcom
+
diff --git a/make/Makefile35.nmk_com b/make/Makefile35.nmk_com
new file mode 100755
index 0000000..11792cf
--- /dev/null
+++ b/make/Makefile35.nmk_com
@@ -0,0 +1,30 @@
+#
+# $Name:  $ - $Id: Makefile35.nmk_com 344 2010-06-29 18:20:22Z wrp $
+#
+# commands common to all architectures
+# if your architecture does not support "include", append at the end.
+#
+
+# use for multiple query sequences
+COMP_LIBO=comp_mlib2.obj
+COMP_THRO=comp_mthr2.obj
+WORK_THRO=work_thr2.obj
+GETSEQO = 
+
+# standard nxgetaa, no memory mapping for 0 - 6
+LGETLIB=getseq.obj lgetlib.obj 
+NGETLIB=nmgetlib
+
+# memory mapping for 0FASTA, 5PIRVMS, 6GCGBIN
+# no memory mapping for Win32
+#LGETLIB= lgetlib.obj lgetaa_m.obj
+
+NRAND=nrand
+
+# normally use ncbl2_mlib.c
+NCBL_LIB=ncbl2_mlib.obj
+#LIB_M= -lm
+
+include ../make/Makefile.nm_pcom
+
+include ../make/Makefile.nm_fcom
diff --git a/make/Makefile35m.common_mysql b/make/Makefile35m.common_mysql
new file mode 100644
index 0000000..03f4741
--- /dev/null
+++ b/make/Makefile35m.common_mysql
@@ -0,0 +1,49 @@
+#
+# $Name:  $ - $Id: Makefile35m.common_mysql 344 2010-06-29 18:20:22Z wrp $
+#
+# commands common to all architectures
+# if your architecture does not support "include", append at the end.
+#
+
+# use for multiple query sequences; does not
+# work with prss34 (yet)
+COMP_LIBO=comp_mlib2.o
+COMP_THRO=comp_mthr2.o
+WORK_THRO=work_thr2.o
+GETSEQO = 
+
+# standard nxgetaa, no memory mapping for 0 - 6
+#LGETLIB=getseq.o lgetlib.o
+#NGETLIB=nmgetlib
+
+# memory mapping for 0FASTA, 5PIRVMS, 6GCGBIN
+LGETLIB= $(GETSEQO) lgetlib.o lgetaa_m.o
+NGETLIB=nmgetlib
+
+NRAND=nrandom
+
+# use ncbl_lib.c for BLAST1.4 support instead of ncbl2_mlib.c
+#NCBL_LIB=ncbl_lib.o
+
+# this option should support both formats (BLAST1.4 not currently supported): 
+#NCBL_LIB=ncbl_lib.o ncbl2_mlib.o
+
+# normally use ncbl2_mlib.c
+#NCBL_LIB=ncbl2_mlib.o
+#LIB_M= -lm
+
+# this option supports NCBI BLAST2 and mySQL
+# it requires  "-I/usr/local/include/mysql -DMYSQL_DB" in CFLAGS
+# and "-L/usr/local/lib/mysql -lmysqlclient -lz" in LIB_M
+# some systems may also require a LD_LIBRARY_PATH change
+LIB_M= -L/usr/lib64/mysql -lmysqlclient -lz -lm
+#LIB_M= -L/usr/lib/pgsql/ -lpq -lm -lcrypto -lssl
+# LIB_M= -lm
+NCBL_LIB=ncbl2_mlib.o mysql_lib.o
+#NCBL_LIB=ncbl2_mlib.o pgsql_lib.o
+# NCBL_LIB=ncbl2_mlib.o
+
+include ../make/Makefile.pcom
+
+include ../make/Makefile.fcom
+
diff --git a/make/Makefile35m.common_pgsql b/make/Makefile35m.common_pgsql
new file mode 100644
index 0000000..babe0e5
--- /dev/null
+++ b/make/Makefile35m.common_pgsql
@@ -0,0 +1,49 @@
+#
+# $Name:  $ - $Id: Makefile35m.common_pgsql 344 2010-06-29 18:20:22Z wrp $
+#
+# commands common to all architectures
+# if your architecture does not support "include", append at the end.
+#
+
+# use for multiple query sequences; does not
+# work with prss34 (yet)
+COMP_LIBO=comp_mlib2.o
+COMP_THRO=comp_mthr2.o
+WORK_THRO=work_thr2.o
+GETSEQO = 
+
+# standard nxgetaa, no memory mapping for 0 - 6
+#LGETLIB=getseq.o lgetlib.o
+#NGETLIB=nmgetlib
+
+# memory mapping for 0FASTA, 5PIRVMS, 6GCGBIN
+LGETLIB= $(GETSEQO) lgetlib.o lgetaa_m.o
+NGETLIB=nmgetlib
+
+NRAND=nrandom
+
+# use ncbl_lib.c for BLAST1.4 support instead of ncbl2_mlib.c
+#NCBL_LIB=ncbl_lib.o
+
+# this option should support both formats (BLAST1.4 not currently supported): 
+#NCBL_LIB=ncbl_lib.o ncbl2_mlib.o
+
+# normally use ncbl2_mlib.c
+#NCBL_LIB=ncbl2_mlib.o
+#LIB_M= -lm
+
+# this option supports NCBI BLAST2 and mySQL
+# it requires  "-I/usr/local/include/mysql -DMYSQL_DB" in CFLAGS
+# and "-L/usr/local/lib/mysql -lmysqlclient -lz" in LIB_M
+# some systems may also require a LD_LIBRARY_PATH change
+# LIB_M= -L/usr/local/lib/mysql -lmysqlclient -lz -lm
+LIB_M= -L/usr/local/pgsql/lib -lpq -lm -lcrypto -lssl
+# LIB_M= -lm
+#NCBL_LIB=ncbl2_mlib.o mysql_lib.o
+NCBL_LIB=ncbl2_mlib.o pgsql_lib.o
+# NCBL_LIB=ncbl2_mlib.o
+
+include ../make/Makefile.pcom
+
+include ../make/Makefile.fcom
+
diff --git a/make/Makefile35m.common_sql b/make/Makefile35m.common_sql
new file mode 100644
index 0000000..d2cb6db
--- /dev/null
+++ b/make/Makefile35m.common_sql
@@ -0,0 +1,48 @@
+#
+# $Name:  $ - $Id: Makefile35m.common_sql 344 2010-06-29 18:20:22Z wrp $
+#
+# commands common to all architectures
+# if your architecture does not support "include", append at the end.
+#
+
+# use for multiple query sequences; does not
+# work with prss34 (yet)
+COMP_LIBO=comp_mlib2.o
+COMP_THRO=comp_mthr2.o
+WORK_THRO=work_thr2.o
+GETSEQO = 
+
+# standard nxgetaa, no memory mapping for 0 - 6
+#LGETLIB=getseq.o lgetlib.o
+#NGETLIB=nmgetlib
+
+# memory mapping for 0FASTA, 5PIRVMS, 6GCGBIN
+LGETLIB= $(GETSEQO) lgetlib.o lgetaa_m.o
+NGETLIB=nmgetlib
+
+NRAND=nrandom
+
+# use ncbl_lib.c for BLAST1.4 support instead of ncbl2_mlib.c
+#NCBL_LIB=ncbl_lib.o
+
+# this option should support both formats (BLAST1.4 not currently supported): 
+#NCBL_LIB=ncbl_lib.o ncbl2_mlib.o
+
+# normally use ncbl2_mlib.c
+#NCBL_LIB=ncbl2_mlib.o
+#LIB_M= -lm
+
+# this option supports NCBI BLAST2 and mySQL
+# it requires  "-I/usr/local/include/mysql -DMYSQL_DB" in CFLAGS
+# and "-L/usr/lib/mysql -lmysqlclient -lz" in LIB_M
+# some systems may also require a LD_LIBRARY_PATH change
+# LIB_M= -L/usr/lib/mysql -lmysqlclient -lz -lm
+LIB_M= -L/usr/lib/mysql -lmysqlclient -lz -L/usr/local/pgsql/lib -lpq -lm -lcrypto -lssl
+# LIB_M= -lm
+NCBL_LIB=ncbl2_mlib.o mysql_lib.o pgsql_lib.o
+# NCBL_LIB=ncbl2_mlib.o
+
+include ../make/Makefile.pcom
+
+include ../make/Makefile.fcom
+
diff --git a/make/Makefile36.nmk_com b/make/Makefile36.nmk_com
new file mode 100644
index 0000000..8303d8f
--- /dev/null
+++ b/make/Makefile36.nmk_com
@@ -0,0 +1,30 @@
+#
+# $Name:  $ - $Id: Makefile36.nmk_com 1203 2013-07-20 12:55:48Z wrp $
+#
+# commands common to all architectures
+# if your architecture does not support "include", append at the end.
+#
+
+# use for multiple query sequences
+COMP_LIBO=comp_mlib9.obj
+COMP_THRO=comp_mthr9.obj
+WORK_THRO=work_thr2.obj
+GETSEQO = 
+
+# standard nxgetaa, no memory mapping for 0 - 6
+LGETLIB=getseq.obj lgetlib.obj 
+NGETLIB=nmgetlib
+
+# memory mapping for 0FASTA, 5PIRVMS, 6GCGBIN
+# no memory mapping for Win32
+#LGETLIB= lgetlib.obj lgetaa_m.obj
+
+NRAND=mrandom
+
+# normally use ncbl2_mlib.c
+NCBL_LIB=ncbl2_mlib.obj
+#LIB_M= -lm
+
+include ../make/Makefile.nm_pcom
+
+include ../make/Makefile.nm_fcom
diff --git a/make/Makefile36m.common b/make/Makefile36m.common
new file mode 100644
index 0000000..3d86322
--- /dev/null
+++ b/make/Makefile36m.common
@@ -0,0 +1,51 @@
+#
+# $Name:  $ - $Id: Makefile36m.common 1250 2014-01-24 21:33:39Z wrp $
+#
+# commands common to all architectures
+# if your architecture does not support "include", append at the end.
+#
+
+COMP_LIBO=comp_mlib9.o	# reads database into memory for multi-query without delay
+COMP_THRO=comp_mthr9.o	# threaded version
+
+WORK_THRO=work_thr2.o
+GETSEQO = 
+
+# standard nxgetaa, no memory mapping for 0 - 6
+#LGETLIB=getseq.o lgetlib.o
+#NGETLIB=nmgetlib
+
+# memory mapping for 0FASTA, 5PIRVMS, 6GCGBIN
+LGETLIB= $(GETSEQO) lgetlib.o lgetaa_m.o
+NGETLIB=nmgetlib
+
+# use ncbl_lib.c for BLAST1.4 support instead of ncbl2_mlib.c
+#NCBL_LIB=ncbl_lib.o
+
+# this option should support both formats (BLAST1.4 not currently supported): 
+#NCBL_LIB=ncbl_lib.o ncbl2_mlib.o
+
+# normally use ncbl2_mlib.c
+#NCBL_LIB=ncbl2_mlib.o
+#LIB_M= -lm
+
+# this option supports NCBI BLAST2 and mySQL
+# it requires  "-I/usr/include/mysql -DMYSQL_DB" in CFLAGS
+# and "-L/usr/lib64/mysql -lmysqlclient -lz" in LIB_M
+# some systems may also require a LD_LIBRARY_PATH change
+
+LIB_M= -lm -lz
+#LIB_M= -L/usr/lib64/mysql -lmysqlclient -lz -lm
+NCBL_LIB=ncbl2_mlib.o
+#NCBL_LIB=ncbl2_mlib.o mysql_lib.o
+
+# threaded as _t, serial
+# include ../make/Makefile.pcom
+
+# threaded without _t
+include ../make/Makefile.pcom_t
+
+# serial only 
+# include ../make/Makefile.pcom_s
+
+include ../make/Makefile.fcom
diff --git a/make/Makefile36mpi.common b/make/Makefile36mpi.common
new file mode 100644
index 0000000..32fe30d
--- /dev/null
+++ b/make/Makefile36mpi.common
@@ -0,0 +1,43 @@
+#
+# $Name:  $ - $Id: Makefile36mpi.common 849 2011-10-21 20:09:55Z wrp $
+#
+# commands common to all architectures
+# if your architecture does not support "include", append at the end.
+#
+
+# use for multiple query sequences
+COMP_THRO=comp_mpi9.o
+WORK_THR_O=work_mpi2.o
+WORK_THRX_O=work_mpi2x.o
+GETSEQO = 
+
+# standard nxgetaa, no memory mapping for 0 - 6
+#LGETLIB=getseq.o lgetlib.o
+#NGETLIB=nmgetlib
+
+# memory mapping for 0FASTA, 5PIRVMS, 6GCGBIN
+LGETLIB= $(GETSEQO) lgetlib.o lgetaa_m.o
+NGETLIB=nmgetlib
+
+# use ncbl_lib.c for BLAST1.4 support instead of ncbl2_mlib.c
+#NCBL_LIB=ncbl_lib.o
+
+# this option should support both formats (BLAST1.4 not currently supported): 
+#NCBL_LIB=ncbl_lib.o ncbl2_mlib.o
+
+# normally use ncbl2_mlib.c
+#NCBL_LIB=ncbl2_mlib.o
+#LIB_M= -lm -lz
+
+# this option supports NCBI BLAST2 and mySQL
+# it requires  "-I/usr/local/include/mysql -DMYSQL_DB" in CFLAGS
+# and "-L/usr/local/lib/mysql -lmysqlclient -lz" in LIB_M
+# some systems may also require a LD_LIBRARY_PATH change
+#LIB_M= -L/usr/local/lib/mysql -lmysqlclient -lz -lm
+#NCBL_LIB=ncbl2_mlib.o mysql_lib.o
+NCBL_LIB=ncbl2_mlib.o
+LIB_M = -lm -lz
+
+include ../make/Makefile.mp_com2
+
+include ../make/Makefile.fcom
diff --git a/make/Makefile36t.common b/make/Makefile36t.common
new file mode 100644
index 0000000..e6d4952
--- /dev/null
+++ b/make/Makefile36t.common
@@ -0,0 +1,43 @@
+#
+# $Name:  $ - $Id: Makefile36t.common 344 2010-06-29 18:20:22Z wrp $
+#
+# commands common to all architectures
+# if your architecture does not support "include", append at the end.
+#
+
+# use for multiple query sequences
+COMP_LIBO=comp_mlib4.o
+COMP_THRO=comp_mthr4.o
+WORK_THRO=work_thr2.o
+GETSEQO = 
+
+# standard nxgetaa, no memory mapping for 0 - 6
+#LGETLIB=getseq.o lgetlib.o
+#NGETLIB=nmgetlib
+
+# memory mapping for 0FASTA, 5PIRVMS, 6GCGBIN
+LGETLIB= $(GETSEQO) lgetlib.o lgetaa_m.o
+NGETLIB=nmgetlib
+
+# use ncbl_lib.c for BLAST1.4 support instead of ncbl2_mlib.c
+#NCBL_LIB=ncbl_lib.o
+
+# this option should support both formats (BLAST1.4 not currently supported): 
+#NCBL_LIB=ncbl_lib.o ncbl2_mlib.o
+
+# normally use ncbl2_mlib.c
+#NCBL_LIB=ncbl2_mlib.o
+#LIB_M= -lm
+
+# this option supports NCBI BLAST2 and mySQL
+# it requires  "-I/usr/local/include/mysql -DMYSQL_DB" in CFLAGS
+# and "-L/usr/local/lib/mysql -lmysqlclient -lz" in LIB_M
+# some systems may also require a LD_LIBRARY_PATH change
+#LIB_M= -L/usr/local/lib/mysql -lmysqlclient -lz -lm
+#NCBL_LIB=ncbl2_mlib.o mysql_lib.o
+NCBL_LIB=ncbl2_mlib.o
+LIB_M= -lm
+
+include ../make/Makefile.pcom
+
+include ../make/Makefile.fcom
diff --git a/make/README b/make/README
new file mode 100644
index 0000000..b492066
--- /dev/null
+++ b/make/README
@@ -0,0 +1,44 @@
+
+22-Jan-2014
+
+fasta36/Make
+
+================
+Makefiles for different Unix/Linux/MacOS configurations
+
+****************
+These make files are designed to be run from the ../src directory, e.g.
+
+cd ~/fasta36/src
+make -f ../make/Makefile.linux64_sse2 all
+****************
+
+While several different architectures are specified here, the files
+that are used the most are:
+
+Makefile.linux64_sse2 -- standard Linux(64-bit) Makefile
+
+ -- now equivalent to Makefile.linux64 and Makefile.linux.  For
+    non-sse2 compiles, use Makefile.linux64_nosse2
+
+Makefile.linux_icc_sse2 -- Linux (64-bit) with Intel icc compiler
+
+Makefile.os_x86_64 -- standard MacOS (64-bit) Makefile (also sse2)
+
+Makefile.nmk_icl -- Windows 32-bit with Intel icc compiler
+
+================
+
+Most of the other Makefiles have not been tested for months or years.
+
+================================================================
+
+The major Makefiles above include other makefiles, including
+Makefile36m.common, Makefile.pcom_t, Makefile.fcom to compile and link
+the appropriate programs.
+
+The Windows Makefile.nmk_icl uses Makefile.nm_fcom and Makefile.nm_pcom
+
+The windows environment requires Microsoft nmake.
+
+================================================================
diff --git a/make/make_osx_univ.sh b/make/make_osx_univ.sh
new file mode 100755
index 0000000..a9d0420
--- /dev/null
+++ b/make/make_osx_univ.sh
@@ -0,0 +1,31 @@
+#!/bin/csh
+# build the i386 and x86_64 programs, then merge each pair into one universal binary in ../bin with lipo
+set bin = ../bin
+if (! -d ../bin ) mkdir $bin
+#if (! -d ../bin/ppc) mkdir $bin/ppc
+if (! -d ../bin/i386) mkdir $bin/i386
+if (! -d ../bin/x86_64) mkdir $bin/x86_64
+
+# cd ../src
+# rm *.o
+# make -f ../make/Makefile.os_x all
+# make -f ../make/Makefile.os_x uinstall
+# NOTE(review): no "cd ../src" is active above, so this presumably has to be started from the src directory
+rm *.o
+make -f ../make/Makefile.os_x86 all
+make -f ../make/Makefile.os_x86 uinstall
+
+rm *.o
+make -f ../make/Makefile.os_x86_64 all
+make -f ../make/Makefile.os_x86_64 uinstall
+rm *.o
+cd ../bin
+foreach n ( i386/* )
+set f=$n:t
+#lipo -create ppc/$f i386/$f x86_64/$f -output $f
+lipo -create i386/$f x86_64/$f -output $f
+echo "Universal $f built"
+end
+#rm -rf ppc/ i386/ x86_64/
+#rm -rf i386/ x86_64/
+echo "Done!"
diff --git a/misc/README b/misc/README
new file mode 100644
index 0000000..5abd2ca
--- /dev/null
+++ b/misc/README
@@ -0,0 +1,14 @@
+
+22-Jan-2014
+
+fasta36/misc
+
+Perl scripts for simple tasks
+
+parse_m9.pl -- parse -m 9 output to produce tab-delimited files with the fields:
+	     query_acc, query_len, lib_acc, lib_len, score, bits, evalue, f_id, f_sim, alen, and start/stop coordinates
+
+res2R.pl    -- convert fasta36 -R raw.results files into something 'R' can digest
+
+shuffle_embed.pl -- take a sequence and embed it into a shuffled version of itself
+
diff --git a/misc/parse_m9.pl b/misc/parse_m9.pl
new file mode 100755
index 0000000..2ddffbe
--- /dev/null
+++ b/misc/parse_m9.pl
@@ -0,0 +1,139 @@
+#!/usr/bin/perl -w
+#
+# parse_m9.pl -- a simple script to parse fasta/fastx/ssearch -m 9 output and produce a simple set of results:
+# >query_id<tab>len
+# hit_acc<tab>len<tab>score<tab>bits<tab>expect<tab>f_id<tab>f_sim<tab>...
+#
+use strict;
+use Getopt::Long;
+use vars qw($e_cutoff $p_cutoff $head);
+
+die "usage -- parse_m9.pl [--head] [--expect e_cut] [--percid p_cut] m9_out.file\n" unless @ARGV;
+
+$e_cutoff = 10.0;
+$p_cutoff = 0.0;
+$head = 0;
+
+GetOptions("expect=s" => \$e_cutoff,
+	   "percid=s" => \$p_cutoff,
+	   "head" => \$head,
+	  );
+
+my @hit_fields = ();
+my @m9_fields = ();
+my $first_hit = 1;
+
+my $res_handle;	# reused for every result file named on the command line
+for my $s_res_file ( @ARGV ) {
+  # files that cannot be opened are silently skipped
+  next unless open($res_handle, $s_res_file);
+
+  while (my ($q_num, $query_descr, $query_len, $best_yes) = skip_to_results($res_handle)) {
+    last unless $query_descr;	# skip_to_results returned "" -- no more queries
+
+    unless ($best_yes) {
+      <$res_handle>;	# skip >>><<<
+# uncomment for queries with no hits
+#    print ">$query_descr\t$query_len\n";
+      next;
+    }
+
+    print ">$query_descr\t$query_len\n" unless $head;
+
+    while (my $line = <$res_handle>) { # for each result
+      last if $line =~ m/>>><<</;
+      next if $line =~ m/^\+\-/;	# skip over HSPs
+      chomp ($line);
+      my ($left, $right) = split(/\t/,$line);	# description/score columns <tab> -m 9 columns
+      my @fields = split(/\s+/,$left);
+      my @afields = split(/\s+/,$right);
+
+      my $evalue = $fields[-1];
+      my $percid = $afields[0]*100.0;
+      last if ($evalue > $e_cutoff && $percid < $p_cutoff);	# NOTE(review): stops only when BOTH cutoffs fail; confirm && (not ||) is intended
+
+      my $frame = "";
+      my $l_len = $fields[-4];
+      if ($fields[-4] =~ m/\[[fr]\]/) {	# a "[f]"/"[r]" frame field pushes the length one column to the left
+	$l_len = $fields[-5];
+	$frame = $fields[-4];
+	if ($head) {unshift @hit_fields, "[fr]";}
+      }
+
+      if ($head && $first_hit) {	# print the column-name line once, before the first hit
+	unshift @hit_fields, qw(acc llen);
+	print "#" . join("\t",(@hit_fields, @m9_fields)) . "\n";
+	print ">$query_descr\t$query_len\n";
+	$first_hit = 0;
+	$head = 0;
+      }
+
+      $l_len =~ s/\(//;
+      $l_len =~ s/\)//;
+      my ($l_db,$l_acc) = parse_descr($fields[0]);
+
+      my @out_fields = ($l_acc, $l_len);
+      if ($l_db) { unshift @out_fields, $l_db;}
+      if ($frame) { push @out_fields, $frame;}
+      print join("\t",(@out_fields, @fields[-3,-2,-1], @afields)) . "\n";
+    }
+  }
+}
+
+sub skip_to_results {
+  my ($res_handle) = @_;
+  my ($q_num, $query_desc, $query_len, $best_yes);
+  # scan forward to the next "N>>>query" line; returns "" when ">>>///" or EOF comes first
+  while (my $line = <$res_handle>) {
+    if ($line =~ m/^\s*(\d+)>>>(\S+)\s/) {
+      ($q_num,$query_desc) = ($1,$2);
+      ($query_len) = ($line =~ m/\s(\d+)\s\w+$/);	# trailing "<number> <word>" on the query line
+      goto have_query;
+    }
+    elsif ($line =~ m/>>>\/\/\//) {goto done;}
+  }
+  warn "EOF - no query\n";
+ done:
+  return "";
+  # query found: look for "The best scores are:" (best_yes = 1) or "!! No sequences" (best_yes = 0)
+ have_query:
+  while (my $line = <$res_handle>) {
+    $best_yes = 0;
+    if ($line =~ m/^The best scores are:/) {
+      my ($left, $right) = split(/\t/,$line);
+      if ($head) {	# keep the column names in the file-level @hit_fields/@m9_fields for the header line
+	my @afields = split(/\s+/,$left);
+	@m9_fields = split(/\s+/,$right);
+	@hit_fields = @afields[-3,-2,-1];
+      }
+      $best_yes = 1;
+      last;
+    }
+    last if ($line =~ m/^!! No sequences/);
+  }
+  return ($q_num, $query_desc, $query_len, $best_yes);
+}
+
+sub parse_descr {
+  my ($descr) = @_;
+  # split a library identifier into (db, acc); a trailing ".N" version is removed from acc
+  my ($dummy, $gi, $db, $acc);
+
+  if ($descr !~ m/\|/) {
+    $db="";
+    $acc=$descr;
+  }
+  elsif ($descr =~ m/gi\|/) {
+    # has std gi|12345|ref|acc
+    ($dummy, $gi, $db, $acc) = split(/\|/,$descr);
+  }
+  elsif ($descr =~ m/\d+\|\w+/) {
+    # has std 12345|ref|acc from libtype=10
+    ($gi, $db, $acc) = split(/\|/,$descr);
+  }
+  # unrecognized or truncated identifier: return it whole instead of undef
+  ($db, $acc) = ("", $descr) unless (defined($db) && defined($acc));
+  # remove version number
+  $acc =~ s/\.\d+$//;
+  return ($db, $acc);
+}
diff --git a/misc/res2R.pl b/misc/res2R.pl
new file mode 100755
index 0000000..a8d1f75
--- /dev/null
+++ b/misc/res2R.pl
@@ -0,0 +1,22 @@
+#!/usr/bin/perl -w
+
+# res2R.pl -- turn a FASTA .res file into a two-column (len, score) table for 'R'
+
+# the first input line is read and thrown away
+my $skipped = <>;
+
+print "len\tscore\n";
+
+# columns of each remaining line, split on whitespace:
+# [0] ACC; [1] 0; [2] len; [3] frame; [4] comp; [5] H; [6-8] score[0-2];
+# [9] rst.escore [10] segnum; [11] seglen; [12]lseek
+
+while (defined(my $row = <>)) {
+    # a line containing "/**" ends the data
+    last if index($row, "/**") >= 0;
+
+    my @col = split(/\s+/, $row);
+
+    # emit columns [2] and [6]
+    print "$col[2]\t$col[6]\n";
+}
diff --git a/misc/shuffle_embed.pl b/misc/shuffle_embed.pl
new file mode 100755
index 0000000..ee034d6
--- /dev/null
+++ b/misc/shuffle_embed.pl
@@ -0,0 +1,148 @@
+#!/usr/bin/perl -w
+
+use strict;
+use Getopt::Long;
+use Pod::Usage;
+
+my ($window, $insert, $shelp, $help, $n_shuff) = (20, 1, 0, 0,1);
+
+GetOptions("window=i" => \$window,
+	   "insert=i" => \$insert,
+	   "n=i" => \$n_shuff,
+	   "h|?" => \$shelp,
+	   "help" => \$help,
+
+    );
+
+pod2usage(1) if $shelp;
+pod2usage(exitstatus => 0, verbose => 2) if $help;
+
+my ($seq, $header) = ("","");
+# read FASTA text from <>; each '>' line closes the previous record, which is handed to process_seq
+while (my $line = <>) {
+  chomp($line);
+  if ($line =~ /^>/) {
+    if ($seq) { process_seq($header, $seq, $window, $insert, $n_shuff);}
+    $header = $line;
+    $seq = "";
+  }
+  else {
+    $seq .= $line;
+  }
+}
+# the last record has no following '>' line, so it is processed here
+if ($seq) { process_seq($header, $seq, $window, $insert, $n_shuff);}
+
+exit(0);
+
+sub process_seq {
+  my ($header, $seq, $window, $insert, $n_shuff) = @_;
+  # print $n_shuff shuffled copies of $seq as FASTA; when $insert is true the original $seq is embedded in the middle
+  # drop every character that is not an ASCII letter
+  $seq =~ s/[^A-Za-z]//g;
+  my $seq_len = length($seq);
+
+  for (my $shuff_cnt = 0; $shuff_cnt < $n_shuff; $shuff_cnt++) {
+    # shuffle, then cut the shuffled copy into a left and a right part near its midpoint
+    my $shuff_seq = win_shuffle($seq, $window);
+    my $left_sseq = substr($shuff_seq, 0, ($seq_len+1)/2);
+    my $right_sseq = substr($shuff_seq, ($seq_len+1)/2, $seq_len - ($seq_len+1)/2 +1);
+
+    my $embed_seq = $left_sseq;
+    if ($insert) {
+      $embed_seq .= $seq;
+    }
+    $embed_seq .= $right_sseq;
+
+    my ($acc, $descr) = ($header =~ m/^>(\S+)\s*(.*)$/);
+    # "e:start-end" in the header is the 1-based position of the embedded, unshuffled $seq
+    if ($insert) {
+      if ($n_shuff > 1) {
+	printf ">%s_%d e:%d-%d %s\n",$acc,$shuff_cnt,length($left_sseq)+1,length($left_sseq) + $seq_len,$descr;
+      }
+      else {
+	printf ">%s e:%d-%d %s\n",$acc,length($left_sseq)+1,length($left_sseq) + $seq_len,$descr;
+      }
+    } else {
+      if ($n_shuff > 1) {
+	printf ">%s_shuff_%d %s\n",$acc,$shuff_cnt, $descr;
+      }
+      else {
+	printf ">%s_shuff %s\n",$acc, $descr;
+      }
+    }
+    # wrap the sequence at 60 characters per output line
+    $embed_seq =~ s/(.{60})/$1\n/g;
+
+    print "$embed_seq\n";
+  }
+}
+
+my $random_seed;
+
+sub win_shuffle {
+  my ($seq, $win) = @_;
+  # returns $seq with residues shuffled inside $win-length windows and the windows reordered
+  # break sequence into $win len pieces (the last piece may be shorter)
+  my @seq_arr = ($seq =~ m/(.{1,$win})/g);
+
+  # shuffle the residues inside each piece
+  for (my $j = 0; $j < @seq_arr; $j++) {
+    my @subs_arr = split(//,$seq_arr[$j]);
+    fy_shuffle(\@subs_arr);
+    $seq_arr[$j] = join("",@subs_arr);
+  }
+
+  # now shuffle the window order
+  fy_shuffle(\@seq_arr);
+  # and put it back together
+  return join("",@seq_arr);
+}
+
+# fy_shuffle(array_ref) -- Fisher-Yates shuffle of @$arr in place; dies unless given an ARRAY reference
+sub fy_shuffle {
+  my $arr = shift;
+
+  die "fy_shuffle (array_ref)" unless (ref($arr) eq 'ARRAY');
+
+  return unless @$arr;
+  my $i = scalar(@$arr)-1;
+  # choose $is from 0..$i inclusive; rand($i) could never pick $i itself, so the shuffle was not uniform
+  while ($i > 0) {
+    my $is = int(rand($i+1));
+    ($arr->[$i],$arr->[$is]) = ($arr->[$is],$arr->[$i]);
+    $i--;
+  }
+}
+
+__END__
+
+=pod
+
+=head1 NAME
+
+shuffle_embed.pl
+
+=head1 SYNOPSIS
+
+shuffle_embed.pl --n=1 --insert=1 --window=20 file.seq > file.shuff_emb
+
+=head1 OPTIONS
+
+ -h	    short help
+ --help     include description
+ --insert=0 shuffle only, do not insert unshuffled
+ --n=1      number of shuffles
+ --window   size of shuffle window
+
+=head1 DESCRIPTION
+
+shuffle_embed.pl takes a fasta formatted protein or DNA sequence file,
+reads the sequence, shuffles it, splits the shuffled sequence in the
+middle, and embeds the unshuffled sequence between the two halves of
+the shuffled sequence.
+
+With --insert 0, the sequences produced are random, no unshuffled
+sequence is embedded.
+
+=cut
diff --git a/psisearch2/README.md b/psisearch2/README.md
new file mode 100644
index 0000000..5bbf1e6
--- /dev/null
+++ b/psisearch2/README.md
@@ -0,0 +1,92 @@
+
+## PSISEARCH2 - iterative PSSM-based similarity searching using PSIBLAST or SSEARCH36
+
+#### September, 2016
+
+`psisearch2_msa.pl` and `psisearch2_msa.py` (both scripts have
+identical arguments and functionality) perform iterative searches
+using PsiBLAST or ssearch36, but with additional options that
+dramatically improve search selectivity.  In tests with challenging
+queries, `psisearch2_msa.pl/py` searches often reduce the number of
+false-positives more than ten fold, and sometimes 100-fold or more.
+
+For a simple test of `psisearch2`, try (from the `psisearch2/` directory):
+
+```
+  ./psisearch2_msa.pl --query ../seq/mgstm1.aa --db ../seq/prot_test.lseg
+```
+
+This command should produce the output:
+```
+#./psisearch2_msa.pl --query ../seq/mgstm1.aa --db ../seq/prot_test.lseg
+./psisearch2_msa.pl ssearch ../seq/mgstm1.aa ../seq/prot_test.lseg converged (2 iterations)
+```
+as well as four files:
+```
+mgstm1.aa.it1
+mgstm1.aa.it1.bnd_out
+mgstm1.aa.it2
+mgstm1.aa.it2.bnd_out
+```
+
+Real iterative searches must be run against comprehensive sequence
+databases, like SwissProt, RefSeq proteins, or Uniprot, e.g.:
+
+```
+   ./psisearch2_msa.pl --query ../seq/mgstm1.aa --db /slib2/swissprot.lseg
+```
+
+## More selective searches
+
+By default, `psisearch2_msa.pl` simply runs a search program
+(`ssearch36` by default, use `--pgm psiblast` to run `psiblast`),
+scans the output to produce a multiple sequence alignment, which is
+then used to build a `PSSM` for the next iteration.  Running
+`psisearch2_msa.pl` for five iterations should produce results very
+similar to running `psiblast` for five iterations.
+
+`psisearch2_msa.pl` can perform much more selective searches using the
+`--query_seed` option, which is equivalent to the `--int_seed=query`
+and `--end_seed=query` options.  The `--query_seed` option causes the
+`m89_btop_msa2.pl` program to insert query residues into the gapped
+positions of subject sequences in the sequence library used to produce
+the `PSSM`.  This `PSSM` is slightly less sensitive than the normal
+model, but it is much less likely to produce alignment-overextension,
+so it is much less likely for alignments to extend into neighboring,
+non-homologous regions and contaminate the `PSSM` model.
+
+In addition to `--query_seed`, two other options: `--align`, and
+`--domain` can also be used to reduce alignment overextension.
+`--align` causes `psisearch2_msa.pl` to include only the portion of a
+subject sequence that aligned the first time it shares significant
+similarity to the query PSSM; additional residues from the sequence
+that are aligned in later iterations are not included. This option can
+be used with both `--pgm ssearch` and `--pgm psiblast`.
+
+The `--domain` option uses a more sophisticated strategy for including
+additional residues in a PSSM.  They are included only if the
+similarity score across the domain has a probability less than 0.001
+(q-value 30). This option is only available for `--pgm ssearch`, and
+requires that a second option, `--annot_db`, be specified.  Typically
+`--annot_db=pfam`.
+
+## Customizing psisearch2
+
+`psisearch2_msa.pl` is a script that uses other programs for
+similarity searching and constructing the PSSM (and for annotating
+domains in alignments if the `--domain` option is used).  The location
+of these programs is defined in the `psisearch2_msa.pl` and
+`psisearch2_msa.py` scripts using the `$pgm_bin` and `$pgm_data`
+variables (perl, `pgm_bin` and `pgm_data` for python).  You will
+probably need to modify those variables for your installation.  In
+particular, the NCBI `datatool` program is required for producing the
+asn binary files required by `ssearch36`.
+
+## `psisearch2_msa.pl/py` output
+
+In this current version, the `psisearch2_msa.pl` and
+`psisearch2_msa.py` programs *ONLY* produce tab-delimited BTOP output.
+The programs do not produce the traditional `psiblast` alignment,
+which includes a list of hits with E()-values, and a set of
+alignments.  More alignment output flexibility will be available soon.
+
diff --git a/psisearch2/m89_btop_msa2.pl b/psisearch2/m89_btop_msa2.pl
new file mode 100755
index 0000000..0e3d679
--- /dev/null
+++ b/psisearch2/m89_btop_msa2.pl
@@ -0,0 +1,927 @@
+#!/usr/bin/perl -w
+
+################################################################
+# copyright (c) 2014,2015 by William R. Pearson and The Rector &
+# Visitors of the University of Virginia */
+################################################################
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under this License is distributed on an "AS
+# IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+# express or implied.  See the License for the specific language
+# governing permissions and limitations under the License.
+################################################################
+
+################################################################
+# m89_btop_msa2.pl --query query.file blast_tab_btop_file
+################################################################
+# m89_btop_msa2.pl takes a query sequence and either a BLAST -outfmt
+# 7/fasta -m 8CB output file (default) or fasta -m 8B (--m_format m9)
+# output file with a BTOP field, and constructs a query-driven
+# multiple sequence alignment of the subject sequences that can be
+# used as input to psiblast with the "--in_msa msa.file" option.
+#
+# (because BLAST BTOP encoding provides the mismatched residues, the
+# library sequences are not required to produce the MSA -- they are
+# available in the BTOP string)
+#
+# The BTOP alignment encoding file generated from "blastp/n" or
+# "blast_formatter" using the command: blast_formatter -archive
+# blast_output.asn -outfmt '7 qseqid sseqid pident length mismatch
+# gapopen qstart qend sstart send evalue bitscore score btop' >
+# blast_output.tab_annot
+#
+################################################################
+
+use strict;
+use Pod::Usage;
+use Getopt::Long;
+
+# read lines of the form:
+# gi|121694|sp|P20432.1|GSTT1_DROME	gi|121694|sp|P20432|GSTT1_DROME	100.00	209	0	0	1	209	1	209	6e-156	433	1113	209
+# gi|121694|sp|P20432.1|GSTT1_DROME	gi|1170090|sp|P04907|GSTF3_MAIZE	26.77	198	123	7	4	185	6	197	2e-08	51.2	121	FL1YG ... 1NKRA1YW1
+# gi|121694|sp|P20432.1|GSTT1_DROME	gi|81174731|sp|P0ACA5|SSPA_ECO57	39.66	58	32	2	43	100	49	103	8e-06	43.9	102	EDFLLI ... V-I-NEQS3FM
+# gi|121694|sp|P20432.1|GSTT1_DROME	gi|121695|sp|P12653|GSTF1_MAIZE	27.62	181	107	7	32	203	34	199	9e-05	40.8	94	LI1LF ... N-1AS1CLLM1
+
+
+# option defaults: help flags, input format ($m_format), the E()-value
+# threshold a hit must not exceed to be included ($evalue), the domain
+# Q-value threshold ($qvalue), and the --domain flag
+my ($shelp, $help, $m_format, $evalue, $qvalue, $domain_bound) = (0, 0, "m8CB", 0.001, 30.0,0);
+# file names and gap-masking modes; all empty (unset) by default
+my ($query_file, $bound_file_in, $bound_file_only, $bound_file_out, $masked_lib_out,$mask_type_end, $mask_type_int) = ("","","","","","","");
+my $query_lib_r = 0;
+# $eval2_fmt is set by skip_to_m9results()/skip_to_m8results() when the
+# input carries a second E()-value; $eval2 selects how it is used
+my ($eval2_fmt, $eval2) = (0,"");
+
+# several options have aliases that write to the same variable
+GetOptions(
+    "query=s" => \$query_file,
+    "query_file=s" => \$query_file,
+    "eval2=s" => \$eval2,		# change the evalue used for inclusion
+    "evalue=f" => \$evalue,
+    "expect=f" => \$evalue,
+    "qvalue=f" => \$qvalue,
+    "format=s" => \$m_format,
+    "m_format=s" => \$m_format,
+    "mformat=s" => \$m_format,
+    "bound_file_in=s" => \$bound_file_in,
+    "bound_file_only=s" => \$bound_file_only,
+    "bound_file_out=s" => \$bound_file_out,
+    "bound_in=s" => \$bound_file_in,
+    "bound_only=s" => \$bound_file_only,
+    "bound_out=s" => \$bound_file_out,
+    "masked_library_out=s" => \$masked_lib_out,
+    "masked_lib_out=s" => \$masked_lib_out,
+    "mask_lib_out=s" => \$masked_lib_out,
+    "mask_out=s" => \$masked_lib_out,
+    "end_mask_type=s" => \$mask_type_end,
+    "end_mask=s" => \$mask_type_end,
+    "domain_bound" => \$domain_bound,
+    "domain" => \$domain_bound,
+    "int_mask_type=s" => \$mask_type_int,
+    "int_mask=s" => \$mask_type_int,
+    "h|?" => \$shelp,
+    "help" => \$help,
+    );
+
+pod2usage(1) if $shelp;
+pod2usage(exitstatus => 0, verbose => 2) if $help;
+# need search results either on STDIN (file or pipe) or as a file argument
+unless (-f STDIN || -p STDIN || @ARGV) {
+ pod2usage(1);
+}
+
+################
+# initialization
+#
+# @random_res is the residue pool used when gaps in the
+# --masked_lib_out library are filled with random residues.  It is
+# indexed for a "rand" end mask and also for a "rand" internal mask
+# (e.g. --end_mask=query --int_mask=rand), so build it in either case;
+# otherwise random fills would come from an empty pool.
+my @random_res = ();
+if ($mask_type_end =~ m/^rand/i || $mask_type_int =~ m/^rand/i) {
+  @random_res = init_random_res();
+}
+
+# column names for the score fields of an m9 line and for the leading
+# columns of an m8 (tabular) line
+my @m9_field_names = qw(percid perc_sim raw_score a_len q_start q_end qc_start qc_end s_start s_end sc_start sc_end gap_q gap_l fs);
+my @m8_field_names = qw(q_seqid s_seqid percid a_len mismatch gopen q_start q_end s_start s_end evalue bits);
+
+my @hit_list = ();
+
+my %seq_bound = ();  # boundaries for each accession
+my %acc_names = ();  # generate uniq s_seq_id names
+my %multi_align = ();  # MSA rows (array refs), keyed by sequence name
+my @multi_names = ();  # sequence names in output order
+
+################
+# get query sequence, and insert into MSA
+#
+my ($query_acc, $query_seq_r, $query_len);
+if ($query_file) {
+  ($query_acc, $query_seq_r) = parse_query_lib($query_file);
+  $query_len = scalar(@$query_seq_r)-1;  # -1 for ' ' 1: offset
+}
+
+# a missing --query file, or one that yields no residues, is fatal
+if (! $query_file || !$query_len) {
+  die "query sequence required";
+}
+
+# the query is the first MSA row; its row is built from a BTOP string
+# that is just the match count $query_len, covering 1..$query_len
+push @multi_names, $query_acc;
+$acc_names{$query_acc} = 1;
+$multi_align{$query_acc} = btop2alignment($query_seq_r, $query_len, {BTOP=>$query_len, q_start=>1, q_end=>$query_len}, 0);
+# width of the name column in the MSA output; grows as hits are read
+my $max_sseqid_len = length($query_acc);
+
+################
+# get sequence boundaries if available
+#
+my $seq_bound_hr = 0;
+my @seq_bound_accs = ();  # accessions with boundaries, in first-seen order
+
+# default: the (initially empty) file-level %seq_bound
+$seq_bound_hr = \%seq_bound;
+
+# parse_bound_file() fills %seq_bound and @seq_bound_accs;
+# --bound_file_in takes precedence over --bound_file_only
+if ($bound_file_in) {
+  $seq_bound_hr = parse_bound_file($bound_file_in);
+}
+elsif ($bound_file_only) {
+  $seq_bound_hr = parse_bound_file($bound_file_only);
+}
+
+################
+# skip down to "The best scores are:"
+#
+my ($q_num, $query_descr, $q_len, $lib_cnt, $lib_len, $best_yes, $last_fields_r);
+
+if ($m_format =~ m/^m9/i) {
+  # skip_to_m9results() returns (q_num, query_desc, q_start, q_stop,
+  # q_len, l_num, l_len, best_yes); the query start/stop values are not
+  # used here, so they are discarded to keep the remaining values
+  # lined up with the right variables
+  ($q_num, $query_descr, undef, undef, $q_len, $lib_cnt, $lib_len, $best_yes) = skip_to_m9results();
+  warn "Cannot find the best scores are:"  unless $query_descr;
+}
+elsif ($m_format =~ m/^m8/) {
+  ($query_descr, $best_yes, $last_fields_r) = skip_to_m8results();
+  warn "Cannot find the best scores are: in $ARGV" unless $query_descr;
+
+  # $last_fields_r may be undefined when no "# Query:" header was
+  # found; do not dereference it in that case
+  if ($last_fields_r && scalar(@{$last_fields_r})) {
+    push @m8_field_names, @{$last_fields_r};
+  }
+  # the annotation column is always the last m8 field
+  push @m8_field_names, "annot";
+}
+else {
+  die "cannot recognize format: $m_format";
+}
+
+# choose the function that supplies the value compared with $evalue;
+# the alternatives are only used when the input has a second E()-value
+# ($eval2_fmt) and --eval2 is "eval2" or "ave"
+my $eval_fptr = \&eval_func;
+if ($eval2_fmt && $eval2) {
+  if($eval2 eq 'eval2') {
+    $eval_fptr = \&eval2_func;
+  }
+  elsif ($eval2 eq 'ave') {
+    $eval_fptr = \&eval_ave;
+  }
+}
+
+# split the query identifier from the results file into its
+# '|'-delimited parts; a leading gi|NNN| adds two fields in front
+my ($tmp, $gi, $q_db, $q_acc, $q_id);
+
+if ($query_descr =~ /^gi\|\d+\|/) {
+  ($tmp, $gi, $q_db, $q_acc, $q_id) = split(/\|/,$query_descr);
+}
+elsif ($query_descr eq 'unnamed') {
+  $q_acc = 'unnamed'
+}
+else {
+  ($q_db,$q_acc, $q_id) = split(/\|/,$query_descr);
+}
+
+# identifiers without '|' fields: use the whole identifier
+unless ($q_acc) {
+  $q_acc = $query_descr;
+}
+
+$acc_names{$q_acc} = 1;	# this is necessary for the new acc-only NCBI SwissProt libraries
+
+# drop a trailing ".N" version suffix (after the name was recorded above)
+$q_acc =~ s/\.\d+$//;
+
+# main loop: read one hit line at a time, parse it into %hit_data, and
+# add at most one row per subject sequence to the MSA
+# (@multi_names / %multi_align), optionally restricted to sequence
+# boundaries
+while (my $line = <>) {
+
+  chomp $line;
+  next unless ($line);
+  my %hit_data =();
+  my ($s_seqid, $subj_acc, $s_seqid_u);
+  my $annot_f='NULL';
+
+  if ($m_format =~ m/^m9/i) {
+    last if $line =~ m/>>>/;
+    next if $line =~ m/^\+\-/; # skip over HSPs
+    # tab-separated parts: description+scores, m9 fields, BTOP, annotation
+    my ($left, $right, $align_f) = ("","",'NULL');
+    ($left, $right, $align_f, $annot_f) = split(/\t/,$line);
+
+    $align_f= 'NULL' unless $align_f;
+    $annot_f= 'NULL' unless $annot_f;
+
+    # the subject identifier is either "db:id acc" or "db|acc|id"
+    my @fields = split(/\s+/,$left);
+    my ($ldb, $l_id, $l_acc) = ("","","");
+    if ($fields[0] =~ m/:/) {
+      ($ldb, $l_id) = split(/:/,$fields[0]);
+      ($l_acc) = $fields[1];
+    } else {
+      ($ldb, $l_acc,$l_id) = split(/\|/,$fields[0]);
+    }
+
+    @hit_data{@m9_field_names} = split(/\s+/,$right);
+    # bits/E()-values are the last 2 (or 3, with E2()) fields of $left
+    if ($eval2_fmt) {
+      @hit_data{qw(bits evalue eval2)} = @fields[-3, -2,-1];
+    }
+    else {
+      @hit_data{qw(bits evalue)} = @fields[-2,-1];
+    }
+
+    #
+    # currently preselbdr files have $ldb|$l_acc, not full s_seqid, so construct it
+    #
+    ($s_seqid, $subj_acc) = (join('|',($ldb, $l_acc, $l_id)), "$ldb|$l_acc");
+    @hit_data{qw(s_seqid subj_acc)} = ($s_seqid, $subj_acc);
+    @hit_data{qw(query_id query_acc)} = ($query_descr, $q_acc);
+    $hit_data{BTOP} = $align_f;
+    $hit_data{annot} = $annot_f;
+  }
+  else {
+    # m8/tabular: a '#' comment line ends the hit list
+    last if $line =~ m/^#/;
+    @hit_data{@m8_field_names} = split(/\t/,$line);
+    $subj_acc = $hit_data{'s_seqid'};
+    # reduce gi|NNN|db|acc|id to db|acc
+    $subj_acc =~ s/^gi\|\d+\|(\w+\|\w+)\|?\w+/$1/;
+  }
+
+  # a better solution would be to rename the q_seqid, or at least to
+  # check for identity
+  # next if ($hit_data{q_seqid} eq $hit_data{s_seqid});
+  # skip a hit that matches the query identically over its full length
+  next if ($hit_data{a_len} == $query_len && $hit_data{BTOP} =~ m/^$query_len$/);
+
+  $s_seqid_u = $hit_data{'s_seqid'};
+  if ($acc_names{$s_seqid_u}) {
+    next;  # skip additional HSPs
+#    $acc_names{$s_seqid_u}++;
+#    $s_seqid_u .= "_". $acc_names{$subj_acc};
+  }
+  else {
+    $acc_names{$hit_data{'s_seqid'}} = 1;
+  }
+
+  # must be after duplicate seqid check because blast HSP's have bad E-values after good.
+  next if ($eval_fptr->(\%hit_data) > $evalue);
+
+  $hit_data{s_seqid_u} = $s_seqid_u;
+
+  if (length($s_seqid_u) > $max_sseqid_len) {
+    $max_sseqid_len = length($s_seqid_u);
+  }
+
+  my $have_dom = 0;
+  if ($domain_bound && $hit_data{annot}) {
+    my $hit_doms_ar = parse_hit_domains($hit_data{annot});
+    # scan from left to right to make domain boundaries based on $qvalue
+    # the following seems reversed, but it is putting upper (and lower) limits on boundaries
+    my ($left_bound, $right_bound) = @hit_data{qw(s_end s_start)};
+    foreach my $dom_r ( @$hit_doms_ar ) {
+      next unless $dom_r->{target} eq 'subj';
+      # next if $dom_r->{virtual};	# should be controlled by annotation process
+      next unless $dom_r->{qval} > $qvalue;
+
+      if ($dom_r->{s_start} < $left_bound) {
+	$left_bound = $dom_r->{s_start};
+	$have_dom = 1;
+      }
+
+      if ($dom_r->{s_end} > $right_bound) {
+	$right_bound = $dom_r->{s_end};
+	$have_dom = 1;
+      }
+    }
+
+    # record (or overwrite) the domain-derived boundaries for this subject
+    if ($have_dom) {
+      if (exists($seq_bound_hr->{$subj_acc})) {
+	@{$seq_bound_hr->{$subj_acc}}{qw(start end)} = ($left_bound, $right_bound);
+      }
+      else {
+	$seq_bound_hr->{$subj_acc} = {start=>$left_bound, end=>$right_bound};
+	push @seq_bound_accs, $subj_acc;
+      }
+    }
+  }
+
+  # must have separate @hit_list that can be sorted, for searches with multiple alignment results
+
+  # case 1: only sequences with known boundaries may enter the MSA
+  if ($bound_file_only || $have_dom) {
+    if (exists($seq_bound_hr->{$subj_acc})) {
+      my ($status, $alignment) = bound_btop2alignment($query_seq_r, $query_len, \%hit_data, @{$seq_bound_hr->{$subj_acc}}{qw(start end)});
+      if ($status) {	# aligment is within boundary
+	push @multi_names, $hit_data{s_seqid_u};
+	$multi_align{$hit_data{s_seqid_u}} = $alignment;
+      }
+      # do not delete entry, because it needs to be preserved
+    }
+  }
+  # case 2: known sequences are clipped to their boundaries; new
+  # sequences are added in full and their alignment ends are recorded
+  elsif ($bound_file_in) {
+    if (exists($seq_bound_hr->{$subj_acc})) {
+      my ($status, $alignment) = bound_btop2alignment($query_seq_r, $query_len, \%hit_data, @{$seq_bound_hr->{$subj_acc}}{qw(start end)});
+      if ($status) {
+	push @multi_names, $hit_data{s_seqid_u};
+	$multi_align{$hit_data{s_seqid_u}} = $alignment;
+#	push @multi_align, $alignment;
+      }
+    }
+    else {
+      push @multi_names, $hit_data{s_seqid_u};
+#      push @multi_align, btop2alignment($query_seq_r, $query_len, \%hit_data, );
+      $multi_align{$hit_data{s_seqid_u}} = btop2alignment($query_seq_r, $query_len, \%hit_data);
+      @{$seq_bound_hr->{$subj_acc}}{qw(start end)} = @hit_data{qw(s_start s_end)};
+      push @seq_bound_accs, $subj_acc;
+    }
+  }
+  else {  # no sequence boundaries
+    push @multi_names, $hit_data{s_seqid_u};
+    $multi_align{$hit_data{s_seqid_u}} = btop2alignment($query_seq_r, $query_len, \%hit_data);
+#    push @multi_align, btop2alignment($query_seq_r, $query_len, \%hit_data);
+    # remember the alignment ends so they can be written to --bound_file_out
+    if (!$have_dom && ($bound_file_out)) {
+      @{$seq_bound_hr->{$subj_acc}}{qw(start end)} = @hit_data{qw(s_start s_end)};
+      push @seq_bound_accs, $subj_acc;
+    }
+  }
+}
+
+# final MSA output: an interleaved alignment, 60 query columns per
+# block, one row per sequence in @multi_names, two blank lines between
+# blocks
+$max_sseqid_len += 2;
+
+printf "BTOP%s multiple sequence alignment\n\n\n",$m_format;
+
+# left-justified name column, then the residues of this block
+my $row_fmt = "%-".$max_sseqid_len."s %s\n";
+
+my $i_pos = 0;
+while ($i_pos < $query_len) {
+  # last column of this block, clipped to the end of the query
+  my $i_end = $i_pos + 59;
+  $i_end = $query_len-1 if ($i_end >= $query_len);
+
+  foreach my $acc (grep { $_ } @multi_names) {
+    printf($row_fmt,$acc,join("",@{$multi_align{$acc}}[$i_pos .. $i_end]));
+  }
+
+  print "\n\n";
+  $i_pos += 60;
+}
+
+################
+# --bound_file_out: write one "accession<tab>start<tab>end" line per
+# accession in @seq_bound_accs
+if ($bound_file_out) {
+  open(my $bound_fd, ">", $bound_file_out) || die "cannot open $bound_file_out";
+  foreach my $s_acc ( @seq_bound_accs ) {
+    my @bound_row = ($s_acc, @{$seq_bound_hr->{$s_acc}}{qw(start end)});
+    print $bound_fd join("\t", @bound_row),"\n";
+  }
+  close($bound_fd);
+}
+
+################
+# --masked_lib_out: write every MSA row as a FASTA entry, after the
+# gap ('-') positions have been filled or removed according to
+# $mask_type_end (leading/trailing gaps) and $mask_type_int (internal
+# gaps)
+if ($masked_lib_out) {
+  open(my $masked_fd, ">", $masked_lib_out) || die "cannot open $masked_lib_out";
+
+  for my $s_acc ( @multi_names ) {
+    print $masked_fd ">$s_acc\n";
+
+    # here we have four choices for masking:
+    # (1) simply delete the '-'s
+    # (2) delete the leading/trailing '-',s replace interal '-'s with 'X'
+    # (3) replace leading/trailing '-' with 'X', remove internal
+    # (4) replace leading/trailing '-' with query sequence, remove internal
+    # (5) replace leading/trailing '-' with random, remove internal
+    # (6) replace leading/trailing '-' with random, internal with 'X'
+
+    # work on a copy so the MSA row itself is not modified
+    my @masked_seq = @{$multi_align{$s_acc}};
+    my $n_res = scalar(@masked_seq);
+    my $n_rand_res = scalar(@random_res);
+    if ($mask_type_end =~ m/x/i) {
+      # leading, then trailing, gaps become 'X'
+      for (my $i=0; $i < $n_res; $i++) {
+	last if ($masked_seq[$i] ne '-') ;
+	$masked_seq[$i] = 'X';
+      }
+      for (my $i=$n_res-1; $i >= 0; $i--) {
+	last if ($masked_seq[$i] ne '-') ;
+	$masked_seq[$i] = 'X';
+      }
+    }
+    elsif ($mask_type_end =~ m/^q/i) {
+      if ($mask_type_int =~ m/^q/i) {
+	# query residues at every gap, end or internal
+	for (my $i=0; $i < $n_res; $i++) {
+	  if ($masked_seq[$i] eq '-') {
+	    $masked_seq[$i] = $multi_align{$query_acc}[$i];
+	  }
+	}
+      }
+      else {
+	# query residues at the ends only; $li/$ri stop at the first
+	# and last non-gap positions
+	my $li = 0;
+	for ( ; $li < $n_res; $li++) {
+	  last if ($masked_seq[$li] ne '-') ;
+	  $masked_seq[$li] = $multi_align{$query_acc}[$li];
+	}
+	my $ri = $n_res-1;
+	for ( ; $ri >= 0; $ri--) {
+	  last if ($masked_seq[$ri] ne '-') ;
+	  $masked_seq[$ri] = $multi_align{$query_acc}[$ri];
+	}
+	if ($mask_type_int =~ m/^rand/i) {
+	  # random residues at the internal gaps between $li and $ri
+	  for (my $i=$li; $i <= $ri; $i++) {
+	    if ($masked_seq[$i] eq '-') {
+	      $masked_seq[$i] = $random_res[int(rand($n_rand_res))];
+	    }
+	  }
+	}
+      }
+    } elsif ($mask_type_end =~ m/^rand/i) {
+      if ($mask_type_int =~ m/^rand/i) {
+	# random residues at every gap, end or internal
+	for (my $i=0; $i < $n_res; $i++) {
+	  if ($masked_seq[$i] eq '-') {
+	    $masked_seq[$i] = $random_res[int(rand($n_rand_res))];
+	  }
+	}
+      }
+      else {
+	# random residues at the leading, then trailing, gaps only
+	for (my $i=0; $i < $n_res; $i++) {
+	  last if ($masked_seq[$i] ne '-') ;
+	  $masked_seq[$i] = $random_res[int(rand($n_rand_res))];
+	}
+	for (my $i=$n_res-1; $i >= 0; $i--) {
+	  last if ($masked_seq[$i] ne '-') ;
+	  $masked_seq[$i] = $random_res[int(rand($n_rand_res))];
+	}
+      }
+    }
+
+    # any gaps still present become 'X' or are deleted
+    my $masked_seq = join("", @masked_seq);
+    if ($mask_type_int =~ m/X/) {
+      $masked_seq =~ s/\-/X/g;
+    }
+    else {
+      $masked_seq =~ s/\-//g;
+    }
+
+    # 60 residues per FASTA line
+    $masked_seq =~ s/(.{60})/$1\n/g;
+    print $masked_fd "$masked_seq\n";
+  }
+  close($masked_fd);
+}
+
+# decode_btop($btop_str) -- tokenize a BLAST BTOP string
+#
+# input: a BTOP string of the form: "1VA160TS7KG10RK27"
+# returns a list_ref of tokens: (1, "VA", 60, "TS", 7, "KG", 10, "RK", 27)
+# i.e. runs of digits (match counts) are kept whole, and everything
+# between them is cut into two-character query/subject pairs
+#
+sub decode_btop {
+  my ($btop_str) = @_;
+
+  # the capturing split keeps the digit runs as separate pieces
+  my @pieces = split(/(\d+)/,$btop_str);
+
+  # a string that starts with digits yields an empty first piece
+  shift @pieces unless $pieces[0];
+
+  my @decoded = ();
+
+  foreach my $piece (@pieces) {
+    if ($piece =~ m/^\d+$/) {
+      push @decoded, $piece;
+      next;
+    }
+    # residue pairs: keep the captured pairs, drop the empty separators
+    push @decoded, grep { $_ } split(/(..)/,$piece);
+  }
+
+  return \@decoded;
+}
+
+# parse_hit_domains($annot_str) -- parse the domain annotation field
+#
+# Splits $annot_str on '|' and turns every entry except NODOM entries
+# into a hash with the keys dom, virtual, bits, percid, qval, target
+# ('query' for RX entries, 'subj' otherwise), q_start, q_end, s_start,
+# s_end and score.
+# Returns 0 when $annot_str is empty/false, otherwise a reference to
+# the array of those hashes.
+sub parse_hit_domains {
+  my ($annot_str) = @_;
+
+## annot_str looks like: "|RX:6-65:6-65:s=311;b=125.4;I=1.000;Q=339.6;C=C.HTH~1
+#                         |XR:6-65:6-65:s=311;b=125.4;I=1.000;Q=339.6;C=C.HTH~1
+#                         |RX:66-297:66-297:s=1200;b=483.7;I=1.000;Q=1409.6;C=NODOM~0
+#                         |XR:66-297:66-297:s=1200;b=483.7;I=1.000;Q=1409.6;C=NODOM~0
+
+  return 0 unless ($annot_str);
+
+  my @hit_annots = ();
+
+  my @annots = split(/\|/,$annot_str);
+  shift @annots;	# remove first blank
+
+  for my $annot ( @annots ) {
+    my %dom_info = ();
+
+    # parse an entry:
+    # |RX:6-65:6-65:s=311;b=125.4;I=1.000;Q=339.6;C=C.HTH~1
+    # ';' separates: coordinates+score, b=, I=, Q=, C=
+    my @d_fields = split(";",$annot);
+
+    ($dom_info{dom}) = ($d_fields[4] =~ m/C=(.+?)~?\d*v?$/);   # also remove virtual domain symbols
+    next if ($dom_info{dom} =~ m/NODOM/);
+
+    ################
+    # parse @d_fields
+    # a trailing 'v' on the C= field marks a virtual domain
+    if ($d_fields[4] =~ m/v$/) {
+      $dom_info{virtual} = 1;
+    }
+    else {
+      $dom_info{virtual} = 0;
+    }
+
+    # missing values are replaced by the literal string '\N'
+    ($dom_info{bits}) = ($d_fields[1] =~ m/b=(\-?\d+\.?\d*)/);
+    unless (defined($dom_info{bits})) {
+	warn "missing score info - annot: $annot\n  annot_str: $annot_str";
+	$dom_info{bits} = '\N';
+    }
+    ($dom_info{percid}) = ($d_fields[2] =~ m/I=(\-?[\d\.]+)/);
+    unless (defined($dom_info{percid})) {
+	warn "missing percid info - annot: $annot\n  annot_str: $annot_str";
+	$dom_info{percid} = '\N';
+    }
+
+    # qval stays undefined when no Q= value is found
+    ($dom_info{qval}) = ($d_fields[3] =~ m/Q=([\d\.]+)/);
+
+    ################
+    # parse @c_fields
+    # ':' separates: type (RX/XR), query range, subject range, s= score
+    my @c_fields = split(":",$d_fields[0]);
+
+    if ($c_fields[0] =~ m/RX/) {$dom_info{target} = 'query';}
+    else {$dom_info{target} = 'subj';}
+
+    @dom_info{qw(q_start q_end)} = ($c_fields[1] =~ m/(\d+)\-(\d+)/);
+    @dom_info{qw(s_start s_end)} = ($c_fields[2] =~ m/(\d+)\-(\d+)/);
+    ($dom_info{score}) = ($c_fields[3] =~ m/s=(\-?\d+)/);
+    unless (defined($dom_info{score})) {
+	warn "missing score info - annot: $annot\n  annot_str: $annot_str";
+	$dom_info{score} = '\N';
+    }
+
+    push @hit_annots, \%dom_info;
+  }
+
+  return \@hit_annots;
+}
+
+
+# btop2alignment($query_seq_r, $query_len, $hit_data_hr, $seq_bound_hr)
+#
+# Builds one MSA row (one entry per query position, as an array ref)
+# from the BTOP, q_start and q_end entries of %$hit_data_hr.
+# Query positions outside q_start..q_end get '-'.  Digit tokens copy
+# that many query residues; residue-pair tokens contribute the subject
+# character (which is '-' for a gap in the subject) unless the pair is
+# a gap in the query, in which case nothing is added.
+# $seq_bound_hr is accepted but not used.
+sub btop2alignment {
+  my ($query_seq_r, $query_len, $hit_data_hr, $seq_bound_hr) = @_;
+
+  # $query_seq_r is 1: based
+  my @alignment = ();
+
+  # the left unaligned region gets "-"
+  for (my $i=1; $i < $hit_data_hr->{q_start}; $i++) {
+    push @alignment, "-";
+  }
+
+  my $btop_align_r = decode_btop($hit_data_hr->{BTOP});
+
+  my ($seq0, $seq1) = ("","");
+  my $qix = $hit_data_hr->{q_start};
+  for my $btop (@{$btop_align_r}) {
+    if ($btop =~ m/^\d+$/) {  # matching query sequence, add it up
+      for (my $i=0; $i < $btop; $i++) {
+	push @alignment, $query_seq_r->[$qix++];
+      }
+    }
+    else {  # could be: TS/-S/T-
+      ($seq0, $seq1) = split(//,$btop);
+      if ($seq0 ne '-') {
+	push @alignment, $seq1;
+	$qix++;
+      }
+    }
+  }
+  # all done with alignment, double check that $qix = $hit_data_hr->{q_end}
+  unless ($qix == $hit_data_hr->{q_end}+1) {
+    # parentheses are required: '.' and '+' have the same precedence,
+    # so without them the whole message would be used as a number
+    warn "$qix != ".($hit_data_hr->{q_end}+1);
+  }
+
+  # the right unaligned region gets "-"
+  for (my $i = $hit_data_hr->{q_end}+1; $i <= $query_len; $i++) {
+    push @alignment, "-";
+  }
+
+  return \@alignment;
+}
+
+################
+# bound_btop2alignment($query_seq_r, $query_len, $hit_data_hr, $sb_start, $sb_end)
+#
+# generates MSA alignment entry between $sb_start and $sb_end
+# if there are no aligned residues between these locations, return $status=0
+#
+# Like btop2alignment(), but the subject coordinate is tracked as
+# well, and aligned positions whose subject coordinate is outside
+# $sb_start..$sb_end are written as '-'.
+# Returns ($status, $alignment_array_ref).
+
+sub bound_btop2alignment {
+  my ($query_seq_r, $query_len, $hit_data_hr, $sb_start, $sb_end) = @_;
+
+  # $query_seq_r is 1: based
+  my @alignment = ();
+
+  my $have_aligned_res = 0;
+
+  # the left unaligned region gets "-"
+  for (my $i=1; $i < $hit_data_hr->{q_start}; $i++) {
+    push @alignment, "-";
+  }
+
+  my $btop_align_r = decode_btop($hit_data_hr->{BTOP});
+
+  my ($seq0, $seq1) = ("","");
+  # current query and subject coordinates
+  my ($qix, $six) = @{$hit_data_hr}{qw(q_start s_start)};
+
+  for my $btop (@{$btop_align_r}) {
+    if ($btop =~ m/^\d+$/) {  # matching query sequence, add it up
+      for (my $i=0; $i < $btop; $i++) {
+	if ($six >= $sb_start && $six <= $sb_end) {
+	  push @alignment, $query_seq_r->[$qix];
+	  $have_aligned_res=1;
+	}
+	else {
+	  push @alignment, '-';
+	}
+	$qix++; $six++;
+      }
+    }
+    else {  # could be: TS/-S/T-
+      ($seq0, $seq1) = split(//,$btop);
+      if ($seq1 eq '-') {  # gap in subject
+	push @alignment, '-';
+	$qix++;
+      }
+      elsif ($seq0 ne '-') {  # mismatch
+	if ($six >= $sb_start && $six <= $sb_end) {
+	  $have_aligned_res=1;
+	  push @alignment, $seq1;
+	}
+	else {
+	  push @alignment, '-';
+	}
+	$qix++;
+	$six++;
+      }
+      else {  # gap in query, consume $six
+	$six++;
+      }
+    }
+  }
+  # all done with alignment, double check that $qix = $hit_data_hr->{q_end}
+  unless ($qix == $hit_data_hr->{q_end}+1) {
+    # parentheses are required: '.' and '+' have the same precedence,
+    # so without them the whole message would be used as a number
+    warn $qix." != ".($hit_data_hr->{q_end}+1);
+  }
+
+  # the right unaligned region gets "-"
+  for (my $i = $hit_data_hr->{q_end}+1; $i <= $query_len; $i++) {
+    push @alignment, "-";
+  }
+
+  return ($have_aligned_res, \@alignment);
+}
+
+# parse_query_lib($query_file) -- read the query sequence
+#
+# Reads a FASTA file line by line.  The last '>' line seen provides
+# the name (text up to the first whitespace, without the '>'); all
+# other lines are concatenated into the sequence, which is reduced to
+# letters and '*' and converted to upper case.
+# Returns ($name, $seq_array_ref); the array holds one residue per
+# element and has an empty element 0, so residues are 1-based.
+# Dies if the file cannot be opened.
+sub parse_query_lib {
+  my ($query_file) = @_;
+
+  open(my $qfd, "<", $query_file) || die "cannot open query file $query_file: $!";
+
+  my ($header, $sequence) = ("","");
+  while (my $entry = <$qfd>) {  # one line at a time
+    chomp $entry;
+    if ($entry =~ m/^>/) {
+      $header = $entry;
+    }
+    else {
+      $sequence .= $entry
+    }
+  }
+
+  close($qfd);
+
+  $sequence =~ s/[^A-Za-z\*]//g;    # remove everything but letters
+  $sequence = uc($sequence);
+
+  $header =~ s/^>//;
+  $header =~ s/\s.*$//;
+
+  my @seq = split(//,$sequence);
+  unshift @seq,"";	# @seq is now 1-based
+
+  return ($header, \@seq);
+}
+
+# parse_bound_file($bound_file) -- read sequence boundaries
+#
+# Reads tab-delimited "accession<tab>start<tab>end" lines ('#' lines
+# and blank lines are skipped) into the file-level %seq_bound, and
+# appends each new accession to @seq_bound_accs.  Only the first
+# entry for an accession is kept; later ones produce a warning.
+# Returns a reference to %seq_bound.  If the file cannot be opened a
+# warning is issued and the reference is still returned, so callers
+# can always use the result as a hash reference.
+sub parse_bound_file {
+  my ($bound_file) = @_;
+
+  my $qfd;
+  unless (open($qfd, "<", $bound_file)) {
+    warn "cannot open boundary file $bound_file: $!";
+    return \%seq_bound;
+  }
+
+  while (my $line = <$qfd>) {
+    next if ($line =~ m/^#/);
+    chomp $line;
+    next unless length($line);	# a blank line has no accession
+    my @data = split(/\t/,$line);
+    if (!defined($seq_bound{$data[0]})) {
+      $seq_bound{$data[0]} = {start=>$data[1], end=>$data[2]};
+      push @seq_bound_accs, $data[0];
+    }
+    else {
+      warn "multiple boundaries for $data[0]";
+    }
+  }
+
+  close($qfd);
+
+  return \%seq_bound;
+}
+
+# skip_to_m9results() -- advance the input (<>) to the list of hits
+#
+# Reads up to the "N>>>name ... - NNN aa" query line, takes the
+# library size from the second line after it, then reads on to
+# "The best scores are:" (or "!! No sequences").  Sets the file-level
+# $eval2_fmt when that header line contains "E2()".
+# Returns ($q_num, $query_desc, $q_start, $q_stop, $q_len, $l_num,
+# $l_len, $best_yes); $q_start and $q_stop are never set here.
+# Returns (0,"") if no query line is found before ">>>///" or the end
+# of the input.
+sub skip_to_m9results {
+
+  my ($q_num, $query_desc, $q_start, $q_stop, $q_len, $l_num, $l_len, $best_yes);
+
+  my $have_query = 0;
+  while (my $line = <>) {
+    if ($line =~ m/^\s*(\d+)>>>(\S+)\s.+ \- (\d+) aa$/) {
+      ($q_num,$query_desc, $q_len) = ($1,$2,$3);
+#      ($q_len) = ($line =~ m/(\d+) aa$/);
+      $line = <>;	# skip Library:
+      $line = <>;	#   153571012 residues in 291716 sequences
+      if (defined($line)) {	# the input may end right after the query line
+	($l_len, $l_num) = ($line =~ m/^\s+(\d+)\s+residues in\s+(\d+)/);
+      }
+      $have_query = 1;
+      last;
+    }
+    elsif ($line =~ m/>>>\/\/\//) {last;}
+  }
+
+  return (0,"") unless $have_query;
+
+  while (my $line = <>) {
+    $best_yes = 0;
+
+    if ($line =~ m/^The best scores are:/) {
+      $best_yes = 1;
+      # the parentheses must be escaped; "E2()" as a pattern is "E2"
+      # followed by an empty group and would match any "E2"
+      $eval2_fmt = 1 if ($line =~ m/E2\(\)/);
+      last;
+    }
+    last if ($line =~ m/^!! No sequences/);
+  }
+  return ($q_num, $query_desc,$q_start, $q_stop, $q_len, $l_num, $l_len, $best_yes);
+}
+
+# skip_to_m8results() -- advance the input (<>) to the list of hits
+#
+# Reads up to the "# Query:" comment line of BLAST-tabular style
+# output, then inspects the "# Fields:" line two lines later to find
+# out which extra columns (eval2, score, BTOP) follow the standard
+# ones, and finally reads the "# NNN hits found" line.
+# Resets the file-level $eval2_fmt and sets it when an eval2 column is
+# present.
+# Returns ($query_desc, $best_yes, $extra_field_names_array_ref).
+# If no "# Query:" line is found before ">>>///" or the end of the
+# input, returns (0, "", ref to an empty array) so the third value can
+# always be dereferenced.
+sub skip_to_m8results {
+
+  my ($query_desc, $best_yes);
+
+  $best_yes = 0;
+  $eval2_fmt = 0;
+
+  my @last_fields = ();
+
+  while (my $line = <>) {
+    if ($line =~ m/^# Query:/) {  # Query:
+      ($query_desc) = ($line =~ m/^# Query:\s+(\S+)/);
+      $query_desc = 'unnamed' unless ($query_desc);
+#      ($q_len) = ($line =~ m/(\d+) aa$/);
+      $line = <>;	# Database:
+      $line = <>;	# Fields:
+      $line = "" unless defined($line);	# input ended early
+
+      unless ($line =~ m/# Fields:/) {
+	warn "!!! warning !!!: # Fields not found: $line";
+      }
+
+      if ($line =~ m/,\seval2/) {		# only with FASTA
+	push @last_fields, "eval2";
+	$eval2_fmt = 1;
+      }
+
+      if ($line =~ m/,\sscore,\s+BTOP/i) {	# only with BLAST
+	push @last_fields, qw(score BTOP);
+      }
+      elsif ($line =~ m/BTOP,\s+score/i) {	# literal "BTOP", not \B + "TOP"
+	push @last_fields, qw(BTOP score);
+      }
+      elsif ($line =~ m/,\s+BTOP/) {
+	push @last_fields, qw(BTOP);
+      }
+
+      $line = <>; 	# NNN hits found or
+      if (defined($line) && $line =~ m/^#\s+\d+\s+hits found/) {
+	$best_yes = 1;
+      }
+      return ($query_desc, $best_yes, \@last_fields);
+    }
+    elsif ($line =~ m/>>>\/\/\//) {last;}
+  }
+
+  # no query found
+  return (0,"", \@last_fields);
+}
+
+################
+# eval_func($hit) -- return the hit's E()-value ({evalue})
+sub eval_func {
+    my $hit_hr = shift;
+    my $e_val = $hit_hr->{evalue};
+    return $e_val;
+}
+
+# eval2_func($hit) -- return the hit's second E()-value ({eval2})
+sub eval2_func {
+    my $hit_hr = shift;
+    my $e_val2 = $hit_hr->{eval2};
+    return $e_val2;
+}
+# eval_ave($hit) -- return the geometric mean of {evalue} and {eval2}
+sub eval_ave {
+    my $hit_hr = shift;
+    my $e_product = $hit_hr->{evalue}*$hit_hr->{eval2};
+    return sqrt($e_product);
+}
+
+
+################
+# init_random_res -- build a pool of roughly 1000 amino acid residues
+# in which each residue appears in proportion to its
+# Robinson/Robinson count (count/total * 1000, rounded)
+#
+# Returns the pool as a list of single-letter residues, grouped by
+# residue in the order A R N D C Q E G H I L K M F P S T W Y V.
+
+sub init_random_res {
+  # [residue, count] pairs
+  my @rr_freqs = (
+    [A => 35155], [R => 23105], [N => 20212], [D => 24161],
+    [C =>  8669], [Q => 19208], [E => 28354], [G => 33229],
+    [H =>  9906], [I => 23161], [L => 40625], [K => 25872],
+    [M => 10101], [F => 17367], [P => 23435], [S => 32070],
+    [T => 26311], [W =>  5990], [Y => 14488], [V => 29012],
+  );
+  my $rr_total = 450431;
+
+  my @rr_pool = ();
+  foreach my $rr (@rr_freqs) {
+    my ($res, $count) = @{$rr};
+    # number of copies of this residue per 1000
+    my $n_copies = int(1000.0 *$count/$rr_total + 0.5);
+    push @rr_pool, ($res) x $n_copies;
+  }
+  return @rr_pool;
+}
+
+__END__
+
+=pod
+
+=head1 NAME
+
+ m89_btop_msa2.pl
+
+=head1 SYNOPSIS
+
+ m89_btop_msa2.pl --query_file query.fasta fasta_m8CB_output.file
+ m89_btop_msa2.pl --query_file query.fasta blast_outfmt7_BTOP.output.file
+ m89_btop_msa2.pl --query_file query.fasta [--m_format m9] fasta_m9C_output.file
+
+=head1 OPTIONS
+
+ -h	short help
+ --help include description
+
+ --query_file -- query sequence file
+ --query      -- same as --query_file
+ (only one sequence per file)
+
+ --eval2 : "": use E()-value, "eval2": use E2()/eval2, "ave": use geom. mean
+
+ --bound_file_in -- tab delimited accession<tab>start<tab>end that
+                    specifies MSA boundaries WITHIN alignment.
+                    Additional hits use alignment (or domain)
+                    boundaries.
+
+ --bound_file_only -- tab delimited accession<tab>start<tab>end that
+                      specifies MSA boundaries WITHIN alignment.
+                      Only sequences in --bound_file_only will be in the MSA.
+
+ --bound_file_out -- "--bound_file" for next iteration of psisearch2
+
+ --domain_bound  parse domain annotations (-V) from m9B file
+ --domain
+
+ --masked_lib_out -- FASTA format library of MSA sequences
+
+ --int_mask_type = "query", "rand", "X", "none"
+ --end_mask_type = "query", "rand", "X", "none"
+specify the residues to be inserted into output library
+
+=head1 DESCRIPTION
+
+C<m89_btop_msa2.pl> takes a fasta36/ssearch36 -m 9B output file, which
+includes a BTOP encoded alignment string, and produces the multiple
+sequence alignment (MSA) implied by the query sequence, alignment
+boundaries, and pairwise alignments.  The alignment does not allow
+gaps in the query sequence, only in the subject sequences.
+
+The C<--query_file> must be specified, and the query sequence is
+provided as the first sequence in the MSA.
+
+If a C<--bound_file_in> or C<--bound_file_only> file is provided, then
+the ends of the alignments are reduced to the coordinates specified by
+the boundary file.  With C<--bound_file_only>, only sequences included
+in the boundary file are included in the MSA.
+
+Output: A clustal-like interleaved multiple sequence alignment that
+can be used as input (using the C<-in_msa> option) to C<psiblast>.
+
+If a C<--masked_lib_out> filename.fasta is specified, a version of the MSA
+in FASTA format is written to filename.fasta.  This file can be
+converted to BLAST format (C<makeblastdb -in filename.fasta>), and
+the converted blast library can be used to rebuild a PSSM with
+C<psiblast -num_iterations 2 -db filename.fasta -in_msa filename.msa
+-out_pssm filename.asn_txt>.
+
+The sequences in the C<--masked_lib_out> fasta file can be modified
+where gaps are present in the MSA as specified by the C<--int_mask_type> and C<--end_mask_type>
+options. If no --int_mask_type/--end_mask_type is specified
+("none"), then the subject sequence in the output library matches the
+aligned part of the subject sequence (gap characters are deleted).  If
+the --end/int_mask_type is "query", "rand", or "X", then either the
+aligned query residue, a random residue, or an "X" is substituted at
+each gap, producing a library of subject sequences that may differ
+from the original subject sequences.  These different options can be
+used to force C<psiblast> to build a PSSM that more accurately
+reflects the original C<ssearch36> alignment.
+
+=head1 AUTHOR
+
+William R. Pearson, wrp at virginia.edu
+
+=cut
diff --git a/psisearch2/psisearch2_msa.pl b/psisearch2/psisearch2_msa.pl
new file mode 100755
index 0000000..2bf2d5b
--- /dev/null
+++ b/psisearch2/psisearch2_msa.pl
@@ -0,0 +1,453 @@
+#!/usr/bin/perl -w
+
+################################################################
+# copyright (c) 2016 by William R. Pearson and The Rector &
+# Visitors of the University of Virginia
+################################################################
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under this License is distributed on an "AS
+# IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+# express or implied.  See the License for the specific language
+# governing permissions and limitations under the License.
+################################################################
+
+use strict;
+use Getopt::Long;
+use Pod::Usage;
+
+################
+# implementation of simple shell script to do iterative searches
+#
+# logic:
+# (1) do initial search
+# (2) use results of initial search to produce MSA/PSSM for next search
+# (3) do PSSM search
+# (4) use results of PSSM search to produce MSA/PSSM for iterative step 3
+#
+################
+#
+# command:
+# psisearch2_msa.pl --query query_file --db database --num_iter N --evalue 0.002 --no_msa --int_mask none/query/random --end_mask none/query/random --tmp_dir results/ --domain --align --out_suffix none --pgm ssearch/psiblast
+#
+################
+
+use vars qw( $query_file $db_file $num_iter $evalue $int_mask $end_mask $query_mask $no_msa $tmp_dir $dom_flag $align_flag $suffix $srch_pgm $file_out $help $shelp $error_log $rm_flag $annot_type $quiet);
+use vars qw( $prev_msa $next_msa $prev_hitdb $next_hitdb $prev_pssm $next_pssm $prev_bound_in $next_bound_out $tmp_file_list $save_all $delete_bnd $delete_tmp);
+
+
+################
+# locations of required programs:
+# (1) m89_btop_msa2.pl
+# (2) ssearch
+# (3) NCBI blast+ programs: psiblast/makeblastdb
+# (4) NCBI datatool (required only for ssearch36 PSSMs)
+
+my $pgm_bin = "/seqprg/bin";
+my $pgm_data = "/seqprg/data";
+my $ssearch_bin = "$pgm_bin/ssearch36";
+my $psiblast_bin = "$pgm_bin/psiblast";
+my $makeblastdb_bin = "$pgm_bin/makeblastdb";
+my $datatool_bin = "$pgm_bin/datatool -m $pgm_data/NCBI_all.asn";
+my $align2msa_lib = "m89_btop_msa2.pl";
+
+my %srch_subs = ('ssearch' => \&get_ssearch_cmd,
+		 'psiblast' => \&get_psiblast_cmd,
+		);
+
+my %annot_cmds = ('rpd3' => qq("\!../scripts/ann_pfam28.pl --pfacc --db RPD3 --vdoms --split_over"),
+		  'rpd3nv' => qq("\!../scripts/ann_pfam28.pl --pfacc --db RPD3 --split_over"),
+		  'rpd3nvn' => qq("\!../scripts/ann_pfam28.pl --pfacc --db RPD3 --split_over --neg"),
+		  'pfam' => qq("\!../scripts/ann_pfam30.pl --pfacc --vdoms --split_over"));
+
+($num_iter, $evalue, $dom_flag, $align_flag, $int_mask, $end_mask, $query_mask, $srch_pgm, $tmp_dir, $error_log, $annot_type, $quiet) =
+  ( 5, 0.002, 0, 0, 'none', 'none', 0, 'ssearch', '', 0, "", 0);  # exactly one default per variable, same order
+($save_all, $tmp_file_list, $delete_bnd, $delete_tmp) = (0, "", 0, 0);
+
+# capture the command line now: GetOptions() removes the options it parses from @ARGV
+my $pgm_command =  "#".join(" ",($0,@ARGV));
+
+
+GetOptions(
+	   'query|sequence=s' => \$query_file,
+	   'db|database=s' => \$db_file,
+           'suffix|out_suffix=s' => \$suffix,
+    	   'dir=s' => \$tmp_dir,
+	   'evalue=f' => \$evalue,
+	   'annot_db=s' => \$annot_type,
+           'out_name=s' => \$file_out,
+	   'iter=i' => \$num_iter,
+	   # 'in_msa=s' => \$prev_msa,
+	   # 'out_msa=s' => \$next_msa,
+	   # 'in_hitdb=s' => \$prev_hitdb,
+	   # 'out_hitdb=s' => \$next_hitdb,
+	   'in_pssm=s' => \$prev_pssm,
+	   # 'out_pssm=s' => \$next_pssm,
+	   'in_bounds=s' => \$prev_bound_in,
+	   'out_bounds=s' => \$next_bound_out,
+	   'num_iter|max_iter=i' => \$num_iter,
+           # 'no_msa' => \$no_msa,
+	   'dom|domain' => \$dom_flag,
+	   'align' => \$align_flag,
+    	   'query_seed|query_mask' => \$query_mask,
+	   'int_mask|int-mask|int_seed|int-seed=s' => \$int_mask,
+	   'end_mask|end-mask|end_seed|end-seed=s' => \$end_mask,
+	   'pgm=s' => \$srch_pgm,
+    	   'quiet' => \$quiet,
+    	   'q' => \$quiet,
+    	   'silent' => \$quiet,
+	   'h|?' => \$shelp,
+	   "help" => \$help,
+    	   "errors" => \$error_log,
+	   "save_list:s" => \$tmp_file_list,  # files to save (not delete)
+	   "save_tmp|save_all" => \$save_all,
+	   "del_bnd|delete_bnd" => \$delete_bnd,
+	   "del_tmp|del_all|delete_tmp|delete_all" => \$delete_tmp,
+	  );
+print STDERR "$pgm_command\n" if ($error_log);  # --errors is only known once GetOptions() has run
+pod2usage(1) if $shelp;
+pod2usage(exitstatus => 0, verbose => 2) if $help;
+
+pod2usage(1) unless $query_file && -r $query_file;  # need a query
+pod2usage(1) unless $db_file ;        # need a database
+
+my @del_file_ext = qw(msa psibl_out hit_db asntxt asnbin);
+
+if ($srch_pgm =~ m/psiblast/) {
+  pop(@del_file_ext);  # drop 'asnbin': build_msa_pssm() writes .asnbin only when $srch_pgm is not 'psiblast'
+}
+
+if ($query_mask) {
+  $int_mask='query' unless $int_mask ne 'none';  # --query_mask only replaces the default 'none'
+  $end_mask='query' unless $end_mask ne 'none';
+}
+
+if ($delete_tmp) {
+  $delete_bnd = 1;  # @del_file_ext already lists every tmp extension; only the boundary file needs adding
+}
+elsif ($save_all) {
+  @del_file_ext = ();
+  $tmp_file_list = "";
+  $delete_bnd = 0
+}
+
+################
+# remove from @del_file_ext every extension named in --save_list (comma separated)
+#
+my %save_file_ext = ();
+if ($tmp_file_list) {
+  my @new_del_file_ext = ();
+  for my $ext (split(/,\s*/,$tmp_file_list)) {
+  # possible values are: msa, asnbin, asntxt, psibl_out, hit_db (see @del_file_ext)
+    $save_file_ext{$ext} = 1;
+  }
+
+  for my $ext (@del_file_ext) {
+    push @new_del_file_ext, $ext unless $save_file_ext{$ext};
+  }
+  @del_file_ext = @new_del_file_ext;
+}
+
+print "$pgm_command\n" unless ($quiet);
+
+my $this_iter = "it1";
+
+my ($query_pref) = ($query_file =~ m/([\w\.]+)$/);  # trailing run of word characters and dots in the query path
+
+$file_out = $query_pref unless $file_out;
+
+my $this_file_pref = "$file_out.$this_iter";
+$this_file_pref = "$this_file_pref.$suffix" if ($suffix);
+my $this_file_out = $this_file_pref;
+$this_file_out = "$tmp_dir/$this_file_out" if ($tmp_dir);
+
+my $prev_file_out = $this_file_out;
+
+####
+# result file for iteration 1 is [tmp_dir/]out_name.it1[.suffix];
+# tmp files are named by appending an extension to it
+
+# do the first search ($srch_subs{$srch_pgm} is the command builder selected by --pgm)
+my $search = $srch_subs{$srch_pgm}($query_file, $db_file, $prev_pssm);
+log_system("$search > $this_file_out 2> $this_file_out.err");
+
+my ($this_pssm, $this_bound_out) = build_msa_pssm($query_file, $this_file_out, $prev_bound_in);
+
+# now have necessary files for next iteration
+
+for (my $it=2; $it <= $num_iter; $it++) {
+  # each pass: search with the previous PSSM, rebuild the MSA/PSSM, then test for convergence
+  $prev_pssm = $this_pssm;
+  $prev_bound_in = $this_bound_out;
+  ####
+  # build filename for this iteration
+  $this_file_pref = $this_file_out = "$file_out.it$it";
+  $this_file_out = "$this_file_pref.$suffix" if ($suffix);
+  $this_file_out = "$tmp_dir/$this_file_out" if ($tmp_dir);
+
+  $search = $srch_subs{$srch_pgm}($query_file, $db_file, $prev_pssm);
+  log_system("$search > $this_file_out 2> $this_file_out.err");
+
+  # here, we are done with previous .msa, .asntxt, .asnbin, etc files.  Delete them if desired
+  if (@del_file_ext) {
+    my @del_file_list = ();
+    for my $ext (@del_file_ext) {
+      push @del_file_list, "$prev_file_out.$ext";
+    }
+    log_system("rm ".join(" ",@del_file_list));
+  }
+  $prev_file_out = $this_file_out;
+
+  ($this_pssm, $this_bound_out) = build_msa_pssm($query_file, $this_file_out, $prev_bound_in);
+
+  if (has_converged($prev_bound_in, $this_bound_out)) {
+    print STDERR "$0 $srch_pgm $query_file $db_file converged ($it iterations)\n" unless ($quiet);
+
+    if (@del_file_ext) {
+      my @del_file_list = ();
+      for my $ext (@del_file_ext) {
+	push @del_file_list, "$prev_file_out.$ext";
+      }
+      log_system("rm ".join(" ",@del_file_list));
+    }
+    log_system("rm $prev_bound_in $this_bound_out") if ($delete_bnd);  # independent of @del_file_ext, as on the other exit paths
+    exit(0);
+  }
+  log_system("rm $prev_bound_in") if ($delete_bnd);
+}
+
+if (@del_file_ext) {  # reached only when the loop ended without convergence
+  my @del_file_list = ();
+  for my $ext (@del_file_ext) {
+    push @del_file_list, "$prev_file_out.$ext";
+  }
+  log_system("rm ".join(" ",@del_file_list));
+}
+
+log_system("rm $this_bound_out") if ($delete_bnd);
+
+unless ($quiet) {
+  print STDERR "$0 $srch_pgm $query_file $db_file finished ($num_iter iterations)\n";
+}
+
+################
+# log_system($cmd)
+# echo $cmd to STDERR when $error_log is set, then run it with system()
+#
+sub log_system {
+
+  my ($cmd) = @_;
+
+  print STDERR "$cmd\n" if $error_log;
+  system($cmd);  # last statement, so system()'s status is the return value; callers in this script ignore it
+}
+
+################
+# sub get_ssearch_cmd($query_file, $db_file, $pssm_file)
+# returns an ssearch36 command line (string) for query and db; adds -V for $annot_type and -P for a pssm
+#
+sub get_ssearch_cmd {
+  my ($query_file, $db_file, $pssm_file) = @_;
+
+  my $search_cmd = qq($ssearch_bin -S -m 8CB -d 0 -E "1.0 0" -s BP62);  # -E is fixed here; $evalue is not used by this sub
+  if ($annot_type) {
+    $search_cmd .= qq( -V $annot_cmds{$annot_type});  # NOTE(review): no check that $annot_type is a key of %annot_cmds
+  }
+  if ($pssm_file) {
+    $search_cmd .= qq( -P "$pssm_file 2");
+  }
+
+  $search_cmd .= qq( $query_file $db_file);
+
+  return $search_cmd;
+}
+
+################
+# sub get_psiblast_cmd($query_file, $db_file, $pssm_file)
+# returns a psiblast command line (string) for db, with -in_pssm if a pssm is given, otherwise -query
+#
+sub get_psiblast_cmd {
+  my ($query_file, $db_file, $pssm_file) = @_;
+
+  my $search_cmd = qq($psiblast_bin -num_threads 4 -max_target_seqs 5000 -outfmt '7 qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore score btop' -inclusion_ethresh $evalue -num_iterations 1 -db $db_file);
+  if ($pssm_file) {
+    $search_cmd .= qq( -in_pssm $pssm_file);  # $query_file is not passed in this case
+#    $search_cmd .= qq( -comp_based_stats 0);
+  }
+  else {
+    $search_cmd .= qq( -query $query_file);
+  }
+
+  return $search_cmd;
+}
+
+################
+# sub build_msa_pssm($query_file, $this_file_out, $prev_bound_in)
+#
+# given query, search output file ($this_file_out), prev_boundary_file:
+# runs m89_btop_msa2.pl (.msa and .hit_db), makeblastdb, and psiblast to write a PSSM (.asntxt);
+# unless $srch_pgm is 'psiblast', datatool is then run to write .asnbin.  $prev_bound_in is passed on only if $align_flag.
+#
+# always produces a .bnd_out file to test for convergence; returns (pssm_file, bound_file_out)
+#
+sub build_msa_pssm {
+  my ($query_file, $this_file_out,$prev_bound_in) = @_;
+
+  my ($this_msa, $this_hit_db, $this_pssm_asntxt, $this_pssm_asnbin, $this_psibl_out, $this_bound_out) =
+    ("$this_file_out.msa",
+     "$this_file_out.hit_db",
+     "$this_file_out.asntxt",
+     "$this_file_out.asnbin",
+     "$this_file_out.psibl_out",
+     "$this_file_out.bnd_out",
+    );
+
+  my $blastdb_err = "$this_file_out.mkbldb_err";
+  my $aln2msa_cmd = qq($align2msa_lib --query $query_file --evalue $evalue --masked_lib_out=$this_hit_db);
+
+  if ($int_mask) {
+    $aln2msa_cmd .= qq( --int_mask_type $int_mask);
+  }
+
+  if ($end_mask) {
+    $aln2msa_cmd .= qq( --end_mask_type $end_mask);
+  }
+
+  if ($dom_flag) {
+    $aln2msa_cmd .= qq( --domain);
+  }
+
+  if ($align_flag && $prev_bound_in) {
+    $aln2msa_cmd .= qq( --bound_file_in $prev_bound_in);
+  }
+
+  # always produce this file to check for convergence
+  $aln2msa_cmd .= qq( --bound_file_out $this_bound_out);
+
+  log_system("$aln2msa_cmd $this_file_out > $this_msa");
+
+  my $makeblastdb_cmd = "$makeblastdb_bin -in $this_hit_db -dbtype prot -parse_seqids > $blastdb_err";
+
+  log_system($makeblastdb_cmd);
+
+  my $buildpssm_cmd = "$psiblast_bin -max_target_seqs 5000 -outfmt 7 -inclusion_ethresh 100.0 -in_msa $this_msa -db $this_hit_db -out_pssm $this_pssm_asntxt -num_iterations 1 -save_pssm_after_last_round";
+
+  log_system("$buildpssm_cmd  > $this_psibl_out 2> $this_psibl_out.err");
+
+  log_system("rm $this_hit_db.p* $blastdb_err");  # files matching .hit_db.p* are presumably the makeblastdb output
+
+  # remove uninformative error logs ($this_file_out.err is written by the caller's search command)
+  log_system("rm $this_psibl_out.err $this_file_out.err") unless $error_log;
+
+  unless ($srch_pgm eq 'psiblast') {
+    my $asn2asn_cmd = "$datatool_bin -v $this_pssm_asntxt -e $this_pssm_asnbin";
+    log_system($asn2asn_cmd);
+    return ($this_pssm_asnbin, $this_bound_out);
+  }
+  else {
+    return ($this_pssm_asntxt, $this_bound_out);
+  }
+}
+
+################
+# sub has_converged($file1, $file2)
+# reads two boundary files and compares accessions (first tab-delimited field of each line)
+# returns 1 when both files hold the same accessions in any order, otherwise 0; dies if a file cannot be opened
+sub has_converged {
+  my ($file1, $file2) = @_;
+
+  my @f1_names = ();
+  my @f2_names = ();
+
+  open (my $fd1, '<', $file1) || die "cannot read $file1: $!";
+  while (my $line = <$fd1>) {
+    chomp($line);
+    my ($name) = split(/\t/,$line);
+    push @f1_names, $name if defined($name);  # an empty line yields no fields; skip it
+  }
+  close $fd1;
+
+  open (my $fd2, '<', $file2) || die "cannot read $file2: $!";
+  while (my $line = <$fd2>) {
+    chomp($line);
+    my ($name) = split(/\t/,$line);
+    push @f2_names, $name if defined($name);  # an empty line yields no fields; skip it
+  }
+  close $fd2;
+
+  # check for same length
+  return 0  if (scalar(@f1_names) != scalar(@f2_names));
+
+  @f1_names = sort @f1_names;
+  @f2_names = sort @f2_names;
+
+  for (my $i=0; $i < scalar(@f1_names); $i++) {
+    return 0 if ($f1_names[$i] ne $f2_names[$i]);
+  }
+  return 1;
+}
+
+__END__
+
+=pod
+
+=head1 NAME
+
+psisearch2_msa.pl
+
+=head1 SYNOPSIS
+
+ psisearch2_msa.pl --query q_file --db db_file --pgm ssearch|psiblast --num_iter 5
+
+=head1 OPTIONS
+
+ -h	short help
+ --help include description
+ --query query file  (also --sequence)
+ --db    database file (--database)
+ --pgm   program used for searching, ssearch or psiblast
+ --num_iter maximum number of iterations (--max_iter)
+ --dir   working directory and location of output
+ --evalue  threshold for inclusion in PSSM
+ --out_name/--suffix  result file is "out_name.it#.suffix"
+ --in_msa/--out_msa   [not implemented] MSA used to build PSSM, requires --in_hitdb
+ --in_hitdb/--out_hitdb [not implemented] used to build PSSM
+ --in_pssm/--out_pssm [--out_pssm not implemented]
+ --in_bounds/--out_bounds used to control alignment boundaries for PSSM
+ --int_mask/--end_mask  none|query|random -  values embedded in library sequences based on gaps in MSA
+ --delete_all remove all tmp files (also --delete_tmp, --del_all, --del_tmp)
+ --delete_bnd remove boundary file (included with --delete_all, but not deleted by default)
+ --save_all save temporary files (.asnbin, .asntxt, .msa, .psibl_out, .hit_db)
+ --save_list: comma delimited list of file extensions (above) to save
+
+=head1 DESCRIPTION
+
+C<psisearch2_msa.pl> automates successive iterations of C<psiblast> or
+C<ssearch36> using different strategies to reduce PSSM contamination
+from alignment over-extension.  C<psisearch2_msa.pl> uses the
+C<m89_btop_msa2.pl> program to read BTOP formatted output from
+C<psiblast> or C<ssearch36> and produce both a multiple sequence
+alignment (MSA) and a fasta formatted custom database of the sequences in the
+MSA.  C<psiblast> then produces a PSSM from the MSA and custom database.
+
+Different strategies to reduce PSSM contamination from alignment
+overextension can be specified using the C<--int_mask>, C<--end_mask>,
+C<--align>, and C<--domain> options.  If C<--int_mask> and
+C<--end_mask> are not specified (or set to "none"), then the PSSM is
+generated by aligning the MSA with the sequence residues that were
+aligned in the previous search. If C<--int_mask> and C<--end_mask> are
+set to "query", then any gaps in the MSA are filled with the
+corresponding aligned residue from the query sequence.  In our
+experience, this dramatically reduces alignment over-extension and
+false-positives.
+
+=head1 AUTHORS
+
+William R. Pearson (wrp at virginia.edu) and Weizhong Li (wli at ebi.ac.uk)
+
+=cut
diff --git a/psisearch2/psisearch2_msa.py b/psisearch2/psisearch2_msa.py
new file mode 100755
index 0000000..1d2b71f
--- /dev/null
+++ b/psisearch2/psisearch2_msa.py
@@ -0,0 +1,368 @@
+#!/usr/bin/python
+
+################################################################
+# copyright (c) 2016 by William R. Pearson and The Rector &
+# Visitors of the University of Virginia */
+################################################################
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under this License is distributed on an "AS
+# IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+# express or implied.  See the License for the specific language
+# governing permissions and limitations under the License.
+################################################################
+
+import sys
+import os
+import argparse
+import subprocess
+import re
+
+# python re-write of psisearch2_msa.pl
+#
+# logic:
+# (1) do initial search
+# (2) use results of initial search to produce MSA/PSSM for next search
+# (3) do PSSM search
+# (4) use results of PSSM search to produce MSA/PSSM for iterative step 3
+#
+################
+#
+# command:
+# psisearch2_msa.py --query query_file --db database --num_iter N --evalue 0.002 --no_msa --int_mask none/query/random --end_mask none/query/random --tmp_dir results/ --domain --align --suffix --pgm ssearch/psiblast
+#
+################
+
+################
+# locations of required programs:
+# (1) m89_btop_msa2.pl
+# (2) ssearch
+# (3) NCBI blast+ programs: psiblast/makeblastdb
+# (4) NCBI datatool (required only for ssearch36 PSSMs)
+
+pgm_bin = "/seqprg/bin"
+pgm_data = "/seqprg/data"
+ssearch_bin = pgm_bin+"/ssearch36"
+psiblast_bin = pgm_bin+"/psiblast"
+makeblastdb_bin = pgm_bin+"/makeblastdb"
+datatool_bin = "%s/datatool -m %s/NCBI_all.asn" % (pgm_bin,pgm_data)
+align2msa_lib = "m89_btop_msa2.pl"
+
+annot_cmds = {'rpd3': '"!../scripts/ann_pfam28.pl --pfacc --db RPD3 --vdoms --split_over"',
+              'rpd3nv':'"!../scripts/ann_pfam28.pl --pfacc --db RPD3 --split_over"',
+              'pfam':'"!../scripts/ann_pfam30.pl --pfacc --vdoms --split_over"'}
+
+num_iter = 5
+evalue = 0.002
+dom_flag = 0
+align_flag = 0
+int_mask = 'none'
+end_mask = 'none'
+srch_pgm = 'ssearch'
+tmp_dir = ''
+error_log = 0
+rm_flag = 0
+annot_type = ''
+quiet = 0
+
+################
+# log_system(cmd, error_log)
+# run cmd through the shell, first echoing it to stderr if error_log is true
+#
+def log_system (cmd, error_log):
+
+  if (error_log) :
+      sys.stderr.write(cmd+"\n")
+
+  subprocess.call(cmd, shell=True)  # the exit status of cmd is discarded
+#  print cmd
+
+################
+# get_ssearch_cmd(query_file, db_file, pssm_file)
+# returns an ssearch36 command line (string) for query and db; adds -V for annot_type and -P for a pssm
+#
+def get_ssearch_cmd(query_file, db_file, pssm_file) :
+
+  search_cmd = '%s -S -m 8CB -d 0 -E "1.0 0" -s BP62' % (ssearch_bin)  # -E is fixed here; evalue is not used
+
+  if (annot_type) :
+      search_cmd += " -V %s" % (annot_cmds[annot_type])  # KeyError if annot_type is not a key of annot_cmds
+
+  if (pssm_file) :
+      search_cmd += ' -P "%s 2"' % (pssm_file)
+
+  search_cmd += " %s %s" % (query_file, db_file)
+
+  return search_cmd
+
+
+################
+# sub get_psiblast_cmd()
+# builds an ssearch command line with query, db, and pssm
+#
+def get_psiblast_cmd(query_file, db_file, pssm_file) :
+
+  search_cmd = "%s -num_threads 4 -max_target_seqs 5000 -outfmt '7 qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore score btop' -inclusion_ethresh %f -num_iterations 1 -db %s" % (psiblast_bin, evalue, db_file)
+
+  if (pssm_file) :
+      search_cmd += " -in_pssm %s" % (pssm_file)
+  else :
+      search_cmd += " -query %s" % (query_file)
+
+  return search_cmd
+
+
+################
+# sub build_msa_pssm()
+#
+# given query, search output file (this_file_out), prev_boundary_file
+# uses m89_btop_msa2.pl to generate PSSM in .asntxt or .asnbin format, also bound_file_out if align_flag
+# (later - optionally deletes intermediate files)
+#
+# always produce a bound_file_out file to test for convergence
+#
+def build_msa_pssm(query_file, this_file_out,prev_bound_in, error_log) :
+
+  (this_msa, this_hit_db, this_pssm_asntxt, this_pssm_asnbin, this_psibl_out, this_bound_out) =  (this_file_out+".msa",this_file_out+".hit_db",this_file_out+".asntxt",this_file_out+".asnbin",this_file_out+".psibl_out",this_file_out+".bnd_out")
+
+  blastdb_err = this_file_out+".mkbldb_err"
+  aln2msa_cmd = "%s --query %s --evalue %f --masked_lib_out=%s" % (align2msa_lib, query_file, evalue, this_hit_db)
+
+  if (int_mask) :
+      aln2msa_cmd += " --int_mask_type %s" % (int_mask)
+
+  if (end_mask) :
+      aln2msa_cmd += " --end_mask_type %s" % (end_mask)
+
+  if (dom_flag) :
+    aln2msa_cmd += " --domain"
+
+  if (align_flag and prev_bound_in) :
+      aln2msa_cmd += " --bound_file_in %s" %(prev_bound_in)
+
+  # always produce this file to check for convergence
+  aln2msa_cmd += " --bound_file_out %s" % (this_bound_out)
+
+  log_system("%s %s > %s"%(aln2msa_cmd, this_file_out, this_msa), error_log)
+
+  makeblastdb_cmd = "%s -in %s -dbtype prot -parse_seqids > %s" % (makeblastdb_bin, this_hit_db, blastdb_err)
+
+  log_system(makeblastdb_cmd, error_log)
+
+  buildpssm_cmd = "%s -max_target_seqs 5000 -outfmt 7 -inclusion_ethresh 100.0 -in_msa %s -db %s -out_pssm %s -num_iterations 1 -save_pssm_after_last_round" % (psiblast_bin, this_msa, this_hit_db, this_pssm_asntxt)
+
+  log_system("%s > %s 2> %s.err" % (buildpssm_cmd, this_psibl_out, this_psibl_out), error_log)
+
+  log_system("rm %s.p* %s" % (this_hit_db,blastdb_err), error_log)
+
+  # remove uninformative error logs
+  if (not error_log) :
+    log_system("rm "+this_psibl_out+".err "+this_file_out+".err",error_log)
+
+  if (srch_pgm != 'psiblast') :
+      asn2asn_cmd = "%s -v %s -e %s" % (datatool_bin, this_pssm_asntxt, this_pssm_asnbin)
+      log_system(asn2asn_cmd, error_log)
+      return (this_pssm_asnbin, this_bound_out)
+  else :
+      return (this_pssm_asntxt, this_bound_out)
+
+################
+# has_converged(file1, file2)
+# reads two boundary files and compares accessions (first tab-delimited field of each line)
+# returns 1 when both files hold the same accessions in any order, otherwise 0
+def has_converged(file1, file2) :
+
+  f1_names = []
+  f2_names = []
+
+  with open(file1) as fd:
+      for line in fd:
+          line = line.rstrip('\n')
+          fields = line.split('\t')
+          f1_names.append(fields[0])  # an empty line contributes '' as its name
+
+  with open(file2) as fd:
+      for line in fd:
+          line = line.rstrip('\n')
+          fields = line.split('\t')
+          f2_names.append(fields[0])
+
+  # check for same length
+  if (len(f1_names) != len(f2_names)) :
+      return 0
+
+  f1_names.sort()
+  f2_names.sort()
+
+  # same length, so an element-wise comparison of the sorted lists is sufficient
+  for i,v in enumerate(f1_names) :
+    if (f2_names[i] != v) :
+        return 0
+
+  return 1
+
+# main()
+
+srch_subs = {'ssearch' : get_ssearch_cmd,
+	     'psiblast': get_psiblast_cmd}
+
+pgm_command =  "# "+" ".join(sys.argv);
+if (error_log) :
+    sys.stderr.write('pgm_command\n')
+
+arg_parse = argparse.ArgumentParser(description='Iterative search with SSEARCH/PSIBLAST')
+arg_parse.add_argument('--query', dest='query_file', action='store',help='query sequence file')
+arg_parse.add_argument('--sequence', dest='query_file', action='store',help='query sequence file')
+arg_parse.add_argument('--db', dest='db_file', action='store',help='sequence database name')
+arg_parse.add_argument('--database', dest='db_file', action='store',help='sequence database name')
+arg_parse.add_argument('--dir', dest='tmp_dir', action='store',help='directory for result and tmp_file output')
+arg_parse.add_argument('--evalue', dest='evalue', default=0.002, type=float, action='store',help='E()-value threshold for inclusion in PSSM')
+arg_parse.add_argument('--annot_db', dest='annot_type', action='store',help='source of domain annotations')
+arg_parse.add_argument('--suffix', dest='suffix', action='store',help='suffix for result output')
+arg_parse.add_argument('--out_name', dest='file_out', action='store',help='result file name')
+arg_parse.add_argument('--iter', dest='num_iter', default=5, type=int, action='store',help='number of iterations')
+arg_parse.add_argument('--in_pssm', dest='prev_pssm', action='store',help='initial PSSM')
+arg_parse.add_argument('--in_bounds', dest='prev_bound_in', type=str, action='store',help='initial boundaries')
+arg_parse.add_argument('--domain', dest='dom_flag', action='store_true',help='use domain annotations')
+arg_parse.add_argument('--align', dest='align_flag', action='store_true',help='use alignment boundaries')
+arg_parse.add_argument('--pgm', dest='srch_pgm', action='store',default='ssearch',help='search program: ssearch/psiblast')
+arg_parse.add_argument('--query_seed', dest='query_mask', action='store_true',help='use query seeding')
+arg_parse.add_argument('--int_seed', dest='int_mask', action='store',default='none',help='sequence masking: none/query/random')
+arg_parse.add_argument('--end_seed', dest='end_mask', action='store',default='none',help='sequence masking: none/query/random')
+arg_parse.add_argument('--int_mask', dest='int_mask', action='store',default='none',help='sequence masking: none/query/random')
+arg_parse.add_argument('--end_mask', dest='end_mask', action='store',default='none',help='sequence masking: none/query/random')
+arg_parse.add_argument('--save_list', dest='tmp_file_list', action='store',help='temporary extensions saved')
+arg_parse.add_argument('--save_all', dest='save_all', action='store_true',help='save all temporary files')
+arg_parse.add_argument('--delete_all', dest='delete_tmp', action='store_true',help='delete all temporary files')
+arg_parse.add_argument('--delete_bnd', dest='delete_bnd', action='store_true',help='delete boundary temporary file')
+arg_parse.add_argument('--quiet', dest='quiet', action='store_true',help='fewer messages')
+arg_parse.add_argument('-Q', dest='quiet', action='store_true',help='fewer messages')
+
+args = arg_parse.parse_args()
+if (args.quiet) :
+    quiet = args.quiet
+
+if (args.srch_pgm) :
+    srch_pgm = args.srch_pgm
+
+if (not quiet) :
+    print pgm_command
+
+del_file_ext = ["msa","psibl_out","hit_db","asntxt","asnbin"]
+
+if (re.match('psiblast',srch_pgm)) :
+    del_file_ext.pop()
+
+
+if (args.query_mask) :
+    if (args.int_mask == 'none') :
+        args.int_mask = 'query'
+    if (args.end_mask == 'none') :
+        args.end_mask = 'query'
+
+delete_bnd = args.delete_bnd
+if (args.delete_tmp) :
+    delete_bnd = 1
+elif (args.save_all) :
+    delete_file_ext = ()
+    args.tmp_file_list = ''
+    delete_bnd = 0
+
+save_file_ext = {}
+if (args.tmp_file_list) :
+    new_del_file_ext = []
+    for ext in re.split(",\s*",args.tmp_file_list) :
+        save_file_ext[ext] = 1
+    for ext in del_file_ext :
+        if (not (ext in save_file_ext)):
+            new_del_file_ext.append(ext)
+    del_file_ext = new_del_file_ext[:]
+
+this_iter = "it1"
+
+query_pref = query_file = args.query_file
+m = re.search(r'([\w\.]+)$',str(args.query_file))
+query_pref = m.groups()[0]
+
+if (not args.file_out) :
+    file_out = query_pref
+else :
+    file_out = args.file_out
+
+this_file_out = this_file_pref = file_out+"."+this_iter
+if (args.suffix) :
+    this_file_out = this_file_pref+"."+args.suffix
+if (args.tmp_dir) :
+    this_file_out = args.tmp_dir+"/"+this_file_out
+
+####
+# parse output to build PSSM
+# generate output filenames
+
+prev_file_out = this_file_out
+
+# do the first search
+search_str = srch_subs[srch_pgm](args.query_file, args.db_file, args.prev_pssm)
+log_system(search_str+" > "+this_file_out+" 2> "+this_file_out+".err", error_log)
+
+prev_file_out = this_file_out
+
+(this_pssm, this_bound_out) = build_msa_pssm(args.query_file, this_file_out, args.prev_bound_in, error_log)
+# now have necessary files for next iteration
+
+it=2
+while (it <= args.num_iter) :
+
+  prev_pssm = this_pssm
+  prev_bound_in = this_bound_out
+  ####
+  # build filename for this iteration
+  this_file_out = this_file_pref = "%s.it%d" % (file_out,it)
+  if (args.suffix) :
+      this_file_out = this_file_pref+"."+args.suffix
+  if (args.tmp_dir) :
+      this_file_out = args.tmp_dir+"/"+this_file_out
+
+  search_str = srch_subs[srch_pgm](args.query_file, args.db_file, prev_pssm)
+  log_system("%s > %s 2> %s" % (search_str,this_file_out,this_file_out+".err"), error_log)
+
+  if (len(del_file_ext)):
+      del_file_list = [ prev_file_out+'.'+ext for ext in del_file_ext]
+      log_system('rm '+' '.join(del_file_list),error_log)
+
+  prev_file_out = this_file_out
+
+  (this_pssm, this_bound_out) = build_msa_pssm(query_file, this_file_out, prev_bound_in, error_log)
+
+  if (has_converged(prev_bound_in, this_bound_out)) :
+      if (not quiet) :
+          sys.stderr.write(" %s %s %s %s converged (%d iterations)\n" % (sys.argv[0], srch_pgm, query_file, args.db_file, it))
+
+      if (len(del_file_ext)):
+          del_file_list = [ prev_file_out+'.'+ext for ext in del_file_ext]
+          log_system('rm '+' '.join(del_file_list),error_log)
+
+      if (delete_bnd) :
+          log_system("rm "+prev_bound_in,error_log)
+
+      exit(0)
+
+  if (delete_bnd) :
+      log_system("rm "+prev_bound_in,error_log)
+
+  it += 1
+
+if (len(del_file_ext)):
+    del_file_list = [ prev_file_out+'.'+ext for ext in del_file_ext]
+    log_system('rm '+' '.join(del_file_list),error_log)
+
+if (delete_bnd):
+    log_system("rm "+this_bound_out,error_log)
+
+if (not quiet) :
+    sys.stderr.write(" %s %s %s %s finished (%d iterations)\n" % (sys.argv[0], srch_pgm, query_file, args.db_file, it-1))
+
diff --git a/scripts/README b/scripts/README
new file mode 100644
index 0000000..5a4e5cf
--- /dev/null
+++ b/scripts/README
@@ -0,0 +1,108 @@
+
+22-Jan-2014
+13-Apr-2016 updated
+
+fasta36/scripts
+
+Perl scripts for annotating sequences and expanding libraries
+
+-- Sequence alignment scoring/annotation
+
+Two program scripts -- annot_blast_btop2.pl and blastp_cmd.sh -- have
+been added to support sub-alignment scoring of BLASTP alignments.
+
+annot_blast_btop2.pl takes three inputs: (1) a query sequence file; (2)
+a domain annotation script (see below), and (3) a BLAST tabular format
+output with two additional fields, "score" and "btop":
+
+annot_blast_btop2.pl --query query.file --ann_script ann_pfam_www.pl blast_tab_btop_file
+
+The blast_tab_btop_file can be produced using the blastp_cmd.sh shell
+script, which uses ASN.1 output and blast_formatter to produce both a
+standard alignment file and the modified blast tabular btop file.
+
+-- Implied Multiple sequence alignment --
+
+As part of a strategy to improve PSSM-based similarity searching, two
+scripts use a BTOP encoded alignment string from either -m 8CB or
+-m 9B output files to produce a Clustal-like multiple sequence
+alignment (MSA) that can be used as input to psiblast to produce an
+ASN.1 text file (which can be converted with datatool to ASN.1 binary,
+which can be read by ssearch36 -P "file.asn1 2").  We used the BTOP
+encoding, rather than the more common CIGAR string (-m 9C), or the
+older alignment encoding (-m 9c), because the BTOP encoding only
+requires the query sequence to reproduce both the query and subject
+aligned residues.  Thus:
+
+m8_btop_msa.pl --query gstt1_drome.aa gstt1_sp.bl_btop > gstt1_sp.ss_msa
+
+where "gstt1_sp.bl_btop" is "-m 8CB" output, produces:
+  ====
+  SSEARCHm8 multiple sequence alignment
+
+
+  sp|P20432|GSTT1_DROME     MVDFYYLPGSSPCRSVIMTAKAVGVELNKKLLNLQAGEHLKPEFLKINPQHTIPTLVDNG
+  sp|P04907|GSTF3_MAIZE     ---LYGMPLSPNVVRVATVLNEKGLDFEIVPVDLTTGAHKQPDFLALNPFGQIPALVDGD
+  sp|P12653|GSTF1_MAIZE     -------------------------------INFATAEHKSPEHLVRNPFGQVPALQDGD
+  sp|P0ACA5|SSPA_ECO57      ------------------------------------------DLIDLNPNQSVPTLVDRE
+  sp|P00502|GSTA1_RAT       VLHYFNARGRMECIRWLLA--AAGVEFDEKFI--QSPEDL--EKLKKDGNDQVPMVEIDG
+
+
+  ...
+  ====
+
+which can be used with psiblast -in_msa gstt1_sp.ss_msa.
+m9B_btop_msa.pl does the same for "-m 9B" output.
+
+-- Domain annotation --
+
+(Nov. 2015) These domain annotation scripts allow overlapping domains,
+and must be used with versions of the FASTA programs that support the
+current "start - stop domain_description" format (in contrast to the
+older format which put domain starts and stops on separate lines with
+'[' and ']'). Until this release, the "overlapping" domain scripts had
+'_e' in their name, e.g. ann_pfam28_e.pl.  The "_e" scripts have been
+renamed, losing the '_e', and the old non-'_e' scripts have been
+removed from the distribution.
+
+All of the "ann_*.pl" scripts are used to annotate query or library
+sequences using the -V option.  See ../test/test2V.pl for examples.
+
+
+ann_feats2ipr.pl -- generate Uniprot sites, Interpro domains, from a mySQL database
+ann_feats2l.pl -- generate Uniprot sites, domains from a mySQL database
+
+ann_feats_up_www2.pl -- generate Uniprot sites, domains from an EBI
+		     	web server that converts Uniprot DAS to gff3.
+
+ann_feats_up_www.pl -- generate Uniprot sites, domains from a Uniprot
+		       gff web server (less information than ann_feats_up_www2.pl)
+
+ann_ipr_www.pl -- Interpro domains from Interpro WWW site.
+
+ann_pdb_cath.pl -- generate CATH domains using PDB accessions from a mySQL database
+ann_pdb_vast.pl -- use VAST domains, but domain names are not informative
+
+ann_pfam27.pl -- generate Pfam domains using local Pfam mySQL database (Pfam27 with auto_pfamA, auto_pfamseq)
+ann_pfam28.pl -- generate Pfam domains using local Pfam mySQL database (Pfam28, no auto_pfamA, auto_pfamseq)
+ann_pfam_www.pl -- use Pfam Website, and XML::Twig, to get Pfam domain info.
+
+ann_exons_ens.pl -- generate exon boundaries on SwissProt proteins from Ensembl.
+ann_exons_up_www.pl -- generate exon boundaries on SwissProt proteins using the EBI/Proteins/API/coordinate service
+ann_exons_ncbi.pl -- generate exon boundaries on NCBI refseq proteins.
+
+-- Library expansion
+
+expand_uniref50.pl -- allows search of uniref50 to be expanded
+expand_links.pl -- script to take hits from a smaller library and expand to complete library
+links2sql.pl -- create links for expand_links.pl
+
+exp_up_ensg.pl -- expand uniprot sequences to include Ensembl splice variants
+
+-- Plot local alignments (.lav files)
+
+lav2plt.pl    -- used to produce postscript or svg plots of "lalign36 -m 11" lav output files
+color_defs.pl -- used by lav2plt.pl to produce domain colors
+lavplt_ps.pl  -- used by lav2plt.pl --dev ps
+lavplt_svg.pl -- used by lav2plt.pl --dev svg
+
diff --git a/scripts/README.scripts b/scripts/README.scripts
new file mode 100644
index 0000000..cb479d8
--- /dev/null
+++ b/scripts/README.scripts
@@ -0,0 +1,84 @@
+
+  $Id: README.scripts 1258 2014-05-02 13:27:07Z wrp $
+  $Revision: 1258 $
+
+May 13, 2013
+
+This directory contains a variety of scripts that work with the two
+scripting options for the FASTA programs:
+
+-e expand_script.pl
+
+and
+
+-V \!annotate_script.pl
+
+All these scripts use a mysql database that provides a mapping between
+the sequence identifiers provided by the sequence library and the
+additional sequences (-e) or annotations (-V) that are generated.
+Thus, the scripts will NOT work as written because they reference
+mysql databases that are only available inside the University of
+Virginia.
+
+Scripts for sequence library expansion:
+
+expand_uniref50.pl -- produces new sequences from the
+   Uniref50 mapping of Uniref50 to Uniprot.
+
+expand_links.pl -- produce new sequences from a custom-built database
+  of protein links
+links2sql.pl -- build the file of protein accessions to linked accessions
+
+exp_up_ensg.pl -- (human sequences only) use the ENSEMBL to Uniprot
+  mapping to extract alternative splice isoforms.
+
+The expansion scripts expect a file name for a file that contains:
+
+sp|P09488|GSTM1<tab>1e-100
+...
+
+The file is then opened, read, and the accessions extracted and used
+to find the linked sequences.
+
+================
+
+Scripts for library annotation:
+
+The annotation scripts are very similar to the expansion scripts, but
+have the option of either (1) taking the name of a file of sequence
+identifiers to annotate, e.g.
+
+  annot_script.pl annot_file
+
+or (2) taking an argument that is a single sequence identifier:
+
+  annot_script.pl 'sp|P09488|GSTM1_HUMAN'  ('|' must be escaped for  many shells)
+
+Several annotation scripts are available:
+
+  ann_feats2l.pl - get features and domains from local Uniprot database
+  ann_feats2ipr.pl -- get features from Uniprot and domains from a local Uniprot/Interpro database
+
+  ann_pfam.pl -- get domains (only) from local copy of Pfam SQL database
+  ann_pfam_www.pl -- get domains (only) from Pfam web services
+
+  ann_feats_up_www.pl -- get features/domains from Uniprot gff3 server (http://www.uniprot.org/uniprot/P09488.gff)
+
+The Uniprot gff service does not provide information on the actual sequence changes associated with mutants and variants:
+P09488	UniProtKB	Natural variant	210	210	.	.	.	ID=VAR_014497;Dbxref=dbSNP:rs449856	
+P09488	UniProtKB	Mutagenesis	7	7	.	.	.	Note=Reduces catalytic activity 100-fold.	
+However, the Uniprot XML service does provide this information, so a second resource:
+
+  ann_feats_up_www2.pl -- get features/domains from EMBL/EBI XSLT conversion of Uniprot XML:
+
+ http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/uniprotkb/P09488/gff2
+
+Provides substitution information, as well as links to references:
+
+P09488	UniProtKB	natural_variant_site	210	210	.	.	.	Note "S -> T" ; Note "UniProtKB FT ID: VAR_014497" ; Note "dbSNP:rs449856" ; Link "http://www.ensembl.org/Homo_sapiens/Variation/Explore?v=rs449856" ; Link "http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?type=rs&rs=449856"
+P09488	UniProtKB	mutated_variant_site	7	7	.	.	.	Note "Y -> F" ; Note "Reduces catalytic activity 100- fold" ; Link "http://www.ncbi.nlm.nih.gov/pubmed/16548513"
+
+All the annotation scripts offer  -h and --help options.
+
+================
+
diff --git a/scripts/acc_examples b/scripts/acc_examples
new file mode 100644
index 0000000..b790db8
--- /dev/null
+++ b/scripts/acc_examples
@@ -0,0 +1,5 @@
+P09488
+sp|P09488
+up|P09488|GSTM1_HUMAN
+SP:GSTM1_HUMAN P09488
+SP:GSTM1_HUMAN
diff --git a/scripts/ann_exons_ens.pl b/scripts/ann_exons_ens.pl
new file mode 100755
index 0000000..a8666c1
--- /dev/null
+++ b/scripts/ann_exons_ens.pl
@@ -0,0 +1,287 @@
+#!/usr/bin/perl -w
+
+################################################################
+# copyright (c) 2014,2015 by William R. Pearson and The Rector &
+# Visitors of the University of Virginia */
+################################################################
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under this License is distributed on an "AS
+# IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+# express or implied.  See the License for the specific language
+# governing permissions and limitations under the License. 
+################################################################
+
+# ann_exons_ens.pl gets an annotation file from fasta36 -V with a line of the form:
+
+# sp|P09488|GSTM1_HUMAN
+#
+# and returns the exons present in the protein from the Ensembl exon tables (ens_exons joined to ens2up)
+#
+# it must:
+# (1) read in the line
+# (2) parse it to get the acc
+# (3) return the tab delimited exon boundaries
+
+
+use strict;
+
+use DBI;
+use Getopt::Long;
+use Pod::Usage;
+
+use vars qw($host $db $port $user $pass);
+
+my $hostname = `/bin/hostname`;
+
+($host, $db, $port, $user, $pass)  = ("wrpxdb.its.virginia.edu", "uniprot", 0, "web_user", "fasta_www");
+
+my ($auto_reg,$rpd2_fams, $neg_doms, $lav, $no_doms, $pf_acc, $shelp, $help) = (0, 0, 0, 0,0, 0,0,0);
+my ($min_nodom) = (10);
+
+my $color_sep_str = " :";
+$color_sep_str = '~';
+
+GetOptions(
+    "host=s" => \$host,
+    "db=s" => \$db,
+    "user=s" => \$user,
+    "password=s" => \$pass,
+    "port=i" => \$port,
+    "lav" => \$lav,
+    "min_nodom=i" => \$min_nodom,
+    "h|?" => \$shelp,
+    "help" => \$help,
+    );
+
+pod2usage(1) if $shelp;
+pod2usage(exitstatus => 0, verbose => 2) if $help;
+pod2usage(1) unless (@ARGV || -p STDIN || -f STDIN);
+
+my $connect = "dbi:mysql(AutoCommit=>1,RaiseError=>1):database=$db";
+$connect .= ";host=$host" if $host;
+$connect .= ";port=$port" if $port;
+
+my $dbh = DBI->connect($connect,
+		       $user,
+		       $pass
+		      ) or die $DBI::errstr;
+
+my %annot_types = ();
+my %domains = (NODOM=>0);
+my $domain_cnt = 0;
+
+my $get_annot_sub = \&get_ensembl_exons;
+
+my $get_exons_acc = $dbh->prepare(<<EOSQL);
+
+SELECT ex_num, ex_p_start as seq_start, ex_p_end as seq_end
+FROM ens_exons
+JOIN ens2up USING(ensp)
+WHERE  acc=?
+ORDER BY seq_start
+
+EOSQL
+
+my $get_exons_id = $dbh->prepare(<<EOSQL);
+
+SELECT ex_num, ex_p_start as seq_start, ex_p_end as seq_end
+FROM ens_exons
+JOIN ens2up USING(ensp)
+JOIN annot2 using(acc)
+WHERE  id=?
+ORDER BY seq_start
+
+EOSQL
+
+my $get_annots_sql = $get_exons_acc;
+
+my ($tmp, $gi, $sdb, $acc, $id, $use_acc);
+
+# get the query
+my ($query, $seq_len) = @ARGV;
+$seq_len = 0 unless defined($seq_len);
+
+$query =~ s/^>// if ($query);
+
+my @annots = ();
+
+# A first argument that looks like a sequence identifier (contains '|' or ':',
+# or starts with a bare Uniprot / RefSeq accession) is annotated directly;
+# anything else is taken to be input files (or STDIN) read line by line via <>.
+unless ($query && ( $query =~ m/[\|:]/
+		    || $query =~ m/^(?:[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9](?:[A-Z][A-Z0-9]{2}[0-9]){1,2})\b/
+		    || $query =~ m/^[XN][MP]_\d+/)) {
+  while (my $a_line = <>) {
+    $a_line =~ s/^>//;
+    chomp $a_line;
+    push @annots, show_annots($a_line, $get_annot_sub);
+  }
+} else {
+  push @annots, show_annots("$query\t$seq_len", $get_annot_sub);
+}
+
+for my $seq_annot (@annots) {
+  print ">",$seq_annot->{seq_info},"\n";
+  for my $annot (@{$seq_annot->{list}}) {
+    if (!$lav && defined($domains{$annot->[-1]})) {
+      $annot->[-1] .= $color_sep_str.$domains{$annot->[-1]};
+    }
+    print join("\t",@$annot),"\n";
+  }
+}
+
+exit(0);
+
+# show_annots("id<tab>len", \&annot_sub): parse the identifier, run the exon
+# query by accession (or by id for "SP:ID" lines that carry no accession) and
+# return {seq_info => identifier text, list => rows built by \&annot_sub}.
+sub show_annots {
+  my ($query_len, $get_annot_sub) = @_;
+  my ($annot_line, $seq_len) = split(/\t/,$query_len);
+  my %annot_data = (seq_info=>$annot_line);
+
+  # $use_acc, $get_annots_sql and $sdb/$acc/$id are file-level variables
+  $use_acc = 1;
+  $get_annots_sql = $get_exons_acc;
+
+  if ($annot_line =~ m/^gi\|/) {
+    ($tmp, $gi, $sdb, $acc, $id) = split(/\|/,$annot_line);
+  }
+  elsif ($annot_line =~ m/^(sp|tr|up)\|/) {
+    ($sdb, $acc, $id) = split(/\|/,$annot_line);
+  }
+  elsif ($annot_line =~ m/^(SP|TR):(\w+) (\w+)/) {
+    ($sdb, $id, $acc) = (lc($1), $2, $3);
+  }
+  elsif ($annot_line =~ m/^(SP|TR):(\w+)/) {
+    ($sdb, $id, $acc) = (lc($1), $2, "");
+    $use_acc=0;
+  }
+  elsif ($annot_line =~ m/\|/) {
+    # third field is the entry id; it must not overwrite the database name $db
+    ($sdb, $acc, $id) = split(/\|/,$annot_line);
+  }
+  else {
+    ($acc) = ($annot_line =~ m/^(\S+)/);
+  }
+
+  $acc = "" unless defined($acc);   # nothing parsed: avoid undef warnings
+  $acc =~ s/\.\d+$//;               # drop a trailing ".N" version
+  if ($use_acc) {
+    $get_annots_sql->execute($acc);
+  }
+  else {
+    $get_annots_sql = $get_exons_id;
+    $get_annots_sql->execute($id);
+  }
+  $annot_data{list} = $get_annot_sub->($get_annots_sql, $seq_len);
+  return \%annot_data;
+}
+
+# get_ensembl_exons($get_annots, $seq_length)
+#
+#   $get_annots -- executed statement handle; every row supplies ex_num,
+#                  seq_start and seq_end
+#   $seq_length -- accepted but not used
+#
+# Returns an array ref with one row per exon, in the order fetched:
+#   [seq_start, seq_end, "exon_N"]        when $lav is set
+#   [seq_start, '-', seq_end, "exon_N"]   otherwise
+# No overlap resolution is done; rows are passed through as fetched.
+#
+sub get_ensembl_exons {
+  my ($get_annots, $seq_length) = @_;
+
+  my @feats = ();
+
+  while (my $exon = $get_annots->fetchrow_hashref()) {
+    my $label = "exon_".$exon->{ex_num};
+    my ($from, $to) = ($exon->{seq_start}, $exon->{seq_end});
+
+    # --lav rows omit the '-' column between the two coordinates
+    if ($lav) {
+      push @feats, [$from, $to, $label];
+    }
+    else {
+      push @feats, [$from, '-', $to, $label];
+    }
+  }
+
+  return \@feats;
+}
+
+# domain_name($value) records a domain label in %domains: a label that has
+# not been seen before is given the next value of $domain_cnt.
+#
+# The label is returned exactly as passed in; this version does not strip
+# comments ("; truncated") or numbers, so "Cortactin 6." stays "Cortactin 6."
+#
+
+sub domain_name {
+  my ($value) = @_;
+
+  # first sighting of this label: hand out the next counter value
+  unless (defined($domains{$value})) {
+    $domains{$value} = ++$domain_cnt;
+  }
+
+  return $value;
+}
+
+__END__
+
+=pod
+
+=head1 NAME
+
+ann_exons_ens.pl
+
+=head1 SYNOPSIS
+
+ ann_exons_ens.pl 'sp|P09488|GSTM1_HUMAN' | accession.file
+
+=head1 OPTIONS
+
+ -h	short help
+ --help include description
+ --neg-doms,  -- report domains between annotated domains as NODOM
+                 (also --neg, --neg_doms)
+ --min_nodom=10  -- minimum length between domains for NODOM
+
+ --host, --user, --password, --port --db -- info for mysql database
+
+=head1 DESCRIPTION
+
+C<ann_pfam.pl> extracts domain information from a mysql
+database.  Currently, the program works with database sequence
+descriptions in one of two formats:
+
+ >pf26|649|O94823|AT10B_HUMAN -- RPD2_seqs
+
+(pf26 databases have auto_pfamseq in the second field) and
+
+ >gi|1705556|sp|P54670.1|CAF1_DICDI
+
+C<ann_pfam.pl> uses the C<pfamA_reg_full_significant>, C<pfamseq>,
+and C<pfamA> tables of the C<pfam> database to extract domain
+information on a protein.  For proteins that have multiple domains
+associated with the same overlapping region (domains overlap by more
+than 1/3 of the domain length), C<auto_pfam.pl> selects the domain
+annotation with the best C<domain_evalue_score>.  When domains overlap
+by less than 1/3 of the domain length, they are shortened to remove
+the overlap.
+
+C<ann_pfam.pl> is designed to be used by the B<FASTA> programs with
+the C<-V \!ann_pfam.pl> or C<-V "\!ann_pfam.pl --neg"> option.
+
+=head1 AUTHOR
+
+William R. Pearson, wrp at virginia.edu
+
+=cut
diff --git a/scripts/ann_exons_ncbi.pl b/scripts/ann_exons_ncbi.pl
new file mode 100755
index 0000000..26d1acc
--- /dev/null
+++ b/scripts/ann_exons_ncbi.pl
@@ -0,0 +1,243 @@
+#!/usr/bin/perl -w
+
+# ann_exons_ncbi.pl gets an annotation file from fasta36 -V with a line of the form:
+
+# gi|23065544|ref|NP_000552.2| 
+#
+# and returns the exons present in the protein from NCBI gff3 tables (human and mouse only)
+#
+# it must:
+# (1) read in the line
+# (2) parse it to get the acc
+# (3) return the tab delimited exon boundaries
+
+
+use strict;
+
+use DBI;
+use Getopt::Long;
+use Pod::Usage;
+
+use vars qw($host $db $port $user $pass);
+
+my $hostname = `/bin/hostname`;
+
+($host, $db, $port, $user, $pass)  = ("wrpxdb.its.virginia.edu", "seqdb_demo2", 0, "web_user", "fasta_www");
+
+my ($auto_reg,$rpd2_fams, $neg_doms, $lav, $no_doms, $pf_acc, $shelp, $help) = (0, 0, 0, 0,0, 0,0,0);
+my ($min_nodom) = (10);
+
+my $color_sep_str = " :";
+$color_sep_str = '~';
+
+GetOptions(
+    "host=s" => \$host,
+    "db=s" => \$db,
+    "user=s" => \$user,
+    "password=s" => \$pass,
+    "port=i" => \$port,
+    "lav" => \$lav,
+    "neg" => \$neg_doms,
+    "neg_doms" => \$neg_doms,
+    "neg-doms" => \$neg_doms,
+    "min_nodom=i" => \$min_nodom,
+    "pfacc" => \$pf_acc,
+    "RPD2" => \$rpd2_fams,
+    "auto_reg" => \$auto_reg,
+    "h|?" => \$shelp,
+    "help" => \$help,
+    );
+
+pod2usage(1) if $shelp;
+pod2usage(exitstatus => 0, verbose => 2) if $help;
+pod2usage(1) unless (@ARGV || -p STDIN || -f STDIN);
+
+my $connect = "dbi:mysql(AutoCommit=>1,RaiseError=>1):database=$db";
+$connect .= ";host=$host" if $host;
+$connect .= ";port=$port" if $port;
+
+my $dbh = DBI->connect($connect,
+		       $user,
+		       $pass
+		      ) or die $DBI::errstr;
+
+my %annot_types = ();
+my %domains = (NODOM=>0);
+my $domain_cnt = 0;
+
+my $get_annot_sub = \&get_refseq_exons;
+
+my $get_exons_acc = $dbh->prepare(<<EOSQL);
+
+SELECT ex_num, ex_p_start as seq_start, ex_p_end as seq_end
+FROM ref_exons
+WHERE  acc=?
+ORDER BY seq_start
+
+EOSQL
+
+my $get_annots_sql = $get_exons_acc;
+
+my ($tmp, $gi, $sdb, $acc, $id, $use_acc);
+
+# get the query
+my ($query, $seq_len) = @ARGV;
+$seq_len = 0 unless defined($seq_len);
+
+$query =~ s/^>// if ($query);
+
+my @annots = ();
+
+#if it's a file I can open, read and parse it
+unless ($query && ($query =~ m/[\|:]/ || $query =~ m/^[XN]P_/)) {
+
+  while (my $a_line = <>) {
+    $a_line =~ s/^>//;
+    chomp $a_line;
+    push @annots, show_annots($a_line, $get_annot_sub);
+  }
+}
+else {
+  push @annots, show_annots("$query $seq_len", $get_annot_sub);
+}
+
+for my $seq_annot (@annots) {
+  print ">",$seq_annot->{seq_info},"\n";
+  for my $annot (@{$seq_annot->{list}}) {
+    if (!$lav && defined($domains{$annot->[-1]})) {
+      $annot->[-1] .= $color_sep_str.$domains{$annot->[-1]};
+    }
+    print join("\t",@$annot),"\n";
+  }
+}
+
+exit(0);
+
+# show_annots("id len", \&annot_sub): extract the RefSeq accession from a
+# gi|num|ref|acc|, ref|acc| or bare "NP_000552.2"-style identifier, run the
+# exon query, and return {seq_info => identifier, list => annotation rows}.
+sub show_annots {
+  my ($query_len, $get_annot_sub) = @_;
+  my ($annot_line, $seq_len) = split(/\s+/,$query_len);
+  my %annot_data = (seq_info=>$annot_line);
+
+  $use_acc = 1;
+  $get_annots_sql = $get_exons_acc;
+
+  if ($annot_line =~ m/^gi\|/) {
+    ($tmp, $gi, $sdb, $acc) = split(/\|/,$annot_line);
+  }
+  elsif ($annot_line =~ m/^ref\|/) {
+    ($sdb, $acc) = split(/\|/,$annot_line);
+  }
+  else {  # bare accession; also stops $acc carrying over from the last line
+    ($sdb, $acc) = ('ref', $annot_line);
+  }
+  $acc = "" unless defined($acc);
+  $acc =~ s/\.\d+$//;   # strip version suffix (NP_000552.2 -> NP_000552)
+  $get_annots_sql->execute($acc);
+  $annot_data{list} = $get_annot_sub->($get_annots_sql, $seq_len);
+  return \%annot_data;
+}
+
+# get_refseq_exons($get_annots, $seq_length)
+#
+#   $get_annots -- executed statement handle; every row supplies ex_num,
+#                  seq_start and seq_end
+#   $seq_length -- accepted but not used
+#
+# Returns an array ref with one row per exon, in the order fetched:
+#   [seq_start, seq_end, "exon_N"]        when $lav is set
+#   [seq_start, '-', seq_end, "exon_N"]   otherwise
+# No overlap resolution is done; rows are passed through as fetched.
+#
+sub get_refseq_exons {
+  my ($get_annots, $seq_length) = @_;
+
+  my @feats = ();
+
+  while (my $exon = $get_annots->fetchrow_hashref()) {
+    my $label = "exon_".$exon->{ex_num};
+    my ($from, $to) = ($exon->{seq_start}, $exon->{seq_end});
+
+    # --lav rows omit the '-' column between the two coordinates
+    if ($lav) {
+      push @feats, [$from, $to, $label];
+    }
+    else {
+      push @feats, [$from, '-', $to, $label];
+    }
+  }
+
+  return \@feats;
+}
+
+# domain_name($value) records a domain label in %domains: a label that has
+# not been seen before is given the next value of $domain_cnt.
+#
+# The label is returned exactly as passed in; this version does not strip
+# comments ("; truncated") or numbers, so "Cortactin 6." stays "Cortactin 6."
+#
+
+sub domain_name {
+  my ($value) = @_;
+
+  # first sighting of this label: hand out the next counter value
+  unless (defined($domains{$value})) {
+    $domains{$value} = ++$domain_cnt;
+  }
+
+  return $value;
+}
+
+__END__
+
+=pod
+
+=head1 NAME
+
+ann_exons_ncbi.pl
+
+=head1 SYNOPSIS
+
+ ann_exons_ncbi.pl --neg-doms  'gi|23065544|ref|NP_000552.2|' | accession.file
+
+=head1 OPTIONS
+
+ -h	short help
+ --help include description
+ --neg-doms,  -- report domains between annotated domains as NODOM
+                 (also --neg, --neg_doms)
+ --min_nodom=10  -- minimum length between domains for NODOM
+
+ --host, --user, --password, --port --db -- info for mysql database
+
+=head1 DESCRIPTION
+
+C<ann_pfam.pl> extracts domain information from a mysql
+database.  Currently, the program works with database sequence
+descriptions in one of two formats:
+
+ >pf26|649|O94823|AT10B_HUMAN -- RPD2_seqs
+
+(pf26 databases have auto_pfamseq in the second field) and
+
+ >gi|1705556|sp|P54670.1|CAF1_DICDI
+
+C<ann_pfam.pl> uses the C<pfamA_reg_full_significant>, C<pfamseq>,
+and C<pfamA> tables of the C<pfam> database to extract domain
+information on a protein.  For proteins that have multiple domains
+associated with the same overlapping region (domains overlap by more
+than 1/3 of the domain length), C<auto_pfam.pl> selects the domain
+annotation with the best C<domain_evalue_score>.  When domains overlap
+by less than 1/3 of the domain length, they are shortened to remove
+the overlap.
+
+C<ann_pfam.pl> is designed to be used by the B<FASTA> programs with
+the C<-V \!ann_pfam.pl> or C<-V "\!ann_pfam.pl --neg"> option.
+
+=head1 AUTHOR
+
+William R. Pearson, wrp at virginia.edu
+
+=cut
diff --git a/scripts/ann_exons_up_www.pl b/scripts/ann_exons_up_www.pl
new file mode 100755
index 0000000..4db872d
--- /dev/null
+++ b/scripts/ann_exons_up_www.pl
@@ -0,0 +1,239 @@
+#!/usr/bin/perl -w
+
+################################################################
+# copyright (c) 2014,2015 by William R. Pearson and The Rector &
+# Visitors of the University of Virginia */
+################################################################
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under this License is distributed on an "AS
+# IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+# express or implied.  See the License for the specific language
+# governing permissions and limitations under the License. 
+################################################################
+
+# ann_exons_up_www.pl gets an annotation file from fasta36 -V with a line of the form:
+
+# sp|P09488|GSTM1_HUMAN
+#
+# and returns the exons present in the protein from the EBI Proteins API coordinates service
+#
+# it must:
+# (1) read in the line
+# (2) parse it to get the acc
+# (3) get exon information from EBI/Uniprot
+# (4) return the tab delimited exon boundaries
+
+# 22-May-2017 -- use get("http://"), not get_https("https://"), because EBI does not have LWP::Protocol:https
+
+use strict;
+
+use Getopt::Long;
+use LWP::Simple;
+use LWP::UserAgent;
+# use LWP::Protocol::https;
+use Pod::Usage;
+use JSON qw(decode_json);
+
+use vars qw($host $db $port $user $pass);
+
+my ($lav, $shelp, $help) = (0, 0,0);
+
+my $color_sep_str = " :";
+$color_sep_str = '~';
+
+GetOptions(
+    "lav" => \$lav,
+    "h|?" => \$shelp,
+    "help" => \$help,
+    );
+
+pod2usage(1) if $shelp;
+pod2usage(exitstatus => 0, verbose => 2) if $help;
+pod2usage(1) unless (@ARGV || -p STDIN || -f STDIN);
+
+my %domains = (NODOM=>0);
+my @domain_list = (0);
+my $domain_cnt = 0;
+
+my ($tmp, $gi, $sdb, $acc, $id, $use_acc);
+
+my $get_annot_sub = \&get_up_www_exons;
+
+my $ua = LWP::UserAgent->new(ssl_opts=>{verify_hostname => 0});
+my $uniprot_url = 'http://www.ebi.ac.uk/proteins/api/coordinates/';
+my $uniprot_suff = ".json";
+
+# get the query
+my ($query, $seq_len) = @ARGV;
+$seq_len = 0 unless defined($seq_len);
+
+$query =~ s/^>// if ($query);
+
+my @annots = ();
+my %annot_set = (); # re-use annotations if they are available (not yet implemented)
+
+# An argument containing '|' or ':', or starting with a Uniprot accession, is
+# annotated directly; otherwise the arguments are files (or STDIN) read via <>.
+unless ($query && ($query =~ m/[\|:]/ ||
+		   $query =~ m/^(?:[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9](?:[A-Z][A-Z0-9]{2}[0-9]){1,2})\b/)) {
+  while (my $a_line = <>) {
+    $a_line =~ s/^>//;
+    chomp $a_line;
+    push @annots, show_annots($a_line, $get_annot_sub);
+  }
+}
+else {
+  push @annots, show_annots("$query\t$seq_len", $get_annot_sub);
+}
+
+for my $seq_annot (@annots) {
+  print ">",$seq_annot->{seq_info},"\n";
+  for my $annot (@{$seq_annot->{list}}) {
+    print join("\t",@$annot),"\n";
+  }
+}
+
+exit(0);
+
+# show_annots("id<tab>len", \&annot_sub): extract the Uniprot accession from
+# the identifier, fetch its exon coordinates from $uniprot_url, and return
+# {seq_info => identifier, list => exon rows ([] when nothing is available)}.
+sub show_annots {
+  my ($query_len, $get_annot_sub) = @_;
+  my ($annot_line, $seq_len) = split(/\t/,$query_len);
+  my %annot_data = (seq_info=>$annot_line, list=>[]);
+
+  $use_acc = 1;
+
+  if ($annot_line =~ m/^gi\|/) {
+    ($tmp, $gi, $sdb, $acc, $id) = split(/\|/,$annot_line);
+  }
+  elsif ($annot_line =~ m/^(sp|tr|up)\|/) {
+    ($sdb, $acc, $id) = split(/\|/,$annot_line);
+  }
+  elsif ($annot_line =~ m/^(SP|TR):(\w+) (\w+)/) {
+      ($sdb, $id, $acc) = (lc($1), $2, $3);
+  }
+  elsif ($annot_line =~ m/^(SP|TR):(\w+)/) {
+    ($sdb, $id, $acc) = (lc($1), $2, "");
+    warn "*** $0 - accession required: $annot_line";
+  }
+  elsif ($annot_line =~ m/\|/) {
+    ($sdb, $acc) = split(/\|/,$annot_line);
+  }
+  else {
+    ($acc) = ($annot_line =~ m/^(\S+)/);
+  }
+
+  # without an accession there is nothing to look up: skip the web request
+  return \%annot_data unless (defined($acc) && $acc ne "");
+  $acc =~ s/\.\d+$//;   # strip a ".N" version before building the URL
+  my $exon_json = get($uniprot_url.$acc.$uniprot_suff);
+  unless (!$exon_json || $exon_json =~ m/errorMessage/ || $exon_json =~ m/Can not find/) {
+    $annot_data{list} = parse_json_up_exons($exon_json);
+  }
+  return \%annot_data;
+}
+
+# parse_json_up_exons($exon_json) -- turn the JSON text returned by the
+# coordinates service into an array ref of exon rows:
+#   [seq_start, seq_end, info]       with --lav
+#   [seq_start, '-', seq_end, info]  otherwise
+# where info is "exon_N".$color_sep_str."N".  Only the first 'gnCoordinate'
+# entry is used, and N counts the exons that are kept.  An empty list is
+# returned when the text cannot be decoded into a JSON object.
+sub parse_json_up_exons {
+  my ($exon_json) = @_;
+
+  # decode_json() dies on malformed input (e.g. an HTML error page); trap
+  # that so one bad response does not abort the whole annotation run
+  my $acc_exons = eval { decode_json($exon_json) };
+  return [] unless (ref($acc_exons) eq 'HASH');
+
+  my @ex_feats = ();
+  my $exon_num = 1;
+
+  for my $exon ( @{$acc_exons->{'gnCoordinate'}[0]{'genomicLocation'}{'exon'}} ) {
+    my ($p_begin, $p_end) = ($exon->{'proteinLocation'}{'begin'}{'position'},
+                             $exon->{'proteinLocation'}{'end'}{'position'});
+
+    # skip exons without protein coordinates, or with end before begin
+    next unless (defined($p_begin) && defined($p_end) && $p_end >= $p_begin);
+
+    my $info = "exon_".$exon_num.$color_sep_str.$exon_num;
+    if ($lav) {
+      push @ex_feats, [$p_begin, $p_end, $info];
+    }
+    else {
+      push @ex_feats, [$p_begin, '-', $p_end, $info];
+    }
+    $exon_num++;
+  }
+  return \@ex_feats;
+}
+
+# get_https($url) -- GET $url through the shared $ua user agent and return
+# the decoded body of a successful response, or '' when the request fails.
+sub get_https {
+  my ($url) = @_;
+
+  my $reply = $ua->get($url);
+  return '' unless ($reply->is_success);
+
+  # scalar assignment, so decoded_content is still called in scalar context
+  my $body = $reply->decoded_content;
+
+  return $body;
+}
+
+# domain_name($value): give a label not yet present in %domains the next
+# $domain_cnt value; the label is returned as passed in.
+sub domain_name {
+  my ($value) = @_;
+
+  unless (defined($domains{$value})) {
+    $domains{$value} = ++$domain_cnt;
+  }
+  return $value;
+}
+
+__END__
+
+=pod
+
+=head1 NAME
+
+ann_exons_up_www.pl
+
+=head1 SYNOPSIS
+
+ ann_exons_up_www.pl 'sp|P09488|GSTM1_HUMAN' | accession.file
+
+=head1 OPTIONS
+
+ -h	short help
+ --help include description
+ --lav  produce lav2plt.pl annotation format
+
+=head1 DESCRIPTION
+
+C<ann_exons_up_www.pl> extracts exon coordinates for proteins using
+the EBI Proteins REST API described here:
+C<https://www.ebi.ac.uk/proteins/api/doc/#coordinatesApi>.  Exon
+intron boundaries, in protein coordinates, are available for Uniprot
+proteins with Ensembl entries.
+
+C<ann_exons_up_www.pl> is designed to be used by the B<FASTA> programs with
+the C<-V \!ann_exons_up_www.pl> or C<-V "q\!ann_exons_up_www.plg"> option.
+
+=head1 AUTHOR
+
+William R. Pearson, wrp at virginia.edu
+
+=cut
diff --git a/scripts/ann_feats2ipr.pl b/scripts/ann_feats2ipr.pl
new file mode 100755
index 0000000..0a0ae84
--- /dev/null
+++ b/scripts/ann_feats2ipr.pl
@@ -0,0 +1,526 @@
+#!/usr/bin/perl -w
+
+################################################################
+# copyright (c) 2014 by William R. Pearson and The Rector &
+# Visitors of the University of Virginia */
+################################################################
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under this License is distributed on an "AS
+# IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+# express or implied.  See the License for the specific language
+# governing permissions and limitations under the License. 
+################################################################
+
+# ann_feats2ipr.pl gets an annotation file from fasta36 -V with a line of the form:
+
+# gi|62822551|sp|P00502|GSTA1_RAT Glutathione S-transfer\n  (at least from pir1.lseg)
+#
+# it must:
+# (1) read in the line
+# (2) parse it to get the up_acc
+# (3) return the tab delimited features
+#
+
+# this version takes "features:"
+#   ACT_SITE, MOD_RES, SITE, METAL, VARIANT, MUTAGEN
+# from Uniprot and combines them with domain annotations from my merge of the Interpro database.
+#
+
+# ann_feats2ipr.pl is largely identical to ann_feats2l.pl, except that
+#   it uses Interpro for domain/repeat information.
+
+use strict;
+
+use DBI;
+use Getopt::Long;
+use Pod::Usage;
+
+use vars qw($host $db $dom_db $a_table $port $user $pass);
+
+my %domains = ();
+my $domain_cnt = 0;
+
+my $hostname = `/bin/hostname`;
+
+unless ($hostname =~ m/ebi/) {
+  ($host, $db, $a_table, $port, $user, $pass)  = ("wrpxdb.its.virginia.edu", "uniprot", "annot2", 0, "web_user", "fasta_www");
+#  $host = 'localhost';
+} else {
+  ($host, $db, $a_table, $port, $user, $pass)  = ("mysql-pearson-prod", "up_db", "annot", 4124, "web_user", "fasta_www");
+}
+
+my ($lav, $neg_doms, $no_doms, $no_feats, $no_label, $use_ipr, $acc_comment, $shelp, $help, $no_mod, $dom_db, $db_ref_acc) = 
+    (0,0,0,0,0,0,0,0,0,0,0,0);
+
+my $color_sep_str = " :";
+$color_sep_str = '~';
+
+GetOptions(
+	   "host=s" => \$host,
+	   "db=s" => \$db,
+	   "user=s" => \$user,
+	   "password=s" => \$pass,
+	   "port=i" => \$port,
+	   "lav" => \$lav,
+	   "no_mod" => \$no_mod,
+	   "no-mod" => \$no_mod,
+	   "no-doms" => \$no_doms,
+	   "nodoms" => \$no_doms,
+           "dom_db=s" => \$dom_db,
+	   "dom_acc" => \$db_ref_acc,
+	   "dom-acc" => \$db_ref_acc,
+	   "neg" => \$neg_doms,
+	   "neg_doms" => \$neg_doms,
+	   "neg-doms" => \$neg_doms,
+	   "negdoms" => \$neg_doms,
+	   "no_feats" => \$no_feats,
+	   "no-feats" => \$no_feats,
+	   "nofeats" => \$no_feats,
+	   "no_label" => \$no_label,
+	   "no-label" => \$no_label,
+	   "nolabel" => \$no_label,
+	   "ipr" => \$use_ipr,
+	   "acc_comment" => \$acc_comment,
+	   "h|?" => \$shelp,
+	   "help" => \$help,
+	  );
+
+pod2usage(1) if $shelp;
+pod2usage(exitstatus => 0, verbose => 2) if $help;
+pod2usage(1) unless (-p STDIN || -f STDIN || @ARGV);
+
+my $connect = "dbi:mysql(AutoCommit=>1,RaiseError=>1):database=$db";
+$connect .= ";host=$host" if $host;
+$connect .= ";port=$port" if $port;
+
+my $dbh = DBI->connect($connect,
+		       $user,
+		       $pass
+		      ) or die $DBI::errstr;
+
+my @feat_keys = qw(ACT_SITE MOD_RES BINDING SITE METAL VARIANT MUTAGEN);
+my @feat_vals = ( '=','*','#','^','!','V','V');
+my @feat_text = ( "active site", "phosphorylation", "binding site", "site", "metal binding");
+
+my @dom_vals = ( [ '[', ']'],[ '[', ']']);
+
+my %annot_types = ();   # feature key => annotation symbol
+@annot_types{@feat_keys} = @feat_vals;   # hash slice: pair each key with its symbol
+
+my $get_annot_sub = \&get_fasta_annots;
+if ($lav) {
+  $no_feats = 1;
+  $get_annot_sub = \&get_lav_annots;
+}
+
+if ($neg_doms) {
+  $domains{'NODOM'}=0;
+}
+
+if ($no_mod) {
+  @feat_keys = qw(ACT_SITE BINDING SITE METAL);
+  @feat_text = ( "active site", "binding site", "site", "metal binding");
+  @feat_vals = ( '=','#','^','!');
+  delete($annot_types{'MOD_RES'});
+  delete($annot_types{'MUTAGEN'});
+  delete($annot_types{'VARIANT'});
+}
+
+my $get_ft2_sites_id = $dbh->prepare(qq(select acc, pos, end, label, value, len from features2 join $a_table using(acc) where id=? and label in ('ACT_SITE','MOD_RES','BINDING','SITE','METAL','VARIANT','MUTAGEN') order by pos));
+
+my $get_ft2_sites_acc = $dbh->prepare(qq(select acc, pos, end, label, value, len from features2 join $a_table using(acc) where acc=? and label in ('ACT_SITE','MOD_RES','BINDING','SITE','METAL','VARIANT','MUTAGEN') order by pos));
+
+my $get_ft2_sites_refacc= $dbh->prepare(qq(select ref_acc, pos, end, label, value, len from features2 join $a_table using(acc) where ref_acc=? and label in ('ACT_SITE','MOD_RES','BINDING','SITE','METAL','VARIANT','MUTAGEN') order by pos));
+
+my $get_ipr_doms_id = $dbh->prepare(qq(select acc, start, stop, ipr_acc, db_ref, s_descr, len from prot2ipr_s join $a_table using(acc) join ipr_annot using(ipr_acc) where id=? order by start));
+# --dom_db comes from the command line: let DBI quote it for the SQL text
+my $dom_db_sql = $dbh->quote($dom_db);
+my $get_ipr_domdb_id = $dbh->prepare(qq(select acc, start, stop, ipr_acc, db_ref, s_descr, len from prot2ipr join $a_table using(acc) join ipr_annot using(ipr_acc) where dom_db=$dom_db_sql AND id=? order by start));
+
+my $get_ipr_doms_acc = $dbh->prepare(qq(select acc, start, stop, ipr_acc, db_ref, s_descr, len from prot2ipr_s join $a_table using(acc) join ipr_annot using(ipr_acc) where acc=? order by start));
+my $get_ipr_doms_refacc = $dbh->prepare(qq(select ref_acc, start, stop, ipr_acc, db_ref, s_descr, len from prot2ipr_s join $a_table using(acc) join ipr_annot using(ipr_acc) where ref_acc=? order by start));
+
+my $get_ipr_domdb_acc = $dbh->prepare(qq(select acc, start, stop, ipr_acc, db_ref, s_descr, len from prot2ipr join $a_table using(acc) join ipr_annot using(ipr_acc) where dom_db=$dom_db_sql AND acc=? order by start));
+
+my $get_sites_sql = $get_ft2_sites_id;
+my $get_doms_sql = $get_ipr_doms_id;
+
+my ($tmp, $gi, $sdb, $acc, $id, $use_acc);
+
+unless ($no_feats || $no_label) {
+  for my $i ( 0 .. $#feat_text ) {
+    print "=",$feat_vals[$i],":",$feat_text[$i],"\n";
+  }
+  # print "=*:phosphorylation\n";
+  # print "==".":active site\n";
+  # print "=@".":site\n";
+  # print "=^:binding\n";
+  # print "=!:metal binding\n";
+}
+
+# get the query
+my ($query, $seq_len) = @ARGV;
+
+$seq_len = 0 unless defined($seq_len);
+
+$query =~ s/^>// if $query;
+
+my @annots = ();
+
+# An argument containing '|' or ':', or starting with a RefSeq protein or
+# Uniprot accession, is annotated directly; otherwise read files/STDIN via <>.
+unless ($query && ($query =~ m/[\|:]/ ||
+		   $query =~ m/^[NX]P_/ ||
+		   $query =~ m/^(?:[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9](?:[A-Z][A-Z0-9]{2}[0-9]){1,2})\b/)) {
+  while (my $a_line = <>) {
+    $a_line =~ s/^>//;
+    chomp $a_line;
+    my $annots_ref = show_annots($a_line, $get_annot_sub);
+    push @annots, $annots_ref if ($annots_ref);
+  }
+} else {
+  my $annots_ref = show_annots("$query\t$seq_len", $get_annot_sub);
+  push @annots, $annots_ref if ($annots_ref);
+}
+
+for my $seq_annot (@annots) {
+  print ">",$seq_annot->{seq_info},"\n";
+  for my $annot (@{$seq_annot->{list}}) {
+    if (!$lav && defined($domains{$annot->[4]})) {
+      $annot->[-2] .= $color_sep_str.$domains{$annot->[4]};
+    }
+    if ($lav) {
+      print join("\t",@$annot[0 .. 2]),"\n";
+    }
+    else {
+      print join("\t",@$annot[0 .. 3]),"\n";
+    }
+  }
+}
+
+exit(0);
+
+# show_annots("id<tab>len", \&annot_sub): parse one identifier line, execute
+# the site and domain queries that match its database tag, and return
+# {seq_info => identifier, list => rows from \&annot_sub}; returns 0 when an
+# accession is needed but none could be parsed.
+sub show_annots {
+  my ($query_length, $get_annot_sub) = @_;
+  my ($annot_line, $seq_length) = split(/\t/,$query_length);
+  my %annot_data = (seq_info=>$annot_line);
+
+  # database tags are matched as alternations (sp|ref), not as [sp|ref] classes
+  if ($annot_line =~ m/^gi\|/ && $annot_line =~ m/\|(?:sp|ref)\|/) {
+    $use_acc = 1;
+    ($tmp, $gi, $sdb, $acc, $id) = split(/\|/,$annot_line);
+    if ($sdb !~ m/sp/ && $annot_line =~ m/\|sp\|(\w+)/) {
+      ($sdb, $acc) = ('sp', $1);  # use the SwissProt accession, queried by acc
+    }
+  }
+  elsif ($annot_line =~ m/^(?:sp|tr|ref|up)\|/ ) {
+    $use_acc = 1;
+    ($sdb, $acc) = split(/\|/,$annot_line);
+  }
+  elsif ($annot_line =~ m/^gi\|/) {
+    $use_acc = 1;
+    ($tmp, $gi, $sdb, $acc, $id) = split(/\|/,$annot_line);
+  }
+  elsif ($annot_line =~ m/^(SP|TR):(\w+) (\w+)/) {
+    $use_acc = 1;
+    ($sdb, $id, $acc) = ($1,$2,$3);
+    $sdb = lc($sdb)
+  }
+  elsif ($annot_line =~ m/^(SP|TR):(\w+)/) {
+    $use_acc = 0;
+    ($sdb, $id, $acc) = ($1,$2,"");
+    $sdb = lc($sdb)
+  }
+  elsif ($annot_line =~ m/\|/) {  # new NCBI swissprot format
+    $use_acc = 1;
+    ($sdb, $acc, $id) = split(/\|/,$annot_line);
+  } else {
+    $use_acc = 1;
+    $sdb = 'sp';
+    ($acc) = split(/\s+/,$annot_line);
+  }
+  # no accession: query by id; otherwise query by the version-less accession
+  unless ($use_acc) {
+    unless ($no_feats) {
+      $get_sites_sql = $get_ft2_sites_id;
+      $get_sites_sql->execute($id);
+    }
+    unless ($no_doms) {
+      if ($dom_db) {
+	$get_doms_sql = $get_ipr_domdb_id;
+      }
+      else {
+	$get_doms_sql = $get_ipr_doms_id;
+      }
+      $get_doms_sql->execute($id);
+    }
+  } else {
+    unless ($acc) {
+      print STDERR "ann_feats2ipr.pl no acc: $annot_line\n";
+      return 0;
+    }
+    $acc =~ s/\.\d+$//;
+    if ($sdb eq 'ref') {
+      unless ($no_feats) {
+	$get_sites_sql = $get_ft2_sites_refacc;
+	$get_sites_sql->execute($acc);
+      }
+      unless ($no_doms) {
+	$get_doms_sql = $get_ipr_doms_refacc;
+	$get_doms_sql->execute($acc);
+      }
+    }
+    else {
+      unless ($no_feats) {
+	$get_sites_sql = $get_ft2_sites_acc;
+	$get_sites_sql->execute($acc);
+      }
+      unless ($no_doms) {
+	if ($dom_db) {
+	  $get_doms_sql = $get_ipr_domdb_acc;
+	}
+	else {
+	  $get_doms_sql = $get_ipr_doms_acc;
+	}
+	$get_doms_sql->execute($acc);
+      }
+    }
+  }
+
+  $annot_data{list} = $get_annot_sub->($seq_length, $get_sites_sql, $get_doms_sql);
+  return \%annot_data;
+}
+
+# get_fasta_annots() fetches the rows of the already-executed site and
+# domain statements and converts them into annotation rows.
+#
+# Arguments:
+#   $seq_len       - sequence length supplied by the caller
+#   $get_sites_sql - executed statement returning
+#                    (acc, pos, end, label, value, len); read unless $no_feats
+#   $get_doms_sql  - executed statement returning
+#                    (acc, start, stop, ipr_acc, db_ref, s_descr, len);
+#                    read unless $no_doms
+#
+# Returns a reference to an array of rows sorted by position:
+#   site rows:    [pos, symbol, residue-or-"-", comment, ""]
+#   domain start: [start, '[', '-', label, ipr_acc]
+#   domain end:   [end,   ']', '-', "", ""]
+# With $neg_doms, gaps of more than 10 residues between domains (and after
+# the last domain) are added as "NODOM" domains.
+#
+sub get_fasta_annots {
+  my ($seq_len, $get_sites_sql, $get_doms_sql) = @_;
+
+  my ($acc, $pos, $end, $label, $value, $comment, $len);
+
+  # NOTE(review): the caller-supplied length is discarded here; $seq_len
+  # is rebuilt from the len column of the fetched rows -- confirm intended
+  $seq_len = 0;
+
+  my @feats2 = (); # features with start/stop, for checking overlap, adding negative
+  my @sites = ();  # sites with one position
+
+  # get sites
+  unless ($no_feats) {
+    while (($acc, $pos, $end, $label, $value, $len) = $get_sites_sql->fetchrow_array()) {
+      $seq_len = $len if ($len > $seq_len);
+      next unless $annot_types{$label};
+      if ($label =~ m/VARIANT/) {
+        # value looks like "X -> Y (comment)"
+        my ($aa_res, $comment) = split(/\(/,$value);
+        # nothing before the '(' means no substitution can be parsed
+        next unless defined($aa_res);
+        if ($comment) {
+          $comment =~ s/\)//;
+          # remove the  /FTId=VAR_014497 information
+          $comment =~ s/\s+\/FTId=.*$//;
+        } else {
+          $comment = "";
+        }
+        next if ($comment =~ /MISSING/);
+        my ($vfrom, $vto) = ($aa_res =~ m/(\w)\s*->\s*(\w)/);
+        if ($vto) {
+          $comment = '' unless $comment;
+          $value = $vto;
+          push @sites, [$pos, $annot_types{$label}, $value, $comment, ""];
+        }
+      } elsif ($label =~ m/MUTAGEN/) {
+        # value looks like "X->Y,Z: comment"
+        my ($aa_res, $comment) = split(/: /,$value);
+        # an empty value yields no fields at all
+        next unless defined($aa_res);
+        # a value without ": " has no comment part
+        $comment = '' unless defined($comment);
+        next if ($comment =~ /MISSING/);
+        my ($vfrom, $vto) = split(/\->/,$aa_res);
+        if ($vto) {
+          # one site row per replacement residue
+          my @vto_list = split(/,/,$vto);
+          $value = $vto;
+          for my $val ( @vto_list) {
+            push @sites, [$pos, $annot_types{$label}, $val, "Mutagen: $comment", ""];
+          }
+        }
+      } else {
+        push @sites, [$pos, $annot_types{$label}, "-", "$label: $value", ""];
+      }
+    }
+  }
+
+  unless ($no_doms) {
+    my ($ipr_acc, $db_ref, $s_descr) = ("","","");
+    while (($acc, $pos, $end, $ipr_acc, $db_ref, $s_descr, $len) = $get_doms_sql->fetchrow_array()) {
+      $db_ref =~ s/G3DSA://;
+      $seq_len = $len unless ($seq_len > $len);
+
+      # label: description, optionally with {accession}; replaced by the
+      # member-database reference or the InterPro accession on request
+      $value = domain_name($ipr_acc, $s_descr);
+      if ($acc_comment) {
+        $value .= "{$ipr_acc}";
+      }
+      if ($db_ref_acc) {
+        $value = $db_ref;
+      }
+      elsif ($use_ipr) {
+        $value = $ipr_acc;
+      }
+
+      push @feats2, [$pos, "-", $end, $value, $ipr_acc];
+    }
+  }
+
+  # ensure that domains do not overlap: when a domain starts at or before
+  # the end of the previous one, split the overlap at its midpoint
+  for (my $i=1; $i < scalar(@feats2); $i++) {
+    my $diff = $feats2[$i-1]->[2] - $feats2[$i]->[0];
+    if ($diff >= 0) {
+      $feats2[$i-1]->[2] = $feats2[$i]->[0]+ int($diff/2);
+      $feats2[$i]->[0] = $feats2[$i-1]->[2] + 1;
+    }
+  }
+
+  my @n_feats2 = ();
+
+  if ($neg_doms) {
+    my $last_end = 0;
+    for my $feat ( @feats2 ) {
+      if ($feat->[0] - $last_end > 10) {
+        push @n_feats2, [$last_end+1, "-", $feat->[0]-1, "NODOM", "NODOM"];
+      }
+      $last_end = $feat->[2];
+    }
+    if ($seq_len - $last_end > 10) {
+      push @n_feats2, [$last_end+1, "-", $seq_len, "NODOM", "NODOM"];
+    }
+  }
+
+  # each domain becomes a '[' row at its start and a ']' row at its end
+  my @feats = ();
+  for my $feat (@feats2, @n_feats2) {
+    push @feats, [$feat->[0], '[', '-', $feat->[-2], $feat->[-1] ];
+    push @feats, [$feat->[2], ']', '-', "", ""];
+  }
+
+  @feats = sort { $a->[0] <=> $b->[0] } (@sites, @feats);
+
+  return \@feats;
+}
+
+# get_lav_annots() turns the rows of the executed domain statement into
+# three-column [start, stop, label] rows; $seq_len and $get_sites_sql are
+# accepted but not used.  Returns a reference to the array of rows.
+sub get_lav_annots {
+  my ($seq_len, $get_sites_sql, $get_doms_sql) = @_;
+
+  my @lav_rows;
+
+  # row layout: (acc, start, stop, ipr_acc, db_ref, s_descr, len)
+  while (my @dom_row = $get_doms_sql->fetchrow_array()) {
+    my ($start, $stop, $ipr_id, $descr) = @dom_row[1, 2, 3, 5];
+    my $dom_label = domain_name($ipr_id, $descr);
+    push @lav_rows, [$start, $stop, $dom_label];
+  }
+
+  return \@lav_rows;
+}
+
+# domain_name() builds the label for a domain from its short description
+# and registers the domain accession in %domains.
+#
+# The first "-domain"/"_domain" and the first "-homology"/"_homology" are
+# removed from the description.  An accession that is not yet a key of
+# %domains is given the next sequential number ($domain_cnt).
+# Returns the edited description.
+#
+
+sub domain_name {
+  my ($ipr_acc, $s_descr) = @_;
+
+  $s_descr =~ s/[\-_]domain//;
+  $s_descr =~ s/[\-_]homology//;
+
+  # replaces the leading 20 characters with themselves, so the
+  # description keeps its full length
+  $s_descr =~ s/^(.{20})/$1/;
+
+  # first time this accession is seen: assign the next number
+  unless (defined($domains{$ipr_acc})) {
+    $domains{$ipr_acc} = ++$domain_cnt;
+  }
+
+  return $s_descr;
+}
+
+
+__END__
+
+=pod
+
+=head1 NAME
+
+ann_feats2.pl
+
+=head1 SYNOPSIS
+
+ ann_feats2.pl --no_doms --no_feats --lav 'sp|P09488|GSTM1_HUMAN' | accession.file
+
+=head1 OPTIONS
+
+ -h	short help
+ --help include description
+
+ --acc_comment provide the InterPro accession in {IPR00123} brackets for links
+ --dom_db=G3DSA use a single domain database (e.g. PF, G3DSA, PS5) from InterPro
+ --dom_acc provide the domain accession, not the description, as the domain label
+ --neg, --neg_doms, --neg-doms label non-domain regions > 10 residues as "NODOM"
+ --ipr provide InterPro accession as label
+ --no-doms  do not show domain boundaries (domains are always shown with --lav)
+ --no-feats do not show feature (variants, active sites, phospho-sites)
+ --no-label do not show feature key (=*:phosphorylation, etc)
+
+ --lav  produce lav2plt.pl annotation format, only show domains/repeats
+
+ --host, --user, --password, --port --db -- info for mysql database
+
+=head1 DESCRIPTION
+
+C<ann_feats2ipr.pl> extracts feature, domain, and repeat information from
+two mysql databases (default names: uniprot/ipr2) built by parsing the
+uniprot_sprot.dat and uniprot_trembl.dat feature tables.  Given a
+command line argument that contains a sequence accession (P09488) or
+identifier (GSTM1_HUMAN), the program looks up the features available
+for that sequence and returns them in a tab-delimited format:
+
+ >sp|P09488
+ 2	-	88	DOMAIN: GST N-terminal.
+ 7	V	F	Mutagen: Reduces catalytic activity 100- fold.
+ 23	*	-	MOD_RES: Phosphotyrosine (By similarity).
+ 33	*	-	MOD_RES: Phosphotyrosine (By similarity).
+ 34	*	-	MOD_RES: Phosphothreonine (By similarity).
+ 90	-	208	DOMAIN: GST C-terminal.
+ 108	V	S	Mutagen: Changes the properties of the enzyme toward some substrates.
+ 108	V	Q	Mutagen: Reduces catalytic activity by half.
+ 109	V	I	Mutagen: Reduces catalytic activity by half.
+ 116	#	-	BINDING: Substrate.
+ 116	V	A	Mutagen: Reduces catalytic activity 10-fold.
+ 116	V	F	Mutagen: Slight increase of catalytic activity.
+ 173	V	N	in allele GSTM1B; dbSNP:rs1065411.
+ 210	V	T	in dbSNP:rs449856.
+
+If features are provided, then a legend of feature symbols is provided
+as well (disabled with C<--no-label>):
+
+ =*:phosphorylation
+ ==:active site
+ =@:site
+ =^:binding
+ =!:metal binding
+
+If the C<--lav> option is specified, domain and repeat features are
+presented in a different format for the C<lav2plt.pl> program:
+
+  >sp|P09488|GSTM1_HUMAN
+  2	88	GST N-terminal.
+  90	208	GST C-terminal.
+
+C<ann_feats2.pl> is designed to be used by the B<FASTA> programs with
+the C<-V \!ann_feats2.pl> option.  It can also be used with the lav2plt.pl
+program with the C<--xA "\!ann_feats2.pl --lav"> or C<--yA "\!ann_feats2.pl --lav"> options.
+
+=head1 AUTHOR
+
+William R. Pearson, wrp at virginia.edu
+
+=cut
diff --git a/scripts/ann_feats2ipr_e.pl b/scripts/ann_feats2ipr_e.pl
new file mode 100755
index 0000000..99edc4f
--- /dev/null
+++ b/scripts/ann_feats2ipr_e.pl
@@ -0,0 +1,544 @@
+#!/usr/bin/perl -w
+
+################################################################
+# copyright (c) 2014 by William R. Pearson and The Rector &
+# Visitors of the University of Virginia */
+################################################################
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under this License is distributed on an "AS
+# IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+# express or implied.  See the License for the specific language
+# governing permissions and limitations under the License. 
+################################################################
+
+# ann_feats2ipr.pl gets an annotation file from fasta36 -V with a line of the form:
+
+# gi|62822551|sp|P00502|GSTA1_RAT Glutathione S-transfer\n  (at least from pir1.lseg)
+#
+# it must:
+# (1) read in the line
+# (2) parse it to get the up_acc
+# (3) return the tab delimited features
+#
+
+# this version takes "features:"
+#   ACT_SITE, MOD_RES, SITE, METAL, VARIANT, MUTAGEN
+# from Uniprot and combines them with domain annotations from my merge of the Interpro database.
+#
+
+# ann_feats2ipr.pl is largely identical to ann_feats2l.pl, except that
+#   it uses Interpro for domain/repeat information.
+
+use strict;
+
+use DBI;
+use Getopt::Long;
+use Pod::Usage;
+
+use vars qw($host $db $dom_db $a_table $port $user $pass);
+
+# %domains maps a domain accession to the sequential number assigned by
+# domain_name(); $domain_cnt is the last number assigned
+my %domains = ();
+my $domain_cnt = 0;
+
+my $hostname = `/bin/hostname`;
+
+# database defaults depend on whether the host name contains "ebi"
+unless ($hostname =~ m/ebi/) {
+  ($host, $db, $a_table, $port, $user, $pass)  = ("wrpxdb.its.virginia.edu", "uniprot", "annot2", 0, "web_user", "fasta_www");
+#  $host = 'xdb';
+} else {
+  ($host, $db, $a_table, $port, $user, $pass)  = ("mysql-pearson-prod", "up_db", "annot", 4124, "web_user", "fasta_www");
+}
+
+# command line flags, all off by default
+my ($lav, $neg_doms, $no_doms, $no_feats, $no_label, $use_ipr, $acc_comment, $shelp, $help, $no_mod, $dom_db, $db_ref_acc, $bound_comment) = 
+    (0,0,0,0,0,0,0,0,0,0,0,0,0);
+
+# $color_sep_str separates a domain label from the text appended to it
+# by the output loop
+my ($show_color, $color_sep_str) = (1," :");
+$color_sep_str = '~';
+
+# Most flags accept underscore, hyphen and run-together spellings.
+# "no_doms" is registered as well, so the --no_doms spelling used in the
+# SYNOPSIS is accepted like --no_feats and --no_label.
+GetOptions(
+	   "host=s" => \$host,
+	   "db=s" => \$db,
+	   "user=s" => \$user,
+	   "password=s" => \$pass,
+	   "port=i" => \$port,
+	   "lav" => \$lav,
+	   "bound_comment" => \$bound_comment,
+	   "color!" => \$show_color,
+	   "no_mod" => \$no_mod,
+	   "no-mod" => \$no_mod,
+	   "no_doms" => \$no_doms,
+	   "no-doms" => \$no_doms,
+	   "nodoms" => \$no_doms,
+           "dom_db=s" => \$dom_db,
+	   "dom_acc" => \$db_ref_acc,
+	   "dom-acc" => \$db_ref_acc,
+	   "neg" => \$neg_doms,
+	   "neg_doms" => \$neg_doms,
+	   "neg-doms" => \$neg_doms,
+	   "negdoms" => \$neg_doms,
+	   "no_feats" => \$no_feats,
+	   "no-feats" => \$no_feats,
+	   "nofeats" => \$no_feats,
+	   "no_label" => \$no_label,
+	   "no-label" => \$no_label,
+	   "nolabel" => \$no_label,
+	   "ipr" => \$use_ipr,
+	   "acc_comment" => \$acc_comment,
+	   "h|?" => \$shelp,
+	   "help" => \$help,
+	  );
+
+# short usage for -h, full POD for --help; usage also when there is
+# neither an argument nor piped/redirected input
+pod2usage(1) if $shelp;
+pod2usage(exitstatus => 0, verbose => 2) if $help;
+pod2usage(1) unless (-p STDIN || -f STDIN || @ARGV);
+
+# DSN with driver attributes; host and port are only added when set
+my $connect = "dbi:mysql(AutoCommit=>1,RaiseError=>1):database=$db";
+$connect .= ";host=$host" if $host;
+$connect .= ";port=$port" if $port;
+
+my $dbh = DBI->connect($connect,
+		       $user,
+		       $pass
+		      ) or die $DBI::errstr;
+
+# feature labels, the symbol shown for each, and the legend text for the
+# first five symbols
+my @feat_keys = qw(ACT_SITE MOD_RES BINDING SITE METAL VARIANT MUTAGEN);
+my @feat_vals = ( '=','*','#','^','!','V','V');
+my @feat_text = ( "active site", "phosphorylation", "binding site", "site", "metal binding");
+
+# label => symbol (hash slice assignment)
+my %annot_types = ();
+@annot_types{@feat_keys} = @feat_vals;
+
+my $get_annot_sub = \&get_fasta_annots;
+if ($lav) {
+  $no_feats = 1;
+  # get_lav_annots() always fetches from the domain statement, which
+  # show_annots() only executes when $no_doms is false
+  $no_doms = 0;
+  $get_annot_sub = \&get_lav_annots;
+}
+
+if ($neg_doms) {
+  $domains{'NODOM'}=0;
+}
+
+# --no_mod: keep only the active site / binding / site / metal features
+if ($no_mod) {
+  @feat_keys = qw(ACT_SITE BINDING SITE METAL);
+  @feat_text = ( "active site", "binding site", "site", "metal binding");
+  @feat_vals = ( '=','#','^','!');
+  delete($annot_types{'MOD_RES'});
+  delete($annot_types{'MUTAGEN'});
+  delete($annot_types{'VARIANT'});
+}
+
+# Feature (site) queries on features2, by identifier, accession or RefSeq
+# accession.  Each returns (acc, pos, end, label, value, len) ordered by
+# position.
+my $get_ft2_sites_id = $dbh->prepare( <<EOSQL );
+SELECT acc, pos, end, label, value, len
+ FROM features2
+ JOIN $a_table USING(acc)
+WHERE id=?
+  AND  label in ('ACT_SITE','MOD_RES','BINDING','SITE','METAL','VARIANT','MUTAGEN')
+ORDER BY pos
+EOSQL
+
+my $get_ft2_sites_acc = $dbh->prepare( <<EOSQL );
+SELECT acc, pos, end, label, value, len
+ FROM features2
+ JOIN $a_table USING(acc)
+WHERE acc=?
+  AND label IN ('ACT_SITE','MOD_RES','BINDING','SITE','METAL','VARIANT','MUTAGEN')
+ORDER BY pos
+EOSQL
+
+my $get_ft2_sites_refacc= $dbh->prepare( <<EOSQL );
+select ref_acc, pos, end, label, value, len from features2 join $a_table using(acc) where ref_acc=? and label in ('ACT_SITE','MOD_RES','BINDING','SITE','METAL','VARIANT','MUTAGEN') order by pos
+EOSQL
+
+# --dom_db comes from the command line; quote it as an SQL string literal
+# instead of pasting it between '...' by hand
+my $dom_db_sql = $dbh->quote($dom_db);
+
+# Domain queries.  Each returns
+# (acc, start, stop, ipr_acc, db_ref, s_descr, len) ordered by start.
+# The *_domdb_* variants read prot2ipr restricted to one dom_db value,
+# the others read prot2ipr_s.
+my $get_ipr_doms_id = $dbh->prepare(qq(select acc, start, stop, ipr_acc, db_ref, s_descr, len from prot2ipr_s join $a_table using(acc) join ipr_annot using(ipr_acc) where id=? order by start));
+
+my $get_ipr_domdb_id = $dbh->prepare(qq(select acc, start, stop, ipr_acc, db_ref, s_descr, len from prot2ipr join $a_table using(acc) join ipr_annot using(ipr_acc) where dom_db=$dom_db_sql AND id=? order by start));
+
+my $get_ipr_doms_acc = $dbh->prepare(qq(select acc, start, stop, ipr_acc, db_ref, s_descr, len from prot2ipr_s join $a_table using(acc) join ipr_annot using(ipr_acc) where acc=? order by start));
+
+my $get_ipr_doms_refacc = $dbh->prepare(qq(select ref_acc, start, stop, ipr_acc, db_ref, s_descr, len from prot2ipr_s join $a_table using(acc) join ipr_annot using(ipr_acc) where ref_acc=? order by start));
+
+my $get_ipr_domdb_acc = $dbh->prepare(qq(select acc, start, stop, ipr_acc, db_ref, s_descr, len from prot2ipr join $a_table using(acc) join ipr_annot using(ipr_acc) where dom_db=$dom_db_sql AND acc=? order by start));
+
+# statement handles in use; show_annots() re-selects them per request
+my $get_sites_sql = $get_ft2_sites_id;
+my $get_doms_sql = $get_ipr_doms_id;
+
+# header fields parsed by show_annots()
+my ($tmp, $gi, $sdb, $acc, $id, $use_acc);
+
+# legend: one "=symbol:text" line per entry of @feat_text
+unless ($no_feats || $no_label) {
+  for my $i ( 0 .. $#feat_text ) {
+    print "=",$feat_vals[$i],":",$feat_text[$i],"\n";
+  }
+  # print "=*:phosphorylation\n";
+  # print "==".":active site\n";
+  # print "=@".":site\n";
+  # print "=^:binding\n";
+  # print "=!:metal binding\n";
+}
+
+# command line: query description and optional sequence length
+my ($query, $seq_len) = @ARGV;
+
+$seq_len = 0 unless defined($seq_len);
+
+$query =~ s/^>// if ($query);
+
+my @annots = ();
+
+if ($query && $query =~ m/[\|:]/ ) {
+  # a query containing '|' or ':' is annotated directly
+  my $single_ref = show_annots("$query\t$seq_len", $get_annot_sub);
+  push @annots, $single_ref if ($single_ref);
+}
+else {
+  # otherwise the annotation requests are read line by line through <>
+  while (my $a_line = <>) {
+    $a_line =~ s/^>//;
+    chomp $a_line;
+    my $line_ref = show_annots($a_line, $get_annot_sub);
+    push @annots, $line_ref if ($line_ref);
+  }
+}
+
+# output: a ">description" line, then one tab-separated row per annotation
+for my $seq_annot (@annots) {
+  print ">",$seq_annot->{seq_info},"\n";
+  for my $annot (@{$seq_annot->{list}}) {
+    # --lav rows are printed as their first three columns, unmodified
+    if ($lav) {
+      print join("\t",@$annot[0 .. 2]),"\n";
+      next;
+    }
+    # rows whose fifth field is a key of %domains get a suffix on the
+    # label: start:end with --bound_comment, else the domain number
+    if (defined($domains{$annot->[4]})) {
+      if ($bound_comment) {
+        $annot->[-2] .= $color_sep_str.$annot->[0].":".$annot->[2];
+      }
+      elsif ($show_color) {
+        $annot->[-2] .= $color_sep_str.$domains{$annot->[4]};
+      }
+    }
+    print join("\t",@$annot[0 .. 3]),"\n";
+  }
+}
+
+exit(0);
+
+# show_annots() parses one annotation request, runs the matching feature
+# and domain queries, and returns the annotations for that sequence.
+#
+# Arguments:
+#   $query_length  - "description<TAB>length"; the description is a
+#                    sequence header such as gi|62822551|sp|P00502|GSTA1_RAT,
+#                    sp|P09488|GSTM1_HUMAN, "SP:GSTM1_HUMAN P09488", or a
+#                    bare accession
+#   $get_annot_sub - code ref, called as
+#                    $get_annot_sub->($seq_length, $get_sites_sql, $get_doms_sql)
+#
+# Returns a reference to a hash with the keys seq_info (the description)
+# and list (whatever $get_annot_sub returned), or 0 when an accession
+# lookup is required but no accession could be parsed.
+#
+# The parsed header fields ($tmp, $gi, $sdb, $acc, $id, $use_acc) and the
+# selected statement handles ($get_sites_sql, $get_doms_sql) are stored
+# in file-level variables.
+#
+sub show_annots {
+  my ($query_length, $get_annot_sub) = @_;
+
+  my ($annot_line, $seq_length) = split(/\t/,$query_length);
+
+  my %annot_data = (seq_info=>$annot_line);
+
+  # gi|...| header that also carries an |sp| or |ref| field.
+  # (?:sp|ref) is an alternation; [sp|ref] would be a character class
+  # matching one single character.
+  if ($annot_line =~ m/^gi\|/ && $annot_line =~ m/\|(?:sp|ref)\|/) {
+    $use_acc = 1;
+    ($tmp, $gi, $sdb, $acc, $id) = split(/\|/,$annot_line);
+    # third field is not sp: prefer the accession that follows |sp|
+    if ($sdb !~ m/sp/ && $annot_line =~ m/\|sp\|(\w+)/) {
+      ($acc) = ($annot_line =~ m/\|sp\|(\w+)/);
+    }
+  }
+  # db|accession... header
+  elsif ($annot_line =~ m/^(?:sp|tr|ref)\|/ ) {
+    $use_acc = 1;
+    ($sdb, $acc) = split(/\|/,$annot_line);
+  }
+  # any other gi|number|db|accession|id header
+  elsif ($annot_line =~ m/^gi\|/) {
+    $use_acc = 1;
+    ($tmp, $gi, $sdb, $acc, $id) = split(/\|/,$annot_line);
+  }
+  # "SP:ID ACC" / "TR:ID ACC"
+  elsif ($annot_line =~ m/^(SP|TR):(\w+) (\w+)/) {
+    ($sdb, $id, $acc) = ($1,$2,$3);
+    $use_acc = 1;
+    $sdb = lc($sdb)
+  }
+  # "SP:ID" / "TR:ID" without an accession: look up by identifier
+  elsif ($annot_line =~ m/^(SP|TR):(\w+)/) {
+    ($sdb, $id, $acc) = ($1,$2,"");
+    $use_acc = 0;
+    $sdb = lc($sdb)
+  }
+  # no '|' at all: the first word is taken as an sp accession
+  elsif ($annot_line !~ m/\|/) {  # new NCBI swissprot format
+    $use_acc =1;
+    $sdb = 'sp';
+    ($acc) = split(/\s+/,$annot_line);
+  } else {
+    # any other '|'-separated header: db|accession|id
+    $use_acc = 1;
+    ($sdb, $acc, $id) = split(/\|/,$annot_line);
+  }
+
+  unless ($use_acc) {
+    # identifier-based lookup
+    unless ($no_feats) {
+      $get_sites_sql = $get_ft2_sites_id;
+      $get_sites_sql->execute($id);
+    }
+    unless ($no_doms) {
+      if ($dom_db) {
+        $get_doms_sql = $get_ipr_domdb_id;
+      }
+      else {
+        $get_doms_sql = $get_ipr_doms_id;
+      }
+
+      $get_doms_sql->execute($id);
+    }
+  } else {
+    unless ($acc) {
+      print STDERR "ann_feats2ipr.pl no acc: $annot_line\n";
+      return 0;
+    }
+    # remove version number
+    $acc =~ s/\.\d+$//;
+    if ($sdb eq 'ref') {
+      # RefSeq accessions are matched against the ref_acc column
+      unless ($no_feats) {
+        $get_sites_sql = $get_ft2_sites_refacc;
+        $get_sites_sql->execute($acc);
+      }
+      unless ($no_doms) {
+        $get_doms_sql = $get_ipr_doms_refacc;
+        $get_doms_sql->execute($acc);
+      }
+    }
+    else {
+      unless ($no_feats) {
+        $get_sites_sql = $get_ft2_sites_acc;
+        $get_sites_sql->execute($acc);
+      }
+      unless ($no_doms) {
+        if ($dom_db) {
+          $get_doms_sql = $get_ipr_domdb_acc;
+        }
+        else {
+          $get_doms_sql = $get_ipr_doms_acc;
+        }
+        $get_doms_sql->execute($acc);
+      }
+    }
+  }
+
+  $annot_data{list} = $get_annot_sub->($seq_length, $get_sites_sql, $get_doms_sql);
+
+  return \%annot_data;
+}
+
+# get_fasta_annots() fetches the rows of the already-executed site and
+# domain statements and converts them into annotation rows.
+#
+# Arguments:
+#   $seq_len       - sequence length supplied by the caller
+#   $get_sites_sql - executed statement returning
+#                    (acc, pos, end, label, value, len); read unless $no_feats
+#   $get_doms_sql  - executed statement returning
+#                    (acc, start, stop, ipr_acc, db_ref, s_descr, len);
+#                    read unless $no_doms
+#
+# Returns a reference to an array of rows sorted by position:
+#   site rows:   [pos, symbol, residue-or-"-", comment, ""]
+#   domain rows: [start, '-', end, label, ipr_acc]
+# With $neg_doms, gaps of more than 10 residues between domains (and after
+# the last domain) are added as "NODOM" domains.
+#
+sub get_fasta_annots {
+  my ($seq_len, $get_sites_sql, $get_doms_sql) = @_;
+
+  my ($acc, $pos, $end, $label, $value, $comment, $len);
+
+  # NOTE(review): the caller-supplied length is discarded here; $seq_len
+  # is rebuilt from the len column of the fetched rows -- confirm intended
+  $seq_len = 0;
+
+  my @feats2 = (); # features with start/stop, for checking overlap, adding negative
+  my @sites = ();  # sites with one position
+
+  # get sites
+  unless ($no_feats) {
+    while (($acc, $pos, $end, $label, $value, $len) = $get_sites_sql->fetchrow_array()) {
+      $seq_len = $len if ($len > $seq_len);
+      next unless $annot_types{$label};
+      if ($label =~ m/VARIANT/) {
+        # value looks like "X -> Y (comment)"
+        my ($aa_res, $comment) = split(/\(/,$value);
+        # nothing before the '(' means no substitution can be parsed
+        next unless defined($aa_res);
+        if ($comment) {
+          $comment =~ s/\)//;
+          # remove the  /FTId=VAR_014497 information
+          $comment =~ s/\s+\/FTId=.*$//;
+        } else {
+          $comment = "";
+        }
+        next if ($comment =~ /MISSING/);
+        my ($vfrom, $vto) = ($aa_res =~ m/(\w)\s*->\s*(\w)/);
+        if ($vto) {
+          $comment = '' unless $comment;
+          $value = $vto;
+          push @sites, [$pos, $annot_types{$label}, $value, $comment, ""];
+        }
+      } elsif ($label =~ m/MUTAGEN/) {
+        # value looks like "X->Y,Z: comment"
+        my ($aa_res, $comment) = split(/: /,$value);
+        # an empty value yields no fields at all
+        next unless defined($aa_res);
+        # a value without ": " has no comment part
+        $comment = '' unless defined($comment);
+        next if ($comment =~ /MISSING/);
+        my ($vfrom, $vto) = split(/\->/,$aa_res);
+        if ($vto) {
+          # one site row per replacement residue
+          my @vto_list = split(/,/,$vto);
+          $value = $vto;
+          for my $val ( @vto_list) {
+            push @sites, [$pos, $annot_types{$label}, $val, "Mutagen: $comment", ""];
+          }
+        }
+      } else {
+        push @sites, [$pos, $annot_types{$label}, "-", "$label: $value", ""];
+      }
+    }
+  }
+
+  unless ($no_doms) {
+    my ($ipr_acc, $db_ref, $s_descr) = ("","","");
+    while (($acc, $pos, $end, $ipr_acc, $db_ref, $s_descr, $len) = $get_doms_sql->fetchrow_array()) {
+      $db_ref =~ s/G3DSA://;
+      $seq_len = $len unless ($seq_len > $len);
+
+      # label: description, optionally with {accession}; replaced by the
+      # member-database reference or the InterPro accession on request
+      $value = domain_name($ipr_acc, $s_descr);
+      if ($acc_comment) {
+        $value .= "{$ipr_acc}";
+      }
+      if ($db_ref_acc) {
+        $value = $db_ref;
+      }
+      elsif ($use_ipr) {
+        $value = $ipr_acc;
+      }
+
+      push @feats2, [$pos, "-", $end, $value, $ipr_acc];
+    }
+  }
+
+  # ensure that domains do not overlap: when a domain starts at or before
+  # the end of the previous one, split the overlap at its midpoint
+  for (my $i=1; $i < scalar(@feats2); $i++) {
+    my $diff = $feats2[$i-1]->[2] - $feats2[$i]->[0];
+    if ($diff >= 0) {
+      $feats2[$i-1]->[2] = $feats2[$i]->[0]+ int($diff/2);
+      $feats2[$i]->[0] = $feats2[$i-1]->[2] + 1;
+    }
+  }
+
+  my @n_feats2 = ();
+
+  if ($neg_doms) {
+    my $last_end = 0;
+    for my $feat ( @feats2 ) {
+      if ($feat->[0] - $last_end > 10) {
+        push @n_feats2, [$last_end+1, "-", $feat->[0]-1, "NODOM", "NODOM"];
+      }
+      $last_end = $feat->[2];
+    }
+    if ($seq_len - $last_end > 10) {
+      push @n_feats2, [$last_end+1, "-", $seq_len, "NODOM", "NODOM"];
+    }
+  }
+
+  # each domain becomes one start/'-'/end row
+  my @feats = ();
+  for my $feat (@feats2, @n_feats2) {
+    push @feats, [$feat->[0], '-', $feat->[2], $feat->[-2], $feat->[-1] ];
+  }
+
+  @feats = sort { $a->[0] <=> $b->[0] } (@sites, @feats);
+
+  return \@feats;
+}
+
+# get_lav_annots() turns the rows of the executed domain statement into
+# three-column [start, stop, label] rows; $seq_len and $get_sites_sql are
+# accepted but not used.  Returns a reference to the array of rows.
+sub get_lav_annots {
+  my ($seq_len, $get_sites_sql, $get_doms_sql) = @_;
+
+  my @lav_rows;
+
+  # row layout: (acc, start, stop, ipr_acc, db_ref, s_descr, len)
+  while (my @dom_row = $get_doms_sql->fetchrow_array()) {
+    my ($start, $stop, $ipr_id, $descr) = @dom_row[1, 2, 3, 5];
+    my $dom_label = domain_name($ipr_id, $descr);
+    push @lav_rows, [$start, $stop, $dom_label];
+  }
+
+  return \@lav_rows;
+}
+
+# domain_name() builds the label for a domain from its short description
+# and registers the domain accession in %domains.
+#
+# The first "-domain"/"_domain" and the first "-homology"/"_homology" are
+# removed from the description.  An accession that is not yet a key of
+# %domains is given the next sequential number ($domain_cnt).
+# Returns the edited description.
+#
+
+sub domain_name {
+  my ($ipr_acc, $s_descr) = @_;
+
+  $s_descr =~ s/[\-_]domain//;
+  $s_descr =~ s/[\-_]homology//;
+
+  # replaces the leading 20 characters with themselves, so the
+  # description keeps its full length
+  $s_descr =~ s/^(.{20})/$1/;
+
+  # first time this accession is seen: assign the next number
+  unless (defined($domains{$ipr_acc})) {
+    $domains{$ipr_acc} = ++$domain_cnt;
+  }
+
+  return $s_descr;
+}
+
+
+__END__
+
+=pod
+
+=head1 NAME
+
+ann_feats2.pl
+
+=head1 SYNOPSIS
+
+ ann_feats2.pl --no-doms --no_feats --lav 'sp|P09488|GSTM1_HUMAN' | accession.file
+
+=head1 OPTIONS
+
+ -h	short help
+ --help include description
+
+ --acc_comment provide the InterPro accession in {IPR00123} brackets for links
+ --dom_db=G3DSA use a single domain database (e.g. PF, G3DSA, PS5) from InterPro
+ --dom_acc provide the domain accession, not the description, as the domain label
+ --neg, --neg_doms, --neg-doms label non-domain regions > 10 residues as "NODOM"
+ --ipr provide InterPro accession as label
+ --no-doms  do not show domain boundaries (domains are always shown with --lav)
+ --no-feats do not show feature (variants, active sites, phospho-sites)
+ --no-label do not show feature key (=*:phosphorylation, etc)
+
+ --lav  produce lav2plt.pl annotation format, only show domains/repeats
+
+ --host, --user, --password, --port --db -- info for mysql database
+
+=head1 DESCRIPTION
+
+C<ann_feats2ipr.pl> extracts feature, domain, and repeat information from
+two mysql databases (default names: uniprot/ipr2) built by parsing the
+uniprot_sprot.dat and uniprot_trembl.dat feature tables.  Given a
+command line argument that contains a sequence accession (P09488) or
+identifier (GSTM1_HUMAN), the program looks up the features available
+for that sequence and returns them in a tab-delimited format:
+
+ >sp|P09488
+ 2	-	88	DOMAIN: GST N-terminal.
+ 7	V	F	Mutagen: Reduces catalytic activity 100- fold.
+ 23	*	-	MOD_RES: Phosphotyrosine (By similarity).
+ 33	*	-	MOD_RES: Phosphotyrosine (By similarity).
+ 34	*	-	MOD_RES: Phosphothreonine (By similarity).
+ 90	-	208	DOMAIN: GST C-terminal.
+ 108	V	S	Mutagen: Changes the properties of the enzyme toward some substrates.
+ 108	V	Q	Mutagen: Reduces catalytic activity by half.
+ 109	V	I	Mutagen: Reduces catalytic activity by half.
+ 116	#	-	BINDING: Substrate.
+ 116	V	A	Mutagen: Reduces catalytic activity 10-fold.
+ 116	V	F	Mutagen: Slight increase of catalytic activity.
+ 173	V	N	in allele GSTM1B; dbSNP:rs1065411.
+ 210	V	T	in dbSNP:rs449856.
+
+If features are provided, then a legend of feature symbols is provided
+as well (disabled with C<--no-label>):
+
+ =*:phosphorylation
+ ==:active site
+ =@:site
+ =^:binding
+ =!:metal binding
+
+If the C<--lav> option is specified, domain and repeat features are
+presented in a different format for the C<lav2plt.pl> program:
+
+  >sp|P09488|GSTM1_HUMAN
+  2	88	GST N-terminal.
+  90	208	GST C-terminal.
+
+C<ann_feats2.pl> is designed to be used by the B<FASTA> programs with
+the C<-V \!ann_feats2.pl> option.  It can also be used with the lav2plt.pl
+program with the C<--xA "\!ann_feats2.pl --lav"> or C<--yA "\!ann_feats2.pl --lav"> options.
+
+=head1 AUTHOR
+
+William R. Pearson, wrp at virginia.edu
+
+=cut
diff --git a/scripts/ann_feats_up_sql.pl b/scripts/ann_feats_up_sql.pl
new file mode 100755
index 0000000..c11d3e2
--- /dev/null
+++ b/scripts/ann_feats_up_sql.pl
@@ -0,0 +1,463 @@
+#!/usr/bin/perl -w
+
+################################################################
+# copyright (c) 2014,2015 by William R. Pearson and The Rector &
+# Visitors of the University of Virginia
+################################################################
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under this License is distributed on an "AS
+# IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+# express or implied.  See the License for the specific language
+# governing permissions and limitations under the License.
+################################################################
+
+# ann_feats_up_sql.pl gets an annotation file from fasta36 -V with a line of the form:
+
+# gi|62822551|sp|P00502|GSTA1_RAT Glutathione S-transfer\n  (at least from pir1.lseg)
+#
+# it must:
+# (1) read in the line
+# (2) parse it to get the up_acc
+# (3) return the tab delimited features
+#
+
+# this version can read feature2 uniprot features (acc/pos/end/label/value), but returns sorted start/end domains
+# modified 18-Jan-2016 to produce annotation symbols consistent with ann_feats_up_www2.pl
+
+use strict;
+
+use DBI;
+use Getopt::Long;
+use Pod::Usage;
+
+use vars qw($host $db $a_table $port $user $pass);
+
+# domain name => sequential number, and the last number assigned
+my %domains = ();
+my $domain_cnt = 0;
+
+my $hostname = qx{/bin/hostname};
+
+# database defaults depend on whether the host name contains "ebi"
+if ($hostname =~ m/ebi/) {
+  ($host, $db, $a_table, $port, $user, $pass)  = ("mysql-pearson-prod", "up_db", "annot", 4124, "web_user", "fasta_www");
+}
+else {
+  ($host, $db, $a_table, $port, $user, $pass)  = ("wrpxdb.its.virginia.edu", "uniprot", "annot2", 0, "web_user", "fasta_www");
+#  $host = 'xdb';
+}
+
+# command line flags, all off by default
+my ($sstr, $lav, $neg_doms, $no_vars, $no_doms, $no_feats, $shelp, $help, $pfam26) = (0) x 9;
+my $min_nodom = 10;
+
+my $show_color = 1;
+# separator between a domain label and the number appended to it
+my $color_sep_str = '~';
+
+# database connection, output format, then the on/off switches in their
+# underscore / hyphen / run-together spellings
+GetOptions(
+    "host=s" => \$host,
+    "db=s" => \$db,
+    "user=s" => \$user,
+    "password=s" => \$pass,
+    "port=i" => \$port,
+
+    "lav" => \$lav,
+    "sstr" => \$sstr,
+    "color!" => \$show_color,
+
+    "no_doms" => \$no_doms,
+    "no-doms" => \$no_doms,
+    "nodoms" => \$no_doms,
+
+    "no_var" => \$no_vars,
+    "no-var" => \$no_vars,
+    "novar" => \$no_vars,
+
+    "no_feats" => \$no_feats,
+    "no-feats" => \$no_feats,
+    "nofeats" => \$no_feats,
+
+    "neg" => \$neg_doms,
+    "neg_doms" => \$neg_doms,
+    "neg-doms" => \$neg_doms,
+    "negdoms" => \$neg_doms,
+
+    "min_nodom=i" => \$min_nodom,
+    "min-nodom=i" => \$min_nodom,
+
+    "h|?" => \$shelp,
+    "help" => \$help,
+    );
+
+# short usage for -h, full POD for --help; usage also when there is
+# neither an argument nor piped/redirected input
+pod2usage(1) if $shelp;
+pod2usage(exitstatus => 0, verbose => 2) if $help;
+pod2usage(1) unless (@ARGV || -p STDIN || -f STDIN);
+
+# DSN with driver attributes; host and port are only added when set
+my $connect = "dbi:mysql(AutoCommit=>1,RaiseError=>1):database=$db";
+if ($host) { $connect .= ";host=$host"; }
+if ($port) { $connect .= ";port=$port"; }
+
+my $dbh = DBI->connect($connect, $user, $pass) or die $DBI::errstr;
+
+
+# feature labels, the symbol shown for each, and the legend text
+my @feat_keys = qw(ACT_SITE MOD_RES BINDING SITE METAL);
+my @feat_vals = ( '=','*','#','^','!');
+my @feat_names = ('Active site', 'Modified', 'Substrate binding', 'Site', 'Metal binding');
+# variants and mutagenesis sites share the 'V' symbol and have no legend
+# text, so they are skipped when the legend is printed
+unless ($no_vars) {
+  push @feat_keys, qw(VARIANT MUTAGEN);
+  push @feat_vals, ('V','V');
+  push @feat_names, ('','');
+}
+
+# label => legend text (hash slice assignment)
+my %feat_label = ();
+@feat_label{@feat_keys} = @feat_names;
+
+my @dom_keys = qw( DOMAIN REPEAT );
+my @dom_vals = ( [ '[', ']'],[ '[', ']']);
+
+my @ssr_keys = qw( SSTR );
+my @ssr_vals = ( [ '[', ']']);
+
+# labels that will be reported, filled in below according to the options
+my %annot_types = ();
+
+my $get_annot_sub = \&get_fasta_annots;
+if ($lav) {
+  $no_feats = 1;
+  $get_annot_sub = \&get_lav_annots;
+}
+
+# --sstr reports only SSTR rows; otherwise features and domains are
+# enabled unless switched off
+if ($sstr) {@annot_types{@ssr_keys} = @ssr_vals;}
+else {
+  @annot_types{@feat_keys} = @feat_vals unless ($no_feats);
+  @annot_types{@dom_keys} = @dom_vals unless ($no_doms);
+}
+
+if ($neg_doms) {
+  $domains{'NODOM'}=0;
+}
+
+# annotation queries by identifier, accession or RefSeq accession; each
+# returns (acc, pos, end, label, value, len) ordered by position
+my $get_annots_id = $dbh->prepare(qq(select acc, pos, end, label, value, len from features2 join $a_table using(acc) where id=? order by pos));
+my $get_annots_acc = $dbh->prepare(qq(select acc, pos, end, label, value, len from features2 join $a_table using(acc) where acc=? order by pos));
+
+my $get_annots_refacc = $dbh->prepare(qq(select ref_acc, pos, end, label, value, len from features2 join $a_table using(acc) where ref_acc=? order by pos));
+
+my $up_atable = "uniprot." . $a_table;
+
+# statement handle in use; show_annots() re-selects it per request
+my $get_annots_sql = $get_annots_id;
+
+# header fields parsed by show_annots()
+my ($tmp, $gi, $sdb, $acc, $id, $use_acc);
+
+# legend: one "=symbol:text" line for each feature that has legend text
+unless ($no_feats || $sstr) {
+  for my $i ( 0 .. $#feat_keys) {
+    next unless $feat_label{$feat_keys[$i]};
+    print "=",$feat_vals[$i],":",$feat_label{$feat_keys[$i]},"\n";
+  }
+}
+
+# unless ($no_feats || $sstr) {
+#   print "=*:phosphorylation\n";
+#   print "==".":active site\n";
+#   print "=@".":site\n";
+#   print "=^:binding\n";
+#   print "=!:metal binding\n";
+# }
+
+# get the query: the first argument is a sequence identifier/accession
+# (or the name of a file of identifiers), the optional second argument
+# is the sequence length
+my ($query, $seq_len) =  @ARGV;
+$seq_len = 0 unless defined($seq_len);
+
+$query =~ s/^>// if ($query);
+
+my @annots = ();
+
+# $query is looked up directly when it looks like a sequence identifier:
+#   - it contains '|' or ':'   (sp|P09488|GSTM1_HUMAN, SP:GSTM1_HUMAN)
+#   - it starts with NP_ or XP_
+#   - it starts with a 6-character [OPQ] UniProt accession
+#   - it is a 6- or 10-character [A-NR-Z] UniProt accession, optionally
+#     with a ".version", followed by whitespace or the end of the string
+# The two accession forms are separate '^'-anchored tests: in a single
+# "^form1|form2\s" pattern the anchor binds only to form1 and the
+# trailing \s only to form2, which rejects a bare A0A024R161-style
+# argument.
+# Anything else is read as annotation lines through <> (the files named
+# in @ARGV, or STDIN).
+if ($query && ($query =~ m/[\|:]/ ||
+               $query =~ m/^[NX]P_/ ||
+               $query =~ m/^[OPQ][0-9][A-Z0-9]{3}[0-9]/ ||
+               $query =~ m/^[A-NR-Z][0-9](?:[A-Z][A-Z0-9]{2}[0-9]){1,2}(?:\.\d+)?(?!\S)/)) {
+  push @annots, show_annots("$query\t$seq_len", $get_annot_sub);
+}
+else {
+  while (my $a_line = <>) {
+    $a_line =~ s/^>//;
+    chomp $a_line;
+    push @annots, show_annots($a_line, $get_annot_sub);
+  }
+}
+
+# print one ">description" line per sequence, followed by its annotations
+for my $seq_annot (@annots) {
+  print ">",$seq_annot->{seq_info},"\n";
+  for my $annot (@{$seq_annot->{list}}) {
+    # rows whose last field is a key of %domains get the domain's number
+    # appended to that field
+    if (!$lav && $show_color && defined($domains{$annot->[-1]})) {
+      $annot->[-1] .= $color_sep_str.$domains{$annot->[-1]};
+    }
+    print join("\t",@$annot),"\n";
+  }
+}
+
+exit(0);
+
+# show_annots() parses one annotation request, runs the matching
+# annotation query, and returns the annotations for that sequence.
+#
+# Arguments:
+#   $query_len     - "description<TAB>length"; the description is a
+#                    sequence header such as gi|62822551|sp|P00502|GSTA1_RAT,
+#                    sp|P09488|GSTM1_HUMAN, "SP:GSTM1_HUMAN P09488", or a
+#                    bare accession
+#   $get_annot_sub - code ref, called as
+#                    $get_annot_sub->(\%annot_types, $get_annots_sql, $seq_len)
+#
+# Always returns a reference to a hash with the keys seq_info (the
+# description) and list (an array reference); the caller dereferences
+# the result without checking it.
+#
+# The parsed header fields ($tmp, $gi, $sdb, $acc, $id, $use_acc) and the
+# selected statement handle ($get_annots_sql) are stored in file-level
+# variables.
+#
+sub show_annots {
+  my ($query_len, $get_annot_sub) = @_;
+
+  my ($annot_line, $seq_len) = split(/\t/,$query_len);
+
+  my %annot_data = (seq_info=>$annot_line);
+
+  # gi|number|db|accession|id
+  if ($annot_line =~ m/^gi\|/) {
+    $use_acc = 1;
+    ($tmp, $gi, $sdb, $acc, $id) = split(/\|/,$annot_line);
+  }
+  # "SP:ID ACC" / "TR:ID ACC"
+  elsif ($annot_line =~ m/^(SP|TR):(\w+) (\w+)/) {
+    ($sdb, $id, $acc) = ($1,$2,$3);
+    $use_acc = 1;
+    $sdb = lc($sdb)
+  }
+  # "SP:ID" / "TR:ID" without an accession: look up by identifier
+  elsif ($annot_line =~ m/^(SP|TR):(\w+)/) {
+    ($sdb, $id) = ($1,$2);
+    $use_acc = 0;
+    $sdb = lc($sdb)
+  }
+  # no '|' at all: the first word is taken as an sp accession
+  elsif ($annot_line !~ m/\|/) {  # new NCBI swissprot format
+    $use_acc =1;
+    $sdb = 'sp';
+    ($acc) = split(/\s+/,$annot_line);
+  }
+  else {
+    # any other '|'-separated header: db|accession|id
+    $use_acc = 1;
+    ($sdb, $acc, $id) = split(/\|/,$annot_line);
+  }
+
+  unless ($use_acc) {
+    $get_annots_sql = $get_annots_id;
+    $get_annots_sql->execute($id);
+  }
+  else {
+    # no accession could be parsed (e.g. an empty line): report it and
+    # return an empty annotation list rather than querying with an
+    # undefined/empty key
+    unless (defined($acc) && $acc ne '') {
+      print STDERR "ann_feats_up_sql.pl no acc: $annot_line\n";
+      $annot_data{list} = [];
+      return \%annot_data;
+    }
+    # RefSeq accessions are matched against the ref_acc column
+    unless ($sdb =~ m/ref/) {
+      $get_annots_sql = $get_annots_acc;
+    } else {
+      $get_annots_sql = $get_annots_refacc;
+    }
+    # remove version number
+    $acc =~ s/\.\d+$//;
+    $get_annots_sql->execute($acc);
+  }
+
+  $annot_data{list} = $get_annot_sub->(\%annot_types, $get_annots_sql, $seq_len);
+
+  return \%annot_data;
+}
+
+sub get_fasta_annots {
+  my ($annot_types, $get_annots_sql, $seq_len) = @_;
+  # builds the annotation rows for one sequence from an executed statement handle
+  my ($acc, $pos, $end, $label, $value, $comment, $len);
+  # the length passed in is discarded and rebuilt from the $len column below
+  $seq_len = 0;
+
+  my @feats2 = ();	# features with start/stop, for checking overlap, adding negative
+  my @sites = ();	# sites with one position
+  # columns are read as (acc, pos, end, label, value, len)
+  while (($acc, $pos, $end, $label, $value, $len) = $get_annots_sql->fetchrow_array()) {
+    $seq_len = $len if ($len > $seq_len);
+    if ($annot_types->{$label}) {	# skip labels that were not requested
+      if ($label =~ m/VARIANT/) {
+	my ($aa_res, $comment) = split(/\(/,$value);	# text before / after the first '('
+	if ($comment) {		# this $comment shadows the one declared above
+	    $comment =~ s/\)//;
+# remove the  /FTId=VAR_014497 information
+	    $comment =~ s/\s+\/FTId=.*$//;
+	}
+	else {$comment = "";}
+	next if ($comment =~ /MISSING/);
+	my ($vfrom, $vto) = ($aa_res =~ m/(\w)\s*->\s*(\w)/);	# single residue changes only
+	if ($vto) {
+	  $comment = '' unless $comment;
+	  $value = $vto;
+	  push @sites, [$pos, $annot_types->{$label}, $value, $comment];
+	}
+      } elsif ($label =~ m/MUTAGEN/) {
+	my ($aa_res, $comment) = split(/: /,$value);	# NOTE(review): $comment is undef when there is no ": "
+	next if ($comment =~ /MISSING/);
+	my ($vfrom, $vto) = split(/\->/,$aa_res);
+	next if (length($vfrom) > 1 || length($vto) > 1);
+	if ($vto) {
+	  my @vto_list = split(/,/,$vto);
+	  $value = $vto;
+	  for my $val ( @vto_list) {
+	    push @sites, [$pos, $annot_types->{$label}, $val, "Mutagen: $comment"];
+	  }
+	}
+      } elsif ($label =~ m/DOMAIN/ || $label =~ m/REPEAT/) {
+	$value = domain_name($label,$value);	# canonical name; also numbers the domain
+	push @feats2, [$pos, "-", $end, $value];
+
+      } elsif ($label =~ m/SSTR/) {
+	next if $value =~ m/TURN/;
+	push @feats2, [$pos, "-", $end, $value];
+      }
+      else {
+#	print join("\t",($pos, $annot_types->{$label})),"\n";
+#	print join("\t",($pos, $annot_types->{$label}, "-", "$label: $value")),"\n";
+	push @sites, [$pos, $annot_types->{$label}, "-", "$label: $value"];
+      }
+    }
+  }
+  # NOTE(review): @feats2 is not sorted here; presumably the query returns rows by position -- confirm
+  # ensure that domains do not overlap: an overlap is split between the two neighbours
+  for (my $i=1; $i < scalar(@feats2); $i++) {
+    my $diff = $feats2[$i-1]->[2] - $feats2[$i]->[0];
+    if ($diff >= 0) {
+      $feats2[$i-1]->[2] = $feats2[$i]->[0]+ int($diff/2);
+      $feats2[$i]->[0] = $feats2[$i-1]->[2] + 1;
+    }
+  }
+  # with $neg_doms set, gaps longer than $min_nodom are reported as NODOM regions
+  my @n_feats2 = ();
+
+  if ($neg_doms) {
+    my $last_end = 0;
+    for my $feat ( @feats2 ) {
+      if ($feat->[0] - $last_end > $min_nodom) {
+	push @n_feats2, [$last_end+1, "-", $feat->[0]-1, "NODOM"];
+      }
+      $last_end = $feat->[2];
+    }
+    if ($seq_len - $last_end > $min_nodom) {	# gap between the last region and the end
+      push @n_feats2, [$last_end+1, "-", $seq_len, "NODOM"];
+    }
+  }
+
+  my @feats = ();
+  for my $feat (@feats2, @n_feats2) {
+    push @feats, [$feat->[0], '-', $feat->[2], $feat->[-1] ];
+#    push @feats, [$feat->[2], ']', '-', ""];
+  }
+  # sites and regions are merged and ordered by their first column
+  @feats = sort { $a->[0] <=> $b->[0] } (@sites, @feats);
+
+  return \@feats;
+}
+
+sub get_lav_annots {
+  # returns [start, end, name] rows for DOMAIN/REPEAT features only
+  my ($annot_types, $get_annots_sql, $seq_len) = @_;
+
+  # $acc is declared here so that fetching a row does not overwrite the
+  # variable of the same name that show_annots() sets outside this sub
+  my ($acc, $pos, $end, $label, $value);
+  my @feats = ();
+
+  while (($acc, $pos, $end, $label, $value) = $get_annots_sql->fetchrow_array()) {
+    next unless ($label =~ m/^DOMAIN/ || $label =~ m/^REPEAT/);
+    $value =~ s/\s?\{.+\}\.?$//;	# drop a trailing {...} block
+    $value = domain_name($label,$value);
+    push @feats, [$pos, $end, $value];
+  }
+  return \@feats;
+}
+
+# domain_name() takes a uniprot domain label, removes comments ( ;
+# truncated) and repeat numbers, and returns a canonical form. Thus:
+# Cortactin 6.
+# Cortactin 7; truncated.
+# becomes "Cortactin".  Every run of blanks left in the name becomes
+# '_', so multi-word names contain no blanks.  A canonical name that
+# has not been seen before is given the next number in %domains.
+sub domain_name {
+
+  my ($label, $value) = @_;
+
+  # other feature types are returned unchanged
+  return $value unless ($label =~ /DOMAIN|REPEAT/);
+
+  $value =~ s/;.*$//;			# "; truncated" and similar comments
+  $value =~ s/\s+\d+\.?$//;		# trailing repeat number
+  $value =~ s/\.\s*$//;			# trailing period
+  $value =~ s/\s+\d+\.\s+.*$//;	# number followed by further text
+  $value =~ s/\s+/_/g;			# all blanks, not just the first run
+
+  # number each canonical name the first time it is seen
+  if (!defined($domains{$value})) {
+    $domain_cnt++;
+    $domains{$value} = $domain_cnt;
+  }
+  return $value;
+}
+
+__END__
+
+=pod
+
+=head1 NAME
+
+ann_feats_up_sql.pl
+
+=head1 SYNOPSIS
+
+ ann_feats_up_sql.pl --no_doms --no_feats --lav 'sp|P09488|GSTM1_HUMAN' | accession.file
+
+=head1 OPTIONS
+
+ -h	short help
+ --help include description
+ --no-doms  do not show domain boundaries (domains are always shown with --lav)
+ --no-feats do not show features (variants, active sites, phospho-sites)
+ --no-var   do not show variant sites (--no_var, --novar)
+ --lav  produce lav2plt.pl annotation format, only show domains/repeats
+ --neg-doms,  -- report domains between annotated domains as NODOM
+                 (also --neg, --neg_doms)
+ --min_nodom=10  minimum non-domain length to produce NODOM
+ --host, --user, --password, --port --db -- info for mysql database
+
+=head1 DESCRIPTION
+
+C<ann_feats_up_sql.pl> extracts feature, domain, and repeat information from
+a mysql database (default name, uniprot) built by parsing the
+uniprot_sprot.dat and uniprot_trembl.dat feature tables.  Given a
+command line argument that contains a sequence accession (P09488) or
+identifier (GSTM1_HUMAN), the program looks up the features available
+for that sequence and returns them in a tab-delimited format:
+
+ >sp|P09488|GSTM1_HUMAN
+ 2	-	88	GST_N-terminal~1
+ 7	V	F	Mutagen: Reduces catalytic activity 100- fold. {ECO:0000269|PubMed:16548513}.
+ 34	*	-	MOD_RES: Phosphothreonine. {ECO:0000250|UniProtKB:P10649}.
+ 90	-	208	GST_C-terminal~2
+ 108	V	S	Mutagen: Changes the properties of the enzyme toward some substrates. {ECO:0000269|PubMed:16548513, ECO:0000269|PubMed:9930979}.
+ 108	V	Q	Mutagen: Reduces catalytic activity by half. {ECO:0000269|PubMed:16548513, ECO:0000269|PubMed:9930979}.
+ 109	V	I	Mutagen: Reduces catalytic activity by half. {ECO:0000269|PubMed:16548513}.
+ 116	#	-	BINDING: Substrate.
+ 116	V	A	Mutagen: Reduces catalytic activity 10-fold. {ECO:0000269|PubMed:16548513}.
+ 116	V	F	Mutagen: Slight increase of catalytic activity. {ECO:0000269|PubMed:16548513}.
+ 173	V	N	in allele GSTM1B; dbSNP:rs1065411. {ECO:0000269|Ref.3, ECO:0000269|Ref.5}.
+ 210	*	-	MOD_RES: Phosphoserine. {ECO:0000250|UniProtKB:P04905}.
+ 210	V	T	in dbSNP:rs449856.
+
+If features are provided, then a legend of feature symbols is provided
+as well:
+
+ ==:Active site
+ =*:Modified
+ =#:Substrate binding
+ =^:Site
+ =!:Metal binding
+
+If the C<--lav> option is specified, domain and repeat features are
+presented in a different format for the C<lav2plt.pl> program:
+
+  >sp|P09488|GSTM1_HUMAN
+  2	88	GST N-terminal.
+  90	208	GST C-terminal.
+
+C<ann_feats_up_sql.pl> is designed to be used by the B<FASTA> programs
+with the C<-V \!ann_feats_up_sql.pl> option, or by the
+C<annot_blast_btop.pl> script.  It can also be used with the
+lav2plt.pl program with the C<--xA "\!ann_feats_up_sql.pl --lav"> or
+C<--yA "\!ann_feats_up_sql.pl --lav"> options.
+
+=head1 AUTHOR
+
+William R. Pearson, wrp at virginia.edu
+
+=cut
diff --git a/scripts/ann_feats_up_www2.pl b/scripts/ann_feats_up_www2.pl
new file mode 100755
index 0000000..becce41
--- /dev/null
+++ b/scripts/ann_feats_up_www2.pl
@@ -0,0 +1,455 @@
+#!/usr/bin/perl -w
+
+################################################################
+# copyright (c) 2014,2015 by William R. Pearson and The Rector &
+# Visitors of the University of Virginia */
+################################################################
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under this License is distributed on an "AS
+# IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+# express or implied.  See the License for the specific language
+# governing permissions and limitations under the License. 
+################################################################
+
+## modified 29-Sept-2016 to use EBI/proteins JSON URL:
+## http://www.ebi.ac.uk/proteins/api/features/p12345
+
+# ann_feats_up_www2.pl gets an annotation file from fasta36 -V with a line of the form:
+
+# SP:GSTM1_HUMAN P09488 218
+#
+# it must:
+# (1) read in the line
+# (2) parse it to get the up_acc
+# (3) return the tab delimited features
+#
+
+# this version can read feature2 uniprot features (acc/pos/end/label/value), but returns sorted start/end domains
+
+use strict;
+
+use Getopt::Long;
+use Pod::Usage;
+use LWP::Simple;
+use JSON qw(decode_json);
+
+## use IO::String;
+
+# base URL of the EBI proteins feature service; "/<accession>.json" is appended
+my $up_base = 'http://www.ebi.ac.uk/proteins/api/features';
+
+my %domains = ();	# canonical domain name -> number appended to the name
+my $domain_cnt = 0;	# last number handed out in %domains
+
+# command line flags, all off by default
+my ($sstr, $lav, $neg_doms, $no_doms, $no_feats, $no_vars, $no_over, $data_file, $shelp, $help) = (0,0,0,0,0,0,0,0,0,0);
+my ($min_nodom) = (10);	# shortest unannotated gap reported as NODOM
+
+my ($show_color) = (1);
+# separator placed between a domain name and its number in the output
+my $color_sep_str = '~';
+
+GetOptions(
+    "lav" => \$lav,		# lav2plt.pl annotation format
+    "no-over" => \$no_over,	# drop contained domains, trim overlapping ones
+	   "no_doms" => \$no_doms,
+	   "no-doms" => \$no_doms,
+	   "nodoms" => \$no_doms,
+	   "no_vars" => \$no_vars,
+	   "no-vars" => \$no_vars,
+	   "novars" => \$no_vars,
+	   "neg" => \$neg_doms,		# report gaps between domains as NODOM
+	   "neg_doms" => \$neg_doms,
+	   "neg-doms" => \$neg_doms,
+	   "negdoms" => \$neg_doms,
+	   "min_nodom=i" => \$min_nodom,
+	   "no_feats" => \$no_feats,
+	   "no-feats" => \$no_feats,
+	   "nofeats" => \$no_feats,
+	   "data:s" => \$data_file,	# read the feature data from this file
+    	   "color!" => \$show_color,	# negatable: --nocolor
+	   "sstr" => \$sstr,		# report STRAND/HELIX instead of features/domains
+	   "h|?" => \$shelp,
+	   "help" => \$help,
+	  );
+
+pod2usage(1) if $shelp;
+pod2usage(exitstatus => 0, verbose => 2) if $help;
+pod2usage(1) unless @ARGV || $data_file || -p STDIN || -f STDIN;	# nothing to read: show usage
+
+my @feat_keys = qw( ACT_SITE MOD_RES BINDING METAL SITE );
+my @feat_vals = ( '=','*','#','^','@');
+my @feat_names = ('Active site', 'Modified', 'Binding', 'Metal binding', 'Site');
+
+unless ($no_vars) {
+  push @feat_keys, qw(MUTAGEN VARIANT);
+  push @feat_vals, ('V','V',);
+  push @feat_names, ("","",);	# empty label: variants are left out of the legend
+}
+
+# feature key -> label text, filled in with hash slices
+my %feats_text = ();
+@feats_text{@feat_keys} = @feat_names;
+my %feats_label;
+@feats_label{@feat_keys} = @feat_names;
+
+my @dom_keys = qw( DOMAIN REPEAT );
+my @dom_vals = ( [ '[', ']'],[ '[', ']']);
+
+my @ssr_keys = qw(STRAND HELIX);
+my @ssr_vals = ( [ '[', ']'], [ '[', ']']);
+
+# %annot_types: feature type -> output symbol(s), for the types to be reported
+my %annot_types = ();
+
+my $get_annot_sub = \&json_annots;
+# --lav output carries no site features
+$no_feats = 1 if ($lav);
+
+if (!$sstr) {
+  @annot_types{@feat_keys} = @feat_vals unless ($no_feats);
+  @annot_types{@dom_keys} = @dom_vals unless ($no_doms);
+} else {
+  @annot_types{@ssr_keys} = @ssr_vals;
+}
+
+# NODOM regions get number 0
+$domains{'NODOM'}=0 if ($neg_doms);
+
+my ($tmp, $gi, $sdb, $acc, $id, $use_acc);
+
+# legend: one "=<symbol>:<label>" line for each feature type that has a label
+if (!$no_feats && !$sstr) {
+  for my $key_ix ( 0 .. $#feat_keys) {
+    my $legend = $feats_label{$feat_keys[$key_ix]};
+    print "=",$feat_vals[$key_ix],":",$legend,"\n" if ($legend);
+  }
+}
+
+# get the query
+my ($query, $seq_len) =  @ARGV;
+$seq_len = 0 unless defined($seq_len);
+
+$query =~ s/^>// if ($query);
+
+my @annots = ();
+
+# Input comes from one of three places: a --data file of features that were
+# downloaded earlier, a sequence identifier given as the first argument, or
+# a file (or stdin) with one identifier per line.
+if ($data_file) {   # just read the data from a file, give to $get_annot_sub().
+  my %annot_data = (seq_info => ">$data_file DATA");
+
+  open(my $data_fh, '<', $data_file) || die "Cannot read $data_file: $!";
+  my $lwp_data = do { local $/; <$data_fh> };	# read the whole file at once
+  close($data_fh);
+
+  # json_annots() works on the decoded structure, not on the JSON text
+  $annot_data{list} = $get_annot_sub->(\%annot_types, decode_json($lwp_data), 0);
+
+  push @annots, \%annot_data;
+}
+elsif ($query && ($query =~ m/[\|:]/
+		  # UniProt accession: the group makes '^' apply to both forms
+		  || $query =~ m/^(?:[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9](?:[A-Z][A-Z0-9]{2}[0-9]){1,2})/
+		  # NP_, XP_, NM_ or XM_ followed by digits
+		  || $query =~ m/^[XN][MP]_\d+/)) {
+  push @annots, lwp_annots("$query\t$seq_len", $get_annot_sub);
+}
+else {   # otherwise read identifiers, one per line, through <>
+  while (my $a_line = <>) {
+    $a_line =~ s/^>//;
+    chomp $a_line;
+    push @annots, lwp_annots($a_line, $get_annot_sub);
+  }
+}
+
+
+for my $seq_annot (@annots) {
+  print ">",$seq_annot->{seq_info},"\n";
+  for my $annot (@{$seq_annot->{list}}) {
+    if (!$lav && $show_color && defined($domains{$annot->[-1]})) {
+      $annot->[-1] .= $color_sep_str.$domains{$annot->[-1]};
+    }
+    print join("\t",@$annot),"\n";
+  }
+}
+
+exit(0);
+
+sub lwp_annots {
+  my ($query_len, $get_annot_sub) = @_;
+
+  my ($annot_line, $seq_len) = split(/\t/,$query_len);
+
+  my %annot_data = (seq_info=>$annot_line, list=>[]);
+
+  # Parse into fresh locals.  The file-level $acc/$id keep their values from
+  # the previous line, so a header without an accession (e.g. UniRef) would
+  # otherwise be looked up under the accession of the sequence before it.
+  my ($tmp, $gi, $sdb, $acc, $id);
+
+  if ($annot_line =~ m/^gi\|/) {
+    ($tmp, $gi, $sdb, $acc, $id) = split(/\|/,$annot_line);
+  } elsif ($annot_line =~ m/^(SP|TR):(\w+)\s(\w+)/) {
+    ($sdb, $id, $acc) = (lc($1), $2, $3);
+  } elsif ($annot_line =~ m/^(SP|TR):(\w+)/) {
+    ($sdb, $id, $acc) = (lc($1), $2, "");
+    warn("*** $0 accession required: $annot_line\n");
+  } elsif ($annot_line =~ m/^(UR\d{3}:UniRef\d{2})_(\w+)/) {
+    ($sdb, $id) = (lc($1), $2);		# this form sets no accession
+  } elsif ($annot_line =~ m/\|/) {
+    ($sdb, $acc, $id) = split(/\|/,$annot_line);
+  } else {
+    ($acc) = ($annot_line =~ m/^(\S+)/);
+  }
+
+  $acc =~ s/\.\d+// if ($acc);		# drop the version suffix
+
+  # Only accessions of this form are looked up; anything else, a failed
+  # download, or a reply that cannot be decoded leaves the list empty.
+  return \%annot_data unless ($acc && ($acc =~ m/^[A-Z][0-9][A-Z0-9]{3}[0-9]/));
+
+  my $lwp_features = get("$up_base/$acc.json");
+  return \%annot_data unless ($lwp_features && ($lwp_features !~ /ERROR/));
+
+  my $annot_json = eval { decode_json($lwp_features) };
+  if (!defined($annot_json)) {
+    warn("*** $0 cannot parse features for $acc: $@");
+    return \%annot_data;
+  }
+  $annot_data{list} = $get_annot_sub->(\%annot_types, $annot_json, $seq_len);
+  return \%annot_data;
+}
+
+####
+# json_annots() converts one decoded feature reply (a hash reference) from
+# www.ebi.ac.uk/proteins/api into a list of annotation rows sorted by position
+sub json_annots {
+  my ($annot_types, $json_ref, $seq_len) = @_;
+
+  my ($acc, $pos, $end, $label, $value, $comment, $len);
+  # the length passed in is replaced by the length of the returned sequence
+  $seq_len = 0;
+
+  my @feats2 = (); # features with start/stop, for checking overlap, adding negative
+  my @sites = ();  # sites with one position
+
+  my ($seq_str, $seq_acc, $seq_id)  = @{$json_ref}{qw(sequence accession entryName)};
+  $seq_len = length($seq_str);
+
+  for my $feat ( @{$json_ref->{features}} ) {
+    if ($annot_types->{$feat->{type}}) {
+      # only feature types present in %$annot_types get this far
+      my ($label, $pos, $end, $value)  = @{$feat}{qw(type begin end description)};
+      # remove '<' and '>' from the coordinates
+      $pos =~ s/[<>]//g;
+      $end =~ s/[<>]//g;
+
+      if ($label =~ m/DOMAIN/ || $label =~ m/REPEAT/) {
+	$value = domain_name($label,$value);	# canonical name; also numbers the domain
+	push @feats2, [$pos, "-", $end, $value];
+      } elsif ($label =~ m/HELIX/) {
+	push @feats2, [$pos, "-", $end, $label];
+      } elsif ($label =~ m/STRAND/) {
+	push @feats2, [$pos, "-", $end, $label];
+      } elsif ($label =~ m/VARIANT/ || $label =~ m/MUTAGEN/) {
+	push @sites, [$pos, $annot_types->{$label}, $feat->{alternativeSequence}, $value];
+      } 
+      else {
+	next unless ($pos == $end);	# other types are reported only for single positions
+	if ($feats_text{$label}) {
+	  my $info = $feats_text{$label};
+	  if ($value) {
+	    $info .= ": $value";
+	  }
+	  push @sites, [$pos, $annot_types->{$label}, "-", $info];
+	} else {
+	  push @sites, [$pos, $annot_types->{$label}, "-", $value];
+	}
+      }
+    }
+  }
+  # regions are ordered by start before the containment and overlap passes
+  @feats2 = sort { $a->[0] <=> $b->[0] } @feats2;
+
+  if ($no_over) {
+    # check for containment
+    my $have_contained = 0;
+    my $last_container = 0;
+    for (my $i=1; $i < scalar(@feats2); $i++) {
+      if ($feats2[$i]->[0] >= $feats2[$last_container]->[0] && $feats2[$i]->[2] <= $feats2[$last_container]->[2]) {
+	$feats2[$i]->[1] = 'Delete';	# mark; removed by the grep below
+	$have_contained = 1;
+      } else {
+	$last_container=$i;
+      }
+    }
+
+    if ($have_contained) {
+      @feats2 = grep { $_->[1] !~ /Delete/ } @feats2;
+    }
+
+    # ensure that domains do not overlap
+    for (my $i=1; $i < scalar(@feats2); $i++) {
+      my $diff = $feats2[$i-1]->[2] - $feats2[$i]->[0];
+      if ($diff >= 0) {
+	$feats2[$i-1]->[2] = $feats2[$i]->[0]+ int($diff/2);
+	$feats2[$i]->[0] = $feats2[$i-1]->[2] + 1;
+      }
+    }
+  }
+  # with $neg_doms set, gaps longer than $min_nodom are reported as NODOM regions
+  my @n_feats2 = ();
+
+  if ($neg_doms) {
+    my $last_end = 0;
+    for my $feat ( @feats2 ) {
+      if ($feat->[0] - $last_end > $min_nodom) {
+	push @n_feats2, [$last_end+1, "-", $feat->[0]-1, "NODOM"];
+      }
+      $last_end = $feat->[2];
+    }
+    if ($seq_len - $last_end > $min_nodom) {	# gap between the last region and the end
+      push @n_feats2, [$last_end+1, "-", $seq_len, "NODOM"];
+    }
+  }
+
+  my @feats = ();
+  for my $feat (@feats2, @n_feats2) {
+    if (!$lav)  {	# four columns: start, '-', end, name
+      push @feats, [$feat->[0], '-', $feat->[2], $feat->[-1] ];
+#      push @feats, [$feat->[2], ']', '-', ""];
+    }
+    else {		# --lav: three columns: start, end, name
+      push @feats, [$feat->[0], $feat->[2], $feat->[-1]];
+    }
+  }
+  # sites and regions are merged and ordered by their first column
+  @feats = sort { $a->[0] <=> $b->[0] } (@sites, @feats);
+
+  return \@feats;
+}
+
+# domain_name() takes a uniprot domain label, removes comments ( ;
+# truncated) and numbers and returns a canonical form. Thus:
+# Cortactin 6.
+# Cortactin 7; truncated.
+# becomes "Cortactin".  Blanks that remain are replaced by '_', and a
+# missing description becomes "UnDef".
+#
+sub domain_name {
+  my ($label, $value) = @_;
+
+  $value = 'UnDef' unless $value;
+
+  if ($label =~ /Domain|Repeat/i) {
+    $value =~ s/;.*$//;		# "; truncated" and similar comments
+    $value =~ s/\.\s*$//;	# trailing period
+    $value =~ s/\s+\d+$//;	# trailing repeat number
+  }
+
+  # Blanks are converted only after the trailing number has been removed:
+  # once they are '_' the pattern \s+\d+$ has nothing left to match.
+  $value =~ s/ /_/g;
+
+  # number each canonical domain/repeat name the first time it is seen
+  if ($label =~ /Domain|Repeat/i && !defined($domains{$value})) {
+    $domain_cnt++;
+    $domains{$value} = $domain_cnt;
+  }
+  return $value;
+}
+
+
+
+__END__
+
+=pod
+
+=head1 NAME
+
+ann_feats_up_www2.pl
+
+=head1 SYNOPSIS
+
+ ann_feats_up_www2.pl --no_doms --no_feats --lav 'sp|P09488|GSTM1_HUMAN' | accession.file
+
+=head1 OPTIONS
+
+ -h	short help
+ --help include description
+ --no-doms  do not show domain boundaries (domains are always shown with --lav)
+ --no-feats do not show feature (variants, active sites, phospho-sites)
+ --lav  produce lav2plt.pl annotation format, only show domains/repeats
+
+ --neg-doms,  -- report domains between annotated domains as NODOM
+                 (also --neg, --neg_doms)
+ --min_nodom=10  -- minimum length between domains for NODOM
+
+ --host, --user, --password, --port --db -- info for mysql database
+
+=head1 DESCRIPTION
+
+C<ann_feats_up_www2.pl> extracts feature, domain, and repeat
+information from the EBI proteins API, which is queried at
+http://www.ebi.ac.uk/proteins/api/features/ACCESSION.json.
+This server provides JSON descriptions of Uniprot entries, with most of
+the information provided in UniProt feature tables.
+
+C<ann_feats_up_www2.pl> is an alternative to C<ann_pfam.pl> and
+C<ann_pfam.pl> that does not require a local MySQL copy of Pfam.
+
+Given a command line argument that contains a sequence accession
+(P09488), the program looks up the features available for that
+sequence and returns them in a tab-delimited format:
+
+>sp|P09488|GSTM1_HUMAN
+2	[	-	GST N-terminal :1
+7	V	F	Mutagen: Reduces catalytic activity 100- fold.
+23	*	-	MOD_RES: Phosphotyrosine (By similarity).
+33	*	-	MOD_RES: Phosphotyrosine (By similarity).
+34	*	-	MOD_RES: Phosphothreonine (By similarity).
+88	]	-	
+90	[	-	GST C-terminal :2
+108	V	Q	Mutagen: Reduces catalytic activity by half.
+108	V	S	Mutagen: Changes the properties of the enzyme toward some substrates.
+109	V	I	Mutagen: Reduces catalytic activity by half.
+116	#	-	BINDING: Substrate.
+116	V	A	Mutagen: Reduces catalytic activity 10-fold.
+116	V	F	Mutagen: Slight increase of catalytic activity.
+173	V	N	in allele GSTM1B; dbSNP:rs1065411.
+208	]	-	
+210	V	T	in dbSNP:rs449856.
+
+If features are provided, then a legend of feature symbols is provided
+as well:
+
+ =*:phosphorylation
+ ==:active site
+ =@:site
+ =^:binding
+ =!:metal binding
+
+If the C<--lav> option is specified, domain and repeat features are
+presented in a different format for the C<lav2plt.pl> program:
+
+  >sp|P09488|GSTM1_HUMAN
+  2	88	GST N-terminal.
+  90	208	GST C-terminal.
+
+C<ann_feats_up_www2.pl> is designed to be used by the B<FASTA> programs with
+the C<-V \!ann_feats_up_www2.pl> option.  It can also be used with the lav2plt.pl
+program with the C<--xA "\!ann_feats_up_www2.pl --lav"> or C<--yA "\!ann_feats_up_www2.pl --lav"> options.
+
+=head1 AUTHOR
+
+William R. Pearson, wrp at virginia.edu
+
+=cut
diff --git a/scripts/ann_ipr_www.pl b/scripts/ann_ipr_www.pl
new file mode 100755
index 0000000..77f1a4e
--- /dev/null
+++ b/scripts/ann_ipr_www.pl
@@ -0,0 +1,467 @@
+#!/usr/bin/perl -w
+
+################################################################
+# copyright (c) 2014 by William R. Pearson and The Rector &
+# Visitors of the University of Virginia */
+################################################################
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under this License is distributed on an "AS
+# IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+# express or implied.  See the License for the specific language
+# governing permissions and limitations under the License. 
+################################################################
+
+# ann_ipr_www.pl gets an annotation file from fasta36 -V with a line of the form:
+
+
+# gi|62822551|sp|P00502|GSTA1_RAT Glutathione S-transfer\n  (at least from pir1.lseg)
+
+# this version only annotates sequences known to InterPro
+# and only provides domain information
+
+# This script uses the dbfetch iprmc database, which REQUIRES a
+# Uniprot Acc (not ID).  If an Acc is not provided, we must get an ACC
+# first from the ID.
+
+# SP:GSTM1_HUMAN P09488 218
+#
+# it must:
+# (1) read in the line
+# (2) parse it to get the up_acc
+# (3) return the tab delimited domains
+#
+
+use strict;
+
+use Getopt::Long;
+use Pod::Usage;
+use LWP::Simple;
+## use IO::String;
+
+# use dbfetch and IPRMC to get Interpro domain coordinates
+# http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=iprmc&id=gstm1_human&format=gff2&style=default&Retrieve=Retrieve
+
+my $ipr_base = 'http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=iprmc&id=';
+my $gff_post = '&format=gff2&style=default&Retrieve=Retrieve';
+
+################################################################
+#
+##gff-version 2
+##Type Protein
+# InterPro Matches for UniProtKB entries 
+##source-version InterProMatches 49.0
+##date 20-NOV-14
+
+##sequence-region P09488 1 218
+# P09488	InterProScan	region	99	190	1.1999999998684077E-49	.	.	Signature GENE3D G3DSA:1.20.1050.10 "G3DSA:1.20.1050.10" T ; InterPro IPR010987 "Glutathione S-transferase, C-terminal-like"
+# P09488	InterProScan	region	2	98	5.800000000494973E-51	.	.	Signature GENE3D G3DSA:3.40.30.10 "G3DSA:3.40.30.10" T ; InterPro IPR012336 "Thioredoxin-like fold"
+# P09488	InterProScan	region	105	189	3.900000000000007E-16	.	.	Signature PFAM PF00043 "GST_C" T ; InterPro IPR004046 "Glutathione S-transferase, C-terminal"
+# P09488	InterProScan	region	4	82	7.299999999999985E-21	.	.	Signature PFAM PF02798 "GST_N" T ; InterPro IPR004045 "Glutathione S-transferase, N-terminal"
+# P09488	InterProScan	region	31	43	1.1000015067164208E-25	.	.	Signature PRINTS PR01267 "GSTRNSFRASEM" T ; InterPro IPR003081 "Glutathione S-transferase, Mu class"
+# P09488	InterProScan	region	44	56	1.1000015067164208E-25	.	.	Signature PRINTS PR01267 "GSTRNSFRASEM" T ; InterPro IPR003081 "Glutathione S-transferase, Mu class"
+# P09488	InterProScan	region	87	98	1.1000015067164208E-25	.	.	Signature PRINTS PR01267 "GSTRNSFRASEM" T ; InterPro IPR003081 "Glutathione S-transferase, Mu class"
+# P09488	InterProScan	region	139	152	1.1000015067164208E-25	.	.	Signature PRINTS PR01267 "GSTRNSFRASEM" T ; InterPro IPR003081 "Glutathione S-transferase, Mu class"
+# P09488	InterProScan	region	1	88	0.0	.	.	Signature PROFILE PS50404 "GST_NTER" T ; InterPro IPR004045 "Glutathione S-transferase, N-terminal"
+# P09488	InterProScan	region	90	208	0.0	.	.	Signature PROFILE PS50405 "GST_CTER" T ; InterPro IPR010987 "Glutathione S-transferase, C-terminal-like"
+# P09488	InterProScan	region	1	217	0.0	.	.	Signature PANTHER PTHR11571 "PTHR11571" T
+# P09488	InterProScan	region	1	217	0.0	.	.	Signature PANTHER PTHR11571:SF117 "PTHR11571:SF117" T
+# P09488	InterProScan	region	86	217	8.190000000746436E-47	.	.	Signature SSF SSF47616 "SSF47616" T ; InterPro IPR010987 "Glutathione S-transferase, C-terminal-like"
+# P09488	InterProScan	region	3	85	3.339999999911062E-23	.	.	Signature SSF SSF52833 "SSF52833" T ; InterPro IPR012336 "Thioredoxin-like fold"
+###
+
+my %domains = ();	# InterPro accession -> number appended to the domain name
+my $domain_cnt = 0;	# last number handed out in %domains
+
+# command line flags; $no_feats starts at 1, the others at 0
+my ($sstr, $lav, $neg_doms, $no_doms, $no_feats, $no_over, $data_file, $shelp, $help) = (0,0,0,0,1,0,0,0,0);
+
+# database names given with --dom_dbs/--dbs, joined by '+'
+my $dom_dbs = "PFAM+PROFILE+GENE3D";
+
+my ($min_nodom) = (10);	# shortest unannotated gap reported as NODOM
+
+my $color_sep_str = '~';	# placed between a domain name and its number
+
+GetOptions(
+    "lav" => \$lav,		# lav2plt.pl annotation format
+    "no-over" => \$no_over,	# drop contained domains, trim overlapping ones
+	   "no_doms" => \$no_doms,
+	   "no-doms" => \$no_doms,
+	   "nodoms" => \$no_doms,
+    	   "dom_dbs:s" => \$dom_dbs,	# '+'-separated database names, e.g. PFAM+PROFILE
+    	   "dbs:s" => \$dom_dbs,
+	   "neg" => \$neg_doms,		# report gaps between domains as NODOM
+	   "neg_doms" => \$neg_doms,
+	   "neg-doms" => \$neg_doms,
+	   "negdoms" => \$neg_doms,
+	   "min_nodom=i" => \$min_nodom,
+	   "no_feats" => \$no_feats,
+	   "no-feats" => \$no_feats,
+	   "nofeats" => \$no_feats,
+	   "data:s" => \$data_file,	# read the GFF data from this file
+	   "sstr" => \$sstr,
+	   "h|?" => \$shelp,
+	   "help" => \$help,
+	  );
+
+pod2usage(1) if $shelp;
+pod2usage(exitstatus => 0, verbose => 2) if $help;
+pod2usage(1) unless (-p STDIN || -f STDIN || @ARGV || $data_file);	# nothing to read: show usage
+
+my @feat_keys = qw(catalytic_residue posttranslation_modification binding_motif metal_contact
+		   polypeptide_region mutated_variant_site natural_variant_site);
+
+# feature key -> text, filled in with hash slices in @feat_keys order
+my %feats_text = ();
+@feats_text{@feat_keys} = ('Active site', '', 'Substrate binding', 'Metal binding', 'Site', '','');
+my %feats_label;
+@feats_label{@feat_keys} = ('Active site', 'Modified', 'Substrate binding', 'Metal binding', 'Site', '','');
+
+my @feat_vals = ( '=','*','#','^','@','V','V');
+
+# %annot_types holds the database names given with --dom_dbs/--dbs
+my %annot_types = ();
+
+my $get_annot_sub = \&iprmc_annots;
+
+# --lav output carries no site features
+$no_feats = 1 if ($lav);
+
+if ($dom_dbs) {
+  # each '+'-separated database name is stored as key and as value
+  $annot_types{$_} = $_ for (split(/\+/,$dom_dbs));
+}
+
+# NODOM regions get number 0
+$domains{'NODOM'}=0 if ($neg_doms);
+
+my ($tmp, $gi, $sdb, $acc, $id, $use_acc);
+
+# legend: one "=<symbol>:<label>" line per feature type that has a label;
+# printed only when features are wanted and --sstr was not given
+if (!$no_feats && !$sstr) {
+  for my $key_ix ( 0 .. $#feat_keys) {
+    my $legend = $feats_label{$feat_keys[$key_ix]};
+    next unless ($legend);
+    print "=",$feat_vals[$key_ix],":",$legend,"\n";
+  }
+}
+
+# get the query
+my ($query, $seq_len) =  @ARGV;
+$seq_len = 0 unless defined($seq_len);
+
+$query =~ s/^>// if ($query);
+
+my @annots = ();
+
+# Input comes from one of three places: a --data file of GFF text that was
+# downloaded earlier, a sequence identifier given as the first argument, or
+# a file (or stdin) with one identifier per line.
+if ($data_file) {   # just read the data from a file, give to $get_annot_sub().
+  my %annot_data = (seq_info => ">$data_file DATA");
+
+  open(my $data_fh, '<', $data_file) || die "Cannot read $data_file: $!";
+  my $lwp_data = do { local $/; <$data_fh> };	# read the whole file at once
+  close($data_fh);
+  $lwp_data = "" unless defined($lwp_data);
+
+  $annot_data{list} = $get_annot_sub->(\%annot_types, $lwp_data,0);
+
+  push @annots, \%annot_data;
+}
+elsif ($query && ($query =~ m/[\|:]/
+		  # UniProt accession: the group makes '^' apply to both forms
+		  || $query =~ m/^(?:[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9](?:[A-Z][A-Z0-9]{2}[0-9]){1,2})/
+		  # NP_, XP_, NM_ or XM_ followed by digits
+		  || $query =~ m/^[NX][MP]_\d+/)) {
+  push @annots, lwp_annots("$query\t$seq_len", $get_annot_sub);
+}
+else {   # otherwise read identifiers, one per line, through <>
+  while (my $a_line = <>) {
+    $a_line =~ s/^>//;
+    chomp $a_line;
+    push @annots, lwp_annots($a_line, $get_annot_sub);
+  }
+}
+
+
+for my $seq_annot (@annots) {
+  print ">",$seq_annot->{seq_info},"\n";
+  for my $annot (@{$seq_annot->{list}}) {
+    if (!$lav && defined($domains{$annot->[-1]})) {
+      $annot->[-2] .= $color_sep_str.$domains{$annot->[-1]};
+    }
+    print join("\t",@{$annot}[0..3]),"\n";
+  }
+}
+
+exit(0);
+
+sub lwp_annots {
+  my ($query_len, $get_annot_sub) = @_;
+
+  my ($annot_line, $seq_len) = split(/\t/,$query_len);
+
+  my %annot_data = (seq_info=>$annot_line, list=>[]);
+
+  # Parse into fresh locals: the file-level $acc/$id keep the values of the
+  # previous line, which would be used for a header that sets only one of them.
+  my ($tmp, $gi, $sdb, $acc, $id);
+
+  if ($annot_line =~ m/^gi\|/) {
+    ($tmp, $gi, $sdb, $acc, $id) = split(/\|/,$annot_line);
+  } elsif ($annot_line =~ m/^(SP|TR):(\w+) (\w+)/) {
+    ($sdb, $id, $acc)  = (lc($1), $2, $3);
+  } elsif ($annot_line =~ m/^(SP|TR):(\w+)/) {
+    ($sdb, $id, $acc)  = (lc($1), $2, "");
+  } elsif ($annot_line =~ m/^(UR\d{3}:UniRef\d{2})_(\w+)/) {
+    ($sdb, $id) = (lc($1), $2);		# this form sets no accession
+  } elsif ($annot_line =~ m/\|/) {
+    ($sdb, $acc, $id) = split(/\|/,$annot_line);
+  } else {
+    ($acc) = ($annot_line =~ m/^(\S+)/);
+  }
+
+  $acc =~ s/\.\d+// if ($acc);		# drop the version suffix
+
+  # fetch by accession when there is one of the expected form, else by identifier
+  my $lwp_domains = "";
+  if ($acc && ($acc =~ m/^[A-Z][0-9][A-Z0-9]{3}[0-9]/)) {
+    $lwp_domains = get($ipr_base . $acc . $gff_post);
+  } elsif ($id && ($id =~ m/^\w+$/)) {
+    $lwp_domains = get($ipr_base . $id . $gff_post);
+  }
+
+  if ($lwp_domains && ($lwp_domains !~ /ERROR/)) {
+    $annot_data{list} = $get_annot_sub->(\%annot_types, $lwp_domains, $seq_len);
+  }
+  return \%annot_data;
+}
+
+# iprmc_annots() parses the InterPro-match GFF text returned by dbfetch.
+#  $annot_types: the --dom_dbs hash reference (not consulted here)
+#  $annot_data:  the GFF text
+#  $seq_len:     ignored; the length is read from "##sequence-region"
+# Uses the file-level $no_over, $neg_doms, $min_nodom and $lav settings and
+# returns a reference to the rows, sorted by start position.
+#
+sub iprmc_annots {
+  my ($annot_types, $annot_data, $seq_len) = @_;
+
+  my ($acc, $pos, $end, $comment);
+
+  # the length passed in is not used
+  $seq_len = 0;
+
+  my @feats2 = (); # domains with start/stop, for checking overlap, adding negative
+  my @sites = ();  # sites with one position
+
+  # Every line is examined.  The header line starts with "##", and its last
+  # field is the sequence length, which is needed for a C-terminal NODOM.
+  # (Looping with "while (my $line = shift @lines)" would stop at the first
+  # empty line.)
+  for my $gff_line (split(/\n/,$annot_data)) {
+    if ($gff_line =~ m/^##sequence-region\s/) {
+      my @fields = split(/\s+/,$gff_line);
+      $seq_len = $fields[-1] if ($fields[-1] =~ m/^\d+$/);
+      next;
+    }
+    next if ($gff_line =~ m/^#/ || $gff_line !~ m/\S/);
+
+    my @gff_line_arr = split(/\t/,$gff_line);
+    ($acc, $pos, $end, $comment) = @gff_line_arr[(0,3,4,-1)];
+    next unless (defined($pos) && defined($end));	# no coordinate columns
+
+    # parse the comment to get signature (domain_db), domain_db_acc, interpro_acc, description
+    my ($domain_info, $dom_acc) = parse_ipr_comment($comment);
+
+    next unless $domain_info;
+
+    push @feats2, [$pos, "-", $end, $domain_info, $dom_acc];
+  }
+
+  @feats2 = sort { $a->[0] <=> $b->[0] } @feats2;
+
+  if ($no_over) {
+    # check for containment
+    my $have_contained = 0;
+    my $last_container = 0;
+    for (my $i=1; $i < scalar(@feats2); $i++) {
+      if ($feats2[$i]->[0] >= $feats2[$last_container]->[0] && $feats2[$i]->[2] <= $feats2[$last_container]->[2]) {
+	$feats2[$i]->[1] = 'Delete';
+	$have_contained = 1;
+      } else {
+	$last_container=$i;
+      }
+    }
+
+    if ($have_contained) {
+      @feats2 = grep { $_->[1] !~ /Delete/ } @feats2;
+    }
+
+    # ensure that domains do not overlap
+    for (my $i=1; $i < scalar(@feats2); $i++) {
+      my $diff = $feats2[$i-1]->[2] - $feats2[$i]->[0];
+      if ($diff >= 0) {
+	$feats2[$i-1]->[2] = $feats2[$i]->[0]+ int($diff/2);
+	$feats2[$i]->[0] = $feats2[$i-1]->[2] + 1;
+      }
+    }
+  }
+
+  my @n_feats2 = ();
+
+  if ($neg_doms) {
+    my $last_end = 0;
+    for my $feat ( @feats2 ) {
+      if ($feat->[0] - $last_end > $min_nodom) {
+	push @n_feats2, [$last_end+1, "-", $feat->[0]-1, "NODOM", ""];
+      }
+      $last_end = $feat->[2];
+    }
+    if ($seq_len - $last_end > $min_nodom) {
+      push @n_feats2, [$last_end+1, "-", $seq_len, "NODOM", ""];
+    }
+  }
+
+  my @feats = ();
+
+  for my $feat (@feats2, @n_feats2) {
+    if (!$lav)  {
+      push @feats, [$feat->[0], '-', $feat->[2], $feat->[-2], $feat->[-1] ];
+#      push @feats, [$feat->[2], ']', '-', ""];
+    }
+    else {
+      push @feats, [$feat->[0], $feat->[2], $feat->[-1]];
+    }
+  }
+
+  @feats = sort { $a->[0] <=> $b->[0] } (@sites, @feats);
+
+  # now that domains are sorted, give them names
+  for my $feat ( @feats ) {
+    $feat->[-2] = domain_name($feat->[-2],$feat->[-1]);
+  }
+
+  return \@feats;
+}
+
+sub parse_ipr_comment {
+  # in:  Signature <db> <acc> "<descr>" T ; InterPro <IPRacc> "<descr>"
+  # out: ("<acc>:<InterPro descr, blanks as '_'>", <IPRacc>), or ("","")
+  #      for PRINTS/PROSITE signatures and entries without an InterPro part
+  my ($comment_str) = @_;
+
+  # test for "nothing to parse" before $comments[0] is touched (and created)
+  return ("","") unless (defined($comment_str) && $comment_str =~ m/\S/);
+  my @comments = split(/\s+;\s+/,$comment_str);
+  $comments[0] =~ s/^Signature\s+//;
+
+  my @comment_info = ();
+  for my $comment (@comments) {
+    my %ipr_data = ();
+    @ipr_data{qw(db acc descr)} = ($comment =~ m/(\S+)\s+(\S+)\s+"([^"]+)"/);
+    next unless defined($ipr_data{db});	# not in <db> <acc> "<descr>" form
+    return ("","") if $ipr_data{db} =~ m/(PRINTS|PROSITE)/i;
+    $ipr_data{descr} =~ s/\s+/_/g;
+    push @comment_info, \%ipr_data;
+  }
+
+  for my $comment (@comment_info) {
+    next unless ($comment->{db} =~ m/InterPro/);
+    return ($comment_info[0]->{acc}.":".$comment->{descr},$comment->{acc});
+  }
+  return ("","");
+}
+
+# domain_name() cleans up a domain description and registers its InterPro
+# accession.  Text from the first ';' on, a trailing period, and a trailing
+# blank-separated number are removed from the description (an empty one
+# becomes "UnDef").  An accession not seen before is given the next number
+# in %domains.  The cleaned description is returned.
+#
+sub domain_name {
+  my ($descr, $ipr_acc) = @_;
+
+  $descr = 'UnDef' if (!$descr);
+
+  for my $strip_re (qr/;.*$/, qr/\.\s*$/, qr/\s+\d+$/) {
+    $descr =~ s/$strip_re//;
+  }
+
+  # number accessions in the order in which they are first seen
+  unless (defined($domains{$ipr_acc})) {
+    $domains{$ipr_acc} = ++$domain_cnt;
+  }
+
+  return $descr;
+}
+
+__END__
+
+=pod
+
+=head1 NAME
+
+ann_ipr_www.pl
+
+=head1 SYNOPSIS
+
+ ann_ipr_www.pl --no_doms --no_feats --lav 'sp|P09488|GSTM1_HUMAN' | accession.file
+
+=head1 OPTIONS
+
+ -h	short help
+ --help include description
+ --no-doms  do not show domain boundaries (domains are always shown with --lav)
+ --no-feats do not show feature (variants, active sites, phospho-sites)
+ --lav  produce lav2plt.pl annotation format, only show domains/repeats
+
+ --neg-doms,  -- report domains between annotated domains as NODOM
+                 (also --neg, --neg_doms)
+ --min_nodom=10  -- minimum length between domains for NODOM
+
+ --host, --user, --password, --port --db -- info for mysql database
+
+=head1 DESCRIPTION
+
+C<ann_ipr_www.pl> extracts feature, domain, and repeat
+information from the Uniprot DAS server through an XSLT translation
+provided by http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/uniprotkb.
+This server provides GFF descriptions of Uniprot entries, with most of
+the information provided in UniProt feature tables.
+
+C<ann_ipr_www.pl> is an alternative to C<ann_pfam.pl> and
+C<ann_pfam.pl> that does not require a local MySQL copy of Pfam.
+
+Given a command line argument that contains a sequence accession
+(P09488), the program looks up the domains available for that
+sequence and returns them in a tab-delimited format:
+
+>sp|P09488|GSTM1_HUMAN
+2	-	88	GST N-terminal :1
+90	-	208	GST C-terminal :2
+
+If the C<--lav> option is specified, domain and repeat domains are
+presented in a different format for the C<lav2plt.pl> program:
+
+  >sp|P09488|GSTM1_HUMAN
+  2	88	GST N-terminal.
+  90	208	GST C-terminal.
+
+C<ann_ipr_www.pl> is designed to be used by the B<FASTA> programs with
+the C<-V \!ann_ipr_www.pl> option.  It can also be used with the lav2plt.pl
+program with the C<--xA "\!ann_ipr_www.pl --lav"> or C<--yA "\!ann_ipr_www.pl --lav"> options.
+
+=head1 AUTHOR
+
+William R. Pearson, wrp at virginia.edu
+
+=cut
diff --git a/scripts/ann_pdb_cath.pl b/scripts/ann_pdb_cath.pl
new file mode 100755
index 0000000..1eeeaf1
--- /dev/null
+++ b/scripts/ann_pdb_cath.pl
@@ -0,0 +1,345 @@
+#!/usr/bin/perl -w
+
+################################################################
+# copyright (c) 2014 by William R. Pearson and The Rector &
+# Visitors of the University of Virginia */
+################################################################
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under this License is distributed on an "AS
+# IS" BASIS, WITHOUT WRRANTIES OR CONDITIONS OF ANY KIND, either
+# express or implied.  See the License for the specific language
+# governing permissions and limitations under the License. 
+################################################################
+
+# ann_pdb_cath.pl gets an annotation file from fasta36 -V with a line of the form:
+
+# gi|62822551|sp|P00502|GSTA1_RAT Glutathione S-transfer\n  (at least from pir1.lseg)
+#
+# it must:
+# (1) read in the line
+# (2) parse it to get the up_acc
+# (3) return the tab delimited features
+#
+
+# this version is designed for various formats of the pdbaa/pdbaa_off NCBI files with the lines:
+# >gi|4388890|pdb|1GTUA|sp|P09488  or 
+# >gi|4388890|pdb|1GTU|A
+# if I can find |sp|P09488, I will use that, otherwise I will use
+# |pdb|1GTU|A (concatenated) and a different part of the cath_dom
+# database
+#
+
+use strict;
+
+use DBI;
+use Getopt::Long;
+use Pod::Usage;
+
+# connection settings are package globals; GetOptions() below can override them
+use vars qw($host $db $port $user $pass);
+
+# NOTE(review): $hostname is assigned here but not referenced again in this script
+my $hostname = `/bin/hostname`;
+
+# default MySQL connection parameters
+($host, $db, $port, $user, $pass)  = ("wrpxdb.its.virginia.edu", "uniprot", 0, "web_user", "fasta_www");
+
+# option flags (0 = off) and the minimum gap, in residues, reported as NODOM
+my ($neg_doms, $lav, $shelp, $help, $class) = (0, 0, 0, 0, 0);
+my ($min_nodom) = (10);
+
+# separator written between a domain name and its number in the output;
+# the first value is immediately replaced by '~'
+my $color_sep_str = " :";
+$color_sep_str = '~';
+
+# --neg, --neg_doms and --neg-doms all set the same flag
+GetOptions(
+    "host=s" => \$host,
+    "db=s" => \$db,
+    "user=s" => \$user,
+    "password=s" => \$pass,
+    "port=i" => \$port,
+    "lav" => \$lav,
+    "neg" => \$neg_doms,
+    "neg_doms" => \$neg_doms,
+    "neg-doms" => \$neg_doms,
+    "min_nodom=i" => \$min_nodom,
+    "class" => \$class,
+    "h|?" => \$shelp,
+    "help" => \$help,
+    );
+
+# show usage on -h/-?, the full POD on --help, and usage again when there
+# is no argument and STDIN is neither a pipe nor a plain file
+pod2usage(1) if $shelp;
+pod2usage(exitstatus => 0, verbose => 2) if $help;
+pod2usage(1) unless ( @ARGV || -p STDIN || -f STDIN);
+
+# build the DBI data source name; host and port are added only when set
+my $connect = "dbi:mysql(AutoCommit=>1,RaiseError=>1):database=$db";
+$connect .= ";host=$host" if $host;
+$connect .= ";port=$port" if $port;
+
+my $dbh = DBI->connect($connect,
+		       $user,
+		       $pass
+		      ) or die $DBI::errstr;
+
+# domain name => domain number, filled in by domain_name(); NODOM is 0
+my %domains = (NODOM=>0);
+my $domain_cnt = 0;
+
+# coordinate rows from pdb_chain_up for one pdb_acc, ordered by res_beg
+my $get_offsets_pdb =  $dbh->prepare(<<EOSQL);
+SELECT res_beg, pdb_beg, pdb_end, sp_beg, sp_end
+FROM pdb_chain_up
+WHERE pdb_acc=?
+ORDER BY res_beg
+EOSQL
+
+# domain boundaries, class and description from cath_doms/cath_names for
+# one pdb_acc, ordered by s_start
+my $get_cathdoms_pdb = $dbh->prepare(<<EOSQL);
+SELECT s_start, s_stop, p_start, p_stop, cath_class, s_descr as info
+FROM cath_doms
+JOIN cath_names using(cath_class)
+WHERE pdb_acc=?
+ORDER BY s_start
+EOSQL
+
+# NOTE(review): these file-level variables are not used below in this
+# script; show_annots() declares its own lexicals
+my ($tmp, $gi, $sdb, $acc, $id, $use_acc);
+
+# get the query
+# (first argument is the sequence identifier, the optional second one its
+# length; a missing or zero length becomes 1)
+my ($query, $seq_len) = @ARGV;
+$seq_len = 1 unless $seq_len;
+
+$query =~ s/^>// if $query;
+
+# one hash ref per sequence, as returned by show_annots()
+my @annots = ();
+
+#if it's a file I can open, read and parse it
+# (an argument without '|' or ':' is not treated as an identifier; it stays
+# in @ARGV, so <> reads identifiers line by line from it, or from STDIN)
+unless ($query && $query =~ m/[\|:]/) {
+
+  while (my $a_line = <>) {
+    $a_line =~ s/^>//;
+    chomp $a_line;
+    push @annots, show_annots($lav,$a_line);
+  }
+}
+else {
+  push @annots, show_annots($lav, "$query $seq_len");
+}
+
+# print a '>' header per sequence followed by one tab-separated line per
+# annotation; unless --lav, the number recorded in %domains for the last
+# field (the domain name) is appended to it after $color_sep_str
+for my $seq_annot (@annots) {
+  print ">",$seq_annot->{seq_info},"\n";
+  for my $annot (@{$seq_annot->{list}}) {
+    if (!$lav && defined($domains{$annot->[-1]})) {
+      $annot->[-1] .= $color_sep_str.$domains{$annot->[-1]};
+    }
+    print join("\t",@$annot),"\n";
+  }
+}
+
+exit(0);
+
+# show_annots($lav, $annot_line)
+#
+# Parses one annotation request ("identifier [length]"), derives the
+# accession used for the database lookups, and returns a hash ref with
+#   seq_info => the identifier as given
+#   list     => whatever get_cath_annots() returns for that accession
+#
+# Identifier forms handled:
+#   gi|<gi>|pdb|<acc>|sp|<up_acc>  -- 4th field is the accession; offset
+#                                     coordinates are requested
+#   gi|<gi>|pdb|<id>|<chain>       -- id and chain are concatenated
+#   sp|<acc>|...                   -- 2nd field is the accession
+#   pdb|<id>|<chain>               -- id and chain are concatenated
+#   anything else                  -- used unchanged
+sub show_annots {
+  my ($lav, $annot_line) = @_;
+
+  my ($query, $seq_len) = split(/\s+/,$annot_line);
+
+  # lines read from a file or STDIN may be empty, may carry no length, or
+  # may be followed by description text; use the same default (1) as the
+  # command-line path so the numeric comparisons below are well defined
+  $query = '' unless defined($query);
+  $seq_len = 1 unless (defined($seq_len) && $seq_len =~ m/^\d+$/ && $seq_len > 0);
+
+  my %annot_data = (seq_info => $query);
+
+  my ($tmp, $gi, $pdb, $pdb_acc, $pdb_id, $pdb_chain, $sdb, $up_acc, $off_flag);
+
+  $off_flag = 0;
+  if ($query =~ m/^gi\|/) {
+    if ($query =~ m/\|sp\|/) {
+      ($tmp, $gi, $pdb, $pdb_acc, $sdb, $up_acc) = split(/\|/,$query);
+      # strip a version suffix; the field is absent when nothing follows "|sp|"
+      $up_acc =~ s/\.\d+$// if defined($up_acc);
+      $off_flag = 1;
+    }
+    elsif ($query =~ m/\|pdb\|/) {
+      ($tmp, $gi, $pdb, $pdb_id, $pdb_chain) = split(/\|/,$query);
+      $pdb_id = '' unless defined($pdb_id);
+      $pdb_chain = '' unless defined($pdb_chain);
+      $pdb_acc = $pdb_id . $pdb_chain;
+    }
+  }
+  elsif ($query =~ m/^sp\|/) {
+    ($pdb, $pdb_acc) = split(/\|/,$query);
+  }
+  elsif  ($query =~ m/^pdb\|(\w{4})\|(\w)/) {
+    $pdb_acc = $1 . $2;
+  }
+  else {
+    $pdb_acc = $query;
+  }
+
+  # only the first row (lowest res_beg) of the offset table is used
+  $get_offsets_pdb->execute($pdb_acc);
+  my ($res_beg, $pdb_beg, $pdb_end, $sp_beg, $sp_end) = $get_offsets_pdb->fetchrow_array();
+
+  # no offset row: default the starting coordinates to 1
+  $res_beg = 1 unless defined($res_beg);
+  $pdb_beg = 1 unless defined($pdb_beg);
+  $sp_beg = 1 unless defined($sp_beg);
+
+  # the sequence is at least as long as the furthest coordinate on record
+  if (defined($sp_end) && $sp_end > $seq_len) {$seq_len = $sp_end;}
+  if (defined($pdb_end) && $pdb_end > $seq_len) {$seq_len = $pdb_end;}
+
+  $get_cathdoms_pdb->execute($pdb_acc);
+  $annot_data{list} = get_cath_annots($lav, $get_cathdoms_pdb, $pdb_beg, $seq_len, $off_flag);
+
+  return \%annot_data;
+}
+
+# get_cath_annots($lav, $get_annots, $sp_offset, $seq_length, $is_offset)
+#
+#   $lav        -- true: return [start, end, name] rows (lav2plt format);
+#                  false: return [start, '-', end, name] rows
+#   $get_annots -- executed statement handle yielding s_start, s_stop,
+#                  p_start, p_stop, cath_class and info columns
+#   $sp_offset  -- value subtracted from every start/stop coordinate
+#   $seq_length -- sequence length; when <= 1 it is taken from the domains
+#   $is_offset  -- true: use p_start/p_stop when p_start is set,
+#                  otherwise use s_start/s_stop
+#
+# Returns an array ref of annotation rows, or nothing when the query
+# produced no domains.  Reads the file-level $neg_doms, $min_nodom and
+# $class settings and registers names through domain_name().
+sub get_cath_annots {
+  my ($lav, $get_annots, $sp_offset, $seq_length, $is_offset) = @_;
+
+  my @cath_domains = ();
+
+  # get the list of domains, sorted by start
+  while ( my $row_href = $get_annots->fetchrow_hashref()) {
+
+    # subtract sp_offset from whichever coordinate pair applies
+    if ($is_offset && $row_href->{p_start}) {
+      $row_href->{seq_start} = $row_href->{p_start} - $sp_offset;
+      $row_href->{seq_end} = $row_href->{p_stop} - $sp_offset;
+    }
+    else {
+      $row_href->{seq_start} = $row_href->{s_start} - $sp_offset;
+      $row_href->{seq_end} = $row_href->{s_stop} - $sp_offset;
+    }
+
+    # with no usable length, adopt this domain's end; otherwise clip to it
+    if ($seq_length <= 1) {
+      $seq_length = $row_href->{seq_end};
+    }
+    else {
+      $row_href->{seq_end} = $seq_length if ($row_href->{seq_end} > $seq_length);
+    }
+
+    $row_href->{info} =~ s/\s+/_/g;
+
+    push @cath_domains, $row_href
+  }
+
+  return unless (scalar(@cath_domains));
+
+  # remove overlaps between neighbouring domains by splitting the shared
+  # region at its midpoint: the earlier domain gives up half of the
+  # overlap (whole residues only) and the later one starts right after it
+  for (my $i=1; $i < scalar(@cath_domains); $i++) {
+    if ($cath_domains[$i]->{seq_start} <= $cath_domains[$i-1]->{seq_end}) {
+      my $overlap = $cath_domains[$i-1]->{seq_end} - $cath_domains[$i]->{seq_start};
+      $cath_domains[$i-1]->{seq_end} -= int($overlap/2);
+      $cath_domains[$i]->{seq_start} = $cath_domains[$i-1]->{seq_end}+1;
+    }
+  }
+
+  # --neg: insert NODOM pseudo-domains into gaps longer than $min_nodom,
+  # plus one covering whatever follows the last domain
+  if ($neg_doms) {
+    my @ncath_domains;
+    my $prev_dom={seq_end=>0};
+    for my $cur_dom ( @cath_domains) {
+      if ($cur_dom->{seq_start} - $prev_dom->{seq_end} > $min_nodom) {
+        my %new_dom = (seq_start=>$prev_dom->{seq_end}+1, seq_end => $cur_dom->{seq_start}-1, info=>'NODOM');
+        push @ncath_domains, \%new_dom;
+      }
+      push @ncath_domains, $cur_dom;
+      $prev_dom = $cur_dom;
+    }
+    my %new_dom = (seq_start=>$prev_dom->{seq_end}+1, seq_end=>$seq_length, info=>'NODOM');
+    if ($new_dom{seq_end} > $new_dom{seq_start}) {push @ncath_domains, \%new_dom;}
+
+    @cath_domains = @ncath_domains;
+  }
+
+  # --class: label with the CATH class instead of the description;
+  # every label is registered so the caller can look up its number
+  for my $cath (@cath_domains) {
+    if ($class && $cath->{cath_class}) {$cath->{info} = $cath->{cath_class};}
+    $cath->{info} = domain_name($cath->{info});
+  }
+
+  my @feats = ();
+
+  if ($lav) {
+    for my $d_ref (@cath_domains) {
+      push @feats, [$d_ref->{seq_start}, $d_ref->{seq_end}, $d_ref->{info} ];
+    }
+  }
+  else {
+    for my $d_ref (@cath_domains) {
+      push @feats, [$d_ref->{seq_start}, '-',  $d_ref->{seq_end}, $d_ref->{info} ];
+    }
+  }
+
+  return \@feats;
+
+}
+
+# domain_name($value) registers a domain label in the file-level
+# %domains table: a label seen for the first time is assigned the
+# next value of $domain_cnt.  The label itself is returned unchanged
+# (no comments or numbers are stripped from it here).
+#
+#
+
+sub domain_name {
+  my $label = shift;
+
+  # first sighting: hand out the next domain number
+  $domains{$label} = ++$domain_cnt unless defined($domains{$label});
+
+  return $label;
+}
+
+__END__
+
+=pod
+
+=head1 NAME
+
+ann_pdb_cath.pl
+
+=head1 SYNOPSIS
+
+ ann_pdb_cath.pl --neg 'sp|P09488|GSTM1_HUMAN' | accession.file
+
+=head1 OPTIONS
+
+ -h	short help
+ --help include description
+
+ --lav  produce lav2plt.pl annotation format, only show domains/repeats
+ --neg-doms,  -- report domains between annotated domains as NODOM
+                 (also --neg, --neg_doms)
+ --min_nodom=10  -- minimum length between domains for NODOM
+
+ --host, --user, --password, --port --db -- info for mysql database
+
+=head1 DESCRIPTION
+
+C<ann_pdb_cath.pl> extracts CATH domain information for PDB chains
+from the C<cath_doms>, C<cath_names>, and C<pdb_chain_up> tables of a
+MySQL database.  Currently, the program works with database sequence
+descriptions in several formats:
+
+ >gi|4388890|pdb|1GTUA|sp|P09488
+
+(pdbaa_off entries, which also carry a UniProt accession) and
+
+ >gi|4388890|pdb|1GTU|A
+ >pdb|1GTU|A
+
+Adjacent domains that overlap are adjusted so that they no longer
+overlap.  With C<--neg>, regions longer than C<--min_nodom> residues
+that lie between annotated domains are reported as NODOM.  With
+C<--class>, domains are labeled with their CATH class rather than
+their description.
+
+C<ann_pdb_cath.pl> is designed to be used by the B<FASTA> programs with
+the C<-V \!ann_pdb_cath.pl> or C<-V "\!ann_pdb_cath.pl --neg"> option.
+
+=head1 AUTHOR
+
+William R. Pearson, wrp at virginia.edu
+
+=cut
diff --git a/scripts/ann_pdb_vast.pl b/scripts/ann_pdb_vast.pl
new file mode 100755
index 0000000..130af5f
--- /dev/null
+++ b/scripts/ann_pdb_vast.pl
@@ -0,0 +1,320 @@
+#!/usr/bin/perl -w
+
+################################################################
+# copyright (c) 2014, 2015 by William R. Pearson and The Rector &
+# Visitors of the University of Virginia */
+################################################################
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under this License is distributed on an "AS
+# IS" BASIS, WITHOUT WRRANTIES OR CONDITIONS OF ANY KIND, either
+# express or implied.  See the License for the specific language
+# governing permissions and limitations under the License. 
+################################################################
+
+# ann_pdb_vast.pl gets an annotation file from fasta36 -V with a line of the form:
+
+# gi|62822551|sp|P00502|GSTA1_RAT Glutathione S-transfer\n  (at least from pir1.lseg)
+#
+# it must:
+# (1) read in the line
+# (2) parse it to get the up_acc
+# (3) return the tab delimited features
+#
+
+# this version is designed for various formats of the pdbaa/pdbaa_off NCBI files with the lines:
+# >gi|4388890|pdb|1GTUA|sp|P09488  or 
+# >gi|4388890|pdb|1GTU|A
+# if I can find |sp|P09488, I will use that, otherwise I will use
+# |pdb|1GTU|A (concatenated) and a different part of the cath_dom
+# database
+#
+
+use strict;
+
+use LWP::Simple;
+use Getopt::Long;
+use HTML::TableExtract;
+use Pod::Usage;
+
+# package globals that GetOptions() below can set
+use vars qw($host $db $port $user $pass);
+
+# option flags (0 = off) and the minimum gap, in residues, reported as NODOM
+# NOTE(review): $class has no command-line option and is not referenced
+# elsewhere in this script
+my ($neg_doms, $lav, $shelp, $help, $class) = (0, 0, 0, 0, 0);
+my ($min_nodom) = (10);
+
+# separator written between a domain name and its number in the output;
+# the first value is immediately replaced by '~'
+my $color_sep_str = " :";
+$color_sep_str = '~';
+
+# --neg, --neg_doms and --neg-doms all set the same flag
+GetOptions(
+    "host=s" => \$host,
+    "db=s" => \$db,
+    "lav" => \$lav,
+    "neg" => \$neg_doms,
+    "neg_doms" => \$neg_doms,
+    "neg-doms" => \$neg_doms,
+    "min_nodom=i" => \$min_nodom,
+    "h|?" => \$shelp,
+    "help" => \$help,
+    );
+
+# show usage on -h/-?, the full POD on --help, and usage again when there
+# is no argument and STDIN is neither a pipe nor a plain file
+pod2usage(1) if $shelp;
+pod2usage(exitstatus => 0, verbose => 2) if $help;
+pod2usage(1) unless (@ARGV || -p STDIN || -f STDIN);
+
+################################################################
+# strategy for connecting to NCBI to get list of domains
+
+# NOTE(review): this lexical $db hides the package $db that --db sets, and
+# neither it, $report, nor this $pdb_acc_chain is read later in the script
+# (the esearch URL below spells out db=structure itself)
+my $db = "structure";
+my $report = "FASTA";
+my $pdb_acc_chain = "";
+
+my $utils = "http://www.ncbi.nlm.nih.gov/entrez/eutils";
+my $vast_url = "http://www.ncbi.nlm.nih.gov/Structure/vastplus/vastplus.cgi?cmd=v&uid=";
+
+# get_vast_info() appends the search term to this URL
+my $esearch = "$utils/esearch.fcgi?db=structure&retmax=1&term=";
+
+################################################################
+
+# domain name => domain number; only NODOM is ever present in this script
+my %domains = (NODOM=>0);
+my $domain_cnt = 0;
+
+# NOTE(review): these file-level variables are not used below in this
+# script; show_annots() declares its own lexicals
+my ($tmp, $gi, $sdb, $acc, $id, $use_acc);
+
+# get the query
+# (first argument is the sequence identifier, the optional second one its
+# length; a missing or zero length becomes 1)
+my ($query, $seq_len) = @ARGV;
+$seq_len = 1 unless $seq_len;
+
+$query =~ s/^>// if ($query);
+
+# one hash ref per sequence, as returned by show_annots()
+my @annots = ();
+
+#if it's a file I can open, read and parse it
+# (an argument without '|' is not treated as an identifier; it stays in
+# @ARGV, so <> reads identifiers line by line from it, or from STDIN)
+unless ($query && $query =~ m/\|/) {
+
+  while (my $a_line = <>) {
+    $a_line =~ s/^>//;
+    chomp $a_line;
+    push @annots, show_annots($lav,$a_line);
+  }
+}
+else {
+  push @annots, show_annots($lav, "$query $seq_len");
+}
+
+# print a '>' header per sequence followed by one tab-separated line per
+# annotation; unless --lav, a last field found in %domains gets its number
+# appended after $color_sep_str
+for my $seq_annot (@annots) {
+  print ">",$seq_annot->{seq_info},"\n";
+  for my $annot (@{$seq_annot->{list}}) {
+    if (!$lav && defined($domains{$annot->[-1]})) {
+      $annot->[-1] .= $color_sep_str.$domains{$annot->[-1]};
+    }
+    print join("\t",@$annot),"\n";
+  }
+}
+
+exit(0);
+
+# show_annots($lav, $annot_line)
+#
+# Parses one annotation request ("identifier [length]"), derives the
+# PDB accession+chain string, and returns a hash ref with
+#   seq_info => the identifier as given
+#   list     => whatever get_vast_annots() returns for that accession
+#
+# Identifier forms handled:
+#   gi|<gi>|pdb|<acc>|sp|<up_acc>  -- 4th field is the accession
+#   gi|<gi>|pdb|<id>|<chain>       -- id and chain are concatenated
+#   sp|<acc>|...                   -- 2nd field is the accession
+#   pdb|<id>|<chain>               -- id and chain are concatenated
+#   anything else                  -- used unchanged
+sub show_annots {
+  my ($lav, $annot_line) = @_;
+
+  my ($query, $seq_len) = split(/\s+/,$annot_line);
+
+  # lines read from a file or STDIN may be empty, may carry no length, or
+  # may be followed by description text; use the same default (1) as the
+  # command-line path so later numeric comparisons are well defined
+  $query = '' unless defined($query);
+  $seq_len = 1 unless (defined($seq_len) && $seq_len =~ m/^\d+$/ && $seq_len > 0);
+
+  my %annot_data = (seq_info => $query);
+
+  my ($tmp, $gi, $pdb, $pdb_acc, $pdb_id, $pdb_chain, $sdb, $up_acc, $off_flag);
+
+  $off_flag = 0;
+  if ($query =~ m/^gi\|/) {
+    if ($query =~ m/\|sp\|/) {
+      ($tmp, $gi, $pdb, $pdb_acc, $sdb, $up_acc) = split(/\|/,$query);
+      # strip a version suffix; the field is absent when nothing follows "|sp|"
+      $up_acc =~ s/\.\d+$// if defined($up_acc);
+      $off_flag = 1;
+    }
+    elsif ($query =~ m/\|pdb\|/) {
+      ($tmp, $gi, $pdb, $pdb_id, $pdb_chain) = split(/\|/,$query);
+      $pdb_id = '' unless defined($pdb_id);
+      $pdb_chain = '' unless defined($pdb_chain);
+      $pdb_acc = $pdb_id . $pdb_chain;
+    }
+  }
+  elsif ($query =~ m/^sp\|/) {
+    ($pdb, $pdb_acc) = split(/\|/,$query);
+  }
+  elsif  ($query =~ m/^pdb\|(\w{4})\|(\w)/) {
+    $pdb_acc = $1 . $2;
+  }
+  else {
+    $pdb_acc = $query;
+  }
+
+  # get_vast_annots() reads only its first three arguments; $off_flag is
+  # passed along as in the sibling annotation scripts
+  $annot_data{list} = get_vast_annots($lav, $pdb_acc, $seq_len, $off_flag);
+
+  return \%annot_data;
+}
+
+# get_vast_annots($lav, $pdb_acc, $seq_length)
+#
+#   $lav        -- true: return [start, end, name] rows (lav2plt format);
+#                  false: return [start, '-', end, name] rows
+#   $pdb_acc    -- accession+chain string handed to get_vast_info()
+#   $seq_length -- sequence length, used for the trailing NODOM region
+#
+# Returns an array ref of annotation rows, or nothing when no domains
+# were found.  Reads the file-level $neg_doms and $min_nodom settings.
+sub get_vast_annots {
+  my ($lav, $pdb_acc, $seq_length) = @_;
+
+  my $domain_href = get_vast_info($pdb_acc);
+
+  return unless (scalar(@$domain_href));
+
+  # remove overlaps between neighbouring domains by splitting the shared
+  # region at its midpoint: the earlier domain gives up half of the
+  # overlap (whole residues only) and the later one starts right after it
+  for (my $i=1; $i < scalar(@$domain_href); $i++) {
+    if ($domain_href->[$i]{seq_start} <= $domain_href->[$i-1]{seq_end}) {
+      my $overlap = $domain_href->[$i-1]{seq_end} - $domain_href->[$i]{seq_start};
+      $domain_href->[$i-1]{seq_end} -= int($overlap/2);
+      $domain_href->[$i]{seq_start} = $domain_href->[$i-1]{seq_end}+1;
+    }
+  }
+
+  # --neg: insert NODOM pseudo-domains into gaps longer than $min_nodom,
+  # plus one covering whatever follows the last domain
+  if ($neg_doms) {
+    my @nvast_domains;
+    my $prev_dom={seq_end=>0};
+    for my $cur_dom ( @$domain_href) {
+      if ($cur_dom->{seq_start} - $prev_dom->{seq_end} > $min_nodom) {
+        my %new_dom = (seq_start=>$prev_dom->{seq_end}+1, seq_end => $cur_dom->{seq_start}-1, info=>'NODOM');
+        push @nvast_domains, \%new_dom;
+      }
+      push @nvast_domains, $cur_dom;
+      $prev_dom = $cur_dom;
+    }
+    my %new_dom = (seq_start=>$prev_dom->{seq_end}+1, seq_end=>$seq_length, info=>'NODOM');
+    if ($new_dom{seq_end} > $new_dom{seq_start}) {push @nvast_domains, \%new_dom;}
+
+    $domain_href = \@nvast_domains;
+  }
+
+  my @feats = ();
+
+  if ($lav) {
+    for my $d_ref (@$domain_href) {
+      push @feats, [$d_ref->{seq_start}, $d_ref->{seq_end}, $d_ref->{info} ];
+    }
+  }
+  else {
+    for my $d_ref (@$domain_href) {
+      push @feats, [$d_ref->{seq_start}, '-', $d_ref->{seq_end}, $d_ref->{info} ];
+    }
+  }
+
+  return \@feats;
+
+}
+
+################################################################
+# get_vast_info ( pdb_acc_chain )
+#
+# pdb_acc_chain is a four-character PDB id plus a one-character chain,
+# either concatenated (1ABCD) or delimited (1ABC|D), with an optional
+# leading "pdb|".
+#
+# Looks up the structure id for the PDB entry with esearch, fetches the
+# VAST+ page for that id, and reads the residue ranges listed for the
+# requested chain from the first table on the page.
+#
+# Returns an array ref of {info, seq_start, seq_end} hashes sorted by
+# seq_start; every part of the n-th listed domain is named "VASTdom<n>".
+# The list is empty when the accession cannot be parsed, a request
+# fails, or the page has no usable table.
+#
+
+sub get_vast_info {
+  my $pdb_acc_chain = shift @_;
+
+  my @part_list = ();
+
+  return \@part_list unless defined($pdb_acc_chain);
+
+  $pdb_acc_chain =~ s/^pdb\|//;
+
+  # accept both the concatenated and the '|'-delimited form
+  my ($pdb_acc, $pdb_chain) = ($pdb_acc_chain =~ m/(\w{4})\|?(\w)/);
+  return \@part_list unless (defined($pdb_acc) && defined($pdb_chain));
+
+  # get() returns undef when the request fails
+  my $esearch_result = get($esearch . $pdb_acc . "[pdb+accession]");
+  unless (defined($esearch_result)) {
+    warn "ann_pdb_vast.pl: esearch request failed for $pdb_acc\n";
+    return \@part_list;
+  }
+
+  # no <Id> element means the search found nothing for this accession
+  my ($mmdb_id) = ($esearch_result =~  m|<Id>(\d+)</Id>|s);
+  return \@part_list unless defined($mmdb_id);
+
+  my $vast_dom_html = get($vast_url.$mmdb_id);
+  unless (defined($vast_dom_html)) {
+    warn "ann_pdb_vast.pl: VAST+ request failed for id $mmdb_id\n";
+    return \@part_list;
+  }
+
+  my $te = HTML::TableExtract->new(depth=>0, count=>0);
+
+  $te->parse($vast_dom_html);
+
+  my ($ts) = $te->tables();
+  unless (defined($ts)) {
+    warn "ann_pdb_vast.pl: no domain table found for id $mmdb_id\n";
+    return \@part_list;
+  }
+
+  my @ts_rows = $ts->rows();
+
+  # the first row is the table header
+  shift(@ts_rows);
+
+  my $current_chain = "";
+
+  my @domain_list = ();
+
+  for my $row ( @ts_rows ) {
+    # a non-empty first column starts a new chain, written as "[X]...";
+    # rows with an empty first column belong to the chain seen last
+    if ($row->[0]) {
+      my ($chain) = ($row->[0] =~ m/^\[(\w+)\]/);
+      $current_chain = defined($chain) ? $chain : "";
+    }
+    next unless ($current_chain eq $pdb_chain);
+    next if (defined($row->[1]) && $row->[1] =~ m/^Entire/);
+
+    my $range = $row->[2];
+    next unless defined($range);
+
+    # $range can actually be a list of ranges.  Need to remove the spaces
+    $range =~ s/\s+//g;
+
+    push @domain_list, $range;
+  }
+
+  # here we have the list of domains, indexed, but some domains will have multiple parts
+
+  my $dom_cnt=1;
+  for my $range (@domain_list) {
+    for my $part ( split(/,/,$range) ) {
+      my ($start, $end) = ($part =~ m/(\d+)\s*\-\s*(\d+)/);
+      # ignore anything that is not a start-end pair
+      next unless (defined($start) && defined($end));
+      push @part_list, {info=>"VASTdom".$dom_cnt, seq_start=>$start, seq_end=>$end};
+    }
+    $dom_cnt++;
+  }
+
+  @part_list = sort { $a->{seq_start} <=> $b->{seq_start} } @part_list;
+
+  return \@part_list;
+}
+
+__END__
+
+=pod
+
+=head1 NAME
+
+ann_pdb_vast.pl
+
+=head1 SYNOPSIS
+
+ ann_pdb_vast.pl --neg 'sp|P09488|GSTM1_HUMAN' | accession.file
+
+=head1 OPTIONS
+
+ -h	short help
+ --help include description
+
+ --lav  produce lav2plt.pl annotation format, only show domains/repeats
+ --neg-doms,  -- report domains between annotated domains as NODOM
+                 (also --neg, --neg_doms)
+ --min_nodom=10  -- minimum length between domains for NODOM
+
+=head1 DESCRIPTION
+
+C<ann_pdb_vast.pl> extracts domain information for PDB chains from the
+NCBI VAST+ web service.  Currently, the program works with database
+sequence descriptions in several formats:
+
+ >pdb|3F6F|A	-- database|accession
+
+ >gi|262118558|pdb|3F6F|A  -- with GI number
+
+C<ann_pdb_vast.pl> is designed to be used by the B<FASTA> programs with
+the C<-V \!ann_pdb_vast.pl> or C<-V "\!ann_pdb_vast.pl --neg"> option.
+
+=head1 AUTHOR
+
+William R. Pearson, wrp at virginia.edu
+
+=cut
diff --git a/scripts/ann_pfam27.pl b/scripts/ann_pfam27.pl
new file mode 100755
index 0000000..505a711
--- /dev/null
+++ b/scripts/ann_pfam27.pl
@@ -0,0 +1,656 @@
+#!/usr/bin/perl -w
+
+################################################################
+# copyright (c) 2014 by William R. Pearson and The Rector &
+# Visitors of the University of Virginia */
+################################################################
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under this License is distributed on an "AS
+# IS" BASIS, WITHOUT WRRANTIES OR CONDITIONS OF ANY KIND, either
+# express or implied.  See the License for the specific language
+# governing permissions and limitations under the License. 
+################################################################
+
+# ann_pfam_e.pl gets an annotation file from fasta36 -V with a line of the form:
+
+# gi|62822551|sp|P00502|GSTA1_RAT Glutathione S-transfer\n  (at least from pir1.lseg)
+#
+# it must:
+# (1) read in the line
+# (2) parse it to get the up_acc
+# (3) return the tab delimited features
+#
+
+# this version only annotates sequences known to Pfam:pfamseq:
+# >pf26|164|O57809|1A1D_PYRHO
+# and only provides domain information
+
+use strict;
+
+use DBI;
+use Getopt::Long;
+use Pod::Usage;
+
+# connection settings are package globals; GetOptions() below can override them
+use vars qw($host $db $port $user $pass);
+
+# NOTE(review): no use of $hostname is visible in the part of the script
+# reviewed here -- confirm before removing
+my $hostname = `/bin/hostname`;
+
+# default MySQL connection parameters
+($host, $db, $port, $user, $pass)  = ("wrpxdb.its.virginia.edu", "pfam27", 0, "web_user", "fasta_www");
+#$host = 'xdb';
+
+# option flags (0 = off)
+my ($auto_reg,$rpd2_fams, $vdoms, $neg_doms, $lav, $no_doms, $no_clans, $pf_acc, $no_over, $acc_comment, $shelp, $help) = 
+  (0, 0, 0, 0, 0,0, 0, 0, 0, 0, 0, 0);
+# minimum lengths for a NODOM region and for a virtual domain (--vdoms)
+my ($min_nodom, $min_vdom) = (10,10);
+
+# separator written between a domain name and its number in the output;
+# the first value is immediately replaced by '~'
+my $color_sep_str = " :";
+$color_sep_str = '~';
+
+
+# several options accept both '-' and '_' spellings
+GetOptions(
+    "host=s" => \$host,
+    "db=s" => \$db,
+    "user=s" => \$user,
+    "password=s" => \$pass,
+    "port=i" => \$port,
+    "lav" => \$lav,
+    "acc_comment" => \$acc_comment,
+    "no-over" => \$no_over,
+    "no_over" => \$no_over,
+    "no-clans" => \$no_clans,
+    "no_clans" => \$no_clans,
+    "neg" => \$neg_doms,
+    "neg_doms" => \$neg_doms,
+    "neg-doms" => \$neg_doms,
+    "min_nodom=i" => \$min_nodom,
+    "pfacc" => \$pf_acc,
+    "RPD2" => \$rpd2_fams,
+    "auto_reg" => \$auto_reg,
+    "vdoms" => \$vdoms,
+    "h|?" => \$shelp,
+    "help" => \$help,
+    );
+
+# show usage on -h/-?, the full POD on --help, and usage again when there
+# is no argument and STDIN is neither a pipe nor a plain file
+pod2usage(1) if $shelp;
+pod2usage(exitstatus => 0, verbose => 2) if $help;
+pod2usage(1) unless (@ARGV || -p STDIN || -f STDIN);
+
+# build the DBI data source name; host and port are added only when set
+my $connect = "dbi:mysql(AutoCommit=>1,RaiseError=>1):database=$db";
+$connect .= ";host=$host" if $host;
+$connect .= ";port=$port" if $port;
+
+my $dbh = DBI->connect($connect,
+		       $user,
+		       $pass
+		      ) or die $DBI::errstr;
+
+# shared bookkeeping for domain names and numbers; NODOM is entry 0.
+# NOTE(review): most of the code that fills these tables lies outside the
+# part of the script reviewed here
+my %annot_types = ();
+my %domains = (NODOM=>0);
+my %domain_clan = (NODOM => {clan_id => 'NODOM', clan_acc=>0, domain_cnt=>0});
+my @domain_list = (0);
+my $domain_cnt = 0;
+
+# code ref handed to show_annots(), which calls it with the executed
+# statement handle and the sequence length
+my $get_annot_sub = \&get_pfam_annots;
+
+# domains (in_full = 1) for one pfamseq accession, ordered by seq_start;
+# includes the model_start/model_end/model_length columns
+my $get_pfam_acc = $dbh->prepare(<<EOSQL);
+
+SELECT seq_start, seq_end, model_start, model_end, model_length, auto_pfamA, pfamA_acc, pfamA_id, auto_pfamA_reg_full, domain_evalue_score as evalue, length
+FROM pfamseq
+JOIN pfamA_reg_full_significant using(auto_pfamseq)
+JOIN pfamA USING (auto_pfamA)
+WHERE in_full = 1
+AND  pfamseq_acc=?
+ORDER BY seq_start
+
+EOSQL
+
+# the same domains, reached from a 'ref' accession: seqdb_demo2.annot is
+# joined to itself on prot_id to pair the 'ref' entry with an 'sp' entry
+# whose accession matches pfamseq_acc.
+# NOTE(review): unlike $get_pfam_acc, this query does not select the
+# model_start/model_end/model_length columns
+my $get_pfam_refacc = $dbh->prepare(<<EOSQL);
+
+SELECT seq_start, seq_end, auto_pfamA, pfamA_acc, pfamA_id, auto_pfamA_reg_full, domain_evalue_score as evalue, length
+FROM pfamseq
+JOIN pfamA_reg_full_significant using(auto_pfamseq)
+JOIN pfamA USING (auto_pfamA)
+JOIN seqdb_demo2.annot as sa1 on(sa1.acc=pfamseq_acc and sa1.db='sp')
+JOIN seqdb_demo2.annot as sa2 using(prot_id)
+WHERE in_full = 1
+AND  sa2.acc=?
+AND  sa2.db='ref'
+ORDER BY seq_start
+
+EOSQL
+
+# statement used for the current lookup; show_annots() reassigns it
+my $get_annots_sql = $get_pfam_acc;
+
+# domains (in_full = 1) for one pfamseq_id, ordered by seq_start; also
+# without the model_* columns
+my $get_pfam_id = $dbh->prepare(<<EOSQL);
+
+SELECT seq_start, seq_end, auto_pfamA, pfamA_acc, pfamA_id, auto_pfamA_reg_full, domain_evalue_score as evalue, length
+FROM pfamseq
+JOIN pfamA_reg_full_significant using(auto_pfamseq)
+JOIN pfamA USING (auto_pfamA)
+WHERE in_full=1
+AND  pfamseq_id=?
+ORDER BY seq_start
+
+EOSQL
+
+# clan accession and id for one auto_pfamA
+my $get_pfam_clan = $dbh->prepare(<<EOSQL);
+
+SELECT clan_acc, clan_id
+FROM clans
+JOIN clan_membership using(auto_clan)
+WHERE auto_pfamA=?
+
+EOSQL
+
+# every (auto_pfamA, clan) pair in ljm_db.RPD2_final_fams that has a clan;
+# executed only with --RPD2
+my $get_rpd2_clans = $dbh->prepare(<<EOSQL);
+
+SELECT auto_pfamA, clan
+FROM ljm_db.RPD2_final_fams
+WHERE clan is not NULL
+
+EOSQL
+
+# -- LEFT JOIN clan_membership USING (auto_pfamA)
+# -- LEFT JOIN clans using(auto_clan)
+
+# file-level scratch variables; show_annots() assigns them directly
+my ($tmp, $gi, $sdb, $acc, $id, $use_acc);
+
+# get the query
+# (first argument is the sequence identifier, the optional second one its
+# length; a missing length becomes 0)
+my ($query, $seq_len) = @ARGV;
+$seq_len = 0 unless defined($seq_len);
+
+$query =~ s/^>// if ($query);
+
+# one hash ref per sequence, as returned by show_annots()
+my @annots = ();
+
+# auto_pfamA => clan, loaded only with --RPD2
+my %rpd2_clan_fams = ();
+
+if ($rpd2_fams) {
+  $get_rpd2_clans->execute();
+  my ($auto_pfam, $auto_clan);
+  while (($auto_pfam, $auto_clan)=$get_rpd2_clans->fetchrow_array()) {
+    $rpd2_clan_fams{$auto_pfam} = $auto_clan;
+  }
+}
+
+#if it's a file I can open, read and parse it
+# (an argument without '|' or ':' is not treated as an identifier; it stays
+# in @ARGV, so <> reads identifiers line by line from it, or from STDIN)
+unless ($query && $query =~ m/[\|:]/) {
+
+  while (my $a_line = <>) {
+    $a_line =~ s/^>//;
+    chomp $a_line;
+    push @annots, show_annots($a_line, $get_annot_sub);
+  }
+}
+else {
+  push @annots, show_annots("$query $seq_len", $get_annot_sub);
+}
+
+# print a '>' header per sequence followed by one tab-separated line per
+# annotation.  Unless --lav, a last field that is a registered domain name
+# is rewritten as <name>[{comment}]<sep><number>, with name and number
+# taken from domain_num().
+for my $seq_annot (@annots) {
+  print ">",$seq_annot->{seq_info},"\n";
+  for my $annot (@{$seq_annot->{list}}) {
+    if (!$lav && defined($domains{$annot->[-1]})) {
+      my ($a_name, $a_num) = domain_num($annot->[-1],$domains{$annot->[-1]});
+
+      # assemble the label in output order, so that the --acc_comment
+      # text is kept rather than overwritten by the final assignment
+      my $label = $a_name;
+      if ($acc_comment && defined($domain_list[$a_num])) {
+        # presumably @domain_list holds the accession recorded for each
+        # domain number -- it is filled in elsewhere in the script
+        $label .= "{$domain_list[$a_num]}";
+      }
+      $annot->[-1] = $label.$color_sep_str.$a_num;
+    }
+    print join("\t",@$annot),"\n";
+  }
+}
+
+exit(0);
+
+# show_annots($query_len, $get_annot_sub)
+#
+#   $query_len     -- "identifier [length]" as one whitespace-separated string
+#   $get_annot_sub -- code ref called with the executed statement handle and
+#                     the length; its return value becomes the 'list' entry
+#
+# Chooses the lookup statement from the identifier's prefix, executes it,
+# and returns a hash ref with seq_info (the identifier) and list.
+# Assigns the file-level $use_acc, $get_annots_sql, $tmp, $gi, $sdb, $acc
+# and $id variables.
+sub show_annots {
+  my ($query_len, $get_annot_sub) = @_;
+
+  my ($annot_line, $seq_len) = split(/\s+/,$query_len);
+
+  # NOTE(review): declared but not used in this sub
+  my $pfamA_acc;
+
+  my %annot_data = (seq_info=>$annot_line);
+
+  # default: look up by accession in pfamseq
+  $use_acc = 1;
+  $get_annots_sql = $get_pfam_acc;
+
+  if ($annot_line =~ m/^pf26\|/) {
+    # pf26|<auto_pfamseq>|<acc>|<id>: switch the connection to RPD2_pfam
+    ($sdb, $gi, $acc, $id) = split(/\|/,$annot_line);
+    $dbh->do("use RPD2_pfam");
+  }
+  elsif ($annot_line =~ m/^gi\|/) {
+    # gi|<gi>|<db>|<acc>|<id>: a 'ref' database field selects the refacc query
+    ($tmp, $gi, $sdb, $acc, $id) = split(/\|/,$annot_line);
+    if ($sdb =~ m/ref/) {
+	$get_annots_sql = $get_pfam_refacc;
+    }
+  }
+  elsif ($annot_line =~ m/^sp\|/) {
+    ($sdb, $acc, $id) = split(/\|/,$annot_line);
+  }
+  elsif ($annot_line =~ m/^ref\|/) {
+    ($sdb, $acc) = split(/\|/,$annot_line);
+    $get_annots_sql = $get_pfam_refacc;
+  }
+  elsif ($annot_line =~ m/^tr\|/) {
+    ($sdb, $acc, $id) = split(/\|/,$annot_line);
+  }
+  elsif ($annot_line =~ m/^SP:/i) {
+    # SP:<id> carries no accession, so look up by pfamseq_id instead
+    ($sdb, $id) = split(/:/,$annot_line);
+    $use_acc = 0;
+  }
+  else {
+    # anything else: treat the first word as an accession
+    $use_acc = 1;
+    ($acc) = split(/\s+/,$annot_line);
+  }
+
+  # run the lookup: by id for SP: names, otherwise by accession
+  unless ($use_acc) {
+    $get_annots_sql = $get_pfam_id;
+    $get_annots_sql->execute($id);
+  }
+  else {
+    # remove version number
+    $acc =~ s/\.\d+$//;
+    $get_annots_sql->execute($acc);
+  }
+
+  $annot_data{list} = $get_annot_sub->($get_annots_sql, $seq_len);
+
+  return \%annot_data;
+}
+
+sub get_pfam_annots {
+  my ($get_annots, $seq_length) = @_;
+
+  $seq_length = 0 unless $seq_length;
+
+  my @pf_domains = ();
+
+  # get the list of domains, sorted by start
+  while ( my $row_href = $get_annots->fetchrow_hashref()) {
+    if ($auto_reg) {
+      $row_href->{info} = $row_href->{auto_pfamA_reg_full};
+    }
+    elsif ($pf_acc) {
+      $row_href->{info} = $row_href->{pfamA_acc};
+    }
+    else {
+      $row_href->{info} = $row_href->{pfamA_id};
+    }
+
+    if ($row_href && $row_href->{length} > $seq_length && $seq_length == 0) { $seq_length = $row_href->{length};}
+
+    next if ($row_href->{seq_start} >= $seq_length);
+    if ($row_href->{seq_end} > $seq_length) {
+	$row_href->{seq_end} = $seq_length;
+    }
+
+    push @pf_domains, $row_href
+  }
+
+  # check for domain overlap, and resolve check for domain overlap
+  # (possibly more than 2 domains), choosing the domain with the best
+  # evalue
+
+  if($no_over && scalar(@pf_domains) > 1) {
+
+    my @tmp_domains = @pf_domains;
+    my @save_domains = ();
+
+    my $prev_dom = shift @tmp_domains;
+
+    while (my $curr_dom = shift @tmp_domains) {
+
+      my @overlap_domains = ($prev_dom);
+
+      my $diff = $prev_dom->{seq_end} - $curr_dom->{seq_start};
+      # check for overlap > domain_length/3
+
+      my ($prev_len, $cur_len) = ($prev_dom->{seq_end}-$prev_dom->{seq_start}+1, $curr_dom->{seq_end}-$curr_dom->{seq_start}+1);
+      my $inclusion = ((($curr_dom->{seq_start} >= $prev_dom->{seq_start}) && ($curr_dom->{seq_end} <= $prev_dom->{seq_end})) ||
+		       (($curr_dom->{seq_start} <= $prev_dom->{seq_start}) && ($curr_dom->{seq_end} >= $prev_dom->{seq_end})));
+
+      my $longer_len = ($prev_len > $cur_len) ? $prev_len : $cur_len;
+
+      while ($inclusion || ($diff > 0 && $diff > $longer_len/3)) {
+	push @overlap_domains, $curr_dom;
+	$curr_dom = shift @tmp_domains;
+	last unless $curr_dom;
+	$diff = $prev_dom->{seq_end} - $curr_dom->{seq_start};
+	($prev_len, $cur_len) = ($prev_dom->{seq_end}-$prev_dom->{seq_start}+1, $curr_dom->{seq_end}-$curr_dom->{seq_start}+1);
+	$longer_len = ($prev_len > $cur_len) ? $prev_len : $cur_len;
+	$inclusion = ((($curr_dom->{seq_start} >= $prev_dom->{seq_start}) && ($curr_dom->{seq_end} <= $prev_dom->{seq_end})) ||
+		      (($curr_dom->{seq_start} <= $prev_dom->{seq_start}) && ($curr_dom->{seq_end} >= $prev_dom->{seq_end})));
+      }
+
+      # check for overlapping domains; >1 because $prev_dom is always there
+      if (scalar(@overlap_domains) > 1 ) {
+	# if $rpd2_fams, check for a chosen one
+	if ($rpd2_fams) {
+	  for my $dom (@overlap_domains) {
+	    if ($rpd2_clan_fams{$dom->{auto_pfamA}}) {
+	      $prev_dom = $dom;
+	      last;
+	    }
+	  }
+	}
+	else {
+	  @overlap_domains = sort { $a->{evalue} <=> $b->{evalue} } @overlap_domains;
+	  $prev_dom = $overlap_domains[0];
+	}
+      }
+
+      # $prev_dom should be the best of the overlaps, and we are no longer overlapping > dom_length/3
+      push @save_domains, $prev_dom;
+      $prev_dom = $curr_dom;
+    }
+    if ($prev_dom) {push @save_domains, $prev_dom;}
+
+    @pf_domains = @save_domains;
+
+    # now check for smaller overlaps
+    for (my $i=1; $i < scalar(@pf_domains); $i++) {
+      if ($pf_domains[$i-1]->{seq_end} >= $pf_domains[$i]->{seq_start}) {
+	my $overlap = $pf_domains[$i-1]->{seq_end} - $pf_domains[$i]->{seq_start};
+	$pf_domains[$i-1]->{seq_end} -= int($overlap/2);
+	$pf_domains[$i]->{seq_start} = $pf_domains[$i-1]->{seq_end}+1;
+      }
+    }
+  }
+
+  # $vdoms -- virtual Pfam domains -- the equivalent of $neg_doms,
+  # but covering parts of a Pfam model that are not annotated.  split
+  # domains have been joined, so simply check beginning and end of
+  # each domain (but must also check for bounded-ness)
+  # only add when 10% or more is missing and missing length > $min_nodom
+
+  if ($vdoms && scalar(@pf_domains)) {
+    my @vpf_domains;
+
+    my $curr_dom = $pf_domains[0];
+    my $length = $curr_dom->{length};
+
+    my $prev_dom={seq_end=>0, pfamA_acc=>''};
+    my $prev_dom_end = 0;
+    my $next_dom_start = $length+1;
+
+    for (my $dom_ix=0; $dom_ix < scalar(@pf_domains); $dom_ix++ ) {
+      $curr_dom = $pf_domains[$dom_ix];
+
+      my $pfamA =  $curr_dom->{pfamA_acc};
+
+      # first, look left, is there a domain there (if there is,
+      # it should be updated right
+
+      # my $min_vdom = $curr_dom->{model_length} / 10;
+
+      if ($prev_dom->{pfamA_acc}) { # look for previous domain
+	$prev_dom_end = $prev_dom->{seq_end};
+      }
+
+      # there is a domain to the left, how much room is available?
+      my $left_dom_len = min($curr_dom->{seq_start}-$prev_dom_end-1, $curr_dom->{model_start}-1);
+      if ( $left_dom_len > $min_vdom) {
+	# there is room for a virtual domain
+	my %new_dom = (seq_start=> $curr_dom->{seq_start}-$left_dom_len,
+	               seq_end => $curr_dom->{seq_start}-1,
+		       info=>'@'.$curr_dom->{info},
+		       model_length=>$curr_dom->{model_length},
+		       model_end => $curr_dom->{model_start}-1,
+		       model_start => $left_dom_len,
+		       pfamA_acc=>$pfamA,
+		      );
+	push @vpf_domains, \%new_dom;
+      }
+
+      # save the current domain
+      push @vpf_domains, $curr_dom;
+      $prev_dom = $curr_dom;
+
+      if ($dom_ix < $#pf_domains) { # there is a domain to the right
+	# first, give all the extra space to the first domain (no splitting)
+	$next_dom_start = $pf_domains[$dom_ix+1]->{seq_start};
+      }
+      else {
+	$next_dom_start = $length;
+      }
+
+      # is there room for a virtual domain right
+	  
+      my $right_dom_len = min($next_dom_start-$curr_dom->{seq_end}-1, # space available 
+			      $curr_dom->{model_length}-$curr_dom->{model_end} # space needed
+			     );
+      if ( $right_dom_len > $min_vdom) {
+	my %new_dom = (seq_start=> $curr_dom->{seq_end}+1,
+		       seq_end=> $curr_dom->{seq_end}+$right_dom_len,
+		       info=>'@'.$pfamA,
+		       model_length => $curr_dom->{model_length},
+		       pfamA_acc=> $pfamA,
+		      );
+	push @vpf_domains, \%new_dom;
+	$prev_dom = \%new_dom;
+      }
+    }				# all done, check for last one
+
+    # $curr_dom=$pf_domains[-1];
+    # # my $min_vdom = $curr_dom->{model_length}/10;
+
+    # my $right_dom_len = min($length - $curr_dom->{seq_end}+1,  # space available 
+    # 			    $curr_dom->{model_length}-$curr_dom->{model_end} # space needed
+    # 			   );
+    # if ($right_dom_len > $min_vdom) {
+    #   my %new_dom = (seq_start=> $curr_dom->{seq_end}+1,
+    # 		     seq_end => $curr_dom->{seq_end}+$right_dom_len,
+    # 		     info=>'@'.$curr_dom->{pfamA_acc},
+    # 		     model_len=> $curr_dom->{model_len},
+    # 		     pfamA_acc => $curr_dom->{pfamA_acc},
+    # 		     model_start => $curr_dom->{model_end}+1,
+    # 		     model_end => $curr_dom->{model_len},
+    # 		     );
+
+    #   push @vpf_domains, \%new_dom;
+    # }
+
+    # @vpf_domains has both old @pf_domains and new neg-domains
+    @pf_domains = @vpf_domains;
+  }
+
+  if ($neg_doms) {
+    my @npf_domains;
+    my $prev_dom={seq_end=>0};
+    for my $curr_dom ( @pf_domains) {
+      if ($curr_dom->{seq_start} - $prev_dom->{seq_end} > $min_nodom) {
+	my %new_dom = (seq_start=>$prev_dom->{seq_end}+1, seq_end => $curr_dom->{seq_start}-1, info=>'NODOM');
+	push @npf_domains, \%new_dom;
+      }
+      push @npf_domains, $curr_dom;
+      $prev_dom = $curr_dom;
+    }
+    if ($seq_length - $prev_dom->{seq_end} > $min_nodom) {
+      my %new_dom = (seq_start=>$prev_dom->{seq_end}+1, seq_end=>$seq_length, info=>'NODOM');
+      if ($new_dom{seq_end} > $new_dom{seq_start}) {push @npf_domains, \%new_dom;}
+    }
+
+    # @npf_domains has both old @pf_domains and new neg-domains
+    @pf_domains = @npf_domains;
+  }
+
+  # now make sure we have useful names: colors
+
+  for my $pf (@pf_domains) {
+    $pf->{info} = domain_name($pf->{info}, $pf->{auto_pfamA}, $pf->{pfamA_acc});
+  }
+
+  my @feats = ();
+  for my $d_ref (@pf_domains) {
+    if ($lav) {
+      push @feats, [$d_ref->{seq_start}, $d_ref->{seq_end}, $d_ref->{info}];
+    }
+    else {
+      push @feats, [$d_ref->{seq_start}, '-', $d_ref->{seq_end},  $d_ref->{info} ];
+#      push @feats, [$d_ref->{seq_end}, ']', '-', ""];
+    }
+
+  }
+
+  return \@feats;
+}
+
+sub min {			# numerically smaller of two scalars
+  my ($lhs, $rhs) = @_;
+  return $lhs if ($lhs <= $rhs);	# on a tie the first argument wins
+  return $rhs;
+}
+
+sub max {			# numerically larger of two scalars
+  my ($lhs, $rhs) = @_;
+  return $lhs if ($lhs >= $rhs);	# on a tie the first argument wins
+  return $rhs;
+}
+
+# domain_name($value, $pfamA_acc) registers a domain label in %domains
+# (label -> running number from $domain_cnt) and returns the label to
+# display.  Unless $no_clans is set, a family for which $get_pfam_clan
+# returns a row is renamed "C.<clan_id>" (or <clan_acc> with $pf_acc)
+# and shares that clan's number.  A leading '@' is stripped for the
+# lookup and put back on the result.  (The earlier header described a
+# UniProt label clean-up that this code does not perform.)
+sub domain_name {
+
+  my ($value, $pfamA_acc) = @_;
+  my $is_virtual = 0;
+
+  if ($value =~ m/^@/) {	# remember the '@' prefix, look up the bare label
+    $is_virtual = 1;
+    $value =~ s/^@//;
+  }
+
+  # check for clan:
+  if ($no_clans) {
+    if (! defined($domains{$value})) {	# first time this label is seen
+      $domain_clan{$value} = 0;
+      $domains{$value} = ++$domain_cnt;
+      push @domain_list, $pfamA_acc;
+    }
+  }
+  elsif (!defined($domain_clan{$value})) {
+    ## only do this for new domains, old domains have known mappings
+
+    ## ways to highlight the same domain:
+    # (1) for clans, substitute clan name for family name
+    # (2) for clans, use the same color for the same clan, but don't change the name
+    # (3) for clans, combine family name with clan name, but use colors based on clan
+
+    # check to see if it's a clan
+    $get_pfam_clan->execute($pfamA_acc);
+
+    my $pfam_clan_href=0;
+
+    if ($pfam_clan_href=$get_pfam_clan->fetchrow_hashref()) {  # is a clan
+      my ($clan_id, $clan_acc) = @{$pfam_clan_href}{qw(clan_id clan_acc)};
+
+      # now check to see if we have seen this clan before (if so, do not increment $domain_cnt)
+      my $c_value = "C." . $clan_id;
+      if ($pf_acc) {$c_value = $clan_acc;}
+
+      $domain_clan{$value} = {clan_id => $clan_id,
+			      clan_acc => $clan_acc};
+
+      if ($domains{$c_value}) {	# clan already numbered: reuse its number
+	$domain_clan{$value}->{domain_cnt} =  $domains{$c_value};
+	$value = $c_value;
+      }
+      else {			# new clan: allocate the next number
+	$domain_clan{$value}->{domain_cnt} = ++ $domain_cnt;
+	$value = $c_value;
+	$domains{$value} = $domain_cnt;
+	push @domain_list, $pfamA_acc;
+      }
+    }
+    else {			# not a clan
+      $domain_clan{$value} = 0;
+      $domains{$value} = ++$domain_cnt;
+      push @domain_list, $pfamA_acc;
+    }
+  }
+  elsif ($domain_clan{$value} && $domain_clan{$value}->{clan_acc}) {	# known clan member
+    if ($pf_acc) {$value = $domain_clan{$value}->{clan_acc};}
+    else { $value = "C." . $domain_clan{$value}->{clan_id}; }
+  }
+  # the '@' form of a label gets the same number as the bare label
+  if ($is_virtual) {
+    $domains{'@'.$value} = $domains{$value};
+    $value = '@'.$value;
+  }
+  return $value;
+}
+
+# domain_num: rewrite the '@' that prefixes a virtual-domain name as a
+# leading 'v'; the number is passed through untouched.  Returns the
+# (name, number) pair.
+sub domain_num {
+  my ($label, $color_num) = @_;
+  $label =~ s/^@/v/;	# no-op unless $label starts with '@'
+  return ($label, $color_num);
+}
+
+__END__
+
+=pod
+
+=head1 NAME
+
+ann_feats.pl
+
+=head1 SYNOPSIS
+
+ ann_pfam_e.pl --neg-doms  'sp|P09488|GSTM1_HUMAN' | accession.file
+
+=head1 OPTIONS
+
+ -h	short help
+ --help include description
+ --no-over  : generate non-overlapping domains (equivalent to ann_pfam.pl)
+ --no-clans : do not use clans with multiple families from same clan
+ --neg-doms : report domains between annotated domains as NODOM
+                 (also --neg, --neg_doms)
+ --min_nodom=10  : minimum length between domains for NODOM
+
+ --host, --user, --password, --port --db : info for mysql database
+
+=head1 DESCRIPTION
+
+C<ann_pfam_e.pl> extracts domain information from the pfam mysql
+database.
+
+Currently, the program works with database sequence descriptions
+in several formats; the description lines below are examples of
+the forms that are recognized:
+
+ >gi|1705556|sp|P54670.1|CAF1_DICDI
+ >sp|P09488|GSTM1_HUMAN
+ >sp:CALM_HUMAN 
+
+C<ann_pfam_e.pl> uses the C<pfamA_reg_full_significant>, C<pfamseq>,
+and C<pfamA> tables of the C<pfam> database to extract domain
+information on a protein. 
+
+If the "--no-over" option is set, overlapping domains are selected and
+edited to remove overlaps.  For proteins with multiple overlapping
+domains (domains overlap by more than 1/3 of the domain length),
+C<ann_pfam_e.pl> selects the domain annotation with the best
+C<domain_evalue_score>.  When domains overlap by less than 1/3 of the
+domain length, they are shortened to remove the overlap.
+
+C<ann_pfam_e.pl> is designed to be used by the B<FASTA> programs with
+the C<-V \!ann_pfam_e.pl> or C<-V "\!ann_pfam_e.pl --neg"> option.
+
+=head1 AUTHOR
+
+William R. Pearson, wrp at virginia.edu
+
+=cut
diff --git a/scripts/ann_pfam28.pl b/scripts/ann_pfam28.pl
new file mode 100755
index 0000000..6a4079b
--- /dev/null
+++ b/scripts/ann_pfam28.pl
@@ -0,0 +1,782 @@
+#!/usr/bin/perl -w
+
+################################################################
+# copyright (c) 2014,2015 by William R. Pearson and The Rector &
+# Visitors of the University of Virginia */
+################################################################
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under this License is distributed on an "AS
+# IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+# express or implied.  See the License for the specific language
+# governing permissions and limitations under the License. 
+################################################################
+
+# ann_pfam.pl gets an annotation file from fasta36 -V with a line of the form:
+
+# gi|62822551|sp|P00502|GSTA1_RAT Glutathione S-transfer\n  (at least from pir1.lseg)
+#
+# it must:
+# (1) read in the line
+# (2) parse it to get the up_acc
+# (3) return the tab delimited features
+#
+
+# this version only annotates sequences known to Pfam:pfamseq:
+# and only provides domain information
+
+use strict;
+
+use DBI;
+use Getopt::Long;
+use Pod::Usage;
+
+use vars qw($host $db $port $user $pass);
+
+# database connection defaults; --host --db --port --user --password override them
+
+($host, $db, $port, $user, $pass)  = ("wrpxdb.its.virginia.edu", "pfam28", 0, "web_user", "fasta_www");
+#$host = 'xdb';
+#$host = 'localhost';
+#$db = 'RPD2_pfam28u';
+
+my ($auto_reg,$rpd2_fams, $neg_doms, $vdoms, $lav, $no_doms, $no_clans, $pf_acc, $acc_comment, $bound_comment, $shelp, $help) = 
+  (0, 0, 0, 0, 0,0, 0, 0, 0, 0, 0, 0,);
+my ($no_over, $split_over, $over_fract) = (0, 0, 3.0);
+
+my $color_sep_str = " :";
+$color_sep_str = '~';
+# minimum lengths for a NODOM region / a virtual domain
+my ($min_nodom, $min_vdom) = (10,10);
+# --over_fract takes a number (=f): --no-over divides the domain length by it
+GetOptions(
+    "host=s" => \$host,
+    "db=s" => \$db,
+    "user=s" => \$user,
+    "password=s" => \$pass,
+    "port=i" => \$port,
+    "lav" => \$lav,
+    "acc_comment" => \$acc_comment,
+    "bound_comment" => \$bound_comment,
+    "no-over" => \$no_over,
+    "no_over" => \$no_over,
+    "split-over" => \$split_over,
+    "split_over" => \$split_over,
+    "over_fract=f" => \$over_fract,
+    "over-fract=f" => \$over_fract,
+    "no-clans" => \$no_clans,
+    "no_clans" => \$no_clans,
+    "neg" => \$neg_doms,
+    "neg_doms" => \$neg_doms,
+    "neg-doms" => \$neg_doms,
+    "min_nodom=i" => \$min_nodom,
+    "vdoms" => \$vdoms,
+    "v_doms" => \$vdoms,
+    "pfacc" => \$pf_acc,
+    "RPD2" => \$rpd2_fams,
+    "auto_reg" => \$auto_reg,
+    "h|?" => \$shelp,
+    "help" => \$help,
+    );
+# -h: short usage; --help: full POD; no arguments and no piped/redirected input: usage
+pod2usage(1) if $shelp;
+pod2usage(exitstatus => 0, verbose => 2) if $help;
+pod2usage(1) unless (@ARGV || -p STDIN || -f STDIN);
+
+my $connect = "dbi:mysql(AutoCommit=>1,RaiseError=>1):database=$db";
+$connect .= ";host=$host" if $host;
+$connect .= ";port=$port" if $port;
+# a false return from connect() stops the script with the DBI error text
+my $dbh = DBI->connect($connect,
+		       $user,
+		       $pass
+		      ) or die $DBI::errstr;
+# bookkeeping shared with domain_name(): label -> number, label -> clan, number -> accession
+my %annot_types = ();
+my %domains = (NODOM=>0);
+my %domain_clan = (NODOM => {clan_id => 'NODOM', clan_acc=>0, domain_cnt=>0});
+my @domain_list = (0);
+my $domain_cnt = 0;
+
+my $pfamA_reg_full = 'pfamA_reg_full_significant';
+# the routine show_annots() calls to turn fetched rows into features
+my $get_annot_sub = \&get_pfam_annots;
+
+my @pfam_fields = qw(seq_start seq_end model_start model_end model_length pfamA_acc pfamA_id auto_pfamA_reg_full domain_evalue_score as evalue length);
+# domains of one protein, selected by pfamseq_acc
+my $get_pfam_acc = $dbh->prepare(<<EOSQL);
+SELECT seq_start, seq_end, model_start, model_end, model_length, pfamA_acc, pfamA_id, auto_pfamA_reg_full, domain_evalue_score as evalue, length
+FROM pfamseq
+JOIN $pfamA_reg_full using(pfamseq_acc)
+JOIN pfamA USING (pfamA_acc)
+WHERE in_full = 1
+AND  pfamseq_acc=?
+ORDER BY seq_start
+
+EOSQL
+# same columns, selected by a db='ref' accession mapped through seqdb_demo2.annot
+my $get_pfam_refacc = $dbh->prepare(<<EOSQL);
+
+SELECT seq_start, seq_end, model_start, model_end, model_length, pfamA_acc, pfamA_id, auto_pfamA_reg_full, domain_evalue_score as evalue, length
+FROM pfamseq
+JOIN $pfamA_reg_full using(pfamseq_acc)
+JOIN pfamA USING (pfamA_acc)
+JOIN seqdb_demo2.annot as sa1 on(sa1.acc=pfamseq_acc and sa1.db='sp')
+JOIN seqdb_demo2.annot as sa2 using(prot_id)
+WHERE in_full = 1
+AND  sa2.acc=?
+AND  sa2.db='ref'
+ORDER BY seq_start
+
+EOSQL
+# statement currently in use; show_annots() re-selects it for every line
+my $get_annots_sql = $get_pfam_acc;
+# same columns, selected by pfamseq_id instead of accession
+my $get_pfam_id = $dbh->prepare(<<EOSQL);
+
+SELECT seq_start, seq_end, model_start, model_end, model_length, pfamA_acc, pfamA_id, auto_pfamA_reg_full, domain_evalue_score as evalue, length
+FROM pfamseq
+JOIN $pfamA_reg_full using(pfamseq_acc)
+JOIN pfamA USING (pfamA_acc)
+WHERE in_full=1
+AND  pfamseq_id=?
+ORDER BY seq_start
+
+EOSQL
+# clan (if any) of one Pfam family; executed by domain_name()
+my $get_pfam_clan = $dbh->prepare(<<EOSQL);
+
+SELECT clan_acc, clan_id
+FROM clan
+JOIN clan_membership using(clan_acc)
+WHERE pfamA_acc=?
+
+EOSQL
+# family/clan pairs from ljm_db.RPD2_final_fams; only executed with --RPD2
+my $get_rpd2_clans = $dbh->prepare(<<EOSQL);
+
+SELECT auto_pfamA, clan
+FROM ljm_db.RPD2_final_fams
+WHERE clan is not NULL
+
+EOSQL
+
+# -- LEFT JOIN clan_membership USING (auto_pfamA)
+# -- LEFT JOIN clans using(auto_clan)
+
+my ($tmp, $gi, $sdb, $acc, $id, $use_acc);
+# Driver: annotate the description given on the command line, or every line read via <>
+# get the query
+my ($query, $seq_len) = @ARGV;
+$seq_len = 0 unless defined($seq_len);
+
+$query =~ s/^>// if ($query);
+
+my @annots = ();
+
+my %rpd2_clan_fams = ();
+
+if ($rpd2_fams) {
+  $get_rpd2_clans->execute();
+  my ($auto_pfam, $auto_clan);
+  while (($auto_pfam, $auto_clan)=$get_rpd2_clans->fetchrow_array()) {
+    $rpd2_clan_fams{$auto_pfam} = $auto_clan;
+  }
+}
+
+#if it's a file I can open, read and parse it
+unless ($query && $query =~ m/[\|:]/) {
+  # each input line is "description<TAB>length"; a leading '>' is removed
+  while (my $a_line = <>) {
+    $a_line =~ s/^>//;
+    chomp $a_line;
+    push @annots, show_annots($a_line, $get_annot_sub);
+  }
+}
+else {
+  # join with a TAB: show_annots() splits description and length on \t
+  push @annots, show_annots("$query\t$seq_len", $get_annot_sub);
+}
+# output: ">description", then one tab-separated feature per line
+for my $seq_annot (@annots) {
+  print ">",$seq_annot->{seq_info},"\n";
+  for my $annot (@{$seq_annot->{list}}) {
+    if (!$lav && defined($domains{$annot->[-1]})) {
+      my ($a_name, $a_num) = domain_num($annot->[-1],$domains{$annot->[-1]});
+      $annot->[-1] = $a_name;
+      my $tmp_a_num = $a_num;
+      $tmp_a_num =~ s/v$//;	# strip the virtual-domain suffix to index @domain_list
+      if ($acc_comment) {
+	$annot->[-1] .= "{$domain_list[$tmp_a_num]}";
+      }
+      if ($bound_comment) {
+	$annot->[-1] .= $color_sep_str.$annot->[0].":".$annot->[2];
+      }
+      $annot->[-1] .= $color_sep_str.$a_num;
+    }
+    print join("\t",@$annot),"\n";
+  }
+}
+
+exit(0);
+
+# show_annots: parse one "description<TAB>length" line, run the matching
+# Pfam query and return {seq_info=>description, list=>features}; returns
+# an empty list (after a warning) when no accession can be extracted.
+sub show_annots {
+  my ($query_len, $get_annot_sub) = @_;
+  my ($annot_line, $seq_len) = split(/\t/,$query_len);
+  # parse into fresh lexicals so that nothing is inherited from the
+  # previous description line when this one has an unknown format
+  my ($tmp, $gi, $sdb, $acc, $id, $pfamA_acc);
+  my %annot_data = (seq_info=>$annot_line);
+
+  $use_acc = 1;
+  $get_annots_sql = $get_pfam_acc;
+
+  if ($annot_line =~ m/^pf\d+\|/) {
+    ($sdb, $gi, $pfamA_acc, $acc, $id) = split(/\|/,$annot_line);
+#    $dbh->do("use RPD2_pfam");
+  }
+  elsif ($annot_line =~ m/^gi\|/) {
+    ($tmp, $gi, $sdb, $acc, $id) = split(/\|/,$annot_line);
+    if ($sdb =~ m/ref/) {
+	$get_annots_sql = $get_pfam_refacc;
+    }
+  }
+  elsif ($annot_line =~ m/^(sp|tr)\|/) {
+    ($sdb, $acc, $id) = split(/\|/,$annot_line);
+  }
+  elsif ($annot_line =~ m/^ref\|/) {
+    ($sdb, $acc) = split(/\|/,$annot_line);
+    $get_annots_sql = $get_pfam_refacc;
+  }
+  elsif ($annot_line =~ m/^(SP|TR):/i) {
+    ($sdb, $id) = split(/:/,$annot_line);
+    $use_acc = 0;
+  }
+  elsif ($annot_line !~ m/\|/) {  # new NCBI swissprot format
+    $use_acc =1;
+    $sdb = 'sp';
+    ($acc) = split(/\s+/,$annot_line);
+  }
+
+  unless ($use_acc) {		# look up by sequence id
+    $get_annots_sql = $get_pfam_id;
+    $get_annots_sql->execute($id);
+  } else {
+    unless ($acc) {
+      warn "missing acc in $annot_line";
+      return;			# 'next' is fatal in a sub called outside a loop
+    } else {
+      $acc =~ s/\.\d+$//;	# remove version number
+      $get_annots_sql->execute($acc);
+    }
+  }
+
+  $annot_data{list} = $get_annot_sub->($get_annots_sql, $seq_len);
+  return \%annot_data;
+}
+
+sub get_pfam_annots {
+  my ($get_annots, $seq_length) = @_;
+  # in: executed statement handle, sequence length (0/undef: taken from the rows); out: ref to feature list
+  $seq_length = 0 unless $seq_length;
+
+  my @pf_domains = ();
+
+  # get the list of domains, sorted by start
+
+  # $row_href has: seq_start, seq_end, model_start, model_end, model_length, 
+  #                pfamA_acc, pfamA_id, auto_pfamA_reg_full,
+  #                domain_evalue_score as evalue, length
+
+  while ( my $row_href = $get_annots->fetchrow_hashref()) {
+    if ($auto_reg) {
+      $row_href->{info} = $row_href->{auto_pfamA_reg_full};
+    } elsif ($pf_acc) {
+      $row_href->{info} = $row_href->{pfamA_acc};
+    } else {
+      $row_href->{info} = $row_href->{pfamA_id};
+    }
+    # with no caller-supplied length, adopt the length column of the row
+    if ($row_href && $row_href->{length} > $seq_length && $seq_length == 0) {
+      $seq_length = $row_href->{length};
+    }
+    # skip domains that start at/after $seq_length; clip those that run past it
+    next if ($row_href->{seq_start} >= $seq_length);
+    if ($row_href->{seq_end} > $seq_length) {
+      $row_href->{seq_end} = $seq_length;
+    }
+
+    push @pf_domains, $row_href
+  }
+
+  # before checking for domain overlap, check for "split-domains"
+  # (self-unbound) by looking for runs of the same domain that are
+  # ordered by model_start
+
+  if (scalar(@pf_domains) > 1) {
+    my @j_domains;		#joined domains
+    my @tmp_domains = @pf_domains;
+
+    my $prev_dom = shift(@tmp_domains);
+
+    for my $curr_dom (@tmp_domains) {
+      # to join domains:
+      # (1) the domains must be in order by model_start/end coordinates
+      # (3) joining the domains cannot make the total combination too long
+
+      # check for model and sequence consistency
+      if (($prev_dom->{pfamA_acc} eq $curr_dom->{pfamA_acc})  # same family
+	  && $prev_dom->{model_start} < $curr_dom->{model_start}  # model check
+	  && $prev_dom->{model_end} < $curr_dom->{model_end}
+	  # NOTE(review): "> 0.80*x || < 1.25*x" holds for every positive x, so the next clause never rejects; && may have been intended -- confirm
+	  && ($curr_dom->{model_start} > $prev_dom->{model_end} * 0.80   # limit overlap
+	      || $curr_dom->{model_start} <  $prev_dom->{model_end} * 1.25)
+	  && ((($curr_dom->{model_end} - $curr_dom->{model_start}+1)/$curr_dom->{model_length} +
+	       ($prev_dom->{model_end} - $prev_dom->{model_start}+1)/$prev_dom->{model_length}) < 1.33)
+	 ) {			# join them by updating $prev_dom
+	$prev_dom->{seq_end} = $curr_dom->{seq_end};
+	$prev_dom->{model_end} = $curr_dom->{model_end};
+	$prev_dom->{auto_pfamA_reg_full} = $prev_dom->{auto_pfamA_reg_full} . ";". $curr_dom->{auto_pfamA_reg_full};
+	$prev_dom->{evalue} = ($prev_dom->{evalue} < $curr_dom->{evalue} ? $prev_dom->{evalue} : $curr_dom->{evalue});
+      } else {
+	push @j_domains, $prev_dom;
+	$prev_dom = $curr_dom;
+      }
+    }
+    push @j_domains, $prev_dom;
+    @pf_domains = @j_domains;
+
+    # --no-over takes precedence: the --split-over branch below is only reached when $no_over is false
+    if ($no_over) {	# for either $no_over or $split_over, check for overlapping domains and edit/split them
+
+      my @tmp_domains = @pf_domains;	# allow shifts from copy of @pf_domains
+      my @save_domains = ();		# where the new domains go
+
+      my $prev_dom = shift @tmp_domains;
+
+      while (my $curr_dom = shift @tmp_domains) {
+
+	my @overlap_domains = ($prev_dom);
+
+	my $diff = $prev_dom->{seq_end} - $curr_dom->{seq_start};
+
+	my ($prev_len, $cur_len) = ($prev_dom->{seq_end}-$prev_dom->{seq_start}+1,
+				    $curr_dom->{seq_end}-$curr_dom->{seq_start}+1);
+
+	my $inclusion = ((($curr_dom->{seq_start} >= $prev_dom->{seq_start})    # start is right && end is left 
+			  && ($curr_dom->{seq_end} <= $prev_dom->{seq_end})) || # -- curr inside prev
+			 (($curr_dom->{seq_start} <= $prev_dom->{seq_start})    # start is left && end is right
+			  && ($curr_dom->{seq_end} >= $prev_dom->{seq_end})));  # -- prev is inside curr
+
+	my $longer_len = ($prev_len > $cur_len) ? $prev_len : $cur_len;
+
+	# check for overlap > domain_length/$over_fract
+	while ($inclusion || ($diff > 0 && $diff > $longer_len/$over_fract)) {
+	  push @overlap_domains, $curr_dom;
+	  $curr_dom = shift @tmp_domains;
+	  last unless $curr_dom;
+	  $diff = $prev_dom->{seq_end} - $curr_dom->{seq_start};
+	  ($prev_len, $cur_len) = ($prev_dom->{seq_end}-$prev_dom->{seq_start}+1, $curr_dom->{seq_end}-$curr_dom->{seq_start}+1);
+	  $longer_len = ($prev_len > $cur_len) ? $prev_len : $cur_len;
+	  $inclusion = ((($curr_dom->{seq_start} >= $prev_dom->{seq_start}) && ($curr_dom->{seq_end} <= $prev_dom->{seq_end})) ||
+			(($curr_dom->{seq_start} <= $prev_dom->{seq_start}) && ($curr_dom->{seq_end} >= $prev_dom->{seq_end})));
+	}
+
+	# check for overlapping domains; >1 because $prev_dom is always there
+	if (scalar(@overlap_domains) > 1 ) {
+	  # if $rpd2_fams, check for a chosen one
+
+	  for my $dom ( @overlap_domains) {
+	    $dom->{evalue} = 1.0 unless defined($dom->{evalue});
+	  }
+	  # keep the overlapping domain with the smallest evalue
+	  @overlap_domains = sort { $a->{evalue} <=> $b->{evalue} } @overlap_domains;
+	  $prev_dom = $overlap_domains[0];
+	}
+
+	# $prev_dom should be the best of the overlaps, and we are no longer overlapping > dom_length/3
+	push @save_domains, $prev_dom;
+	$prev_dom = $curr_dom;
+      }
+
+      if ($prev_dom) {
+	push @save_domains, $prev_dom;
+      }
+
+      @pf_domains = @save_domains;
+
+      # now check for smaller overlaps
+      for (my $i=1; $i < scalar(@pf_domains); $i++) {
+	if ($pf_domains[$i-1]->{seq_end} >= $pf_domains[$i]->{seq_start}) {
+	  my $overlap = $pf_domains[$i-1]->{seq_end} - $pf_domains[$i]->{seq_start};
+	  $pf_domains[$i-1]->{seq_end} -= int($overlap/2);	# left domain gives up half the overlap
+	  $pf_domains[$i]->{seq_start} = $pf_domains[$i-1]->{seq_end}+1;	# right domain starts just after it
+	}
+      }
+    }
+    elsif ($split_over) {   # here, everything that overlaps by > $min_vdom should be split into a separate domain
+      my @save_domains = ();		# where the new domains go
+
+      # check to see if one domain is included (or overlapping) more
+      # than xx% of the other.  If so, pick the longer one
+
+      my ($prev_dom, $curr_dom) = ($pf_domains[0],0) ;
+      for (my $i=1; $i < scalar(@pf_domains); $i++) {
+	$curr_dom = $pf_domains[$i];
+
+	my ($prev_len, $cur_len) = ($prev_dom->{seq_end}-$prev_dom->{seq_start}+1, $curr_dom->{seq_end}-$curr_dom->{seq_start}+1);
+	my $longer_len = ($prev_len > $cur_len) ? $prev_len : $cur_len;
+
+	if (($curr_dom->{seq_start} >= $prev_dom->{seq_start}) && ($curr_dom->{seq_end} <= $prev_dom->{seq_end})
+	   && $cur_len / $prev_len > 0.80) {
+	  # $prev_dom stays the same, $curr_dom deleted
+	  next;
+	}
+	elsif (($curr_dom->{seq_start} <= $prev_dom->{seq_start}) && ($curr_dom->{seq_end} >= $prev_dom->{seq_end})
+	      && $prev_len / $cur_len > 0.80) {
+	  $prev_dom = $curr_dom; # this should delete $prev_dom
+	  next;
+	}
+
+	if ($prev_dom->{seq_end} >= $curr_dom->{seq_start} + $min_vdom) {
+	  my ($l_seq_end, $r_seq_start) = ($curr_dom->{seq_start}-1, $prev_dom->{seq_end}+1);
+	  # trim $prev_dom to end before the overlap, then emit the overlap as its own "A/B" domain
+	  $prev_dom->{seq_end} = $l_seq_end;
+	  push @save_domains, $prev_dom;
+	  my $new_dom = {seq_start => $l_seq_end+1, seq_end=>$r_seq_start-1, 
+			 model_length => -1,
+			 pfamA_acc=>$prev_dom->{pfamA_acc}."/".$curr_dom->{pfamA_acc},
+			 pfamA_id=>$prev_dom->{pfamA_id}."/".$curr_dom->{pfamA_id},
+			 };
+
+	  if ($pf_acc) {
+	    $new_dom->{info} = $new_dom->{pfamA_acc};
+	  }
+	  else {
+	    $new_dom->{info} = $new_dom->{pfamA_id};
+	  }
+
+	  push @save_domains, $new_dom;
+	  $curr_dom->{seq_start} = $r_seq_start;
+	  $prev_dom = $curr_dom;
+	}
+	else {
+	  push @save_domains, $prev_dom;
+	  $prev_dom = $curr_dom;
+	}
+      }
+      push @save_domains, $prev_dom;
+      @pf_domains = @save_domains;
+    }
+  }
+
+  # $vdoms -- virtual Pfam domains -- the equivalent of $neg_doms,
+  # but covering parts of a Pfam model that are not annotated.  split
+  # domains have been joined, so simply check beginning and end of
+  # each domain (but must also check for bounded-ness)
+  # only added when the missing length > $min_vdom (the 10% test is commented out)
+
+  if ($vdoms && scalar(@pf_domains)) {
+    my @vpf_domains;
+
+    my $curr_dom = $pf_domains[0];
+    my $length = $curr_dom->{length};
+
+    my $prev_dom={seq_end=>0, pfamA_acc=>''};
+    my $prev_dom_end = 0;
+    my $next_dom_start = $length+1;
+
+    for (my $dom_ix=0; $dom_ix < scalar(@pf_domains); $dom_ix++ ) {
+      $curr_dom = $pf_domains[$dom_ix];
+
+      my $pfamA =  $curr_dom->{pfamA_acc};
+
+      # first, look left, is there a domain there (if there is,
+      # it should be updated right
+
+      # my $min_vdom = $curr_dom->{model_length} / 10;
+
+      if ($curr_dom->{model_length} < $min_vdom) {	# model too short for virtual domains; $prev_dom is not updated here
+	push @vpf_domains, $curr_dom;
+	next;
+      }
+      if ($prev_dom->{pfamA_acc}) { # look for previous domain
+	$prev_dom_end = $prev_dom->{seq_end};
+      }
+
+      # there is a domain to the left, how much room is available?
+      my $left_dom_len = min($curr_dom->{seq_start}-$prev_dom_end-1, $curr_dom->{model_start}-1);
+      if ( $left_dom_len > $min_vdom) {
+	# there is room for a virtual domain
+	my %new_dom = (seq_start=> $curr_dom->{seq_start}-$left_dom_len,
+	               seq_end => $curr_dom->{seq_start}-1,
+		       info=>'@'.$curr_dom->{info},
+		       model_length=>$curr_dom->{model_length},
+		       model_end => $curr_dom->{model_start}-1,
+		       model_start => $left_dom_len,	# NOTE(review): a length is stored as a start coordinate -- confirm
+		       pfamA_acc=>$pfamA,
+		      );
+	push @vpf_domains, \%new_dom;
+      }
+
+      # save the current domain
+      push @vpf_domains, $curr_dom;
+      $prev_dom = $curr_dom;
+
+      if ($dom_ix < $#pf_domains) { # there is a domain to the right
+	# first, give all the extra space to the first domain (no splitting)
+	$next_dom_start = $pf_domains[$dom_ix+1]->{seq_start};
+      }
+      else {
+	$next_dom_start = $length;	# NOTE(review): initialized to $length+1 above; with $length the last residue is never counted as available -- confirm
+      }
+
+      # is there room for a virtual domain right
+	  
+      my $right_dom_len = min($next_dom_start-$curr_dom->{seq_end}-1, # space available 
+			      $curr_dom->{model_length}-$curr_dom->{model_end} # space needed
+			     );
+      if ( $right_dom_len > $min_vdom) {
+	my %new_dom = (seq_start=> $curr_dom->{seq_end}+1,
+		       seq_end=> $curr_dom->{seq_end}+$right_dom_len,
+		       info=>'@'.$curr_dom->{info},
+		       model_length => $curr_dom->{model_length},
+		       pfamA_acc=> $pfamA,
+		      );
+	push @vpf_domains, \%new_dom;
+	$prev_dom = \%new_dom;
+      }
+    }				# all done, check for last one
+
+    # $curr_dom=$pf_domains[-1];
+    # # my $min_vdom = $curr_dom->{model_length}/10;
+
+    # my $right_dom_len = min($length - $curr_dom->{seq_end}+1,  # space available 
+    # 			    $curr_dom->{model_length}-$curr_dom->{model_end} # space needed
+    # 			   );
+    # if ($right_dom_len > $min_vdom) {
+    #   my %new_dom = (seq_start=> $curr_dom->{seq_end}+1,
+    # 		     seq_end => $curr_dom->{seq_end}+$right_dom_len,
+    # 		     info=>'@'.$curr_dom->{pfamA_acc},
+    # 		     model_len=> $curr_dom->{model_len},
+    # 		     pfamA_acc => $curr_dom->{pfamA_acc},
+    # 		     model_start => $curr_dom->{model_end}+1,
+    # 		     model_end => $curr_dom->{model_len},
+    # 		     );
+
+    #   push @vpf_domains, \%new_dom;
+    # }
+
+    # @vpf_domains has both old @pf_domains and new neg-domains
+    @pf_domains = @vpf_domains;
+  }
+  # --neg-doms: label gaps longer than $min_nodom (before, between and after domains) as NODOM
+  if ($neg_doms) {
+    my @npf_domains;
+    my $prev_dom={seq_end=>0};
+    for my $curr_dom ( @pf_domains) {
+      if ($curr_dom->{seq_start} - $prev_dom->{seq_end} > $min_nodom) {
+	my %new_dom = (seq_start=>$prev_dom->{seq_end}+1, seq_end => $curr_dom->{seq_start}-1, info=>'NODOM');
+	push @npf_domains, \%new_dom;
+      }
+      push @npf_domains, $curr_dom;
+      $prev_dom = $curr_dom;
+    }
+    if ($seq_length - $prev_dom->{seq_end} > $min_nodom) {
+      my %new_dom = (seq_start=>$prev_dom->{seq_end}+1, seq_end=>$seq_length, info=>'NODOM');
+      if ($new_dom{seq_end} > $new_dom{seq_start}) {
+	push @npf_domains, \%new_dom;
+      }
+    }
+
+    # @npf_domains has both old @pf_domains and new neg-domains
+    @pf_domains = @npf_domains;
+  }
+
+  # now make sure we have useful names: colors
+
+  for my $pf (@pf_domains) {
+    $pf->{info} = domain_name($pf->{info}, $pf->{pfamA_acc});
+  }
+  # one row per domain: [start, end, name] with --lav, otherwise [start, '-', end, name]
+  my @feats = ();
+  for my $d_ref (@pf_domains) {
+    if ($lav) {
+      push @feats, [$d_ref->{seq_start}, $d_ref->{seq_end}, $d_ref->{info}];
+    } else {
+      push @feats, [$d_ref->{seq_start}, '-', $d_ref->{seq_end},  $d_ref->{info} ];
+      #      push @feats, [$d_ref->{seq_end}, ']', '-', ""];
+    }
+
+  }
+
+  return \@feats;
+}
+
+sub min {			# numerically smaller of two scalars
+  my ($lhs, $rhs) = @_;
+  return $lhs if ($lhs <= $rhs);	# on a tie the first argument wins
+  return $rhs;
+}
+
+sub max {			# numerically larger of two scalars
+  my ($lhs, $rhs) = @_;
+  return $lhs if ($lhs >= $rhs);	# on a tie the first argument wins
+  return $rhs;
+}
+
+# domain_name($value, $pfamA_acc) registers a domain label in %domains
+# (label -> running number from $domain_cnt) and returns the label to
+# display.  Unless $no_clans is set, a family for which $get_pfam_clan
+# returns a row is renamed "C.<clan_id>" (or <clan_acc> with $pf_acc)
+# and shares that clan's number.  A leading '@' is stripped for the
+# lookup and put back on the result.  (The earlier header described a
+# UniProt label clean-up that this code does not perform.)
+sub domain_name {
+
+  my ($value, $pfamA_acc) = @_;
+  my $is_virtual = 0;
+
+  if ($value =~ m/^@/) {	# remember the '@' prefix, look up the bare label
+    $is_virtual = 1;
+    $value =~ s/^@//;
+  }
+
+  # check for clan:
+  if ($no_clans) {
+    if (! defined($domains{$value})) {	# first time this label is seen
+      $domain_clan{$value} = 0;
+      $domains{$value} = ++$domain_cnt;
+      push @domain_list, $pfamA_acc;
+    }
+  }
+  elsif (!defined($domain_clan{$value})) {
+    ## only do this for new domains, old domains have known mappings
+
+    ## ways to highlight the same domain:
+    # (1) for clans, substitute clan name for family name
+    # (2) for clans, use the same color for the same clan, but don't change the name
+    # (3) for clans, combine family name with clan name, but use colors based on clan
+
+    # check to see if it's a clan
+    $get_pfam_clan->execute($pfamA_acc);
+
+    my $pfam_clan_href=0;
+
+    if ($pfam_clan_href=$get_pfam_clan->fetchrow_hashref()) {  # is a clan
+      my ($clan_id, $clan_acc) = @{$pfam_clan_href}{qw(clan_id clan_acc)};
+
+      # now check to see if we have seen this clan before (if so, do not increment $domain_cnt)
+      my $c_value = "C." . $clan_id;
+      if ($pf_acc) {$c_value = $clan_acc;}
+
+      $domain_clan{$value} = {clan_id => $clan_id,
+			      clan_acc => $clan_acc};
+
+      if ($domains{$c_value}) {	# clan already numbered: reuse its number
+	$domain_clan{$value}->{domain_cnt} =  $domains{$c_value};
+	$value = $c_value;
+      }
+      else {			# new clan: allocate the next number
+	$domain_clan{$value}->{domain_cnt} = ++ $domain_cnt;
+	$value = $c_value;
+	$domains{$value} = $domain_cnt;
+	push @domain_list, $pfamA_acc;
+      }
+    }
+    else {			# not a clan
+      $domain_clan{$value} = 0;
+      $domains{$value} = ++$domain_cnt;
+      push @domain_list, $pfamA_acc;
+    }
+  }
+  elsif ($domain_clan{$value} && $domain_clan{$value}->{clan_acc}) {	# known clan member
+    if ($pf_acc) {$value = $domain_clan{$value}->{clan_acc};}
+    else { $value = "C." . $domain_clan{$value}->{clan_id}; }
+  }
+  # the '@' form of a label gets the same number as the bare label
+  if ($is_virtual) {
+    $domains{'@'.$value} = $domains{$value};
+    $value = '@'.$value;
+  }
+  return $value;
+}
+
+# domain_num: a virtual domain ('@name') comes back as 'vname' with a
+# 'v' appended to its number; other names and numbers pass through.
+sub domain_num {
+  my ($label, $color_num) = @_;
+  # s/// is true only when a leading '@' was actually replaced
+  $color_num = $color_num."v" if ($label =~ s/^@/v/);
+  return ($label, $color_num);
+}
+
+
+__END__
+
+=pod
+
+=head1 NAME
+
+ann_pfam28.pl
+
+=head1 SYNOPSIS
+
+ ann_pfam28.pl --neg-doms  --vdoms 'sp|P09488|GSTM1_HUMAN' | accession.file
+
+=head1 OPTIONS
+
+ -h	short help
+ --help include description
+ --no-over  : generate non-overlapping domains (equivalent to ann_pfam.pl)
+ --split-over  : overlaps of two domains generate a new hybrid domain
+ --no-clans : do not use clans with multiple families from same clan
+ --neg-doms : report domains between annotated domains as NODOM
+                 (also --neg, --neg_doms)
+ --vdoms : produce "virtual domains" using model_start, 
+           model_end for partial pfam domains
+ --min_nodom=10  : minimum length between domains for NODOM
+
+ --host, --user, --password, --port --db : info for mysql database
+
+=head1 DESCRIPTION
+
+C<ann_pfam28.pl> extracts domain information from the pfam mysql
+database. Currently, the program works with database
+sequence descriptions in several formats:
+
+ >gi|1705556|sp|P54670.1|CAF1_DICDI
+ >sp|P09488|GSTM1_HUMAN
+ >sp:CALM_HUMAN 
+
+C<ann_pfam28.pl> uses the C<pfamA_reg_full_significant>, C<pfamseq>,
+and C<pfamA> tables of the C<pfam> database to extract domain
+information on a protein. 
+
+If the C<--no-over> option is set, overlapping domains are selected and
+edited to remove overlaps.  For proteins with multiple overlapping
+domains (domains overlap by more than 1/3 of the domain length),
+C<ann_pfam28.pl> selects the domain annotation with the best
+C<domain_evalue_score>.  When domains overlap by less than 1/3 of the
+domain length, they are shortened to remove the overlap.
+
+If the C<--split-over> option is set, if two domains overlap, the
+overlapping region is split out of the domains and labeled as a new,
+virtual-like, domain.  If one domain is internal to another and spans
+80% of the domain, the shorter domain is removed.
+
+C<ann_pfam28.pl> is designed to be used by the B<FASTA> programs with
+the C<-V \!ann_pfam28.pl> or C<-V "\!ann_pfam28.pl --neg"> option.
+
+=head1 AUTHOR
+
+William R. Pearson, wrp at virginia.edu
+
+=cut
diff --git a/scripts/ann_pfam30.pl b/scripts/ann_pfam30.pl
new file mode 100755
index 0000000..2f894f1
--- /dev/null
+++ b/scripts/ann_pfam30.pl
@@ -0,0 +1,859 @@
+#!/usr/bin/perl -w
+
+################################################################
+# copyright (c) 2014,2015 by William R. Pearson and The Rector &
+# Visitors of the University of Virginia */
+################################################################
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under this License is distributed on an "AS
+# IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+# express or implied.  See the License for the specific language
+# governing permissions and limitations under the License. 
+################################################################
+
+# ann_pfam.pl gets an annotation file from fasta36 -V with a line of the form:
+
+# gi|62822551|sp|P00502|GSTA1_RAT Glutathione S-transfer\n  (at least from pir1.lseg)
+#
+# it must:
+# (1) read in the line
+# (2) parse it to get the up_acc
+# (3) return the tab delimited features
+#
+
+# this is the first version that works with the new Pfam strategy of
+# separating Uniprot reference sequences from the rest of uniprot.  as
+# a result, it is possible that 2 SQL queries will be required, one to
+# pfamA_reg_full_significant and a second to uniprot_reg_full.
+
+# modified 15-Jan-2017 to reduce the number of calls when the same
+# accession is present multiple times.  Accessions are saved in a hash
+# that ensures uniqueness.  (Could also speed things up by creating a temporary table.)
+#
+
+
+use strict;
+
+use DBI;
+use Getopt::Long;
+use Pod::Usage;
+
+use vars qw($host $db $port $user $pass);
+
+# Default connection parameters for the Pfam MySQL database.  Each of
+# them can be replaced on the command line with --host, --db, --port,
+# --user and --password.
+($host, $db, $port, $user, $pass)  = ("wrpxdb.its.virginia.edu", "pfam31", 0, "web_user", "fasta_www");
+#$host = 'xdb';
+#$host = 'localhost';
+#$db = 'RPD2_pfam28u';
+
+# boolean option flags, all off unless requested on the command line
+my ($auto_reg,$rpd2_fams, $neg_doms, $vdoms, $lav, $no_doms, $no_clans, $pf_acc, $acc_comment, $bound_comment, $shelp, $help) = 
+  (0, 0, 0, 0, 0,0, 0, 0, 0, 0, 0, 0,);
+
+# overlap handling: with --no-over, domains overlapping by more than
+# (longer domain length)/$over_fract compete and only the best is kept
+my ($no_over, $split_over, $over_fract) = (0, 0, 3.0);
+
+# separator placed between a domain label and its color number / boundaries
+my ($color_sep_str, $show_color) = ('~',1);
+
+# minimum lengths for NODOM regions (--neg) and virtual domains (--vdoms)
+my ($min_nodom, $min_vdom) = (10,10);
+
+GetOptions(
+    "host=s" => \$host,
+    "db=s" => \$db,
+    "user=s" => \$user,
+    "password=s" => \$pass,
+    "port=i" => \$port,
+    "lav" => \$lav,
+    "acc_comment" => \$acc_comment,
+    "bound_comment" => \$bound_comment,
+    "color!" => \$show_color,
+    "no-over" => \$no_over,
+    "no_over" => \$no_over,
+    "split-over" => \$split_over,
+    "split_over" => \$split_over,
+    # over_fract is a number used as a divisor, so it must take a value;
+    # without "=f" the option is a boolean flag that sets it to 1 and
+    # leaves the intended value in @ARGV, where it is read as the query
+    "over_fract=f" => \$over_fract,
+    "over-fract=f" => \$over_fract,
+    "no-clans" => \$no_clans,
+    "no_clans" => \$no_clans,
+    "neg" => \$neg_doms,
+    "neg_doms" => \$neg_doms,
+    "neg-doms" => \$neg_doms,
+    "min_nodom=i" => \$min_nodom,
+    "vdoms" => \$vdoms,
+    "v_doms" => \$vdoms,
+    "pfacc" => \$pf_acc,
+    "RPD2" => \$rpd2_fams,
+    "auto_reg" => \$auto_reg,
+    "h|?" => \$shelp,
+    "help" => \$help,
+    );
+
+# $over_fract is used as a divisor when comparing overlaps
+die "--over_fract must be greater than 0\n" unless ($over_fract > 0);
+
+pod2usage(1) if $shelp;
+pod2usage(exitstatus => 0, verbose => 2) if $help;
+# need either a query/file argument or something readable on STDIN
+pod2usage(1) unless (@ARGV || -p STDIN || -f STDIN);
+
+# Assemble the DBI data source name: the database is always given,
+# host and port are appended only when they are set (port defaults to 0).
+my @dsn_fields = ("database=$db");
+push @dsn_fields, "host=$host" if $host;
+push @dsn_fields, "port=$port" if $port;
+my $connect = "dbi:mysql(AutoCommit=>1,RaiseError=>1):" . join(";", @dsn_fields);
+
+my $dbh = DBI->connect($connect, $user, $pass)
+  or die $DBI::errstr;
+
+# bookkeeping shared by the subroutines below
+my %annot_types = ();
+# domain (or clan) label -> domain number; NODOM is always number 0
+my %domains = (NODOM=>0);
+# domain label -> clan information (hash ref), or 0 when not in a clan
+my %domain_clan = (NODOM => {clan_id => 'NODOM', clan_acc=>0, domain_cnt=>0});
+# domain number -> pfamA accession that first received that number
+my @domain_list = (0);
+my $domain_cnt = 0;
+
+# names of the two region tables that hold the domain coordinates
+my $pfamA_reg_full = 'pfamA_reg_full_significant';
+my $uniprot_reg_full = 'uniprot_reg_full';
+
+# routine that turns an executed statement handle into a feature list
+my $get_annot_sub = \&get_pfam_annots;
+
+# Column lists corresponding to the SELECTs below.
+my @pfam_fields = qw(seq_start seq_end model_start model_end model_length pfamA_acc pfamA_id auto_pfamA_reg_full domain_evalue_score as evalue length);
+my @upfam_fields = qw(seq_start seq_end model_start model_end model_length pfamA_acc pfamA_id auto_uniprot_reg_full domain_evalue_score as evalue length);
+
+# All statements return the same columns (the uniprot variants alias
+# auto_uniprot_reg_full to auto_pfamA_reg_full), so the same row
+# processing code can be used whichever statement produced the rows.
+
+# domains for a pfamseq accession
+my $get_pfam_acc = $dbh->prepare(<<EOSQL);
+SELECT seq_start, seq_end, model_start, model_end, model_length, pfamA_acc, pfamA_id, auto_pfamA_reg_full, domain_evalue_score as evalue, length
+FROM pfamseq
+JOIN pfamA_reg_full_significant using(pfamseq_acc)
+JOIN pfamA USING (pfamA_acc)
+WHERE in_full = 1
+AND  pfamseq_acc=?
+ORDER BY seq_start
+
+EOSQL
+
+# domains for a uniprot accession
+my $get_upfam_acc = $dbh->prepare(<<EOSQL);
+SELECT seq_start, seq_end, model_start, model_end, model_length, pfamA_acc, pfamA_id, auto_uniprot_reg_full as auto_pfamA_reg_full, domain_evalue_score as evalue, length
+FROM uniprot
+JOIN uniprot_reg_full using(uniprot_acc)
+JOIN pfamA USING (pfamA_acc)
+WHERE in_full = 1
+AND  uniprot_acc=?
+ORDER BY seq_start
+
+EOSQL
+
+# domains for a RefSeq accession, mapped to pfamseq through uniprot.refseq2up
+my $get_pfam_refacc = $dbh->prepare(<<EOSQL);
+SELECT seq_start, seq_end, model_start, model_end, model_length, pfamA_acc, pfamA_id, auto_pfamA_reg_full, domain_evalue_score as evalue, length
+  FROM $pfamA_reg_full
+  JOIN pfamseq using(pfamseq_acc)
+  JOIN pfamA USING (pfamA_acc)
+  JOIN uniprot.refseq2up as rf2up on(rf2up.up_acc=pfamseq_acc)
+ WHERE in_full = 1
+   AND rf2up.refseq_acc=?
+ ORDER BY seq_start
+
+EOSQL
+
+# domains for a RefSeq accession, mapped to uniprot through uniprot.refseq2up
+my $get_upfam_refacc = $dbh->prepare(<<EOSQL);
+SELECT seq_start, seq_end, model_start, model_end, model_length, pfamA_acc, pfamA_id, auto_uniprot_reg_full as auto_pfamA_reg_full, domain_evalue_score as evalue, length
+FROM uniprot
+JOIN uniprot_reg_full using(uniprot_acc)
+JOIN pfamA USING (pfamA_acc)
+JOIN uniprot.refseq2up as rf2up on(rf2up.up_acc=uniprot_acc)
+WHERE in_full = 1
+AND  refseq_acc=?
+ORDER BY seq_start
+
+EOSQL
+
+# statement used by default; show_annots() re-selects it for every query
+my $get_annots_sql = $get_pfam_acc;
+
+# domains for a pfamseq id (e.g. GSTM1_HUMAN)
+my $get_pfam_id = $dbh->prepare(<<EOSQL);
+SELECT seq_start, seq_end, model_start, model_end, model_length, pfamA_acc, pfamA_id, auto_pfamA_reg_full, domain_evalue_score as evalue, length
+FROM pfamseq
+JOIN $pfamA_reg_full using(pfamseq_acc)
+JOIN pfamA USING (pfamA_acc)
+WHERE in_full=1
+AND  pfamseq_id=?
+ORDER BY seq_start
+
+EOSQL
+
+# domains for a uniprot id.  The join column is uniprot_acc, as in the
+# other uniprot statements above (it was pfamseq_acc, which those
+# statements never use for the uniprot/uniprot_reg_full join).
+my $get_upfam_id = $dbh->prepare(<<EOSQL);
+SELECT seq_start, seq_end, model_start, model_end, model_length, pfamA_acc, pfamA_id, auto_uniprot_reg_full as auto_pfamA_reg_full, domain_evalue_score as evalue, length
+FROM uniprot
+JOIN uniprot_reg_full using(uniprot_acc)
+JOIN pfamA USING (pfamA_acc)
+WHERE in_full=1
+AND  uniprot_id=?
+ORDER BY seq_start
+
+EOSQL
+
+# clan (if any) that a Pfam family belongs to
+my $get_pfam_clan = $dbh->prepare(<<EOSQL);
+
+SELECT clan_acc, clan_id
+FROM clan
+JOIN clan_membership using(clan_acc)
+WHERE pfamA_acc=?
+
+EOSQL
+
+# family -> clan assignments from the RPD2 table (used with --RPD2)
+my $get_rpd2_clans = $dbh->prepare(<<EOSQL);
+
+SELECT auto_pfamA, clan
+FROM ljm_db.RPD2_final_fams
+WHERE clan is not NULL
+
+EOSQL
+
+# -- LEFT JOIN clan_membership USING (auto_pfamA)
+# -- LEFT JOIN clans using(auto_clan)
+
+# scratch variables filled in by show_annots() while it parses an identifier
+my ($tmp, $gi, $sdb, $acc, $id, $use_acc);
+
+# first argument: the query identifier (or a file of identifiers);
+# optional second argument: the sequence length
+my ($query, $seq_len) = @ARGV;
+$seq_len = 0 if (!defined($seq_len));
+
+$query =~ s/^>// if ($query);
+
+# @annots: keys into %annot_set, one per annotated input line, in input order
+# %annot_set: key (accession or id) -> {seq_info => ..., list => [features]}
+my @annots = ();
+my %annot_set = ();
+
+my %rpd2_clan_fams = ();
+
+if ($rpd2_fams) {
+  $get_rpd2_clans->execute();
+  while (my ($auto_pfam, $auto_clan) = $get_rpd2_clans->fetchrow_array()) {
+    $rpd2_clan_fams{$auto_pfam} = $auto_clan;
+  }
+}
+
+# Decide whether the first argument looks like a sequence identifier
+# (contains '|' or ':', starts with NP_/XP_, or resembles a UniProt
+# accession); anything else is treated as input to read with <>.
+# NOTE(review): in the last pattern '^' binds only to the first
+# alternative and '\s' only to the second -- confirm that is intended.
+my $query_is_seqid = ($query && ($query =~ m/[\|:]/ || 
+				 $query =~ m/^[NX]P_/ ||
+				 $query =~ m/^[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}\s/));
+
+if ($query_is_seqid) {
+  push @annots, show_annots("$query\t$seq_len", $get_annot_sub);
+}
+else {
+  # one identifier (optionally followed by <tab>length) per line
+  while (my $a_line = <>) {
+    $a_line =~ s/^>//;
+    chomp $a_line;
+    push @annots, show_annots($a_line, $get_annot_sub);
+  }
+}
+
+# write the annotations: a ">identifier" line followed by one
+# tab-delimited line per feature
+foreach my $annot_key (@annots) {
+  next if (!$annot_key);
+  my $seq_entry = $annot_set{$annot_key};
+  print ">",$seq_entry->{seq_info},"\n";
+
+  foreach my $feat (@{$seq_entry->{list}}) {
+    my $label = $feat->[-1];
+    # decorate labels that have a domain number (never done for --lav)
+    unless ($lav || !defined($domains{$label})) {
+      my ($a_name, $a_num) = domain_num($label, $domains{$label});
+      $feat->[-1] = $a_name;
+      # virtual domains carry a trailing 'v' that is not part of the index
+      (my $tmp_a_num = $a_num) =~ s/v$//;
+      $feat->[-1] .= "{$domain_list[$tmp_a_num]}" if ($acc_comment);
+      if ($bound_comment) {
+	$feat->[-1] .= $color_sep_str.$feat->[0].":".$feat->[2];
+      }
+      elsif ($show_color) {
+	$feat->[-1] .= $color_sep_str.$a_num;
+      }
+    }
+    print join("\t",@$feat),"\n";
+  }
+}
+
+exit(0);
+
+# show_annots($query_len, $get_annot_sub)
+#
+# Parses one identifier line, looks up its domains and stores the
+# result in %annot_set.
+#
+# Arguments:
+#   $query_len     -- "identifier" or "identifier<tab>sequence_length"
+#   $get_annot_sub -- code ref called as ($statement_handle, $seq_len);
+#                     it returns the feature list for the sequence
+# Returns:
+#   the key (accession or id) under which the annotation is stored in
+#   %annot_set, or "" when no accession/id could be parsed.  A key that
+#   is already in %annot_set is returned without querying again.
+#
+sub show_annots {
+  my ($query_len, $get_annot_sub) = @_;
+
+  my ($annot_line, $seq_len) = split(/\t/,$query_len);
+
+  my $pfamA_acc;
+
+  # defaults: look up by accession, pfamseq first and uniprot second
+  $use_acc = 1;
+  $get_annots_sql = $get_pfam_acc;
+
+  my $get_annots_sql_u = $get_upfam_acc;
+
+  if ($annot_line =~ m/^pf\d+\|/) {
+    ($sdb, $gi, $pfamA_acc, $acc, $id) = split(/\|/,$annot_line);
+#    $dbh->do("use RPD2_pfam");
+  }
+  elsif ($annot_line =~ m/^gi\|/) {
+    ($tmp, $gi, $sdb, $acc, $id) = split(/\|/,$annot_line);
+    if ($sdb =~ m/ref/) {	# RefSeq accession, needs the refseq2up mapping
+      $get_annots_sql = $get_pfam_refacc;
+      $get_annots_sql_u = $get_upfam_refacc;
+    }
+  }
+  elsif ($annot_line =~ m/^(sp|tr|up)\|/) {
+    ($sdb, $acc, $id) = split(/\|/,$annot_line);
+  }
+  elsif ($annot_line =~ m/^ref\|/) {
+    ($sdb, $acc) = split(/\|/,$annot_line);
+    $get_annots_sql = $get_pfam_refacc;
+    $get_annots_sql_u = $get_upfam_refacc;
+  }
+  elsif ($annot_line =~ m/^(SP|TR):/i) {
+    ($sdb, $id) = split(/:/,$annot_line);
+    $use_acc = 0;
+  }
+  elsif ($annot_line !~ m/\|/ && $annot_line !~ m/:/) {
+    $use_acc = 1;
+    ($acc) = split(/\s+/,$annot_line);
+  }
+  # deal with no-database SwissProt/NR
+  else {
+    ($acc)=($annot_line =~ /^(\S+)/);
+  }
+
+  # here we have an $acc or an $id: check to see if we have the data
+
+  my %annot_data = (seq_info=>$annot_line);
+  my $annot_key = '';
+
+  if ($use_acc) {
+    unless ($acc) {
+      warn "missing acc in $annot_line";
+      return "";
+    }
+    $acc =~ s/\.\d+$//;		# remove the accession version
+    $annot_key = $acc;
+  }
+  else {
+    unless ($id) {
+      warn "missing id in $annot_line";
+      return "";
+    }
+    $annot_key = $id;
+    $get_annots_sql = $get_pfam_id;
+    # NOTE(review): the fallback statement remains the accession-keyed
+    # uniprot query although the key here is an id -- $get_upfam_id
+    # looks like the intended fallback; confirm before switching.
+  }
+
+  # Already annotated: return the cached key.  (The id case used
+  # "next" here, which is not valid inside a subroutine without a loop
+  # of its own; both cases now return the key like the accession case.)
+  return $annot_key if ($annot_set{$annot_key});
+
+  $annot_set{$annot_key} = \%annot_data;
+
+  # try the pfamseq table first, then uniprot if nothing was found
+  $get_annots_sql->execute($annot_key);
+  unless ($get_annots_sql->rows()) {
+    $get_annots_sql = $get_annots_sql_u;
+    $get_annots_sql->execute($annot_key);
+  }
+
+  $annot_data{list} = $get_annot_sub->($get_annots_sql, $seq_len);
+
+  return $annot_key;
+}
+
+# get_pfam_annots($get_annots, $seq_length)
+#
+# Converts the rows of an executed domain query into a feature list for
+# one sequence.  Steps, in order:
+#   (1) read the rows, dropping/clipping domains beyond $seq_length
+#   (2) join consecutive fragments of the same family ("split domains")
+#   (3) --no-over: resolve overlaps; --split-over: make hybrid domains
+#   (4) --vdoms: add virtual domains for unaligned parts of a model
+#   (5) --neg: add NODOM regions between domains
+#   (6) map labels to clan names / domain numbers with domain_name()
+#
+# Arguments:
+#   $get_annots -- executed DBI statement handle, read with fetchrow_hashref()
+#   $seq_length -- sequence length; when false, the 'length' column of
+#                  the first row is used
+# Returns:
+#   array ref of features, [seq_start, seq_end, info] with --lav,
+#   otherwise [seq_start, '-', seq_end, info]
+#
+sub get_pfam_annots {
+  my ($get_annots, $seq_length) = @_;
+
+  $seq_length = 0 unless $seq_length;
+
+  my @pf_domains = ();
+
+  # get the list of domains, sorted by start
+
+  # $row_href has: seq_start, seq_end, model_start, model_end, model_length, 
+  #                pfamA_acc, pfamA_id, auto_pfamA_reg_full,
+  #                domain_evalue_score as evalue, length
+
+  while ( my $row_href = $get_annots->fetchrow_hashref()) {
+    # {info} is the label that is reported for the domain
+    if ($auto_reg) {
+      $row_href->{info} = $row_href->{auto_pfamA_reg_full};
+    } elsif ($pf_acc) {
+      $row_href->{info} = $row_href->{pfamA_acc};
+    } else {
+      $row_href->{info} = $row_href->{pfamA_id};
+    }
+
+    # no length given by the caller: use the length from the database
+    if ($row_href->{length} > $seq_length && $seq_length == 0) {
+      $seq_length = $row_href->{length};
+    }
+
+    # skip domains that start beyond the sequence, clip those that end beyond it
+    next if ($row_href->{seq_start} >= $seq_length);
+    if ($row_href->{seq_end} > $seq_length) {
+      $row_href->{seq_end} = $seq_length;
+    }
+
+    push @pf_domains, $row_href;
+  }
+
+  # before checking for domain overlap, check for "split-domains"
+  # (self-unbound) by looking for runs of the same domain that are
+  # ordered by model_start
+
+  if (scalar(@pf_domains) > 1) {
+    my @j_domains;		#joined domains
+    my @tmp_domains = @pf_domains;
+
+    my $prev_dom = shift(@tmp_domains);
+
+    for my $curr_dom (@tmp_domains) {
+      # to join domains:
+      # (1) the domains must be in order by model_start/end coordinates
+      # (2) the second fragment must start near the model position
+      #     where the first one ended
+      # (3) joining the domains cannot make the total combination too long
+
+      # check for model and sequence consistency
+      if (($prev_dom->{pfamA_acc} eq $curr_dom->{pfamA_acc})  # same family
+	  && $prev_dom->{model_start} < $curr_dom->{model_start}  # model check
+	  && $prev_dom->{model_end} < $curr_dom->{model_end}
+
+	  # limit overlap and gap in model coordinates.  Both bounds must
+	  # hold: joined with "||" the test was true for every positive
+	  # model_end and so limited nothing.
+	  && ($curr_dom->{model_start} > $prev_dom->{model_end} * 0.80
+	      && $curr_dom->{model_start} <  $prev_dom->{model_end} * 1.25)
+	  && ((($curr_dom->{model_end} - $curr_dom->{model_start}+1)/$curr_dom->{model_length} +
+	       ($prev_dom->{model_end} - $prev_dom->{model_start}+1)/$prev_dom->{model_length}) < 1.33)
+	 ) {			# join them by updating $prev_dom
+	$prev_dom->{seq_end} = $curr_dom->{seq_end};
+	$prev_dom->{model_end} = $curr_dom->{model_end};
+	$prev_dom->{auto_pfamA_reg_full} = $prev_dom->{auto_pfamA_reg_full} . ";". $curr_dom->{auto_pfamA_reg_full};
+	# the joined domain keeps the better (smaller) evalue
+	$prev_dom->{evalue} = ($prev_dom->{evalue} < $curr_dom->{evalue} ? $prev_dom->{evalue} : $curr_dom->{evalue});
+      } else {
+	push @j_domains, $prev_dom;
+	$prev_dom = $curr_dom;
+      }
+    }
+    push @j_domains, $prev_dom;
+    @pf_domains = @j_domains;
+
+
+    if ($no_over) {	# for either $no_over or $split_over, check for overlapping domains and edit/split them
+
+      my @tmp_domains = @pf_domains;	# allow shifts from copy of @pf_domains
+      my @save_domains = ();		# where the new domains go
+
+      my $prev_dom = shift @tmp_domains;
+
+      while (my $curr_dom = shift @tmp_domains) {
+
+	my @overlap_domains = ($prev_dom);
+
+	my $diff = $prev_dom->{seq_end} - $curr_dom->{seq_start};
+
+	my ($prev_len, $cur_len) = ($prev_dom->{seq_end}-$prev_dom->{seq_start}+1,
+				    $curr_dom->{seq_end}-$curr_dom->{seq_start}+1);
+
+	my $inclusion = ((($curr_dom->{seq_start} >= $prev_dom->{seq_start})    # start is right && end is left 
+			  && ($curr_dom->{seq_end} <= $prev_dom->{seq_end})) || # -- curr inside prev
+			 (($curr_dom->{seq_start} <= $prev_dom->{seq_start})    # start is left && end is right
+			  && ($curr_dom->{seq_end} >= $prev_dom->{seq_end})));  # -- prev is inside curr
+
+	my $longer_len = ($prev_len > $cur_len) ? $prev_len : $cur_len;
+
+	# check for overlap > domain_length/$over_fract
+	# (every following domain is compared with the same $prev_dom)
+	while ($inclusion || ($diff > 0 && $diff > $longer_len/$over_fract)) {
+	  push @overlap_domains, $curr_dom;
+	  $curr_dom = shift @tmp_domains;
+	  last unless $curr_dom;
+	  $diff = $prev_dom->{seq_end} - $curr_dom->{seq_start};
+	  ($prev_len, $cur_len) = ($prev_dom->{seq_end}-$prev_dom->{seq_start}+1, $curr_dom->{seq_end}-$curr_dom->{seq_start}+1);
+	  $longer_len = ($prev_len > $cur_len) ? $prev_len : $cur_len;
+	  $inclusion = ((($curr_dom->{seq_start} >= $prev_dom->{seq_start}) && ($curr_dom->{seq_end} <= $prev_dom->{seq_end})) ||
+			(($curr_dom->{seq_start} <= $prev_dom->{seq_start}) && ($curr_dom->{seq_end} >= $prev_dom->{seq_end})));
+	}
+
+	# check for overlapping domains; >1 because $prev_dom is always there
+	if (scalar(@overlap_domains) > 1 ) {
+	  # if $rpd2_fams, check for a chosen one
+
+	  for my $dom ( @overlap_domains) {
+	    $dom->{evalue} = 1.0 unless defined($dom->{evalue});
+	  }
+
+	  # keep the overlapping domain with the smallest evalue
+	  @overlap_domains = sort { $a->{evalue} <=> $b->{evalue} } @overlap_domains;
+	  $prev_dom = $overlap_domains[0];
+	}
+
+	# $prev_dom should be the best of the overlaps, and we are no longer overlapping > dom_length/3
+	push @save_domains, $prev_dom;
+	$prev_dom = $curr_dom;
+      }
+
+      # $prev_dom is undefined here when the list ended inside an overlap group
+      if ($prev_dom) {
+	push @save_domains, $prev_dom;
+      }
+
+      @pf_domains = @save_domains;
+
+      # now check for smaller overlaps
+      # (the boundary is moved so that adjacent domains no longer overlap)
+      for (my $i=1; $i < scalar(@pf_domains); $i++) {
+	if ($pf_domains[$i-1]->{seq_end} >= $pf_domains[$i]->{seq_start}) {
+	  my $overlap = $pf_domains[$i-1]->{seq_end} - $pf_domains[$i]->{seq_start};
+	  $pf_domains[$i-1]->{seq_end} -= int($overlap/2);
+	  $pf_domains[$i]->{seq_start} = $pf_domains[$i-1]->{seq_end}+1;
+	}
+      }
+    }
+    elsif ($split_over) {   # here, everything that overlaps by > $min_vdom should be split into a separate domain
+      my @save_domains = ();		# where the new domains go
+
+      # check to see if one domain is included (or overlapping) more
+      # than xx% of the other.  If so, pick the longer one
+
+      my ($prev_dom, $curr_dom) = ($pf_domains[0],0) ;
+      for (my $i=1; $i < scalar(@pf_domains); $i++) {
+	$curr_dom = $pf_domains[$i];
+
+	my ($prev_len, $cur_len) = ($prev_dom->{seq_end}-$prev_dom->{seq_start}+1, $curr_dom->{seq_end}-$curr_dom->{seq_start}+1);
+
+	if (($curr_dom->{seq_start} >= $prev_dom->{seq_start}) && ($curr_dom->{seq_end} <= $prev_dom->{seq_end})
+	   && $cur_len / $prev_len > 0.80) {
+	  # $prev_dom stays the same, $curr_dom deleted
+	  next;
+	}
+	elsif (($curr_dom->{seq_start} <= $prev_dom->{seq_start}) && ($curr_dom->{seq_end} >= $prev_dom->{seq_end})
+	      && $prev_len / $cur_len > 0.80) {
+	  $prev_dom = $curr_dom; # this should delete $prev_dom
+	  next;
+	}
+
+	if ($prev_dom->{seq_end} >= $curr_dom->{seq_start} + $min_vdom) {
+	  # the overlapping region becomes a hybrid domain of its own:
+	  # prev is cut back to end before it, curr to start after it
+	  my ($l_seq_end, $r_seq_start) = ($curr_dom->{seq_start}-1, $prev_dom->{seq_end}+1);
+
+	  $prev_dom->{seq_end} = $l_seq_end;
+	  push @save_domains, $prev_dom;
+	  my $new_dom = {seq_start => $l_seq_end+1, seq_end=>$r_seq_start-1, 
+			 model_length => -1,
+			 pfamA_acc=>$prev_dom->{pfamA_acc}."/".$curr_dom->{pfamA_acc},
+			 pfamA_id=>$prev_dom->{pfamA_id}."/".$curr_dom->{pfamA_id},
+			 };
+
+	  if ($pf_acc) {
+	    $new_dom->{info} = $new_dom->{pfamA_acc};
+	  }
+	  else {
+	    $new_dom->{info} = $new_dom->{pfamA_id};
+	  }
+
+	  push @save_domains, $new_dom;
+	  $curr_dom->{seq_start} = $r_seq_start;
+	  $prev_dom = $curr_dom;
+	}
+	else {
+	  push @save_domains, $prev_dom;
+	  $prev_dom = $curr_dom;
+	}
+      }
+      push @save_domains, $prev_dom;
+      @pf_domains = @save_domains;
+    }
+  }
+
+  # $vdoms -- virtual Pfam domains -- the equivalent of $neg_doms,
+  # but covering parts of a Pfam model that are not annotated.  split
+  # domains have been joined, so simply check beginning and end of
+  # each domain (but must also check for bounded-ness)
+  # only add when 10% or more is missing and missing length > $min_nodom
+
+  if ($vdoms && scalar(@pf_domains)) {
+    my @vpf_domains;
+
+    my $curr_dom = $pf_domains[0];
+    # sequence length as stored in the database row of the first domain
+    my $length = $curr_dom->{length};
+
+    my $prev_dom={seq_end=>0, pfamA_acc=>''};
+    my $prev_dom_end = 0;
+    my $next_dom_start = $length+1;
+
+    for (my $dom_ix=0; $dom_ix < scalar(@pf_domains); $dom_ix++ ) {
+      $curr_dom = $pf_domains[$dom_ix];
+
+      my $pfamA =  $curr_dom->{pfamA_acc};
+
+      # first, look left, is there a domain there (if there is,
+      # it should be updated right
+
+      # my $min_vdom = $curr_dom->{model_length} / 10;
+
+      # models shorter than $min_vdom (including the hybrid domains made
+      # by --split-over, model_length -1) never get virtual domains
+      if ($curr_dom->{model_length} < $min_vdom) {
+	push @vpf_domains, $curr_dom;
+	next;
+      }
+      if ($prev_dom->{pfamA_acc}) { # look for previous domain
+	$prev_dom_end = $prev_dom->{seq_end};
+      }
+
+      # there is a domain to the left, how much room is available?
+      my $left_dom_len = min($curr_dom->{seq_start}-$prev_dom_end-1, $curr_dom->{model_start}-1);
+      if ( $left_dom_len > $min_vdom) {
+	# there is room for a virtual domain
+	# model coordinates: the $left_dom_len model positions that end
+	# just before the aligned part of the model
+	my %new_dom = (seq_start=> $curr_dom->{seq_start}-$left_dom_len,
+	               seq_end => $curr_dom->{seq_start}-1,
+		       info=>'@'.$curr_dom->{info},
+		       model_length=>$curr_dom->{model_length},
+		       model_end => $curr_dom->{model_start}-1,
+		       model_start => $curr_dom->{model_start}-$left_dom_len,
+		       pfamA_acc=>$pfamA,
+		      );
+	push @vpf_domains, \%new_dom;
+      }
+
+      # save the current domain
+      push @vpf_domains, $curr_dom;
+      $prev_dom = $curr_dom;
+
+      if ($dom_ix < $#pf_domains) { # there is a domain to the right
+	# first, give all the extra space to the first domain (no splitting)
+	$next_dom_start = $pf_domains[$dom_ix+1]->{seq_start};
+      }
+      else {
+	# NOTE(review): the initial value above is $length+1; with
+	# $length the last residue is not counted as available -- confirm
+	$next_dom_start = $length;
+      }
+
+      # is there room for a virtual domain right
+	  
+      my $right_dom_len = min($next_dom_start-$curr_dom->{seq_end}-1, # space available 
+			      $curr_dom->{model_length}-$curr_dom->{model_end} # space needed
+			     );
+      if ( $right_dom_len > $min_vdom) {
+	my %new_dom = (seq_start=> $curr_dom->{seq_end}+1,
+		       seq_end=> $curr_dom->{seq_end}+$right_dom_len,
+		       info=>'@'.$curr_dom->{info},
+		       model_length => $curr_dom->{model_length},
+		       pfamA_acc=> $pfamA,
+		      );
+	push @vpf_domains, \%new_dom;
+	$prev_dom = \%new_dom;
+      }
+    }				# all done, check for last one
+
+    # $curr_dom=$pf_domains[-1];
+    # # my $min_vdom = $curr_dom->{model_length}/10;
+
+    # my $right_dom_len = min($length - $curr_dom->{seq_end}+1,  # space available 
+    # 			    $curr_dom->{model_length}-$curr_dom->{model_end} # space needed
+    # 			   );
+    # if ($right_dom_len > $min_vdom) {
+    #   my %new_dom = (seq_start=> $curr_dom->{seq_end}+1,
+    # 		     seq_end => $curr_dom->{seq_end}+$right_dom_len,
+    # 		     info=>'@'.$curr_dom->{pfamA_acc},
+    # 		     model_len=> $curr_dom->{model_len},
+    # 		     pfamA_acc => $curr_dom->{pfamA_acc},
+    # 		     model_start => $curr_dom->{model_end}+1,
+    # 		     model_end => $curr_dom->{model_len},
+    # 		     );
+
+    #   push @vpf_domains, \%new_dom;
+    # }
+
+    # @vpf_domains has both old @pf_domains and new neg-domains
+    @pf_domains = @vpf_domains;
+  }
+
+  if ($neg_doms) {
+    # insert a NODOM feature wherever more than $min_nodom residues
+    # separate two domains, or the last domain and the end of the sequence
+    my @npf_domains;
+    my $prev_dom={seq_end=>0};
+    for my $curr_dom ( @pf_domains) {
+      if ($curr_dom->{seq_start} - $prev_dom->{seq_end} > $min_nodom) {
+	my %new_dom = (seq_start=>$prev_dom->{seq_end}+1, seq_end => $curr_dom->{seq_start}-1, info=>'NODOM');
+	push @npf_domains, \%new_dom;
+      }
+      push @npf_domains, $curr_dom;
+      $prev_dom = $curr_dom;
+    }
+    if ($seq_length - $prev_dom->{seq_end} > $min_nodom) {
+      my %new_dom = (seq_start=>$prev_dom->{seq_end}+1, seq_end=>$seq_length, info=>'NODOM');
+      if ($new_dom{seq_end} > $new_dom{seq_start}) {
+	push @npf_domains, \%new_dom;
+      }
+    }
+
+    # @npf_domains has both old @pf_domains and new neg-domains
+    @pf_domains = @npf_domains;
+  }
+
+  # now make sure we have useful names: colors
+
+  for my $pf (@pf_domains) {
+    $pf->{info} = domain_name($pf->{info}, $pf->{pfamA_acc});
+  }
+
+  my @feats = ();
+  for my $d_ref (@pf_domains) {
+    if ($lav) {
+      push @feats, [$d_ref->{seq_start}, $d_ref->{seq_end}, $d_ref->{info}];
+    } else {
+      push @feats, [$d_ref->{seq_start}, '-', $d_ref->{seq_end},  $d_ref->{info} ];
+      #      push @feats, [$d_ref->{seq_end}, ']', '-', ""];
+    }
+
+  }
+
+  return \@feats;
+}
+
+# smaller of two numbers; the first argument is returned on a tie
+sub min {
+  my ($first, $second) = @_;
+
+  return $first if ($first <= $second);
+  return $second;
+}
+
+# larger of two numbers; the first argument is returned on a tie
+sub max {
+  my ($first, $second) = @_;
+
+  return $first if ($first >= $second);
+  return $second;
+}
+
+# domain name takes a uniprot domain label, removes comments ( ;
+# truncated) and numbers and returns a canonical form. Thus:
+# Cortactin 6.
+# Cortactin 7; truncated.
+# becomes "Cortactin"
+#
+
+# domain_name($value, $pfamA_acc)
+#
+# Returns the label to report for a domain and makes sure the label has
+# a number in %domains.  Unless --no-clans is set, a family that belongs
+# to a clan is reported under the clan label ("C.<clan_id>", or the clan
+# accession with --pfacc), so families of one clan share a number.  A
+# leading '@' (virtual domain) is removed for the lookup and put back on
+# the returned label.
+#
+sub domain_name {
+
+  my ($value, $pfamA_acc) = @_;
+
+  # s/// succeeds only for virtual domains
+  my $is_virtual = ($value =~ s/^@//) ? 1 : 0;
+
+  if ($no_clans) {
+    # every family gets its own number the first time it is seen
+    unless (defined($domains{$value})) {
+      $domain_clan{$value} = 0;
+      $domains{$value} = ++$domain_cnt;
+      push @domain_list, $pfamA_acc;
+    }
+  }
+  elsif (defined($domain_clan{$value})) {
+    # seen before: substitute the clan label when the family has a clan
+    my $known_clan = $domain_clan{$value};
+    if ($known_clan && $known_clan->{clan_acc}) {
+      if ($pf_acc) {
+	$value = $known_clan->{clan_acc};
+      }
+      else {
+	$value = "C." . $known_clan->{clan_id};
+      }
+    }
+  }
+  else {
+    ## only do this for new domains, old domains have known mappings
+
+    # check to see if it's a clan
+    $get_pfam_clan->execute($pfamA_acc);
+    my $pfam_clan_href = $get_pfam_clan->fetchrow_hashref();
+
+    if ($pfam_clan_href) {	# is a clan
+      my $clan_id = $pfam_clan_href->{clan_id};
+      my $clan_acc = $pfam_clan_href->{clan_acc};
+
+      my $c_value = ($pf_acc ? $clan_acc : "C." . $clan_id);
+
+      my %clan_info = (clan_id => $clan_id,
+		       clan_acc => $clan_acc);
+      $domain_clan{$value} = \%clan_info;
+
+      # a clan that has not been seen yet gets the next domain number
+      unless ($domains{$c_value}) {
+	$domains{$c_value} = ++$domain_cnt;
+	push @domain_list, $pfamA_acc;
+      }
+      $clan_info{domain_cnt} = $domains{$c_value};
+      $value = $c_value;
+    }
+    else {			# not a clan
+      $domain_clan{$value} = 0;
+      $domains{$value} = ++$domain_cnt;
+      push @domain_list, $pfamA_acc;
+    }
+  }
+
+  # virtual domains share the number of the real domain
+  if ($is_virtual) {
+    my $v_value = '@'.$value;
+    $domains{$v_value} = $domains{$value};
+    $value = $v_value;
+  }
+  return $value;
+}
+
+# domain_num($value, $number)
+#
+# For a virtual domain label (leading '@') the '@' becomes 'v' and a
+# 'v' is appended to the number; other labels are returned unchanged.
+# Returns the list ($value, $number).
+#
+sub domain_num {
+  my ($value, $number) = @_;
+
+  # the substitution succeeds only when the label starts with '@'
+  $number = $number."v" if ($value =~ s/^@/v/);
+
+  return ($value, $number);
+}
+
+
+__END__
+
+=pod
+
+=head1 NAME
+
+ann_pfam30.pl
+
+=head1 SYNOPSIS
+
+ ann_pfam30.pl --neg-doms  --vdoms 'sp|P09488|GSTM1_HUMAN' | accession.file
+
+=head1 OPTIONS
+
+ -h	short help
+ --help include description
+ --no-over  : generate non-overlapping domains (equivalent to ann_pfam.pl)
+ --split-over  : overlaps of two domains generate a new hybrid domain
+ --no-clans : do not use clans with multiple families from same clan
+ --neg-doms : report domains between annotated domains as NODOM
+                 (also --neg, --neg_doms)
+ --vdoms : produce "virtual domains" using model_start, 
+           model_end for partial pfam domains
+ --min_nodom=10  : minimum length between domains for NODOM
+
+ --host, --user, --password, --port --db : info for mysql database
+
+=head1 DESCRIPTION
+
+C<ann_pfam30.pl> extracts domain information from the pfam mysql
+database. Currently, the program works with database
+sequence descriptions in several formats:
+
+ >gi|1705556|sp|P54670.1|CAF1_DICDI
+ >sp|P09488|GSTM1_HUMAN
+ >sp:CALM_HUMAN 
+
+C<ann_pfam30.pl> uses the C<pfamA_reg_full_significant>, C<pfamseq>,
+and C<pfamA> tables of the C<pfam> database to extract domain
+information on a protein. 
+
+If the C<--no-over> option is set, overlapping domains are selected and
+edited to remove overlaps.  For proteins with multiple overlapping
+domains (domains overlap by more than 1/3 of the domain length),
+C<ann_pfam30.pl> selects the domain annotation with the best
+C<domain_evalue_score>.  When domains overlap by less than 1/3 of the
+domain length, they are shortened to remove the overlap.
+
+If the C<--split-over> option is set, if two domains overlap, the
+overlapping region is split out of the domains and labeled as a new,
+virtual-like, domain.  If one domain is internal to another and spans
+80% of the domain, the shorter domain is removed.
+
+C<ann_pfam30.pl> is designed to be used by the B<FASTA> programs with
+the C<-V \!ann_pfam30.pl> or C<-V "\!ann_pfam30.pl --neg"> option.
+
+=head1 AUTHOR
+
+William R. Pearson, wrp at virginia.edu
+
+=cut
diff --git a/scripts/ann_pfam30_tmptbl.pl b/scripts/ann_pfam30_tmptbl.pl
new file mode 100755
index 0000000..c9e5b19
--- /dev/null
+++ b/scripts/ann_pfam30_tmptbl.pl
@@ -0,0 +1,875 @@
+#!/usr/bin/perl -w
+
+################################################################
+# copyright (c) 2014,2015 by William R. Pearson and The Rector &
+# Visitors of the University of Virginia */
+################################################################
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under this License is distributed on an "AS
+# IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+# express or implied.  See the License for the specific language
+# governing permissions and limitations under the License. 
+################################################################
+
+# ann_pfam.pl gets an annotation file from fasta36 -V with a line of the form:
+
+# gi|62822551|sp|P00502|GSTA1_RAT Glutathione S-transfer\n  (at least from pir1.lseg)
+#
+# it must:
+# (1) read in the line
+# (2) parse it to get the up_acc
+# (3) return the tab delimited features
+#
+
+# this is the first version that works with the new Pfam strategy of
+# separating Uniprot reference sequences from the rest of uniprot.  as
+# a result, it is possible that 2 SQL queries will be required, one to
+# pfamA_reg_full_significant and a second to uniprot_reg_full.
+
+# modified 15-Jan-2017 to reduce the number of calls when the same
+# accession is present multiple times.  Accessions are saved in a hash
+# that ensures uniqueness.
+#
+# Uses the temporary table tmp_annot.targets for more rapid joins.  $user must have
+# create temporary tables/select permissions for tmp_annot
+#
+
+use strict;
+
+use DBI;
+use Getopt::Long;
+use Pod::Usage;
+use File::Temp qw/tempfile/;
+
+use vars qw($host $db $port $user $pass);
+
+my $hostname = `/bin/hostname`;
+
+($host, $db, $port, $user, $pass)  = ("wrpxdb.its.virginia.edu", "pfam30", 0, "web_user", "fasta_www");
+#$host = 'xdb';
+#$host = 'localhost';
+#$db = 'RPD2_pfam28u';
+
+my ($auto_reg,$rpd2_fams, $neg_doms, $vdoms, $lav, $no_doms, $no_clans, $pf_acc, $acc_comment, $bound_comment, $shelp, $help) = 
+  (0, 0, 0, 0, 0,0, 0, 0, 0, 0, 0, 0,);
+my ($no_over, $split_over, $over_fract) = (0, 0, 3.0);
+
+my ($color_sep_str, $show_color) = (" :",1);
+$color_sep_str = '~';
+
+my ($min_nodom, $min_vdom) = (10,10);
+
+GetOptions(
+    "host=s" => \$host,
+    "db=s" => \$db,
+    "user=s" => \$user,
+    "password=s" => \$pass,
+    "port=i" => \$port,
+    "lav" => \$lav,
+    "acc_comment" => \$acc_comment,
+    "bound_comment" => \$bound_comment,
+    "color!" => \$show_color,
+    "no-over" => \$no_over,
+    "no_over" => \$no_over,
+    "split-over" => \$split_over,
+    "split_over" => \$split_over,
+    "over_fract" => \$over_fract,
+    "over-fract" => \$over_fract,
+    "no-clans" => \$no_clans,
+    "no_clans" => \$no_clans,
+    "neg" => \$neg_doms,
+    "neg_doms" => \$neg_doms,
+    "neg-doms" => \$neg_doms,
+    "min_nodom=i" => \$min_nodom,
+    "vdoms" => \$vdoms,
+    "v_doms" => \$vdoms,
+    "pfacc" => \$pf_acc,
+    "RPD2" => \$rpd2_fams,
+    "auto_reg" => \$auto_reg,
+    "h|?" => \$shelp,
+    "help" => \$help,
+    );
+
+pod2usage(1) if $shelp;
+pod2usage(exitstatus => 0, verbose => 2) if $help;
+pod2usage(1) unless (@ARGV || -p STDIN || -f STDIN);
+
+my $connect = "dbi:mysql(AutoCommit=>1,RaiseError=>1):database=$db";
+$connect .= ";host=$host" if $host;
+$connect .= ";port=$port" if $port;
+
+my $dbh = DBI->connect($connect,
+		       $user,
+		       $pass
+		      ) or die $DBI::errstr;
+
+my %annot_types = ();
+my %domains = (NODOM=>0);
+my %domain_clan = (NODOM => {clan_id => 'NODOM', clan_acc=>0, domain_cnt=>0});
+my @domain_list = (0);
+my $domain_cnt = 0;
+
+my $pfamA_reg_full = 'pfamA_reg_full_significant';
+my $uniprot_reg_full = 'uniprot_reg_full';
+
+my @pfam_fields = qw(seq_start seq_end model_start model_end model_length pfamA_acc pfamA_id auto_pfamA_reg_full evalue length);
+my @upfam_fields = qw(seq_start seq_end model_start model_end model_length pfamA_acc pfamA_id auto_uniprot_reg_full length);
+
+my $get_pfam_acc = $dbh->prepare(<<EOSQL);
+SELECT t_acc, seq_start, seq_end, model_start, model_end, model_length, pfamA_acc, pfamA_id, auto_pfamA_reg_full, domain_evalue_score as evalue, length
+FROM pfamseq
+JOIN pfamA_reg_full_significant using(pfamseq_acc)
+JOIN pfamA USING (pfamA_acc)
+JOIN tmp_annot.targets on(pfamseq_acc=t_acc)
+WHERE in_full = 1
+ORDER BY t_acc,seq_start
+
+EOSQL
+
+my $get_upfam_acc = $dbh->prepare(<<EOSQL);
+SELECT t_acc, seq_start, seq_end, model_start, model_end, model_length, pfamA_acc, pfamA_id, auto_uniprot_reg_full as auto_pfamA_reg_full, domain_evalue_score as evalue, length
+FROM uniprot
+JOIN uniprot_reg_full using(uniprot_acc)
+JOIN pfamA USING (pfamA_acc)
+JOIN tmp_annot.targets on(uniprot_acc=t_acc)
+WHERE in_full = 1
+ORDER BY t_acc, seq_start
+
+EOSQL
+
+my $get_pfam_refacc = $dbh->prepare(<<EOSQL);
+SELECT t_acc, seq_start, seq_end, model_start, model_end, model_length, pfamA_acc, pfamA_id, auto_pfamA_reg_full, domain_evalue_score as evalue, length
+  FROM $pfamA_reg_full
+  JOIN pfamseq using(pfamseq_acc)
+  JOIN pfamA USING (pfamA_acc)
+  JOIN uniprot.refseq2up as rf2up on(rf2up.up_acc=pfamseq_acc)
+  JOIN tmp_annot.targets on(rf2up.refseq_acc=t_acc)
+ WHERE in_full = 1
+ ORDER BY t_acc, seq_start
+
+EOSQL
+
+my $get_upfam_refacc = $dbh->prepare(<<EOSQL);
+SELECT t_acc,seq_start, seq_end, model_start, model_end, model_length, pfamA_acc, pfamA_id, auto_uniprot_reg_full as auto_pfamA_reg_full, domain_evalue_score as evalue, length
+  FROM uniprot
+  JOIN uniprot_reg_full using(uniprot_acc)
+  JOIN pfamA USING (pfamA_acc)
+  JOIN uniprot.refseq2up as rf2up on(rf2up.up_acc=uniprot_acc)
+  JOIN tmp_annot.targets on(rf2up.refseq_acc=t_acc)
+WHERE in_full = 1
+ORDER BY t_acc, seq_start
+
+EOSQL
+
+my $get_annots_sql = $get_pfam_acc;
+my $get_annots_sql_u = $get_upfam_acc;
+
+my $get_pfam_id = $dbh->prepare(<<EOSQL);
+SELECT t_acc, seq_start, seq_end, model_start, model_end, model_length, pfamA_acc, pfamA_id, auto_pfamA_reg_full, domain_evalue_score as evalue, length
+  FROM pfamseq
+  JOIN $pfamA_reg_full using(pfamseq_acc)
+  JOIN pfamA USING (pfamA_acc)
+  JOIN tmp_annot.targets on(pfamseq_id=t_acc)
+WHERE in_full=1
+ORDER BY t_acc, seq_start
+
+EOSQL
+
+my $get_upfam_id = $dbh->prepare(<<EOSQL);
+SELECT t_acc, seq_start, seq_end, model_start, model_end, model_length, pfamA_acc, pfamA_id, auto_uniprot_reg_full as auto_pfamA_reg_full, domain_evalue_score as evalue, length
+  FROM uniprot
+  JOIN uniprot_reg_full using(pfamseq_acc)
+  JOIN pfamA USING (pfamA_acc)
+  JOIN tmp_annot.targets on(uniprot_id=t_acc)
+WHERE in_full=1
+ORDER BY t_acc, seq_start
+
+EOSQL
+
+my $get_pfam_clan = $dbh->prepare(<<EOSQL);
+
+SELECT clan_acc, clan_id
+FROM clan
+JOIN clan_membership using(clan_acc)
+WHERE pfamA_acc=?
+
+EOSQL
+
+my $get_rpd2_clans = $dbh->prepare(<<EOSQL);
+
+SELECT auto_pfamA, clan
+FROM ljm_db.RPD2_final_fams
+WHERE clan is not NULL
+
+EOSQL
+
+$dbh->do(<<EOSQL);
+create temporary table tmp_annot.targets (t_acc char(10) primary key)
+EOSQL
+
+# -- LEFT JOIN clan_membership USING (auto_pfamA)
+# -- LEFT JOIN clans using(auto_clan)
+
+my ($tmp, $gi, $sdb, $acc, $id, $use_acc);
+
+my @lav_list = qw(seq_start seq_end info);
+my @no_lav_list = qw(seq_start dash seq_end info);
+my $out_list_r = \@no_lav_list;
+if ($lav) {
+  $show_color = 0;
+  $out_list_r = \@lav_list;
+}
+
+# get the query
+my ($query, $seq_len) = @ARGV;
+$seq_len = 0 unless defined($seq_len);
+
+$query =~ s/^>// if ($query);
+
+my @annots = ();
+my %annot_set = ();
+
+my %rpd2_clan_fams = ();
+
+if ($rpd2_fams) {
+  $get_rpd2_clans->execute();
+  my ($auto_pfam, $auto_clan);
+  while (($auto_pfam, $auto_clan)=$get_rpd2_clans->fetchrow_array()) {
+    $rpd2_clan_fams{$auto_pfam} = $auto_clan;
+  }
+}
+
+#if it's a file I can open, read and parse it
+unless ($query && ($query =~ m/[\|:]/ ||
+		   $query =~ m/^[NX]P_/ ||
+		   $query =~ m/^[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}\s/)) {
+
+  while (my $a_line = <>) {
+    $a_line =~ s/^>//;
+    chomp $a_line;
+    push @annots, show_annots($a_line);
+  }
+}
+else {
+  push @annots, show_annots("$query\t$seq_len");
+}
+
+# @annots has a list of id's or annotations
+# write to temporary file, load data local infile, join to get results
+
+my ($fh, $temp_file) = tempfile(TEMPLATE=>'accannXXXXX');
+my @u_annots = keys %annot_set;
+print $fh join("\n",@u_annots);
+close($fh);
+
+$dbh->do("load data local infile '$temp_file' into table tmp_annot.targets");
+
+unlink($temp_file);
+
+# $get_annots_sql->execute();
+# while (my $annot_ar = $get_annots_sql->fetchrow_arrayref()) {
+#   my %annot_data = ();
+#   @annot_data{@pfam_fields} = @{$annot_ar}[1..10];
+#   if (!defined($annot_set{$annot_ar->[0]}->{list})) {
+#     $annot_set{$annot_ar->[0]}->{list} = [\%annot_data];
+#   }
+#   else {
+#     push @{$annot_set{$annot_ar->[0]}->{list}}, \%annot_data;
+#   }
+# }
+
+$get_annots_sql_u->execute();
+while (my $annot_hr = $get_annots_sql_u->fetchrow_hashref()) {
+  if (!defined($annot_set{$annot_hr->{t_acc}}->{list})) {
+    $annot_set{$annot_hr->{t_acc}}->{list} = [$annot_hr];
+  }
+  else {
+    push @{$annot_set{$annot_hr->{t_acc}}->{list}}, $annot_hr;
+  }
+}
+
+for my $u_acc (@u_annots) {
+  map_pfam_annots($annot_set{$u_acc});
+}
+
+for my $seq_annot (@annots) {
+  next unless $seq_annot;
+  my $annot_r = $annot_set{$seq_annot};
+  print ">",$annot_r->{seq_info},"\n";
+  for my $annot (@{$annot_r->{list}}) {
+    $annot->{dash} = '-';
+    if (defined($domains{$annot->{info}})) {
+      my ($a_name, $a_num) = domain_num($annot->{info},$domains{$annot->{info}});
+      $annot->{info} = $a_name;
+      my $tmp_a_num = $a_num;
+      $tmp_a_num =~ s/v$//;
+      if ($acc_comment) {
+	$annot->{info} .= "{$domain_list[$tmp_a_num]}";
+      }
+      if ($bound_comment) {
+	$annot->{info} .= $color_sep_str.$annot->{seq_start}.":".$annot->{seq_end};
+      }
+      elsif ($show_color) {
+	  $annot->{info} .= $color_sep_str.$a_num;
+      }
+    }
+    print join("\t",@{$annot}{@{$out_list_r}}),"\n";
+  }
+}
+
+exit(0);
+
+sub show_annots {
+  my ($query_len) = @_;
+
+  my ($annot_line, $seq_len) = split(/\t/,$query_len);
+
+  my $pfamA_acc;
+
+  $use_acc = 1;
+  $get_annots_sql = $get_pfam_acc;
+  $get_annots_sql_u = $get_upfam_acc;
+
+  if ($annot_line =~ m/^pf\d+\|/) {
+    ($sdb, $gi, $pfamA_acc, $acc, $id) = split(/\|/,$annot_line);
+#    $dbh->do("use RPD2_pfam");
+  }
+  elsif ($annot_line =~ m/^gi\|/) {
+    ($tmp, $gi, $sdb, $acc, $id) = split(/\|/,$annot_line);
+    if ($sdb =~ m/ref/) {
+	$get_annots_sql = $get_pfam_refacc;
+	$get_annots_sql_u = $get_upfam_refacc;
+    }
+  }
+  elsif ($annot_line =~ m/^(sp|tr)\|/) {
+    ($sdb, $acc, $id) = split(/\|/,$annot_line);
+  }
+  elsif ($annot_line =~ m/^ref\|/) {
+    ($sdb, $acc) = split(/\|/,$annot_line);
+    $get_annots_sql = $get_pfam_refacc;
+    $get_annots_sql_u = $get_upfam_refacc;
+  }
+  elsif ($annot_line =~ m/^(SP|TR):/i) {
+    ($sdb, $id) = split(/:/,$annot_line);
+    $use_acc = 0;
+  }
+  elsif ($annot_line !~ m/\|/ && $annot_line !~ m/:/) {
+    $use_acc = 1;
+    ($acc) = split(/\s+/,$annot_line);
+  }
+  # deal with no-database SwissProt/NR
+  else {
+    ($acc)=($annot_line =~ /^(\S+)/);
+  }
+
+  # here we have an $acc or an $id: check to see if we have the data
+
+  my %annot_data = (seq_info=>$annot_line, length=>$seq_len);
+  my $annot_key = '';
+  unless ($use_acc) {
+    next if ($annot_set{$id});
+    $annot_set{$id} = \%annot_data;
+    $annot_key = $id;
+
+    $get_annots_sql = $get_pfam_id;
+  }
+  else {
+    unless ($acc) {
+      warn "missing acc in $annot_line";
+      return "";
+    }
+    else {
+      $acc =~ s/\.\d+$//;
+
+      next if ($annot_set{$acc});
+      $annot_set{$acc} = \%annot_data;
+      $annot_key = $acc;
+    }
+  }
+
+  return $annot_key;
+}
+
+sub map_pfam_annots {
+  my ($annot_ref) = @_;
+
+  my $seq_length = $annot_ref->{length};
+  my $pf_domains_r = $annot_ref->{list};
+
+  my $row_href=$annot_ref->{list}[0];
+  if ($row_href->{length} && $row_href->{length} > $seq_length && $seq_length == 0) {
+      $annot_ref->{length} = $seq_length = $row_href->{length};
+    }
+
+  # fill in {info} field
+  for my $pf_dom (@$pf_domains_r) {
+    if ($auto_reg) {
+      $pf_dom->{info} = $pf_dom->{auto_pfamA_reg_full};
+    } elsif ($pf_acc) {
+      $pf_dom->{info} = $pf_dom->{pfamA_acc};
+    } else {
+      $pf_dom->{info} = $pf_dom->{pfamA_id};
+    }
+  }
+
+  # before checking for domain overlap, check for "split-domains"
+  # (self-unbound) by looking for runs of the same domain that are
+  # ordered by model_start
+
+  if (scalar(@{$pf_domains_r}) > 1) {
+    my @j_domains;		#joined domains
+    my @tmp_domains = @{$pf_domains_r};
+
+    my $prev_dom = shift(@tmp_domains);
+
+    for my $curr_dom (@tmp_domains) {
+      # to join domains:
+      # (1) the domains must be in order by model_start/end coordinates
+      # (3) joining the domains cannot make the total combination too long
+
+      # check for model and sequence consistency
+      if (($prev_dom->{pfamA_acc} eq $curr_dom->{pfamA_acc})  # same family
+	  && $prev_dom->{model_start} < $curr_dom->{model_start}  # model check
+	  && $prev_dom->{model_end} < $curr_dom->{model_end}
+
+	  && ($curr_dom->{model_start} > $prev_dom->{model_end} * 0.80   # limit overlap
+	      || $curr_dom->{model_start} <  $prev_dom->{model_end} * 1.25)
+	  && ((($curr_dom->{model_end} - $curr_dom->{model_start}+1)/$curr_dom->{model_length} +
+	       ($prev_dom->{model_end} - $prev_dom->{model_start}+1)/$prev_dom->{model_length}) < 1.33)
+	 ) {			# join them by updating $prev_dom
+	$prev_dom->{seq_end} = $curr_dom->{seq_end};
+	$prev_dom->{model_end} = $curr_dom->{model_end};
+	$prev_dom->{auto_pfamA_reg_full} = $prev_dom->{auto_pfamA_reg_full} . ";". $curr_dom->{auto_pfamA_reg_full};
+	$prev_dom->{evalue} = ($prev_dom->{evalue} < $curr_dom->{evalue} ? $prev_dom->{evalue} : $curr_dom->{evalue});
+      } else {
+	push @j_domains, $prev_dom;
+	$prev_dom = $curr_dom;
+      }
+    }
+    push @j_domains, $prev_dom;
+    @{$pf_domains_r} = @j_domains;
+
+
+    if ($no_over) {	# for either $no_over or $split_over, check for overlapping domains and edit/split them
+
+      my @tmp_domains = @{$pf_domains_r};	# allow shifts from copy of @pf_domains
+      my @save_domains = ();		# where the new domains go
+
+      my $prev_dom = shift @tmp_domains;
+
+      while (my $curr_dom = shift @tmp_domains) {
+
+	my @overlap_domains = ($prev_dom);
+
+	my $diff = $prev_dom->{seq_end} - $curr_dom->{seq_start};
+
+	my ($prev_len, $cur_len) = ($prev_dom->{seq_end}-$prev_dom->{seq_start}+1,
+				    $curr_dom->{seq_end}-$curr_dom->{seq_start}+1);
+
+	my $inclusion = ((($curr_dom->{seq_start} >= $prev_dom->{seq_start})    # start is right && end is left 
+			  && ($curr_dom->{seq_end} <= $prev_dom->{seq_end})) || # -- curr inside prev
+			 (($curr_dom->{seq_start} <= $prev_dom->{seq_start})    # start is left && end is right
+			  && ($curr_dom->{seq_end} >= $prev_dom->{seq_end})));  # -- prev is inside curr
+
+	my $longer_len = ($prev_len > $cur_len) ? $prev_len : $cur_len;
+
+	# check for overlap > domain_length/$over_fract
+	while ($inclusion || ($diff > 0 && $diff > $longer_len/$over_fract)) {
+	  push @overlap_domains, $curr_dom;
+	  $curr_dom = shift @tmp_domains;
+	  last unless $curr_dom;
+	  $diff = $prev_dom->{seq_end} - $curr_dom->{seq_start};
+	  ($prev_len, $cur_len) = ($prev_dom->{seq_end}-$prev_dom->{seq_start}+1, $curr_dom->{seq_end}-$curr_dom->{seq_start}+1);
+	  $longer_len = ($prev_len > $cur_len) ? $prev_len : $cur_len;
+	  $inclusion = ((($curr_dom->{seq_start} >= $prev_dom->{seq_start}) && ($curr_dom->{seq_end} <= $prev_dom->{seq_end})) ||
+			(($curr_dom->{seq_start} <= $prev_dom->{seq_start}) && ($curr_dom->{seq_end} >= $prev_dom->{seq_end})));
+	}
+
+	# check for overlapping domains; >1 because $prev_dom is always there
+	if (scalar(@overlap_domains) > 1 ) {
+	  # if $rpd2_fams, check for a chosen one
+
+	  for my $dom ( @overlap_domains) {
+	    $dom->{evalue} = 1.0 unless defined($dom->{evalue});
+	  }
+
+	  @overlap_domains = sort { $a->{evalue} <=> $b->{evalue} } @overlap_domains;
+	  $prev_dom = $overlap_domains[0];
+	}
+
+	# $prev_dom should be the best of the overlaps, and we are no longer overlapping > dom_length/3
+	push @save_domains, $prev_dom;
+	$prev_dom = $curr_dom;
+      }
+
+      if ($prev_dom) {
+	push @save_domains, $prev_dom;
+      }
+
+      @{$pf_domains_r} = @save_domains;
+
+      # now check for smaller overlaps
+      for (my $i=1; $i < scalar(@{$pf_domains_r}); $i++) {
+	if ($pf_domains_r->[$i-1]->{seq_end} >= $pf_domains_r->[$i]->{seq_start}) {
+	  my $overlap = $pf_domains_r->[$i-1]->{seq_end} - $pf_domains_r->[$i]->{seq_start};
+	  $pf_domains_r->[$i-1]->{seq_end} -= int($overlap/2);
+	  $pf_domains_r->[$i]->{seq_start} = $pf_domains_r->[$i-1]->{seq_end}+1;
+	}
+      }
+    }
+    elsif ($split_over) {   # here, everything that overlaps by > $min_vdom should be split into a separate domain
+      my @save_domains = ();		# where the new domains go
+
+      # check to see if one domain is included (or overlapping) more
+      # than xx% of the other.  If so, pick the longer one
+
+      my ($prev_dom, $curr_dom) = ($pf_domains_r->[0],0) ;
+      for (my $i=1; $i < scalar(@{$pf_domains_r}); $i++) {
+	$curr_dom = $pf_domains_r->[$i];
+
+	my ($prev_len, $cur_len) = ($prev_dom->{seq_end}-$prev_dom->{seq_start}+1, $curr_dom->{seq_end}-$curr_dom->{seq_start}+1);
+	my $longer_len = ($prev_len > $cur_len) ? $prev_len : $cur_len;
+
+	if (($curr_dom->{seq_start} >= $prev_dom->{seq_start}) && ($curr_dom->{seq_end} <= $prev_dom->{seq_end})
+	   && $cur_len / $prev_len > 0.80) {
+	  # $prev_dom stays the same, $curr_dom deleted
+	  next;
+	}
+	elsif (($curr_dom->{seq_start} <= $prev_dom->{seq_start}) && ($curr_dom->{seq_end} >= $prev_dom->{seq_end})
+	      && $prev_len / $cur_len > 0.80) {
+	  $prev_dom = $curr_dom; # this should delete $prev_dom
+	  next;
+	}
+
+	if ($prev_dom->{seq_end} >= $curr_dom->{seq_start} + $min_vdom) {
+	  my ($l_seq_end, $r_seq_start) = ($curr_dom->{seq_start}-1, $prev_dom->{seq_end}+1);
+
+	  $prev_dom->{seq_end} = $l_seq_end;
+	  push @save_domains, $prev_dom;
+	  my $new_dom = {seq_start => $l_seq_end+1, seq_end=>$r_seq_start-1, 
+			 model_length => -1,
+			 pfamA_acc=>$prev_dom->{pfamA_acc}."/".$curr_dom->{pfamA_acc},
+			 pfamA_id=>$prev_dom->{pfamA_id}."/".$curr_dom->{pfamA_id},
+			 };
+
+	  if ($pf_acc) {
+	    $new_dom->{info} = $new_dom->{pfamA_acc};
+	  }
+	  else {
+	    $new_dom->{info} = $new_dom->{pfamA_id};
+	  }
+
+	  push @save_domains, $new_dom;
+	  $curr_dom->{seq_start} = $r_seq_start;
+	  $prev_dom = $curr_dom;
+	}
+	else {
+	  push @save_domains, $prev_dom;
+	  $prev_dom = $curr_dom;
+	}
+      }
+      push @save_domains, $prev_dom;
+      @{$pf_domains_r} = @save_domains;
+    }
+  }
+
+  # $vdoms -- virtual Pfam domains -- the equivalent of $neg_doms,
+  # but covering parts of a Pfam model that are not annotated.  split
+  # domains have been joined, so simply check beginning and end of
+  # each domain (but must also check for bounded-ness)
+  # only add when 10% or more is missing and missing length > $min_nodom
+
+  if ($vdoms && scalar(@{$pf_domains_r})) {
+    my @vpf_domains;
+
+    my $curr_dom = $pf_domains_r->[0];
+    my $length = $curr_dom->{length};
+
+    my $prev_dom={seq_end=>0, pfamA_acc=>''};
+    my $prev_dom_end = 0;
+    my $next_dom_start = $length+1;
+
+    for (my $dom_ix=0; $dom_ix < scalar(@{$pf_domains_r}); $dom_ix++ ) {
+      $curr_dom = $pf_domains_r->[$dom_ix];
+
+      my $pfamA =  $curr_dom->{pfamA_acc};
+
+      # first, look left, is there a domain there (if there is,
+      # it should be updated right
+
+      # my $min_vdom = $curr_dom->{model_length} / 10;
+
+      if ($curr_dom->{model_length} < $min_vdom) {
+	push @vpf_domains, $curr_dom;
+	next;
+      }
+      if ($prev_dom->{pfamA_acc}) { # look for previous domain
+	$prev_dom_end = $prev_dom->{seq_end};
+      }
+
+      # there is a domain to the left, how much room is available?
+      my $left_dom_len = min($curr_dom->{seq_start}-$prev_dom_end-1, $curr_dom->{model_start}-1);
+      if ( $left_dom_len > $min_vdom) {
+	# there is room for a virtual domain
+	my %new_dom = (seq_start=> $curr_dom->{seq_start}-$left_dom_len,
+	               seq_end => $curr_dom->{seq_start}-1,
+		       info=>'@'.$curr_dom->{info},
+		       model_length=>$curr_dom->{model_length},
+		       model_end => $curr_dom->{model_start}-1,
+		       model_start => $left_dom_len,
+		       pfamA_acc=>$pfamA,
+		      );
+	push @vpf_domains, \%new_dom;
+      }
+
+      # save the current domain
+      push @vpf_domains, $curr_dom;
+      $prev_dom = $curr_dom;
+
+      if ($dom_ix < scalar(@$pf_domains_r)-1) { # there is a domain to the right
+	# first, give all the extra space to the first domain (no splitting)
+	$next_dom_start = $pf_domains_r->[$dom_ix+1]->{seq_start};
+      }
+      else {
+	$next_dom_start = $length;
+      }
+
+      # is there room for a virtual domain right
+	  
+      my $right_dom_len = min($next_dom_start-$curr_dom->{seq_end}-1, # space available 
+			      $curr_dom->{model_length}-$curr_dom->{model_end} # space needed
+			     );
+      if ( $right_dom_len > $min_vdom) {
+	my %new_dom = (seq_start=> $curr_dom->{seq_end}+1,
+		       seq_end=> $curr_dom->{seq_end}+$right_dom_len,
+		       info=>'@'.$curr_dom->{info},
+		       model_length => $curr_dom->{model_length},
+		       pfamA_acc=> $pfamA,
+		      );
+	push @vpf_domains, \%new_dom;
+	$prev_dom = \%new_dom;
+      }
+    }				# all done, check for last one
+
+    # $curr_dom=$pf_domains_r->[-1];
+    # # my $min_vdom = $curr_dom->{model_length}/10;
+
+    # my $right_dom_len = min($length - $curr_dom->{seq_end}+1,  # space available 
+    # 			    $curr_dom->{model_length}-$curr_dom->{model_end} # space needed
+    # 			   );
+    # if ($right_dom_len > $min_vdom) {
+    #   my %new_dom = (seq_start=> $curr_dom->{seq_end}+1,
+    # 		     seq_end => $curr_dom->{seq_end}+$right_dom_len,
+    # 		     info=>'@'.$curr_dom->{pfamA_acc},
+    # 		     model_len=> $curr_dom->{model_len},
+    # 		     pfamA_acc => $curr_dom->{pfamA_acc},
+    # 		     model_start => $curr_dom->{model_end}+1,
+    # 		     model_end => $curr_dom->{model_len},
+    # 		     );
+
+    #   push @vpf_domains, \%new_dom;
+    # }
+
+    # @vpf_domains has both old @{$pf_domains_r} and new neg-domains
+    @{$pf_domains_r} = @vpf_domains;
+  }
+
+  if ($neg_doms) {
+    my @npf_domains;
+    my $prev_dom={seq_end=>0};
+    for my $curr_dom ( @{$pf_domains_r}) {
+      if ($curr_dom->{seq_start} - $prev_dom->{seq_end} > $min_nodom) {
+	my %new_dom = (seq_start=>$prev_dom->{seq_end}+1, seq_end => $curr_dom->{seq_start}-1, info=>'NODOM');
+	push @npf_domains, \%new_dom;
+      }
+      push @npf_domains, $curr_dom;
+      $prev_dom = $curr_dom;
+    }
+    if ($seq_length - $prev_dom->{seq_end} > $min_nodom) {
+      my %new_dom = (seq_start=>$prev_dom->{seq_end}+1, seq_end=>$seq_length, info=>'NODOM');
+      if ($new_dom{seq_end} > $new_dom{seq_start}) {
+	push @npf_domains, \%new_dom;
+      }
+    }
+
+    # @npf_domains has both old @pf_domains and new neg-domains
+    @{$pf_domains_r} = @npf_domains;
+  }
+
+  # now make sure we have useful names: colors
+
+  for my $pf (@{$pf_domains_r}) {
+    $pf->{info} = domain_name($pf->{info}, $pf->{pfamA_acc});
+  }
+}
+
+sub min {
+  my ($arg1, $arg2) = @_;
+
+  return ($arg1 <= $arg2 ? $arg1 : $arg2);
+}
+
+sub max {
+  my ($arg1, $arg2) = @_;
+
+  return ($arg1 >= $arg2 ? $arg1 : $arg2);
+}
+
+# domain name takes a uniprot domain label, removes comments ( ;
+# truncated) and numbers and returns a canonical form. Thus:
+# Cortactin 6.
+# Cortactin 7; truncated.
+# becomes "Cortactin"
+#
+
+sub domain_name {
+
+  my ($value, $pfamA_acc) = @_;
+  my $is_virtual = 0;
+
+  if ($value =~ m/^@/) {
+    $is_virtual = 1;
+    $value =~ s/^@//;
+  }
+
+  # check for clan:
+  if ($no_clans) {
+    if (! defined($domains{$value})) {
+      $domain_clan{$value} = 0;
+      $domains{$value} = ++$domain_cnt;
+      push @domain_list, $pfamA_acc;
+    }
+  }
+  elsif (!defined($domain_clan{$value})) {
+    ## only do this for new domains, old domains have known mappings
+
+    ## ways to highlight the same domain:
+    # (1) for clans, substitute clan name for family name
+    # (2) for clans, use the same color for the same clan, but don't change the name
+    # (3) for clans, combine family name with clan name, but use colors based on clan
+
+    # check to see if it's a clan
+    $get_pfam_clan->execute($pfamA_acc);
+
+    my $pfam_clan_href=0;
+
+    if ($pfam_clan_href=$get_pfam_clan->fetchrow_hashref()) {  # is a clan
+      my ($clan_id, $clan_acc) = @{$pfam_clan_href}{qw(clan_id clan_acc)};
+
+      # now check to see if we have seen this clan before (if so, do not increment $domain_cnt)
+      my $c_value = "C." . $clan_id;
+      if ($pf_acc) {$c_value = $clan_acc;}
+
+      $domain_clan{$value} = {clan_id => $clan_id,
+			      clan_acc => $clan_acc};
+
+      if ($domains{$c_value}) {
+	$domain_clan{$value}->{domain_cnt} =  $domains{$c_value};
+	$value = $c_value;
+      }
+      else {
+	$domain_clan{$value}->{domain_cnt} = ++ $domain_cnt;
+	$value = $c_value;
+	$domains{$value} = $domain_cnt;
+	push @domain_list, $pfamA_acc;
+      }
+    }
+    else {			# not a clan
+      $domain_clan{$value} = 0;
+      $domains{$value} = ++$domain_cnt;
+      push @domain_list, $pfamA_acc;
+    }
+  }
+  elsif ($domain_clan{$value} && $domain_clan{$value}->{clan_acc}) {
+    if ($pf_acc) {$value = $domain_clan{$value}->{clan_acc};}
+    else { $value = "C." . $domain_clan{$value}->{clan_id}; }
+  }
+
+  if ($is_virtual) {
+    $domains{'@'.$value} = $domains{$value};
+    $value = '@'.$value;
+  }
+  return $value;
+}
+
+sub domain_num {
+  my ($value, $number) = @_;
+  if ($value =~ m/^@/) {
+    $value =~ s/^@/v/;
+    $number = $number."v";
+  }
+  return ($value, $number);
+}
+
+
+__END__
+
+=pod
+
+=head1 NAME
+
+ann_pfam30_tmptbl.pl
+
+=head1 SYNOPSIS
+
+ ann_pfam30_tmptbl.pl --neg-doms  --vdoms 'sp|P09488|GSTM1_HUMAN' | accession.file
+
+=head1 OPTIONS
+
+ -h	short help
+ --help include description
+ --no-over  : generate non-overlapping domains (equivalent to ann_pfam.pl)
+ --split-over  : overlaps of two domains generate a new hybrid domain
+ --no-clans : do not use clans with multiple families from same clan
+ --neg-doms : report domains between annotated domains as NODOM
+                 (also --neg, --neg_doms)
+ --vdoms : produce "virtual domains" using model_start, 
+           model_end for partial pfam domains
+ --min_nodom=10  : minimum length between domains for NODOM
+
+ --host, --user, --password, --port --db : info for mysql database
+
+=head1 DESCRIPTION
+
+C<ann_pfam30_tmptbl.pl> extracts domain information from the pfam mysql
+database. Currently, the program works with database
+sequence descriptions in several formats:
+
+ >gi|1705556|sp|P54670.1|CAF1_DICDI
+ >sp|P09488|GSTM1_HUMAN
+ >sp:CALM_HUMAN 
+
+C<ann_pfam30_tmptbl.pl> uses the C<pfamA_reg_full_significant>, C<pfamseq>,
+and C<pfamA> tables of the C<pfam> database to extract domain
+information on a protein. 
+
+If the C<--no-over> option is set, overlapping domains are selected and
+edited to remove overlaps.  For proteins with multiple overlapping
+domains (domains overlap by more than 1/3 of the domain length),
+C<ann_pfam30_tmptbl.pl> selects the domain annotation with the best
+C<domain_evalue_score>.  When domains overlap by less than 1/3 of the
+domain length, they are shortened to remove the overlap.
+
+If the C<--split-over> option is set, if two domains overlap, the
+overlapping region is split out of the domains and labeled as a new,
+virtual-like, domain.  If one domain is internal to another and spans
+80% of the domain, the shorter domain is removed.
+
+C<ann_pfam30_tmptbl.pl> is designed to be used by the B<FASTA> programs with
+the C<-V \!ann_pfam30_tmptbl.pl> or C<-V "\!ann_pfam30_tmptbl.pl --neg"> option.
+
+C<ann_pfam30_tmptbl.pl> requires an additional database, C<tmp_annot>,
+with C<create temporary tables>, C<insert>, and C<select> privileges
+for the default user.
+
+=head1 AUTHOR
+
+William R. Pearson, wrp at virginia.edu
+
+=cut
diff --git a/scripts/ann_pfam_www.pl b/scripts/ann_pfam_www.pl
new file mode 100755
index 0000000..897cff9
--- /dev/null
+++ b/scripts/ann_pfam_www.pl
@@ -0,0 +1,687 @@
+#!/usr/bin/perl -w
+
+################################################################
+# copyright (c) 2014, 2015 by William R. Pearson and The Rector &
+# Visitors of the University of Virginia */
+################################################################
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under this License is distributed on an "AS
+# IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+# express or implied.  See the License for the specific language
+# governing permissions and limitations under the License. 
+################################################################
+
+# ann_pfam_www_e.pl gets an annotation file from fasta36 -V with a line of the form:
+
+# gi|62822551|sp|P00502|GSTA1_RAT Glutathione S-transfer\n  (at least from pir1.lseg)
+#
+# it must:
+# (1) read in the line
+# (2) parse it to get the up_acc
+# (3) return the tab delimited features
+#
+
+# This version uses the Pfam RESTful interface, rather than a local database
+# >pf26|164|O57809|1A1D_PYRHO
+# and only provides domain information
+
+# use strict;
+
+use Getopt::Long;
+use Pod::Usage;
+use LWP::Simple;
+use XML::Twig;
+# use Data::Dumper;
+
+# command-line flags, all "off" by default.  $auto_reg and $rpd2_fams
+# have no command-line switch in this script.
+my ($auto_reg,$rpd2_fams, $neg_doms, $vdoms, $lav, $no_clans, $pf_acc_flag, $shelp, $help, $no_over, $acc_comment, $bound_comment, $pfamB) =
+  (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+my ($show_color) = (1);
+my ($min_nodom, $min_vdom) = (10, 10);
+
+# string placed between a domain name and its colour number/boundaries
+my $color_sep_str = '~';
+
+GetOptions(
+    "lav" => \$lav,
+    "acc_comment" => \$acc_comment,
+    "bound_comment" => \$bound_comment,
+    "min_nodom=i" => \$min_nodom,
+    "neg" => \$neg_doms,
+    "neg_doms" => \$neg_doms,
+    "neg-doms" => \$neg_doms,
+    "no-over" => \$no_over,
+    "no_over" => \$no_over,
+    "no-clans" => \$no_clans,
+    "no_clans" => \$no_clans,
+    "color!" => \$show_color,
+    "pfamB" => \$pfamB,
+    "vdoms" => \$vdoms,
+    "v_doms" => \$vdoms,
+    "pfacc" => \$pf_acc_flag,
+    "pfam_acc" => \$pf_acc_flag,
+    "acc" => \$pf_acc_flag,
+    "h|?" => \$shelp,
+    "help" => \$help,
+    );
+
+pod2usage(1) if $shelp;
+pod2usage(exitstatus => 0, verbose => 2) if $help;
+pod2usage(1) unless (@ARGV || -p STDIN || -f STDIN);
+
+# domain name -> colour number, shared by all sequences in this run
+my %annot_types = ();
+my %domains = (NODOM=>0);
+my %domain_clan = (NODOM => {clan_id => 'NODOM', clan_acc=>0, domain_cnt=>0});
+my @domain_list = (0);
+my $domain_cnt = 0;
+
+my $loc="http://pfam.xfam.org/";
+my $url;
+
+# state filled in by the XML::Twig handlers
+my @pf_domains;
+my %pfamA_fams = ();
+my ($pf_seq_length, $pf_model_length)=(0,0);
+my ($clan_acc, $clan_id) = ("","");
+
+my $get_annot_sub = \&get_pfam_www;
+
+my ($tmp, $gi, $sdb, $acc, $id, $use_acc);
+
+# get the query
+my ($query, $seq_len) = @ARGV;
+$seq_len = 0 unless defined($seq_len);
+
+$query =~ s/^>// if ($query);
+
+my @annots = ();
+
+# A bare UniProt accession: 6 characters starting with O/P/Q, or 6/10
+# characters starting with another letter, optionally followed by a
+# ".version", and ending at white space or the end of the string.
+# The alternatives are grouped so that the '^' anchor and the
+# terminator apply to both of them.
+my $up_acc_re =
+  qr/^(?:[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9](?:[A-Z][A-Z0-9]{2}[0-9]){1,2})(?:\.\d+)?(?:\s|\z)/;
+
+if ($query && ($query =~ m/[\|:]/ || $query =~ $up_acc_re)) {
+  # the first argument is itself a sequence identifier
+  push @annots, show_annots("$query\t$seq_len", $get_annot_sub);
+}
+else {
+  # otherwise it is a file I can open (or STDIN): read and parse it
+  while (my $a_line = <>) {
+    $a_line =~ s/^>//;
+    chomp $a_line;
+    push @annots, show_annots($a_line, $get_annot_sub);
+  }
+}
+
+# print one ">seq_info" header per sequence followed by its
+# tab-delimited annotation rows
+for my $seq_annot (@annots) {
+  print ">",$seq_annot->{seq_info},"\n";
+  for my $annot (@{$seq_annot->{list}}) {
+    if (!$lav && defined($domains{$annot->[-1]})) {
+      my ($a_name, $a_num) = domain_num($annot->[-1],$domains{$annot->[-1]});
+      $annot->[-1] = $a_name;
+      # virtual domains carry a trailing "v"; strip it before using
+      # the number as an index into @domain_list
+      my $tmp_a_num = $a_num;
+      $tmp_a_num =~ s/v$//;
+      if ($acc_comment) {
+        $annot->[-1] .= "{$domain_list[$tmp_a_num]}";
+      }
+      if ($bound_comment) {
+        $annot->[-1] .= $color_sep_str.$annot->[0].":".$annot->[2];
+      }
+      elsif ($show_color) {
+        $annot->[-1] .= $color_sep_str.$a_num;
+      }
+    }
+    print join("\t",@$annot),"\n";
+  }
+}
+
+exit(0);
+
+# show_annots($query_len, $get_annot_sub)
+#   $query_len     - "<description line>\t<sequence length>"; the tab
+#                    and the length are optional
+#   $get_annot_sub - code ref called as $sub->($acc_or_id, $seq_len),
+#                    expected to return an array ref of annotation rows
+#                    (defaults to \&get_pfam_www)
+# Returns a hash ref { seq_info => <description line>, list => <rows> }.
+# The parsed database/accession/id are left in the file-level variables
+# $sdb, $gi, $acc, $id and $use_acc.
+sub show_annots {
+  my ($query_len, $get_annot_sub) = @_;
+  $get_annot_sub = \&get_pfam_www unless (ref($get_annot_sub) eq 'CODE');
+
+  my ($annot_line, $seq_len) = split(/\t/,$query_len);
+  $annot_line = '' unless defined($annot_line);
+
+  my %annot_data = (seq_info=>$annot_line);
+
+  # forget the previous record, so that an unrecognised line can never
+  # be annotated with the accession of the line before it
+  ($tmp, $gi, $sdb, $acc, $id) = ();
+  $use_acc = 1;
+
+  if ($annot_line =~ m/^pf26\|/) {
+    ($sdb, $gi, $acc, $id) = split(/\|/,$annot_line);
+  }
+  elsif ($annot_line =~ m/^gi\|/) {
+    ($tmp, $gi, $sdb, $acc, $id) = split(/\|/,$annot_line);
+  }
+  elsif ($annot_line =~ m/^(sp|tr|up)\|/) {
+    ($sdb, $acc, $id) = split(/\|/,$annot_line);
+  }
+  elsif ($annot_line =~ m/^(SP|TR):(\w+) (\w+)/) {
+    ($sdb, $id, $acc) = (lc($1), $2, $3);
+  }
+  elsif ($annot_line =~ m/^(SP|TR):(\w+)/) {
+    # only an identifier is available
+    ($sdb, $id, $acc) = (lc($1), $2, "");
+    $use_acc = 0;
+  }
+  elsif ($annot_line !~ m/\|/ && $annot_line !~ m/:/) {
+    # bare accession, possibly followed by a description
+    ($acc) = split(/\s+/,$annot_line);
+  }
+
+  # remove version number
+  $acc =~ s/\.\d+$// if (defined($acc));
+
+  my $lookup_key = ($use_acc ? $acc : $id);
+
+  if (defined($lookup_key) && $lookup_key ne '') {
+    $annot_data{list} = $get_annot_sub->($lookup_key, $seq_len);
+  }
+  else {
+    warn "*** $0 - no accession/identifier found in: $annot_line\n";
+    $annot_data{list} = [];
+  }
+
+  return \%annot_data;
+}
+
+# twig handler for <sequence>: store its "length" attribute in the
+# file-level $pf_seq_length
+sub get_length {
+    my $seq_elt = $_[1];
+    $pf_seq_length = $seq_elt->{att}->{length};
+}
+
+# twig handler for <match>: append one hash per match to @pf_domains,
+# holding the attributes of the <match> element together with those of
+# its first <location> child (location attributes win on a name clash)
+sub push_match {
+    my $match_elt = $_[1];
+    my $match_att = $match_elt->{att};
+    my $location_att = $match_elt->first_child('location')->{att};
+    push @pf_domains, { %$match_att, %$location_att };
+}
+
+# twig handler for <hmm_details>: store its "model_length" attribute in
+# the file-level $pf_model_length
+sub get_model_length {
+    my $hmm_elt = $_[1];
+    $pf_model_length = $hmm_elt->{att}->{model_length};
+}
+
+# twig handler for <clan_membership>: copy the "clan_acc" and "clan_id"
+# attributes into the file-level $clan_acc and $clan_id
+sub get_clan {
+    my $clan_elt = $_[1];
+    my $clan_att = $clan_elt->{att};
+    ($clan_acc, $clan_id) = ($clan_att->{clan_acc}, $clan_att->{clan_id});
+}
+
+# get_pfam_www($acc, $seq_length)
+#
+# Fetch the Pfam matches for protein $acc from the Pfam web site
+# ($loc . "protein/$acc?output=xml") and return an array ref of
+# annotation rows:
+#   [start, '-', end, name]    default
+#   [start, end, name]         with --lav
+# When $seq_length is 0/undef the length reported by Pfam is used.
+# Matches are handled in order of their start coordinate; a match that
+# starts at or beyond $seq_length is dropped and one that runs past it
+# is truncated.
+#
+# The matches are then
+#   --no-over : reduced to non-overlapping domains
+#   (always)  : joined when consecutive matches are pieces of one model
+#   --vdoms   : padded with "virtual" domains ('@' prefix) covering the
+#               unaligned ends of a model
+#   --neg     : interleaved with NODOM regions
+#
+# An empty array ref is returned, with a warning, when the protein entry
+# cannot be downloaded or parsed.
+sub get_pfam_www {
+  my ($acc, $seq_length) = @_;
+  $acc = '' unless defined($acc);
+
+  $url = "protein/$acc?output=xml";
+  my $res = get($loc . $url);
+
+  # both variables are filled in by the twig handlers; reset them so
+  # that nothing is inherited from the previous protein
+  @pf_domains = ();
+  $pf_seq_length = 0;
+
+  # LWP::Simple::get() returns undef when the request fails
+  unless (defined($res)) {
+    warn "*** $0 - no Pfam entry retrieved for '$acc'\n";
+    return [];
+  }
+
+  my $twig_dom = XML::Twig->new(twig_roots => {matches => 1, sequence => 1},
+                                twig_handlers => {
+                                                  'match' => \&push_match,
+                                                  'sequence' => \&get_length,
+                                                 },
+                                pretty_print => 'indented');
+
+  # parse() dies when the reply is not well-formed XML
+  unless (eval { $twig_dom->parse($res); 1 }) {
+    warn "*** $0 - cannot parse Pfam entry for '$acc': $@";
+    @pf_domains = ();
+    return [];
+  }
+
+  if (!$seq_length || $seq_length == 0) {
+    $seq_length = $pf_seq_length;
+  }
+
+  @pf_domains = sort { $a->{start} <=> $b->{start} } @pf_domains;
+
+  unless ($pfamB) {
+    @pf_domains = grep { $_->{type} !~ m/Pfam-B/ } @pf_domains;
+  }
+
+  # for joining and for virtual domains we also need information about
+  # the families (model length, clan); each family is downloaded once
+  # and then served from %pfamA_fams
+  for my $curr_dom (@pf_domains) {
+    my $fam_acc = $curr_dom->{accession};
+
+    unless ($pfamA_fams{$fam_acc}) {
+      $url = "family/$fam_acc?output=xml";
+      my $fam_res = get($loc . $url);
+
+      # the handlers only assign when the element is present
+      ($pf_model_length, $clan_acc, $clan_id) = (0, "", "");
+
+      my $twig_fam = XML::Twig->new(twig_roots => {hmm_details => 1, clan_membership=> 1},
+                                    twig_handlers => {
+                                      'hmm_details' => \&get_model_length,
+                                      'clan_membership' => \&get_clan,
+                                    },
+                                    pretty_print => 'indented');
+
+      unless (defined($fam_res) && eval { $twig_fam->parse($fam_res); 1 }) {
+        # leave the family uncached so that it can be retried later
+        warn "*** $0 - no Pfam family data for '$fam_acc'\n";
+        $curr_dom->{model_length} = 0;
+        next;
+      }
+
+      $pfamA_fams{$fam_acc} = { model_length => $pf_model_length, clan_acc=>$clan_acc, clan_id=>$clan_id};
+    }
+
+    $curr_dom->{model_length} = $pfamA_fams{$fam_acc}->{model_length};
+  }
+
+  # label each domain and clip it to the sequence length
+  my @raw_pf_domains = @pf_domains;
+  @pf_domains = ();
+
+  for my $dom_ref (@raw_pf_domains) {
+    $dom_ref->{info} = ($pf_acc_flag ? $dom_ref->{accession} : $dom_ref->{id});
+    next if ($dom_ref->{start} >= $seq_length);
+    if ($dom_ref->{end} >= $seq_length) {
+      $dom_ref->{end} = $seq_length;
+    }
+    push @pf_domains, $dom_ref;
+  }
+
+  # check for domain overlap (possibly more than 2 domains), and
+  # resolve it by choosing the domain with the best evalue
+  if ($no_over && scalar(@pf_domains) > 1) {
+
+    # true when one domain contains the other, or when they overlap by
+    # more than 1/3 of the longer of the two
+    my $big_overlap = sub {
+      my ($p_dom, $c_dom) = @_;
+
+      my $inclusion = ((($c_dom->{start} >= $p_dom->{start}) && ($c_dom->{end} <= $p_dom->{end})) ||
+                       (($c_dom->{start} <= $p_dom->{start}) && ($c_dom->{end} >= $p_dom->{end})));
+      return 1 if ($inclusion);
+
+      my $diff = $p_dom->{end} - $c_dom->{start};
+      my ($prev_len, $cur_len) = ($p_dom->{end}-$p_dom->{start}+1, $c_dom->{end}-$c_dom->{start}+1);
+      my $longer_len = ($prev_len > $cur_len) ? $prev_len : $cur_len;
+
+      return ($diff > 0 && $diff > $longer_len/3) ? 1 : 0;
+    };
+
+    my @tmp_domains = @pf_domains;
+    my @save_domains = ();
+
+    my $prev_dom = shift @tmp_domains;
+
+    while (my $curr_dom = shift @tmp_domains) {
+
+      # collect everything that overlaps $prev_dom
+      my @overlap_domains = ($prev_dom);
+
+      while ($curr_dom && $big_overlap->($prev_dom, $curr_dom)) {
+        push @overlap_domains, $curr_dom;
+        $curr_dom = shift @tmp_domains;
+      }
+
+      # >1 because $prev_dom is always there
+      if (scalar(@overlap_domains) > 1 ) {
+        for my $dom ( @overlap_domains) {
+          $dom->{evalue} = 1.0 unless defined($dom->{evalue});
+        }
+
+        @overlap_domains = sort { $a->{evalue} <=> $b->{evalue} } @overlap_domains;
+        $prev_dom = $overlap_domains[0];
+      }
+
+      # $prev_dom is the best of the overlaps, and $curr_dom (if any)
+      # no longer overlaps it by more than 1/3
+      push @save_domains, $prev_dom;
+      $prev_dom = $curr_dom;
+    }
+    if ($prev_dom) {push @save_domains, $prev_dom;}
+
+    @pf_domains = @save_domains;
+
+    # now remove the smaller overlaps by splitting them between the
+    # two neighbours
+    for (my $i=1; $i < scalar(@pf_domains); $i++) {
+      if ($pf_domains[$i-1]->{end} >= $pf_domains[$i]->{start}) {
+        my $overlap = $pf_domains[$i-1]->{end} - $pf_domains[$i]->{start};
+        $pf_domains[$i-1]->{end} -= int($overlap/2);
+        $pf_domains[$i]->{start} = $pf_domains[$i-1]->{end}+1;
+      }
+    }
+  }
+
+  # look for "split-domains": runs of the same family whose pieces are
+  # ordered by model_start, and join them
+  if (scalar(@pf_domains) > 1) {
+    my @j_domains;              # joined domains
+    my @tmp_domains = @pf_domains;
+
+    my $prev_dom = shift(@tmp_domains);
+
+    for my $curr_dom (@tmp_domains) {
+      # to join domains:
+      # (1) they must belong to the same family
+      # (2) they must be in order by model_start/end coordinates
+      # (3) joining them cannot make the total combination too long
+      my $model_len = $curr_dom->{model_length};
+
+      if ($prev_dom->{accession} eq $curr_dom->{accession} # same family
+          && $model_len) {      # fractions need a non-zero model length
+
+        my $prev_dom_len = $prev_dom->{hmm_end}-$prev_dom->{hmm_start}+1;
+        my $curr_dom_len = $curr_dom->{hmm_end}-$curr_dom->{hmm_start}+1;
+        my $prev_dom_fn = $prev_dom_len/$model_len;
+        my $curr_dom_fn = $curr_dom_len/$model_len;
+        my $missing_dom_fn = max(0,($curr_dom->{hmm_start} - $prev_dom->{hmm_end})/$model_len);
+
+        # NOTE(review): the "limit overlap" pair is joined with ||, so
+        # it holds for every positive hmm_end; && may have been
+        # intended.  Left unchanged to keep the published behaviour.
+        if ( $prev_dom->{hmm_start} < $curr_dom->{hmm_start} # model check
+             && $prev_dom->{hmm_end} < $curr_dom->{hmm_end}
+             && ($curr_dom->{hmm_start} > $prev_dom->{hmm_end} * 0.80 # limit overlap
+                 || $curr_dom->{hmm_start} <  $prev_dom->{hmm_end} * 1.25)
+             && $prev_dom_fn + $curr_dom_fn < 1.33
+             && $missing_dom_fn < min($prev_dom_fn,$curr_dom_fn)) { # join them by updating $prev_dom
+          $prev_dom->{end} = $curr_dom->{end};
+          $prev_dom->{hmm_end} = $curr_dom->{hmm_end};
+          $prev_dom->{evalue} = ($prev_dom->{evalue} < $curr_dom->{evalue} ? $prev_dom->{evalue} : $curr_dom->{evalue});
+          next;
+        }
+      }
+
+      # not joined: $prev_dom is complete
+      push @j_domains, $prev_dom;
+      $prev_dom = $curr_dom;
+    }
+    push @j_domains, $prev_dom;
+    @pf_domains = @j_domains;
+  }
+
+  # $vdoms -- virtual Pfam domains -- the equivalent of $neg_doms,
+  # but covering parts of a Pfam model that are not annotated.  split
+  # domains have been joined, so simply check beginning and end of
+  # each domain (but must also check for bounded-ness)
+  # only add when the missing length is > $min_vdom
+
+  if ($vdoms && scalar(@pf_domains)) {
+    my @vpf_domains;
+
+    my $prev_dom = {end=>0, accession=>''};
+    my $prev_dom_end = 0;
+
+    for my $dom_ix (0 .. $#pf_domains) {
+      my $curr_dom = $pf_domains[$dom_ix];
+      my $pfamA = $curr_dom->{accession};
+
+      # first, look left: where does the previous (real or virtual)
+      # domain end?
+      if ($prev_dom->{accession}) {
+        $prev_dom_end = $prev_dom->{end};
+      }
+
+      # room on the left is limited by the free sequence and by the
+      # unaligned start of the model
+      my $left_dom_len = min($curr_dom->{start}-$prev_dom_end-1, $curr_dom->{hmm_start}-1);
+      if ( $left_dom_len > $min_vdom) {
+        # there is room for a virtual domain
+        my %new_dom = (start=> $curr_dom->{start}-$left_dom_len,
+                       end => $curr_dom->{start}-1,
+                       info=>'@'.$curr_dom->{accession},
+                       model_length=> $curr_dom->{model_length},
+                       hmm_end => $curr_dom->{hmm_start}-1,
+                       hmm_start => $left_dom_len,
+                       accession=>$pfamA,
+            );
+        push @vpf_domains, \%new_dom;
+      }
+
+      # save the current domain
+      push @vpf_domains, $curr_dom;
+      $prev_dom = $curr_dom;
+
+      # now look right: all the extra space goes to this domain (no
+      # splitting with the next one)
+      my $next_dom_start = ($dom_ix < $#pf_domains)
+        ? $pf_domains[$dom_ix+1]->{start}
+        : $seq_length;
+
+      my $right_dom_len = min($next_dom_start-$curr_dom->{end}-1, # space available
+                              $curr_dom->{model_length}-$curr_dom->{hmm_end} # space needed
+          );
+      if ( $right_dom_len > $min_vdom) {
+        my %new_dom = (start=> $curr_dom->{end}+1,
+                       end=> $curr_dom->{end}+$right_dom_len,
+                       info=>'@'.$pfamA,
+                       model_length => $curr_dom->{model_length},
+                       accession=> $pfamA,
+            );
+        push @vpf_domains, \%new_dom;
+        $prev_dom = \%new_dom;
+      }
+    }
+    # @vpf_domains has both old @pf_domains and new virtual domains
+    @pf_domains = @vpf_domains;
+  }
+
+  if ($neg_doms) {
+    my @npf_domains;
+    my $prev_dom={end=>0};
+    for my $curr_dom ( @pf_domains) {
+      if ($curr_dom->{start} - $prev_dom->{end} > $min_nodom) {
+        my %new_dom = (start=>$prev_dom->{end}+1, end => $curr_dom->{start}-1, info=>'NODOM');
+        push @npf_domains, \%new_dom;
+      }
+      push @npf_domains, $curr_dom;
+      $prev_dom = $curr_dom;
+    }
+    # region between the last domain and the end of the sequence
+    if ($seq_length - $prev_dom->{end} > $min_nodom) {
+      my %new_dom = (start=>$prev_dom->{end}+1, end=>$seq_length, info=>'NODOM');
+      if ($new_dom{end} > $new_dom{start}) {push @npf_domains, \%new_dom;}
+    }
+
+    # @npf_domains has both old @pf_domains and new neg-domains
+    @pf_domains = @npf_domains;
+  }
+
+  # now make sure we have useful names: colors
+  for my $pf (@pf_domains) {
+    $pf->{info} = domain_name($pf->{info}, $acc, $pf->{accession});
+  }
+
+  my @feats = ();
+  for my $d_ref (@pf_domains) {
+    if ($lav) {
+      push @feats, [$d_ref->{start}, $d_ref->{end}, $d_ref->{info}];
+    }
+    else {
+      push @feats, [$d_ref->{start}, '-', $d_ref->{end},  $d_ref->{info} ];
+    }
+  }
+
+  return \@feats;
+}
+
+# domain_name($value, $seq_id, $pf_acc)
+#   $value  - domain label (family id or accession); a leading '@'
+#             marks a virtual domain
+#   $seq_id - sequence identifier, only used in the warning message
+#   $pf_acc - Pfam family accession of the domain (may be undef, e.g.
+#             for NODOM regions)
+#
+# Registers the label in %domains (label -> colour number) and returns
+# the label to print.  Unless --no-clans is set, the clan of the family
+# is looked up (in %pfamA_fams, or from the Pfam web site) and a clan
+# member is returned as "C.<clan>", so that different families of the
+# same clan share one name and one colour.  Virtual domains get their
+# '@' back on the returned label.  Returns "" when $value is undefined.
+
+sub domain_name {
+
+  my ($value, $seq_id, $pf_acc) = @_;
+  my $is_virtual = 0;
+
+  # must be tested before $value is used in a pattern match
+  unless (defined($value)) {
+    warn "missing domain name for $seq_id";
+    return "";
+  }
+
+  if ($value =~ m/^@/) {
+    $is_virtual = 1;
+    $value =~ s/^@//;
+  }
+
+  if ($no_clans) {
+    if (! defined($domains{$value})) {
+      $domain_clan{$value} = 0;
+      $domains{$value} = ++$domain_cnt;
+      push @domain_list, $pf_acc;
+    }
+  }
+  elsif (!defined($domain_clan{$value})) {
+    ## only do this for new domains, old domains have known mappings
+
+    ## ways to highlight the same domain:
+    # (1) for clans, substitute clan name for family name
+    # (2) for clans, use the same color for the same clan, but don't change the name
+    # (3) for clans, combine family name with clan name, but use colors based on clan
+
+    # find the clan accession/identifier, if the family has one
+    my $known_fam = (defined($pf_acc) ? $pfamA_fams{$pf_acc} : undef);
+
+    if ($known_fam) {
+      ($clan_acc, $clan_id) = @{$known_fam}{qw(clan_acc clan_id)};
+    }
+    else {
+      my $url = "family/$value?output=xml";
+
+      my $res = get($loc . $url);
+
+      my $twig_clan = XML::Twig->new(twig_roots => {'clan_membership'=>1},
+                                     twig_handlers => {
+                                                       'clan_membership' => \&get_clan,
+                                                      },
+                                     pretty_print => 'indented');
+
+      # make certain to reinitialize
+      ($clan_acc, $clan_id) = ("","");
+
+      # get() returns undef on failure and parse() dies on bad XML; in
+      # both cases treat the family as having no clan
+      unless (defined($res) && eval { $twig_clan->parse($res); 1 }) {
+        warn "*** $0 - no Pfam clan information for '$value'\n";
+        ($clan_acc, $clan_id) = ("","");
+      }
+    }
+
+    if ($clan_acc) {
+      my $c_value = "C." . $clan_id;
+      if ($pf_acc_flag) {$c_value = "C." . $clan_acc;}
+
+      $domain_clan{$value} = {clan_id => $clan_id,
+                              clan_acc => $clan_acc};
+
+      if ($domains{$c_value}) {
+        # clan already has a colour number: reuse it
+        $domain_clan{$value}->{domain_cnt} =  $domains{$c_value};
+        $value = $c_value;
+      }
+      else {
+        $domain_clan{$value}->{domain_cnt} = ++ $domain_cnt;
+        $value = $c_value;
+        $domains{$value} = $domain_cnt;
+        push @domain_list, $pf_acc;
+      }
+    }
+    else {
+      $domain_clan{$value} = 0;
+      $domains{$value} = ++$domain_cnt;
+      push @domain_list, $pf_acc;
+    }
+  }
+  elsif ($domain_clan{$value} && $domain_clan{$value}->{clan_acc}) {
+    # family seen before and known to belong to a clan
+    if ($pf_acc_flag) {$value = "C." . $domain_clan{$value}->{clan_acc};}
+    else { $value = "C." . $domain_clan{$value}->{clan_id}; }
+  }
+
+  if ($is_virtual) {
+    # the virtual domain shares the colour number of the real one
+    $domains{'@'.$value} = $domains{$value};
+    $value = '@'.$value;
+  }
+  return $value;
+}
+
+# domain_num($value, $number): for a virtual domain (name starting with
+# '@') replace the '@' by 'v' and append 'v' to the number; other names
+# and numbers are returned unchanged.  Returns ($value, $number).
+sub domain_num {
+  my ($value, $number) = @_;
+
+  # s/// reports whether the leading '@' was present
+  if ($value =~ s/^@/v/) {
+    $number = $number . "v";
+  }
+
+  return ($value, $number);
+}
+
+# numeric minimum of two values; the first one is returned on a tie
+sub min {
+  my ($arg1, $arg2) = @_;
+
+  return $arg1 if ($arg1 <= $arg2);
+  return $arg2;
+}
+
+# numeric maximum of two values; the first one is returned on a tie
+sub max {
+  my ($arg1, $arg2) = @_;
+
+  return $arg1 if ($arg1 >= $arg2);
+  return $arg2;
+}
+
+__END__
+
+=pod
+
+=head1 NAME
+
+ann_pfam_www.pl
+
+=head1 SYNOPSIS
+
+ ann_pfam_www_e.pl --neg-doms  'sp|P09488|GSTM1_HUMAN' | accession.file
+
+=head1 OPTIONS
+
+ -h	short help
+ --help include description
+
+ --lav  produce lav2plt.pl annotation format, only show domains/repeats
+ --neg-doms : report domains between annotated domains as NODOM
+                 (also --neg, --neg_doms)
+ --no-over  : generate non-overlapping domains (equivalent to ann_pfam_www.pl)
+ --no-clans : do not use clans with multiple families from same clan
+ --min_nodom=10  : minimum length between domains for NODOM
+
+=head1 DESCRIPTION
+
+C<ann_pfam_www_e.pl> extracts domain information from the Pfam www site
+(pfam.xfam.org).  Currently, the program works with database
+sequence descriptions in several formats:
+
+ >gi|1705556|sp|P54670.1|CAF1_DICDI
+ >sp|P09488|GSTM1_HUMAN
+ >sp:CALM_HUMAN 
+
+C<ann_pfam_www_e.pl> uses the Pfam RESTful WWW interface
+(C<pfam.xfam.org/help#tabview=10>) to download domain
+names/locations/score. C<ann_pfam_www_e.pl> is an alternative to
+C<ann_pfam_e.pl> that does not require a MySQL instance with a Pfam
+database installation.
+
+If the "--no-over" option is set, overlapping domains are selected and
+edited to remove overlaps.  For proteins with multiple overlapping
+domains (domains overlap by more than 1/3 of the domain length),
+C<auto_pfam_e.pl> selects the domain annotation with the best
+C<domain_evalue_score>.  When domains overlap by less than 1/3 of the
+domain length, they are shortened to remove the overlap.
+
+C<ann_pfam_www_e.pl> is designed to be used by the B<FASTA> programs
+with the C<-V \!ann_pfam_www_e.pl> or C<-V "\!ann_pfam_www_e.pl --neg">
+option.
+
+=head1 AUTHOR
+
+William R. Pearson, wrp at virginia.edu
+
+=cut
diff --git a/scripts/ann_script_list b/scripts/ann_script_list
new file mode 100644
index 0000000..fc99c5e
--- /dev/null
+++ b/scripts/ann_script_list
@@ -0,0 +1,9 @@
+ann_exons_ens.pl
+ann_exons_up_www.pl
+ann_feats2ipr.pl
+ann_feats_up_sql.pl
+ann_feats_up_www2.pl
+ann_ipr_www.pl
+ann_pfam30.pl
+ann_pfam_www.pl
+ann_upfeats_pfam_www_e.pl
diff --git a/scripts/ann_upfeats_pfam_www_e.pl b/scripts/ann_upfeats_pfam_www_e.pl
new file mode 100755
index 0000000..f82ad0a
--- /dev/null
+++ b/scripts/ann_upfeats_pfam_www_e.pl
@@ -0,0 +1,801 @@
+#!/usr/bin/perl -w
+
+################################################################
+# copyright (c) 2014 by William R. Pearson and The Rector &
+# Visitors of the University of Virginia */
+################################################################
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under this License is distributed on an "AS
+# IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+# express or implied.  See the License for the specific language
+# governing permissions and limitations under the License. 
+################################################################
+
+# ann_upfeats_pfam_www.pl gets an annotation file from fasta36 -V with a line of the form:
+
+# SP:GSTM1_HUMAN P09488 218
+#
+# it must:
+# (1) read in the line
+# (2) parse it to get the up_acc
+# (3) return the tab delimited features
+#
+
+# this version can read feature2 uniprot features (acc/pos/end/label/value), but returns sorted start/end domains
+
+use strict;
+
+use Getopt::Long;
+use Pod::Usage;
+use LWP::Simple;
+use XML::Twig;
+use JSON qw(decode_json);
+## use IO::String;
+
+# base URL of the EBI proteins API feature service
+my $up_base = 'http://www.ebi.ac.uk/proteins/api/features';
+
+# domain name -> colour number, shared by all sequences in this run
+my %domains = ();
+my $domain_cnt = 0;
+
+my ($lav, $neg_doms, $no_doms, $no_feats, $no_over, $shelp, $help, $no_vars) = (0,0,0,0,0,0,0,0);
+my ($auto_reg, $vdoms, $no_clans, $pf_acc_flag, $acc_comment, $bound_comment) =  (0, 0, 0, 0, 0, 0);
+
+# string placed between a domain name and its colour number/boundaries
+my $color_sep_str = '~';
+
+my ($min_nodom, $min_vdom) = (10,10);
+
+GetOptions(
+    "lav" => \$lav,
+    "acc_comment" => \$acc_comment,
+    "bound_comment" => \$bound_comment,
+    "no-over" => \$no_over,
+    "no_doms|no-doms|nodoms" => \$no_doms,
+    "neg" => \$neg_doms,
+    "neg_doms|neg-doms|negdoms" => \$neg_doms,
+    "min_nodom=i" => \$min_nodom,
+    "no_feats|no-feats|nofeats" => \$no_feats,
+    "no-vars|no_vars" => \$no_vars,
+    "vdoms" => \$vdoms,
+    "v_doms" => \$vdoms,
+    "pfacc" => \$pf_acc_flag,
+    "pfam_acc" => \$pf_acc_flag,
+    "acc" => \$pf_acc_flag,
+    "h|?" => \$shelp,
+    "help" => \$help,
+    );
+
+pod2usage(1) if $shelp;
+pod2usage(exitstatus => 0, verbose => 2) if $help;
+pod2usage(1) unless (@ARGV || -f STDIN || -p STDIN);
+
+# UniProt feature types that are reported, the one-character symbol
+# used for each of them, and their display names
+my @feat_keys = qw( ACT_SITE MOD_RES BINDING METAL SITE );
+my @feat_vals = ( '=','*','#','^','@');
+my @feat_names = ('Active site', 'Modified', 'Binding', 'Metal binding', 'Site');
+
+unless ($no_vars) {
+  push @feat_keys, qw(MUTAGEN VARIANT);
+  push @feat_vals, ('V','V',);
+  push @feat_names, ("","",);
+}
+
+# hash slices: one value per entry of @feat_keys (surplus values are
+# ignored when --no-vars shortens @feat_keys)
+my %feats_text = ();
+@feats_text{@feat_keys} = ('Active site', '', 'Substrate binding', 'Metal binding', 'Site', '','');
+
+my %feats_label;
+@feats_label{@feat_keys} = ('Active site', 'Modified', 'Substrate binding', 'Metal binding', 'Site', '','');
+
+# my @feat_vals = ( '=','*','#','^','@','V','V');
+
+
+my @dom_keys = qw( DOMAIN REPEAT );
+my @dom_vals = ( [ '[', ']'],[ '[', ']']);
+
+my @ssr_keys = qw(STRAND HELIX);
+my @ssr_vals = ( [ '[', ']']);
+
+my %annot_types = ();
+
+# from ann_pfam_www_e.pl 
+my %domain_clan = (NODOM => {clan_id => 'NODOM', clan_acc=>0, domain_cnt=>0});
+my @domain_list = (0);
+
+my $loc="http://pfam.xfam.org/";
+my $pf_url;
+
+# state filled in by the XML::Twig handlers
+my @pf_domains;
+my %pfamA_fams = ();
+my ($pf_seq_length, $pf_model_length)=(0,0);
+my ($clan_acc, $clan_id) = ("","");
+
+my $get_domain_sub = \&get_pfam_annots;
+
+if ($lav) {
+  $no_feats = 1;
+}
+
+@annot_types{@feat_keys} = @feat_vals unless ($no_feats);
+
+if ($neg_doms) {
+  $domains{'NODOM'}=0;
+}
+
+my ($tmp, $gi, $sdb, $acc, $id, $use_acc);
+
+# legend: one "=<symbol>:<label>" line per labelled feature type
+unless ($no_feats) {
+  for my $i ( 0 .. $#feat_keys) {
+    next unless $feats_label{$feat_keys[$i]};
+    print "=",$feat_vals[$i],":",$feats_label{$feat_keys[$i]},"\n";
+  }
+}
+
+# get the query
+my ($query, $seq_len) =  @ARGV;
+$seq_len = 0 unless defined($seq_len);
+
+$query =~ s/^>// if ($query);
+
+my @annots = ();
+
+# A bare UniProt accession: 6 characters starting with O/P/Q, or 6/10
+# characters starting with another letter, optionally followed by a
+# ".version", and ending at white space or the end of the string.
+# The alternatives are grouped so that the '^' anchor and the
+# terminator apply to both of them.
+my $up_acc_re =
+  qr/^(?:[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9](?:[A-Z][A-Z0-9]{2}[0-9]){1,2})(?:\.\d+)?(?:\s|\z)/;
+
+if ($query && ($query =~ m/[\|:]/
+               || $query =~ $up_acc_re
+               || $query =~ m/^[XN][MP]_\d+/)) { # RefSeq XP_/NP_/XM_/NM_
+  # the first argument is itself a sequence identifier
+  push @annots, upfeats_pfam_www("$query\t$seq_len", \&up_json_annots, \&get_pfam_www);
+} else {
+  # otherwise it is a file I can open (or STDIN): read and parse it
+  while (my $a_line = <>) {
+    $a_line =~ s/^>//;
+    chomp $a_line;
+    push @annots, upfeats_pfam_www($a_line, \&up_json_annots, \&get_pfam_www);
+  }
+}
+
+# print one ">seq_info" header per sequence followed by its
+# tab-delimited annotation rows
+for my $seq_annot (@annots) {
+  print ">",$seq_annot->{seq_info},"\n";
+  for my $annot (@{$seq_annot->{list}}) {
+    if (!$lav && defined($domains{$annot->[-1]})) {
+      my ($a_name, $a_num) = domain_num($annot->[-1],$domains{$annot->[-1]});
+      $annot->[-1] = $a_name;
+      if ($acc_comment) {
+        # virtual domains carry a trailing "v"; strip it before using
+        # the number as an index into @domain_list
+        my $list_ix = $a_num;
+        $list_ix =~ s/v$//;
+        $annot->[-1] .= "{$domain_list[$list_ix]}";
+      }
+      if ($bound_comment) {
+        $annot->[-1] .= $color_sep_str.$annot->[0].":".$annot->[2];
+      }
+      $annot->[-1] .= $color_sep_str.$a_num;
+    }
+    print join("\t",@$annot),"\n";
+  }
+}
+
+exit(0);
+
+# upfeats_pfam_www($query_len, $get_upfeats_sub, $get_pfam_sub)
+#   $query_len       - "<description line>\t<sequence length>"; the tab
+#                      and the length are optional
+#   $get_upfeats_sub - code ref called as
+#                      $sub->(\%annot_types, $json_text, $seq_len) to
+#                      turn the UniProt feature JSON into annotation
+#                      rows (defaults to \&up_json_annots)
+#   $get_pfam_sub    - code ref called as $sub->($acc_or_id, $seq_len)
+#                      to obtain the Pfam domain rows
+# Returns a hash ref { seq_info => <description line>, list => <rows> }
+# with the rows of both sources sorted on their first column.
+sub upfeats_pfam_www {
+  my ($query_len, $get_upfeats_sub, $get_pfam_sub) = @_;
+  $get_upfeats_sub = \&up_json_annots unless (ref($get_upfeats_sub) eq 'CODE');
+
+  my ($annot_line, $seq_len) = split(/\t/,$query_len);
+  $annot_line = '' unless defined($annot_line);
+
+  my %annot_data = (seq_info=>$annot_line);
+
+  # forget the previous record, so that a line without an accession can
+  # never be annotated with the accession of the line before it
+  ($tmp, $gi, $sdb, $acc, $id) = ();
+  $use_acc = 1;
+
+  if ($annot_line =~ m/^gi\|/) {
+    ($tmp, $gi, $sdb, $acc, $id) = split(/\|/,$annot_line);
+  } elsif ($annot_line =~ m/^(SP|TR):(\w+)\s(\w+)/) {
+    ($sdb, $id, $acc) = (lc($1), $2, $3);
+  } elsif ($annot_line =~ m/^(SP|TR):(\w+)/) {
+    ($sdb, $id, $acc) = (lc($1), $2, '');
+    warn("*** $0 - accession required: $annot_line");
+    $use_acc = 0;
+  } elsif ($annot_line =~ m/^(UR\d{3}:UniRef\d{2})_(\w+)/) {
+    # no accession is taken from UniRef names, only the identifier
+    $sdb = lc($1);
+    $id = $2;
+  } elsif ($annot_line =~ m/\|/) {
+    ($sdb, $acc, $id) = split(/\|/,$annot_line);
+  }
+  else {
+    ($acc) = split(/\s+/,$annot_line);
+  }
+
+  # remove version number
+  $acc =~ s/\.\d+$// if ($acc);
+
+  my @annots = ();
+
+  # UniProt features are only requested for UniProt-style accessions
+  my $lwp_features = "";
+  if ($acc && ($acc =~ m/^[A-Z][0-9][A-Z0-9]{3}[0-9]/)) {
+    $lwp_features = get("$up_base/$acc.json");
+  }
+
+  if ($lwp_features && ($lwp_features !~ /ERROR/)) {
+    push @annots, $get_upfeats_sub->(\%annot_types, $lwp_features, $seq_len);
+  }
+
+  # Pfam domains: use the accession when there is one, otherwise fall
+  # back on the identifier
+  my $pfam_key = (($use_acc && $acc) ? $acc : $id);
+
+  if (defined($pfam_key) && $pfam_key ne '') {
+    push @annots, $get_pfam_sub->($pfam_key, $seq_len);
+  }
+  else {
+    warn "*** $0 - no accession/identifier found in: $annot_line\n";
+  }
+
+  @annots  = sort { $a->[0] <=> $b->[0] } @annots;
+  $annot_data{list} = \@annots;
+
+  return \%annot_data;
+}
+
+# up_json_annots(\%annot_types, $annot_data, $seq_len)
+#   \%annot_types - feature type => one-character symbol; only features
+#                   whose type has a true entry here are reported
+#   $annot_data   - JSON text of one entry of the feature service
+#   $seq_len      - accepted for interface compatibility, not used
+# Returns a list of rows [position, symbol, residue-or-"-", text]:
+#   VARIANT/MUTAGEN : the alternative sequence and the description
+#   other types     : only single-residue features (begin == end), with
+#                     the %feats_text label prepended to the description
+# Features without a numeric begin position are skipped.  An empty list
+# is returned (with a warning) when the text is not valid JSON.
+sub up_json_annots {
+  my ($annot_types, $annot_data, $seq_len) = @_;
+
+  # decode_json() dies on malformed input, e.g. an HTML error page
+  my $json_ref = eval { decode_json($annot_data) };
+  unless (ref($json_ref) eq 'HASH') {
+    warn "*** $0 - cannot parse UniProt feature data\n";
+    return ();
+  }
+
+  my $feat_list = $json_ref->{features};
+  return () unless (ref($feat_list) eq 'ARRAY');
+
+  my @sites = ();  # sites with one position
+
+  for my $feat ( @{$feat_list} ) {
+    my ($label, $pos, $end, $value)  = @{$feat}{qw(type begin end description)};
+
+    next unless (defined($label) && $annot_types->{$label});
+    # rows are later sorted numerically on the position
+    next unless (defined($pos) && $pos =~ m/^\d+$/);
+
+    $value = '' unless defined($value);
+
+    if ($label =~ m/VARIANT/ || $label =~ m/MUTAGEN/) {
+      my $alt_seq = $feat->{alternativeSequence};
+      $alt_seq = '' unless defined($alt_seq);
+      push @sites, [$pos, $annot_types->{$label}, $alt_seq, $value];
+    }
+    else {
+      next unless (defined($end) && $end =~ m/^\d+$/ && $pos == $end);
+      if ($feats_text{$label}) {
+        my $info = $feats_text{$label};
+        if ($value) {
+          $info .= ": $value";
+        }
+        push @sites, [$pos, $annot_types->{$label}, "-", $info];
+      } else {
+        push @sites, [$pos, $annot_types->{$label}, "-", $value];
+      }
+    }
+  }
+
+  return @sites;
+}
+
+# twig handler for <sequence>: store its "length" attribute in the
+# file-level $pf_seq_length
+sub get_length {
+    my $seq_elt = $_[1];
+    $pf_seq_length = $seq_elt->{att}->{length};
+}
+
+# twig handler for <match>: append one hash per match to @pf_domains,
+# holding the attributes of the <match> element together with those of
+# its first <location> child (location attributes win on a name clash)
+sub push_match {
+    my $match_elt = $_[1];
+    my $match_att = $match_elt->{att};
+    my $location_att = $match_elt->first_child('location')->{att};
+    push @pf_domains, { %$match_att, %$location_att };
+}
+
+# twig handler for <hmm_details>: store its "model_length" attribute in
+# the file-level $pf_model_length
+sub get_model_length {
+    my $hmm_elt = $_[1];
+    $pf_model_length = $hmm_elt->{att}->{model_length};
+}
+
+# twig handler for <clan_membership>: copy the "clan_acc" and "clan_id"
+# attributes into the file-level $clan_acc and $clan_id
+sub get_clan {
+    my $clan_elt = $_[1];
+    my $clan_att = $clan_elt->{att};
+    ($clan_acc, $clan_id) = ($clan_att->{clan_acc}, $clan_att->{clan_id});
+}
+
+# get_pfam_www($acc, $seq_length)
+#
+# Fetches the Pfam annotation for protein $acc as XML from
+# $loc . "protein/$acc?output=xml" and turns it into a feature list:
+#   1. sort the domains by start; look up model_length and clan for
+#      each domain's family ("family/<accession>?output=xml")
+#   2. drop domains that start at/after $seq_length, clip the others
+#   3. if $no_over: among overlapping domains keep the best evalue,
+#      then split any remaining small overlaps down the middle
+#   4. join consecutive hits to the same family ("split domains")
+#   5. if $vdoms: add virtual '@' domains for unaligned model ends
+#   6. if $neg_doms: add 'NODOM' entries for un-annotated regions
+#   7. map each {info} through domain_name()
+#
+# Reads/writes variables declared outside this sub: $loc, $pf_url,
+# @pf_domains, $pf_seq_length, $pf_model_length, $clan_acc, $clan_id,
+# %pfamA_fams, and the flags $pf_acc_flag, $no_over, $vdoms, $min_vdom,
+# $neg_doms, $min_nodom, $lav.
+# get() is presumably LWP::Simple::get, imported outside this view.
+#
+# Returns a list of array refs: [start, end, info] when $lav is set,
+# otherwise [start, '-', end, info].
+sub get_pfam_www {
+  my ($acc, $seq_length) = @_;
+
+#  if ($acc =~ m/_/) {$url = "protein?id=$acc&output=xml"; }
+#  else {$url = "protein/$acc?output=xml"; }
+
+  $pf_url = "protein/$acc?output=xml";
+
+  my $res = get($loc . $pf_url);
+
+  @pf_domains = ();
+
+  # the twig handlers are defined outside this view; presumably
+  # push_match fills @pf_domains and get_length sets $pf_seq_length
+  my $twig_dom = XML::Twig->new(twig_roots => {matches => 1, sequence => 1},
+#			    start_tag_handlers => {
+#						   'sequence' => \&get_length,
+#						  },
+			    twig_handlers => {
+					      'match' => \&push_match,
+					      'sequence' => \&get_length,
+					     },
+			    pretty_print => 'indented');
+  my $xml = $twig_dom->parse($res);
+
+  # no usable length from the caller: use the one from the XML
+  if (!$seq_length || $seq_length == 0) {
+      $seq_length = $pf_seq_length;
+  }
+
+  @pf_domains = sort { $a->{start} <=> $b->{start} } @pf_domains;
+
+  # to look for possible joining, need model_length
+  for my $curr_dom (@pf_domains) {
+      
+      # this $acc is the family accession; it shadows the protein $acc
+      # only inside this loop
+      my $acc = $curr_dom->{accession};
+      $pf_url = "family/$acc?output=xml";
+
+      my $res = get($loc . $pf_url);
+
+      my $twig_fam = XML::Twig->new(twig_roots => {hmm_details => 1, clan_membership=> 1},
+				    twig_handlers => {
+					'hmm_details' => \&get_model_length,
+					'clan_membership' => \&get_clan,
+				    },
+				    pretty_print => 'indented');
+
+      # reset before parsing so a family without a clan_membership
+      # element does not keep the previous family's clan
+      ($clan_acc, $clan_id) = ("","");
+      my $fam_xml = $twig_fam->parse($res);
+
+      # cache per family; domain_name() reads the clan from %pfamA_fams
+      $pfamA_fams{$acc} = { model_length => $pf_model_length, clan_acc=>$clan_acc, clan_id=>$clan_id};
+      $curr_dom->{model_length} = $pf_model_length;
+  }
+
+  # check for domain overlap, and resolve check for domain overlap
+  # (possibly more than 2 domains), choosing the domain with the best
+  # evalue
+
+  my @raw_pf_domains = @pf_domains;
+  @pf_domains = ();
+
+  # choose the label (accession or id) and keep only domains that lie
+  # inside the sequence, clipping the end to $seq_length
+  for my $dom_ref (@raw_pf_domains) {
+    if ($pf_acc_flag) {
+      $dom_ref->{info} = $dom_ref->{accession};
+    }
+    else {
+      $dom_ref->{info} = $dom_ref->{id};
+    }
+    next if ($dom_ref->{start} >= $seq_length);
+    if ($dom_ref->{end} >= $seq_length) {
+	$dom_ref->{end} = $seq_length;
+    }
+    push @pf_domains, $dom_ref;
+  }
+
+  if($no_over && scalar(@pf_domains) > 1) {
+
+    my @tmp_domains = @pf_domains;
+    my @save_domains = ();
+
+    my $prev_dom = shift @tmp_domains;
+
+    while (my $curr_dom = shift @tmp_domains) {
+
+      my @overlap_domains = ($prev_dom);
+
+      # $diff > 0 means $curr_dom starts before $prev_dom ends
+      my $diff = $prev_dom->{end} - $curr_dom->{start};
+      # check for overlap > domain_length/3
+
+      my ($prev_len, $cur_len) = ($prev_dom->{end}-$prev_dom->{start}+1, $curr_dom->{end}-$curr_dom->{start}+1);
+      # true when one domain lies entirely inside the other
+      my $inclusion = ((($curr_dom->{start} >= $prev_dom->{start}) && ($curr_dom->{end} <= $prev_dom->{end})) ||
+		       (($curr_dom->{start} <= $prev_dom->{start}) && ($curr_dom->{end} >= $prev_dom->{end})));
+
+      my $longer_len = ($prev_len > $cur_len) ? $prev_len : $cur_len;
+
+      # collect every following domain that is included in, or overlaps
+      # by more than a third of the longer length with, $prev_dom
+      while ($inclusion || ($diff > 0 && $diff > $longer_len/3)) {
+	push @overlap_domains, $curr_dom;
+	$curr_dom = shift @tmp_domains;
+	last unless $curr_dom;
+	$diff = $prev_dom->{end} - $curr_dom->{start};
+	($prev_len, $cur_len) = ($prev_dom->{end}-$prev_dom->{start}+1, $curr_dom->{end}-$curr_dom->{start}+1);
+	$longer_len = ($prev_len > $cur_len) ? $prev_len : $cur_len;
+	$inclusion = ((($curr_dom->{start} >= $prev_dom->{start}) && ($curr_dom->{end} <= $prev_dom->{end})) ||
+		      (($curr_dom->{start} <= $prev_dom->{start}) && ($curr_dom->{end} >= $prev_dom->{end})));
+      }
+
+      # check for overlapping domains; >1 because $prev_dom is always there
+      if (scalar(@overlap_domains) > 1 ) {
+	# if $rpd2_fams, check for a chosen one
+
+	# domains without an evalue are ranked as evalue 1.0
+	for my $dom ( @overlap_domains) {
+	  $dom->{evalue} = 1.0 unless defined($dom->{evalue});
+	}
+
+	@overlap_domains = sort { $a->{evalue} <=> $b->{evalue} } @overlap_domains;
+	$prev_dom = $overlap_domains[0];
+      }
+
+      # $prev_dom should be the best of the overlaps, and we are no longer overlapping > dom_length/3
+      push @save_domains, $prev_dom;
+      # $curr_dom is undef here when the inner loop ran out of domains
+      $prev_dom = $curr_dom;
+    }
+    if ($prev_dom) {push @save_domains, $prev_dom;}
+
+    @pf_domains = @save_domains;
+
+    # now check for smaller overlaps
+    # (the left domain gives up int(overlap/2) residues and the right
+    # domain starts immediately after it)
+    for (my $i=1; $i < scalar(@pf_domains); $i++) {
+      if ($pf_domains[$i-1]->{end} >= $pf_domains[$i]->{start}) {
+	my $overlap = $pf_domains[$i-1]->{end} - $pf_domains[$i]->{start};
+	$pf_domains[$i-1]->{end} -= int($overlap/2);
+	$pf_domains[$i]->{start} = $pf_domains[$i-1]->{end}+1;
+      }
+    }
+  }
+
+  # before checking for domain overlap, check for "split-domains"
+  # (self-unbound) by looking for runs of the same domain that are
+  # ordered by model_start
+  # (note: in this sub the joining runs after the $no_over overlap
+  # resolution above, not before it)
+
+  if (scalar(@pf_domains) > 1) {
+    my @j_domains;		#joined domains
+    my @tmp_domains = @pf_domains;
+
+    my $prev_dom = shift(@tmp_domains);
+
+    for my $curr_dom (@tmp_domains) {
+      # to join domains:
+      # (1) the domains must be in order by model_start/end coordinates
+      # (3) joining the domains cannot make the total combination too long
+
+      # check for model and sequence consistency
+      if ($prev_dom->{accession} eq $curr_dom->{accession}) { # same family
+
+	# lengths in model (HMM) coordinates and as fractions of the model
+	# NOTE(review): divides by model_length; a zero or missing value
+	# (e.g. a failed family lookup) would be a fatal division by zero
+	my $prev_dom_len = $prev_dom->{hmm_end}-$prev_dom->{hmm_start}+1;
+	my $curr_dom_len = $curr_dom->{hmm_end}-$curr_dom->{hmm_start}+1;
+	my $prev_dom_fn = $prev_dom_len/$curr_dom->{model_length};
+	my $curr_dom_fn = $curr_dom_len/$curr_dom->{model_length};
+	my $missing_dom_fn = max(0,($curr_dom->{hmm_start} - $prev_dom->{hmm_end})/$curr_dom->{model_length});
+
+	# NOTE(review): the "limit overlap" pair is combined with '||', so
+	# it is true whenever hmm_end is positive; '&&' may have been
+	# intended -- confirm before changing
+	if ( $prev_dom->{hmm_start} < $curr_dom->{hmm_start} # model check
+	     && $prev_dom->{hmm_end} < $curr_dom->{hmm_end}
+	     && ($curr_dom->{hmm_start} > $prev_dom->{hmm_end} * 0.80 # limit overlap
+		 || $curr_dom->{hmm_start} <  $prev_dom->{hmm_end} * 1.25)
+	     && $prev_dom_fn + $curr_dom_fn < 1.33
+	     && $missing_dom_fn < min($prev_dom_fn,$curr_dom_fn)) { # join them by updating $prev_dom
+	  $prev_dom->{end} = $curr_dom->{end};
+	  $prev_dom->{hmm_end} = $curr_dom->{hmm_end};
+	  $prev_dom->{evalue} = ($prev_dom->{evalue} < $curr_dom->{evalue} ? $prev_dom->{evalue} : $curr_dom->{evalue});
+	  next;
+	}
+
+	push @j_domains, $prev_dom;
+	$prev_dom = $curr_dom;
+      }
+      else {
+	  push @j_domains, $prev_dom;
+	  $prev_dom = $curr_dom;
+      }
+    }
+    push @j_domains, $prev_dom;
+    @pf_domains = @j_domains;
+  }
+
+  # $vdoms -- virtual Pfam domains -- the equivalent of $neg_doms,
+  # but covering parts of a Pfam model that are not annotated.  split
+  # domains have been joined, so simply check beginning and end of
+  # each domain (but must also check for bounded-ness)
+  # only add when 10% or more is missing and missing length > $min_nodom
+  # (the tests below compare against $min_vdom)
+
+  if ($vdoms) {
+      my @vpf_domains;
+
+      my $curr_dom = $pf_domains[0];
+
+      # placeholder "previous domain": empty accession, ends at 0
+      my $prev_dom={end=>0, accession=>''};
+      my $prev_dom_end = 0;
+      my $next_dom_start = $seq_length+1;
+
+      for (my $dom_ix=0; $dom_ix < scalar(@pf_domains); $dom_ix++ ) {
+	  $curr_dom = $pf_domains[$dom_ix];
+
+	  my $pfamA =  $curr_dom->{accession};
+
+	  # first, look left, is there a domain there (if there is,
+	  # it should be updated right
+
+	  # my $min_vdom = $curr_dom->{model_length} / 10;
+
+	  if ($prev_dom->{accession}) { # look for previous domain
+	      $prev_dom_end = $prev_dom->{end};
+	  }
+
+	  # there is a domain to the left, how much room is available?
+	  # (smaller of the free sequence and the unaligned model start)
+	  my $left_dom_len = min($curr_dom->{start}-$prev_dom_end-1, $curr_dom->{hmm_start}-1);
+	  if ( $left_dom_len > $min_vdom) {
+	      # there is room for a virtual domain
+	      # NOTE(review): hmm_start is set to the virtual domain's
+	      # length rather than to a model coordinate -- confirm
+	      my %new_dom = (start=> $curr_dom->{start}-$left_dom_len,
+			     end => $curr_dom->{start}-1,
+			     info=>'@'.$curr_dom->{accession},
+			     model_length=> $curr_dom->{model_length},
+			     hmm_end => $curr_dom->{hmm_start}-1,
+			     hmm_start => $left_dom_len,
+			     accession=>$pfamA,
+		  );
+	      push @vpf_domains, \%new_dom;
+	  }
+
+	  # save the current domain
+	  push @vpf_domains, $curr_dom;
+	  $prev_dom = $curr_dom;
+
+	  if ($dom_ix < $#pf_domains) { # there is a domain to the right
+	      # first, give all the extra space to the first domain (no splitting)
+	      $next_dom_start = $pf_domains[$dom_ix+1]->{start};
+	  }
+	  else {
+	      $next_dom_start = $seq_length;
+	  }
+
+	  # is there room for a virtual domain right
+
+	  my $right_dom_len = min($next_dom_start-$curr_dom->{end}-1, # space available 
+				  $curr_dom->{model_length}-$curr_dom->{hmm_end} # space needed
+	      );
+	  if ( $right_dom_len > $min_vdom) {
+	      # the right-hand virtual domain carries no hmm_start/hmm_end
+	      my %new_dom = (start=> $curr_dom->{end}+1,
+			     end=> $curr_dom->{end}+$right_dom_len,
+			     info=>'@'.$pfamA,
+			     model_length => $curr_dom->{model_length},
+			     accession=> $pfamA,
+		  );
+	      push @vpf_domains, \%new_dom;
+	      $prev_dom = \%new_dom;
+	  }
+      }				# all done, check for last one
+    # @vpf_domains has both old @pf_domains and new virtual-domains
+    @pf_domains = @vpf_domains;
+  }
+
+  if ($neg_doms) {
+    my @npf_domains;
+    my $prev_dom={end=>0};
+    # insert a NODOM entry wherever the space before a domain is
+    # longer than $min_nodom
+    for my $curr_dom ( @pf_domains) {
+      if ($curr_dom->{start} - $prev_dom->{end} > $min_nodom) {
+	my %new_dom = (start=>$prev_dom->{end}+1, end => $curr_dom->{start}-1, info=>'NODOM');
+	push @npf_domains, \%new_dom;
+      }
+      push @npf_domains, $curr_dom;
+      $prev_dom = $curr_dom;
+    }
+    # and one more after the last domain, up to the end of the sequence
+    if ($seq_length - $prev_dom->{end} > $min_nodom) {
+      my %new_dom = (start=>$prev_dom->{end}+1, end=>$seq_length, info=>'NODOM');
+      if ($new_dom{end} > $new_dom{start}) {push @npf_domains, \%new_dom;}
+    }
+
+    # @npf_domains has both old @pf_domains and new neg-domains
+    @pf_domains = @npf_domains;
+  }
+
+  # now make sure we have useful names: colors
+  # (NODOM entries have no {accession}, so domain_name() receives undef
+  # as its third argument for them)
+
+  for my $pf (@pf_domains) {
+    $pf->{info} = domain_name($pf->{info}, $acc, $pf->{accession});
+  }
+
+  # build the rows that are returned to the caller
+  my @feats = ();
+  for my $d_ref (@pf_domains) {
+    if ($lav) {
+      push @feats, [$d_ref->{start}, $d_ref->{end}, $d_ref->{info}];
+    }
+    else {
+      push @feats, [$d_ref->{start}, '-', $d_ref->{end},  $d_ref->{info} ];
+#      push @feats, [$d_ref->{end}, ']', '-', ""];
+    }
+  }
+
+  return @feats;
+}
+
+# domain_name($value, $seq_id, $pf_acc)
+#
+# Maps a domain label to the name used for display/coloring and
+# registers it in the file-level tables %domains (name -> number),
+# %domain_clan (family -> clan info, or 0) and @domain_list.
+#
+#   $value  : domain label; a leading '@' marks a virtual domain
+#   $seq_id : sequence identifier, used only in the warning message
+#   $pf_acc : family accession, key into %pfamA_fams and the value
+#             pushed on @domain_list
+#
+# Unless $no_clans is set, the clan of a new family is taken from
+# %pfamA_fams{$pf_acc} or, when that is missing, fetched from
+# $loc . "family/$value?output=xml".  A family that belongs to a clan
+# is renamed "C.<clan_id>" ("C.<clan_acc>" with $pf_acc_flag), so that
+# different families in the same clan share one name and number.
+#
+# Returns the (possibly clan-substituted) name, with the '@' put back
+# for virtual domains, or "" (after a warning) when $value is undefined.
+
+sub domain_name {
+
+  my ($value, $seq_id, $pf_acc) = @_;
+  my $is_virtual = 0;
+
+  # test for a missing name first: the pattern operations below would
+  # otherwise run on an undefined value before it is reported
+  unless (defined($value)) {
+    warn "missing domain name for $seq_id";
+    return "";
+  }
+
+  # strip the virtual-domain marker; it is restored before returning
+  if ($value =~ s/^@//) {
+    $is_virtual = 1;
+  }
+
+  if ($no_clans) {
+    # clans disabled: every new name simply gets the next number
+    if (! defined($domains{$value})) {
+      $domain_clan{$value} = 0;
+      $domains{$value} = ++$domain_cnt;
+      push @domain_list, $pf_acc;
+    }
+  }
+  elsif (!defined($domain_clan{$value})) {
+    ## only do this for new domains, old domains have known mappings
+
+    ## ways to highlight the same domain:
+    # (1) for clans, substitute clan name for family name
+    # (2) for clans, use the same color for the same clan, but don't change the name
+    # (3) for clans, combine family name with clan name, but use colors based on clan
+
+    # return the clan name, identifier if a clan member
+    if (!defined($pfamA_fams{$pf_acc})) {
+
+      my $pf_url = "family/$value?output=xml";
+
+      my $res = get($loc . $pf_url);
+
+      my $twig_clan = XML::Twig->new(twig_roots => {'clan_membership'=>1},
+				     twig_handlers => {
+						       'clan_membership' => \&get_clan,
+						      },
+				     pretty_print => 'indented');
+
+      # make certain to reinitialize
+      ($clan_acc, $clan_id) = ("","");
+      my $xml = $twig_clan->parse($res);
+    }
+    else {
+      # family already looked up: reuse the cached clan
+      ($clan_acc, $clan_id) = @{$pfamA_fams{$pf_acc}}{qw(clan_acc clan_id)};
+    }
+
+    if ($clan_acc) {
+      my $c_value = "C." . $clan_id;
+      if ($pf_acc_flag) {$c_value = "C." . $clan_acc;}
+
+      $domain_clan{$value} = {clan_id => $clan_id,
+			      clan_acc => $clan_acc};
+
+      if ($domains{$c_value}) {
+	# clan already numbered: this family shares that number
+	$domain_clan{$value}->{domain_cnt} =  $domains{$c_value};
+	$value = $c_value;
+      }
+      else {
+	# first family seen for this clan: allocate a new number
+	$domain_clan{$value}->{domain_cnt} = ++ $domain_cnt;
+	$value = $c_value;
+	$domains{$value} = $domain_cnt;
+	push @domain_list, $pf_acc;
+      }
+    }
+    else {
+      # not a clan member: the family keeps its own name and number
+      $domain_clan{$value} = 0;
+      $domains{$value} = ++$domain_cnt;
+      push @domain_list, $pf_acc;
+    }
+  }
+  elsif ($domain_clan{$value} && $domain_clan{$value}->{clan_acc}) {
+    # known clan member: substitute the clan name
+    if ($pf_acc_flag) {$value = "C." . $domain_clan{$value}->{clan_acc};}
+    else { $value = "C." . $domain_clan{$value}->{clan_id}; }
+  }
+
+  if ($is_virtual) {
+    # the virtual name shares the number of the real domain
+    $domains{'@'.$value} = $domains{$value};
+    $value = '@'.$value;
+  }
+  return $value;
+}
+
+# domain_num($value, $number)
+# For a virtual domain (name starting with '@') replace the '@' with
+# 'v' and append "v" to the number; other names pass through unchanged.
+# Returns the list ($value, $number).
+sub domain_num {
+  my ($label, $count) = @_;
+
+  # s/// is true only when a leading '@' was actually replaced
+  my $was_virtual = ($label =~ s/^@/v/);
+  $count = $count . "v" if $was_virtual;
+
+  return ($label, $count);
+}
+
+# min($arg1, $arg2): the numerically smaller of two values
+# (the first one when they compare equal)
+sub min {
+  my ($first, $second) = @_;
+
+  if ($first <= $second) {
+    return $first;
+  }
+  return $second;
+}
+
+# max($arg1, $arg2): the numerically larger of two values
+# (the first one when they compare equal)
+sub max {
+  my ($first, $second) = @_;
+
+  if ($first >= $second) {
+    return $first;
+  }
+  return $second;
+}
+
+__END__
+
+=pod
+
+=head1 NAME
+
+ann_feats_up_www2.pl
+
+=head1 SYNOPSIS
+
+ ann_feats_up_www2.pl --no_doms --no_feats --lav 'sp|P09488|GSTM1_HUMAN' | accession.file
+
+=head1 OPTIONS
+
+ -h	short help
+ --help include description
+ --no-doms  do not show domain boundaries (domains are always shown with --lav)
+ --no-feats do not show feature (variants, active sites, phospho-sites)
+ --lav  produce lav2plt.pl annotation format, only show domains/repeats
+
+ --neg-doms,  -- report domains between annotated domains as NODOM
+                 (also --neg, --neg_doms)
+ --min_nodom=10  -- minimum length between domains for NODOM
+
+ --host, --user, --password, --port --db -- info for mysql database
+
+=head1 DESCRIPTION
+
+C<ann_feats_up_www2.pl> extracts feature, domain, and repeat
+information from the Uniprot DAS server through an XSLT transformation
+provided by http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/uniprotkb.
+This server provides GFF descriptions of Uniprot entries, with most of
+the information provided in UniProt feature tables.
+
+C<ann_feats_up_www2.pl> is an alternative to C<ann_pfam.pl> that
+does not require a local MySQL copy of Pfam.
+
+Given a command line argument that contains a sequence accession
+(P09488), the program looks up the features available for that
+sequence and returns them in a tab-delimited format:
+
+>sp|P09488|GSTM1_HUMAN
+2	[	-	GST N-terminal :1
+7	V	F	Mutagen: Reduces catalytic activity 100- fold.
+23	*	-	MOD_RES: Phosphotyrosine (By similarity).
+33	*	-	MOD_RES: Phosphotyrosine (By similarity).
+34	*	-	MOD_RES: Phosphothreonine (By similarity).
+88	]	-	
+90	[	-	GST C-terminal :2
+108	V	Q	Mutagen: Reduces catalytic activity by half.
+108	V	S	Mutagen: Changes the properties of the enzyme toward some substrates.
+109	V	I	Mutagen: Reduces catalytic activity by half.
+116	#	-	BINDING: Substrate.
+116	V	A	Mutagen: Reduces catalytic activity 10-fold.
+116	V	F	Mutagen: Slight increase of catalytic activity.
+173	V	N	in allele GSTM1B; dbSNP:rs1065411.
+208	]	-	
+210	V	T	in dbSNP:rs449856.
+
+If features are provided, then a legend of feature symbols is provided
+as well:
+
+ =*:phosphorylation
+ ==:active site
+ =@:site
+ =^:binding
+ =!:metal binding
+
+If the C<--lav> option is specified, domain and repeat features are
+presented in a different format for the C<lav2plt.pl> program:
+
+  >sp|P09488|GSTM1_HUMAN
+  2	88	GST N-terminal.
+  90	208	GST C-terminal.
+
+C<ann_feats_up_www2.pl> is designed to be used by the B<FASTA> programs with
+the C<-V \!ann_feats_up_www2.pl> option.  It can also be used with the lav2plt.pl
+program with the C<--xA "\!ann_feats_up_www2.pl --lav"> or C<--yA "\!ann_feats_up_www2.pl --lav"> options.
+
+=head1 AUTHOR
+
+William R. Pearson, wrp at virginia.edu
+
+=cut
diff --git a/scripts/annot_blast_btop2.pl b/scripts/annot_blast_btop2.pl
new file mode 100755
index 0000000..63acbf8
--- /dev/null
+++ b/scripts/annot_blast_btop2.pl
@@ -0,0 +1,1306 @@
+#!/usr/bin/perl -w
+
+################################################################
+# copyright (c) 2014,2015 by William R. Pearson and The Rector &
+# Visitors of the University of Virginia */
+################################################################
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under this License is distributed on an "AS
+# IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+# express or implied.  See the License for the specific language
+# governing permissions and limitations under the License. 
+################################################################
+
+################################################################
+# annot_blast_btop2.pl --query query.file --ann_script ann_pfam_www.pl blast_tab_btop_file
+################################################################
+# annot_blast_btop2.pl associates domain annotation information and
+# subalignment scores with a blast tabular (-outfmt 6 or -outfmt 7)
+# file that contains the raw score and the BTOP alignment encoding
+# This file can be generated from "blastp/n" or "blast_formatter"
+# using the command:
+#   blast_formatter -archive blast_output.asn -outfmt '7 qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore score btop'  > blast_output.tab_annot
+#
+# If the BTOP field or query_file is not available, the script
+# produces domain content without sub-alignment scores.
+################################################################
+## 13-Jan-2017
+# modified to provide query/subject coordinates and identities if no
+# query sequence -- does not decrement for reverse-complement fastx/blastx DNA
+################################################################
+## 16-Nov-2015
+# modify to allow multi-query blast searches
+################################################################
+## 19-Dec-2015
+# add -q_annot_script to annotate query sequence
+#
+
+use strict;
+use IPC::Open2;
+use Pod::Usage;
+use Getopt::Long;
+# use Data::Dumper;
+
+# read lines of the form:
+# gi|121694|sp|P20432.1|GSTT1_DROME	gi|121694|sp|P20432|GSTT1_DROME	100.00	209	0	0	1	209	1	209	6e-156	433	1113	209
+# gi|121694|sp|P20432.1|GSTT1_DROME	gi|1170090|sp|P04907|GSTF3_MAIZE	26.77	198	123	7	4	185	6	197	2e-08	51.2	121	FL1YG ... 1NKRA1YW1
+# gi|121694|sp|P20432.1|GSTT1_DROME	gi|81174731|sp|P0ACA5|SSPA_ECO57	39.66	58	32	2	43	100	49	103	8e-06	43.9	102	EDFLLI ... V-I-NEQS3FM
+# gi|121694|sp|P20432.1|GSTT1_DROME	gi|121695|sp|P12653|GSTF1_MAIZE	27.62	181	107	7	32	203	34	199	9e-05	40.8	94	LI1LF ... N-1AS1CLLM1
+
+# and report the domain content ala -m 8CC
+
+# command line state; these file-scope lexicals are also visible to
+# the subs defined later in the file
+# NOTE(review): $matrix is set from --matrix but is not read anywhere in
+# the part of the script visible here; scoring uses the built-in
+# BLOSUM62 table -- confirm against the rest of the file
+my ($matrix, $ann_script, $q_ann_script, $show_raw, $shelp, $help) = ("BLOSUM62", "", "", 0, 0, 0);
+my ($query_lib_name) = ("");  # if $query_lib_name, do not use $query_file_name
+my ($out_field_str) = ("");
+my $query_lib_r = 0;		# becomes a hash ref of query sequences when --query is given
+
+# scoring tables, filled by init_blosum62():
+# @blosum62 rows, @blosum62_diag identity scores, %aa_map residue -> row index
+my @blosum62 = ();
+my @blosum62_diag = ();
+my %aa_map = ();
+my ($g_open, $g_ext) = (-11, -1);
+init_blosum62();
+
+# --query, --query_file and --query_lib are synonyms, as are
+# --ann_script/--script and --q_ann_script/--q_script
+GetOptions(
+    "matrix:s" => \$matrix,
+    "ann_script:s" => \$ann_script,
+    "q_ann_script:s" => \$q_ann_script,
+    "query:s" => \$query_lib_name,
+    "query_file:s" => \$query_lib_name,
+    "query_lib:s" => \$query_lib_name,
+    "out_fields:s" => \$out_field_str,
+    "script:s" => \$ann_script,
+    "q_script:s" => \$q_ann_script,
+    "raw_score" => \$show_raw,
+    "h|?" => \$shelp,
+    "help" => \$help,
+    );
+
+pod2usage(1) if $shelp;
+pod2usage(exitstatus => 0, verbose => 2) if $help;
+# show the usage message unless there is input: STDIN is a file or a
+# pipe, or file names remain on the command line
+unless (-f STDIN || -p STDIN || @ARGV) {
+ pod2usage(1);
+}
+
+if ($query_lib_name) {
+  $query_lib_r = parse_query_lib($query_lib_name);
+}
+
+# column names, in order, of the expected blast tabular input
+my @tab_fields = qw(q_seqid s_seqid percid alen mismatch gopen q_start q_end s_start s_end evalue bits score BTOP);
+
+# the fields that are displayed are listed here.  By default, all
+# fields except the last one (BTOP) are displayed; 'score' is included.
+my @out_tab_fields = @tab_fields[0 .. $#tab_fields-1];
+if ($show_raw) {
+  push @out_tab_fields, "raw_score";
+
+}
+# --out_fields (whitespace separated names) replaces the default list,
+# including any raw_score column added above
+if ($out_field_str) {
+  @out_tab_fields = split(/\s+/,$out_field_str);
+}
+
+# comment ('#') lines waiting to be printed before the next set of hits
+my @header_lines = ();
+
+# need outer loop to enable multiple queries
+#
+# Each pass of the outer loop handles the hits of one query:
+#   1. read tab-delimited hit lines until a '#' line follows data
+#   2. run --q_ann_script on the query id and --ann_script on every
+#      subject id, attaching {domains} and {sites} via read_annots()
+#   3. print the pending '#' lines, then one output line per hit with
+#      the selected fields plus sub-alignment or domain-content info
+# The loop ends when the input (ARGV) is exhausted; '#' lines still
+# pending at that point are printed after the loop.
+while (1) {
+
+  my $next_line = "";
+  my $have_data = 0;
+
+  my @hit_list = ();
+  my @q_hit_list = ();
+
+  while (my $line = <>) {
+    if ($line =~ /^#/) {
+      if ($have_data) {
+	# first comment line after data: it belongs to the next query
+	$next_line = $line;
+	$have_data = 0;
+	last;
+      } else {
+	push @header_lines, $line;
+      }
+      next;
+    }
+
+    $have_data = 1;
+    my %hit_data = ();
+    chomp $line;
+    next unless $line;
+    @hit_data{@tab_fields} = split(/\t/,$line);
+
+    push @hit_list, \%hit_data;
+  }
+
+  # annotate the query sequence; skipped when this pass collected no
+  # hits, because there is then no query id to send to the script
+  if (@hit_list && $q_ann_script && -x (split(/\s+/,$q_ann_script))[0]) {
+    # get the domains for the q_seqid using --q_ann_script
+    #
+    my ($Reader, $Writer);
+    my $pid = open2($Reader, $Writer, $q_ann_script);
+    my $hit = $hit_list[0];
+
+    print $Writer $hit->{q_seqid},"\n";
+    close($Writer);
+
+    # read_annots() compares against {s_seqid}; {s_seq_id} is kept as
+    # well in case other code reads the older key
+    @q_hit_list = ({ s_seqid => $hit->{q_seqid}, s_seq_id=> $hit->{q_seqid} });
+
+    read_annots($Reader, \@q_hit_list, 0);
+
+    waitpid($pid, 0);
+  }
+
+  # annotate the subject sequences; skipped when there are no hits so
+  # that no annotation process is started for an empty list
+  if (@hit_list && $ann_script && -x (split(/\s+/,$ann_script))[0]) {
+    # get the domains for each s_seqid using --ann_script
+    #
+    my ($Reader, $Writer);
+    my $pid = open2($Reader, $Writer, $ann_script);
+    for my $hit (@hit_list) {
+      print $Writer $hit->{s_seqid},"\n";
+    }
+    close($Writer);
+
+    read_annots($Reader, \@hit_list, 1);
+
+    waitpid($pid, 0);
+  }
+
+  for my $line (@header_lines) {
+    print $line;
+  }
+  # the comment line that ended this query starts the next header
+  @header_lines = ($next_line);
+
+  # now get query sequence if available
+
+  my $q_hit = $q_hit_list[0];
+
+  for my $hit (@hit_list) {
+    my @list_covered = ();
+
+    # If I have an encoded alignment {BTOP} and a query sequence $query_lib_r && $query_lib_r->{$hit->{q_seqid}}
+    # then I can calculate sub-alignment scores
+    if (defined($hit->{BTOP}) && $query_lib_r && $query_lib_r->{$hit->{q_seqid}}) {
+
+      $hit->{raw_score} = 0;  # initialize in case no domains and raw_score requested
+      # calculate sub-alignment scores in subject/library coordinates
+      if (defined($hit->{domains}) && scalar(@{$hit->{domains}})) {
+	($hit->{raw_score}, $hit->{aligned_domains_r}) = 
+	  sub_alignment_score($query_lib_r->{$hit->{q_seqid}},
+			      $hit, \@blosum62, \@blosum62_diag, $hit->{domains}, 1);
+      }
+
+      if (defined($hit->{sites}) && scalar(@{$hit->{sites}})) {
+	$hit->{aligned_sites_r} = site_align($query_lib_r->{$hit->{q_seqid}},
+					     $hit, \@blosum62, $hit->{sites}, 1);
+      }
+
+      # calculate sub-alignment scores in query coordinates
+      if (defined($q_hit->{domains}) && scalar(@{$q_hit->{domains}})) {
+	($hit->{raw_score}, $hit->{q_aligned_domains_r}) = 
+	  sub_alignment_score($query_lib_r->{$hit->{q_seqid}},
+			      $hit, \@blosum62, \@blosum62_diag, $q_hit->{domains}, 0);
+      }
+
+      if (defined($q_hit->{sites}) && scalar(@{$q_hit->{sites}})) {
+	$hit->{q_aligned_sites_r} =  site_align($query_lib_r->{$hit->{q_seqid}},
+						$hit, \@blosum62, $q_hit->{sites}, 0);
+      }
+    }
+    elsif (defined($hit->{BTOP})) {
+      # alignment but no query sequence: positions only, no scores
+      if (defined($hit->{domains}) && scalar(@{$hit->{domains}})) {
+	$hit->{aligned_domains_r} = 
+	  sub_alignment_pos($hit, $hit->{domains}, 1);
+      }
+    }
+    else {		   # no alignment info, can provide domain overlap, and subject coordinates
+      $hit->{raw_score} = 0;
+      for my $dom_r (@{$hit->{domains}}) {
+	next if $dom_r->{d_end} < $hit->{s_start}; # before start
+	last if $dom_r->{d_pos} > $hit->{s_end}; # after end
+
+	if ($dom_r->{d_pos} <= $hit->{s_end} && $dom_r->{d_end} >= $hit->{s_start}) {
+	  push @list_covered, $dom_r->{descr};
+	}
+      }
+    }
+
+
+    ################
+    ## final output display
+
+    print join("\t",@{$hit}{@out_tab_fields}); # show fields from original blast tabular file
+
+    my $merged_annots_r = merge_annots($hit);	# merge the four possible annotation lists into one.
+
+    if (scalar(@$merged_annots_r)) { # show subalignment scores if available
+      print "\t";
+
+      print format_annot_info($hit, $merged_annots_r);
+    }
+    elsif (@list_covered) {	# otherwise show domain content
+      print "\t",join(";",@list_covered);
+    }
+    print "\n";
+  }
+
+  # for my $line (@footer_lines) {
+  #   print $line;
+  # }
+  # @footer_lines = ();
+
+  last if eof(ARGV);
+}
+
+# print the comment lines left over after the last set of hits
+for my $line (@header_lines) {
+  print $line;
+}
+
+################
+# read_annots ($Reader, \@hit_list, $target)
+# input: $Reader : file handle delivering the annotation script output
+#        \@hit_list : hits, in the order their ids were sent to the
+#                     script; each is matched by $hit_entry->{s_seqid}
+#        $target : stored as {target} in every annotation
+#                  (callers pass 0 for the query, 1 for the subject)
+# output: modified $hit_entry->{domains}
+#         modified $hit_entry->{sites}
+#
+# Annotation format: lines starting with '=' are skipped, a line
+# starting with '>' starts the annotations of the next sequence, and
+# every other line holds four tab-separated fields.  A '-' in the second
+# field marks a domain (d_pos, type, d_end, descr); anything else is a
+# site (d_pos, type, d_val, descr) with d_end set to d_pos.
+# $Reader is closed before returning.
+
+sub read_annots {
+  my ($Reader, $hit_list_r, $target) = @_;
+
+  my $current_domain = "";
+  my $hit_ix = 0;
+  my $n_hits = scalar(@$hit_list_r);
+  my @hit_domains = ();
+  my @hit_sites = ();
+
+  while (my $line = <$Reader>) {
+    next if $line=~ m/^=/;
+    chomp $line;
+    next if $line =~ m/^\s*$/;	# a blank line carries no annotation
+
+    # check for header
+    if ($line =~ m/^>/) {
+      if ($current_domain) {  # previous domains/sites have already been found and parsed
+	# only look at an existing hit: indexing past the end of the
+	# list would create a new, empty hit entry
+	my $expected = ($hit_ix < $n_hits) ? $hit_list_r->[$hit_ix]{s_seqid} : undef;
+	if (defined($expected) && $expected eq $current_domain) {
+	    $hit_list_r->[$hit_ix]{domains} = [ @hit_domains ];  # previous domains
+	    $hit_list_r->[$hit_ix]{sites} = [ @hit_sites ];      # previous sites
+	    $hit_ix++;
+	  } else {
+	    warn "phase error: $current_domain != " . (defined($expected) ? $expected : "");
+	  }
+      }
+      @hit_domains = ();   # current domains
+      @hit_sites = ();     # current sites
+      $current_domain = $line;
+      $current_domain =~ s/^>//;
+    } else {			# check for data
+      my %annot_info = (target=>$target);
+      my @a_fields = split(/\t/,$line);
+      if ($a_fields[1] eq '-') {
+	@annot_info{qw(d_pos type d_end descr)} = @a_fields;
+	# a trailing " :<number>" becomes "~<number>"
+	$annot_info{descr} =~ s/ :(\d+)$/~$1/;
+	push @hit_domains, \%annot_info;   # current
+      }
+      else {
+	@annot_info{qw(d_pos type d_val descr)} = @a_fields;
+	$annot_info{'d_end'} = $annot_info{'d_pos'};
+	push @hit_sites, \%annot_info;   # current
+      }
+    }
+  }
+  close($Reader);
+
+  # all done, save the last one -- but never beyond the end of the hit
+  # list (an empty list would otherwise gain a hit with no fields)
+  if ($hit_ix < $n_hits) {
+    $hit_list_r->[$hit_ix]{domains} = \@hit_domains;
+    $hit_list_r->[$hit_ix]{sites} = \@hit_sites;
+  }
+}
+
+# decode_btop($btop_str)
+# input: a blast BTOP string of the form: "1VA160TS7KG10RK27"
+# returns a list_ref of tokens: (1, "VA", 160, "TS", 7, "KG", 10, "RK", 27)
+# i.e. each run of digits is one token and the text between two runs
+# is cut into two-character pieces
+#
+sub decode_btop {
+  my ($btop_str) = @_;
+
+  # the capturing group makes split() return the digit runs as well; a
+  # string that begins with a digit yields an empty first field
+  my @pieces = split(/(\d+)/,$btop_str);
+  shift @pieces unless $pieces[0];
+
+  my @decoded = ();
+
+  foreach my $piece (@pieces) {
+    unless ($piece =~ m/^\d+$/) {
+      # split on a captured pair also returns the empty strings
+      # between the pairs; keep only the true pieces
+      push @decoded, grep { $_ } split(/(..)/,$piece);
+      next;
+    }
+    push @decoded, $piece;
+  }
+
+  return \@decoded;
+}
+
+# parse_query_lib($query_file)
+#
+# Reads a FASTA file with one or more sequences and returns a hash ref
+# that maps each sequence id (the header line up to the first
+# whitespace, without the '>') to an array ref of upper-case residues.
+# The array is 1-based: element 0 is "".  Only letters and '*' are kept
+# from the sequence lines.
+# If the file cannot be opened a warning is issued and a reference to
+# an empty hash is returned.
+sub parse_query_lib {
+  my ($query_file) = @_;
+
+  my %query_seqs = ();
+
+  # three-argument open: the name is never interpreted as a mode or pipe
+  my $qfd;
+  unless (open($qfd, '<', $query_file)) {
+    warn "cannot open query library $query_file: $!";
+    return \%query_seqs;
+  }
+
+  {				# local scope for $/
+    local $/ = "\n>";
+
+    while (my $entry = <$qfd>) {  # returns an entire fasta entry
+      chomp $entry;		  # removes the "\n>" that ends the record
+      my ($header, $sequence) = ($entry =~ m/^>?           # ^> only in first entry
+                                             ( [^\n]* ) \n # header line
+                                             ( .*     )    # the sequence
+                                            /osx);    # optimize, multiline, commented
+      # a record without "header\nsequence" does not match; skip it
+      # instead of storing an entry under an undefined name
+      next unless defined($header);
+
+      $sequence =~ s/[^A-Za-z\*]//g;    # remove everything but letters
+      $sequence = uc($sequence);
+      $header =~ s/\s.*$//;
+      my @seq = split(//,$sequence);
+      unshift @seq,"";	# @seq is now 1-based
+      $query_seqs{$header} =  \@seq;
+    }
+  }
+  close($qfd);
+
+  return \%query_seqs;
+}
+
+# parse_query_file($query_file)
+#
+# Reads a file and concatenates all of its sequence lines into one
+# sequence: lines starting with '>' or ';' are skipped, and only
+# letters and '*' are kept.  Returns an array ref of the upper-case
+# residues; unlike parse_query_lib() the array is 0-based.
+# If the file cannot be opened a warning is issued and a reference to
+# an empty array is returned.
+sub parse_query_file {
+  my ($query_file) = @_;
+
+  my $seq_data = "";
+
+  # three-argument open: the name is never interpreted as a mode or pipe
+  my $qfd;
+  unless (open($qfd, '<', $query_file)) {
+    warn "cannot open query file $query_file: $!";
+    return [];
+  }
+
+  while (my $line = <$qfd>) {
+    next if $line =~ m/^>/;
+    next if $line =~ m/^;/;
+    chomp $line;
+    $line =~ s/[^A-Za-z\*]//g;
+    $seq_data .= $line
+  }
+  close($qfd);
+
+  $seq_data = uc($seq_data);
+
+  my @seq = split(//,$seq_data);
+
+  return \@seq;
+}
+
+# init_blosum62()
+#
+# Fills the file-level scoring tables:
+#   @blosum62      : 24 x 24 score matrix, rows/columns in @ncbi_blaa order
+#   %aa_map        : residue letter -> row/column index
+#   @blosum62_diag : identity score of each residue ($blosum62[$i][$i])
+# and sets the gap penalties ($g_open, $g_ext) to (-11, -1).
+# Dies if the number of rows differs from the number of residues.
+sub init_blosum62 {
+
+  # residue order shared by the rows and the columns of the matrix
+  my @ncbi_blaa = qw(    A  R  N  D  C  Q  E  G  H  I  L  K  M  F  P  S  T  W  Y  V  B  Z  X  * );
+
+  $blosum62[ 0] = [ qw(  4 -1 -2 -2  0 -1 -1  0 -2 -1 -1 -1 -1 -2 -1  1  0 -3 -2  0 -2 -1  0 -4) ]; # A
+  $blosum62[ 1] = [ qw( -1  5  0 -2 -3  1  0 -2  0 -3 -2  2 -1 -3 -2 -1 -1 -3 -2 -3 -1  0 -1 -4) ]; # R
+  $blosum62[ 2] = [ qw( -2  0  6  1 -3  0  0  0  1 -3 -3  0 -2 -3 -2  1  0 -4 -2 -3  3  0 -1 -4) ]; # N
+  $blosum62[ 3] = [ qw( -2 -2  1  6 -3  0  2 -1 -1 -3 -4 -1 -3 -3 -1  0 -1 -4 -3 -3  4  1 -1 -4) ]; # D
+  $blosum62[ 4] = [ qw(  0 -3 -3 -3  9 -3 -4 -3 -3 -1 -1 -3 -1 -2 -3 -1 -1 -2 -2 -1 -3 -3 -2 -4) ]; # C
+  $blosum62[ 5] = [ qw( -1  1  0  0 -3  5  2 -2  0 -3 -2  1  0 -3 -1  0 -1 -2 -1 -2  0  3 -1 -4) ]; # Q
+  $blosum62[ 6] = [ qw( -1  0  0  2 -4  2  5 -2  0 -3 -3  1 -2 -3 -1  0 -1 -3 -2 -2  1  4 -1 -4) ]; # E
+  $blosum62[ 7] = [ qw(  0 -2  0 -1 -3 -2 -2  6 -2 -4 -4 -2 -3 -3 -2  0 -2 -2 -3 -3 -1 -2 -1 -4) ]; # G
+  $blosum62[ 8] = [ qw( -2  0  1 -1 -3  0  0 -2  8 -3 -3 -1 -2 -1 -2 -1 -2 -2  2 -3  0  0 -1 -4) ]; # H
+  $blosum62[ 9] = [ qw( -1 -3 -3 -3 -1 -3 -3 -4 -3  4  2 -3  1  0 -3 -2 -1 -3 -1  3 -3 -3 -1 -4) ]; # I
+  $blosum62[10] = [ qw( -1 -2 -3 -4 -1 -2 -3 -4 -3  2  4 -2  2  0 -3 -2 -1 -2 -1  1 -4 -3 -1 -4) ]; # L
+  $blosum62[11] = [ qw( -1  2  0 -1 -3  1  1 -2 -1 -3 -2  5 -1 -3 -1  0 -1 -3 -2 -2  0  1 -1 -4) ]; # K
+  $blosum62[12] = [ qw( -1 -1 -2 -3 -1  0 -2 -3 -2  1  2 -1  5  0 -2 -1 -1 -1 -1  1 -3 -1 -1 -4) ]; # M
+  $blosum62[13] = [ qw( -2 -3 -3 -3 -2 -3 -3 -3 -1  0  0 -3  0  6 -4 -2 -2  1  3 -1 -3 -3 -1 -4) ]; # F
+  $blosum62[14] = [ qw( -1 -2 -2 -1 -3 -1 -1 -2 -2 -3 -3 -1 -2 -4  7 -1 -1 -4 -3 -2 -2 -1 -2 -4) ]; # P
+  $blosum62[15] = [ qw(  1 -1  1  0 -1  0  0  0 -1 -2 -2  0 -1 -2 -1  4  1 -3 -2 -2  0  0  0 -4) ]; # S
+  $blosum62[16] = [ qw(  0 -1  0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1  1  5 -2 -2  0 -1 -1  0 -4) ]; # T
+  $blosum62[17] = [ qw( -3 -3 -4 -4 -2 -2 -3 -2 -2 -3 -2 -3 -1  1 -4 -3 -2 11  2 -3 -4 -3 -2 -4) ]; # W
+  $blosum62[18] = [ qw( -2 -2 -2 -3 -2 -1 -2 -3  2 -1 -1 -2 -1  3 -3 -2 -2  2  7 -1 -3 -2 -1 -4) ]; # Y
+  $blosum62[19] = [ qw(  0 -3 -3 -3 -1 -2 -2 -3 -3  3  1 -2  1 -1 -2 -2  0 -3 -1  4 -3 -2 -1 -4) ]; # V
+  $blosum62[20] = [ qw( -2 -1  3  4 -3  0  1 -1  0 -3 -4  0 -3 -3 -2  0 -1 -4 -3 -3  4  1 -1 -4) ]; # B
+  $blosum62[21] = [ qw( -1  0  0  1 -3  3  4 -2  0 -3 -3  1 -1 -3 -1  0 -1 -3 -2 -2  1  4 -1 -4) ]; # Z
+  $blosum62[22] = [ qw(  0 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2  0  0 -2 -1 -1 -1 -1 -1 -4) ]; # X
+  $blosum62[23] = [ qw( -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4  1) ]; # *
+
+
+  # the comparison uses the element counts; the message shows the last
+  # indices ($#array), i.e. one less than each count
+  die "blosum62 length mismatch $#blosum62 != $#ncbi_blaa" if (scalar(@blosum62) != scalar(@ncbi_blaa));
+
+  for (my $i=0; $i < scalar(@ncbi_blaa); $i++) {
+    $aa_map{$ncbi_blaa[$i]} = $i;
+    $blosum62_diag[$i] = $blosum62[$i][$i];
+  }
+
+  ($g_open, $g_ext) = (-11, -1);
+}
+
+# alignment_score($query_r, $query_start, $btop_align_r, $matrix_2d)
+#
+# given: (1) a query sequence; (2) an encoded alignment; (3) a scoring matrix
+# calculate a score
+#
+#   $query_r      : array ref of query residues, indexed from 0
+#   $query_start  : 1-based position in the query where the alignment starts
+#   $btop_align_r : array ref of decoded BTOP tokens: a number is a run
+#                   of identities, a two-character token is an aligned
+#                   pair in which '-' marks a gap
+#   $matrix_2d    : hash of hashes, $matrix_2d->{$res1}{$res2} = score
+#                   (not the array-of-arrays layout of @blosum62)
+#
+# Gaps are charged with the file-level $g_open and $g_ext: the first
+# position of a gap costs $g_open+$g_ext, each further one $g_ext.
+# Returns the total score.
+
+sub alignment_score {
+  my ($query_r, $query_start, $btop_align_r, $matrix_2d) = @_;
+
+  # $gap0/$gap1: a gap is currently open in the first/second sequence
+  my ($gap0, $gap1) = (0,0);
+
+  my $qix = $query_start-1; # start from zero
+
+  my $score = 0;
+  my ($seq0, $seq1) = ("","");
+  for my $btop (@{$btop_align_r}) {
+    if ($btop =~ m/^\d+$/) {  # matching query sequence, add it up
+      for (my $i=0; $i < $btop; $i++) {
+	$score += $matrix_2d->{$query_r->[$qix]}{$query_r->[$qix]};
+	$qix++;
+	# an aligned residue closes any open gap, so the next gap is
+	# charged the opening penalty again
+	$gap0 = $gap1 = 0;
+      }
+    }
+    else {
+      ($seq0, $seq1) = split(//,$btop);
+      if ($btop=~ m/\-/) {
+	if ($seq0 eq '-') {
+	  # gap in the first sequence: the query position does not advance
+	  if ($gap0) { $score += $g_ext;}
+	  else { $score += $g_open+$g_ext;}
+	  $gap0 = 1;
+	}
+	else {
+	  if ($gap1) { $score += $g_ext;}
+	  else { $score += $g_open+$g_ext;}
+	  $gap1 = 1;
+	  $qix++;
+	}
+      }
+      else {
+	# mismatch: score the aligned pair
+	$score += $matrix_2d->{$seq0}{$seq1};
+	$gap1=$gap0 = 0;
+	$qix++;
+      }
+    }
+  }
+  return $score;
+}
+
+################################################################
+# sub_alignment_score()
+# input: $query_r : ref to array of query residues, indexed by query position;
+#        $hit_r->{BTOP} : an encoded alignment (also reads {q_start,q_end,s_start,s_end});
+#        $matrix_2d, $matrix_diag : a scoring matrix and its diagonal, indexed via %aa_map
+#        $domain_r : domain boundaries in query (target=0) or subject (target=1)
+#        $target : 0=query, 1=target
+#
+# returns ($score, \@aligned_domains), $score being the summed alignment score,
+# or (0, $domain_r) when $domain_r is empty; updates domains in place:
+# domain_r->[]->{ident} (identity count; {percid} gets the fraction),
+#             ->{score} --matrix raw similarity score
+#             ->{qa_start,qa_end}  domain boundaries in query
+#             ->{sa_start, sa_end} domain boundaries in subject
+# (uses %aa_map, $g_open, $g_ext, which are defined outside this sub)
+sub sub_alignment_score {
+  my ($query_r, $hit_r, $matrix_2d, $matrix_diag, $domain_r, $target) = @_;
+
+  return (0, $domain_r) unless ($domain_r && scalar(@$domain_r));	# no domains to score
+
+  my $btop_enc_r = decode_btop($hit_r->{BTOP});
+
+  my ($gap0, $gap1) = (0,0);	# set once a gap in seq0/seq1 is open; cleared by a match or mismatch
+
+  my @active_dom_list = ();	# domains that have been entered but not yet closed
+  my @aligned_domains = ();	# every domain the alignment reaches; returned to the caller
+
+  my $left_active_end = $domain_r->[-1]->{d_end}+1;	# as far right as possible
+  my ($q_start, $s_start, $h_start, $h_end) = @{$hit_r}{qw(q_start s_start s_start s_end)};	# $h_end is set but not read below
+  my ($qix, $six)  = ($q_start, $s_start); # $qix now starts from 1, like $ssix;
+
+  my $ds_ix = \$six;	# use to track the subject position
+  # reverse coordinate names if $target==0
+  unless ($target) {
+    $ds_ix = \$qix;	# track query position
+    $h_start = $hit_r->{q_start};
+    $h_end = $hit_r->{q_end};
+  }
+
+  my ($score, $m_score) = 0;	# only $score gets 0 here; $m_score is assigned before each use
+  my ($seq0, $seq1) = ("","");
+
+  # find the first overlapping domain
+  my ($dom_ix, $dom_nx) = (0,scalar(@$domain_r));
+  my $dom_r = $domain_r->[0];
+
+  # skip over domains that do not overlap alignment
+  # capture first domain that alignment overlaps
+  for ($dom_ix=0; $dom_ix < $dom_nx; $dom_ix++) {
+    if ($domain_r->[$dom_ix]->{d_end} >= $h_start) {  # if {d_end} < $h_start, cannot overlap
+      $dom_r = $domain_r->[$dom_ix];
+      if ($dom_r->{d_pos} <= $h_start) {  # {d_pos} is less, {d_end} is greater, overlap
+	push @aligned_domains, $dom_r;
+	$left_active_end = push_annot_match(\@active_dom_list, $dom_r, $q_start, $s_start, 0, 0);
+      }
+      else { last; }	# $dom_r is now the first domain that starts inside the alignment
+    }
+  }
+
+  my ($dom_score, $id_cnt) = (0,0);	# score / identities accumulated since the last push or pop
+
+  for my $btop (@{$btop_enc_r}) {
+
+    if ($btop =~ m/^\d+$/) {  # matching query sequence, add it up
+      for (my $i=0; $i < $btop; $i++) {  # $i is used to count through BTOP, not to index anything.
+
+	my $seq0_map = $aa_map{'X'};	# residues missing from %aa_map are scored as 'X'
+	unless ($query_r->[$qix]) {
+	  warn "qix: $qix out of range";
+	}
+	else {
+	  $seq0_map = $aa_map{$query_r->[$qix]} if exists($aa_map{$query_r->[$qix]});
+#	  print "$qix:$six : ",$query_r->[$qix],"\n";
+	}
+
+	$m_score = $matrix_diag->[$seq0_map];	# identical pair: taken from the matrix diagonal
+	$score += $m_score;
+
+	if ($dom_ix < $dom_nx && $$ds_ix == $dom_r->{d_pos}) {	# next domain starts here
+	  push @aligned_domains, $dom_r;
+	  $left_active_end = push_annot_match(\@active_dom_list, $dom_r, $qix, $six, $id_cnt, $dom_score);
+	  $dom_ix++;
+	  $dom_r = $domain_r->[$dom_ix];
+	  ($dom_score, $id_cnt) = (0,0);
+	}
+
+	if (@active_dom_list) {
+	  $dom_score += $m_score;
+	  $id_cnt++;
+	  if ($$ds_ix == $left_active_end) {	# nearest active domain ends here
+	    $left_active_end = pop_annot_match(\@active_dom_list, $qix, $six, $$ds_ix, $id_cnt, $dom_score);
+	    $dom_score = $id_cnt = 0;
+	  }
+	}
+
+	$qix++;
+	$six++;
+	$gap0 = $gap1 = 0;
+      }
+    }
+    else {
+      ($seq0, $seq1) = split(//,$btop);
+
+#      print "$qix:$six : $btop\n";
+
+      if ($btop=~ m/\-/) {
+	if ($seq0 eq '-') {  # gap in seq0
+	  if ($gap0) {
+	    $m_score = $g_ext;
+	  }
+	  else {
+	    $m_score = $g_open+$g_ext;
+	    $gap0 = 1;
+	  }
+
+	  $score += $m_score;
+
+	  if ($target) {	# subject domains
+	    if ($dom_ix < $dom_nx && $$ds_ix == $dom_r->{d_pos}) {
+	      push @aligned_domains, $dom_r;
+	      $left_active_end = push_annot_match(\@active_dom_list, $dom_r, $qix, $six, $id_cnt, $dom_score);
+	      $dom_ix++;
+	      $dom_r = $domain_r->[$dom_ix];
+	      ($dom_score, $id_cnt) = (0,0);
+	    }
+	    if (@active_dom_list) {
+	      $dom_score += $m_score;
+	      if ($$ds_ix == $left_active_end) {
+		$left_active_end = pop_annot_match(\@active_dom_list, $qix, $six, $$ds_ix, $id_cnt, $dom_score);
+		$dom_score = $id_cnt = 0;
+	      }
+	    }
+	  }
+	  $six++;	# only the subject advances across a gap in seq0
+	}
+	else {  # gap in seq1
+	  if ($gap1) {
+	    $m_score = $g_ext;
+	  }
+	  else {
+	    $m_score = $g_open+$g_ext;
+	    $gap1 = 1;
+	  }
+	  $score += $m_score;
+
+	  unless ($target) {	# query domains
+	    if ($dom_ix < $dom_nx && $$ds_ix == $dom_r->{d_pos}) {
+	      push @aligned_domains, $dom_r;
+	      $left_active_end = push_annot_match(\@active_dom_list, $dom_r, $qix, $six, $id_cnt, $dom_score);
+	      $dom_ix++;
+	      $dom_r = $domain_r->[$dom_ix];
+	      ($dom_score, $id_cnt) = (0,0);
+	    }
+
+	    if (@active_dom_list) {
+	      $dom_score += $m_score;
+	      if ($$ds_ix == $left_active_end) {
+		$left_active_end = pop_annot_match(\@active_dom_list, $qix, $six, $$ds_ix, $id_cnt, $dom_score);
+		$dom_score = $id_cnt = 0;
+	      }
+	    }
+	  }
+	  $qix++;	# only the query advances across a gap in seq1
+	}
+      }
+      else {	# mismatch
+	my ($seq0_map, $seq1_map) = ($aa_map{$seq0},$aa_map{$seq1});
+	$seq0_map = $aa_map{'X'} unless defined($seq0_map);
+	$seq1_map = $aa_map{'X'} unless defined($seq1_map);
+
+	$m_score = $matrix_2d->[$seq0_map][$seq1_map];
+	$score += $m_score;
+	if ($dom_ix < $dom_nx && $$ds_ix == $dom_r->{d_pos}) {
+	  push @aligned_domains, $dom_r;
+	  $left_active_end = push_annot_match(\@active_dom_list, $dom_r, $qix, $six, $id_cnt, $dom_score);
+	  $dom_ix++;
+	  $dom_r = $domain_r->[$dom_ix];
+	  ($dom_score, $id_cnt) = (0,0);
+	}
+
+	if (@active_dom_list) {
+	  $dom_score += $m_score;	# mismatches add score but not identities
+	  if ($$ds_ix == $left_active_end) {
+	    $left_active_end = pop_annot_match(\@active_dom_list, $qix, $six, $$ds_ix, $id_cnt, $dom_score);
+	    $dom_score = $id_cnt = 0;
+	  }
+	}
+	$qix++;
+	$six++;
+	$gap0 = $gap1 = 0;
+      }
+    }
+#    print join(":",($qix, $six, $score)),"\n";
+  }
+
+  # all done, finish any domain stuff
+  if (@active_dom_list) {	# domains still open are closed at the alignment end coordinates
+    last_annot_match(\@active_dom_list, $hit_r->{q_end}, $hit_r->{s_end}, $id_cnt, $dom_score);
+  }
+
+  return ($score, \@aligned_domains);
+}
+
+################################################################
+# sub_alignment_pos
+# input: $hit_r->{BTOP} : an encoded alignment (also reads {q_start,q_end,s_start,s_end});
+#        $domain_r : domain boundaries in query (target=0) or subject (target=1)
+#        $target : 0=query, 1=target
+#
+# returns ($score, \@aligned_domains); $score stays 0 (no matrix is used here),
+# or (0, $domain_r) when $domain_r is empty; updates domains in place:
+# ->{ident}/{percid} (identity count/fraction), ->{qa_start,qa_end}, ->{sa_start, sa_end}
+#
+sub sub_alignment_pos {
+  my ($hit_r, $domain_r, $target) = @_;
+
+  return (0, $domain_r) unless ($domain_r && scalar(@$domain_r));
+
+  my $btop_enc_r = decode_btop($hit_r->{BTOP});
+
+  my ($gap0, $gap1) = (0,0);
+
+  my @active_dom_list = ();	# domains that have been entered but not yet closed
+  my @aligned_domains = ();	# every domain the alignment reaches; returned to the caller
+
+  my $left_active_end = $domain_r->[-1]->{d_end}+1;	# as far right as possible
+  my ($q_start, $s_start, $h_start) = @{$hit_r}{qw(q_start s_start s_start)};
+  my ($qix, $six)  = ($q_start, $s_start); # $qix now starts from 1, like $ssix;
+
+  my $ds_ix = \$six;	# use to track the subject position
+  # reverse coordinate names if $target==0
+  unless ($target) {
+    $ds_ix = \$qix;	# track query position
+    $h_start = $hit_r->{q_start};
+  }
+
+  my ($score, $m_score) = (0, 0);	# both start at 0 (a bare "= 0" would leave $m_score undef)
+  my ($seq0, $seq1) = ("","");
+
+  # find the first overlapping domain
+  my ($dom_ix, $dom_nx) = (0,scalar(@$domain_r));
+  my $dom_r = $domain_r->[0];
+
+  # skip over domains that do not overlap alignment
+  # capture first domain that alignment overlaps
+  for ($dom_ix=0; $dom_ix < $dom_nx; $dom_ix++) {
+    if ($domain_r->[$dom_ix]->{d_end} >= $h_start) {  # if {d_end} < $h_start, cannot overlap
+      $dom_r = $domain_r->[$dom_ix];
+      if ($dom_r->{d_pos} <= $h_start) {  # {d_pos} is less, {d_end} is greater, overlap
+	push @aligned_domains, $dom_r;
+	$left_active_end = push_annot_match(\@active_dom_list, $dom_r, $q_start, $s_start, 0, 0);
+      }
+      else { last; }
+    }
+  }
+
+  my ($dom_score, $id_cnt) = (0,0);	# $dom_score stays 0 in this sub; $id_cnt counts identities
+
+  for my $btop (@{$btop_enc_r}) {
+
+    if ($btop =~ m/^\d+$/) {  # matching query sequence, add it up
+      for (my $i=0; $i < $btop; $i++) {  # $i is used to count through BTOP, not to index anything.
+
+	if ($dom_ix < $dom_nx && $$ds_ix == $dom_r->{d_pos}) {
+	  push @aligned_domains, $dom_r;
+	  $left_active_end = push_annot_match(\@active_dom_list, $dom_r, $qix, $six, $id_cnt, $dom_score);	# always (query, subject)
+	  $dom_ix++;
+	  $dom_r = $domain_r->[$dom_ix];
+	  ($dom_score, $id_cnt) = (0,0);
+	}
+	if (@active_dom_list) {
+	  $id_cnt++;
+	  if ($$ds_ix == $left_active_end) {
+	    $left_active_end = pop_annot_match(\@active_dom_list, $qix, $six, $$ds_ix, $id_cnt, $dom_score);
+	    $dom_score = $id_cnt = 0;
+	  }
+	}
+
+	$qix++;
+	$six++;
+	$gap0 = $gap1 = 0;
+      }
+    }
+    else {
+      ($seq0, $seq1) = split(//,$btop);
+
+#      print "$qix:$six : $btop\n";
+
+      if ($btop=~ m/\-/) {
+	if ($seq0 eq '-') {  # gap in seq0
+
+	  if ($target) {	# subject domains
+	    if ($dom_ix < $dom_nx && $$ds_ix == $dom_r->{d_pos}) {
+	      push @aligned_domains, $dom_r;
+	      $left_active_end = push_annot_match(\@active_dom_list, $dom_r, $qix, $six, $id_cnt, $dom_score);
+	      $dom_ix++;
+	      $dom_r = $domain_r->[$dom_ix];
+	      ($dom_score, $id_cnt) = (0,0);
+	    }
+	    if (@active_dom_list) {
+	      if ($$ds_ix == $left_active_end) {	# must also fire after the last domain was pushed
+		$left_active_end = pop_annot_match(\@active_dom_list, $qix, $six, $$ds_ix, $id_cnt, $dom_score);
+		$dom_score = $id_cnt = 0;
+	      }
+	    }
+	  }
+	  $six++;
+	}
+	else {  # gap in seq1
+
+	  unless ($target) {	# query domains
+	    if ($dom_ix < $dom_nx && $$ds_ix == $dom_r->{d_pos}) {
+	      push @aligned_domains, $dom_r;
+	      $left_active_end = push_annot_match(\@active_dom_list, $dom_r, $qix, $six, $id_cnt, $dom_score);
+	      $dom_ix++;
+	      $dom_r = $domain_r->[$dom_ix];
+	      ($dom_score, $id_cnt) = (0,0);
+	    }
+	    if (@active_dom_list) {
+	      # positions only: there is no gap score to add to $dom_score here
+	      if ($$ds_ix == $left_active_end) {	# must also fire after the last domain was pushed
+		$left_active_end = pop_annot_match(\@active_dom_list, $qix, $six, $$ds_ix, $id_cnt, $dom_score);
+		$dom_score = $id_cnt = 0;
+	      }
+	    }
+	  }
+	  $qix++;
+	}
+      }
+      else {	# mismatch
+	# positions only: the mismatched residues are not looked up or scored
+	if ($dom_ix < $dom_nx && $$ds_ix == $dom_r->{d_pos}) {
+	  push @aligned_domains, $dom_r;
+	  $left_active_end = push_annot_match(\@active_dom_list, $dom_r, $qix, $six, $id_cnt, $dom_score);
+	  $dom_ix++;
+	  $dom_r = $domain_r->[$dom_ix];
+	  ($dom_score, $id_cnt) = (0,0);
+	}
+	if (@active_dom_list) {
+	  if ($$ds_ix == $left_active_end) {
+	    $left_active_end = pop_annot_match(\@active_dom_list, $qix, $six, $$ds_ix, $id_cnt, $dom_score);
+	    $dom_score = $id_cnt = 0;
+	  }
+	}
+	$qix++;
+	$six++;
+	$gap0 = $gap1 = 0;
+      }
+    }
+#    print join(":",($qix, $six, $score)),"\n";
+  }
+
+  # all done, finish any domain stuff
+  if (@active_dom_list) {	# domains still open are closed at the alignment end coordinates
+    last_annot_match(\@active_dom_list, $hit_r->{q_end}, $hit_r->{s_end}, $id_cnt, $dom_score);
+  }
+
+  return ($score, \@aligned_domains);
+}
+
+################
+# push_annot_match - adds domain $dom_r to the set of @$active_doms_r,
+#		     update ->{score}, ->{ident} for existing @$active_doms_r (+= $c_score, $c_ident)
+#		     initialize ->{score}, ->{ident} to zero, {qa_start},{sa_start} to $q_pos,$s_pos
+#		     insert (splice) new domain in list ordered left-to-right by ->{d_end}
+#                    returns current left-most {d_end} boundary
+#
+sub push_annot_match {
+  my ($active_doms_r, $dom_r, $q_pos, $s_pos, $c_ident, $c_score) = @_;
+
+  $dom_r->{ident} = 0;
+  $dom_r->{score} = 0;
+  $dom_r->{qa_start} = $dom_r->{qa_pos} = $q_pos;
+  $dom_r->{sa_start} = $dom_r->{sa_pos} = $s_pos;
+
+  # no previous domains, just initialize
+  unless (scalar(@$active_doms_r)) {
+    push @$active_doms_r, $dom_r;
+    return $dom_r->{d_end};
+  }
+
+  # some previous domains, update score, identity for domains in list
+  # also find insertion point (the scan cannot stop early: every entry needs its update)
+  my $nx = scalar(@$active_doms_r);
+  my $min_ix = $nx;	# default: append after all existing domains
+  for (my $ix=0; $ix < $nx; $ix++) {
+    $active_doms_r->[$ix]->{ident} += $c_ident;
+    $active_doms_r->[$ix]->{score} += $c_score;
+    if ($min_ix == $nx && $dom_r->{d_end} < $active_doms_r->[$ix]->{d_end}) {
+      $min_ix = $ix;	# keep only the FIRST entry that ends after the new domain
+    }
+  }
+
+  # now have location for insert
+  splice(@$active_doms_r, $min_ix, 0, $dom_r);
+  return $active_doms_r->[0]->{d_end};	# list is ordered, so [0] ends first
+}
+
+################
+# pop_annot_match - update domains in @$active_doms_r
+#		    update: ->{ident}, ->{score} (+= $c_ident, $c_score) for every active domain
+#		    add: ->{qa_end},->{sa_end}, ->{percid} (ident as fraction of domain length)
+#                   remove all domains whose {d_end} == $d_pos, wherever they sit in the list
+#                   return left-most remaining {d_end}, or -1 when no domain is left
+
+sub pop_annot_match {
+  my ($active_doms_r, $q_pos, $s_pos, $d_pos, $c_ident, $c_score) = @_;
+
+  my @still_active = ();	# domains that continue past $d_pos
+
+  for my $cur_r (@$active_doms_r) {
+    $cur_r->{ident} += $c_ident;
+    $cur_r->{score} += $c_score;
+
+    if ($cur_r->{d_end} == $d_pos) {	# domain ends here: finalize it
+      # convert identity count to identity fraction
+      $cur_r->{percid} = $cur_r->{ident}/($cur_r->{d_end} - $cur_r->{d_pos}+1);
+      $cur_r->{qa_end} = $cur_r->{qa_pos} = $q_pos;
+      $cur_r->{sa_end} = $cur_r->{sa_pos} = $s_pos;
+    }
+    else {
+      push @still_active, $cur_r;
+    }
+  }
+
+  # domains are selected by {d_end}, not by list position, so a list that is
+  # not in {d_end} order cannot cause the wrong domain to be closed
+  @$active_doms_r = @still_active;
+
+  return -1 unless (scalar(@$active_doms_r));
+
+  my $leftmost_end = $active_doms_r->[0]->{d_end};
+  for my $cur_r (@$active_doms_r) {
+    if ($cur_r->{d_end} < $leftmost_end) {
+      $leftmost_end = $cur_r->{d_end};
+    }
+  }
+  return $leftmost_end;
+}
+
+sub last_annot_match {
+  my ($active_doms_r, $q_pos, $s_pos, $c_ident, $c_score) = @_;
+
+  # end of alignment: every domain still in @$active_doms_r is closed at
+  # ($q_pos, $s_pos); the pending $c_ident/$c_score are added first and the
+  # identity count is converted to a fraction of the domain length
+  for my $cur_r (@$active_doms_r) {
+    $cur_r->{ident} += $c_ident;
+    $cur_r->{score} += $c_score;
+    $cur_r->{percid} = $cur_r->{ident}/($cur_r->{d_end} - $cur_r->{d_pos}+1);
+    $cur_r->{qa_end} = $cur_r->{qa_pos} = $q_pos;
+    $cur_r->{sa_end} = $cur_r->{sa_pos} = $s_pos;
+  }
+
+  # empty the caller's list in place (assigning a new [] would only rebind the local)
+  @$active_doms_r = ();
+}
+
+# given: (1) a query sequence; (2) an encoded alignment; (3) a scoring matrix
+# report matches/mismatches on annotated sites; returns a ref to the list of aligned sites
+# updates $site_r->[]->{annot_ix, qa_pos, sa_pos}
+#                    ->{q_res, s_res, m_symb}
+
+sub site_align {
+  my ($query_r, $hit_r, $matrix_2d, $site_r, $target) = @_;
+
+  return [] unless ($site_r && scalar(@$site_r));
+
+  my @aligned_sites = ();
+
+  my $btop_enc_r = decode_btop($hit_r->{BTOP});
+
+  my ($q_start, $q_end, $s_start, $s_end) = @{$hit_r}{qw(q_start q_end s_start s_end)};
+  my ($qix, $six)  = ($q_start, $s_start); # $qix, $six 1-based
+  my $ds_ix = \$six;	# use to track the subject position
+
+  unless ($target) {	# sites are on the query: $s_start/$s_end now hold the query coordinates
+    ($q_start, $q_end, $s_start, $s_end) = @{$hit_r}{qw(s_start s_end q_start q_end)};
+    $ds_ix = \$qix;	# track query position
+  }
+
+  my ($seq0, $seq1) = ("","");
+
+  # find the first overlapping site
+
+  my ($site_ix, $site_nx) = (0,scalar(@$site_r));
+  my $s_r = $site_r->[0];
+
+  # skip over sites that do not overlap alignment
+  for ($site_ix=0; $site_ix < $site_nx; $site_ix++) {
+    if ($site_r->[$site_ix]->{d_pos} >= $s_start) {  # find the first site inside alignment
+      $s_r = $site_r->[$site_ix];
+      last;
+    }
+  }
+
+  return [] unless $site_ix < $site_nx;
+
+  for my $btop (@{$btop_enc_r}) {
+    last if ($site_ix >= $site_nx);
+    if ($btop =~ m/^\d+$/) {  # matching query sequence, check for sites within current region
+      my $bt_end = $$ds_ix + $btop - 1;
+      if ($bt_end < $s_r->{d_pos}) {  # no site in identical region
+	$qix += $btop;
+	$six += $btop;
+      }
+      else {      # yes site in region, jump to it
+	my $c_pos;
+	while ($site_ix < $site_nx && $s_r->{d_pos} <= $bt_end) {
+	  $c_pos = $$ds_ix;	# must be inside loop because $ds_ix points to $qix or $six
+	  $qix += $s_r->{d_pos} - $c_pos;	# jump forward to site
+	  $six += $s_r->{d_pos} - $c_pos;	# jump forward to site
+	  $seq0 = $query_r->[$qix];
+	  # NOTE(review): seven keys but six values, so {d_end} is set to undef here -- confirm intent
+	  @{$s_r}{qw(annot_ix qa_pos sa_pos q_res s_res m_symb d_end)} = ($site_ix, $qix, $six, $seq0, $seq0, match_symb($seq0, $seq0, $matrix_2d));
+	  push @aligned_sites, $s_r;
+	  $site_ix++;
+	  $s_r=$site_r->[$site_ix];
+	}
+	# past the last site annotation, but not done with $btop;
+	$c_pos = ($bt_end - $$ds_ix + 1);
+	$qix += $c_pos;
+	$six += $c_pos;
+      }
+    }
+    else {	# sequence does not match -- must check each position
+      ($seq0, $seq1) = split(//,$btop);
+      if ($btop =~ m/\-/) {
+	if ($seq0 eq '-') {
+	  if ($target) {
+	    while ($site_ix < $site_nx && $s_r->{d_pos} == $six) {
+	      @{$s_r}{qw(annot_ix qa_pos sa_pos q_res s_res m_symb)} = ($site_ix, $qix, $six, $seq0, $seq1, match_symb($seq0, $seq1, $matrix_2d));
+	      push @aligned_sites, $s_r;
+	      $site_ix++;
+	      $s_r=$site_r->[$site_ix];
+	    }
+	  }
+	  $six++;
+	}
+	else {  # gap in seq1, cannot match domain
+	  unless ($target) {
+	    while ($site_ix < $site_nx && $s_r->{d_pos} == $qix) {
+	      @{$s_r}{qw(annot_ix qa_pos sa_pos q_res s_res m_symb)} = ($site_ix, $qix, $six, $seq0, $seq1, match_symb($seq0, $seq1, $matrix_2d));
+	      push @aligned_sites, $s_r;
+	      $site_ix++;
+	      $s_r=$site_r->[$site_ix];
+	    }
+	  }
+	  $qix++;
+	}
+      }
+      else {	# mismatch; $btop string is twice length of covered region
+	while ($site_ix < $site_nx && $s_r->{d_pos} == $$ds_ix) {	# bound check first: $s_r is undef past the last site
+	  @{$s_r}{qw(annot_ix qa_pos sa_pos q_res s_res m_symb)} = ($site_ix, $qix, $six, $seq0, $seq1, match_symb($seq0, $seq1, $matrix_2d));
+	  push @aligned_sites, $s_r;
+	  $site_ix++;  $s_r=$site_r->[$site_ix];
+	}
+	$qix++;
+	$six++;
+      }
+    }
+  }
+
+  return (\@aligned_sites);
+}
+
+# match_symb - alignment symbol for a residue pair: "=" when the residues are
+#              the same (case-insensitive); otherwise "<", ">" or "z" for a
+#              negative, positive or zero $matrix_2d score
+sub match_symb {
+  my ($seq0, $seq1, $matrix_2d) = @_;
+
+  # identical residues need no matrix lookup
+  return "=" if (uc($seq0) eq uc($seq1));
+
+  # matrix indices come from %aa_map; residues it does not know use 'X'
+  my $row = defined($aa_map{$seq0}) ? $aa_map{$seq0} : $aa_map{'X'};
+  my $col = defined($aa_map{$seq1}) ? $aa_map{$seq1} : $aa_map{'X'};
+
+  my $pair_score = $matrix_2d->[$row][$col];
+
+  # the sign of the substitution score picks the symbol
+  return "<" if ($pair_score < 0);
+  return ">" if ($pair_score > 0);
+  return "z";
+}
+
+
+
+# merge up to four lists of annotations into a single list, and return
+# a reference to the list (sorted by {qa_pos}); a site annotated at the same
+# {qa_pos} with the same {type} on both sequences is kept once, with {target} = 2
+# input: $hit references, possibly with {aligned_domains_r}, {aligned_sites_r}
+#                                       {q_aligned_domains_r}, {q_aligned_sites_r}
+sub merge_annots {
+  my ($hit_r) = @_;
+
+  my @merged_array = ();
+
+  # merge the sites arrays first, so that conserved annotated sites are juxtaposed
+
+  my ($qs_ix, $ss_ix, $qs_nx, $ss_nx) = (0,0,0,0);
+
+  $ss_nx = scalar(@{$hit_r->{aligned_sites_r}}) if (exists($hit_r->{aligned_sites_r}));
+  $qs_nx = scalar(@{$hit_r->{q_aligned_sites_r}}) if (exists($hit_r->{q_aligned_sites_r}));
+
+  if ($ss_nx && $qs_nx) {  # have sites on both sequences
+    # find out how many positions match between {q_aligned_sites_r} and {aligned_sites_r}
+
+    my @uniq_sites = ();
+
+    for my $qs_ref (@{$hit_r->{q_aligned_sites_r}}) {
+      $qs_ref->{merged} = 0;
+      for my $ss_ref ( @{$hit_r->{aligned_sites_r}} ) {	# next/last below rely on ascending {qa_pos}
+	next if ($ss_ref->{qa_pos} < $qs_ref->{qa_pos});
+	last if ($ss_ref->{qa_pos} > $qs_ref->{qa_pos});
+	if ($qs_ref->{qa_pos} == $ss_ref->{qa_pos} && $qs_ref->{type} eq $ss_ref->{type}) {
+	  $qs_ref->{merged} = $ss_ref->{merged} = 1;
+	  $qs_ref->{target} = $ss_ref->{target} = 2;
+	  # save match
+	  push @uniq_sites, $qs_ref;
+	}
+      }
+    }
+
+    # save merged sites
+    push @merged_array, @uniq_sites;
+
+    # save unmerged subject
+    @uniq_sites = ();
+    for my $ss_ref ( @{$hit_r->{aligned_sites_r}} ) {
+      push @uniq_sites, $ss_ref if (!defined($ss_ref->{merged}) || $ss_ref->{merged} == 0);
+    }
+    push @merged_array, @uniq_sites;
+
+    # save unmerged query (must walk the query list, not the subject list again)
+    @uniq_sites = ();
+    for my $qs_ref ( @{$hit_r->{q_aligned_sites_r}} ) {
+      push @uniq_sites, $qs_ref if (!defined($qs_ref->{merged}) || $qs_ref->{merged} == 0);
+    }
+    push @merged_array, @uniq_sites;
+  }
+  elsif ($ss_nx) {
+    push @merged_array, @{$hit_r->{aligned_sites_r}};
+  }
+  elsif ($qs_nx) {
+    push @merged_array, @{$hit_r->{q_aligned_sites_r}};
+  }
+
+#  for my $ann_r ( @merged_array) {
+#    unless ($ann_r->{qa_pos}) {
+#      print STDERR "missing qa_pos:",join(":",@{$ann_r}{qw(q_seqid s_seqid)}),"\n";
+#    }
+#  }
+
+  @merged_array = sort { $a->{qa_pos} <=> $b->{qa_pos} } @merged_array;
+
+  # domains are appended after the sites, then everything is re-sorted by {qa_pos}
+  push @merged_array, @{$hit_r->{aligned_domains_r}} if (exists($hit_r->{aligned_domains_r}));
+  push @merged_array, @{$hit_r->{q_aligned_domains_r}} if (exists($hit_r->{q_aligned_domains_r}));
+
+  @merged_array = sort { $a->{qa_pos} <=> $b->{qa_pos} } @merged_array;
+
+  return \@merged_array;
+}
+
+# domain output formatter: returns "|XR:qa_start-qa_end:sa_start-sa_end:s=N;b=B;I=F;Q=Q;descr"
+sub format_dom_info {
+  my ($hit_r, $raw_score, $dom_r) = @_;
+
+  # without a raw score, normalize by the reported score instead
+  if (!$raw_score) {
+    warn "no raw_score at: ".$hit_r->{s_seqid}."\n";
+    $raw_score = $hit_r->{score};
+  }
+
+  my $score_scale = $hit_r->{score}/$raw_score;	# reported/raw score ratio
+  my $fsub_score = $dom_r->{score}/$raw_score;	# domain's share of the raw score
+
+  # Q-value: -10*log10(evalue) scaled by the domain's share, never < 0;
+  # an E-value of exactly 0 is reported as 3000
+  my $qval = ($hit_r->{evalue} == 0.0)
+    ? 3000.0
+    : -10.0*log($hit_r->{evalue})*$fsub_score/(log(10.0));
+  $qval = 0 if $qval < 0;
+
+  my $ns_score = int($dom_r->{score} * $score_scale+0.5);
+  my $s_bit = int($hit_r->{bits} * $fsub_score +0.5);
+
+  return join(";",
+	      sprintf("|XR:%d-%d:%d-%d:s=%d",
+		      $dom_r->{qa_start},$dom_r->{qa_end},
+		      $dom_r->{sa_start},$dom_r->{sa_end},$ns_score),
+	      sprintf("b=%.1f",$s_bit),
+	      sprintf("I=%.3f",$dom_r->{percid}),
+	      sprintf("Q=%.1f",$qval),
+	      $dom_r->{descr});
+}
+
+# merged annot output formatter
+# returns one string: the "|..." field for every entry of @$annot_list_r,
+# concatenated in list order ("" for an empty list)
+sub format_annot_info {
+  my ($hit_r, $annot_list_r) = @_;
+
+  # domain scores are normalized by the raw score when the hit has one,
+  # otherwise by the reported score
+  my $raw_score = $hit_r->{raw_score} ? $hit_r->{raw_score} : $hit_r->{score};
+  my $score_scale = $hit_r->{score}/$raw_score;
+
+  my @fields = ();
+
+  # two types of annotations, domains and sites.
+  for my $annot_r ( @$annot_list_r ) {
+
+    unless ($annot_r->{type} eq '-') {	# site annotation
+      my $ann_type = $annot_r->{type};
+
+      # {target} 1 puts the symbol second, 2 puts it on both sides, else first
+      my $site_str;
+      if ($annot_r->{target} == 1) {
+	$site_str = "|X".$ann_type;
+      }
+      elsif ($annot_r->{target} == 2) {
+	$site_str = "|$ann_type$ann_type";
+      }
+      else {
+	$site_str = "|".$ann_type . "X";
+      }
+
+      push @fields, "$site_str:" . sprintf("%d%s%s%d%s",
+					   $annot_r->{qa_pos}, $annot_r->{q_res}, $annot_r->{m_symb}, $annot_r->{sa_pos}, $annot_r->{s_res});
+      next;
+    }
+
+    # domain with scores
+    my $fsub_score = $annot_r->{score}/$raw_score;
+
+    # Q-value: -10*log10(evalue) scaled by the domain's score share, never < 0;
+    # an E-value of exactly 0 is reported as 3000
+    my $qval = ($hit_r->{evalue} == 0.0)
+      ? 3000.0
+      : -10.0*log($hit_r->{evalue})*$fsub_score/(log(10.0));
+    $qval = 0 if $qval < 0;
+
+    my $ns_score = int($annot_r->{score} * $score_scale+0.5);
+    my $s_bit = int($hit_r->{bits} * $fsub_score +0.5);
+
+    my $side = $annot_r->{target} ? "XR" : "RX";
+    push @fields, join(";",
+		       sprintf("|%s:%d-%d:%d-%d:s=%d", $side,
+			       $annot_r->{qa_start},$annot_r->{qa_end},
+			       $annot_r->{sa_start},$annot_r->{sa_end},$ns_score),
+		       sprintf("b=%.1f",$s_bit),
+		       sprintf("I=%.3f",$annot_r->{percid}),
+		       sprintf("Q=%.1f",$qval),
+		       $annot_r->{descr});
+  }
+
+  return join("", @fields);
+}
+
+__END__
+
+=pod
+
+=head1 NAME
+
+annot_blast_btop2.pl
+
+=head1 SYNOPSIS
+
+ annot_blast_btop2 --ann_script ann_pfam_www_e.pl [--query_file query.fasta] --out_fields "q_seqid s_seqid percid evalue" blast_tabular_file
+
+=head1 OPTIONS
+
+ -h	short help
+ --help include description
+
+ --ann_script -- annotation script returning site/domain locations for subject sequences
+              -- same as --script
+
+ --q_ann_script -- annotation script for query sequences
+                -- same as --q_script
+
+ --query_file -- fasta query sequence
+              -- same as --query, --query_lib
+                 (can contain multiple sequences for multi-sequence search)
+
+ --out_fields -- blast tabular fields shown before domain information
+
+ --raw_score -- add the raw_score used to normalize domain scores to
+                tabular output (raw_scores are only calculated for domains)
+
+=head1 DESCRIPTION
+
+C<annot_blast_btop2.pl> runs the script specified by
+C<--ann_script/--q_ann_script> to annotate functional sites and domain
+content of the sequences specified by the subject/query seqid field of
+blast tabular format (-outfmt 6 or 7) or FASTA blast tabular format
+(-m 8).  The C<--ann_script/--q_ann_script> script produces domain
+boundary coordinates, which are mapped to the alignment.  For searches
+against SwissProt sequences, C<--ann_script ann_feats_up_www2.pl> will
+acquire features and domains from Uniprot.  C<--ann_script
+ann_pfam_www.pl --neg> will get domain information from Pfam, and
+score non-domain (NODOM) regions.
+
+The tab file is read and parsed, and then the subject/query seqid is used to
+capture domain locations in the subject/query sequence.  If the domains
+overlap the aligned region, the domain names are appended to the
+input.
+
+If a C<--query_file> is specified and two additional fields, C<score>
+and C<btop> are present, C<annot_blast_btop2.pl> calculates
+sub-alignment scores, including fraction identity, bit score, and
+Q-value (-log10(E-value)), partitioning the alignment score, identity,
+and bit score across the overlapping domains.
+
+The C<--out_fields> specifies the blast tabular fields that can be
+returned.  By default, C<q_seqid s_seqid percid alen mismatch gopen
+q_start q_end s_start s_end evalue bits> (but not C<score> and
+C<BTOP>) are shown.
+
+Currently, this program is fully functional only for blastp (or
+blastn) searches.  For translated searches (blastx) domain content,
+location and identity is provided, but not bit-scores or Q-values.
+
+=head1 AUTHOR
+
+William R. Pearson, wrp at virginia.edu
+
+=cut
diff --git a/scripts/blastp_cmd.sh b/scripts/blastp_cmd.sh
new file mode 100755
index 0000000..73f86e3
--- /dev/null
+++ b/scripts/blastp_cmd.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+cmd="";
+for i in "$@"
+do
+    case $i in
+	-o=*|--outname=*)
+	    OUTNAME="${i#*=}"
+	    shift # past argument=value
+	    ;;
+	*)
+	    cmd="$cmd $i"
+	    ;;
+    esac
+done
+
+bl_asn=${OUTNAME}.asn
+bl0_out="$OUTNAME.html"
+blm_out="$OUTNAME.msa"
+blt_out="$OUTNAME.bl_tab"
+
+# echo "OUTFILE = ${OUTNAME}"
+
+#export BLAST_PATH="/ebi/extserv/bin/ncbi-blast+/bin"
+export BLAST_PATH="/seqprg/bin"
+
+$BLAST_PATH/blastp -outfmt 11 $cmd > $bl_asn
+$BLAST_PATH/blast_formatter -archive $bl_asn -outfmt 0 -html > $bl0_out
+$BLAST_PATH/blast_formatter -archive $bl_asn -outfmt '7 qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore score btop'  > $blt_out
+$BLAST_PATH/blast_formatter -archive $bl_asn -outfmt 2  > $blm_out
+
diff --git a/scripts/color_defs.pl b/scripts/color_defs.pl
new file mode 100755
index 0000000..0d09ed5
--- /dev/null
+++ b/scripts/color_defs.pl
@@ -0,0 +1,170 @@
+# color_defs.pl used by lav2plt.pl for domain coloring
+
+################################################################
+# copyright (c) 2014 by William R. Pearson and The Rector &
+# Visitors of the University of Virginia */
+################################################################
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under this License is distributed on an "AS
+# IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+# express or implied.  See the License for the specific language
+# governing permissions and limitations under the License. 
+################################################################
+
+%color_names = (	# color name => [red, green, blue]; every component below is in 0..255
+aliceblue=>[240,248,255],
+antiquewhite=>[250,235,215],
+aqua=>[0,255,255],
+aquamarine=>[127,255,212],
+azure=>[240,255,255],
+beige=>[245,245,220],
+bisque=>[255,228,196],
+black=>[0,0,0],
+blanchedalmond=>[255,235,205],
+blue=>[0,0,255],
+blueviolet=>[138,43,226],
+brown=>[165,42,42],
+burlywood=>[222,184,135],
+cadetblue=>[95,158,160],
+chartreuse=>[127,255,0],
+chocolate=>[210,105,30],
+coral=>[255,127,80],
+cornflowerblue=>[100,149,237],
+cornsilk=>[255,248,220],
+crimson=>[220,20,60],
+cyan=>[0,255,255],
+darkblue=>[0,0,139],
+darkcyan=>[0,139,139],
+darkgoldenrod=>[184,134,11],
+darkgray=>[169,169,169],
+darkgreen=>[0,100,0],
+darkgrey=>[169,169,169],
+darkkhaki=>[189,183,107],
+darkmagenta=>[139,0,139],
+darkolivegreen=>[85,107,47],
+darkorange=>[255,140,0],
+darkorchid=>[153,50,204],
+darkred=>[139,0,0],
+darksalmon=>[233,150,122],
+darkseagreen=>[143,188,143],
+darkslateblue=>[72,61,139],
+darkslategray=>[47,79,79],
+darkslategrey=>[47,79,79],
+darkturquoise=>[0,206,209],
+darkviolet=>[148,0,211],
+deeppink=>[255,20,147],
+deepskyblue=>[0,191,255],
+dimgray=>[105,105,105],
+dimgrey=>[105,105,105],
+dodgerblue=>[30,144,255],
+firebrick=>[178,34,34],
+floralwhite=>[255,250,240],
+forestgreen=>[34,139,34],
+fuchsia=>[255,0,255],
+gainsboro=>[220,220,220],
+ghostwhite=>[248,248,255],
+gold=>[255,215,0],
+goldenrod=>[218,165,32],
+gray=>[128,128,128],
+green=>[0,128,0],
+greenyellow=>[173,255,47],
+grey=>[128,128,128],
+honeydew=>[240,255,240],
+hotpink=>[255,105,180],
+indianred=>[205,92,92],
+indigo=>[75,0,130],
+ivory=>[255,255,240],
+khaki=>[240,230,140],
+lavender=>[230,230,250],
+lavenderblush=>[255,240,245],
+lawngreen=>[124,252,0],
+lemonchiffon=>[255,250,205],
+lightblue=>[173,216,230],
+lightcoral=>[240,128,128],
+lightcyan=>[224,255,255],
+lightgoldenrodyellow=>[250,250,210],
+lightgray=>[211,211,211],
+lightgreen=>[144,238,144],
+lightgrey=>[211,211,211],
+lightpink=>[255,182,193],
+lightsalmon=>[255,160,122],
+lightseagreen=>[32,178,170],
+lightskyblue=>[135,206,250],
+lightslategray=>[119,136,153],
+lightslategrey=>[119,136,153],
+lightsteelblue=>[176,196,222],
+lightyellow=>[255,255,224],
+lime=>[0,255,0],
+limegreen=>[50,205,50],
+linen=>[250,240,230],
+magenta=>[255,0,255],
+maroon=>[128,0,0],
+mediumaquamarine=>[102,205,170],
+mediumblue=>[0,0,205],
+mediumorchid=>[186,85,211],
+mediumpurple=>[147,112,219],
+mediumseagreen=>[60,179,113],
+mediumslateblue=>[123,104,238],
+mediumspringgreen=>[0,250,154],
+mediumturquoise=>[72,209,204],
+mediumvioletred=>[199,21,133],
+midnightblue=>[25,25,112],
+mintcream=>[245,255,250],
+mistyrose=>[255,228,225],
+moccasin=>[255,228,181],
+navajowhite=>[255,222,173],
+navy=>[0,0,128],
+oldlace=>[253,245,230],
+olive=>[128,128,0],
+olivedrab=>[107,142,35],
+orange=>[255,165,0],
+orangered=>[255,69,0],
+orchid=>[218,112,214],
+palegoldenrod=>[238,232,170],
+palegreen=>[152,251,152],
+paleturquoise=>[175,238,238],
+palevioletred=>[219,112,147],
+papayawhip=>[255,239,213],
+peachpuff=>[255,218,185],
+peru=>[205,133,63],
+pink=>[255,192,203],
+plum=>[221,160,221],
+powderblue=>[176,224,230],
+purple=>[128,0,128],
+red=>[255,0,0],
+rosybrown=>[188,143,143],
+royalblue=>[65,105,225],
+saddlebrown=>[139,69,19],
+salmon=>[250,128,114],
+sandybrown=>[244,164,96],
+seagreen=>[46,139,87],
+seashell=>[255,245,238],
+sienna=>[160,82,45],
+silver=>[192,192,192],
+skyblue=>[135,206,235],
+slateblue=>[106,90,205],
+slategray=>[112,128,144],
+slategrey=>[112,128,144],
+snow=>[255,250,250],
+springgreen=>[0,255,127],
+steelblue=>[70,130,180],
+tan=>[210,180,140],
+teal=>[0,128,128],
+thistle=>[216,191,216],
+tomato=>[255,99,71],
+turquoise=>[64,224,208],
+violet=>[238,130,238],
+wheat=>[245,222,179],
+white=>[255,255,255],
+whitesmoke=>[245,245,245],
+yellow=>[255,255,0],
+yellowgreen=>[154,205,50],
+);
+
+1;
diff --git a/scripts/exp_up_ensg.pl b/scripts/exp_up_ensg.pl
new file mode 100755
index 0000000..f7356c5
--- /dev/null
+++ b/scripts/exp_up_ensg.pl
@@ -0,0 +1,145 @@
+#!/usr/bin/perl -w
+
+################################################################
+# copyright (c) 2010, 2014 by William R. Pearson and The Rector &
+# Visitors of the University of Virginia */
+################################################################
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under this License is distributed on an "AS
+# IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+# express or implied.  See the License for the specific language
+# governing permissions and limitations under the License. 
+################################################################
+
+## usage - exp_up_ensg.pl seed_acc.file > linked_fasta.file
+##
+## take a fasta36 -e expand.sh result file of the form:
+## sp|P09488|<tab>1.1e-50
+##
+## and extract the accession number, looking it up in an SQL
+## table $table. This script uses the database created by link2sql.pl.
+## Code is included for linking to UniRef as well as NCBI refseq
+## searches.
+
+## Once the linked accession numbers are found, the sequences are
+## extracted from the SQL database uniprot (see Mackey and Pearson
+## (2004) Current Protocols in Bioinformatics (L. Stein, ed) "Using
+## SQL databases for sequence similarity searching and analysis".
+## Alternatively, one could use blastdbcmd or fastacmd to extract the
+## sequences from an NCBI blast-formatted database.
+##
+
+use strict;
+use DBI;
+
+# Database connection parameters; the set used is chosen from the local
+# host name below.
+# NOTE(review): the account name and password are hard-coded in this file.
+my ($host, $port, $db, $table, $user, $pass);
+
+my $hostname = `/bin/hostname`;
+
+unless ($hostname =~ m/ebi/) {
+  ($host, $db, $port, $user, $pass)  = ("xdb", "uniprot", 0, "web_user", "fasta_www");
+}
+else {
+  ($host, $db, $port, $user, $pass)  = ("mysql-pearson", "up_db", 4124, "web_user", "fasta_www");
+}
+
+# host and port are only added to the DSN when they are set (non-zero)
+my $connect = "dbi:mysql(AutoCommit=>1,RaiseError=>1):database=$db";
+$connect .= ";host=$host" if $host;
+$connect .= ";port=$port" if $port;
+
+my $dbh = DBI->connect($connect,
+		       $user,
+		       $pass
+		      ) or die $DBI::errstr;
+
+# SQL text keyed by query name; each value is replaced by its prepared
+# statement handle in the loop that follows.
+my %sth = ( 
+    up2ensg_id => "SELECT acc, ensg FROM ensg JOIN annot2 USING(acc) WHERE id=?",
+    up2ensg_acc => "SELECT * FROM ensg WHERE acc=?",
+    ensg2seq => "SELECT * FROM ensg JOIN annot2 USING(acc) JOIN protein USING(acc) WHERE ensg=?",
+    );
+
+for my $sth (keys(%sth)) {
+  $sth{$sth} = $dbh->prepare($sth{$sth});
+}
+
+# filled in by processLine():
+#   %acc_uniq  - accessions returned for the input hits
+#   %ensg_uniq - ensg value => {id => accession that first returned it}
+my %acc_uniq = ();
+my %ensg_uniq = ();
+
+# input lines have the form "hit<tab>e_value"; only the hit is used
+while (my $line = <>) {
+  chomp($line);
+  my ($hit, $e_val) = split(/\t/,$line);
+  next unless (defined($hit) && length($hit));	# skip blank lines
+  # processLine() selects the statement handle itself from %sth
+  processLine($hit);
+}
+
+# for every ensg value found, print (in FASTA format) the linked sequences
+# whose accession was not itself one of the accessions found for the input
+for my $ensg_acc ( keys %ensg_uniq ) {
+
+  $sth{ensg2seq}->execute($ensg_acc);
+  while (my $row_href = $sth{ensg2seq}->fetchrow_hashref ) {
+    next if ($acc_uniq{$row_href->{acc}});
+#    print ">". $row_href->{db} . "|". $row_href->{acc} . " (".
+#	$ensg_uniq{$acc}->{acc}."|".$ensg_uniq{$acc}->{id}.":$acc) " .
+#      $row_href->{descr}. "\n";
+#    print ">" . uc($row_href->{db}) . ":$ensg_uniq{$acc}->{acc} $row_href->{acc} $acc $row_href->{descr}\n";
+    print ">",join('|', (lc($row_href->{db}),$row_href->{acc},$row_href->{id}))," ($ensg_uniq{$ensg_acc}->{id}|$ensg_acc) $row_href->{descr}\n";
+    print $row_href->{seq} . "\n";
+  }
+  $sth{ensg2seq}->finish();
+}
+
+$dbh->disconnect();
+
+# processLine($id)
+#
+# Parse one hit identifier, run the up2ensg_acc or up2ensg_id query from
+# %sth on it, and record every returned (acc, ensg) pair in %acc_uniq and
+# %ensg_uniq (the first value stored for a key is kept).
+#
+# Identifier forms, tested in this order:
+#   gi|12346|ref|NP_98765.1|    4th field, version suffix removed, is the accession
+#   DB:ID (e.g. SP:GSTM1_MOUSE) the word after ':' is looked up as an id
+#   sp|ACC|ID  or  tr|ACC|ID    2nd field is the accession
+#   anything else               used unchanged as the accession
+sub processLine{
+  my ($id)=@_;
+  my ($dummy, $link_acc, $link_id);
+
+  my $use_acc = 1;
+
+  if ($id =~ m/^gi\|/) {
+      # $id of the form: gi|12346|ref|NP_98765.1|<tab>1.1e-50
+      ($link_acc, $link_id) = (split(/\|/,$id))[3,4];
+      $link_acc =~ s/\.\d+$// if defined($link_acc);
+  }
+  elsif ($id =~ m/(\w+):(\w+)/) {
+    # form: SP:GSTM1_MOUSE P10649
+    # this pattern also matches the SP:xxx and TR:xxx forms, so they need
+    # no branches of their own
+    $link_id = $2;
+    $link_acc = '';
+    $use_acc = 0;
+  }
+  elsif ($id =~ m/(?:sp|tr)\|[\w\-\.]+/) {
+      ($dummy, $link_acc, $link_id) = split(/\|/,$id);
+  }
+  else {$link_acc = $id;}
+
+  my $get_sth;
+  if ($use_acc) {
+    # nothing to look up (identifier had no accession field)
+    return unless (defined($link_acc) && length($link_acc));
+    # accession already recorded by an earlier lookup
+    return if ($acc_uniq{$link_acc});
+    $get_sth = $sth{up2ensg_acc};
+    $get_sth->execute($link_acc);
+  }
+  else {
+    $get_sth = $sth{up2ensg_id};
+    $get_sth->execute($link_id);
+  }
+
+  # NOTE(review): up2ensg_acc is a "SELECT *", so this relies on acc and
+  # ensg being the first two columns of the ensg table -- confirm.
+  while (my ($acc, $ensg) = $get_sth->fetchrow_array()) {
+    $acc_uniq{$acc} = $acc unless $acc_uniq{$acc};
+    $ensg_uniq{$ensg} = {id=>$acc} unless $ensg_uniq{$ensg};
+  }
+  $get_sth->finish();
+}
diff --git a/scripts/expand_links.pl b/scripts/expand_links.pl
new file mode 100755
index 0000000..9be7c0c
--- /dev/null
+++ b/scripts/expand_links.pl
@@ -0,0 +1,100 @@
+#!/usr/bin/perl -w
+
+################################################################
+# copyright (c) 2010, 2014 by William R. Pearson and The Rector &
+# Visitors of the University of Virginia */
+################################################################
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under this License is distributed on an "AS
+# IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+# express or implied.  See the License for the specific language
+# governing permissions and limitations under the License. 
+################################################################
+
+## usage - expand_links.pl seed_acc.file > linked_fasta.file
+##
+## take a fasta36 -e expand.sh result file of the form:
+## gi|12346|ref|NP_98765.1|<tab>1.1e-50
+##
+## and extract the accession number, looking it up in an SQL
+## table $table. This script uses the database created by link2sql.pl.
+## Code is included for linking to UniRef as well as NCBI refseq
+## searches.
+
+## Once the linked accession numbers are found, the sequences are
+## extracted from the SQL database seqdb_demo2 (see Mackey and Pearson
+## (2004) Current Protocols in Bioinformatics (L. Stein, ed) "Using
+## SQL databases for sequence similarity searching and analysis".
+## Alternatively, one could use blastdbcmd or fastacmd to extract the
+## sequences from an NCBI blast-formatted database.
+##
+
+use strict;
+use DBI;
+
+# Connection parameters and the name of the seed->link table.
+# NOTE(review): the account name and password are hard-coded in this file.
+my ($host, $db, $port, $user, $pass, $table)  = ("xdb", "wrp_link", 0, "web_user", "fasta_www", "micr_samp_link50");
+
+# the password variable is $pass; under "use strict" any other name is a
+# compile-time error
+my $dbh = DBI->connect("dbi:mysql:host=$host:$db",
+		       $user, $pass,
+                       { RaiseError => 1, AutoCommit => 1}
+                      ) or die $DBI::errstr;
+
+# SQL text keyed by query name; each value is replaced by its prepared
+# statement handle in the loop that follows.
+my %sth = ( 
+    seed2link => "SELECT link_acc FROM $table WHERE seed_acc=?",
+    link2seq => "SELECT * FROM seqdb_demo2.annot JOIN seqdb_demo2.protein USING(prot_id) WHERE acc=? AND pref=1"
+    );
+
+for my $sth (keys(%sth)) {
+  $sth{$sth} = $dbh->prepare($sth{$sth});
+}
+
+# linked accession => seed accession that produced it (filled by processLine)
+my %acc_uniq = ();
+
+# input lines have the form "hit<tab>e_value"; only the hit is used
+while (my $line = <>) {
+  chomp($line);
+  my ($hit, $e_val) = split(/\t/,$line);
+  next unless (defined($hit) && length($hit));	# skip blank lines
+  processLine($hit,$sth{seed2link});
+}
+
+# print every linked sequence in FASTA format, noting its seed accession
+for my $acc ( keys %acc_uniq ) {
+
+  $sth{link2seq}->execute($acc);
+  while (my $row_href = $sth{link2seq}->fetchrow_hashref ) {
+    print ">". $row_href->{db} . "|". $row_href->{acc} . " (micr_samp|$acc_uniq{$acc}) " .
+      $row_href->{descr}. "\n";
+    print $row_href->{seq} . "\n";
+  }
+  $sth{link2seq}->finish();
+}
+
+$dbh->disconnect();
+
+# processLine($id, $sth)
+#
+# Reduce the hit identifier $id to a seed accession, execute the statement
+# handle $sth with it, and store each returned accession that differs from
+# the seed in %acc_uniq (value: the seed accession; first one stored wins).
+sub processLine{
+  my ($id,$sth)=@_;
+
+  # default: the identifier is used as the accession unchanged
+  my $link_acc = $id;
+
+  if ($id =~ m/^gi\|/) {
+      # $id of the form: gi|12346|ref|NP_98765.1|<tab>1.1e-50
+      # -> 4th '|' field without its ".version" suffix
+      $link_acc = (split(/\|/,$id))[3];
+      $link_acc =~ s/\.\d+$//;
+  }
+  elsif ($id =~ m/^UniRef/) {
+      # drop the "UniRefNN_" prefix
+      $link_acc =~ s/^UniRef\d+_//;
+  }
+
+  $sth->execute($link_acc);
+
+  while (my ($acc) = $sth->fetchrow_array()) {
+    unless ($acc eq $link_acc || $acc_uniq{$acc}) {
+      $acc_uniq{$acc} = $link_acc;
+    }
+  }
+  $sth->finish();
+}
diff --git a/scripts/expand_uniref50.pl b/scripts/expand_uniref50.pl
new file mode 100755
index 0000000..43d6900
--- /dev/null
+++ b/scripts/expand_uniref50.pl
@@ -0,0 +1,83 @@
+#!/usr/bin/perl -w
+
+################################################################
+# copyright (c) 2010, 2014 by William R. Pearson and The Rector &
+# Visitors of the University of Virginia */
+################################################################
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under this License is distributed on an "AS
+# IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+# express or implied.  See the License for the specific language
+# governing permissions and limitations under the License. 
+################################################################
+
+## usage - expand_uniref50.pl uref50acc.file > up_fasta.file
+#
+
+# (1) take a list of uniref50 accessions and uses uniref50link to get
+#     the associated uniprot accessions
+# (2) take the uniprot accessions and produce a fasta library file
+#     from them
+
+use strict;
+use DBI;
+
+# connection parameters
+my ($host, $db, $port, $user, $pass)  = ("xdb", "uniprot", 0, "web_user", "fasta_www");
+
+# DSN: host and port parts are only present when set (non-zero)
+my @dsn_parts = ("dbi:mysql(AutoCommit=>1,RaiseError=>1):database=$db");
+push @dsn_parts, "host=$host" if $host;
+push @dsn_parts, "port=$port" if $port;
+my $connect = join(';', @dsn_parts);
+
+my $dbh = DBI->connect($connect, $user, $pass) or die $DBI::errstr;
+
+# query name => SQL text, replaced below by the prepared statement handle
+my %up_sth = ( 
+    ur50_to_upacc => "SELECT uniprot_acc FROM  uniref50link WHERE uniref50_acc=?",
+    upacc_to_seq => "SELECT * FROM annot2 join protein USING(acc) WHERE acc=?",
+    );
+
+foreach my $query_name (keys(%up_sth)) {
+  my $sql_text = $up_sth{$query_name};
+  $up_sth{$query_name} = $dbh->prepare($sql_text);
+}
+
+# uniprot accession => uniref50 accession it was found through
+my %acc_uniq = ();
+
+# pass 1: collect the uniprot accessions for each input accession
+while (my $line = <>) {
+  next if ($line =~ m/^UniRef50_UPI/);	# _UPI accessions are not in sp-trembl
+  chomp($line);
+  my ($up_acc, $e_val) = split(/\t/,$line);
+  processLine($up_acc,$up_sth{ur50_to_upacc});
+}
+
+# pass 2: write one FASTA entry per row returned for each accession
+foreach my $up_acc ( keys %acc_uniq ) {
+  my $seq_sth = $up_sth{upacc_to_seq};
+
+  $seq_sth->execute($up_acc);
+  while (my $row_href = $seq_sth->fetchrow_hashref ) {
+    print ">sp|", $row_href->{acc}, "|", $row_href->{id},
+      " (uref50|$acc_uniq{$up_acc}) ", $row_href->{descr}, "\n";
+    print $row_href->{seq}, "\n";
+  }
+  $seq_sth->finish();
+}
+
+$dbh->disconnect();
+
+# processLine($id, $sth)
+#
+# Remove the first "UniRef50_" from $id, execute $sth with the result, and
+# record each accession returned in %acc_uniq (value: the stripped id;
+# an existing true value is not overwritten).
+sub processLine{
+  my ($id,$sth)=@_;
+
+  (my $ur50_key = $id) =~ s/UniRef50_//;
+  $sth->execute($ur50_key);
+
+  while (my ($acc) = $sth->fetchrow_array()) {
+    $acc_uniq{$acc} ||= $ur50_key;
+  }
+  $sth->finish();
+}
diff --git a/scripts/lav2plt.pl b/scripts/lav2plt.pl
new file mode 100755
index 0000000..2f1b593
--- /dev/null
+++ b/scripts/lav2plt.pl
@@ -0,0 +1,349 @@
+#!/usr/bin/perl -w
+
+# lav2plt.pl - produce plot from lav output
+
+# $Id: lav2plt.c 625 2011-03-23 17:21:38Z wrp $ */
+# $Revision: 625 $  */
+
+################################################################
+# copyright (c) 2012, 2014 by William R. Pearson and The Rector &
+# Visitors of the University of Virginia */
+################################################################
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under this License is distributed on an "AS
+# IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+# express or implied.  See the License for the specific language
+# governing permissions and limitations under the License. 
+################################################################
+
+use strict;
+use Getopt::Long;
+use Pod::Usage;
+
+# package globals; declared with "use vars" so that the plotting code
+# loaded with require() below can refer to them as well
+use vars qw($pminx $pmaxx $pminy $pmaxy $lvstr $max_x $max_y
+	    $fxscal $fyscal $fxoff $fyoff
+	    @linarr @elinval @blinval @ilinval
+	    @line_colors @block_colors
+	    $annot_color %annot_names %color_names);
+
+# color names for alignment lines and for annotation blocks
+@line_colors=qw(black blue brown green lightgreen);
+@block_colors = qw( slategrey lightgreen lightblue pink cyan tan gold plum mediumpurple );
+
+my ($have_bits, $have_zdb, $zdb_size,$lav_dev, $shelp, $help) = (0,0,0,'svg',0,0);
+my ($x_upd_script, $y_upd_script) = ("","");
+my ($x_annot_arr_r, $y_annot_arr_r) = (0,0);
+
+# next color index handed out by get_annot(), and the name => index map
+$annot_color = 1;
+%annot_names = ();
+
+GetOptions("B"=>\$have_bits,
+	   "Z=i"=>\$zdb_size,
+	   "xA=s"=>\$x_upd_script,
+	   "yA=s"=> \$y_upd_script,
+	   "dev=s" => \$lav_dev,
+	   "h|?" => \$shelp,
+	   "help" => \$help,
+    );
+
+pod2usage(1) if $shelp;
+pod2usage(exitstatus => 0, verbose => 2) if $help;
+
+#require "./lav_defs.pl";
+# $max_x, $max_y define the maximum plotting area
+# the actual bounding box/view area will be larger if annotation comments are available
+($max_x,$max_y)=(540,540);
+# thresholds shown in the legend for E()-values, bit scores and raw scores
+@elinval=(1e-4,1e-2,1.0,100.0);
+@blinval=(40.0,30.0,20.0,10.0);
+@ilinval=(200,100,50,25);
+
+require "./color_defs.pl";
+
+# load the output driver: postscript when --dev contains "ps", otherwise svg
+if ($lav_dev =~ m/ps/) {require "./lavplt_ps.pl";}
+else {require "./lavplt_svg.pl";}
+
+# lengths of the two sequence ranges (set from the 's' record)
+my ($g_n0, $g_n1);
+
+my $pgm_desc;
+my ($s_name0, $s_name1);
+my ($s_desc0, $s_desc1, $ss_desc0, $ss_desc1);
+my ($p0_beg, $p1_beg, $p0_end, $p1_end);
+my $open_plt = 0;
+
+# -Z given: E()-values are scaled by the database size; otherwise use 1
+if ($zdb_size) {
+  $have_zdb = 1;
+}
+else {
+  $zdb_size = 1;
+}
+
+# Read the lav file record by record.  A record starts with a line whose
+# first character names its type (d, h, s, a); the helper called for each
+# type consumes the lines up to the closing '}'.
+while (my $line = <>) {
+  chomp $line;
+  next unless ($line);
+  next if ($line =~ m/^#/);
+
+  if ($line =~ m/^d/) {$pgm_desc = get_str();}
+  elsif ($line =~ m/^h/) {
+      ($s_desc0, $s_desc1) = get_str2();
+      $s_desc0 =~ s/^gi\|\d+\|//;
+      $s_desc1 =~ s/^gi\|\d+\|//;
+      $s_desc0 = substr($s_desc0,0,50);
+      $s_desc1 = substr($s_desc1,0,50);
+      # first word of each description; the list assignment is needed to
+      # get the captured text rather than the match's true/false result
+      ($ss_desc0) = ($s_desc0 =~ m/^(\S*)/);
+      ($ss_desc1) = ($s_desc1 =~ m/^(\S*)/);
+  }
+  elsif ($line =~ m/^s/) {
+    ($s_name0, $p0_beg, $p0_end,$s_name1, $p1_beg, $p1_end) = get_seq_info();
+    $g_n0 = $p0_end - $p0_beg + 1;
+    $g_n1 = $p1_end - $p1_beg + 1;
+  }
+  elsif ($line =~ m/^a/) {
+    # the plot is opened when the first alignment record is seen
+    unless ($open_plt) {
+      if ($y_upd_script) {$y_annot_arr_r = get_annot($s_desc1, $y_upd_script);}
+      if ($x_upd_script) {$x_annot_arr_r = get_annot($s_desc0, $x_upd_script);}
+      openplt($g_n0, $g_n1, $p0_beg, $p1_beg,  $s_desc0, $s_desc1, $x_annot_arr_r, $y_annot_arr_r,$have_zdb, $have_bits);
+      # same range and same sequence name on both axes: draw the diagonal
+      if (($g_n0 == $g_n1) && ($p0_beg == $p1_beg) && ($p0_end == $p1_end) && $ss_desc0 eq $ss_desc1) {
+	drawdiag($g_n0, $g_n1);
+      }
+      $open_plt = 1;
+    }
+    do_alignment($p0_beg, $p1_beg);
+  }
+}
+
+# no alignment record was seen: still produce the (empty) plot
+# NOTE(review): if no 's' record was read either, $g_n0/$g_n1 are undefined
+# here -- confirm that openplt() can cope with that.
+unless ($open_plt) {
+  # get_annot() takes (description, script), as in the loop above
+  if ($y_upd_script) {$y_annot_arr_r = get_annot($s_desc1, $y_upd_script);}
+  if ($x_upd_script) {$x_annot_arr_r = get_annot($s_desc0, $x_upd_script);}
+  openplt($g_n0, $g_n1, $p0_beg, $p1_beg,  $s_desc0, $s_desc1, $x_annot_arr_r, $y_annot_arr_r,$have_zdb, $have_bits);
+  if (($g_n0 == $g_n1) && ($p0_beg == $p1_beg) && ($p0_end == $p1_end) &&
+      $ss_desc0 eq $ss_desc1) {
+    drawdiag($g_n0, $g_n1);
+  }
+  $open_plt = 1;
+}
+closeplt();
+exit(0);
+
+# get a quote enclosed string
+# d {
+#   "../bin/lalign36 -m "F11 mchu.lav" ../seq/mchu.aa ../seq/mchu.aa"
+# }
+
+# void get_str(FILE *file, char *str, size_t len) {
+# get_str()
+#
+# Read lines from the input up to (not including) the first line that
+# contains '}', ignoring empty lines and lines starting with '#', and
+# return them concatenated, with the opening quote (and the whitespace
+# before it) and the closing quote (and the whitespace after it) removed.
+sub get_str {
+
+  my @pieces = ();
+  while (defined(my $rec = <>)) {
+    chomp $rec;
+    next if (!$rec || $rec =~ m/^#/);
+    last if ($rec =~ m/}/);
+    push @pieces, $rec;
+  }
+
+  my $text = join('', @pieces);
+  $text =~ s/^\s+"//;
+  $text =~ s/"\s*$//;
+
+  return $text;
+}
+
+# get two quote enclosed strings
+# h {
+#    "MCHU - Calmodulin - Human, rabbit, bovine, rat, a - 148 aa"
+#    "MCHU - Calmodulin - Human, rabbit, bovine, rat, and ch"
+# }
+#
+#void get_str2(FILE *file, char *str0, size_t len0,  char *str1, size_t len1)
+
+# get_str2()
+#
+# Read the lines of an 'h' record up to the closing '}' and return the two
+# quoted strings it holds, without their quotes.  A string may continue
+# over several lines; it ends with the first line that ends in a quote.
+# An empty string is returned for a string that is missing from the record.
+sub get_str2 {
+
+  my @str = ();
+  my ($str0,$str1) = ("","");
+
+  while (my $line = <>) {
+    chomp $line;
+    next unless $line;
+    next if ($line =~ m/^#/);
+    last if ($line =~ m/}/);
+    push @str, $line;
+  }
+
+  # only shift while lines remain, so a short record does not append undef
+  while (@str) {
+    $str0 .= shift @str;
+    last if ($str0 =~ m/"\s*$/);
+  }
+
+  while (@str) {
+    $str1 .= shift @str;
+    last if ($str1 =~ m/"\s*$/);
+  }
+
+  # the opening quote may or may not be preceded by whitespace
+  $str0 =~ s/^\s*"//;
+  $str0 =~ s/"\s*$//;
+
+  $str1 =~ s/^\s*"//;
+  $str1 =~ s/"\s*$//;
+
+  return ($str0, $str1);
+}
+
+#void get_seq_info(FILE *file,
+#	     char *str0, size_t len0, int *n0_begin, int *n0_end,
+#	     char *str1, size_t len1, int *n1_begin, int *n1_end)
+
+# get_seq_info()
+#
+# Read the lines of an 's' record up to the closing '}'.  The first two
+# data lines each hold: "name" begin end ...
+# Returns ($name0, $beg0, $end0, $name1, $beg1, $end1), with the quotes
+# removed from the names.  Values missing from the record are returned as
+# undef (names as '').
+# NOTE(review): a name that itself contains whitespace is split apart.
+sub get_seq_info {
+
+  my @lines = ();
+
+  while (my $line = <>) {
+    chomp($line);
+    next unless ($line =~ m/\S/);	# blank lines carry no sequence info
+    next if ($line =~ m/^#/);
+    last if ($line =~ m/}/);
+    push @lines, $line;
+  }
+
+  my @info = ();
+  for my $idx (0, 1) {
+    my $rec = defined($lines[$idx]) ? $lines[$idx] : '';
+    # remove the indentation first, so the fields line up whether or not
+    # the line was indented
+    $rec =~ s/^\s+//;
+    my ($name, $beg, $end) = split(/\s+/,$rec);
+    $name = '' unless defined($name);
+    # after the split the name has no surrounding whitespace left
+    $name =~ s/^"//;
+    $name =~ s/"$//;
+    push @info, ($name, $beg, $end);
+  }
+
+  return @info;
+}
+
+# void do_alignment(FILE *file, int p0_beg, int p1_beg)
+# do_alignment()
+#
+# Read the lines of one 'a' record up to the closing '}' and plot it.
+# Line types (first field):
+#   s score bits              score of the alignment
+#   b beg0 beg1 / e end0 end1 alignment start / end
+#   l beg0 beg1 end0 end1 pct one ungapped segment
+# The first 'l' line opens the plot line with opnline() and moves to the
+# segment start; later 'l' lines draw to their start.  Every 'l' line then
+# draws to its end.  Coordinates are made relative to the file-level
+# $p0_beg/$p1_beg (the arguments passed by the caller are not read).
+sub do_alignment {
+
+  my ($score, $s0_beg, $s0_end, $s1_beg, $s1_end, $percent, $bits);
+  my $line_open = 0;
+
+  while (my $rec = <>) {
+    chomp $rec;
+    next unless $rec;
+    next if ($rec =~ m/^#/);
+    last if ($rec =~ m/}/);
+
+    my @fields = split(/\s+/,$rec);
+    # lose first field if blank
+    shift @fields unless ($fields[0]);
+
+    my ($tag, @vals) = @fields;
+
+    if ($tag eq 's') {
+      ($score, $bits) = @vals[0,1];
+    }
+    elsif ($tag eq 'b') {
+      ($s0_beg, $s1_beg) = @vals[0,1];
+    }
+    elsif ($tag eq 'e') {
+      ($s0_end, $s1_end) = @vals[0,1];
+    }
+    elsif ($tag eq 'l') {
+      ($s0_beg, $s1_beg, $s0_end, $s1_end,$percent) = @vals[0..4];
+
+      if ($line_open) {
+	sxy_draw($s0_beg - $p0_beg + 1, $s1_beg - $p1_beg + 1);
+      }
+      else {
+	opnline($score, $bits);
+	sxy_move($s0_beg - $p0_beg + 1, $s1_beg - $p1_beg + 1);
+	$line_open = 1;
+      }
+      sxy_draw($s0_end - $p0_beg + 1, $s1_end - $p1_beg + 1);
+    }
+  }
+  clsline();
+}
+
+# get annot does 2 things:
+# (1) read in the annotations
+# (2) make a hash of annotation colors, changing the color with each addition
+#
+
+# get_annot($acc, $script)
+#
+# Read annotations from a file, or, when $script starts with '!', from the
+# output of the command that follows the '!' run with the first word of
+# $acc as its argument.  The first line read is discarded as a header;
+# reading stops at a line starting with '>'.  Every line of the form
+#   beg end description
+# becomes a hash {beg, end, descr, sdescr, sname}, where sdescr is the
+# first 12 characters of the description and sname its first word.  Each
+# new sname is given the next color index in %annot_names.  Lines of any
+# other form are skipped.
+#
+# Returns a reference to the list of annotation hashes, or 0 if the file
+# or command could not be opened.
+sub get_annot {
+  my ($acc, $script) = @_;
+
+  my $FIN;
+
+  if ($script !~ /^!/) {
+    # three-argument open: the name is only ever treated as a file to read
+    if (!open($FIN, '<', $script)) {
+      warn "cannot open annotation file: $script\n";
+      return 0;
+    }
+  }
+  else { # run the script on the accession
+    $script =~ s/!//;
+    # first whitespace-delimited word of the description, '' if none
+    $acc = (defined($acc) && $acc =~ m/^(\S+)/) ? $1 : '';
+    # $script may carry its own arguments, so the command goes through the
+    # shell; escape single quotes so the accession stays one quoted word
+    (my $quoted_acc = $acc) =~ s/'/'\\''/g;
+    if (!open($FIN, "$script \'$quoted_acc\' |")) {
+      warn "cannot run annotation script:  $script $acc\n";
+      return 0;
+    }
+  }
+
+  my @annots = ();
+
+  my $header = <$FIN>;
+  while (my $line = <$FIN>) {
+    last if ($line =~ m/^>/);
+    chomp $line;
+    my %fields = ();
+#    @fields{qw(beg type end descr)} = split(/\t/,$line);
+    @fields{qw(beg end descr)} = ($line =~ m/^\s*(\d+)\s+(\d+)\s+(\S.*)$/);
+    # no match leaves all three undefined: not an annotation line
+    next unless defined($fields{descr});
+    $fields{sdescr} = substr($fields{descr},0,12);
+    ($fields{sname}) = ($fields{sdescr} =~ m/^(\S+)/);
+    push @annots, \%fields;
+    unless ($annot_names{$fields{sname}}) {
+      $annot_names{$fields{sname}} = $annot_color++;
+    }
+  }
+  close($FIN);
+  return \@annots;
+}
+
+# ln(2).  The value is assigned in a BEGIN block because the main program
+# calls exit(0) before execution reaches this point in the file, so a
+# plain run-time initialization here would never be executed and the
+# variable would still be undefined when bit_to_E() is called.
+my $M_LN2;
+BEGIN { $M_LN2 = 0.69314718055994530942; }
+
+# produce e_val from bit score
+#
+# bit_to_E($bit)
+#   $bit - bit score of an alignment
+# Uses the file-level sequence lengths $g_n0, $g_n1 and $zdb_size.
+# Returns $zdb_size * p, where p = n0*n1/2^bit, replaced by 1-exp(-p)
+# when it is larger than 0.01.
+
+#double bit_to_E (double bit)
+sub bit_to_E {
+  my $bit = shift @_;
+
+  my ($p_val);
+
+  $p_val = $g_n0 * $g_n1 / exp($bit * $M_LN2);
+  if ($p_val > 0.01) {$p_val = 1.0 - exp(-$p_val);}
+
+  return $zdb_size * $p_val;
+}
+
+=pod
+
+=head1 NAME
+
+lav2plt.pl
+
+=head1 SYNOPSIS
+
+ lav2plt.pl -h -help -B -Z=10000 --dev svg|ps --xA x_annot_script.pl --yA y_annot_script.pl  output.lav
+
+=head1 OPTIONS
+
+ -h	short help
+ --help include description
+ -B	have bit scores
+ -Z=#   simulated database size
+ --dev svg|ps graphical output format
+ --xA/--yA domain annotation script
+
+=head1 DESCRIPTION
+
+C<lav2plt.pl> reads a local alignment lav file, produced by 'lalign36
+-m 11' and produces an alignment plot (on stdout) in svg (--dev svg, default)
+or postscript (--dev ps) format.
+
+=head1 AUTHOR
+
+William R. Pearson, wrp at virginia.edu
+
+=cut
diff --git a/scripts/lavplt_ps.pl b/scripts/lavplt_ps.pl
new file mode 100755
index 0000000..f8ed5d4
--- /dev/null
+++ b/scripts/lavplt_ps.pl
@@ -0,0 +1,540 @@
+################################################################
+# copyright (c) 2012, 2014 by William R. Pearson and The Rector &
+# Visitors of the University of Virginia */
+################################################################
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under this License is distributed on an "AS
+# IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+# express or implied.  See the License for the specific language
+# governing permissions and limitations under the License. 
+################################################################
+
+# SX($x) - convert a sequence x position to a page x coordinate:
+# scale by $fxscal, add $fxoff and a 24 unit margin, truncate to integer
+#define SX(x) (int)((double)(x)*fxscal+fxoff+24)
+sub SX {
+  my ($seq_x) = @_;
+  my $page_x = $seq_x*$fxscal + $fxoff + 24;
+  return int($page_x);
+}
+
+# SY($y) - convert a sequence y position to a page y coordinate:
+# scale by $fyscal, add $fyoff and an 84 unit margin, truncate to integer
+# NOTE(review): the C macro quoted below uses 48, the code uses 84 --
+# confirm which is intended.
+#define SY(y) (int)((double)(y)*fyscal+fyoff+48)
+sub SY {
+  my ($seq_y) = @_;
+  my $page_y = $seq_y*$fyscal + $fyoff + 84;
+  return int($page_y);
+}
+
+# alignment lines: black blue cyan green lt_green */
+my @rlincol=(0.0,0.0,0.0,0.45,0.0);
+my @glincol=(0.0,0.0,0.5,0.30,1.0);
+my @blincol=(0.0,0.8,0.5,0.15,0.0);
+
+# color names (keys of %color_names) for alignment lines and for
+# annotation blocks; every name listed must exist in %color_names
+my @line_colors=qw(black blue cyan green lightgreen);
+my @block_colors = qw( slategrey lightgreen lightblue pink cyan tan gold plum mediumpurple );
+
+# domain blocks: grey blue cyan green lt_green
+my @rblk_col=(0.33, 0.0, 0.0, 0.45, 0.0);
+my @gblk_col=(0.33, 0.0, 0.5, 0.30, 1.0);
+my @bblk_col=(0.33, 0.8, 0.5, 0.15, 0.0);
+
+# void openplt(long n0, long n1, int sq0off, int sq1off, char *xtitle, char *ytitle)
+#
+# openplt($n0, $n1, $sq0off, $sq1off, $xtitle, $ytitle,
+#         $x_annot_r, $y_annot_r, $have_zdb, $have_bits)
+#
+# Write the postscript header and procedure definitions, set the global
+# scale factors and offsets ($fxscal, $fyscal, $fxoff, $fyoff) used by
+# SX()/SY(), and draw the frame, both axes, the legend and (when
+# annotations are given) the annotation grids.
+#   $n0, $n1            lengths of the x and y sequence ranges
+#   $sq0off, $sq1off    coordinate of the first residue of each range
+#   $xtitle, $ytitle    axis titles
+#   $x_annot_r, $y_annot_r  annotation list references, or false for none
+#   $have_zdb, $have_bits   passed on to legend()
+sub openplt {
+  my ($n0, $n1, $sq0off, $sq1off, $xtitle, $ytitle, $x_annot_r, $y_annot_r, $have_zdb, $have_bits) = @_;
+
+  # legend E()-value thresholds can be replaced through $lvstr or the
+  # LINEVAL environment variable (whitespace separated values)
+  if ($lvstr) {
+    @elinval = split(/\s+/,$lvstr);
+  }
+  elsif ($ENV{LINEVAL}) {
+    @elinval = split(/\s+/,$ENV{LINEVAL});
+  }
+	
+## 8.5 x 11 paper is 612 pt x 792 pt -- important to stay on one page, with comments
+## max_x, max_y are set to 540 pt -- 7.5 in, 9pt (0.75 in) margins, leaving little extra space
+## if comments are provided, max_x, max_y must be reduced (max_x for space, max_y to keep things square)
+##
+
+  my ($xbound, $ybound) = ($max_x + 24, $max_y + 72);
+  if ($x_annot_r) {$ybound += 64;}
+  if ($y_annot_r) {
+    $xbound += 100;
+    # shrink each dimension by a tenth of its own size
+    $max_x -= $max_x/10;
+    $max_y -= $max_y/10;
+  }
+
+  print("%!PS-Adobe-2.0\n");
+  print("%%Creator: plalign\n");
+  # a plain print, so the date is written out directly and the header
+  # line is ended with its own newline
+  print("%%CreationDate: 2012-01-01\n");
+  print("%%DocumentFonts: Courier\n");
+  print("%%Pages: 1\n");
+  print("%%BoundingBox: 18 18 $xbound $ybound\n");
+  print("%%EndComments\n");
+  print("%%EndProlog\n");
+  print("%%Page: 1 1\n");
+  print("/Courier findfont 14 scalefont setfont\n");
+  print("/vcprint { gsave 90 rotate dup stringwidth pop 2 div neg 0 rmoveto\n");
+  print("show newpath stroke grestore } def\n");
+  print("/vprint { gsave 90 rotate\n");
+  print("show newpath stroke grestore } def\n");
+  print("/hcprint { gsave dup stringwidth pop 2 div neg 0 rmoveto\n");
+  print("show newpath stroke grestore } def\n");
+  print("/hrprint { gsave dup stringwidth pop neg 0 rmoveto\n");
+  print("show newpath stroke grestore } def\n");
+  print("/hprint { gsave show newpath stroke grestore } def\n");
+  # % x y w h RT - % draw a rectangle size w h at x y
+  print("/RT { [ ] 0 setdash newpath 4 -2 roll moveto dup 0 exch rlineto exch 0 rlineto neg 0 exch rlineto closepath }  def \n");
+
+  ($pmaxx, $pmaxy) = ($n0, $n1);
+
+# $max_x, $max_y define the maximum plotting area
+# the actual bounding box/view area will be larger if annotation comments are available
+
+  # NOTE(review): the x scale is computed from $n1 and the y scale from
+  # $n0; both are then set to the smaller value -- confirm the pairing.
+  $fxscal = ($max_x-1)/$n1;
+  $fyscal = ($max_y-1)/$n0;
+
+  # use the same (smaller) scale on both axes
+  if ($fxscal > $fyscal) {$fxscal = $fyscal;}
+  else {$fyscal = $fxscal;}
+
+  if ($fyscal * $n0 < $max_y/5.0) {
+    $fyscal = ($max_y-1)/($n0*5.0);
+  }
+
+  $fxscal *= 0.9; $fxoff = ($max_x-1)/11.0;
+  $fyscal *= 0.9; $fyoff = ($max_y-1)/11.0;
+
+  printf("%% openplt - frame - %ld %ld\n", $n0, $n1);
+
+  # draw the plot frame
+  linetype(0);
+  print("gsave\n");
+  print("currentlinewidth 1.5 mul setlinewidth\n");
+  newline();
+  move(SX(0),SY(0));
+  draw(SX(0),SY($n1+1));
+  draw(SX($n0+1),SY($n1+1));
+  draw(SX($n0+1),SY(0));
+  draw(SX(0),SY(0));
+  clsline($n0,$n1,100000);
+  print("grestore\n");
+
+  # NOTE(review): the x axis is given $sq1off and the y axis $sq0off,
+  # while xgrid()/ygrid() use $sq0off for x and $sq1off for y -- confirm.
+  my $n_div = 11;
+  xaxis($n0,$sq1off, $xtitle, $n_div);
+
+  $n_div = 21 unless $n0 == $n1;
+  yaxis($n1,$sq0off, $ytitle, $n_div);
+  legend($have_zdb, $have_bits);
+
+  print("%% openplt done\n");
+
+  if ($x_annot_r) {xgrid($x_annot_r, $n0, $sq0off, $n1, $sq1off);}
+  if ($y_annot_r) {ygrid($y_annot_r, $n0, $sq0off, $n1, $sq1off);}
+}
+
+#void drawdiag(long n0, long n1)
+#
+# drawdiag($n0, $n1) - draw the diagonal from the plot origin to
+# ($n0+1, $n1+1) in line color 0, at 1.5 times the current line width
+sub drawdiag  {
+  my ($len_x, $len_y) = @_;
+
+  linetype(0);
+  printf("%% drawdiag %ld %ld\n", $len_x, $len_y);
+  print("gsave\n", "currentlinewidth 1.5 mul setlinewidth\n");
+
+  newline();
+  move(SX(0), SY(0));
+  draw(SX($len_x + 1), SY($len_y + 1));
+  clsline($len_x, $len_y, 10000);
+
+  print("grestore\n", "%% drawdiag done\n");
+}
+
+# tick array - candidate tick increments, smallest first
+my @tarr = (10,20,50,100,200,500,1000,2000,5000,10000,20000,50000,100000,200000,500000,1000000);
+
+# void xaxis(long n, int offset, char *title)
+#
+# xaxis($n, $offset, $title, $n_div) - draw tick marks, tick labels and
+# the title of the x axis.
+#   $n      length of the sequence range on the axis
+#   $offset coordinate of the first residue shown
+#   $title  axis title; parentheses are replaced by blanks
+#   $n_div  the tick increment chosen gives fewer than $n_div divisions
+sub xaxis {
+  my ($n, $offset, $title, $n_div) = @_;
+
+  my $tick = 6;
+
+  # search for the correct increment in the tick array: the first one
+  # that gives fewer than $n_div divisions, else the largest
+  my $t_idx = $#tarr;
+  for (my $k=0; $k < @tarr; $k++) {
+    if ($n/$tarr[$k] < $n_div) { $t_idx = $k; last; }
+  }
+
+  # js is the tick increment
+  my $js = $tarr[$t_idx];
+
+  # jm is the number of whole increments that fit on the axis
+  my $jm = int($n/$js);
+
+  # jo is the distance of the offset past the previous tick multiple
+  my $jo = ($offset-1) % $js;
+
+  # jl is the offset rounded down to a tick multiple (added to the labels);
+  # int() gives the integer division the labels depend on
+  my $jl = int(($offset-1)/$js) * $js;
+
+  newline();
+  for (my $i=1; $i<=$jm; $i++) {
+    move(SX($i*$js - $jo),SY(0));
+    draw(SX($i*$js - $jo),SY(0)-$tick);
+  }
+  clsline($n,$n,10000);
+
+  my $numstr = sprintf("%ld",$js + $jl );
+
+  if (length($numstr) > 4) {
+    # long labels: only label the first and the last tick
+    move(SX($js-$jo),SY(0)-$tick-16);
+    printf("(%s) hcprint\n",$numstr);
+
+    $numstr=sprintf("%ld",$jm*$js+$jl);
+    move(SX($jm*$js-$jo),SY(0)-$tick-16);
+    printf("(%s) hcprint\n",$numstr);
+  }
+  else {	# put in all the axis values */
+    for (my $i=1; $i<=$jm; $i++) {
+      $numstr=sprintf("%ld",$i*$js+$jl);
+      move(SX($i*$js-$jo),SY(0)-$tick-16);
+      printf("(%s) hcprint\n",$numstr);
+    }
+  }
+
+  print("newpath\n");
+  move(SX($n/2),SY(0)-$tick-30);
+
+#  for (bp = strchr(title,'('); (bp!=NULL); bp = strchr(bp+1,'(')) *bp=' ';
+#  for (bp = strchr(title,')'); (bp!=NULL); bp = strchr(bp+1,')')) *bp=' ';
+  # parentheses delimit postscript strings, so they are blanked out
+  $title =~ s/\(/ /g;
+  $title =~ s/\)/ /g;
+  printf("(%s) hcprint\n",$title);
+}
+		
+# void yaxis(long n, int offset, char *title)
+#
+# yaxis($n, $offset, $title, $n_div) - draw tick marks, tick labels and
+# the (rotated) title of the y axis.
+#   $n      length of the sequence range on the axis
+#   $offset coordinate of the first residue shown
+#   $title  axis title; parentheses are backslash-escaped
+#   $n_div  the tick increment chosen gives fewer than $n_div divisions
+sub yaxis {
+  my ($n, $offset, $title, $n_div) = @_;
+
+  my $tick = 6;
+
+  # first increment in @tarr that gives fewer than $n_div divisions,
+  # else the largest one
+  my $t_idx = scalar(@tarr)-1;
+  for (my $k=0; $k < @tarr; $k++) {
+    if ($n/$tarr[$k] < $n_div) { $t_idx = $k; last; }
+  }
+
+  # js is the tick increment
+  my $js = $tarr[$t_idx];
+
+  # jm is the number of whole increments that fit on the axis
+  my $jm = int($n/$js);
+
+  # $jo is the distance of the offset past the previous tick multiple
+  my $jo = ($offset-1) % $js;
+
+  # jl is the offset rounded down to a tick multiple (added to the labels);
+  # int() gives the integer division the labels depend on
+  my $jl = int(($offset-1)/$js) * $js;
+
+  newline();
+  for (my $i=1; $i<=$jm; $i++) {
+    move(SX(0),SY($i*$js-$jo));
+    draw(SX(0)-$tick,SY($i*$js-$jo));
+  }
+  clsline($n,$n,10000);
+
+  my $numstr = sprintf("%ld",$js+$jl);
+
+  if (length($numstr) > 4) {
+    # long labels: only label the first and the last tick
+    move(SX(0)-$tick-4,SY($js-$jo)-4);
+    printf("(%s) hrprint\n",$numstr);
+
+    $numstr = sprintf("%ld",$jm*$js+$jl);
+    move(SX(0)-$tick-4,SY($jm*$js-$jo)-4);
+    printf("(%s) hrprint\n",$numstr);
+  }
+  else {
+    for (my $i=1; $i<=$jm; $i++) {
+      $numstr = sprintf("%ld",$i*$js+$jl);
+      move(SX(0)-$tick-4,SY($i*$js-$jo)-4);
+      printf("(%s) hrprint\n",$numstr);
+    }
+  }
+
+  move(SX(0)-$tick-42,SY($n/2));
+#  for (bp = strchr(title,'('); (bp!=NULL); bp = strchr(bp+1,'(')) *bp=' ';
+#  for (bp = strchr(title,')'); (bp!=NULL); bp = strchr(bp+1,')')) *bp=' ';
+  # parentheses delimit postscript strings, so they are escaped
+  $title =~ s/\(/\\(/g;
+  $title =~ s/\)/\\)/g;
+  printf("(%s) vcprint\n",$title);
+
+}
+
+# xgrid($annot_arr_r, $n0, $sq0_off, $n1, $sq1_off)
+#
+# Mark the annotations of the x-axis sequence: for each annotation lying
+# inside the plotted range, draw a dashed vertical line at its beginning
+# and at its end, a colored block above the plot, and its short
+# description as a rotated label.
+#   $annot_arr_r  reference to the list of annotation hashes
+#   $n0, $n1      lengths of the x and y sequence ranges
+#   $sq0_off      coordinate of the first x residue ($sq1_off is not used)
+sub xgrid {
+  my ($annot_arr_r, $n0, $sq0_off, $n1, $sq1_off) = @_;
+
+  my $show_block = 1;
+  my $text_offset = $show_block ? 24 : 8;
+
+  # last sequence coordinate inside the plot, and the top of the frame
+  my $last_pos = $sq0_off + $n0 - 1;
+  my $y_top = SY($n1);
+
+  print("%% xgrid: $n0 $n1\n");
+  print("gsave\n");
+  print("/Courier findfont 11 scalefont setfont\n");
+  print("currentlinewidth 0.5 mul setlinewidth\n");
+
+  foreach my $dom ( @$annot_arr_r) {
+    next if ($dom->{beg} < $sq0_off);
+    next if ($dom->{end} > $last_pos);
+    last if ($dom->{beg} > $last_pos);
+
+    my $x_beg = SX($dom->{beg} - $sq0_off);
+    my $x_end = SX($dom->{end} - $sq0_off);
+
+    # line at the start of the annotation
+    newline();
+    print("0.33 0.33 0.33 setrgbcolor\n");
+    move($x_beg, SY(0));
+    print("[3 6] 0 setdash\n");
+    draw($x_beg, $y_top);
+    clsline();
+
+    # line at the end of the annotation
+    newline();
+    print("0.33 0.33 0.33 setrgbcolor\n");
+    move($x_end, SY(0));
+    print("[6 3] 0 setdash\n");
+    draw($x_end, $y_top);
+    clsline();
+
+    # show rotated label, at the middle of the annotation
+    my $xpos = SX(($dom->{end} - $dom->{beg})/2 + $dom->{beg} - $sq0_off) + 4;
+    my $ypos = $y_top + $text_offset;
+    # printf("<text x=\"0\" y=\"0\" text-anchor=\"left\" transform=\"translate($xpos, $ypos) rotate(-90,0,0)\">%s</text>\n",$annot->{sdescr});
+    if ($show_block) {
+      draw_block($x_beg, $y_top + 6, $x_end - $x_beg, 12,
+		 $annot_names{$dom->{sname}});
+    }
+    move($xpos, $ypos);
+
+    # escape parentheses inside the postscript string
+    (my $label = $dom->{sdescr}) =~ s/([()])/\\$1/g;
+    print "($label) vprint\n";
+  }
+  print("grestore\n");
+}
+
+# ygrid($annot_arr_r, $n0, $sq0_off, $n1, $sq1_off)
+#
+# Mark the annotations of the y-axis sequence: for each annotation lying
+# inside the plotted range, draw a dashed horizontal line at its beginning
+# and at its end, a colored block to the right of the plot, and its short
+# description as a label.
+#   $annot_arr_r  reference to the list of annotation hashes
+#   $n0, $n1      lengths of the x and y sequence ranges
+#   $sq1_off      coordinate of the first y residue ($sq0_off is not used)
+sub ygrid {
+  my ($annot_arr_r, $n0, $sq0_off, $n1, $sq1_off) = @_;
+
+  my $show_block = 1;
+  my $text_offset = $show_block ? 24 : 8;
+
+  # last sequence coordinate inside the plot, and the right edge of the frame
+  my $last_pos = $sq1_off + $n1 - 1;
+  my $x_right = SX($n0);
+
+  print("gsave\n");
+  print("/Courier findfont 11 scalefont setfont\n");
+  print("currentlinewidth 0.5 mul setlinewidth\n");
+
+  foreach my $dom ( @$annot_arr_r) {
+    next if ($dom->{beg} < $sq1_off);
+    next if ($dom->{end} > $last_pos);
+    last if ($dom->{beg} > $last_pos);
+
+    my $y_beg = SY($dom->{beg} - $sq1_off);
+    my $y_end = SY($dom->{end} - $sq1_off);
+
+    # line at the start of the annotation
+    newline();
+    print("0.33 0.33 0.33 setrgbcolor\n");
+    move(SX(0), $y_beg);
+    print("[3 6] 0 setdash\n");
+    draw($x_right, $y_beg);
+    clsline();
+
+    # line at the end of the annotation (no color is set for this one)
+    newline();
+    move(SX(0), $y_end);
+    print("[6 3] 0 setdash\n");
+    draw($x_right, $y_end);
+    clsline();
+
+    # label position: right of the frame, at the middle of the annotation
+    my $xpos = $x_right + $text_offset;
+    my $ypos = SY(($dom->{end} - $dom->{beg})/2 + $dom->{beg} - $sq1_off) - 3;
+
+    if ($show_block) {
+      draw_block($x_right + 6, $y_beg, 12, $y_end - $y_beg,
+		 $annot_names{$dom->{sname}});
+    }
+
+#    printf("<text x=\"$xpos\" y=\"$ypos\" text-anchor=\"left\">%s</text>\n",$annot->{sdescr});
+    move($xpos, $ypos);
+
+    # escape parentheses inside the postscript string
+    (my $label = $dom->{sdescr}) =~ s/([()])/\\$1/g;
+    print "($label) hprint\n";
+  }
+  print("grestore\n");
+}
+
+# draw_block($x, $y, $w, $h, $color)
+# Emit PostScript for a filled rectangle with a white outline.
+# $color is an index into @block_colors (wrapped with modulo); the
+# colour name found there is looked up in %color_names to get [r,g,b].
+sub draw_block {
+  my ($x, $y, $w, $h, $color) = @_;
+
+  my $color_ix = $color % scalar(@block_colors);
+  my $rgb = $color_names{$block_colors[$color_ix]};
+
+  print "gsave\n";
+
+  # scale the 0..255 channel values to the 0..1 range of setrgbcolor
+  printf("%.3f %.3f %.3f setrgbcolor\n",
+         $rgb->[0]/255, $rgb->[1]/255, $rgb->[2]/255);
+
+  # filled interior, inset by one unit on every side
+  printf("%d %d %d %d RT\n", $x+1, $y+1, $w-2, $h-2);
+  print "fill\n";
+  print "stroke\n";
+
+  # white outline at the full size
+  print "1.0 1.0 1.0 setrgbcolor\n";
+  print join(" ", $x, $y, $w, $h), " RT\n";
+  print "stroke\n";
+  print "grestore\n";
+}
+
+# void legend()
+# Print the colour key: one short sample line per score class, each
+# followed by the threshold value of that class.
+#   $have_zdb  - true: thresholds are E()-values from @elinval
+#   $have_bits - true (and no $have_zdb): bit scores from @blinval
+#   neither    - raw scores from @ilinval (four classes instead of five)
+sub legend {
+  my ($have_zdb, $have_bits) = @_;
+
+  # fixed positions of the (up to five) legend entries
+  my @xpos = (144,144,288,288,432);
+  my @ypos = (36,18,36,18,27);
+
+  my $n_classes = ($have_zdb || $have_bits) ? 5 : 4;
+
+  move(60,27+18);
+  if ($have_zdb) {draw_str("E():  ");}
+  elsif ($have_bits) {draw_str("Bits:  ");}
+
+  foreach my $k (0 .. $n_classes - 1) {
+    my ($x, $y) = ($xpos[$k], $ypos[$k] + 18);
+
+    # sample line, 1.5x the current line width, in the class colour
+    print("gsave currentlinewidth 1.5 mul setlinewidth\n");
+    newline();
+    linetype($k);
+    move($x - 36, $y);
+    draw($x + 24, $y);
+    clsline(1000,1000,10000);
+    print("grestore\n");
+
+    # threshold text to the right of the sample line
+    move($x + 36, $y - 4);
+
+    my $numstr;
+    if ($have_zdb) {
+      $numstr = ($k == 4) ? sprintf(">%.1lg",$elinval[3])
+                          : sprintf("<%.1lg",$elinval[$k]);
+    }
+    elsif ($have_bits) {
+      $numstr = ($k == 4) ? sprintf("<%.1lf",$blinval[3])
+                          : sprintf(">=%.1lf",$blinval[$k]);
+    }
+    else {
+      $numstr = ($k == 3) ? sprintf("<%d",$ilinval[3])
+                          : sprintf(">%d",$ilinval[$k]);
+    }
+    printf("(%s) hprint\n",$numstr);
+  }
+}
+
+#void linetype(type)
+# Select the drawing colour for score class $type.
+# $type indexes @line_colors; the colour name found there is looked up
+# in %color_names, which holds [r, g, b] triples of 8-bit values.
+sub linetype {
+  my $type = shift;
+
+  my $rgb_name = $line_colors[$type];
+  my $rgb = $color_names{$rgb_name};
+
+  # 8-bit channels span 0..255, so divide by 255 (as draw_block does)
+  # to reach the full 0..1 range setrgbcolor expects; dividing by 256
+  # would render pure white as 0.996.
+  printf("%5.3f %5.3f %5.3f setrgbcolor\n",
+         $rgb->[0]/255, $rgb->[1]/255, $rgb->[2]/255);
+}
+
+#void closeplt()
+# Finish the PostScript document: trailer comment, page output, EOF marker.
+sub closeplt {
+  print("%%Trailer\n",
+        "showpage\n",
+        "%%EOF\n");
+}
+
+# void opnline(int s, double bits)
+# Start a new alignment line: choose the colour class from the score
+# and open a new path.
+#   $s    - raw score (used when neither $have_zdb nor $have_bits is set)
+#   $bits - bit score (compared with @blinval, or converted with
+#           bit_to_E() and compared with @elinval)
+# $have_zdb and $have_bits are not parameters; they are read from the
+# enclosing scope.
+sub opnline {
+  # take both arguments from @_; "= shift" would fill only $s and
+  # leave $bits undefined
+  my ($s, $bits) = @_;
+
+  my $e_val;
+
+  if ($have_zdb) {
+    # smaller E()-values get the lower-numbered classes
+    $e_val = bit_to_E($bits);
+    if ($e_val < $elinval[0]) {linetype(0);}
+    elsif ($e_val < $elinval[1]) {linetype(1);}
+    elsif ($e_val < $elinval[2]) {linetype(2);}
+    elsif ($e_val < $elinval[3]) {linetype(3);}
+    else {linetype(4);}
+  }
+  elsif ($have_bits) {
+    # larger bit scores get the lower-numbered classes
+    if ($bits >= $blinval[0]) {linetype(0);}
+    elsif ($bits >= $blinval[1]) {linetype(1);}
+    elsif ($bits >= $blinval[2]) {linetype(2);}
+    elsif ($bits >= $blinval[3]) {linetype(3);}
+    else {linetype(4);}
+  }
+  else {
+    # raw scores: only four classes
+    if ($s > $ilinval[0]) {linetype(0);}
+    elsif ($s > $ilinval[1]) {linetype(1);}
+    elsif ($s > $ilinval[2]) {linetype(2);}
+    else {linetype(3);}
+  }
+
+  print("newpath\n");
+}
+
+# void newline()
+# Reset the colour to black and begin a new path.
+sub newline {
+  print "0 0 0 setrgbcolor\n newpath\n";
+}
+
+# void clsline(long x,long y,int s)
+# Stroke the current path.  Any arguments passed by callers are ignored.
+sub clsline {
+  print "stroke\n";
+}
+
+# void move(int x, int y)
+# Emit a "moveto" to the given position; both values are printed as
+# integers (%d).
+sub move {
+  my ($to_x, $to_y) = @_;
+
+  printf("%d %d moveto\n", $to_x, $to_y);
+}
+
+# void sxy_move(int x, int y)
+# Emit a "moveto" after passing x through SX() and y through SY().
+sub sxy_move {
+  my ($seq_x, $seq_y) = @_;
+  my @scaled = (SX($seq_x), SY($seq_y));
+  printf("%d %d moveto\n", @scaled);
+}
+
+# void draw(int x, int y)
+# Emit a "lineto" to the given position; both values are printed as
+# integers (%d).
+sub draw {
+  my ($to_x, $to_y) = @_;
+  printf("%d %d lineto\n", $to_x, $to_y);
+}
+
+# void sxy_draw(int x, int y)
+# Emit a "lineto" after passing x through SX() and y through SY().
+sub sxy_draw {
+  my ($seq_x, $seq_y) = @_;
+  my @scaled = (SX($seq_x), SY($seq_y));
+  printf("%d %d lineto\n", @scaled);
+}
+
+#void draw_str(char *str)
+# Show a text string at the current point.
+# The commented-out C original replaced parentheses with blanks; here
+# they are backslash-escaped instead, so they survive inside the
+# PostScript (...) string literal.
+sub draw_str
+{
+  my ($text) = @_;
+
+  $text =~ s/([()])/\\$1/g;
+
+  printf("(%s) show\n", $text);
+}
+
+#void cal_coord(int n0, int n1, long *a_start0, long *a_stop0, long *a_start1, long *a_stop1 )
+# Placeholder kept from the C interface; it does nothing in this version.
+sub cal_coord {
+  return;
+}
diff --git a/scripts/lavplt_svg.pl b/scripts/lavplt_svg.pl
new file mode 100755
index 0000000..ccc1c7a
--- /dev/null
+++ b/scripts/lavplt_svg.pl
@@ -0,0 +1,461 @@
+
+################################################################
+# copyright (c) 2012, 2014 by William R. Pearson and The Rector &
+# Visitors of the University of Virginia */
+################################################################
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under this License is distributed on an "AS
+# IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+# express or implied.  See the License for the specific language
+# governing permissions and limitations under the License. 
+################################################################
+
+# SX(x): convert an x coordinate to a pixel column using the current
+# scale ($fxscal) and offset ($fxoff), truncated to an integer.
+# C original: #define SX(x) (int)((double)(x)*fxscal+fxoff+6)
+# (this version adds 18 rather than 6)
+sub SX {
+  my ($x_val) = @_;
+  my $pixel = $x_val*$fxscal + $fxoff + 18;
+  return int($pixel);
+}
+
+# SY(y): convert a y coordinate to a pixel row.  The scaled value is
+# subtracted from $max_y + 24, so larger y values give smaller rows.
+# C original: #define SY(y) (max_y + 24 - (int)((double)(y)*fyscal+fyoff))
+sub SY {
+  my ($y_val) = @_;
+  my $scaled = int($y_val*$fyscal + $fyoff);
+  return $max_y + 24 - $scaled;
+}
+
+# $y_delta used widely to offset for x_domain annotation
+# (reset to 0 by openplt(), raised by 24 there when x annotations are
+# given, and added to the legend's y positions in legend())
+my $y_delta = 0;
+
+#void openplt(long n0, long n1, int sq0off, int sq1off, char *xtitle, char *ytitle)
+# Start the SVG document and draw everything that does not depend on
+# the alignments: the XML/SVG prologue, the plot frame, both axes, the
+# legend and (when annotation lists are given) the annotation grids.
+#   $n0, $n1           - extents used for the x and y axis respectively
+#   $sq0off, $sq1off   - sequence offsets passed on to the axes and grids
+#   $xtitle, $ytitle   - axis titles
+#   $x_annot_r, $y_annot_r - optional array refs of annotation hashes
+#                        (the sdescr field is read here)
+#   $have_zdb, $have_bits  - passed on to legend()
+# Side effects: sets $y_delta, $pmaxx, $pmaxy, $fxscal, $fyscal, $fxoff
+# and $fyoff; may replace @elinval; reduces $max_x and $max_y when y
+# annotations are present.
+sub openplt
+{
+  my ($n0, $n1, $sq0off, $sq1off, $xtitle, $ytitle, $x_annot_r, $y_annot_r,$have_zdb, $have_bits) = @_;
+
+  # the E()-value thresholds can be overridden by $lvstr or, failing
+  # that, by the LINEVAL environment variable (whitespace separated)
+  if ($lvstr) {
+    @elinval = split(/\s+/,$lvstr);
+  }
+  elsif ($ENV{LINEVAL}) {
+    @elinval = split(/\s+/,$ENV{LINEVAL});
+  }
+
+  # $xbound/$ybound become the width/height of the <svg> element
+  my ($xbound, $ybound) = ($max_x + 24, $max_y + 48);
+  $y_delta = 0;
+
+  if ($x_annot_r) {
+    # make the image taller: 24 plus 6 units per character of the
+    # longest x annotation description
+    my $x_comments = 0;
+    for my $annot (@$x_annot_r) {
+      if (length($annot->{sdescr}) > $x_comments) {
+	$x_comments = length($annot->{sdescr})
+      }
+    }
+    $ybound += 24 + 6 * $x_comments;
+    $y_delta += 24;
+  }
+
+  if ($y_annot_r) {
+    # make the image wider by 6 units per character of the longest y
+    # annotation description, and shrink the plot area by the same amount
+    my $y_comments = 0;
+    for my $annot (@$y_annot_r) {
+      if (length($annot->{sdescr}) > $y_comments) {
+	$y_comments = length($annot->{sdescr})
+      }
+    }
+    $xbound += 6 * $y_comments;
+    # $max_x/$max_y are not local to this sub, so the reduction persists
+    # after openplt() returns
+    $max_x -= 6 * $y_comments;
+    $max_y -= 6 * $y_comments
+  }
+
+  print("<?xml version=\"1.0\" standalone=\"no\"?>\n");
+  print("<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\" \n");
+  print("\"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n\n");
+
+  print("<svg width=\"$xbound\" height=\"$ybound\" version=\"1.1\"\n");
+  print("xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n\n");
+
+  ($pmaxx, $pmaxy) = ($n0, $n1);
+
+  $fxscal = ($max_x-1)/$n1;
+  $fyscal = ($max_y-1)/$n0;
+
+  # use the smaller of the two factors for both axes
+  if ($fxscal > $fyscal) {$fxscal = $fyscal;}
+  else {$fyscal = $fxscal;}
+
+  # if $n0 scaled this way covers less than a fifth of $max_y, replace
+  # the y scale
+  if ($fyscal * $n0 < $max_y/5.0) {
+    $fyscal = ($max_y-1)/($n0*5.0);
+  }
+
+  $fxscal *= 0.9; $fxoff = ($max_x-1)/11.0;
+  $fyscal *= 0.9; $fyoff = ($max_y-1)/11.0;
+  # with x annotations the y offset is reduced by 48 + $y_delta
+  if ($x_annot_r) {$fyoff -= (48 + $y_delta);}
+
+  # draw the plot frame
+  printf("<rect x=\"%d\" y=\"%d\" width=\"%d\" height=\"%d\"\n",
+	 SX(0),SY($n1+1), SX($n0+1)-SX(0), SY(0) - SY($n1+1));
+  print("stroke=\"black\" stroke-width=\"2.0\" fill=\"none\" />\n");
+
+  # NOTE(review): xaxis() is given $sq1off and yaxis() $sq0off, whereas
+  # xgrid() uses $sq0off and ygrid() $sq1off for the same axes -- confirm
+  # which pairing is intended.
+  my $n_div = 11;
+  xaxis($n0, $sq1off, $xtitle, $n_div);
+
+  # the y axis gets up to 21 divisions unless both extents are equal
+  $n_div = 21 unless ($n0 == $n1);
+  yaxis($n1, $sq0off, $ytitle, $n_div);
+  legend($have_zdb, $have_bits, ($x_annot_r));
+
+  if ($x_annot_r) {xgrid($x_annot_r, $n0, $sq0off, $n1, $sq1off);}
+  if ($y_annot_r) {ygrid($y_annot_r, $n0, $sq0off, $n1, $sq1off);}
+}
+	
+# void drawdiag(long n0, long n1)
+# Draw one black line, 1.5 units wide, from the plot origin to the
+# point ($n0+1, $n1+1).
+sub drawdiag
+{
+  my ($len0, $len1) = @_;
+
+  newline("stroke=\"black\" stroke-width=\"1.5\"");
+  my @from = (SX(0), SY(0));
+  move(@from);
+  my @to = (SX($len0+1), SY($len1+1));
+  draw(@to);
+  clsline($len0, $len1, 10000);
+}
+
+# tick array - candidate distances between axis ticks, in increasing
+# order; xaxis() and yaxis() take the first one that yields fewer than
+# the requested number of divisions
+my @tarr = (10,20,50,100,200,500,1000,2000,5000,10000,20000,50000,100000,200000,500000,1000000);
+# equal to the largest entry of @tarr
+my $MAX_INTERVAL=1000000;
+
+# void xaxis(long n, int offset, char *title)
+# Draw the x-axis tick marks, their numeric labels and the axis title.
+#   $n      - extent of the axis
+#   $offset - coordinate of the first position (labels are shifted by
+#             $offset-1 so ticks fall on multiples of the interval)
+#   $title  - text centred below the axis
+#   $n_div  - the tick interval is the first entry of @tarr that gives
+#             fewer than $n_div divisions
+sub xaxis {
+  my ($n, $offset, $title, $n_div) = @_;
+
+  my $tick = 6;
+
+  # search for the correct increment in the tick array; when no entry
+  # is coarse enough, fall back to the largest one
+  my $t_ix = $#tarr;
+  for (my $k=0; $k < @tarr; $k++) {
+    if ($n/$tarr[$k] < $n_div) {$t_ix = $k; last;}
+  }
+
+  # js is the distance between ticks
+  my $js = $tarr[$t_ix];
+
+  # jm is the number of whole intervals; the C original relied on
+  # integer division, so truncate explicitly
+  my $jm = int($n/$js);
+
+  # jo is how far ($offset-1) lies past the previous multiple of js
+  my $jo = ($offset-1) % $js;
+
+  # jl is that previous multiple itself (integer division in C); a
+  # floating-point quotient here would shift every label by jo
+  my $jl = ($offset-1) - $jo;
+
+  newline("stroke=\"black\" stroke-width=\"1.5\"");
+  for (my $i=1; $i<=$jm; $i++) {
+    move(SX($i*$js - $jo),SY(0));
+    draw(SX($i*$js - $jo),SY(0)+$tick);
+  }
+  clsline($n,$n,10000);
+
+  my $label_y = SY(0)+$tick+16;
+  my $numstr = sprintf("%ld",$js + $jl );
+  if (length($numstr) > 4) {
+    # long numbers: label only the first and the last tick
+    printf("<text x=\"%d\" y=\"%d\" text-anchor=\"middle\">%s</text>\n",SX($js-$jo),$label_y,$numstr);
+
+    $numstr = sprintf("%ld",$jm*$js+$jl);
+    printf("<text x=\"%d\" y=\"%d\" text-anchor=\"middle\">%s</text>\n",SX($jm*$js-$jo),$label_y,$numstr);
+  }
+  else {
+    for (my $i=1; $i<=$jm; $i++) {
+      $numstr = sprintf("%ld",$i*$js+$jl);
+      printf("<text x=\"%d\" y=\"%d\" text-anchor=\"middle\">%s</text>\n",SX($i*$js-$jo),$label_y,$numstr);
+    }
+  }
+
+  # axis title, centred under the axis
+  printf("<text x=\"%d\" y=\"%d\" text-anchor=\"middle\">%s</text>\n",SX($n/2),SY(0)-$tick+42, $title);
+}
+
+# xgrid(\@annots, $n0, $sq0_off, $n1, $sq1_off)
+# Draw the x-axis annotation grid: for every annotation that passes the
+# range checks, two dashed vertical lines (at beg and end), a coloured
+# block above the plot and the description, rotated by -90 degrees.
+# Each annotation is a hash ref with beg, end, sname and sdescr fields.
+sub xgrid {
+  my ($annot_arr_r, $n0, $sq0_off, $n1, $sq1_off) = @_;
+
+  # x coordinates are taken relative to the first sequence offset
+  my $sq_off = $sq0_off;
+
+  my $show_block = 1;
+  my $text_offset = 8;
+
+  # leave room for the coloured block between plot and label
+  if ($show_block) {$text_offset = 24;}
+
+  for my $annot ( @$annot_arr_r) {
+    next unless $annot->{beg} >= $sq_off;
+    next if ($annot->{end} > $sq_off + $n0 - 1);
+    # NOTE(review): when end >= beg the test above already skips every
+    # annotation this one would stop on, so this looks unreachable.
+    last if ($annot->{beg} > $sq_off + $n0 - 1);
+    newline("stroke=\"black\" stroke-width=\"1.5\" stroke-opacity=\"0.33\" stroke-dasharray=\"3,6\"" );
+    move(SX($annot->{beg} - $sq_off),SY(0));
+    draw(SX($annot->{beg} - $sq_off),SY($n1));
+    clsline();
+    newline("stroke=\"black\" stroke-width=\"1.5\" stroke-opacity=\"0.33\" stroke-dasharray=\"6,3\"" );
+    move(SX($annot->{end} - $sq_off),SY(0));
+    draw(SX($annot->{end} - $sq_off),SY($n1));
+    clsline();
+
+    if ($show_block) {
+      draw_block(SX($annot->{beg} - $sq_off), SY($n1) - 18,
+                 SX($annot->{end} - $sq_off) - SX($annot->{beg} - $sq_off),
+                 12, $annot_names{$annot->{sname}});
+    }
+
+    # the description becomes XML character data, so the markup
+    # characters must be replaced by entities ('&' first)
+    my $descr = defined($annot->{sdescr}) ? $annot->{sdescr} : "";
+    $descr =~ s/&/&amp;/g;
+    $descr =~ s/</&lt;/g;
+    $descr =~ s/>/&gt;/g;
+
+    # show rotated label, centred on the annotated region
+    my $xpos = SX(($annot->{end} - $annot->{beg})/2 + $annot->{beg} - $sq_off) + 4;
+    my $ypos = SY($n1) - $text_offset;
+    # text-anchor accepts start|middle|end; "start" is the default an
+    # SVG viewer falls back to for the invalid value "left"
+    printf("<text x=\"0\" y=\"0\" text-anchor=\"start\" transform=\"translate($xpos, $ypos) rotate(-90,0,0)\">%s</text>\n",$descr);
+  }
+}
+
+# ygrid(\@annots, $n0, $sq0_off, $n1, $sq1_off)
+# Draw the y-axis annotation grid: for every annotation that passes the
+# range checks, two dashed horizontal lines (at beg and end), a coloured
+# block to the right of the plot and the description next to it.
+# Each annotation is a hash ref with beg, end, sname and sdescr fields.
+sub ygrid {
+  my ($annot_arr_r, $n0, $sq0_off, $n1, $sq1_off) = @_;
+
+  # y coordinates are taken relative to the second sequence offset
+  my $sq_off = $sq1_off;
+
+  my $show_block = 1;
+  my $text_offset = 8;
+  # leave room for the coloured block between plot and label
+  if ($show_block) {$text_offset = 24;}
+
+  for my $annot ( @$annot_arr_r) {
+    next unless $annot->{beg} >= $sq_off;
+    next if ($annot->{end} > $sq_off + $n1 - 1);
+    # NOTE(review): when end >= beg the test above already skips every
+    # annotation this one would stop on, so this looks unreachable.
+    last if ($annot->{beg} > $sq_off + $n1 - 1);
+    newline("stroke=\"black\" stroke-width=\"1.5\" stroke-opacity=\"0.33\" stroke-dasharray=\"3,6\"" );
+    move(SX(0), SY($annot->{beg} - $sq_off));
+    draw(SX($n0), SY($annot->{beg} - $sq_off));
+    clsline();
+    newline("stroke=\"black\" stroke-width=\"1.5\" stroke-opacity=\"0.33\" stroke-dasharray=\"6,3\"" );
+    move(SX(0), SY($annot->{end} - $sq_off));
+    draw(SX($n0), SY($annot->{end} - $sq_off));
+    clsline();
+
+    my $xpos = SX($n0) + $text_offset;
+    my $ypos = SY(($annot->{end} - $annot->{beg})/2 + $annot->{beg} - $sq_off) + 4;
+
+    if ($show_block) {
+      # SY() decreases as the coordinate grows, so the block starts at
+      # the end coordinate and extends down to beg
+      draw_block(SX($n0)+6, SY($annot->{end} - $sq_off), 12,
+                 SY($annot->{beg} - $sq_off) - SY($annot->{end} - $sq_off),
+                 $annot_names{$annot->{sname}});
+    }
+
+    # the description becomes XML character data, so the markup
+    # characters must be replaced by entities ('&' first)
+    my $descr = defined($annot->{sdescr}) ? $annot->{sdescr} : "";
+    $descr =~ s/&/&amp;/g;
+    $descr =~ s/</&lt;/g;
+    $descr =~ s/>/&gt;/g;
+
+    # text-anchor accepts start|middle|end; "start" is the default an
+    # SVG viewer falls back to for the invalid value "left"
+    printf("<text x=\"$xpos\" y=\"$ypos\" text-anchor=\"start\">%s</text>\n",$descr);
+  }
+}
+
+#void yaxis(long n, int offset, char *title)
+# Draw the y-axis tick marks, their numeric labels and the axis title.
+#   $n      - extent of the axis
+#   $offset - coordinate of the first position (labels are shifted by
+#             $offset-1 so ticks fall on multiples of the interval)
+#   $title  - axis title
+#   $n_div  - the tick interval is the first entry of @tarr that gives
+#             fewer than $n_div divisions
+sub yaxis {
+  my ($n, $offset, $title, $n_div) = @_;
+
+  my $tick = 6;
+
+  # search for the correct increment in the tick array; when no entry
+  # is coarse enough, fall back to the largest one (as xaxis() does)
+  my $t_ix = $#tarr;
+  for (my $k=0; $k < @tarr; $k++) {
+    if ($n/$tarr[$k] < $n_div) {$t_ix = $k; last;}
+  }
+
+  # js is the distance between ticks
+  my $js = $tarr[$t_ix];
+
+  # jm is the number of whole intervals, always derived from the
+  # interval actually chosen; the C original relied on integer division
+  my $jm = int($n/$js);
+
+  # jo is how far ($offset-1) lies past the previous multiple of js
+  my $jo = ($offset-1) % $js;
+  # jl is that previous multiple itself (integer division in C); a
+  # floating-point quotient here would shift every label by jo
+  my $jl = ($offset-1) - $jo;
+
+  newline("stroke=\"black\" stroke-width=\"1.5\"");
+  for (my $i=1; $i<=$jm; $i++) {
+    move(SX(0),SY($i*$js-$jo));
+    draw(SX(0)-$tick,SY($i*$js-$jo));
+  }
+  clsline($n,$n,10000);
+
+  my $label_x = SX(0)-$tick-4;
+  my $numstr = sprintf("%d",$js+$jl);
+  if (length($numstr) > 4) {
+    # long numbers: label only the first and the last tick
+    printf("<text x=\"%d\" y=\"%d\" text-anchor=\"end\">%s</text>\n",$label_x,SY($js-$jo)+4,$numstr);
+
+    $numstr = sprintf("%ld",$jm*$js+$jl);
+    printf("<text x=\"%d\" y=\"%d\" text-anchor=\"end\">%s</text>\n",$label_x,SY($jm*$js-$jo)+4,$numstr);
+  }
+  else {
+    for (my $i=1; $i<=$jm; $i++) {
+      $numstr = sprintf("%ld",$i*$js+$jl);
+      printf("<text x=\"%d\" y=\"%d\" text-anchor=\"end\">%s</text>\n",$label_x,SY($i*$js-$jo)+4,$numstr);
+    }
+  }
+
+  # the title is written inside a group rotated by -90 degrees about
+  # the fixed point (142, 142)
+  printf(qq(<g transform="rotate(-90, 142, 142)">\n));
+  printf(qq(<text x="0" y="12" text-anchor="middle">%s</text>\n),$title);
+  print("</g>\n");
+}
+
+# draw_block($x, $y, $w, $h, $color)
+# Emit one filled <rect> with a white one-unit border.  $color is an
+# index into @block_colors, wrapped with modulo; the entry found there
+# is used directly as the SVG fill value.  No newline is printed.
+sub draw_block {
+  my ($x, $y, $w, $h, $color) = @_;
+
+  my $fill = $block_colors[$color % scalar(@block_colors)];
+
+  my $rect = qq(<rect x="$x" y="$y" width="$w" height="$h")
+           . qq( fill="$fill" stroke="white" stroke-width="1" />);
+  print($rect);
+}
+
+# legend($have_zdb, $have_bits, $annot_flg)
+# Draw the colour key below the plot: a caption ("E(): " or "bits: "),
+# then one sample line per score class followed by its threshold.
+#   $have_zdb  - true: thresholds are E()-values from @elinval
+#   $have_bits - true (and no $have_zdb): bit scores from @blinval
+#   neither    - raw scores from @ilinval (four classes instead of five)
+#   $annot_flg - true when x annotations are drawn; moves the legend down
+sub legend
+{
+  my ($have_zdb, $have_bits, $annot_flg) = @_;
+
+  # fixed positions of the (up to five) legend entries
+  my @xpos=(144,144,288,288,432);
+  my @ypos=(36,18,36,18,27);
+
+  my $y_off = 66;
+  if ($annot_flg) {$y_off = 120;}
+
+  # every legend element is placed relative to this row
+  my $base_y = $max_y + $y_off + $y_delta;
+
+  my $last = ($have_zdb || $have_bits) ? 5 : 4;
+
+  # both captions use the same row; the "bits" branch used to read the
+  # unrelated variable $_delta and ignored $y_delta
+  if ($have_zdb)  {printf("<text x=\"%d\" y=\"%d\">E(): </text>",54,$base_y - 24);}
+  elsif ($have_bits) {printf("<text x=\"%d\" y=\"%d\">bits: </text>",54,$base_y - 24);}
+
+  for (my $i=0; $i<$last ; $i++) {
+    # sample line in the colour of class $i
+    my $optstr = sprintf("stroke-width=\"1.5\" stroke=\"%s\"",$line_colors[$i]);
+    newline($optstr);
+    move($xpos[$i]-48,$base_y - $ypos[$i]);
+    draw($xpos[$i]+12,$base_y - $ypos[$i]);
+    clsline(1000,1000,10000);
+
+    # threshold text; the last class reuses the fourth threshold with
+    # the opposite comparison sign
+    my $numstr;
+    if ($have_zdb) {
+      if ($i==4) {$numstr = sprintf(">%.1lg",$elinval[3]);}
+      else {$numstr = sprintf("<%.1lg",$elinval[$i]);}
+    }
+    elsif ($have_bits) {
+      if ($i==4) {$numstr = sprintf("<%.1lf",$blinval[3]);}
+      else {$numstr = sprintf(">=%.1lf",$blinval[$i]);}
+    }
+    else {
+      if ($i==3) {$numstr = sprintf("<%d",$ilinval[3]);}
+      else {$numstr = sprintf(">%d",$ilinval[$i]);}
+    }
+
+    printf("<text align=\"center\" x=\"%d\" y=\"%d\">%s</text>\n",$xpos[$i] + 18, $base_y - $ypos[$i] + 4,$numstr);
+  }
+}
+
+# void linetype(int type)
+# Print the stroke attribute for score class $type; the colour is
+# taken from @line_colors.
+sub linetype
+{
+  my ($class_ix) = @_;
+  my $colour = $line_colors[$class_ix];
+  printf(qq( stroke="%s"), $colour);
+}
+
+#void closeplt()
+# Close the SVG document.
+sub closeplt
+{
+  print "</svg>\n";
+}
+
+#void opnline(int s, double bits)
+# Start the <path> element of one alignment: an XML comment with the
+# scores, then "<path", the stroke colour of the score class and the
+# opening of the d attribute (closed later by clsline()).
+# $have_zdb and $have_bits are not parameters; they are read from the
+# enclosing scope.
+sub opnline
+{
+  my ($s, $bits) = @_;
+
+  my $class;
+
+  if ($have_zdb) {
+    # classify by E()-value: first threshold the value is below
+    my $e_val = bit_to_E($bits);
+    printf("<!-- score: %d; bits: %.1g; E(): %.1g -->\n",$s,$bits,$e_val);
+    $class = 4;
+    for my $k (0 .. 3) {
+      if ($e_val < $elinval[$k]) {$class = $k; last;}
+    }
+  }
+  elsif ($have_bits) {
+    # classify by bit score: first threshold the score reaches
+    printf("<!-- score: %d; bits: %.1g -->\n",$s,$bits);
+    $class = 4;
+    for my $k (0 .. 3) {
+      if ($bits >= $blinval[$k]) {$class = $k; last;}
+    }
+  }
+  else {
+    # classify by raw score: only three thresholds, four classes
+    printf("<!-- score: %d -->\n",$s);
+    $class = 3;
+    for my $k (0 .. 2) {
+      if ($s > $ilinval[$k]) {$class = $k; last;}
+    }
+  }
+
+  print("<path ");
+  linetype($class);
+  print(" d=\"");
+}
+
+# void newline(char *options)
+# Open a <path> element and its d attribute.  $options, when true, is
+# inserted as the element's attributes; otherwise a plain black stroke
+# is used.
+sub newline
+{
+  my ($options) = @_;
+
+  my $attrs = $options ? $options : "stroke=\"black\"";
+  printf("<path %s d=\"", $attrs);
+}
+
+# void clsline(long x, long y, int s)
+# Close the d attribute and the <path> element opened by newline() or
+# opnline().  The three arguments are accepted but not used.
+sub clsline
+{
+  my ($x, $y, $s) = @_;
+
+  print qq(" fill="none" />\n);
+}
+
+#void move(int x, int y)
+# Append an absolute "move to" (M) command to the open path; both
+# values are printed as integers (%d).
+sub move
+{
+  my ($to_x, $to_y) = @_;
+  printf(" M %d %d", $to_x, $to_y);
+}
+
+# void sxy_move(int x, int y)
+# move() to a position given in plot coordinates: x goes through SX()
+# and y through SY() first.
+sub sxy_move
+{
+  my ($seq_x, $seq_y) = @_;
+  my @pixel = (SX($seq_x), SY($seq_y));
+  move(@pixel);
+}
+
+# void draw(int x, int y)
+# Append an absolute "line to" (L) command to the open path; both
+# values are printed as integers (%d).
+sub draw
+{
+  my ($to_x, $to_y) = @_;
+  printf(" L %d %d", $to_x, $to_y);
+}
+
+# void sxy_draw(int x, int y)
+# draw() to a position given in plot coordinates: x goes through SX()
+# and y through SY() first.
+sub sxy_draw
+{
+  my ($seq_x, $seq_y) = @_;
+  my @pixel = (SX($seq_x), SY($seq_y));
+  draw(@pixel);
+}
+
+#void cal_coord(int n0, int n1, long *a_start0, long *a_stop0, long *a_start1, long *a_stop1 )
+# Placeholder kept from the C interface; it does nothing in this version.
+sub cal_coord {
+  return;
+}
+
diff --git a/scripts/links2sql.pl b/scripts/links2sql.pl
new file mode 100755
index 0000000..bdcc301
--- /dev/null
+++ b/scripts/links2sql.pl
@@ -0,0 +1,61 @@
+#!/usr/bin/perl -w
+
+################################################################
+# copyright (c) 2014 by William R. Pearson and The Rector &
+# Visitors of the University of Virginia */
+################################################################
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under this License is distributed on an "AS
+# IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+# express or implied.  See the License for the specific language
+# governing permissions and limitations under the License. 
+################################################################
+
+use strict;
+use DBI;
+use Getopt::Long;
+
+use vars qw($host $db $user $password $table $tab_file);
+
+################################################################
+# links2sql.pl --db database --table table [--host host] [--user user]
+#              [--pass password] [--file tab_file] [link_file ...]
+#
+# Reads lines of the form "seed_acc<whitespace>hit1;hit2;..." from the
+# files named on the command line (or stdin), writes one
+# "seed<TAB>hit" line per hit that differs from the seed to a
+# temporary tab file, and bulk-loads that file into a freshly
+# (re)created table with the columns seed_acc and link_acc.
+################################################################
+
+GetOptions(
+    'host=s'=>\$host,
+    'db=s'=>\$db,
+    'user=s'=>\$user,
+    'pass=s'=>\$password,
+    'table=s'=>\$table,
+    'file=s'=>\$tab_file,
+    );
+
+$tab_file ||= "link_tmp.tab";
+
+# without a database and a table name nothing useful can be done; stop
+# before anything is written or dropped
+unless (defined($db) && length($db) && defined($table) && length($table)) {
+    die "usage: links2sql.pl --db database --table table [--host host] [--user user] [--pass password] [--file tab_file] [link_file ...]\n";
+}
+
+# an absent --host interpolates as the empty string, as before, but
+# without an "uninitialized value" warning
+$host = "" unless defined($host);
+
+# build the tab file first, so that a failure while reading the input
+# or writing the file cannot leave the table dropped and empty
+open(my $tab_fh, '>', $tab_file) or die "cannot open $tab_file for writing: $!\n";
+
+while (my $seed_line = <> ) {
+    chomp($seed_line);
+    # split(' ') also ignores leading blanks
+    my ($seed, $hit_line) = split(' ',$seed_line);
+    # blank lines and seeds without a hit list contribute nothing
+    next unless (defined($seed) && defined($hit_line));
+    for my $hit (split(/;/,$hit_line)) {
+	if ($hit ne $seed) {print $tab_fh "$seed\t$hit\n";}
+    }
+}
+close($tab_fh) or die "error writing $tab_file: $!\n";
+
+# the remaining steps run inside eval so that the tab file is removed
+# even when a database call dies (RaiseError is on)
+my $load_ok = eval {
+    my $dbh = DBI->connect("dbi:mysql:host=$host:$db",
+			   $user, $password,
+			   { RaiseError => 1, AutoCommit => 1}
+	) or die $DBI::errstr;
+
+    # NOTE(review): $table is interpolated into the SQL as given; it is
+    # assumed to come from a trusted command line.
+    $dbh->do(qq(drop table if exists $table;));
+    $dbh->do(qq(create table $table (seed_acc varchar(20) not NULL, link_acc varchar(20) not NULL, key seed_acc (seed_acc), key link_acc (link_acc));));
+
+    # quote() produces a properly escaped SQL string literal, so file
+    # names containing quote characters cannot break the statement
+    my $sql_file = $dbh->quote($tab_file);
+    $dbh->do(qq(load data local infile $sql_file into table $table;));
+
+    $dbh->disconnect();
+    1;
+};
+my $load_err = $@;
+
+unlink($tab_file);
+
+die $load_err unless $load_ok;
diff --git a/scripts/m8_btop_msa.pl b/scripts/m8_btop_msa.pl
new file mode 100755
index 0000000..9cf6da6
--- /dev/null
+++ b/scripts/m8_btop_msa.pl
@@ -0,0 +1,412 @@
+#!/usr/bin/perl -w
+
+################################################################
+# copyright (c) 2014,2015 by William R. Pearson and The Rector &
+# Visitors of the University of Virginia */
+################################################################
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under this License is distributed on an "AS
+# IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+# express or implied.  See the License for the specific language
+# governing permissions and limitations under the License. 
+################################################################
+
+################################################################
+# m8_btop_msa.pl --query query.file blast_tab_btop_file
+################################################################
+# m8_btop_msa.pl takes a query sequence and a blast tabular format
+# file with a BTOP field, and constructs a query-driven multiple
+# sequence alignment of the subject sequences that can be used as
+# input to psiblast with the "--in_msa msa.file" option.
+#
+# (because BLAST BTOP encoding provides the mismatched residues, the
+# library sequences are not required to produce the MSA -- they are
+# available in the BTOP string)
+# 
+# The BTOP alignment encoding file generated from "blastp/n" or
+# "blast_formatter" using the command: blast_formatter -archive
+# blast_output.asn -outfmt '7 qseqid sseqid pident length mismatch
+# gapopen qstart qend sstart send evalue bitscore score btop' >
+# blast_output.tab_annot
+#
+# the raw score shown above is used by the annot_blast_btop2.pl
+# program, if present, but is not required by m8_btop_msa.pl
+#
+################################################################
+
+use strict;
+use IPC::Open2;
+use Pod::Usage;
+use Getopt::Long;
+# use Data::Dumper;
+
+# read lines of the form:
+# gi|121694|sp|P20432.1|GSTT1_DROME	gi|121694|sp|P20432|GSTT1_DROME	100.00	209	0	0	1	209	1	209	6e-156	433	1113	209
+# gi|121694|sp|P20432.1|GSTT1_DROME	gi|1170090|sp|P04907|GSTF3_MAIZE	26.77	198	123	7	4	185	6	197	2e-08	51.2	121	FL1YG ... 1NKRA1YW1
+# gi|121694|sp|P20432.1|GSTT1_DROME	gi|81174731|sp|P0ACA5|SSPA_ECO57	39.66	58	32	2	43	100	49	103	8e-06	43.9	102	EDFLLI ... V-I-NEQS3FM
+# gi|121694|sp|P20432.1|GSTT1_DROME	gi|121695|sp|P12653|GSTF1_MAIZE	27.62	181	107	7	32	203	34	199	9e-05	40.8	94	LI1LF ... N-1AS1CLLM1
+
+# and report the domain content ala -m 8CC
+
+my ($shelp, $help, $evalue) = (0, 0, 0.001);
+my ($query_file, $bound_file) = ("","");
+my ($out_field_str) = ("");
+my $query_lib_r = 0;
+
+GetOptions(
+    "query=s" => \$query_file,
+    "query_file=s" => \$query_file,
+    "evalue=f" => \$evalue,
+    "bound_file=s" => \$bound_file,
+    "bound=s" => \$bound_file,
+    "seqbdr=s" => \$bound_file,
+    "h|?" => \$shelp,
+    "help" => \$help,
+    );
+
+pod2usage(1) if $shelp;
+pod2usage(exitstatus => 0, verbose => 2) if $help;
+# show the usage unless there is input: a file or pipe on stdin, or
+# file names on the command line
+unless (-f STDIN || -p STDIN || @ARGV) {
+ pod2usage(1);
+}
+
+my @hit_list = ();
+my @multi_align = ();	# one array ref of alignment columns per row
+my @multi_names = ();	# row names, parallel to @multi_align
+
+################
+# get query sequence, and insert into MSA
+#
+my ($query_acc, $query_seq_r, $query_len);
+if ($query_file) {
+  ($query_acc, $query_seq_r) = parse_query_lib($query_file);
+  $query_len = scalar(@$query_seq_r)-1;  # -1 for ' ' 1: offset
+}
+
+if (! $query_file || !$query_len) {
+  die "query sequence required";
+}
+
+# the query is the first row: a BTOP of "$query_len" means "all
+# positions identical to the query"
+push @multi_names, $query_acc;
+push @multi_align, btop2alignment($query_seq_r, $query_len, {BTOP=>$query_len, q_start=>1, q_end=>$query_len});
+my $max_sseqid_len = length($query_acc);
+
+################
+# get sequence boundaries if available
+#
+# NOTE(review): parse_bound_file() is not defined anywhere in this
+# script, so --bound_file fails at run time unless it is supplied from
+# elsewhere.
+my $seq_bound_hr = 0;
+
+if ($bound_file) {
+  $seq_bound_hr = parse_bound_file($bound_file)
+}
+
+# column names of the tabular input; "score" is only present when the
+# "# Fields:" header says so
+my @std_fields = qw(q_seqid s_seqid percid alen mismatch gopen q_start q_end s_start s_end evalue bits);
+my @tab_fields = (@std_fields, qw(score BTOP));
+
+while (my $line = <>) {
+  if ($line =~ m/^# Fields:/) {
+    # rebuild the column list from scratch for every header line; the
+    # former pop/pop/push removed one more column each time a header
+    # without the raw score was seen again
+    if ($line =~ m/bit score, score, BTOP/) {
+      @tab_fields = (@std_fields, qw(score BTOP));
+    }
+    else {
+      # raw score missing
+      @tab_fields = (@std_fields, "BTOP");
+    }
+    next;
+  }
+  next if ($line =~ m/^#/);
+  chomp $line;
+  next unless $line;
+
+  my %hit_data = ();
+
+  @hit_data{@tab_fields} = split(/\t/,$line);
+
+  # skip truncated lines that lack the fields needed below
+  next unless (defined($hit_data{evalue}) && defined($hit_data{BTOP}));
+
+  next if ($hit_data{evalue} > $evalue);
+
+#  push @hit_list, \%hit_data;
+  if (length($hit_data{s_seqid}) > $max_sseqid_len) {
+    $max_sseqid_len = length($hit_data{s_seqid});
+  }
+
+  if ($bound_file) {
+    # only subjects listed in the boundary file are shown.
+    # NOTE(review): the lookup key used to be $hit_data{subj_acc}, a
+    # field that is never filled in; s_seqid is the only subject
+    # identifier available here -- confirm that it matches the keys of
+    # the boundary file.
+    my $bound_r = $seq_bound_hr->{$hit_data{s_seqid}};
+    if (defined($bound_r)) {
+      push @multi_names, $hit_data{s_seqid};
+      push @multi_align, bound_btop2alignment($query_seq_r, $query_len, \%hit_data, @{$bound_r}{qw(start end)});
+    }
+  }
+  else {  # no sequence boundaries
+    push @multi_names, $hit_data{s_seqid};
+    push @multi_align, btop2alignment($query_seq_r, $query_len, \%hit_data);
+  }
+}
+
+# final MSA output
+$max_sseqid_len += 4;
+
+print "SSEARCHm8 multiple sequence alignment\n\n\n";
+
+# print the alignment in blocks of 60 columns; columns are 0-based and
+# the last valid index is $query_len-1
+for (my $i_pos = 0; $i_pos < $query_len; $i_pos += 60) {
+  my $i_end = $i_pos + 59;
+  if ($i_end >= $query_len) {$i_end = $query_len-1;}
+  for (my $n = 0; $n < scalar(@multi_names); $n++) {
+    printf("%-".$max_sseqid_len."s %s\n",$multi_names[$n],join("",@{$multi_align[$n]}[$i_pos .. $i_end]));
+  }
+  print "\n\n";
+}
+
+
+# decode_btop($btop_str)
+# input: a blast BTOP string of the form: "1VA160TS7KG10RK27"
+# returns a list_ref of tokens: (1, "VA", 60, "TS", 7, "KG", 10, "RK", 27)
+# Runs of digits stay one token; everything between them is cut into
+# two-character pieces (query residue, subject residue).
+sub decode_btop {
+  my ($btop_str) = @_;
+
+  # the capturing group keeps the digit runs in the result
+  my @pieces = split(/(\d+)/,$btop_str);
+
+  # a string that starts with digits yields an empty leading piece
+  shift @pieces unless $pieces[0];
+
+  my @out_tokens = ();
+
+  foreach my $piece (@pieces) {
+    if ($piece =~ m/^\d+$/) {
+      push @out_tokens, $piece;
+      next;
+    }
+    # split on captured pairs; the empty strings between them are dropped
+    push @out_tokens, grep { $_ } split(/(..)/,$piece);
+  }
+
+  return \@out_tokens;
+}
+
+
+# btop2alignment($query_seq_r, $query_len, $hit_data_hr)
+# Build one row of the query-anchored alignment from a BTOP string.
+#   $query_seq_r - array ref of query residues, 1-based
+#   $query_len   - number of query residues
+#   $hit_data_hr - hash ref with the fields BTOP, q_start and q_end
+# Returns an array ref with one entry per query position: '-' outside
+# the aligned region and where the subject has a gap, the query residue
+# at identities, the subject residue at mismatches.  Subject residues
+# aligned to a gap in the query are left out.
+sub btop2alignment {
+  my ($query_seq_r, $query_len, $hit_data_hr) = @_;
+
+  # $query_seq_r is 1: based
+  my @alignment = ();
+
+  # the left unaligned region gets "-"
+  for (my $i=1; $i < $hit_data_hr->{q_start}; $i++) {
+    push @alignment, "-";
+  }
+
+  my $btop_align_r = decode_btop($hit_data_hr->{BTOP});
+
+  my ($seq0, $seq1) = ("","");
+  my $qix = $hit_data_hr->{q_start};
+  for my $btop (@{$btop_align_r}) {
+    if ($btop =~ m/^\d+$/) {  # matching query sequence, add it up
+      for (my $i=0; $i < $btop; $i++) {
+	push @alignment, $query_seq_r->[$qix++];
+      }
+    }
+    else {  # could be: TS/-S/T-
+      ($seq0, $seq1) = split(//,$btop);
+      # '-' as first character is a gap in the query: no column for it
+      if ($seq0 ne '-') {
+	push @alignment, $seq1;
+	$qix++;
+      }
+    }
+  }
+  # all done with alignment, double check that $qix = $hit_data_hr->{q_end}
+  unless ($qix == $hit_data_hr->{q_end}+1) {
+    # '.' and '+' have the same precedence, so the sum needs parentheses
+    warn "$qix != ".($hit_data_hr->{q_end}+1);
+  }
+
+  # the right unaligned region gets "-" as well
+  for (my $i = $hit_data_hr->{q_end}+1; $i <= $query_len; $i++) {
+    push @alignment, "-";
+  }
+
+  return \@alignment;
+}
+
+# bound_btop2alignment($query_seq_r, $query_len, $hit_data_hr, $sb_start, $sb_end)
+# Like btop2alignment(), but only subject positions $sb_start..$sb_end
+# (inclusive) are shown; aligned positions outside that range become '-'.
+#   $query_seq_r - array ref of query residues, 1-based
+#   $query_len   - number of query residues
+#   $hit_data_hr - hash ref with BTOP, q_start, q_end and s_start
+# Returns an array ref with one entry per query position.
+sub bound_btop2alignment {
+  my ($query_seq_r, $query_len, $hit_data_hr, $sb_start, $sb_end) = @_;
+
+  # $query_seq_r is 1: based
+  my @alignment = ();
+
+  # the left unaligned region gets "-"
+  for (my $i=1; $i < $hit_data_hr->{q_start}; $i++) {
+    push @alignment, "-";
+  }
+
+  my $btop_align_r = decode_btop($hit_data_hr->{BTOP});
+
+  my ($seq0, $seq1) = ("","");
+  # $qix/$six track the current query/subject position
+  my ($qix, $six) = @{$hit_data_hr}{qw(q_start s_start)};
+
+  for my $btop (@{$btop_align_r}) {
+    if ($btop =~ m/^\d+$/) {  # matching query sequence, add it up
+      for (my $i=0; $i < $btop; $i++) {
+	if ($six >= $sb_start && $six <= $sb_end) {
+	  push @alignment, $query_seq_r->[$qix];
+	}
+	else {
+	  push @alignment, '-';
+	}
+	$qix++; $six++;
+      }
+    }
+    else {  # could be: TS/-S/T-
+      ($seq0, $seq1) = split(//,$btop);
+      if ($seq1 eq '-') {  # gap in subject
+	push @alignment, '-';
+	$qix++;
+      }
+      elsif ($seq0 ne '-') {  # mismatch
+	if ($six >= $sb_start && $six <= $sb_end) {
+	  push @alignment, $seq1;
+	}
+	else {
+	  push @alignment, '-';
+	}
+	$qix++;
+	$six++;
+      }
+      else {  # gap in query, consume $six
+	$six++;
+      }
+    }
+  }
+  # all done with alignment, double check that $qix = $hit_data_hr->{q_end}
+  unless ($qix == $hit_data_hr->{q_end}+1) {
+    # '.' and '+' have the same precedence, so the sum needs parentheses
+    warn $qix." != ".($hit_data_hr->{q_end}+1);
+  }
+
+  # the right unaligned region gets "-" as well
+  for (my $i = $hit_data_hr->{q_end}+1; $i <= $query_len; $i++) {
+    push @alignment, "-";
+  }
+
+  return \@alignment;
+}
+
+# parse_query_lib($query_file)
+# Read a FASTA file and return ($accession, \@residues).
+#   $accession - the last header line seen, without the leading '>' and
+#                cut at the first whitespace
+#   \@residues - upper-cased residues, 1-based (element 0 is "")
+# All non-header lines are concatenated, so the file is expected to
+# hold a single sequence.  Dies when the file cannot be opened.
+sub parse_query_lib {
+  my ($query_file) = @_;
+
+  open(my $qfd, '<', $query_file)
+    or die "cannot open query file $query_file: $!\n";
+
+  my ($header, $sequence) = ("","");
+  while (my $entry = <$qfd>) {  # one line at a time
+    chomp $entry;
+    if ($entry =~ m/^>/) {
+      $header = $entry;
+    }
+    else {
+      $sequence .= $entry
+    }
+  }
+  close($qfd);
+
+  $sequence =~ s/[^A-Za-z\*]//g;    # remove everything but letters and '*'
+  $sequence = uc($sequence);
+
+  $header =~ s/^>//;
+  $header =~ s/\s.*$//;
+  my @seq = split(//,$sequence);
+  unshift @seq,"";	# @seq is now 1-based
+
+  return ($header, \@seq);
+}
+
+# parse_query_file($query_file)
+# Read the residues of a sequence file and return them as an array ref
+# (0-based, upper-cased).  Lines starting with '>' or ';' are skipped;
+# everything except letters and '*' is removed from the other lines.
+# Dies when the file cannot be opened.
+sub parse_query_file {
+  my ($query_file) = @_;
+
+  my $seq_data = "";
+
+  open(my $qfd, '<', $query_file)
+    or die "cannot open query file $query_file: $!\n";
+  while (my $line = <$qfd>) {
+    next if $line =~ m/^>/;
+    next if $line =~ m/^;/;
+    chomp $line;
+    $line =~ s/[^A-Za-z\*]//g;
+    $seq_data .= $line
+  }
+  close($qfd);
+
+  $seq_data = uc($seq_data);
+
+  my @seq = split(//,$seq_data);
+
+  return \@seq;
+}
+
+__END__
+
+=pod
+
+=head1 NAME
+
+m8_btop_msa.pl - build a query-driven multiple sequence alignment from BLAST tabular output that includes a BTOP field
+
+=head1 SYNOPSIS
+
+ m8_btop_msa.pl --query query.fasta [--evalue 0.001] [--bound_file seq_bound.file] blast_tab_btop_file
+
+=head1 OPTIONS
+
+ -h	short help
+ --help include description
+
+ --query -- fasta query sequence (required)
+         -- same as --query_file
+
+ --evalue -- E()-value threshold; hits with a larger E()-value are
+             not included in the alignment (default 0.001)
+
+ --bound_file -- file of subject sequence boundaries; only hits whose
+                 subject is listed are included, and aligned residues
+                 outside the boundaries are shown as gaps
+              -- same as --bound, --seqbdr
+
+=head1 DESCRIPTION
+
+C<m8_btop_msa.pl> takes a query sequence and a blast tabular format
+file (-outfmt 6 or 7) with a BTOP field, and constructs a query-driven
+multiple sequence alignment of the subject sequences that can be used
+as input to psiblast with the C<--in_msa msa.file> option.
+
+Because the BLAST BTOP encoding provides the mismatched residues, the
+library sequences are not required to produce the MSA -- they are
+available in the BTOP string.
+
+The query sequence is the first row of the alignment.  Each hit with
+an E()-value no larger than C<--evalue> adds one row with one column
+per query residue: positions outside the aligned region and gaps in
+the subject are shown as C<->, and subject residues aligned to a gap
+in the query are left out.  The alignment is printed in blocks of 60
+columns.
+
+The tabular file is expected to contain the fields C<q_seqid s_seqid
+percid alen mismatch gopen q_start q_end s_start s_end evalue bits
+score BTOP>.  The raw C<score> field is optional; when the
+C<# Fields:> comment line does not list it, C<BTOP> is read from the
+column after C<bits>.
+
+=head1 AUTHOR
+
+William R. Pearson, wrp at virginia.edu
+
+=cut
diff --git a/scripts/m9B_btop_msa.pl b/scripts/m9B_btop_msa.pl
new file mode 100755
index 0000000..5c72854
--- /dev/null
+++ b/scripts/m9B_btop_msa.pl
@@ -0,0 +1,654 @@
+#!/usr/bin/perl -w
+
+################################################################
+# copyright (c) 2014,2015 by William R. Pearson and The Rector &
+# Visitors of the University of Virginia */
+################################################################
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under this License is distributed on an "AS
+# IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+# express or implied.  See the License for the specific language
+# governing permissions and limitations under the License.
+################################################################
+
+################################################################
+# m9B_btop_msa.pl --query query.file blast_tab_btop_file
+################################################################
+# m9B_btop_msa.pl takes a query sequence and a fasta -m 9b
+# file with a BTOP field, and constructs a query-driven multiple
+# sequence alignment of the subject sequences that can be used as
+# input to psiblast with the "--in_msa msa.file" option.
+#
+# (because BLAST BTOP encoding provides the mismatched residues, the
+# library sequences are not required to produce the MSA -- they are
+# available in the BTOP string)
+#
+# The BTOP alignment encoding file generated from "blastp/n" or
+# "blast_formatter" using the command: blast_formatter -archive
+# blast_output.asn -outfmt '7 qseqid sseqid pident length mismatch
+# gapopen qstart qend sstart send evalue bitscore score btop' >
+# blast_output.tab_annot
+#
+################################################################
+
+use strict;
+use IPC::Open2;
+use Pod::Usage;
+use Getopt::Long;
+# use Data::Dumper;
+
+# read lines of the form:
+# gi|121694|sp|P20432.1|GSTT1_DROME	gi|121694|sp|P20432|GSTT1_DROME	100.00	209	0	0	1	209	1	209	6e-156	433	1113	209
+# gi|121694|sp|P20432.1|GSTT1_DROME	gi|1170090|sp|P04907|GSTF3_MAIZE	26.77	198	123	7	4	185	6	197	2e-08	51.2	121	FL1YG ... 1NKRA1YW1
+# gi|121694|sp|P20432.1|GSTT1_DROME	gi|81174731|sp|P0ACA5|SSPA_ECO57	39.66	58	32	2	43	100	49	103	8e-06	43.9	102	EDFLLI ... V-I-NEQS3FM
+# gi|121694|sp|P20432.1|GSTT1_DROME	gi|121695|sp|P12653|GSTF1_MAIZE	27.62	181	107	7	32	203	34	199	9e-05	40.8	94	LI1LF ... N-1AS1CLLM1
+
+# and report the domain content ala -m 8CC
+
+# option defaults: E-value threshold 0.001, domain Q-value threshold 30.0
+my ($shelp, $help) = (0, 0);
+my ($evalue, $qvalue) = (0.001, 30.0);
+my $domain_bound = 0;
+my ($query_file, $bound_file_in, $bound_file_only) = ("", "", "");
+my ($bound_file_out, $masked_lib_out) = ("", "");
+my $query_lib_r = 0;
+
+# option spelling => destination variable; several spellings share
+# one destination (e.g. --query/--query_file, --evalue/--expect)
+my @option_spec = (
+    "query=s"              => \$query_file,
+    "query_file=s"         => \$query_file,
+    "evalue=f"             => \$evalue,
+    "expect=f"             => \$evalue,
+    "qvalue=f"             => \$qvalue,
+    "bound_file_in=s"      => \$bound_file_in,
+    "bound_file_only=s"    => \$bound_file_only,
+    "bound_file_out=s"     => \$bound_file_out,
+    "masked_library_out=s" => \$masked_lib_out,
+    "masked_lib_out=s"     => \$masked_lib_out,
+    "domain_bound"         => \$domain_bound,
+    "domain"               => \$domain_bound,
+    "bound_in=s"           => \$bound_file_in,
+    "bound_only=s"         => \$bound_file_only,
+    "bound_out=s"          => \$bound_file_out,
+    "h|?"                  => \$shelp,
+    "help"                 => \$help,
+);
+GetOptions(@option_spec);
+
+if ($shelp) { pod2usage(1); }
+if ($help)  { pod2usage(exitstatus => 0, verbose => 2); }
+
+# nothing to read: STDIN is neither a plain file nor a pipe, and no
+# file names were left on the command line
+if (!(-f STDIN) && !(-p STDIN) && !@ARGV) {
+ pod2usage(1);
+}
+
+# names given to the whitespace-separated values of the second
+# (tab-delimited) column of each hit line
+my @m9_field_names = qw(
+    percid perc_sim raw_score a_len
+    q_start q_end qc_start qc_end
+    s_start s_end sc_start sc_end
+    gap_q gap_l fs
+);
+
+my @hit_list = ();
+my %multi_align = ();	# MSA row (array ref of characters) keyed by sequence name
+my @multi_names = ();	# row names, in output order
+
+################
+# load the query sequence; it becomes the first row of the MSA
+#
+my ($query_acc, $query_seq_r, $query_len);
+die "query sequence required" unless ($query_file);
+
+($query_acc, $query_seq_r) = parse_query_lib($query_file);
+$query_len = $#{$query_seq_r};  # last index == residue count, element 0 is padding
+die "query sequence required" unless ($query_len);
+
+# the query aligned to itself: a single BTOP match run of $query_len
+my %query_self_hit = (BTOP => $query_len, q_start => 1, q_end => $query_len);
+
+push @multi_names, $query_acc;
+$multi_align{$query_acc} = btop2alignment($query_seq_r, $query_len, \%query_self_hit, 0);
+my $max_sseqid_len = length($query_acc);
+
+################
+# sequence boundaries: read them from a file when one was named,
+# otherwise start with an empty table if boundaries will be derived
+# from domains (--domain_bound) or written out (--bound_file_out)
+#
+my $seq_bound_hr = 0;
+my @seq_bound_accs = ();
+
+# --bound_file_in takes precedence over --bound_file_only
+my $bound_source = $bound_file_in || $bound_file_only;
+
+if ($bound_source) {
+  $seq_bound_hr = parse_bound_file($bound_source);
+}
+elsif ($domain_bound || $bound_file_out) {
+  $seq_bound_hr = {};
+}
+
+################
+# advance the input to the line after "The best scores are:"
+#
+my ($q_num, $query_descr, $q_len, $lib_cnt, $lib_len, $best_yes) = skip_to_results();
+if (!$query_descr) {
+  warn "Cannot find the best scores are:";
+}
+
+# split the '|'-delimited query identifier; identifiers that start
+# with gi|<number>| carry two extra leading fields
+my ($tmp, $gi, $q_db, $q_acc, $q_id);
+
+if ($query_descr !~ /^gi\|\d+\|/) {
+    ($q_db,$q_acc, $q_id) = split(/\|/,$query_descr);
+}
+else {
+    ($tmp, $gi, $q_db,$q_acc, $q_id) = split(/\|/,$query_descr);
+}
+
+# drop a trailing ".<digits>" version suffix from the accession
+$q_acc =~ s/\.\d+$//;
+
+# Main loop: read hit lines until a line containing ">>>" is seen.
+# Each retained hit contributes one row to %multi_align/@multi_names,
+# optionally trimmed to subject boundaries taken from a boundary file
+# or from domain annotations.
+while (my $line = <>) {
+  chomp $line;
+  next unless ($line);
+
+  last if $line =~ m/>>>/;
+  next if $line =~ m/^\+\-/; # skip over HSPs
+  chomp ($line);
+
+  my %hit_data =();
+
+  # tab-delimited columns: description/scores, alignment values,
+  # BTOP string, annotation string
+  my ($left, $right, $align_f, $annot_f) = split(/\t/,$line);
+
+  # missing (or false) BTOP/annotation columns become the string 'NULL'
+  $align_f= 'NULL' unless $align_f;
+  $annot_f= 'NULL' unless $annot_f;
+
+  # the identifier is either "db:id" followed by the accession in the
+  # next field, or a single "db|acc|id" token
+  my @fields = split(/\s+/,$left);
+  my ($ldb, $l_id, $l_acc) = ("","","");
+  if ($fields[0] =~ m/:/) {
+      ($ldb, $l_id) = split(/:/,$fields[0]);
+      ($l_acc) = $fields[1];
+  }
+  else {
+      ($ldb, $l_acc,$l_id) = split(/\|/,$fields[0]);
+  }
+
+  # name the alignment values; bits and E-value are the last two
+  # whitespace-separated fields of the left column
+  @hit_data{@m9_field_names} = split(/\s+/,$right);
+  @hit_data{qw(bits evalue)} = @fields[-2,-1];
+
+  #
+  # currently preselbdr files have $ldb|$l_acc, not full s_seqid, so construct it
+  #
+  my ($s_seqid, $subj_acc) = (join('|',($ldb, $l_acc, $l_id)), "$ldb|$l_acc");
+  @hit_data{qw(s_seqid subj_acc)} = ($s_seqid, $subj_acc);
+  @hit_data{qw(query_id query_acc)} = ($query_descr, $q_acc);
+  $hit_data{BTOP} = $align_f;
+
+  # hits above the --evalue threshold are ignored
+  next if ($hit_data{evalue} > $evalue);
+
+  # track the widest name for the output name column
+  if (length($s_seqid) > $max_sseqid_len) {
+    $max_sseqid_len = length($s_seqid);
+  }
+
+  my $have_dom = 0;
+  if ($domain_bound) {
+    my $hit_doms_ar = parse_hit_domains($annot_f);
+    # scan from left to right to make domain boundaries based on $qvalue
+    # the bounds start out inverted (left = s_end, right = s_start) so that
+    # any qualifying domain lying inside the alignment replaces them
+    my ($left_bound, $right_bound) = @hit_data{qw(s_end s_start)};
+    foreach my $dom_r ( @$hit_doms_ar ) {
+      # only non-virtual subject domains with Q-value above --qvalue count
+      next unless $dom_r->{target} eq 'subj';
+      next if $dom_r->{virtual};
+      next unless $dom_r->{qval} > $qvalue;
+
+      if ($dom_r->{s_start} < $left_bound) {
+	$left_bound = $dom_r->{s_start};
+	$have_dom = 1;
+      }
+
+      if ($dom_r->{s_end} > $right_bound) {
+	$right_bound = $dom_r->{s_end};
+	$have_dom = 1;
+      }
+    }
+
+    # record (or overwrite) the domain-derived boundary for this subject
+    if ($have_dom) {
+      if (exists($seq_bound_hr->{$subj_acc})) {
+	@{$seq_bound_hr->{$subj_acc}}{qw(start end)} = ($left_bound, $right_bound);
+      }
+      else {
+	$seq_bound_hr->{$subj_acc} = {start=>$left_bound, end=>$right_bound};
+	push @seq_bound_accs, $subj_acc;
+      }
+    }
+  }
+
+  # must have separate @hit_list that can be sorted, for searches with multiple alignment results
+
+  # case 1: only subjects that have a boundary entry can enter the MSA
+  if ($bound_file_only || $have_dom) {
+    if (exists($seq_bound_hr->{$subj_acc})) {
+      my ($status, $alignment) = bound_btop2alignment($query_seq_r, $query_len, \%hit_data, @{$seq_bound_hr->{$subj_acc}}{qw(start end)});
+      if ($status) {	# aligment is within boundary
+	push @multi_names, $s_seqid;
+	$multi_align{$s_seqid} = $alignment;
+      }
+      # do not delete entry, because it needs to be preserved
+    }
+  }
+  # case 2: subjects with a boundary entry are trimmed to it; subjects
+  # without one are added whole and their alignment ends recorded
+  elsif ($bound_file_in) {
+    if (exists($seq_bound_hr->{$subj_acc})) {
+      my ($status, $alignment) = bound_btop2alignment($query_seq_r, $query_len, \%hit_data, @{$seq_bound_hr->{$subj_acc}}{qw(start end)});
+      if ($status) {
+	push @multi_names, $s_seqid;
+	$multi_align{$s_seqid} = $alignment;
+#	push @multi_align, $alignment;
+      }
+    }
+    else {
+      push @multi_names, $s_seqid;
+#      push @multi_align, btop2alignment($query_seq_r, $query_len, \%hit_data, );
+      $multi_align{$s_seqid} = btop2alignment($query_seq_r, $query_len, \%hit_data);
+      @{$seq_bound_hr->{$subj_acc}}{qw(start end)} = @hit_data{qw(s_start s_end)};
+      push @seq_bound_accs, $subj_acc;
+    }
+  }
+  # case 3: every hit is added whole; with --bound_file_out the
+  # alignment ends are recorded for writing after the loop
+  else {  # no sequence boundaries
+    push @multi_names, $s_seqid;
+    $multi_align{$s_seqid} = btop2alignment($query_seq_r, $query_len, \%hit_data);
+#    push @multi_align, btop2alignment($query_seq_r, $query_len, \%hit_data);
+    if (!$have_dom && ($bound_file_out)) {
+      @{$seq_bound_hr->{$subj_acc}}{qw(start end)} = @hit_data{qw(s_start s_end)};
+      push @seq_bound_accs, $subj_acc;
+    }
+  }
+}
+
+# final MSA output
+# name column width: longest identifier plus two spaces of padding
+$max_sseqid_len += 2;
+
+print "SSEARCHm9B multiple sequence alignment\n\n\n";
+
+# print the rows in interleaved blocks of 60 columns.  The row arrays
+# are 0-based and are expected to hold $query_len elements, so the last
+# valid index is $query_len-1.
+my $i_pos = 0;
+for (my $j = 0; $j < $query_len/60; $j++) {
+  my $i_end = $i_pos + 59;
+  # clamp with >= : an end index equal to $query_len is already one past
+  # the last element and would put an undefined value into the slice
+  if ($i_end >= $query_len) {$i_end = $query_len-1;}
+  for my $acc (@multi_names) {
+    next unless $acc;
+    printf("%-".$max_sseqid_len."s %s\n",$acc,join("",@{$multi_align{$acc}}[$i_pos .. $i_end]));
+  }
+  $i_pos += 60;
+  print "\n\n";
+}
+
+################
+# --bound_file_out: write one "accession<tab>start<tab>end" line per
+# recorded subject, in the order the accessions were recorded
+if ($bound_file_out) {
+  open(my $bound_fd, ">", $bound_file_out) || die "cannot open $bound_file_out";
+  foreach my $bound_acc ( @seq_bound_accs ) {
+    my ($b_start, $b_end) = @{$seq_bound_hr->{$bound_acc}}{qw(start end)};
+    print $bound_fd join("\t", ($bound_acc, $b_start, $b_end)),"\n";
+  }
+  close($bound_fd);
+}
+
+# --masked_lib_out: write every MSA row (query included) as a FASTA
+# entry with the '-' characters removed, 60 residues per line
+if ($masked_lib_out) {
+  open(my $masked_fd, ">", $masked_lib_out) || die "cannot open $masked_lib_out";
+
+  for my $s_acc ( @multi_names ) {
+    print $masked_fd ">$s_acc\n";
+    my $seq_lines = join('',@{$multi_align{$s_acc}});
+
+    # currently, simply remove all '-' insertions --
+    # other options would be to make external '-'s 'X's
+    $seq_lines =~ s/\-//g;
+
+    # break after every 60 residues, but only when more sequence follows;
+    # otherwise a length that is a multiple of 60 would end in a blank line
+    $seq_lines =~ s/(.{60})(?=.)/$1\n/g;
+    print $masked_fd "$seq_lines\n";
+  }
+  close($masked_fd);
+}
+
+# Tokenize a blast BTOP string of the form: "1VA160TS7KG10RK27"
+# Returns an array ref of tokens: (1, "VA", 160, "TS", 7, "KG", 10, "RK", 27)
+# -- digit runs are kept whole, everything else is cut into
+# 2-character chunks.
+#
+sub decode_btop {
+  my ($btop_str) = @_;
+
+  # the capturing group makes split() return the digit runs as fields too
+  my @pieces = split(/(\d+)/,$btop_str);
+
+  # a string that begins with digits produces an empty leading field
+  shift @pieces if (!$pieces[0]);
+
+  my @decoded = ();
+
+  foreach my $piece (@pieces) {
+    if ($piece !~ m/^\d+$/) {
+      # run of residue pairs: keep the 2-character captures and any
+      # leftover, discarding the empty (false) separator fields
+      push @decoded, grep { $_ } split(/(..)/,$piece);
+    }
+    else {
+      push @decoded, $piece;
+    }
+  }
+
+  return \@decoded;
+}
+
+# Parse the domain annotation column of a hit line.
+#
+# $annot_str is a '|'-prefixed list of entries such as
+#   |RX:6-65:6-65:s=311;b=125.4;I=1.000;Q=339.6;C=C.HTH~1
+#
+# Returns an array ref with one hash ref per entry whose C= name does
+# not contain NODOM.  Keys: dom, virtual, bits, percid, qval, target
+# ('query' when the entry tag matches RX, otherwise 'subj'), q_start,
+# q_end, s_start, s_end, score.  An empty array ref is returned when
+# $annot_str is empty, so callers can always dereference the result.
+sub parse_hit_domains {
+  my ($annot_str) = @_;
+
+## annot_str looks like: "|RX:6-65:6-65:s=311;b=125.4;I=1.000;Q=339.6;C=C.HTH~1
+#                         |XR:6-65:6-65:s=311;b=125.4;I=1.000;Q=339.6;C=C.HTH~1
+#                         |RX:66-297:66-297:s=1200;b=483.7;I=1.000;Q=1409.6;C=NODOM~0
+#                         |XR:66-297:66-297:s=1200;b=483.7;I=1.000;Q=1409.6;C=NODOM~0
+
+  # was "return 0", which is fatal under strict refs once dereferenced
+  return [] unless ($annot_str);
+
+  my @hit_annots = ();
+
+  my @annots = split(/\|/,$annot_str);
+  shift @annots;	# remove first blank
+
+  for my $annot ( @annots ) {
+    my %dom_info = ();
+
+    # parse an entry:
+    # |RX:6-65:6-65:s=311;b=125.4;I=1.000;Q=339.6;C=C.HTH~1
+    # ';' separates: coordinates+score, b=, I=, Q=, C=
+    my @d_fields = split(";",$annot);
+
+    ($dom_info{dom}) = ($d_fields[4] =~ m/C=(.+?)~?\d*v?$/);   # also remove virtual domain symbols
+    next if ($dom_info{dom} =~ m/NODOM/);
+
+    ################
+    # parse @d_fields
+    # a trailing 'v' on the C= field marks a virtual domain
+    if ($d_fields[4] =~ m/v$/) {
+      $dom_info{virtual} = 1;
+    }
+    else {
+      $dom_info{virtual} = 0;
+    }
+
+    # values that cannot be parsed are reported and stored as the string '\N'
+    ($dom_info{bits}) = ($d_fields[1] =~ m/b=(\-?\d+\.?\d*)/);
+    unless (defined($dom_info{bits})) {
+	warn "missing score info - annot: $annot\n  annot_str: $annot_str";
+	$dom_info{bits} = '\N';
+    }
+    ($dom_info{percid}) = ($d_fields[2] =~ m/I=(\-?[\d\.]+)/);
+    unless (defined($dom_info{percid})) {
+	warn "missing percid info - annot: $annot\n  annot_str: $annot_str";
+	$dom_info{percid} = '\N';
+    }
+
+    # qval stays undefined when no Q= value is present
+    ($dom_info{qval}) = ($d_fields[3] =~ m/Q=([\d\.]+)/);
+
+    ################
+    # parse @c_fields
+    # ':' separates: tag, query range, subject range, s=<score>
+    my @c_fields = split(":",$d_fields[0]);
+
+    if ($c_fields[0] =~ m/RX/) {$dom_info{target} = 'query';}
+    else {$dom_info{target} = 'subj';}
+
+    @dom_info{qw(q_start q_end)} = ($c_fields[1] =~ m/(\d+)\-(\d+)/);
+    @dom_info{qw(s_start s_end)} = ($c_fields[2] =~ m/(\d+)\-(\d+)/);
+    ($dom_info{score}) = ($c_fields[3] =~ m/s=(\-?\d+)/);
+    unless (defined($dom_info{score})) {
+	warn "missing score info - annot: $annot\n  annot_str: $annot_str";
+	$dom_info{score} = '\N';
+    }
+
+    push @hit_annots, \%dom_info;
+  }
+
+  return \@hit_annots;
+}
+
+
+# Build one MSA row for a hit from its BTOP string.
+#
+#   $query_seq_r  - array ref of query residues, 1-based (element 0 is padding)
+#   $query_len    - number of query residues
+#   $hit_data_hr  - hash ref with at least q_start, q_end and BTOP
+#   $seq_bound_hr - accepted for call compatibility, not used here
+#
+# Returns an array ref of characters: '-' for query positions before
+# q_start and after q_end, the query residue for each matched position,
+# and the subject character of each BTOP pair (which is '-' where the
+# subject has a gap).  Pairs with a gap in the query are dropped, so the
+# row has no columns beyond the query.
+sub btop2alignment {
+  my ($query_seq_r, $query_len, $hit_data_hr, $seq_bound_hr) = @_;
+
+  # $query_seq_r is 1: based
+  my @alignment = ();
+
+  # the left unaligned region gets "-";
+  for (my $i=1; $i < $hit_data_hr->{q_start}; $i++) {
+    push @alignment, "-";
+  }
+
+  my $btop_align_r = decode_btop($hit_data_hr->{BTOP});
+
+  my ($seq0, $seq1) = ("","");
+  my $qix = $hit_data_hr->{q_start};
+  for my $btop (@{$btop_align_r}) {
+    if ($btop =~ m/^\d+$/) {  # matching query sequence, add it up
+      for (my $i=0; $i < $btop; $i++) {
+	push @alignment, $query_seq_r->[$qix++];
+      }
+    }
+    else {  # could be: TS/-S/T-
+      # $seq0 is the query character, $seq1 the subject character
+      ($seq0, $seq1) = split(//,$btop);
+      if ($seq0 ne '-') {
+	push @alignment, $seq1;
+	$qix++;
+      }
+    }
+  }
+  # all done with alignment, double check that $qix = $hit_data_hr->{q_end}
+  unless ($qix == $hit_data_hr->{q_end}+1) {
+    # parenthesize the sum: '.' and '+' share precedence, so without the
+    # parentheses the concatenated string would be used as a number
+    warn "$qix != ".($hit_data_hr->{q_end}+1);
+  }
+
+  # the right unaligned region gets "-" as well
+  for (my $i = $hit_data_hr->{q_end}+1; $i <= $query_len; $i++) {
+    push @alignment, "-";
+  }
+
+  return \@alignment;
+}
+
+################
+# generates MSA alignment entry between $sb_start and $sb_end
+# if there are no aligned residues between these locations, return $status=0
+#
+#   $query_seq_r - array ref of query residues, 1-based (element 0 is padding)
+#   $query_len   - number of query residues
+#   $hit_data_hr - hash ref with at least q_start, q_end, s_start and BTOP
+#   $sb_start, $sb_end - inclusive subject coordinates to keep
+#
+# Returns ($status, $alignment_r).  $alignment_r is built like the
+# btop2alignment() row, except that aligned positions whose subject
+# coordinate is outside $sb_start..$sb_end become '-'.  $status is 1 if
+# at least one aligned residue was kept, else 0.
+
+sub bound_btop2alignment {
+  my ($query_seq_r, $query_len, $hit_data_hr, $sb_start, $sb_end) = @_;
+
+  # $query_seq_r is 1: based
+  my @alignment = ();
+
+  my $have_aligned_res = 0;
+
+  # the left unaligned region gets "-";
+  for (my $i=1; $i < $hit_data_hr->{q_start}; $i++) {
+    push @alignment, "-";
+  }
+
+  my $btop_align_r = decode_btop($hit_data_hr->{BTOP});
+
+  my ($seq0, $seq1) = ("","");
+  # $qix/$six track the current query/subject coordinate
+  my ($qix, $six) = @{$hit_data_hr}{qw(q_start s_start)};
+
+  for my $btop (@{$btop_align_r}) {
+    if ($btop =~ m/^\d+$/) {  # matching query sequence, add it up
+      for (my $i=0; $i < $btop; $i++) {
+	if ($six >= $sb_start && $six <= $sb_end) {
+	  push @alignment, $query_seq_r->[$qix];
+	  $have_aligned_res=1;
+	}
+	else {
+	  push @alignment, '-';
+	}
+	$qix++; $six++;
+      }
+    }
+    else {  # could be: TS/-S/T-
+      # $seq0 is the query character, $seq1 the subject character
+      ($seq0, $seq1) = split(//,$btop);
+      if ($seq1 eq '-') {  # gap in subject
+	push @alignment, '-';
+	$qix++;
+      }
+      elsif ($seq0 ne '-') {  # mismatch
+	if ($six >= $sb_start && $six <= $sb_end) {
+	  $have_aligned_res=1;
+	  push @alignment, $seq1;
+	}
+	else {
+	  push @alignment, '-';
+	}
+	$qix++;
+	$six++;
+      }
+      else {  # gap in query, consume $six
+	$six++;
+      }
+    }
+  }
+  # all done with alignment, double check that $qix = $hit_data_hr->{q_end}
+  unless ($qix == $hit_data_hr->{q_end}+1) {
+    # parenthesize the sum: '.' and '+' share precedence, so without the
+    # parentheses the concatenated string would be used as a number
+    warn $qix." != ".($hit_data_hr->{q_end}+1);
+  }
+
+  # the right unaligned region gets "-" as well
+  for (my $i = $hit_data_hr->{q_end}+1; $i <= $query_len; $i++) {
+    push @alignment, "-";
+  }
+
+  return ($have_aligned_res, \@alignment);
+}
+
+# Read the query sequence from a FASTA file.
+#
+#   $query_file - path of a FASTA file; only the first entry is used
+#
+# Returns ($header, \@seq): $header is the first whitespace-delimited
+# word of the '>' line (without the '>'); @seq holds the upper-cased
+# sequence one character per element, with an empty element 0 so that
+# residue coordinates are 1-based.  Only letters and '*' are kept.
+# Dies if the file cannot be opened.
+sub parse_query_lib {
+  my ($query_file) = @_;
+
+  # three-argument open, checked: an unreadable file is reported here
+  # rather than surfacing later as an empty query
+  open(my $qfd, '<', $query_file) || die "cannot open query file $query_file: $!";
+
+  my ($header, $sequence) = ("","");
+  while (my $entry = <$qfd>) {  # one line at a time
+    chomp $entry;
+    if ($entry =~ m/^>/) {
+      # a second header starts another entry; mixing its residues into the
+      # first sequence would corrupt the query, so stop reading
+      if ($header) {
+	warn "$query_file contains more than one sequence; using the first";
+	last;
+      }
+      $header = $entry;
+    }
+    else {
+      $sequence .= $entry
+    }
+  }
+  close($qfd);
+
+  $sequence =~ s/[^A-Za-z\*]//g;    # remove everything but letters
+  $sequence = uc($sequence);
+
+  $header =~ s/^>//;
+  $header =~ s/\s.*$//;
+
+  my @seq = split(//,$sequence);
+  unshift @seq,"";	# @seq is now 1-based
+
+  return ($header, \@seq);
+}
+
+# Read a tab-delimited boundary file: accession<tab>start<tab>end.
+#
+#   $bound_file - path of the boundary file
+#
+# Returns a hash ref: accession => {start=>..., end=>...}.  Each new
+# accession is also appended to the file-level @seq_bound_accs list.
+# Lines starting with '#' and blank lines are skipped; only the first
+# entry for an accession is kept (later ones produce a warning).
+# If the file cannot be opened a warning is issued and an empty hash
+# ref is returned, so callers can always dereference the result.
+sub parse_bound_file {
+  my ($bound_file) = @_;
+
+  my %seq_bound = ();
+
+  my $qfd;
+  unless (open($qfd, '<', $bound_file)) {
+    # was "return 0", which is fatal under strict refs as soon as the
+    # result is used as a hash ref
+    warn "cannot open boundary file $bound_file: $!";
+    return \%seq_bound;
+  }
+
+  while (my $line = <$qfd>) {
+    next if ($line =~ m/^#/);
+    chomp $line;
+    next unless ($line =~ m/\S/);	# a blank line has no accession
+    my @data = split(/\t/,$line);
+    if (!defined($seq_bound{$data[0]})) {
+      $seq_bound{$data[0]} = {start=>$data[1], end=>$data[2]};
+      push @seq_bound_accs, $data[0];
+    }
+    else {
+      warn "multiple boundaries for $data[0]";
+    }
+  }
+  close($qfd);
+
+  return \%seq_bound;
+}
+
+# Advance the input (<>) past the query header to the line that follows
+# "The best scores are:".
+#
+# Returns ($q_num, $query_desc, $q_len, $l_num, $l_len, $best_yes):
+#   $q_num, $query_desc, $q_len - number, identifier and length taken
+#                                 from the "N>>>id ... - LEN aa" line
+#   $l_num, $l_len - sequence and residue counts from the
+#                    "... residues in ... sequences" line
+#   $best_yes      - 1 if "The best scores are:" was found, 0 if the
+#                    input ended or "!! No sequences" was seen first
+# Returns (0, "") when no query header is found before ">>>///" or the
+# end of input.
+sub skip_to_results {
+
+  my ($q_num, $query_desc, $q_len, $l_num, $l_len);
+  my ($have_query, $best_yes) = (0, 0);
+
+  while (my $line = <>) {
+    if ($line =~ m/^\s*(\d+)>>>(\S+)\s.+ \- (\d+) aa$/) {
+      ($q_num,$query_desc, $q_len) = ($1,$2,$3);
+      $line = <>;	# skip Library:
+      $line = <>;	#   153571012 residues in 291716 sequences
+      if (defined($line)) {
+	($l_len, $l_num) = ($line =~ m/^\s+(\d+)\s+residues in\s+(\d+)/);
+      }
+      $have_query = 1;
+      last;
+    }
+    last if ($line =~ m/>>>\/\/\//);
+  }
+
+  return (0,"") unless ($have_query);
+
+  while (my $line = <>) {
+    if ($line =~ m/^The best scores are:/) {
+      $best_yes = 1;
+      last;
+    }
+    last if ($line =~ m/^!! No sequences/);
+  }
+
+  # six values, in the order the caller unpacks them; the former
+  # eight-value list shifted every value after the description
+  return ($q_num, $query_desc, $q_len, $l_num, $l_len, $best_yes);
+}
+
+__END__
+
+=pod
+
+=head1 NAME
+
+ m9B_btop_msa.pl
+
+=head1 SYNOPSIS
+
+ m9B_btop_msa.pl --query_file query.fasta [--bound_file_in seqbdr.tab] fasta_m9_output.file
+
+=head1 OPTIONS
+
+ -h	short help
+ --help include description
+
+ --query_file -- query sequence file
+              -- same as --query
+                 (only one sequence per file)
+
+ --bound_file_in -- tab delimited accession<tab>start<tab>end that
+                    specifies MSA boundaries WITHIN alignment.
+                    Additional hits use alignment (or domain)
+                    boundaries.
+
+ --bound_file_only -- tab delimited accession<tab>start<tab>end that
+                      specifies MSA boundaries WITHIN alignment.
+                      Only sequences in --bound_file_only will be in the MSA.
+
+ --bound_file_out -- "--bound_file" for next iteration of psisearch2
+
+ --domain_bound  parse domain annotations (-V) from m9B file
+ --domain
+
+ --masked_lib_out -- FASTA format library of MSA sequences
+
+=head1 DESCRIPTION
+
+C<m9B_btop_msa.pl> takes a fasta36/ssearch36 -m 9B output file, which
+includes a BTOP encoded alignment string, and produces the multiple
+sequence alignment (MSA) implied by the query sequence, alignment
+boundaries, and pairwise alignments.  The alignment does not allow
+gaps in the query sequence, only in the subject sequences.
+
+The C<--query_file> must be specified, and the query sequence is
+provided as the first sequence in the MSA.
+
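+If a C<--bound_file_only> file is provided, then the ends of the
+alignments are reduced to the coordinates specified by that file.  In
+addition, only sequences included in that file are included in
+the MSA.  With C<--bound_file_in>, hits that are not listed in the
+file are still included, using their alignment boundaries.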
+
+Output: A clustal-like interleaved multiple sequence alignment that
+can be used as input (using the C<-in_msa> option) to C<psiblast>.
+
+=head1 AUTHOR
+
+William R. Pearson, wrp at virginia.edu
+
+=cut
diff --git a/scripts/plot_domain2t.cgi b/scripts/plot_domain2t.cgi
new file mode 100755
index 0000000..0aa0c53
--- /dev/null
+++ b/scripts/plot_domain2t.cgi
@@ -0,0 +1,667 @@
+#!/usr/bin/perl -w
+
+# plot_domain2.pl - produce SVG plot for aligned domains
+# version2 plots both n0 and n1 sequences, with 2 axes
+#
+# args:
+#  q_cstop - n0 - query length
+#  l_cstop - n1 - lib length
+#  q_name - query_acc
+#  l_name - library_acc
+#  l_astart= lib start  (need q_start, q_astop, l_astart, l_astop)
+#  l_astop= lib stop 
+#  pgm = program used 
+#  regions -- same as annotations on alignment
+#  doms -- domains on (library) sequence
+#
+#  l_annot - script to run to annotate library domain (separate from alignment)
+#  q_annot - script to run to annotate query domain (separate from alignment)
+#
+
+# 9-May-2013 -- modify to accommodate reverse-complement coordinates
+
+use strict;
+use Getopt::Long;
+use Pod::Usage;
+
+use CGI qw(header param end_html);
+#use URI::Escape;
+
+# package globals shared by the plotting routines
+use vars qw($pminx $pmaxx $pminy $pmaxy $lvstr $max_x $max_y
+	    $fxscal $fyscal $fxoff $fyoff $x_rev $y_rev
+	    @block_colors
+	    $annot_color %annot_names %color_names);
+
+# fill colors for domain blocks; color indexes are taken modulo the list length
+# (the '@' sigil is restored here: the archived copy showed " at block_colors")
+@block_colors = qw( slategrey lightgreen lightblue pink cyan tan gold plum mediumplum );
+
+$annot_color = 1;
+%annot_names = ();
+
+# annotation scripts, indexed by the numeric q_annot/l_annot argument
+my @annot_scripts = ("", "",
+		     "ann_feats2ipr.pl",
+		     "ann_feats2l.pl",
+		     "ann_feats2ipr.pl",
+		     "ann_pfam26.pl",
+		     "ann_pfam26.pl --pfacc",
+		     "ann_pdb_cath.pl",
+		     "ann_pdb_cath.pl --class",
+		    );
+
+# $max_x, $max_y define the maximum plotting area
+# the actual bounding box/view area will be larger if annotation comments are available
+($max_x,$max_y)=(540,24);
+
+# candidate x-axis lengths; the first one that fits the combined axis is used
+my @xax_len = (200, 300, 500, 1000, 2000, 3000, 5000, 10000, 20000, 30000, 50000);
+my $max_xax = -1;
+my $comb_xax = 0;  # comb_xax captures the length of the two sequences minus the offset
+my ($x0c_off, $x1c_off, $xdc_off) = (0,0,0);
+
+my ($x0f3, $x1f3) = (1,1);	# set to 3 for fastx (x0f3) or tfastx (x1f3)
+
+# tick array - values */
+my @tarr = (50, 100,200,500,1000,2000,5000,10000,20000);
+my $MAX_INTERVAL=20000;
+
+my $q = CGI->new;
+
+my %dom_colors = ();
+my $max_color = 0;
+
+my @arg_names = $q->param();
+
+# one value per argument name.  param() is forced into scalar context:
+# in list context a repeated parameter returns several values, which
+# would shift the key/value pairing of %args and let a request inject
+# arbitrary keys.
+my %args = map { $_ => scalar($q->param($_)) } @arg_names;
+
+# translated searches: one of the two sequences is in nucleotide
+# coordinates, three per aligned residue
+if (defined($args{pgm})) {
+  if ($args{pgm} =~ m/^f[xy]$/) { $x0f3 = 3;}
+  elsif ($args{pgm} =~ m/^tf[xy]/) { $x1f3 = 3;}
+}
+
+#unless ($ENV{DOCUMENT_ROOT}) {
+#  %args = map { $_ => uri_unescape($args{$_}) } keys %args;
+#}
+
+my ($region_info_r, $q_dom_info_r, $l_dom_info_r);
+
+# aligned regions and per-sequence domains arrive as encoded strings;
+# missing arguments give empty lists
+if ($args{regions}) {
+  $region_info_r = parse_regions($args{regions});
+}
+else {$region_info_r = [];}
+
+if ($args{doms}) {
+  ($q_dom_info_r, $l_dom_info_r) = parse_domains($args{doms});
+} else {
+  $q_dom_info_r = [];
+  $l_dom_info_r = [];
+}
+
+# query annotations from an external script; the code that filled this
+# list is disabled below, so it stays empty
+my @q_annots = ();
+# unless (scalar(@{$q_dom_info_r})) {
+#   my $q_annot_script = "";
+#   if (defined($args{q_annot}) && $args{q_annot}) {
+#     $q_annot_script = $annot_scripts[$args{q_annot}];
+#   }
+#   if (!$q_annot_script && defined($args{l_annot}) && $args{l_annot}) {
+#     $q_annot_script = $annot_scripts[$args{l_annot}];
+#   }
+
+#   if ($q_annot_script) {
+#     open(S_IN, '-|',"./$q_annot_script --lav \'sp|$args{q_name}'") || die "cannot open $args{q_name}\n";
+#     while (my $s_line = <S_IN>) {
+#       next if ($s_line =~ m/^#/);
+#       next if ($s_line =~ m/^>/);
+#       chomp($s_line);
+#       my %q_data = ();
+#       @q_data{qw(beg end descr)} = split(/\t/,$s_line);
+#       if ($dom_colors{$q_data{descr}}) {
+#   	$q_data{color} = $dom_colors{$q_data{descr}}
+#       } else {
+#   	$q_data{color} = ++$max_color;
+#   	$dom_colors{$q_data{descr}} = $max_color;
+#       }
+#       push @q_annots, \%q_data;
+#     }
+#   }
+# }
+
+
+# axes first (sequence lengths are stop-start+1), then the alignment
+# box, then the aligned regions
+openplt(($args{q_cstop}-$args{q_cstart})+1,
+	($args{l_cstop}-$args{l_cstart})+1,
+	$q_dom_info_r, $l_dom_info_r);
+draw_align(\%args);
+draw_regions($region_info_r, $args{l_cstop}) if (scalar(@{$region_info_r}));
+
+my $q_annot_script = "";
+if ($args{doms}) {
+  # library domains
+  draw_doms($l_dom_info_r, $x1c_off, -12, $args{l_cstart}, $args{l_cstop})
+    if (scalar(@{$l_dom_info_r}));
+
+  # query domains: the parsed list when it has entries, otherwise @q_annots
+  my $q_doms_r = scalar(@{$q_dom_info_r}) ? $q_dom_info_r : \@q_annots;
+  draw_doms($q_doms_r, $x0c_off, 48, $args{q_cstart}, $args{q_cstop})
+    if (scalar(@{$q_doms_r}));
+}
+
+closeplt($args{l_cstop});
+
+exit(0);
+
+# have all the data (and length of sequence), scale it and color it
+
+# SX: convert a sequence x coordinate to an integer plot coordinate,
+# using the global scale ($fxscal) and offset ($fxoff) plus a margin of 18
+sub SX {
+  my ($seq_x) = @_;
+  my $plot_x = $seq_x*$fxscal+$fxoff+18;
+  return int($plot_x);
+}
+
+# SY: convert a y value to a plot coordinate measured down from $max_y,
+# using the global scale ($fyscal) and offset ($fyoff)
+sub SY {
+  my ($val_y) = @_;
+  my $scaled_y = int($val_y*$fyscal+$fyoff);
+  return $max_y - $scaled_y;
+}
+
+my $y_delta = 0;
+
+#void openplt(long n0, long n1, int sq0off, int sq1off, char *xtitle, char *ytitle)
+#
+# Start the SVG document: print the (optional) HTTP header, the XML
+# prolog and the <svg> element, set the global scale and offsets, and
+# draw the axis ticks and the two sequence lines.
+#
+#   $n0, $n1      - query and library lengths; a negative value marks a
+#                   reversed coordinate range and is converted to a length
+#   $q_dom_info_r - array ref of query domains (adds room above the plot)
+#   $l_dom_info_r - array ref of library domains (adds room below the plot)
+#
+# Sets $x0c_off, $x1c_off, $xdc_off, $comb_xax, $max_xax, $fxscal,
+# $fyscal, $fxoff and $fyoff.
+sub openplt
+{
+  my ($n0, $n1, $q_dom_info_r, $l_dom_info_r) = @_;
+
+  my ($xbound, $ybound) = ($max_x + 24, 48);
+  my ($x0_rev, $x1_rev) = (0,0);
+
+  # each row of domain blocks needs 14 more units of height
+  if (scalar(@{$q_dom_info_r})) {
+    $ybound += 14;
+  }
+  elsif (scalar(@q_annots)) {
+    $ybound += 14;
+  }
+  $ybound += 14 if (scalar(@{$l_dom_info_r}));
+
+  # n = stop-start+1 is negative for a reversed range; 2-n is start-stop+1
+  if ($n0 < 0) {$x0_rev=1; $n0 = 2 - $n0;}
+  if ($n1 < 0) {$x1_rev=1; $n1 = 2 - $n1;}
+
+  print $q->header('image/svg+xml') if ($ENV{DOCUMENT_ROOT});
+  print("<?xml version=\"1.0\" standalone=\"no\"?>\n");
+  print("<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\" \n");
+  print("\"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n\n");
+
+  # l_name comes from the request: strip markup characters and collapse
+  # runs of '-' so the value cannot terminate the XML comment
+  my $l_name_safe = defined($args{l_name}) ? $args{l_name} : "";
+  $l_name_safe =~ s/[<>&]//g;
+  $l_name_safe =~ s/-{2,}/-/g;
+
+  print(qq(<!-- l_name=$l_name_safe -->\n));
+  print("<svg width=\"$xbound\" height=\"$ybound\" version=\"1.1\"\n");
+#  print("<svg version=\"1.1\"\n");
+  print("xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n\n");
+
+# simple things first, if query is shorter and inside, or library is
+# shorter and inside, then just use the longer sequence.
+
+  ($x0c_off, $x1c_off, $xdc_off) = (0,0,0);
+
+  ($comb_xax, $xdc_off, $x0c_off, $x1c_off) = calc_offsets($n0, $x0f3, $x0_rev, $n1, $x1f3, $x1_rev, \%args);
+
+#  if ($args{pgm} =~ m/lsw/) {
+#    $max_xax = $comb_xax;
+#  } else {
+  # smallest predefined axis length that holds the combined axis
+  for (my $i=0; $i < scalar(@xax_len); $i++) {
+    if ($comb_xax <= $xax_len[$i]) {
+      $max_xax = $xax_len[$i];
+      last;
+    }
+  }
+  # nothing fits: fall back to the longest predefined axis
+  $max_xax = $xax_len[$#xax_len] if ($max_xax <= 0);
+#  }
+
+  $fxscal = ($max_x-1)/$max_xax;
+  $fyscal = 1;
+
+  $fxscal *= 0.9; $fxoff = 24;
+  $fyoff = -14;
+  if (scalar(@{$q_dom_info_r}) || scalar(@q_annots)) {$fyoff -= 12;}
+
+
+  xaxis2($n0/$x0f3,$args{q_cstart}/$x0f3, $x0_rev, $n1/$x1f3, $args{l_cstart}/$x1f3, $x1_rev);
+
+  # query sequence line
+  newline(qq(stroke="black" stroke-width="1.5"));
+  move(SX($x0c_off), SY(15));
+  draw(SX($x0c_off+($n0/$x0f3)),SY(15));
+  clsline($n0,0);
+
+  # library sequence line
+  newline(qq(stroke="black" stroke-width="1.5"));
+  move(SX($x1c_off), SY(9));
+  draw(SX($x1c_off+($n1/$x1f3)),SY(9));
+
+  clsline($n1,0);
+}
+
+# closeplt: finish the SVG document; arguments are ignored
+sub closeplt {
+  my $closing_tag = "</svg>\n";
+  print $closing_tag;
+}
+
+# Work out how the two sequences share one x-axis.
+#
+#   $n0, $n1           - query and library lengths
+#   $x0f3, $x1f3       - divisors (1 or 3) that scale each sequence's
+#                        coordinates onto the common axis
+#   $x0_rev, $x1_rev   - accepted but not used here
+#   $args_r            - hash ref with q_/l_ astart, astop, cstart, cstop
+#
+# Returns ($comb_xax, $xdc_off, $x0c_off, $x1c_off): the combined axis
+# length and the offsets applied to the query ($x0c_off) and library
+# ($x1c_off, also returned as $xdc_off) so that the aligned regions
+# line up.
+sub calc_offsets {
+  my ($n0, $x0f3, $x0_rev, $n1, $x1f3, $x1_rev, $args_r) = @_;
+
+  my ($comb_xax, $x0c_off, $x1c_off, $xdc_off) = (0,0,0,0);
+
+  my ($n0f, $n1f, $q_start_d, $q_stop_d, $l_start_d, $l_stop_d) =
+    (
+     $n0/$x0f3, # $n0f
+     $n1/$x1f3, # $n1f -- library length uses the library factor (was $x0f3)
+     abs($args_r->{q_astart} - $args_r->{q_cstart})/$x0f3, # $q_start_d
+     abs($args_r->{q_astop} - $args_r->{q_cstop})/$x0f3,   # $q_stop_d
+     abs($args_r->{l_astart} - $args_r->{l_cstart})/$x1f3, # $l_start_d
+     abs($args_r->{l_astop} - $args_r->{l_cstop})/$x1f3    # $l_stop_d
+    );
+
+  if (($n1f >= $n0f) && ($l_start_d >= $q_start_d) && ($l_stop_d >= $q_stop_d)) {
+    # n1 is longer and n0 is contained
+    $comb_xax = $n1f; # $comb_xax : combined x-axis, in amino-acids if translated
+    $x0c_off = $l_start_d - $q_start_d;
+  }
+  elsif (($n1f < $n0f) && ($l_start_d <= $q_start_d) && ($l_stop_d <= $q_stop_d)) {
+    # n0 is longer and n1 is contained
+    $comb_xax = $n0f;
+    $xdc_off = $x1c_off = $q_start_d - $l_start_d;
+  }
+    # some kind of extension is necessary
+  elsif ($l_start_d >= $q_start_d) {
+    # library starts further left: shift the query right
+    $x0c_off = $l_start_d - $q_start_d;
+    $comb_xax = $n0f + $x0c_off;
+  }
+  else {
+    # query starts further left: shift the library right
+    $xdc_off = $x1c_off = $q_start_d - $l_start_d;
+    $comb_xax = $n1f + $x1c_off;
+  }
+
+  return ($comb_xax, $xdc_off, $x0c_off, $x1c_off);
+}
+
+# Draw a filled four-sided shape joining a range on the query line
+# (y=20, offset $x0c_off) to a range on the library line (y=10,
+# offset $x1c_off).
+#
+#   $start0, $stop0 - range on the query
+#   $start1, $stop1 - range on the library
+#   $color          - index into @block_colors (taken modulo its length)
+#   $text           - label; only used by the disabled <text> output below
+sub draw_trapz {
+  my ($start0, $stop0, $start1, $stop1, $color, $text) = @_;
+
+  $color = ($color % scalar(@block_colors));
+  my $svg_color = $block_colors[$color];
+  my $tx = $start1 + int(($stop1-$start1+1)/2);
+  my $ty = 10 + 9;
+
+  $text = substr($text,0,10);
+
+  newline(qq(stroke="black" stroke-width="1.5"));
+
+  move(SX($start0+$x0c_off), SY(20));
+  draw(SX($stop0+$x0c_off),SY(20));
+  draw(SX($stop1+$x1c_off),SY(10));
+  # fourth corner is the library start (was $start0, which mixed a query
+  # coordinate with the library offset)
+  draw(SX($start1+$x1c_off),SY(10));
+  move(SX($start0+$x0c_off), SY(20));
+
+  # closes the d="..." attribute opened by newline() and ends the element
+  print(qq(" fill="$svg_color" />\n));
+
+
+#  print (qq(<rect x="$x" y="$y" width="$w" height="$h" fill="$svg_color" stroke="white" stroke-width="1" />\n));
+#  print (qq(<text x="$tx" y="$ty" font-size="9" font-family="sans-serif" fill="white" text-anchor="middle">$text</text>\n));
+}
+
+# draws a colored solid block, and labels it, to indicate domain
+#
+#   $x, $y, $w, $h - rectangle position and size in plot coordinates
+#   $color         - index into @block_colors (taken modulo its length)
+#   $text          - label, truncated to 10 characters and XML-escaped
+#   $Q             - values below 30.0 get a wider (2 instead of 0.5)
+#                    white outline
+sub draw_block {
+  my ($x, $y, $w, $h, $color, $text, $Q) = @_;
+
+  $color = ($color % scalar(@block_colors));
+  my $svg_color = $block_colors[$color];
+  # label is centered horizontally, 9 units below the top of the block
+  my $tx = $x + int($w/2);
+  my $ty = $y + 9;
+
+  $text = substr($text,0,10);
+
+  # the label ends up as SVG element content; escape after truncating so
+  # that an entity is never cut in half
+  $text =~ s/&/&amp;/g;
+  $text =~ s/</&lt;/g;
+  $text =~ s/>/&gt;/g;
+
+  my $stroke_width = 0.5;
+  if ($Q < 30.0) {$stroke_width = 2;}
+
+  print (qq(<rect x="$x" y="$y" width="$w" height="$h" fill="$svg_color" stroke="white" stroke-width="$stroke_width" />\n));
+  print (qq(<text x="$tx" y="$ty" font-size="9" font-family="sans-serif" fill="white" text-anchor="middle">$text</text>\n));
+}
+
+# draw_regions: one block per aligned region, placed by the region's
+# beg1/end1 coordinates shifted by $xdc_off ($n1 is not used)
+sub draw_regions {
+  my ($annot_arr_r, $n1) = @_;
+
+  foreach my $region ( @{$annot_arr_r} ) {
+    my $x_left = SX($region->{beg1} - $args{l_cstart}+$xdc_off);
+    my $width = SX($region->{end1}+$xdc_off)-SX($region->{beg1}+$xdc_off);
+    draw_block($x_left, SY(18), $width,
+	       12, @{$region}{qw(color descr Q)});
+  }
+}
+
+# draw_doms: one block per domain at height $y_off, placed by the
+# domain's beg/end shifted by $xc_off ($xc_start/$xc_stop are not used);
+# the fixed 100.0 selects the thin outline in draw_block()
+sub draw_doms {
+  my ($annot_arr_r, $xc_off, $y_off, $xc_start, $xc_stop) = @_;
+
+  foreach my $dom ( @{$annot_arr_r} ) {
+    my $x_beg = SX($dom->{beg}+$xc_off);
+    my $x_end = SX($dom->{end}+$xc_off);
+    draw_block($x_beg, SY($y_off), $x_end-$x_beg,
+	       12, $dom->{color}, $dom->{descr}, 100.0);
+  }
+}
+
+# draw_align: outline the aligned library region with an unfilled
+# rectangle; coordinates are read from the file-level %args
+sub draw_align {
+  my $arg_r = shift;
+
+  my $x = SX($args{l_astart} - $args{l_cstart} + $xdc_off);
+  my $y = SY(21);
+  my $w = SX($args{l_astop}+$xdc_off) - SX($args{l_astart}+$xdc_off);
+  my $h = 18;
+
+  print (qq(<rect x="$x" y="$y" width="$w" height="$h" stroke="black" fill-opacity="0" stroke-width="1" />\n));
+}
+# void newline(char *options)
+# open a <path> element and its d="..." attribute; $options supplies
+# the other attributes, defaulting to a black stroke
+sub newline
+{
+  my ($options) = @_;
+
+  my $path_attrs = $options ? $options : "stroke=\"black\"";
+  printf("<path %s d=\"",$path_attrs);
+}
+
+# void clsline(long x, long y, int s)
+# close the d="..." attribute and the <path> element, unfilled;
+# the arguments are accepted but not used
+sub clsline
+{
+  my ($x, $y, $s) = @_;
+
+  print(qq(" fill="none" />\n));
+}
+
+#void move(int x, int y)
+# append a path "move to" command; coordinates are printed as integers
+sub move
+{
+  my ($to_x, $to_y) = @_;
+  printf(" M %d %d", $to_x, $to_y);
+}
+
+# void draw(int x, int y)
+# append a path "line to" command; coordinates are printed as integers
+sub draw
+{
+  my ($to_x, $to_y) = @_;
+  printf(" L %d %d", $to_x, $to_y);
+}
+
+# xaxis2($n0, $offset0, $x0_rev, $n1, $offset1, $x1_rev)
+# (C prototype this was derived from: void xaxis(long n, int offset, char *title))
+#
+# Print the SVG tick marks and tick labels for two horizontal axes:
+# axis 0 has its ticks at SY(26) and labels at SY(28); axis 1 has its
+# ticks and labels at SY(0).
+#   $n0, $n1           - lengths of the two sequences
+#   $offset0, $offset1 - coordinate offsets of the two sequences
+#   $x0_rev, $x1_rev   - true when that axis is numbered in reverse
+# coordinates in amino acids - modify for final axes
+#
+# Uses @tarr, $x0c_off, $x1c_off, $x0f3, $x1f3, SX() and SY(), none of
+# which are defined in this sub.
+sub xaxis2 {
+  my ($n0, $offset0, $x0_rev, $n1, $offset1,$x1_rev) = @_;
+
+  # $v_offsetN starts as $offsetN and is reduced by $nN for a reversed axis
+  my ($v_offset0, $v_offset1) = ($offset0, $offset1);
+
+  # NOTE(review): $jm, $js, $jl0, $jl1, $sgn0, $sgn1 and $offset are
+  # declared or assigned in this sub but never read in it
+  my ($i, $jm, $tick_length, $max_ticks, $tick_inc);
+  my ($js, $jl0, $jl1);
+  my ($sgn0, $sgn1) = (1,1);
+
+  my $num_len;
+  my $numstr;
+
+  if ($x0_rev) {
+    $sgn0 = -1;
+    $v_offset0 = $offset0 - $n0;
+  }
+
+  if ($x1_rev) {
+    $sgn1 = -1;
+    $v_offset1 = $offset1 - $n1;
+  }
+
+  # for translated-DNA/protein searches, both $n0 and $n1 are in amino-acids
+  # $n_max is the larger of the two lengths
+  my $n_max = $n1;
+  $n_max = $n0 if ($n0 > $n1);
+  my $offset = 0;
+
+  # length added to the y coordinate to draw each tick mark
+  $tick_length = 2;
+
+  # search for the correct increment for the tick array
+  # @tarr[] has the list of axis increments we might use
+  # we want a tick increment that gives <= 6 ticks
+  for ($i=0; $i< @tarr; $i++) {
+    # take the first increment that divides $n_max into 6 or fewer divisions
+    if (($max_ticks = $n_max/$tarr[$i]) <= 6) {goto found;}
+  }
+
+  # these happen only if no tick increment was found
+  # point $i to the last element
+  # (this assignment is overwritten by "$i = -1" below; both values
+  # index the last element of @tarr)
+  $i = scalar(@tarr)-1;
+
+  # $max_ticks is the number of increments for longest sequence
+
+  $max_ticks = ($n_max)/$tarr[-1];
+  $i = -1;
+
+ found:
+  # allow extra ticks for the larger of the two offsets
+  $max_ticks += max($offset0, $offset1)/$tarr[$i];
+  $tick_inc = $tarr[$i];
+
+  # jo is the offset for the coordinate system, e.g. if we are
+  # plotting an alignment from 101 - 300 rather than 1 - 400 we may
+  # show partial sequences in alignments; it should be kept, but it is
+  # different from the axis shift ($x0c_off, $x1c_off)
+  # (there is no $jo in this sub; presumably this refers to
+  # $offset0/$offset1 -- TODO confirm)
+
+  # local copies of the axis shifts; only the reversed branches modify them
+  my ($xx0c_off, $xx1c_off) = ($x0c_off, $x1c_off);
+
+  # ---- axis 0, forward numbering ----
+  unless ($x0_rev) {
+#    $xx0c_off -= $offset0;
+# draw up-tick
+    newline("stroke=\"black\" stroke-width=\"1.5\"");
+    for ($i=1; $i<=$max_ticks; $i++) {
+      # only ticks between $v_offset0 and $n0 + $v_offset0 are drawn
+      last if ($i*$tick_inc > $n0 + $v_offset0);
+      next if ($i*$tick_inc < $v_offset0);
+
+      move(SX($i*$tick_inc + $xx0c_off - $v_offset0),SY(26));
+      draw(SX($i*$tick_inc + $xx0c_off - $v_offset0),SY(26)+$tick_length);
+    }
+    clsline($n_max,$n_max,10000);
+
+    # labels for the same ticks; the printed value is scaled by $x0f3
+    for ($i=1; $i<=$max_ticks; $i++) {
+      last if ($i*$tick_inc > $n0 + $v_offset0);
+      next if ($i*$tick_inc < $v_offset0);
+      $numstr = sprintf("%ld",$i*$tick_inc*$x0f3 );
+      printf(qq(<text x="%d" y="%d" font-family="sans-serif" font-size='9' text-anchor="middle">%s</text>\n),
+	     SX($i*$tick_inc+$xx0c_off - $v_offset0),SY(28)+$tick_length-1,$numstr);
+    }
+  }
+  # ---- axis 0, reversed numbering ----
+  else {
+    $xx0c_off += $offset0;
+    # if $x0_rev need to know $x0_max_ticks
+    my $x0_max_ticks = int(($n0+$offset0)/$tick_inc);
+
+    newline("stroke=\"black\" stroke-width=\"1.5\"");
+    for ($i=$x0_max_ticks; $i>0; $i--) {
+      move(SX($xx0c_off - $i*$tick_inc),SY(26));
+      draw(SX($xx0c_off - $i*$tick_inc),SY(26)+$tick_length);
+    }
+    clsline($n_max,$n_max,10000);
+
+    # now put in the numbers, using the same reverse counting
+    # NOTE(review): the label x position subtracts $v_offset0 but the tick
+    # position above does not -- confirm that this is intended
+    for ($i=$x0_max_ticks; $i>0; $i--) {
+      $numstr = sprintf("%ld",$i*$tick_inc*$x0f3);
+      printf(qq(<text x="%d" y="%d" font-family="sans-serif" font-size='9' text-anchor="middle">%s</text>\n),
+	     SX($xx0c_off - $i*$tick_inc - $v_offset0),SY(28)+$tick_length-1,$numstr);
+    }
+  }
+
+  # ---- axis 1, forward numbering ----
+  unless ($x1_rev) { 
+#    $xx1c_off -= $offset1;
+    # draw down-tick
+    newline("stroke=\"black\" stroke-width=\"1.5\"");
+    for ($i=1; $i<=$max_ticks; $i++) {
+      last if ($i*$tick_inc > $n1 + $v_offset1);
+      next if ($i*$tick_inc < $v_offset1);
+      move(SX($i*$tick_inc + $xx1c_off - $v_offset1),SY(0));
+      draw(SX($i*$tick_inc + $xx1c_off - $v_offset1),SY(0)+$tick_length);
+    }
+    clsline($n_max,$n_max,10000);
+
+    # $num_len is computed here (using $x0f3) but not read afterwards
+    $numstr = sprintf("%ld",$tick_inc*$x0f3);
+    $num_len = length($numstr);
+
+    # labels; the limits use $offset1, which equals $v_offset1 in this
+    # branch because $x1_rev is false
+    for ($i=1; $i<=$max_ticks; $i++) {
+      last if ($i*$tick_inc > $n1 + $offset1);
+      next if ($i*$tick_inc < $offset1);
+      $numstr = sprintf("%ld",$i*$tick_inc*$x1f3);
+      printf(qq(<text x="%d" y="%d" font-family="sans-serif" font-size='9' text-anchor="middle">%s</text>\n),
+	     SX($i*$tick_inc+$xx1c_off-$v_offset1),SY(0)+$tick_length+8,$numstr);
+    }
+  }
+  # ---- axis 1, reversed numbering ----
+  else {
+    # NOTE(review): unlike $x0_max_ticks above, $offset1 is not added
+    # to $n1 here -- confirm
+    my $x1_max_ticks = int($n1/$tick_inc);
+    $xx1c_off += $offset1;
+
+    # draw down-tick
+    newline("stroke=\"black\" stroke-width=\"1.5\"");
+    for ($i=$x1_max_ticks; $i>0; $i--) {
+      move(SX($xx1c_off - $i*$tick_inc),SY(0));
+      draw(SX($xx1c_off - $i*$tick_inc),SY(0)+$tick_length);
+    }
+    clsline($n_max,$n_max,10000);
+
+    # $num_len is computed here (using $x0f3) but not read afterwards
+    $numstr = sprintf("%ld",$tick_inc*$x0f3);
+    $num_len = length($numstr);
+
+    # labels, counted in the same reverse order as the ticks
+    for ($i=$x1_max_ticks; $i>0; $i--) {
+      $numstr = sprintf("%ld",$i*$tick_inc*$x1f3);
+      printf(qq(<text x="%d" y="%d" font-family="sans-serif" font-size='9' text-anchor="middle">%s</text>\n),
+	     SX($xx1c_off - $i*$tick_inc),SY(0)+$tick_length+8,$numstr);
+    }
+  }
+}
+
+# xaxis_d($n, $offset)
+# (C prototype this was derived from: void xaxis(long n, int offset, char *title))
+#
+# Print the SVG tick marks and tick labels for one axis at SY(0).
+#   $n      - axis length; used to pick the tick increment from @tarr
+#   $offset - coordinate offset applied to tick positions and labels
+# Uses @tarr, SX() and SY(), none of which are defined in this sub.
+sub xaxis_d {
+  my ($n, $offset) = @_;
+
+  my ($i, $jm, $tick);
+  my ($js, $jo, $jl);
+  my $num_len;
+  my $numstr;
+
+  # length added to the y coordinate to draw each tick mark
+  $tick = 2;
+
+  # search for the correct increment for the tick array
+  for ($i=0; $i< @tarr; $i++) {
+    # take the first increment that gives fewer than 6 divisions
+    if (($jm = $n/$tarr[$i])<6) {goto found;}
+  }
+  # no increment qualified: use the last entry of @tarr
+  $i= scalar(@tarr)-1;
+  $jm = $n/$tarr[$i];
+ found:
+  # $js is the tick increment - modify to accommodate offset
+  $js = $tarr[$i];
+
+  # $jo is the remainder of the offset modulo the tick increment
+  $jo = $offset % $tarr[$i];	# figure out offset in tarr[i] increments
+
+  # $jl is the value added to every label
+  # NOTE(review): Perl's "/" is floating-point division (unless "use
+  # integer" is in effect), so after the two statements below $jl equals
+  # $offset rather than $offset rounded down to a multiple of the
+  # increment, as C integer division would give -- confirm whether int()
+  # was intended
+  $jl = $offset/$tarr[$i];	# figure out offset in tarr[i] increments
+  $jl *= $tarr[$i];
+
+  # one tick per increment, shifted left by $jo
+  newline("stroke=\"black\" stroke-width=\"1.5\"");
+  for ($i=1; $i<=$jm; $i++) {
+    move(SX($i*$js - $jo),SY(0));
+    draw(SX($i*$js - $jo),SY(0)+$tick);
+  }
+  clsline($n,$n,10000);
+
+  # the length of the first label decides how many labels are printed
+  $numstr = sprintf("%ld",$js + $jl );
+  $num_len = length($numstr);
+  if ($num_len > 4) {
+    # labels longer than 4 characters: print only two of them, one at
+    # $js and one at $jm*$js ($jm is not rounded to an integer here)
+
+    printf(qq(<text x="%d" y="%d" font-family="sans-serif" font-size='9' text-anchor="middle">%s</text>\n),SX($js-$jo),SY(0)+$tick+8,$numstr);
+
+    $numstr = sprintf("%ld",$jm*$js+$jl);
+    printf(qq(<text x="%d" y="%d" font-family="sans-serif" font-size='9' text-anchor="middle">%s</text>\n),SX($jm*$js-$jo),SY(0)+$tick+8,$numstr);
+  }
+  else {
+    # short labels: print one for every tick
+    for ($i=1; $i<=$jm; $i++) {
+      $numstr = sprintf("%ld",$i*$js+$jl);
+      printf(qq(<text x="%d" y="%d" font-family="sans-serif" font-size='9' text-anchor="middle">%s</text>\n),SX($i*$js-$jo),SY(0)+$tick+8,$numstr);
+    }
+  }
+}
+
+# parse_regions($region_str)
+# Split $region_str into lines and convert every line that starts with
+# "Region" into a hash; returns a reference to the list of hashes kept.
+# Each hash holds:
+#   descr, color           - the last two fields of the line
+#   <key> => <value>       - every key=value pair found in the second field
+#   beg0, end0, beg1, end1 - the four numbers of "Region: b0-e0:b1-e1"
+# Lines with color == 0 and Q < 30.0 are dropped.
+# Side effects: a descr not yet present in %dom_colors is recorded there
+# with its color, and $max_color is raised to the largest color seen
+# (both happen before a line is dropped).
+sub parse_regions {
+  my ($region_str) = @_;
+
+  my @kept = ();
+
+  foreach my $line (split(/\n\s*/,$region_str)) {
+    $line =~ s/^\s+//;
+    next if ($line !~ m/^Region/);
+
+    # fields are separated by whitespace followed by ':'
+    my @fields = split(/\s+:\s*/,$line);
+
+    my ($descr, $color) = @fields[-2,-1];
+    my %data = (descr => $descr, color => $color);
+
+    if (!defined($dom_colors{$descr})) {
+      $dom_colors{$descr} = $color;
+    }
+    if ($color > $max_color) {
+      $max_color = $color;
+    }
+
+    # the second field is a ';'-separated list of key=value scores
+    foreach my $pair (split(/;\s*/,$fields[1])) {
+      my ($key, $value) = split(/=/,$pair);
+      $data{$key} = $value;
+    }
+
+    # this line hides low-score NODOMs
+    next if ($data{color}==0 && $data{Q} < 30.0);
+
+    @data{qw(beg0 end0 beg1 end1)} = ($fields[0] =~ m/^Region:\s*(\d+)-(\d+):(\d+)-(\d+)$/);
+
+    push @kept, \%data;
+  }
+
+  return \@kept;
+}
+
+# parse_domains($domain_str)
+# Split $domain_str into lines and convert every tab-separated line that
+# starts with "qDomain" or "lDomain" (and whose last field does not
+# contain NODOM) into a hash with the keys beg, end, descr and color.
+# beg/end are shifted by ($args{X_cstart} + 1) and clipped to
+# 1..$args{X_cstop}, where X is q or l to match the line type; domains
+# that fall entirely outside that range are dropped.
+# Returns two array references: (qDomain hashes, lDomain hashes).
+# Side effects: a descr not yet present in %dom_colors is recorded there
+# with its color, and $max_color is raised to the largest color seen
+# (both happen before the range check).
+sub parse_domains {
+  my ($domain_str) = @_;
+
+  # domains kept so far, keyed by the first letter of the line type
+  my %kept = (q => [], l => []);
+
+  foreach my $line (split(/\n\s*/,$domain_str)) {
+    $line =~ s/^\s+//;
+    next if ($line !~ m/^[ql]Domain/);
+
+    my @fields = split(/\t/,$line);
+
+    next if ($fields[-1] =~ m/NODOM/);
+
+    my %data = ();
+
+    @data{qw(beg end)} = ($fields[1] =~ m/(\-?\d+)\-(\-?\d+)/);
+    @data{qw(descr color)} = split(/ :/,$fields[-1]);
+
+    if (!defined($dom_colors{$data{descr}})) {
+      $dom_colors{$data{descr}} = $data{color};
+    }
+    if ($data{color} > $max_color) {
+      $max_color = $data{color};
+    }
+
+    # the line starts with qDomain or lDomain, so anything that is not
+    # a qDomain here is an lDomain
+    my $side = ($fields[0] =~ m/^qDomain/) ? 'q' : 'l';
+    my $c_start = $args{$side . '_cstart'};
+    my $c_stop = $args{$side . '_cstop'};
+
+    $data{beg} -= $c_start + 1;
+    $data{end} -= $c_start + 1;
+    next if ($data{end} < 1);
+    $data{beg} = 1 if ($data{beg} < 1);
+    next if ($data{beg} > $c_stop);
+    $data{end} = $c_stop if ($data{end} > $c_stop);
+
+    push @{$kept{$side}}, \%data;
+  }
+
+  return ($kept{q}, $kept{l});
+}
+
+# max($x0, $x1): return the numerically larger of the two arguments
+# ($x0 when they compare equal).
+sub max {
+  my ($first, $second) = @_;
+
+  return ($first >= $second) ? $first : $second;
+}
diff --git a/scripts/summ_domain_ident.pl b/scripts/summ_domain_ident.pl
new file mode 100755
index 0000000..85e9c55
--- /dev/null
+++ b/scripts/summ_domain_ident.pl
@@ -0,0 +1,97 @@
+#!/usr/bin/perl -w
+
+################################################################
+# copyright (c) 2014 by William R. Pearson and The Rector &
+# Visitors of the University of Virginia */
+################################################################
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under this License is distributed on an "AS
+# IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+# express or implied.  See the License for the specific language
+# governing permissions and limitations under the License. 
+################################################################
+
+# summ_domain_ident.pl takes a -m 8CC result file with query-annotated
+# domains, and produces a tab-delimited summary of identity across the domains
+# parse:
+# sp|P09488|GSTM1_HUMAN	gi|121735|sp|P09488.3|GSTM1_HUMAN	100.00	218	0	0	1	218	1	218	2.9e-113	408.2	218M	|RX:1-12:1-12:s=64;b=25.0;I=1.000;Q=47.5;C=exon_1|RX:13-37:13-37:s=128;b=49.9;I=1.000;Q=121.4;C=exon_2|RX:38-59:38-59:s=125;b=48.7;I=1.000;Q=117.9;C=exon_3|RX:60-86:60-86:s=145;b=56.5;I=1.000;Q=141.0;C=exon_4|RX:87-120:87-120:s=185;b=72.1;I=1.000;Q=187.2;C=exon_5|RX:121-152:121-152:s=174;b=67.8;I=1.000;Q=174.5;C=exon_6|RX:153-189:153-189:s=197;b=76.8;I=1.000;Q=201.0;C=exon_7|RX:190-21 [...]
+
+
+
+use strict;
+use Getopt::Long;
+use Pod::Usage;
+
+my ($shelp, $help) = (0, 0);
+
+GetOptions(
+    "h|?" => \$shelp,
+    "help" => \$help,
+    );
+
+pod2usage(1) if $shelp;
+pod2usage(exitstatus => 0, verbose => 2) if $help;
+pod2usage(1) unless @ARGV;
+
+# true until the header line has been printed
+my $first_line =1;
+
+# names given, in order, to the ';'-separated parts of one annotation
+my @a_field_names = qw( score_field bits id qval comment );
+# domain names (the C= values) taken from the first data line; they
+# define the output columns for every following line
+my @domain_names = ();
+
+while (my $line = <>) {
+  next if $line =~ m/^#/;
+  chomp($line);
+
+  # get last (annotation) field
+
+  my @fields = split(/\t/,$line);
+  # shorten the subject id: drop a leading "gi|<number>|" and the
+  # ".<version>" that precedes a '|'
+  $fields[1] =~ s/^gi\|\d+\|//;
+  $fields[1] =~ s/\.\d+\|/\|/;
+
+  my @annots = split(/\|/,$fields[-1]);
+  shift @annots; # first is blank
+
+  # $annots[...]:
+  # RX:1-12:1-12:s=64;b=25.0;I=1.000;Q=47.5;C=exon_1
+
+  # domain name => identity (the I= value) on this line
+  my %dom_ids = ();
+
+  for my $annot (@annots) {
+    my %a_fields = ();
+    @a_fields{@a_field_names} = split(/;/,$annot);
+    $a_fields{'id'} =~ s/^I=//;
+    $a_fields{'comment'} =~ s/^C=//;
+
+    $dom_ids{$a_fields{'comment'}} = $a_fields{'id'};
+
+    if ($first_line) {
+      push @domain_names, $a_fields{'comment'};
+    }
+  }
+  if ($first_line) {
+    # header line: one column per domain of the first data line
+    print join("\t",("subj_acc          ","ident",@domain_names)),"\n";
+    $first_line = 0;
+  }
+
+  # append '+' when the domain identity (as a percentage) is at least
+  # the value in $fields[2], '-' when it is lower; a domain missing from
+  # this line gets an empty column
+  for my $dom ( @domain_names ) {
+    if (defined($dom_ids{$dom})) {
+      if (100.0 * $dom_ids{$dom} >= $fields[2]) {
+	$dom_ids{$dom} .= '+';
+      }
+      else {
+	$dom_ids{$dom} .= '-';
+      }
+    }
+    else {
+      $dom_ids{$dom} = '';
+    }
+  }
+
+  print join("\t",($fields[1],$fields[2],@dom_ids{@domain_names})),"\n";
+}
diff --git a/scripts/test_ann_scripts.sh b/scripts/test_ann_scripts.sh
new file mode 100755
index 0000000..eb9f299
--- /dev/null
+++ b/scripts/test_ann_scripts.sh
@@ -0,0 +1,30 @@
+#!/bin/sh
+
+## a script to test the annotation scripts ann_*.pl
+## acc_examples contains:
+# P09488  -- new NCBI format
+# sp|P09488 --more traditional format
+# up|P09488|GSTM1_HUMAN
+# SP:GSTM1_HUMAN P09488  ---ebi searches with accession
+# SP:GSTM1_HUMAN -- ebi searches without accession
+##
+
+if [ ! $1=='' ]; then
+   script_file=$1
+else
+    script_file=ann_script_list
+fi
+
+if [ ! $1=='' ]; then
+   ex_file=$1
+else
+    ex_file=acc_examples
+fi
+
+for script in `cat $script_file `; do
+  for acc_type in `cat $ex_file`; do
+      echo $script ${acc_type}
+      $script ${acc_type}
+  done
+  echo '***DONE***'  $script `date`
+done
diff --git a/seq/bovgh.seq b/seq/bovgh.seq
new file mode 100644
index 0000000..f30b344
--- /dev/null
+++ b/seq/bovgh.seq
@@ -0,0 +1,38 @@
+>BOVGH bovine growth hormone (presomatotropin) gene and flanks.
+ AAAACCTATG GGGTGGGCTC TCAAGCTGAG ACCCTGTGTG CACAGCCCTC TGGCTGGTGG
+ CAGTGGAGAC GGGATNNNAT GACAAGCCTG GGGGACATGA CCCCAGAGAA GGAACGGGAA
+ CAGGATGAGT GAGAGGAGGT TCTAAATTAT CCATTAGCAC AGGCTGCCAG TGGTCCTTGC
+ ATAAATGTAT AGAGCACACA GGTGGGGGGA AAGGGAGAGA GAGAAGAAGC CAGGGTATAA
+ AAATGGCCCA GCAGGGACCA ATTCCAGGAT CCCAGGACCC AGTTCACCAG ACGACTCAGG
+ GTCCTGTGGA CAGCTCACCA GCTATGATGG CTGCAGGTAA GCTCGCTAAA ATCCCCTCCA
+ TTCGCGTGTC CTAAAGGGGT AATGCGGGGG GCCCTGCCGA TGGATGTGTT CAGAGCTTTG
+ GGCTTTAGGG CTTCCGAATG TGAACATAGG TATCTACACC CAGACATTTG GCCAAGTTTG
+ AAATGTTCTC AGTCCCTGGA GGGAAGGGTA GGTGGGGGCT GGCAGGAGAT CAGGCGTCTA
+ GCTCCCTGGG GCCCTCCGTC GCGGCCCTCC TGGTCTCTCC CTAGGCCCCC GGACCTCCCT
+ GCTCCTGGCT TTCGCCCTGC TCTGCCTGCC CTGGACTCAG GTGGTGGGCG CCTTCCCAGC
+ CATGTCCTTG TCCGGCCTGT TTGCCAACGC TGTGCTCCGG GCTCAGCACC TGCATCAGCT
+ GGCTGCTGAC ACCTTCAAAG AGTTTGTAAG CTCCCGAGGG ATGCGTCCTA GGGGTGGGGA
+ GGCAGGAAGG GGTGAATCCA CACCCCCTCC ACACAGTGGG AGGAAACTGA GGAGTTCAGC
+ CGTATTTTAT CCAAGTAGGG ATGTGGTTAG GGGAGCAGAA ACGGGGGTGT GTGGGGTGGG
+ GAGGGTTCCG AATAAGGCGG GGAGGGGAAC CGCGCACCAG CTTAGACCTG GGTGGGTGTG
+ TTCTTCCCCC AGGAGCGCAC CTACATCCCG GAGGGACAGA GATACTCCAT CCAGAACACC
+ CAGGTTGCCT TCTGCTTCTC TGAAACCATC CCGGCCCCCA CGGGCAAGAA TGAGGCCCAG
+ CAGAAATCAG TGAGTGGCAA CCTCGGACCG AGGAGCAGGG GACCTCCTTC ATCCTAAGTA
+ GGCTGCCCCA GCTCTCCGCA CCGGGCCTGG GGCGGCCTTC TCCCCGAGGT GGCGGAGGTT
+ GTTGGATGGC AGTGGAGGAT GATGGTGGGC GGTGGTGGCA GGAGGTCCTC GGGCAGAGGC
+ CGACCTTGCA GGGCTGCCCC AAGCCCGCGG CACCCACCGA CCACCCATCT GCCAGCAGGA
+ CTTGGAGCTG CTTCGCATCT CACTGCTCCT CATCCAGTCG TGGCTTGGGC CCCTGCAGTT
+ CCTCAGCAGA GTCTTCACCA ACAGCTTGGT GTTTGGCACC TCGGACCGTG TCTATGAGAA
+ GCTGAAGGAC CTGGAGGAAG GCATCCTGGC CCTGATGCGG GTGGGGATGG CGTTGTGGGT
+ CCCTTCCATG CTGGGGGCCA TGCCCGCCCT CTCCTGGCTT AGCCAGGAGA ATGCACGTGG
+ GCTTGGGGAG ACAGATCCCT GCTCTCTCCC TCTTTCTAGC AGTCCAGCCT TGACCCAGGG
+ GAAACCTTTT CCCCTTTTGA AACCTCCTTC CTCGCCCTTC TCCAAGCCTG TAGGGGAGGG
+ TGGAAAATGG AGCGGGCAGG AGGGAGCTGC TCCTGAGGGC CCTTCGGCCT CTCTGTCTCT
+ CCCTCCCTTG GCAGGAGCTG GAAGATGGCA CCCCCCGGGC TGGGCAGATC CTCAAGCAGA
+ CCTATGACAA ATTTGACACA AACATGCGCA GTGACGACGC GCTGCTCAAG AACTACGGTC
+ TGCTCTCCTG CTTCCGGAAG GACCTGCATA AGACGGAGAC GTACCTGAGG GTCATGAAGT
+ GCCGCCGCTT CGGGGAGGCC AGCTGTGCCT TCTAGTTGCC AGCCATCTGT TGTTTGCCCC
+ TCCCCCGTGC CTTCCTTGAC CCTGGAAGGT GCCACTCCCA CTGTCCTTTC CTAATAAAAT
+ GAGGAAATTG CATCGCATTG TCTGAGTAGG TGTCATTCTA TTCTGGGGGG TGGGGTGGGG
+ CAGGACAGCA AGGGGGAGGA TTGGGAAGAC AATAGCAGGC ATGCTGGGGA TGCGGTGGGC
+ TCTATGGGTA CCCAGGTGCT GAAGAATTGA CCCGGTTCCT CCTGGG
diff --git a/seq/bovprl.seq b/seq/bovprl.seq
new file mode 100644
index 0000000..c7f4f11
--- /dev/null
+++ b/seq/bovprl.seq
@@ -0,0 +1,17 @@
+>BOVPRL GenBank entry BOVPRL from omam file.  907 nucleotides.
+TGCTTGGCTGAGGAGCCATAGGACGAGAGCTTCCTGGTGAAGTGTGTTTCTTGAAATCAT
+CACCACCATGGACAGCAAAGGTTCGTCGCAGAAAGGGTCCCGCCTGCTCCTGCTGCTGGT
+GGTGTCAAATCTACTCTTGTGCCAGGGTGTGGTCTCCACCCCCGTCTGTCCCAATGGGCC
+TGGCAACTGCCAGGTATCCCTTCGAGACCTGTTTGACCGGGCAGTCATGGTGTCCCACTA
+CATCCATGACCTCTCCTCGGAAATGTTCAACGAATTTGATAAACGGTATGCCCAGGGCAA
+AGGGTTCATTACCATGGCCCTCAACAGCTGCCATACCTCCTCCCTTCCTACCCCGGAAGA
+TAAAGAACAAGCCCAACAGACCCATCATGAAGTCCTTATGAGCTTGATTCTTGGGTTGCT
+GCGCTCCTGGAATGACCCTCTGTATCACCTAGTCACCGAGGTACGGGGTATGAAAGGAGC
+CCCAGATGCTATCCTATCGAGGGCCATAGAGATTGAGGAAGAAAACAAACGACTTCTGGA
+AGGCATGGAGATGATATTTGGCCAGGTTATTCCTGGAGCCAAAGAGACTGAGCCCTACCC
+TGTGTGGTCAGGACTCCCGTCCCTGCAAACTAAGGATGAAGATGCACGTTATTCTGCTTT
+TTATAACCTGCTCCACTGCCTGCGCAGGGATTCAAGCAAGATTGACACTTACCTTAAGCT
+CCTGAATTGCAGAATCATCTACAACAACAACTGCTAAGCCCACATTCCATCCTATCCATT
+TCTGAGATGGTTCTTAATGATCCATTCCCTGGCAAACTTCTCTGAGCTTTATAGCTTTGT
+AATGCATGCTTGGCTCTAATGGGTTTCATCTTAAATAAAAACAGACTCTGTAGCGATGTC
+AAAATCT
diff --git a/seq/dna_test_s.nlib b/seq/dna_test_s.nlib
new file mode 100644
index 0000000..e8de1cd
--- /dev/null
+++ b/seq/dna_test_s.nlib
@@ -0,0 +1,47 @@
+>RABGLTR Oryctolagus cuniculus glutathione S-transferase mRNA, complete cds.
+ CGGCAGCTCC TGTGGACTCA GAGGAGCTGC ACCATGCCCA TGACGCTGGG TTACTGGGAC
+ GTCCGTGGGC TGGCTCTGCC AATCCGCATG CTCCTGGAAT ACACGGACAC CAGCTATGAG
+ GAAAAGAAAT ACACCATGGG GGATGCTCCC AACTATGACC AAAGCAAGTG GCTGAGTGAG
+ AAGTTCACCC TGGGCCTGGA CTTTCCCAAT CTGCCCTACC TAATTGATGG GACTCACAAG
+ CTCACGCAGA GCAACGCCAT CCTGCGCTAC CTGGCCCGCA AGCACGGCCT GTGTGGGGAG
+ ACGGAAGAGG AGAGGATTCG CGTGGACATT CTGGAGAATC AGCTGATGGA CAACCGCTTC
+ CAACTTGTAA ACGTCTGCTA CAGTCCCGAC TTTGAGAAGC TCAAGCCCGA GTACCTGAAG
+ GGGCTCCCTG AGAAGCTGCA GCTGTACTCG CAGTTCCTGG GAAGCCTCCC CTGGTTCGCA
+ GGGGACAAGA TCACCTTCGC CGATTTCCTT GTCTACGACG TTCTTGACCA GAACCGGATA
+ TTTGTGCCTG GGTGCCTGGA CGCGTTCCCA AACCTGAAGG ACTTTCATGT CCGCTTTGAG
+ GGCCTGCCGA AGATCTCTGC CTACATGAAG TCCAGCCGCT TTATCCGAGT CCCTGTGTTT
+ TTAAAGAAGG CCACGTGGAC GGGAATATAG GGCCCTGGAA GGAGGTGGGC CATCCCCTGG
+ GAGCTCAGGT CTCCCAGCCT CTTGCTCATC TTCCTCAACC TTCCCAAAAA CAAAAGCCTA
+ CTGCCTGCTT GTGTTCTGAG CCAGCCCCTC CCATGCAGGC TCTGGCCAGC TCAGAAACCC
+ ACCCTTCTAG CCATGGGCTC TCTAAGGCTG CTCTTCCCGG ACTAAGCAGA CCCCACGGGC
+ CACATCTCTC TTCGTGGGCT CCGTTTGATC TCCCCGACTG CCAGAATCAT GGTTGTACCT
+ GCTGCGGCCC TATTCCCAGG CGGGACTCCC CAGTGCTGTT TGGTCCCCAG GAGGGCCTGA
+ CCTCAGCCAG GGCCCTTCTT ACCCCTCCCT GTGTTGCACT GGAGTGGGCG CTGACTGTGC
+ AGACCTTGGG GGGGTTTCTT TGTTCTGCTG CCCACAGCAT GGCTGGGTGG GGCAGGATTA
+ GTGTGGGGGG AGTTGGGTGC TCAGGCAGGG CTATGAGGGA TCTTGTTCAT TTCCGGGCCC
+ TATCCATGTG CTCTGCTCCT CGCCCTGGGT TTTCTCCTCT GCCCGGGTTC CTCGTTCCTT
+ CACCCTGGAG GGAGGCCAGG GCCACGTGCA GCCGTGCCGG GTTCTGAGAG CGCTGGGCTG
+ ATGGGGACGG GGCTGAGCAG GCTTGAGCAG ACCCCTCTGT CACCATCTCC CGGAAGCTTT
+ CAGCTGATAC AGATGCTCCT CGTCTATAGT TTCAGGATGT TTCTCAATAA AACATCCCAC
+ TGT
+>EYKX4VC01BO0UO length=222 xy=0577_3838 region=1 run=R_2007_11_07_16_15_57_
+AGACCTGGGACAGCGGCGGGCTGCTGAAGCCGCAGGCGATAGAGGACAAACTGCAGTACC
+GCTTCTGGCTGCACTATGCCGAAGGCTCGCTGATGCCGCTGCTGTTAATGAAGCTGGTGT
+TCGCCAGCCTGGGTAAACCCCCTGTGCCCTTTGGCGTCCGCTCGCTGGGCGCCCTGCTGG
+GCAAGGGCATTCAGAAAGCGTGGCTGGATCCCCAGCTGGCCA
+>mgstm1 reverse complement
+AAGTGTGTTTCAGACTTTATTGTAGACGAGACAGACCTGGGGAGGCTACTCCACCAGGAACAGGCTGGCACTCAA
+GTATTGACTTCGGGGTAACTCTAGGGAGGGCTAGCACTAAGATAGTGTTGACCATCGGGGTAATTCTAGGAAGCG
+TGAGTTCAGGACAGACCTCAGTTCGCAGAAACGGGCTGTGAGGTTGGGTCAGGGAGCCAATGAAGAAGGGGCCAT
+gtgaaagaaactggggagaatgaaggctgtgtggacttgactgggaagagggtgaggagatggggctgaccaagc
+tgcagaagggagcgggaaggagagagaaccaggagccacagtgcagaaggccagggtgctgtccccacccagggc
+CTGCAGGATCCCCAGTGTGGACAGGTCCTCCTAGTGAGTGCCCGTGTAGCAAGGGCCTACTTGTTACTCCAGTGG
+GCCATCTTTGAAAATATAGGTGTTGCGATGTAGCGGCTACTCTTCATGTAGGCAGAGATCTTCTTGAGGCCCTCG
+AAGCGGGCCAGGAAGTCCCTCAGGTTTGGGAAGGCGTCCAGGCACTTGGGCTCAAACATACGGTACTGGTCAAGA
+ATGTCATAAGCAAGGAAATCCACATAGGTGACCTTGTCCCCTGCAAACCATGGCCTCTTGCCCAGGAACTCAGAG
+tagagcttcattttctcagggatggtcttcaagaactctggcttctgcttctcaaagtcagggttgtaacagagc
+atgatgagctgcatgcgggtgtccatgacctggttctccacaatgtctgcacggatcctctcctcctctgtctct
+ccatccaggtggtgctttcgggcaaggtagcgcaggatggcattgctctgggtgatcttgtgtgatccatcgatc
+AAGTAAGGCAGATTGGGAAAGTCCAGGCCCAGCTTGAACTTCTCATTCAGCCACTGGCTTCTGTCAAAGTCGGGA
+GCGTCACCCATGGTGTATCTCTTCTCATCATAGCTTGAGTCTGTGTATTCCAGGAGCATGCGGATCGGGTGTGTC
+AGTCCGCGGACGTTCCAGTATCCCAGTATCATAGGCATGGTGCTGGTGCTGTGGTCTTCTCAAACTGGCTTCAGC
diff --git a/seq/dyr_human.aa b/seq/dyr_human.aa
new file mode 100644
index 0000000..bb5fcdc
--- /dev/null
+++ b/seq/dyr_human.aa
@@ -0,0 +1,4 @@
+>gi|118992|sp|P00374.2|DYR_HUMAN RecName: Full=Dihydrofolate reductase
+MVGSLNCIVAVSQNMGIGKNGDLPWPPLRNEFRYFQRMTTTSSVEGKQNLVIMGKKTWFSIPEKNRPLKGRINLVLSREL
+KEPPQGAHFLSRSLDDALKLTEQPELANKVDMVWIVGGSSVYKEAMNHPGHLKLFVTRIMQDFESDTFFPEIDLEKYKLL
+PEYPGVLSDVQEEKGIKYKFEVYEKND
diff --git a/seq/egmsmg.aa b/seq/egmsmg.aa
new file mode 100644
index 0000000..e3ec155
--- /dev/null
+++ b/seq/egmsmg.aa
@@ -0,0 +1,19 @@
+>EGMSMG Epidermal growth factor precursor - Mouse
+MPWGRRPTWLLLAFLLVFLKISILSVTAWQTGNCQPGPLERSERSGTCAGPAPFLVFSQGKSISRIDPDG
+TNHQQLVVDAGISADMDIHYKKERLYWVDVERQVLLRVFLNGTGLEKVCNVERKVSGLAIDWIDDEVLWV
+DQQNGVITVTDMTGKNSRVLLSSLKHPSNIAVDPIERLMFWSSEVTGSLHRAHLKGVDVKTLLETGGISV
+LTLDVLDKRLFWVQDSGEGSHAYIHSCDYEGGSVRLIRHQARHSLSSMAFFGDRIFYSVLKSKAIWIANK
+HTGKDTVRINLHPSFVTPGKLMVVHPRAQPRTEDAAKDPDPELLKQRGRPCRFGLCERDPKSHSSACAEG
+YTLSRDRKYCEDVNECATQNHGCTLGCENTPGSYHCTCPTGFVLLPDGKQCHELVS
+CPGNVSKCSHGCVLTSDGPRCICPAGSVLGRDGKTCTGCSSPDNGGCSQICLPLRPGSWECDCFPGYDLQ
+SDRKSCAASGPQPLLLFANSQDIRHMHFDGTDYKVLLSRQMGMVFALDYDPVESKIYFAQTALKWIERAN
+MDGSQRERLITEGVDTLEGLALDWIGRRIYWTDSGKSVVGGSDLSGKHHRIIIQERISRPRGIAVHPRAR
+RLFWTDVGMSPRIESASLQGSDRVLIASSNLLEPSGITIDYLTDTLYWCDTKRSVIEMANLDGSKRRRLI
+QNDVGHPFSLAVFEDHLWVSDWAIPSVIRVNKRTGQNRVRLQGSMLKPSSLVVVHPLAKPGADPCLYRNG
+GCEHICQESLGTARCLCREGFVKAWDGKMCLPQDYPILSGENADLSKEVTSLSNST
+QAEVPDDDGTESSTLVAEIMVSGMNYEDDCGPGGCGSHARCVSDGETAECQCLKGFARDGNLCSDIDECV
+LARSDCPSTSSRCINTEGGYVCRCSEGYEGDGISCFDIDECQRGAHNCAENAACTNTEGGYNCTCAGRPS
+SPGRSCPDSTAPSLLGEDGHHLDRNSYPGCPSSYDGYCLNGGVCMHIESLDSYTCNCVIGYSGDRCQTRD
+LRWWELRHAGYGQKHDIMVVAVCMVALVLLLLLGMWGTYYYRTRKQLSNPPKNPCDEPSGSVSSSGPDSS
+SGAAVASCPQPWFVVLEKHQDPKNGSLPADGTNGAVVDAGLSPSLQLGSVHLTSWRQKPHIDGMGTGQSC
+WIPPSSDRGPQEIEGNSHLPSYRPVGPEKLHSLQSANGSCHERAPDLPRQTEPVK 
diff --git a/seq/grou_drome.pseg b/seq/grou_drome.pseg
new file mode 100644
index 0000000..618f592
--- /dev/null
+++ b/seq/grou_drome.pseg
@@ -0,0 +1,14 @@
+>gi|121620|sp|P16371|GROU_DROME GROUCHO PROTEIN (ENHANCER OF SPLIT M9/10)
+MYPSPVRHpaaggpppqgpIKFTIADTLERIKEEFNFLQAHYHSIKLECEKLSNEKTEMQ
+RHYVMYYEMSYGLNVEMHKQTEIAKRLNTLINQLLPFLQADHQQQVLQAVERAKQVTMQE
+LNLIIGQQIHAqqvpggppqpmgALNPFGALGATMGLPHGPQGLLNKPPEHHRPDIKPTG
+LEGPAAAEERLRNSVSPADREKYRTRSPLDIENDSKRRKDEKLQEDEGEKSDQDLVVDVA
+NEMESHSPRPNGEHVSMEVRDRESLNGERLEKPSSSGIKQErppsrsgssssrstpsLKT
+KDMEKPGTPGakartptpnaaapapgvnpkqmmpqgpppagypgapyqrpaDPYQRPPSD
+PAYGRPPPMPYDPHAHVRTNGIPHPSALTGGKPAYSFHMNGEGSLQPVPFPPDALVGVGI
+PRHARQINTLSHGEVVCAVTISNPTKYVYTGGKGCVKVWDISQPGNKNPVSQLDCLQRDN
+YIRSVKLLPDGRTLIVGGEASNLSIWDLASPTPRIKAELTSAAPACYALAISPDSKVCFS
+CCSDGNIAVWDLHNEILVRQFQGHTDGASCIDISPDGSRLWTGGLDNTVRSWDLREGRQL
+QQHDFSSQIFSLGYCPTGDWLAVGMENSHVEVLHASKPDKYQLHLHESCVLSLRFAACGK
+WFVSTGKDNLLNAWRTPYGASIFQSKETSSVLSCDISTDDKYIVTGSGDKKATVYEVIY
+
diff --git a/seq/gst.nlib b/seq/gst.nlib
new file mode 100644
index 0000000..2f04a8e
--- /dev/null
+++ b/seq/gst.nlib
@@ -0,0 +1,284 @@
+>pGT875 | 266
+GCTGAAGCCAGTTTGAGAAGACCACAGCACCAGCACCATGCCTATGATACTGGGATACTG
+GAACGTCCGCGGACTGACACACCCGATCCGCATGCTCCTGGAATACACAGACTCAAGCTA
+TGATGAGAAGAGATACACCATGGGTGACGCTCCCGACTTTGACAGAAGCCAGTGGCTGAA
+TGAGAAGTTCAAGCTGGGCCTGGACTTTCCCAATCTGCCTTACTTGATCGATGGATCACA
+CAAGATCACCCAGAGCAATGCCATCCTGCGCTACCTTGCCCGAAAGCACCACCTGGATGG
+AGAGACAGAGGAGGAGAGGATCCGTGCAGACATTGTGGAGAACCAGGTCATGGACACCCG
+catgcagctcatcatgctctgttacaaccctgactttgagaagcagaagccagagttctt
+gaagaccatccctgagaaaatgaagctctactctgagttcctgggcaagaggccatggtt
+tgcaggggacaaggtcacctatgtggatttccttgcttatgacattcttgaccagtaccg
+tatgtttgagcccaagtgcctggacgccttcccaaacctgagggacttcctggcccgctt
+cgagggcctcaagaagatctctgcctacatgaagagtagccgctacatcgcaacacctat
+ATTTTCAAAGATGGCCCACTGGAGTAACAAGTAGGCCCTTGCTACACGGGCACTCACTAG
+GAGGACCTGTCCACACTGGGGATCCTGCAGGCCCTGGGTGGGGACAGCACCCTGGCCTTC
+TGCACTGTGGCTCCTGGTTCTCTCTCCTTCCCGCTCCCTTCTGCAGCTTGGTCAGCCCCA
+TCTCCTCACCCTCTTCCCAGTCAAGTCCACACAGCCTTCATTCTCCCCAGTTTCTTTCAC
+ATGGCCCCTTCTTCATTGGCTCCCTGACCCAACCTCACAGCCCGTTTCTGCGAACTGAGG
+TCTGTCCTGAACTCACGCTTCCTAGAATTACCCCGATGGTCAACACTATCTTAGTGCTAG
+CCCTCCCTAGAGTTACCCCGAAGTCAATACTTGAGTGCCAGCCTGTTCCTGGTGGAGTAG
+CCTCCCCAGGTCTGTCTCGTCTACAATAAAGTCTGAAACACACTT
+>RABGLTR Oryctolagus cuniculus glutathione S-transferase mRNA, complete cds.
+ CGGCAGCTCC TGTGGACTCA GAGGAGCTGC ACCATGCCCA TGACGCTGGG TTACTGGGAC
+ GTCCGTGGGC TGGCTCTGCC AATCCGCATG CTCCTGGAAT ACACGGACAC CAGCTATGAG
+ GAAAAGAAAT ACACCATGGG GGATGCTCCC AACTATGACC AAAGCAAGTG GCTGAGTGAG
+ AAGTTCACCC TGGGCCTGGA CTTTCCCAAT CTGCCCTACC TAATTGATGG GACTCACAAG
+ CTCACGCAGA GCAACGCCAT CCTGCGCTAC CTGGCCCGCA AGCACGGCCT GTGTGGGGAG
+ ACGGAAGAGG AGAGGATTCG CGTGGACATT CTGGAGAATC AGCTGATGGA CAACCGCTTC
+ CAACTTGTAA ACGTCTGCTA CAGTCCCGAC TTTGAGAAGC TCAAGCCCGA GTACCTGAAG
+ GGGCTCCCTG AGAAGCTGCA GCTGTACTCG CAGTTCCTGG GAAGCCTCCC CTGGTTCGCA
+ GGGGACAAGA TCACCTTCGC CGATTTCCTT GTCTACGACG TTCTTGACCA GAACCGGATA
+ TTTGTGCCTG GGTGCCTGGA CGCGTTCCCA AACCTGAAGG ACTTTCATGT CCGCTTTGAG
+ GGCCTGCCGA AGATCTCTGC CTACATGAAG TCCAGCCGCT TTATCCGAGT CCCTGTGTTT
+ TTAAAGAAGG CCACGTGGAC GGGAATATAG GGCCCTGGAA GGAGGTGGGC CATCCCCTGG
+ GAGCTCAGGT CTCCCAGCCT CTTGCTCATC TTCCTCAACC TTCCCAAAAA CAAAAGCCTA
+ CTGCCTGCTT GTGTTCTGAG CCAGCCCCTC CCATGCAGGC TCTGGCCAGC TCAGAAACCC
+ ACCCTTCTAG CCATGGGCTC TCTAAGGCTG CTCTTCCCGG ACTAAGCAGA CCCCACGGGC
+ CACATCTCTC TTCGTGGGCT CCGTTTGATC TCCCCGACTG CCAGAATCAT GGTTGTACCT
+ GCTGCGGCCC TATTCCCAGG CGGGACTCCC CAGTGCTGTT TGGTCCCCAG GAGGGCCTGA
+ CCTCAGCCAG GGCCCTTCTT ACCCCTCCCT GTGTTGCACT GGAGTGGGCG CTGACTGTGC
+ AGACCTTGGG GGGGTTTCTT TGTTCTGCTG CCCACAGCAT GGCTGGGTGG GGCAGGATTA
+ GTGTGGGGGG AGTTGGGTGC TCAGGCAGGG CTATGAGGGA TCTTGTTCAT TTCCGGGCCC
+ TATCCATGTG CTCTGCTCCT CGCCCTGGGT TTTCTCCTCT GCCCGGGTTC CTCGTTCCTT
+ CACCCTGGAG GGAGGCCAGG GCCACGTGCA GCCGTGCCGG GTTCTGAGAG CGCTGGGCTG
+ ATGGGGACGG GGCTGAGCAG GCTTGAGCAG ACCCCTCTGT CACCATCTCC CGGAAGCTTT
+ CAGCTGATAC AGATGCTCCT CGTCTATAGT TTCAGGATGT TTCTCAATAA AACATCCCAC
+ TGT
+>BTGST Bovine GST mRNA for gluthathione S-transferase, class-pi.
+ CGGCTCAGGC CGCCGCCGAG CGCGCTGGAA CTTTGCTGCC GCCGCCACCT TTACCGACTT
+ CCCCGACTCC AGGATGCCTC CCTACACCAT CGTCTACTTC CCGGTTCAAG GGCGCTGCGA
+ GGCCATGCGC ATGCTGCTGG CCGACCAGGG CCAGAGCTGG AAGGAGGAGG TCGTAGCCAT
+ GCAGAGCTGG CTGCAGGGCC CACTCAAGGC CTCCTGCCTG TACGGGCAGC TCCCCAAGTT
+ CCAGGACGGA GACCTCACGC TGTACCAGTC CAATGCCATC CTGCGGCACC TGGGCCGCAC
+ CCTCGGGCTG TATGGGAAGG ACCAGCAGGA GGCGGCCCTG GTGGACATGG TGAATGACGG
+ TGTAGAGGAC CTTCGCTGCA AATACGTCTC CCTCATTTAC ACCAACTACG AGGCGGGCAA
+ GGAGGACTAT GTGAAGGCGC TGCCCCAGCA CCTGAAGCCT TTCGAGACCC TGCTGTCCCA
+ GAACAAGGGT GGCCAGGCCT TCATCGTGGG CGACCAGATC TCCTTTGCGG ACTACAACCT
+ GCTGGACCTG CTTCGGATTC ACCAGGTCCT GGCCCCCAGC TGTCTGGACT CCTTCCCCCT
+ GCTCTCAGCC TACGTGGCCC GTCTCAACTC CCGGCCCAAG CTCAAGGCCT TCCTGGCCTC
+ CCCCGAGCAC ATGAACCGGC CCATCAACGG CAATGGGAAA CAGTGAGGGC TTGCAGCACT
+ CTCTGCTCGA GGCAGGGGGC TGCCTGCTCT TCCCTTTCCC CAGGACCAAT AAAACTTCCA
+ AGAGAGAAAA AAAAAAAAAA AAAAAAAAA
+>OCDHPR Rabbit mRNA for dihydropyridine (DHP) receptor (from skeletal
+ TTCCACCTAC ATGTTGGCCT GGACAGCAGG GAGCCGAGGG GAGGCTAATT TTACTGCTGG
+ GAGCAGCTAG CATAATCCTC CCGCCCCCAC CCCGCTGGCT CAGCAGGGCA GGCTTCGCCC
+ GGCAAGCTCA GCGGCCCAGT CCCCAAGGCG GGGAACACTG GGGACGCAGG GAAGAGAGGG
+ CCGCGGGGTG GGGGAGCAGC AGGAAGCGCC GTGGCCAGGG AAGCCATGGA GCCATCCTCA
+ CCCCAGGATG AGGGCCTGAG GAAGAAACAG CCCAAGAAGC CCCTGCCCGA GGTCCTGCCC
+ AGGCCGCCGC GGGCTCTGTT CTGCCTGACC CTGCAGAACC CGCTGAGGAA GGCGTGCATC
+ AGCATCGTGG AATGGAAACC CTTCGAGACC ATCATCCTGC TCACCATCTT TGCCAACTGT
+ GTGGCCCTGG CCGTGTACCT GCCCATGCCC GAGGATGACA ACAACTCCCT GAACCTGGGC
+ CTGGAGAAGC TGGAGTACTT CTTCCTCACC GTCTTCTCCA TCGAAGCCGC CATGAAGATC
+ ATCGCCTACG GCTTCCTGTT CCACCAGGAC GCCTACCTGC GCAGCGGCTG GAACGTGCTG
+ GACTTCATCA TCGTCTTCCT GGGGGTCTTC ACGGCGATTC TGGAACAGGT CAACGTCATC
+ CAGAGCAACA CGGCCCCGAT GAGCAGCAAA GGAGCCGGCC TGGACGTCAA GGCCCTGAGG
+ GCCTTCCGTG TGCTCAGACC CCTCCGGCTG GTGTCGGGGG TGCCTAGTTT GCAGGTGGTC
+ CTCAACTCCA TCTTCAAGGC CATGCTCCCC CTGTTCCACA TCGCCCTGCT CGTCCTCTTC
+ ATGGTCATCA TCTACGCCAT CATCGGGCTG GAGCTCTTCA AGGGCAAGAT GCACAAGACC
+ TGCTACTACA TCGGGACAGA CATCGTGGCC ACAGTGGAGA ATGAGAAGCC CTCGCCCTGC
+ GCTAGGACGG GCTCGGGGCG CCCCTGCACC ATCAACGGCA GCGAGTGCCG GGGCGGCTGG
+ CCGGGGCCCA ACCACGGCAT CACGCACTTC GACAACTTCG GCTTCTCCAT GCTCACCGTG
+ TACCAGTGCA TCACCATGGA GGGCTGGACA GATGTCCTCT ACTGGGTCAA CGATGCCATC
+ GGGAACGAGT GGCCCTGGAT CTACTTTGTC ACTCTCATCC TGCTGGGGTC CTTCTTCATC
+ CTCAACCTGG TGCTGGGCGT CCTGAGTGGG GAATTCACCA AGGAGCGGGA GAAGGCCAAG
+ TCCAGGGGAA CCTTCCAGAA GCTGCGGGAG AAGCAGCAGC TGGAGGAGGA CCTTCGGGGC
+ TACATGAGCT GGATCACGCA GGGCGAGGTC ATGGACGTGG AGGACCTGAG AGAAGGAAAG
+ CTGTCCTTGG AAGAGGGAGG CTCCGACACG GAAAGCCTGT ACGAAATCGA GGGCTTGAAC
+ AAAATCATCC AGTTCATCCG ACACTGGAGG CAGTGGAACC GTGTCTTTCG CTGGAAGTGC
+ CATGACCTGG TGAAGTCGAG AGTCTTCTAC TGGCTGGTCA TCCTGATCGT GGCCCTCAAC
+ ACCCTGTCCA TCGCCTCGGA GCACCACAAC CAGCCGCTCT GGCTGACCCA CTTGCAAGAC
+ ATCGCCAATC GAGTGCTGCT GTCACTCTTC ACCATCGAGA TGCTGCTGAA GATGTACGGG
+ CTGGGCCTGC GCCAGTACTT CATGTCCATC TTCAACCGCT TCGACTGCTT CGTGGTGTGC
+ AGCGGCATCC TGGAGCTGCT GCTGGTGGAG TCGGGCGCCA TGACGCCGCT GGGCATCTCC
+ GTGTTGCGCT GCATCCGCCT CCTGAGGCTC TTCAAGATCA CCAAGTACTG GACGTCGCTC
+ AGCAACCTGG TGGCCTCCCT GCTCAACTCC ATCCGCTCCA TCGCCTCGCT GCTGCTGCTG
+ CTCTTCCTCT TCATCATCAT CTTCGCCCTG CTGGGCATGC AGCTCTTCGG GGGGCGGTAC
+ GACTTCGAGG ACACGGAAGT GCGACGCAGC AACTTCGACA ACTTCCCCCA GGCCCTCATC
+ AGCGTCTTCC AGGTGCTGAC GGGTGAGGAC TGGAACTCCG TGATGTACAA CGGGATCATG
+ GCCTACGGAG GCCCGTCCTA CCCGGGCGTT CTCGTGTGCA TCTATTTCAT CATCCTTTTT
+ GTCTGCGGCA ACTATATCCT GCTGAATGTC TTCCTGGCCA TCGCCGTGGA CAACCTGGCC
+ GAGGCGGAGA GCCTGACTTC CGCGCAAAAG GCCAAGGCCG AGGAGAGGAA ACGCAGGAAG
+ ATGTCCAGGG GTCTCCCTGA CAAGACAGAG GAGGAGAAGT CTGTGATGGC CAAGAAGCTG
+ GAGCAGAAGC CCAAGGGGGA GGGCATCCCC ACCACTGCCA AGCTCAAGGT CGATGAGTTC
+ GAATCTAACG TCAACGAGGT GAAGGACCCC TACCCTTCAG CTGACTTCCC AGGGGATGAT
+ GAGGAGGACG AGCCTGAGAT CCCAGTGAGC CCCCGACCGC GCCCGCTGGC CGAGCTGCAG
+ CTCAAAGAGA AGGCAGTGCC CATCCCGGAA GCCAGCTCCT TCTTCATCTT CAGTCCCACC
+ AATAAGGTCC GTGTCCTGTG TCACCGCATC GTCAACGCCA CCTGGTTCAC CAACTTCATC
+ CTGCTCTTCA TCCTGCTCAG CAGTGCTGCG CTGGCCGCCG AGGACCCCAT CCGGGCGGAG
+ TCCGTGAGGA ATCAGATCCT TGGATATTTT GATATTGCCT TCACCTCTGT CTTCACTGTG
+ GAGATTGTCC TCAAGATGAC GACCTACGGC GCCTTCCTGC ACAAGGGCTC CTTCTGCCGC
+ AACTACTTCA ACATCCTGGA CCTGCTGGTG GTGGCTGTGT CTCTCATCTC CATGGGTCTC
+ GAGTCCAGCA CCATCTCCGT GGTAAAGATC CTGAGAGTGC TAAGGGTGCT CCGGCCCCTG
+ CGAGCCATCA ACAGAGCCAA AGGGTTGAAG CACGTGGTCC AGTGCGTGTT CGTGGCCATC
+ CGCACCATCG GGAACATCGT CCTGGTCACC ACGCTCCTGC AGTTCATGTT CGCCTGCATT
+ GGTGTCCAGC TCTTCAAGGG CAAGTTCTTC AGCTGCAACG ACCTATCCAA GATGACAGAA
+ GAGGAGTGCA GGGGCTACTA CTATGTGTAC AAGGACGGGG ACCCCACGCA GATGGAGCTG
+ CGCCCCCGCC AGTGGATACA CAATGACTTC CACTTTGACA ACGTGCTGTC GGCCATGATG
+ TCGCTCTTCA CGGTGTCCAC CTTCGAGGGA TGGCCCCAGC TGCTGTACAG GGCCATAGAC
+ TCCAACGAGG AGGACATGGG CCCCGTTTAC AACAACCGAG TGGAGATGGC CATCTTCTTC
+ ATCATCTACA TCATCCTCAT TGCCTTCTTC ATGATGAACA TCTTTGTGGG CTTTGTCATC
+ GTCACCTTCC AGGAGCAGGG GGAGACAGAG TACAAGAACT GCGAGCTGGA CAAGAACCAG
+ CGCCAGTGTG TGCAGTATGC CCTGAAGGCC CGCCCACTTC GGTGCTACAT CCCCAAGAAC
+ CCATACCAGT ACCAGGTGTG GTACGTCGTC ACCTCCTCCT ACTTTGAATA CCTGATGTTC
+ GCCCTCATCA TGCTCAACAC CATCTGCCTG GGCATGCAGC ACTACCACCA GTCGGAGGAG
+ ATGAACCACA TCTCGGACAT CCTCAACGTG GCCTTCACCA TCATCTTCAC ACTGGAGATG
+ ATCCTCAAGC TCTTGGCGTT CAAGGCCAGG GGCTATTTCG GAGACCCCTG GAATGTGTTC
+ GACTTCCTGA TCGTCATCGG CAGCATCATT GACGTCATCC TCAGCGAGAT CGACACTTTC
+ CTGGCCTCCA GCGGGGGACT GTATTGCCTG GGTGGCGGCT GCGGGAACGT TGACCCAGAC
+ GAGAGCGCCC GCATCTCCAG TGCCTTCTTC CGCCTGTTCC GGGTCATGAG GCTGATCAAG
+ CTGCTGAGTC GGGCCGAGGG CGTGCGCACG CTGCTGTGGA CGTTCATCAA GTCCTTCCAG
+ GCCCTGCCCT ACGTGGCCCT GCTCATCGTC ATGCTGTTCT TCATCTACGC CGTCATCGGC
+ ATGCAGATGT TTGGAAAGAT CGCCCTGGTG GACGGGACCC AGATCAACCG CAACAACAAC
+ TTCCAGACCT TCCCGCAGGC CGTGCTGCTG CTCTTCAGGT GTGCGACAGG GGAGGCGTGG
+ CAAGAGATCC TGCTGGCCTG CAGCTACGGG AAGTTGTGCG ACCCAGAGTC AGACTACGCC
+ CCGGGCGAGG AGTACACGTG TGGCACCAAC TTCGCCTACT ACTACTTCAT CAGCTTCTAC
+ ATGCTCTGCG CCTTCCTGAT CATCAACCTC TTCGTGGCTG TCATCATGGA CAACTTTGAC
+ TACCTGACAC GCGACTGGTC CATCCTGGGC CCTCACCACC TGGACGAGTT CAAGGCTATC
+ TGGGCAGAGT ATGACCCAGA GGCCAAGGGG CGAATCAAGC ACCTGGACGT GGTGACCCTG
+ CTGAGAAGGA TCCAGCCCCC TCTGGGCTTC GGGAAGTTCT GTCCACACCG GGTGGCCTGT
+ AAGCGCCTGG TGGGCATGAA CATGCCCCTG AACAGTGACG GCACGGTCAC CTTCAATGCC
+ ACGCTCTTTG CCCTGGTGCG CACGGCCCTC AAGATCAAGA CAGAAGGTAA CTTTGAGCAG
+ GCCAACGAGG AGCTGAGGGC CATCATCAAG AAGATCTGGA AGAGAACCAG CATGAAGCTG
+ CTGGACCAGG TCATCCCTCC CATAGGAGAT GACGAGGTGA CCGTGGGGAA GTTCTACGCC
+ ACATTCCTCA TCCAGGAGCA CTTCCGGAAG TTCATGAAGC GCCAGGAGGA ATATTATGGG
+ TATCGGCCCA AGAAGGACAC CGTGCAGATC CAGGCTGGGC TGCGGACCAT AGAGGAGGAG
+ GCGGCCCCTG AGATCCGCCG CACCATCTCA GGAGACCTGA CCGCCGAGGA GGAGCTGGAG
+ AGAGCCATGG TGGAGGCTGC GATGGAGGAG AGGATCTTCC GGAGGACGGG AGGCCTGTTT
+ GGCCAGGTGG ACACCTTCCT GGAAAGGACC AACTCCCTGC CCCCGGTGAT GGCCAACCAA
+ AGACCGCTCC AGTTTGCTGA GATAGAAATG GAAGAGCTTG AGTCGCCTGT CTTCTTGGAG
+ GACTTCCCTC AAGATGCAAG AACCAACCCT CTCGCTCGTG CCAATACCAA CAACGCCAAT
+ GCCAATGTTG CCTATGGCAA CAGCAACCAT AGCAACAACC AGATGTTTTC CAGCGTCCAC
+ TGTGAAAGGG AGTTCCCGGG AGAGGCGGAG ACACCGGCTG CCGGACGAGG AGCCCTCAGC
+ CACTCCCACA GGGCCCTGGG ACCTCACAGC AAGCCCTGTG CTGGAAAACT GAATGGGCAG
+ CTGGTCCAGC CGGGGATGCC CATCAACCAG GCACCTCCTG CCCCCTGCCA GCAGCCTAGC
+ ACGGATCCCC CAGAGCGCGG GCAGAGGAGG ACCTCCCTGA CAGGGTCTCT GCAAGACGAA
+ GCACCCCAGA GGAGGAGCTC CGAGGGGAGC ACCCCCAGGC GCCCGGCTCC TGCTACAGCT
+ CTGCTGATCC AAGAGGCTCT GGTTCGAGGG GGCCTGGACA CCTTGGCAGC TGATGCTGGC
+ TTCGTCACGG CAACAAGCCA GGCCCTGGCA GACGCCTGTC AGATGGAACC GGAGGAAGTA
+ GAGGTCGCAG CCACAGAGCT ACTGAAAGCG CGAGAGTCTG TCCAGGGCAT GGCCAGTGTC
+ CCGGGAAGCC TGAGCCGCAG GTCCTCCCTG GGCAGCCTTG ACCAGGTCCA GGGCTCCCAG
+ GAAACCCTTA TTCCTCCCAG GCCGTGATGG CTGTGGTGTC CACATGACCA AGGCGAGAGG
+ GACAGTGCGT GCAGAAGCTC AGCCCTGCAT GGCAGCCTCC CTCTGTCTCA GCCCTCCTGC
+ TGAGCTGGGG CGGTCTGGAA CCGCACCAGG AAGCCAGGAG CCTCCCCTGG CCAGCAAGAG
+ GCATGATTCT AAAGCCATCC AGAAAGGCCT GGTCAGTGCC ACTCCCCAGC AGGACATTAA
+ AGTCTCTAGG TCTGTGGCAC TGG
+>RABALP1A Rabbit dihydropyridine-sensitive calcium channel alpha-1 subunit
+ TTCCACCTAC ATGTTGGCCT GGACAGCAGG GAGCCGAGGG GAGGCTAATT TTACTGCTGG
+ GAGCAGCTAG CATAATCCTC CCGCCCCCAC CCCGCTGGCT CAGCAGGGCA GGCTTCGCCC
+ GGCAAGCTCA GCGGCCCAGT CCCCAAGGCG GGGAACACTG GGGACGCAGG GAAGAGAGGG
+ CCGCGGGGTG GGGGAGCAGC AGGAAGCGCC GTGGCCAGGG AAGCCATGGA GCCATCCTCA
+ CCCCAGGATG AGGGCCTGAG GAAGAAACAG CCCAAGAAGC CCCTGCCCGA GGTCCTGCCC
+ AGGCCGCCGC GGGCTCTGTT CTGCCTGACC CTGCAGAACC CGCTGAGGAA GGCGTGCATC
+ AGCATCGTGG AATGGAAACC CTTCGAGACC ATCATCCTGC TCACCATCTT TGCCAACTGT
+ GTGGCCCTGG CCGTGTACCT GCCCATGCCC GAGGATGACA ACAACTCCCT GAACCTGGGC
+ CTGGAGAAGC TGGAGTACTT CTTCCTCACC GTCTTCTCCA TCGAAGCCGC CATGAAGATC
+ ATCGCCTACG GCTTCCTGTT CCACCAGGAC GCCTACCTGC GCAGCGGCTG GAACGTGCTG
+ GACTTCATCA TCGTCTTCCT GGGGGTCTTC ACGGCGATTC TGGAACAGGT CAACGTCATC
+ CAGAGCAACA CGGCCCCGAT GAGCAGCAAA GGAGCCGGCC TGGACGTCAA GGCCCTGAGG
+ GCCTTCCGTG TGCTCAGACC CCTCCGGCTG GTGTCGGGGG TGCCTAGTTT GCAGGTGGTC
+ CTCAACTCCA TCTTCAAGGC CATGCTCCCC CTGTTCCACA TCGCCCTGCT CGTCCTCTTC
+ ATGGTCATCA TCTACGCCAT CATCGGGCTG GAGCTCTTCA AGGGCAAGAT GCACAAGACC
+ TGCTACTACA TCGGGACAGA CATCGTGGCC ACAGTGGAGA ATGAGAAGCC CTCGCCCTGC
+ GCTAGGACGG GCTCGGGGCG CCCCTGCACC ATCAACGGCA GCGAGTGCCG GGGCGGCTGG
+ CCGGGGCCCA ACCACGGCAT CACGCACTTC GACAACTTCG GCTTCTCCAT GCTCACCGTG
+ TACCAGTGCA TCACCATGGA GGGCTGGACA GATGTCCTCT ACTGGGTCAA CGATGCCATC
+ GGGAACGAGT GGCCCTGGAT CTACTTTGTC ACTCTCATCC TGCTGGGGTC CTTCTTCATC
+ CTCAACCTGG TGCTGGGCGT CCTGAGTGGG GAATTCACCA AGGAGCGGGA GAAGGCCAAG
+ TCCAGGGGAA CCTTCCAGAA GCTGCGGGAG AAGCAGCAGC TGGAGGAGGA CCTTCGGGGC
+ TACATGAGCT GGATCACGCA GGGCGAGGTC ATGGACGTGG AGGACCTGAG AGAAGGAAAG
+ CTGTCCTTGG AAGAGGGAGG CTCCGACACG GAAAGCCTGT ACGAAATCGA GGGCTTGAAC
+ AAAATCATCC AGTTCATCCG ACACTGGAGG CAGTGGAACC GTGTCTTTCG CTGGAAGTGC
+ CATGACCTGG TGAAGTCGAG AGTCTTCTAC TGGCTGGTCA TCCTGATCGT GGCCCTCAAC
+ ACCCTGTCCA TCGCCTCGGA GCACCACAAC CAGCCGCTCT GGCTGACCCA CTTGCAAGAC
+ ATCGCCAATC GAGTGCTGCT GTCACTCTTC ACCATCGAGA TGCTGCTGAA GATGTACGGG
+ CTGGGCCTGC GCCAGTACTT CATGTCCATC TTCAACCGCT TCGACTGCTT CGTGGTGTGC
+ AGCGGCATCC TGGAGCTGCT GCTGGTGGAG TCGGGCGCCA TGACGCCGCT GGGCATCTCC
+ GTGTTGCGCT GCATCCGCCT CCTGAGGCTC TTCAAGATCA CCAAGTACTG GACGTCGCTC
+ AGCAACCTGG TGGCCTCCCT GCTCAACTCC ATCCGCTCCA TCGCCTCGCT GCTGCTGCTG
+ CTCTTCCTCT TCATCATCAT CTTCGCCCTG CTGGGCATGC AGCTCTTCGG GGGGCGGTAC
+ GACTTCGAGG ACACGGAAGT GCGACGCAGC AACTTCGACA ACTTCCCCCA GGCCCTCATC
+ AGCGTCTTCC AGGTGCTGAC GGGTGAGGAC TGGAACTCCG TGATGTACAA CGGGATCATG
+ GCCTACGGAG GCCCGTCCTA CCCGGGCGTT CTCGTGTGCA TCTATTTCAT CATCCTTTTT
+ GTCTGCGGCA ACTATATCCT GCTGAATGTC TTCCTGGCCA TCGCCGTGGA CAACCTGGCC
+ GAGGCCGAGA GCCTGACTTC CGCGCAAAAG GCCAAGGCCG AGGAGAGGAA ACGTAGGAAG
+ ATGTCCAGGG GTCTCCCTGA CAAGAGAGAG GAGGAGAAGT CTGTGATGGC CAAGAAGCTG
+ GAGCAGAAGC CCAAGGGGGA GGGCATCCCC ACCACTGCCA AGCTCAAGGT CGATGAGTTC
+ GAATCTAACG TCAACGAGGT GAAGGACCCC TACCCTTCAG CTGACTTCCC AGGGGATGAT
+ GAGGAGGACG AGCCTGAGAT CCCAGTGAGC CCCCGACCGC GCCCGCTGGC CGAGCTGCAG
+ CTCAAAGAGA AGGCAGTGCC CATCCCGGAA GCCAGCTCCT TCTTCATCTT CAGTCCCACC
+ AATAAGGTCC GTGTCCTGTG TCACCGCATC GTCAACGCCA CCTGGTTCAC CAACTTCATC
+ CTGCTCTTCA TCCTGCTCAG CAGTGCTGCG CTGGCCGCCG AGGACCCCAT CCGGGCGGAG
+ TCCGTGAGGA ATCAGATCCT TGGATATTTT GATATTGCCT TCACCTCTGT CTTCACTGTG
+ GAGATTGTCC TCAAGATGAC AACCTACGGC GCCTTCCTGC ACAAGGGCTC CTTCTGCCGC
+ AACTACTTCA ACATCCTGGA CCTGCTGGTG GTGGCCGTGT CTCTCATCTC CATGGGTCTC
+ GAGTCCAGCA CCATCTCCGT GGTAAAGATC CTGAGAGTGC TAAGGGTGCT CCGGCCCCTG
+ CGAGCCATCA ACAGAGCCAA AGGGTTGAAG CACGTGGTCC AGTGCGTGTT CGTGGCCATC
+ CGCACCATCG GGAACATCGT CCTGGTCACC ACGCTCCTGC AGTTCATGTT CGCCTGCATC
+ GGTGTCCAGC TCTTCAAGGG CAAGTTCTTC AGCTGCAATG ACCTATCCAA GATGACAGAA
+ GAGGAGTGCA GGGGCTACTA CTATGTGTAC AAGGACGGGG ACCCCACGCA GATGGAGCTG
+ CGCCCCCGCC AGTGGATACA CAATGACTTC CACTTTGACA ACGTGCTGTC GGCCATGATG
+ TCGCTCTTCA CGGTGTCCAC CTTCGAGGGA TGGCCCCAGC TGCTGTACAG GGCCATAGAC
+ TCCAACGAGG AGGACATGGG CCCCGTTTAC AACAACCGAG TGGAGATGGC CATCTTCTTC
+ ATCATCTACA TCATCCTCAT TGCCTTCTTC ATGATGAACA TCTTTGTGGG CTTTGTCATC
+ GTCACCTTCC AGGAGCAGGG GGAGACAGAG TACAAGAACT GCGAGCTGGA CAAGAACCAG
+ CGCCAGTGTG TGCAGTATGC CCTGAAGGCC CGCCCACTTC GGTGCTACAT CCCCAAGAAC
+ CCATACCAGT ACCAGGTGTG GTACGTCGTC ACCTCCTCCT ACTTTGAATA CCTGATGTTC
+ GCCCTCATCA TGCTCAACAC CATCTGCCTG GGCATGCAGC ACTACCACCA GTCGGAGGAG
+ ATGAACCACA TCTCAGACAT CCTCAATGTG GCCTTCACCA TCATCTTCAC GCTGGAGATG
+ ATTCTCAAGC TCTTGGCGTT CAAGGCCAGG GGCTATTTCG GAGACCCCTG GAATGTGTTC
+ GACTTCCTGA TCGTCATCGG CAGCATCATT GACGTCATCC TCAGCGAGAT CGACACTTTC
+ CTGGCCTCCA GCGGGGGACT GTATTGCCTG GGTGGCGGCT GCGGGAACGT TGACCCAGAC
+ GAGAGCGCCC GCATCTCCAG TGCCTTCTTC CGCCTGTTCC GGGTTATGAG GCTGATCAAG
+ CTGCTGAGTC GGGCCGAGGG CGTGCGCACG CTGCTGTGGA CGTTCATCAA GTCCTTCCAG
+ GCCCTGCCCT ACGTGGCCCT GCTCATCGTC ATGCTGTTCT TCATCTACGC CGTCATCGGC
+ ATGCAGATGT TTGGAAAGAT CGCCCTGGTG GACGGGACCC AGATCAACCG CAACAACAAC
+ TTCCAGACCT TCCCGCAGGC CGTGCTGCTG CTCTTCAGGT GTGCGACAGG GGAGGCGTGG
+ CAAGAGATCC TGCTGGCCTG CAGCTACGGG AAGTTGTGCG ACCCAGAGTC AGACTACGCC
+ CCGGGCGAGG AGTACACGTG TGGCACCAAC TTCGCCTACT ACTACTTCAT CAGCTTCTAC
+ ATGCTCTGCG CCTTCCTGAT CATCAACCTC TTCGTGGCTG TCATCATGGA CAACTTTGAC
+ TACCTGACAC GCGACTGGTC CATCCTGGGC CCTCACCACC TGGACGAGTT CAAGGCCATC
+ TGGGCAGAGT ATGACCCAGA GGCCAAGGGG CGAATCAAGC ACCTGGACGT GGTGACCCTG
+ CTGAGAAGGA TCCAGCCCCC TCTGGGCTTC GGGAAGTTCT GTCCACACCG GGTGGCCTGT
+ AAGCGCCTGG TGGGCATGAA CATGCCCCTG AACAGTGACG GCACGGTCAC CTTCAATGCC
+ ACGCTCTTTG CCCTGGTGCG CACGGCCCTC AAGATCAAGA CAGAAGGTAA CTTCGAGCAG
+ GCCAACGAGG AGCTGAGGGC CATCATCAAG AAGATCTGGA AGAGAACCAG CATGAAGCTA
+ CTGGACCAGG TCATCCCTCC CATAGGAGAT GACGAGGTGA CCGTGGGGAA GTTCTACGCC
+ ACATTCCTCA TCCAGGAGCA CTTCCGGAAG TTCATGAAGC GCCAGGAGGA ATATTATGGG
+ TATCGGCCCA AGAAGGACAC CGTGCAGATC CAGGCTGGGC TGCGGACCAT AGAGGAGGAG
+ GCGGCCCCTG AGATCCGCCG CACCATCTCA GGAGACCTGA CCGCCGAGGA GGAGCTGGAG
+ AGAGCCATGG TGGAGGCTGC GATGGAGGAG AGGATCTTCC GGAGGACCGG AGGCCTGTTT
+ GGCCAGGTGG ACACCTTCCT GGAAAGGACC AACTCCCTAC CCCCGGTGAT GGCCAACCAA
+ AGACCGCTCC AGTTTGCTGA GATAGAAATG GAAGAGCTTG AGTCGCCTGT CTTCTTGGAG
+ GACTTCCCTC AAGACGCAAG AACCAACCCT CTCGCTCGTG CCAATACCAA CAACGCCAAT
+ GCCAATGTTG CCTATGGCAA CAGCAACCAT AGCAACAACC AGATGTTTTC CAGCGTCCAC
+ TGTGAAAGGG AGTTCCCGGG AGAGGCGGAG ACACCGGCTG CCGGACGAGG AGCCCTCAGC
+ CACTCCCACA GGGCCCTGGG ACCTCACAGC AAGCCCTGTG CTGGAAAACT GAATGGGCAG
+ CTGGTCCAGC CGGGAATGCC CATCAACCAG GCACCTCCTG CCCCCTGCCA GCAGCCTAGC
+ ACAGATCCCC CAGAGCGCGG GCAGAGGAGG ACCTCCCTGA CAGGGTCTCT GCAAGACGAA
+ GCACCCCAGA GGAGGAGCTC CGAGGGGAGC ACCCCCAGGC GCCCGGCTCC TGCTACAGCT
+ CTGCTGATCC AAGAGGCTCT GGTTCGAGGG GGCCTGGACA CCTTGGCAGC TGATGCTGGC
+ TTCGTCATGG CAACAAGCCA GGCCCTGGTA GACGCCTGTC AGATGGAACC GGAGGAAGTA
+ GAGGTCGCAG CCACAGAGCT ACTGAAAGAG CGAGAGTCCG TCCAGGGCAT GGCCAGTGTC
+ CCGGGAAGCC TGAGCCGCAG GTCCTCCCTG GGCAGCCTTG ACCAGGTCCA GGGCTCCCAG
+ GAAACCCTTA TTCCTCCCAG GCCGTGATGG CTGTGCAGTG TCCACATGAC CAAGGCGAGA
+ GGGACAGTGC GTGCAGAAGC TCAGCCCTGC ATGGCAGCCT CCCTCTGTCT CAGCCCTCCT
+ GCTGAGCTGG GGCGGTCTGG AACCGACCAG GAAGCCAGGA GCCTCCCCTG GCCAGCAAGA
+ GGCATGATTC TAAAGCCATC CAGAAAGGCC TGGTCAGTGC CACTCCCCAG CAGGACATTA
+ AAGTCTCTAG GTCTGTGGCA 
+>RABGSTB Oryctolagus cuniculus glutathione S-transferase mRNA, complete cds.
+ CAGAAACCAC CACTATGGCA GGGAAGCCCA AGCTTCACTA CTTCAATGCA CGGGGCAGAA
+ TGGAGTCTAT CCGGTGGCTC CTGACTGCAG CTGGGGTAGA GTTTGAAGAG AAATGTATGA
+ AAACTCGAGA AGACCTGGAA AAGTTAAGAA AAGATGGGGT ATTGATGTTC CAGCAAGTGC
+ CCATGGTTGA GATTGATGGG ATGAAGCTGG TGCAGACCAG AGCCATTTTC AACTACATTG
+ CAGACAAGCA CAACCTGTAT GGGAAAGACA TAAAGGAGAG AGCCCTGATT GATATGTATA
+ CAGAAGGCAT AGTAGATTTG AATGAATTGA TTCTTACTCG TCCATTCCTT CCACCGGAGG
+ AACAAGAGGC AAAACTTGCT CAGATCAAAG ATAAAGCAAA AAACCGTTAT TTTCCTGCCT
+ TTGAAAAGGT GTTGAAGAGC CACGGACAAG ACTACCTTGT TGGCAACAAG CTGAGCAAGG
+ CTGACATTCT CCTGGTTGAA CTTCTCTACA ACGTGGAAGA GCTCAACCCC GGCGCGACTG
+ CCAGCTTCCC TCTGCTGCAG GCCCTGAAAA CCAGGATCAG CAATCTCCCC ACCGTGAAGA
+ AGTTTCTGCA GCCTGGCAGC CAGAGGAATC CGCCTGATGA TGAGAAATGC AGAGAAGAAG
+ CAAAAATCAT TTTCCATTAA GAAGGCAAAG ATACCAAGCA CAGGCAAGAC CAGCCTCTGA
+ CCCCCTGCAG CGATGAAGTA CTTTAAATAA ATAGTGATCC TGATTGTCAT AAGGCATATT
+ ACGTTTTCTA AGTATTGTGT AAATTTAATT AAAAACCACC CATGTAGATT TAGTTGCAAT
+ ACATGGTACT TGGTTTTGAT CAAATACAAA ATTATGAGCA CCTCCTAGGA TGTCCCTTTG
+ AA
diff --git a/seq/gst.seq b/seq/gst.seq
new file mode 100644
index 0000000..0aa82e2
--- /dev/null
+++ b/seq/gst.seq
@@ -0,0 +1,20 @@
+>gi|193547|gb|J04632|MUSGLUTA Mouse glutathione S-transferase class mu (GST1-1) mRNA, complete cds
+CCTGCCTTCCGCTTTAGGGTCTGCTGCTCTGGTTACAGACCTAGGAAGGGGAGTGCCTAATTGGGATTGG
+TGCAGGGTTGGGAGGGACCCGCTGTTTTGTCCTGCCCACGTTTCTCTAGTAGTCTGTATAAAGTCACAAC
+TCCAAACACACAGGTCAGTCCTGCTGAAGCCAGTTTGAGAAGACCACAGCACCAGCACCATGCCTATGAT
+ACTGGGATACTGGAACGTCCGCGGACTGACACACCCGATCCGCATGCTCCTGGAATACACAGACTCAAGC
+TATGATGAGAAGAGATACACCATGGGTGACGCTCCCGACTTTGACAGAAGCCAGTGGCTGAATGAGAAGT
+TCAAGCTGGGCCTGGACTTTCCCAATCTGCCTTACTTGATCGATGGATCACACAAGATCACCCAGAGCAA
+TGCCATCCTGCGCTACCTTGCCCGAAAGCACCACCTGGATGGAGAGACAGAGGAGGAGAGGATCCGTGCA
+GACATTGTGGAGAACCAGGTCATGGACACCCGCATGCAGCTCATCATGCTCTGTTACAACCCTGACTTTG
+AGAAGCAGAAGCCAGAGTTCTTGAAGACCATCCCTGAGAAAATGAAGCTCTACTCTGAGTTCCTGGGCAA
+GAGGCCATGGTTTGCAGGGGACAAGGTCACCTATGTGGATTTCCTTGCTTATGACATTCTTGACCAGTAC
+CGTATGTTTGAGCCCAAGTGCCTGGACGCCTTCCCAAACCTGAGGGACTTCCTGGCCCGCTTCGAGGGCC
+TCAAGAAGATCTCTGCCTACATGAAGAGTAGCCGCTACATCGCAACACCTATATTTTCAAAGATGGCCCA
+CTGGAGTAACAAGTAGGCCCTTGCTACACGGGCACTCACTAGGAGGACCTGTCCACACTGGGGATCCTGC
+AGGCCCTGGGTGGGGACAGCACCCTGGCCTTCTGCACTGTGGCTCCTGGTTCTCTCTCCTTCCCGCTCCC
+TTCTGCAGCTTGGTCAGCCCCATCTCCTCACCCTCTTCCCAGTCAAGTCCACACAGCCTTCATTCTCCCC
+AGTTTCTTTCACATGGCCCCTTCTTCATTGGCTCCCTGACCCAACCTCACAGCCCGTTTCTGCGAACTGA
+GGTCTGTCCTGAACTCACGCTTCCTAGAATTACCCCGATGGTCAACACTATCTTAGTGCTAGCCCTCCCT
+AGAGTTACCCCGAAGTCAATACTTGAGTGCCAGCCTGTTCCTGGTGGAGTAGCCTCCCCAGGTCTGTCTC
+GTCTACAATAAAGTCTGAAACACACTT
diff --git a/seq/gstm1_human.vaa b/seq/gstm1_human.vaa
new file mode 100644
index 0000000..b0cad06
--- /dev/null
+++ b/seq/gstm1_human.vaa
@@ -0,0 +1,2 @@
+>sp|P09488|GSTM1_HUMAN GLUTATHIONE S-TRANSFERASE MU 1 (EC 2.5.1.18) (GSTM1-1) (HB SUBUNI
+MPMILGYWDIRGLAHAIRLLLEYTDSSYEEKKYTMGDAPDYDRSQWLNEKFKLGLDFPNLPYLIDGAHKITQSNAILCYIARKHNLCGETEEEKIRVDILENQTMDNHMQLGMICYNPEFEKLKPKYLEELPEKLKLYSEFLGKRPWFAGNKITFVDFLVYDVLDLHRIFEPNCLDAFPNLKDFISRFEGLEKISAYMKSSRFLPRPVFTKMAVWGNK 
diff --git a/seq/gstm1b_human.nt b/seq/gstm1b_human.nt
new file mode 100644
index 0000000..b8484a7
--- /dev/null
+++ b/seq/gstm1b_human.nt
@@ -0,0 +1,17 @@
+>gi|183668|gb|J03817.1|HUMGSTM1B Human glutathione transferase M1B (GST1) mRNA, complete cds
+GCACCAACCAGCACCATGCCCATGATACTGGGGTACTGGGACATCCGCGGGCTGGCCCACGCCATCCGCC
+TGCTCCTGGAATACACAGACTCAAGCTATGAGGAAAAGAAGTACACGATGGGGGACGCTCCTGATTATGA
+CAGAAGCCAGTGGCTGAATGAAAAATTCAAGCTGGGCCTGGACTTTCCCAATCTGCCCTACTTGATTGAT
+GGGGCTCACAAGATCACCCAGAGCAACGCCATCTTGTGCTACATTGCCCGCAAGCACAACCTGTGTGGGG
+AGACAGAAGAGGAGAAGATTCGTGTGGACATTTTGGAGAACCAGACCATGGACAACCATATGCAGCTGGG
+CATGATCTGCTACAATCCAGAATTTGAGAAACTGAAGCCAAAGTACTTGGAGGAACTCCCTGAAAAGCTA
+AAGCTCTACTCAGAGTTTCTGGGGAAGCGGCCATGGTTTGCAGGAAACAAGATCACTTTTGTAGATTTTC
+TCGTCTATGATGTCCTTGACCTCCACCGTATATTTGAGCCCAACTGCTTGGACGCCTTCCCAAATCTGAA
+GGACTTCATCTCCCGCTTTGAGGGCTTGGAGAAGATCTCTGCCTACATGAAGTCCAGCCGCTTCCTCCCA
+AGACCTGTGTTCTCAAAGATGGCTGTCTGGGGCAACAAGTAGGGCCTTGAAGGCAGGAGGTGGGAGTGAG
+GAGCCCATACTCAGCCTGCTGCCCAGGCTGTGCAGCGCAGCTGGACTCTGCATCCCAGCACCTGCCTCCT
+CGTTCCTTTCTCCTGTTTATTCCCATCTTTACTCCCAAGACTTCATTGTCCCTCTTCACTCCCCCTAAAC
+CCCTGTCCCATGCAGGCCCTTTGAAGCCTCAGCTACCCACTATCCTTCGTGAACATCCCCTCCCATCATT
+ACCCTTCCCTGCACTAAAGCCAGCCTGACCTTCCTTCCTGTTAGTGGTTGTGTCTGCTTTAAAGCCTGCC
+TGGCCCCTCGCCTGTGGAGCTCAGCCCCGAGCTGTCCCCGTGTTGCATGAAGGAGCAGCATTGACTGGTT
+TACAGGCCCTGCTCCTGCAGCATGGTCCCTGCCTAGGCCTACCTGATGGAAGTAAAGCCTCAACCAC
diff --git a/seq/gstm1b_human_fs.nt b/seq/gstm1b_human_fs.nt
new file mode 100644
index 0000000..b6fbaa8
--- /dev/null
+++ b/seq/gstm1b_human_fs.nt
@@ -0,0 +1,17 @@
+>gi|183668|gb|J03817.1|HUMGSTM1B Human glutathione transferase M1B (GST1) mRNA, complete cds
+GCACCAACCAGCACC
+ATGCCCATGATACTGGGGTACTGGGACATCCGCGGGCTGGCCCACGCCATCCGCCTGCTCCTGGAATAACAGACTCAAGCTATGAGGAAAAGAAGTCACACGATGGGGGACGCTCCTGATTATGA
+CAGAAGCCAGTGGCTGATGAAAAATTCAAGCTGGGCCTGGACTTTCCCAATCTGCCCTACTTGATTGAT
+GGGGCTCACAAGATCACCCAGAGCAACGCCATCTTGTGCTACATTGCCCGCAAGCACAACCTGTGTGGGG
+AGACAGAAGAGGAGAAGATTCGTGTGGACATTTTGGAGAACCAGACCATGGACAACCATATGCAGCTGGG
+CATGATCTGCTACAATCCAGAATTTGAGAAACTGAAGCCAAAGTACTTGGAGGAACTCCCTGAAAAGCTA
+AAGCTCTACTCAGAGTTTCTGGGGAAGCGGCCATGGTTTGCAGGAAACAAGATCACTTTTGTAGATTTTC
+TCGTCTATGATGTCCTTGACCTCCCACCGTATATTTGAGCCCAACTGCTTGGACGCCTTCCCAAATCTGAA
+GGACTTCATCTCCCGCTTTGAGGGCTTGGAGAAGATCTCTGCCTACATGAAGTCCAGCCGCTTCCTCCCA
+AGACCTGTGTTCTCAAAGATGGCTGTCTGGGGCAACAAGTAGGGCCTTGAAGGCAGGAGGTGGGAGTGAG
+GAGCCCATACTCAGCCTGCTGCCCAGGCTGTGCAGCGCAGCTGGACTCTGCATCCCAGCACCTGCCTCCT
+CGTTCCTTTCTCCTGTTTATTCCCATCTTTACTCCCAAGACTTCATTGTCCCTCTTCACTCCCCCTAAAC
+CCCTGTCCCATGCAGGCCCTTTGAAGCCTCAGCTACCCACTATCCTTCGTGAACATCCCCTCCCATCATT
+ACCCTTCCCTGCACTAAAGCCAGCCTGACCTTCCTTCCTGTTAGTGGTTGTGTCTGCTTTAAAGCCTGCC
+TGGCCCCTCGCCTGTGGAGCTCAGCCCCGAGCTGTCCCCGTGTTGCATGAAGGAGCAGCATTGACTGGTT
+TACAGGCCCTGCTCCTGCAGCATGGTCCCTGCCTAGGCCTACCTGATGGAAGTAAAGCCTCAACCAC
diff --git a/seq/gstt1_drome.aa b/seq/gstt1_drome.aa
new file mode 100644
index 0000000..27c46dd
--- /dev/null
+++ b/seq/gstt1_drome.aa
@@ -0,0 +1,4 @@
+>gi|121694|sp|P20432.1|GSTT1_DROME GLUTATHIONE S-TRANSFERASE 1-1 (EC 2.5.1.18) (CLASS-THETA). - DROS
+MVDFYYLPGSSPCRSVIMTAKAVGVELNKKLLNLQAGEHLKPEFLKINPQHTIPTLVDNGFALWESRAIQVYLVEKYG
+KTDSLYPKCPKKRAVINQRLYFDMGTLYQSFANYYYPQVFAKAPADPEAFKKIEAAFEFLNTFLEGQDYAAGDSLTVA
+DIALVATVSTFEVAKFEISKYANVNRWYENAKKVTPGWEENWAGCLEFKKYFE 
diff --git a/seq/gstt1_pssm.asn1 b/seq/gstt1_pssm.asn1
new file mode 100644
index 0000000..98c0fd2
Binary files /dev/null and b/seq/gstt1_pssm.asn1 differ
diff --git a/seq/gtm1_human.aa b/seq/gtm1_human.aa
new file mode 100644
index 0000000..03c7d46
--- /dev/null
+++ b/seq/gtm1_human.aa
@@ -0,0 +1,4 @@
+>sp|P09488|GSTM1_HUMAN GLUTATHIONE S-TRANSFERASE MU 1 (EC 2.5.1.18) (GSTM1-1) (HB SUBUNI
+MPMILGYWDIRGLAHAIRLLLEYTDSSYEEKKYTMGDAPDYDRSQWLNEKFKLGLDFPNLPYLIDGAHKITQSNAILCY
+IARKHNLCGETEEEKIRVDILENQTMDNHMQLGMICYNPEFEKLKPKYLEELPEKLKLYSEFLGKRPWFAGNKITFVD
+FLVYDVLDLHRIFEPKCLDAFPNLKDFISRFEGLEKISAYMKSSRFLPRPVFSKMAVWGNK 
diff --git a/seq/gtt1_drome.aa b/seq/gtt1_drome.aa
new file mode 100644
index 0000000..388b4df
--- /dev/null
+++ b/seq/gtt1_drome.aa
@@ -0,0 +1,4 @@
+>GTT1_DROME GLUTATHIONE S-TRANSFERASE 1-1 (EC 2.5.1.18) (CLASS-THETA). - DROS
+MVDFYYLPGSSPCRSVIMTAKAVGVELNKKLLNLQAGEHLKPEFLKINPQHTIPTLVDNGFALWESRAIQVYLVEKYG
+KTDSLYPKCPKKRAVINQRLYFDMGTLYQSFANYYYPQVFAKAPADPEAFKKIEAAFEFLNTFLEGQDYAAGDSLTVA
+DIALVATVSTFEVAKFEISKYANVNRWYENAKKVTPGWEENWAGCLEFKKYFE 
diff --git a/seq/h10_human.aa b/seq/h10_human.aa
new file mode 100644
index 0000000..04bcf0c
--- /dev/null
+++ b/seq/h10_human.aa
@@ -0,0 +1,4 @@
+>H10_HUMAN | 90538   | HISTONE H1' (H1.0) (H1(0)).
+TENSTSAPAAKPKRAKASKKSTDHPKYSDMIVAAIQAEKNRAGSSRQSIQKYIKSHYKVGENADSQIKLSIKRLV
+TTGVLKQTKGVGASGSFRLAKSDEPKKSVAFKKTKKEIKKVATPKKASKPKKAASKAPTKKPKATPVKKAKKKLA
+ATPKKAKKPKTVKAKPVKASKPKKAKPVKPKAKSSAKRAGKKK
diff --git a/seq/hahu.aa b/seq/hahu.aa
new file mode 100644
index 0000000..b96234d
--- /dev/null
+++ b/seq/hahu.aa
@@ -0,0 +1,4 @@
+>HAHU | 1114 | Hemoglobin alpha chain - Human, chimpanzee, and pygmy chimpanzee
+VLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHGKKVADALTNAV
+AHVDDMPNALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTPAVHASLDKFLASVSTVLTSKY
+R 
diff --git a/seq/hsgstm1b.gcg b/seq/hsgstm1b.gcg
new file mode 100644
index 0000000..9a88985
--- /dev/null
+++ b/seq/hsgstm1b.gcg
@@ -0,0 +1,214 @@
+ FROMSTADEN of: hsgstm1b.g  check: 1769  from: 1  to: 5183
+ 
+ <---No Contig Comments--->
+ 
+hsgstm1b.gcg  Length: 5183  October 12, 1994 10:58  Type: N  Check: 1769  ..
+
+       1  GCACCAACCA GCACCATGCC CATGATACTG GGGTACTGGG ACATCCGTGG 
+
+      51  GGTAAGCGAG GGTCCTCTGG TGGGTGGGAC AGGGGGCGGA GGCGGGGATG 
+
+     101  TGTGGAGTAG CTGCAGGACT GGCTCTAGGG ACCGTTCCTC TTCAGGGCTG 
+
+     151  CCCGCCTCAG AAGGGCCTGT GCATGACGCT GTGTGTGTGT TTGGGGGTGG 
+
+     201  GGGCGGGTAG AGGAGGCGAC GGGTACGTGC AGTATAGACT AGGGCTGGCC 
+
+     251  TGGTGCAGAG AAAGTCACCA AGTCAGGGAC CCTCCATCTC TGACCCGAGC 
+
+     301  CGCGGCCATC TCTCCCAGCT GGCCCACGCC ATCCGCCTGC TCCTGGAATA 
+
+     351  CACAGACTCA AGCTACGAGG AAAAGAAGTA TACGATGGGG GACGGTAATG 
+
+     401  ACACCCTTGT GTCCGGGCTC TGCACTCACG CTGAGTTGGC ACCAAGCAAC 
+
+     451  CCATGGTGGC CACCTGTCGT ACCTCTGCAG GCCTCCCCTG CTGGAGCTGC 
+
+     501  AGGCTGTCCC TTCCCTGAGC CCCGGTGAGG AGTCCTGTGG CCTTGCAAGG 
+
+     551  CAGAATGCTG GGGCGGGATA GTGGGTCCCT GTTTAATTGG GTTGGGTGTC 
+
+     601  CTCAGAGCTT CCCAAACCCT GGAAGCCTTA GCCGTGTGGG GTCCAGAGCC 
+
+     651  TCAGCGGGAT TATTTGTCCC TGAACCCTGG GATGTGGGAC TGAGTGGTCA 
+
+     701  GATTCTAGAT CCACCTGTCT CAGGGATCTT GCCACTGGTT CCTTGGGAGG 
+
+     751  GTCCCCGGAA GGAGGGCTGG GCTCTGGGGA GGTTTGTTTT CACTTCTTCT 
+
+     801  TCCCCACGGC AGCTCCTGAC TATGACAGAA CGCAGTGGCT GAATGAAAAA 
+
+     851  TTCAAGCTGG GCCTGGACTT TCCCAATGTA GGTGCAGGGG GAAGGGGCGG 
+
+     901  TTTTGGGGGA AAGTGCGACG TGTCTCTGAC TGCATCTCCT CTCCCCAGAT 
+
+     951  TAGAGGTGTT CGGATCAGGA GTCTTCTGCC CAATTCCTGG TTGTCTACAC 
+
+    1001  AGCCCCTGCA TGATGTTCTG TGTCCCAGCT CATTTGTTCA TGTGACAGTA 
+
+    1051  TTTCTATGTC AGGCCTGCAT GAGCGGGCAC AGTGAGTCTG GTCTCCCCTT 
+
+    1101  GCATATAGGA AGGGGATGCT GGGGAGCCTG CTGGCCCCAA CTGAGCTTCC 
+
+    1151  CCGGTTTCCC ATCTATCCAG CTGCCCTACT TGATTGATGG GGCTCACAAG 
+
+    1201  ATCACCCAGA GCAACGCCAT CTTGTGCTAC ATTGCCCGCA AGCACAACCT 
+
+    1251  GTGTGAGTGT GGGTGGCTGC AATGTGTGGG GGGAAGGTGG CCTCCTCCTT 
+
+    1301  GGCTGGGCTG TGATGCTGAG ATTGAGTCTG TGTTTTGTGG GTGGCAGGTG 
+
+    1351  GGGAGACAGA AGAGGAGAAG ATTCGTGTGG ACATTTTGGA GAACCAGACC 
+
+    1401  ATGGACAACC ATATGCAGCT GGGCATGATC TGCTACAATC CAGAATTTGT 
+
+    1451  GAGTGTCCCC AGTGAGCTGC ATCTGACAGA GTTTGGATTT GGGGCCAGGA 
+
+    1501  CTCTTGCATC CTGCACACAT TGGTCTTAAG TCCCTGGTAC CATTCATCCT 
+
+    1551  CCAAGTGCTT TCCCATCATC TAGCAGTATC TCTACGACTC CAATGTCATG 
+
+    1601  TCAACAAAAG CAGAGGCAAT TCCCAACCAA CCTTAGGACA CGATTCCAGG 
+
+    1651  CATTCCCAGG GTAGAAATTT CAGTTCCTGT ATGGTAAAGT TTGTGTTCAG 
+
+    1701  AATCTCCTTC ATCAGCTCTG GCCTCTGACT TCTGTCCTGG GTCATTTCTG 
+
+    1751  TCAGCCAGTT CACATCACCT GCCTGCTCCT AGAATATGCA GACTCAAGTA 
+
+    1801  GAAGACTCAG GAATGTAATG GCACCCTCGA ATTGCATCTT CTCCTCAACA 
+
+    1851  GTTTTCTGAG TGCTGTCATT GACATGCACA GGGATCTGCG CATCTTCATA 
+
+    1901  ACAGACAGCT CAGAGGCAGT CAGAGGGCCT TTATTCCTCT CCCTCCTTCC 
+
+    1951  TTTCAACTTG AACTTCTCAT CTCCCTGGAA ACTAGTCAAC GTTCATTGTT 
+
+    2001  TTCTTCTGCC ACCCCATTAG AAGGAACTTT CTACTTTCCC TGAGCTCCCT 
+
+    2051  TAGTTCTTTG CATCCTTGAT TCTGCTGGTC TGGATCCAGA GGCTGCCAGG 
+
+    2101  TGCTTGGGCG CTCCTGGGGC TGACCCAGAG GCTATTGGGA GGTCAGTGAG 
+
+    2151  GACAGATTCA GGGACAGCAT CTCATTCCTC TCTGCCTTCT GATCAGTTTA 
+
+    2201  GATAGGGTCT GACACTCAGT CAGAGTCTAA AATGCTGAGT ATCCAATTGA 
+
+    2251  AGCCTGCACT GCCCCAGTTC CAGACTTGGG GAAGATGGCT GCTTGCCCGT 
+
+    2301  GCCAGCCTGG CCGTCCACAG CCCCGGGGAG GCCACGTCTG TGCAGGGAGC 
+
+    2351  TTTTGTCCGA GGGTGGTGAC AGCTGTTTTC TGCCTCAGGA GAAACTGAAG 
+
+    2401  CCAAAGTACT TGGAGGAACT CCCTGAAAAG CTAAAGCTCT ACTCAGAGTT 
+
+    2451  TCTGGGGAAG CGGCCATGGT TTGCAGGAAA CAAGGTAAAG GAGGAGTGAT 
+
+    2501  ATGGGGAATG AGATCTGTTT TGCTTCACGT GTTATGGAGG TTCCAGCCCA 
+
+    2551  CACATTCTTG GCCTTCTGCA GATCACTTTT GTAGATTTTC TCGTCTATGA 
+
+    2601  TGTCCTTGAC CTCCACCGTA TATTTGAGCC CAACTGCTTG GACGCCTTCC 
+
+    2651  CAAATCTGAA GGACTTCATC TCCCGCTTTG AGGTGATGCC CCCAATCCTC 
+
+    2701  CCTTCTCTTT GATGCCCCTT GTTCCGTTAC CTCCTTTCAG ATGCTTTCCC 
+
+    2751  ATGCCTGGAG CTACACACAG AATAACTCGC ATGTATTGAG TACTGGTTTC 
+
+    2801  ATGCCACGAA CCGTACCCCA GCACATTATA CCTATTGTGT GAAATTTGAA 
+
+    2851  TTTTATAACA TTCCAGTAAG GTAACAGAAT TATCTCGCCC ATTTTAGAGA 
+
+    2901  TAAGGAAACT AAGAATGAGA GGGTCGGTCC TCTGCTCAGG GTCCCAGAGC 
+
+    2951  TAGTGGAGGC AGTGCTGGGC CCCTGTGAGC CTCTGGATCT ATGGGTGGCA 
+
+    3001  GTCAGGCTCT CCCATTCGAC AGAGAAAAAG CCTTAGCGTT CACCTAGCCT 
+
+    3051  GGGTTTCACA GCCCAGGACA CTTTGGAAGA GGCAGAGAAC TTCATGACCA 
+
+    3101  TAGATGGAGC TGGCAATAGT AGGACTGACA CAACGGTGAC ATTGATGTCT 
+
+    3151  AGTACTGAAC CCACAGGCAA TCTCATAGCT ACCTCCAGAA GCTTTGCATG 
+
+    3201  ATTGGACCCC AGTGTGGGAA TCCTGAGAGC CAGGGCTGTG GCTGTAGCTG 
+
+    3251  GATTAAGGTA CATATGTGGG TGTCCCTGTT GAAGGAGTAT ATGTTGAAAT 
+
+    3301  GCCCGGTGCT GGGGCACTTA CTTACTCCAC CACTATCTTT TTTTTTTTTT 
+
+    3351  TTTTTTTTTT TTTGTGCTGG AGTCTTGCTC TGTTGCCCAG GCTGGAGTTC 
+
+    3401  AATGGAGTGA TCTTGGCTCA CTGCAACCTC CGCCTCCTGG GTTCAAGCGA 
+
+    3451  TTCTACTGCC TCAGCTGCAC GATTAGTTGG GATTACAGGT GTGCACCACC 
+
+    3501  ACGTCTGGCT AATTTTTGTA TTTTTAGTAG AGATGGGGTT TTGCCATGTT 
+
+    3551  GGTCAGGCTG GTCTTCGAAC TCCTGACCTC AGGTGATCTA CCCACATCAG 
+
+    3601  CCTCCCTCAG ATCGTGTCTT GCTGTTGCCC AGGCTGGAGC AGCAGTTGCG 
+
+    3651  TGACCTCGGA CTTACTGCAA CCTCTGCTCC CGGGTTCAAA CAATTCTCTG 
+
+    3701  CCTCAGCCTC CCGAGTAGCT GGGAATTACA AGTGTCTATC ACCACGCCCA 
+
+    3751  GCTAATTTTT CTATTTTTAG TAGAGATGGG CTTTTCACCA TGTTGGCCAG 
+
+    3801  GTGGTCTTGA ACTCCTGACC TCGGTGATCC ACCCACCTCG GCTTCCCACA 
+
+    3851  TCTGAGTGTC ATGTAGCCTG ATCTGCAGCA GGGCTGTAGA TGCCATGGGT 
+
+    3901  TAGGGCACAG TGAGATTTTG CTCAGGTATT AGATGGAGAA CTTTGGACTT 
+
+    3951  TCTGCTTTAA GGGGAATGTT TAGAGCCTAG TCTCgTTTGA TTTTCTTGTG 
+
+    4001  CACTGCCACC CCCCATTCCA CTTTCATCCA GGTTTACTGA GACATTGGGG 
+
+    4051  TGAGTGTGTT CAGAGCCCCT TTGTTCTGCT GCAGGTCCCT TCTGTGTCTC 
+
+    4101  TATACCCAGA CAAGCCAAGA GCCTCCCTGT GGAAAAGGAG ACTGTTTGTG 
+
+    4151  CAGTCAAGGA GTGACAGGGC CTGGTGTGAG GGGTGGTGGG GCAGAAGAAG 
+
+    4201  AAGAGAATTT GTCAGGAAGA GGCCAGAACT GGAGAGAGAC AGAACCAGGC 
+
+    4251  TACACYGCAA GTTCTATTCC CCTTACAAGG TATCTAAACG TAAGGAAGTT 
+
+    4301  GCTGAACTTC TGTTCCACAT GAGAATGGTG ATAATAGATT CAGCCTTGCA 
+
+    4351  GAGCAGTCGA GTGGTTTTCT AAGCTTACGT TGTAATTTGT GTTGGTACAG 
+
+    4401  AGCACCCAGC ACCGTGTAGA ATCTTCGTAA GTGTTAGCTG TTACTGTGGT 
+
+    4451  ACAACATTAC CTAAAGGAAG TTGGAAGAGT TAACTCAGCA AATCTGGGGA 
+
+    4501  CCCTAAGAAG CTGTGTGATG CCTCAGCACT TGAGCCCACA TGGAAAGGCT 
+
+    4551  GTGCCAGGGC CCTGACCTGC TGTGTCTGCA GTGGGGTTGT CCCACCGCTC 
+
+    4601  ATGGGCAGCT GACCTTGAGT TCTGGCCTTA TTTTCCCCCC TCTCAGGGCT 
+
+    4651  TGGAGAAGAT CTCTGCCTAC ATGAAGTCCA GCCGCTTCCT CCCAAGACCT 
+
+    4701  GTGTTCTCAA AGATGGCTGT CTGGGGCAAC AAGTAGGGCC TTGAAGGCAG 
+
+    4751  GAGGTGGGAG TGAGGAGCCC ATACTCAGCC TGCTGCCCAG GCTGTGCAGC 
+
+    4801  GCAGCTGGAC TCTGCATCCC AGCACCTGCC TCCTCGTTCC TTTCTCCTGT 
+
+    4851  TTATTCCCAT CTTTACTCCC AAGACTTCAT TGTCCCTCTT CACTCCCCCT 
+
+    4901  AAACCCCTGT CCCATGCAGG CCCTTTGAAG CCTCAGCTAC CCACTATCCT 
+
+    4951  TCGTGAACAT CCCCTCCCAT CATTACCCTT CCCTGCACTA AAGCCAGCCT 
+
+    5001  GACCTTCCTT CCTGTTAGTG GTTGTGTCTG CTTTAAAGCC TGCCTGGCCC 
+
+    5051  CTCGCCTGTG GAGCTCAGCC CCGAGCTGTC CCCGTGTTGC ATGAAGGAGC 
+
+    5101  AGCATTGACT GGTTTACAGG CCCTGCTCCT GCAGCATGGT CCCTGCCTAG 
+
+    5151  GCCTACCTGA TGGAAGTAAA GCCTCAACCA CAc
+
diff --git a/seq/hsgstm1b.seq b/seq/hsgstm1b.seq
new file mode 100644
index 0000000..8a7123c
--- /dev/null
+++ b/seq/hsgstm1b.seq
@@ -0,0 +1,40 @@
+>gi|31932|emb|X68676|HSGSTM1B H.sapiens GSTM1b gene for glutathione S-transferase
+ATGCCCATGATACTGGGGTACTGGGACATCCGTGGGGTAAGCGAGGGTCCTCTGGTGGGTGGGACAGGGG
+GCGGAGGCGGGGATGTGTGGAGTAGCTGCAGGACTGGCTCTAGGGACCGTTCCTCTTCAGGGCTGCCCGC
+CTCAGAAGGGCCTGTGCATGACGCTGTGTGTGTGTTTGGGGGTGGGGGCGGGTAGAGGAGGCGACGGGTA
+CGTGCAGTATAGACTAGGGCTGGCCTGGTGCAGAGAAAGTCACCAAGTCAGGGACCCTCCATCTCTGACC
+CGAGCCGCGGCCATCTCTCCCAGCTGGCCCACGCCATCCGCCTGCTCCTGGAATACACAGACTCAAGCTA
+CGAGGAAAAGAAGTATACGATGGGGGACGGTAATGACACCCTTGTGTCCGGGCTCTGCACTCACGCTGAG
+TTGGCACCAAGCAACCCATGGTGGCCACCTGTCGTACCTCTGCAGGCCTCCCCTGCTGGAGCTGCAGGCT
+GTCCCTTCCCTGAGCCCCGGTGAGGAGTCCTGTGGCCTTGCAAGGCAGAATGCTGGGGCGGGATAGTGGG
+TCCCTGTTTAATTGGGTTGGGTGTCCTCAGAGCTTCCCAAACCCTGGAAGCCTTAGCCGTGTGGGGTCCA
+GAGCCTCAGCGGGATTATTTGTCCCTGAACCCTGGGATGTGGGACTGAGTGGTCAGATTCTAGATCCACC
+TGTCTCAGGGATCTTGCCACTGGTTCCTTGGGAGGGTCCCCGGAAGGAGGGCTGGGCTCTGGGGAGGTTT
+GTTTTCACTTCTTCTTCCCCACGGCAGCTCCTGACTATGACAGAACGCAGTGGCTGAATGAAAAATTCAA
+GCTGGGCCTGGACTTTCCCAATGTAGGTGCAGGGGGAAGGGGCGGTTTTGGGGGAAAGTGCGACGTGTCT
+CTGACTGCATCTCCTCTCCCCAGATTAGAGGTGTTCGGATCAGGAGTCTTCTGCCCAATTCCTGGTTGTC
+TACACAGCCCCTGCATGATGTTCTGTGTCCCAGCTCATTTGTTCATGTGACAGTATTTCTATGTCAGGCC
+TGCATGAGCGGGCACAGTGAGTCTGGTCTCCCCTTGCATATAGGAAGGGGATGCTGGGGAGCCTGCTGGC
+CCCAACTGAGCTTCCCCGGTTTCCCATCTATCCAGCTGCCCTACTTGATTGATGGGGCTCACAAGATCAC
+CCAGAGCAACGCCATCTTGTGCTACATTGCCCGCAAGCACAACCTGTGTGAGTGTGGGTGGCTGCAATGT
+GTGGGGGGAAGGTGGCCTCCTCCTTGGCTGGGCTGTGATGCTGAGATTGAGTCTGTGTTTTGTGGGTGGC
+AGGTGGGGAGACAGAAGAGGAGAAGATTCGTGTGGACATTTTGGAGAACCAGACCATGGACAACCATATG
+CAGCTGGGCATGATCTGCTACAATCCAGAATTTGTGAGTGTCCCCAGTGAGCTGCATCTGACAGAGTTTG
+GATTTGGGGCCAGGACTCTTGCATCCTGCACACATTGGTCTTAAGTCCCTGGTACCATTCATCCTCCAAG
+TGCTTTCCCATCATCTAGCAGTATCTCTACGACTCCAATGTCATGTCAACAAAAGCAGAGGCAATTCCCA
+ACCAACCTTAGGACACGATTCCAGGCATTCCCAGGGTAGAAATTTCAGTTCCTGTATGGTAAAGTTTGTG
+TTCAGAATCTCCTTCATCAGCTCTGGCCTCTGACTTCTGTCCTGGGTCATTTCTGTCAGCCAGTTCACAT
+CACCTGCCTGCTCCTAGAATATGCAGACTCAAGTAGAAGACTCAGGAATGTAATGGCACCCTCGAATTGC
+ATCTTCTCCTCAACAGTTTTCTGAGTGCTGTCATTGACATGCACAGGGATCTGCGCATCTTCATAACAGA
+CAGCTCAGAGGCAGTCAGAGGGCCTTTATTCCTCTCCCTCCTTCCTTTCAACTTGAACTTCTCATCTCCC
+TGGAAACTAGTCAACGTTCATTGTTTTCTTCTGCCACCCCATTAGAAGGAACTTTCTACTTTCCCTGAGC
+TCCCTTAGTTCTTTGCATCCTTGATTCTGCTGGTCTGGATCCAGAGGCTGCCAGGTGCTTGGGCGCTCCT
+GGGGCTGACCCAGAGGCTATTGGGAGGTCAGTGAGGACAGATTCAGGGACAGCATCTCATTCCTCTCTGC
+CTTCTGATCAGTTTAGATAGGGTCTGACACTCAGTCAGAGTCTAAAATGCTGAGTATCCAATTGAAGCCT
+GCACTGCCCCAGTTCCAGACTTGGGGAAGATGGCTGCTTGCCCGTGCCAGCCTGGCCGTCCACAGCCCCG
+GGGAGGCCACGTCTGTGCAGGGAGCTTTTGTCCGAGGGTGGTGACAGCTGTTTTCTGCCTCAGGAGAAAC
+TGAAGCCAAAGTACTTGGAGGAACTCCCTGAAAAGCTAAAGCTCTACTCAGAGTTTCTGGGGAAGCGGCC
+ATGGTTTGCAGGAAACAAGGTAAAGGAGGAGTGATATGGGGAATGAGATCTGTTTTGCTTCACGTGTTAT
+GGAGGTTCCAGCCCACACATTCTTGGCCTTCTGCAGATCACTTTTGTAGATTTTCTCGTCTATGATGTCC
+TTGACCTCCACCGTATATTTGAGCCCAACTGCTTGGACGCCTTCCCAAATCTGAAGGACTTCATCTCCCG
+CTTTGAG
diff --git a/seq/humgstd.seq b/seq/humgstd.seq
new file mode 100644
index 0000000..5072afb
--- /dev/null
+++ b/seq/humgstd.seq
@@ -0,0 +1,20 @@
+>HUMGSTD Human glutathione transferase class mu (GST1) mRNA, complete cds.
+ GCACCAACCA GCACCATGCC CATGATACTG GGGTACTGGG ACATCCGCGG GCTGGCCCAC
+ GCCATCCGCC TGCTCCTGGA ATACACAGAC TCAAGCTATG AGGAAAAGAA GTACACGATG
+ GGGGACGCTC CTGATTATGA CAGAAGCCAG TGGCTGAATG AAAAATTCAA GCTGGGCCTG
+ GACTTTCCCA ATCTGCCCTA CTTGATTGAT GGGGCTCACA AGATCACCCA GAGCAACGCC
+ ATCTTGTGCT ACATTGCCCG CAAGCACAAC CTGTGTGGGG AGACAGAAGA GGAGAAGATT
+ CGTGTGGACA TTTTGGAGAA CCAGACCATG GACAACCATA TGCAGCTGGG CATGATCTGC
+ TACAATCCAG AATTTGAGAA ACTGAAGCCA AAGTACTTGG AGGAACTCCC TGAAAAGCTA
+ AAGCTCTACT CAGAGTTTCT GGGGAAGCGG CCATGGTTTG CAGGAAACAA GATCACTTTT
+ GTAGATTTTC TCGTCTATGA TGTCCTTGAC CTCCACCGTA TATTTGAGCC CAACTGCTTG
+ GACGCCTTCC CAAATCTGAA GGACTTCATC TCCCGCTTTG AGGGCTTGGA GAAGATCTCT
+ GCCTACATGA AGTCCAGCCG CTTCCTCCCA AGACCTGTGT TCTCAAAGAT GGCTGTCTGG
+ GGCAACAAGT AGGGCCTTGA AGGCAGGAGG TGGGAGTGAG GAGCCCATAC TCAGCCTGCT
+ GCCCAGGCTG TGCAGCGCAG CTGGACTCTG CATCCCAGCA CCTGCCTCCT CGTTCCTTTC
+ TCCTGTTTAT TCCCATCTTT ACTCCCAAGA CTTCATTGTC CCTCTTCACT CCCCCTAAAC
+ CCCTGTCCCA TGCAGGCCCT TTGAAGCCTC AGCTACCCAC TATCCTTCGT GAACATCCCC
+ TCCCATCATT ACCCTTCCCT GCACTAAAGC CAGCCTGACC TTCCTTCCTG TTAGTGGTTG
+ TGTCTGCTTT AAAGCCTGCC TGGCCCCTCG CCTGTGGAGC TCAGCCCCGA GCTGTCCCCG
+ TGTTGCATGA AGGAGCAGCA TTGACTGGTT TACAGGCCCT GCTCCTGCAG CATGGTCCCT
+ GCCTAGGCCT ACCTGATGGA AGTAAAGCCT CAACCAC
diff --git a/seq/lcbo.aa b/seq/lcbo.aa
new file mode 100644
index 0000000..bd7477b
--- /dev/null
+++ b/seq/lcbo.aa
@@ -0,0 +1,5 @@
+>LCBO - Prolactin precursor - Bovine
+MDSKGSSQKGSRLLLLLVVSNLLLCQGVVSTPVCPNGPGNCQVSLRDLFDRAVMVSHYIHDLSS
+EMFNEFDKRYAQGKGFITMALNSCHTSSLPTPEDKEQAQQTHHEVLMSLILGLLRSWNDPLYHL
+VTEVRGMKGAPDAILSRAIEIEEENKRLLEGMEMIFGQVIPGAKETEPYPVWSGLPSLQTKDED
+ARYSAFYNLLHCLRRDSSKIDTYLKLLNCRIIYNNNC*
diff --git a/seq/m1r.aa b/seq/m1r.aa
new file mode 100644
index 0000000..5beac76
--- /dev/null
+++ b/seq/m1r.aa
@@ -0,0 +1,5 @@
+>test | 40001 90043 | mgstm1
+MGCEN,
+MIDYP,
+MLLAY,
+MLLGY
diff --git a/seq/m2.aa b/seq/m2.aa
new file mode 100644
index 0000000..89b65f7
--- /dev/null
+++ b/seq/m2.aa
@@ -0,0 +1,5 @@
+>tests from mgstm1
+MILGYW,
+MLLEYT,
+MGDAPD,
+MLCYNP
diff --git a/seq/mchu.aa b/seq/mchu.aa
new file mode 100644
index 0000000..d6ca8e5
--- /dev/null
+++ b/seq/mchu.aa
@@ -0,0 +1,3 @@
+>sp|P62158|CALM_HUMAN Calmodulin; CaM
+MADQLTEEQIAEFKEAFSLFDKDGDGTITTKELGTVMRSLGQNPTEAELQDMINEVDADGNGTIDFPEFLTMMARKMKDT
+DSEEEIREAFRVFDKDGNGYISAAELRHVMTNLGEKLTDEEVDEMIREADIDGDGQVNYEEFVQMMTAK
diff --git a/seq/mgstm1.3nt b/seq/mgstm1.3nt
new file mode 100644
index 0000000..ad02f34
--- /dev/null
+++ b/seq/mgstm1.3nt
@@ -0,0 +1,60 @@
+>pGT875 | 266
+ATGCCTATGATACTGGGATACTGGGTCCGCGGACTGACACACCCGATCCGCATGCTC
+CTGGAATACACAGACTCAAGCTATGATGAGAAGAGATACACCATGGGTGACGCTCCCGAC
+TTTGACAGAAGCCAGTGGCTGAAATGAGAAGTTCAAGCTGGGCCTGGACTTTCCCAATCT
+GCCTTACTTGATCGATGGATCACACAAGATCACCCAGAGCAATGCCATCCTGCGCTACCT
+TGCCCGAAAGCACCACCTGGATGGAGAGACAGAGGAGGAGAGGATCCGTGCAGACATTGT
+GGAGAACCAGGTCATGGACACCCGCATGCAGCTCATCATGCTCTGTTACAACCCTGACTT
+TGAGAAGCAGAAGCCAGAGTTCTTGAAGACCATCCCTGAGAAAATGAAGCTCTACTCTGA
+GTTCCTGGGCAAGAGGCCATGGTTTTGCAGGGGACAAGGTCACCTATGTGGATTTCCTTG
+CTTATGACATTCTTGACCAGTACCGTATGTTTGAGCCCAAGTGCCTGGACGCCTTCCCAA
+ACCTGAGGGACTTCCTGGCCCGCTTCGAGGGCCTCAAGAAGATCTCTGCCTACATGAAGA
+GTAGCCGCTACATCGCAACACCTATATTTTCAAAGATGGCCCACTGGAGTAACAAGTAGG
+CCCTTGCTACACGGGCACTCACTAGGAGGACCTGTCCACACTGGGGATCCTGCAGGCCCT
+GGGTGGGGACAGCACCCTGGCCTTCTGCACTGTGGCTCCTGGTTCTCTCTCCTTCCCGCT
+CCCTTCTGCAGCTTGGTCAGCCCCATCTCCTCACCCTCTTCCCAGTCAAGTCCACACAGC
+CTTCATTCTCCCCAGTTTCTTTCACATGGCCCCTTCTTCATTGGCTCCCTGACCCAACCT
+CACAGCCCGTTTCTGCGAACTGAGGTCTGTCCTGAACTCACGCTTCCTAGAATTACCCCG
+ATGGTCAACACTATCTTAGTGCTAGCCCTCCCTAGAGTTACCCCGAAGTCAATACTTGAG
+TGCCAGCCTGTTCCTGGTGGAGTAGCCTCCCCAGGTCTGTCTCGTCTACAATAAAGTCTG
+AAACACACTT
+NNNNNNNNNN
+GCTGAAGCCAGTTTGAGAAGACCACAGCACCAGCACCATGCCTATGATACTGGGATACTG
+GAACGTCCGCGGACTGACACACCCGATCCGCATGCTCCTGGAATACACAGACTCAAGCTA
+TGATGAGAAGAGATACACCATGGGTGACGCTCCCGACTTTGACAGAAGCCAGTGGCTGAA
+TGAGAAGTTCAAGCTGGGCCTGGACTTTCCCAATCTGCCTTACTTGATCGATGGATCACA
+CAAGATCACCCAGAGCAATGCCATCCTGCGCTACCTTGCCCGAAAGCACCACCTGGATGG
+AGAGACAGAGGAGGAGAGGATCCGTGCAGACATTGTGGAGAACCAGGTCATGGACACCCG
+CATGCAGCtCATCATGCTCTGTTACAACCCTGACTTTGAGAAGCAGAAGCCAGAGTTCTT
+GAAGACCATCCCTGAGAAAATGAAGCTCTACTCTGAGTTCCTGGGCAAGAGGCCATGGTT
+TGCAGGGGACAAGGTCACCTATGTGGATTTCCTTGCTTATGACATTCTTGACCAGTACCG
+TATgTTTGAGCCCAAGTGCCTGGACGCCTTCCCAAACCTGAGGGACTTCCTGGCCCGCTT
+CGAGGGCCTCAAGAAGATCTCTGCCTACATGAAGAGTAGCCGCTACATCGCAACACCTAT
+ATTTTCAAAGATGGCCCACTGGAGTAACAAGTAGGCCCTTGCTACACGGGCACTCACTAG
+GAGGACCTGTCCACACTGGGgATCCTGCAGGCCCTGGGTGGGGACAGCACCCTGGCCTTC
+TGCACTGTGGCTCCTGGTTCTCTCTCCTTCCCGCTCCCTTCTGCAGCTTGGTCAGCCCCA
+TCTCCTCACCCTCTTCCCAGTCAAGTCCACACAGCCTTCATTCTCCCCAGTTTCTTTCAC
+ATGGCCCCTTCTTCATTGGCTCCCTGACCCAACCTCACAGCCCGTTTCTGCGAACTGAGG
+TCTGTCCTGAACTCACGCTTCCTAGAATTACCCCGATGGTCAACACTATCTTAGTGCTAG
+CCCTCCCTAGAGTTACCCCGAAGTCAATACTTGAGTGCCAGCCTGTTCCTGGTGGAGTAG
+CCTCCCCAGGTCTGTCTCGTCTACAATAAAGTCTGAAACACACTT
+NNNNNNNNNN
+GCTGAAGCCTAGTTTGAGAAGACCACCAGCACCACCACCATGCCTATGATATGGGATACTG
+GAAAGTCCGCGGACTGACACACCCGATCCGCATGCTCCTGGAATACACAGACCCAAGTTA
+TGATGAGAAGAGATACACTATGGGTGACGGCTCCCGACTTTGACAGACAGTGGCTGA
+ATGAGAAGNTTCAAGCTGGGCCTGGAATTTCCCTAATCTGCCTTACTTGATCGATGGATCA
+CACAAGATCACCCAGAGAATGCCATCCTGCGCTACCTGGCCACAAAGCCCACCTGGAGGA
+GATGACAGAGGAGGAGAGGATCCGTGCAGACATTGTGGAGAACCAGATAGCATGGAAACC
+CGCTGCAGCNNNNCATGCTCTCGTTACAACCTTGACTTTGAGAAGCAGAAGCCAGAGTTC
+TTGAAGACCATCCCTGAGAAAATGAGCTCTACTCTGAGTTCCTGGGATGCAAGAGGCCATGGT
+TTGCATGGGACAAGTGTCACCTATGTGGATTTCTTTGCTTATGACATTCTTGACCAGTAC
+CGTATGTTTGAGCCAAGTGCCTGGACGCCTTCCCAAACCTGAGGTGACTTCCTGGCCCGC
+TTCGAGGGCCTCAAGAAGATCTCTGCTCTACATGAAGAGTAGCCGGTACATCGGCACAGC
+TCATATTTACAAAGATGGCCCACTGGAGTAACAAGCAGGCCCTTGCTACACGGCACTCAC
+TAGGAGGACCTGTCCNNACTGGTGGCTCCTGCAGTCCCTGTGTGGGGACAAGCACCCTGG
+CCTTCTGCACTGTGGCTCCTGGTTCCTCTCCTCCCGCTCCCTTCTGCAGTTGGTCAGCCC
+CATCTCCTCACCCTCTTCCCAGTCAAGGCCACACGCCTTCATTCGTCCCCGTCTTCTTTC
+ACATGGCCTCCTTCTTCGATTGGCTCCCTGACCCACACCTCACAGCCCGTTTCTGCGAAC
+TGAGGTCTGTCTGAACTCACGCTTCCTAGAATTACCCCGATGGTCAACCACTATCTTAGT
+GCTAGCCCTGCCCTAGAGTTACCCGAAGTCAATACTTGAAGTGCCAGCCTGCTTCCTGGT
+GGTAGTAGCCTCCCCAGGTCGGTCTCGTCTACAATAAAGTCATGAAACACACT
diff --git a/seq/mgstm1.aa b/seq/mgstm1.aa
new file mode 100644
index 0000000..7c7f121
--- /dev/null
+++ b/seq/mgstm1.aa
@@ -0,0 +1,5 @@
+>sp|P10649|GSTM1_MOUSE Glutathione S-transferase Mu 1; GST 1-1; GST class-mu 1; Glutathione S-transferase GT8.7; pmGT10
+MPMILGYWNVRGLTHPIRMLLEYTDSSYDEKRYTMGDAPDFDRSQWLNEKFKLGLDFPNL
+PYLIDGSHKITQSNAILRYLARKHHLDGETEEERIRADIVENQVMDTRMQLIMLCYNPDF
+EKQKPEFLKTIPEKMKLYSEFLGKRPWFAGDKVTYVDFLAYDILDQYRMFEPKCLDAFPN
+LRDFLARFEGLKKISAYMKSSRYIATPIFSKMAHWSNK
diff --git a/seq/mgstm1.aaa b/seq/mgstm1.aaa
new file mode 100644
index 0000000..5a9fef5
--- /dev/null
+++ b/seq/mgstm1.aaa
@@ -0,0 +1,8 @@
+>GT8.7 | 266 40001 90043 | transl. of pa875.con, 19 to 675
+MPMILGY at WNVRGLT#HPIRMLLEY at T#DS*S*Y at DEKR
+Y at T#MGDAPDFDRS*QWLNEKFKLGLDFPNLPY at LI
+DGS*HKIT#QSNAILRY at LARKHHLDGET#EEERIR
+ADIVENQVMDT#RMQLIMLCY at NPDFEKQKPEFL
+KT#IPEKMKLY at SEFLGKRPWFAGDKVT#Y at VDFLA
+Y at DILDQY@RMFEPKCLDAFPNLRDFLARFEGLK
+KISAY at MKSSRY@IAT#PIFSKMAHWSNK
diff --git a/seq/mgstm1.e05 b/seq/mgstm1.e05
new file mode 100644
index 0000000..bddfc21
--- /dev/null
+++ b/seq/mgstm1.e05
@@ -0,0 +1,20 @@
+>pGT875 | 266 with an average of 5% of residues modified by mutr.
+GCTGAAGCCTAGTTTGAGAAGACCACCAGCACCACCACCATGCCTATGATATGGGATACTG
+GAAAGTCCGCGGACTGACACACCCGATCCGCATGCTCCTGGAATACACAGACCCAAGTTA
+TGATGAGAAGAGATACACTATGGGTGACGGCTCCCGACTTTGACAGACAGTGGCTGA
+ATGAGAAGNTTCAAGCTGGGCCTGGAATTTCCCTAATCTGCCTTACTTGATCGATGGATCA
+CACAAGATCACCCAGAGAATGCCATCCTGCGCTACCTGGCCACAAAGCCCACCTGGAGGA
+GATGACAGAGGAGGAGAGGATCCGTGCAGACATTGTGGAGAACCAGATAGCATGGAAACC
+CGCTGCAGCNNNNCATGCTCTCGTTACAACCTTGACTTTGAGAAGCAGAAGCCAGAGTTC
+TTGAAGACCATCCCTGAGAAAATGAGCTCTACTCTGAGTTCCTGGGATGCAAGAGGCCATGGT
+TTGCATGGGACAAGTGTCACCTATGTGGATTTCTTTGCTTATGACATTCTTGACCAGTAC
+CGTATGTTTGAGCCAAGTGCCTGGACGCCTTCCCAAACCTGAGGTGACTTCCTGGCCCGC
+TTCGAGGGCCTCAAGAAGATCTCTGCTCTACATGAAGAGTAGCCGGTACATCGGCACAGC
+TCATATTTACAAAGATGGCCCACTGGAGTAACAAGCAGGCCCTTGCTACACGGCACTCAC
+TAGGAGGACCTGTCCNNACTGGTGGCTCCTGCAGTCCCTGTGTGGGGACAAGCACCCTGG
+CCTTCTGCACTGTGGCTCCTGGTTCCTCTCCTCCCGCTCCCTTCTGCAGTTGGTCAGCCC
+CATCTCCTCACCCTCTTCCCAGTCAAGGCCACACGCCTTCATTCGTCCCCGTCTTCTTTC
+ACATGGCCTCCTTCTTCGATTGGCTCCCTGACCCACACCTCACAGCCCGTTTCTGCGAAC
+TGAGGTCTGTCTGAACTCACGCTTCCTAGAATTACCCCGATGGTCAACCACTATCTTAGT
+GCTAGCCCTGCCCTAGAGTTACCCGAAGTCAATACTTGAAGTGCCAGCCTGCTTCCTGGT
+GGTAGTAGCCTCCCCAGGTCGGTCTCGTCTACAATAAAGTCATGAAACACACT
diff --git a/seq/mgstm1.eeq b/seq/mgstm1.eeq
new file mode 100644
index 0000000..3413d53
--- /dev/null
+++ b/seq/mgstm1.eeq
@@ -0,0 +1,20 @@
+>mgstm1 | 266
+ATGCCTATGATACTGGATACTGGAACGTCCGCGGACTGACACACCCGATCCGCATGCTC
+CTGGAATACACAGACTCAAGCTATAGATGAGAAGAGATACACCATGGGTGACGCTCCCGAC
+TTTGACAGAAGCCAGTGGCTGAAATGAGAAGTTCAAGCCTGGGCCTGGACTTTCCCAATCT
+GCCTTACTTATCGATGGATCACACAAGATCACCCAGAGCAATGCCATCCTGCGCTACCT
+TGCCCGAAAGCACCACCTGGATGGAGAGACAGAGGAGGAGAGGACCGTGCAGACATTGT
+GGAGAAGGCAGGTCATGGACACCCGCATGCAGCTCATCATGCTCTGTTACAACCCTGACTT
+TGAGAAGCAGAAGCCAGAGTTCTTGAAGACCATCCCTGAGAAAATGAAGCTCTACTCTGA
+GTTCCTGGCAAGAGGCCATGGTTTTGCAGGGGACAAGGTCACCTATGTGGATTTCCTTG
+CTTATGACATTCTTGACCAGTACCGTTGTTTGAGCCCAAGTGCCTGGACGCCTTCCCAA
+ACCTGAGGGACTTCCTTTGGCCCGCTTCGAGGGCCTCAAGAAGATCTCTGCCTACATGAAGA
+GTAGCCGCTACATCGCAACACCTATATTTTCAAAGATCCCACTGGAGTAACAAGTAGG
+CCCTTGCTACACGGGCACACTCACTAGGAGGACCTGTCCACACTGGGGATCCTGCAGGCCCT
+GGGTGGGGACAGCACCCTGGCCTTCACTGTGGCTCCTGGTTCTCTCTCCTTCCCGCT
+CCCTTCTGCAGCTTGTTTGTCAGCCCCATCTCCTCACCCTCTTCCCAGTCAAGTCCACACAGC
+CTTCATTCTCCCCAGTTTCTTTCACATGGCCCCTTCTTCTTGGCTCCTGACCCAACCT
+CACAGCCCGTTTCTGCGAATGAGGTCTGTCCTGAACTCACGCTTCCTAGAATTACCCCG
+ATGGTCAACACTATCTTAGTGCTAGCACCTCCCTAGAGTTACCCCGAAGTCAATACTTGAG
+TGCCAGCCTGTTCCTGGTGGAGTAGCCTCCCCAGGTCTGTCTCGTCTACAATAAAGTCTGC
+AAACACACTT
diff --git a/seq/mgstm1.esq b/seq/mgstm1.esq
new file mode 100644
index 0000000..056e881
--- /dev/null
+++ b/seq/mgstm1.esq
@@ -0,0 +1,20 @@
+>mgstm1e 
+ATGCCTATGATACTGGGATACTGGGTCCGCGGACTGACACACCCGATCCGCATGCTC
+CTGGAATACACAGACTCAAGCTATGATGAGAAGAGATACACCATGGGTGACGCTCCCGAC
+TTTGACAGAAGCCAGTGGCTGAAATGAGAAGTTCAAGCTGGGCCTGGACTTTCCCAATCT
+GCCTTACTTGATCGATGGATCACACAAGATCACCCAGAGCAATGCCATCCTGCGCTACCT
+TGCCCGAAAGCACCACCTGGATGGAGAGACAGAGGAGGAGAGGATCCGTGCAGACATTGT
+GGAGAACCAGGTCATGGACACCCGCATGCAGCTCATCATGCTCTGTTACAACCCTGACTT
+TGAGAAGCAGAAGCCAGAGTTCTTGAAGACCATCCCTGAGAAAATGAAGCTCTACTCTGA
+GTTCCTGGGCAAGAGGCCATGGTTTTGCAGGGGACAAGGTCACCTATGTGGATTTCCTTG
+CTTATGACATTCTTGACCAGTACCGTATGTTTGAGCCCAAGTGCCTGGACGCCTTCCCAA
+ACCTGAGGGACTTCCTGGCCCGCTTCGAGGGCCTCAAGAAGATCTCTGCCTACATGAAGA
+GTAGCCGCTACATCGCAACACCTATATTTTCAAAGATGGCCCACTGGAGTAACAAGTAGG
+CCCTTGCTACACGGGCACTCACTAGGAGGACCTGTCCACACTGGGGATCCTGCAGGCCCT
+GGGTGGGGACAGCACCCTGGCCTTCTGCACTGTGGCTCCTGGTTCTCTCTCCTTCCCGCT
+CCCTTCTGCAGCTTGGTCAGCCCCATCTCCTCACCCTCTTCCCAGTCAAGTCCACACAGC
+CTTCATTCTCCCCAGTTTCTTTCACATGGCCCCTTCTTCATTGGCTCCCTGACCCAACCT
+CACAGCCCGTTTCTGCGAACTGAGGTCTGTCCTGAACTCACGCTTCCTAGAATTACCCCG
+ATGGTCAACACTATCTTAGTGCTAGCCCTCCCTAGAGTTACCCCGAAGTCAATACTTGAG
+TGCCAGCCTGTTCCTGGTGGAGTAGCCTCCCCAGGTCTGTCTCGTCTACAATAAAGTCTG
+AAACACACTT
diff --git a/seq/mgstm1.gcg b/seq/mgstm1.gcg
new file mode 100644
index 0000000..8d75321
--- /dev/null
+++ b/seq/mgstm1.gcg
@@ -0,0 +1,13 @@
+GT8.7 transl. of pa875.con, 19 to 675
+    gt87  Length: 217  July 31, 1996 19:51  Type: P  Check: 9358  ..
+
+       1  PMILGYWNVR GLTHPIRMLL EYTDSSYDEK RYTMGDAPDF DRSQWLNEKF 
+
+      51  KLGLDFPNLP YLIDGSHKIT QSNAILRYLA RKHHLDGETE EERIRADIVE 
+
+     101  NQVMDTRMQL IMLCYNPDFE KQKPEFLKTI PEKMKLYSEF LGKRPWFAGD 
+
+     151  KVTYVDFLAY DILDQYRMFE PKCLDAFPNL RDFLARFEGL KKISAYMKSS 
+
+     201  RYIATPIFSK MAHWSNK
+
diff --git a/seq/mgstm1.lc b/seq/mgstm1.lc
new file mode 100644
index 0000000..b6ac6b2
--- /dev/null
+++ b/seq/mgstm1.lc
@@ -0,0 +1,8 @@
+>GT8.7 | 40001 ! 90043 | transl. of pa875.con, 19 to 675
+MPMILGYWNVRGLTHPIRMLLEYTDSSYDEKR
+ytmgdapdfdrsqwlnekfklgldfpnlpyli
+DGSHKITQSNAILRYLARKHHLDGETEEERIR
+adivenqvmdtrmqlimlcynpdfekqkpefl
+KTIPEKMKLYSEFLGKRPWFAGDKVTYVDFLA
+ydildqyrmfepkcldafpnlrdflarfeglk
+KISAYMKSSRYIATPIFSKMAHWSNK
diff --git a/seq/mgstm1.nt b/seq/mgstm1.nt
new file mode 100644
index 0000000..3f266a3
--- /dev/null
+++ b/seq/mgstm1.nt
@@ -0,0 +1,12 @@
+>pGT875 
+ATGCCTATGATACTGGGATACTGGAACGTCCGCGGACTGACACACCCGATCCGCATGCTC
+CTGGAATACACAGACTCAAGCTATGATGAGAAGAGATACACCATGGGTGACGCTCCCGAC
+TTTGACAGAAGCCAGTGGCTGAATGAGAAGTTCAAGCTGGGCCTGGACTTTCCCAATCTG
+CCTTACTTGATCGATGGATCACACAAGATCACCCAGAGCAATGCCATCCTGCGCTACCTT
+GCCCGAAAGCACCACCTGGATGGAGAGACAGAGGAGGAGAGGATCCGTGCAGACATTGTG
+GAGAACCAGGTCATGGACACCCGCATGCAGCTCATCATGCTCTGTTACAACCCTGACTTT
+GAGAAGCAGAAGCCAGAGTTCTTGAAGACCATCCCTGAGAAAATGAAGCTCTACTCTGAG
+TTCCTGGGCAAGAGGCCATGGTTTGCAGGGGACAAGGTCACCTATGTGGATTTCCTTGCT
+TATGACATTCTTGACCAGTACCGTATGTTTGAGCCCAAGTGCCTGGACGCCTTCCCAAAC
+CTGAGGGACTTCCTGGCCCGCTTCGAGGGCCTCAAGAAGATCTCTGCCTACATGAAGAGT
+AGCCGCTACATCGCAACACCTATATTTTCAAAGATGGCCCACTGGAGTAACAAGTAG
diff --git a/seq/mgstm1.nt1 b/seq/mgstm1.nt1
new file mode 100644
index 0000000..db5e404
--- /dev/null
+++ b/seq/mgstm1.nt1
@@ -0,0 +1,12 @@
+>pGT875 | 266
+ATGCCTATGATACTGGGATACTGGAACGTCCGCGGACTGACACACCCGATCCGCATGCTC
+CTGGAATACACAGACTCAAGCTATGATGAGAAGAGATACACCATGGGTGACGCTCCCGAC
+TTTGACAGAAGCCAGTGGCTGAATGAGAAGTTCAAGCTGGGCCTGGACTTTCCCAATCTG
+CCTTACTTGATCGATGGATCACACAAGATCACCCAGAGCAATGCCATCCTGCGCTACCTT
+GCCCGAAAGCACCACCTGGATGGAGAGACAGAGGAGGAGAGGATCCGTGCAGACATTGTG
+GAGAACCAGGTCATGGACACCCGCATGCAGCtCATCATGCTCTGTTACAACCCTGACTTT
+GAGAAGCAGAAGCCAGAGTTCTTGAAGACCATCCCTGAGAAAATGAAGCTCTACTCTGAG
+TTCCTGGGCAAGAGGCCATGGTTTGCAGGGGACAAGGTCACCTATGTGGATTTCCTTGCT
+TATGACATTCTTGACCAGTACCGTATgTTTGAGCCCAAGTGCCTGGACGCCTTCCCAAAC
+CTGAGGGACTTCCTGGCCCGCTTCGAGGGCCTCAAGAAGATCTCTGCCTACATGAAGAGT
+AGCCGCTACATCGCAACACCTATATTTTCAAAGATGGCCCACTGGAGTAACAAGTAG
diff --git a/seq/mgstm1.nt12r b/seq/mgstm1.nt12r
new file mode 100644
index 0000000..f4cf7c5
--- /dev/null
+++ b/seq/mgstm1.nt12r
@@ -0,0 +1,26 @@
+>rev-comp
+CTACTTGTTACTCCAGTGGGCCATCTTTGAAAATATAGGTGTTGCGATGTAGCGG
+CTACTCTTCATGTAGGCAGAGATCTTCTTGAGGCCCTCGAAGCGGGCCAGGAAGTCCCTC
+AGGTTTGGGAAGGCGTCCAGGCACTTGGGCTCAAAcATACGGTACTGGTCAAGAATGTCA
+TAAGCAAGGAAATCCACATAGGTGACCTTGTCCCCTGCAAACCATGGCCTCTTGCCCAGG
+AACTCAGAGTAGAGCTTCATTTTCTCAGGGATGGTCTTCAAGAACTCTGGCTTCTGCTTC
+TCAAAGTCAGGGTTGTAACAGAGCATGATGaGCTGCATGCGGGTGTCCATGACCTGGTTC
+TCCACAATGTCTGCACGGATCCTCTCCTCCTCTGTCTCTCCATCCAGGTGGTGCTTTCGG
+GCAAGGTAGCGCAGGATGGCATTGCTCTGGGTGATCTTGTGTGATCCATCGATCAAGTAA
+GGCAGATTGGGAAAGTCCAGGCCCAGCTTGAACTTCTCATTCAGCCACTGGCTTCTGTCA
+AAGTCGGGAGCGTCACCCATGGTGTATCTCTTCTCATCATAGCTTGAGTCTGTGTATTCC
+AGGAGCATGCGGATCGGGTGTGTCAGTCCGCGGACGTTCCAGTATCCCAGTATCATAGGC
+AT
+NNN
+CTACTTGTTACTCCAGTGGGCCATCTTTGAAAATATAGGTGTTGCGATGTAGCGG
+CTACTCTTCATGTAGGCAGAGATCTTCTTGAGGCCCTCGAAGCGGGCCAGGAAGTCCCTC
+AGGTTTGGGAAGGCGTCCAGGCACTTGGGCTCAAAcATACGGTACTGGTCAAGAATGTCA
+TAAGCAAGGAAATCCACATAGGTGACCTTGTCCCCTGCAAACCATGGCCTCTTGCCCAGG
+AACTCAGAGTAGAGCTTCATTTTCTCAGGGATGGTCTTCAAGAACTCTGGCTTCTGCTTC
+TCAAAGTCAGGGTTGTAACAGAGCATGATGaGCTGCATGCGGGTGTCCATGACCTGGTTC
+TCCACAATGTCTGCACGGATCCTCTCCTCCTCTGTCTCTCCATCCAGGTGGTGCTTTCGG
+GCAAGGTAGCGCAGGATGGCATTGCTCTGGGTGATCTTGTGTGATCCATCGATCAAGTAA
+GGCAGATTGGGAAAGTCCAGGCCCAGCTTGAACTTCTCATTCAGCCACTGGCTTCTGTCA
+AAGTCGGGAGCGTCACCCATGGTGTATCTCTTCTCATCATAGCTTGAGTCTGTGTATTCC
+AGGAGCATGCGGATCGGGTGTGTCAGTCCGCGGACGTTCCAGTATCCCAGTATCATAGGC
+AT
diff --git a/seq/mgstm1.nt13 b/seq/mgstm1.nt13
new file mode 100644
index 0000000..4c6527e
--- /dev/null
+++ b/seq/mgstm1.nt13
@@ -0,0 +1,36 @@
+>pGT875 | 266
+ATGCCTATGATACTGGGATACTGGAACGTCCGCGGACTGACACACCCGATCGCATGCTC
+CTGGAATACACAGACTCAAGCTATGATGAGAAGAGATACACCATGGGTGACGCTCCCGAC
+TTTGACAGAAGCCAGTGGCTGAATGAGAAGTTCAAGCTGGGCGACTTTCCCAATCTG
+CCTTACTTGATCGATGGATCACACAAGATCACCCAGAGCAATGCCATCCTGCGCTACCTT
+GCCCGAAAGCACCACCTGGATGGAGAGACAGAGGAGGAGAGGATCCGTGCAGACATTGTG
+GAGAACCAGGTCATGGACACCCGCATGCAGCtCATCATCCGCTCTGTTACAACCCTGACTTT
+GAGAAGCAGAAGCCAGAGTTCTTGAAGACCATCCCTGAGAAAATGAAGCTCTACTCTGAG
+TTCCTGGGCAAGAGGCCATGGTTTGCAGGGGACAAGGTCACCTATGTGGATTTCCTTGCT
+TATGACATTCTTGACCAGTACCGTATgTTTGAGCCCAAGTCTGGACGCCTTCCCAAAC
+CTGAGGGACTTCCTGGCCCGCTTCGAGGGCCTCAAGAAGATCTCTGCCTACATGAAGAGT
+AGCCGCTACATCGCAACACCTATATTTTCAAAGATGGCCCACTGGAGTAACAAGTAG
+NNNNNNNNN
+ATGCCTATGATACTGGGATACTGGAACGTCCGCGGACTGACACACCCGATCCGCATGCTC
+CTGGAATACACAGACTCAAGCTATGATGAGAAGAGATACACCATGGGTGACGCTCCCGAC
+TTTGACAGAAGCCAGTGGCTGAATGAGAAGTTCAAGCTGGGCCTGGACTTTCCCAATCTG
+CCTTACTTGATCGATGGATCACACAAGATCACCCAGAGCAATGCCATCCTGCGCTACCTT
+GCCCGAAAGCACCACCTGGATGGAGAGACAGAGGAGGAGAGGATCCGTGCAGACATTGTG
+GAGAACCAGGTCATGGACACCCGCATGCAGCtCATCATGCTCTGTTACAACCCTGACTTT
+GAGAAGCAGAAGCCAGAGTTCTTGAAGACCATCCCTGAGAAAATGAAGCTCTACTCTGAG
+TTCCTGGGCAAGAGGCCATGGTTTGCAGGGGACAAGGTCACCTATGTGGATTTCCTTGCT
+TATGACATTCTTGACCAGTACCGTATgTTTGAGCCCAAGTGCCTGGACGCCTTCCCAAAC
+CTGAGGGACTTCCTGGCCCGCTTCGAGGGCCTCAAGAAGATCTCTGCCTACATGAAGAGT
+AGCCGCTACATCGCAACACCTATATTTTCAAAGATGGCCCACTGGAGTAACAAGTAG
+NNNNNNNNN
+ATGCCTATGATACTGGGATACTGGGTCCGCGGACTGACACACCCGATCCGCATGCTC
+CTGGAATACACAGACTCAAGCTATGATGAGAAGAGATACACCATGGGTGACGCTCCCGAC
+TTTGACAGAAGCCAGTGGCTGAAATGAGAAGTTCAAGCTGGGCCTGGACTTTCCCAATCT
+GCCTTACTTGATCGATGGATCACACAAGATCACCCAGAGCAATGCCATCCTGCGCTACCT
+TGCCCGAAAGCACCACCTGGATGGAGAGACAGAGGAGGAGAGGATCCGTGCAGACATTGT
+GGAGAACCAGGTCATGGACACCCGCATGCAGCTCATCATGCTCTGTTACAACCCTGACTT
+TGAGAAGCAGAAGCCAGAGTTCTTGAAGACCATCCCTGAGAAAATGAAGCTCTACTCTGA
+GTTCCTGGGCAAGAGGCCATGGTTTTGCAGGGGACAAGGTCACCTATGTGGATTTCCTTG
+CTTATGACATTCTTGACCAGTACCGTATGTTTGAGCCCAAGTGCCTGGACGCCTTCCCAA
+ACCTGAGGGACTTCCTGGCCCGCTTCGAGGGCCTCAAGAAGATCTCTGCCTACATGAAGA
+GTAGCCGCTACATCGCAACACCTATATTTTCAAAGATGGCCCACTGGAGTAACAAGTAG
diff --git a/seq/mgstm1.nt13r b/seq/mgstm1.nt13r
new file mode 100644
index 0000000..0768c03
--- /dev/null
+++ b/seq/mgstm1.nt13r
@@ -0,0 +1,35 @@
+>rev-comp
+CTACTTGTTACTCCAGTGGGCCATCTTTGAAAATATAGGTGTTGCGATGTAGCGGCTACT
+CTTCATGTAGGCAGAGATCTTCTTGAGGCCCTCGAAGCGGGCCAGGAAGTCCCTCAGGTT
+TGGGAAGGCGTCCAGGCACTTGGGCTCAAACATACGGTACTGGTCAAGAATGTCATAAGC
+AAGGAAATCCACATAGGTGACCTTGTCCCCTGCAAAACCATGGCCTCTTGCCCAGGAACT
+CAGAGTAGAGCTTCATTTTCTCAGGGATGGTCTTCAAGAACTCTGGCTTCTGCTTCTCAA
+AGTCAGGGTTGTAACAGAGCATGATGAGCTGCATGCGGGTGTCCATGACCTGGTTCTCCA
+CAATGTCTGCACGGATCCTCTCCTCCTCTGTCTCTCCATCCAGGTGGTGCTTTCGGGCAA
+GGTAGCGCAGGATGGCATTGCTCTGGGTGATCTTGTGTGATCCATCGATCAAGTAAGGCA
+GATTGGGAAAGTCCAGGCCCAGCTTGAACTTCTCATTTCAGCCACTGGCTTCTGTCAAAG
+TCGGGAGCGTCACCCATGGTGTATCTCTTCTCATCATAGCTTGAGTCTGTGTATTCCAGG
+AGCATGCGGATCGGGTGTGTCAGTCCGCGGACCCAGTATCCCAGTATCATAGGCATNNNN
+NNNNNCTACTTGTTACTCCAGTGGGCCATCTTTGAAAATATAGGTGTTGCGATGTAGCGG
+CTACTCTTCATGTAGGCAGAGATCTTCTTGAGGCCCTCGAAGCGGGCCAGGAAGTCCCTC
+AGGTTTGGGAAGGCGTCCAGGCACTTGGGCTCAAAcATACGGTACTGGTCAAGAATGTCA
+TAAGCAAGGAAATCCACATAGGTGACCTTGTCCCCTGCAAACCATGGCCTCTTGCCCAGG
+AACTCAGAGTAGAGCTTCATTTTCTCAGGGATGGTCTTCAAGAACTCTGGCTTCTGCTTC
+TCAAAGTCAGGGTTGTAACAGAGCATGATGaGCTGCATGCGGGTGTCCATGACCTGGTTC
+TCCACAATGTCTGCACGGATCCTCTCCTCCTCTGTCTCTCCATCCAGGTGGTGCTTTCGG
+GCAAGGTAGCGCAGGATGGCATTGCTCTGGGTGATCTTGTGTGATCCATCGATCAAGTAA
+GGCAGATTGGGAAAGTCCAGGCCCAGCTTGAACTTCTCATTCAGCCACTGGCTTCTGTCA
+AAGTCGGGAGCGTCACCCATGGTGTATCTCTTCTCATCATAGCTTGAGTCTGTGTATTCC
+AGGAGCATGCGGATCGGGTGTGTCAGTCCGCGGACGTTCCAGTATCCCAGTATCATAGGC
+ATNNNNNNNNNCTACTTGTTACTCCAGTGGGCCATCTTTGAAAATATAGGTGTTGCGATG
+TAGCGGCTACTCTTCATGTAGGCAGAGATCTTCTTGAGGCCCTCGAAGCGGGCCAGGAAG
+TCCCTCAGGTTTGGGAAGGCGTCCAGACTTGGGCTCAAAcATACGGTACTGGTCAAGAAT
+GTCATAAGCAAGGAAATCCACATAGGTGACCTTGTCCCCTGCAAACCATGGCCTCTTGCC
+CAGGAACTCAGAGTAGAGCTTCATTTTCTCAGGGATGGTCTTCAAGAACTCTGGCTTCTG
+CTTCTCAAAGTCAGGGTTGTAACAGAGCGGATGATGaGCTGCATGCGGGTGTCCATGACC
+TGGTTCTCCACAATGTCTGCACGGATCCTCTCCTCCTCTGTCTCTCCATCCAGGTGGTGC
+TTTCGGGCAAGGTAGCGCAGGATGGCATTGCTCTGGGTGATCTTGTGTGATCCATCGATC
+AAGTAAGGCAGATTGGGAAAGTCGCCCAGCTTGAACTTCTCATTCAGCCACTGGCTTCTG
+TCAAAGTCGGGAGCGTCACCCATGGTGTATCTCTTCTCATCATAGCTTGAGTCTGTGTAT
+TCCAGGAGCATGCGATCGGGTGTGTCAGTCCGCGGACGTTCCAGTATCCCAGTATCATAG
+GCAT
diff --git a/seq/mgstm1.nt1r b/seq/mgstm1.nt1r
new file mode 100644
index 0000000..b7f376c
--- /dev/null
+++ b/seq/mgstm1.nt1r
@@ -0,0 +1,13 @@
+>mgstm1-rev
+CTACTTGTTACTCCAGTGGGCCATCTTTGAAAATATAGGTGTTGCGATGTAGCGG
+CTACTCTTCATGTAGGCAGAGATCTTCTTGAGGCCCTCGAAGCGGGCCAGGAAGTCCCTC
+AGGTTTGGGAAGGCGTCCAGGCACTTGGGCTCAAAcATACGGTACTGGTCAAGAATGTCA
+TAAGCAAGGAAATCCACATAGGTGACCTTGTCCCCTGCAAACCATGGCCTCTTGCCCAGG
+AACTCAGAGTAGAGCTTCATTTTCTCAGGGATGGTCTTCAAGAACTCTGGCTTCTGCTTC
+TCAAAGTCAGGGTTGTAACAGAGCATGATGaGCTGCATGCGGGTGTCCATGACCTGGTTC
+TCCACAATGTCTGCACGGATCCTCTCCTCCTCTGTCTCTCCATCCAGGTGGTGCTTTCGG
+GCAAGGTAGCGCAGGATGGCATTGCTCTGGGTGATCTTGTGTGATCCATCGATCAAGTAA
+GGCAGATTGGGAAAGTCCAGGCCCAGCTTGAACTTCTCATTCAGCCACTGGCTTCTGTCA
+AAGTCGGGAGCGTCACCCATGGTGTATCTCTTCTCATCATAGCTTGAGTCTGTGTATTCC
+AGGAGCATGCGGATCGGGTGTGTCAGTCCGCGGACGTTCCAGTATCCCAGTATCATAGGC
+AT
diff --git a/seq/mgstm1.nts b/seq/mgstm1.nts
new file mode 100644
index 0000000..4d828c3
--- /dev/null
+++ b/seq/mgstm1.nts
@@ -0,0 +1,9 @@
+>mgstm1
+GCACCATGCCTATGAT,
+GATACACCA,
+CCATCCTGCGCTACCTTGCC,
+aaggtcacctatgtggatttccttgcttat,
+CCTGTCCACACTGGG,
+TCAAGTCCACACAGCC,
+TCACGCTTCCTA,
+CAATACTTGAGTGCCAGCC
diff --git a/seq/mgstm1.raa b/seq/mgstm1.raa
new file mode 100644
index 0000000..9bf8248
--- /dev/null
+++ b/seq/mgstm1.raa
@@ -0,0 +1,5 @@
+>mgstm1.aa shuffled
+LEGLPLKPCK RPQDRFSEDR VILFESFTYG FILAAWNMGY NEAEDMDRSH YLLTKELPKS
+YGGRRYYAPD FTYLFLILRN PPVKRAAPDR GNTMLQIFMA FLDDQYVMQD AFLPIGDGLK
+DKPMRSNMKY ITHNVYIDED IVRCKWIFAD EMSTPLLLWL MHKQKPGHRF LEKSWSHTRR
+EEEYNSIIDL KKSYKYLKNM AELKITSQTI FFDKDAE
diff --git a/seq/mgstm1.rev b/seq/mgstm1.rev
new file mode 100644
index 0000000..7903f42
--- /dev/null
+++ b/seq/mgstm1.rev
@@ -0,0 +1,16 @@
+>mgstm1 reverse complement
+AAGTGTGTTTCAGACTTTATTGTAGACGAGACAGACCTGGGGAGGCTACTCCACCAGGAACAGGCTGGCACTCAA
+GTATTGACTTCGGGGTAACTCTAGGGAGGGCTAGCACTAAGATAGTGTTGACCATCGGGGTAATTCTAGGAAGCG
+TGAGTTCAGGACAGACCTCAGTTCGCAGAAACGGGCTGTGAGGTTGGGTCAGGGAGCCAATGAAGAAGGGGCCAT
+gtgaaagaaactggggagaatgaaggctgtgtggacttgactgggaagagggtgaggagatggggctgaccaagc
+tgcagaagggagcgggaaggagagagaaccaggagccacagtgcagaaggccagggtgctgtccccacccagggc
+CTGCAGGATCCCCAGTGTGGACAGGTCCTCCTAGTGAGTGCCCGTGTAGCAAGGGCCTACTTGTTACTCCAGTGG
+GCCATCTTTGAAAATATAGGTGTTGCGATGTAGCGGCTACTCTTCATGTAGGCAGAGATCTTCTTGAGGCCCTCG
+AAGCGGGCCAGGAAGTCCCTCAGGTTTGGGAAGGCGTCCAGGCACTTGGGCTCAAACATACGGTACTGGTCAAGA
+ATGTCATAAGCAAGGAAATCCACATAGGTGACCTTGTCCCCTGCAAACCATGGCCTCTTGCCCAGGAACTCAGAG
+tagagcttcattttctcagggatggtcttcaagaactctggcttctgcttctcaaagtcagggttgtaacagagc
+atgatgagctgcatgcgggtgtccatgacctggttctccacaatgtctgcacggatcctctcctcctctgtctct
+ccatccaggtggtgctttcgggcaaggtagcgcaggatggcattgctctgggtgatcttgtgtgatccatcgatc
+AAGTAAGGCAGATTGGGAAAGTCCAGGCCCAGCTTGAACTTCTCATTCAGCCACTGGCTTCTGTCAAAGTCGGGA
+GCGTCACCCATGGTGTATCTCTTCTCATCATAGCTTGAGTCTGTGTATTCCAGGAGCATGCGGATCGGGTGTGTC
+AGTCCGCGGACGTTCCAGTATCCCAGTATCATAGGCATGGTGCTGGTGCTGTGGTCTTCTCAAACTGGCTTCAGC
diff --git a/seq/mgstm1.seq b/seq/mgstm1.seq
new file mode 100644
index 0000000..bbd1363
--- /dev/null
+++ b/seq/mgstm1.seq
@@ -0,0 +1,20 @@
+>pGT875 | 266
+gctgaagccagtttgagaagaccacagcaccagcaccATGCCTATGATACTGGGATACTG
+GAACGTCCGCGGACTGACACACCCGATCCGCATGCTCCTGGAATACACAGACTCAAGCTA
+TGATGAGAAGAGATACACCATGGGTGACGCTCCCGACTTTGACAGAAGCCAGTGGCTGAA
+TGAGAAGTTCAAGCTGGGCCTGGACTTTCCCAATCTGCCTTACTTGATCGATGGATCACA
+CAAGATCACCCAGAGCAATGCCATCCTGCGCTACCTTGCCCGAAAGCACCACCTGGATGG
+AGAGACAGAGGAGGAGAGGATCCGTGCAGACATTGTGGAGAACCAGGTCATGGACACCCG
+CATGCAGCtCATCATGCTCTGTTACAACCCTGACTTTGAGAAGCAGAAGCCAGAGTTCTT
+GAAGACCATCCCTGAGAAAATGAAGCTCTACTCTGAGTTCCTGGGCAAGAGGCCATGGTT
+TGCAGGGGACAAGGTCACCTATGTGGATTTCCTTGCTTATGACATTCTTGACCAGTACCG
+TATgTTTGAGCCCAAGTGCCTGGACGCCTTCCCAAACCTGAGGGACTTCCTGGCCCGCTT
+CGAGGGCCTCAAGAAGATCTCTGCCTACATGAAGAGTAGCCGCTACATCGCAACACCTAT
+ATTTTCAAAGATGGCCCACTGGAGTAACAAGTAGGCCCTTGCTACACGGGCACTCACTAG
+GAGGACCTGTCCACACTGGGgATCCTGCAGGCCCTGGGTGGGGACAGCACCCTGGCCTTC
+TGCACTGTGGCTCCTGGTTCTCTCTCCTTCCCGCTCCCTTCTGCAGCTTGGTCAGCCCCA
+TCTCCTCACCCTCTTCCCAGTCAAGTCCACACAGCCTTCATTCTCCCCAGTTTCTTTCAC
+ATGGCCCCTTCTTCATTGGCTCCCTGACCCAACCTCACAGCCCGTTTCTGCGAACTGAGG
+TCTGTCCTGAACTCACGCTTCCTAGAATTACCCCGATGGTCAACACTATCTTAGTGCTAG
+CCCTCCCTAGAGTTACCCCGAAGTCAATACTTGAGTGCCAGCCTGTTCCTGGTGGAGTAG
+CCTCCCCAGGTCTGTCTCGTCTACAATAAAGTCTGAAACACACTT
diff --git a/seq/mgstm1_genclone.seq b/seq/mgstm1_genclone.seq
new file mode 100644
index 0000000..7a54610
--- /dev/null
+++ b/seq/mgstm1_genclone.seq
@@ -0,0 +1,2088 @@
+>gi|22316163|emb|AL671877.15| Mouse DNA sequence from clone RP23-214E3 on chromosome 3 Contains the gene for a novel protein (0610005A07Rik), the Gstm6 gene for glutathione S-transferase mu 6, a glutathione S-transferase mu 6 (Gstm6) pseudogene, the Gstm3 gene for glutathione S-transferase mu 3, the Gstm2 gene for glutathione S-transferase mu 2, the Gstm1 gene for glutathione S-transferase mu 1, a novel gene, a ubiquitin-conjugating enzyme E2 variant 1 (Ube2v1) pseudogene, a glutathione  [...]
+GAATTCTTTAGCAGAATGCCACTAGCTGTGGCAAATGCTACTTATGTGTTTAGTCATGGCCAGTCCTCTC
+AACTCAAGTCACTAAACTGCACTTTAGCTACAGAAGGAGCCTAATGCCCCAAATTAAATTCTCACGGGGC
+TTTTCCCAGGCCAGAGTCTGCCACAAGGCACAAGTGCATGTACACACATATGCATGGGTCTCTGAATCAT
+TGTTAAGGATACGTGCTGGAGGGCTGGTGAGATGGCTCAGCAGGTAAGAGCACTTACTGCTCTTCCGAAG
+GTTCTGAGTTCAAATCCCAGCAACCACATGGTGGCTCACAACCATCTGTAATGAGATCTGACACCCTCCT
+GTTGACCAGAGTGAGCAGAGGTCCTAAAATTCAATTCCCAAACACCACATGAAGTTCACAACCATCTGTA
+CAGCTACAGTGTACTCACATACATAAAATAATTTTTAAAAAATCTTTAAAAAAAAAAGATACGTGCTGGA
+TAACTTTATGTCACCCCACAAAAGCTAGAGTCATCTGAGAGGAGGGAATTTGCAGCTGTGGGGGCCCCCA
+TAAGATTGGCTCATAGGCCATTAATGATTAATGATTGTTTAATGATTAATGATTAATTAATGATGTGGGA
+GGCCTCTGGGCTGGTGGTCTTGGATGCTATAAGAAAGCACAGTGAGCCAGCCATGGAGAGCAAACTAGTA
+AGGAGCACTCCTCCATGGCCTCTGCATCAGTTCCTGCCCTGACTTCTCTCAGTGATGGACTGTGATGTAG
+AACTGTAAGCTGAAATAAACCCTTTCTTCCTGGAGTTGCTTTTAGTTATAGTGATCTGTCAAAGCAATAA
+GCAAGACAGGGCATATCATTGTCTTCATAACTATGAACAGGAAGATATATAATTTCTGACTATAATTGTG
+AAAATAATTTTTAAGAAAAACTCATGCAAATTAAAATAACAATCTTAAAAGTATAGGATGGGGTCTGGAT
+AGATGGCTCAGTAGTTAAGAGTATTTGCTGCTCTTGCAGAGGATGGGGAGTTTTACTCTCAGTGTCCATC
+TGGAGGCTCACAACCGTCGATAATTACAGTTCCAGAGGATCTGCAACAGTCTTCTGACCTCCACAGGCAA
+AACACTCATTCTTAAAACAAATTTAAAATATATTGAACAAGTACTAGAACTTTAGTGTATACCCACCTGG
+CCCTTCCTGACTATATTTTTTAAAAGTAGTTGACATCTTCCTACAGACTGGACCCTCTGCATAGCACGGT
+CCTTCTGTAGACTAGTGATTCAGGAAATAGTGGCCACACGGTCCTGTTTCTATATTCTCCAAAGAGTGGA
+GAGGAAGGCAGGGGCAGCAGGAGAGGAGGAGCCATGATGGCCACTGCTCTGAGAGCCTGCTGGCCCAATT
+CTCCCCTTCCTTGAGCAGAGCTGTTCTCCTGCAGTGTCCCCAGGAACCAGAGAAGAGGTCTGAAACAATG
+GTGAGTAGCGACCTCACAAGGACTCACTCCAGAGTTGCCAGACCCTCTGGATCTCTGAGTGCTTGGTCCC
+AACTGCACAGTGTGGACAGTTCAAATTGTTCAAAGGGACTCTGCAGGAGCAGCAAAATGATCTGGGTAAA
+ATGTGGCCAGGGCCTAGAGAGTGCCCTTGGGACTGTGTGTCCTAGCTCCTTCTAGATCTGTAGCTAACAT
+TCTAATATTCTCAAGGCTAATTCATAGAAATTTCAAATAAGATCAGGTCGGAATGTAAGATTTTTAACAT
+ACAAATGCTTAGATATAAATGCTGACAACATGGGGTGCCTCCGGTGGAAACGCCTTCATCCAAACTTAGA
+AAATCCGGAATCTAAAGAGAAGAAAATAAAATCTCATTCGTACAGTGCTGGCAAACTAGAATTCAGTAAT
+CAAAATTTCCCACATGTTTCAGATATTAAAAATAAGGCAAAAATAAAGGTTTTCAGAAAGCAGAAAAGCA
+ACAAGATCTGAAATTACAGAAAACAAAAAAACATTTCTAAGAAGAAAAATGGGGAAGCTTAAAATTCTTT
+TTGTTGTTATTTATTTTTATTTTATGTGTATTGACGTTTTGCCTGCATGTGTGTCTGTGCACCACGTGTG
+TGCAGTGCCCTCAGAGGCCAGAAGAGGTAAGCAAATCCTCTGGAACTGAAGTTACAGATGGGTGTGAGCC
+ACTAGGTGGATGCTGGGACCTGAAACCAGATCTTCTAGAAGAACAACCAGTGCTCTTAATGGCTGGATCA
+ACTCTCCGGCTACAAAACAGGAAACTTTTGCCAATATGCTTTTTAATGGGATAGCTGGTTTTCTAAAAAC
+ATTCTAAATGGAGAAAATATTAGTCCACCAAGCTAAACATAACTTTAGAAAAGGTTGTCTGGCCTGCCCA
+TGGCCATATGCAGCAAGCCCTGGGGACTCTTTTGGAGATGTCGTTCACATACCAAGGCCACAGGATAACT
+GCAGCTAGTTAGGAGTCTAACTTGAATAGATACACTGTGAAGTGACTTTTAAGTTACACCCTTTGTCTCT
+CTGTCTCTCTCTCTCTCTCTCTTTCCCCAAGATAGGGTCTCTCTGTATAGCCCTGGCTGTCCTGGAACTC
+ATTCTGTAGACCAGGCTAGCCTTGAACTCAGAAATCCACCTGCCTCTGCTTCCCAAATGCTGGGATTAAA
+GGCATGCGCCACCACTGCCCGGCTGTATATCACTCTAAAAACATAGAATTTAGGACTGGAGAGATGGCTC
+AGTGGATAAGAGCTCTGACTATTCTTCCAGAGGTCCTGAGTTCAATTCTCAGAAACCACATGGTGGCTCA
+CAACCATCTGTAATGGAATCTGATGCCCTCTTCTGGTGTGTCTGAAGACAGCTACAGTGTACTCACTTAA
+AAAAACAAACAAACATAGAATTTAGTACCTTAGTCTAGAGATTCATTTGTCTGGCCACTCCTTCACAGAG
+TTGCTGTTCACTGTAGAAAGGTACAAAAGCATCAGCCTTTACTCGTTGCTTTAGACTTCAGTCTTGTGAA
+GATTCCCACATCTGCATAGATTATGAAATGTTTGTGGTTTTCTCTTACATTACTTGGGCTCCTAGATCCA
+ACCAGCAATCCCAAATGAGAACCAACCTCTAACCACTTGACACCCAATGTGTGGTTCGTCCTTGCTGACT
+GGAGCAGGGCTCTGCCCACCCCTAACCTCATCATAGGGGGCTGCTCCAGCTGAGATCCTGGGCCTCAAGC
+AAAAGATGAGGCTGCCTTCAGGTCTTTCTGTGTTGCCTCTCAGAATCAGTGCAGCTACAGGAAAGAAACT
+GTAGCGATCTTTCTAAGTCTGCTGTGTGCACTGGCCTGCAGACTGGCCAGTTTATGGCTTCTGTTATAAC
+AGCATTTCTCTTCTCTTCTCTTCTCTTCTCTTCTCTTCTCTTCTCTTCTCTTCTCTTCTCTTCTCTTCTC
+TTCTCTTCTCTTCTCTTCTCTTCTCTTCTCTTCCCTTCCCTTCCCTTCCCTTCCCTTCCCTTCCCTTCCC
+TTCCCTTCCCTTCCCTTCCCTTCCCTTCCCTTCCCTTCCCTTCTCTTTTCTTTTCTTTTCTTTTCCTTTT
+AAGACAAAAGTTTGCTGTGGATCCCAAGAAGAAAACAAAATAAAAACAAACAAACAAAAAACCCAGCAAA
+GCAAGGACAGCAAAAAGGTGACGTTCCCCTCCCGAGTTCCATCATAAGAGTTTGAGTTTGGTTTTTCTGC
+TTTCTGAGAGCACAAGAGGGCTTGGCAGCTGCTGAGACCTGGGAGACCTGAGGAAGCAACATTCTGTGTC
+CTGCCAGCCTTGAGGAGTCTGTGCAAAAGGACTTTCCTCACTCACTACTGCCTGGCCAGTTCTGGCAAAC
+TCCCTTCCCACTATCACTCGCCCAGTGTCTCAGATGAGCAGGTCTGTCTGTGGCTGGAAGCTACTGGATT
+TCTGTAGGGAACTGGAGGTCTCATTCATACAGGCAAACACCTTTAATCTCCTATATCGACAGGGGTCTTT
+TATTGTTTCAGTCAGTTCCCAAGAGTGAATTTGGATCCTAGCTGTGACACTCCATATCTAGTTTAAGAAC
+AGCTCTGGCTTCCTAATCCGTTTCTGCTCACACAATCCTGTCTGCAGTCCCGTTTCTCCCAGCTTTATCC
+TGGATTCTGTTCCACGTTGGCAGTTTTCCTTTGTGTTTAATGATCTCTCCTGACCCACTGCAAATGTTTC
+AAGTGTAATGAAATTTTATCAGCAAAACATTGCACAGGGTTGATACAAAGAGCCGTCTCTGCTGGCCCCT
+GCCCTTTGCACTGAGAACTGGCCAGGTAGGATCTCTGGCTAGTTTTCTCAGGCAAATCTTACCACGATTT
+TAAAATGTTTTTTTAAAAAACACGTCGTAGCCGGGCGTGGTGGCGCACGCCTTTAAACCCAGCACTCGGG
+AGGCCGAGGCAGGCGGATTTCTGAGTTCGAGGCCAGCCTGGTCTACAAAGTGAGTTCCAGGACATCCAGG
+GCTATACAGAGAAACCCTGTCTCAAAATAAATAAATAAATAAATAAATAAATAAATAAATAAATAAATAA
+ATAAATACAAACACAATGTAAGGTGCCTCTTTAGTATGGGATAGCGTTTCTGCCTCATAAAAATGCATCT
+TAAACTCCTTTCTTAATGAGTCCCCTTGGGCTCCAGAGGGGGGCGCTGTTTTGAGTATTTTGATTGTGCG
+GCTCTTGAATGTGGGGCATGATGTTTTCTTCTCCCATGGATAGCTTTTGAATTCCTGCTTGTCTTGCTGC
+TGTTGATGTTCCTTCTCTGAGGGGGACACATAGTGTATTGAGGCTTTGTGTACAGAGGGCCAATCAGAAT
+GGGGCCTGAAAAGACAGTTAGGCAATAAGTGTGGGTAACAACTCTTGTGAGTGATAGGAAATCCTTTCTT
+CTTTTGCCTCGTATTGGAAGTCCTTTGAATTTTGACAGAGAACATTATTTCCTTGTTGGAGAGCTTGATT
+TTTTTTTTTTTTTTGATCCTTTAAAATAAATCTTAAAAAAAAAATTTTTTTTTTAAAGCAGGAGCTGTGT
+GCATGCTAGACACGTGCTCTACCAGACAGATTTCTGACACCCTCTCATTATTATTTGTTAGGCTCTCACA
+CATAAATAACGGTTATAGTTTACATTGCTTGTTTAAGCATGCCTCTGGTTTAAAATTCTGCTTCATTTGT
+AGAGCATGACAACAACTTTGCTTTATTCTCCCCCTACCAGCTGAAACGAATTACAAACTCACATGAGAAA
+CAATTTAGGGTCTGGGGATATAGCTAAATGATACGGAACACTACATGCATGAGTTCTGGGTTCAACCTGC
+ATGCTTGTGCGCGTGCATGCACACACACACACACACACACATATACACACACATACACACACAAGAGTTG
+TCAACACATTTTAAAAGACAAACCCTTTGTAGATTTGTAGTGGTCTGTCTTCCACTTTAGCAAAACACCT
+GAGGTAATCAACTTGAAAAGAGGAAAGGTGGGGGCTGGAGAGATGGCTCAGTGGTTAAGAGCACTGTTTG
+CTGCTCTTTCAAAGGTACTGAGTCCAATCCCCAGTAACCATGTGGTGGCTCACAAACATCTATACTAGGA
+TCTGGCGCCCTCTTCTGGCATATGAAGATAAAGCATGCACGTCCATTAAATAAATAAACAAAGAAACCAA
+AAAACCCAAAAAGATGAAAAGAGTATTTTGGTTACATTTGGGATGTCTTAGTCCATGGTTCACAGCCACT
+GCTTCGGAGCCTGCAGCAAGACAATTCATCACATTGAGAGCATGTACCCGAGGAAGCTCCTTCACTTTAT
+GACTGAAAATGAGAGAGACAGAGACAGAGTCAGAGAGAGAGAGACAGACAGAGAGAGACAGAGAGAGAGT
+TACACACTGAGAGAGGAAGAGAGATCAGAGTCCCACAATTCCTTTCAAGGCGGTATTCCTATTCCATAAA
+AGCTCCCACTATATTCTACCTCTTAACTTTTCTACCAGTTCCCAGTCACACCAAGCTAAGGACCATCATC
+CTTAACATGTGGGCCTTTGGAAAACATTCCAGATCAAAACTAAAGAACCACTCTAAATTCAAACCAGAGA
+AAGTTTTTGTTTTGTTGAAATGGGATCTCACAACCCCGAAAGCTGGCCTTGAACTAGTGATGTTGCAGAG
+GGTGGGCCTGAGTTCCTGTGTCTCCTGATTCCACTTCCCAGGTGCTGGATGTATGGGTACATCACCACGC
+CTGGCCAGAGAACTCTTGTTTCCATTTGTTGCAGTTTGGATGTCGAATATCCCAAGGTCTGCATAATAAA
+GGCTTGGCGCTCAGAAAGTCCTATTGGGAGGTGATAAACCTGTGAGAGGTAGAGCCCGAGGGAGATTATT
+ATGGTCTGGATAAGAAATGCCCTTCCAAAGAGGTTATGATGCTAATCTTGTCAGCTTAACCCACCTGCAG
+GGAGAGAACCTTAGATGAAGAACTGCCACCATCAGGTTAGCCTGCAGACATAGTTGTGGGGCATAACCTT
+GATTGTGGTTACCACAGAAGGGCCTAGCCCATTGTTGGTGGTACAATCTCTAGGTGGTTGACCTGGCATT
+ATAGCATGGTAATAGAGTCTTGGAGAGATGACACCTTACAGGTGGTCCTTGGGACTTTGGAGGCACACCT
+TTAAAAGGCACCCCACATTCTTCCTCCTTTGCTCTCTGGTGTAAACGGAGTGGTTTTCTCTGCCATATAG
+TCTTGGTGTGATGTATTCCTCATAACAGGTCCAAATGATGGGGCCAATCAGCTAAGGACTGGAACCTCCA
+TGACTATGAACTAAAAATTTATCTTTTTGAGTTAATTATCTTAGGCATTGCTTTACAATAACACAAAGTT
+GAGCAAGGCACCACCTTAGCTAAAAATCTGTGCACTGATGTAAATGAACGAATTACCAACTATACAATTA
+AGGAGTCATATCTACCTTTTGAGATTCAGGTTTTGTCAGTTCTTGGGGAAATTGGAAATTAAGTTCCAAT
+CTTGTTCTTGGCACCTTCTAGGAGGTCATCTGACCCAAAATGATGCTGAGCATCTCCTTCCAGACACTGG
+CTGTCAGAAACGTACTGAAGTGGATGATGAACACAAACTTCGCAGTTCCCACAAGACGTGCATGGCTGCA
+GACATCACTGCTGACGCTCTGGGAGAAGAGTGAAAAGGTTATGTGTAGTGGGTCCTGCCTATTATGTTAA
+TTTGGGTCTCCAAAATTGTTTGCATGAGAATCTGCACATCCACATGTAAGGCAATTGAGGGACCCTGCTC
+CCAGTTGGTTCTGATTGGTAAATAAAGTTGCTGGCGACCAATGGCTGGGTAGGACAGGACAGACAAAGGC
+GGGACTTTTAGTATTCCTGGATGAGGGCAAGGTACCAAGAAGGAAGGGAGGAGATCTGCTATGCTGGGGG
+TAGGGGTGGGAAGGAGAGAAGAGATGCCACTCCTGAGAGGGACAGAGAGCTTAGCCGCCCTGTAGGAGCT
+TGGGGAACTCAGCCCAAGAGGGCTGCACATCTGGGTCCAGGGCAGCCAAGATGGAATATAGGTTTTAGTA
+TGTAGTAACTCGGGAATATCGGGGCGGGGGGGGGGAGGGGAGGGAGGTGGATTAGCCACATGGCAGTTAG
+GAAGCGTCCCAGTCATTGAGCTGATTAAGTCATATCAAAATATAAAGGCTGTGTGAGTGTGTCTTTCTTT
+TGGAAACCCAGAACACTGGGGTGGGGTGGGTAGGGAGGAACCTGCTGCCAGGATCAATTAATTGGCAATT
+AATGCCAAGAGTCTTCTCTGTTTGACTCTATAAATTAAACCATGGGTTTTTAATGAGCACAAATCCATAT
+GAGCTAAATCCTTTTTGGATGACCTCTGCTCTTTTGAATTGCATGAAAGTCCACAGGTCCCCTTGATAAA
+ACAAGAACACATGAGTCACCACCAGTAACAGTGAGGTGAAATTCATCCCGGGCCTGGCGCATGAGCAAGG
+GTGAGCAGGAATATAGAAGCCTCTTCATTCTACGCATGAGCTGCGGAGACCATTTTCATTAGAGAATGCC
+TTCGGAAAGCAGTTGCTAATTACCAGTTGGACCATAGTCCCAGCCACTGGATAGGTCTTTGAGAGATCAC
+AGTGTATCTAGAGGTGGTGTAGAAAAGGTAACATTTACTGAATTTTACCAACAGAGAAGCTAACAGAGAA
+ACTTACAGTAGGGTCAAAGGGCATCCACAAGAGGAGCATGTTTCAAAGCATTGGAAGGTTTCATGACACC
+GTCAGACCAAGAGCACATGTTCTCTCTACAGCTCTCTTATAAAAACCCAGGGCACAGTCTGTGCCCCACA
+AAAGCCAAAGTCATCCACCCATCCTTGAGGGGTCGACCGACATACCAGATGAATAGTTAAAAGCAGAAAA
+GGATTAATTCAATGCAGCCACATTAAAAACAAGGACAAAGACCCAGAAACTTCTTCTGGTCCATCTTTGG
+GATCCTGAAATGAGATTAAAGCTTAAATGGCTGGTCAGGAAGATGATCATCTAAGCAGTGTCTGTCAAGG
+TGTCTGCTATTGATGCATTGTCTGTGGCCGGTGCTCTAGCAGGTCCAAGCATGACAAACATGGTTCCGAC
+AGGTTTTTCCCCTTCCCCTGCCCTCTCCCTTGCTAAACCATTAGATTCTATTCCTACAGATTCCATTGTT
+AGTTTGCTCTGTCCTTATGGGACCAGCAATCCAAGGAACAGCTGATTTCATATACACACAAGGAGCAGGT
+TCTAGTCTATATAACACCAGTTCGCCCCAGTCCCCAGAAGCCAGAGAGGGAGTCAGTGGTGTTCCATATG
+GTCCTTTAGCTGAACAGCCTGGTCGTGGCGCACTCTCAATAGAGGATATAGTGTCCAATGTTCTTCTTTC
+TTCACTCATCATACACCATCAGCCAGGAAAGGGGCTTTGTACTGGATGAAAATTTCTCCTGTAGCTGTGG
+TAGTGAATTGTTGACCGTTGTCCTGTGCCTTTAACCCCTAGGGTGGCCTGGGACTTCTGATACTGGTTGA
+AGTCCAGACCATTAAAATAGCCAGTGGCGTGGAGTTTTTGATCCAGTTCCAAATAAGATGACTCTGTACA
+GAGTGTGACACCTTGAAGAGATCGTTGAAGAGACTATAGAAGCATCAAAGGCTTTGTGCTCCGGTTGTAG
+AACACGGTTGTTGTTAAGTGCTGGATCCTGGCAGCTCTTAATTCATTCCTTCAGTCAACACATTAACAGT
+GCCTTGGAATATGATTTTTTACCAAGTAAAAGAATTGTACTAGAAATTAATATGATATGAGAAGTTTATT
+TCACCTGAGAAACTAGAGAACATTCAGAGGGTGATGGGGAATTCCCTAGAACCCTGTGTCTGGCTGCTGG
+TACTGGAGTGAAAACTTCAGCTTGTGTCCACCACAGGAAAGCTCATGGAGGCTGAGGTGTGCTAAAAGAA
+ACAGAATCTTAGCCAGCCCTTCACCAGGCCAGGTGAGCATCTTAGCCAGCCCTCCTCCAGGCCAGGTGAG
+CATCTTAGCCAGCCCTCCACCAGGTTAGGTGAGCATCTTAGCCAGCCCTTCACCAGGCCAGGTGAGCATC
+TTAGCCAGCCCTTCACCAGGCCAGGTGAGCATCTTAGCCAGCCCTTCACCAGGCCAGGTGAGCATCTTAG
+CCAGCCCTTCACCAGGCCAGGTGAGCATCTTAGCCAGCCCTCCACCAGGTTAGGTGAGCATCTTAGCCAG
+CCCTCCTCCAGGCCAGGTGAGCATCTTAGCCAGCCCTCCTCCAGGCCAGGTGAGCATCTTAGCCAGCCCT
+CCACCAGGTTAGGTGAGCATCTTAGCCAGCCCTTCACCAGGCCAGGTGAGCATCTTAGCCAGCCCTCCTC
+CAGGCCAGGTGAGCATCTTAGCCAGCCCTCCACCAGGTTAGGTGAGCATCTTAGCCAGCCCTCCACCAGG
+TTAGGTGTGTATGCAAAGCCAAACAATGAGCCTTTCTTGAGAATGGCTTGAAAGTGTGATGTTTACAAAC
+AAAAACATAAGTTTATCTTTTATGAAAAAATTCTATAAAGAGAAAAATCATGAAATAAAAAGGTTGAAAC
+CCTTGGAAACCTTTACTTTAGATAACAACATTTTGTCAGGGTCTGATGTGATCTGGTAGTTTCAGTTTCC
+CATTCTCTCCTAAAAAGAGTGTACACATTTTATTTCGTCTTACACATCACATATCCATTTTTATCTGCTT
+GTTTACAAATAACATATAATGTATTGGGATTGAAATAGGCAATCTAAAGGAATGTGCATCACCCATGCCC
+CTCTTCCTTAGACTTTAGTTAAAGGTGGCAGTACTAACTAATAATAGCCTCTTTGGGAGGATAAAATATA
+TTTTTCTCCTGTTAGAAAACTGTAGCGTGTAGTTTTAAAAATGATCCATGCTAAATTGGTAATGCACTGA
+CATTGGTGGTCTCGCCCACCCCCATGGCCAGCTCACTGCCAAGCCGCTTGCACTTCCCAGGCTATCTGAC
+CCCTGTGGCTAGCCTGCAGAATGCCAGCTATGGCCACTCAGTTTCCCTTAGCCCAGTAAAATAATAAAAC
+AAGTGTTCAACACGTGATGGATTTGAAGTCGGATATTTAAATCAATATGATCAGTGCCTGTTCAACGATT
+GACATAAAATGAAGTCTCTGTCTCTCTCTCTCTGTCTCTCTGTCTCTCTCTCTCTCTGTCTCTCTGTCTC
+TCTCTCTCTCTCTGTCTCTCTCTCTGTCTCTCTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTG
+TGTGTTTAATGTTTGGCGTTTAAATTGACTAAGCCCTGGGGAAATGTTAGATAAACAGTTTTACCCAGAG
+AAAGTCCTAGGCCTTGATCCACCAAGTGGAAACAAGAACAAGCAGTAACAATTAGTCAAGAACGAAGAGG
+AGAAACATCAAAGAATCATTATAATTATAAACTTCAAAGGTACTGTGAAGTGGAACAATGGACTCCAAGT
+GCCTTGGTGACACATGAAAGGGGCCACATGTCTTCCTGGTGACACCATAAATGTATTCATAGAACAGGCA
+CTTAATATCTTAAAACACCACACAGCTGGTCATTCCCTGGGCCCCAGACTGTATCTTTTGGGGATCGTTG
+AAAAAACAGTAACAAAAAAAAAAAAAAAAAAAAGGAAGACAATATATTTAAAGGCAACCATAGTGGTGTA
+ATGAACGCTAAAAGCTTAGAGGTGGCCAGTAGTGTGGAAAGGATTAGGAAAGTTGTACTTTATTTACAGG
+AAGGAATAATAGATGATGGCTAAGTCTTATAAATGTACATGTGAGCAGAATGATAGTATGTCCCACTTTA
+AAAAATATGTATACACCACAGGGAGATAGAGTGGTTTACTGGATAAAGTGTTTCACATGAGGTCCTGCAT
+TCAGATCCTAATATAAGCTAGGCTTGGCTTGGGCACCTGTAGTCCCATGTCTGGGAGACAGAGGATCCTA
+GAAGTTCACTCCCCAGCCACAGTAGCCAACAGATTCAGTTAATGATTCTATTTCAAAACTTAAGGTGGAG
+AACAATAGAGAAAGGCACCTAGTGTCTACCTCTAACCTCCACGTGTTCACACACAGACAATTGTACCTAA
+ACACACTTGTGTGCACACACACATCATGCACACACAGAAACACACACACAGAAACACACACACACATCAC
+ATATAACACATACACTTATGGTCTTCTGTTTTTTTTTTTCTTTCTTATACAAGAGATCCAACCCAGAGTT
+CTACATATGCTAGGCAAATACTCTACCACGAGCCACACAGCCAGCTCCAATATGTTTTAAGTAACATGCT
+AAAGAAATAATGAGCAGAACCTCTAAGGAGCAGAGGAAATGCTGAGTGAGCAGTCTTGAGTGGATGTGTA
+GTTTGACATGGGCACAGCCACATAGAGGACTACAGGACCTCCTGGGACTATTGGCAGAACCATTACTCAT
+ATGGCCCAGTCTCAAGAGGAACAGCAAAGCTTTTCTTGGGAGTCTCAATGTGAGTCTTCATGAACAATTG
+TTCAGTGACTGCAGCTCCCTCCTCCAGGATGTTCACAGCCTGAGATTGCTTGACTACAGAGACCTAGATT
+AGCTCACAGCCCCCCTCCACCCCCCCCCCAGCCCCTATACTCTTCCTAATAATCACACTGAAGATCCAGG
+TTACAATGTTGTTAGAACCCTACCCAACCCCAGCTTCACTGTATGTGTGTCTGTCATTTCTTCATTCCCT
+CGCTGACATTGCCATCCCAAGCCACACTGATCCCAGGAAGAGCATATGATTAAAGCAGGCATTATTTAAA
+GTCAAGAGAATCTGAAGCAATGTCGCTAAATAGGCATTTATTGAACCCCTACTATGTTCAGGGCATTTCC
+CAGGTGGTAAACACTTAAAACACTAGTGAACAAGAAGAAACTAAAATACATGCCATTTTGTCATTAGCAT
+CTATGGAAAGAAGCATACAATAAATGTGAATAATAAGGTTATATATTTCCACTGATGGGTGGAACTCACA
+TGCAATAAATGTGTAGCTATTAAGTGTACGTCTGTGTGAGTTTCGAATCTGTATTCTTTGTGTGATTACC
+ATCTAGATCTAGATAAGGAATGTTCTCAGCACTGCATAAGCTCCTGTCAGCAAATCACCCTGCCAAAGTC
+AACATGCATTTGGCCCATCACACTTCCCATCTATAGACAATGATAAAGAATGCACATCCCACCTGTCTGG
+ATCAACATGATGCCATTGTCAACATTAGTCAACATTATGTCTACATGCTGTGGTGTAGAACATCACTACA
+TTTATCTTAACTTATAATATACTACTTCGTTATATGATAATGGCTTTATACGAATAAACTATACCTGATT
+TACTCATTTTCTGCTGATGAAATTGCAGAGTAAGCAGTTTCCCACATTCTTCCCTCAGTCTTTCATGGCT
+TTTGGGAGACAAAAGCTTTCATTCTGGTTGGGAGACAGACCTAGGTAAGAGGTCACTACTTGCTCAACAC
+TAGTTGATATTGTCAATGTCCCATAGTGGTTATACTCCCAGCGTTACACAAGTTCCATTATTTCACAAAT
+TACCAGCCATTGTCAGCCTGCTGCATCTTAGCCATTCTACTTGGCCTACGTTAAGCTACTACTGTGTTTA
+CACTAACTCTGCTAGTTAATGGTGCTTGTTGTCATTTTACAGGCTAAAAGGCTACTTGGATGTTCACTTT
+TAGGAAATACCTGTTTAATACCTGTTGATTCATTTTGTAAGGTATCTTTCTGGTTATATTACTTTTGAAT
+AAGAGGTCTTTGTTGAGTAGCCATGTTCTCCCATCCAAGCATGGGCCTGAATGCCTACCCATAAACCAGC
+TCTTTCTGGAACCATGAATGGGAATCACTGAGAAACTGAACCTCCTAGAGCTTGCTCTCGGGTGTTATTT
+TAAATTGCTTCAGGAATACATGAAGCAAATGCAGAACTGGTGAAGGAGGTATCTAAATAGACAAGGTTAT
+ATACACCAACAAAGTACATAAAAGGATGTGAGGTACCGCAAAGCAGGTCTCCCTACTGGCTTTTGACCTA
+GAGACTTCAGAAGGGCCAAAGCAACAGCCTTCAATACTGAAGATCATCTACAAGGGGTTTTGAATCCATT
+GGAGTCGGGCTCCTTACCAGCTGTCTGTTGGTGTAACAACCGACCAAAGAAAAACAATGTAAGCGGAGAA
+TGATTTATTCTGGTTCATGGTGCAGGAAGAACAGCTCACAGGAGCAGGGAGAGGGGTAGGAGAGGGTCAC
+AGTTGCAATGACAAGTGCTGGAGTCACGGCACACCTAGCTCATAGCTTGTCTCAGGAAACAGAGCAAGCA
+GAAGAGTGTTTTGGCTACAGTGGTCAACACACCTAGCATTCTACAGTCGCCATATGGACCCCACCTGCCG
+GAGAGCACACAGCTGCCCAAAATAGCCATGAGCTGGGAACCAAGTATCTGAACACATGAGTCTCTAAGTG
+GTGGTGGTGGGGCAGGGGGACTTTAGCTTTAGAAAGGGCTACTTTGCTAATGAAAAGGAACTGTTAGGGA
+TTTTAAGAGACTGTGTTGGGCAGAGATTGGATGACCAAAAACGAGAGCCGGTGAGGCTTGGGATCTGGAA
+CAGGATGGAAGGAAAGAAGAAGGAAAAGTTGGCTGTGGGGCTGCAGGGATCCTGCCTGCACCCCAGGGCT
+CCTGGCAAACTCAGGAGATCTGGTATCTTTATCCTACAGCCCACCCTGTCAAGGTCCTAATTGCTGCCCC
+AAGTTGCCATCTTTGTGAACATGGGTCTTGGGAGGAAGCGACTGGTCTTCATGTAGTCGGAGATCTTCTT
+CAGGCCCTGCGTGTGAGGACAGAGAGGGAGGTCAGGCCAGGGGCCCCACTGGGATAAGCAGCCCCACTCC
+AGAAACAGCTTTCCCTGTAGACTCTAATATTAGAGTTCTAGAAAGGGGAATCCTCGGTGGCCTATGCGAA
+GATGCCTGTGCATCGGGATCGTGCCCATTATAGATTATGACCTACGCTCAGAAAACACACTCCATCCTCC
+AAGATCAGTCTCACTGTGCCAACGTGTCAGCAAAGAGGTTATGTGGCTTCCTCCTGCTGGCACAGCTAGT
+CCATGAGACAGGACCTGAGTCAAGCTGGCTCTATAGCTCCCAATCTCTTCCCTCTCTACCCAGGCCTTTC
+CTGTCTTCCCACAGGAGACCCTGCCACTCCTTGCCTTCACAGACACGATCCTTTCTCACAGGGAACATCC
+TTGCTGGTTTACACAGAAACCTTCTGTAGCAGGACAAAGATGTCCATGACAGGCCAGTGCAGCAGGACAG
+AGAACCCAGACCTGAGCCAGGGACCCAGCAAACCCAGGATCAAATGCACAAGGGGAGGAGGGCCAGCTAA
+GGATGCTGAGCCAGCTCTAATTGTTCTCCTGCAAGTAGAGGTCCAACCCGCTCTCTGTGGAGCCTCAGGC
+CAATCTCACTGAGTCCCAAAGCTGCCTCATATCCAGCCCTGCTCCATGGGGCCATGCAAAGAGACACTGG
+GAGTGTTTGTTCCATCTTACCAGGAGAGCCTGCTCAGATAAATTCTGCATGGTTCAGAGACCTAGAGCTA
+ACTGCAATCAAGAAACCAGGTGCAGCAGGCCTCAGGCTGCAGCCCTCAGCTCTTCTATCCTTCCCTATCA
+TGACCACAGCAGCCCTGGTACACTTTCAAGGCCACTGGGTCAGGACCAGACCCAGAATTGGTGAAACCAG
+CTGAGAAGATATATCACTTGATATAGGGACAAGATTCCAGACTGCAAGCCTGTTGTTCTAATGCCTGCAG
+TTCCCAAATCTCTGAAAGCATATTCTGTAGGTGCTAAGTAGACTCTGGGGGACCCATGACTAGCTATGCA
+GTGAGGGCCAAGCTGTGACTCACAGCTCCCGGGTGACTCTGTGCCGGTTACGTTCACTGCACTCTGCTAT
+ACTTGGTCTCTGAATGGAGTTCTTTCCATGCCTCACTGGGCCTGATACTGAGCCTCATACAGAGGAGGGG
+GCCAGTGAATAGGAGTCTAAATAAGTATATAAATACATTGTAGAAATGGCCTCATGGAGACATGGGGAAA
+AGAAAAGGGCAAAGGCCCTATAACCATACAACTCTTTAAGATAAAATCCTGAATAGGGCTCACATACACT
+CTCAAATCTAGCACCAAAAACATCTCTGGGGTTTGGCATGTGCTCAGATGCGGATGGACCTGAGTGAAAC
+TTTCTGAAGACCTGGTATTGTCTACACGCGGCTATTTCACACAAATCCCACCATGCCTCCCTCTCTGAAT
+GTCTGCGGCTTACACACTCCTGCCACGTTCTGCCTTTCCCCAGCATTCTGGACTGAGTAACCTAGGCAGC
+ACAAGGACCCGAGGCTCTTCCTTTTGACACAAAATAGGAAATGTCTACTCCCTGCTGGCCTTAAAGGCCA
+CACACCCCCAGGGAGCTGGGCACAACCTCTGCTAGTAAGGGACTTAGCCTCTATAGTCTGTTTCCTTATC
+TGTCCCATAGGACTCATAATTCAATCTGCCCTGCAAGTTGGTTGTAATGATAATTTTTACCCAATAGATA
+ACGTATGCTGAGCCAGAAAGCAAGCACTCAGTACATGCTACTGGCTTTTGCTGATTTTAAGCTCAGTTCC
+AGGGCCAAGGGCCTTGCAAGCAGAGTGGGGAAGGGAGGACCTCAAAGAAGAACGAGGATTTAGAGCGTCC
+GTCACCTCAAAGCGCGCTATGAAGTCCTTCAGGTTTGGGAACGCATCCAGGCACTTGGCCTCAAACACTT
+GGTTCCTCTCAAGCACATCGTAAGCAATGAAATCCACAAAGGTGATCTGTAGGAAGCCGAGGGTGAGTGT
+GCTTGGATCAGGTAACCTGGGAAGGATGGCAACCCCTCCTCCCCACCCCTCCCTTTACCTTGTCCCCTGC
+GAACCATGGCCGCTTGCCCAGGAACTCAGAGTAAAGCCTCATCATTCCAGGGAGTTGCTCCAGGTACCCT
+GGCTTCAGCTTCTCCTAGAGCGGAACACACTGTGATCACCCTCAGTCACAAGATTCCGAGCAGAGGCCGC
+CCAAACAGGGCGACAAAGACCCCATCTGGGCAGAGGTGCAGCCACCTTCCCGAGACTAGGCCTGGGTTAA
+TTCTGCTTTAATTTATGATGTCACTATTATTAATTCATTATGTCTATTGATTAGAGTCCTACAGGCACCA
+GATCCAATATCAGAGGTGATGAGGATCTACACAGGGGCACGCTGTCCCTGAGACTCACCACAGGTGCTCA
+GAACAACTCTTGTGAGTCACACCCAGGACTGTGTGAGCTGGCTCTCGCATTCAGATGTAATGTGAGTCAG
+AGAAGCAAATAACTAAAGCATTCTGGAAGAGAAGTTGGATGTTCTGCAGGAAGAAGAACGGTAGGGCTCT
+GGGGCAGTTTCAGAAAGATGAAGTGTGATGGGGTGAAAGGAGATGAGGCAAGAAGAGGCTTAGCCAGCTG
+TAGACTGCCTTTTGGGAACACTTCCTTATTACTGTGAGTATCCCAGGCATGGCTCTGGGTATCTGGTTGG
+GAAGAGTCCCCAGGTGATGGCTCCCATCCATTTCCAGCTTACTCCACTTTTTCTCATTGTTGGAATGTGT
+ACTATAGAAGCAATGATGATGTAAGACATAACCGAGTAGGAGACATAACCCAGGCCAGAAATGGGTGATC
+AGAGCTGAGAGATTCTAAACACAGTCTTTACTACCCAGCTACCGACACTGGACTGTGGAGTGTTTGTTCT
+AGTTGCTGAAGTTCATTGTAAGTGCCATTTGACAAGACTCTATGTTGGCAGGGTGTGTGTGTGTGTGTGT
+GTGTGTGTGTGTGTGTGTGTGTGTGTACAAGAGTGTGTGTGTACGTGCATGGAAGGTTTGGTTTTGTTTT
+AGTGCTTTTGATTTTGAGACAGTGTAACCAGGAAGACCGTAACCTTAGTCCTTCTGTCTCAGTCTCCATA
+CTGTGTCACCATGACTCCGCCAGCAGTTCAGAGTATCATCTGTAGGTGAGCAGCGCTAAGACGTTAGATG
+CTAAGCAAACAGAGCAGATGAGAAGGCAAAGGTACAAGCTCTGGTTCCCCCACCTCATCCAGCCCAGAGC
+AGGACTCACAAAGTCAGCGTTGTAGCAAAGCCTCGCCAGCACCATGCGGTTGTCCATAAGCTGATTCTCC
+AGAATATCCACACGGATCCTTTCCTCCTCTGTCTCCCCACCTGTGAAAAACACAGCCCAGACTCACACTC
+AACATGCCACCCTGGCCAAGCCCAGCTGGTGGCCATCGGTCCCCACCCCTGCAGCCGGCCCCACTCACAC
+AGGTTGTGCTTGCGGCCAAGGTAGCGCAGGATGGCATTGCTCTGCGTGATCTTGTGTGACCCATCGATCA
+AGTAGGGCAGCTGAATGGACAAGCATGGAGAGTCAGATGGGATATGAAATCCCAGAATCCCTTCCTTGGG
+TGAGGACAGACACGAGGCCTGGCACTCAGTGTGCCTGCCATGGAAAACCTTCTATGAAAATGCTGCTCAC
+TGAACAGAGAGCTGGGATACAGCCCATCACAGAAAGATGGTGTGGACAACCAGGAATGCGAAGATGAAAG
+CCAAAGGACTGATCCCATCTGTATAAACAGGGACACCACCACCATCTCCACACCTCCCCTCTCCTGTACT
+TACGTTAGGAAAGTCCAGGCCCAGCTTGAATTTCTCATTCAGCCACTGGCTTTGGTCATAGTCAGGAGCT
+GTGGTGGACAAGATAATATGAAATCAAAGGACCCATGTGTTCAAACCCCTTTCCCAAGATTTTCCCAAGG
+GGTCAGAGACAAGACTCTCCAAAGCAGTGGACCTGCAAGCTGACTCACAAAGTCATACTGCAAGCTTGCA
+AGGATGAAGATGTTTGTAAGGGCTGTGAGCTTCTTCATCGCTAAAGCTTGTGAAAGATTTAGCCAATTGC
+TAGAAGAATGGAGCCCAGTTATAAATACAGAGTACACTCAAATCTTTCCTTAGCATCTTCCATTGGCAAA
+TCAAACCACACCATCAAGGGTTTTGGAAGGGGCATGTTACAGCTCCAACAAAAAAGTGTCAGAGCTGAGT
+GATGAGGTTGGTTGCTGGGCAACAATGGACAGGAGTTGGGCAGGGATCTGACTAGAAAGGATGCAGTCAC
+CATCACCCATGGTGTATCTCTTCTCCTCATAGCTTGAGTCTGTGTATTCCAGGAACAGGCGAATGGCATG
+GGCTAGCTGTAAGAGACAAGAGATGGTAAAATTCTTTACCCCTTCTACACGAGTCCTCATTCTCTGACCT
+AGTTTCACTATGTCCATGGATGGACCAGAACACACACACACACACACACACACACACACACACACACACA
+CACACATACACAGCTGCAAGAGCATGCCTCCATTGACTGCACGGATCCTTGTCCCTCCTCCAAGTCCTAC
+TCTGCATTCGGACCCCTCTCTCACCCCACGGATGTCCCAGTAACCCAGTGTCATGGGCATGGTGCCGAGG
+TCTGAACGGACAGGCTCAGTGTCAATCTAGCCTTGGCTGTTCAGAGAGCTGCCAGAAACGGATTGGCTTC
+GCTTTGTCAGGTCTTCAGACTGCCCTTCCCCACCTCCCAGTTCCAGCCAATACCGGCATCCAGGAACAAG
+GCCAGGGCCGCCCCCAGAGCCAAGGCTGCCTGAGCTCCATTGTTATGGTGGTGGCTTGGCTGAGTGCCAA
+GAGCGAAAGGCGGGGCTGGGTTTGCTAGCTTCTTATTGGTGGTGCTGGGTTTCCGCTGTAGGACTGCTAT
+TTTCTACCCTCTAGTCTAGATGGCAGCAGCATTCCCCAGTGGTGACAGTTCGGAAGGCCTAGGACCTTGA
+CAACTGTGTCCTGAGGGCAAAATTATCTTTTATACTGTGAATACTTAAAGAACGAAGAACTGGAAAACAT
+GATAATACAAATATATTCATGAATGCAAAACAAATTTTTCTGATGAAATTTATTTCATTTAAGTGTGTGT
+GTGTGTGTGTCCTGGATCACATAGTAAATGATGTATCTGGAGTTTTTTTTCTGTTTTTTATTCTGATATA
+ACATTAATCTTTGAAAAAATTCACAAAAGAAATGAGTGCCCACTTAATCACCTGTATACTGTAGCTGCTT
+CCTTGTTCGGGTGCCCTCCGCTAAACTCCGCCTTCCTTGGCTTGCCTTTCCCTCTCTCCTGATCTTCCCA
+CCCCCTACCTTTGACTCTTTCCAGCATGAGTACAAAATCTTATAACCAAGAGTCATTTCATTTTCTTTCC
+CACTAACACCATGCCGGGGATGCGTGAGACCCTGGAGTAGCTGTGATTGATAATCACCTCTGCCGTGCTG
+CTTCCTGGCCAGGCTTGCACTTGGACCTGTGACTTCTGACAATTGACGGAACATGTTTCTCTTTACAGAG
+CCTTGAAAGCCTTGTGCCCAATTCCCGAGGAACAAGTGTTCTTTGTCTTATGTTTCTTTTTTATCCTTTT
+CTCTTTTGCGATGGCTTTAGAGGTTGGCCTTGGGACCCTGTGTAACCCAGGCTGGTCTTGGATTAGGCTC
+AGTTCTCCTGTCTCAGCTCCCGAGTGCTAAGATTAAGGACATGGAGTGCCACACCTGTCTCTTTTAGGAA
+GCACCGCCAACTCACTCAGATCTCAACTCTTTTAAACACGTTTTATGTTTATGTTGGTTTCATTTGGTTG
+TGTTAAAATGTAGCTGAGGGTGAGGAAACCTGCCTTAGTTTGAGATAGCATTGCTCTGATTTTTTTCGAC
+CAACTTTTATTAATGTCTGATCTCAGTGACATTGTGGGGTGGGCAAGTTTCCAGCCACAAGGTACCCTTC
+ATGTTCTAGAGACCCGGTGAAGGCTGCCTGCACTGGGAGGTGTCCTTCTTAGTCCGCTTGGACCCCTGTG
+AACCAGGCTCTATTAGGGTCTCCAGACAGTCCTTAGAGGCAATAGTGAAAGCTGGAATGGCAGTCAAAGC
+AAGTGTGACTCAAGCCTTATGGGGACCAGACAGGCTCCCATATGCAGAGACCTCACTGGGGTATACTGCA
+GATAAGACCTGCCTCTGTTACACACGTGTTGCAAGTACAGAGCCTAAAGATGATGGGCTCCCCCAGCTCC
+TCAGCACAGCACAGCACAGTCACTGCTGGACAGGACAGTGAAGCCAACTTCAGGGACGGCCACTTCAGGA
+GGCCACTGAGTTGAAGGAAAAGGAGCTGGGGCAAGAATGGGGCAGGGATTAGGGAACAGCAAGATGGTGC
+CAGAGTAGCAGGGGGATGACTGCGACTGAGACTTTAGGGCTCCCATTCTGTCCCCTCTCTGCCTGTCTTC
+CCACAGGACACCCTGCCACTCCTTGCCTTCACAGACACGACCATTTCTCACAGGGAACATCCTTGCTGGT
+TTACACAGAAACGTCCTGTAACCAGACAAAGACGTCCATGACAGGCCAGTGCAGCAGGACAGAGAGCCCA
+GACCTGAGCCAGGGACCCAGCAAACCCAGGATCAAATGCACAAGGGGAGGAGGGCCAGCTAAGGATGCTG
+AGCCAGCTCTAATTGTTCTCTTGCAAGTAGAGGTCCAACCCGCTCTCTGTGGAGCCTCAGGTCAATCTCA
+CTGAGTCCCAAAGCTGCCTCATATCCAGCCCTGCTCCATGGGGCCATGCAAAGAGACACTGGGAGTGTTT
+GTTCCATCTTACCAGGAGAGCCTGCTCAGATAATTTCTGCAGGGTTCAGAGACCTACTGCTGAGGGTTAT
+CAAGAAACCAGGTACAGTGGGCCTTGGGCTGCAGCCCTCAGCTCTCCAGTTCATCCCTCTCATAGCCATA
+GGAAGCTTTGCACAGCCACCAATGCTATGATTCCTGCAACAGAACCAGAGCCAAAGAGAATCTGAGAAAG
+AGGAGCATTTGATGTAGAAAAGACTCCAGACTGCAAACCTGTTGTGCTGGTGCCTCCAGTCCCCTACTGT
+CTAACAGAGTAGCTTCTGTGTGTATCAAGCGGACTCTGGGGGACCCAGGGCTGTGGAAGTAAAAGTTACT
+GTGATTGCCGGTAGCCAGGAGGGGCCATGACTTGCATGCCTCTTGCTCTCTGCTGGACTGAGTCACTGGA
+TGGAAAGTCACCCACACAGCTCAATGTGCTTCACACATAGATAATCACCGAGTACCAATGAGAATACATA
+ATCCCATCTCCCAGGTAAAAAACCAGACACCCAGAGAACGGAGAAGGAGCGCTTGAGAGGGAAGGAAACT
+GCCCCATTTAATGTCAGGATTGCAGTCTGCCAACTAGGGAGCTGGAGAGACGGTTCAGTGGTTAAGAGCG
+GTTGCTAGACAACCATGAGGACCTGAGTTCTGATCCCGGTGTCCACGTAGCAAGCTAGGCATAGGTTCAG
+CCCACATGTAGCCATAGCATTGAGGAGTGGAGACAGGAGCATGGCTGGAGCTTGCTGGCTAAAAGTCCAT
+CTGAAATATGAGCTGGGCTACCAGATTCAGGTAGAGACCCTAAGGTGGAAAGTGGTAGAGAATCCCTGAC
+AGCTTGACGAACCTCCCTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGT
+GTGTGTGTGTGCGTGCGCGCATGTCTTCTACATGTCTGCTCAAATCTAGCACCAACTATATCCCCTGGGT
+CTCTGTTTCTCCCATTGGGGATTTAAGTCACTAAGGTAGTCTGCAGAGGGAATATGAATAGTGTCTTGTG
+ATCCTTCATCTCAGCCCTTTAATTCAACTTCCTAAGGCCCTCGGCTGACATGTTCACCCCTCCCAAGGAT
+CCTGGAGTGGGAAACACAGCCTATGCAATGACCTGAGTCCTCTCTTTTGGTTCAAAACAGGGGTATTCAC
+AACCTGCTAGCCTCAGACTTCAGAGACCAACAGAGAAGTCCTGCACAGCTATCACTAGTACCAGGTTCTC
+AAGCAAGTGACTGAATCTCTACAGTTGGCTTCCTTATCTGACCAGTGGGACAGGTAACTCTATCTGCCTT
+GTGGAAATGTTCTTATGATTCATTTGACCCCAGGTTGTAGGATGTGAGCACCCCGTCCTGGAATAAAGCG
+ATCAATGGTCATAGTTATTTTTCCTTTTGGCTCTAGATTTAGGAACAAATTGTGGAAAGAGATTCAGAGA
+GAAAGTATCCACTTGAGAAGAGAAGTGGTGCTCTTGGGACTGGAATGCATACAGAGGTCTGGTTTCCCTG
+TGCTTCTGCCGCCGACTTCTTCACTAATGTCAGAACCAGCATTTCGAGGCTTCCATGAATCATGGACTGA
+GTCAGAATCTCAGATGTGAGACAGGTTTGGGTTTTTTTTTTTTTTTTTTTTTTTTTTTTTACTATTTTGG
+CTTAAGCTCACACATGGGCAGCCCACACAATAATAGCAGACATTTAGTTCTTCACCTCAGATAAACCCAG
+GGTTTGGCCACAATCTCCCACAACCTGCCATCAGCCCAGTCACACATACTATTCTTGGTACTCTGTAGCA
+CAAGATGGTGCCATTTTAGATGCTCCTGTGAGTGCCTGAGCAAACTATACTTCTATGCCTCACCTGCATA
+GAGGACCTTCCTGCCTTGGTTTTAGCTTGCTTTCTCTCTCTCTCTCTCTTTCTCCTTTCCATACTTGAAT
+TATAACTTCATCTAAGGACAGGGTCGTCAGGTAGAACACATGGGTAGGGAGTTTGTCTCTCCCTCCCCCG
+AGGGCCCTTCCAGACAGCATAGTCCTAAACAGAGGACTTCAAGTAACCACTTTCCTATCACAAATTAACT
+AAGCCCTCTGTTCTTCCAAAAGCCCTTATCCAGAAATGTCCTTGGGACTTTTCAAAAAGAGATACAAACC
+CACACAATCTCCTTTAGTTTAGTTGGGTGGTATCAGCTGACCTTGAACCCACTTTCTCCTATCACATCAT
+TCTCCCTCCCAGTTGTTCTCACGGGGCACCTTAGAGAACAAGCCTGCAACTTCGGACCTCACCATGCTGT
+GCTTCCTCCCTTCCCTCGGGTGTTGATAAGGCTTTTGCCTGAAGGGTAATTTGACTCAATACTTTTCTCT
+TGAGCCCAACAAAAAGGAAGCACCCTGGTGAATGGAGGAACCACAGATGGAATAAGGCTGAGACCCTGAA
+AGACTCCCTGGAGCTGCCCAGCCCCCTGGACTCTCCCCGTGAGAGAAGATGAACTGGGGACAGGCCCTTG
+CAGATGTATAGTTTGGGGCTCCTTCATGTCACATTCTCTGCCTCTTAGCCATCAAGACGTGAAATCAGTC
+TCTGCCATACACCTTGACAATCATAATATTCTGCCTAAGCACATGAGACCACACACACATATATACATAT
+ACACATAGTGTGTAAGTGTGTGTGTGTGTGCGCGCGCGTGCACGCTCGCGTGCGCATGTGTGCCTGGGTT
+TGCTTGCATGTGGAGGGCAGCAGTTGATATCAGGTGTCTTCCTCAAGCTCTCTCTTCCTTACTTTTTGAG
+ACAAGATCTCTCACTGAGCCAGGCTCTCATTGGTTTAGCTAAGCTGTCTATAGAGTAAGTTCAGAGAGTT
+TCCATTGCCCCAGCTCAGGGATAATAAATTCATATCACAACACTTGGCTTGATCTATGCAAAGGACTAAA
+CTCGTGTGTTTGCGCTTGCATGGCACACAAGTACTTTATAGACTGAGTCACATCCGCATCTTTTCCTGGT
+TCTTTTTAGTACTTCTGTTGGCTGTCAAACGCCATGTGAATGAATCCAAACATGCACACTCAGCTAGGAA
+CTATTGAAACGTATTGGGAAGTATTTTTCTAACTTCTGATATAGAATGATACTAGTATTAGTCTGGTTTA
+ATATTTCTGCCTACTTAACCTTTATATACTTCTATTAACTTACCTTTTTCCTGATTTTTTGTTGCATTTA
+TGTCTAAGTTGGTGTTTAATAAAGGTCAGTGTCTTAGTCAGGGTTTCTATTCCTGCACAAACATCATGAC
+CAAGAAGCAAGTTGGGAAGGAATGGGTTTATTCAGCTTACACTTCCACACTGCTGTTCATCACCAAGGAA
+GTCAGGACAGGAACTCACACAGGGTAGGAACTTGGAGGCAGGAGCTGATGCAGAAACCATGGAGAAGTGC
+TGCTTACTGCCTTGCTTCCCCTGGCTTGCTCAGCTTGCTTTCTTATAGAACCCAGGACTACCAGGCCAGG
+GATGGCACCACCCACAATGGGTCCTTCCCCCTTGATCACTAATTGAGAAAATGTCTTATGGCTGGATCTC
+ATGGAGGCATTTCCTCAAGGGAGGCTCCTTTCTCTGTGATAACTCCAGCTCCTGACAAGTTGACACACAA
+AGCCAGCCAGTAGAGTCAGCATAAAGGATTTTGATCTTACCAGATTAATAAACATTTCTCATCTATGAAT
+GTTTATCTTATAAGCTTTTCATTTTAATTAATCATTTATTTCACAGTGACCCGCACATCTGCCTATGAGT
+GCTTACTTCTTAACTGCTGTAACAGGCAGCAGCTGAGACTTGGCGTAAGGACAATGCCCAGCAGGGCCAT
+CCTCCAACATGTTCCAAGTTACAGGTAGGTCTCACAAGAAGTGGTTGACAAAGGCTAGAATCCATGATCT
+AGCCTCCATCAAAGGATGCTTCTTCAATGAATCTAGCTCCTTTGTGGGTGACCTGACAGCACCAATCACA
+CCCTGCTGCTCTGGGGCCTAGGGAAGTAGATCTGCCCATCTCCAACCATTCACTGGCAGCTGCACCTCTT
+AACTGCTTAGGTGAGCGAGGGCTTCTGGGTCAGACTACAACACCTCCACATGAAAAACCTCCTGTGGCCC
+ATTCTATGTTTTCTCAGGAAGCATGGTTATTTCCAGAGTCCTTACGCCAAACACTGGCACAAGCATCTCC
+AAAAGCAGATGTTCACAATTTCCCAAACGTTAACTTGCCAGACACTGCCAGATGTATCAACACAGCCAGA
+ATCTCCCCTCGATGAGCTGATGATTGAGTTTTTTGAAAATTCCTACAACGCAATCACCGACAGTAGGGAA
+TGCTTATAGTAAACCATCTTGAAAAGAGAGAAAGATGGCTTCCAGGTGAGGGTAGATGTAAAACCTGGTG
+ACTGTTCACCCTCTGCCCATCAGCCTAGCACCTCTCGGAAGCAAGACTCACAGGTGAGCATGGCCAACGG
+AGGGACTGCATGCACACTGCAGGATGACCACACTGCAGGATGATCACACTGCAGGATGACCACACAGCTT
+GATGATGACTGAGAATTCTGGTATAGAAGACACACCCACCTATAAGAACCCCAGGTGAACACTGCCAGGG
+CACATCATGATTTCTCTCTTTTTAAAAAGATTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGT
+GTGTGTATGTGTGTGTGTGTGTGTGTAAGCCCAAGCAGGCAGTAGAGAGCATCAGATTCCCTGGAGTTGT
+GATTATAGGCCCATTGTGAGCCACCATGTGGGTGGCGGGAACCAAATCCAGGCCCAAAACAAGAAAGGAA
+GTGCTCTGAAAAGTTCAGCCATATTTGCAGCCCCTGTGCGTGGTCATACATCCTGACTACCTGAGAGTCC
+ACCCCATGTTCCCATCACCTATTCATCCTTGTGAACCACAGAACAAAGACCACACAGCCACATTCATGTA
+GTCACTCCTGAGAATTCTAGGTAATACGACAGGACACAGAGCTAGTATGGATAACACTTTACTAGAGCTC
+ACACAAGACTGGGCCTTCCCACTCTGGAGATGGAGCCTGCATACTGGGGAGTATGGATCTTTGGGCTAAG
+CTGCTCAAAGTGCAGTGAAAGATTGTCTGGTTAGAAACATGGCAGAAAGAAACAAGGGAAGCAAAGGACT
+TTGCTGAGGACAGGGAGCTGACATTGGAGGCTTGAGTGAAAGGGGAGACAAGGAGTTCTTGTAGAGGGAA
+AGTTAATGAAAAGACAGAGGGAAAAAAATGGGAAGATTGATGAAGAGAAGTTACTGGAAGCTAGCTGTGG
+CCCGGTCAGTGTTCACGGTGACAGGCACACTGATAGATCTTCCCCACCAACACCGGCACTCCATGCATGG
+TCTTACTCATTGCCCCACGTGGCCTGTTTTAAGTACACAGGACTTGGAAGGAAGCGGCTGGTCTTCATAT
+AGGCAGAGATCTTCCTCAGGCCCTGCAGGTGAGAAGGAGAAACCAGGACCTCAGCTCTGCAAACCCACTC
+CAGGACCAGCCAAGAGACTTTAAAATCCAGAAGGAGAAAACCCACCCCACCACCCTGCAAAAGCAGCCCC
+ACCATCACACCAATCAGCTGAGGTAGCAGAGGTTAAGAAACTTCCTCAGTCTCCCATGGCTAAGAGGGTC
+AGCCCAGCACTGTGGCTGCCACTCTGCTCCCTGTGCACTCCCTCCTGCCTCCCATAGGGGGCACTGTCAC
+TCCTGGCTACAATGACATGCCTGTACTTAAAAGAAGCAAGGATCCAGAAAACCGAAAGGGTTGAGAACTG
+GCCTGTGTGGTTTGGAAGAGAGTCCAGGAGCCACCCAGTGACTCAGCAAACCCTGAGTGTATGCAGAGAA
+GGCCGAAGAGACTGCAGAGCATGAGTGGCCAGCTCCAGATGTTCACCAGAAGCTAACACGTCTCCACTTG
+AGCCCTAAGCACAATCCCACTGAGCCCCTACCAATCTCATCTCTAGCTCTGATGCAGTGGAGTGTGATAC
+AGGACACCTTGTCAGAGAGTGCACAGGTCATTTCCAAAGATCAAGAGAGAGCCAGAGCTACCAGTCCCCA
+ACAAGCCACAGCCATCTGTGTTCAGAACAAGGGCCTCTGCACTCCCCTCCTCAGCTATACCAGCTCGTGC
+ACACTCACAAGGCCACTCCATGTGGATCACAACCGCAGTGATGAGAGCTAACTAAAAAAAGACATGCATC
+TCTTAATGGCAAGTGGGAAAGGAGAAGTGCTTCTAGGCCGAGATCTGTTGCTATGGCAATGCCTTGAACA
+TACAAAAATGAAAAAAAGTACATCTTGTTGGTGTCAAACCAGAGAGTTCAGGGTGGATCATGGTTAACTG
+CGAGGTGAGAGCCAGGCAGCCATTAATATCACAGGCATCTCATGGTCTGTTGTGCCCCCCACCTACCGGC
+TTCACTCAGTCCTGGGATGGAGGGACAGTTTTACACGTCCAGCTGGGCCCGATACAGAGGGAAATTCCAA
+AGCATCTCAGTAAGGACAAAGTACGACCTCACTTCATAGGGGACACAATGGGAGCTGGAGGAATGGCGGG
+AGGAAAAAGCACACCCACCAAAAGAACTCAAGATGAAGTCTTCCACAGGAGCAGCCCTGAGTCACAGTAC
+TTCACAGGTCAGCCACAGGACTGACAACACCAATCAAATATTACCATGGAGTATGTCTTGGTCCTGCACA
+TCAGTCTGCCTTTCTCTCAGCCTAACGGTTGCAGGAACAAACACATTCTTCACATAATCTGCATACTGCA
+GGTCCTGGGCCGGAAACCCAAGCTGTTTAAAGGCCTGACTTTCTTTTTGGCACAAATCAGGAAGAGACAC
+TGAATGTTGCTCTTAGAAGCCAAAAGCAGTTGGGAGCCCAGAACAGCCTCTACTCGTGCTAGGATGCTGA
+GCAAGTAGCTTACCCTACACAGTCTCGTGTTCCTTATCTGTTCATCGTGACAGCTAATTCTACCCAGGTT
+GCCTGCGTGTGGCAATGACAAATTGACTCCGCAGATCTAGAAGGCTGAACATGGGTCCTGGCAGAAAGTC
+AGTGCTCAATAGATCAGAACTAGGGAAGCAACCTGGAAAGGGGGAAGGGAAGTAGGGCATCCAAGAGGAA
+GCAGGACCAGGGCATCACCTCAAAGCGGGCCATGAAGTCCTTCAGGTTTGGGAAGGCGTCCAGGCACGTG
+GGTTCAAACATTCGATGCTGATCAAGGACATCATAGACGAGGAAGTCTGCAAAGGTGATCTGCAGGAAGT
+GAGGCTCAGTGAGGTGGGATCCGGATGCCTGGGAAGAATGGCAGCTCCTCCTGCCCACCTGCTACCCTCT
+CTACCTTGTCCCCTGCAAACCATGGCTGCTTCCCCAGGAACTCCGAGTAGAGTTTCAGCTGATCTGGGAG
+GCCCTTCAAGAACTCTGGCTTCCGTTTCTCCTGAAGCAGAAAATAATAGCTGTCATAACCCTCATGCAGG
+CCCTGTGGGAGAAAGTGTGGCTTTGCCCAGCTGTGCAAAGGAACAGGCATCTTCCTAATTCTGGGCATGT
+GCCATTTAACGCTTCACCACTTTAATAATCAGACTCTCGATTGGTAGCAGGCTTCATCTCTCAGGGGTGC
+CTGGAATGGAAATGCCAATCTGTCCCTGAATCTGTCATGACTGATGTCTAGCTCAGCACTGCCTTCCAGG
+GTAGTGCCTTCCAGACAGGTTCTGAACAGAACAGTCCAGTGGAAGAAGGAAAGCCAAGTATTTAACAATC
+TCAAGAGAAGAGACTACGTGATTTCAAAGAGGGATCTCAAAGAATAGAATCAGTTGAACAGCATAGGGTG
+TTGAAAGAGACAGTGGTTCTCTAGCTGACCGTGAGCTCCAGCAAGAGGCCCCGAGTTTTGCTACCACTAA
+GAACTCAGCTCTGAGAAATCTGTTGGGTAGATTTCTAAATACCCATGACCTTTTTCCCCTCTGGTTTTCA
+TCTGTGAACCCCAGGGACAAGGTGGCTCATGGCAGTGGTCAAGGTCAAAGTTGAAAAGAATTGAACATAT
+GAATTTGCTGTGTAGGAAATGAATTTGCAGCCCTTGGTATCATAACAATACCGCAAAACTTCTTGGAAAC
+GGTATGTTGTACTTGTTAACAAAATGGGGGTGGGGCAAGAACTGTTGGCTGGATAGGAGAGCTTGATAAA
+TCAGTATGGGTGGGTTCACATTCCAAAGAGGACATGCATGGAACTGAACACAGGCCCTTCCTTGACTCCT
+GCTGCAAACCCTGTCTATACCACACAGTGCAGACTCACAAAGTCAGCGCTGTAGCAAAGCATGCCCATCT
+GAATTCGAGTGTCCATGACCTGTTTCTCCAAAATGTCCACACGGATCCTCTCCTCCTCTGTCTCTCCACC
+TGCAGCACACACAGCCCAGACTCACTCTCAGCAGCCCACCCTTGCCAAGACCAGGGTAGGTGGCCATGGT
+CCCCCACCCTGCAGCCAGCCCCACTCACACAGGTTGTGCTTCCGGCCAAGGTAGCGCAGGATGGCATTGC
+TCTGGGTGACCTTGTGTGACCCATCAATTAAGTAGGGCAGCTGGATGGACAAGCAGGGAAGGTCAGATGG
+GATATGAGGTCCCTCCATCCCTTCCCTGGGTGAGAGAAGACACAGGGCCTGGCACTCAGTGTGCCTGCCG
+TGGGAAGCCCTCCATGGGCACAGTGTGCACTGAATGAGAGAGCTGGGACACAGCCCATCACAGAACACTG
+TGCAGCCAGCAGGGATGGAGCAGGAGAGCAGAAACACTGATCCTAAACCCTCAGCCAGGCCAGGAGATGA
+GGTGAGATCACCATGTCTTCCCCAAACTTCCCCTTCACAGCACCTACATTGGGAAAGTCCAGGTCCAGCT
+TGAACTTGTCATTCAGCCACTGGCTTCTGTCATAGTCAGGAGCTGTGGTGGACAAGAGGATGTGAAAAGG
+ACGTCACGACGTCTAAGCCCCCTTCACAAGAAGTCAAAGGCAGGACCCCTTGTGCAAGGAATATGCAGTC
+TGACCCAGTAAGTCTTTCCAACCAAAGCTGCACACTTGAATAGCACAGCTATGTAGTGAGTAAAAGCCAA
+TCCAAGCCCAGGAGCTATTGAATGGCTATAGGCCTTTCAATGACTAACTAAGGCTTACAAAAGCACTAGG
+CCTGCAGTGCCTGTAGTGGGCAAACCTCACCCAATTACAGTCAGAATAGGGCCAGCATGTTACAACCATG
+ATTCTATATTGACAACCAAAGCTGCACCCTCAAGGAAGTTTCTGGAAGGGATAAGCTCTGGGTTTGCCTG
+AAGGAGATCCACAAAGCTAAGAAACAGAGAGATAGATAGAAGCTGCTGGGCACCAGTGGACAGAAGTTGG
+GGAAGAGTGTGACCAGAAGAATGTCATTACCGTCCCCCATGGCGTATCTCTTCTCTTCATAGCCTGTTTC
+TGTGTATTCCAGGAGCAGGCGGATGGCGTGACCCAGCTGTGAGAGATAGTGGGTAACATAAGTCCGTTAG
+TGAAATCCTTCACCACCTCTAGGGACGCCTTTACACCTTTCACTAAGAAGAGAGGAAGGGAGGCGGGGGA
+GAGGGAGAAAGGCAGGGACAGAGTTTGGACTGGGGGGTGTGGCTCTGACAGAGAGGGGGAGGGGGGGTCA
+AGGAAAACCCGATGGGAATCTTCTTCGTGGAACCAAGCCTGTAGTCCCGCTTTGTGTTCTGACCCTGGGC
+CCTTCCCGCCTAGCAGCCCAACCTCCCACTCACTCCACGGATATCCCAATAACCCAGAGTCACGGGCATG
+ATTGCTATTCCAGCCTCGAAGACTGTGCAGTCAGTCACTGGTCGGGAAAGACTCTGTAATGCTCAGCTGA
+TGAGTCTGAGAACAGGTCCGGAGCTGCTCTGCCCCTCCCCAAAGGAACCAACCGTGGAACCATAAAGCCC
+AACCCACTGTAACAAGTAAAAGCCAATCTAATTCCAGGAGCTATCATCCGAAGGCGGGTCTGAGCTTTCA
+GATACCGAAGGCAGTGATTGATTGGCAGGCCTGCTTCCATCTGCCCCCTGCTGGATTCTCAGGAAGTTGG
+CAGTATCCCTGGAGACAGGCCAGCTAGGACCTGTTCTGCCCATCTCCTTTCCTGGTAGATCAGTTTTTCA
+CACCTAGACTCTGGGAATCTGGGCTCAGATCTCAACATATGCATAAAAAGGCATTGCTTCAACCTGAGAG
+TCTGTTCTTGTACTCATGCTGGAAACAAGGGTTCTGCCAGTCTGACAAACTCCAGACTTCATTGTCCATA
+CAACCTGGAGGTACAGGAGCAGTGAAATGGGAATGACCCAGGACAGGGCCAAACAGAAAAGCAAGCAGTC
+TCTGTATTTCTCTTGGGGGATCTGAGGCTGTCTCAATTCCTTTCTTATTGCTGTGACAAACACCATGATC
+AAGGCAGCTTGCATGGCCATCATGGTGAGGAACACAGTAGCAGGGGAGCAGGCAGGCATGGGGCTAGAGC
+AGTAGCTGAGAGCTCGCATCTATCTGACTCACAAGCAGGAGGCAGGGGAGAGAGAGAAGGAGGGAGGGAT
+GGAGGGAGGGAAGGAGGGAGGGAGGTAGGAGAGGTAACTGGGAATTGCGTATGTTTTTGAAATCTCAAAG
+CCTACCCCAGTGACACACCTCCTCCAGCAAGGCCACACCTCCTAATCCTTCCCAAGCAGTTTCACCAACT
+GAGGACCAAGCACCCAACTGTATGAGCCTATAGGAGGTATTCTACAGGGTCATCAAGGTAGTGACTATAT
+AAATTAATAGGCACTTTGGCCATACTTGAGAGCCAAATCCAAGAATCAAACACAGAGAGGCCTTGTGAAA
+CTGAAAGAGTCCCACCACCATGGACAGAACCCCAAACTCAAAGGCACAGTAGCCTGGAGGGACCAGATAC
+CTCTCCTGATTCTAAGGGACAACAGAATGCTGTCCGGAAAGGACCTTGAAGGACTCTGCAGAGGACCAAG
+TCCCTTAGAGTCGAATTCTCAGCTATCTAACTCATTTCAGCCCTGGCCGCAGCTCCTGGCTTTTGTGTAT
+CTGGGCATAAGAGGGTTGCTCAATGTCCTTCATCATGGCACCTGTGGTTAAAACTCACAGACAAAATCCT
+ACCAATGTCAGAGAGCCCCGGTTCCCTTTAATCAGATTCTTAGCATCAGTGATAAATGAGTTTGAAAACA
+GACTTGAGGGGAAGGGGCATGGGAATGGCTCACTTGATAAGTATTTGCCAGACCAGTATTAAAATATCAG
+TACAACCCATGTGACAAAGCTTGCCGTGGTGGCAGGTGCTTCCCAGCGCTGGGGAGGTGAAGAAATACGG
+CTCCAGCAGGGTGAACTGCCTGGGGGCCAGTTCGGCTGACTCAACAGACTCCATGTTCAGTGAGAGAGCC
+GTCTCAAAAACTACGTACAAAGTGATTGATAAAGACACCAAATGTCAACCTCTAACCTCCACATACACAT
+GTGTACATGTGTGTTCACAGGAACACAGAGATATAACCCAGGGCTCAGCGACAGGGAGTCAAAATTCAGC
+TCAACGAGGGGGCAGAAGAATGAATGAGTAAGCACTTGATCATTGCTGTGAAGTTGGGAGTGATTGAGGC
+AAAGAGGGAGGACAGCGTGGAGCAGAGATCCTCTGGCTGCCCACAGGCTTCTGTTAGAAACTTGCTGATG
+GATCTGAACAGGCAGGACCAGGCACAGGCGAGCAGAGGTTAAGTCAAGGGTTCCATGCCCCGCTTTCATC
+TTGAACTACTCTTCCTCATTGGCTGAGCCTGTTTATTCCAGAGGCACAGAGAGGATCTGAACCAGCTGGG
+ATGAAGCCCACGACACAGGTCACAAGCAGTCGAGAGTTCTAGACACAGATTCTGCTCTCTGGGAGGTGAA
+GGGCCACTCTGGGGAGAAGTCAGTCTATCCTGACATTGGTGGGTCCTGAATAACAGTGTTCACTCAGTGC
+AGTTGTGTGTTATGAGACACTCTGGAGGCAGGATGTGTAGTAAACTGGATTGTAAGCAGCTTCGACGAGT
+TGGAAGGGGAGACACAGAGCCTGTGTAGACAAGCCACAGCCTGGCCCTGTCCAACAGGGGGCGTGAGATG
+GTGCAGAAAGGCTGGGCTAAGCAGGTTCTCAGCACCTGACTGGAGAGACTTCAGGATGTCTTTCTGGCTA
+CACAATGACTGATACTCAGTATTTTCTCTCTGGTAAGTTCAAGTGAAGGCCTCTGATCAGACCTTCCCCC
+TCGTCTAACTTGCACCCAGACCCTCACACATAGGCTGAGCTAAGTAAGGGGCAATGTTGCACAGGATGAC
+ATCATTCCAGGTGACCTTGGGCCTGATTATGTCACCAATAGGATGCTATTGGCCTTGACAGATGCCACTC
+TGTCCTGTGTGCATGATGGGAACTTCCTGCCTTGGCCTATTCTCTCTCTGTGTCTGTCTGTCTGTCTGTC
+TCCTTCCCATAATTGAGACACAGCTTCTTCTAAGGACAGGAGAGTAGGCAGGGAGCTTGTCTTCCCATCC
+CTGATAAAGGGCCCCTCTGAGTATCAAAGCCCTAAATGGAGGACTGCATCTAAAACAAGTGACCACTGTC
+CTATCTGTTGTGTACTAACAGGCCCTTTGTTCTTTCAGAAGCCCTTATTCAAAAATGTCCTCAAGCCTTT
+GCAAACACTTGTTTAAACCAGAAACACAAAATAACATAGTGCCTTTTATTCTAGTTATCTTAGTCTGGGT
+GATAGCAGCCAACCTTGGACCTGGTCCCTAGAGATGCAGCCCTTCCCCTCTACTTCCCCCACAGGGGATC
+TTAAAGGAACAGGCACTGCTCTCCCTAGAGGAGTCTGAAGCATCCTCCGTCAGACCCCACTGTGTTGTGC
+TGCCTGCCTGCCAATGGTTAACAAAACTCTTGCCTACAGTTAGCTAGACTCAGCACCCCACTTCTCCTCC
+TGTACCTAAAACCCCAACAAGGAGGAAGGAGGCTGGGACCCTGAAAGACTCCATGGACCAGAGATGGTCA
+GTCTCCTGGACTCTCCCCATGAGATGAAATGTGTCTAGATCAGTGATTCTTTGAGCTGAATCTCAGTCCA
+CTGGCCCTAGAAATAAGGGGGAAACTAGGCAAAGAAGTGTAATAAACAGTAAATAATGACAGACTGACAG
+AGCCAACTCCCATGCAAACAGGTATCTTTTTAAAAAGAAAGCCTGTCCCTTTTCCCAGGCTAGAGGCAGG
+AAACACAGTGAATGGAGGGAGCCCACTGGAGTCTTGCTACAGCAAATCGTGGTTTGTCTGCAGCACACAC
+GCTGCCTCTGGGCAAGGCCCTTGGGTGGATGTTTTCATCTCTTCTGAGCACAATGTTTAGGGATCTTTTT
+CATCTACAGTCACCATCTTATTTTGTCCTCTCCAGGACAGAAAGAAGGATGAAGTTGCTCTCAATCCCCT
+TCTTCTATAATCACGCTTAAATTTTCAAATCTAACATAGCTTTACCATAATTTGCCCTTTAATGGGAGCA
+TCTGAACTGGGGATAGGAATGGTAAACCACCTATTGCTATTTCCTTTCTTGCTTATTTGGTATATTTACT
+GTGGTAAATTTTGTGGCATCACATTTGACCTTGTGTCAATGCAGTATTACCTTGAGCATTAATGGTGGCA
+GTGCATTAATGGTGGCAGCTGACCCCACTCATCTTTCCCTTTTGTGACACTAGAGGCTTGTCAGTTTGTC
+TTTCCCTCCAGCCTCCAAAAGCCACTGCCTCTGTGATATCTGCTGTCCCCACATGTTGCAGCTTGAATAC
+GGAATGCCTCCCGGAGGACCGTACTCTGAACACTTGAGTAGAGGTGGTGGCTCCATCACTGGGTATGAGA
+ATGTGTGAGAGACTGAGACGTAGCTGCAGGAGGGGGATACCTGGGGCAGTTCCTTGCAGAGGTGTGGTCT
+ATGGTTCCTGTGTGTCACACTGTCTGCCTCTTCTCCGCCAAGAAGTGGAAAACAGCCTCTGTCATGTCCT
+GGCCATCATGATATTCTGCCTAAGCACATGGGGCCGAAGGACTGTGAGCCGAACTTCACAAAATGCCAGC
+CGAAATACATCTTTTCACCCTTAAGTTGTGTCCTGCTGGTATTTTATCAGAATGACGGGGAAAGTAGTTA
+ATATGAATAAAATTGTAAAGTGTGTGTGGATTGCTTGTTTGTTGTTTTGAGCAGTTTTAATGGCTCAAGT
+TTCATCTAAGTTTTATCATGGGTTAAAATAGCTCTCCTCTTTAACACTAAAAAATATTCTTCTGTGTTTC
+TACCCAGCGTTTTCTTTTTACATTCATTTGTAAATGGGTTCATAGGTTGCATCTCTGTATTATTCATCCG
+AATAATGCTTCTTTGAACATGGACACACAAATACATATTTGAGGAGCTAGGGAGATGCCTCAGTGGTTCA
+GAGCCCTTGCTGTTCCCAGGGGACCTCCATTTAATTCCCAGAATCCACATCTTCTGGCTCCAGGGCTCTG
+ATGCCCTCTACTGGTCTCTATGAGTAGAGACCTGCATGCGTGTGGCATATAAACACACACCCCAACACAT
+ATACATAAAAGTAAATCTTTTTAAAAAATACATATTTGAACTAGCCATGGTTGCACATACCTGTAACTCT
+AAGGATTCAGGAGGCTTAGGCAAGACTACAAGTCTGAGTCAGCCTGGGCTACCTAGATAATTCCAGGCCA
+GCTTGAAATAGAGAAACTATGTCTCAAAAAGCCAATATAGCTATTTAAGCCCACTATATATGGCACACAT
+AAATTATATATTATATGTGTGTATATGTTATATGTGTACCACAAGTGATAACTTTGCATTATTTTGAGTA
+TACACATAGAAAAGGAATTGATACATTAGTAATTCTATTGCATTATTTTGAGTATACACATAGAAAAGGA
+ATTGATACATTAGTAATTCTATTACTCATTTTATAAGGAACAGAGTGATTTTCACAGTAGCTGTAGCATT
+TTCCATTCCCAGAGACAACGCGAAACATTCCAATTCCTCCATACAATCACTATCTGTCATTTTCCCCTGA
+CGGTGACTAGTCATAAGTGTGTAAAGCAGATATTCAATGTGGTTTTTACATTTCTCTAATGATGAATGAC
+TTTTCAAACAACTTTTTTGTGTGACACAAAAAAGTAATGGTAATAAAAGCTATGGTAATATTTTCTCAAG
+AAAAAGAAAACTCCCTTTAGTCCTTTGCCCACGTATGTGTTGTTTGGTCTTCACTTTGCTGCAGTCACTG
+TGTGTGGAGCACATTCACTGGGACATGGACATTCATTGTATGTTAGCTATGACTTCTACAGATCCCCAGA
+AAAGACCAGGAAGGGCTGGGGAGGCGGCTCAGTCTGTAAAGCATTTACCTACCGTGCAAGCACAATGACC
+TGGGTGCAATTCCCAGCACCTCTGTAAAAGCCAGGCTAGGATGCATGTACCTGTCAGTTTCAGTGCTGAG
+GAGACAGGAAGAATCCAGGAGCTTGCTAGCCTGATAGTTTAGTTAAATAAGTAATGGCAGAGCTCCATGT
+GAGACAAAATCTCAAGGGAAAAAAAATGAAGGTAAAGGGTAGTTGAAGACATTTGATGTTGTCAACCTCT
+GATTTCCATATGCACATGCTCACACAGTCTCTCATTCTCTCTCTCCTCCTCTCCTTACTCTTCCTCCCTC
+CCTCTTTTATTCTCTATTAATATTTGTGTCTAGCTACTTAACTTATCACAGCTCCTTTTCCCTGTTATTT
+CATCTGTGCTTGTTAATGTTTAATACAGGTCAGCATAAAGGATTTTGATCTTACCAGATTAATAAACATT
+TCTCTTTGATGAACTCTTATCTTACAAACTTTGCCTTTTAATTAATCATTTATTTCACAGTGACCCACAC
+ATCTGCCTATGAGTGCTTACTTCTTAACTGCTGTAACAGGCAGCAGCTGAGACTTGGCGTAAGGACAATG
+CCCAGCAGGGCCATCCTCCAACATGTTCCAAGTTACAGGTAGGTTTCTCTCTGGTAAGCTCAAGTGGAGG
+CCTCTGATCAGACCTTCCCCCACGTCTAACTTGCACCCAGACCCTCACACATAGGCTGAGCTAAGTAAGG
+GGCAATGATGCACAGGATGACATCATTCCAGGTGACCTTGGGCCTGATTATGTCACCAATAGGATGCTAT
+TGGCCTTGACAGATGCCACTCTGTCCTGTGTGCATGATGGGAACTGACCCTAGGCATTCCCCGAGACCCT
+GATGTATGGAGAAGGAACACAGCTCCTTCTCTAAGACCAGGGATGTTCTTGGCTGAGCTGATAATGGCCT
+CGTGTTAGAGCCTTGAAGACTCTGTGGCTTCTGATCCTGAAAATCCAGGTTTTGCCACTGTACAGGGCTA
+TGGTTATCTGTCCCAAGGTCACTGTATGCTCAGATTATATTGTTCTTGAGACATTCTGTTTCCCTTTTGC
+AACATTGCTTCCTTAGCTTTTAGCTGACTACATCCCATTTCTCCACTGGAAATAGTATATAAGACGGCTA
+TATGTCTTCATGACTTTCCTCGGCATAAGACAAAGCTTGTGCAAGACCTGCTAGCCCCAGCCTTACCTTC
+ACTATTTCTATCTTTTCTATCTTTCTCATTTCCTGGCCACTTCCCACTTAGGACCCTGTCACTGAAGAAC
+AACTAGCTGGTTTAGGTCCATTACCTTTAGAAGCTAGAGGCAGTCAGGAGGAGCCCAGAACAGCTTCAGC
+GCATGCTAGGATGCTGAGCAAGTAGCTTACCCTACACAGTCTCAGGTCCCTATCTGTTCATCATGACAGC
+TAATTCTATCCAGGTTGCCTTCGTGTGGCAATGATAAATCGACTCCGCAGATCTAGGAGGCTGAACATGG
+GTCCTGGCAGAAAGTCAGTGTTCAATAGATCAAAACTAGGGAAGCAACCTGGAAAGTAGGGCATCCAAGA
+GGAAGCAGGACCAGGGCATCACCTCAAAGCGGGCCACGAAGTCCATCGGGTTTGGGAAGGCGTCCAGGCA
+CGTGGCCTCAAACATTCGATGCTGATCAAGGACATCATAGACGAGGAAGTCTGCAAAGGTGATCTGCAGG
+AAGTGAGGCTCAGTGAGGTGGGATCCGGATGCCTGGGAAGAATGGCAGCTCCTCCTGCCCACCTGCTACC
+CTCTCTACCTTGTCCCCTGCAAACCATGGCTGCTACCTAAGGAACTCAGAGCAGACCTTCAGCTGATCTG
+GGAGGCCCTATAAGAACTCTGGTTTTCACTTCTCCTAAGATAGAAAACAATTGTCACAACCCTTAGAACA
+CACAGGTGTTCTAGAGAAAGGCTGGCCTCATAGCGTTGTGCAGAAGAGAAGCCTCCTTTCTATACGGTGC
+TTGCACCAGTACATAGGTTTCTATGTAATGTGTTCCTGTTGTAATAATCAGAGTCCTGCTTGGTAGCAGA
+GTTGACTCAGAGGTGGTGACTGGAAGTTAGATGAGAATGCCTGTCCCCGAACCTGTCACCGCTGACATCC
+AAGCCAGGAATACATTTGGGTTGCTCATAAGGTCTGCACTTGAAAATCCAGTGGAACAGGGGAAGCAAGG
+TAGTTAACAAGAGACACAATTATTCTTGAAGAATGACACAGAGCAGGGTCAATATTGAGCAGCATGAGCA
+AAGTTGAAACACGTTTGATGTCGGAAGGGACAAATATCCTCAGGCTGACCATGAACTGCCAATCAGGAAG
+AGACCCTGATAATTTCTTCTACCAGGGACTACGCTCTGAGAAAACTGTTAGGTGGATTTCTGAATATATT
+AGGCTTATTTTATTTTCTTCTGGTTTGAATCTGTGCACTCAGGGAGCAGGCAGATGGTATGTGACTAACA
+AGGCAAGATGGTCCACTGTGGTGGTCAAGTTCAAAGCTGCAAAGAATTCTGAGTACATGAATGTATTATA
+TAGGAACCAAATGCCCAGTCACGGCTGCCACAACAATACCAAAGATTTCTAGGCATTGATATAAAGTATT
+TATTAATAAGAATTGGGTGTGTGTTGGCTGAATTGGGAAGCTTGAAGAATCAGAGCAATAAGTTCAGATA
+CTAAACAGGACAAGTGTAGATAATAACACAAGCCTTTAACTCCTCCAGCAAACTTCTGTCTGCCCCACAC
+AGTGAGGACTCACTAAATCAGGACTGTAGATCATGCTCATCTGCATGCCAGTGTTCATTGCCTGGTTCTC
+CAAAATCTACACACACATCCTCTCCTTCTCTATCTGCTCATCTGCAACACACACAGCACAGACTCACTCT
+CAGCATCCTGCCCCAGTCAAGCCAAGAGGGATGGCTTCTGTTCCTAACCCTGCAGCCAGCCTTAACCACA
+CAGGTCATCCTTATAGGCAGTGGGATACAGGATGTCATTGCTCTGGGTGATCTTGTGTGACCCATCAATT
+AAGTAGGACAGCTGGATAGACAAGCAGGGATGGTCAGATGGGATATGGGGTCTCAGCATCCCTTCCCTGA
+GTGAGAGCAGACACGGGGCCTGGCACTCAGTGTGCCTGCCATGGGAAGCTCTCCATGGGCACAGTGTGCA
+CTGAATGAGAGAGCTGGGACACAGCCCATCACAGAACACTGTGCAGCCAGCAGGGATGGAGCAGGAGAGC
+AGAAACACTGATCCTAAACCCTCAGCCAGACCAGGAGATGAGGTGAGATCACCGTGTCTTCCCCAAACCT
+CCCCTCCCCTGCACCTACATTGGGAAAATCCAGGCCCAGCTTGAAGTTTTCACTCAGCTGCTGGCTTCAT
+TCATAACAGCAGCTGTGATAGATAAGATAATGTGAAAAGCTCTCCTCCGTACATCCTTCCCTTCCCTGGG
+ATCCTCCCAAAATGTCAGGGCAAGTCCTCCTGAGGTATGGGAATCTGATCTGACTCATTCCATCTTACAT
+CCCACAATTTTACACGAGAATACTCCTTCAAAGGCTGTAGGCCCTACCTGACTAGGGTACACACAAAACA
+CAGGCAAGTGCTAAGCAAACCCTACATGACCACAGGTGGGGAGGGAGCCAGCGCTTCATTCCCAGGGTTA
+TTTATTGAAAACCAAAGCTGCACCTGCACCGCCTTGGAACTCTCAGGAAGGGATAAGCTCTGGGTTCGCC
+TGAAGGAGATCCACAAAGCTAAGAAACAGAGAGATAGGTAGACGTTTCCTGGCACCAGTGGACAGAAGTT
+GGGGAGGAGTGTGACCAGAAGAATGTCATTACCGTCCCCCATGGCGTATCTCTTCTCTTCATAGCCTGTT
+TCTGTGTATTCCAGGAGCAGGCGGATGGCGTGACCCAGCTGTGAAAGATAACAGGTAACAAAAGTCACAG
+TAAGATCATAAACGCATGACTTCATCTCCTCTAGGGAAGCCTCCATCTCTATACCTAACAAGAAAATGGG
+GACAGTCAGAGGCCTGCTGACAGGAAGAGACGGGAGACTCTAGAACATGCTGAAGAGGAGCTATCTCCTG
+GAACCAGCCTAACTCTTCAGTGTGTGCCAATCTAGGGCTCTTCCCACTCAGCAGCCCTACCTTCTACTCA
+CCTCACGGATATCCCAATAACCCAGAGACTTGGGCACGTTTTCTGCCCTAGCCTTGGAGACTGAGCAGAC
+AGGCTTCATTCCGTCTGGATGATTTCACTTTCAGCCGCCAGTGCATAGGGGGCGCTGTCTGCCTGCATGC
+AGTTCCTGAGCCCTCCCTATCTCACTCCCAAATGAACCAACCCTCATGGCCTGAAGCCCAACTTATCCAG
+CCGGTGATGAGATCCAGTCCAAACTTTACCATCTTCTGGGGCAGGTCTGGGTATTCAGATACCAAAGGCA
+GTGATTGGCAGGCCTGCTCTAGTGGCTCTCTGCCTACAGCCCCTCCCCCTGCTGGGTTCTCAGGAAGTTG
+GCAGTGCCCCCTGGAGATGGGCCAGATAGGCAGAGAATGAAGTGGTTCTCCTTCCTTTCATTGCTGGGCT
+CACTTTTCACACCTAGACTGTGAGAATTTGTCATCCAAGGAGGCTCTCCCCTCAATACAAGGACCAAAAA
+GTTATGGCCCCTAACCTGAGAGCTTAATCCAGCACGCATCACTGCTTCTGGGACTCTTCTAGTCTGAAAG
+CTATCAAAACTTCATTGTCTGCACAGCCCAGGGTTATGGGAATATGAAGTGACATGGGAATGACCCCAGG
+ACAGTGACAAACAGCAGAGCAAGCTGTCTCTGTGTTTCCTTTAGGGGATTTGGTTGGAGACTCATCCCTT
+TCATCATCAAGCCCATGGGCACATAAATCAGTCAGCGTTTTGGCCATGCTTGTGAGCTCAGTCCAAGACC
+CAGGCACAGAGGGCACAAGGACTTCAGGGTTTCCCAGCACCACTGACAGCGATTAGTCTGCGGTAGAAAC
+CTCACACTCACCAGCACAATGGAGGATGCGCAGAGCTCACAGTCAGAGTTAGCTTGGGTGGTAAAGCATG
+TGCTCAAGGGCACAGACAGGGCTGGCGAGGTGGCTCAGCGGGTAAGAGCACTGACTGCTCTTCTGAAGGT
+CCTGAGTTCAAATCCCAGTAACCACATGGTGGCTCACAACCACCCACAATGAGACGCCCTCTTCTGGTGT
+GCCTGGAGTCAGCAACAGTGTACTTGTGTATGATAATAAATACATCTTTAAAAAAAAAAAGAAAAAGAAA
+GAAAAAGGAAAAAAAAAGAACACAGATCTCTCCCCTGCTTCCAAGGGCTACAGAATCCTGTTTAGAGGAG
+ACATGGAGTATCTGCCTAGGTTCTAGGAAGCACAAGGCCTGTCCAGTTCCCTGGCATTAGCTATGTATGG
+CCCAGACTCCTGATGCATGCATGTCTAGGAATGAAAAAGGGTTTCAAATGCTCTACGTGACATTGTGGTA
+CCTGGGACTGGAGGTTAAAACTCAAATACCATATCTTACCAATGTTACTCTAGAGCCAAGTTTCTGTTGT
+TTAGGTGTTTAGTGTTTGTCTTGCTGACTGCTCTGTTGCTGAGAAGAGATCCTGTGACCATGGCAACTCT
+TAAAAGAAAAAGCATTTGATTGGAGCTTCTCACAGTTTCAGCGGTTTCGTCTATTTATTCAGGGAGCAAG
+GCAGCACACAGGCAGACAGGGTAGCTGAGAATCCTACATCTGGATTGGAAGGCAGCAGGAAGAAAGTACC
+CCTGGGGCTGGCTTAGGCTCTTGAGACTTCAAGCTGACCCCCACTAAAACACATCCTTCAACAAGGCCAT
+ACTGAATCCTTTCAAAGAGTATCCGTCCCTGGTGACTAAGCAGTCAATTCTGTGAGCCTATGGAGGCTAT
+TCTTATTTGAACCTCCACAGTATCACTGATAAATGAATTTTAAAACAAACTCAGCTGTAGGGGGTGGCGG
+TGCTGATGCTAAGGTCCTATTCCCCAATTGGTTCTTTTTTTTGTTTTTTTTTTTTTGTTGTTGTTGTTGT
+TGTTGTTTTGTTTTTCGAGACAGGGTTTCTCTGTGTAGCCCTGGCTGTCCTGGAACTCACTCTGTAGACT
+AGGCTGGCCTCGAACTCAGAAATCTGCCTGCCTCTGCCTCCCGAGTGCTGGGATTAAAGGCGTGCACCAC
+CACACCCGGCCCCAATTGGTTCTTGATCTATCAGTAAAGAAAGCTGGGGCCAACTGCTGAGCAGAAGGAA
+CAGGCAGAACTTCCGGGTCCCTGGAGGAGAGAGCAAGGAAGGAGAGGAAGAGTTTTTTTACCATGCTTTG
+GAGGGAGAAGAACCCGGCAGCCATGTGAGGTCTCTGGAGGAGCTGGAGCCTGTGGCCACTATTACAGGTG
+GGTGGTTAGGGATGTTTGGCAGGGGATAGGCGGAACTAGCCACTGACGTTTAGGGCAGGTGGGAGGAGCG
+GAGAAAAAAATATTAATAAGGGCACGCTTTTCCGGGTGGGAAATAGTAACACCCAGCAATTGTACAAGAA
+GGCAAGTGGAAAAGGAACAAAGTATGTGTGTGCCTCTTGTCTGAGGATTCAAGGGAAGCCCGGGGGGGGG
+GGGGGTGGTGGTAGCATGGTCACTTCCTGGAGCTAAAGGCAAGAAGAACGTAAAATTGCATACTTACACT
+CATGAAATGCTCTGTTGGTAACAACTTGCCAGGTGTTTTAGTTAGGGTTGCTCTTGCTGTGATAAAATAC
+AATGACCAAAAGCAACTTGAGGAAGGTTTGCATCAGCTCTCAGGTCACACTTTATCATGGAGGGAAGTCA
+GCGCAGGAACTGCAAGCAGGAACCCGGAGGCAGGAATTGAAGCAGAAGCCTGACACAGAAACACTGCTTA
+TTGTCTTGCTTGTCCTGGCTTTCTCAGTCTTCTTTCTTATACCACTCAGTGCTATCAGTGGATAGCACCC
+TCCTCCCCAGTGAGCTGGGTCCTCCCCCAGCAATCATGAAATAAATACCTTACAAATTTGCCTGATGGAG
+CCATTTTCTCACTTGGGTTCCTCTTCCCAAATAACTCTTACTTGTGTTAAGTTGGCAAAAACCTAACGAG
+CACATCAAACATGAGTTTGATTCCCTAGAACTGACTGAAAAGTAAGATATGGTAGGGTGCACCTATAATC
+CTAGTACTGGAGAGGTGGAGACAGGAGGCTTCCTGGATTGTCAGTCTAGCTGAACCAGTGAACTACAAAT
+TCAGTGACAGACCATGTCTTAAAATTAAGGTGGAGAGTAATTGAAGAAGACACCAGATTTCACACACATG
+TAAATACGTGTGTGTGTGTGTGTGTGTGTGCGCGCGCGCGCGCGGCGCGCTCTGATACCTTCATCTCATT
+TGGCATTTGAATTTTAAGGAGAAGCAATCGCAATGTCTCCCTTTCTTAAAGAAAAAAAGGGAAAGCAGAA
+GACTGATTGTGACAATTCAGTTTGTCTTCAAATGGCTTCTCAGTCTGCTAAGGCCAGCCTAGCTGCCTGA
+CACCAAGGACACTGTCCCTTGTACCAGTGTTCTGACTTAAAGTCAAAGGACCTTCTCTGAAGGTTCAAAA
+CCAAGAGGTGGAGGCAGGCAACTGGCCTGTGGTTGACTTTGCATGTCTAAGGAACGTGACTGGCATCAGC
+AGTAGAGAACCCGGGGAATAGGGGCCTCTTCAAAGGACAGAATAACTCTTCCAGGGGAGGAAAGGACTGA
+GCCAGAAGAAAGAGGTTGGAACTTAAAACTGAAGTGAGTTTGCAAGTCAAAGGACAAGGCTGCCACGCTG
+CAGCCCTTGGGCAGGACTTCAGATTGTGACCCAGTGCTCAGTAAATGTTTGACACCACAAAGATGTAGTA
+TAGGGAACAGGGGACAGGCAGAGACCATCTGAACACTCAAGTCAGAGAAGGTAGGTCTCTAAGAATGGAA
+GCACAGACTATGTCACAGGGGCTGGCTGAGTACACCACCTGTGGTAGAATCCACTGTGGCATTTGGATAT
+GACTATCCTGCCAGTCTCAGGTTTCTGGGCTGTGTCCAAACTCAGAAAGTAACTTGAAACTTTAAAGAGA
+ACTTCAGTGAAAGGTTGCCAAAAGCATGATAACAATAGAGTGTACACAGACACAAATTCACACACGCACA
+AACTCACACACATGAACACACATGCACACACACATTCATGCATGCTTGAATGAGCACACAAGCACACACA
+AACGTACCTACACACAAACGTACACACTCACTAACTCACACTTATGAACACAATACACACAGCACACAAA
+CACATGCATGCATGCTCAAATGTACACACAAACACATAAACACACACACACAAATGCATACACACAAACA
+CACAAGCACACACACACATACATACATTTCTCTGAACTGTTCTAGTCTCATATACCTTAAGCATTTGTCA
+TTCTAAAGTTTACACACCTTAGGTCAGGTGAGGGTAACACATGCCTTTGGTCCCAGCACTTGGGAGGCAG
+AGGCAGGCCTTATCTCTGAGTTTCAGGACAGCCAGGGATACACAGAGAAACTCTGTCTCAAAATCAAAAC
+AAAAATTTACATACCTTGAACTTAGAAAAAAGAAATTACTGAATTTAACTGAAAAAATATTTATTTAATG
+TTATGAATAATTAAAATAACTGATTTTAAAATAATTTTTCTTTATTTTTAAATCAAATTTTAAATTTTTA
+ATTACATTAAATTTTTAATTAATAAAAACTTTTTTAGTCAGGAACTCACTGTCAAAGAAGAATTTTTTGA
+AAGACTTAATTTTTATTTAAATAAAAACTCTTAAGTGGTTGCCACAGGTGGAGCAATGAAAACAAATTTT
+TCAAATGAAGGAGGACAAATGACTTCAAATTTGAAACCATTTTTTTGTATATACATTAGTGTTGAAATGA
+AGTGGTTCAAATATCGGAATGATGAATCAAGGGTTAAAAGAACTTAGGATTTGCCAGACAGGGGTGTGGG
+TGGTGGGTGTGTGGAACACACCTTTAATCCCAGCACTCAAGAGGCAGATCCAGGCAGATCTCTTGAGTTT
+GAGACCAGCCTGGTCTACAGAACAAGTTTCAGCACGGCCAAGGCTACACAGAAACCCTGTCTCAAAAAAA
+CAAGATAACAACACCTAAAAGAGATCAGGGATTTGAGTTGGCTCACCCCATCTATGAACTGCTGTAGCTT
+ATGAAGGCGCTAGTCCTGCAGGAACAGAGCTGCAGGACCTCGGTGACACCCACCGAGCAGCAACAGGATG
+GATATCTGAGTACCGATGTGTAACAGAAGCCAGAGTACTCAAGCAGGCCAATGACTAACATTAGCTTCCA
+GTCGGTGCCACAGGGCTCCTCTCAGGGACCCAGCAAGGCTTACTCATCACCTGCCGCCAGCATCCAGTAT
+CCTGGTGTGGGAGCGCACCTCTCCCCTTCCACATGTTCCAGGCTGGCCCCTCTCCCTCTTTCTGACCTTT
+CTGTCCTTTTCTGTTCTCTGTTCTGCCTTCTGTGCCCTCATAGCTCTGCTCTGATTTGTCTGCCTCGACC
+CCTTCTCTTCCTCCCCTTTCCCCAGATAAACCTCCTTTATACCAGGTCTGTCAGTCACATGGTAAGATTT
+CTCAGAGGGCACTTGGCATGGGCCCTCCAGTACCCTTGCCTGCTGCCACATTATATTTTATAAAAGCAAT
+GAACATTTGCAAGTAAAACTGTTTGGGCAAAGAGGTATATACCGATTGACACATCATGACACCCTATAGC
+TTCCACTGTGAGATGTTTTTGCTTTGTTTTGTTTTGTTGCTTTTGTTTTGTTTTGTTTTGTTTGGGGAAG
+AGGGTAACACAAAAGGACCAGGAGATGAGCGTGTTTGGGGTACATGATGTGAAATTCACAAAGGATCAAT
+AAAAAGTAAAATAAAATAAAATAGAAATAGAAATAAGGGTCCAACTATAGTTATGAAGCAGGAAGAAGAA
+AAGTCAATGAGATGAAAGGTTGTGTGTTCTCAAGAACTGGGTTCATGAAATTTCAAAGCCTCCCCAAACC
+TAGAGTTTTCCACCCACTGTCACCAATACGCACAGGTGTGGCATACACAGCCACCTGGGAGCAGTGGTGA
+GAATGGCTGTAATCACTGTCCTTCCTGCCCTGATCCCCTACACATGTGCTATCACAAAAACATATGGTGC
+TTGGTCCTTGTCACATCTGTGCTGGGACACAGTGTGCCAGGGAGAGAGTCAGAAGAAAGAAATGAGAGGG
+GGCAAGAAAGGGCTTGTGGAAACGACAGGGCTTGTGGAAAAGAGCGCTGGCTGCTTGGGAGCTCTGTGTA
+CTAGTTTTTGAAACATTTCTCAAGGTCTGGCACTGTAACAACAGCCCAAAAAAAAAAAAAAAAAAAGGTA
+AGTTGTGAAAAATGGAATATATTTATCACATGGTCCAGGACATACACACACACACACACACAAAACTATT
+GAAATAAACTTCCAACAAGAAAGACTCATCTTGTCTGGCATGGTATCTTATGCACTTGGAAGCAGAGGCA
+GGAGGATTGATGTAAGTTCCAGCCAAGCACATATAGTCAGTTATATCAAGCAAAACTTGTCTCCTAAAAA
+TAAAGACAAAAAAGAAGGGGGAGGAGGAGGAGGGGGAGGAGGAGGAGGAGGAAGAGGAGAAAACAGATGA
+CAGATTCATCTTTTATGAATAAGTTAAATTTAAATTTTAGGTTTTAGAAGGTCCAAACGCAGTCACAACC
+AAGTGAGGGGACCTAGGGTTACCTGGTCTGTGAACCAGGAATAACTCAGAAGCCACAGCTGGAGAAGCTC
+AGCCTTAGAAACGGGTGAGAGGCAGAGCAGAGCAGTTGCAGGCCTGGAAACAGCTGGGAGAAGGGGTAGT
+GGGGAGGGGCTAACAGCAGCAGTCTTGCCCTGGGGAGACCCTGGGGGCATCGGGCTGCACATTCCCACAG
+GGAGTGAGAGACTTGGGCAGAGACCTGACTCTACTAGGTGGAGAGACTAGTCAGCCTGGTCCTCCAGGAT
+ACAGGGTAGGACAGATGTGACTCTCAGAAAACCAACCCCTCTGACTGCCCCACGCCAGACCCCTGCAGCC
+CTCAGAAGGTGAGCATGTAGATTATGTGGTAAATTGCCATATGTTGTTTGAGCTCACTGAGAAGCTGACT
+CTGAAAGATCCAAGCATGTTTGTCTCATATCACAGAGCTTTGGGGGGCAGGGGATGGGTGTGCTGCTTGA
+AGTTTAATGATAAAAATAAATAAAATATTTTTAAAGTAAAGAACAGCTATGCAGAGACATCTGTATAGTA
+TAAGGCTGTTGGCCTGTGTGCCGGGGCTGTTCCTCAGCTAGCCCATCTAAGCTTAACTTTGCCATTGCTT
+CCCGTTTGCCTTTTCTTCTTGGGCAGGTAATGGAGCGACAAGCATTGACTCATCCTGCTAGCCTGGCACT
+GCTCACCCAGCAACTAAACACTGAGAACACAGGGACCCACTTCCATAGCCAGTTGATCCAATACGTGTGG
+GCTATCCCAACAGTCTCAGTTTCCCCCAAGGACTCCTCCGGGGATGGCACCCCATCTCTGCTACAGGAGG
+GAAGGAAAGAGCAAAACAGCCCTAGAGTAAAGACACACTTGGGAAATGCTAGAAGGTGAACCATGTGCAA
+CAGAGGATGCCATTGTGTAATCCCAACATGTAAAAAGGAGAGGCTGGAAGCTCAGCCAGAGGAGGCCATC
+CAAAATGAGAAAGTTCAATCCGGAACTTCAGTTGGCTGTCTCAGTTTTCCTAGCCACTTTTTCCCTGGAG
+TTGTGAGCCCTTGATTTTCTGACATCATCTCTGTTTGCTGCTATTGTTCCTGTGCCTAGAAGCCTCATCC
+AAGGGGTCAATAGCTTTTCCTCAGGGGGCTTCTGATAGATATTTTGAGTGACCAATTTGATGATATTCTG
+CTGTCTATCTATCTACACCTTGCTACTCTTTCAACTTGGCTTACTACGTAAGTGATAAATTCACTCATTC
+AAAACAAAGTACAAGCCGTAGATACACACGCACCACTCTTACAGAGGACCTGAATTTGGTCCCCAAAATT
+CACATGAGCAAGTTCACAACTGCTTTTAACTGCAGATCCCAGAGGATCTGACACCTTTGGCCCCATGCAA
+CCACGTGCAAGTATCCACAAACACACAACCGCGGACATCCATATAAATAAAAATAGGGCTGTGTGTGGAG
+GCACACTCTAAATGCCAAGGCTTGGGAGGCAGAGGCAGGTGGATCTCTCCGAGTTCCCATGTTTCCACTG
+CTAGAACAAGGGGGCTAAACATCTTCTGTACTTAAGTCTGCAATTACAGGATTAACACCTAAAGATATTT
+GGATGTCTCAGGTATTTTCTCTTCTGCTTTTTCTTGTTTACAGGGAGTGTTCAGTGTTCAAGGTCATTGG
+CATCTTGGCAGGTCCTTCATGGAATGTAAAAACTCTCTCTTACTTTCAGAGTATGTGAGACCACAGGGAG
+GAAAGACAGGAAAAATCCAAGTGATATGAAGTTTTCCAAGACTTGCAGGAGGAACAAAGTTTGGAAATTT
+GACTATACATGTACAGATAATCTTAATTTACTACACTGTGCCACATCCATACTGTGAATTAAATTCTTGT
+TAAAATTAGAAATTGAAAAGGATATAAATATGAAAATAAATATATTTTTGTTAAAGAGGGTAAATAAAAA
+TATCCGGTATTAGGTTGGATTTATATAATAAAAAAAGATTTTTGTGCTAAAAATAAAAGTCGAAGATGAT
+TTTTTTAACTTTAAATCAAGTTATAAATAGCCTGAAGAAGGCACACAGAGCCTGTGAGTGCTAAGTCTTA
+ATGATGATATATGTTTGATGACTAAAAGTTATTAACTACTGAAGTCAAAGTTCAGTATACTAGTATCAAA
+ATTATCTTTAAATCAATAGAGTTCTTCTAGGATATCTGTCTACTGTTCTATTGCTCTACTAAAATTTTTC
+TCATCAATATAAGGAGGAAAACTGAGTCTTAACAAAATCAAGACTTATTTAAAACTGGTGGGGTTTTTTT
+TCTTGTTGTTTTTTTTCATGAGCCCTATCAAAACATTACATGGAAGTTTAAATTACACAATTGGCTATGT
+TGTCCTTCTGGGTGTTCAACAAAAAAAGTTGGGAGGTCTACACCAATAATGGTAAGGGAGTCATTAAGGA
+CTGCTAAGGTTTTGACCCTTACCCTCCAAGATCAGCCAGAGCCCATGAGACAGGATCAGTAGAATGTGTA
+GACATATTATTTTGGATTCTGTGCAGATGTGCATATTATTTCTAATGGAAAGGGACTCCTGTGATTTGCA
+TCCCATCATCTGCTGACTGCAAGAGACCCCACACAGCCCGTGGTGCAGTCAGCCCTGACTGGACAGGGAG
+CACAGGGAAGCCTCCAGGTAAAGCACGAGAAGAGCATGCATGTGACTAGGCCAGCAGATGATGGGTACCT
+GGGCATGACAGAATGAACTCACACATCATCTGGCTTTGGTTCCACCTCTGTGCTTGTGAATACTGCCTAA
+CCCCACTGGTGAGGCTGGTCTTCTATACTCAGTCTACTGAGACAAAGTACAAGTCTTCTGGAATTATCGC
+CCTAGCCACACCAACATACAACATTTTATAAGCTACCTGGGTAGCTCTCAGTGCAATCCATTTGACACTT
+CTCTGGAGAGCAGATGAAGTCTGGGTGAATGGATATTGTCAATAGGAACATGCCTAGATCAAGAACCTCA
+ATGCCTCAGACCCTTGGGATGACAAGGCCCAAGGACAGATATGGACACTGTGTGCAGAAAGTACTGACCA
+CAGCTTCTCCCTCCAGGATGACTGAACCCAAGACTGGTCCCCTATAGATCCCTACTGAGATTAGCTGGCA
+CACCTCCTTGTCCACAGGTATATAATAAATCTGCTTTCCTGTTCAGCTGTACCCACTGACTTTCTGGGCA
+ACTCCTGCATCTCCTTCAAAGTACTTGCATGGCTCACAGATCCCTGTTTCTGTACTTGTTTCAGTGTTTC
+TTCATGTCCAGTTACCCTAGTCAGGTTAGTCCCAAAGCCGTGCAGGTCTTGGCAATCAACCATTCCGCTG
+TATTCAAACCCTGAATACAATAGCAGCTAATTACATCATGTCTGCAGAAAGCTAGGGTTTGATAGTCACA
+GACTCATCTTTGAACAGCTCTCCGAGAACAGGGCAGGGTGAAGAGACCATGAGGGGCTCCTCCTGGGTGC
+TCTCACCATCAGCTTCACGACTCAGTGAGACCCACAACTCTTCACAGTTCCAAGTCCCAGAATGCTGCAG
+TGAGTGAGATTCCCTCTCTATCCTCTAGAGAGCTTTATCTCTGCTGTGAGGATCTCAGACTAGAGCCAAG
+ATATGTCAAAGCTCTTCTGGTTCCATTAGATGTTTCAGAGCCCGGCACTACAGGCTCAGAGCTCAGACTC
+CTGGGAAGCAGCTGGAATGCTTAGCAAGGCTAAAGAGCAAAAGAAAAAGACACAACTCACAGCAAGTGTT
+CAGTGTTTATTGTGTTTCAAGTGTGTTTCAGGCTTTATTGTACATGAGACAGGCCTGGGGCAGCTCCTTA
+AACAGGAACAGGCTGGCACTCGAGTATTGACCTTCAGGGTAACTCTAGGGAGGGCCGTCACTAAGATAGT
+ATTGACCATCAGGGTATCTCTAGGGAGGGCTGGCACTAAGATAGTGTTGACCATTGGGGTAACTCTAGGG
+AGCCTGAGTTCAGGACAGACCTCAGATTGCAGAAAAGGGCTGTGAGGTTGGGTCTGGGCACCAATGAAGA
+AGGGGCCATGTGAAAAAAAAATGGGGAGAATAAAGGCTGCATGGGCTTGACTGGGGTGAGGATGAGGAGA
+TGGGGCTGACCAAGCTGCAGAAGGGAGCGGGAAGGAGAGAGAACCGGGAGCCACAGTGCAGAAGGCCAGG
+GTGCTGTCCCCACCCAGAGCCTGCAGGATCCGCAGTATGGACAGGTCCCCCACTCTGAGTGACAGCATGG
+CAGGGGCCTAATCAGTGCCCCACTGGGCTATCTTAGTAAACACAGGTCTTGGGAGGAAGCGGCTACTCTT
+CATGTAGGCAGAGATCTTCTTGAGGCCCTGCACAGTGGGAAGGACACAGCAGAAGCTCAGGTCTGCAGCC
+CACACAGAGGAGCAGCCCCAGGTCAGGAACTGCATTTCTCAAAGGCTCAAGCTCTGGACCACAGCACAGC
+CTGCTAAGCTCCCAACCCTGGCAAGAGAACTCTTCAGTTTCCTATAGTGAGAGACTAGACCCTGTGAAAG
+TACTTTATACCCACTAAAGGTCACAATACAGGCTCAGAAAATACTATAACCAGTTATTAGAAAGCAACCC
+GAGGTCCAGAGACCTCAACTGCCCCTTCTTACAGGGGTAGTGATTGAGACAGAGGTGAAGTCAGCTTGTC
+TCTGAAGCTGCCATTCTCCCCCTCCCCCTTCCACCCTCCTCTTTCTCCCTCTTCCTTGTCCTCTCTCTAT
+TCCCCTCTCCCTCTCCTTCTCCCTTGTTCTCTCTCTATCCCCCTCTCCTTCTCCCTCTCCCTCCCTCCTC
+CCCTCCCCTCCCCCTGTCCCCTATGAGACAGTCTACTGAGAAACTGTATTTACCAACAGAGAAGGGAGAA
+AAAGAAACCAGGAAGACAGGCAGAGCCCAGTGACTCAGCAAATCCTCAATGAAAGCCATCAAGAGAACAG
+GCCACAAAGAGAAGCTGAACAGGATTCTAATCAATTTCCTGAGAGCAGAGTTAACAATTCTTCCCGCGGT
+TCCCAAGTATAATCTCACCACACCCTGGCAGTACCTCTCTCCACCCCTAACACAGGGACCACTCTGTGTG
+ACTACATGTCATGTTATAAGCCATACATCAAGGTGGAGAATACGCAGACGATTCCAGAGGGTGCAAAGCG
+ACCATACACATGCTGGGCTAAGGTGCCACTCTCTTTCTCCAGCCACCTACCCTGGAGCCATGCCAGCTCC
+TGCACAGTCCCCAAGGGTACCCAGCCTGGACCAGAGCCAGAGCCAATGGGAACTAATAGTGTATCTTTGA
+TGATTAGTAAGCAAGGTCAACAGATGGAAGGTAGAAAAGCCTTTGGACCTACACTAATCTGATTCCCAGA
+TTTGTGTAGATGACCCAAGATTGTTGTGTACTGCGGTACAGGCTGGTTTCAGCAACCAAGAGGAATACCA
+CATGCATCTCACAGACTGGGTTTCTACGGCCTCTGCTGACTTAGTCCCAGGACTGCGGCATTATGGATGC
+CCCTCACATACACACACACATTTTGAGATTTTGAAGTGATAAATATTGCTGGATACATTTACAGAAATCT
+CAAAAAAACAAAAAACCAACAACAACAACAACAAACAAACAAACAAACAAACAAAAAACAGAACAAAACA
+AAAAAAAATCCAAAAAACAAAAACAAAAAAACTAAGTTGTTTTTCCTTTGGTCACAGAACTAGAAAGGCA
+CTGTGAAAGGTGTTGGGATTGAGACCTTGGAAAAACAGCTAAAGTGCATATTTTACATATCAAAGCAGAC
+CTGGCCTCCAGGTTCTCCCAGCATCCTTCAGTCCCTACCTGGCATACCCTGACCTCTACCCTGAACTTCC
+CAGCCTAGGGGTTGGGCTGACCTTCTCTCAGAGTCTCTTCCCATGCATTTTCTCTCTTTCTCTTCCCCTT
+CTGCTCTATCTCCCTTTCCATGGTGACATCACTGGCCTCATTCCTTGGGATCAGGGAACTCACCAGCCTG
+AGAGTGTTTTCCCGATAAACCTGCTTTTATATGCTCTAATCTGGTTTGAATTGGCTCATTTCACTGGCAG
+AAAGATAACCTATCAGAAGAAAGTGAGAAAGAAGACAACCCAAGGGGAAGCAGGATCAGAGCATCACCTC
+GAAGCGGGCCAGGAAGTCCCTCAGGTTTGGGAAGGCGTCCAGGCACTTGGGCTCAAACATACGATACTGG
+TCAAGAATGTCATAAGCAAGGAAATCCACATAGGTGACCTGCAGGAAGCCAAGGGTGAGCGCTCTGGAAT
+CTGACACCAGGGAAGGATGGCAACCCCTCCTCCCCACCCCTCCCTTTACCTTGTCCCCTGCAAACCATGG
+CCTCTTGCCCAGGAACTCAGAGTAGAGCTTCATTTTCTCAGGGATGGCCTTCAAGAACTCTGGCTTCTGC
+TTCTCCTGAGGCAGAGAACAGCTGGCTGTCACCACCTTTAGTCATAGGCTCCCTGGCAGAGAGGCTGCTG
+GCCTCGAAGGACTAGGCTACTGAGTCTCATCACCATTTTCTTAGTAGCAGGCCTCATCTATCAGAGGTGA
+TTGGAAGACAGACAGGAGTGCTACACTGTCCTTAGATCGGTCACCACTGGTACTTAGCTGCTATGAGTCA
+CACCCAGGTCTTCCTAAGTATCATTGTGCATGTTATGGTCCCAGAGATCCAATGGAACCACAGAAACAAA
+GGACCTAAAGACCCCAGGAGAAGACAAAGTTATACTGGCTTTGGGGCGGGGGCAGGGAAAATGGATGGAT
+AAGAACAATTTAGATATTTGAACGGCATGCAACTAATATTTGGAGAGCAGTCACTAATATTGAAAAGAAG
+AAAGGTCAAAGATCTTCTGGCTGACCCTGAGCTGCCTACTAGGAAGGTGTCCTGTCTTCTACTGGAGCCA
+AAGAGGGCATGCTTGGGAAGTGCTGGGTACATGTGTGAGTCCGTTGTGCACGTATGTATGTAGGCGTGCT
+TACCTGTGTGTGCATTTGTGGAAGCTAAAGCCTGCTACCCAGTGTCTTTCTCAATCACTCTCTACCTTAC
+ATTTTTGTGACAGTGTCTGTCACTGAACCTGGAGTAGAGAGACTGGCTAGATTCATTGGCTAGCAAGCAC
+AGAAATCCTCCTGTTTCTTTTCCCATGGTGCTCCAAGGTACTCCCTGGTTGGTTTGTTTTTTTAATGTGG
+GATACCGAGGATCCACAATCAAGTCCCCATGCTTGCACATCAAAGACTTCTACCCTCTGAACCATCTCCC
+CAGCCTCTACTTCATCTTTTGAGTCTGCGTTTTAAGAAGAAAATTGTTAGTGTGATCACTAGCTTAGATG
+ACACATGGAAGTGGTTACAGGACATACACATCAGGGATTTTAACTCTGCATTAAATGTCTAGGTCAGACT
+GTTCACTCTAGAATCTCAACTGTATCTAGGATGCTTAGACAAGTAATAATAGTTTCAGTGTTGATGTGAG
+AGATCACTATGGTAACGAAGGTCGTTCTGTTGGTAAGATCCGAAAGCTTGGAGAATCAGTAACATGGCTC
+CTTACTGAAGGAGATGTTACAGATCTGAATGCAAGACTTTAACTCCTGCTTCAAACCCCGTCTTCTCCAC
+CTACTGGCCACTCACAAAATCGGGACTGCAGCAGACTATCATGAGCTGTATGCGGGTGTCCATAACTTGG
+TTCTCCAAAGTATCCACACGGATCCTCTCCTCCTCTGTCTCTCCACCTGCAGCACACACAGAACACTCAG
+CATCCCAACCCAGCCAAGCCAAGGAGATTGCCTGATATGCCCTACCCTGCAGGCAACCCCACTCACACAG
+GTTGTGCTTCCGGCCAAGGTAGCGCAGGATGGCATTGCTCTGGGTGACCTTGTGTGACCCATCAATTAAG
+TAGGGCAGCTGGATGGACAAGCAGGGAAGGTCAGATGGGATATGAGGTCCCTCCATCCCTTCCCTGGGTG
+AGAGCAGACACAGGGCCTGGCACTCAGTGTGCCTGCCATGGGAAGCCCTCCATGGGCACAGTGTGCATTG
+AATGAGAGAGCTGGGACACAGCCCATCACAGAAACACTGTGCAGCCAACAGGGATGGAGCAGGAGAGCAG
+AAACACTGATCCTAAACCCTCAGCCAGGCCAGGAGATGAGGTGAGATCACCACGTCTTCCCCAAACCTCC
+CTTTCCCTGCACTTACATTGGGAAAATCCAGGCCAAGATTGAATTTCTCACTCAGCCACTGGCTTCGGTC
+AAAGTTGGGGGCTGTGGTGGACAAGAGGATGTGAAAGGATATCAGTGTGGCTGCGTCTGACACTCCAATC
+TCACTGACCCACCTAAGAAGTCACGAGCAAGATACCCTGGGGAAGGAGAACTGTAACATCTTGCACTAAG
+TCATTCTCAAGATAAAGAAGAGGGGTGATAAAGAAGGGGACCCTAAGCCCAATGGGACCCCAAAAATGAA
+CCCAGCTCAATTATAAATACAGGTGGGCCCGGAACTCCAGTCTCATTATTGTTGTTTGAGGCAAGATCTC
+TCAATTATGTAACCAAGGCTTTCCAGAAACTATATGTATAGACCTGACTGCCCTCAAACTCAGAGATCCA
+CCTGTCTCTCTGACTTTCTAGAGCTGGATTTAAAAGCAAGCTCCCACACATTGCCATTCCCAGTGCATAG
+AAAACCAAACTGCACCCTCAAGAGGTTCCTGGAAGGAGCAGGCTCCTTCCTCCTCCAGTTGTAGGAGGTC
+TGCAGAGCTGAGGAATTAGGGGCTGGGTACGGGCATCCCTGGGTAAGAGGAAAGCAGGTGTCTGACCAGG
+AAGGTGTCATTACCATCCCCCATGACATATCTCTTCTCCTCATAGCTTGAATCTGTGTATTCCAGGAGCA
+AGCGGATGGAGTGAGTCAGCTGGAAGAGAAGGTCACACAAGTTAGAAATGGTAGGACCTTTACTGGATGA
+CTTCATCTTCGCAGATGAATCCTCTCACCCTCCACAAACACAGCCACCCTGCGGTGTGTACACATGCACT
+CATAGGTGCCCGCCCCTTCACACCTGCAGCCTCCCAAGTTTGTGAGCTCCAGGGTGCTAAGAAAAGTATA
+CATCTGAGAGCCCGGATCACTTTGAACTTCCCGCCTGAGTCCTAGTCCCACCCTCCAGGCAGATCCCCCT
+CTCACTCCGCGGGTGTTCCAATAGCCCAGTGTCATAGGCATGGCGCTGATGCTGTGGTCTTCTCAGACTG
+GCTTCAGGTAGGTTGAGCTGGAGAATTTTAAGGAGGTGCAAAGGGAAAGGTGGGTGGGGCCTTGTAAACG
+GTCCCGCCCTCAATGCCCTAACCCCACTCAGGCACTGCCTTTCCCTTCAATTGTCCTCAGAGACTTTGAA
+CCCGAAGCCCTTTGGCTTCACCCACAGCCCGGGAGAGGGAGAAGTAGGATCCTATGCACCTTCAGGCCTT
+GGATTCTGAGCAGGTTCACATGAAGGACCTCCTCCCCCCTCAGCCTCTTTAGTGCTTCCTGTCACCGCCC
+CCTAATGGGTGCCCCCAAACTTGAAAATGCAGCTTTCCATTCCTAGCAGAACTTAGGTTTTCACCCCTAT
+ACTGTGGAAATCTCTAGGAGAGAGTCCCAGATCTCAACAGAAGATAAAGTAGGGTGCCCCTCGGGCAAAA
+GTTTAATTTAGCATTGACAGCTGCCAAGCTCTATTACCCACCACAGCCTTATTATTCACCTAGAATCATA
+GGACAGTCCATGAGATAGGAAGGACTCCAGAAGAGGAACAATCAGAAAACAAGTACTCCCAGTGTCTTCC
+TTTAGGAAATTTGGAAACTTCATCATTTGCTCATCAAGCTAGTGACCATATAAGCCAGTCACTATTTTTG
+GCCATGTTTGTAAGCTAAATCCAAGAACCAGATACCTAGGGCACAAGAGATCTAAGACACCATCACAGTG
+CAGGGTACCTGGCTGGCAGAACTTCCCATGGGTTGCACTGTGCAAGGGAGCAAAGAATGCACAGCTGGTG
+TGTGCCATAGAGGGGGAGAGCATGTGCCCTAAGAGGCTTGCTTTCAAGGGACAACAGGATGCTGTACACA
+CGGGATCCTGTGTACACACTTTGCACGGGACACAACGAGCCCAAAGCTAGACCCTCAGCTGTCCACATCC
+CTAGATGCTAGACCTGGCCCAAACCCCGGACATATGTTTGTATATGACAGGGTTGCTCAAATGATCTTTC
+AGTTACCCTGAAATATATGCCCTGTAGTCCAACCTTCACAAGGTAAGTGTCCTACCAAGAACACTGTCCT
+ACCTGTCTGCTTTCTTCCTTGGAGGCAACAGACTCTTTCAAAACCAAGAGGATAACATCACCTTGTTTAT
+AAAAACTTATCAGATATAATGAGAAACAAATGACAGAAGAGGAGACTAAAATGAGTTTACAAGTCAAAGG
+CCAAGGCTGTTGCTCTGCAGGACGTACATTTTGGGATTAAATCATCCTTTACTGGGACTACGGGGTATGA
+AGGCAACATATCATGTCACCCACATAGGAGCTGTGCAGTGAGGGTGCAGGGGACAAGCAGAGGCCATCTC
+ATCCTAGGACATTAAAGAAACCACATGTTCTGCAAGTCTCTGTGAGGATGCACACACACAGAGCATGTCA
+CTGGGGCTCACAGTACCTGTGGCAAGTGCTGCCTTAGCATTTTACATGTCCACCTGCCCACTTCAGGTTC
+TCTGAATGGCCACGCTCAGAAAGTACATAAGACCTTAAGGACAATGCCCAGAGTTAAGCTTGCCCACAGA
+AAGACCATACTATTTACACATGCACAAAATTTCCTGGTTCCATGAGCTCCTAGGATTTAGATCTTTAATC
+GTTTGCTATCCTAAGGATCTCAGAAAAATGAAATCTTTGGATTTACCTGAAAACTGACTTTTAGGTGTCA
+AGATGAATTAAAACATTAAGTTTGAAAGACAGGAAAAGAGCAATGAAACAATCTTTACATGGAACAGAAA
+TTGTACCATAGTGTTCATCCAATTTGGTTCTTCCTTTATTAAACAGTCAGTTCCTAGCAAGGAGATCCCA
+TGGATAAGAAGGCCCTTCAGGTTGACTCTGAACTGACTGGTAAAGACCACAAGTTTGGTGGGAATTGTAG
+AGAGAAATAAATTTGCAAGTATGAATGAGGCACATCAGCAGATTGTCATGCGCTGTGGAAAGTAGAGAAG
+GAGGAGTGGAAGAAAGCAGAAGGAATCCATACTGTCTAGGACAAGGACTCAGAGGGCCTGGGAAGGGAGC
+CAGAGCCCTGGAGCCCTTCAGAGGACAGAATCGCTCTGCAGACAGGGATAAGGGCTAGGACAGCAAGTTC
+TCAATGCAGGGTCCCCAGAGTCCGAGATCATCATCTCTTTGAATGAGAAAGGTCCCTGAAAGGCAGCAGG
+GAACCCTGGGGACTGGGGACTTGGAGAGATCTGAAGCTAAGAGTCACAGCCAGTGCTGTCCTGGAGAATT
+AATACCAGTGTTGATCACTGATCCTCTGGTTGTCTCTGTCTGTGCACTGTCACAAGTTCACAGTGCAGCC
+AGCCCTAGTGTCTTAAAAGGACACTGTACAGGAAAAAAATCTAAATGTGTTGGAGGAAATTTGGCTTATA
+ACCTATAGAGTGACATCTGAGATGGATTCCATAATTCCTTGCCAATGGGTGACATTCAACATGGTCCATG
+CAAAAACACAGAATTCTATGAATAATGGCTTCTGGGGTTTTTTTTCTTTTTAAAAACATCACAGAAGTAA
+GTGCCAACCAAGTGTTCTCTGCCTCATGGAGACTCTCTGAAAGTCTCTCACATGCTGACTGTCTAGAGAA
+ACTCAGGAAATAGAGATGATACATAGTTGAAGAGGAATTTTAATCCAGCAACATGGTTACTCCTGCTGGA
+ATATATACACACCCTTAGCACACCCCTTTAATCCCAGATAATGCTGTCTAATTGATGGACAGACAAAGTA
+CAACCAGAATAAGGGAGTCTGTGCTGCCTGTCACCAGTGAGCAGAAATGGGAACTGAGTCTTCAGGTTGT
+GCTTTCATTGGAAAACAGGCAATGTGCAATGATGATGGGTTGATTCCCTAGGGAACAAAAACTGTTTTCA
+TATCTTGTCTGCACCCAGTGGGTTTTTATAGAGTTAAATAGTAGCTTTGGGGAAGCAACCATGTGGCATG
+GAAGAGACCCCAAGTAAAAGCCTCATGCCTTGTATTTTCATTCGTTCTTCGGACATTGGATTTCAAGCTC
+TTTTTTTTTTTTTAATGCCAAGTCTAATAAGCATCTGCTTGCATCAGCCCTGCACCCTCTAGATATCACT
+TGGGCTTACAAATTAACTCAGAGCAAACAAGCCATGAACCAGGCATATCTGCAGATTCCAGTTTACAGAA
+GTACAGCGTGCCCCTATTCCTCAACTATCCAGGTATCCCCACTACTAGTTATCAGACTTATATCTATAAA
+AATCTGTCTAATATAAAAGACATCCTGCCACTGTGCATGCAGCTGGCCATCAACACAGCTTCTTTGAGGG
+AAGAAAATTTTCCAGAACAGTTATGCACATCAGAGTACATCATACGCAAAAGTAACTTATGAGTATACTC
+ATAAATTAATGCTGAAACGAATAAAGAGCAGGCAGGAAAGCTAGGCAGGCTTCTCTGAGGCCTTTGAGGT
+GAGGGAGCCAGGAGGTGAGAAAAGACAGATCAAAACTGAAGAAATGTCCTTGCTTCATCATATCAGAGGC
+AATTTCAACAACTGCCGTAACCTTCTATGAGGTCTGAACACCTTCCAGCACACTGTGAGAACAGCACATT
+CTCTGGCACATGAGACATGAGTGATAGTGCCTCACCTGCCACGAGGATGCAGATGTAGTTAAGCATCTGT
+GGAGAGTGGATGACACCCTATGTGCAAGTTGTACATACATCAAGACAGAAAATAAGCTAGAGAGACACAC
+AGTAAGAGACAGGAACCAAGGGAGGAAAGAAAAGAGAAGGGAGGAAGGAGGGAGTAACTGAGAAAAGAGA
+ATGAGGGAGGGAGAAAGAGGGAGGGAAATAGAAAAAACTTTATTTAAACACATTAGCATTTCAAGATGGG
+AGAAAAACAGCCAAGTTTAATGTAAAACGCTTTGTCTAAAAAAAAAAAACACACACACATACAAACATAC
+TAAGAAGAAAAATATTCAAACTTTCTCTAAGACTCACGAAAACCTGTAACTATCACAAAGCATCTACAAC
+ATTGGTAGAGCCTTACTCCAACCAGACAAATACAAAACCAAACACAGAGGTAAGAGAGATGGGACTCAGT
+TGGGAGAGTGCTGATCTGGCATGCACAAACCTCTGGGTTCCATCCCAGAACCCCATAAACCAAGCATAAA
+GGAGCTCACCTAATGTCAGCTCTCAGAAGGCAGAGGCTGGAAGATCAATAGTCCAAGGCCATCCTCAGTT
+ATACGGTGAGTTTAAGGCTAGCTACCTGAAACGTGAGGCTCTGCTTCACCTGCAAGGAAGTGGATATTAT
+AAACTCATAGATATTAGAAAAAGATAAGAGACTTAAAAATCTGCACAAATGATTTCAAAACTAGGAATGC
+ACTTCTTCATGTACACAATGCTACAATGAACTGTATATAATGTAAATACAACAAAGTGTAGAATGAACTA
+TTAAAACCCAAATGAAGCAATAATGCGTAATAAGTAGGGGAACAAAAGATATTTGGTTTTCATGAACTAG
+CTGGATGGTATTTCAAAGCCTCCCAAAATTAGTAAGCTTGACTACCAATTGTCATGAGAGTACCAATTGT
+ACTCTCAGGTCTGGTGTAAATGTCCACCTGGGAGGCAGCATGATGATGGAGGCTGTAAGTCTCTGTTCCT
+TCCAGGTCTGACTTCCCATCTAGGCATTGCAGAGAGAACATGGTGTCTTGGTTGTAACTGATTCTGTGCT
+CAGACATGGCAATGCAAACATAGAGAAAGATGAAGTAGAGAGAGAAGAGGAGAGCATGTAGACACGACAG
+CTGAGGAGCACAGGAGAGACTGGAGCCAGTGCAAGCTGTTTTGTATATTCTGGGTGTCACAAAACAGACT
+TGGACACTCAGCGATCCTGCACAATTTGTCCAATCATGACTCTTGTCAGGAGGAAATCTGAGATCCAGAA
+AGCTTAAGTAATTTCCTTCTAACATAGCTAGTCAGTAGGACAGGGCCCAAGTCAGACTGGCTTTGTGAGG
+GCCATTCTGTGAGGTTCCTAAGCCCATACCTCCTGTCTTGCAGGAGACACTATTCTTCTCTGTTTCATCA
+ACATGTTTCTGGCTCAAAGCTGAGCTCCTTCTAGCCTAGGTTTGCACACAACCTAAACTTCTGGCAAGAC
+AAAGGAGTCCAACAGGTCAGCACAACATAGAAGAAATCTTACCCACAGACCTACCAAAGACAGGATGAAG
+TAGGACACTGGGGGAGGGTTCAGGAAGAAAGCCTCATCAGGCTTGTAATCAAGAGAGTCCAAGCTCCCCA
+ATGTGGTGTCTGAGGTCATTCTCACTGAGCCAAAGCTGACCTCATCTCCAGGCCTGATAAGGGACCATAC
+TGGATGATACCCACATATGCCATTGACCTTCTACTAGAATGGTAAGTGTCACAGAGTTTAAAGAGTTCAA
+AGATACCCATGACTTTCTGTCCTCCAAAATCTAGGTGCACGCAGCCTCAAGTGCAATCCTGAATGGTCCC
+TCCTGACCCTCTCCTCCCTCTTTCCTCACAGCCATGCACAATCCCTAAGGCCATTAGCCTAGACCAGAAC
+CAGCACTGATGAGAACTAGCTGAGAAAGTGTGTACCTTGATGGGTGTAAGGCAGAAAAAATAGATTCCAG
+ACTGAATATTTCCATAACATCTCCAACTCCACGATATCTAATAAAGTAGGTTGTGTGTTAAAAATTAGTT
+AGTGACCCATGGTTATCTATAACAGGAGGGTCTGACTTCCCAGAACCCAGAAGGTACCACGTACACATCG
+GAGTCTTTCTATGTCTGTCTTGTCTCTGTCTGTCTCCATCTCCCTGTCTCTCTGTCTCTCTCTTTCCTAC
+CCTCAGGATGAGGACATGTTTCTCTATCTCAATGTACCTGACACCATGCCTCATCAATAGTGGTATTGTA
+CTGGCTGGTTTTGTGTGTTAACTTGACACAAGCTGGAGTTATAATAGAAAAAGATCCTCCCTTGAGGAAA
+TGCCTCCATGAGATCTAGCAGTAAGGCATTTTCTCAATTATCAAACAATGGGGGAGAGCCCAGCCCATTA
+CGGGTGGTGCCATCCCTGGGCTGGTAGTCTTGGGTTCTATAAGAAAGCAAGCTGAGCAAGCCAGGGGAAG
+CAAGCCACAGCCTCTGCATCAGCTCCTGCTTCCTGACCTGCTTGAGTTCCTGTCCTGGCTTCCTTTGGTG
+ATGAACAGCAATGTGGAAGTGTAAGCTGAATAAACCCTTTCCTCCCCAACTTGCTTCTTGGTCATGATGT
+TTGTGCAGGAATAGAAACCTTGACTAAGACAGGTGTGAAAATGAGAATCTGTCCCATTTTACAGAGAATC
+CATGTGCTACAGGGGAAAGTACATGATAGCCAGGTGTCTGTTACTTAGACAAAGCAGGAGAGGAAAGCGG
+TCCAAATGAGGACATATTTATTTTTCCCCATGGCCTCAGAGCTTTCGGTCCATGATCTCCTGGCTCCACG
+ATGGGAAGAACATTTACTGCAAGGATGAGTGCTGGAGAAAAGCTGCTCACCACAGGGTTTGAGGAAGCCA
+AGAGATGAAAGGGACAGGTTGTGATGTACCCAGCCATGCCCACCCCAACATCTACTTCTTCCTAGTAGCC
+CACCCCTGGGTTTCCACCACCTCTCAATAGCCTTTTATTAATCCACACCTACATCACAGCATTGATGGTC
+TTAGATTCATGTTCCCATCACCCCAACTCTGAACACTATTACTCTGGGACCATGCCTGTATTGAAAGCCA
+TTGAAGGAGAGCTCAGATCCATCTCTAACAGGAAATGATGGGTAAAATGAAACTGCCACACCACAAGATG
+TTCAAGGTGTAGTCCTTGAGAAAGGACACCCAGCACAACCACAACTTTGGAAGTCAGTGCTTCTTAGTGG
+GGGCTCTGATTAGATAGTGTTCTCTGATAATCATCTGCACCTGTGAACATTTATTAGTGACATGATTGTG
+AGAAAAATACCTGATATAGACAACATGTGAAAAGGGAGGGTTTTAAAAGATATGTTGCATGACTGTGCCT
+GTTTTGTTGTTGTTGTTGTTTGTTTGTTTGTTTGTTTGTTTGTAGGGGGACTAGGAGAATACTTGTGACA
+ATCAGAGAGTAACTTGTAGGAGTCCCAGGTACTGACCTTAAGTCATCAGGCTTGGAAGCAAGTGCCTTTA
+TCCGCTGAACCACATTACTGAGCACATGAGAGCAATTTTGGTTCATGGATCCAGAGGGGTTTCAGCTCAT
+CTTTTCAGGGAAGATAGGGTAGCAAGAGCAGTTCTCTTCCTTATACCCCGTGCCTGGAGAGCAATGGCCA
+CCCCTCAGTATCTGTAACATCGTCAGTGTTCAGAGCGGTCCTCCACACACTTCTCTTTCTCCTGAAGCCA
+GAACCAGCTGCCATCCCCCTCGTGCACAAGCTTTCTGGCAAAGAGGGTGTCCTGCTGAGGCTCTGAAGGC
+CCCTGGCAGAGAGGGTGTCCTGGCGAGGCTCTGCAGGCCCCTGGCAGAGAGGGTGTCCTGGCGAGGCTCT
+GCAGGTCCCAGCTGGGCAGGATGAACAGCCACCTTCCTGAGCCCCGGCTTGGGTTATGATATAAGCTTCA
+AGTTAATGAGTTGGCATTGGTAGAAGCTTTAACACATCAGCATTGGCTGGACTTCAGGTGAAGCTGATGT
+CTACAGATATGATGGGAAACCAGATAGGAACACGGTACTGTCGGCCTGCCGTGGTGGTGCACATCTGTAA
+TGCCAGAACTCATTTGCTATTTTTCAGAACGTGTAGTTTATCTGTCTAGAAACACCCCCCCTCCCCCGCA
+ACACACACACACACACACTCTGAGTTTTAAAGTCTCAACTGAAGAATCTGTGGCTACTGTGATGTGACTT
+GCTGCTTTTCTCTGATCTCCTTCAACAAGATCATGGGAGTTCATAGTGATCCTGAGGCCCGGTCACAAAA
+ATACGGCAGGAAAGCTGGCCCAGCAGTAAAGACACTTGCTGCTCTTGCAGAGGACCAGAGTTTGGTTCTC
+AACACCCACCCACATGAGATGGCTCTTAACCACCTGTAACCCTTGTTCCAGGGGATCTGGCCACTGAGAA
+CACCGGAAAGGGCGTGTGCATGCCCTCACCCAGAAGAACACAGACACATACATTTAGAAGTCTTTAAAAA
+TTAATTTCATGTCATTTTAAACTCTGGAGAAAGAACAAGAACAGCATCAACAACCCACTGGCCCTGTGTG
+TACACTGACTGTATCCCACAACGGACGGACTGAAGAGGCAAGACACGAAATTGCTCCTAGATTAAGAGCT
+TTGCCCAATAGAACTACAGAAGAATGAAACAAGCATCTGAGGAGATTTCTATAAAGCCAAGGGCTAACTG
+AGACACAGAGGAAGGAAGGAGGCGGGGCAGTGGTGGCTCACGGCTTTAATCCCTGCACCTGGGAGGCAGA
+GGCAAATGGATCTCTGAGTTTGAGGCCAGCCTGCAAAGACTGCAAGGTTAATGATGTTTCAGAAAACTGG
+TTGACTTGATTGAACCAAAAAACACACAGAGTTCAGAAACGCTATGTAGAATTCAGAATCCAGGGTTCCA
+TGACAGTCCATCTTGGAGATCTGGCTCTCGTTGATTGAGTCTGTTTAACCCAGAAATGCACAGGCAGCAT
+GAATCAGCTAGACCGGAGCCCTTGGCTGGGGCTGAGGCCAGAGGTAGAGAGAGTTCAGGACAGGACTTTA
+CTCTCTGGGAACTGGAGGTCCACTCTGGGAAGGACAGGCACTGTGTTGAGATAGGAGGGCCCTGATAACA
+GTGCTCACTTGATACAGGGGTTTGCAAGGTCAGTGATACCAACACGAGGAGTGGCCTGGATTGTAAGTGG
+CTGCAAGGAGTTCAAACAGGAAACAGGACCTGCTGGTTCATCCCAGTTCAAAGCACTGGATGATTCAGAA
+AGGCTGGAGAAGAGATTCTGGTCACCTGGGTGCAGGTGGCTCTAGTTGGCTTCAGAATGTCCAAGATGTG
+TCTTCTCTTCTGCTCATGTACACCACACAGAGTGACACATAGTACTTCGTCAAAGCAAGGTCATGAGGGT
+GGACACTATCCCCCAACTTTTATCCTAGGCTGCTTTCACACAGAAACACAGAGACATGATGTTTGTGGGT
+GATATAACACAGGATGGCATTCATCTAGGTGAGCGTGTGCCTAACTATGCCACACACTGAATTCTGCACA
+CGATGAATGGTCCCTGCTTGAGCAGCATAATGGGAACTTGCTTCCTTAGCTTGGGCTCTCCTGCATGTCT
+GTCTGTCTGTCTGTCTGTCTGTCTGTCTGCCTCTTTGTGTGTCTGTGTGTCTGTCTCTCTGTCTATATGT
+GTGTGTGTATCTGTGTCTGTCTCTGTCTCTCTGTCTTCAGTTCTCAGTGCTTATGACACAGCATCCTATG
+AGGCGGCAGGGCCACAGTCAGAAAGAACTTCTATACCCAAATGATGCCACGGAACAAGACCACTCAGCCT
+CCTGGATGCTCCCCATGGAAGTTCTGTAAGGAAAGTGAACTGTTTTGCAGATTGGGGTCTGTTCCTTCTC
+CTCTAACACACTAGGAGTGAGGAGCACTCAAGAAGAAGGAAGACCACAGCGTGGATACTTCGGTCATTCT
+TAGAAGGGGGAACAAAATACCCATGGGAGGAAATACAAAGACAAAGTGTGGAGCAGAAACTGAAGGAATG
+ACCATCCAGAGACTGCCCACCTGGGGATCCATCCAACCCCAGACACTACTGTGGATGCCAACAAGTGCTT
+GCTAACAGAAGCCTGAAATAGCTTTCTCCTGAAAGCCTCTACCAGTGCCTGACAAATACAGAAGTGGATG
+CTCTCAGCCAACCATTGGATGGAGTACAGGGTCCCTAATGAAGGCCCCAAGGAGCTGAAGGGGTTTGCAG
+CCCCATAGGAGGAACAACGATATGAACTAACCAGTAACTCCAGAGCTCCCAAGGACTAAACCACCAACCA
+AAGAGTACACATGGAGGGACCCATGGCTCCAGCTGCATATGTAGCAGAGGATGGCCTTGTCAGTCATTAA
+AGGGAGGAGAGGCCCTTGGTCTTGTGAGGGCGAGATACCCCAGTGTAGGGGAATGCCGAGACCAGGAAGA
+GGGAGTGGATGGATTGGTGAGCAGGGGAGGGAGGAGGAAAAAGGGGGTTTTCAGAGGGGAAACCAGGAAA
+GGGGATAATATTTGAAATGTGAATAGAGAAAAGGTTTGGGGAGTGTTAGCCATGGCTGTGTAAAACCAGC
+AGGTGGCTGCAGCCAAACTCAGGGGGATCCAGTGCCAATGCTACAAACATTCTAAAGTGTCAGGACTTCT
+AGTGGATTCCAGCCACATCAAGTCATTGATCTTTTGGGCTAAGTTCGAAGGCCTTGCCTTACAACTCTGT
+TACCTGTAGCTTTATTTTCCTAGTTTTGCCACAATGCTTTAAAACTTGTTTTCAGGCATTCGGAGTGAGA
+CATGGAGCTGAGCGATCGCAGCGGAGAACGAGATGCCTGTGGCCGTGGGTCCCTACAGGCAGTCCCAGCC
+CAGCTGCTTCGACTGCGTGAAGATGGGCTTCGTCATGGGTTGCGCCGTGGGTATGGCGGCCGGGGTGCTG
+TTCCGCACCTTCTCCTGTCTCAGGATCGGAATGCGGGGTTGGGAGCTAATGGGCAGCATTGGGAAAACCA
+TGGTGCAGAGTAGCAGCACGTTTGGCACTTTCATGGCCATTGGAATGGGCATACGATGCTAATTAGGGCT
+AGGATGCCCTGCAATATCTAAACTTCCCCATCCATTTCGACCCTTGTACAATAATAAAGTTGTTTTCTTC
+TTGTTTAAAAAAAAAAAAACAAAAACAAAAAAACCAAACTTGTTTTCATTCCAAATTCTCTTCGTTTGCC
+ACACCTTCCCCTTTACATAGAGAATGATTCAGATTTGGTTTGCATAGAGATCATAACCCAGAGTTCCATT
+TGCCAAAAGTGGAGTGAATATGGCCCACAAAGCCTAAATACTGGGCCCTTATGTGGGCCGTTTGCCAACC
+TTGGTTCTAGGTTCATGTCTACTTTGGTGTGGTTTGGTAATTTTATTCTCTCTACTCAACGTTATCATAG
+TGCCACTCCTGACTTCACTGCATCCTTTCCAGCTTTTTGTCTTTGTCTTCTAGTTAAAATTTAACACACA
+TTGACTGAAATAATGTGAACTTAACCAGTTTCACGAACATTTCACTTCTATGAATTTTTTATCTTATGAC
+CTTTAACTTTTAGTTCCTTATATATTTTAGCAAGCCTGCCTGTTCCTTAGCTGTAGTAGGCAACAGCAGG
+CAGGGGGCATGAAGACAACACCTGTCAGGGCTCTCATCCAGATGTGCTACCTGGTACAGGTAAGTTGCAG
+GTGCTGCCTGACAAAGGTCAGGCACTATGATCCAGCCCTGGTGTAGGAGCATGCTCAGTAGGTACTTCTG
+CAATTTGTCCAAGTCCTTCATGGACAACTTGATGCAAAATGTCCACAGCGTCTGTGACCATACACTGTGG
+CACTGGGGCCCAGCAAGCCAGGCCTGCCCATCCCTGATCATGCTCTTCCAAGGCAGCTGCAGCTTCTTAA
+TGAATCTGCTTAGCTCAGAGACTGCTGGTTGTATGGAGGGACCAAAATATCTCTGGAGATAATGCACTCA
+AATTCAAAATTTTCCAATCATAGAGCATTTGCCAAGCCCTGCCATGCATATTCAAGATGAATAATCCAAA
+GCTGGCACTGTGGTACACACCTGTAATGCCAGTAGTTGGGAGGTAGAAGAAGGAGGATTGTGGGATCAAG
+GGCCATCTCAGGCCCATAGTGAGTGAGGACCCAGCTAGTATTGGAGACATGAGATCCTGGATCAAAAACA
+CCAGCAACCAGAGGCTGGACATGGGTCAGAGGTGAAGAGTACACTGCTCTTAGATCCGTTCTGTTCCCAG
+CACCCACAGGGCCATCTGTAATTCTAGCTCCAGGAATTTGATGCTCTCTTCTGGCATCTGCAGGCACCAG
+TAACCCCTGTGGTGGTCATACACAAATACACATAAAGTGATAATAAATATTTTAAAAAGAACAATGAAAA
+AAGTGGAGAGAGAGAGAGGGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAA
+GAGATTCCCTCAATTAAGTTCTGCTTGAGTTGCAGACAGTAGAGTCTGAGAACACTGATGTGGGATGCTC
+CACTCAGGAAGGAGAATGACGAGGAGTGCAGAACATGCTCACCCAGGGCCTGCATGCTCCGCCCCTTCCC
+ATCAGCACAAGATCACTCTGAAGCATAACGCAAGCTGGCTACACATGGACAACTGTGGTACTGCACTCCT
+CAGAACAAATTAGAAATGAAGGAGATCTCTCTGATGGGAATCCCAGATTAAGGAACAAGGGGACATGGAG
+CAGGGACATCAAGGACTTTAAATAGCCCAGAGTCCCTACATCTCTACCTCTAGCTCCTCCTGCAAGCAAC
+AAAATAGACACCACCCAACCAAGTCACCACAGCCAGCCCTGAGTGTCACAGAAAAATAATACACTGAACA
+AATGTGATTCACACTTTATTGAGAACCAAATGGGGCTTAGCTGCTTACTCTGAGGACCAAGGCAGCACAC
+AGACAGATCTAATCTCTCAGGCCAGGCCGGGGATTCTCGGGTCTAGTACATTCCTCCAACCATCACTGAG
+GGAAGGGCTGGATGCAGGGCAGAGACAAGGGATAGGAGGAGACTAATGAAGAAATGGAGAGCCCAAGGAC
+CCAGAGGCTTGGCTGAGGGAAGGGCTGGGAAGAGGAAATGGAGGAATAGGGAATGGAAAGGAGAAGAAAG
+CTGCACGTGGTAAGAGCTAGGGCCAGCAGAGCACTCATGAGTATTCCCCCAGGTCTGGGCTTTGTAATCC
+TACTTTGGGTTCCAAAAGGCCATCTTTGCAAAGATTGGCTTGGAGAGGAAGCGGCTGCTCTTCATGTAGT
+CAGATATCTTCTTCAGGCCCTGTGGGGAGGAGGAGAAAGCAAGAATTGAGGAACTGGGGCTCCAGTGGGT
+GCCCAGGTTTCACTCTAAGAACCCCAACACAGCCCAGGCTGCTCAGGTGCCCAGCTCTGGTAAGGGAATG
+CTTTAATTTCCTGTGACGATGGAAAACTGGCACTGCGTTCAGCTGTGTGTCCGTCTGCAGGTGTGAGGGA
+GAGAGAGCACTCCAGAGAATCCTGCAGAACAGCGCTGTGCTCTCTGCATTCATCAGGAGGAAGCCTGACA
+CACAGGCTCAGCCAGCGGGAACTGCAAGAGTCTGCATCCCTGCCCACTCCCTTGGTTCACTCACCAAGCA
+AGAAATTCCTTGTTGCTATCTGTTTATACAAACTCCCGATTATTTATTTATTTATTTATTTATTTTTACA
+AGCAAGCTCCTTCTTGCCTGTGTTTCCACATTGGGGAGGGACCAAGCAGATCAAAGGGACCAGTAACATG
+CCAGTGTACTTTGGAAGACAGCAAAGATCTCTCCCAGTACCTCAGTAAATCCTGACTGAATGAAGCTAGA
+GATAGGTGAGCCCGCACGGAGACAATGACATGGCCTTGGCCATTCTCCTGATGCAGAAAAACCTAAACTC
+GGAACACTGGCATCTGAATGCATACTTAGTGTGTCCATCCCATTTCTCTCGGCCCTGCTAAAGAACATTC
+TACAGAATGGCATTGTGCTGTGTGCTATCTTTCCAGTACAGTGCCCAGATAAGCTGTTATGAATTCAAAG
+TTCCAGGGCGGCCTGTACCCAGGAGACGAGGCTTGGCTGGCCTTGGACTGCAGCCTCCCACACTCTCCCA
+CGGCACTGTCCAACTGTTCTCACACAATCTCCAGAGCCGAGAAGCCTGGAGCAGAATGAAAACTAAGAAA
+AACACATCACCTGTGCAGCAAGGACAGAAAAGAAGATCCCATGCTAAGATTCCCATTGGACCATGGCCTT
+GATCCCCCAAATCTCTAATAAAGTAGGGTTTGTGCCTATCACACGTGCACTACAGTCAGGCATGCCTGCC
+ACTCTGCATGCCTGGACAGGGATGCATGCTTCCATGCACTACTGAGCTCAGAACTGTGCCTCATAGAAGC
+AACACTAGAGAAGATGGATGAAGATGGCAGTGAAATCCCACTATAATGATTTTACAGACAAACATCAAGA
+TGTTGCTGAGGAGGAAAATAAAAGGAAAGGAACAGGATGCCTAAATATTGAACAATTTAAGATAAAATCC
+TGCAGCAGCAGTGAACTGTCTACGCAGCACTAGCTGTAGTCCTGAGTCCCAGTGCTTCCCCAGTCAGGGA
+CCCGACTAAGCAAAGAATTCTACAGGCCATATGGATATAGCATCTTGGTACTGTACATCAATCCTGAATA
+GTACACCCTACACACTCTGGCCATGTCCTGGGTTTCCATAGGCTTCAGGGCTGGAAAATCCAGGCTCCAT
+GACAACTTATGACTTCTCTCCTTTGGCATCAGGTGGATCTACTGCCTTTGTTTTTTGGTCACGAGAAGCA
+CACAGGAGAAAACCACAACCCCTTCCAGTGGTACTGGTTCCCTGAAAAGCTGACAGTCTTCACAATCAGC
+TCCCAACACGATCCAATGGAGCTGATAAGTTCACACAAAACGCAGGGTGTGATGGGAAATTCCATGCAAG
+AGATGAAAGAACTGAACACAGTTCCATGCTTAACAAAAGCTAATCATTTTCGTTTCCATTTGAAGGAGTA
+GAAAGGCAGTCTGGAAACAGGGGAGGGAAGTAGGGCATTCAAGAGGAAGTGAGATCAGGGCATCACCTCA
+AAGCGACCCATGAAGTCCTTCAGGTTTGGGAAGGCATCCAGGCACTTGGGCTCAAATATTCGGTGTTGAT
+CAAGGACATCATAAACAAGAAAATCCACATAGGTGACCTGCAGGAAGCCAAGGGTGAGTGCTCTGGAATT
+TGATGCTTGGAAAGGATGGCAACCCCTCCTTCCCACCCCCTCCCTTTACCTTGTTCCCTGCAAACCATGG
+CTGCTTGCCCAGAAACTCAGAGTAGAGCTTCATCTTCTCAGGGAGACCCTCTAAGTACTCTGGCTTCTTT
+TTCTCCTGAAGCAGAAAACAACTGTCACCACTTTCAGACACAGACCCCCTGGCAGAGGCTGGCCTCACAG
+GGCTGTGCAGGCCCCAGCAAGCAGAGGAGAAGTCACCTACATGAGATTCAGTCAGGGATGATATCCAAGC
+ATTTAGTTAGAATGATTAGACAGCCCAACAGGATACAGCCCTGTCTCCTGGTCTCTACCACCATTGTCAG
+GACCACTGTGCATGAGACGGCTTTGAGCACATGGCTCTGAACTCTGACCTTACATGAGGCAGAAGCAAAC
+AACTGAAATCGTCCAGGAAACAGGCCGTGCTCTACCTACTAGTGCGGTGAGGACTTCAGTACAGGTTCCA
+AAGAGATGGGGTGAATTTGAGAGGACAAAAGGAAACAGGCGCAGGATCAAAGGCTTTTGGCTGACTGAGT
+TGCCTATTGGGGACATCTGCTTATCTCTGAGTGTGTCAAGGACACAGCTCTGGGAAGCCATTTAGGAGGA
+GAGTGTCCCCTATCCTCAGTGTCTTCTTCCCATGAGTGTGCAATCTGCAGACATGGCAAGACTCCAGGAC
+AACAGAAAGACATGACTCAGGACAGAACCAGCACTTAGACCTGGAGGGATCGAACTCTGTCTGTCCTATC
+AGGGACTAAAGTGTCACTGTCGAGAGTCTGGACTGGACCTTAGGCCAATGACTGGAAGTTTTCATTTGGC
+AAACTAAGCTTTGAGGTTACTCTGTTGGCAATTTCAAACACTGTGCCATGAGCAGCACTGAGAGGCTCTG
+GTGGCAAACAGGGCCAGAGCAGATGCAAGGGCAAGAGTGTGGGGCCAGCTCCAACTCACAAAGTCAGGGC
+TGTAGCAAACCATGGCCAACTGTATGCGGGTGTCCATAGCCTGGTTCTCCAAAATGTCCACACGAATCCT
+CTCCTCCTCTGTCTCTCCACCTGCAGCACACACAGGCCAGACTCACCCTCAGCATCCCACTCCAGATAAG
+CCTAGAAAATGACTACTATCCTCCACCCTGCAGCCAGCCCCACTCACACAGGTTGTGCTTTCGGGCAAGG
+TAGCGCAGGATGGCATTGCTCTGGGTGATCTTGTGTGATCCATCAATCAAGTAGGGCAGCTGGATGGACA
+AGCAGGGAGGGTCAGGTGGGATATGGGGCACCTGAATCCTTTCTCTGGGTGAGAGCAGACACAGGGCCTG
+ACACTCAGCCTGCCTGCCATGGGAAGCCCTCCATGGGCACAGTGTGCACTGAATGGGAGAGCTGGGACAC
+AGCCCATCACAGAAACACTGTGCAGCCAACAGGGATGGAGCAGGAGAGCAGAAACACTGACCCCCAACCC
+TCAGCCAGGCCAGGAGATGAGGTGAGAACAGTGTGTCTTCCCCAAACCTCCTTTCCTCCTCCACCTACAT
+TGGGAAAGTCCAGGCCCAGCTTGAACTTCTCACTCAGCCACTGGCTTCGGTCATAGTCAGGAGCTGTGGT
+GGACAAGAGGATGTGAAAAGAAATCATGCCAGTATCCCAGCTTCCTTCTCTAGGGAAGGAGTCAGGTATG
+AGAACACCCCCAAGAAGATCTGCAGCCTCACCCAAGAGTAGACAATGCTTCTGAGGACTGTGGACTGTAT
+ATCACTAAGATTTTCAAAAGGTTTGAATAAGCCAATTAGTTGAAAAGAGAACCCAGCCCCCCAAATTCAA
+AACAGTATCTGGAAGAGACAGTTTCTAAATCCAACTCCAGGAGCTCTGTAGAGCTGAGCAACAGGGTGTC
+AGAGCAGTGTCTCTAGGCACCAGTTAGCAGGGGAGCAGGGGCCTGAGAAACAGGGTGTAATCACCGTCCC
+CCATGGTGTATTTCTTGTCCTCATAGCTTGTGTCTGTGTATTCCAGGAGCAGGCGGATGGCGTGAGCCAG
+CTGGAAAGATGACATATGACAAGTCACAGATTGCAAAACCCTGACAGTCTGACTCTGCTGATTCTTGGTG
+AGCCTACATCTTACACGTAACTGTCTCCCCACAAGCTCACACACACATACCACATAAGCACAAGCTCTCG
+TGTTTGAACTTCCAACAACTTCTGAACTCAGTCTATCTCCTGGAAATCAGTTCCCTAGCTACTCCCCACC
+CCACCCCTGTCCCACCCAGCAGAAGCGTTCTCACTCACCCCACGGATGTCCCAGTAACCTAGTGTCATAG
+GCATAGTGCTGGTGTCCAGAGCAGACCAACCCGGGCCTTGCTAGTCTGAGCTTCCTGATAATGTCTTAGG
+CAAGGAAGAGGCGGGACCTGGATATGGCTTTCTATTGGACCATTGGCCTACAGTGTCTCAAATTTCAGAC
+ATTCCAGTTGAAGGAAGGAGCCAGGGATCCAGGGGCAGGGCAAGATATAGCTAATCTCAGGAACCAGTCT
+AAGCAGTCAGAAATAAAAGATGGGAATGGGCTAGCACCTGTGTCAGCAACTCCCTCTTCTTGCTCTAGCG
+TGCTGTTTCCATGGAGAACAGTTGTGCAGCTAGGGACATTCTTGATGGTCAAAGTGGAACCCTGTTATTA
+TCTTCCTGACATTGTCCCTTCTTCATTATCAACTCCCTTCCTGGGTTCTGGGTGGCTCCTGGTGGCTGAG
+CCAGGACAAAACAGGACACCTCAAGCCTGAGAGATTAATGTCTCTGTGGTCTATAGCTTATCATAGACAG
+CAGAGGGCATGCAGGACTCTGCTAAGCTGGGCAACAGAGCCACATACAGGTACGACCAGTGGATGCAGTA
+CAGATATTCAGGATCACTGGGCACTGGAAAGATGTAAAGCAAACCACAGTGTGATACACCTTCCCCAGAA
+AGAACTGCCATTCGCACATGGGGGCTGGTGAGACTGTGGGCAGGAGAGAACATTCGACACTGCAGGTAAG
+AGTATCAACTGTCACAACCGTTAGGGAAAACAACCTGGTGTTCCTCGAAAAGACAAAACTAGAGCTACCA
+CGTGATCCAGCAATACTCGATATACATCCACAGGAAATGAAATTAGAATGACAGGGGAACACAATTTACC
+TGAGCTAAAAAATAGATGGCATCTAAGAACCCATTCTCTGACAAATGTACGATGAAAGTGCAGTCCACGT
+GCACCATGGATTACTAGTCACTCTATAAAAAAGGGGAAAAAATAAAACTAGATATCCACACACAGAGAAA
+CTAGAAGTGAGGGACTGGAGAGATGGCTCAACAGTTAAGAGCACTAATTGCTCTTCCAGAGGTCCTGAGT
+TCAAGTCCCAGCAACCATGTGGTGGCTCACAACCATCTGTAATGGGATCCAATGCCCTCTTCTGGTATGT
+CTGAAGACAGCTACAATGTACTCAAAAAAACAAAAATAATAATAGAAGTGAGATTCCTATTTCTCATCCC
+GGGGTGGGGGGCAGACAGGAAGGGATACAAATAAATCAATGGCCTTAATGTAAGACTTGAAGTTTAGGCC
+AGCAGAGTCTTGCCCAACACCCGCAAGGGCCCACACGGGACTCCCCACGGGATCCTAAGACCTCTGGTGA
+GTGGAACACAACTTCTGCCAGGAGTCTGGTTCGAACACCAGATATCTGGGTACCTGCCCTGCAAGAAGAG
+AGCTTGCCTGCAGAGAATACTCTGCCCACTGAAACTAAGGAGAGTGCTACCCTCCAGGTCTGCTTATAGA
+GGCTAACAGAGTCACCTGAAGAACAAGCTCTTAACAGTGACAACTAAAACAGCTAGCTTCAGAGATTACC
+AGATGGCGAAAGGCAAACATAAGAATCCTACTAACAGAAATCAAGACCACTCACCATCATCAGAACGCAG
+CACTCCCACCCCACCTAGTCCTGGGCACCCCAACACAACCGAAAATCTAGACCCAGATTTTAAAACATTT
+CTCATGATGATGATAGAGGACATCAAGAAGGACTTTCATAAGTCACTTAAAGAATTACAGGAGAGCACTG
+CTAAAGAGTTACAGGCCCTTAAAGAAAAGCAGGAAAACACAGCCAAACAGGTAGAAATCATTAAAGAAAA
+ACAGGAAAACACATCCAAACAGGTGATGGAAATGAACAAAACCATACTAGAACTAAAAGGGGAAGTAGAC
+ACAATAAAGAAAACCCAAAGCGAGGCAACACTGGAGATAGAAACCCTAGGAAAGAGATCTGGAACCATAG
+ATGTGAGCATCAGCAACAGAATACAAGAAATGGAAGAGAGAATCTCAGGTGCAGAAGATTCCGTAGAGAA
+CATCGACACAACAGTCAAAGAAAATACAAAATGCAAAAGGATCCTAACTCAAAACATCCAGGTAATGCAG
+GACACAATGAGAAGACCAAACCTACGGATAATAGGAATTGATGAGAATGAAGATTTTCAACTTAAAGGGC
+CAGCTAATATCTTCAACAAAATAATAGAAGAAAACTTCCCAAACATAAAAAAAGAGATGCCCATGATCAT
+ACAAGAAGCCTACAGAACTCCAAATAGACTGGACCAGAAAAGAAATTCCTCCCAACACATAATAATCAGA
+ACAACAAATGCACTAAATAAAGATAGAATATTAAAAGCAGTAAGGGAGAAAGGTCAAGTAACATATAAAG
+GAAGGCCTATCAGAATTACACCAGACTTTTCACCAGAGATTATGAAAGCCAGAAGAGCCTGGACAGATGT
+TATACAGACACTAAGAGAACACAAATGCCAGCCCAGGCTACTATACCCGTCCAAACTCTCAATTACCATA
+GTTGGAGAAACCAAAGTATTCCACGACAAAAACAAATTCACACAATATCTTTCCACAAATCCAGCCCTTC
+AAAGGATAATAACAGAAAAGAAGCAATACAAGGACGGAAATCACGCCCTAGAACAACCAAGAAAGTAATC
+ATTCAACAAACCAAAAAGAAGACAGCCACAAGAACAGAATGCCAACTCTAACAACAAAAATAAAAGGAAG
+CAACAATTACTTTTCCTTAATATCTCTTAATATCAATGGACTCAATTCCCCAATAAAAAGACATAGACTA
+ACAGACTGGCTACACAAACAGGACCCAACATTCTGCTGCTTACAGGAAACCCATCTCAGGGAAAACGACA
+GACACTACCTCAGAGTGAAAGGCTGGAAAACAATTTTCCAAGCAAATGGACTGAAGAAACAAGCTGGAGT
+AGCCATTTTAATATCGGATAAAATCGACTTCCAACCCAAAGTTATCAAAAAAGACAAGGAGGGACACTTC
+ATACTCATCAAAGGTAAAATCCTCCAAGAGGAACTCTCAATTCTGAATATCTACGCACCAAATGCAAGGG
+CAGCCACATTCATTAGAGACACTTTAGTAAAGCTCAAAGCATACATTGCACCACACACAATAATAGTGGG
+AGACTTCAACACACCACTTTCTTCAAAGGACAGATCGTGGAAACAGAAACTAAACAGGGACACAGTGAAA
+CTAACAGAAGTTATGAAACAAATGGACCTGACAGATATCTACAGAACATTTTATCCTAAAACAAAAGGAT
+ATACCTTCTTCTCAGCACCTCACGGGACCTTCTCCAAAATTGACCATATAATTGGTCACAAAACAGGCCT
+CAATAGATACAAAAATATTGAAATTGTCCCATGTATCCTATCAGACCACCAAGGCCTAAGACTGATCTTC
+AATAACAACATAAATAATGGAAAGCCAACATTCACGTGGAAACTGAGTAACACTCTTCTCAATGATACCT
+TGGTCAAGGAAGGAATAAAGAAAGAAATTAAAGACTTTTTAGAGTTTAATGAAAATGAAGCCACAACGTA
+CCCAAACCTATGGGACACAATGAAAGCATTTCTAAGAGGGAAACTCATAGCTCTGAGTGCCTCCAAGAAG
+AAACGGGAGACAGCACATACTAGCAGCTTGACAACACATCTAAAAGCCCTAGAAAAAAAGGAAGCAAATT
+CACCCAAGAGGAGTAGAAGGCAGGAAATAATCAAACTCAGGGGTGAAATCAACCAAGTGGAAACAAGAAG
+AACTATTCAAAGAATTAACCAAACGAGGAGTTGGTTCTTTGAGAAAATCAACAAGATAGATAAACCCTTA
+GCTAGACTCACTAAAGGGCACAGGGACAAAATCCTAATTAACAAAATCAGAAATGAAAAGGGAGACATAA
+CAACAGATCCTGAAGAAATCCAAAACACCATCAGATCCTTCTACAAAAGGCTATACTCAACAAAACTGGA
+AAACCTGGACGAAATGGACAAATTTCTGGACAGATACCAGGTACCAAAGTTGAATCAGGATCAAGTTGAT
+CATCTAAACAGTCCCATATCACCTAAAGAAATAGAAGCAGTTATTAATAGTCTCCCAACCAAAAAAAGCC
+CAGGACCAGATGGGTTTAGTGCAGAGTTCTATCAGACCTTCAAAGAAGATCTAATTCCAGTTCTGCACAA
+ACTATTTCACAAAATAGAAGTAGAAGGTACTCTACCCAACTCATTTTATGAAGCCACTATTACTCTGATA
+CCTAAACCACAGAAAGACCCAACAAAGATAGAGAACTTCAGACCAATTTCTCTTATGAATATCGATGCAA
+AAATCCTCAATAAAATTCTCGCTAACCGAATCCAAGAACACATTAAAGCAATCATCCATCCTGACCAAGT
+AGGTTTTATTCCAGGGATGCAGGGATGGTTTAATATACGAAAATCCATCAATGTAATCCATTATATAAAC
+AAACTCAAAGACAAAAACCACATGATCATCTCGTTAGATGCAGAAAAAGCATTTGACAAGATCCAACACC
+CATTCATGATAAAAGTTTTGGAAAGATCAGGAATTCAAGGTCCATACCTAAACATGATAAAAGCAATCTA
+CAGCAAACCAGTAGCCAACATCAAAGTAAATGGAGAGAAGCTGGAAGCAATCCCACTAAAATCAGGGACT
+AGACAAGGCTGCCCACTTTCTCCCTACCTTTTCAACATAGTACTTGAAGTATTAGCCAGAGCAATTCGAC
+AACAAAAGGAGATCAAGGGGATACAAATTGGAAAAGAGGAAGTCAAAATATCACTTTTTGCAGATGATAT
+GATAGTATATATAAGTGACCCTAAAAATTCTACCAGAGAACTCCTAAACCTGATAAACAGCTTCGGTGAA
+GTAGCTGGATATAAAATAAACTCAAACAAGTCAATGGCCTTTCTGTATACAAAAAATAAACAGGCTGAGA
+AAGAAATTAGGGAAACAACACCCTTCTCAATAGTCACAAATAATATAAAATATCTTGGCGTGACTCTAAC
+TAAGGAAGTGAAAGATCTGTATGATAAAAACTTCAAATCTCTGAAGAAAGAAATTAAGGAAGATCTCAGA
+AGATGGAAAGATCTCCCATGCTCATGGATTGGCAGGATCAACATTGTAAAAATGGCTATCTTGCCAAAAG
+CAATCTACAGATTCAATGCAATCCCCATCAAAATTCCAACTCAATTCTTCAACGAATTGGAAGGAGCAAT
+TTGCAAATTTGTCTGAAATAACAAAAAACCTAGGATAGCAAAAAGTCTTCTCAAGGATAAAAGAACTTCT
+GGCGGAATCACCATGCCAGACCTAAAGCTTTACTACAGAGCAATTGTGATAAAAACTGCATGGTACTGGT
+ATAGCGACAGACAAGTAGACCAATGGAATAGAATTGAAGACCCAGAAATGAACCCACACACCTATGGTCA
+CTTGATCTTCGACAAGGGAGCTAAAACCATCCAGTGGAAGAAAGACAGCATTTTCAACAATTGGTGCTGG
+CACAACTGGTTGTTATCGTGTAGAAGAATGCGAATCAATCCATACTTATCTCCTTGTACTAAGGTCAAAG
+CTAAGTGGATCAAGGAACTTCACATAAAACCAGAGACACTGAGACTTATAGAGGAGAAAGTGGGGAAAAG
+CCTTGAAGATATGGGCACAGGGGAAAAATTCCTGAACAGAACAGCAATGGCTTGTGCTGTAAGATCGAGA
+ATTGACAAATGGGACCTAATGAAACTCCAAAGTTTCTGCAAGGCAAAAGACACCGTCAATAAGACAAAAA
+GACCACCAACAGATTGGGAAAGGATCTTTACCTATCCTAAATCAGATAGGGGACTAATATCCAACATATA
+TAAAGAACTCAAGAAGGTGGACTTCAGAAAATCAAATAACCCCATTAAAAAATGGGGCTCAGAACTGAAC
+AAAGAATTCTCACCTGAAGAATACCGAATGGCAGAGAAACACCTGAAAAAATGTTCAACATCCTTAATCA
+TCAGGGAAATGCAAATCAAAACAACCCTGAGATTCCACCTCACACCAGTCAGAATGGCTAAGATCAAAAA
+TTCAGGTGACAGCAGATGCTGGCGTGGATGTGGAGAAAGAGGAACACTCCTCCATTGTTGGTGGGAGTGC
+AGGCTTGTACAACCACTCTGGAAATCAGTCTGGCGGTTCCTCAGAAAACTGGACATAGTACTACCGGAGG
+ATCCAGCAATACCTCTCCTGGGCATATATCCAGAAGATGCCCCAACTGGTAAGAAGGACACATGCTCCAC
+TATGTTCATAGCAGCCTTATTTATAATATCCAGAAGCTGGAAAGAACCTAGATGCCCCTCAACAGAGGAA
+TGGATACAGAAAATGTGGTACATCTACACAATGGAGTACTACTCAGCTATTAAAAAGAATGAATTTATGA
+AATTCCTAGCCAAATGGATGGACCTGGAGGGCATCATCCTGAGTGAGGTAACACATTCACAAAGAAACTC
+ACACAATATGTACTCACTGATAAGTGGATATTAGCCCCAAACCTAGGATACCCAAGATATAAGATATAAT
+TTGCTAAACACATGAAACTCAAGAAGAATGAAGACTGAAGTGTGGACACTATGCCCCTCCTTAGATTTGG
+GAACAAAACACCCATGGAAGGAGTTACAGAGACAAAGTTTGGAGCTGAGATGAAAGGATGGACCATGTAG
+AGACTGCCATATCCAGGGATCCACCCCATAATCAGCATCCAAACGCTGACACCATTGCATACACTAGCAA
+GATTTTATTGAAAGGACCCAGATGTAGCTGTCTCTTGTGAGACTATGCCGGGGCCTAGCAAACACAGAAG
+TGGATGCTCACAGTCAGCTAATGGATGGATCATAGGGCTCCCAATGGAGGAGCTAGAGAAAGTAGCCAAG
+GAGCTAAAGGGATCTGCAACCCTATAGGTGGAACAACATTATGAACTAACCAGTACCCCGGAGCTCTTGA
+CTCTAGCTGCATATATATCAAAAGATGGCCTAGTCAGACATCACTGGAAAGAGAGGCCCATTGGACTTGC
+AAACTTTATATGCCCCAGTACAGGGTAACACCAGGGCCAAAAAGGGGGAGTGGGTGGGCAGGGGAGTGGG
+GGTGGGTGGATATGGGGGACTTTTGGTATAGCATTGGAAATGTAAATGAGCTAAATACCTAATAAAAAAT
+GGAAAAAAAAATTAAAAAAAAAAAAAGACTTGAAGTTTAGAAGTTGCTTGAAGGACACACGTGGAAAAAG
+TTCTAAGATGTTGGCTAGAATTTTTTTTCTTTCAGAATAACTGTTTAACAGTTCAGAAAATAACAAGATT
+TGACTAGCAGGTGTGCATCAAATCAAAGGGCTTCTACACAATACAGAGGCTCCCCTGCAGAACTTGAGAA
+AATCTTTACCCGTGATTCCTTTGACAGAGGATTACTATACAAATATGTAAAGAACACAACATCTTAGCCA
+AATGTAATGACACTGTAATCTGAGCACTGGGTGCTGGAATCAGGAGAATCATTAGGACTTTGAGGCCAGC
+CTGGGCTGCTCTGTTGTGGTAAATCTCCAACCCAAAAAAGCCCAGGAAAAGAAATCACAACTCAATTAAT
+ATGAATACAAGCTGTGTGCCTAGATTGGGCAGATCTACCGCTACACTACCATCTTCCTCTGTTGGGAGCT
+ATTAAAACAACACAATTGTCCTCATCTCTGAATTGGGCCTCTCCCCCGAGAAGAAAAGGGGGTCAAAAGC
+GGGCCACCAACGCACTGCTCCGAGAACAACCACAGATTGTTCCAGCCCTAAGTCAGCACCAGATGTCCTA
+ACCACAAGATGTACCCTGATACCACCAAGTTCCTGCTTCCCCATTTAGTAGCCAAAAGGAAAATTTCTGC
+TGCCCCATTCCTCCTGCTGAGAAGTACTTCCTTCCCCCCCGCCCCCCTGCAGCCTCATCCCTTCTGCTGA
+GAAGTACTTCCTCCTTTGCTTGTGCATTTCCGTTGCCCTATTCCTCCTGCTGAGAAGTACTTCCTCTTTT
+GCTTGTGCATTTAAGCCTTGAGCCTTGCTGAATACAGTGAGACCTTGATGCTAATCAGACTGCTTCCATG
+TGTCATTCATTGCACTTGGTTCTCATCTCTCCCTCCCCCCATTTGGTTCTTAGGAGAAGGTCCCCTCGAG
+ACCCAATAACTGGACCTGCTGGACGGGTCATTCCCCTCTATTAGATCCCTCATAACTTGCAGTTTCTCCA
+GGCCATGTATTTCTGCTCCATTGTCCTTCCAGCTCCTCCTCCATCATCATCTCTCCCCTCCTCTCTCCCT
+TCTCCCCCCAAACTTTTCAGCTTCACCTTCCCTTCCACTGCCCAATCATGGGCTCTAGCCTTTTTACAAG
+TTAAGGTGGGGAGAAGCTTCACAAGAAGTCACCTGTAGGGCTGGTGAGGTGGCTCAGTGGGTTAGAGCAC
+CCGACTGCTCTTCTGAAGGTCCAGAGTTCAAATCCCAGCAACCACATGGTGGCTCACAACCATCCGTAAC
+GAGATCTGACTCCCTCTTCTGGAGTGTCTGAAGACAGCTACAATGTACTTACATATAATAAATAAATAAA
+TCTTTAAAAAAAAAAAAAAAAAAAAGATGTGTTTCAAAAAAAAAAAAAAGAAACTCTTTAAAAAAAAAAA
+AAAAAAAAAAAGAAGTCACCTGTATAAGTGATTCACTCCTCATCTGCCACCCCTCCCAGGAAAGCAGAAT
+TAGCACCAAAATACAAGACCAGGGCTATTCACAACATTTCTCAAAGATATAGGAGGGGAAGGGGAAGGCG
+GGGAGGAGGTGTTGGTAGCTCAGTGGAAATGCTGAGTACGCAAAGCTAAGGAGTGCAAGAGAGATGAAAT
+AGAGCCCAACAAGTCCTAAGCCGCACTTAGGAGGAGGTTCTGATGTGCCTTTGCACATACCCAGTATGAC
+ACCATAGGATTTCTAGCCAAACAACTGGAAGAAAGGACTGCCTTCCTGTGATTCCACCAGAGGAAAGTGA
+CAAGTGTTTAAGGGATGGATGTTTAGCTTGATTTCAACAGAATTTCCATGTGTATCCAAAAGTAATTCAC
+ATGGTACAGCCCAATAATGTGAGTTTTTACATTTTTCTGTATGTTAGAAACAAATTTATAAATCATTTTT
+AAAGGGTGGCTGGAAAGATGACTCAATATTAAGAAGACTTGCTACTCTTACAGAGGACCTGAGTTCATAT
+TGTTATGATCTTGAATGTCGAGCAGCTTCCAAAGGACCACACCAGGCCAAACAGTTCCACGTGAGGTTTA
+TTGGGAGAGAGAGGCAAGGTGGCAGCAGAAAGAGAAGGGGAAGTAGTGCCACGGACAGGGCTCAGTGGGC
+CCTTCTCAATCTGTGAGAATCAGAGTCTTGGACAGATGGGCATAGAGTCGGCGGAAATGGGGGTGACAAA
+CAGACACGACCCGAGAGAGTGTGTTGAATCTGAGTGTAACGTCCAAACAAAAAACAGACTTTTTTTATAC
+AGAAGAAAAAACAAGAAAAGCCAGGCAGGACACATCCTCCCTAGTTACAGTGACACAAAACAAAAGGAAT
+GTATACATCAAAAGATGGCGGGAGACCAGGCCACAGTTTATAGCTTAGAATGGAGCCAGGTGTAATGCTA
+GTCTATTGTTAAGCCCACCACCAGGGATTCTTAGTAAATACCTGATTATGTTGTTCCTTTGGGCCTAGAG
+AAGAAACCTGTCCCAGGGGGGACTCCCTAACTCTTTCATGGTAAAACCACCTATTTGCTAGGCCATTGTA
+TATTTCCTTGTTTGGGTGAGACTCGGCTATTGTCCTAAATAATCACTTTGCAGACTAGCCCTGAGCTATT
+TTCTAGCTCCATTCTTTGTAATGCCTAATTAGTTTCACTGTCTCTACTAGAAGTAAATTTGAATGTTACT
+GAATAGGTAACCTTCTCACTGAATTCCTACTGAATTCCAAGCTCATCTGCTTCAAGGATTTTCTAGGACA
+TTGGAACACTGATGGAGGCTCACCTATGTTAAAATTCAATCTTTAAAGGCACTTATAATAAAACAATACT
+GAAAGAGAGCATACACCATACTGACTCGGGGACAGGGTTTTAATATATGGGCTATGAGAATGCCAAGGTT
+CCAGGAGGCTGAGTTTCCTTGAAACTCTTTGCCTTGTGACTGCCTCCAGGCCTTTCGACCCCAACAGGCA
+GACTTCACTGGAGTGGGCATAGCAGGTGGGGAGAGGGAGGGAGAGGGAGGGAGAGAGAAAGAGAGAGAGA
+GAGAGAGAGATGGGGGGGGCTTTCTTTATATATGGGTGATGATGTAATCACAGGTAAAGGTGGGAGACGG
+GCCAAGTGGATTCTGGGAATATGGTGGCTGTTGCTTTGGCAACAGGTCTGCAGGTCCCGCCTATGTGATG
+TCACGGGTTTCAGAAGTCCTGATACCAACACATATACCAGTACCCAGCCTGTAACTTGAGCTCCAAGAGA
+CCAGATACCCACTCTGGTCTTTGTGTACACCTTCATTTATGTGGACATACTCACAACAGACACATAGACA
+CACAGACACAGACCTCTCTCTCTCTCTCTCTCTCACACACACACACACACACACAATTAAAAAATAAAAA
+TGAATCTTTAAGAAAATTTAATAGTTTAAAAGTTTACAAGATTCTTAGACAGTATAAATGGAGGGAAATT
+CCTACAAAACTTGAAGGGATACAAGTTCTGAGATGTTAAAAGCAGTTCTAAGAGGGAAATTTATAGCTCT
+GGTGTGGGAACTAAAGGGGAGCAGGGGAAGGAGGGGGGAAGGGAGGATAGCCCACATCTGGCCAGAGTTC
+CTCCTATGCTCTGGGCAGGAGGATGCAGGAGAGCTGCCAGACACTTTCCACTAGGTCCCAGGTGGGCATC
+TAAGCCACTGACCCCACTCGACGGGGGGGGGGGAGCCCCCAAAGCCAGGGGGCTCTGGGGTGACACCCTG
+TAGCCCCAGGTTTATGGGAGAGAGGGCTGAGGGAGAGAGGTTCCCACACAGGCGAGAGTCCTTAGTCTGG
+GCCTTGACTGGAACACAGGAAGGCCTTCCATTGGGAGATTAGAAACAGCTCATTAGGAGAAAGCCTATCC
+CATCTTCCGAGTGCAGCAGGCATTGATGTACAGAGACAGTCTATGGTTTTAGAGCTTTATCATAGAAAGG
+CAGGGAGAAAGGAGAGAAGGTAAGGGGGGGGGCATGGCCAAGAGGAGACAAAGAGGAAAGGGAGAGAGAG
+AGAGAGAGAGAAGAAAGTCTAGAGAGAAAGAGAGAGTAAGGGGGGGGGGGAGAGAGAGGAAGAGAGTGAC
+AGATAGAGGAGAGTAAGGGAAGTAAGAGCAAGAGATTGAGAGATCGAGGAGGGGCCAAACAGCCCCTCTT
+AAAGTATACTGCTATCTTTTCTGTTGCTAGGTAACTTGGGGAGGAGTTTAGCCTGAAGGTCAGAAGTTTG
+GAAGATTGTCTACATGCTTCTCTTGGGGGGGCTGAGGGGGGGTAACTTCAACAGGATCCAGGGTTCCAGG
+GGACACGAGAGAACTTCTTCTGTCCTATGTAGGTGAATTATCACCACCAGGTCCCGGGGTTCCACATCTC
+AGCTCTACTGGAGACCAGACTGTCTGTGTATAGCCCAAATACCCAACACTCTGCAGGCCTACATTTAAAA
+ATCAGAAAGAAGACAAATAAATTACTTAATGGCACAACCCAAGGCTTGGAAAAGCAAAACAAATTCAACC
+CTAAACCCTGTAGATAGGGGATAAATAATAAAAACCAGAGCAGAAACCAATGAAACCGAAACAAAGAAAA
+CAATACAAAGACTCCACCAACCTAAGAGCTGGGTCTTTGAAAACATAGGCAAGATTGATAGACTCTTAGC
+CCTGTAAACAAAAAGAAAGAGGAACCCCCAAATTAACAAAGATAAACAGAAGATGTTGCAACAAACACCA
+AAGAATTTTAGAACATTATAAGATATTTTACAACCTATACTCCACTGAGTTAGAAAACCTAAAAGAAATG
+CACAAATGTCTAGTTTGATCCAAAGCACCCAAGTTAAACCAAGAAGAGAATACCTTAAACAGATCTGTAA
+CAGATGAGTCTCAAAGAAACCCAAGACAAGTCTCACTCAGTACCCAGTACTTTACATCCCAGCCCCTAAC
+CTAAACCTCTCCAGCCCAAGGACTGGGGTCCCCTTCCCCCTTCTTCTGATTCCCGTATAACCCAGCCATT
+TTGGGCGTGTATTCTCTTGGGCACCCACTGCTCTCTTGGCTCCTGGTTTCTCTCTTGGCCTTGGGAGTCA
+CTAAAAAAAATCATAGCCTTGGGAATAAAACTTTTCAGTGCCCTGGGCAGCATAATGGGCTCCTCAGAGG
+GAGCCACTGACAATGGGTCTTCAGTCAGTGATGACCCTGAACTAGGGAGGATGGCTCCAGAGCTGAGACA
+GTCTCCTGGACACCCGGCCAGAAGAGATAACAAGGCCCAGAACAGGGCTGCTCAGTGAAACTCTTCAAGG
+TCTCACCAAATATAGTGAAATTAACATAATAGCCAATCAAAGCTGTACTAATGTCATTGCTTTGCCCATA
+CCAATCCTATTAGAAGTTACCTAATCCTTCTGGAAATTCCCCTGACCCTGCATAAACAGGGGCCTGCAAG
+GCCCTAGAGTTGTCAGCATTTTGATAAATGATTGACCCTGCATGCTGGTAATTTCTGCAGAGTTAATGTT
+CTTTGCATACTATTTGAGTCTTGGGTCTTCCTTCAGTGATCATTGGACCCTCTTGGCCTGTGGCTCCTCT
+CTTGGGCGCCTTGCCTTCTCTCCTCTTTTCCCTCCCTCTCTCATTCTCCCTCCCCACTTCTCTCATGGCC
+CAGCTTAGTCTGGAACCTTCCAGAGGCCTCTGGTTATTCTCTTCCTTTTCATCTACAATAAAATCCTTCT
+CCTCAATCATACCTAGGAGAGGTTATGTTCTCATTCTCTCTCTTTCTTTCTTTAACTAGAAACTGGCATT
+TACTATATGTGGAGGAAGGAGCCCAGGGACCCATACGTAATACAGGCACATGGCGTCCTAAGCTCGGCCA
+CTATGTGACTATCCTTGTAGCCACACTTCTGATACGGGGTGGTCTTTGGTCCTCATCATCATAGGTTCCT
+GGGACATGTCTGCAATGATCTGGTTCAGCCTGTCTACTTGTGAGTGATTCTGTTGATGTAGATGCAGCTG
+TTTTCTGCTTCCTGCTGGTAGTTTACAATTCTGGCACGCTTATCGCAGAATGTGGTTCCTTGTCCTCCTT
+GGGGTACAACATGTTATTACTGTGGAGAATTGGAGGTTGGAGGTCAGCCTGGAAACCAATCCTCACTGCC
+TTTCCATGCCAGCTGCCTGGACTGCACCATTCATTCCTGGAAGAATAGAATACCCACAAAGCAGGGCTCA
+TAGGTCTGGTCTGATTCCCCAGCAATGGGGCTGCCCATGTTCTCATTTTCATTCAACAAAGAGAGAAACA
+GTGATAAACAGCCTTCTGATGTAAAATTAATAATAATAATAATACTTTTCATAGGCTCAAGGCCACCTGA
+TGGACTCACAGAAGAATTCTTCCAGACCTCCAAAGAAGAATGATAACCAAGACTCCTTAGATAATCCAAA
+TAAAATAGAAATAAAAGGAGCAGTTTTAAATTCTTACAAAGTCAGTATTAGTACCAAGCCAGATAAAAAC
+ACAACAACAAAGAAAGCTACAGGCCAATATCCCTGATAAACATAGATGCAAAATTTCTTAGTAAGATGCT
+TATGAAAGATCCTGACAGAATGTAAAAGGTCACAGAAGCCCTGAAATTGGCAAGATAGATGTCACTGTTA
+GCAGAACTAGCTTCACTGATTTAGAAAAATAGAGGTGCACAATGCTCTGGTCACTCCTCGAACCTGTGTG
+TCTGCCAATGTTCTGACCAGGTGTGTGCCCATTGCTGCACCTTCATTAGACTCTTTCCTTGTACCCCTCC
+CATACCCATTTCTTGAGAATAGACATTGTTTAGATCTGGAAATCCCCTACTCTCCCCCTTCTCCTTTCCC
+CCCTGAGGGCCTATAAAAACTGGGACCTCTTTCCCCTCGAGATTGACTCCTCTACCCCTGCGTGGGATAT
+GAGTCATCCCCAGAGCTCTGGCTTTCCCCGAATAAAGCCTCATGTGGTTTGCAACAAGCTCGGTCTGTCG
+TGAGTTCTTGGGTGTCCACTATTGTCCTGAGGCCTGAGCGAGGGGCTCCTCTCGGAGTCTTTCACTTAAA
+AACTGAATATAGTTATATATCAAAAAGATCATCCATTACAATCAAGTTGGCTTTCTCCTGGAGCTCAGGA
+TATATAAAAATCAATATATATAAATCAATAAATATAGCAAATAATATAAATGAACTAAAGACAAAAATCA
+CATGATCATCTCAATACATGCAACATACCATCATGATAAAATTTCTAAGTGCACAGGACTGGAGGGAACA
+TACCTCAACATAAGGGCTATATATGGCAACACCACAGCTAGCCTCATATTAAATACAGAAAATATCAGAG
+CAATTCCATTAAAATCAGGAACACACGGGGTGATCCACAAGTCACATAATGTGTGACTCAAACCAGAGAA
+GGAAATTAAAGGGATCCAAGCAAAAAAAAAAGAAGTAAAATTATTCACATTTGCTGATAATATTTATATT
+ATATATAAATGATCACAAAAATTTCAGCCAGAAAACTTCTAGAAATGATCAATACTTCAAGCAAAGTGGC
+AAGACGCAAAATCGACTTACAAAGTTTAGTAGCCTTTCTATATATTAATAGCTAGCACGCTGAGAATGAT
+CATGGAAAGAACCCCACAGACTCAAAAAAATAATAAAATATATAGGAATAAAATTAACCAAGGAGATGAA
+AGAGCTCTATAATGAGCCGGGCGTGGTGACGCTTAATACCTCACCCCTGCAGAAATCCAGGTAGTATGCT
+GACTGCGATGAGCAAGCTTAGCACAGGCTAGCTTTCCTGCCACTTAACGATATCTAAGCACACTCAACCC
+TAGCTCCCTGCCTCTGTTTCTTTTTCCCCTGAAAAAAAAAAAAATTCTAAAAATGATTCTAACAATCTCA
+AATTCCGCCGAGCGGTGGTGGCACATGCCTTTAATCCCAGCACTCGGGATTATATATATATATATTATAT
+ATATTTATTCCTATATATTTTATTATTTTTTTTGAGTCTGTGGGGTTCTTTCCATGATCATTCTCAGCGT
+GCTAGCTATTAATATATAGAAAGGCAGGGGAGTCTCTGAGTTCGAGGCCAGCCTGGTCTACAAAGTGAGT
+TCCAGGACAGCCAGGGCTACACAGAGAAACCCTGTCTTGGAAAAAAAAAAAAAAAAAAGCTCTATAATAA
+AAACTTTAAATCACTGAAGAAAAAGATTGAGGACGACACTAGAAGAGAGGTCTCCCACGCTCACAGATTG
+CTAGAATTAATACTGTGAAAATGACCATTTTACCAAAGGCAAATTACAGATGTCATTCTTCAGAGGAAAA
+AAGAAAACCCCTCAAATTCCTCTGGAGGCACAAAAGTCACCAGGTAGTCAAAGGATTCTTCATCAGAAAG
+AACAAAGCTAGAAGTACAGTACAGATGTCAAGATGTAGTACAGAGAGCTATGGTGGGAAAAACAGGATAG
+TACTGGTACAAAAGCAATAGGCCAATGGAATCAGACAGAAGACTCAAACACGAGTGTGTTTAACATTTGG
+CAAACATGCCAAAAATGCATACCAGAACAGAGACAGCAACAGTGCTCGGAAACCGGACGTCCTCAGGAAG
+AGGAGTGACATTAGATCCATATCGATCACTGTGCTCAAAAATAAACTCAGAGTGTGTCAAAGCTCTCACT
+GAGTGGGACCTAAATCACTGAGGCTGCTGGAGGAAAATGTAGACCGTTCCTTACAAGGTGCACGTGGAAG
+GACGGGCCCTTGGAACAGGATTTGCTTCTCTCAAAAATTAAGGCCAACCAGTGAAAAAATGGGACTCAGT
+AGACCAAAGAGAAGGCCTGCAGACTGGGAGGAAACTCTGTAACCTAAACATCTGATAGAGGGTTAATATA
+CATAATAGACAAAGATCTCAAAATTCAAAGAAACGAGGAGTGGGAAAATGGAGAACTCTTCCTCAGGAAG
+GTAGAGGGAGATAAAGACAGAAGGAGGGAGGGATAAAATAAGAGTAAGGATGTAAGATTACTAATAAATA
+CTATTAATAGTATAATAGGCATACTATTAATTTTCTACCTAAAAATACCTTTAGCATATGTAAATTGGTC
+CATAATATAAATTTATACTTCAAATAAAATTTTGCAACTAGTCTTGTGATGCTTCCCCCAAGAGCTATAG
+GCAAACAATACCCCAGCACCAGTCATGAGCAACCCTCTTTTGATCTGTTAATCAGGGTTGTCCAAGGATT
+TCTCAAAACAAAATGTTATAGCTATTGATATAGCCCTTGGTTCCCACCTAGTGGTGGAAGGTAAGTCTCT
+ATTTCAGAGGCCCCCAAGTTGGAACAGACCTGAAAGCTGCCTCCCTGAGAACTACATATCATGGTACCAG
+AAGGGACTAAGCAAGCTCCCAAAGGAAGGAGGCAACCAGCAGTCTTAAACAACTCTGACAACTATGAACC
+ACGACAATGACTGGCATGGCATGATATCCCTGAGGGCACGCATACCTTGGCGGTAACCAATGGCTTTCTT
+ATCGGATTTAAGGCCTGCTAACCAAGACAGAAATTAGGCCTGGTACTGCAAACCTAGCCAACTGTCCAGG
+GCTAGTGGATTTATGGACCTTGGAGGAGAACTTAAAACTATCCCTTCATTAAACCGGCATAATAACCCCT
+ACTATATGCTTGTCCTTATACTCACAGGTAAGTGAAGTCCTCACCTCTCATCAAGGAAACTTCTTAAGAA
+AAGTAACAGAGACTCTTTCAGAAAACCACAACCAATCAAGTCAAGTACTAACTGATATGTCTACAACATA
+ATTCCTACACCCAAGGCTCGGAGATCACTGTAGAAGAGGGTGTGCAAAGACTTTAAGAGCTAGAAAAACA
+GGGAGTTTGCTGTGAGACCGTGTCTTTAGGAATGTCAGAAGCTATACCCAGGAAGCCTCACCAGCATGGT
+GGCCTAAACATGTGCGGAACAAGGATGTCACCAAAAGACAGGCCAAAGTGGGCAGTGGAAAAACTCATGA
+GGTCTCAAACCTAGACAAATAACTACAGGCAACGAAGGGATGCTGAGAGTGGGAAAACAGCCTAATCCCA
+GGAGGAAATAACTATCTAATACCAAATGATCAGCCCGAAAACATGTACATATGTAACATTATATGAACTG
+AGTGGGTTATATTTATATATTAGTAAATAAAGACACACACCAACAATTAAAGAAAAGGAGGCCATGGTGG
+TACATGCCTTAAATCCCAGAACTCAGGAGGCAGACTTAGGTTATATAGGTTATTCTCTGTGAGCTCAAGG
+CCAGCCTGGTCTACAGAGTGAGTTCCAGGACAGCCAGGGCTACACAGAGAACCCCTGCCTTCAAAAACCA
+ACAGAATAAAAAAGAAAAAGAGGCCATGAATTTAAAAGATAGCAAGATGGGGCTTCGTGAGAGAGTTTGG
+AGGGAGGAACACACACCTTTAGTGCTAAGTGAAATAAAATTTCTTCAAACAAGAGCCATAACAGCTGCAT
+GCGTTGCATAATTTTATTCTTAATTTTTCCTTTACACTTTTTTTTTTAAAGTGAGTCTCCGTCCAGTAAT
+CTCCGACCTGTAAGAGTAGGTAATTTGCAGCTTACAGCATCCTTGACAATGGTCAGAGACTTTCTGAACG
+GTCACCACTGGGAAGCACTGCAGCCTGTAGTGGGTGAGGGATGGGAAGCTGATGAATGTCCCACTATGCT
+GCCTGTAGATCTCCCCTTCTCCTTCTGTTCCTAGGAGCTCTGATCCAACTCCTCAGTCTCGTTCCCAGCA
+CTATAGAACACCATCTGGGGCAACCTCTCCTTATTGTCTCCCCCATCCCAAAAAGACAAGATAAGTAAAT
+CCTGGCTGAACACTCTGCTCCTTCCTGAAGCTTGTTCAGTCCTTAGCCCATCCCCATCCCTAAGTGTCAG
+GTCCCAATCCCCAGAAATACATTTTGTCAGGAATACCTAGTAGACACAACTATCAGGTACAAAGAGACTG
+TTTTGCCAAGCAACATTTGTCCATCATACTCTCCAGAGCTCCAGTAACAAAACATAAACCAAGGAACAAA
+ACATCCACACAATAAAGACAAATCCAGAAATCACCATCTAGACCTATAATCATCCCAACCTCAGAGGCCT
+GGATGCCAGCATAAAAATACAAATAACATCCAAGGCAGTATGTCTCCACTACATTCCAGCTAGCTTACCA
+CAGCAGGCCCCTCGCTAACTGCAGCACTTAGGAGAGTGAGCCTTGCGCCTCACCTACACAACTCAGGCTG
+GCCCTGAAAGCAGAGTACCAGTTAGCCCTCGAACTTGTGAGCCAGAGAGAGCTGGCCCTGCTTCTTGCTT
+GCTGTGGCATTGGGTGAGCTAGCTGGGGCAGTACTGAAGAGCTCCTCCTGGTAGAGTGGGTATGGGAGAG
+CTAGTGGGCTGGCCAACTTAACTACCACCCAGGCCCAGACAGAGTGATTTGAGTTGGTTCACCCTAACAT
+CTACCCCATCTATGAACTGCTGGAGTGCATGAAAGGGCGGCCCTGCAGATCCAAAGCTGCAGGGTCTCCA
+AGACACAGGGGAACAACAGGGTATCTGAGAGGCATCCTGGTGAGGATCCAGTATTGTAGCAAAAGCCAGA
+CGCCTCAAACCAAAACAATGATTCATTGCAATGTTCATTTGCAAGTAAAGATGTGGTGACAAAGATGGGA
+CACATTGTCATAAGCTACAGCTACCACAGTGAGATGCTTTGTTCTGTTTTCTTTTTCTTTTTTCTTTTCT
+TGTATGAAGGGAGGTTGCGAGAGCAGAGGGAGGATATGAGGATGAGTGGGGTTGCAGTGTATGATATGAA
+GTTCACAAAGAAATAATTAAAAGGTTTTAAAATTATTATATATATAAGAAATGGGACCTCATGAAATTGA
+AAAGCTCCTGTAAGGGAAAGGGCACCGTCATTCAGACGGAGCATCAGTCTATAGAATGGAAAAAGATTTT
+CACCAACTCCACATCCAATAGAGAGCTAATATCTGAAAATATACAAAGAACTCCAAAAACTAGATATCAA
+AAACCTAAATAATCACATTTAAAAATAGGGCATAGAGCTACTCAAACAAAGATTTCTCAATAGAGGAAAC
+TCAAATGGCTGGGAAACTCTTAAAGAACTACTCAACATCCTTAGCTATCAGGGAACTCCAAGTCACAACT
+GCTTTGAGATTCTATCTTACACCTTTCAGAATGGCTAAGATCAATAGCACAAGTGACAGCTCATGCTGGC
+AAAGATGTACAATAAGAGAAACACTTAGCCATTGCTGATGGGAGTGCAAACTTGTACAGCCACTATGGAA
+ATCTATATGGTAGATCCTTAGAAAGCTGGAAATCAATCTACCTCAAGATCCAGATATACTACTCCTGGCA
+TATACCCAGAGAAAACTTTGTCCTACCACAAGGATACTTGCTCAGCTATGTTCATTGCAGCTTTATTTAC
+AATAGCCAGAAACTAGAAACAACCTAAATGTTCCTCAATACAAGAATGGACAAAGAAAAGGTGGTACGTT
+TACACAATAGACTATTACTCAGCAGTTTAAAAAGTGACATCATGAAATTTACAAGTAAGTGGGTGGAACT
+AGAATGATAAAAACATTCTGGGTGAGTTAACCCAATCCTAGAAAGGTAAGTATATGTAGTCACTTATGAG
+TGGATAGTAACCATTAAATAAATGAGAACTAACTTACAATCTGTAGACCCAGAGAAGTTAGGTGTAAGGG
+AAGAAACTAAGGGGGATACATGGGGTCTCCCTTGGAGAGGGAAATAGAATAGATTGTATGGGTGGACTGG
+AGAAAGGTGGGGACAGAAACAAAAGGATCAGGTGGGGAGGGGGAGGGAGATAAGATTGAGGAAATGAATA
+AGAGGAAAGACAGCAAGAATTAAGGAGGATTTGAGAGTTGATAGGGAAACCCAATGTAATAGAAACTTCC
+TTAAACATATGAAGGCAATTCAAATGAGGTCTCCTAATAGTGGAGGTAGAGAGTCCCAGTTAGCCATCTC
+TTGTCACTAAAAGAGCTCCAATACCAGTACTGGGTTGCATTCAATTGAGTTGTTGGCCAAGGGAGAACCA
+TGAAAATCCCCAAGCAAGCCAGGCTGTTGCTAAGACAATGAGTTGCTCTCTGCAAACCAACAACAGGGCC
+CCATTGCTGAGAACAATACCCACACAACTCATTGAACATGAAGAAGCAGAGGTGGTGTCTATATAGAGAC
+TTTACCCCTACATTCCAGTGTCTTTGGTGTGGGAAGATACTCTGCAGGCTACCAAAGGAGAAATGTAGAC
+ACTAATCCTTCCACAAAACCTTTGACCTACAATCTGTCCTTTTCTACAAGATATGCTAGAACAACAGTGG
+CACAGAACTGGTGGGAGTAGCCAGTCCATGCCTGATCTGACTTAAGGCCCACTCCATAAGACTGAACCCA
+TGCTCAACACTGCTTGGGTGACCAAGAACCAGAGACCAGATAGCCCAGAGACCTAGGGTAAAACCAAACG
+CTATTGGTCATATGTATATATGTGTATGTACACACATACACACACTATATATATGTATATATATATATAT
+ATAGTGATGGTTTGTATATCCTTGGACCAGGGAGTGGCACCATCTGAAGGTGTGGCCTTGTTGGAATAGA
+TGTGACCTGGTTGGAATGGGTGTGTCACTGTGGGTATGGGTATAAGATCCTCACCCTAGTTGCCTGGAAG
+TCAGTCTTCCACTAGCAGCCTTTGGATGAAGACATAGAACTCTCAGCTCCTCCTGCGCCATGCCTGCCTG
+GATACTGCCATGCTCCCACCTTGATGATAATGGACTGAACCTCTGAACCTGTAAGCCAGCCCCAATTAAA
+TATTGTTTTTTATAAGACTTGCCTTGGTCATGGTGTCTGTTCACAGCAGTAAAACCCTAACTAATACATA
+TATATACACACACACTATATGATATTTTGCTATACCCTAGATTGGTTCCTTATTCAGTCATTATCAGAGA
+TGCTTCCTCTTGCAGCAGAGGGGAAAAGATGCAGAGACCTACAGGTAGACATTACACAGAGAGAGACTCC
+TTGGAACACACAGCTCTATATGGGATATCTCCATCAAATCTCTCTCCTCAGAGCTCAGGGAAACATACAA
+AAAAAAAAAAAAAAAAAAAAAAGAGGCAGAAAAAGTATAAGGGCCAGAAAAAGTGAAGGTCACCAAGAGA
+ACAAAGCCATCTGAATGGACTAAGCAAGGCTCATATGAACTCACAGAGACTGACACTGCAAGCAGGGGAC
+CGACATGGTGTGCACTACATCCTCTGCATACCTGTTATAGCTTTCAGTTTAGTATTTCATGGGACTCCGA
+GTATGTGAGTGAGTCTCTGATTCTTGGGCCTGCTCTTGCACTTGTTTCTTTCTGTTGGGTTTGCCTTGTC
+CAGTCTCAATGTGATGGGGTTTGCTTCATTTTATATTTTATTTTATCATTTTGGGTTGTGTGCTTCTTGG
+AAGCCTATTCTCTTCTAATGAGAGACAGAAAGAGGGTGGATCCAGAGGGGAGGGGAAGGGGGAAAGAACT
+GGGAGGGGTAGAGGGAGAGGAAAGAGAGGAAACTGTAATCAGGATATATTTTATTTTATTTTATTTTATT
+TTATTTTATTTATTTATTTATTTATTTATTTATTTATTTATTTTGGTTTTTCGAGACAGGGTTTCTCTGT
+ATAGCCCTGGCTGTCCTGGAACTCACTTTGTAGACCAGGCTGGCCTCGAACTCAGAAATCCGCCTGCCTC
+TGCCTCCCGAGTGCTGGGATTAAAGGCGTGCGCCACCACGCCCGGCAGGATATATTTTATGAGAAAAGAA
+TCTGTTTACAATAAATTTTTAAAATGACAAAAACAGATTATTAACTGGGTTTGCCTGTCCAAGATTAAAT
+CTTATTTTCCAAAAGCACTGGAAGGAAAAGCAGGAGAGCCCTCTTGAAACGGTAAACCAACTGAGGACTT
+ACACTTGCTCTTCAGAAGGGACCCCAAAGGTCACCTGTGAACTGGAGATCCAGAGCTCTTACTATTATAA
+ACAGCATTCTCGTGAAAGATGTTTGGCAGCATTGTGACCATTTTCACCTGCGCCTATCTGGTAAAGCCTT
+TTTGACAACAGCCTGACTCCTGAATGACTAACCAGATGTGTCCCTTAAAAAATCCAACAAATCACTCCTG
+GGATATCATACTCTACACAGCCAACGCGGCTGTGAGACTTTCCAGTCCTTCTTATGGAGAGGCTCTGGGG
+CCCTTTTCTAACTGTTCTAAATTTGTGTGTGTGTTAAGTCCTTGATCATTTACAACTAATGTCTTCCAGA
+GTCCAGGCCCTAAGACTTCAAACAAAGCTTCACCACCATCACCACATCCTTCCACCAAGGGCCGTCGGAT
+CCCCTGAAGCAAGGCCATCTTTTGCTAGCACCGTTTGCTAGTAAGACACTCAGGAGGGGGTCAAGCCGAA
+CCCTTTTACCAGTGTCCCTGTCTGGATCTAAAGCAGTGAGGAACAGCCATCACCTGATACTCCCCAGAAC
+CGAGTCCCAAAATCCTAATGAGAAAAACATTGAAAAAAAATCCCAAACGTTTACATTAAATATGTTAAAA
+TGTTAGCGTTAAAAGGAGATAAATCATCTTACCGCTGGGAATAAGAGAGACAAAGTTAACCAGATTTTTA
+AAAAATACTCAAAAGACAAAAACAAACAAAGAACCCATACACAACAGAATCTTTACAGTTAACCTTTTGC
+TTTATTACATTTGACCCAAACTATAGTAATTCAATACATAAAAAGATCCTACCACACATGGCAATTTTCC
+AGGCTGGATTTATTGTACGTTGTGTTGATAAAGTGTTGCGTGCTGTTTGTGCTTTCTGAGCAGCTGGATC
+TAGATAAGGGCTGCATCCTGCCCCCCACACCCATTTCAGAACCAAGCAACTTTGTCTCAAAGTTTGCTCC
+TGCCAGGCCCTTTGTCTGGGTTTGTTATGGCACCCTCCCCAGCTCTGTGACAGAGAGTCTCAGAGACTGG
+GAGTCAAAATCATCCACCTGGAGAACAGTAAAACCAGACACAGACGGTTGTCAAACCCATGTCATAAAAG
+TGCCTACAGTGGCTGGGTGGCAGTGGCACATGCCTGTAATCCAAGCACTCACTTGTGGTGCAAGCCTTTA
+ATCTGGCGCATACAGAAGCAGGCGGATCTCTGTGAGTTCGGGGCCAGCCTCGTCTACAAAGCAAGTTCCA
+GGACAGCCAGGGCTACACAGAGAAACTGTCTCAAAAAAGGAGAAGGGGGGGGGGGGGAGAAGAAAGGAAG
+GAAGGGAGGAGGGAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAACCAG
+AAATTCATTGGTGGAAAGCTAAGCCATGGAGTGAAACCTCAATCCAACCCTACTTAGTTCCCCAGCCATT
+ATTGAACACCTAATTTGATGATGCTTTCCTTACTATTTTTGTCTGTCTATCTGTCTGTCTGTCTGTCTGT
+CTGTCTGTCTATCTACCCATCTATCTATCAGTCATCTCTTACTATTAATTCTTTTATTTTGCCATATACT
+TAAGAAAGCTTTCAAATTTAAAACCTAAAGTAGGGCTTCATAAATCATTCTCTGAGTGATACAGAACACA
+AGAGCAGGCAACTAAACCCACTCACACATCATAATTCCTCCAGAGCTGCAGGGTAGGGCCACGCAGGATC
+CACTGAGCTTTCCAAAGAAGTACACCAGGATGAGAAGAATGTGTGGTCATCTTCCACAAGCCATTGCCAG
+TGGCTTCACTAGGACCCTGCCTTGGGAACGCCAAGAGTGACTGCATAAGTGCCTGGTGATTGTCATGGGG
+CCAAGGCATCAGTGACAGAGGTAACCATAGCAGCAGTTCCTGGCAGCTGCTCATACACTCTTTCAAGCTT
+CCTCATGACCATGAGCTGGATAAAATGTGCAAGCAGACAGAGACCCATAACACACTGAGATTGTTAGGAC
+AAAGAAAGATATCTTCCCTACAAAAGATTTTTTTTCCTGTATATCTCCTTGCAGCTGTATTTTAAGGCAA
+TTCTTCTAGCTAGTCCAAGTAAGAAAAAAAGGCCTTAAATTAATGCACTTACTGTAGTATTATGCATTGA
+GTGTCTGAACGAACTTACTCATTGAACAGTTTTTGCTGTGTGTAGTGGTGAGGGTTCTCCAAGAAACAGA
+ATCAATAGAAAATAGAGTCTATATTATAGAGACAATTAATTTGGGGGTTATGTGGGGATGCATGTGCTGT
+TTGTCATGTGATTTGTTGCCCCATGGCCTGTCCATGGAAAGAGGAAAAGCCAGTACGCCATTGATATATT
+CAGTCCAGACCTGACTATATAGGAGACACTGGGACAGTCCCAGATCATGATAGCAGATGGCCAGGCCAGG
+AGATGATAGACATCTGGGAATTATAGAAGGAATTCACTCATTATCTGCCACATTTTTATTTTATTTTGTG
+TGTATGAGTGATTTTTATTTTTTTATCTTCTGCATATAGATATTTTCCCTGCATATATATCTATGCACCA
+CATGCATGCAGTACACGCAGAGGCCAGAGGAGAGAGGAAGGGCCCCTAACTTGAGACAGAAAGCACTGTT
+AGCAACCATGTGGGTACTGGAACTAAACGCGGGTCTTTAGAAGAAAAGCCAGTGCTCCGAACCTCTGAGC
+CACTCTTCAGCCCCCAAACCAGAGATAATTAAAGCAGACTCATCTGTAATCAGTTCTCCAAGAACTGAAT
+GAATTTCACAATATTAAACAGCTTCCCTGAACGGGGGGTGGGTGGGGGGTGGTGGTGGTGGTGCATGGAC
+ATTCGAGGAGGCACAAACAATAAGTAACTGAGGGACGGTATATTCAATTCAGCTTCCCCAGAGAGAAAAG
+AATTAACCAAGAAAGAAATTCTCCATGCCAGGAGAACTCTAAGCCAAAAGAGACTCAGGAAAGACAACAG
+CCTAGAGGACTAAAATGACACAGCCGTGGCATGTGCAAGGTGGAGAGACCACCCAGGGCCCCTTCTGGGC
+ACTCTCACCAGCCTCATCATTCCTCAAGGAGACACACAGCTCTTCACAGTTCCAAGTCCCAGCATGCTTC
+AGTGATTGAGATTCCCAGCATGCTTCAGTGAATGAGATTCCCAACACGGTGCAGTGAATGAGATTCCCAG
+CATGCTTCAGTGAATGAGATTCCCAGCATGGTGCAGTGAATGAGATTACTGGCAGGCACACTTTATCCTC
+TCACATGCTGACTGTCGATCAGTGGTTTTCAACCTTCCTCATGCTATGACCCTTAATACAGTTCCTCATG
+ATGTGGTGACCCCCAACCATAAAATTATTCCATTGCTACTTCATAACTGTATTTTTGCTACTGTTATAAA
+TCATGATATTCATGATAATTGATATGTGACCCTCCCACAAAAGGGATCATGACCCACAGAGAAACACTGC
+CTTTATTAAAGAGCTTTATCCCTGCTGTGAGGATCTCAGACTAGAACCAAGATATGTCAAAGCTCTTCTG
+GTTCCATTAGATGTTTCAGAGCCCGGCACTACAGGCTCAGACTTCTGGGAAGCAGCTGTGATGCTTGGTG
+AGGCTAAAGAGCAAAAGAAAAAGACACAACTCATGGCAAGTGTGTTTCAGACTTTATTGTAGACGAGACA
+GACCTGGGGAGGCTACTCCACCAGGAACAGGCTGGCACTCAAGTATTGACCTTCGGGGTAACTCTAGGGA
+GGGCTAGCACTAAGATAGTGTTGACCATCGGGGTAATTCTAGGAAGCGTGAGTTCAGGACAGACCTCAGT
+TCGCAGAAACGGGCTGTGAGGTTGGGTCAGGGAGCCAATGAAGAAGGGGCCATGTGAAAGAAACTGGGGA
+GAATGAAGGCTGTGTGGACTTGACTGGGAAGAGGGTGAGGAGATGGGGCTGACCAAGCTGCAGAAGGGAG
+CGGGAAGGAGAGAGAACCAGGAGCCACAGTGCAGAAGGCCAGGGTGCTGTCCCCACCCAGGGCCTGCAGG
+ATCCCCAGTGTGGACAGGTCCTCCTAGTGAGTGCCCGTGTAGCAAGGGCCTACTTGTTACTCCAGTGGGC
+CATCTTTGAAAATATAGGTGTTGCGATGTAGCGGCTACTCTTCATGTAGGCAGAGATCTTCTTGAGGCCC
+TGCACATTGGGAAGGACACAGCAGGAGCTCAGGTCTGGAGCACAGAGCCGCATGCATGCACGCATGCACG
+CACGGGTCTCCTGGCTCACTTGTCCTGAACTCATGCTGGAGCAGCGCTGCCTGCTTAGCTCCAGCCCAGG
+CAAAAGAACTCTCCCATTCCCCATATTCAAGGGGAGACTGGATCCTGCATAAGTTGTTTTATACTCGCTA
+AAAACTACAACAGAGGTTCGGAAAAAAAACCTAGACATCAATCTTCACCCCCCACACACACACATACACA
+CCCCCACCACGCATACCCACCCACCTCTACACACATACCCCCCCACACACATATACCCTACACCCACACA
+TACCCACCCTCCCCACATACACATACCCCCACACACAAATATATCCCCCCACATACACATACCCACCCCC
+ACACCCCCACACACACACATGCATCACGTGCTTCCTACCCCTCTTCAGCTCTCTGAAGGAGCCTCGATAA
+CTGTTTCTCTGCATGGACATGATCCTGGACCTGGGGAAGCTCTTTCTAGCTTGTATTTACTCACAGAGGA
+GGAATCTAACAGGACAAATGAGCCAAAACTTGACTATGTGTGCTTAGGAAGACAGCCAAGCACCCAGTGA
+CTCAGCAAATCATGAAGAAAGGGAACAACAAGCGAATCGGGCCAGGAAGAAAAGCTGAACAGGATTCTAA
+TCAGTCTCCTGACAGAGGAGTTAATTCTCTACCTGCTGCTCAGTGCATGCTCACTGCACCCTGGCTGAAT
+ACAGGGACCACTCCATGTGACTGGAGGTTGTACCATAAGCCATACAGCAAGCTGGAGAATACAGACAATT
+CCAGAGGGTGCGCAGAGGTCCACCACAGACAACACACACAAGCTAAGCTAAGGCACTGCTCTTTCTCCAT
+CCACCCACCCCACAGTGGTGCCAACTCTTGCCGTTTGCCAGGACACCCAACCTGAACCAGGACCAGACTG
+ATTAGACCAAGCTGAGAAAGGCTAGGGGTTTCAGTGGTGAGTGCTTGCCCAGTAAGCACAGGACCCTAGG
+GTTAGCCCTCAGCTCTGGGGAGAGAGGGGGAGAAGGAGGGAGGGGGAAAGAGAGAGAAAGAGAGGGAAGG
+AGGACTCAGCAGCCCTGGGGAATTTCACAAGTATCTCACAGACTGGCTTTCCACAGCTCTCTGAAGACTC
+AGTCCCAAGATGGCAGTGTTGTGCATGCCCCCTGAGGCCCAACATGAACCTCATCTAGAGGAGAATGCCA
+GAGAATAATAGTGAAGAAAAAACTTTGACCCCATTTCACAGACAGGCCCTGGGACATGAAAGAGGAGAAG
+AGTGTAGAGGAGGAAACAACTCACCAGCACACACTCACACCAGCACACACTCACACCAGCACACACTCGC
+ACCAGCACACACTCAGGCTAAATTTCCCCTAACATGGAAGGCACTCACAAGTCTACTCAGTGAGGTGCCA
+CCTTCAGCCCTGGGTTCTCAGTGCTCTACAGATCAGTTCTAGAGCCCTGGGCTCTCAGCACTCAAGTCTG
+GACTTTCTTCCATCTGGCTATGTTACATTAGCTCTGCCTCTTTCACTCCCCTAATGCCAAGACCCACACA
+CTATCAACATGCACAGCCTTGCCAGGGATCCCAAACAGGAGAATGAGGTTGTGCAAGGGCCTGAAGCTTT
+TTCTTTTGACCCAAGGCAGGGGCAGTCACTGAATGCTGCCCCAGATCCCAGTGCAGCTGCAGGTGCAGAG
+ACCTTGGGCACATGAGTACCCTCTGTAGTACCAGGCTCCTTATCTGTCCAATGGGACTCAGAATCGCATC
+AATCTTTTGGAATTTTGAAGGGATAAATATTGCCCAATATGCTTCTTTACAGAAGCTCAGAAAATCCTGG
+TTATTTTCCTAGTCTTGGGACTAGAAAGCATCATGGACCCGAGAAAGAAGACAACCCAAGGGGAAGCAGG
+ATCAGGGCATCACCTCGAAGCGGGCCAGGAAGTCCCTCAGGTTTGGGAAGGCGTCCAGGCACTTGGGCTC
+AAACATACGGTACTGGTCAAGAATGTCATAAGCAAGGAAATCCACATAGGTGACCTGCAGGAAGCCAAGG
+GTGAGCGCTCTGGAATCTGACACCAGGGAAGGATGGCAACCCCTCCTCCCCACCCCTCCCTTTACCTTGT
+CCCCTGCAAACCATGGCCTCTTGCCCAGGAACTCAGAGTAGAGCTTCATTTTCTCAGGGATGGTCTTCAA
+GAACTCTGGCTTCTGCTTCTCCTGAGGCAGAGAACAGCTGGCTGTCACCACCTTTAGACTCAGGCTCCCC
+AGCAAAGGGTTTGGCAATGGAACAGCCACCTTTCCAAGAAAGGGCCAGGTCCTAGTCTAGGAGTCCATTT
+AACAGGTCACGATTTTACTAATGACAGAATTCATGCCAGAGGTGATGGACAGCCTGGCAGAAATTTCACA
+CTGTCTCTGAAATCTGTCACCACTGAAGTCCAAACCAGGTGCTGTGAGTCACACCCAGAACCTCCTGAGT
+ATCTAGCATGCTTTGGCCTCAGAGACCCAGCGGAACTACAGAAACAAAGGGCACAGAGACACCAGGAGAA
+GACAGTTATACTGGATCCACCGGGGAGCAGGAGAACAGATCAGATATTTGAAGGATGTGCTTCAGTTTTA
+ATCAGTGATTAATACCGAAAGGAAAAGGACTCAAAGTTCCTCTGGCTGACCCTGAGCTGGCTACTAGGAA
+GATGTCCTGCTTTTCTATTGGTACCTAAGACAGACCTGTGGGGAAAGAACTGGGTAGATGTGTGAGACCA
+GTGTAGACATAACTTATTTACTTACTTGAGTCTGGAATACACAGACTCAAGCTATGAGGAGAAGAGATAC
+ACCATGGGCGATGGTAATGCCACCTTCCTGGTCAGACATCTGCTTCCTTATCACTTAGGGAGTATACTGG
+TGCCTGTTGGTGCATGGGGGTCCAATGTTGATGCCAAGTGTCTTTCTCAATCACTGTGCACCTTATTTCT
+GAGACAGGGTTTCTTAGTAAATCTAGAGCTCAAAGAACAACTAGCCTCAGTGGTCAGCAAAACCCAAGAA
+GCCCTCCTCTCCTCCCAGAGCACTATATTTATAAGACCAGGCTGCCACACTGGGTGTGTGTGTGTGTGGG
+GGGGTGTTATAGGGTGCTAAGAGTTCATATTGAAGCCTTCATGTTATGTAGCAAGCACTTTTACTCCACA
+TCCCCTAATTTCATCCTGGGGGCTTTTGGTATAAACACTGGGTAAGACGGCACATTGGAAGAGACCAATG
+GACAGATTTAGCAGAGATTTACAGCTGTACTGAATAACTTAGATCAACTGTCCACCAGCAGAACCTTAAC
+CATATCCTAAGACCAGCAGGGATAGTTTCACTGTGTTGATGTGATAGATCTCAGGGGTGACAAGGGTCAT
+CCTGCTGGTAAGATCAGAAAGCTTAGAGAATCATTACCAAAGGAGGATGCTACAGATCTGAACGTGAGAC
+TTTAACTCCTGCTGTGAACCCTGTCTGCCGCACCCACTGGCCACTCACAAAGTCAGGGTTGTAACAGAGC
+ATGATGAGCTGCATGCGGGTGTCCATGACCTGGTTCTCCACAATGTCTGCACGGATCCTCTCCTCCTCTG
+TCTCTCCATCTGCAGCACACACAGTACACTAGGCATCCCAACCTTGCCAAGCCGAGGAGATTATCCCCCA
+TCTCCCACCCTGTGGCCAGCCCCACTCACCCAGGTGGTGCTTTCGGGCAAGGTAGCGCAGGATGGCATTG
+CTCTGGGTGATCTTGTGTGATCCATCGATCAAGTAAGGCAGCTGGATGGACAAGCAGGGAGGGTCAGATG
+GGATATGGGGTACCTGAATCCTTTCTCTGGGTGAGAGAAGACACAGGGCCTGACAGTGTGCCTGCCATGG
+GAAGCCCTCCATGGGCACAGTGTGCACTGAATGAGAGAGCTGGGGCACAGCCCATCACAGAACACTGTGC
+AGCCAGCAGGGATGGAGCAGGAGAGCAGAAACACTGATCCTAAACCCTCAGCCAGGCCAGGAGATGAGGT
+GAGATCACCGTGTCTTCCCCAAACCTCTCCTCCCCTGCACCTACATTGGGAAAGTCCAGGCCCAGCTTGA
+ACTTCTCATTCAGCCACTGGCTTCTGTCAAAGTCGGGAGCTGTAAACAAGAAGATGTGAAAAGGCATTAC
+CCCACTGTCCCACCCTTCTACAGATTATCGACCAGTAGCAAGAACCCCTAAGGCAGGGCCTCTGCAACCT
+GTTACAGAGAGTTACTTTCCAATGTGGAGAAGGGTCTGAATAACAACTGATAATAAAGGCAGCCTGGACC
+CAATAAGACCCCAGTAGTGAATTCTGTCGAATTATAAATACAAATGGACCAAGGAACTCCATTCTCCATG
+ATCTATATTGGAAAATCAAACTGCACTCTCAAAGAGGTTTCTGGAAGCAGCAGCCTCTATCTTTAGCTAG
+AGGGAGACCAGCAGGGCTAAATAACGGGAAATCTAGGCATAGTTTGATAGGTAAGAGGGAAGCAGGTGTC
+TGACCAGGAAGGTGGCATTACCGTCACCCATGGTGTATCTCTTCTCATCATAGCTTGAGTCTGTGTATTC
+CAGGAGCATGCGGATCGGGTGTGTCAGCTGGGAGAGATGACCAGAAGAGACAGAGGTCAGACATTGTGGG
+GACTTCTTTGTATGACTTCATCTTCACCCGTTGAGACCTCCCACACTGCACAAACGCCGTCACCCCGCAC
+GTGCATGTGCACACCACACACATACGCGCGCGCGCACGCACACACACACAGCCACCCAAGTGCGTAAACT
+GAGCGCTGAGAGCTACAGGCAGGAGATTCCAGTGAGAGCTGAAGAGTACCCGCTGTAGGAAGGGCTCCTG
+CTCTTAAGAACTTTCTGCTTCACATCCCAGTCCCACCCTCCAAGTGGACCCCTCTCTCACTCCGCGGACG
+TTCCAGTATCCCAGTATCATAGGCATGGTGCTGGTGCTGTGGTCTTCTCAAACTGGCTTCAGCAGGACTG
+ACCTGTGTGTTTGGAGTTGTGACTTTATACAGACTACTAGAGAAACGTGGGCAGGACAAAACAGCGGGTC
+CCTCCCAACCCTGCACCAATCCCAATTAGGCACTCCCCTTCCTAGGTCTGTAACCAGAGCAGCAGACCCT
+AAAGCGGAAGGCAGAGGCCACACCCTCTCCGGGAGAGGCGGGGTTAAGATCCTTAGCGGCCTCTGTCCCG
+GGATTCTGTTTTGCACACGCTGAAACAGAATCTGCTCTCCTGCTTCCCCCGAAACCCTCTTAGCGACTGC
+CGCCATCGCCGCCTGCTGGATGCTCCCTAGACTAGCTAGCACTTTGCCTGGAGTCTGGCCAGACAGCGAA
+AGAACATTGGGTGGGTGTTCTCCCGCCTTCCTTTCTTTTGCAGAGCTCGGTTTTCACACCCAGACTGTGG
+GAATCTGTCATTGGGGAGGCTCAACTCTCAGCTTAAAGACAAAAAGGCGAGAGCCCCCGACCTGAGAGTT
+TAATGTAGCACTTATCGTCATGGAAAGAACTCTGTCAGTCTGACAGGTACCGGATTTTTGCGCACACAGC
+CTCAAGTTACAGGAACAGTCGGTGACATGGAAACAACCCCAAGGCAGAGATAAACAGGGCTCCGTATTTT
+CCTTTAGGGGGTTTGAGAACTGTTATCAGTTATTAAAAAACTAATCTGTATTTTGGCCGTGTTTATGAAT
+CTAAATTCAAGTACCAACCACAGAGGGGCCGGAAGTTTGAGGGAAGCCACAGCTGTGGACAACAGTCAGC
+TGACAAAACCCAGCAGAGCACAAGGGAGAAAAAGAGCCCACGCTGGGTTGTGCCATAGAGAGGGAGAGCA
+TGTGACCCAAGAAGTTTGCTTTCAAGGGATACCAGGATGCTGTACACATGGGAGCCCAAAGCAAGACCCT
+CAGCTGTCCCTAAATGCAGGACCTGGCCCAGGCTCCGGACATGCGTTTGTATATGACAGGGTGGCTCAAA
+TGACCTTTCAGGTTACCCTGAGATATATAGTAGTATATCCTTCACAAGTAGTATATGCCCTGTAGTATTA
+CCTTCACAAGGTCAATGTCCTCCCAAGAACACTGTCCTACCTGTTTACTTTCTTTTTTGCAGACAACCAA
+GAGGCTTGATGGAAGGGGAGACTAAAATGAGTTTACAAGTCAAAGGCCAAGGCTGTTGCTCTGCAGGACG
+TACATTTTGGGATTAAATCATCCTTTACTGGGACTACGGGGTATGAAGGCAACATATCATGTCACCCACA
+TAGGAGCTGTGCAGTGAGGTTGCAGGGGGCAAGCAGAGGCCATCTCATCCTAGGACATTAAAGAAACCAC
+ATGTTCTGCAGGTCTTGGTGAGGATGCACGAAAAGTGACCATGTCACTGGGGCTTTAGCAGCAGTTTGGT
+GGGAACTACAGGGACATATTTTGCAAGTATGAATGAGGCACTTCAGCAGATTGTCACACACTGTGGGAAG
+TAGAGAAGGAGGAGTGGAAGAAAGCAGAAGGAATCCATACTGTCTAGGACAAGGACTCAGAGGGCCTGGG
+AAGGGAGCCAGAGCCCTGGAGCCCTTCAGAGGACAGAATCTATCTGCAGACAGGGATAAGGGCTAGGACA
+GCAAGTTCTCAATGCAGGGTCCCCAGAGTCCGAGATCATTGTCCCTTTGAATGAGAAAGGTCCCTGAAAG
+GCAGCAGGGAACTGTGGGGACTGGAGACGTGGAGAGATCTGAAGCTAGGAGTCACAGCCAGTGCTGTCTT
+GGAGAATTTAATCCCAGCGTTGGTCACTGATCCTCTGGGTGTCTCTGTGCACTGTCACAAGTTCACAGTG
+CAGCCAGCCCTAGTGTCTTAAAAGGACACTGTACAGGAAAAAAATCTAAATGTGCTGGAGGAAATTTGGC
+TTATAACCTATAGAGTGACATCTGAGATGGATTCCGTAATTTCTTCCAGGCCAATGGGTGACATTCAACA
+TGGTCCATGCAAAACCACAGAATTCTATAAATAATGGCTTTAAAAATTTTAAAACTCGGGGTGGGGGTGA
+AGAAGGGGCCGGCCTTCGAGTGACAGCGGCGCAAGATGTCAGCCACCACAGGCTCGGGAGTAAAAGTCCC
+TCAAAATTTCTGACTGTTAGAAGAATTGGAAGAAGGACAGAAAGGAGTAGGCGATGGCACAGTTAGCTGG
+GGTCTGGAGGACGACGAGGGCATGACACTTACAAGATGGACAGGCATGATAATTGGGCCTCCACGAACAA
+TCTATGAAAACCGAATATACAGCCTTAAGATAGAGTGTGGGCCTAAGTACCCAGAGGCACCCCTGTCTGT
+AAAATTTGTAACAAGAGTCAATATGAGCGGCGTGAGCAGTTCGAATGGAGTGGTGGATCCGAGAGCCACG
+GCAGTGCTGGCAAAGTGGCAGAACTCCCACAGCATCAAAGTCATCCTGCAGGAACTGCGGCGCCTGATGA
+TGCCAAAAGAGAACATGAAGCTGCCACAGCCACCGGAAGGACAGCGTTACAGCAATTAGTCACCAAGGCC
+CCGGCCTCCCCTCCCCATCCAACCCAAGTCTTCATTTTCCACAGTAGTGAATTTTCTAGATACATCTGTA
+GACCTCAAAGTACTGGAGAGGAAGCTCCACTCAGCCTTCATCCTGAGGCACTGTTCTGATACTAACTTCT
+CGTCCATTTGAAATACCTAAGTTGTGCTGTGTAACATCAACTGTCAAGTGTAACCGCTGTCTGCCTGGTT
+GAACGTCTGGGATCAAGAAGGTGTTGAAATCGGTTTCCTGTGGGAGCGGTGGGCACAGCTAACACAACTG
+TGAACAGACACGTCACACAATCACCTGCTGCTGGCCCTTGGCCTGAGTCTGCCTTTGCCCTCCCCGCCCT
+CTGCCACGGCTGTGTGGTGGCCCTTAGAATAGATGGGAAGGCTTCAAGTAGCAGTTGTGGGACTGACTAC
+TGCTGGGCTTGGGGCGCTTTGGCTGCACCCCTGCTTTCTTCAGTCTTAAGTGATGCCCCATCCAAGCCAT
+GGTCCCCACTCTTCCACTCCCACCCTTGGCCAAAGCTTAGATTGTAACCCTCCCCTCCCTCTGAAATTGG
+CCATGGGTGAGGAATTCGGGGCTTCCCGTGTCCCCACCTTTATCAAGGGGTACTGCTTTCCCCTCCTCAA
+CTCCCCTGTTGCCCATCACCACCCAACACTTGCTGTGGCCAGAAGCCATCAGATGAGGTTGGAAGAGCCT
+GGCCTACCTCACTTAGCTCCGGACCACACTCACCTGCCACCAGCCTGGGAAGGGAATGCCAGGTCCTTAG
+CCCTGCTGCCACCATCTTTGCACTCAGGTCTAGAGGTAAACAGAGCAGTCACAGGGCGACTCGGACCGGC
+CAGCGATCAGGGTGGTGGTACACACCTTTAATCCCAGCACTCGGGAGGCAGAGGCAAGCGGATTTCTGAG
+TTCGAGGCCAGCCTGGTCTACAAAGTGAGTTCCAGGACAGCCAGGGCTACACAGAGAAACCTTGTCTCGA
+AAAACCAAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAAAATTC
+CCGAGACTGGAGAAATGGCCCAAGTGGCAGAGAGCTTGCTAGGCATTCATAAAGCCTTGCAGTCAATCCC
+AAGAACATCATTAACAAGGCTGGTGGCGCTCACCTGTAACCTTAACACTCAGGAGGCTGGGGCTGGAACA
+GAAGAACCATCAAGGTCATCTTCAGCCTCACAGCAACTTTAAGTCCAGCCTGGGACAGAAGACCTTATTC
+AACATGGGAAGATTCCAAGTTAGTACACATCAGGAGACAGAATAAAGCAACTATAAATCTGAACAAATGA
+TTTCAAAATTGAGGATACATTTTTTCCATAGATGCATAGTTTTAACCGACTGCATCTAATAAATACAACA
+AGATATGGTTTTTAAAAATTAAGAAATGAGTGAAGTAATAATGCATACTAGCGATGTTTTCCCAAGCACT
+GTTTTCCAGAAACTGGCTGTACAAAATTACAAACCTGCCTAAAATGTACACGTTACACCAGTTGTCAGCA
+ACTACACAGATCAGACATGGGCGGCGGCACAGAGAGCATGTCTGTAACTTACTTATTCTGAGCCTAAATC
+CCCACTACACACTACACATGTGTAACTACATATACACATAGTATACTGTAACTTGGTAAAAATAAATAAA
+TAAATAATAAAAATCATTCATCCAGAGCATGGTAGAGTCACCCATGAGGAAGAAAGGAGAGGGGTACGTG
+ACCAAATGGGACAGCTGGAATCCAAGGCAGGAACTAGACCCTCTGCAGGGTGCTTTGAACACACTAGATA
+TCACAGTGCAGACATGGACACCCACTCAAGAACCCTTCAAAATCTGTCCGACCATGTTCCCTGGTCAGGA
+GGGAACCTGAGGCGCAGAAGGCTTAAGCAACTTCTTTCTTTTCACACTGCTAGTTAGTGGAATAGGGGCC
+CAGTCAGACTGGCTCTGTGGGTGTCGTTCAGTGACATTCCTGGGCTCATGTCTCCCATCTCCTTCGAAGG
+GCTTCATCCCTCTCTCTTCATATGCATAGGCATGTTCCCGGTTCACAGCTAAGCTCTTTTTAAAGCTTGT
+TTGCACACAAGTCGAACCTCCAACAGGACAAAGAGGGTCAACAGGTCAGTGCAGACTAGAAGAGATCTTA
+TCCTGTGACCGAGCATGAGAAAGGTCTGGCCAGCAGATTCAGGAAGAAAACCTCATCAGGGCTGCCATCA
+AGTCCCAAGACCCACTTGGGGAGCCTGAGATCAGTCTCGCTGAGCCGAAACTAACCTGCTCCCCAGTGGA
+CCAGTCAGAACAACAGCTGTGTATATTCCAGGCCATCTATGTCAGGACAGCGAGTGGCAAATTGTTTCTA
+AAGAGTTCAAAGATGACCAGGGATTCCTGTACCCAGCAAGACAGGTGCAGCCAGCCTCAAGCACAGATCT
+TGGCTCTCCTTCCCTGTCCCATCCTTCTTCTCCCCCCACAGCCGTGCCAGCTCTTGCACAATGAGAACTA
+GCTGAGAGAGTGTGTAACTTGATGGGTGTGGTCCAGAAAACAGACTCCAGGCTGAAATAACCGTTTCCCT
+AACATTTCCGACTCCGAAATCAATAACAAAGTAGGTTGTGTGGGTGTCGGAAAAAAAGTTAGTGCTCCAT
+GATTATCTGAGAGATGAGCATCAGACTGGGCTCCCCAACACCCAGTAGGCACCATATACACCTCAGAATC
+TGGGGTCTTCCTCCCTCCCCCTCCCCCTCCCCCTGCCCCTCCTCCTCCCCCTCTTCTCCCCCTGTCCCCC
+TCCTCCTCCCTCTCTCTTTCTCTCTCCTACCCTCAGTCTCAGGATGAAGACATTTCTATCTGCCCTACTG
+TACCTGACACCAGAGGCCCCTGGTGTGAAAATGAGAATCTGTCCCATTTTACAGAGAATCCATGTGCTAC
+AGGGGAAAGTACATGATAGCCAGGTGTCTGTTACTTAGACAAAGCAGGAGAGGAAATCGGTCCAAATGAG
+GACATATTTATTCTTCCCCATGGCCTCAGAGCTTTCAGTCCATGATCTCCTGGCTCCACGATGGGAAGAA
+CATTTACTGCAAAGAGTGAGTGCTGGAGAAAAGCTGCTCACCACATGGGGTGAGGAAGCCAAAAGATGAA
+AGAGACAGGTTGTGATATACCCAGCCACACCTACCCCAACATCTACTTCCTCCTAGTAGCCCACCCCTGG
+GTTTCCAACACCTGTCAAAAACCTTTTATTAATCCACAACTACACCACAGCATTGGTGGGTCATGGCCTC
+ATGATGTAGTCACGTCTTAAAGATCCACCTTTGAATACCATTACACTGAGAACAATGCCTTCATTCACTA
+TATAAGCCTTTGAGGGATGTCTTAGTCAAGGTTTCTTTACTTTGTTTAATTTTAATTTTATGTGCATTTG
+GTGTTTTGCCTGTATGTAGGACTTCATGAGGGTGCCAGATCTCCGAGTTACTGACAGTTGTAAAGCTGCC
+ATGTGGGTACTGGGAATTGAACCCTGGACCCTCTGGAAGAGCAGAGTCATCTCTCTTGGCCCCATCACTA
+GTCAGGGTTTCTATTGCTGTAATAAAGTGTTCTGACCAAAAGCAACTTGGGACCTCCCCCAACTAATTAA
+GAAAGTGCCATACAGGTTTGCCTATGACACAATCTTAAAGAGGCATTTTCTAAATTGTGGCTTCCTCCTC
+TCTGATAATTAGCCTGTGTTGAATTGACATAAAACTAGCCAGAACAAGGACAATTCAGATACACACTTTA
+ACATGCAGAAGGAGATGAAAAGAAACTACCACACCACAAGATGTTCCAGATATAGTCCTTGAGAAAGAAC
+ACTGGATACTTACTCAGACCCAGCCCCAACCACAGCCTGGGAAGTCAGAGAGATTTCCAGTCCATCATGG
+CAGCAGGAGCATCTCCCTTCCTTCCACCTTGTGCCTGGGAAGCACTGGCCACTCCCCAGAATCTGTAACA
+TCTTTCATGACCCAGAGAGAGTCTCCAGATACTCTGGGTTCTGGTTCTGAGGACAAAAGCAGCTGTCATT
+ATCCTTATGCACAACCTCCCTGGATGTCCTTGCAGGGCCGTGCAGGCCCCAGCTGGGCAGGCTGAGCAGC
+CATCTTCCTGAGCCTGGGCTCGGGTTATGGTAGAAGCTTCAACACATCAGCATTGGCTGGACTTCAGGTG
+AAGCTGATGTCTACAGATGTGATGGGAAACCAGATCGGAACACAGTACTGTTGGCCCGCCATGGAGGCGC
+ACATCTGTAACGCCACTCAGGTGGGTGAGCTGGGAAGATTGCAGTGAGTTTGAAGCCTATATAGTGGTCT
+CAAGACCAGTATGGGATGGAGTGAATCTCATCAACAACATGTCTAAAGAAATGGCCCAGCCATTAAGACA
+CTTGCTGCTCTTGAAGGTGACCAGAATTTGGTTCCCAGTACTCACATGGGATGACTCACAACCACCTCTA
+ACACCAGCTCCAGAAAACCTGACCACTTCTTCTTGCCTCTTAGGGTATCCATAAATATGTGCATCACACA
+CACACACACACACAAACACACACACACACACACACACACAAACACAAACCTTTAGAAACAATATTTTAAT
+CTGAAAAAGAAAAAACAGCACAATAAAAACAAAACAAAACAAAAAAGGCAAAAACAAAATCAAGACCCTG
+TCCCTGTGTGTGTTGCCACTGAAGCAAACTCTCTCCTGCTGGCTAAGCAAGCCCAAGAGGCTGTGAATAC
+CTGATTATATTCCACAATGGATAGGCTGAGAAGGCAAGGCACTGAAACTGCTCCCCATAGGTCGAGAGCT
+TTACCCAGTAGATGTCGGAAGAATGAATAAAGCATCTGGGGAGCTTTCTACAAAGCCGGGGAGTAACTGA
+GGCCAAAAGGAGGGAAGGAGAGGAATGGAGGTTCTTTTGACCCTGAAAACAGCACTAAAGCCAGTGCAGC
+AGACTGAGTACATGAGAGGGGCAGTACCGGGAGAGACTTGCAGCGTTCATAAATCAGAGTTCTGTGTCAA
+CCTCACAACCCGGAGTTCTGGTTCTCACTGATTGAGCCTGTCTAACCCAGAAATGCACAGGCAGCATGAA
+TCAGCTAGACCGGAGCCCTTGGCTGGGGGCTGAGGCCAGAGGTAAAGAGAGTTCAGGACAGGACTTTACT
+CTCTGGGAACTGGAGGTCCACTCTGGGAAGGACAAACACTGTGTTGAGATAGGAGGGCCCTGATAACAGT
+GCTCACTTGATACAGGGGTTTGCAAGGTCAGTGGCACTAATATGGGGAGAGACCTGGGTTGTAAGTGGCT
+TCAAGGGGTTCAAACAGGAAACAGGACCTGGGTTCACAAGAACCCAAGAGTCCTGACCCTGATCCAGTCT
+GGTTCATCCCAGTTCAAGGCACTGGACGATTCAGAAAGGTCTCAAAGAAAGATGGACAGCACTGAGAGAG
+GGCCCCAACTTTGGAGTCACTGACCCTTATTTATATACATGAGTGCATGCACACACACACATATGCACAC
+ACATACATGTGCATACACACATAAGATAAGCTGAGTGTTCTTAGTTAATATTTTGTCAATCTGACACAAA
+CTAGAGTCATTTGAGAAAGGGATTCTCAATTCAAAAAATGCCCTCATAAGATTTGCCTGTAGGTAAGTCT
+GCAGTGCATTTTGTGATTATTGATTGATTGGTTGATTGATGTGAGTAGGGGTGGTGTCACCCACTGGGCA
+GGTGGTCCTAGATGATGTAAAAAAGCAAGGTGAGCAAGCCAGTGAGCAGGACTCCTCCAAGCTTCTGCTG
+CAGTTCCTATTCCCAGGCTCCTTCCTTATTGCCCTTATCTGATAGGCTGGATTGTGGAAATGTACGCCAC
+GTAAGACCCACCCCCAGCCACGTTACTCTTATCTGGTTGGTGGTTTATCATGACCATAGAAGCTAAGTAA
+GACAGCGAGTGAGCTGAGAAATCCCTGTACTTCTGATTCACAAGTCTCTAGCAACTGGTGATGCTGAACT
+ATTTTTATATGCTCATTGGTCGGGAAAATGTCTACTCAAGTCTATTGTCCACATTTGAATCCAATGATTT
+GCTAATGCTGTGGTTACTTGCTTGTGAATAATAATCTCTTGGGACACACTAATTATCCTGTGCTGTACTG
+TTTCTGTTGGGGGGAGAGTGTCGTTTGGTTGGTTGTTTTATTTGAGACAGGATCTGTCAACATAGCAATG
+CTATTCAGGAACTCACTAAGTAGACCAGTTTGGTCTCGAATGCACAGAGATCCTCCTAACTCTGCCTCTG
+GAGTGCTAGGACTAAAGATGTGTACCACCAGCACCACTATGGCCAGCATTGTATTGTTGCTTCTCCATTC
+TAATTGCAGGGTGGAGCTACAGAGGAAAGAGTATGGCCCACAAAGCCTAAATACTGGGCCCTTATGGGGA
+ATGTTTGCCAACCTTGGTTCTAGGTCCATGTCAACATTATCAGAGTGCCACTCCTGACTTCACTGCATCC
+TTTCCAGCTTATCTTCTTTGTCTTCTAGTTAAAATTTAACAAACACTGACTAAAATAATGTGAACTTACC
+AGTTTCACAAACATTTCACTTCTATGAATTTTTTATCTTATGACCTTTAACTTTTAGTTCCTCATATATT
+TTAGCAAGCCTGCCTGTTCCTTAGCTGCAGTAGGCAACAGCAGGCAGGGGGCATGAACACAACACCTGTC
+AGGGCTCTCATCCAGATGTGCTACCTGATACAGGTAGGTAGCATCCAGATGTGCTACCTGATACAGGTAG
+GTGCCATCCAGATGTGCTACCTGATACAGGTAGGTGCCATCCAGATGTGCTACCTGATACAGGTAGGTAG
+CATCCAGATGTGCTACCTGGTACAGGTAGGTAGCAGGCACCGCCTGACAAAGGTCAGGCACTATGATCCA
+GACCCTGGCGTAGGAGCATGCTCAGTAGATACTTCTGCAGTGTGTCCAAGTCCTTCATGGACAACTTGAT
+ACAAAATGTCCACAGCATCTTCAACCATAGGCTGAGACACTGGGGCCCAGCAAGCCAGGCCTGCCCATTC
+CTGATCATGTTCTTCCAAGGCAGCTGCAGCTTTTTAATGAGCCTGCTTAGCTGAGAGAGCCTCTGACTCA
+GACTCCAGCACCTCTGTGTGGACACATTGCTGTAATGTCTCCGGATAAGATTTTTCTTTCCAGAGCTCTG
+AGCCCTTACAGTATAGTGTCTTAGGGCTTTACTGATGTGAACAGACACCATGACCAAGGTAACTCTTATA
+AGGACAACATTTAATTGGAGCTGGCTTACAGGTTTGGAGATTCAGTTCATTATCATCAAGGTGTGAGAGC
+ATGACAGTGTCCAGACAAGCATGATGCAGGAGGGGCTGAGATTCTACATCTTCATCTGAAGGCTGCTAGC
+AGAATACTCGCTTCCAGGCAGCTAGGATGAGGGTCTTATAGCCCACACCCACAGTGACACACCTATTCCA
+ATAGGGCCACACCTTCTAATAGTGCCACTCCCTGAGCCAAGCATATACAAACCATCACATATGAATAAGT
+CAAAACTACTCTGAAGATAATTCATTCAGATTCCAAATTTCCTAAACACCAACCAAATGCCAAGCTCTGC
+CATCAACAAAGATGGAGCAGTCCAAAGAAAGGGACCTCTTCTGTAAGAATCCCAGGTTGAGGACCAAAAA
+CCATGGACTAGGGACACCAAGGATCTTAATGACTTACAGTCCCCGCATTCCCATCACCCAGGTCCTGCTG
+TGGAGAACAAAACAGAGACAACCTAACCAAGTCAACATAGTCCTGTGAGCCACAGACAGTTAGGCAAGAC
+ACAGAACAAACGTGGTTCAGACTTTACCAGGGATCAGGCAAGACTTGGCTGCTCACTCCAATGACACGAG
+AAGCACATGGGTGGGTCTGGTCCTTCCGTCCATACTGGGGATTCTGGGGTCAGATAATTTCCTCTAAGCA
+GAGAGATGTCTGAATGCAGAACATAGACAGGGAATAGGAGGAAACTGATGAAGAAATAGGGAGCCCCAAT
+GACCTGGGAATTGGGAACTTAGGCTGAGGGAAGGGCTAGGAAGAAGAAACGGGGGAAAGATAAATGGAAA
+GAAGAGGAAACTGGACTGGGGGAAAGGACACAGTTGTGCCCAAGGGCCAGAGCCAGCAAGGGTATTCATG
+AAAAGGCCCCCAGGTCTAGGCTTTGTAGTCTTGTTTTGGGTTCCAAAGGCCATGTTTGCAAAAACTGGTC
+TGCGAAGGAAGCAGATGCCCTTTCTGTGTGTTAGCAACACACATGGAGGCTAAGGGCTTCTCTCCTGCTC
+ACACAGCTAGAAAGGCAGCAGTCTGTCCCCTGTCTACACAGGGATGGACTCTGCGGCTTCCATGTTGCCC
+TCCTAGTTCACTCTCAAAGCAAGAACTCCTTGTCACTCTTGTTCTTTCACCAGCAGGCTCCTCCTTGCTG
+GGGCAGTGAGGGTCAGGGGACTGCAAGCAGGGCACAGGGACCAGTAACATTCCAGTGTACTTTGGAAGAG
+AGCAAAGATCTCACCCAGTACCTCAGTAAATCCTGCCTGAATGACCATAAGGATGGGAGACCAGCAAACA
+GATAACTGAGTGGCCTTGGCCAGTCTCCTGAAGCAGAAAATCTAAACTCAGAAATCTAGTAGCTGAATGC
+AGACTTAATGCATCCACCCTATTCTTCCCAGCCCTGCTAGCGAAAGACATTACGCTCTGGGCCATCTTTT
+CAGTTGAGTGCCCATGAATAAGTCAAAGTTCCAAGGCTGCCTGTACCCAAGAGGCCAGGCCTGGCTGGCC
+TTGGACTGTGGCCTCCCACTCTCTCCCACCACACTGTACAGCCTGGGAATACACAGTAGAATAGAGATTG
+CTCACACGGTCTCCAGGGTGAGCAGCCTGGATCAGAAGCAGTGTGGTGGGAACTAGCTGAGAAATTACTT
+TCTCTCCCCAGATGCCTGTCCCTTACACACTCCTGACTACAGCTTTTCTCTTTGTCATAAATCAGGGACA
+AAGAAGAAGACTGTAGTGACTGCCCTCTGCCACAGGAACCAACACCACGGGAGGATGTATCCATGCTCAC
+TATCACTCGGGAGGATGCGTTATTCATCATCACTACCATCTGTCCAAGAGCTCTCGATAATTCTAGCCAC
+CCCGTAGGTTGCTGTAATGACAAACTTCATTCAGTAGATGTAAGACCTTATTGAACATAGGCCCAGTCTC
+CAATAAATAACAATTATTCTTCATTCCCATTCCAAGGGAGGCATTGTGGAAAGGGGGAAAGGAAGAAATT
+CGTCCAAAAGGAGGGGGATCCAGGAAAGATCGCCTAAAATGGAGCTATGAAATCCTTCCGGTCTGGGAAC
+GGGTTCCGACACTCAAATGCACAGTGCTGGTGAAAACCATTACAAGTGAGGAAATCCACAGAGATAGGTG
+CTCTACAGAAAGGTGAGGGCGTTGAGACTTGGTAAACCTGGAAGAACAGCTACTCCTCAGCCTCATCTCC
+CCCCCCCAACTTTACCTTATCTCCTGCAAACCATGGGTGAACCCAAAGAACTCACAGCAGAGCTTGATCT
+TCCCAAAGATGCTCAGGGATGCCCTCTAAGTCCTCTGGCTTCCATCTCTCCTGAAGCACAGTGTCCATAT
+CCTCAGACATGGTGCATCTCAGGGTGATGTTCAGGCACTAAGTTAGAGCATTACCACGATTAGACTGCTC
+TTAGGGAACAGCCCAACAGGATACAGCCCTGTCTCCTGGTCTCTGCCACTATTGTCAGGACCACTGAGCA
+TGAGACAGCTGTGAGCACATGGCTCTGAACCTCACATGAGGACTGAAATTGTCCAGGAAACAGGCCGTGC
+TCTACCTACTGGTGCGGTGAGGACCTCAATACAGATTCCAAAGAGATGAGGTGAAAAGAGGACAAAAGGA
+AACAGGGGCAGGATCAAAGGCCTGACTGACTGAGTTGCCTATGGGAACGTCTCTGAGTGTGTCAAGGACA
+CAGCTCTGGGAAGCCCTTTGGAAGGAGAGTGTCCCCTATCCCCAGTGTCTTCTTCCCATGAGTGTGCAAT
+CTGCAGACATGGCAAGACTCCAGGACAACAGAAAGACATGACTCAGGACAGAACCAGCGCTTAGACCTGG
+AGGGATCGAACTCTGTCTGTCCTATCAGGGACTAAAGTGTCACTGTGGAGAGTCTAAACTGGACCTTAGG
+CCGATGACTGGAAGTTTTCATTTGGCAAACTAAGCTTTGAGGTTACTCTGTTGGCAATTTCAAACACTGT
+GCCATGAGCAGCACTGAGAGGCTCTGGTGGCAAACAGGGCCAGAGCAGATGCAAGGGCAAGAGTGCGGGG
+CCAGCTCCAACTCACAAAGTCAGGGCTGTAGCAAACCATGGCCAACTGTATGCGGGTGTCCATAGCCTGG
+TTCTCCAAAATGTCCACACGAATCCTCTCCTCCTCTGTCTCTCCACCTGCAGCACACACAGGCCAGACTC
+ACCCTCAGCATCCCACTCCAGATAAGCCAAGAGGAATATATGGTTCTGTCCCTAACCCTGCAGCCAGCCC
+CAACCACTCTGTTAAACCTTATAGGCAGTGAGATACAGGATGGCATTGCTCTGGGTGACCTTATGTGACC
+CATCAATTAAGTAAGGCAGCTGAACAGACAAGCAGGGAGGGTCAGATGAGATAATGGGTCCCTCCATCCC
+TTCCCTGGATGAGAGCAGACACGGGGCCTGACCCTCAGTGTGCCTGCCATGGGAAGCCCTCCATGGGCAC
+AGTGTGCACTGAATGAGAGAGCTGGGACACAGCCCATCACGAAACATTGTGCAGCCAGCAGGGATGGAGC
+AGGAGAGCAGAAACACTGATCCTAAACCCTCATCCAGGCCAAGAGATGAGGTGAGATCACCGTGTCTTCC
+CCAAACCTCCCCTCCCCTGCACCTATGTTGAGAATCCAAGGCCCAACTTGAATTTTCAACCCAGCCACTG
+GCTTCAGTCATAGTCAGGAGCTGTGGACAAAATGACAAAGATTGCTCTCTGTTCACTCCCCCTTATTGAG
+GACCTTCCCAAGAAATCAAGGACAAGACTCCCTGAAGCAATAAACCCGCAGCCTCATGCACTGCTTCCCA
+TTGGAAGGCTGCAGAGATGAAGCTCACGTCACCCCTGTGGTAGGACTGTGGGCCCTACTTAACTAAGCTT
+ATACAAGTCTGCATACCCTCCAAAGTGAAATGTGGCCCAATTACAACAAAAGGAAACTCAGCCCTTCATT
+TCCATTACCCTGTCTTAGAAGACCAACTTCAAGGAGGTTTCTGGAAGGGCACATTGTAGTTCCAGCAGGA
+GCAACCTAATAACAACAGGAGGACGAGGCGTGGTCTGTTGGACATTGATGGTTAGAAAATCAACAAAAGT
+CTGACAAATAAATGTTGTAATCACCATTGCACTCGGTGTGTTATCTCTTCTTCTTGTGGCTTGGATCTGT
+GGACGAAGCAGGAGGATGGTGTGAACCACCCATGAGACGATGAAACCCTGGCAGGGTGACTGCCTCACTG
+CCTGAAGAGTCTCCACCTTACTACTAACAACCAGCCCTCTACACATGCACACACCACACACACACACACA
+CACAAACACATGACACTCTTGTGTTTGAACTCCTGATAGGCAAAGCTTTCCAGCAACTTCTGAACTAAAC
+CCATATTTTAAAATCTATTCTTAGGTACTCCCCAGTCCTGTGAAGCCCCTCATTCATCCCACATATGTTC
+CAGTGTCATGGGCCTGATATGAGTCTCCAGCAGGTGGAATAGGGCCTTTGTTTGTACAGCCTTCTTATGG
+AGGCCTGGGTAGGAGAGAGGCAGGACTTGGCTGCAGTCTCCACTTGAGCCACTATTCTACCTTTCTCCAA
+CTCCAGACATTCCACTAGCAGGCAGAAGAGAAGTGCTGTCCAATACCATTGTGGTTTATCAGAAACAGGT
+CAGGGCACTCAGGAGCCAAAGGAAGGGCTGGACTGGCTGGCTGCCCACACTGGTCTGCCAGTGGCTTCCT
+GGTTGGTTATGTCCACAGGGAAATGGCAGTTTAGTTGTAGACACTCTTGATGGCCAAAGTGGAACACTGT
+TATTATGACCCTGGCATTGTCATTTCTTCATTCATTATTAACTCCCTGCCTGGGCTCTGGGTGGCTCCTG
+GTGGCTGAGCCAGGACAAAGTAGGATACCTCAAGCCTGAGAGATTAGTGTCTCTGTGGTCTATAACCTTT
+CATAGACAGCAGAGGGCATGCAGGACTCTGCTAAGCTGGGCAACAGAGCCACATACAGGTACGACTAGTG
+GATGCAGTACAGATATTCAGGATCACTGGGCACTGGGCAGATGTAAAGCAAACCACAGTGAGATACACCT
+TCCCCAGAAAGAACTGCCATTCTCACATGGGGGCTGGTGAGACTGTGGGCAGGAGAGAACATTCGACACT
+GCAGGTAAGAGTGTCAACTGTCACAACCATTAGGGAAAACAACCTGGTGTTCCTCGAAAAGATAAAACTA
+GAGCTACCACGTGATCCATCAGTACTGGATATACATCCACAGGGAGTGAAATTACAGGAGAACACACTTT
+ATCATAGCCAAGAAATGGATGCCATCTAAGAGTTCATCAACTGATAAACAGATGAGGAAATGTGTTACTG
+TGCACCATGGAATACTATCTAGCCATAAAGAAGGAAAATCTCTAACCAAGAATGCTCGGAAAACTCAATA
+GTCACATGCAAAAGATAGAAATAAGATTCCCCCAACTCTCATCCTGTACAAAAATAAATTCCAAACGAAC
+CAAAGGCCTTAAGTGTAAGAGTTCCATAGACCTCAAATCTAAGAGCAAGACTGGGCAAGTGAGAGGGCAT
+CAAAAAGAAAGTTGCTACACAGCGAGGGAAACTCTCAGGAGAGGAAGAAGACACATTCCGAGTGGGGGGA
+AGTCTCAGCCAGCTGCTCATCGGACCAGGGATTAATATCCCAAACACGAAAAGAATACAAAAGTTCAGCC
+AAGTATGACAGCACATGCCGGCAATCCAAATACCAAGAGGCAAAGACATGAGGACCCCAAGTTTGACCCC
+AGCCTGGGCTATTATATAGTGGAACTTTGTTAACAGACAAAAAGATAAAAACCATAACAACAAAATACTA
+AACAAATAATGAATGGGAAAATGCTCTGAAAGTTGCTGCAGATAACAGCAGCTGTGTTTTTCAGATCTTT
+GGCATTTTTAGTCATCAAGGGAAAGCAAATGAAAGCTAGGCTGAGACTCCACCTCAGCTCTGTCAGAATG
+CCACTGAAAAATACAAAACAAACTGTCAAGGAAACAGTAGTTATATACTGTCTGTAACCGCACAAAATAG
+AGCAGCTCTATAAAAACTGATCCTCAGAAAACAAAACAAACAAACAAACAAAAAACAACAACAAAATGTC
+TGCCATACAACCCTGCTTCAACACGGCACACCCATGTCTACTGCAACTAGCTAGAGTAGCCAAGTTATGG
+GACCAGCCTAGGTGCAGCGCAACAGATGTGTGGCTAAATAAGATACCATGGAGTGTTATCCATAAAGGAT
+GGAGCTGGCACTATCTAAAGGAGTTGGGGAGCATCTCACTAAGTGGAGTAAGCCAGATGCAGGTGACAAA
+TGTTGCATGCTTTCTCTCATGTAAGGAAACAGATAGCCCTGTGTTGGAAAGGGATTGTGGGCTTCCCAAA
+CCCTAGGCACGGATACTGCAGGGAGAGGATGCTAAAAGCCATCCTCATGTTTGGAAGGTCCACTCCTATC
+CTTACCAGATCCTGGTGCTTTTGAACACATCTTCCTTCTCGCAGCCTGTTGGAACAGCCTTTCCTGCCTT
+CCCTAGCTGCTGAGAAGAAAAGCCTCCCGAAGCCGGTGTGTTGATAGCTCCCTCTTCCCTGTCCCTTGCC
+TTCCAGGTGTTTGTGGACCCCAGAGGGCCAGGGAGGGGACAGCAGGGCCACCATACAAAGTGGAACTGGA
+GACACTGCCCACTCAAAACATCATGAAATGACTTTTTACACTGTTAGAAACTGGATGGGAGACAGCAGAC
+ACCATCAGTTGCCTCAGAGGGTTATGTCCTGTCCCTGTGTTGAGTGTTTCTATTGAATTTGTTGCTAGTT
+TTTCTGCTACCTTCCCAACAACTATGTAATAAACCTTATAGATTCTTAAAAAAACAAAAACAAAATGAAG
+AATTACTAGAAATGAGAGAGGGACAAGTAGGGACTGGCAAGGGAATTGAGGGAGGACTGTGAGGCTATGG
+AAGGGAAACAGTAAGCTATAAGGGAAGTAAATATGAACAAAGTACAATATATGCCTATGTTAAAAGACCA
+CAGTGAAATCTACTAGTTCATATAATTAACATGAATTAATCGTTATAATAAAAAACGTTTCTAAAGAATA
+AAATCTTGAAATCTTGCAATATGCATGGATGCAATTAGAGACCATTAAGCTAAATGGCACCTGCCAGGCA
+TGGAGAGATTAAGTCCCACCTGATTTCAGTCAGAGCTAAAGCAGTTCATCTCATCCTTGGTGAGACTCCA
+GCCAGCCAGTATACTGCATGCCTACAATCCCAGCATGCAGAAGCAGAGGGAGGAGGGTCAACATACTGTG
+AGGGTAATGTGGCCCATACAAGGCTATGCCTCAGAGGAGGAAGAGAGGGGATGAGGGCAGGGAGAGGAAG
+GGGGGGTTCTGTGGGAATGGGGGTCACAAAAGCTGAGGAGAGATGCTGTTCAGTGGGTCCTAAGCTGCAG
+TTGGGAGGAAGATACTCTGGTGTGCTGCTGCACAGTAGACTATACACAATGGCCATGTGTGTATTAATCC
+CAGTAACCAGAGGAAAGGACTTCCCGTGTTTCCACTGCGAAGAACTGATATATGATATATGATATATGAT
+ATATGTTTATGTTTAGATATGCTTGGCCTGGTTTCAACATTACATAATATTTAAGTGTATCAAAAGCATT
+ACATGGCATCCTGTTCATACATATGGTTTTTATGATTTTGTGTAGGTTAAAAAAATAAACTTGTGAATCT
+GTTCTTTAAAGGGAGGAAATAGTTGGAGAGATGGCTTAGTGTTTAAGAACACATGACAGCATCCACAACT
+GGTAGCTCACAGCTGCCCGAATGGCAGTTCCAGAGGATCTCATGCCCTCTTCTGGCTTCTGTAAATACCT
+ACAGTCATATAGTCATACACACAAACACATAACCATAATTTACAAATCAGGTCATTTTTCAATGAAAAAT
+AAGCAAGGGAATGTGCTCTTCACATGATAAGGAATACACTCACATTGCTAAATTAAAGTTTCATCAAACA
+AGGCCCGTAATAGTTGTATGAGTTGTATTATTATTATTATTTACTTTTTAAAAGTGGGTATCTCCATCCA
+CTATATTACCAGTGTGTACAGGAAGCAATTTACAGCTCTTGACACATTTAATAAAGCTCAGAAACTTTCT
+GAATGGTCTCCTCTGGGGAGTGCTGCAGCATCTAGATCGGGAAGCTATGAATGCCCCACAATACACAGCA
+GAGCCCCACAGCAAAGAACCAGGCAGTCCAAATGCACTCACAACCAAGTGAGGGGACCTAGGGTTACCTG
+GCCTGTGAACCAGGAATGACTCAGAAGCCACAGCTGGAGAAGCTCAGTCTTAGAAAAGGGTGAGAGGCAG
+AGCAGAGCAGCTGCAGGCCTGGAAACAGCTGAGGGAAGGGGTAGTAGGGAGGGGCTAGCAGCAGCAGTCT
+TGCCCTGGGGAGGCCCTGGGGGCATTGGGCTGCACGTTTGCAAAGGGAGTGAGAGACTTTCTCTATTCCT
+AAGGAATAAACATTTCTTTCTTTCTTTTCTTTTTTAATTTTATGTACATTGGTGTTTTACCTCCATGTAT
+GTCTGTGAGGTGTCAGATCCCCTGGAACGGGGGTTACAGACAGTTGTGAACTGCCCTTGGTGCTGGAAAT
+TGAACCCAGATCTTCTAGAAGAGCAGCCAGTGTGTATGATTGCTGAGCCATCTCTCCAGTCCCATAACAA
+ACATTTCTTTTACAACTCCTAAGAAATCCTAGGTTGGAGAGATGGCTCAGCAGTTAAGATCACCTGCTGC
+TCTCACAGAGGACCTGGGTTTAGTTCCCAGCACTCAACATGGCACCTTGCAATCAGCCATAATTCCAGTT
+CCAGAGGATCTGATGCCCACTTCTGGCCTCTGTGGGCTTCTGCACTCATGTGGGGCACATAGTACATGCA
+GGCAAAACATTCATATTCATAAAATACAAATAAATAAACCTTGAAAAAAAAAGAAAGAACATAAACATAT
+GGCACACAGGGAAGGACAAGAATTAAATATAAAAGCACATAATGGCTACCAAGGGCCAACGACAGCAACT
+ATGGGGTCACCATAGCAGTAACTCCATTCTAATGCCTGCCCAGCCTTGCAAGCCTCCTGGATAGCCTCTT
+ACCAAAGAACTGAGCAAAATGAGCAGGCTGATTTTCCCTTTGGACTCCCTCCAAATCACTCATAATGAAC
+TGAAAGATTTACTAGAACAGAAAAAGATATCTTACCTCTAAACCTTCATCCAATCTGTTCCAAATAGAAA
+AGCTGCCCCCGCCCCGGGCCCCGCCACTGCCCCCTCCCCCACCAAATGGAAGTGCTGTAATGTTTTATGT
+TTTATTTCTTTGTAGATTTATTCTTTCGTTGTTGTAATTAATTTTTGTGTTTGTTTGCTATATATTATCA
+AGGTTCTCCACACAGAGCAAATAGATAGCAATAGATTAGATATGAATAACTTATGATTGGACATGGGTGA
+AAAGGTTATGGCGCAGGGAAGTTTCATCATCTCTGCAATTCAAAGCATGGGAATTGGTGGCATAATTCCT
+TCAAGTTGAAGGGGCAGATAGATGGAGGGCAAGGCTGTAAAGCTCTGCCAAGTTTTAGAGTAAAGGACCT
+ATAGTCCATGACAGGAGAAGATGAAATGTCCTGGTAAACCGAGAGAGAAAATTCACCCTTCTGTGTTAGA
+ATTGCCCAGGCTCTGTGAGTAGCTGGTGCCCCTCCTACTGGTGAGCTTTACCCAGGCTTTGTGAGCCCTA
+CCTACCTCCTATACTCAAATACTAAGTAAGCTTTGCTGGAAGTAAGAGAGATGACATATTGGAGCCTATG
+TGATATTTACATAAGACTAGCTCTCACCTTATGTGCTAGGTAACCTAAAACAGCACAGTTATAACATCCC
+TAAGGAAACCAAGGCATGGACTGCATCAGAGTCTTTCAACTTTCTCCAGGAAATGACTGGGCAATGGGTA
+GGAAATGAATAAGGAGCTTTAATCAGGGAGATTACCAGACATTAATTCCTCAGGTGAAAACATGCCTGAG
+CAATGATCTACAAAGTGGCCATGCAAACGTTACAACGCATTATGAAATGATGAAATTGAGCTAGGTGTGG
+TGACACACACCTTTAGTCCCCGCAGTCGGGGGGGGGGTGCACAACGGGGGTGGGGGGAGATGGGGGAAGG
+GGTCTGTGAGTCTGAGACCAGCCTGGCCTACTAGGTGAGTTCCAGGATAGCCAGGGCTTGAAAAAAAACA
+AAAAAAGAACGAAAGGAAGGAAAGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAA
+GGAAGGAAATGACTGAAATTGTTGGTGCCTATGTATATGATTTACTGTTGCCAGCAAAATTTAAATTATT
+ATATCTGAAGAGCACAGATAATCCCATATAAAGTACTTTTATTTAGTTTTACCTTGAAGTATTTACTCAG
+TGATTTATTCAAATAAAAGGACTAAAGCCAGGGGAAACGGTTGAGAGATTCTGGTTAGAAGCTTTTAAAT
+TAATGAAATCCACACATCTAATAACACTGAGATGACGGACAGAGTGTACGGAGACTACACTCACCTGCAC
+AAAGAAGTAGGTATCCACAAAAATTAGAGGGGAGACTGGAGTGAGTAAAACACCTGCCTACAAACTAAAC
+TGTTCATTTTGGTATGTCGAGAGGGCTCACGATGCTGTTAGAAACCTTGGAATCTCCTAATGTTCACTCT
+AGGTGTAATTTTAAATCACTTCAGGAGAATATCATGCAGATGACAGACTGGATCTGACAAAATTCACCAA
+CTCCTGTCCCCAATTGGATGTGAGCCATTCTGCAGGGAATAATCTAGGATTATGCAGGATTTAGACACTG
+TTCTGCCAACTCATAGTGTTTGTCATGATTCCATGGGCTTAGTGACTGGCTGTTCACTTTGGGGAAATGC
+CTATGCAAGTCAAGTCTTAAGTCATTTTTGGAGATTGATTGATTGATTGATTGATTTATGTATTTGGTTT
+TCCAAGACAAAGTTTCTCTGTATAGCCCTGACTGTCTGGAATTCACTTTGTAGACCAGGCTAGCCTCGAA
+CTCAGAAATCCGCCTGCCTCTGCCTCCTGAGTTCTGGGATTAAGTGTGTGCCACCATGCCCGGCTGGAGA
+TGTATTTCTGGCTATAGTTTTTGAACAAAATCTTTACTAAATTAGCAACCGAACACACAGAGACACACCT
+TCACACAATGTACCCCAACAGGTAAGACATAAAAATATTGAAATTAGCCTTATTTTAATTTTTAATTATT
+TTTTATTTTTCAGATTATAATATACTTGCATCATTTTCTCCTTCCCTTTCCTCCCTCCAGACCCTTCCAT
+ATACCTTTTTTCATTAATTGTTGCACATATAAATTCTATGTTGCATATATATGTGTATATATATATATAT
+ATGTATATATATTATATATTCCAAATACACAAATACAACTTAGTCTACATGTCAGTTGTATGTATGTTTT
+CAAGGCTGATCATTGGTATTGGGTAACTAGAGAGTTCTTCCCTGGAGAAGACTATGTCTGCCGCTCTCAG
+CATTCCTTAGTCACCTGTAGTTCTTTGTCTAGAGTTGGGGCCCCATGAGCTTTCCCACCGCCCACTTTAT
+GTCTCTGTTGGTGTTGTCCTTGTTCAGTTCATGTTTAGGCAGCCAGGATGGTGAGACTTCATGAGACTTC
+TGACATTCCTACAGACACAGTCTCACAGCAAACTCCTTGGTCCTTTGGCCCTTAGCAGTCTTTCACCCAC
+TCTTCTGGAGTGATCGCTGAGCCTTAGGTAGGAGTTGAGCTGAAGATGCTATCAGCTGGGACTAGGCTCC
+ATAGCTATGCCATTTGGTTGGTTATGGTTTTCTGTAATGCTCTCCATCTACACTGATCTGTAGCTACAAG
+GACAAGTTGTTGGAATATATTTCAGGATTAGGCTGGTTTAGCAAAGTGGTAGCTGTATGCTCTCCTCTAA
+GATCTATGGCTTCACTAGCCCTGGGCAGTTGGCTAGGTGCCCTATTCCTGGCAGGTTTTTCCTCTTGTTC
+AGCAAGCCTTAAGTCCAACTCAGGAGCTTGGTCACCATCAAAGTAAGGGTTATTGTGACATGCTGGTCAT
+TGCTGTGGTTCATAGGCATGATAGTTGAGAAGGGTTATAGGTTGCCTCCCTCCTTTGGCACCATGAATGT
+TAGTCCCCCAGGAGAAGGGTTTCAAGTCAGATCCAGCTTAGGAGCCTCTGGGCCCTGTCTGAAGCACACA
+GTGTCTTCAGCAATACAGGCTTGACTCCCACTTCTCGGAGGTAGCAATAGCCTGGAACGTTCTGGGAGTC
+TTTTGGACAACCCTGACCAACAACTCAAAAGAGGATTGTTCATGATTGGTGTGGTTTTTGTTCAATGGCC
+TTTGACTCTTGGTGGCAGCATCGTTTACCCAAAGCAACTGTATCTTAAATCTGAGTGGGAGAGGCTCAGA
+GAGGGTGTGGAGAAGCACCAGGCCCAGTACAGATCCTGACAAAGGCTTTGATTTGGGGATCTCAAGACAA
+AGGTGGCCATCTGAAGTATCAAAGTGGAACACATACAAAAGAATCCTAGTCAGAGTGACTGGATACTTCT
+CAGCAGAAACACTGCATTTCTGAAAGGGGCGAGGCCATCTGAAATGGAAAGTTCACTCTACAGTTCTATA
+CCTATCCAAAGTATTGCTGACATCAATGGTAGGAAAAGACATCTGAAATTACGAAAGCATGAAGAAATTT
+ATTTCACGGGGGCTCACAAAGAGGAGGGATTAACCAAGATGGCCCCCATCCCTGGTGACCAAGACGGGAT
+ACTCTGAAGACATCAGAGACAGGATCCTGGAGACATATAGATATAAGGCAGCCATAGCACAGTGGGCAGA
+GAGGATCCTGGCTTTCTGCTGGGGACTCCTATCAGCCTGGATCCTCACCACTGAGACCTGCATGACAAAT
+CTCAGTGCTGCTAGCATAAAGGAGATCTAAGCTCCCTTTGAGCTCCACAGAGCCTTCTGCAGCTGCCTCC
+TGAGAAACAAACAGGTGGGGCTAGGAAGGGCCAAACCCCTGCCTATCCAACTAAAATCCCTGTGAACTCA
+GCCTCTGCCCGGGTCACTAGGCAGCCACTGGAGATAGTAGTAAAAGCTGGTAGTGCGGCAGCCAGAGCCA
+GTGTGGTTCAGGCTTTACAGGAGGGACCCAAAAGAGCTGTGCAGAGACCAAATTCACACACCAGCCGTCT
+CTGGTCTCCAGACAAGCATGGGGAGCCCCAGGCTGAGTGCTGATGGAGAGTCCCAACTCTTCACAACAGC
+ACAGCTGCTGGCAGGCAAGACCATCAAGTGGGCGCGGGACACAGTCATGCTAGAAGGGCATTTTGCTAAA
+GGAAAACTAGGAGCCTAGGCTTTAGAGGGTTTGAAAGGGCAGGGACTGGATGAAAGAAAAGAGGACCTTT
+GACACTCTGGGGTCAGGAAGGGGATGGACAGAAAGCAGCACTAGGGCTACAGGAAGTTCAACTGTTCCCA
+GAACCCCCGGTTCCCACTTCCTGCCCAGTCAAGGCTCTACTTATTGCCCCAAGTGGCCACCTTTGTATAT
+AGGGGTGTTCGGAGGAAGCGGCTGGTCTTCATGTAAGCAGAGATCCTCTTCAGTACCTGAGAGTTAAAGA
+AAAACAGAAAACCAAACACTAAGATTTGCTCAGGGCCAGAGAGGTTAAACAGCTTCCTCCTGCCCACACT
+GCTATTACATGGCACCCATCTGGGATTAGCCTGGCCTCTGTCTCTCAGATTTTGTACTCTCTTGGCCCAC
+TCCCTTCCTGACCTCCCCATGCTCCTCTTGCACACATCTTGCCTGGGTTTGCACACTGACTATGAATCTG
+TCTAGCAGGACAAAGAGGCATGTTTTGAAGATAACCCCTATGTCATCCAGTGACTCTGCAAAACCAGGGT
+CAAAGTTTATAGATACAGATAGATCAGAAAAGAAAAGCCAGGCTCACTGTAATCATTCCCTAAGGAGAAA
+GCCACACTTCTGAACCTGGTACTGAGCTTCATCCAATCCCGTCTCCAGCCTCGATACAGGCATGGACAAG
+GTGCACTGGCATTTACATACGCTGTTGGCCTTCTAGCAGGAGCGTGAGCCCCTAGAATATCTCTAAAGAC
+ATCAATGAGGTCCAGAGCTGCCTGTATCCAAGGGAAGTCAAGGGAGACCCTGGCACAGCTCTGCTAGTGC
+TGAAGCCCTGAGCAAGTATAACTCCACAGTTTCACATTTGCTTAGCCAAGCAATGGGACTGATAGTCCCC
+TCCACCACGGAGGAACGCTGATCACTATAAGCCTGATCAACAGATGTAGGGTGCTCGGCACTGACACTGG
+CAGAAAGCAATTGCTCAATTCCTGATTCTTATCCTCAGCCCAAGAAAACATCAAGGCAAAGTTGACAGCC
+AAGGGGAAGAAGGACCAGGGCATCACCTCAAAGCGGGCCACAAAGTCCTTCAGGTTTGGGAAGGCGTCCA
+GGCACGTGGGTTCGAATATAAGGTGCAGGTCCAGGATATCGTAAGCCAGGAAATCTACAAAAGTAATCTG
+CAAAGGGCCACGAATGAGAGATGGCATGGAATTCTGTCCCCGAGAAGAATGACAATGCTCAACTTTTCCC
+CCTTTACCTTTTCACCAACAAACCATGTCCGCTGGCCCAGGAACTGTGAGAAGAGCTTCACCATTCCAGG
+GAGCTGCTCCAAGTATTCCACCTTCAGTTTCTCCTGAGGCAGAAAACAGCTGTCACCATGTAAGTTCCTG
+AAGAAGGACTGGCCCTTCAGTATCATGCACGGCCCTAAATAGCCAGAGGTGGGCAGCCGTCTCCACTAAG
+ATGGAGACTCAGTCACAGTCCAGGTCTCGATTTAACATATGAGCACTTACTTACTAGACGCCTCGGTAGC
+CCACCCCACCCTTCAAATGCAGTGGAAGACCAGGGAAGAATAAGACTGCCCACAGTGGGGCATCATCATT
+CTTGATGCCTACATGGCTGGGTACCCTGCGCACCTGGCAGGTACCAATGCCAGGCAGCATCAAAGAATCA
+AGAACTAAAATACTCAGAGGAGATGAGAGTCCACACAATGGGTCCAGAAGAATGAAATATACTTGAGTTT
+TCCCCATAAATTGAGGAGAATCTGAAGCAAAAAAGGAGAGAAAATCCTTCAGGCTGACCCTGTGCTACCT
+AAGGAATATGCTGACTCCAGGGGGTGTGAACAGAAGCACTCTGAGAGGCTGGAAAGGGTTCTGTGACAGT
+CCCTCATCTTGGACTTCATTTTCTAATCCATGGGATATTCCTGTATTTCGTGACCAGCAAAGATATGAAC
+TAGCTGGCAGAGGACACAGGTAGAAAACACTTGGAAAACACAACTCCACTATCTAGGAACTTAATGTCCA
+CTCTGGGAAGTCTGGACTGCACCCTAAAATAGGTGGGGCCTAAGTAAGAGCATTCATTTGATAGTTCAGT
+GTCATGGGGACAGCTGATGACAACACATGAAACATCCTGGAGGGTAAGAGGCTAAGAAATTCAAAAGGTA
+AATAAGATCTGTGCAGATTGGGAACCTGAGGGAGCTGACTCCTGACCCAAACGCTGCCTGGTCCAGCCCA
+GGGAGGGAGTGAGATGAGACTCACAAAGTCTGGGCTGTAACAGACTCGAGCCAGCTGATTGGAGACATCC
+ATAGCCTGGTTCTCCAAAATGTCCACGCGAATCTTCTCTTCCTCTGTCTCCCCACCTGCAGCACACACAG
+CACAGACGCATCTTCAACACCTCACACCAGCCAAGCCAAGGGGGTTGTCCCCCACCCCACCCTGCTGACG
+CCCCACTCACACAGGTTGTGCTTGCGGGCAATGTAGCGCAGGATGGCATTGCTCTGCGTGATCTTGTGTG
+ACCCATCAATCAAGTAAGGCAACTGCTTGGACAATAAAGGATGGTCAGACAGGATGTGAGGTCCCAGCCT
+TCCTTCTCTGGATTAGGACTGACATGTGGCCTGCCGTTCATCATGGCTAGCATGGGAAGCCCTCTACGAA
+CACACTGCACACTGAATGGAAAAGCTAGGACACAGCCCATCTCAGAAACGCCACCAGGAACGGAAGCAAG
+TGAACAGAAGGAAGCATTGACTCCAAGCCCCTCTTCCAGGTGAGGAGGTGAGATGACTTCCCCAAACCTC
+CCCGTCCCCAAACCTCCCCGTCCCCCTCTACCTACATTGGGAAAGTCCAGGCCCAATTTGAACTTCTCAC
+TCAGCCACTGGCTTCGGTCATAGTCAGGAGCTGTGATTGATGAGATTGCATAAAGAGAAATCAGCCCAGT
+GTCCAACACACCTATTTCAGGGACCCAACCCCCTAAGACAAGGATCTGTAACCTCACACAGTGAGCCTTT
+ATCCCTGAAAGTTTCAGGAATAAAGAGTGCTTGGAACTGAGGGGCCTACAAAGTTTAGGCAAATCTTGAT
+GAGGAATGTGATCTAATTTCAACAGGAGGGGTATCCACCCCTTCACTTCCAGGAGGAAGGAAGGTCAAGC
+CCAGCTCTCTGGGGCTTACTGAAAGAGGCAGATTCTAAGTCCATCTGGGGGAGCCCCGTAGTGCGGGAGG
+CGTGAAATGTAATGTAGGCACCAATTGCTAGGCGCCAATGGACAGGAGACGTCTGACCGACAAATAGCAT
+TACCGTCTCCCATGGTGTATCTCTTCTCTTCATAGCTTGAGCCTGTGTATTCTAGGAGCAGCCGAATAGC
+GTGAGCTAGCTGAAAAGATAGGAATGGGGGCGGGGGACTTCTGTGTGACTTCACCTCCTCCAAGTGCATA
+TACACGGGCGCACAAACCAGCAAGGACATGTGTGCAAGACCCTGAGCCAGACAAGCAAGGGATTCCTCTC
+GCCCCACCCCATCCTCACCCCCCAGCTCGCCTTCTCCTAACCCTGCAATCCTTCCCACTCATCAGGTGGT
+TGCTCACCCCACGGATGTCCCAGTAACCCAGTGTCATAGGCATGATGCTGGTGCCTGAGACTGAGTCCAC
+CGGCCTCCGGTGTGAGCAACTTTGACTTTATCCTAGTGCCCTCGGGTCCCTGGGCGGGGCTGGTGCCCGG
+CGTGATCCTGCCCCTCCAGACTCTGCCCCGCAACACACACCCTGCTCTACAGCTTTTCCCCTGACAGTCA
+CCTACAGAAGCCAAATTACTGAGCTCCGCCCCCAACAGACCGGAAAGCGTAATAAGCGATCCCGCCTGCA
+GTTCCGCCCAATCGTCCAGCCCGGTGCGCAGATGCAGAGCTCTAGTAACTCCCTGCCTTTGCCGTACCGC
+TCTGCCTTCTTACCCTGTTGCCTTCGGTCTGTGGGAGGGTCAGGCCACTAGGAGAGGCCCCAACCCGGAA
+GTGCTGATTAAGCACTTCAACCCTGCTGTCAAACAAGACAGACTTCTGCGGGGCTCTGCTGATGACAGTG
+GTCCCCAGGTGATGTTTTTCCGTCTGGCAGAGAACATGTTGTCTTTCTGTTCAGAAGTTCCTCATCTGAA
+TTCAAAGAGATTATTTCTATCACAGATAAACCCCTGACTCAAAAAAAGGGTGGAAAATGGAAAATTTGAA
+GTTTCATACATGGTCAGGATGTCTGTATTACCCAAGGACAGAGGCAGCTTTCTCTATTGAGTGACCGTCT
+TAGTGTGTACTTACACAGACCTAAGACTTCTGAAATGTCCGAGGGTACACAGGGTCAGCATGTGACAACT
+CAGGATTAGTTTATAGCAAATGCAATCAAGAGAGAGATGTCTGAGTTCTCAATAGCTGCTGTTCATAGCA
+AGGCACACTGTTTCTTCACACAACATTGGTTGTCGTATGTGTCCAAAGACTGTAGTCTGCGGAGGGGGGC
+TTAGGGAAGTGGGCTCTCATTTTGTACAACAGACATCACACACTCACTGCATTTTGTTATTACATTAAGA
+CTTCTACAAGATCACCAAATAAGAATTTTAAGCTCCATTATAAGTCTTCCTGAGTATATAGGGTCCATCT
+TTAACTAAAATGTGCTAAAGTGGCAGGTGACCATACTTACAATTCTTACTAATGTTAAGAATGTACTGTG
+GCAATGCATTGTGACAAAAATAATTCTCACATTCTAAGAGACTGGGACTTTCACTGGACCTCACAGCAAG
+AGTTAATAATACATCTTGAAGTCCCTTAAGCTTGCTATTCTACTATGTACTCTGTCTGTCTGTTTAGCCT
+TCCTTTAAGAGAATGATGTAAGAAGAAATACTTGTCTTGGATTTTCCATGTAATAAACTTCTATGACCTA
+AGATAACACGCAGCTGTAATCTGTTTTTTGGGTTTTTTGGTTGATGATGGTGGTGTTTTGAAGGTTTGGG
+GGGGTGGGGGGGGCAAGTTTTTGTTTGGTTTTTATTGTTGTTTCTCAGAGACAAGGATGTGTCACTCTCT
+GCATATCCCCAGATGTCCTAGAACTCACTGTATAGACCAGGCTGGCCTTGAATTCACGGAGATCCACCTG
+TCTCTGCCTCTGTGTGCTGAAATTAAAGGCGTGTGTCATCACACATGGACCTGTAATCCTTAATTGTGTA
+CTTGTCTTAGTGACTTTTCTACTGCTGTGGCAAAGCATGTGTTTTGCATGTTGGTTTATTGTGTTGCTTA
+ACTTGCCATTTCCCTTGCTTGTCTTCTGACTTCCCTACTCATTATCAAGCATAGAACTATTTCTACATAT
+CTCTCAATGTGTTTGTCTCTTTGATTTTTGGTGTTTGAGATTAGGGGTCTCACTATGTAGCCCAGGCTGC
+CCTGTAACTCACAGAGTTCCTTCTGCCAATTTTTGCTGTATGTATTTTTGTCCCAGTCCCACTATGGAAG
+TGATCATTTATAAAAAGGAATCCCGTAACCCTCCTGGGAGTTCTCCCAAATTATCTGTGCTTGCCTCGTT
+TTGAAAGGGTTATCTAGTTATCCTTGAATAAGCGTGATTTCTTGGCAGATAGTTTTAAATATCTGGAACA
+CTCTAAGGACTAGTTGGAATCAGAGTAAGTTAGTGTACAGGAGAAGGTGTCAGCCTCATGTCACTAGAGA
+CATGGTGGAACTCATATGGCTCCAGACGTGTGAGCATCCTTTGTGTCTCAAGATGAACAAAGTTCTGGGC
+GTTGGGTTCTTGTTTTTTGTTTCATCTTGTTGCTTAAAACAACAACAACTTATTTTTGACTATTTAATAC
+CTCCGTACAAGATAGACTTATTGAATAGAATTTAAACAGTTCCTGCTAGACAGGTTTAGATAGCCCTGAA
+GAAAGCAAGAAGCTGTGGCAGCCTGAGTAACCCCAAAATAAATTATAGTTGATTGGCCTTGCTGATAGGC
+TTGTAGTGACCTCAGGAGGACCCAAAAGTTTTGCAAGACTGGAGACTGGAATCCAGACCATGAGCAAAGG
+TAACTACCAGAGGGGCCTCTTAAAGGGAAGTTCCCTTCTGACTTTTTTTTTTTTAAAGCAGCTGTGAAAT
+TTTTGTTGTCAGAGTCATGTGAGGGTCATAAGTTAGCAACCTTCAGATCCAGAGCTGCAGCGGTGATCTG
+GGAGAAGGGCAGGGAAGTATTATAAACTCTTAAGTGACAGGTAGAGAGTACCTGGCATCAGACATAGAAG
+GATCAAGATGGGAGAAATAACGCTTTTGACTTGAGAGCTTCAAAGTGGAAAGAAGAGAAGGACTAATGGG
+CAGGAAAGGGCTCTGGATCTGAATGTTAATCTTTACCAGTGTCTGACATGGAAGCTGTCTACCTGTGACT
+CAGCATCTTCTAGAGGACTCCCAGGGATCTCATTTTAAGATTTCTATGGTTGGCTGGCAAGATGACTCAG
+CAATTAAGAACACTTAGTGCTCTTGCAGAGGACCTGGGTTTGGTTTCCATCACCTATATCAGGTAGCTCA
+CTACCTCCTGACCCACACCCCTTCTGGATTCCATGAGCACTTGCACACATGCAATACATACATACATACA
+TACATACATACATGGAGAATAATTAATAATTTTTTAAAATTTTCTACTGAGTGGTGGAGGCATGCATGTT
+TAATCCCAGAACTCAGAAGGCAGAGGCAATCAGATTTGTATGAGTTCGAGGCCAGCCTACCTACAGAATG
+AGTTCCAAGACAGCCAGAGCTACACAAAGAAACCCTGTCTCAAAAGAAACAAAAACAAACAAAACCATTT
+CCGATAGAATGGGTGCTCAGTGCTTTGGAGTGAATATGTACTCTTCAAGCTGATCAATACTAACCTTTGG
+ATAAATATTTTAGAGTTTCACAAAACCCTTCTTATGTTGACTAAAGGAAAATTCTTCTCAGGCCTTTAAA
+AATAGTCTCATGCTATAGACAATAGCTAGGGAGTTTGACATTGAAGCTAGTACCTCTTGATAGCATTGTC
+GGTGCTAAGTGAATCTCTTACACTCCTTGATTGAAGTTCATCACAATTACATCGCGCCTAAGATTGGAGG
+TGAAACTTTCAAACACATGGGCCAGCATCTTATTAACTCCTCAGGAGGTCATCCCCAGGCCTGGAAGGGA
+CTGATCTGGTATTCTTTAAAGCCACTGGTAAAGTGTTAAGCTAAGGGAGAAAGGGGGCTTGGTTTGTTTA
+GTTTTACAATTTTAGTAGCTCTAGTTTTCCCCTGAAAATGAATATATACATATATATATATAGCACAGTT
+AAAGCAATACAAGAAAGGAAAAGCTTCAATTTGCCCTGCGTGGAGAAGGGAGCTGGGAAGTCATCAGAGC
+ATATTCCACTGTGGGAGGCAGTCCATCCATTTAGAGGTTGGCAAGGTCTCCAAGATCTTGGCTCTGTTCC
+ACATCGCACACTACACTGATTAGCAGCACACAGCTGGCAGGAAGACATAGTTTTCAAACCTCTTTGGTTA
+CATCTGTGAAGTCACAGCACCGATGTTGGTTTATAATTGCTGCTGATTTATGTGCCATGAGACACTTTTG
+CTCAGCTACCTTTGAAATTACCTTTATAATATGAGCAGACATCCTGACCAGATAATGAGCACTCAAAGCT
+TCCTGGTTTTCTGCCTTTTTTTTTTTTTTTGAGTCACTTATTTGTAATAATTAGAAGCAATCTCTTCAAG
+TTCCTCCAAAGCCTGTCTTTTCCAGGTGATTAGAGTCTTGGTTACTGTTGCTGTGTTGAGCACGCTGCTT
+GCCTAAAGCTCTTCTCTGAAGTTCTAGATCCCATGCTCAACAGATATTTATTTAAATTGTATTAAAGCTA
+AAAGTTTAATTATTGTTTTATAGGTTTGGGGTTTCTTTTTGGAAGGGGTTGTTTGGTTTGGGGGGGTGTT
+TGTTATTGTTTTGACTGTGAGTTTTGTTTTAGTCTTGGGTTTGTTTTGTTGAAACAGCATCTCACACTAC
+AGCCCAGACCGTGCTGGAACTCACTATTATGCCCAAGCTGATTTCACACCCCCCCCCCGGGCAATATTTC
+TGCCTCATGCTCCTGAGAGCTGGGGCACAAGGTGCAAGCACACCACAGGGGGCTTCAGTGTTTCATTTGG
+ATCTGTTACTGCAGTTTTATAATTCTTGCCAAAGCCAGAAACCTCTACATTTCCAATGCATTCAATGTTG
+ACAGAACCTCACAATGATTTTGGATACCAGAAGTGATTTCATTCCAGAGTGGTCTATATTTTAAGGGTGA
+ATTTAGGTCTACGCTAACTTATTTAAAGATTTTTAGAAATATGTGTCTGTGAGCACAACTGCAAGTGGCC
+CAGGAGGCCAGAAGCATCAGATCTTCTCTGTAGCTGGAGTTACAGGAGGTTGGGAGCCACCCAGTGTGGG
+TGCTGGGACCCAAACTCAGGTCCTCTGCAAGAGCAGTGCATGCTCTTAACCCCTAAGCCATCTCTCCAGC
+CTCCGTCATCACATTTTTTAGAAAGATTTATCAGAAGAGGGCATCAGGTCCCATTACAGATGGTCGTGAG
+TCACCATGCAGTTGGTGGGAATTGAACTCAGGACCTCTGAAAAGAGCAGTCAGTGCTCTTAACCACAGAG
+CCATCTCTCCAGCCTCTGTCACCACATTTAAAAACATGAGAAAATAAAACCAGGTCAGGACTCTCCAAGC
+GACTTTATAACAAAAACAGTTAAGAATAAAGGAAGAGCATGGAGAGTTAACAAAACAATGCCGGGAGAAA
+AAGCTCTAGGAGCCTGGGTGCACAATAACTAAGTCACTTAGCATGCAAAATCCGCATCCTAAATCCACCC
+CTGTAGCATGCTATGTGCACTGGCTGGTTCTGTGTATCCGCTTGACACAAGCTGGAGCTATCACAGAGAA
+AGGAGCCTCCCTTGAAGAAATGCCTCCATGAGATCCAGCTGTAAGCGTTTTCTCAATTAGTGATCAAGGG
+TGGAAGGGCCCAGCCCATTGTGGGTGGTGCCATCCCTAGGCTAAGCAAGCTGAGCAAGCCAAGGGAAGCA
+AGCCAGTAAGTAGCATCCCTCCATGGCCTCTGCATCAGCTCCTGCTTCCTGACCTGCTTGAGTTCCAGTC
+CTGACTTCCTTTGGTGATGAACAGCAATATGAAAGTGAAAGCTGAATAAACCCTTTCCTCCCCAACTTGC
+TTCTTGGTCATGATGTTTGTGCAGGGATAGAAACCCTGACCATGACACTATATTTGTGACAGATTGCAGT
+CAATTTTCACACATAAACTAAAAACATACATGATATATTAAAGCTTACTTTCTTTGTATTTGAGACAGGG
+TCTCTGTAGGTAGCCCAGGCTGCCTCACAGTCTCCCTCTTTGTGCCTCTGCCTCTCAAGTGCTAGAATTA
+TACAATGTGCCACCATGCCCTACTAAAAGCTTTGGATAAGTTTTTAAGTACACAGTGTAGTTCTGCAAGC
+ATATTATCCTATAGCGGACACATAGAACATATTTCCCTTGAGTCCCTGAGGTGATGAACCCATTCTTTGC
+AGTTGGCATTTTCTGCTCCAGTAGCCGCAGGCAGACTCCATTCTCATACTACTCTGACTGTACAAGTTCC
+CGTTTGTTAAACATCTCCTCTACGCAATTATGCAATATATGTTTCCCTATGAATAGTTCATTTCACTTCC
+CTGGTTTATCCATGTTGTTGCATGTTGCAGAATTTCTCTCTTCATCAAGGCTACGTAATAGACCACATTT
+TCTCCATCCATTAACCCACTGATGAGCATTTAAGTGGTTTGCATGCCTTGACCATGGTTTTGTGCAGCAA
+TAAAAAGAAGAATAGCAGTATCTCTTTGAGATCTTGATTTCAATTGGGAGCAGCTTATATCACTGGACAT
+GGAAATATTGGAGTCATGTGATAGTTTCTTTTTTTTTTTTTAAACTACTGCCATACTGTTTTTCATAGGG
+TTTTTGACACTTTCTATTCTGCTAACAGTATATGAGCATTCTAGTTTCTGCACTTGTTATGGGGAACTGG
+GGCAGGTAGCTACTCTGGTCAGGTGTAAAATGATAGTTAATTGTGGGTTTGATGTTGAGAGTCTTTTAAC
+ACATCTGTTAAACGTTTTCATGTCTTCTTTGGAGAAATGTCTAATTCTGGTCCTTTGCAAAGATGGTTCA
+ACAAATGCAAATCAATCCATTGGACTTGCCATGTTAACAGAAGGAAAGACAAAAACCACATATCATTTAA
+ATAGATACAGAAATGTCATCTTGTGGAATTCAACCACTCTCATGATTAAATAACTCAAAAAATAAATACA
+GAAGTTTTTGATTATGTTTATACCCACATGTCAACTAGACACGTCCACATAATAAAAAGCTTACATCATA
+ATTGGCTGTAGAAACTGGGAATTTTTCTTCTCAAAGAGCATGCATATCTGTGTTCCATGTGCTTGATGCC
+CAAGAAGGTCAAGAGGGGGCACTGGATGCCCTGGAGTTACAGGCAGTTCTAAGGTGCCATGTGGGTGCTA
+GAAATCCAACCTGGCTCCCCTGCAAGAGCAGCCACTGCTCTTAACCACTAACCCATTCCTGCAGCCCCTT
+ACTCAAAGACCACAAACAAAACAAACAATGTTAGTTAGCTCTCACAGTTCCTGCTCAACGCAGCACCAGA
+GCTGTGGCACAGCAATCAGGCAAGCAAAAGAGATAATGACACCCACAGGGCAAAGAAAAGAAAATGTACC
+TTGATTTACAAATTTACTCATAGAAAACCTCGGTGTATGCTTCAGTGGTAGAGCACCTTCCTAGCACCTG
+CAAAGCCCAGAGTTCAATCCCTAGTACCAATAAAAACAAATAATTAAGGACGGGAGAAGTTATTCACTGG
+TTAAGAGCACATACTAGTCTTACATATAACCCAAGTTTGGTTGCCAGCACCTATACCGGATGGCTCACAA
+GTGCTTACAATCCCAGCCCTACAAGATCCAATACCCTCTCCTGGCTTCCTGCACTCACATACATATAACC
+ACAGACAAGTGCACATAATTTTTAAATTAGATACTATTTTGAACCAGTTAAAAGCACTGCCCTTCCCAAG
+AACCCGAGTTCAATTCCCAGCACCCACATCAAGTGACTCACAACCATTTTCTTCTTTCTTTTCAATGTGG
+CTGCCTTTAAAGTCTGGCCCCAGAGGGATCTGACACCTCTGGCCTCTTAGATACCTGCACTCATGTGCAG
+ACACCCACACAGAGACACAAAGGCATATAATTAACTGTAAGTACCATTTGTGAAAGAGCTAACCTTCCTC
+ATTCCCCGTAAAAAAACGAAGCTGAAACCTTCGACCATCAATAAAAGTCAAGTCACACCGGGTTTGAGGT
+TGAAATTTGAGACCTGATTCTATAAAACCCTTAGAAGAAAACAATGGTCTAAGCAAGACTTCTTGGAGAT
+GGTACCAGACACACAGACAGCAAAAGTAAAAATAGAGGCCAAGAGGGTGACACCAAGCTACAAAACTTCT
+ACGAGGTACAGGAAGCGACCAACAGAGTAGGGAAAACATCTGTGAATCTCATACCCAATGAGAGGTTGAT
+ACCGCAAATAAACCAAGAACACCCACCACTCAGCAGAAAAACATGCTTTAAAGCCTTGAACTATATACAT
+TTAAAATATAATTGATTTATTGGCCATGCTAAATAAAGTAAGTTTTAAAAATCATCAGTTAGTGATAAGT
+TTTCTAAACAAGCATAAAACCATTCCCCATTCTGCACACATAACTGGGTCTCTCTGTAATTACTGCAATC
+AGTGCAATTATAACTGTCCAGCCTATTTATTTTCTTTAATTAGAGTAACCAATCTTTTTTTCATTTTCTC
+TAGACACAAAATAATCCAAGAGGAAAAAAAAAGTTTTCTAAACTTTTGAGCTATTTTTAAATTTTTTAGC
+TTATTTTGGAGACAGTGTCTTAGTTATTCCAAGCTGCCCTCAAACTTCTTATGTAGCCTGGATGACCTTG
+AACTCCTGACCCTTCAGACCGAATCTCCTGAATGCTGGGCTCACAGTACTGCACCACAACACTTAGTTTA
+TGTGGTAGTGGGAATCAAACCCAGGGCATGCAGGGCAAACACTCTACCAACTGAACTACATGTCCAACCC
+CTTTAAATTTATTTTTAACTAATACATAAAAATTACACATATAATAGGTATCACATTATTTTTGATAAGC
+TTATACATCATGTACTGTTTAAATCATGTTAAATATATTTCACACTTAATTATTTCTTTATAGTTAAAAA
+CATCCGGACCCTTTCCTTCCAGCTTTCTGAAGAACACATGGCATTATTTGTAGTCATCTTCCTGTCCCAT
+AACACACCAAGGGTTTATTTTAACCATAACTTGGTACTTGAGCGATCTTTCTACAAGCATGTGTCTCACG
+TTACTCAAACCTTCTGTAAAGCTAAAACAAACACACATGTGTTAGCATCTCTAAAAGTTACTGCATTTTA
+AGAAATAAGGACAGAGTAAAATTTATTAAAATTTTAAAAACAAACTGACAACAACAACAAACAATGGGTT
+TGGCTTTCCCCACCTCCCCAATTCCTAAAACCTAGTTTCATTTGTCTGGAATCAGGAGGCCCTGTCCAGT
+TCTGAATGGTGATGGCTTTGCCTTTGACTGACTCACACACAGGCAAGTCACAGACAGCCATTCATTTGAC
+TTGCCCCTCTAGCTCTTCCCACTGAAACTGCCCCTTCTGACACATGTTTCTCCCCAGAGACAGCAAATTC
+TTGTGTCTACCTAATAACTTGTAATCCTAAAAGGCAGGCTCTTAAATGCTGTAATCTTACCTGTTGAAAT
+CCCAAGAGATTAAAATTTCTAACCTGAAAAAAAAATCACAACCTCAAAAGATTAAAATCCCAGATGCTGA
+AATCCCGAAAGCTTAAGACAGGCAGCCAGCCAGCCTGAGCGGGGAGAATCAGCGCGGTCTGGGGTTTGTA
+TACTGTTCACATCTTGCTAGGTGGAAAAAAAACCTAAGAATGTGGTGAAGCACTTTCAGGGCTTGCCTAG
+AAGGTGTTTCCCGAGAAGATTAGTGTGGGAGTTTGAGTGGACTATTCAGGGAAGATAACGCCTCAGTGTG
+GTACTGTTACCGGATCTTAGGGTCAGAGCCTGGGATCTGGCAGTCCCAACAAGGTAGGCTGAGCCATCTG
+TTCCCAACACACCAATTAAAGAGACAAGTGATATTGAAAAGCGTAGAAAGAAGAGTTATTCAACACCATC
+ACGTTGGAAAGAAGAACAAATAATGAGGCCAGGTGACACTCGAGTTGGTCTTTGGGGTACCAATGTAGGA
+AGTTGATCCCACCCAGCACTCACAATTTCTTGGCTCTGATCTGTCCTGCCTGGAGGTCTGACAAGTGTCA
+GACTGTGTGAAATCTCATCATGGTAAGCAAGTTCCTTCTTTGGTTGGCACAAAGACTTCAGCCTTTCCCT
+TCCTCCATCATGGGATTAATGGGAAATTTTTAACTTCTTGAAGCTCATTATTTCAATGGTGTGGTAGTAC
+AGACGTGTCTCTCTTTTGAATAGCCTTTCCCACCAGCACCATCCAGTATGCCCAGCTAAAAAGAACAAAC
+ACAGAAAGCTAACTTATCTCTGAAAGTGGAGACCAAGCCTGTTGCCACTGCCTTGGATACCAGCCCTCCA
+GAATCCCTGGCCTTTGGAGTCCAGCACCTGTGCCAGTGAAAGACCCACAACGTGAGGACTCCCCCACGTT
+CTGACTCAGGAGAGGCGACACCCCAAAATCACCCACAAGAAACGATCTTGCTGCAAACTGCAAGAGGATT
+TTTATTCAAGAGCACTCTCGAGCCCACGGTCATACACCACGCAGGGGTAGAGGACCGCAGTGCCCCGAGT
+AGCTGGATAAGGGAGTATTTAAAGGAAGAAACCACAACTCAAGGAGGTGGGAAGGGCGTTGTTGGAAAAT
+ACCAAAAATACCAGTTAAGAGTCACAAGGAAGAGCAAAGTCACAAGTGTCACAAGGAATACCTGGTAATT
+GTTAACTCTCAAGACAGTTTCTAAGAGCCCCTAACAGTAGCACATTTGCATTGCAGGTTCCGGTAATGGT
+CAGGTGACTTTCTTTGAATGAATACTCCTTGAACCCAGGAAGTGGGTGGGTGGAGGGATGTCGATATCTG
+TTTTATGACCAGCACACCTAGGAGCACTGAGTCACAAAGGCCACATTCCCAAGCCTGGGCCTAAAGGCCT
+AGAATTTTGTTTTTACGTTTACTCTTTCACCAGCATTTTTCAGATCCTTGTGCTTGCTTGCTTGCTTGCT
+TGCTTGCTTGCTTGCTTGCTTCCTTCCTTCCTTCCTTCTTCCCTCCCTCCCTCACTCACTCCCTCTCTTC
+CTCCCTTTCTTCCTTCCTTTCTTTCTCTCTCTTTCTTTCTTTCTCTTCCTTCCTTCCTTCCTTCCTTCCT
+TTCTGTCTTCCTTCCTTTCTTTCTCAGCTTTCTTCTTTCTTTCTCTCACTCTCTCTTTCCTTCTTTCTTT
+CGTTTTTTCTTTTTTAGCTAGGGTCTCTTCACCTACTTGTCAGCACTAGTTTTTACAATTTGCTAAGCCA
+TATAGTTCAGGTTTGCATTCTATTGAATCCTGGAAATATAGATTCTACAAAGACATCCAGAGTTCATTCA
+CGGCAAGCTTTCCCATTTTGTTTGCAAATTTTACTCTATGGAAATTCAGTTCCACCACAGTGACTTACTG
+AGCACTGCCCATGCATGTGAAAATGCTGAAACTTCCCGGTAAGTGAGGGCATGTTCTTTTTCACACATCT
+GCATTTGTGGAAGATGAAGTTTTTAAGGATTCCACCCCTTTGATCGAGTGCCTGTGCTGTGGGCTGTGGT
+GACTTGCAGAGGCTTTTTGTCTTGTTACAGATGGGCAGTGGCAGAGAGAAACGCACTGCCGCACATGGTT
+GAAGAAGCTGTACCACTAAGCCGCGGCCCAGAACCCAAGCTCTGGTTTCTCTTTCTTTTTTGTTTGTTTA
+TTCTTTGTTACTTTTTTTTTTTCAAGACAGAGTTTCTCTATAGCCCTGGCTGTCCTAGAACTCACTTTGT
+AGACCAGGCTGGCCTCGAACTCAGAAATCCACCTGCTTCTGCCTCCTGAGTGCTGGGATTAAAGGCGTGC
+GCCACCATGTGTTTGTTTATTCTTAGTTGAGGTTTTGTTTGGTTTTTGTTGTTGTTGTTTTCTTTTCTTT
+AGTCCTGGCTGTCCTGGAACTCACTGGGTAGACCAGACTGGCCTCAAACTAAGAGATCTGTTGCTGCCTC
+TGCCTCATGAATGTGGGACTAAAGGCGTGCACTGTCTTTTTCCTTTTCTATTTTATTATCCAAAGAGGAA
+TATGTATGAAGGAATCTTTACAGGGGTATGGGCAACTAACACCACTGAAGAAAATATCTCAAAATATCTC
+TTCCTCCCGCCAGCAACCATTAACTGTACTGAAAGCCTCAAGGTGGGAGGGAAAATTCTAATTAAAGTCA
+AAGACTACCATTGACCTGATAGCCTTCTGAGGCTCAGTGTGTAATGGGGGACTCCCTTTATTAACAGAGT
+TTTTCCTTCTCTGACCTTGTCTGGGGAATTCTACGCCCCAGACTCTCCTCTACCTGAGGAATGTCATGGA
+GCCTCAATTACTTTATAGGATCAATTTCCCTTCTCCTGACAAAAGGTGTTTAGCCAGCATCTGAGATTCC
+TGAAATGCTTCCCCACGCTAAAGAGGCATTCCAGGTCCCCTAAGCCCCCAGTGACCTTTGCCCATCCTGG
+ATGTCCCTACTATCAGTCCCCAAAAACTTCATAAGCCTTGAATCACCCTAAGTAAAGTTGATCTGCCTAC
+CGTCGAGGGGTCCCGGTGTGGTCGTTTACCATCTGTATTCACAGGGCTTCCGGACCCTCTGTTCAGCTTT
+TCCACTCCCTCTCCCTTGTGGTCCCAAATGGCCAATGATCCACAAGGGTAGGGAGTCCCACAAAGGTGGT
+TCGCTCCTGTGTGTTTTGAGACAGGGTCTTATTATGCAGCCACAGGCTGGCCTCAAACTTCAGAGCACCC
+TGCCTCTTCCCCCACTCCCGGTGTTGGGATTGTTGGGACGGCAGGTATGCAGGGCCATCATTGTTTTAGA
+TCAAGCAACAAAAGGCTTGGGTTAGAACTTCCAAGACAACTCACCTGGTAAACCCTATAGCATCATTGAA
+TATGGAGAGCTGTTCTTTTCTGCATAACAGAAAAGTTGGTACAACTCCAAAGCCAAAATTATCTTGAGTT
+ACTCTAGCCTTATTAGTACTCAGCCTGTTGTTGCTTACCTCTGTGTATGGTGTAATATTCCTATTATGGG
+AACTCCAAACTGCTTAATGAAATAAACTTTAAGGACAAACCAATTCTAGTAACGCCTGTCTGTTATGGCT
+TGGATGAGAAACGTCTCCCAACGTGCGTAAGCACTTGGTTCTGGTTTGCGAAGGTGAAGTATTGTAGAGG
+AAGCACATCACCAGGGATGAGTTTTATGGCTTTGTTTTTTATTTGTTTGTTTTGTTTAGGTTTTTAAGAC
+TTTTATTTTTGTTTGTTTTGATTTTTTGAGATAGGGTTTCTCTGTATAGCCCTGACTATCCTGGAACTCA
+CTCTAGAGACCAGGCTGTCCTCAAGTTCATAAGAGATGCATGTGCCTCTGCCTCCAGAGTGATGGAATTA
+AAGTCATGTACCACCACTGCCCAACAGCTTTGAGGTTTTATAACCCAGCCCTACCTCCTGTGTTGTAGTT
+CCCCCCACCCCCATTGTCTCCCACCACCCTAATTCCAGACTGCCTTTCAGGGCATATTGGTTCTACCCAC
+TCAAGAATTGTAAGTAAAAAATAAAAGCTTTCTTCCTAAAGCTACTCCGATCGGGGTGTTTTATCACAGC
+AATAAAAAAGTGAAGCATCCTCTTGGCCCACTGATGCTCTCCCCATCCCCTCCGCTAACCTACACAGGTT
+AACCTCACCTTAAACCCCAACATGTGGGAAGAGAGGCAAACAGAACTCTGTGGGTTCCAGACCAGCCAGG
+GCCACACGGTGAGGATGCTCCTCAGAAACAAAAGCAAAGTGATCACGTTGTCATTAACACTTCTATTTAC
+CTCATCCCTCCTTCTTAACAAATTCAATCCAAAAATAATATCAAAGCTTGTCATAGAATCCCTGATCTAC
+ACTTGTGGTTCTTCCTGGAATTACTTTTTGAGTCTTCCACCTGTGAGCCCTGCTCTATTCTGCTTGCTGA
+TTCGATTTACCCAGGTTGCCATTCAGCATTTATATGGACAATATTTCCCCACTTCATAAATCCTGGTACC
+AACCTCTGCCTCAAAGACCACGGAAAAGAGGGGAGGTGTGGTGGCTAGATTGAAAATGGTCCCGGTAGGC
+TCATGTATTTGAATACTTGTCTCCTGCACTCCGTGGCAGTATCTGGGAAGGTTACAGTCTGGGTAGGAGG
+TGGAGCCTTGCTGGAAGGAATCAGTCAGTCATAGGAGCAGGTTTTGTGTTTAGAGCCTTGCCTGACTTAA
+CAGCTACAAAAGAAGCACCCCCCCCCCACGACCCTACTACACCTCAATAGTCAACTTGAGGAAGC
+
diff --git a/seq/mgtt2_x.seq b/seq/mgtt2_x.seq
new file mode 100644
index 0000000..236d72b
--- /dev/null
+++ b/seq/mgtt2_x.seq
@@ -0,0 +1,12 @@
+>>mgtt2_x      Length: 1089  January 26, 2000 04:00  Type: N  Check: 1394  ..
+CTGAGTTGGG TCCACGAAAG CCCAGCTAGG CCATTACCGC GTCCGGGTGA GACTAAGGTC CTGGGCTGGA TTCCTGGCTC CACGGTCCGC TGGAGCAAAT 
+CGCATAAGTC AGTCTGAGTG CGCGCGCCCT CAGCCCTGCT TTTGGTATAA AGTCCTCCAA AGCGTCTCCC TCCCCAANNN NGATCagCAg GtGTCAGCTA 
+TCCAGAGGAG GAAATCGTTT GGCTTGGcCA ACTGAGGcTG TGCTGGACCC CAGCTTGCTG TTATCGAACG CAGTCGGCAC ACCATCTTGT GTCGCTACCG 
+GCAATGGGCT TGGAGCTCTA CCTGGACCTG CTGTCACAAC CCAGCCGCGC TGTCTACATC tTCNGCCAaG AAGAATGGCA TCCCCTTCCA GACGCGTACC 
+GTGGATATAC TCAAAGGGCA GCACATGAGC GAGCAATTCT CCCAGGTGAA CTGCTTAAAC AAAGTTCCTG TACTCAAAGA CGGAAGCTTC GTGTTGACCG 
+AAAGCACAGC CATCTtGATT TACCTGAGTT CCAAGTACCA GGTGGCAGAC CACTGGTACC CGGCCGACCT ACAGGCCCGT GCCCAAGTCC ACGAATACCT 
+GGGCTGGCAT GcCGACAACA TCCgtGGTAC TTtcgGAGTG CTCCTATGGA CCNAAgGTGT TgGGGCCACT CATTGgGGTc CAgGTTCCCC agGAGAAGGT 
+GGAACgGAAC agAGATAGAA TGGTCCTGGt TCTGCaACAG CTGGAgGACA AGTTCTCAGG GACAGGsCTC CTGTTGGCAG CAGTGAGCTA GCGATCTCAT 
+TCTCTGGAGA GTGATGCAGC GTGCTCTTGC TATACCTGTT GAGGACGGCT CAGCTGACAG CATGCGAGAA AGGTGGAGGC GTCTTGGTGC TGAGCTGTGT 
+AGAGCTCATA GACATCTGGC ATCTGGACAA GCAGCAGAAA TGTACAGTAC CCCTTCGAGT CATGCACATG CACTCAATTG TAGATCCTGA TGGTTGACCA 
+CATAAGACTA TTTGTGTTAA AAAAGGGGGC CGTCCCATTC CCTTATGATC GATACATACT GGCTCCTTTA CACATNGATG GAAAACTGC
diff --git a/seq/ms1.aa b/seq/ms1.aa
new file mode 100644
index 0000000..46f476a
--- /dev/null
+++ b/seq/ms1.aa
@@ -0,0 +1,6 @@
+>test m1
+MPMIL,
+MLLEY,
+MGDAP,
+MDTRX,
+MLCYN
diff --git a/seq/mu.lib b/seq/mu.lib
new file mode 100644
index 0000000..e7aec3e
--- /dev/null
+++ b/seq/mu.lib
@@ -0,0 +1,50 @@
+>GTM1_MOUSE GLUTATHIONE S-TRANSFERASE GT8.7 (EC 2.5.1.18) (GST 1-1) (CLASS-MU
+PMILGYWNVRGLTHPIRMLLEYTDSSYDEKRYTMGDAPDFDRSQWLNEKFKLGLDFPNLPYLIDGSHKIT
+QSNAILRY
+LARKHHLDGETEEERIRADIVENQVMDTRMQLIMLCYNPDFEKQKPEFLKTIPEKMKLYSEFLGKRPWFA
+GDKVTYVD
+FLAYDILDQYRMFEPKCLDAFPNLRDFLARFEGLKKISAYMKSSRYIATPIFSKMAHWSNK 
+>GTM1_HUMAN GLUTATHIONE S-TRANSFERASE MU 1 (EC 2.5.1.18) (GSTM1-1) (HB SUBUNI
+PMILGYWDIRGLAHAIRLLLEYTDSSYEEKKYTMGDAPDYDRSQWLNEKFKLGLDFPNLPYLIDGAHKIT
+QSNAI
+LCYIARKHNLCGETEEEKIRVDILENQTMDNHMQLGMICYNPEFEKLKPKYLEELPEKLKLYSEFLGKRP
+WFAGN
+KITFVDFLVYDVLDLHRIFEPKCLDAFPNLKDFISRFEGLEKISAYMKSSRFLPRPVFSKMAVWGNK
+>GTMU_CRILO GLUTATHIONE S-TRANSFERASE Y1 (EC 2.5.1.18) (CHAIN 3) (CLASS-MU).
+PMILGYWNVRGLTNPIRLLLEYTDSSYEEKKYTMGDAPDSDRSQWLNEKFKLGLDFPNLPYLIDGSHKIT
+QSNAI
+LRYIARKHNLCGETEEERIRVDIVENQAMDTRMQLIMLCYNPDFEKQKPEFLKTIPEKMKMYSEFLGKRP
+WFAGD
+KVTLCGFLAYDVLDQYQMFEPKCLDPFPNLKDFLARFEGLKKISAYMKTSRFLRRPIFSKMAQWSNK
+>GTM1_RAT GLUTATHIONE S-TRANSFERASE YB1 (EC 2.5.1.18) (CHAIN 3) (CLASS-MU).
+PMILGYWNVRGLTHPIRLLLEYTDSSYEEKRYAMGDAPDYDRSQWLNEKFKLGLDFPNLPYLIDGSRKIT
+QSNAI
+MRYLARKHHLCGETEEERIRADIVENQVMDNRMQLIMLCYNPDFEKQKPEFLKTIPEKMKLYSEFLGKRP
+WFAGD
+KVTYVDFLAYDILDQYHIFEPKCLDAFPNLKDFLARFEGLKKISAYMKSSRYLSTPIFSKLAQWSNK
+>GTMU_RABIT GLUTATHIONE S-TRANSFERASE MU 1 (EC 2.5.1.18) (GST MU I) (CLASS-MU
+PMTLGYWDVRGLALPIRMLLEYTDTSYEEKKYTMGDAPNYDQSKWLSEKFTLGLDFPNLPYLIDGTHKLT
+QSNAI
+LRYLARKHGLCGETEEERIRVDILENQLMDNRFQLVNVCYSPDFEKLKPEYLKGLPEKLQLYSQFLGSLP
+WFAGD
+KITFADFLVYDVLDQNRIFVPGCLDAFPNLKDFHVRFEGLPKISAYMKSSRFIRVPVFLKKATWTGI
+>GTM4_HUMAN GLUTATHIONE S-TRANSFERASE MU 4 (EC 2.5.1.18) (GSTM4-4) (GTS-MU2) 
+MSMTLGYWDIRGLAHAIRLLLEYTDSSYEEKKYTMGDAPDYDRSQWLNEKFKLGLDFPNLPYLIDGAHKI
+TQSNAILC
+YIARKHNLCGETEEEKIRVDILENQAMDVSNQLARVCYSPDFEKLKPEYLEELPTMMQHFSQFLGKRPWF
+VGDKITFV
+DFLAYDVLDLHRIFEPNCLDAFPNLKDFISRFEGLEKISAYMKSSRFLPKPLYTRVAVWGNK 
+>GLNA_ANASP GLUTAMINE SYNTHETASE (EC 6.3.1.2) (GLUTAMATE--AMMONIA LIGASE).
+TTPQEVLKRIQDEKIELIDLKFIDTVGTWQHLTLYQNQIDESSFSDGVPFDGSSIRGWKAINESDMTMVL
+DPNTA
+WIDPFMEVPTLSIVCSIKEPRTGEWYNRCPRVIAQKAIDYLVSTGIGDTAFFGPEAEFFIFDSARFAQNA
+NEGYY
+FLDSVEGAWNSGKEGTADKPNLAYKPRFKEGYFPVSPTDSFQDIRTEMLLTMAKLGVPIEKHHHEVATGG
+QCELG
+FRFGKLIEAADWLMIYKYVIKNVAKKYGKTVTFMPKPIFGDNGSGMHCHQSIWKDGKPLFAGDQYAGLSE
+MGLYY
+IGGLLKHAPALLAITNPSTNSYKRLVPGYEAPVNLAYSQGNRSASIRIPLSGTNPKAKRLEFRCPDATSN
+PYLAF
+AAMLCAGIDGIKNKIHPGEPLDKNIYELSPEELAKVPSTPGSLELALEALENDHAFLTDTGVFTEDFIQN
+WIDYK
+LANEVKQMQLRPHPYEFSIYYDV
diff --git a/seq/musplfm.aa b/seq/musplfm.aa
new file mode 100644
index 0000000..f6bf80b
--- /dev/null
+++ b/seq/musplfm.aa
@@ -0,0 +1,9 @@
+>musplfm transl. of musplfm.seq, 2 to 676
+ MLPSLIQPCSWILLLLLVNSSLLWKNVASFP
+MCAMRNGRCFMSFEDTFELAGSLSHNISIEVS
+ELFTEFEKHYSNVSGLRDKSPMRCNTSFLPTP
+ENKEQARLTHYSALLKSGAMILDAWESPLDDL
+VSELSTIKNVPDIIISKATDIKKKINAVRNGV
+NALMSTMLQNGDEEKKNPAWFLQSDNEDARIH
+SLYGMISCLDNDFKKVDIYLNVLKCYMLKIDN
+C
diff --git a/seq/mwkw.aa b/seq/mwkw.aa
new file mode 100644
index 0000000..f992fdd
--- /dev/null
+++ b/seq/mwkw.aa
@@ -0,0 +1,31 @@
+>MWKW Myosin heavy chain - Caenorhabditis elegans
+MEHEKDPGWQYLRRTREQVLEDQSKPYDSKKNVWIPDPEEGYLAGEITATKGDQVTIVTAREMSVIQVTL
+KKELVQEMNPPKFEKTEDMSNLSFLNDASVLHNLRSRYAAMLIYTYSGLFCVVINPYKRLPIYTDSCARM
+FMGKRKTEMPPHLFAVSDEAYRNMLQDHENQSMLITGESGAGKTENTKKVICYFAAVGASQQEGGAEVDP
+NKKKVTLEDQIVQTNPVLEAFGNAKTVRNNNSSRFGKFIRIHFNKHGRLASCDIEHYLLEKSRVIRQAPG
+ERCYHIFYQIYSDFRPELKKELLLDLPIKDYWFVAQAELIIDGIDDVEEFQLTDEAFDILNFSAVEKQDC
+YRLMSAHMHMGNMKFKQRPREEQAEPDGTVEAEKASNMYGIGCE
+EFLKALTKPRVKVGTEWVSKGQNCEQVNWAVGAMAKGLYSRVFNWLVKKCNLTLDQKGIDRDYFIGVLDI
+AGFEIFDFNSFEQLWINFVNEKLQQFFNHHMFVLEQEEYAREGIQWVFIDFGLDLQACIELIEKPLGIIS
+MLDEECIVPKATDLTLASKLVDQHLGKHPNFEKPKPPKGKQGEAHFAMRHYAGTVRYNCLNWLEKNKDPL
+NDTVVSAMKQSKGNDLLVEIWQDYTTQEEAAAKAKEGGGGGKKKGKSGSFMTVSMLYRESLNNLMTMLNK
+THPHFIRCIIPNEKKQSGMIDAALVLNQLTCNGVLEGIRICRKGFPNRTLHPDFVQRYAILAAKEAKSDD
+DKKKCAEAIMSKLVNDGSLSEEMFRIGLTKVFFKAGVLAHLEDI
+RDEKLATILTGFQSQIRWHLGLKDRKRRMEQRAGLLIVQRNVRSWCTLRTWEWFKLYGKVKPMLKAGKEA
+EELEKINDKVKALEDSLAKEEKLRKELEESSAKLVEEKTSLFTNLESTKTQLSDAEERLAKLEAQQKDAS
+KQLSELNDQLADNEDRTADVQRAKKKIEAEVEALKKQIQDLEMSLRKAESEKQSKDHQIRSLQDEMQQQD
+EAIAKLNKEKKHQEEINRKLMEDLQSEEDKGNHQNKVKAKLEQTLDDLEDSLEREKRARADLDKQKRKVE
+GELKIAQENIDESGRQRHDLENNLKKKESELHSVSSRLEDEQALVSKLQRQIKDGQSRISELEEELENER
+QSRSKADRAKSDLQRELEELGEKLDEQGGATAAQVEVNKKREAE
+LAKLRRDLEEANMNHENQLGGLRKKHTDAVAELTDQLDQLNKAKAKVEKDKAQAVRDAEDLAAQLDQETS
+GKLNNEKLAKQFELQLTELQSKADEQSRQLQDFTSLKGRLHSENGDLVRQLEDAESQVNQLTRLKSQLTS
+QLEEARRTADEEARERQTVAAQAKNYQHEAEQLQESLEEEIEGKNEILRQLSKANADIQQWKARFEGEGL
+LKADELEDAKRRQAQKINELQEALDAANSKNASLEKTKSRLVGDLDDAQVDVERANGVASALEKKQKGFD
+KIIDEWRKKTDDLAAELDGAQRDLRNTSTDLFKAKNAQEELAEVVEGLRRENKSLSQEIKDLTDQLGEGG
+RSVHEMQKIIRRLEIEKEELQHALDEAEAALEAEESKVLRAQVE
+VSQIRSEIEKRIQEKEEEFENTRKNHARALESMQASLETEAKGKAELLRIKKKLEGDINELEIALDHANK
+ANADAQKNLKRYQEQVRELQLQVEEEQRNGADTREQFFNAEKRATLLQSEKEELLVANEAAERARKQAEY
+EAADARDQANEANAQVSSLTSAKRKLEGEIQAIHADLDETLNEYKAAEERSKKAIADATRLAEELRQEQE
+HSQHVDRLRKGLEQQLKEIQVRLDEAEAAALKGGKKVIAKLEQRVRELESELDGEQRRFQDANKNLGRAD
+RRVRELQFQVDEDKKNFERLQDLIDKLQQKLKTQKKQVEEAEELANLNLQKYKQLTHQLEDAEERADQAE
+NSLSKMRSKSRASASVAPGLQSSASAAVIRSPSRARASDF 
diff --git a/seq/mwrtc1.aa b/seq/mwrtc1.aa
new file mode 100644
index 0000000..8eadfb3
--- /dev/null
+++ b/seq/mwrtc1.aa
@@ -0,0 +1,8 @@
+>MWRTC1 - Myosin heavy chain 1, cardiac muscle - Rat (fragment)
+/DLTEQLGEGGKNVHELEKIRKQLEVEKLELQSALEEAEASLEHEEGKILRAQLEFNQIKAEIE
+SKLAEKDEEMEQAKRNHLRVVDSLQTSLDAETRSRNEALRVKKKMEGDLNEMEIQLSQANRIAS
+EAQKHLKNAQAHLKDTQLQLDDAVRANDDLKENIAIVERRNTLLQAELEELRAVVEQTERSRKL
+AEQELIETSERVQLLHSQNNSLINQKKKMDADLSQLQTEVEEAVQECRNAEEKAKKAITDAAMM
+AEELKKEQDTSAHLERMKKNMEQTIKDLQHRLDEAEQIALKGGKKQLQKLEARVRELENELEAE
+QKRNAESVKGMRKSERRIKELNYQTEEDKKNLVRLQDLVNKLQLKVKAYKRQAEEAEEQANTNL
+SKFRKVQHELDEAEERADIAESQVNKLRAKSRDIGAKQKIHDEE*
diff --git a/seq/myosin_bp.aa b/seq/myosin_bp.aa
new file mode 100644
index 0000000..c3a4957
--- /dev/null
+++ b/seq/myosin_bp.aa
@@ -0,0 +1,20 @@
+>gi|46049110|ref|NP_996557| myosin binding protein C, slow type isoform 4; myosin-binding protein C, slow-type; skeletal muscle C-protein [Homo sapiens]
+MPEPTkkeenevpapapppeepskekeaGTTPAKDWTLVETPPGEEQAKQNANSQLSILF
+IEKPQGGTVKVGEDITFIAKVKAEDLLRKPTIKWFKGKWMDLASKAGKHLQLKETFERHS
+RVYTFEMQIIKAKDNFAGNYRCEVTYKDKFDSCSFDLEVHESTGTTPNIDIRSAFKRSGE
+GQEDAGELDFSGLLKRREVKQQEEEPQVDVWELLKNAKPSEYEKIAFQYGITDLRGmlkr
+lkrmrreekkSAAFAKILDPAYQVDKGGRVRFVVELADPKLEVKWYKNGQEIRPSTKYIF
+EHKGCQRILFINNCQMTDDSEYYVTAGDEKCSTELFVREPPIMVTKQLEDTTAYCGERVE
+LECEVSEDDANVKWFKNGEEIIPGPKSRYRIRVEGKKHILIIEGATKADAAEYSVMTTGG
+QSSAKLSVDLKPLKILTPLTDQTVNLGKEICLKCEISENIPGKWTKNGLPVQESDRLKVV
+HKGRIHKLVIANALTEDEGDYVFAPDAYNVTLPAKVHVIDPPKIILDGLDADNTVTVIAG
+NKLRLEIPISGEPPPKAMWSRGDKAIMEGSGRIRTESYPDSSTLVIDIAERDDSGVYHIN
+LKNEAGEAHASIkvkvvdfpdppvaptvtEVGDDWCIMNWEPPAYDGGSPILGYFIERKK
+KQSSRWMRLNFDLCKETTFEPKKMIEGVAYEVRIFAVNAIGISKPSMPSRPFVPLAVTSP
+PtlltvdsvtdttvtMRWRPPDHIGAAGLDGYVLEYCFEGTEDWIVANKDLIDKTKFTIT
+GLPTDAKIFVRVKAVNAAGASEPKYYSQPILVkeiieppkiriprHLKQTYIRRVGEAVN
+LVIPFQGKPRPELTWKKDGAEIDKNQINIRNSETDTIIFIRKAERSHSGKYDLQVKVDKF
+VETASIDIQIIDRPGPPQIVKIEDVWGENVALTWTPPKDDGNAAITGYTIQKADKKSMEW
+FTVIEHYHRTSATITELVIGNEYYFRVFSENMCGLSEDATMTKESAVIARDGKIYKNPVY
+EDFDFSEAPMFTQPLVNTYAIAGYNATLNCSVRGNPKPKITWMKNKVAIVDDPRYRMFSN
+QGVCTLEIRKPSPYDGGTYCCKAVNDLGTVEIECKLEVKVIAQ
\ No newline at end of file
diff --git a/seq/n0.aa b/seq/n0.aa
new file mode 100644
index 0000000..d34f73f
--- /dev/null
+++ b/seq/n0.aa
@@ -0,0 +1,4 @@
+>mgstm1
+MLLEYTD,
+MGDAPDFD
+
diff --git a/seq/n1.aa b/seq/n1.aa
new file mode 100644
index 0000000..ab84beb
--- /dev/null
+++ b/seq/n1.aa
@@ -0,0 +1,5 @@
+>tests from mgstm1
+MILGYW,
+MLLE,
+MGDAP,
+MLCYNP
diff --git a/seq/n2.aa b/seq/n2.aa
new file mode 100644
index 0000000..2d43da7
--- /dev/null
+++ b/seq/n2.aa
@@ -0,0 +1,28 @@
+>gi|345664 gi|345664|pir||A45388 protein-tyrosine kinase (EC 2.7.1.112) - chicken [MASS=116670](65:883)
+GSIEREDGGLQGPAGNQHIYQPVGKPDHAAPPK,
+LIGVITENPVWIIMELCTLGELRSFLQVR,
+KPPRPGAPHLGSLASLNSPVDSYNEGVK,
+EDGGLQGPAGNQHIYQPVGKPDHAAPPK,
+QVTVSWDSGGSDEAPPKPSRPGYPSPR,
+GANPTHLADFNQVQTIQYSNSEDKDR,
+LPMPPNCPPTLYSLMTKCWAYDPSR,
+PGAPHLGSLASLNSPVDSYNEGVK,
+GANPTHLADFNQVQTIQYSNSEDK,
+LSHLQSEEVHWLHLDMGVSNVR,
+QVTVSWDSGGSDEAPPKPSR,
+VFHYFENSSEPTTWASIIR,
+TLLATVDESLPVLPASTHR,
+RQVTVSWDSGGSDEAPPK,
+AQLSTILEEEKLQQEER,
+EKFELAHPPEEWKYELR,
+LAQQYVMTSLQQEYKK,
+FELAHPPEEWKYELR,
+LVNGATQSFIIRPQK,
+KQMLTAAHALAVDAK,
+SNDKVYENVTGLVK,
+QMLTAAHALAVDAK,
+GMGQVLPTHLMEER,
+PQEISPPPTANLDR,
+IQPAPPEEYVPMVK,
+GMGQVLPTHLMEER,
+QFANLNREESILK,
diff --git a/seq/n2_fs.lib b/seq/n2_fs.lib
new file mode 100644
index 0000000..de927e1
--- /dev/null
+++ b/seq/n2_fs.lib
@@ -0,0 +1,84 @@
+>GT8.7 | 40001 90043 | transl. of pa875.con, 19 to 675
+ILGYWN,
+DQYRMFEP,
+SRYIATP,
+KCLDAFP,
+EYTDS,
+SYDEKR
+>GT8.7 | 40001 90043 | transl. of pa875.con, 19 to 675
+ILGYWN,
+DQYRMFEP,
+SRYIATP,
+KCLDAFP,
+EYTDS,
+SYDEKR,
+YTMGD,
+EKQKPEFL,
+VRGLTHP,
+TRMQLI,
+FKLGLDFP,
+NLPYLI,
+DGSHKIT,
+LRYLAR,
+KTIPEK,
+KRPWFA,
+ETEEERIR,
+GDKVTYVD,
+HWSNK
+>tests from mgstm1
+MLLE,
+MILGYW,
+MGADP,
+MLCYNP
+>gi|345664 gi|345664|pir||A45388 protein-tyrosine kinase (EC 2.7.1.112) - chicken [MASS=116670](65:883)
+GANPTHLADF,
+QVTVSWDSGG,
+EDGGLQGPA,
+TLLATVDE,
+LSHLQSEE,
+PGAPHLGS,
+GANPTHLA
+>gi|345664 gi|345664|pir||A45388 protein-tyrosine kinase (EC 2.7.1.112) - chicken [MASS=116670](65:883)
+GSIEREDGGLQGPAGNQHIYQPVGKPDHAAPPK,
+LIGVITENPVWIIMELCTLGELRSFLQVR,
+KPPRPGAPHLGSLASLNSPVDSYNEGVK,
+EDGGLQGPAGNQHIYQPVGKPDHAAPPK,
+QVTVSWDSGGSDEAPPKPSRPGYPSPR,
+GANPTHLADFNQVQTIQYSNSEDKDR,
+LPMPPNCPPTLYSLMTKCWAYDPSR,
+PGAPHLGSLASLNSPVDSYNEGVK,
+GANPTHLADFNQVQTIQYSNSEDK,
+LSHLQSEEVHWLHLDMGVSNVR,
+QVTVSWDSGGSDEAPPKPSR,
+VFHYFENSSEPTTWASIIR,
+TLLATVDESLPVLPASTHR,
+RQVTVSWDSGGSDEAPPK,
+AQLSTILEEEKLQQEER,
+EKFELAHPPEEWKYELR,
+LAQQYVMTSLQQEYKK,
+FELAHPPEEWKYELR,
+LVNGATQSFIIRPQK,
+KQMLTAAHALAVDAK,
+SNDKVYENVTGLVK,
+QMLTAAHALAVDAK,
+GMGQVLPTHLMEER,
+PQEISPPPTANLDR,
+IQPAPPEEYVPMVK,
+GMGQVLPTHLMEER,
+QFANLNREESILK,
+>gi|345664 gi|345664|pir||A45388 protein-tyrosine kinase (EC 2.7.1.112) - chicken [MASS=116670](65:883)
+GANPTHLADF,
+QVTVSWDSGG,
+EDGGLQGPA,
+TLLATVDE,
+LSHLQSEE,
+PGAPHLGS,
+GANPTHLA,
+AQLSTILE,
+KPPRPGA,
+GSIERED,
+VFHYFEN,
+LIGVIT,
+LPMPP,
+RQVTV,
+QVTV
diff --git a/seq/n2s.aa b/seq/n2s.aa
new file mode 100644
index 0000000..4f1875c
--- /dev/null
+++ b/seq/n2s.aa
@@ -0,0 +1,8 @@
+>gi|345664 gi|345664|pir||A45388 protein-tyrosine kinase (EC 2.7.1.112) - chicken [MASS=116670](65:883)
+GANPTHLADF,
+QVTVSWDSGG,
+EDGGLQGPA,
+TLLATVDE,
+LSHLQSEE,
+PGAPHLGS,
+GANPTHLA
diff --git a/seq/n2t.aa b/seq/n2t.aa
new file mode 100644
index 0000000..f141ca3
--- /dev/null
+++ b/seq/n2t.aa
@@ -0,0 +1,16 @@
+>gi|345664 gi|345664|pir||A45388 protein-tyrosine kinase (EC 2.7.1.112) - chicken [MASS=116670](65:883)
+GANPTHLADF,
+QVTVSWDSGG,
+EDGGLQGPA,
+TLLATVDE,
+LSHLQSEE,
+PGAPHLGS,
+GANPTHLA,
+AQLSTILE,
+KPPRPGA,
+GSIERED,
+VFHYFEN,
+LIGVIT,
+LPMPP,
+RQVTV,
+QVTV
diff --git a/seq/n_fs.lib b/seq/n_fs.lib
new file mode 100644
index 0000000..9be7ba2
--- /dev/null
+++ b/seq/n_fs.lib
@@ -0,0 +1,20 @@
+>tests from mgstm1
+MLLE,
+MILGYW,
+MGADP,
+MLCYNP
+>GT8.7 | 40001 90043 | transl. of pa875.con, 19 to 675
+ILGYWN,
+DQYRMFEP,
+SRYIATP,
+KCLDAFP,
+EYTDS,
+SYDEKR
+>gi|345664 gi|345664|pir||A45388 protein-tyrosine kinase (EC 2.7.1.112) - chicken [MASS=116670](65:883)
+GANPTHLADF,
+QVTVSWDSGG,
+EDGGLQGPA,
+TLLATVDE,
+LSHLQSEE,
+PGAPHLGS,
+GANPTHLA
diff --git a/seq/ngt.aa b/seq/ngt.aa
new file mode 100644
index 0000000..c09be5a
--- /dev/null
+++ b/seq/ngt.aa
@@ -0,0 +1,20 @@
+>GT8.7 | 40001 90043 | transl. of pa875.con, 19 to 675
+ILGYWN,
+DQYRMFEP,
+SRYIATP,
+KCLDAFP,
+EYTDS,
+SYDEKR,
+YTMGD,
+EKQKPEFL,
+VRGLTHP,
+TRMQLI,
+FKLGLDFP,
+NLPYLI,
+DGSHKIT,
+LRYLAR,
+KTIPEK,
+KRPWFA,
+ETEEERIR,
+GDKVTYVD,
+HWSNK
diff --git a/seq/ngts.aa b/seq/ngts.aa
new file mode 100644
index 0000000..bec7ea8
--- /dev/null
+++ b/seq/ngts.aa
@@ -0,0 +1,7 @@
+>GT8.7 | 40001 90043 | transl. of pa875.con, 19 to 675
+ILGY*WN,
+EYTDS?,
+S?YDEKR,
+DQY*RMFEP,
+KCLDAFP,
+S*RY*IATP
diff --git a/seq/oohu.aa b/seq/oohu.aa
new file mode 100644
index 0000000..092d62a
--- /dev/null
+++ b/seq/oohu.aa
@@ -0,0 +1,6 @@
+>OOHU | 1358 rhodopsin - human
+MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRT
+PLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVC
+KPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVV
+HFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQG
+SNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 
diff --git a/seq/oohu.raa b/seq/oohu.raa
new file mode 100644
index 0000000..8c11539
--- /dev/null
+++ b/seq/oohu.raa
@@ -0,0 +1,7 @@
+>oohu.aa shuffled
+KLILINAIFT GLQNSGCTAQ PTPEFFVMAQ YLFAMVSNMG GVFFQTALLN SAGQGFYSWC
+IFIFMTYPGF MLFIQLTGAD FVTVNEGANL CMTFCTQVTA VEYAKPTPVN AAPSSYRILR
+VIGGPYQAIF HSIATVFINS PTTEELQFLR IVVIHIAFIV VAVPLTDPRA VKFNAGELTF
+GCIFYMQYYM VISLFAANPF YYAFIRVPFE VYCETELIMG PLCAKRYVLA AASNGAYLGW
+LKLLEVYSAF PSVKCLNMLR GHVFTTIPET QNAVMYKDVI SSTLFVLSEQ LSAWITSEYP
+VPGKCYWMPF GANTHKNINP DPFAEHEKEY ILVWMVCKFG LGMTVMAG
diff --git a/seq/prio_atepa.aa b/seq/prio_atepa.aa
new file mode 100644
index 0000000..9e31979
--- /dev/null
+++ b/seq/prio_atepa.aa
@@ -0,0 +1,5 @@
+>PRIO_ATEPA | 90377   | MAJOR PRION PROTEIN PRECURSOR (PRP) (PRP27-30) (PRP33-35C).
+MANLGYWMLVLFVATWSDLGLCKKRPKPGGWNTGGSRYPGQGSPGGNRYPPQGGGWGQPHGGGWGQPHGGGWGQP
+HGGGWGQPHGGGWGQAGGTHNQWNKPSKPKTNMKHMAGAAAAGAVVGGLGGYMLGSAMSRPLIHFGNDYEDRYYR
+ENMYRYPNQVYYRPVDQYNNQNNFVHDCVNITIKQHTVTTTTKGENLTETDVKMMERVVEQMCITQYERESQAYY
+QRGSSMVLFSSPPVILLISFLIFLIVG
diff --git a/seq/prot_test.lib b/seq/prot_test.lib
new file mode 100644
index 0000000..603a00b
--- /dev/null
+++ b/seq/prot_test.lib
@@ -0,0 +1,51 @@
+>HAHU | 1114 | Hemoglobin alpha chain - Human, chimpanzee, and pygmy chimpanzee
+VLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHGKKVADALTNAV
+AHVDDMPNALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTPAVHASLDKFLASVSTVLTSKY
+R 
+>K1HUAG | 1091 |  Ig kappa chain V-I region (Ag) - Human
+DIQMTQSPSSLSASVGDRVTITCQASQDINHYLNWYQQGPKKAPKILIYDASNLETGVPSRFSGSGFGTD
+FTFTISGLQPEDIATYYCQQYDTLPRTFGQGTKLEIKR/ 
+>CCHU | 1 | Cytochrome c - Human
+MGDVEKGKKIFIMKCSQCHTVEKGGKHKTGPNLHGLFGRKTGQAPGYSYTAANKNKGIIWGEDTLMEYLE
+NPKKYIPGTKMIFVGIKKKEERADLIAYLKKATNE 
+>N2KF1U | 1021 | Long neurotoxin 1 - Many-banded krait
+IVCHTTATIPSSAVTCPPGENLCYRKMWCDAFCSSRGKVVELGCAATCPSKKPYEEVTCCSTDKCNHPPK
+RQPG 
+>TPHUCS | 1322 | Troponin C, skeletal muscle - Human
+DTQQAEARSYLSEEMIAEFKAAFDMFDADGGGDISVKELGTVMRMLGQTPTKEELDAIIEEVDEDGSGTI
+DFEEFLVMMVRQMKEDAKGKSEEELAECFRIFDRNADGYIDPEELAEIFRASGEHVTDEEIESLMKDGDK
+NNDGRIDFDEFLKMMEGVQ 
+>FEPE | 25 | Ferredoxin - Peptostreptococcus asaccharolyticus
+AYVINDSCIACGACKPECPVNIQQGSIYAIDADSCIDCGSCASVCPVGAPNPED 
+>RKMDS | 677 | Ribulose-bisphosphate carboxylase (EC 4.1.1.39) small chain - Cry
+MRLTQGAFSFLPDLTDEQIVKQIQYAISKNWALNVEWTDDPHPRNAYWDLWGLPLFGIKDPAAVMFEINA
+CRKAKPACYVKVNAFDNSRGVESCCLSFIVQRPTSNEPGFQLIRSEVDSRNIRYTIQSYASTRPEGERY*
+
+>K3HU | 1099 | Ig kappa chain C region - Human
+/TVAAPSVFIFPPSDEQLKSGTASVVCLLNNFYPREAKVQWKVDNALQSGNSQESVTEQDSKDSTYSLSS
+TLTLSKADYEKHKVYACEVTHQGLSSPVTKSFNRGEC 
+>HMIVV | 2581 | Hemagglutinin precursor - Influenza A virus (2 strains)
+MKTIIALSYIFCLVFAQDLPGNDNNSTATLCLGHHAVPNGTLVKTITNDQIEVTNATELVQSSSTGKICN
+NPHRILDGINCTLIDALLGDPHCDGFQNEKWDLFVERSKAFSNCYPYDVPDYASLRSLVASSGTLEFINE
+GFNWTGVTQNGGSSACKRGPDSGFFSRLNWLYKSGSTYPVQNVTMPNNDNSDKLYIWGVHHPSTDKEQTN
+LYVQASGKVTVSTKRSQQTIIPNVGSRPWVRGLSSRISIYWTIVKPGDILVINSNGNLIAPRGYFKMRTG
+KSSI
+MRSDAPIGTCSSECITPNGSIPNDKPFQNVNKITYGACPKYVKQNTLKLATGMRNVPEKQTRGIFGAIAG
+FIENGWEGMIDGWYGFRHQNSEGTGQAADLKSTQAAIDQINGKLNRVIEKTNEKFHQIEKEFSEVEGRIQ
+DLEKYVEDTKIDLWSYNAELLVALENQHTIDLTDSEMNKLFEKTRRQLRENAEDMGNGCFKIYHKCDNAC
+IGSIRNGTYDHDVYRDEALNNRFQIKGVELKSGYKDWILWISFAISCFLLCVVLLGFIMWACQKGNIRCN
+ICI 
+>OKBO2C | 296 | Protein kinase (EC 2.7.1.37), cAMP-dependent, catalytic chain - B
+GNAAAAKKGSEQESVKEFLAKAKEDFLKKWENPAQNTAHLDQFERIKTLGTGSFGRVMLVKHMETGNHYA
+MKILDKQKVVKLKQIEHTLNEKRILQAVNFPFLVKLEFSFKDNSNLYMVMEYVPGGEMFSHLRRIGRFSE
+PHARFYAAQIVLTFEYLHSLDLIYRDLKPENLLIDQQGYIQVTDFGFAKRVKGRTWTLCGTPEYLAPEII
+LSKGYNKAVDWWALGVLIYEMAAGYPPFFADQPIQIYEKIVSGKVRFPSHFSSDLKDLLRNLLQVDLTKR
+FGNLKDGVNDIKNHKWFATTDWIAIYQRKVEAPFIPKFKGPGDTSNFDDYEEEEIRVSINEKCGKEFSEF
+>GT8.7 | 266 | transl. of pa875.con, 19 to 675
+MPMILGYWNVRGLTHPIRMLLEYTDSSYDEKR
+YTMGDAPDFDRSQWLNEKFKLGLDFPNLPYLI
+DGSHKITQSNAILRYLARKHHLDGETEEERIR
+ADIVENQVMDTRMQLIMLCYNPDFEKQKPEFL
+KTIPEKMKLYSEFLGKRPWFAGDKVTYVDFLA
+YDILDQYRMFEPKCLDAFPNLRDFLARFEGLK
+KISAYMKSSRYIATPIFSKMAHWSNK
diff --git a/seq/prot_test.lseg b/seq/prot_test.lseg
new file mode 100644
index 0000000..1c63e8e
--- /dev/null
+++ b/seq/prot_test.lseg
@@ -0,0 +1,66 @@
+>sp|P69905|HBA_HUMAN Hemoglobin subunit alpha OS=Homo sapiens GN=HBA1 PE=1 SV=2
+MVLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHG
+KKVADALTNAVAHVDDMPNALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTP
+AVHASLDKFLASVSTVLTSKYR
+
+>sp|P00502|GSTA1_RAT Glutathione S-transferase alpha-1 OS=Rattus norvegicus GN=Gsta1 PE=1 SV=3
+MSGKPVLHYFNARGRMECIRWLLAAAGVEFDEKFIQSPEDLEKLKKDGNLMFDQVPMVEI
+DGMKLAQTRAILNYIATKYDLYGKDMKERALIDMYTEGILDLTEMIMQLVICPPDQKEAK
+TALAKDRTKNRYLPAFEKVLKSHGQDYLVGNRLTRVDIHLLELLLYVEEFDASLLTSFPL
+LKAFKSRISSLPNVKKFLQPGSQRKLPVDAKQIEEARKIFKF
+
+>sp|P01593|KV101_HUMAN Ig kappa chain V-I region AG OS=Homo sapiens PE=1 SV=1
+DIQMTQSPSSLSASVGDRVTITCQASQDINHYLNWYQQGPKKAPKILIYDASNLETGVPs
+rfsgsgfgtdftftisgLQPEDIATYYCQQYDTLPRTFGQGTKLEIKR
+
+>sp|P99998|CYC_PANTR Cytochrome c OS=Pan troglodytes GN=CYCS PE=1 SV=2
+MGDVEKGKKIFIMKCSQCHTVEKGGKHKTGPNLHGLFGRKTGQAPGYSYTAANKNKGIIW
+GEDTLMEYLENPKKYIPGTKMIFVGIKKKEERADLIAYLKKATNE
+
+>sp|P60615|NXL1A_BUNMU Alpha-bungarotoxin isoform A31 OS=Bungarus multicinctus PE=1 SV=1
+MKtllltlvvvtIVCLDLGYTIVCHTTATSPISAVTCPPGENLCYRKMWCDAFCSSRGKV
+VELGCAATCPSKKPYEEVTCCSTDKCNPHPKQRPG
+
+>sp|P02585|TNNC2_HUMAN Troponin C, skeletal muscle OS=Homo sapiens GN=TNNC2 PE=1 SV=2
+MTDQQAEARSYLSEEMIAEfkaafdmfdadgggdISVKELGTVMRMLGQTPTKEELDAII
+EEVDEDGSGTIDFEEFLVMMVRQMKEDAKGKSEEELAECFRIFDRNADGYIDPEELAEIF
+RASGEHVTDEEIESLMKDGDKNNDGRIDFDEFLKMMEGVQ
+
+>sp|P00193|FER_PEPAS Ferredoxin OS=Peptostreptococcus asaccharolyticus PE=1 SV=1
+AYVINDSCIACGACKPECPVNIQQGSIYAIDADSCIDCGSCASVCPVGAPNPED
+
+>sp|P14960|RBS_GUITH Ribulose bisphosphate carboxylase small chain OS=Guillardia theta GN=rbcS PE=3 SV=1
+MRLTQGAFSFLPDLTDEQIVKQIQYAISKNWALNVEWTDDPHPRNAYWDLWGLPLFGIKD
+PAAVMFEINACRKAKPACYVKVNAFDNSRGVESCCLSFIVQRPTSNEPGFQLIRSEVDSR
+NIRYTIQSYASTRPEGERY
+
+>sp|P01834|IGKC_HUMAN Ig kappa chain C region OS=Homo sapiens GN=IGKC PE=1 SV=1
+TVAAPSVFIFPPSDEQLKSGTASVVCLLNNFYPREAKVQWKVDNALQSGNSQESVTEQDS
+KDstyslsstltlsKADYEKHKVYACEVTHQGLSSPVTKSFNRGEC
+
+>sp|P03435|HEMA_I75A3 Hemagglutinin OS=Influenza A virus (strain A/Victoria/3/1975 H3N2) GN=HA PE=1 SV=1
+MKTIIALSYIFCLVFAQDLPGNDNNSTATLCLGHHAVPNGTLVKTITNDQIEVTNATELV
+QSSSTGKICNNPHRILDGINCTLIDALLGDPHCDGFQNEKWDLFVERSKAFSNCYPYDVP
+DYASLRSLVASSGTLEFINEGFNWTGVTQNGGSSACKRGPDSGFFSRLNWLYKSGSTYPV
+QNVTMPNNDNSDKLYIWGVHHPSTDKEQTNLYVQASGKVTVSTKRSQQTIIPNVGSRPWV
+RGLSSRISIYWTIVKPGDILVINSNGNLIAPRGYFKMRTGKSSIMRSDAPIGTCSSECIT
+PNGSIPNDKPFQNVNKITYGACPKYVKQNTLKLATGMRNVPEKQTRGIFGAIAGFIENGW
+EGMIDGWYGFRHQNSEGTGQAADLKSTQAAIDQINGKLNRVIEKTNEKFHQIEKEFSEVE
+GRIQDLEKYVEDTKIDLWSYNAELLVALENQHTIDLTDSEMNKLFEKTRRQLRENAEDMG
+NGCFKIYHKCDNACIGSIRNGTYDHDVYRDEALNNRFQIKGVELKSGYKDWILWISFAIS
+CFLLCVVLLGFIMWACQKGNIRCNICI
+
+>sp|P00517|KAPCA_BOVIN cAMP-dependent protein kinase catalytic subunit alpha OS=Bos taurus GN=PRKACA PE=1 SV=3
+MGNAAAAKKGSEQESVKEFLAKAKEDFLKKWENPAQNTAHLDQFERIKTLGTGSFGRVML
+VKHMETGNHYAMKILDKQKVVKLKQIEHTLNEKRILQAVNFPFLVKLEFSFKDNSNLYMV
+MEYVPGGEMFSHLRRIGRFSEPHARFYAAQIVLTFEYLHSLDLIYRDLKPENLLIDQQGY
+IQVTDFGFAKRVKGRTWTLCGTPEYLAPEIILSKGYNKAVDWWALGVLIYEMAAGYPPFF
+ADQPIQIYEKIVSGKVRFPSHFSSDLKDLLRNLLQVDLTKRFGNLKNGVNDIKNHKWFAT
+TDWIAIYQRKVEAPFIPKFKGPGDTSNFDDYEEEEIRVSINEKCGKEFSEF
+
+>sp|P09488|GSTM1_HUMAN Glutathione S-transferase Mu 1 OS=Homo sapiens GN=GSTM1 PE=1 SV=3
+MPMILGYWDIRGLAHAIRLLLEYTDSSYEEKKYTMGDAPDYDRSQWLNEKFKLGLDFPNL
+PYLIDGAHKITQSNAILCYIARKHNLCGETEEEKIRVDILENQTMDNHMQLGMICYNpef
+eklkpkyleelpeklklYSEFLGKRPWFAGNKITFVDFLVYDVLDLHRIFEPKCLDAFPN
+LKDFISRFEGLEKISAYMKSSRFLPRPVFSKMAVWGNK
+
diff --git a/seq/prot_test_s.lseg b/seq/prot_test_s.lseg
new file mode 100644
index 0000000..c72b3ce
--- /dev/null
+++ b/seq/prot_test_s.lseg
@@ -0,0 +1,25 @@
+>sp|P09488|GSTM1_HUMAN GLUTATHIONE S-TRANSFERASE MU 1 (EC 2.5.1.18) (GSTM1-1) (HB SUBUNI
+MPMILGYWDIRGLAHAIRLLLEYTDSSYEEKKYTMGDAPDYDRSQWLNEKFKLGLDFPNLPYLIDGAHKITQSNAILCYIARKHNLCGETEEEKIRVDILENQTMDNHMQLGMICYNPEFEKLKPKYLEELPEKLKLYSEFLGKRPWFAGNKITFVDFLVYDVLDLHRIFEPNCLD^AFPNLKDFISRFEGLEKISAYMKSSRFLPRPVFTKMAVWGNK 
+>XURTG | 266 | glutathione transferase (EC 2.5.1.18) Ya - rat
+MSGKPVLHYFNARGRMECIRWLLAAAGVEFDEKFIQSPEDLEKLKKDGNLMFDQVPMVEIDGMKLAQTRA
+ILNYIATKYDLYGKDMKERALIDMYTEGILDLTEMIMQLVICPPDQKEAKTALAKDRTKNRYLPAFEKVL
+KSHGQDYLVGNRLTRVDIHLLELLLYVEEFDASLLTSFPLLKAFKSRISSLPNVKKFLQPGSQRKLPMDA
+KQIEEARKIFKF 
+>sp|P00517|KAPCA_BOVIN cAMP-dependent protein kinase catalytic subunit alpha;  PKA
+GNAAAAKKGSEQESVKEFLAKAKEDFLKKWENPAQNTAHLDQFERIKTLGTGSFGRVMLV
+KHMETGNHYAMKILDKQKVVKLKQIEHTLNEKRILQAVNFPFLVKLEFSFKDNSNLYMVM
+EYVPGGEMFSHLRRIGRFSEPHARFYAAQIVLTFEYLHSLDLIYRDLKPENLLIDQQGYI
+QVTDFGFAKRVKGRTWTLCGTPEYLAPEIILSKGYNKAVDWWALGVLIYEMAAGYPPFFA
+DQPIQIYEKIVSGKVRFPSHFSSDLKDLLRNLLQVDLTKRFGNLKDGVNDIKNHKWFATT
+DWIAIYQRKVEAPFIPKFKGPGDTSNFDDYEEEEIRVSINEKCGKEFSEF
+>sp|P62161|CALM_RAT Calmodulin;  CaM
+ADQLTEEQIAEFKEAFSLFDKDGDGTITTKELGTVMRSLGQNPTEAELQDMINEVDADGNGTID
+FPEFLTMMARKMKDTDSEEEIREAFRVFDKDGNGYISAAELRHVMTNLGEKLTDEEVDEMIREA
+DIDGDGQVNYEEFVQMMTAK
+>sp|P10649|GSTM1_MOUSE Glutathione S-transferase Mu 1; GST 1-1; GST class-mu 1;
+MPMILGYWNVRGLTHPIRMLLEYTDSSYDEKRYTMGDAPDFDRSQWLNEKFKLGLDFPNL
+pylidgshkitqsnailrylarkhhldget
+EEERIRADIVENQVMDTRMQLIMLCYNPDF
+ekqkpeflktipekmklyseflgkrpwfag
+DKVTYVDFLAYDILDQYRMFEPKCLDAFPN
+LRDFLARFEGLKKISAYMKSSRYIATPIFSKMAHWSNK
diff --git a/seq/qrhuld.aa b/seq/qrhuld.aa
new file mode 100644
index 0000000..1ecbfd2
--- /dev/null
+++ b/seq/qrhuld.aa
@@ -0,0 +1,15 @@
+>QRHULD LDL receptor precursor - Human
+MGPWGWKLRWTVALLLAAAGTAVGDRCERNEFQCQDGKCISYKWVCDGSAECQDGSDESQETCLSVTCKS
+GDFSCGGRVNRCIPQFWRCDGQVDCDNGSDEQGCPPKTCSQDEFRCHDGKCISRQFVCDSDRDCLDGSDE
+ASCPVLTCGPASFQCNSSTCIPQLWACDNDPDCEDGSDEWPQRCRGLYVFQGDSSPCSAFEFHCLSGECI
+HSSWRCDGGPDCKDKSDEENCAVATCRPDEFQCSDGNCIHGSRQCDREYDCKDMSDEVGCVNVTLCEGPN
+KFKCHSGECITLDKVCNMARDCRDWSDEPIKECGTNECLDNNGGCSHVCNDLKIGYECLCPDGFQLVAQR
+RCEDIDECQDPDTCSQLCVNLEGGYKCQCEEGFQLDPHTKACKAVGSIAYLFFTNRHEVRKMTLDRSEYT
+SLIPNLRNVVA
+LDTEVASNRIYWSDLSQRMICSTQLDRAHGVSSYDTVISRDIQAPDGLAVDWIHSNIYWTDSVLGTVSVA
+DTKGVKRKTLFRENGSKPRAIVVDPVHGFMYWTDWGTPAKIKKGGLNGVDIYSLVTENIQWPNGITLDLL
+SGRLYWVDSKLHSISSIDVNGGNRKTILEDEKRLAHPFSLAVFEDKVFWTDIINEAIFSANRLTGSDVNL
+LAENLLSPEDMVLFHNLTQPRGVNWCERTTLSNGGCQYLCLPAPQINPHSPKFTCACPDGMLLARDMRSC
+LTEAEAAVATQETSTVRLKVSSTAVRTQHTTTRPVPDTSRLPGATPGLTTVEIVTMSHQALGDVAGRGNE
+KKPSSVRALSIVLPIVLLVFLCLGVFLLWKNWRLKNINSINFDNPVYQKTTEDEVHICHNQDGYSYPSRQ
+MVSLEDDVA 
diff --git a/seq/titin_hum.aa b/seq/titin_hum.aa
new file mode 100644
index 0000000..8e2b064
--- /dev/null
+++ b/seq/titin_hum.aa
@@ -0,0 +1,431 @@
+>gi|108861911|sp|Q8WZ42|TITIN_HUMAN Titin (Connectin) (Rhabdomyosarcoma antigen MU-RMS-40.14)
+MTTQAPTFTQPLQSVVVLEGSTATFEAHISGFPVPEVSWFRDGQVISTSTLPGVQISFSDGRAKLTIPAVTKANSGRYSL
+KATNGSGQATSTAELLVKAETAPPNFVQRLQSMTVRQGSQVRLQVRVTGIPTPVVKFYRDGAEIQSSLDFQISQEGDLYS
+LLIAEAYPEDSGTYSVNATNSVGRATSTAELLVQGEEEVPAKKTKTIVSTAQISESRQTRIEKKIEAHFDARSIATVEMV
+IDGAAGQQLPHKTPHRIPPKPKSRSPTPPSIAAKAQLARQQSPSPIRHSPSPVRHVRAPTPSPVRSVSPAARISTSPIRS
+VRSPLLMRKTQASTVATGPEVPPPWKQEGYVASSSEAEMRETTLTTSTQIRTEERWEGRYGVQEQVTISGAAGAAASVSA
+SASYAAEAVATGAKEVKQDADKSAAVATVVAAVDMARVREPVISAVEQTAQRTTTTAVHIQPAQEQVRKEAEKTAVTKVV
+VAADKAKEQELKSRTKEVITTKQEQMHVTHEQIRKETEKTFVPKVVISAAKAKEQETRISEEITKKQKQVTQEAIRQETE
+ITAASMVVVATAKSTKLETVPGAQEETTTQQDQMHLSYEKIMKETRKTVVPKVIVATPKVKEQDLVSRGREGITTKREQV
+QITQEKMRKEAEKTALSTIAVATAKAKEQETILRTRETMATRQEQIQVTHGKVDVGKKAEAVATVVAAVDQARVREPREP
+GHLEESYAQQTTLEYGYKERISAAKVAEPPQRPASEPHVVPKAVKPRVIQAPSETHIKTTDQKGMHISSQIKKTTDLTTE
+RLVHVDKRPRTASPHFTVSKISVPKTEHGYEASIAGSAIATLQKELSATSSAQKITKSVKAPTVKPSETRVRAEPTPLPQ
+FPFADTPDTYKSEAGVEVKKEVGVSITGTTVREERFEVLHGREAKVTETARVPAPVEIPVTPPTLVSGLKNVTVIEGESV
+TLECHISGYPSPTVTWYREDYQIESSIDFQITFQSGIARLMIREAFAEDSGRFTCSAVNEAGTVSTSCYLAVQVSEEFEK
+ETTAVTEKFTTEEKRFVESRDVVMTDTSLTEEQAGPGEPAAPYFITKPVVQKLVEGGSVVFGCQVGGNPKPHVYWKKSGV
+PLTTGYRYKVSYNKQTGECKLVISMTFADDAGEYTIVVRNKHGETSASASLLEEADYELLMKSQQEMLYQTQVTAFVQEP
+KVGETAPGFVYSEYEKEYEKEQALIRKKMAKDTVVVRTYVEDQEFHISSFEERLIKEIEYRIIKTTLEELLEEDGEEKMA
+VDISESEAVESGFDLRIKNYRILEGMGVTFHCKMSGYPLPKIAWYKDGKRIKHGERYQMDFLQDGRASLRIPVVLPEDEG
+IYTAFASNIKGNAICSGKLYVEPAAPLGAPTYIPTLEPVSRIRSLSPRSVSRSPIRMSPARMSPARMSPARMSPARMSPG
+RRLEETDESQLERLYKPVFVLKPVSFKCLEGQTARFDLKVVGRPMPETFWFHDGQQIVNDYTHKVVIKEDGTQSLIIVPA
+TPSDSGEWTVVAQNRAGRSSISVILTVEAVEHQVKPMFVEKLKNVNIKEGSQLEMKVRATGNPNPDIVWLKNSDIIVPHK
+YPKIRIEGTKGEAALKIDSTVSQDSAWYTATAINKAGRDTTRCKVNVEVEFAEPEPERKLIIPRGTYRAKEIAAPELEPL
+HLRYGQEQWEEGDLYDKEKQQKPFFKKKLTSLRLKRFGPAHFECRLTPIGDPTMVVEWLHDGKPLEAANRLRMINEFGYC
+SLDYGVAYSRDSGIITCRATNKYGTDHTSATLIVKDEKSLVEESQLPEGRKGLQRIEELERMAHEGALTGVTTDQKEKQK
+PDIVLYPEPVRVLEGETARFRCRVTGYPQPKVNWYLNGQLIRKSKRFRVRYDGIHYLDIVDCKSYDTGEVKVTAENPEGV
+IEHKVKLEIQQREDFRSVLRRAPEPRPEFHVHEPGKLQFEVQKVDRPVDTTETKEVVKLKRAERITHEKVPEESEELRSK
+FKRRTEEGYYEAITAVELKSRKKDESYEELLRKTKDELLHWTKELTEEEKKALAEEGKITIPTFKPDKIELSPSMEAPKI
+FERIQSQTVGQGSDAHFRVRVVGKPDPECEWYKNGVKIERSDRIYWYWPEDNVCELVIRDVTAEDSASIMVKAINIAGET
+SSHAFLLVQAKQLITFTQELQDVVAKEKDTMATFECETSEPFVKVKWYKDGMEVHEGDKYRMHSDRKVHFLSILTIDTSD
+AEDYSCVLVEDENVKTTAKLIVEGAVVEFVKELQDIEVPESYSGELECIVSPENIEGKWYHNDVELKSNGKYTITSRRGR
+QNLTVKDVTKEDQGEYSFVIDGKKTTCKLKMKPRPIAILQGLSDQKVCEGDIVQLEVKVSLESVEGVWMKDGQEVQPSDR
+VHIVIDKQSHMLLIEDMTKEDAGNYSFTIPALGLSTSGRVSVYSVDVITPLKDVNVIEGTKAVLECKVSVPDVTSVKWYL
+NDEQIKPDDRVQAIVKGTKQRLVINRTHASDEGPYKLIVGRVETNCNLSVEKIKIIRGLRDLTCTETQNVVFEVELSHSG
+IDVLWNFKDKEIKPSSKYKIEAHGKIYKLTVLNMMKDDEGKYTFYAGENITSGKLTVAGGAISKPLTDQTVAESQEAVFE
+CEVANPDSKGEWLRDGKHLPLTNNIRSESDGHKRRLIIAATKLDDIGEYTYKVATSKTSAKLKVEAVKIKKTLKNLTVTE
+TQDAVFTVELTHPNVKGVQWIKNGVVLESNEKYAISVKGTIYSLRIKNCAIVDESVYGFRLGRLGASARLHVETVKIIKK
+PKDVTALENATVAFEVSVSHDTVPVKWFHKNVEIKPSDKHRLVSERKVHKLMLQNISPSDAGEYTAVVGQLECKAKLFVE
+TLHITKTMKNIEVPETKTASFECEVSHFNVPSMWLKNGVEIEMSEKFKIVVQGKLHQLIIMNTSTEDSAEYTFVCGNDQV
+SATLTVTPIMITSMLKDINAEEKDTITFEVTVNYEGISYKWLKNGVEIKSTDKCQMRTKKLTHSLNIRNVHFGDAADYTF
+VAGKATSTATLYVEARHIEFRKHIKDIKVLEKKRAMFECEVSEPDITVQWMKDDQELQITDRIKIQKEKYVHRLLIPSTR
+MSDAGKYTVVAGGNVSTAKLFVEGRDVRIRSIKKEVQVIEKQRAVVEFEVNEDDVDAHWYKDGIEINFQVQERHKYVVER
+RIHRMFISETRQSDAGEYTFVAGRNRSSVTLYVNAPEPPQVLQELQPVTVQSGKPARFCAVISGRPQPKISWYKEEQLLS
+TGFKCKFLHDGQEYTLLLIEAFPEDAAVYTCEAKNDYGVATTSASLSVEVPEVVSPDQEMPVYPPAIITPLQDTVTSEGQ
+PARFQCRVSGTDLKVSWYSKDKKIKPSRFFRMTQFEDTYQLEIAEAYPEDEGTYTFVASNAVGQVSSTANLSLEAPESIL
+HERIEQEIEMEMKEFSSSFLSAEEEGLHSAELQLSKINETLELLSESPVYSTKFDSEKEGTGPIFIKEVSNADISMGDVA
+TLSVTVIGIPKPKIQWFFNGVLLTPSADYKFVFDGDDHSLIILFTKLEDEGEYTCMASNDYGKTICSAYLKINSKGEGHK
+DTETESAVAKSLEKLGGPCPPHFLKELKPIRCAQGLPAIFEYTVVGEPAPTVTWFKENKQLCTSVYYTIIHNPNGSGTFI
+VNDPQREDSGLYICKAENMLGESTCAAELLVLLEDTDMTDTPCKAKSTPEAPEDFPQTPLKGPAVEALDSEQEIATFVKD
+TILKAALITEENQQLSYEHIAKANELSSQLPLGAQELQSILEQDKLTPESTREFLCINGSIHFQPLKEPSPNLQLQIVQS
+QKTFSKEGILMPEEPETQAVLSDTEKIFPSAMSIEQINSLTVEPLKTLLAEPEGNYPQSSIEPPMHSYLTSVAEEVLSPK
+EKTVSDTNREQRVTLQKQEAQSALILSQSLAEGHVESLQSPDVMISQVNYEPLVPSEHSCTEGGKILIESANPLENAGQD
+SAVRIEEGKSLRFPLALEEKQVLLKEEHSDNVVMPPDQIIESKREPVAIKKVQEVQGRDLLSKESLLSGIPEEQRLNLKI
+QICRALQAAVASEQPGLFSEWLRNIEKVEVEAVNITQEPRHIMCMYLVTSAKSVTEEVTIIIEDVDPQMANLKMELRDAL
+CAIIYEEIDILTAEGPRIQQGAKTSLQEEMDSFSGSQKVEPITEPEVESKYLISTEEVSYFNVQSRVKYLDATPVTKGVA
+SAVVSDEKQDESLKPSEEKEESSSESGTEEVATVKIQEAEGGLIKEDGPMIHTPLVDTVSEEGDIVHLTTSITNAKEVNW
+YFENKLVPSDEKFKCLQDQNTYTLVIDKVNTEDHQGEYVCEALNDSGKTATSAKLTVVKRAAPVIKRKIEPLEVALGHLA
+KFTCEIQSAPNVRFQWFKAGREIYESDKCSIRSSKYISSLEILRTQVVDCGEYTCKASNEYGSVSCTATLTVTEAYPPTF
+LSRPKSLTTFVGKAAKFICTVTGTPVIETIWQKDGAALSPSPNWKISDAENKHILELSNLTIQDRGVYSCKASNKFGADI
+CQAELIIIDKPHFIKELEPVQSAINKKVHLECQVDEDRKVTVTWSKDGQKLPPGKDYKICFEDKIATLEIPLAKLKDSGT
+YVCTASNEAGSSSCSATVTVREPPSFVKKVDPSYLMLPGESARLHCKLKGSPVIQVTWFKNNKELSESNTVRMYFVNSEA
+ILDITDVKVEDSGSYSCEAVNDVGSDSCSTEIVIKEPPSFIKTLEPADIVRGTNALLQCEVSGTGPFEISWFKDKKQIRS
+SKKYRLFSQKSLVCLEIFSFNSADVGEYECVVANEVGKCGCMATHLLKEPPTFVKKVDDLIALGGQTVTLQAAVRGSEPI
+SVTWMKGQEVIREDGKIKMSFSNGVAVLIIPDVQISFGGKYTCLAENEAGSQTSVGELIVKEPAKIIERAELIQVTAGDP
+ATLEYTVAGTPELKPKWYKDGRPLVASKKYRISFKNNVAQLKFYSAELHDSGQYTFEISNEVGSSSCETTFTVLDRDIAP
+FFTKPLRNVDSVVNGTCRLDCKIAGSLPMRVSWFKDGKEIAASDRYRIAFVEGTASLEIIRVDMNDAGNFTCRATNSVGS
+KDSSGALIVQEPPSFVTKPGSKDVLPGSAVCLKSTFQGSTPLTIRWFKGNKELVSGGSCYITKEALESSLELYLVKTSDS
+GTYTCKVSNVAGGVECSANLFVKEPATFVEKLEPSQLLKKGDATQLACKVTGTPPIKITWFANDREIKESSKHRMSFVES
+TAVLRLTDVGIEDSGEYMCEAQNEAGSDHCSSIVIVKESPYFTKEFKPIEVLKEYDVMLLAEVAGTPPFEITWFKDNTIL
+RSGRKYKTFIQDHLVSLQILKFVAADAGEYQCRVTNEVGSSICSARVTLREPPSFIKKIESTSSLRGGTAAFQATLKGSL
+PITVTWLKDSDEITEDDNIRMTFENNVASLYLSGIEVKHDGKYVCQAKNDAGIQRCSALLSVKEPATITEEAVSIDVTQG
+DPATLQVKFSGTKEITAKWFKDGQELTLGSKYKISVTDTVSILKIISTEKKDSGEYTFEVQNDVGRSSCKARINVLDLII
+PPSFTKKLKKMDSIKGSFIDLECIVAGSHPISIQWFKDDQEISASEKYKFSFHDNTAFLEISQLEGTDSGTYTCSATNKA
+GHNQCSGHLTVKEPPYFVEKPQSQDVNPNTRVQLKALVGGTAPMTIKWFKDNKELHSGAARSVWKDDTSTSLELFAAKAT
+DSGTYICQLSNDVGTATSKATLFVKEPPQFIKKPSPVLVLRNGQSTTFECQITGTPKIRVSWYLDGNEITAIQKHGISFI
+DGLATFQISGARVENSGTYVCEARNDAGTASCSIELKVKEPPTFIRELKPVEVVKYSDVELECEVTGTPPFEVTWLKNNR
+EIRSSKKYTLTDRVSVFNLHITKCDPSDTGEYQCIVSNEGGSCSCSTRVALKEPPSFIKKIENTTTVLKSSATFQSTVAG
+SPPISITWLKDDQILDEDDNVYISFVDSVATLQIRSVDNGHSGRYTCQAKNESGVERCYAFLLVQEPAQIVEKAKSVDVT
+EKDPMTLECVVAGTPELKVKWLKDGKQIVPSRYFSMSFENNVASFRIQSVMKQDSGQYTFKVENDFGSSSCDAYLRVLDQ
+NIPPSFTKKLTKMDKVLGSSIHMECKVSGSLPISAQWFKDGKEISTSAKYRLVCHERSVSLEVNNLELEDTANYTCKVSN
+VAGDDACSGILTVKEPPSFLVKPGRQQAIPDSTVEFKAILKGTPPFKIKWFKDDVELVSGPKCFIGLEGSTSFLNLYSVD
+ASKTGQYTCHVTNDVGSDSCTTMLLVTEPPKFVKKLEASKIVKAGDSSRLECKIAGSPEIRVVWFRNEHELPASDKYRMT
+FIDSVAVIQMNNLSTEDSGDFICEAQNPAGSTSCSTKVIVKEPPVFSSFPPIVETLKNAEVSLECELSGTPPFEVVWYKD
+KRQLRSSKKYKIASKNFHTSIHILNVDTSDIGEYHCKAQNEVGSDTCVCTVKLKEPPRFVSKLNSLTVVAGEPAELQASI
+EGAQPIFVQWLKEKEEVIRESENIRITFVENVATLQFAKAEPANAGKYICQIKNDGGMEENMATLMVLEPAVIVEKAGPM
+TVTVGETCTLECKVAGTPELSVEWYKDGKLLTSSQKHKFSFYNKISSLRILSVERQDAGTYTFQVQNNVGKSSCTAVVDV
+SDRAVPPSFTRRLKNTGGVLGASCILECKVAGSSPISVAWFHEKTKIVSGAKYQTTFSDNVCTLQLNSLDSSDMGNYTCV
+AANVAGSDECRAVLTVQEPPSFVKEPEPLEVLPGKNVTFTSVIRGTPPFKVNWFRGARELVKGDRCNIYFEDTVAELELF
+NIDISQSGEYTCVVSNNAGQASCTTRLFVKEPAAFLKRLSDHSVEPGKSIILESTYTGTLPISVTWKKDGFNITTSEKCN
+IVTTEKTCILEILNSTKRDAGQYSCEIENEAGRDVCGALVSTLEPPYFVTELEPLEAAVGDSVSLQCQVAGTPEITVSWY
+KGDTKLRPTPEYRTYFTNNVATLVFNKVNINDSGEYTCKAENSIGTASSKTVFRIQERQLPPSFARQLKDIEQTVGLPVT
+LTCRLNGSAPIQVCWYRDGVLLRDDENLQTSFVDNVATLKILQTDLSHSGQYSCSASNPLGTASSSARLTAREPKKSPFF
+DIKPVSIDVIAGESADFECHVTGAQPMRITWSKDNKEIRPGGNYTITCVGNTPHLRILKVGKGDSGQYTCQATNDVGKDM
+CSAQLSVKEPPKFVKKLEASKVAKQGESIQLECKISGSPEIKVSWFRNDSELHESWKYNMSFINSVALLTINEASAEDSG
+DYICEAHNGVGDASCSTALTVKAPPVFTQKPSPVGALKGSDVILQCEISGTPPFEVVWVKDRKQVRNSKKFKITSKHFDT
+SLHILNLEASDVGEYHCKATNEVGSDTCSCSVKFKEPPRFVKKLSDTSTLIGDAVELRAIVEGFQPISVVWLKDRGEVIR
+ESENTRISFIDNIATLQLGSPEASNSGKYICQIKNDAGMRECSAVLTVLEPARIIEKPEPMTVTTGNPFALECVVTGTPE
+LSAKWFKDGRELSADSKHHITFINKVASLKIPCAEMSDKGLYSFEVKNSVGKSNCTVSVHVSDRIVPPSFIRKLKDVNAI
+LGASVVLECRVSGSAPISVGWFQDGNEIVSGPKCQSSFSENVCTLNLSLLEPSDTGIYTCVAANVAGSDECSAVLTVQEP
+PSFEQTPDSVEVLPGMSLTFTSVIRGTPPFKVKWFKGSRELVPGESCNISLEDFVTELELFEVQPLESGDYSCLVTNDAG
+SASCTTHLFVKEPATFVKRLADFSVETGSPIVLEATYTGTPPISVSWIKDEYLISQSERCSITMTEKSTILEILESTIED
+YAQYSCLIENEAGQDICEALVSVLEPPYFIEPLEHVEAVIGEPATLQCKVDGTPEIRISWYKEHTKLRSAPAYKMQFKNN
+VASLVINKVDHSDVGEYSCKADNSVGAVASSAVLVIKERKLPPFFARKLKDVHETLGFPVAFECRINGSEPLQVSWYKDG
+VLLKDDANLQTSFVHNVATLQILQTDQSHIGQYNCSASNPLGTASSSAKLILSEHEVPPFFDLKPVSVDLALGESGTFKC
+HVTGTAPIKITWAKDNREIRPGGNYKMTLVENTATLTVLKVGKGDAGQYTCYASNIAGKDSCSAHLGVQEPPRFIKKLEP
+SRIVKQDEFTRYECKIGGSPEIKVLWYKDETEIQESSKFRMSFVDSVAVLEMHNLSVEDSGDYTCEAHNAAGSASSSTSL
+KVKEPPIFRKKPHPIETLKGADVHLECELQGTPPFHVSWYKDKRELRSGKKYKIMSENFLTSIHILNVDAADIGEYQCKA
+TNDVGSDTCVGSIALKAPPRFVKKLSDISTVVGKEVQLQTTIEGAEPISVVWFKDKGEIVRESDNIWISYSENIATLQFS
+RVEPANAGKYTCQIKNDAGMQECFATLSVLEPATIVEKPESIKVTTGDTCTLECTVAGTPELSTKWFKDGKELTSDNKYK
+ISFFNKVSGLKIINVAPSDSGVYSFEVQNPVGKDSCTASLQVSDRTVPPSFTRKLKETNGLSGSSVVMECKVYGSPPISV
+SWFHEGNEISSGRKYQTTLTDNTCALTVNMLEESDSGDYTCIATNMAGSDECSAPLTVREPPSFVQKPDPMDVLTGTNVT
+FTSIVKGTPPFSVSWFKGSSELVPGDRCNVSLEDSVAELELFDVDTSQSGEYTCIVSNEAGKASCTTHLYIKAPAKFVKR
+LNDYSIEKGKPLILEGTFTGTPPISVTWKKNGINVTPSQRCNITTTEKSAILEIPSSTVEDAGQYNCYIENASGKDSCSA
+QILILEPPYFVKQLEPVKVSVGDSASLQCQLAGTPEIGVSWYKGDTKLRPTTTYKMHFRNNVATLVFNQVDINDSGEYIC
+KAENSVGEVSASTFLTVQEQKLPPSFSRQLRDVQETVGLPVVFDCAISGSEPISVSWYKDGKPLKDSPNVQTSFLDNTAT
+LNIFKTDRSLAGQYSCTATNPIGSASSSARLILTEGKNPPFFDIRLAPVDAVVGESADFECHVTGTQPIKVSWAKDSREI
+RSGGKYQISYLENSAHLTVLKVDKGDSGQYTCYAVNEVGKDSCTAQLNIKERLIPPSFTKRLSETVEETEGNSFKLEGRV
+AGSQPITVAWYKNNIEIQPTSNCEITFKNNTLVLQVRKAGMNDAGLYTCKVSNDAGSALCTSSIVIKEPKKPPVFDQHLT
+PVTVSEGEYVQLSCHVQGSEPIRIQWLKAGREIKPSDRCSFSFASGTAVLELRDVAKADSGDYVCKASNVAGSDTTKSKV
+TIKDKPAVAPATKKAAVDGRLFFVSEPQSIRVVEKTTATFIAKVGGDPIPNVKWTKGKWRQLNQGGRVFIHQKGDEAKLE
+IRDTTKTDSGLYRCVAFNEHGEIESNVNLQVDERKKQEKIEGDLRAMLKKTPILKKGAGEEEEIDIMELLKNVDPKEYEK
+YARMYGITDFRGLLQAFELLKQSQEEETHRLEIEEIERSERDEKEFEELVSFIQQRLSQTEPVTLIKDIENQTVLKDNDA
+VFEIDIKINYPEIKLSWYKGTEKLEPSDKFEISIDGDRHTLRVKNCQLKDQGNYRLVCGPHIASAKLTVIEPAWERHLQD
+VTLKEGQTCTMTCQFSVPNVKSEWFRNGRILKPQGRHKTEVEHKVHKLTIADVRAEDQGQYTCKYEDLETSAELRIEAEP
+IQFTKRIQNIVVSEHQSATFECEVSFDDAIVTWYKGPTELTESQKYNFRNDGRCHYMTIHNVTPDDEGVYSVIARLEPRG
+EARSTAELYLTTKEIKLELKPPDIPDSRVPIPTMPIRAVPPEEIPPVVAPPIPLLLPTPEEKKPPPKRIEVTKKAVKKDA
+KKVVAKPKEMTPREEIVKKPPPPTTLIPAKAPEIIDVSSKAEEVKIMTITRKKEVQKEKEAVYEKKQAVHKEKRVFIESF
+EEPYDELEVEPYTEPFEQPYYEEPDEDYEEIKVEAKKEVHEEWEEDFEEGQEYYEREEGYDEGEEEWEEAYQEREVIQVQ
+KEVYEESHERKVPAKVPEKKAPPPPKVIKKPVIEKIEKTSRRMEEEKVQVTKVPEVSKKIVPQKPSRTPVQEEVIEVKVP
+AVHTKKMVISEEKMFFASHTEEEVSVTVPEVQKEIVTEEKIHVAVSKRVEPPPKVPELPEKPAPEEVAPVPIPKKVEPPA
+PKVPEVPKKPVPEEKKPVPVPKKEPAAPPKVPEVPKKPVPEEKIPVPVAKKKEAPPAKVPEVQKRVVTEEKITIVTQREE
+SPPPAVPEIPKKKVPEERKPVPRKEEEVPPPPKVPALPKKPVPEEKVAVPVPVAKKAPPPRAEVSKKTVVEEKRFVAEEK
+LSFAVPQRVEVTRHEVSAEEEWSYSEEEEGVSISVYREEEREEEEEAEVTEYEVMEEPEEYVVEEKLHIISKRVEAEPAE
+VTERQEKKIVLKPKIPAKIEEPPPAKVPEAPKKIVPEKKVPAPVPKKEKVPPPKVPEEPKKPVPEKKVPPKVIKMEEPLP
+AKVTERHMQITQEEKVLVAVTKKEAPPKARVPEEPKRAVPEEKVLKLKPKREEEPPAKVTEFRKRVVKEEKVSIEAPKRE
+PQPIKEVTIMEEKERAYTLEEEAVSVQREEEYEEYEEYDYKEFEEYEPTEEYDQYEEYEEREYERYEEHEEYITEPEKPI
+PVKPVPEEPVPTKPKAPPAKVLKKAVPEEKVPVPIPKKLKPPPPKVPEEPKKVFEEKIRISITKREKEQVTEPAAKVPMK
+PKRVVAEEKVPVPRKEVAPPVRVPEVPKELEPEEVAFEEEVVTHVEEYLVEEEEEYIHEEEEFITEEEVVPVIPVKVPEV
+PRKPVPEEKKPVPVPKKKEAPPAKVPEVPKKPEEKVPVLIPKKEKPPPAKVPEVPKKPVPEEKVPVPVPKKVEAPPAKVP
+EVPKKPVPEKKVPVPAPKKVEAPPAKVPEVPKKLIPEEKKPTPVPKKVEAPPPKVPKKREPVPVPVALPQEEEVLFEEEI
+VPEEEVLPEEEEVLPEEEEVLPEEEEVLPEEEEIPPEEEEVPPEEEYVPEEEEFVPEEEVLPEVKPKVPVPAPVPEIKKK
+VTEKKVVIPKKEEAPPAKVPEVPKKVEEKRIILPKEEEVLPVEVTEEPEEEPISEEEIPEEPPSIEEVEEVAPPRVPEVI
+KKAVPEAPTPVPKKVEAPPAKVSKKIPEEKVPVPVQKKEAPPAKVPEVPKKVPEKKVLVPKKEAVPPAKGRTVLEEKVSV
+AFRQEVVVKERLELEVVEAEVEEIPEEEEFHEVEEYFEEGEFHEVEEFIKLEQHRVEEEHRVEKVHRVIEVFEAEEVEVF
+EKPKAPPKGPEISEKIIPPKKPPTKVVPRKEPPAKVPEVPKKIVVEEKVRVPEEPRVPPTKVPDVLPPKEVVPEKKVPVP
+PAKKPEAPPPKVPEAPKEVVPEKKVPVPPPKKPEVPPTKVPEVPKAAVPEKKVPEAIPPKPESPPPEVPEAPKEVVPEKK
+VPAAPPKKPEVTPVKVPEAPKEVVPEKKVPVPPPKKPEVPPTKVPEVPKVAVPEKKVPEAIPPKPESPPPEVFEEPEEVA
+LEEPPAEVVEEPEPAAPPQVTVPPKKPVPEKKAPAVVAKKPELPPVKVPEVPKEVVPEKKVPLVVPKKPEAPPAKVPEVP
+KEVVPEKKVAVPKKPEVPPAKVPEVPKKPVLEEKPAVPVPERAESPPPEVYEEPEEIAPEEEIAPEEEKPVPVAEEEEPE
+VPPPAVPEEPKKIIPEKKVPVIKKPEAPPPKEPEPEKVIEKPKLKPRPPPPPPAPPKEDVKEKIFQLKAIPKKKVPEKPQ
+VPEKVELTPLKVPGGEKKVRKLLPERKPEPKEEVVLKSVLRKRPEEEEPKVEPKKLEKVKKPAVPEPPPPKPVEEVEVPT
+VTKRERKIPEPTKVPEIKPAIPLPAPEPKPKPEAEVKTIKPPPVEPEPTPIAAPVTVPVVGKKAEAKAPKEEAAKPKGPI
+KGVPKKTPSPIEAERRKLRPGSGGEKPPDEAPFTYQLKAVPLKFVKEIKDIILTESEFVGSSAIFECLVSPSTAITTWMK
+DGSNIRESPKHRFIADGKDRKLHIIDVQLSDAGEYTCVLRLGNKEKTSTAKLVVEELPVRFVKTLEEEVTVVKGQPLYLS
+CELNKERDVVWRKDGKIVVEKPGRIVPGVIGLMRALTINDADDTDAGTYTVTVENANNLECSSCVKVVEVIRDWLVKPIR
+DQHVKPKGTAIFACDIAKDTPNIKWFKGYDEIPAEPNDKTEILRDGNHLYLKIKNAMPEDIAEYAVEIEGKRYPAKLTLG
+EREVELLKPIEDVTIYEKESASFDAEISEADIPGQWKLKGELLRPSPTCEIKAEGGKRFLTLRKVKLDQAGEVLYQALNA
+ITTAILTVKEIELDFAVPLKDVTVPERRQARFECVLTREANVIWSKGPDIIKSSDKFDIIADGKKHILVINDSQFDDEGV
+YTAEVEGKKTSARLFVTGIRLKFMSPLEDQTVKEGETATFVCELSHEKMHVVWFKNDAKLHTSRTVLISSEGKTHKLEMK
+EVTLDDISQIKAQVKELSSTAQLKVLEADPYFTVKLHDKTAVEKDEITLKCEVSKDVPVKWFKDGEEIVPSPKYSIKADG
+LRRILKIKKADLKDKGEYVCDCGTDKTKANVTVEARLIKVEKPLYGVEVFVGETAHFEIELSEPDVHGQWKLKGQPLTAS
+PDCEIIEDGKKHILILHNCQLGMTGEVSFQAANAKSAANLKVKELPLIFITPLSDVKVFEKDEAKFECEVSREPKTFRWL
+KGTQEITGDDRFELIKDGTKHSMVIKSAAFEDEAKYMFEAEDKHTSGKLIIEGIRLKFLTPLKDVTAKEKESAVFTVELS
+HDNIRVKWFKNDQRLHTTRSVSMQDEGKTHSITFKDLSIDDTSQIRVEAMGMSSEAKLTVLEGDPYFTGKLQDYTGVEKD
+EVILQCEISKADAPVKWFKDGKEIKPSKNAVIKADGKKRMLILKKALKSDIGQYTCDCGTDKTSGKLDIEDREIKLVRPL
+HSVEVMETETARFETEISEDDIHANWKLKGEALLQTPDCEIKEEGKIHSLVLHNCRLDQTGGVDFQAANVKSSAHLRVKP
+RVIGLLRPLKDVTVTAGETATFDCELSYEDIPVEWYLKGKKLEPSDKVVPRSEGKVHTLTLRDVKLEDAGEVQLTAKDFK
+THANLFVKEPPVEFTKPLEDQTVEEGATAVLECEVSRENAKVKWFKNGTEILKSKKYEIVADGRVRKLVIHDCTPEDIKT
+YTCDAKDFKTSCNLNVVPPHVEFLRPLTDLQVREKEMARFECELSRENAKVKWFKDGAEIKKGKKYDIISKGAVRILVIN
+KCLLDDEAEYSCEVRTARTSGMLTVLEEEAVFTKNLANIEVSETDTIKLVCEVSKPGAEVIWYKGDEEIIETGRYEILTE
+GRKRILVIQNAHLEDAGNYNCRLPSSRTDGKVKVHELAAEFISKPQNLEILEGEKAEFVCSISKESFPVQWKRDDKTLES
+GDKYDVIADGKKRVLVVKDATLQDMGTYVVMVGAARAAAHLTVIEKLRIVVPLKDTRVKEQQEVVFNCEVNTEGAKAKWF
+RNEEAIFDSSKYIILQKDLVYTLRIRDAHLDDQANYNVSLTNHRGENVKSAANLIVEEEDLRIVEPLKDIETMEKKSVTF
+WCKVNRLNVTLKWTKNGEEVPFDNRVSYRVDKYKHMLTIKDCGFPDEGEYIVTAGQDKSVAELLIIEAPTEFVEHLEDQT
+VTEFDDAVFSCQLSREKANVKWYRNGREIKEGKKYKFEKDGSIHRLIIKDCRLDDECEYACGVEDRKSRARLFVEEIPVE
+IIRPPQDILEAPGADVVFLAELNKDKVEVQWLRNNMVVVQGDKHQMMSEGKIHRLQICDIKPRDQGEYRFIAKDKEARAK
+LELAAAPKIKTADQDLVVDVGKPLTMVVPYDAYPKAEAEWFKENEPLSTKTIDTTAEQTSFRILEAKKGDKGRYKIVLQN
+KHGKAEGFINLKVIDVPGPVRNLEVTETFDGEVSLAWEEPLTDGGSKIIGYVVERRDIKRKTWVLATDRAESCEFTVTGL
+QKGGVEYLFRVSARNRVGTGEPVETDNPVEARSKYDVPGPPLNVTITDVNRFGVSLTWEPPEYDGGAEITNYVIELRDKT
+SIRWDTAMTVRAEDLSATVTDVVEGQEYSFRVRAQNRIGVGKPSAATPFVKVADPIERPSPPVNLTSSDQTQSSVQLKWE
+PPLKDGGSPILGYIIERCEEGKDNWIRCNMKLVPELTYKVTGLEKGNKYLYRVSAENKAGVSDPSEILGPLTADDAFVEP
+TMDLSAFKDGLEVIVPNPITILVPSTGYPRPTATWCFGDKVLETGDRVKMKTLSAYAELVISPSERSDKGIYTLKLENRV
+KTISGEIDVNVIARPSAPKELKFGDITKDSVHLTWEPPDDDGGSPLTGYVVEKREVSRKTWTKVMDFVTDLEFTVPDLVQ
+GKEYLFKVCARNKCGPGEPAYVDEPVNMSTPATVPDPPENVKWRDRTANSIFLTWDPPKNDGGSRIKGYIVERCPRGSDK
+WVACGEPVAETKMEVTGLEEGKWYAYRVKALNRQGASKPSRPTEEIQAVDTQEAPEIFLDVKLLAGLTVKAGTKIELPAT
+VTGKPEPKITWTKADMILKQDKRITIENVPKKSTVTIVDSKRSDTGTYIIEAVNVCGRATAVVEVNVLDKPGPPAAFDIT
+DVTNESCLLTWNPPRDDGGSKITNYVVERRATDSEVWHKLSSTVKDTNFKATKLIPNKEYIFRVAAENMYGVGEPVQASP
+ITAKYQFDPPGPPTRLEPSDITKDAVTLTWCEPDDDGGSPITGYWVERLDPDTDKWVRCNKMPVKDTTYRVKGLTNKKKY
+RFRVLAENLAGPGKPSKSTEPILIKDPIDPPWPPGKPTVKDVGKTSVRLNWTKPEHDGGAKIESYVIEMLKTGTDEWVRV
+AEGVPTTQHLLPGLMEGQEYSFRVRAVNKAGESEPSEPSDPVLCREKLYPPSPPRWLEVINITKNTADLKWTVPEKDGGS
+PITNYIVEKRDVRRKGWQTVDTTVKDTKCTVTPLTEGSLYVFRVAAENAIGQSDYTEIEDSVLAKDTFTTPGPPYALAVV
+DVTKRHVDLKWEPPKNDGGRPIQRYVIEKKERLGTRWVKAGKTAGPDCNFRVTDVIEGTEVQFQVRAENEAGVGHPSEPT
+EILSIEDPTSPPSPPLDLHVTDAGRKHIAIAWKPPEKNGGSPIIGYHVEMCPVGTEKWMRVNSRPIKDLKFKVEEGVVPD
+KEYVLRVRAVNAIGVSEPSEISENVVAKDPDCKPTIDLETHDIIVIEGEKLSIPVPFRAVPVPTVSWHKDGKEVKASDRL
+TMKNDHISAHLEVPKSVRADAGIYTITLENKLGSATASINVKVIGLPGPCKDIKASDITKSSCKLTWEPPEFDGGTPILH
+YVLERREAGRRTYIPVMSGENKLSWTVKDLIPNGEYFFRVKAVNKVGGGEYIELKNPVIAQDPKQPPDPPVDVEVHNPTA
+EAMTITWKPPLYDGGSKIMGYIIEKIAKGEERWKRCNEHLVPILTYTAKGLEEGKEYQFRVRAENAAGISEPSRATPPTK
+AVDPIDAPKVILRTSLEVKRGDEIALDASISGSPYPTITWIKDENVIVPEEIKKRAAPLVRRRKGEVQEEEPFVLPLTQR
+LSIDNSKKGESQLRVRDSLRPDHGLYMIKVENDHGIAKAPCTVSVLDTPGPPINFVFEDIRKTSVLCKWEPPLDDGGSEI
+INYTLEKKDKTKPDSEWIVVTSTLRHCKYSVTKLIEGKEYLFRVRAENRFGPGPPCVSKPLVAKDPFGPPDAPDKPIVED
+VTSNSMLVKWNEPKDNGSPILGYWLEKREVNSTHWSRVNKSLLNALKANVDGLLEGLTYVFRVCAENAAGPGKFSPPSDP
+KTAHDPISPPGPPIPRVTDTSSTTIELEWEPPAFNGGGEIVGYFVDKQLVGTNEWSRCTEKMIKVRQYTVKEIREGADYK
+LRVSAVNAAGEGPPGETQPVTVAEPQEPPAVELDVSVKGGIQIMAGKTLRIPAVVTGRPVPTKVWTKEEGELDKDRVVID
+NVGTKSELIIKDALRKDHGRYVITATNSCGSKFAAARVEVFDVPGPVLDLKPVVTNRKMCLLNWSDPEDDGGSEITGFII
+ERKDAKMHTWRQPIETERSKCDITGLLEGQEYKFRVIAKNKFGCGPPVEIGPILAVDPLGPPTSPERLTYTERTKSTITL
+DWKEPRSNGGSPIQGYIIEKRRHDKPDFERVNKRLCPTTSFLVENLDEHQMYEFRVKAVNEIGESEPSLPLNVVIQDDEV
+PPTIKLRLSVRGDTIKVKAGEPVHIPADVTGLPMPKIEWSKNETVIEKPTDALQITKEEVSRSEAKTELSIPKAVREDKG
+TYTVTASNRLGSVFRNVHVEVYDRPSPPRNLAVTDIKAESCYLTWDAPLDNGGSEITHYVIDKRDASRKKAEWEEVTNTA
+VEKRYGIWKLIPNGQYEFRVRAVNKYGISDECKSDKVVIQDPYRLPGPPGKPKVLARTKGSMLVSWTPPLDNGGSPITGY
+WLEKREEGSPYWSRVSRAPITKVGLKGVEFNVPRLLEGVKYQFRAMAINAAGIGPPSEPSDPEVAGDPIFPPGPPSCPEV
+KDKTKSSISLGWKPPAKDGGSPIKGYIVEMQEEGTTDWKRVNEPDKLITTCECVVPNLKELRKYRFRVKAVNEAGESEPS
+DTTGEIPATDIQEEPEVFIDIGAQDCLVCKAGSQIRIPAVIKGRPTPKSSWEFDGKAKKAMKDGVHDIPEDAQLETAENS
+SVIIIPECKRSHTGKYSITAKNKAGQKTANCRVKVMDVPGPPKDLKVSDITRGSCRLSWKMPDDDGGDRIKGYVIEKRTI
+DGKAWTKVNPDCGSTTFVVPDLLSEQQYFFRVRAENRFGIGPPVETIQRTTARDPIYPPDPPIKLKIGLITKNTVHLSWK
+PPKNDGGSPVTHYIVECLAWDPTGTKKEAWRQCNKRDVEELQFTVEDLVEGGEYEFRVKAVNAAGVSKPSATVGPCDCQR
+PDMPPSIDLKEFMEVEEGTNVNIVAKIKGVPFPTLTWFKAPPKKPDNKEPVLYDTHVNKLVVDDTCTLVIPQSRRSDTGL
+YTITAVNNLGTASKEMRLNVLGRPGPPVGPIKFESVSADQMTLSWFPPKDDGGSKITNYVIEKREANRKTWVHVSSEPKE
+CTYTIPKLLEGHEYVFRIMAQNKYGIGEPLDSEPETARNLFSVPGAPDKPTVSSVTRNSMTVNWEEPEYDGGSPVTGYWL
+EMKDTTSKRWKRVNRDPIKAMTLGVSYKVTGLIEGSDYQFRVYAINAAGVGPASLPSDPATARDPIAPPGPPFPKVTDWT
+KSSADLEWSPPLKDGGSKVTGYIVEYKEEGKEEWEKGKDKEVRGTKLVVTGLKEGAFYKFRVSAVNIAGIGEPGEVTDVI
+EMKDRLVSPDLQLDASVRDRIVVHAGGVIRIIAYVSGKPPPTVTWNMNERTLPQEATIETTAISSSMVIKNCQRSHQGVY
+SLLAKNEAGERKKTIIVDVLDVPGPVGTPFLAHNLTNESCKLTWFSPEDDGGSPITNYVIEKRESDRRAWTPVTYTVTRQ
+NATVQGLIQGKAYFFRIAAENSIGMGPFVETSEALVIREPITVPERPEDLEVKEVTKNTVTLTWNPPKYDGGSEIINYVL
+ESRLIGTEKFHKVTNDNLLSRKYTVKGLKEGDTYEYRVSAVNIVGQGKPSFCTKPITCKDELAPPTLHLDFRDKLTIRVG
+EAFALTGRYSGKPKPKVSWFKDEADVLEDDRTHIKTTPATLALEKIKAKRSDSGKYCVVVENSTGSRKGFCQVNVVDRPG
+PPVGPVSFDEVTKDYMVISWKPPLDDGGSKITNYIIEKKEVGKDVWMPVTSASAKTTCKVSKLLEGKDYIFRIHAENLYG
+ISDPLVSDSMKAKDRFRVPDAPDQPIVTEVTKDSALVTWNKPHDGGKPITNYILEKRETMSKRWARVTKDPIHPYTKFRV
+PDLLEGCQYEFRVSAENEIGIGDPSPPSKPVFAKDPIAKPSPPVNPEAIDTTCNSVDLTWQPPRHDGGSKILGYIVEYQK
+VGDEEWRRANHTPESCPETKYKVTGLRDGQTYKFRVLAVNAAGESDPAHVPEPVLVKDRLEPPELILDANMAREQHIKVG
+DTLRLSAIIKGVPFPKVTWKKEDRDAPTKARIDVTPVGSKLEIRNAAHEDGGIYSLTVENPAGSKTVSVKVLVLDKPGPP
+RDLEVSEIRKDSCYLTWKEPLDDGGSVITNYVVERRDVASAQWSPLSATSKKKSHFAKHLNEGNQYLFRVAAENQYGRGP
+FVETPKPIKALDPLHPPGPPKDLHHVDVDKTEVSLVWNKPDRDGGSPITGYLVEYQEEGTQDWIKFKTVTNLECVVTGLQ
+QGKTYRFRVKAENIVGLGLPDTTIPIECQEKLVPPSVELDVKLIEGLVVKAGTTVRFPAIIRGVPVPTAKWTTDGSEIKT
+DEHYTVETDNFSSVLTIKNCLRRDTGEYQITVSNAAGSKTVAVHLTVLDVPGPPTGPINILDVTPEHMTISWQPPKDDGG
+SPVINYIVEKQDTRKDTWGVVSSGSSKTKLKIPHLQKGCEYVFRVRAENKIGVGPPLDSTPTVAKHKFSPPSPPGKPVVT
+DITENAATVSWTLPKSDGGSPITGYYMERREVTGKWVRVNKTPIADLKFRVTGLYEGNTYEFRVFAENLAGLSKPSPSSD
+PIKACRPIKPPGPPINPKLKDKSRETADLVWTKPLSDGGSPILGYVVECQKPGTAQWNRINKDELIRQCAFRVPGLIEGN
+EYRFRIKAANIVGEGEPRELAESVIAKDILHPPEVELDVTCRDVITVRVGQTIRILARVKGRPEPDITWTKEGKVLVREK
+RVDLIQDLPRVELQIKEAVRADHGKYIISAKNSSGHAQGSAIVNVLDRPGPCQNLKVTNVTKENCTISWENPLDNGGSEI
+TNFIVEYRKPNQKGWSIVASDVTKRLIKANLLANNEYYFRVCAENKVGVGPTIETKTPILAINPIDRPGEPENLHIADKG
+KTFVYLKWRRPDYDGGSPNLSYHVERRLKGSDDWERVHKGSIKETHYMVDRCVENQIYEFRVQTKNEGGESDWVKTEEVV
+VKEDLQKPVLDLKLSGVLTVKAGDTIRLEAGVRGKPFPEVAWTKDKDATDLTRSPRVKIDTRADSSKFSLTKAKRSDGGK
+YVVTATNTAGSFVAYATVNVLDKPGPVRNLKIVDVSSDRCTVCWDPPEDDGGCEIQNYILEKCETKRMVWSTYSATVLTP
+GTTVTRLIEGNEYIFRVRAENKIGTGPPTESKPVIAKTKYDKPGRPDPPEVTKVSKEEMTVVWNPPEYDGGKSITGYFLE
+KKEKHSTRWVPVNKSAIPERRMKVQNLLPDHEYQFRVKAENEIGIGEPSLPSRPVVAKDPIEPPGPPTNFRVVDTTKHSI
+TLGWGKPVYDGGAPIIGYVVEMRPKIADASPDEGWKRCNAAAQLVRKEFTVTSLDENQEYEFRVCAQNQVGIGRPAELKE
+AIKPKEILEPPEIDLDASMRKLVIVRAGCPIRLFAIVRGRPAPKVTWRKVGIDNVVRKGQVDLVDTMAFLVIPNSTRDDS
+GKYSLTLVNPAGEKAVFVNVRVLDTPGPVSDLKVSDVTKTSCHVSWAPPENDGGSQVTHYIVEKREADRKTWSTVTPEVK
+KTSFHVTNLVPGNEYYFRVTAVNEYGPGVPTDVPKPVLASDPLSEPDPPRKLEVTEMTKNSATLAWLPPLRDGGAKIDGY
+ITSYREEEQPADRWTEYSVVKDLSLVVTGLKEGKKYKFRVAARNAVGVSLPREAEGVYEAKEQLLPPKILMPEQITIKAG
+KKLRIEAHVYGKPHPTCKWKKGEDEVVTSSHLAVHKADSSSILIIKDVTRKDSGYYSLTAENSSGTDTQKIKVVVMDAPG
+PPQPPFDISDIDADACSLSWHIPLEDGGSNITNYIVEKCDVSRGDWVTALASVTKTSCRVGKLIPGQEYIFRVRAENRFG
+ISEPLTSPKMVAQFPFGVPSEPKNARVTKVNKDCIFVAWDRPDSDGGSPIIGYLIERKERNSLLWVKANDTLVRSTEYPC
+AGLVEGLEYSFRIYALNKAGSSPPSKPTEYVTARMPVDPPGKPEVIDVTKSTVSLIWARPKHDGGSKIIGYFVEACKLPG
+DKWVRCNTAPHQIPQEEYTATGLEEKAQYQFRAIARTAVNISPPSEPSDPVTILAENVPPRIDLSVAMKSLLTVKAGTNV
+CLDATVFGKPMPTVSWKKDGTLLKPAEGIKMAMQRNLCTLELFSVNRKDSGDYTITAENSSGSKSATIKLKVLDKPGPPA
+SVKINKMYSDRAMLSWEPPLEDGGSEITNYIVDKRETSRPNWAQVSATVPITSCSVEKLIEGHEYQFRICAENKYGVGDP
+VFTEPAIAKNPYDPPGRCDPPVISNITKDHMTVSWKPPADDGGSPITGYLLEKRETQAVNWTKVNRKPIIERTLKATGLQ
+EGTEYEFRVTAINKAGPGKPSDASKAAYARDPQYPPAPPAFPKVYDTTRSSVSLSWGKPAYDGGSPIIGYLVEVKRADSD
+NWVRCNLPQNLQKTRFEVTGLMEDTQYQFRVYAVNKIGYSDPSDVPDKHYPKDILIPPEGELDADLRKTLILRAGVTMRL
+YVPVKGRPPPKITWSKPNVNLRDRIGLDIKSTDFDTFLRCENVNKYDAGKYILTLENSCGKKEYTIVVKVLDTPGPPVNV
+TVKEISKDSAYVTWEPPIIDGGSPIINYVVQKRDAERKSWSTVTTECSKTSFRVANLEEGKSYFFRVFAENEYGIGDPGE
+TRDAVKASQTPGPVVDLKVRSVSKSSCSIGWKKPHSDGGSRIIGYVVDFLTEENKWQRVMKSLSLQYSAKDLTEGKEYTF
+RVSAENENGEGTPSEITVVARDDVVAPDLDLKGLPDLCYLAKENSNFRLKIPIKGKPAPSVSWKKGEDPLATDTRVSVES
+SAVNTTLIVYDCQKSDAGKYTITLKNVAGTKEGTISIKVVGKPGIPTGPIKFDEVTAEAMTLKWAPPKDDGGSEITNYIL
+EKRDSVNNKWVTCASAVQKTTFRVTRLHEGMEYTFRVSAENKYGVGEGLKSEPIVARHPFDVPDAPPPPNIVDVRHDSVS
+LTWTDPKKTGGSPITGYHLEFKERNSLLWKRANKTPIRMRDFKVTGLTEGLEYEFRVMAINLAGVGKPSLPSEPVVALDP
+IDPPGKPEVINITRNSVTLIWTEPKYDGGHKLTGYIVEKRDLPSKSWMKANHVNVPECAFTVTDLVEGGKYEFRIRAKNT
+AGAISAPSESTETIICKDEYEAPTIVLDPTIKDGLTIKAGDTIVLNAISILGKPLPKSSWSKAGKDIRPSDITQITSTPT
+SSMLTIKYATRKDAGEYTITATNPFGTKVEHVKVTVLDVPGPPGPVEISNVSAEKATLTWTPPLEDGGSPIKSYILEKRE
+TSRLLWTVVSEDIQSCRHVATKLIQGNEYIFRVSAVNHYGKGEPVQSEPVKMVDRFGPPGPPEKPEVSNVTKNTATVSWK
+RPVDDGGSEITGYHVERREKKSLRWVRAIKTPVSDLRCKVTGLQEGSTYEFRVSAENRAGIGPPSEASDSVLMKDAAYPP
+GPPSNPHVTDTTKKSASLAWGKPHYDGGLEITGYVVEHQKVGDEAWIKDTTGTALRITQFVVPDLQTKEKYNFRISAIND
+AGVGEPAVIPDVEIVEREMAPDFELDAELRRTLVVRAGLSIRIFVPIKGRPAPEVTWTKDNINLKNRANIENTESFTLLI
+IPECNRYDTGKFVMTIENPAGKKSGFVNVRVLDTPGPVLNLRPTDITKDSVTLHWDLPLIDGGSRITNYIVEKREATRKS
+YSTATTKCHKCTYKVTGLSEGCEYFFRVMAENEYGIGEPTETTEPVKASEAPSPPDSLNIMDITKSTVSLAWPKPKHDGG
+SKITGYVIEAQRKGSDQWTHITTVKGLECVVRNLTEGEEYTFQVMAVNSAGRSAPRESRPVIVKEQTMLPELDLRGIYQK
+LVIAKAGDNIKVEIPVLGRPKPTVTWKKGDQILKQTQRVNFETTATSTILNINECVRSDSGPYPLTARNIVGEVGDVITI
+QVHDIPGPPTGPIKFDEVSSDFVTFSWDPPENDGGVPISNYVVEMRQTDSTTWVELATTVIRTTYKATRLTTGLEYQFRV
+KAQNRYGVGPGITSACIVANYPFKVPGPPGTPQVTAVTKDSMTISWHEPLSDGGSPILGYHVERKERNGILWQTVSKALV
+PGNIFKSSGLTDGIAYEFRVIAENMAGKSKPSKPSEPMLALDPIDPPGKPVPLNITRHTVTLKWAKPEYTGGFKITSYIV
+EKRDLPNGRWLKANFSNILENEFTVSGLTEDAAYEFRVIAKNAAGAISPPSEPSDAITCRDDVEAPKIKVDVKFKDTVIL
+KAGEAFRLEADVSGRPPPTMEWSKDGKELEGTAKLEIKIADFSTNLVNKDSTRRDSGAYTLTATNPGGFAKHIFNVKVLD
+RPGPPEGPLAVTEVTSEKCVLSWFPPLDDGGAKIDHYIVQKRETSRLAWTNVASEVQVTKLKVTKLLKGNEYIFRVMAVN
+KYGVGEPLESEPVLAVNPYGPPDPPKNPEVTTITKDSMVVCWGHPDSDGGSEIINYIVERRDKAGQRWIKCNKKTLTDLR
+YKVSGLTEGHEYEFRIMAENAAGISAPSPTSPFYKACDTVFKPGPPGNPRVLDTSRSSISIAWNKPIYDGGSEITGYMVE
+IALPEEDEWQIVTPPAGLKATSYTITGLTENQEYKIRIYAMNSEGLGEPALVPGTPKAEDRMLPPEIELDADLRKVVTIR
+ACCTLRLFVPIKGRPAPEVKWARDHGESLDKASIESTSSYTLLIVGNVNRFDSGKYILTVENSSGSKSAFVNVRVLDTPG
+PPQDLKVKEVTKTSVTLTWDPPLLDGGSKIKNYIVEKRESTRKAYSTVATNCHKTSWKVDQLQEGCSYYFRVLAENEYGI
+GLPAETAESVKASERPLPPGKITLMDVTRNSVSLSWEKPEHDGGSRILGYIVEMQTKGSDKWATCATVKVTEATITGLIQ
+GEEYSFRVSAQNEKGISDPRQLSVPVIAKDLVIPPAFKLLFNTFTVLAGEDLKVDVPFIGRPTPAVTWHKDNVPLKQTTR
+VNAESTENNSLLTIKDACREDVGHYVVKLTNSAGEAIETLNVIVLDKPGPPTGPVKMDEVTADSITLSWGPPKYDGGSSI
+NNYIVEKRDTSTTTWQIVSATVARTTIKACRLKTGCEYQFRIAAENRYGKSTYLNSEPTVAQYPFKVPGPPGTPVVTLSS
+RDSMEVQWNEPISDGGSRVIGYHLERKERNSILWVKLNKTPIPQTKFKTTGLEEGVEYEFRVSAENIVGIGKPSKVSECY
+VARDPCDPPGRPEAIIVTRNSVTLQWKKPTYDGGSKITGYIVEKKELPEGRWMKASFTNIIDTHFEVTGLVEDHRYEFRV
+IARNAAGVFSEPSESTGAITARDEVDPPRISMDPKYKDTIVVHAGESFKVDADIYGKPIPTIQWIKGDQELSNTARLEIK
+STDFATSLSVKDAVRVDSGNYILKAKNVAGERSVTVNVKVLDRPGPPEGPVVISGVTAEKCTLAWKPPLQDGGSDIINYI
+VERRETSRLVWTVVDANVQTLSCKVTKLLEGNEYTFRIMAVNKYGVGEPLESEPVVAKNPFVVPDAPKAPEVTTVTKDSM
+IVVWERPASDGGSEILGYVLEKRDKEGIRWTRCHKRLIGELRLRVTGLIENHDYEFRVSAENAAGLSEPSPPSAYQKACD
+PIYKPGPPNNPKVIDITRSSVFLSWSKPIYDGGCEIQGYIVEKCDVSVGEWTMCTPPTGINKTNIEVEKLLEKHEYNFRI
+CAINKAGVGEHADVPGPIIVEEKLEAPDIDLDLELRKIINIRAGGSLRLFVPIKGRPTPEVKWGKVDGEIRDAAIIDVTS
+SFTSLVLDNVNRYDSGKYTLTLENSSGTKSAFVTVRVLDTPSPPVNLKVTEITKDSVSITWEPPLLDGGSKIKNYIVEKR
+EATRKSYAAVVTNCHKNSWKIDQLQEGCSYYFRVTAENEYGIGLPAQTADPIKVAEVPQPPGKITVDDVTRNSVSLSWTK
+PEHDGGSKIIQYIVEMQAKHSEKWSECARVKSLQAVITNLTQGEEYLFRVVAVNEKGRSDPRSLAVPIVAKDLVIEPDVK
+PAFSSYSVQVGQDLKIEVPISGRPKPTITWTKDGLPLKQTTRINVTDSLDLTTLSIKETHKDDGGQYGITVANVVGQKTA
+SIEIVTLDKPDPPKGPVKFDDVSAESITLSWNPPLYTGGCQITNYIVQKRDTTTTVWDVVSATVARTTLKVTKLKTGTEY
+QFRIFAENRYGQSFALESDPIVAQYPYKEPGPPGTPFATAISKDSMVIQWHEPVNNGGSPVIGYHLERKERNSILWTKVN
+KTIIHDTQFKAQNLEEGIEYEFRVYAENIVGVGKASKNSECYVARDPCDPPGTPEPIMVKRNEITLQWTKPVYDGGSMIT
+GYIVEKRDLPDGRWMKASFTNVIETQFTVSGLTEDQRYEFRVIAKNAAGAISKPSDSTGPITAKDEVELPRISMDPKFRD
+TIVVNAGETFRLEADVHGKPLPTIEWLRGDKEIEESARCEIKNTDFKALLIVKDAIRIDGGQYILRASNVAGSKSFPVNV
+KVLDRPGPPEGPVQVTGVTSEKCSLTWSPPLQDGGSDISHYVVEKRETSRLAWTVVASEVVTNSLKVTKLLEGNEYVFRI
+MAVNKYGVGEPLESAPVLMKNPFVLPGPPKSLEVTNIAKDSMTVCWNRPDSDGGSEIIGYIVEKRDRSGIRWIKCNKRRI
+TDLRLRVTGLTEDHEYEFRVSAENAAGVGEPSPATVYYKACDPVFKPGPPTNAHIVDTTKNSITLAWGKPIYDGGSEILG
+YVVEICKADEEEWQIVTPQTGLRVTRFEISKLTEHQEYKIRVCALNKVGLGEATSVPGTVKPEDKLEAPELDLDSELRKG
+IVVRAGGSARIHIPFKGRPTPEITWSREEGEFTDKVQIEKGVNYTQLSIDNCDRNDAGKYILKLENSSGSKSAFVTVKVL
+DTPGPPQNLAVKEVRKDSAFLVWEPPIIDGGAKVKNYVIDKRESTRKAYANVSSKCSKTSFKVENLTEGAIYYFRVMAEN
+EFGVGVPVETVDAVKAAEPPSPPGKVTLTDVSQTSASLMWEKPEHDGGSRVLGYVVEMQPKGTEKWSIVAESKVCNAVVT
+GLSSGQEYQFRVKAYNEKGKSDPRVLGVPVIAKDLTIQPSLKLPFNTYSIQAGEDLKIEIPVIGRPRPNISWVKDGEPLK
+QTTRVNVEETATSTVLHIKEGNKDDFGKYTVTATNSAGTATENLSVIVLEKPGPPVGPVRFDEVSADFVVISWEPPAYTG
+GCQISNYIVEKRDTTTTTWHMVSATVARTTIKITKLKTGTEYQFRIFAENRYGKSAPLDSKAVIVQYPFKEPGPPGTPFV
+TSISKDQMLVQWHEPVNDGGTKIIGYHLEQKEKNSILWVKLNKTPIQDTKFKTTGLDEGLEYEFKVSAENIVGIGKPSKV
+SECFVARDPCDPPGRPEAIVITRNNVTLKWKKPAYDGGSKITGYIVEKKDLPDGRWMKASFTNVLETEFTVSGLVEDQRY
+EFRVIARNAAGNFSEPSDSSGAITARDEIDAPNASLDPKYKDVIVVHAGETFVLEADIRGKPIPDVVWSKDGKELEETAA
+RMEIKSTIQKTTLVVKDCIRTDGGQYILKLSNVGGTKSIPITVKVLDRPGPPEGPLKVTGVTAEKCYLAWNPPLQDGGAN
+ISHYIIEKRETSRLSWTQVSTEVQALNYKVTKLLPGNEYIFRVMAVNKYGIGEPLESGPVTACNPYKPPGPPSTPEVSAI
+TKDSMVVTWARPVDDGGTEIEGYILEKRDKEGVRWTKCNKKTLTDLRLRVTGLTEGHSYEFRVAAENAAGVGEPSEPSVF
+YRACDALYPPGPPSNPKVTDTSRSSVSLAWSKPIYDGGAPVKGYVVEVKEAAADEWTTCTPPTGLQGKQFTVTKLKENTE
+YNFRICAINSEGVGEPATLPGSVVAQERIEPPEIELDADLRKVVVLRASATLRLFVTIKGRPEPEVKWEKAEGILTDRAQ
+IEVTSSFTMLVIDNVTRFDSGRYNLTLENNSGSKTAFVNVRVLDSPSAPVNLTIREVKKDSVTLSWEPPLIDGGAKITNY
+IVEKRETTRKAYATITNNCTKTTFRIENLQEGCSYYFRVLASNEYGIGLPAETTEPVKVSEPPLPPGRVTLVDVTRNTAT
+IKWEKPESDGGSKITGYVVEMQTKGSEKWSTCTQVKTLEATISGLTAGEEYVFRVAAVNEKGRSDPRQLGVPVIARDIEI
+KPSVELPFHTFNVKAREQLKIDVPFKGRPQATVNWRKDGQTLKETTRVNVSSSKTVTSLSIKEASKEDVGTYELCVSNSA
+GSITVPITIIVLDRPGPPGPIRIDEVSCDSITISWNPPEYDGGCQISNYIVEKKETTSTTWHIVSQAVARTSIKIVRLTT
+GSEYQFRVCAENRYGKSSYSESSAVVAEYPFSPPGPPGTPKVVHATKSTMLVTWQVPVNDGGSRVIGYHLEYKERSSILW
+SKANKILIADTQMKVSGLDEGLMYEYRVYAENIAGIGKCSKSCEPVPARDPCDPPGQPEVTNITRKSVSLKWSKPHYDGG
+AKITGYIVERRELPDGRWLKCNYTNIQETYFEVTELTEDQRYEFRVFARNAADSVSEPSESTGPIIVKDDVEPPRVMMDV
+KFRDVIVVKAGEVLKINADIAGRPLPVISWAKDGIEIEERARTEIISTDNHTLLTVKDCIRRDTGQYVLTLKNVAGTRSV
+AVNCKVLDKPGPPAGPLEINGLTAEKCSLSWGRPQEDGGADIDYYIVEKRETSHLAWTICEGELQMTSCKVTKLLKGNEY
+IFRVTGVNKYGVGEPLESVAIKALDPFTVPSPPTSLEITSVTKESMTLCWSRPESDGGSEISGYIIERREKNSLRWVRVN
+KKPVYDLRVKSTGLREGCEYEYRVYAENAAGLSLPSETSPLIRAEDPVFLPSPPSKPKIVDSGKTTITIAWVKPLFDGGA
+PITGYTVEYKKSDDTDWKTSIQSLRGTEYTISGLTTGAEYVFRVKSVNKVGASDPSDSSDPQIAKEREEEPLFDIDSEMR
+KTLIVKAGASFTMTVPFRGRPVPNVLWSKPDTDLRTRAYVDTTDSRTSLTIENANRNDSGKYTLTIQNVLSAASLTLVVK
+VLDTPGPPTNITVQDVTKESAVLSWDVPENDGGAPVKNYHIEKREASKKAWVSVTNNCNRLSYKVTNLQEGAIYYFRVSG
+ENEFGVGIPAETKEGVKITEKPSPPEKLGVTSISKDSVSLTWLKPEHDGGSRIVHYVVEALEKGQKNWVKCAVAKSTHHV
+VSGLRENSEYFFRVFAENQAGLSDPRELLLPVLIKEQLEPPEIDMKNFPSHTVYVRAGSNLKVDIPISGKPLPKVTLSRD
+GVPLKATMRFNTEITAENLTINLKESVTADAGRYEITAANSSGTTKAFINIVVLDRPGPPTGPVVISDITEESVTLKWEP
+PKYDGGSQVTNYILLKRETSTAVWTEVSATVARTMMKVMKLTTGEEYQFRIKAENRFGISDHIDSACVTVKLPYTTPGPP
+STPWVTNVTRESITVGWHEPVSNGGSAVVGYHLEMKDRNSILWQKANKLVIRTTHFKVTTISAGLIYEFRVYAENAAGVG
+KPSHPSEPVLAIDACEPPRNVRITDISKNSVSLSWQQPAFDGGSKITGYIVERRDLPDGRWTKASFTNVTETQFIISGLT
+QNSQYEFRVFARNAVGSISNPSEVVGPITCIDSYGGPVIDLPLEYTEVVKYRAGTSVKLRAGISGKPAPTIEWYKDDKEL
+QTNALVCVENTTDLASILIKDADRLNSGCYELKLRNAMGSASATIRVQILDKPGPPGGPIEFKTVTAEKITLLWRPPADD
+GGAKITHYIVEKRETSRVVWSMVSEHLEECIITTTKIIKGNEYIFRVRAVNKYGIGEPLESDSVVAKNAFVTPGPPGIPE
+VTKITKNSMTVVWSRPIADGGSDISGYFLEKRDKKSLGWFKVLKETIRDTRQKVTGLTENSDYQYRVCAVNAAGQGPFSE
+PSEFYKAADPIDPPGPPAKIRIADSTKSSITLGWSKPVYDGGSAVTGYVVEIRQGEEEEWTTVSTKGEVRTTEYVVSNLK
+PGVNYYFRVSAVNCAGQGEPIEMNEPVQAKDILEAPEIDLDVALRTSVIAKAGEDVQVLIPFKGRPPPTVTWRKDEKNLG
+SDARYSIENTDSSSLLTIPQVTRNDTGKYILTIENGVGEPKSSTVSVKVLDTPAACQKLQVKHVSRGTVTLLWDPPLIDG
+GSPIINYVIEKRDATKRTWSVVSHKCSSTSFKLIDLSEKTPFFFRVLAENEIGIGEPCETTEPVKAAEVPAPIRDLSMKD
+STKTSVILSWTKPDFDGGSVITEYVVERKGKGEQTWSHAGISKTCEIEVSQLKEQSVLEFRVFAKNEKGLSDPVTIGPIT
+VKELIITPEVDLSDIPGAQVTVRIGHNVHLELPYKGKPKPSISWLKDGLPLKESEFVRFSKTENKITLSIKNAKKEHGGK
+YTVILDNAVCRIAVPITVITLGPPSKPKGPIRFDEIKADSVILSWDVPEDNGGGEITCYSIEKRETSQTNWKMVCSSVAR
+TTFKVPNLVKDAEYQFRVRAENRYGVSQPLVSSIIVAKHQFRIPGPPGKPVIYNVTSDGMSLTWDAPVYDGGSEVTGFHV
+EKKERNSILWQKVNTSPISGREYRATGLVEGLDYQFRVYAENSAGLSSPSDPSKFTLAVSPVDPPGTPDYIDVTRETITL
+KWNPPLRDGGSKIVGYSIEKRQGNERWVRCNFTDVSECQYTVTGLSPGDRYEFRIIARNAVGTISPPSQSSGIIMTRDEN
+VPPIVEFGPEYFDGLIIKSGESLRIKALVQGRPVPRVTWFKDGVEIEKRMNMEITDVLGSTSLFVRDATRDHRGVYTVEA
+KNASGSAKAEIKVKVQDTPGKVVGPIRFTNITGEKMTLWWDAPLNDGCAPITHYIIEKRETSRLAWALIEDKCEAQSYTA
+IKLINGNEYQFRVSAVNKFGVGRPLDSDPVVAQIQYTVPDAPGIPEPSNITGNSITLTWARPESDGGSEIQQYILERREK
+KSTRWVKVISKRPISETRFKVTGLTEGNEYEFHVMAENAAGVGPASGISRLIKCREPVNPPGPPTVVKVTDTSKTTVSLE
+WSKPVFDGGMEIIGYIIEMCKADLGDWHKVNAEACVKTRYTVTDLQAGEEYKFRVSAINGAGKGDSCEVTGTIKAVDRLT
+APELDIDANFKQTHVVRAGASIRLFIAYQGRPTPTAVWSKPDSNLSLRADIHTTDSFSTLTVENCNRNDAGKYTLTVENN
+SGSKSITFTVKVLDTPGPPGPITFKDVTRGSATLMWDAPLLDGGARIHHYVVEKREASRRSWQVISEKCTRQIFKVNDLA
+EGVPYYFRVSAVNEYGVGEPYEMPEPIVATEQPAPPRRLDVVDTSKSSAVLAWLKPDHDGGSRITGYLLEMRQKGSDFWV
+EAGHTKQLTFTVERLVEKTEYEFRVKAKNDAGYSEPREAFSSVIIKEPQIEPTADLTGITNQLITCKAGSPFTIDVPISG
+RPAPKVTWKLEEMRLKETDRVSITTTKDRTTLTVKDSMRGDSGRYFLTLENTAGVKTFSVTVVVIGRPGPVTGPIEVSSV
+SAESCVLSWGEPKDGGGTEITNYIVEKRESGTTAWQLVNSSVKRTQIKVTHLTKYMEYSFRVSSENRFGVSKPLESAPII
+AEHPFVPPSAPTRPEVYHVSANAMSIRWEEPYHDGGSKIIGYWVEKKERNTILWVKENKVPCLECNYKVTGLVEGLEYQF
+RTYALNAAGVSKASEASRPIMAQNPVDAPGRPEVTDVTRSTVSLIWSAPAYDGGSKVVGYIIERKPVSEVGDGRWLKCNY
+TIVSDNFFTVTALSEGDTYEFRVLAKNAAGVISKGSESTGPVTCRDEYAPPKAELDARLHGDLVTIRAGSDLVLDAAVGG
+KPEPKIIWTKGDKELDLCEKVSLQYTGKRATAVIKFCDRSDSGKYTLTVKNASGTKAVSVMVKVLDSPGPCGKLTVSRVT
+QEKCTLAWSLPQEDGGAEITHYIVERRETSRLNWVIVEGECPTLSYVVTRLIKNNEYIFRVRAVNKYGPGVPVESEPIVA
+RNSFTIPSPPGIPEEVGTGKEHIIIQWTKPESDGGNEISNYLVDKREKKSLRWTRVNKDYVVYDTRLKVTSLMEGCDYQF
+RVTAVNAAGNSEPSEASNFISCREPSYTPGPPSAPRVVDTTKHSISLAWTKPMYDGGTDIVGYVLEMQEKDTDQWYRVHT
+NATIRNTEFTVPDLKMGQKYSFRVAAVNVKGMSEYSESIAEIEPVERIEIPDLELADDLKKTVTIRAGASLRLMVSVSGR
+PPPVITWSKQGIDLASRAIIDTTESYSLLIVDKVNRYDAGKYTIEAENQSGKKSATVLVKVYDTPGPCPSVKVKEVSRDS
+VTITWEIPTIDGGAPVNNYIVEKREAAMRAFKTVTTKCSKTLYRISGLVEGTMYYFRVLPENIYGIGEPCETSDAVLVSE
+VPLVPAKLEVVDVTKSTVTLAWEKPLYDGGSRLTGYVLEACKAGTERWMKVVTLKPTVLEHTVTSLNEGEQYLFRIRAQN
+EKGVSEPRETVTAVTVQDLRVLPTIDLSTMPQKTIHVPAGRPVELVIPIAGRPPPAASWFFAGSKLRESERVTVETHTKV
+AKLTIRETTIRDTGEYTLELKNVTGTTSETIKVIILDKPGPPTGPIKIDEIDATSITISWEPPELDGGAPLSGYVVEQRD
+AHRPGWLPVSESVTRSTFKFTRLTEGNEYVFRVAATNRFGIGSYLQSEVIECRSSIRIPGPPETLQIFDVSRDGMTLTWY
+PPEDDGGSQVTGYIVERKEVRADRWVRVNKVPVTMTRYRSTGLTEGLEYEHRVTAINARGSGKPSRPSKPIVAMDPIAPP
+GKPQNPRVTDTTRTSVSLAWSVPEDEGGSKVTGYLIEMQKVDQHEWTKCNTTPTKIREYTLTHLPQGAEYRFRVLACNAG
+GPGEPAEVPGTVKVTEMLEYPDYELDERYQEGIFVRQGGVIRLTIPIKGKPFPICKWTKEGQDISKRAMIATSETHTELV
+IKEADRGDSGTYDLVLENKCGKKAVYIKVRVIGSPNSPEGPLEYDDIQVRSVRVSWRPPADDGGADILGYILERREVPKA
+AWYTIDSRVRGTSLVVKGLKENVEYHFRVSAENQFGISKPLKSEEPVTPKTPLNPPEPPSNPPEVLDVTKSSVSLSWSRP
+KDDGGSRVTGYYIERKETSTDKWVRHNKTQITTTMYTVTGLVPDAEYQFRIIAQNDVGLSETSPASEPVVCKDPFDKPSQ
+PGELEILSISKDSVTLQWEKPECDGGKEILGYWVEYRQSGDSAWKKSNKERIKDKQFTIGGLLEATEYEFRVFAENETGL
+SRPRRTAMSIKTKLTSGEAPGIRKEMKDVTTKLGEAAQLSCQIVGRPLPDIKWYRFGKELIQSRKYKMSSDGRTHTLTVM
+TEEQEDEGVYTCIATNEVGEVETSSKLLLQATPQFHPGYPLKEKYYGAVGSTLRLHVMYIGRPVPAMTWFHGQKLLQNSE
+NITIENTEHYTHLVMKNVQRKTHAGKYKVQLSNVFGTVDAILDVEIQDKPDKPTGPIVIEALLKNSAVISWKPPADDGGS
+WITNYVVEKCEAKEGAEWQLVSSAISVTTCRIVNLTENAGYYFRVSAQNTFGISDPLEVSSVVIIKSPFEKPGAPGKPTI
+TAVTKDSCVVAWKPPASDGGAKIRNYYLEKREKKQNKWISVTTEEIRETVFSVKNLIEGLEYEFRVKCENLGGESEWSEI
+SEPITPKSDVPIQAPHFKEELRNLNVRYQSNATLVCKVTGHPKPIVKWYRQGKEIIADGLKYRIQEFKGGYHQLIIASVT
+DDDATVYQVRATNQGGSVSGTASLEVEVPAKIHLPKTLEGMGAVHALRGEVVSIKIPFSGKPDPVITWQKGQDLIDNNGH
+YQVIVTRSFTSLVFPNGVERKDAGFYVVCAKNRFGIDQKTVELDVADVPDPPRGVKVSDVSRDSVNLTWTEPASDGGSKI
+TNYIVEKCATTAERWLRVGQARETRYTVINLFGKTSYQFRVIAENKFGLSKPSEPSEPTITKEDKTRAMNYDEEVDETRE
+VSMTKASHSSTKELYEKYMIAEDLGRGEFGIVHRCVETSSKKTYMAKFVKVKGTDQVLVKKEISILNIARHRNILHLHES
+FESMEELVMIFEFISGLDIFERINTSAFELNEREIVSYVHQVCEALQFLHSHNIGHFDIRPENIIYQTRRSSTIKIIEFG
+QARQLKPGDNFRLLFTAPEYYAPEVHQHDVVSTATDMWSLGTLVYVLLSGINPFLAETNQQIIENIMNAEYTFDEEAFKE
+ISIEAMDFVDRLLVKERKSRMTASEALQHPWLKQKIERVSTKVIRTLKHRRYYHTLIKKDLNMVVSAARISCGGAIRSQK
+GVSVAKVKVASIEIGPVSGQIMHAVGEEGGHVKYVCKIENYDQSTQVTWYFGVRQLENSEKYEITYEDGVAILYVKDITK
+LDDGTYRCKVVNDYGEDSSYAELFVKGVREVYDYYCRRTMKKIKRRTDTMRLLERPPEFTLPLYNKTAYVGENVRFGVTI
+TVHPEPHVTWYKSGQKIKPGDNDKKYTFESDKGLYQLTINSVTTDDDAEYTVVARNKYGEDSCKAKLTVTLHPPPTDSTL
+RPMFKRLLANAECQEGQSVCFEIRVSGIPPPTLKWEKDGQPLSLGPNIEIIHEGLDYYALHIRDTLPEDTGYYRVTATNT
+AGSTSCQAHLQVERLRYKKQEFKSKEEHERHVQKQIDKTLRMAEILSGTESVPLTQVAKEALREAAVLYKPAVSTKTVKG
+EFRLEIEEKKEERKLRMPYDVPEPRKYKQTTIEEDQRIKQFVPMSDMKWYKKIRDQYEMPGKLDRVVQKRPKRIRLSRWE
+QFYVMPLPRITDQYRPKWRIPKLSQDDLEIVRPARRRTPSPDYDFYYRPRRRSLGDISDEELLLPIDDYLAMKRTEEERL
+RLEEELELGFSASPPSRSPPHFELSSLRYSSPQAHVKVEETRKDFRYSTYHIPTKAEASTSYAELRERHAQAAYRQPKQR
+QRIMAEREDEELLRPVTTTQHLSEYKSELDFMSKEEKSRKKSRRQREVTEITEIEEEYEISKHAQRESSSSASRLLRRRR
+SLSPTYIELMRPVSELIRSRPQPAEEYEDDTERRSPTPERTRPRSPSPVSSERSLSRFERSARFDIFSRYESMKAALKTQ
+KTSERKYEVLSQQPFTLDHAPRITLRMRSHRVPCGQNTRFILNVQSKPTAEVKWYHNGVELQESSKIHYTNTSGVLTLEI
+LDCHTDDSGTYRAVCTNYKGEASDYATLDVTGGDYTTYASQRRDEEVPRSVFPELTRTEAYAVSSFKKTSEMEASSSVRE
+VKSQMTETRESLSSYEHSASAEMKSAALEEKSLEEKSTTRKIKTTLAARILTKPRSMTVYEGESARFSCDTDGEPVPTVT
+WLRKGQVLSTSARHQVTTTKYKSTFEISSVQASDEGNYSVVVENSEGKQEAEFTLTIQKARVTEKAVTSPPRVKSPEPRV
+KSPEAVKSPKRVKSPEPSHPKAVSPTETKPTPTEKVQHLPVSAPPKITQFLKAEASKEIAKLTCVVESSVLRAKEVTWYK
+DGKKLKENGHFQFHYSADGTYELKINNLTESDQGEYVCEISGEGGTSKTNLQFMGQAFKSIHEKVSKISETKKSDQKTTE
+STVTRKTEPKAPEPISSKPVIVTGLQDTTVSSDSVAKFAVKATGEPRPTAIWTKDGKAITQGGKYKLSEDKGGFFLEIHK
+TDTSDSGLYTCTVKNSAGSVSSSCKLTIKAIKDTEAQKVSTQKTSEITPQKKAVVQEEISQKALRSEEIKMSEAKSQEKL
+ALKEEASKVLISEEVKKSAATSLEKSIVHEEITKTSQASEEVRTHAEIKAFSTQMSINEGQRLVLKANIAGATDVKWVLN
+GVELTNSEEYRYGVSGSDQTLTIKQASHRDEGILTCISKTKEGIVKCQYDLTLSKELSDAPAFISQPRSQNINEGQNVLF
+TCEISGEPSPEIEWFKNNLPISISSNVSISRSRNVYSLEIRNASVSDSGKYTIKAKNFRGQCSATASLMVLPLVEEPSRE
+VVLRTSGDTSLQGSFSSQSVQMSASKQEASFSSFSSSSASSMTEMKFASMSAQSMSSMQESFVEMSSSSFMGISNMTQLE
+SSTSKMLKAGIRGIPPKIEALPSDISIDEGKVLTVACAFTGEPTPEVTWSCGGRKIHSQEQGRFHIENTDDLTTLIIMDV
+QKQDGGLYTLSLGNEFGSDSATVNIHIRSI
diff --git a/seq/titin_hum.seq b/seq/titin_hum.seq
new file mode 100644
index 0000000..a135656
--- /dev/null
+++ b/seq/titin_hum.seq
@@ -0,0 +1,1174 @@
+>gi|20143913|ref|NM_003319.2| Homo sapiens titin (TTN), transcript variant N2-B, mRNA
+AGCAGTCGTGCATTCCCAGCCTCGCCTCGGGTGTAGGGATTGCATAGAAAAGCAAAACTACACAGTCTTG
+ACTGTGTAGTTTTGTTTTTAGGATTAGAGGCTCACCGATTCATGTCGGAGATGGTCAGAAAAACCAACTC
+TCCATAGGACGTCGTTTCAGAAGCAACCTTGGGCTTAGTCCCACCCTTTTTAGGCACTCTTGAGAAATCA
+AGTGCCTAGAAAGATGACAACTCAAGCACCGACGTTTACGCAGCCGTTACAAAGCGTTGTGGTACTGGAG
+GGTAGTACCGCAACCTTTGAGGCTCACATTAGTGGTTTTCCAGTTCCTGAGGTGAGCTGGTTTAGGGATG
+GCCAGGTGATTTCCACTTCCACTCTGCCCGGCGTGCAGATCTCCTTTAGCGATGGCCGCGCTAAACTGAC
+GATCCCCGCCGTGACTAAAGCCAACAGTGGACGATATTCCCTGAAAGCCACCAATGGATCTGGACAAGCG
+ACTAGTACTGCTGAGCTTCTCGTGAAAGCTGAGACAGCACCACCCAACTTCGTTCAACGACTGCAGAGCA
+TGACCGTGAGACAAGGAAGCCAAGTGAGACTCCAAGTGAGAGTGACTGGAATCCCTACACCTGTGGTGAA
+GTTCTACCGGGATGGAGCCGAAATCCAGAGCTCCCTTGATTTCCAAATTTCACAAGAAGGCGACCTCTAC
+AGCTTACTGATTGCAGAAGCATACCCTGAGGACTCAGGGACCTATTCAGTAAATGCCACCAATAGCGTTG
+GAAGAGCTACTTCGACTGCTGAATTACTGGTTCAAGGTGAAGAAGAAGTACCTGCTAAAAAGACAAAGAC
+AATTGTTTCGACTGCTCAGATCTCAGAATCAAGACAAACCCGAATTGAAAAGAAGATTGAAGCCCACTTT
+GATGCCAGATCAATTGCAACAGTTGAGATGGTCATAGATGGTGCCGCTGGGCAACAGCTGCCACATAAAA
+CACCTCCCAGGATTCCTCCGAAGCCAAAGTCAAGATCCCCAACACCACCGTCTATTGCTGCCAAAGCACA
+GCTGGCTCGGCAGCAGTCCCCATCGCCCATAAGACACTCCCCTTCCCCGGTCAGACACGTGCGGGCACCG
+ACCCCATCTCCGGTCAGGTCCGTGTCTCCAGCAGCAAGAATCTCCACATCCCCCATCAGGTCTGTTAGGT
+CTCCATTGCTCATGCGTAAGACTCAGGCATCCACCGTGGCCACAGGTCCTGAAGTGCCTCCCCCTTGGAA
+GCAAGAGGGCTACGTGGCCTCCTCATCTGAGGCTGAGATGAGAGAGACAACGCTGACAACCTCTACTCAG
+ATCAGGACAGAAGAGAGATGGGAAGGGAGATACGGTGTCCAGGAGCAAGTGACCATCAGTGGTGCTGCGG
+GTGCTGCCGCCAGTGTGTCGGCCAGTGCTAGCTACGCAGCAGAGGCTGTTGCCACTGGTGCTAAAGAGGT
+GAAACAAGATGCTGACAAAAGTGCAGCTGTTGCGACTGTTGTTGCTGCCGTTGATATGGCCAGAGTGAGA
+GAACCAGTGATCAGCGCTGTAGAGCAGACTGCTCAGAGGACAACCACGACTGCTGTGCACATCCAACCTG
+CTCAAGAACAGGTAAGAAAGGAAGCGGAGAAGACTGCTGTAACTAAGGTAGTAGTGGCCGCCGATAAAGC
+CAAGGAACAAGAATTAAAATCAAGAACCAAAGAAGTAATTACCACAAAGCAAGAGCAGATGCACGTAACT
+CATGAGCAGATAAGAAAAGAAACTGAAAAAACATTTGTACCAAAGGTAGTAATTTCCGCAGCTAAAGCCA
+AAGAACAAGAAACTAGAATTTCTGAAGAAATTACTAAGAAACAGAAACAAGTAACTCAAGAAGCAATAAT
+GAAGGAAACTAGGAAAACAGTTGTACCTAAAGTCATAGTTGCCACACCCAAAGTCAAAGAACAAGATTTA
+GTATCAAGAGGTAGAGAAGGCATTACTACCAAAAGAGAACAAGTGCAAATAACTCAGGAGAAGATGAGAA
+AGGAAGCCGAGAAAACTGCCTTGTCTACAATAGCAGTTGCTACTGCTAAAGCCAAAGAACAAGAAACAAT
+ACTGAGAACTAGAGAAACTATGGCTACTAGACAAGAACAAATCCAAGTTACCCATGGAAAGGTGGACGTT
+GGAAAAAAGGCTGAAGCTGTAGCAACAGTTGTTGCTGCAGTAGACCAGGCCCGAGTCAGAGAGCCCAGAG
+AGCCTGGGCATCTTGAAGAATCCTATGCTCAGCAGACCACTTTGGAGTACGGATATAAGGAACGCATTTC
+CGCCGCAAAGGTAGCTGAGCCTCCCCAACGTCCAGCCTCAGAACCCCACGTTGTCCCTAAAGCAGTCAAG
+CCTAGAGTAATCCAGGCTCCTTCTGAGACTCATATCAAAACTACTGATCAAAAGGGAATGCACATATCAT
+CACAGATCAAGAAAACTACAGATCTAACAACGGAAAGATTAGTCCATGTGGATAAACGCCCCCGCACAGC
+TAGCCCTCACTTTACTGTTTCAAAAATTTCTGTTCCTAAGACAGAACATGGATATGAGGCATCAATAGCC
+GGTAGTGCTATTGCCACATTACAAAAAGAGTTGTCAGCCACATCTTCTGCTCAGAAGATCACCAAATCGG
+TGAAGGCTCCTACTGTGAAGCCCAGTGAGACTAGAGTAAGGGCAGAGCCCACACCCTTGCCACAGTTCCC
+CTTCGCTGACACACCAGATACTTACAAGAGTGAAGCTGGCGTTGAGGTGAAAAAGGAAGTAGGGGTGAGC
+ATCACTGGCACCACCGTCCGTGAAGAGCGCTTTGAAGTACTGCACGGACGCGAAGCCAAGGTAACAGAAA
+CAGCAAGAGTACCAGCACCTGTTGAAATTCCTGTTACTCCACCAACTTTGGTCTCGGGCTTAAAAAATGT
+GACTGTCATAGAAGGTGAATCTGTCACCTTGGAGTGCCACATCTCTGGATACCCATCCCCGACAGTGACA
+TGGTACAGGGAAGACTACCAAATCGAAAGTTCCATTGACTTCCAGATAACCTTCCAGAGTGGAATTGCTC
+GTCTTATGATTCGCGAAGCATTTGCGGAAGACAGCGGGCGATTTACTTGCAGTGCTGTAAATGAGGCTGG
+AACCGTCAGCACATCCTGCTATCTGGCTGTGCAGGTGTCAGAAGAATTTGAAAAGGAAACCACAGCCGTG
+ACTGAGAAATTTACTACAGAAGAGAAACGCTTTGTTGAGTCAAGAGATGTGGTTATGACTGATACTAGCC
+TCACAGAGGAACAAGCAGGGCCTGGAGAACCTGCCGCGCCTTACTTTATTACAAAACCAGTGGTCCAGAA
+ACTGGTGGAAGGTGGGAGCGTGGTGTTTGGATGCCAAGTTGGCGGCAACCCAAAGCCCCATGTATACTGG
+AAAAAATCTGGTGTTCCTCTAACCACTGGATACAGATACAAAGTGAGTTACAACAAACAAACCGGTGAAT
+GCAAGCTGGTGATTTCTATGACTTTTGCTGATGATGCTGGAGAATACACTATTGTTGTTCGCAATAAGCA
+TGGAGAAACTTCTGCATCTGCTTCCTTGCTTGAAGAAGCTGATTATGAGTTACTGATGAAGTCCCAGCAA
+GAAATGCTTTATCAGACACAAGTGACTGCATTTGTTCAAGAACCTAAAGTTGGAGAAACAGCACCTGGAT
+TTGTATACTCTGAGTATGAAAAAGAGTATGAAAAAGAACAAGCCTTAATTAGGAAGAAAATGGCCAAAGA
+TACTGTAGTGGTCAGAACTTATGTAGAAGATCAGGAATTCCATATTTCTTCCTTTGAAGAGAGACTTATT
+AAAGAAATTGAATATAGAATAATAAAGACTACATTAGAAGAACTTCTTGAAGAAGATGGAGAAGAAAAGA
+TGGCAGTTGACATTTCTGAATCTGAAGCTGTTGAATCAGGATTTGATTTAAGAATCAAGAATTATAGAAT
+TCTTGAGGGGATGGGTGTCACTTTTCATTGCAAGATGTCTGGATATCCATTACCAAAGATTGCTTGGTAC
+AAAGATGGCAAGCGCATCAAACATGGAGAAAGATACCAAATGGACTTTCTACAAGATGGCAGAGCTAGTC
+TGCGTATACCTGTTGTTCTTCCAGAAGATGAAGGAATCTACACTGCATTTGCCAGCAATATTAAAGGAAA
+TGCAATTTGCTCAGGGAAATTGTATGTGGAGCCTGCTGCACCACTTGGAGCTCCGACTTACATTCCCACA
+CTAGAGCCAGTGAGCAGAATCAGATCTCTCTCTCCACGTTCAGTGAGCAGGTCTCCTATACGCATGTCTC
+CTGCACGGATGTCACCTGCAAGGATGTCTCCTGCACGGATGTCCCCTGCAAGAATGTCCCCTGGACGTAG
+GCTGGAGGAGACAGATGAGTCACAACTTGAGAGACTATATAAACCAGTCTTTGTGTTAAAACCTGTTTCT
+TTCAAATGTTTAGAAGGGCAAACTGCCAGATTTGACTTAAAGGTTGTTGGTAGACCTATGCCAGAGACGT
+TCTGGTTTCATGATGGCCAGCAAATTGTCAATGACTATACCCATAAAGTAGTCATTAAAGAAGATGGTAC
+TCAATCACTAATTATTGTCCCTGCCACACCCAGTGATTCTGGGGAATGGACTGTGGTTGCCCAAAACAGG
+GCAGGCAGATCTTCAATTTCAGTGATTTTAACTGTGGAAGCTGTGGAACATCAGGTAAAACCGATGTTTG
+TAGAAAAACTGAAAAATGTCAATATAAAGGAAGGTTCCCGACTTGAAATGAAAGTCAGAGCTACGGGTAA
+CCCCAACCCTGACATTGTATGGTTGAAAAACAGTGACATCATTGTGCCTCATAAATATCCCAAAATCAGA
+ATTGAAGGAACCAAGGGAGAAGCTGCCCTTAAAATCGATTCCACTGTCAGCCAAGATTCTGCCTGGTATA
+CTGCGACTGCTATTAATAAAGCTGGCAGAGACACTACAAGATGCAAAGTAAATGTTGAAGTTGAGTTTGC
+AGAGCCTGAGCCAGAGAGAAAGTTAATCATCCCACGGGGGACATATAGAGCAAAGGAGATTGCAGCCCCA
+GAACTGGAGCCCCTCCATTTGCGATATGGCCAAGAGCAATGGGAAGAAGGTGATCTCTATGACAAAGAGA
+AACAACAGAAACCATTTTTCAAGAAAAAACTCACTTCCTTAAGACTTAAGCGCTTTGGGCCTGCCCACTT
+TGAATGCAGGCTAACACCCATTGGTGACCCAACGATGGTGGTGGAGTGGCTCCATGATGGAAAGCCACTT
+GAAGCAGCCAACAGGCTCCGTATGATCAATGAATTTGGGTACTGCAGCCTTGATTATGGCGTTGCATATT
+CTAGAGACAGTGGTATCATTACTTGCAGAGCCACTAACAAATATGGAACAGATCACACATCTGCTACCCT
+TATTGTTAAAGATGAGAAAAGTCTTGTGGAAGAATCCCAATTGCCTGAGGGGAGGAAAGGCTTACAGAGA
+ATTGAAGAATTAGAGAGAATGGCTCATGAAGGTGCACTTACAGGTGTAACAACAGATCAGAAAGAAAAGC
+AAAAGCCAGACATTGTCTTGTACCCAGAGCCAGTTAGAGTACTTGAAGGGGAGACTGCAAGGTTCCGCTG
+CAGGGTAACAGGCTACCCTCAGCCCAAAGTCAACTGGTACCTCAATGGACAGCTCATCCGCAAAAGCAAA
+AGGTTCAGAGTTCGCTATGATGGTATCCATTACCTGGACATCGTGGACTGCAAATCATATGACACAGGTG
+AAGTGAAGGTCACCGCGGAAAATCCTGAAGGTGTGATAGAGCATAAAGTGAAGCTTGAGATTCAACAGAG
+GGAAGATTTTAGGTCTGTCCTTAGGAGAGCTCCTGAACCAAGGCCTGAGTTTCACGTACATGAACCAGGA
+AAGCTTCAGTTTGAAGTACAAAAAGTGGATAGACCTGTTGACACCACTGAAACCAAAGAAGTTGTGAAGT
+TGAAAAGGGCTGAAAGAATTACCCATGAAAAAGTGCCTGAAGAGTCGGAAGAGCTGCGCAGTAAATTCAA
+GCGCAGAACAGAAGAGGGCTATTATGAAGCCATTACCGCTGTGGAGCTCAAGTCTCGAAAGAAGGATGAA
+TCCTATGAGGAACTCCTCAGGAAGACAAAAGATGAACTTCTCCACTGGACCAAAGAGTTAACTGAAGAGG
+AAAAGAAAGCTCTTGCCGAAGAAGGCAAAATCACGATTCCAACTTTTAAACCTGACAAGATTGAACTAAG
+TCCTAGTATGGAGGCTCCAAAAATCTTCGAAAGAATCCAGAGCCAAACAGTGGGCCAAGGATCTGATGCA
+CACTTCCGGGTCAGAGTCGTGGGGAAACCAGACCCCGAATGTGAATGGTACAAAAATGGTGTCAAAATTG
+AACGGTCTGACCGGATCTACTGGTACTGGCCCGAAGACAATGTTTGTGAATTGGTCATAAGAGATGTGAC
+TGCTGAGGACTCTGCCAGCATCATGGTAAAAGCCATCAACATAGCTGGAGAAACCTCCAGTCACGCATTC
+TTACTTGTCCAAGCCAAGCAATTGATCACTTTCACACAGGAATTACAAGATGTTGTTGCTAAGGAAAAAG
+ACACTATGGCAACCTTTGAATGTGAAACTTCAGAACCATTTGTCAAAGTGAAATGGTATAAAGATGGTAT
+GGAGGTTCATGAGGGAGATAAATACAGGATGCACTCTGACAGAAAGGTTCACTTCCTCTCCATACTGACC
+ATTGATACGTCTGATGCTGAAGATTACAGCTGTGTACTTGTGGAAGATGAAAATGTCAAAACGACTGCTA
+AACTTATTGTTGAAGGTGCAGTTGTTGAGTTTGTGAAAGAACTTCAGGACATAGAAGTTCCAGAATCATA
+TTCAGGAGAATTAGAGTGCATTGTATCCCCAGAAAATATAGAAGGAAAATGGTATCATAATGATGTGGAG
+CTTAAATCCAATGGCAAATATACAATTACATCTCGTCGTGGACGTCAGAACCTCACGGTCAAGGATGTAA
+CCAAGGAGGACCAGGGAGAATACAGCTTTGTCATCGACGGGAAAAAGACAACCTGTAAATTAAAGATGAA
+ACCCCGCCCCATTGCTATCCTACAAGGACTTAGTGACCAAAAAGTCTGTGAGGGTGACATTGTTCAGCTT
+GAAGTTAAAGTCTCCTTGGAAAGTGTGGAAGGCGTCTGGATGAAAGACGGCCAAGAAGTGCAGCCCAGTG
+ACAGGGTTCACATTGTGATAGACAAACAATCTCATATGCTGCTCATTGAAGACATGACTAAGGAAGATGC
+TGGAAATTACTCTTTCACCATTCCAGCCCTTGGCCTCTCCACCAGTGGGCGTGTCTCTGTCTATAGTGTG
+GACGTGATAACACCTCTAAAAGATGTTAATGTGATTGAAGGCACCAAGGCTGTGCTTGAATGTAAGGTGT
+CAGTCCCTGATGTGACTTCTGTTAAGTGGTACTTAAATGATGAACAAATCAAGCCTGATGACCGTGTACA
+GGCCATTGTGAAAGGTACTAAACAGCGACTAGTCATTAACCGAACTCATGCTTCAGACGAAGGACCTTAC
+AAGCTGATAGTTGGCAGAGTTGAAACCAACTGTAATCTCTCTGTAGAAAAAATTAAAATTATCAGAGGTC
+TTCGTGACCTTACCTGTACAGAAACTCAAAATGTGGTGTTTGAGGTTGAGCTGTCCCACTCTGGAATTGA
+TGTCCTGTGGAATTTTAAGGACAAGGAAATCAAGCCCAGTTCTAAATATAAAATTGAAGCACATGGAAAA
+ATATATAAATTGACAGTTCTAAATATGATGAAAGATGATGAAGGAAAATACACATTTTACGCGGGAGAAA
+ATATCACATCTGGAAAACTTACTGTGGCAGGTGGGGCCATCTCCAAGCCACTCACAGATCAGACCGTAGC
+TGAATCCCAGGAAGCTGTGTTTGAATGTGAAGTTGCCAACCCAGATTCCAAAGGCGAATGGTTGAGGGAT
+GGCAAACACCTACCACTGACTAACAACATCAGAAGTGAGTCTGATGGCCACAAAAGGAGACTTATCATTG
+CTGCCACCAAATTAGATGACATTGGAGAATATACCTACAAGGTGGCCACCTCCAAAACATCTGCCAAACT
+CAAAGTTGAAGCTGTCAAAATTAAGAAGACTCTGAAGAACCTCACAGTGACAGAAACACAGGATGCTGTT
+TTCACTGTCGAGCTTACACACCCTAATGTCAAAGGTGTCCAGTGGATCAAAAATGGAGTTGTGCTGGAAT
+CCAATGAAAAGTATGCTATCTCTGTCAAAGGAACAATTTACTCTCTGAGGATTAAAAACTGTGCCATCGT
+GGATGAGTCTGTTTATGGCTTCAGGCTTGGAAGGCTTGGAGCCAGTGCCAGACTGCACGTGGAGACTGTC
+AAGATCATTAAAAAGCCAAAGGATGTGACAGCCTTGGAAAATGCCACTGTTGCCTTTGAAGTTAGTGTTT
+CCCATGACACTGTTCCAGTAAAATGGTTCCATAAGAGTGTGGAAATTAAGCCAAGTGACAAACACAGACT
+GGTCTCAGAAAGGAAAGTCCACAAGCTGATGCTGCAGAACATCTCCCCCTCAGATGCTGGGGAATACACA
+GCTGTGGTCGGGCAATTGGAATGCAAAGCAAAACTGTTTGTGGAGACATTACATATTACAAAAACCATGA
+AAAATATCGAGGTGCCTGAGACCAAAACTGCCTCTTTTGAGTGTGAGGTGTCCCACTTCAATGTCCCTTC
+CATGTGGCTGAAGAATGGTGTGGAAATTGAGATGAGTGAAAAGTTCAAGATAGTTGTGCAGGGAAAACTC
+CATCAGCTGATCATCATGAACACCAGCACAGAGGACTCGGCAGAATACACATTTGTCTGTGGCAATGACC
+AAGTCAGTGCCACCCTGACAGTCACCCCAATCATGATTACTTCCATGCTGAAAGACATCAACGCTGAAGA
+AAAAGACACTATTACTTTTGAGGTGACAGTGAACTATGAAGGCATCTCTTACAAATGGTTAAAGAATGGT
+GTGGAAATCAAATCAACTGACAAGTGCCAGATGAGAACCAAAAAGCTCACACACTCACTGAACATCAGGA
+ATGTTCACTTTGGGGATGCTGCTGACTACACCTTTGTGGCTGGAAAAGCAACATCAACAGCCACACTTTA
+TGTGGAAGCTCGTCATATAGAATTTAGGAAACACATTAAGGACATTAAGGTACTGGAGAAGAAGCGAGCC
+ATGTTTGAATGTGAAGTTTCTGAACCTGACATCACTGTACAGTGGATGAAAGATGACCAGGAACTGCAGA
+TCACAGACAGAATAAAGATTCAGAAGGAGAAATATGTCCACCGCCTTCTGATCCCATCCACCCGGATGTC
+TGATGCTGGGAAGTACACAGTGGTGGCAGGAGGCAACGTGTCAACTGCAAAACTCTTTGTAGAAGGCAGA
+GATGTTCGCATCCGAAGTATTAAAAAGGAGGTTCAGGTCATTGAGAAACAGCGTGCTGTTGTTGAATTTG
+AGGTCAATGAAGACGATGTTGATGCCCACTGGTATAAAGATGGCATTGAAATCAATTTCCAAGTTCAAGA
+ACGACACAAATATGTAGTGGAAAGAAGAATCCACCGAATGTTTATCTCTGAGACCAGACAGAGCGATGCA
+GGAGAATACACCTTTGTGGCAGGAAGGAACAGGAGTTCTGTCACTCTCTATGTCAATGCTCCTGAACCGC
+CCCAAGTTCTGCAGGAGCTCCAGCCTGTCACTGTGCAGTCTGGCAAGCCTGCCCGCTTCTGTGCCGTGAT
+ATCCGGAAGACCACAGCCCAAAATTTCCTGGTACAAGGAAGAGCAGCTGCTTTCCACTGGCTTCAAGTGC
+AAATTTCTTCATGATGGGCAAGAGTACACGCTTTTGCTAATTGAAGCCTTCCCAGAGGATGCGGCAGTCT
+ATACCTGTGAAGCCAAGAATGACTATGGTGTTGCCACAACATCAGCTTCACTCTCAGTGGAAGTTCCAGA
+AGTTGTGTCTCCTGATCAGGAAATGCCTGTTTATCCACCTGCCATCATCACCCCGCTTCAGGACACTGTC
+ACTTCTGAAGGGCAGCCAGCCCGTTTTCAATGCCGGGTTTCTGGAACAGATCTAAAAGTGTCGTGGTACA
+GCAAAGACAAGAAAATCAAGCCATCTCGGTTCTTTAGAATGACTCAATTTGAAGACACTTATCAACTGGA
+AATTGCCGAAGCTTATCCAGAAGATGAAGGAACTTACACGTTTGTTGCTAGTAATGCTGTAGGCCAAGTA
+TCAAGCACAGCCAACCTGAGTCTGGAAGCTCCTGAATCAATTTTGCATGAGAGGATTGAACAAGAGATTG
+AGATGGAAATGAAAGAGTTTTCTAGTTCTTTTCTGTCTGCCGAGGAAGAAGGACTTCATAGCGCCGAACT
+TCAATTATCTAAAATAAATGAAACACTTGAACTTTTGTCTGAATCTCCAGTTTACTCAACTAAATTTGAT
+TCCGAAAAGGAAGGCACTGGCCCAATTTTCATCAAAGAAGTGTCAAATGCTGATATAAGCATGGGGGATG
+TGGCTACACTGTCTGTAACTGTCATTGGCATCCCCAAACCTAAAATTCAGTGGTTCTTTAATGGAGTGCT
+ATTAACCCCTTCTGCTGACTACAAATTTGTTTTTGACGGTGATGATCATAGCCTGATCATTCTGTTCACC
+AAATTGGAGGATGAGGGAGAGTATACATGTATGGCCAGTAATGACTATGGAAAGACAATATGTAGTGCCT
+ATCTAAAAATTAATTCCAAAGGAGAGGGTCACAAAGACACTGAAACAGAATCAGCAGTGGCAAAATCTCT
+GGAAAAGCTGGGAGGTCCTTGTCCTCCTCACTTCCTTAAGGAGTTAAAACCAATTCGCTGTGCTCAAGGG
+CTTCCTGCCATCTTTGAGTACACAGTGGTTGGAGAGCCTGCCCCTACTGTTACATGGTTCAAAGAAAACA
+AGCAGCTTTGCACCAGTGTTTATTACACTATCATTCATAACCCTAATGGCTCTGGAACTTTCATTGTCAA
+TGACCCTCAGAGGGAAGACAGTGGCCTCTATATCTGTAAAGCAGAGAATATGTTGGGTGAGTCCACCTGT
+GCAGCAGAGCTGCTTGTGCTTCTGGAAGACACAGACATGACTGATACCCCCTGCAAAGCAAAGTCCACAC
+CAGAGGCTCCTGAGGATTTTCCACAGACACCCTTAAAGGGTCCCGCAGTTGAAGCACTTGACTCAGAGCA
+GGAAATTGCAACGTTTGTAAAAGACACCATTTTGAAAGCTGCTTTAATTACAGAAGAAAACCAGCAACTA
+TCTTATGAGCATATTGCTAAAGCCAATGAATTGAGCAGTCAGCTTCCTTTGGGAGCTCAGGAATTGCAAT
+CCATTTTGGAGCAAGACAAGCTCACTCCTGAAAGCACCAGGGAATTTCTTTGCATCAATGGCAGTATTCA
+CTTTCAGCCTCTCAAGGAACCATCTCCCAACCTACAGCTGCAGATTGTACAGTCCCAGAAAACCTTCTCC
+AAAGAAGGTATTCTAATGCCTGAAGAGCCTGAGACACAGGCAGTTCTATCAGATACCGAGAAAATCTTCC
+CAAGTGCCATGTCCATAGAACAAATTAATTCATTAACAGTTGAGCCTCTGAAAACTTTATTAGCTGAACC
+TGAAGGGAATTATCCACAGTCTTCAATAGAACCTCCAATGCATTCTTATCTAACCTCTGTGGCTGAGGAA
+GTACTTTCACCAAAAGAAAAGACAGTATCTGACACCAACAGAGAGCAAAGAGTGACTCTTCAAAAGCAAG
+AGGCACAAAGTGCGCTCATCTTGAGTCAGAGCTTAGCTGAGGGACACGTGGAGAGTCTCCAGAGTCCTGA
+TGTCATGATCTCTCAGGTAAACTATGAGCCCCTAGTCCCTTCAGAACACTCATGCACAGAAGGAGGTAAA
+ATTTTGATAGAAAGTGCAAATCCACTGGAAAATGCAGGGCAAGATTCTGCGGTCAGAATTGAGGAAGGCA
+AGTCCTTAAGATTTCCACTAGCACTTGAAGAAAAGCAGGTACTGCTCAAAGAAGAGCATTCTGACAACGT
+GGTGATGCCCCCAGACCAAATCATTGAGTCTAAAAGAGAGCCCGTGGCAATAAAGAAAGTGCAGGAGGTA
+CAGGGAAGGGACCTTCTTTCTAAGGAAAGCTTGCTTTCTGGTATTCCAGAAGAGCAGAGATTAAACCTGA
+AAATTCAAATCTGCCGGGCTTTGCAAGCAGCCGTGGCCAGCGAGCAGCCAGGTCTTTTCTCTGAGTGGCT
+AAGAAATATTGAAAAGGTGGAGGTCGAGGCTGTAAACATCACCCAAGAGCCCAGACACATCATGTGCATG
+TACCTTGTTACTTCGGCAAAGTCTGTAACAGAAGAAGTAACCATCATTATTGAAGATGTTGATCCTCAAA
+TGGCTAACCTGAAAATGGAACTTAGGGATGCTTTGTGTGCTATTATATATGAGGAAATAGACATCCTAAC
+AGCTGAGGGTCCTAGAATTCAGCAAGGAGCCAAAACAAGTTTGCAAGAAGAAATGGATTCTTTTTCAGGT
+TCACAGAAGGTTGAACCCATTACTGAACCAGAAGTTGAATCTAAATATCTGATCTCAACTGAAGAGGTCA
+GTTATTTTAACGTGCAAAGTAGGGTTAAATATTTGGATGCCACACCTGTCACTAAAGGGGTTGCTTCAGC
+TGTTGTCTCTGACGAAAAACAAGATGAGAGTCTGAAACCATCAGAGGAAAAAGAGGAGTCTTCCTCTGAA
+AGTGGTACTGAGGAGGTTGCTACAGTAAAGATACAGGAAGCTGAGGGTGGCTTAATCAAAGAGGATGGCC
+CCATGATACATACACCTTTAGTGGACACTGTTTCTGAGGAAGGTGATATTGTACACCTCACAACATCCAT
+AACAAATGCTAAAGAGGTGAATTGGTATTTTGAGAATAAACTGGTGCCTTCAGATGAAAAGTTCAAGTGT
+TTACAAGATCAAAATACATATACGCTAGTCATCGACAAAGTAAATACCGAAGACCATCAAGGAGAGTATG
+TCTGTGAGGCCTTGAATGACAGCGGAAAAACAGCAACTTCAGCCAAACTCACTGTAGTAAAAAGAGCTGC
+CCCAGTGATCAAGAGGAAAATCGAACCCCTGGAAGTAGCACTGGGCCACCTAGCCAAATTCACCTGTGAG
+ATCCAAAGTGCTCCCAATGTCCGGTTCCAGTGGTTTAAAGCTGGCCGAGAAATTTATGAGAGTGACAAGT
+GTTCTATTCGATCTTCAAAGTATATCTCCAGCCTTGAAATCCTGAGAACCCAGGTGGTTGACTGCGGCGA
+GTATACATGCAAAGCTTCCAATGAGTATGGCAGTGTCAGCTGTACAGCCACACTAACTGTGACAGTGCCT
+GGAGGTGAAAAGAAAGTTCGCAAATTACTTCCGGAACGTAAACCTGAACCAAAGGAAGAAGTTGTTCTGA
+AAAGCGTTCTAAGAAAAAGACCTGAAGAAGAAGAACCTAAAGTAGAACCTAAAAAACTAGAAAAAGTTAA
+AAAACCTGCAGTACCAGAACCACCACCTCCAAAACCTGTTGAAGAGGTTGAAGTACCTACTGTTACAAAA
+AGGGAAAGGAAGATTCCTGAACCAACAAAAGTGCCTGAAATCAAGCCAGCAATACCTCTCCCTGCACCTG
+AACCGAAACCAAAGCCCGAAGCAGAAGTGAAAACAATCAAACCACCTCCTGTGGAACCTGAACCAACCCC
+CATCGCTGCCCCAGTAACAGTGCCAGTGGTTGGAAAGAAAGCAGAAGCCAAAGCACCTAAGGAAGAGGCT
+GCCAAGCCAAAAGGTCCTATCAAAGGTGTACCCAAAAAGACTCCTTCACCAATAGAAGCCGAAAGGAGAA
+AGTTAAGGCCAGGAAGTGGTGGAGAGAAACCTCCTGATGAAGCCCCGTTCACCTACCAGCTAAAGGCTGT
+GCCACTGAAGTTTGTGAAAGAAATCAAAGACATCATCTTGACAGAATCAGAGTTCGTTGGCTCTTCAGCA
+ATCTTTGAATGTTTGGTCTCCCCTTCCACTGCAATTACAACCTGGATGAAAGACGGTAGCAATATCCGTG
+AGAGTCCCAAGCACAGGTTTATTGCAGATGGTAAAGACAGAAAGCTGCACATCATTGATGTTCAACTTTC
+CGATGCTGGTGAATACACCTGTGTTTTACGTTTGGGAAACAAAGAAAAGACCTCCACGGCTAAACTTGTT
+GTAGAAGAACTTCCTGTGCGTTTTGTAAAAACACTGGAAGAGGAAGTCACAGTGGTCAAAGGACAGCCAT
+TGTACTTGAGCTGCGAGTTAAACAAAGAGCGTGACGTGGTCTGGAGGAAGGATGGCAAGATTGTGGTGGA
+GAAACCTGGCCGAATTGTGCCAGGCGTCATTGGCTTGATGCGGGCTCTGACCATCAACGATGCAGATGAC
+ACAGATGCTGGAACATACACAGTTACTGTGGAAAACGCCAACAACCTGGAGTGTTCATCTTGCGTAAAAG
+TAGTAGAAGTCATTAGAGATTGGCTGGTGAAACCTATACGAGACCAGCATGTGAAACCCAAGGGGACAGC
+TATTTTTGCCTGTGATATAGCAAAAGATACTCCAAACATTAAGTGGTTCAAAGGATATGATGAAATCCCT
+GCGGAACCAAATGATAAGACTGAAATACTGAGAGATGGAAATCATCTGTACCTCAAAATTAAGAATGCTA
+TGCCAGAAGATATTGCTGAGTATGCAGTGGAAATTGAAGGAAAAAGATACCCTGCAAAGCTGACACTTGG
+AGAGCGTGAAGTTGAACTGCTTAAACCAATAGAGGACGTTACCATTTATGAGAAAGAAAGTGCAAGCTTT
+GATGCAGAAATCTCAGAGGCAGACATTCCTGGACAATGGAAACTGAAAGGAGAACTTCTAAGGCCCTCAC
+CTACTTGTGAAATCAAAGCAGAAGGTGGAAAACGCTTCTTAACTTTGCACAAAGTCAAACTGGACCAAGC
+TGGTGAAGTCCTCTACCAGGCCCTTAATGCAATTACAACTGCCATTTTGACAGTAAAAGAAATCGAACTT
+GACTTTGCTGTGCCCCTGAAGGATGTCACTGTTCCAGAAAGGCGACAGGCTCGATTCGAATGTGTCCTCA
+CCCGAGAGGCAAATGTTATATGGTCCAAAGGACCTGATATAATTAAGTCATCTGACAAATTTGATATCAT
+CGCTGATGGAAAGAAACATATTCTTGTTATTAATGATTCTCAATTTGATGATGAAGGGGTCTATACTGCT
+GAGGTGGAGGGCAAGAAGACCTCAGCTCGGTTGTTTGTCACAGGTATAAGACTGAAATTCATGTCACCTC
+TTGAAGATCAAACAGTAAAAGAAGGTGAAACAGCAACTTTTGTTTGTGAACTTTCTCATGAAAAAATGCA
+TGTAGTCTGGTTCAAAAATGATGCCAAACTCCATACAAGCAGAACAGTACTCATCTCTTCTGAGGGCAAG
+ACTCACAAATTGGAAATGAAAGAAGTGACATTGGATGATATATCTCAGATAAAAGCTCAAGTCAAGGAGC
+TGAGCTCCACAGCACAGCTGAAGGTCTTAGAGGCCGATCCCTACTTCACTGTGAAATTACATGACAAAAC
+TGCAGTGGAGAAGGATGAGATTACTTTGAAGTGTGAAGTGAGCAAAGATGTACCAGTGAAATGGTTCAAA
+GATGGTGAAGAGATTGTCCCTTCACCCAAATATTCTATCAAGGCAGATGGCCTGCGCCGCATCTTAAAAA
+TCAAAAAGGCGGACCTTAAAGATAAAGGCGAATATGTGTGTGACTGTGGCACAGACAAGACCAAGGCAAA
+TGTTACTGTTGAGGCTCGACTAATAAAAGTGGAAAAGCCTCTGTACGGAGTAGAGGTGTTTGTTGGTGAA
+ACAGCCCACTTTGAAATTGAACTTTCTGAACCTGATGTTCACGGCCAGTGGAAGCTGAAAGGACAGCCTT
+TGACAGCTTCCCCTGACTGTGAAATCATTGAGGATGGAAAGAAGCATATTCTGATCCTTCATAACTGTCA
+GCTGGGTATGACAGGAGAGGTTTCCTTCCAGGCTGCTAATGCCAAATCTGCAGCCAATCTGAAAGTGAAA
+GAATTGCCTCTTATCTTCATCACACCTCTCAGTGATGTTAAAGTCTTCGAGAAAGATGAGGCTAAGTTTG
+AGTGTGAAGTATCCAGGGAGCCCAAAACATTCCGTTGGCTAAAAGGAACCCAGGAAATCACAGGTGATGA
+CAGATTTGAGCTTATAAAGGATGGCACTAAGCATTCAATGGTGATCAAGTCAGCTGCTTTTGAAGATGAA
+GCAAAATACATGTTTGAAGCTGAAGATAAGCACACAAGTGGCAAACTGATCATTGAAGGAATCCGGCTCA
+AATTCCTCACCCCTCTCAAAGATGTAACTGCCAAAGAGAAGGAAAGTGCTGTATTTACTGTGGAGTTATC
+TCATGATAACATCCGAGTTAAATGGTTCAAGAATGACCAGCGCCTACACACCACCAGGTCGGTCTCAATG
+CAAGACGAAGGGAAAACTCATTCGATCACATTCAAAGACCTGTCTATTGATGACACCTCCCAAATTAGAG
+TAGAAGCTATGGGGATGAGTTCAGAAGCTAAACTCACTGTGCTTGAGGGAGACCCATATTTTACAGGAAA
+ACTTCAAGATTATACTGGTGTAGAGAAAGATGAAGTTATTCTACAGTGTGAAATTAGCAAAGCAGATGCA
+CCAGTGAAATGGTTTAAGGATGGGAAGGAAATAAAGCCATCCAAAAATGCTGTTATTAAGGCAGATGGCA
+AGAAACGCATGCTAATCCTAAAGAAAGCCTTGAAATCAGATATTGGACAGTACACCTGTGACTGTGGGAC
+AGATAAGACCTCAGGAAAACTTGACATTGAGGATCGGGAAATTAAACTGGTGCGACCCCTGCACAGTGTG
+GAGGTGATGGAGACTGAGACAGCACGCTTTGAAACCGAAATCTCTGAAGATGATATCCACGCCAACTGGA
+AACTCAAGGGAGAGGCCCTACTCCAAACACCTGATTGTGAAATTAAGGAAGAAGGCAAAATACACTCCCT
+TGTTTTGCACAACTGTCGCCTGGACCAGACGGGTGGGGTGGATTTCCAAGCTGCCAATGTTAAATCTAGT
+GCCCACCTCCGAGTTAAGCCACGAGTAATTGGTCTTCTGAGGCCTTTAAAGGATGTCACCGTGACTGCAG
+GGGAAACAGCCACCTTCGACTGCGAGCTCTCCTACGAAGATATCCCAGTGGAATGGTATCTCAAAGGGAA
+GAAACTAGAGCCCAGCGATAAGGTGGTCCCACGTTCAGAAGGAAAAGTTCATACACTTACTCTGAGGGAT
+GTAAAGTTAGAAGATGCTGGGGAAGTCCAACTAACAGCAAAAGATTTCAAAACTCACGCCAACCTCTTTG
+TGAAAGAACCCCCAGTTGAATTCACTAAGCCTCTTGAGGACCAGACGGTCGAAGAGGGAGCCACTGCAGT
+GCTGGAGTGTGAAGTCTCCAGAGAAAATGCTAAGGTGAAATGGTTCAAAAATGGGACAGAAATCCTCAAA
+AGCAAGAAGTATGAAATTGTTGCTGATGGCAGGGTCAGAAAACTTGTTATACATGACTGTACCCCAGAGG
+ATATTAAAACATACACTTGTGATGCTAAGGATTTTAAGACTTCCTGTAACCTGAATGTCGTGCCTCCTCA
+TGTGGAATTCTTAAGACCACTCACCGACCTTCAAGTTAGAGAAAAAGAAATGGCTCGATTTGAGTGTGAA
+CTTTCCCGAGAAAATGCTAAGGTTAAGTGGTTTAAAGATGGTGCTGAAATTAAAAAGGGCAAAAAATATG
+ACATCATATCCAAGGGAGCAGTGCGCATTCTTGTCATCAACAAATGTCTACTGGATGATGAAGCTGAATA
+TTCCTGTGAAGTAAGGACAGCGAGAACTTCTGGCATGCTGACAGTTCTGGAAGAAGAAGCTGTCTTTACC
+AAAAATCTTGCCAACATTGAAGTTAGTGAAACAGACACTATAAAACTGGTTTGTGAAGTCTCCAAACCTG
+GCGCAGAAGTGATTTGGTATAAAGGGGATGAGGAGATCATTGAAACAGGAAGATATGAAATACTGACTGA
+AGGACGGAAGAGAATCCTGGTCATTCAGAACGCTCACCTTGAGGATGCTGGCAACTACAACTGTCGACTC
+CCAAGCTCTCGAACCGATGGCAAAGTCAAAGTACATGAACTGGCTGCTGAATTTATCTCAAAGCCTCAAA
+ACCTTGAAATACTTGAAGGAGAAAAGGCTGAATTTGTCTGCTCTATATCAAAAGAAAGCTTTCCAGTCCA
+GTGGAAGAGGGATGATAAGACACTTGAATCTGGAGATAAATATGACGTTATTGCTGATGGTAAAAAGAGG
+GTCCTAGTTGTGAAAGATGCCACATTACAAGATATGGGCACTTACGTTGTCATGGTAGGGGCCGCCAGAG
+CAGCAGCTCACTTGACAGTCATTGAAAAACTCAGGATCGTAGTTCCTCTTAAGGACACCCGGGTGAAGGA
+ACAACAGGAAGTTGTCTTCAACTGTGAAGTCAATACTGAAGGTGCCAAAGCCAAATGGTTCAGAAATGAA
+GAAGCTATATTTGATAGTTCAAAATACATCATTCTCCAAAAAGACCTAGTCTACACCCTCAGAATTAGAG
+ATGCACACTTAGATGACCAAGCCAACTATAATGTGTCTTTGACCAATCACAGAGGTGAAAATGTTAAAAG
+TGCAGCCAATCTAATAGTAGAAGAGGAAGACCTTAGGATTGTTGAGCCTCTTAAAGATATTGAAACAATG
+GAGAAGAAATCTGTCACATTCTGGTGCAAGGTGAATCGTCTCAATGTAACACTGAAGTGGACCAAAAATG
+GTGAAGAAGTGCCTTTTGACAACCGTGTCTCATACAGAGTTGATAAGTACAAGCACATGTTAACCATTAA
+AGACTGTGGCTTCCCAGATGAAGGTGAATACATTGTCACTGCTGGACAAGATAAATCTGTTGCTGAGCTT
+CTCATCATAGAAGCCCCGACAGAATTTGTGGAACACTTGGAAGATCAGACAGTCACTGAGTTCGATGACG
+CTGTCTTCTCCTGCCAGCTCTCCAGAGAGAAAGCCAATGTAAAATGGTACAGAAATGGGAGAGAAATCAA
+AGAAGGCAAAAAATACAAATTTGAAAAAGATGGAAGTATACACAGACTCATTATAAAAGATTGCAGGCTG
+GATGATGAGTGTGAATATGCTTGCGGGGTAGAAGACAGGAAGTCTCGTGCTAGACTTTTTGTGGAAGAAA
+TTCCTGTTGAGATCATCAGGCCTCCACAAGATATTCTTGAAGCCCCTGGTGCTGATGTTGTCTTTTTAGC
+AGAACTCAATAAAGATAAGGTGGAAGTCCAATGGCTAAGAAATAACATGGTTGTTGTCCAGGGTGATAAA
+CACCAGATGATGAGTGAAGGAAAGATACATCGACTACAGATTTGTGATATTAAGCCCCGTGACCAGGGTG
+AATACAGATTTATTGCCAAAGACAAAGAAGCCAGAGCTAAGCTTGAACTGGCAGCTGCACCAAAAATCAA
+GACAGCTGACCAAGACCTTGTGGTTGATGTTGGCAAGCCTCTGACAATGGTGGTGCCATATGATGCCTAC
+CCCAAAGCAGAAGCTGAATGGTTTAAAGAAAATGAACCTTTATCTACAAAAACCATTGATACTACGGCTG
+AACAAACTTCTTTCAGAATTTTAGAAGCCAAGAAAGGAGACAAAGGGAGGTATAAAATTGTGCTTCAGAA
+CAAACATGGAAAAGCAGAAGGATTCATCAATTTAAAAGTTATTGATGTTCCTGGGCCAGTACGTAACTTA
+GAAGTGACAGAAACATTTGATGGTGAAGTGAGCCTTGCTTGGGAAGAACCTTTAACTGATGGTGGAAGCA
+AAATCATAGGTTACGTTGTTGAAAGACGTGACATTAAGAGAAAGACCTGGGTTCTGGCCACAGACCGTGC
+AGAGAGTTGTGAGTTTACTGTCACTGGTCTACAGAAAGGAGGAGTTGAGTACCTATTCCGTGTGAGTGCA
+AGAAACAGAGTTGGCACTGGTGAGCCAGTAGAAACTGACAATCCTGTAGAAGCAAGGAGTAAATATGATG
+TTCCAGGCCCTCCTTTGAATGTAACCATCACTGATGTGAATCGATTTGGTGTCTCACTGACATGGGAACC
+ACCAGAGTATGATGGAGGTGCTGAGATCACAAACTACGTCATTGAATTAAGAGACAAGACTTCTATCAGG
+TGGGATACTGCCATGACTGTGAGAGCTGAAGACCTGTCTGCAACTGTTACTGATGTGGTAGAAGGACAGG
+AGTACAGTTTCCGAGTGAGAGCCCAAAATCGAATTGGAGTTGGAAAACCAAGTGCAGCCACACCCTTCGT
+CAAAGTTGCTGATCCAATTGAGAGACCAAGTCCTCCTGTAAACCTAACTTCCTCAGATCAGACTCAGTCA
+TCAGTTCAGCTCAAATGGGAACCTCCTCTGAAAGATGGAGGAAGCCCAATATTAGGCTATATAATTGAGC
+GATGCGAAGAAGGAAAAGATAATTGGATTCGTTGCAATATGAAACTTGTCCCTGAACTGACTTACAAGGT
+TACCGGATTGGAAAAAGGAAATAAATATTTATATAGAGTATCTGCAGAAAATAAAGCTGGTGTTTCAGAT
+CCATCTGAAATTCTTGGTCCTCTCACCGCTGACGATGCATTTGTTGAACCAACAATGGATTTAAGTGCAT
+TTAAAGATGGTCTGGAAGTTATTGTCCCAAATCCTATCACGATCCTGGTTCCAAGTACAGGCTATCCAAG
+GCCAACTGCAACCTGGTGTTTTGGAGATAAAGTACTAGAAACAGGGGACCGGGTGAAAATGAAGACCTTG
+TCTGCCTATGCCGAACTTGTCATTTCTCCAAGTGAACGTTCAGACAAGGGCATTTATACACTGAAATTAG
+AAAACCGTGTGAAAACAATTTCTGGGGAAATTGATGTCAATGTAATTGCTCGCCCAAGTGCACCCAAAGA
+ATTGAAATTTGGTGATATAACCAAGGACTCAGTACATTTGACTTGGGAACCACCTGATGATGATGGAGGA
+AGTCCGTTAACTGGATACGTTGTTGAAAAACGAGAAGTCAGCCGGAAAACATGGACTAAAGTTATGGACT
+TTGTGACTGATCTAGAATTCACAGTTCCTGATCTTGTTCAAGGAAAAGAGTACTTATTTAAAGTTTGTGC
+TCGTAACAAATGTGGCCCTGGAGAACCTGCATATGTTGATGAACCTGTAAATATGTCAACTCCTGCAACG
+GTACCTGACCCACCAGAGAATGTTAAATGGAGAGATCGAACAGCCAATAGCATCTTCTTAACATGGGATC
+CACCTAAAAATGATGGTGGTTCACGCATCAAAGGATATATAGTTGAAAGATGTCCACGTGGTTCTGATAA
+ATGGGTTGCCTGTGGAGAACCTGTTGCAGAAACAAAAATGGAAGTGACAGGTCTTGAGGAAGGCAAATGG
+TATGCCTACCGCGTGAAGGCCTTAAACAGGCAGGGTGCTAGCAAACCAAGCAGACCCACAGAGGAAATCC
+AGGCTGTGGACACACAAGAGGCCCCAGAAATCTTCCTCGATGTGAAGCTCCTTGCTGGTCTCACTGTAAA
+AGCTGGGACCAAGATTGAACTTCCTGCCACCGTAACCGGAAAACCTGAACCTAAAATAACTTGGACAAAG
+GCTGATATGATTCTGAAGCAGGACAAAAGAATTACCATTGAAAATGTCCCTAAGAAATCCACAGTGACTA
+TTGTTGATAGTAAGAGAAGTGACACTGGCACATATATCATTGAGGCTGTGAATGTGTGTGGCCGGGCCAC
+TGCTGTGGTGGAAGTGAACGTCTTAGATAAACCCGGACCACCAGCTGCCTTTGACATCACAGATGTAACC
+AATGAGTCATGTCTTCTAACATGGAACCCACCACGCGATGATGGTGGATCTAAGATCACAAACTATGTTG
+TGGAGAGACGAGCAACTGATAGTGAAGTGTGGCACAAGCTCTCATCCACCGTCAAGGATACAAACTTCAA
+GGCCACCAAATTAATCCCCAATAAAGAGTACATCTTCAGAGTTGCTGCAGAAAACATGTATGGTGTTGGT
+GAACCAGTTCAGGCCTCTCCAATAACAGCCAAATATCAGTTTGATCCACCTGGTCCTCCAACTCGCCTAG
+AACCTTCTGATATCACTAAAGACGCAGTGACTCTCACATGGTGTGAGCCAGATGATGATGGTGGCAGCCC
+AATCACAGGATACTGGGTTGAAAGACTGGATCCTGATACAGATAAATGGGTTAGATGCAATAAGATGCCA
+GTAAAGGACACAACATACAGAGTGAAAGGTCTCACTAATAAGAAAAAATACAGATTCCGTGTGTTGGCTG
+AAAATCTTGCTGGACCTGGAAAACCAAGCAAATCAACTGAACCAATCTTAATAAAGGATCCCATAGATCC
+TCCATGGCCCCCTGGAAAACCAACTGTAAAAGATGTAGGCAAAACATCAGTAAGGTTGAATTGGACAAAA
+CCAGAACATGATGGAGGTGCAAAGATTGAGTCTTATGTCATTGAAATGCTGAAGACTGGAACAGATGAGT
+GGGTCAGAGTGGCGGAAGGGGTTCCCACCACTCAGCACTTGCTCCCAGGGCTCATGGAAGGACAGGAATA
+CTCATTCCGAGTTAGAGCTGTGAATAAGGCTGGGGAAAGTGAACCCAGTGAACCCAGTGACCCTGTGCTT
+TGCCGGGAGAAGCTATATCCTCCATCACCACCACGCTGGCTTGAAGTTATTAATATCACAAAAAATACAG
+CAGACCTAAAATGGACAGTTCCTGAGAAAGATGGAGGGTCCCCCATCACCAACTACATTGTGGAAAAGAG
+AGACGTCAGGCGAAAAGGCTGGCAAACAGTGGATACCACTGTCAAGGACACCAAGTGCACAGTCACCCCA
+CTGACTGAGGGCTCTTTATATGTGTTCCGAGTTGCTGCAGAAAATGCTATAGGACAAAGCGACTACACCG
+AAATTGAGGACTCTGTGCTGGCCAAAGACACCTTTACCACTCCTGGACCACCCTACGCCCTGGCAGTGGT
+TGATGTGACAAAACGACATGTTGACCTAAAGTGGGAGCCACCTAAAAATGATGGTGGAAGACCAATACAG
+AGATATGTCATTGAGAAGAAAGAAAGGTTAGGTACCCGTTGGGTGAAAGCTGGAAAGACTGCAGGACCTG
+ACTGTAACTTCAGAGTAACTGATGTCATCGAAGGAACAGAGGTCCAGTTTCAGGTTCGGGCTGAAAATGA
+AGCTGGAGTTGGCCACCCAAGTGAACCCACAGAAATCCTATCCATTGAAGATCCAACAAGTCCTCCCTCA
+CCACCCCTTGACCTACATGTGACTGATGCTGGGAGAAAACACATTGCCATTGCTTGGAAGCCTCCAGAGA
+AAAATGGTGGAAGTCCTATCATAGGATACCATGTTGAAATGTGTCCAGTAGGCACTGAGAAATGGATGAG
+AGTTAATTCTCGCCCAATAAAGGACTTGAAATTCAAGGTTGAAGAAGGTGTTGTTCCTGACAAAGAATAT
+GTCCTGAGAGTGAGAGCAGTCAATGCTATTGGTGTCAGCGAGCCATCTGAAATCTCTGAAAATGTGGTTG
+CCAAAGACCCAGACTGCAAGCCAACAATTGACCTGGAGACTCATGACATTATTGTTATTGAAGGTGAAAA
+GTTAAGCATTCCTGTTCCCTTCAGAGCTGTCCCAGTTCCAACTGTTAGTTGGCATAAAGATGGCAAAGAA
+GTTAAAGCAAGTGATAGATTAACAATGAAGAATGATCACATCTCTGCACACCTTGAAGTTCCCAAGAGTG
+TCCGTGCAGATGCCGGAATTTATACCATTACACTGGAGAATAAGCTCGGCTCAGCAACAGCCTCAATCAA
+TGTCAAAGTCATAGGCCTACCTGGACCATGCAAAGATATTAAAGCAAGTGACATTACCAAGAGTTCTTGT
+AAGTTAACTTGGGAACCTCCAGAATTTGATGGTGGAACCCCAATTCTTCATTATGTCCTGGAGCGCAGAG
+AAGCTGGGAGGAGAACATATATACCAGTCATGTCTGGTGAGAACAAACTGTCATGGACTGTGAAGGATCT
+CATACCAAATGGTGAATACTTCTTCCGTGTTAAAGCAGTCAACAAGGTTGGTGGAGGAGAATATATTGAA
+CTGAAAAATCCAGTCATTGCTCAAGATCCAAAGCAACCCCCTGATCCACCTGTAGATGTAGAGGTTCATA
+ATCCTACAGCGGAGGCAATGACTATTACATGGAAGCCACCTTTGTATGATGGAGGGAGCAAGATAATGGG
+CTACATCATAGAGAAGATTGCTAAGGGTGAAGAAAGGTGGAAGAGATGCAATGAACACCTGGTACCAATC
+CTGACCTATACAGCAAAAGGACTTGAAGAGGGGAAAGAGTACCAATTCCGTGTGCGAGCAGAGAACGCCG
+CGGGTATTAGTGAACCTTCTCGGGCTACTCCTCCAACCAAAGCTGTAGATCCCATTGATGCCCCCAAAGT
+CATTCTGAGAACAAGCCTAGAAGTGAAACGAGGTGATGAAATAGCACTTGATGCAAGTATTTCTGGATCA
+CCTTACCCAACTATTACATGGATAAAGGATGAAAATGTTATTGTACCAGAGGAAATTAAGAAGCGTGCAG
+CACCCTTGGTTAGGAGAAGGAAGGGTGAAGTTCAAGAAGAAGAACCATTTGTCCTGCCTCTGACACAGCG
+TTTGAGTATTGACAACAGCAAAAAGGGAGAATCTCAGCTACGCGTCCGAGATTCTCTCCGACCTGACCAT
+GGTCTGTATATGATCAAAGTTGAAAATGACCACGGTATTGCAAAAGCTCCTTGTACTGTCAGTGTGTTAG
+ATACACCGGGACCACCAATCAACTTTGTATTTGAAGATATCAGAAAGACCTCAGTCCTTTGTAAATGGGA
+ACCACCCCTTGATGATGGTGGCAGTGAAATCATAAACTACACTTTGGAAAAGAAAGACAAGACAAAACCC
+GACTCAGAATGGATTGTTGTCACTTCAACACTTAGACATTGCAAATATTCAGTAACAAAACTGATTGAAG
+GAAAAGAGTACCTCTTCCGTGTAAGAGCTGAAAACAGATTTGGGCCAGGTCCACCATGTGTTTCAAAGCC
+ACTTGTGGCTAAAGATCCATTTGGACCACCTGATGCACCAGATAAGCCCATTGTGGAAGATGTTACCAGC
+AACAGTATGCTAGTGAAATGGAATGAACCAAAAGATAATGGAAGCCCCATTTTGGGTTACTGGCTTGAAA
+AACGTGAAGTTAACAGTACACATTGGTCTCGTGTCAACAAAAGCCTTCTGAATGCCTTGAAAGCCAATGT
+AGATGGCTTATTAGAAGGACTCACCTATGTCTTCAGAGTATGTGCTGAAAATGCAGCTGGACCTGGAAAG
+TTCAGTCCACCTTCAGATCCCAAAACAGCACATGATCCAATCTCTCCTCCTGGGCCACCTATCCCAAGAG
+TCACTGACACAAGCTCTACAACTATTGAACTAGAATGGGAACCCCCAGCTTTCAATGGTGGTGGGGAAAT
+TGTTGGCTATTTTGTTGATAAGCAGTTGGTTGGCACAAATGAATGGTCACGCTGCACAGAGAAGATGATC
+AAGGTCCGTCAGTACACCGTCAAAGAAATCCGAGAGGGTGCTGATTACAAACTTCGGGTGAGTGCTGTCA
+ATGCCGCAGGGGAAGGACCGCCTGGAGAAACACAACCTGTTACTGTGGCTGAACCACAAGAGCCTCCAGC
+TGTGGAACTGGATGTTTCTGTCAAGGGTGGAATACAAATAATGGCTGGGAAGACTCTTAGAATTCCAGCT
+GTGGTGACTGGTCGCCCTGTACCTACAAAAGTATGGACCAAAGAAGAAGGGGAGCTGGATAAAGACCGTG
+TTGTAATAGACAACGTTGGAACCAAATCTGAACTAATTATCAAGGATGCACTGCGAAAAGACCATGGCAG
+ATATGTGATTACAGCTACAAATAGCTGTGGTTCCAAATTTGCAGCAGCCAGGGTAGAAGTTTTTGATGTC
+CCTGGTCCAGTTCTTGACTTAAAACCTGTTGTAACAAACAGAAAAATGTGTCTACTTAACTGGTCTGATC
+CAGAAGATGATGGAGGAAGTGAAATAACAGGCTTTATCATTGAAAGAAAAGATGCCAAGATGCATACTTG
+GAGACAACCAATAGAGACTGAGAGATCTAAATGTGACATCACAGGTCTGCTTGAGGGACAAGAATATAAG
+TTCCGTGTTATTGCCAAGAACAAGTTTGGCTGTGGCCCTCCTGTTGAAATAGGACCAATTCTTGCAGTTG
+ATCCACTAGGTCCTCCAACATCTCCAGAGAGGCTCACATACACTGAAAGGACAAAGTCCACTATCACACT
+TGACTGGAAAGAGCCCCGCAGTAATGGTGGCAGTCCCATCCAAGGATATATCATTGAAAAACGGCGTCAT
+GACAAACCTGACTTTGAAAGAGTTAACAAGCGACTCTGCCCAACCACATCTTTTCTGGTTGAAAATCTTG
+ATGAACACCAAATGTATGAGTTCCGTGTCAAAGCTGTCAATGAAATTGGTGAAAGTGAACCATCCCTACC
+TCTTAATGTAGTCATACAAGATGATGAAGTGCCTCCAACTATTAAGTTGCGTCTGAGTGTTCGAGGAGAC
+ACTATCAAAGTTAAGGCAGGAGAGCCTGTCCACATCCCTGCAGATGTGACAGGCCTTCCAATGCCTAAGA
+TTGAATGGTCCAAAAATGAAACTGTAATTGAAAAACCCACTGATGCACTTCAGATAACCAAGGAAGAGGT
+ATCCCGAAGTGAGGCAAAAACTGAGCTTAGCATTCCCAAAGCGGTCCGGGAGGACAAAGGCACTTACACA
+GTTACTGCTTCCAATCGCCTTGGCTCAGTGTTCCGAAATGTTCACGTTGAAGTATATGACCGCCCATCCC
+CACCAAGAAATCTTGCTGTTACTGACATTAAAGCTGAATCTTGCTACTTGACATGGGATGCCCCTCTTGA
+TAATGGTGGCAGTGAAATCACCCATTATGTTATTGACAAACGTGATGCAAGTAGGAAGAAAGCAGAATGG
+GAGGAAGTCACCAACACTGCTGTAGAGAAAAGATATGGGATCTGGAAACTTATCCCCAATGGTCAGTATG
+AGTTCCGAGTCAGGGCAGTGAATAAATATGGAATCAGTGATGAGTGCAAATCAGATAAAGTAGTCATTCA
+AGATCCTTATCGCCTTCCTGGACCTCCAGGAAAACCAAAAGTTTTGGCACGCACCAAAGGATCAATGCTA
+GTGAGCTGGACTCCTCCTTTGGACAATGGTGGCTCTCCAATTACTGGCTACTGGCTGGAGAAAAGAGAAG
+AGGGAAGTCCTTATTGGTCACGTGTTAGCCGAGCACCAATAACCAAAGTGGGATTGAAAGGCGTGGAATT
+TAATGTTCCTCGTTTGCTTGAAGGCGTTAAATACCAGTTCAGAGCCATGGCAATAAATGCTGCAGGAATT
+GGTCCTCCCAGTGAACCATCAGATCCAGAGGTTGCAGGAGATCCCATATTTCCACCGGGGCCACCTTCTT
+GCCCAGAAGTTAAAGATAAAACGAAGTCAAGCATCTCACTAGGATGGAAACCTCCAGCCAAAGATGGTGG
+CAGCCCAATCAAAGGATACATTGTAGAAATGCAAGAAGAAGGTACTACTGACTGGAAAAGAGTAAATGAA
+CCAGACAAACTTATAACTACCTGTGAATGTGTGGTGCCTAATCTGAAAGAGCTCAGGAAGTACAGATTCA
+GAGTGAAAGCTGTCAATGAAGCTGGTGAATCTGAACCAAGTGATACAACTGGGGAGATCCCTGCCACTGA
+TATTCAAGAGGAACCAGAAGTTTTCATTGACATTGGAGCACAGGACTGTCTGGTTTGTAAAGCTGGCTCA
+CAGATTAGGATTCCTGCTGTCATCAAGGGACGCCCAACACCAAAATCATCTTGGGAATTTGATGGAAAGG
+CAAAGAAAGCAATGAAGGATGGAGTTCATGACATACCCGAAGATGCACAGCTGGAGACTGCTGAAAACTC
+CTCAGTAATTATTATTCCGGAGTGTAAACGATCTCATACAGGCAAATACAGCATCACAGCCAAGAATAAA
+GCAGGACAAAAGACTGCAAATTGCAGAGTTAAAGTCATGGATGTACCAGGCCCACCCAAAGATCTGAAAG
+TCAGTGATATCACAAGGGGTAGTTGCAGACTTTCATGGAAGATGCCAGACGACGATGGAGGAGACAGGAT
+CAAAGGCTATGTTATTGAGAAGAGGACTATTGATGGAAAAGCCTGGACCAAAGTCAATCCAGACTGTGGA
+AGCACCACATTTGTAGTGCCTGATCTCCTCTCTGAACAGCAATATTTCTTCCGTGTGCGAGCAGAAAACC
+GTTTTGGTATTGGCCCACCTGTGGAAACCATTCAGAGGACCACTGCCAGAGATCCGATATATCCTCCTGA
+TCCTCCTATTAAACTCAAGATTGGCCTCATCACAAAGAACACAGTGCATCTGTCATGGAAACCCCCGAAG
+AATGATGGGGGCTCCCCTGTTACCCACTATATTGTTGAGTGCCTTGCATGGGACCCTACTGGGACAAAGA
+AAGAAGCCTGGAGGCAGTGCAATAAGCGTGATGTGGAAGAACTGCAATTTACTGTTGAAGACCTAGTAGA
+AGGTGGGGAATATGAATTCCGAGTCAAAGCTGTCAATGCTGCAGGAGTCAGCAAGCCTTCAGCCACTGTT
+GGGCCCTGTGACTGTCAAAGACCAGACATGCCACCATCAATTGATCTAAAAGAATTCATGGAGGTTGAAG
+AAGGAACCAATGTTAACATTGTGGCCAAAATTAAAGGTGTGCCATTCCCGACACTAACCTGGTTTAAAGC
+TCCTCCAAAGAAGCCTGATAACAAAGAACCTGTTCTCTATGACACCCATGTCAACAAACTGGTGGTAGAT
+GATACTTGCACTTTAGTTATTCCGCAGTCTCGCAGGAGTGACACTGGCTTATATACCATCACAGCTGTAA
+ATAATCTGGGAACAGCATCAAAGGAGATGAGACTGAATGTCCTGGGTCGTCCTGGCCCTCCAGTGGGACC
+CATAAAATTTGAATCTGTTTCAGCAGATCAAATGACACTATCTTGGTTTCCACCTAAAGATGATGGTGGG
+TCTAAGATTACAAACTATGTAATTGAGAAAAGAGAAGCTAACAGGAAGACATGGGTCCATGTCTCCAGTG
+AACCTAAGGAGTGCACGTACACGATTCCCAAATTGCTAGAAGGCCATGAATATGTATTCCGAATCATGGC
+CCAGAATAAATATGGCATTGGAGAACCTCTTGACAGTGAACCTGAAACAGCAAGAAACCTCTTCTCTGTC
+CCTGGAGCACCAGATAAACCAACAGTTAGCAGCGTGACTCGTAACTCCATGACTGTCAACTGGGAAGAGC
+CAGAATATGATGGAGGCTCTCCTGTGACAGGGTACTGGCTGGAAATGAAAGACACCACTTCAAAGAGATG
+GAAGAGAGTTAACCGAGATCCTATCAAAGCCATGACTTTGGGTGTTTCTTATAAAGTGACTGGTCTTATT
+GAAGGTTCCGACTATCAATTCCGGGTATATGCAATCAATGCTGCTGGCGTGGGTCCAGCAAGTCTGCCAT
+CAGACCCAGCGACTGCTAGAGATCCAATTGCCCCTCCTGGTCCTCCATTTCCCAAAGTGACAGATTGGAC
+TAAATCATCTGCAGATCTGGAGTGGTCTCCCCCACTAAAAGATGGTGGATCCAAAGTAACTGGATACATC
+GTTGAATATAAAGAAGAAGGAAAAGAAGAATGGGAAAAGGGTAAAGATAAAGAAGTGAGAGGAACAAAGC
+TCGTTGTGACAGGATTAAAGGAAGGAGCATTCTACAAATTTAGAGTTAGTGCAGTCAACATTGCTGGCAT
+TGGAGAACCTGGAGAGGTCACAGATGTCATTGAAATGAAGGACAGACTTGTTTCACCTGACCTTCAGCTA
+GATGCCAGTGTCAGAGATAGAATTGTTGTCCATGCTGGAGGGGTGATCCGAATCATTGCCTATGTGTCTG
+GAAAGCCTCCTCCAACCGTCACCTGGAACATGAATGAAAGAACCTTACCTCAAGAAGCCACCATTGAGAC
+CACAGCCATTAGCTCATCCATGGTCATCAAGAACTGCCAGAGGAGCCATCAAGGCGTCTATTCTCTTCTT
+GCCAAAAATGAAGCCGGAGAAAGAAAGAAGACAATTATTGTTGATGTATTAGATGTTCCAGGTCCCGTTG
+GAACACCATTCCTAGCTCACAACCTAACCAATGAGTCCTGCAAACTGACATGGTTTTCTCCAGAAGATGA
+TGGAGGCTCTCCAATCACCAATTATGTCATTGAAAAGCGTGAATCTGACCGCAGAGCATGGACCCCAGTG
+ACATATACAGTTACCCGACAAAATGCTACTGTCCAGGGTCTCATTCAAGGAAAAGCCTACTTTTTCCGAA
+TTGCGGCTGAAAATAGTATTGGCATGGGTCCATTTGTTGAGACATCAGAGGCACTTGTTATCAGAGAGCC
+AATAACTGTACCAGAGCGTCCTGAAGACCTGGAAGTCAAAGAAGTTACTAAAAATACTGTAACTTTGACT
+TGGAATCCTCCTAAGTATGATGGTGGGTCAGAAATTATTAACTATGTCCTAGAAAGTCGGCTCATTGGGA
+CTGAGAAGTTCCACAAAGTTACAAATGACAACTTGCTTAGCAGAAAATACACTGTTAAAGGCTTAAAAGA
+AGGTGATACCTATGAGTACCGTGTCAGTGCTGTCAACATTGTTGGACAAGGCAAACCATCATTTTGCACC
+AAACCAATTACTTGCAAGGATGAGCTGGCACCCCCAACGCTTCACCTCGACTTCAGAGATAAGCTCACGA
+TTCGAGTTGGTGAAGCTTTTGCCCTCACTGGCCGTTACTCAGGCAAACCAAAGCCTAAGGTTTCCTGGTT
+CAAAGATGAAGCTGATGTGCTGGAAGATGATCGCACTCATATAAAGACTACACCAGCAACACTTGCTTTA
+GAGAAGATCAAGGCCAAACGTTCAGATTCCGGCAAATACTGTGTGGTTGTGGAGAACAGTACAGGCTCTA
+GGAAAGGTTTCTGTCAAGTTAATGTTGTTGACCGTCCTGGACCACCAGTAGGACCAGTTAGTTTTGATGA
+GGTGACCAAAGATTACATGGTTATCTCTTGGAAGCCTCCTTTAGATGATGGAGGCAGTAAAATCACCAAT
+TATATTATTGAGAAGAAGGAAGTGGGTAAAGACGTCTGGATGCCAGTGACATCTGCAAGTGCTAAAACAA
+CATGCAAAGTTTCTAAACTACTTGAAGGAAAAGATTATATTTTCCGGATACATGCTGAAAATCTGTATGG
+AATAAGTGATCCTCTGGTGTCTGATTCAATGAAAGCCAAAGATCGTTTCAGGGTTCCTGATGCACCTGAT
+CAGCCAATTGTTACAGAAGTTACCAAAGACTCTGCATTAGTAACCTGGAATAAGCCACATGATGGAGGAA
+AACCCATCACAAACTACATCCTGGAAAAGAGAGAAACTATGTCTAAACGATGGGCTAGAGTTACCAAAGA
+TCCTATTCATCCATACACTAAATTTAGGGTTCCTGATCTTCTAGAAGGATGTCAGTATGAATTCCGGGTT
+TCTGCAGAAAATGAAATTGGTATTGGAGATCCAAGCCCACCATCCAAACCAGTCTTTGCTAAAGATCCAA
+TTGCTAAACCAAGTCCACCTGTTAATCCTGAAGCAATAGATACAACATGCAATTCAGTCGATCTAACTTG
+GCAGCCACCACGTCATGATGGTGGGAGCAAGATTCTGGGTTATATTGTTGAGTACCAGAAAGTTGGAGAT
+GAAGAGTGGAGAAGAGCCAATCACACCCCTGAGTCATGTCCTGAAACTAAATATAAAGTCACCGGTCTTC
+GGGACGGTCAAACCTATAAGTTTAGAGTGTTAGCAGTCAATGCAGCTGGTGAATCAGATCCAGCTCATGT
+TCCGGAGCCAGTCCTAGTAAAAGACAGGCTTGAACCCCCTGAGTTGATTCTTGATGCCAACATGGCAAGA
+GAACAACACATTAAAGTTGGTGATACTCTAAGACTTAGTGCCATCATCAAAGGAGTGCCATTCCCAAAAG
+TAACTTGGAAAAAAGAAGACAGAGATGCTCCAACTAAAGCAAGAATTGATGTGACTCCAGTTGGTAGCAA
+GCTTGAAATTCGTAATGCTGCCCATGAAGATGGTGGAATTTATTCTTTAACAGTGGAGAATCCAGCTGGT
+TCAAAAACTGTCTCAGTAAAAGTACTTGTATTAGATAAACCTGGGCCACCTAGAGATCTGGAAGTCAGTG
+AAATTAGGAAAGATTCATGTTACCTTACTTGGAAAGAACCACTGGATGATGGTGGTTCTGTTATTACCAA
+TTATGTGGTTGAGAGGAGAGATGTTGCCAGCGCCCAGTGGTCACCTCTCTCAGCTACATCAAAGAAAAAG
+AGTCACTTCGCTAAGCATCTGAATGAAGGCAACCAGTACCTCTTCCGAGTAGCTGCGGAGAACCAGTATG
+GACGTGGTCCTTTTGTTGAAACACCAAAACCAATCAAGGCTTTGGATCCTCTCCATCCCCCAGGGCCACC
+CAAGGACCTGCACCATGTAGATGTTGACAAGACTGAAGTCTCCCTAGTCTGGAATAAGCCGGATCGTGAT
+GGTGGTTCTCCAATCACTGGATATTTGGTAGAATATCAAGAAGAAGGCACCCAGGACTGGATTAAATTTA
+AGACTGTGACAAACTTAGAGTGTGTGGTTACTGGACTACAACAAGGAAAGACCTATAGATTCCGTGTAAA
+AGCTGAAAACATTGTGGGTCTTGGTCTCCCTGACACAACTATCCCGATAGAATGTCAAGAAAAACTAGTG
+CCTCCATCCGTGGAGCTAGATGTGAAATTAATTGAAGGTCTTGTGGTAAAGGCTGGAACCACAGTCAGAT
+TCCCTGCTATTATAAGAGGTGTGCCTGTTCCTACTGCAAAGTGGACAACCGATGGGAGTGAGATTAAAAC
+CGATGAGCACTACACAGTTGAAACAGACAACTTCTCATCAGTACTTACCATTAAGAACTGCTTAAGGAGA
+GACACTGGGGAATATCAAATCACAGTTTCCAATGCAGCCGGTAGCAAAACAGTAGCCGTACATCTTACTG
+TTCTTGATGTTCCTGGGCCACCAACAGGTCCTATTAATATTCTGGATGTTACTCCTGAACACATGACTAT
+CTCATGGCAGCCACCTAAGGATGATGGAGGAAGCCCTGTGATAAATTATATTGTTGAGAAACAAGATACA
+AGGAAAGACACGTGGGGTGTTGTCTCTTCCGGAAGCAGTAAGACAAAGCTGAAAATCCCACATCTGCAGA
+AGGGCTGTGAATATGTTTTCCGAGTTAGAGCAGAGAATAAGATAGGTGTTGGTCCTCCCCTTGACTCCAC
+ACCTACTGTTGCTAAGCATAAATTTAGTCCTCCGTCTCCTCCTGGTAAACCAGTGGTTACTGACATTACT
+GAAAATGCAGCAACAGTGTCTTGGACCCTGCCAAAATCTGATGGTGGCAGTCCAATAACTGGCTACTATA
+TGGAACGTCGAGAAGTAACTGGCAAATGGGTGAGGGTCAACAAAACACCTATCGCTGACCTGAAGTTCAG
+AGTGACTGGACTCTATGAAGGAAATACATATGAGTTTAGAGTTTTTGCTGAAAATCTTGCAGGACTAAGC
+AAACCATCCCCAAGTTCTGATCCAATAAAAGCTTGCCGGCCCATCAAACCACCTGGACCACCTATTAATC
+CTAAACTGAAAGACAAGAGCAGAGAAACAGCTGATTTGGTGTGGACAAAGCCTCTCAGTGATGGTGGTAG
+CCCCATTCTAGGATATGTAGTGGAATGTCAGAAACCTGGCACGGCACAATGGAACAGGATTAATAAAGAT
+GAACTCATTAGGCAATGTGCCTTTAGGGTACCTGGACTAATTGAAGGAAATGAGTACAGATTCCGTATAA
+AGGCAGCTAATATTGTAGGAGAGGGTGAGCCAAGAGAACTAGCAGAATCTGTGATTGCAAAAGATATCCT
+TCATCCTCCAGAAGTAGAACTTGATGTTACTTGTCGTGATGTTATTACCGTGAGAGTAGGCCAAACTATC
+CGCATTCTAGCTCGAGTCAAAGGCAGACCTGAACCAGACATAACTTGGACTAAGGAAGGCAAAGTATTGG
+TCCGAGAAAAGAGGGTGGACCTTATTCAGGATCTACCTCGTGTTGAGTTACAAATTAAAGAAGCTGTTAG
+AGCTGATCATGGCAAGTATATCATCTCAGCTAAGAACAGCAGTGGACATGCCCAAGGTTCAGCCATCGTT
+AACGTCCTTGACAGACCTGGGCCTTGCCAGAATTTGAAGGTTACCAATGTAACCAAAGAGAACTGTACAA
+TTTCTTGGGAAAACCCACTAGATAATGGTGGCTCAGAAATAACAAACTTCATAGTAGAATATCGCAAACC
+AAACCAGAAAGGCTGGTCAATTGTTGCATCAGATGTCACTAAACGATTAATCAAGGCCAACCTTTTAGCC
+AACAATGAATACTATTTCCGAGTTTGTGCAGAGAATAAAGTAGGTGTTGGGCCAACCATCGAAACAAAAA
+CTCCCATTCTGGCTATTAACCCTATTGACAGACCAGGTGAGCCTGAAAACCTTCACATTGCAGATAAAGG
+AAAGACATTTGTCTATCTAAAGTGGCGGAGGCCTGACTATGATGGTGGCAGTCCAAATCTGTCATATCAT
+GTTGAGAGAAGGCTTAAGGGCTCCGATGACTGGGAAAGAGTGCATAAAGGAAGCATTAAAGAAACTCACT
+ACATGGTTGACAGATGTGTTGAAAACCAGATTTATGAGTTCAGAGTGCAAACAAAGAATGAAGGTGGGGA
+AAGTGACTGGGTGAAGACAGAGGAAGTTGTTGTGAAAGAAGACTTACAAAAACCAGTACTTGATCTGAAA
+TTAAGTGGGGTCCTAACTGTCAAAGCAGGGGACACCATTAGGCTTGAGGCAGGGGTTAGAGGCAAACCAT
+TCCCAGAAGTTGCATGGACCAAGGACAAAGACGCTACAGACTTAACAAGATCACCAAGGGTCAAGATTGA
+TACCCGTGCTGATTCATCTAAATTTTCTCTTACTAAAGCAAAGCGAAGTGATGGGGGTAAATATGTAGTT
+ACGGCAACTAACACGGCTGGCAGTTTTGTGGCCTATGCCACTGTCAATGTTTTAGATAAGCCTGGTCCTG
+TGAGAAATCTGAAAATTGTTGATGTGTCCAGTGATAGGTGTACTGTTTGCTGGGATCCACCAGAAGATGA
+TGGTGGCTGTGAAATCCAAAATTATATTCTAGAAAAATGTGAGACAAAGCGAATGGTTTGGTCTACCTAT
+TCTGCTACTGTCTTGACACCTGGTACTACAGTAACACGTCTCATAGAAGGAAATGAATATATTTTCAGAG
+TCCGTGCAGAAAATAAAATAGGCACAGGGCCTCCAACAGAAAGTAAACCAGTCATAGCCAAAACCAAGTA
+TGATAAACCTGGTCGCCCTGATCCCCCAGAAGTCACTAAAGTAAGCAAAGAAGAGATGACTGTGGTTTGG
+AATCCACCTGAATATGATGGTGGAAAGTCTATAACTGGATACTTTTTGGAGAAAAAGGAAAAGCATTCAA
+CACGATGGGTCCCTGTCAACAAGAGTGCAATCCCTGAGAGACGTATGAAAGTACAGAATCTCCTCCCAGA
+CCATGAATATCAGTTCCGTGTCAAGGCAGAAAATGAAATTGGAATTGGAGAACCAAGCTTGCCTTCAAGA
+CCGGTGGTGGCAAAAGACCCCATAGAGCCACCTGGTCCACCAACCAATTTCAGAGTGGTTGATACAACCA
+AACATTCCATAACTCTTGGGTGGGGAAAACCAGTCTATGATGGTGGTGCACCGATCATTGGATATGTTGT
+GGAAATGAGACCAAAAATAGCAGATGCGTCTCCTGATGAAGGCTGGAAACGGTGTAATGCTGCAGCACAG
+CTTGTACGCAAGGAATTCACTGTTACCAGCTTGGATGAAAACCAGGAATATGAGTTCAGGGTGTGTGCCC
+AAAACCAAGTTGGTATTGGGCGCCCTGCAGAGCTAAAGGAAGCTATCAAACCTAAAGAAATACTAGAACC
+TCCGGAGATTGATTTGGATGCCAGCATGAGGAAACTGGTCATAGTGAGAGCAGGATGCCCTATTCGTCTC
+TTTGCTATAGTGAGAGGACGACCAGCCCCTAAAGTCACTTGGCGAAAAGTTGGCATTGATAATGTGGTCA
+GAAAAGGACAAGTTGATCTGGTTGACACTATGGCCTTCCTTGTCATCCCCAATTCTACCCGTGATGACTC
+AGGAAAATATTCCTTAACACTTGTGAACCCAGCAGGAGAAAAGGCTGTATTCGTAAATGTCAGAGTATTA
+GACACTCCTGGGCCTGTGTCTGATTTAAAAGTTTCAGATGTCACTAAAACATCATGCCATGTGTCCTGGG
+CCCCTCCTGAAAACGACGGTGGGAGCCAAGTGACACATTATATCGTGGAGAAACGTGAGGCAGACAGAAA
+GACATGGTCGACCGTTACCCCAGAAGTTAAGAAAACAAGCTTCCATGTAACCAATCTTGTCCCTGGGAAT
+GAGTATTACTTCAGAGTAACTGCTGTCAACGAATATGGCCCTGGCGTCCCAACAGATGTCCCAAAACCAG
+TGCTTGCATCAGATCCTCTAAGTGAGCCGGATCCCCCAAGGAAATTAGAAGTGACTGAAATGACCAAGAA
+CAGTGCCACCTTAGCCTGGTTACCTCCCCTACGTGATGGAGGTGCTAAAATCGATGGCTACATCACTAGT
+TACAGAGAAGAAGAGCAGCCTGCAGATCGCTGGACAGAGTACTCAGTGGTAAAAGATCTGAGCCTTGTTG
+TCACTGGCCTAAAGGAAGGAAAGAAATACAAATTTAGAGTAGCGGCCAGAAATGCTGTTGGAGTCAGTTT
+GCCAAGAGAAGCTGAAGGAGTGTATGAAGCCAAAGAACAACTGTTGCCACCAAAGATCCTTATGCCAGAG
+CAAATAACTATCAAAGCTGGGAAAAAACTCCGAATTGAAGCCCATGTGTATGGAAAGCCTCATCCCACCT
+GTAAATGGAAAAAAGGAGAAGATGAAGTTGTCACATCCAGCCACCTGGCAGTGCATAAAGCAGACAGCTC
+TTCAATTCTGATCATAAAAGATGTGACTAGGAAAGACAGTGGTTACTACAGCCTCACAGCAGAGAACAGT
+TCTGGGACAGACACTCAGAAAATCAAAGTTGTAGTCATGGATGCCCCCGGCCCCCCTCAGCCTCCATTTG
+ACATTTCTGATATAGACGCTGATGCTTGCTCCCTGTCATGGCACATCCCTCTGGAGGACGGAGGCAGTAA
+CATCACCAATTATATAGTGGAGAAGTGTGATGTAAGCCGAGGTGACTGGGTCACGGCTCTAGCTTCAGTC
+ACAAAAACTTCCTGCAGGGTTGGAAAGCTGATCCCAGGCCAGGAGTACATCTTCCGGGTCCGTGCTGAAA
+ACCGATTTGGCATTTCAGAGCCTCTCACATCTCCAAAGATGGTTGCGCAGTTCCCATTTGGTGTTCCTAG
+TGAACCAAAGAATGCACGAGTCACCAAAGTCAACAAGGACTGTATTTTTGTTGCTTGGGACAGACCAGAT
+AGTGATGGAGGGAGCCCCATTATTGGTTATCTGATTGAACGCAAGGAAAGAAACAGTTTGCTGTGGGTGA
+AAGCCAATGATACTCTTGTCCGGTCAACTGAATATCCTTGTGCTGGCCTTGTAGAAGGTCTTGAGTATTC
+ATTCAGAATCTATGCCCTAAACAAAGCTGGATCCAGCCCACCCAGCAAACCCACAGAATATGTAACTGCA
+AGAATGCCAGTTGATCCTCCTGGGAAACCTGAGGTTATTGATGTCACCAAGAGTACTGTATCTCTGATCT
+GGGCTCGTCCAAAGCATGATGGAGGCAGTAAAATTATTGGCTATTTCGTAGAAGCTTGCAAACTTCCTGG
+TGATAAATGGGTACGGTGCAATACTGCACCTCACCAGATTCCCCAGGAAGAGTACACAGCTACTGGCCTA
+GAAGAGAAAGCTCAGTATCAATTTAGAGCTATTGCCAGGACCGCGGTAAACATTAGCCCACCTTCTGAAC
+CTTCTGATCCAGTGACTATCCTCGCAGAAAATGTCCCTCCCAGGATAGACCTGAGTGTGGCTATGAAATC
+TTTGCTTACTGTGAAAGCTGGAACTAATGTCTGCTTGGATGCTACTGTTTTTGGTAAACCGATGCCAACA
+GTTTCTTGGAAAAAAGATGGCACACTGCTAAAACCAGCAGAAGGCATAAAGATGGCCATGCAGCGGAATC
+TGTGCACCTTGGAGCTATTCAGCGTGAACCGGAAGGACTCAGGAGACTATACCATTACTGCTGAAAATTC
+AAGTGGTTCTAAATCAGCCACCATTAAGCTTAAAGTGTTAGATAAACCGGGTCCTCCAGCATCTGTTAAA
+ATCAACAAAATGTATTCAGATCGTGCTATGCTTTCTTGGGAACCGCCTCTTGAAGATGGAGGCTCAGAAA
+TCACCAACTATATTGTTGACAAACGTGAAACAAGCAGGCCCAACTGGGCTCAAGTCTCTGCAACTGTGCC
+TATCACCAGCTGCAGCGTGGAGAAACTTATAGAGGGCCATGAGTATCAGTTCCGTATTTGTGCTGAAAAT
+AAATATGGAGTAGGCGATCCAGTCTTCACTGAACCAGCAATTGCCAAAAACCCATATGACCCACCAGGAC
+GCTGTGATCCTCCTGTTATTAGCAACATAACCAAAGATCACATGACAGTCAGCTGGAAGCCACCAGCAGA
+TGATGGGGGCTCACCCATCACTGGCTATTTGCTTGAAAAGCGGGAAACCCAGGCTGTTAACTGGACTAAG
+GTCAACAGAAAACCTATTATAGAAAGAACATTAAAAGCAACAGGTCTTCAAGAAGGTACCGAATATGAGT
+TCCGTGTTACAGCTATAAATAAAGCTGGACCAGGCAAACCCAGTGACGCATCCAAGGCCGCTTATGCTCG
+GGACCCTCAGTATCCTCCTGCGCCACCGGCTTTCCCTAAAGTATATGATACAACTCGCAGCTCTGTGAGT
+CTATCTTGGGGCAAGCCAGCCTATGACGGCGGCAGCCCTATCATTGGTTATCTCGTTGAAGTAAAACGGG
+CTGACTCCGATAACTGGGTGAGGTGCAACTTACCACAGAATCTACAGAAAACCCGCTTTGAGGTTACTGG
+CCTGATGGAAGACACACAATATCAATTCCGTGTGTATGCCGTTAATAAGATTGGATACAGTGACCCCAGT
+GATGTGCCAGATAAACACTATCCCAAGGACATCTTAATTCCACCTGAGGGAGAACTTGATGCGGACTTAA
+GGAAGACACTCATATTACGTGCTGGAGTTACTATGAGACTATATGTACCAGTAAAAGGACGCCCACCTCC
+AAAGATTACTTGGTCTAAACCAAATGTCAATCTAAGAGACAGGATTGGACTGGACATAAAGTCAACTGAC
+TTTGACACTTTCTTGCGCTGTGAAAATGTGAACAAATATGATGCAGGAAAATATATCTTAACCCTGGAGA
+ACAGCTGTGGTAAAAAGGAATATACCATTGTTGTGAAAGTGCTTGATACTCCTGGGCCACCTGTCAATGT
+GACTGTTAAGGAAATATCCAAAGACTCTGCTTATGTTACCTGGGAGCCTCCCATTATTGATGGCGGAAGC
+CCCATCATAAACTATGTGGTACAAAAACGTGATGCAGAGAGGAAATCCTGGTCTACAGTGACAACTGAGT
+GCTCCAAAACAAGCTTCAGAGTAGCTAATTTGGAGGAGGGAAAATCCTACTTCTTCCGAGTGTTTGCTGA
+AAATGAGTATGGCATTGGTGATCCCGGTGAAACTCGTGATGCTGTCAAAGCTTCCCAAACTCCTGGACCA
+GTTGTGGACCTGAAAGTGAGGTCTGTATCTAAGTCATCCTGTAGCATTGGCTGGAAAAAGCCTCACAGTG
+ATGGTGGAAGTCGGATTATTGGATATGTAGTTGATTTCCTGACTGAAGAAAATAAGTGGCAACGAGTTAT
+GAAATCCTTAAGCCTACAGTACTCTGCAAAAGATTTGACTGAAGGGAAGGAATATACCTTCAGAGTGAGT
+GCTGAGAATGAAAATGGAGAAGGAACCCCAAGCGAAATCACTGTTGTGGCAAGGGATGATGTTGTGGCTC
+CTGATCTTGACTTAAAGGGTCTACCTGATTTGTGCTACTTGGCTAAAGAAAACAGCAACTTCCGGCTTAA
+GATCCCCATAAAAGGCAAGCCAGCTCCATCAGTCTCCTGGAAGAAAGGGGAAGATCCTCTAGCAACTGAC
+ACTAGAGTCAGTGTTGAGTCATCTGCGGTTAACACAACTCTTATAGTGTACGATTGCCAAAAATCTGATG
+CTGGAAAATACACAATCACACTTAAGAATGTTGCTGGCACCAAGGAAGGAACTATCTCCATAAAGGTTGT
+TGGCAAGCCTGGCATCCCCACTGGACCAATCAAATTTGATGAAGTCACAGCAGAAGCCATGACCTTAAAG
+TGGGCTCCTCCAAAGGATGATGGAGGTTCTGAAATCACCAACTATATCCTAGAGAAGAGGGATTCTGTGA
+ACAACAAGTGGGTGACGTGCGCCTCAGCTGTCCAGAAAACCACCTTTAGAGTAACCAGACTTCATGAGGG
+CATGGAATATACCTTCAGGGTCAGTGCCGAAAATAAATATGGTGTAGGGGAAGGCCTGAAATCGGAGCCA
+ATTGTTGCGAGACATCCATTTGATGTGCCTGATGCTCCCCCACCTCCCAATATTGTGGATGTCAGACACG
+ATTCAGTATCTCTAACTTGGACTGACCCCAAGAAAACTGGTGGTTCTCCAATTACAGGGTATCATCTCGA
+GTTCAAGGAAAGAAACAGCCTTTTGTGGAAGAGAGCTAACAAGACTCCGATAAGGATGAGAGACTTTAAA
+GTGACAGGATTAACTGAAGGTCTTGAATATGAATTCCGAGTTATGGCAATCAATTTAGCAGGTGTGGGCA
+AGCCAAGCCTACCATCAGAGCCTGTTGTGGCACTGGACCCAATTGATCCTCCTGGAAAACCTGAGGTTAT
+TAACATAACAAGGAATTCAGTGACTCTCATTTGGACTGAACCTAAATATGACGGTGGTCATAAGTTAACT
+GGATATATAGTGGAGAAGCGAGATCTACCTTCGAAGTCTTGGATGAAAGCCAACCATGTTAATGTCCCAG
+AATGTGCCTTTACTGTAACTGACCTTGTTGAGGGTGGAAAATATGAATTCAGAATTAGAGCAAAGAATAC
+AGCAGGTGCTATCAGTGCTCCATCAGAAAGTACAGAAACCATTATTTGCAAGGATGAATACGAGGCACCA
+ACAATTGTCCTTGATCCCACAATAAAAGATGGGCTAACAATTAAAGCAGGGGATACCATTGTTTTGAATG
+CCATTAGCATTCTTGGCAAACCCCTTCCAAAATCAAGTTGGTCCAAGGCAGGAAAAGACATTAGACCATC
+AGATATCACTCAGATAACTTCAACCCCAACATCTTCCATGCTTACTATCAAGTATGCCACTAGAAAAGAT
+GCGGGTGAATATACCATCACTGCTACCAATCCTTTTGGCACGAAGGTGGAACATGTGAAGGTAACAGTCC
+TTGATGTACCTGGTCCCCCAGGTCCTGTTGAAATCAGTAATGTTTCTGCTGAAAAAGCAACACTTACATG
+GACACCTCCCTTGGAAGATGGCGGCTCACCAATTAAGTCCTATATACTTGAAAAGAGAGAAACCAGCCGA
+CTTTTGTGGACAGTGGTTTCTGAAGATATTCAGTCTTGCAGGCATGTGGCAACCAAACTTATCCAAGGAA
+ATGAGTACATCTTCCGGGTCTCAGCTGTAAACCACTATGGCAAAGGAGAACCTGTACAGTCTGAACCTGT
+CAAAATGGTAGACAGATTTGGTCCCCCTGGCCCTCCTGAAAAACCAGAGGTATCAAATGTCACTAAGAAC
+ACTGCCACTGTCAGCTGGAAAAGGCCAGTGGATGATGGTGGCAGCGAAATTACAGGATATCATGTAGAAA
+GGAGAGAAAAGAAAAGCCTGCGATGGGTGAGAGCAATAAAAACACCAGTTTCCGATCTCAGGTGCAAAGT
+AACAGGACTGCAAGAAGGAAGCACCTACGAATTCCGTGTCAGTGCAGAAAACAGAGCAGGAATTGGTCCA
+CCCAGTGAGGCTTCAGATTCTGTTCTGATGAAAGATGCAGCATATCCTCCAGGACCACCTTCAAATCCGC
+ATGTCACTGATACTACCAAGAAATCTGCTTCTTTGGCATGGGGCAAGCCTCATTATGATGGTGGACTTGA
+AATCACTGGCTATGTCGTGGAGCATCAAAAAGTAGGAGACGAGGCCTGGATAAAAGATACCACAGGAACC
+GCCCTCAGAATCACTCAGTTCGTTGTTCCTGATCTTCAGACTAAAGAAAAATACAACTTCAGAATCAGTG
+CCATCAACGATGCAGGTGTTGGGGAGCCAGCGGTGATTCCAGATGTTGAAATCGTAGAACGGGAGATGGC
+TCCTGATTTTGAACTAGATGCCGAGCTTCGAAGAACACTTGTTGTTAGAGCAGGACTCAGTATTAGGATA
+TTTGTGCCAATTAAAGGTCGTCCTGCTCCTGAAGTGACATGGACCAAAGATAACATCAACCTGAAAAACC
+GAGCCAACATTGAAAATACGGAATCATTTACTCTTCTGATTATCCCAGAATGTAACAGATATGATACCGG
+TAAATTTGTCATGACCATTGAAAACCCGGCTGGGAAGAAAAGTGGCTTTGTGAACGTCAGAGTCTTGGAC
+ACGCCAGGCCCAGTCCTCAACCTGCGGCCTACAGACATCACAAAGGACAGTGTCACCCTGCACTGGGACC
+TCCCTCTGATAGATGGAGGCTCACGTATAACAAACTACATTGTAGAGAAACGTGAAGCAACACGGAAATC
+TTATTCCACAGCCACCACTAAGTGCCATAAATGCACATATAAAGTTACCGGCTTGTCTGAAGGGTGTGAA
+TATTTCTTCAGAGTGATGGCAGAGAATGAATATGGAATTGGTGAGCCAACAGAAACTACAGAGCCCGTAA
+AAGCCTCTGAAGCACCATCTCCACCAGACAGCCTTAACATCATGGACATAACTAAGAGCACCGTCAGCCT
+GGCATGGCCTAAGCCCAAACACGATGGTGGCAGCAAGATCACTGGCTATGTGATTGAAGCCCAAAGAAAA
+GGCTCTGACCAGTGGACCCACATCACAACCGTGAAAGGGTTAGAATGTGTTGTGAGGAATCTAACTGAAG
+GAGAGGAATATACCTTCCAAGTGATGGCAGTGAACAGCGCGGGGAGAAGTGCCCCTAGAGAAAGCAGACC
+CGTCATTGTCAAGGAGCAGACAATGCTTCCAGAGCTGGATCTCCGTGGCATCTATCAGAAACTGGTCATT
+GCCAAAGCTGGTGACAACATCAAAGTTGAAATTCCAGTGCTCGGTCGACCGAAGCCCACAGTGACATGGA
+AAAAAGGAGACCAAATTCTTAAACAGACACAGAGAGTTAATTTTGAAACCACAGCGACTTCAACCATTTT
+AAATATCAATGAGTGTGTCAGAAGTGATAGTGGGCCCTATCCATTAACAGCAAGGAACATTGTAGGAGAG
+GTTGGTGATGTCATCACCATTCAAGTCCATGATATCCCAGGGCCACCTACTGGACCAATCAAATTTGATG
+AAGTTTCATCTGATTTTGTAACCTTCTCTTGGGACCCACCTGAGAACGATGGTGGTGTACCAATAAGCAA
+CTATGTAGTGGAAATGCGGCAGACTGACAGTACTACCTGGGTTGAGTTAGCAACCACCGTTATACGTACT
+ACCTATAAAGCCACCCGCCTTACTACTGGATTAGAGTATCAGTTCCGTGTAAAAGCTCAGAATAGATATG
+GAGTTGGACCAGGCATCACATCAGCATGCATAGTTGCCAACTATCCATTTAAGGTTCCTGGACCTCCTGG
+TACCCCTCAGGTAACTGCAGTTACCAAGGATTCAATGACAATTAGCTGGCATGAGCCACTTTCTGATGGT
+GGAAGCCCCATTTTAGGATATCATGTTGAAAGAAAAGAACGAAATGGTATTCTCTGGCAGACTGTGAGCA
+AAGCTTTAGTACCAGGCAACATTTTCAAATCAAGTGGACTTACAGATGGTATTGCTTATGAGTTCCGGGT
+GATTGCAGAAAACATGGCAGGCAAAAGTAAGCCAAGCAAGCCATCAGAACCTATGTTGGCTCTGGATCCC
+ATTGACCCACCTGGAAAACCAGTACCTCTAAATATTACAAGACACACAGTAACACTTAAATGGGCTAAGC
+CTGAATATACTGGGGGCTTTAAAATTACCAGTTATATCGTTGAAAAGAGAGACCTTCCTAATGGACGGTG
+GCTGAAGGCCAACTTCAGCAACATTTTGGAGAATGAATTTACAGTCAGTGGCCTAACAGAAGATGCTGCA
+TATGAATTCCGTGTGATCGCCAAAAATGCTGCAGGTGCCATCAGTCCACCATCTGAGCCATCTGATGCTA
+TCACTTGCAGGGATGATGTTGAGGCACCAAAGATAAAGGTGGATGTTAAATTTAAGGACACGGTTATATT
+AAAAGCAGGTGAAGCATTCAGACTGGAAGCTGATGTTTCAGGCCGCCCACCTCCAACAATGGAATGGAGC
+AAAGATGGAAAAGAGCTGGAAGGCACAGCAAAGTTAGAAATAAAAATTGCAGATTTCTCTACTAATCTGG
+TAAACAAAGATTCAACAAGAAGGGATAGTGGTGCCTATACCCTTACAGCGACTAATCCTGGTGGCTTTGC
+TAAACACATTTTCAATGTCAAAGTTCTTGACAGACCAGGCCCACCTGAAGGACCTTTGGCTGTAACTGAA
+GTGACATCAGAAAAGTGTGTACTATCATGGTTCCCTCCACTGGATGATGGAGGTGCCAAAATTGATCATT
+ACATAGTACAGAAACGTGAAACCAGCAGATTGGCATGGACAAATGTAGCCTCAGAAGTCCAAGTAACAAA
+GCTAAAGGTCACTAAACTCTTGAAAGGCAATGAATACATATTCCGTGTCATGGCTGTAAATAAATATGGA
+GTGGGAGAGCCACTGGAATCAGAGCCTGTGCTTGCAGTGAATCCTTATGGACCCCCTGATCCGCCCAAAA
+ACCCTGAAGTGACAACTATTACTAAAGATTCGATGGTTGTCTGCTGGGGACATCCTGATTCTGATGGTGG
+AAGTGAAATCATCAATTATATTGTGGAACGGCGTGATAAAGCTGGCCAACGCTGGATTAAATGCAACAAA
+AAAACTCTTACTGATTTAAGATATAAAGTGTCTGGACTGACAGAAGGACATGAATATGAGTTCAGGATTA
+TGGCTGAAAATGCTGCTGGAATTAGTGCACCAAGTCCTACCAGTCCATTTTACAAGGCTTGTGACACTGT
+GTTTAAACCTGGACCACCAGGTAACCCACGTGTTCTGGATACAAGCAGATCATCCATTTCAATCGCTTGG
+AATAAACCTATCTATGATGGTGGTTCAGAAATCACTGGGTATATGGTTGAGATTGCCCTGCCAGAGGAAG
+ATGAATGGCAGATTGTCACTCCACCAGCAGGACTCAAGGCAACTTCGTATACTATCACTGGCCTCACAGA
+GAATCAGGAATATAAGATCCGCATCTATGCCATGAATTCCGAAGGACTTGGGGAACCTGCCCTTGTTCCT
+GGAACTCCAAAGGCTGAAGACAGAATGCTGCCTCCAGAAATTGAACTGGATGCTGACCTGCGCAAAGTTG
+TTACTATAAGGGCCTGCTGCACCCTGAGACTTTTTGTTCCCATCAAAGGAAGGCCTGCACCTGAGGTGAA
+GTGGGCCCGGGACCATGGAGAATCTTTAGATAAAGCTAGCATCGAATCCACAAGCTCTTACACCCTGCTT
+ATTGTTGGAAATGTAAACAGATTTGACAGTGGCAAATATATACTAACTGTAGAAAATAGTTCAGGCAGCA
+AGTCTGCATTTGTCAATGTTAGAGTTCTCGATACACCAGGCCCCCCACAGGATCTGAAGGTAAAAGAGGT
+CACTAAGACATCTGTCACACTCACATGGGACCCACCTCTCCTTGATGGAGGTTCAAAAATCAAGAACTAT
+ATTGTTGAAAAGCGGGAATCAACAAGAAAAGCATATTCAACTGTTGCAACAAACTGCCACAAGACTTCCT
+GGAAGGTAGACCAGCTTCAAGAAGGCTGTAGCTACTATTTCAGGGTTCTCGCAGAAAATGAATATGGCAT
+TGGGCTGCCTGCTGAAACCGCAGAATCTGTGAAAGCATCAGAACGACCTCTTCCTCCAGGAAAAATAACT
+TTGATGGATGTCACAAGAAATAGTGTGTCACTCTCTTGGGAGAAACCAGAGCATGATGGAGGCAGCCGAA
+TTCTAGGCTACATTGTGGAGATGCAGACCAAAGGCAGTGACAAATGGGCCACGTGTGCCACAGTCAAGGT
+CACTGAAGCCACTATCACTGGATTAATTCAGGGTGAAGAATACTCTTTCCGTGTTTCAGCTCAGAATGAA
+AAGGGCATCAGTGATCCTAGACAACTGAGTGTGCCAGTGATCGCCAAAGATCTTGTCATTCCACCAGCCT
+TCAAACTCCTGTTCAATACTTTCACTGTACTGGCAGGTGAAGACCTAAAAGTTGATGTTCCATTCATTGG
+CCGCCCTACCCCAGCTGTAACCTGGCATAAAGATAATGTACCACTGAAGCAGACAACTAGAGTAAATGCA
+GAGAGCACAGAAAATAATTCACTACTGACAATAAAGGACGCCTGCCGAGAAGATGTTGGCCATTATGTGG
+TTAAACTGACTAACTCAGCTGGTGAAGCTATTGAAACCCTTAATGTTATCGTTCTTGACAAACCAGGGCC
+TCCAACTGGACCAGTTAAAATGGATGAAGTGACAGCTGATAGTATTACTCTTTCCTGGGGCCCACCCAAG
+TATGATGGTGGAAGTTCTATCAATAATTACATTGTTGAGAAACGGGACACTTCCACAACCACCTGGCAAA
+TTGTATCAGCTACAGTTGCAAGGACAACAATAAAGGCTTGCAGACTGAAGACTGGATGTGAATATCAGTT
+TAGAATTGCAGCTGAAAACAGATATGGGAAGAGTACCTACCTCAATTCAGAGCCTACTGTAGCCCAATAT
+CCATTCAAAGTTCCTGGTCCTCCTGGCACTCCAGTTGTCACACTGTCCTCCAGGGACAGCATGGAAGTAC
+AATGGAATGAGCCAATCAGTGATGGAGGAAGTAGAGTCATTGGCTATCATCTAGAACGCAAGGAAAGAAA
+TAGCATCCTCTGGGTTAAGTTGAATAAAACACCTATTCCTCAAACCAAGTTTAAGACAACTGGCCTTGAA
+GAAGGTGTTGAATATGAATTTAGAGTCTCTGCAGAGAACATCGTGGGCATTGGCAAGCCGAGTAAAGTAT
+CAGAATGTTATGTGGCTCGTGACCCATGTGATCCACCAGGACGGCCAGAGGCAATCATTGTCACAAGGAA
+TTCTGTGACTCTTCAGTGGAAGAAACCCACCTATGACGGTGGAAGCAAGATCACTGGTTATATTGTTGAG
+AAGAAAGAATTACCTGAGGGCCGTTGGATGAAAGCCAGTTTTACAAATATTATTGACACTCATTTTGAAG
+TAACTGGCCTAGTTGAAGATCACAGATATGAGTTCCGGGTTATAGCCCGAAATGCCGCAGGAGTGTTTAG
+TGAGCCTTCAGAAAGCACAGGAGCAATAACAGCTAGAGATGAGGTAGATCCACCACGAATAAGTATGGAT
+CCAAAATACAAAGACACAATCGTGGTTCATGCTGGTGAATCATTCAAGGTTGATGCAGATATTTATGGCA
+AACCAATACCAACCATTCAGTGGATAAAAGGTGATCAGGAGCTTTCAAACACAGCTCGATTAGAAATAAA
+GAGCACCGACTTTGCCACCAGTCTCAGTGTAAAAGATGCAGTACGTGTCGACAGTGGAAATTACATACTG
+AAGGCCAAAAATGTTGCAGGAGAAAGATCAGTTACTGTGAATGTCAAGGTTCTTGACAGACCAGGGCCAC
+CTGAAGGACCTGTTGTTATCTCAGGAGTTACAGCAGAAAAATGCACACTAGCTTGGAAACCCCCACTTCA
+GGATGGTGGGAGTGACATCATAAATTATATTGTGGAAAGGAGAGAAACCAGCCGCTTAGTTTGGACTGTG
+GTTGATGCCAATGTGCAGACTCTCAGCTGCAAGGTTACTAAGCTTCTTGAAGGCAATGAATATACTTTCC
+GTATAATGGCAGTAAACAAATATGGTGTTGGTGAACCTCTTGAATCTGAGCCAGTAGTTGCCAAGAATCC
+ATTTGTAGTACCAGATGCACCAAAAGCTCCAGAAGTCACAACAGTGACCAAGGACTCAATGATTGTTGTA
+TGGGAAAGACCAGCATCTGATGGTGGTAGTGAAATTCTTGGATATGTTCTTGAGAAACGGGATAAAGAAG
+GCATTAGATGGACAAGATGCCATAAGCGTCTGATTGGAGAGTTGCGCCTGAGAGTAACTGGACTCATAGA
+AAATCACGATTATGAGTTCAGAGTTTCTGCTGAGAATGCTGCTGGACTTAGTGAACCAAGCCCTCCTTCT
+GCTTACCAAAAGGCTTGTGATCCTATTTATAAACCAGGACCCCCAAACAACCCCAAAGTCATAGACATAA
+CCAGATCTTCAGTATTCCTTTCTTGGAGCAAACCAATATATGATGGTGGCTGTGAAATTCAAGGATACAT
+TGTTGAAAAATGTGATGTGAGTGTTGGTGAATGGACAATGTGCACTCCACCAACAGGAATTAATAAAACA
+AACATAGAAGTAGAGAAGCTGTTGGAAAAGCATGAATACAACTTCCGTATCTGTGCTATTAATAAAGCTG
+GAGTTGGAGAACATGCTGACGTCCCTGGACCTATTATAGTTGAAGAAAAATTAGAAGCACCAGACATTGA
+TCTTGACCTAGAACTAAGGAAAATCATAAATATAAGGGCAGGTGGCTCCTTAAGGTTATTTGTTCCTATA
+AAAGGTCGTCCTACACCAGAAGTTAAATGGGGAAAGGTGGATGGTGAAATCCGAGATGCAGCTATAATTG
+ATGTCACTAGCAGTTTCACCTCTCTTGTTCTTGACAATGTCAACCGATATGATAGTGGAAAATATACGCT
+TACATTAGAAAACAGCAGTGGAACAAAGTCTGCCTTTGTTACTGTGAGAGTTCTGGACACGCCAAGTCCA
+CCTGTTAACCTGAAAGTCACAGAAATCACCAAAGACTCAGTATCAATTACATGGGAACCTCCTTTGTTGG
+ATGGGGGATCCAAAATAAAAAATTACATTGTTGAGAAACGTGAAGCCACAAGAAAATCATATGCTGCTGT
+TGTAACTAACTGCCATAAGAATTCTTGGAAAATCGATCAGCTCCAAGAAGGTTGCAGTTATTACTTTAGA
+GTCACAGCTGAGAATGAGTATGGTATTGGCCTTCCTGCCCAGACTGCTGATCCAATTAAGGTTGCAGAAG
+TGCCACAACCTCCTGGAAAAATAACTGTGGATGATGTCACCAGAAACAGTGTCTCTCTGAGTTGGACAAA
+ACCTGAACATGATGGTGGCAGTAAAATCATTCAGTATATTGTGGAAATGCAAGCTAAACACAGTGAGAAA
+TGGTCAGAGTGTGCTCGAGTAAAGTCTCTTCAGGCAGTAATTACCAACCTAACTCAAGGGGAAGAATATC
+TTTTTAGAGTTGTTGCTGTAAATGAAAAGGGGAGAAGTGATCCTCGGTCCCTTGCAGTTCCAATAGTTGC
+CAAAGATCTGGTAATTGAGCCAGATGTAAAACCTGCATTCAGTAGTTACAGTGTACAGGTTGGCCAAGAT
+TTGAAAATAGAAGTGCCAATTTCTGGACGTCCTAAGCCAACCATTACCTGGACTAAAGATGGTCTCCCAC
+TGAAGCAGACCACAAGAATCAATGTTACCGATTCACTGGATCTCACCACACTCAGTATTAAAGAAACTCA
+TAAGGATGATGGTGGACAATATGGAATCACAGTTGCCAATGTTGTTGGTCAGAAGACAGCATCCATCGAA
+ATTGTAACTCTAGATAAACCTGATCCTCCAAAAGGACCTGTTAAATTTGATGACGTCAGTGCTGAAAGTA
+TTACATTATCTTGGAACCCTCCATTATATACAGGGGGCTGCCAAATCACCAACTACATTGTTCAGAAAAG
+AGATACAACCACCACAGTATGGGATGTTGTTTCTGCTACTGTTGCTAGAACTACACTCAAAGTGACCAAA
+CTGAAAACTGGTACAGAATACCAATTTAGAATATTTGCCGAAAACAGATATGGACAAAGCTTTGCCTTAG
+AGTCTGATCCAATTGTAGCTCAATATCCCTACAAAGAACCAGGCCCTCCAGGTACACCATTTGCCACAGC
+CATTTCCAAAGACTCCATGGTCATACAGTGGCATGAACCAGTCAACAATGGTGGAAGCCCCGTCATAGGT
+TACCACCTGGAGAGAAAAGAAAGAAACAGTATTTTGTGGACAAAGGTCAACAAAACTATTATTCATGACA
+CCCAATTCAAAGCACAGAATCTTGAAGAAGGCATTGAATATGAATTCAGAGTGTATGCTGAAAATATTGT
+TGGTGTAGGCAAAGCAAGCAAGAATTCTGAATGCTATGTAGCCAGAGATCCCTGTGACCCACCAGGAACC
+CCAGAACCAATAATGGTTAAAAGAAATGAAATCACTTTACAGTGGACCAAACCTGTGTATGATGGTGGAA
+GTATGATTACAGGCTACATTGTAGAGAAACGTGATTTGCCTGATGGTCGTTGGATGAAAGCTAGCTTTAC
+AAATGTCATTGAAACTCAATTTACTGTGTCAGGTCTTACTGAAGATCAAAGATATGAATTCAGAGTCATT
+GCAAAGAATGCAGCTGGTGCAATAAGTAAACCCTCTGACAGTACTGGACCAATAACTGCCAAGGATGAGG
+TTGAACTCCCAAGAATTTCAATGGATCCAAAATTCAGAGACACAATTGTGGTAAATGCTGGAGAAACATT
+CAGACTTGAGGCTGATGTCCATGGAAAGCCCCTACCTACCATTGAGTGGTTAAGAGGAGATAAGGAAATT
+GAAGAATCTGCTAGATGTGAAATAAAGAACACAGATTTCAAGGCTTTACTTATTGTAAAAGATGCAATTA
+GAATTGATGGTGGGCAGTATATTTTAAGAGCTTCCAATGTTGCAGGTTCTAAGTCATTCCCAGTAAATGT
+AAAAGTATTAGATAGACCAGGACCTCCAGAAGGGCCAGTCCAGGTTACTGGAGTCACTTCTGAAAAATGC
+TCTTTAACATGGTCTCCACCACTTCAAGATGGTGGCAGTGACATTTCTCACTATGTTGTTGAAAAGCGAG
+AAACCAGTCGACTTGCCTGGACTGTTGTTGCTTCAGAAGTTGTGACCAATTCTCTGAAAGTTACCAAACT
+CTTAGAAGGTAATGAATATGTTTTCCGTATAATGGCTGTCAACAAATATGGTGTTGGAGAGCCTTTGGAA
+TCTGCACCAGTACTAATGAAAAATCCATTTGTGCTTCCTGGACCACCAAAAAGCTTGGAAGTCACAAATA
+TTGCCAAAGACTCCATGACCGTCTGTTGGAACCGTCCAGATAGTGATGGTGGAAGTGAGATTATTGGTTA
+CATTGTAGAGAAAAGAGACAGAAGTGGCATTCGATGGATAAAATGTAATAAACGCCGCATTACAGATTTG
+CGTCTAAGAGTGACAGGATTAACAGAAGATCATGAGTATGAATTCAGGGTCTCTGCAGAAAATGCTGCTG
+GAGTTGGGGAACCAAGTCCAGCTACAGTTTATTATAAAGCCTGTGATCCTGTGTTCAAACCTGGCCCACC
+TACCAATGCACACATTGTAGACACCACTAAAAATTCAATCACACTTGCCTGGGGTAAACCCATCTATGAT
+GGCGGCAGTGAGATCTTGGGATATGTAGTAGAAATCTGTAAAGCAGATGAAGAAGAATGGCAAATAGTTA
+CTCCACAGACTGGCCTGAGAGTCACTCGATTTGAAATTTCAAAACTCACTGAACACCAAGAGTATAAAAT
+ACGAGTCTGTGCCCTCAACAAAGTTGGTTTAGGTGAGGCTACATCAGTTCCTGGTACTGTGAAACCAGAA
+GATAAACTTGAAGCACCTGAACTTGACCTTGACTCCGAATTAAGAAAAGGAATTGTTGTAAGAGCTGGTG
+GATCTGCCAGAATTCACATTCCATTCAAAGGTCGTCCAACGCCTGAGATCACTTGGTCTCGAGAGGAAGG
+TGAATTCACAGATAAGGTCCAAATTGAAAAGGGAGTAAACTATACCCAACTATCAATAGATAACTGTGAT
+AGAAATGATGCTGGAAAATACATTCTTAAGTTGGAAAACAGCAGTGGATCAAAGTCTGCTTTTGTAACTG
+TGAAAGTTCTTGACACTCCAGGACCACCACAGAATTTGGCAGTCAAAGAAGTGAGAAAAGATTCTGCCTT
+CCTGGTATGGGAGCCACCCATCATTGATGGAGGGGCAAAGGTCAAGAACTATGTGATTGACAAACGTGAG
+TCAACCAGAAAAGCGTATGCTAATGTGAGTAGTAAATGCAGCAAAACAAGTTTTAAAGTGGAAAACCTTA
+CAGAAGGAGCCATTTATTACTTCAGAGTCATGGCTGAAAATGAATTTGGAGTTGGTGTTCCAGTGGAAAC
+TGTTGATGCCGTGAAAGCTGCTGAACCTCCTTCCCCACCAGGAAAGGTTACACTCACTGATGTGTCCCAG
+ACCAGTGCATCACTTATGTGGGAGAAACCTGAACATGATGGCGGTAGCAGAGTCCTGGGGTACGTTGTTG
+AAATGCAGCCCAAAGGAACTGAAAAATGGAGCATTGTGGCTGAATCCAAAGTCTGTAATGCAGTTGTTAC
+TGGTTTGAGTTCTGGACAAGAATATCAGTTCCGTGTCAAGGCTTATAATGAGAAAGGAAAAAGCGATCCA
+AGAGTGTTGGGTGTTCCTGTCATAGCCAAGGACTTGACTATACAGCCTAGTTTAAAGTTACCATTTAACA
+CATATAGTATCCAAGCTGGAGAAGATCTTAAAATAGAAATTCCAGTTATAGGCCGACCAAGACCTAACAT
+TTCTTGGGTCAAAGATGGTGAGCCTCTTAAACAGACAACAAGAGTAAACGTTGAAGAAACAGCTACCTCA
+ACTGTTTTGCACATTAAAGAAGGTAACAAAGATGACTTTGGAAAATACACCGTAACGGCAACAAATAGTG
+CAGGCACAGCAACAGAAAATCTCAGTGTTATCGTTTTAGAAAAGCCTGGACCTCCAGTTGGCCCAGTTCG
+GTTTGATGAAGTTAGTGCAGACTTTGTAGTCATATCTTGGGAACCTCCAGCCTATACTGGTGGCTGCCAA
+ATAAGCAACTACATTGTAGAGAAGCGAGATACAACCACCACCACTTGGCACATGGTATCAGCAACAGTTG
+CAAGAACAACAATTAAAATAACCAAACTGAAAACAGGCACGGAGTACCAGTTTAGAATTTTTGCTGAAAA
+CAGGTATGGAAAAAGTGCCCCACTGGATTCTAAGGCAGTTATTGTACAATATCCATTTAAAGAACCTGGA
+CCACCTGGAACTCCTTTTGTGACATCAATCTCAAAAGATCAGATGCTTGTGCAATGGCATGAGCCAGTGA
+ATGATGGAGGCACCAAAATTATTGGCTACCATCTTGAACAGAAAGAAAAGAACAGTATTTTATGGGTCAA
+GTTAAATAAGACCCCCATTCAGGACACCAAATTCAAAACAACTGGGCTTGATGAGGGCCTTGAGTATGAG
+TTCAAAGTTTCTGCTGAAAATATTGTTGGCATTGGCAAGCCTAGCAAAGTGTCAGAATGCTTTGTTGCTC
+GTGATCCATGTGACCCACCTGGTCGCCCTGAAGCCATTGTTATTACAAGAAACAATGTCACACTGAAATG
+GAAGAAACCTGCCTATGATGGTGGTAGCAAAATAACAGGTTATATTGTAGAAAAGAAAGATCTACCTGAT
+GGCCGCTGGATGAAAGCCAGCTTTACCAACGTATTAGAAACTGAATTTACAGTGAGTGGACTTGTAGAAG
+ACCAAAGATATGAATTTAGAGTAATTGCAAGAAATGCAGCTGGAAACTTTAGTGAACCATCTGATAGTAG
+TGGTGCCATTACTGCAAGAGATGAAATTGATGCACCAAATGCCTCTCTGGATCCAAAATATAAAGATGTC
+ATCGTTGTTCATGCAGGAGAGACTTTTGTTCTTGAAGCCGACATCCGTGGCAAACCTATACCTGATGTTG
+TTTGGTCAAAAGATGGAAAAGAACTTGAAGAAACAGCTGCTAGAATGGAAATTAAATCTACTATTCAGAA
+AACAACTCTTGTTGTCAAAGACTGTATACGGACTGATGGAGGACAATATATTCTGAAACTCAGCAATGTT
+GGTGGTACAAAGTCTATACCCATCACTGTAAAGGTACTTGACAGGCCAGGGCCTCCTGAAGGGCCTCTGA
+AAGTTACTGGAGTTACTGCGGAAAAATGTTACCTGGCATGGAACCCACCTTTGCAAGATGGTGGTGCTAA
+TATTTCACATTACATCATTGAAAAGAGGGAGACAAGCCGACTCTCTTGGACCCAGGTTTCAACTGAGGTA
+CAGGCCCTTAACTACAAAGTTACTAAACTTCTTCCTGGTAATGAGTACATTTTCCGTGTCATGGCTGTGA
+ATAAATATGGAATTGGAGAGCCCTTGGAATCTGGGCCTGTTACGGCCTGTAATCCTTATAAGCCACCAGG
+TCCTCCCTCAACACCTGAAGTCTCAGCAATCACCAAAGATTCTATGGTAGTAACATGGGCACGCCCAGTA
+GACGACGGAGGTACCGAAATTGAGGGCTACATTCTTGAAAAACGAGATAAGGAAGGCGTTAGATGGACCA
+AGTGCAACAAGAAAACATTAACGGATCTGCGGCTCAGGGTAACTGGTCTTACCGAAGGCCATTCCTATGA
+ATTCAGAGTTGCTGCTGAAAATGCAGCTGGTGTGGGAGAACCTAGTGAGCCATCTGTTTTCTACCGTGCG
+TGTGATGCCTTGTATCCACCAGGTCCCCCAAGCAATCCAAAAGTGACGGACACTTCCAGATCTTCTGTCT
+CCCTGGCATGGAGTAAGCCAATTTATGATGGTGGCGCACCTGTTAAAGGCTATGTTGTAGAGGTCAAAGA
+AGCTGCTGCGGATGAATGGACAACCTGCACTCCACCAACAGGATTACAAGGAAAGCAGTTCACAGTGACC
+AAGCTTAAAGAAAACACTGAATATAACTTCCGTATTTGTGCCATCAATTCTGAAGGTGTAGGTGAACCTG
+CAACTCTACCTGGCTCAGTGGTTGCTCAGGAGAGGATAGAGCCACCAGAAATAGAACTCGATGCTGATCT
+CAGAAAGGTGGTCGTTCTGCGTGCAAGTGCTACTTTACGCTTATTTGTCACTATCAAAGGTCGACCAGAA
+CCCGAAGTTAAATGGGAAAAGGCAGAAGGCATTCTCACTGACAGGGCTCAGATAGAGGTGACCAGCTCAT
+TTACAATGTTGGTGATTGATAATGTTACCAGATTTGACAGTGGTCGGTATAATCTGACATTAGAAAATAA
+TAGTGGCTCCAAAACAGCTTTTGTTAACGTCAGAGTTCTTGACTCACCAAGTGCCCCTGTGAATTTGACC
+ATAAGAGAAGTGAAGAAAGACTCAGTGACGTTGTCCTGGGAACCACCACTTATTGATGGTGGAGCTAAGA
+TTACAAACTACATTGTCGAAAAACGAGAAACTACAAGAAAAGCCTATGCTACCATTACAAATAATTGCAC
+TAAAACTACTTTCAGAATTGAAAATCTACAAGAAGGATGTTCTTACTACTTCCGAGTCTTGGCTTCCAAT
+GAATATGGGATTGGTTTGCCAGCTGAAACAACAGAACCCGTTAAAGTGTCTGAACCACCCCTCCCACCTG
+GAAGAGTAACTCTTGTTGATGTGACCCGTAATACAGCTACAATTAAGTGGGAGAAACCAGAAAGTGATGG
+TGGCAGCAAAATTACTGGTTATGTGGTTGAAATGCAGACTAAAGGGAGTGAAAAGTGGAGCACCTGCACA
+CAAGTTAAGACTCTAGAAGCAACTATATCTGGCTTAACTGCAGGAGAAGAGTATGTCTTCAGGGTAGCTG
+CAGTTAACGAAAAGGGAAGAAGTGATCCAAGACAACTTGGAGTGCCAGTAATTGCAAGGGATATTGAAAT
+AAAGCCTTCAGTTGAGCTTCCTTTCCATACTTTCAATGTAAAGGCTAGAGAACAACTTAAGATTGATGTG
+CCATTCAAAGGAAGACCTCAAGCTACTGTGAACTGGAGAAAAGATGGTCAGACTCTTAAAGAGACAACTA
+GAGTCAATGTTTCTTCTTCAAAGACTGTAACATCACTATCTATTAAGGAAGCTTCAAAGGAAGATGTTGG
+AACTTATGAATTATGTGTTTCAAACAGTGCTGGATCCATAACAGTTCCTATTACTATAATTGTCCTTGAC
+AGACCAGGACCTCCAGGTCCTATACGTATTGATGAGGTTAGTTGTGACAGCATAACCATTTCTTGGAATC
+CTCCAGAATATGATGGTGGCTGCCAAATTAGCAATTACATTGTTGAAAAGAAAGAAACCACCTCTACAAC
+ATGGCACATAGTTTCACAAGCAGTTGCAAGAACATCCATTAAAATAGTTCGCCTGACAACAGGAAGTGAG
+TATCAGTTCCGTGTTTGTGCAGAAAACCGCTATGGAAAGAGCTCCTACAGTGAATCTTCAGCTGTTGTTG
+CAGAGTATCCATTCAGTCCCCCAGGTCCTCCTGGTACTCCTAAAGTTGTGCATGCCACAAAATCTACCAT
+GCTTGTAACCTGGCAAGTGCCAGTTAATGATGGAGGAAGTCGAGTAATTGGCTATCATCTTGAGTATAAA
+GAAAGAAGCAGCATTCTTTGGTCAAAAGCAAATAAAATCCTCATTGCTGATACTCAAATGAAAGTCTCCG
+GCCTTGATGAAGGACTGATGTATGAGTATCGTGTATATGCTGAAAATATTGCTGGAATTGGTAAATGCAG
+TAAATCTTGTGAACCAGTCCCTGCAAGAGATCCTTGTGACCCTCCTGGACAACCTGAAGTCACAAATATC
+ACAAGAAAATCAGTGTCACTTAAATGGTCTAAACCACATTATGATGGTGGAGCTAAGATCACAGGATACA
+TTGTTGAACGCAGAGAACTACCAGATGGCCGGTGGCTGAAGTGCAATTATACTAATATACAAGAAACATA
+CTTTGAAGTAACTGAACTTACTGAAGATCAGCGTTATGAATTCCGGGTTTTTGCAAGGAATGCTGCTGAC
+TCAGTTAGTGAGCCATCTGAATCCACTGGGCCTATTATAGTTAAAGATGATGTTGAGCCTCCAAGAGTTA
+TGATGGATGTCAAGTTCCGAGACGTTATTGTTGTCAAAGCTGGAGAGGTCCTTAAGATAAATGCAGACAT
+TGCAGGGCGACCTCTGCCAGTAATTTCCTGGGCCAAGGATGGTATAGAAATTGAAGAAAGAGCAAGAACA
+GAAATCATCTCAACAGACAATCATACTTTGTTAACAGTTAAAGACTGTATAAGACGAGACACTGGGCAAT
+ATGTACTAACACTGAAGAATGTTGCCGGCACTCGGTCTGTGGCCGTTAATTGCAAAGTACTTGATAAGCC
+TGGTCCACCAGCAGGACCACTTGAAATAAATGGCCTCACTGCTGAGAAATGCTCTCTTTCCTGGGGACGT
+CCCCAAGAAGATGGTGGTGCAGATATCGACTATTACATCGTAGAAAAACGTGAAACAAGCCACCTTGCAT
+GGACAATATGTGAAGGAGAGTTACAGATGACATCCTGTAAAGTAACCAAGTTACTCAAAGGCAATGAATA
+TATATTTAGAGTAACTGGTGTTAATAAATATGGTGTTGGTGAGCCCCTAGAGAGTGTAGCTATAAAGGCA
+CTAGATCCATTTACAGTTCCAAGTCCACCCACGTCTTTGGAAATTACTTCTGTGACCAAAGAATCTATGA
+CACTTTGCTGGTCAAGACCCGAGAGTGATGGAGGTAGTGAAATATCTGGATATATAATTGAAAGGCGAGA
+GAAAAATAGCCTAAGATGGGTGCGTGTAAACAAAAAACCAGTTTATGATCTAAGAGTGAAATCAACAGGA
+CTTCGGGAAGGATGTGAATATGAATATCGTGTTTATGCAGAAAATGCTGCTGGCCTAAGTCTTCCAAGTG
+AAACCTCTCCCTTAATTAGGGCAGAAGATCCAGTGTTCCTACCATCTCCTCCATCCAAACCCAAAATTGT
+GGACTCAGGCAAGACAACTATAACTATTGCCTGGGTTAAGCCGCTGTTTGATGGTGGGGCCCCGATAACT
+GGATATACTGTAGAATACAAAAAATCTGATGACACTGACTGGAAAACTTCCATTCAGAGCTTACGAGGGA
+CAGAATATACAATAAGCGGACTAACAACAGGAGCTGAATATGTTTTCAGAGTAAAATCTGTCAATAAGGT
+TGGTGCTAGTGACCCCAGTGATAGCTCTGACCCTCAGATAGCAAAGGAAAGAGAAGAAGAACCTTTATTT
+GATATTGACAGTGAAATGAGGAAGACCTTGATTGTCAAGGCTGGTGCCTCATTTACCATGACTGTGCCTT
+TCCGAGGAAGACCAGTACCCAATGTCTTGTGGAGTAAGCCAGACACTGACCTCCGTACTAGAGCTTATGT
+TGATACCACAGACTCCCGTACATCACTGACCATTGAAAATGCCAACAGAAATGACTCTGGAAAGTACACA
+TTAACAATTCAGAATGTTTTGAGTGCTGCTTCACTGACCTTAGTTGTCAAAGTTTTAGATACCCCAGGTC
+CTCCAACCAACATTACTGTGCAAGATGTAACCAAAGAGTCTGCAGTGTTATCCTGGGATGTTCCTGAAAA
+CGATGGTGGAGCACCAGTGAAGAATTACCACATAGAAAAACGTGAGGCCAGCAAGAAAGCATGGGTCTCT
+GTGACCAACAACTGTAACCGCCTCTCCTACAAAGTTACCAATTTACAAGAAGGAGCTATCTATTACTTCA
+GAGTCTCTGGAGAAAATGAGTTTGGTGTTGGTATACCAGCTGAAACAAAGGAAGGAGTAAAAATAACAGA
+AAAACCAAGCCCACCTGAAAAACTTGGAGTAACAAGTATATCCAAAGACAGTGTTTCCCTGACCTGGCTG
+AAGCCTGAACATGATGGCGGAAGCAGAATTGTACACTATGTCGTTGAAGCACTAGAAAAAGGACAGAAAA
+ACTGGGTTAAATGTGCAGTGGCAAAGTCAACCCATCACGTTGTTTCCGGTCTGAGAGAGAATTCTGAATA
+CTTTTTCCGAGTGTTTGCTGAAAATCAAGCTGGCCTGAGTGACCCGAGAGAGCTTCTGCTTCCTGTTCTT
+ATTAAGGAGCAACTAGAACCACCTGAAATTGATATGAAGAATTTCCCAAGTCACACTGTATATGTTAGAG
+CTGGTTCAAACCTTAAAGTTGACATTCCAATCTCTGGAAAACCACTTCCCAAAGTGACCTTATCAAGAGA
+TGGTGTCCCCCTTAAGGCAACCATGAGATTTAATACCGAAATTACTGCTGAGAACCTGACCATCAATCTC
+AAAGAAAGTGTTACAGCTGACGCTGGGAGATATGAAATCACTGCTGCCAACTCCAGTGGTACAACCAAAG
+CTTTCATTAACATTGTTGTGCTAGACAGGCCTGGTCCTCCAACTGGCCCTGTTGTTATTAGTGATATAAC
+TGAAGAAAGTGTGACTCTCAAATGGGAGCCACCTAAGTATGACGGTGGAAGTCAAGTTACCAACTACATT
+CTACTCAAAAGAGAAACAAGTACTGCAGTGTGGACTGAAGTGTCTGCAACAGTTGCAAGAACCATGATGA
+AAGTCATGAAACTGACCACAGGAGAAGAATACCAATTCCGCATCAAGGCAGAAAACCGCTTTGGCATCAG
+TGATCATATAGATTCAGCTTGTGTGACTGTCAAACTACCATACACAACACCTGGACCACCATCTACACCA
+TGGGTCACTAATGTTACTCGAGAAAGCATCACTGTGGGCTGGCATGAACCAGTGTCAAATGGAGGCAGTG
+CAGTCGTAGGCTATCACCTGGAAATGAAAGACAGAAACAGTATTTTATGGCAAAAAGCCAACAAACTGGT
+CATCCGCACAACTCACTTCAAAGTCACAACAATCAGTGCTGGACTTATTTATGAATTCAGGGTGTATGCA
+GAAAATGCTGCTGGAGTTGGAAAACCTAGCCATCCTTCTGAACCAGTCTTGGCAATTGATGCTTGTGAAC
+CCCCAAGAAATGTTCGTATCACTGATATTTCAAAGAACTCTGTCAGCCTTTCATGGCAACAACCAGCTTT
+CGATGGAGGTAGCAAGATTACAGGCTACATTGTTGAGAGACGTGACCTTCCAGATGGCAGATGGACCAAG
+GCCAGCTTCACCAATGTTACTGAAACTCAATTCATCATCTCTGGCTTGACTCAGAATTCCCAGTATGAAT
+TCCGTGTCTTTGCTAGGAATGCTGTTGGTTCCATTAGCAATCCATCTGAGGTTGTAGGGCCCATTACTTG
+CATCGATTCTTATGGTGGTCCTGTAATTGATTTGCCTCTAGAATATACAGAAGTTGTCAAATACAGAGCA
+GGTACATCTGTGAAGCTCAGAGCTGGCATTTCTGGCAAACCTGCGCCTACTATTGAGTGGTATAAAGATG
+ATAAAGAATTACAAACCAATGCACTGGTGTGTGTTGAAAATACCACGGACCTCGCATCTATACTCATCAA
+AGATGCCGATCGCCTTAATAGTGGATGCTATGAATTAAAACTAAGGAATGCCATGGGCTCAGCCTCAGCC
+ACCATCAGAGTACAGATCCTTGACAAACCAGGCCCACCTGGTGGACCAATTGAATTTAAGACTGTAACTG
+CTGAGAAGATCACCCTTCTCTGGCGGCCTCCAGCTGATGATGGTGGTGCAAAAATCACTCACTACATTGT
+GGAAAAGCGTGAGACAAGCCGCGTTGTGTGGTCTATGGTGTCTGAACATTTGGAAGAGTGCATCATTACA
+ACCACCAAAATTATCAAAGGAAATGAATACATCTTCCGGGTCCGAGCCGTGAACAAATATGGAATTGGCG
+AGCCACTGGAATCTGATTCCGTTGTAGCCAAGAACGCATTTGTTACACCTGGGCCACCAGGCATACCAGA
+AGTGACAAAGATTACCAAGAATTCGATGACTGTTGTATGGAGCAGGCCAATTGCAGATGGCGGTAGTGAT
+ATAAGTGGCTATTTCCTTGAAAAACGAGACAAGAAGAGCCTAGGATGGTTTAAAGTACTAAAAGAGACTA
+TCCGTGACACCAGACAAAAAGTAACAGGACTCACAGAAAACAGTGACTATCAATACAGAGTTTGTGCTGT
+AAACGCTGCTGGACAGGGTCCATTTTCTGAACCATCTGAATTCTACAAAGCTGCTGATCCTATTGATCCT
+CCAGGTCCACCTGCTAAGATAAGAATCGCAGATTCAACCAAGTCATCCATCACCCTTGGCTGGAGTAAGC
+CTGTCTATGATGGGGGCAGTGCTGTTACTGGGTATGTTGTCGAGATAAGACAAGGAGAGGAAGAGGAATG
+GACTACTGTCTCTACCAAAGGAGAGGTCAGAACTACAGAATATGTGGTATCCAACCTGAAACCTGGAGTC
+AATTACTACTTCCGGGTATCTGCTGTAAACTGTGCTGGACAAGGAGAACCTATAGAAATGAATGAACCTG
+TACAAGCTAAAGATATACTTGAGGCACCAGAGATTGACCTGGATGTGGCTCTCAGAACTTCTGTTATTGC
+CAAAGCTGGTGAAGATGTACAAGTGTTGATTCCCTTTAAAGGCAGACCTCCACCTACTGTCACATGGAGA
+AAAGATGAGAAGAATCTTGGCAGTGATGCCAGATACAGCATTGAAAACACTGATTCATCCTCATTACTCA
+CCATTCCTCAAGTTACTCGCAATGATACAGGAAAATATATTCTCACAATAGAAAATGGAGTTGGTGAACC
+TAAGTCTTCAACTGTGAGTGTTAAAGTGCTTGACACACCAGCTGCCTGCCAGAAACTACAGGTTAAACAT
+GTTTCTCGAGGCACAGTCACTTTGCTCTGGGATCCTCCTCTCATTGATGGAGGATCTCCAATAATTAATT
+ATGTCATTGAAAAGAGAGATGCCACCAAGAGAACATGGTCTGTCGTGTCACACAAATGTTCTAGCACATC
+CTTCAAGCTAATAGATTTGTCGGAGAAGACTCCATTCTTCTTCAGAGTTCTTGCAGAAAATGAAATTGGA
+ATTGGGGAACCCTGTGAAACTACAGAGCCAGTGAAGGCTGCTGAAGTACCAGCTCCTATACGTGATCTCT
+CAATGAAAGACTCAACAAAGACATCTGTCATCCTCAGCTGGACCAAACCTGACTTTGATGGTGGTAGCGT
+CATCACAGAATATGTTGTAGAAAGGAAAGGTAAAGGTGAACAGACGTGGTCCCACGCTGGCATAAGTAAG
+ACATGTGAAATTGAGGTTAGCCAACTTAAGGAGCAGTCAGTCCTGGAGTTCAGAGTGTTTGCCAAAAATG
+AGAAAGGACTGAGTGATCCTGTCACTATTGGGCCAATTACAGTGAAAGAACTTATTATTACACCTGAAGT
+TGACCTGTCAGATATCCCTGGGGCACAAGTCACTGTGAGAATTGGGCACAATGTGCACCTTGAATTACCT
+TATAAGGGAAAACCCAAACCATCCATCAGTTGGCTGAAAGATGGCTTGCCACTGAAAGAAAGTGAATTTG
+TTCGCTTCAGTAAAACTGAAAACAAAATTACTTTGAGTATTAAGAATGCCAAGAAGGAGCATGGAGGAAA
+ATACACTGTTATTCTTGATAATGCAGTGTGTAGAATTGCAGTCCCCATTACAGTCATCACCCTTGGCCCA
+CCATCAAAGCCCAAAGGACCCATTCGATTTGATGAAATCAAGGCTGATAGTGTCATCCTGTCATGGGATG
+TACCTGAAGATAATGGAGGAGGAGAAATTACTTGTTACAGCATCGAGAAGCGGGAAACTTCACAAACTAA
+CTGGAGGATGGTGTGTTCAAGTGTTGCCAGAACGACTTTCAAAGTTCCTAATCTAGTCAAAGATGCTGAG
+TACCAGTTTAGAGTGAGAGCAGAAAACAGATACGGAGTCAGCCAACCACTTGTCTCAAGCATTATTGTGG
+CAAAACACCAGTTCAGGATTCCTGGTCCCCCAGGAAAGCCAGTTATATACAATGTGACTTCTGATGGCAT
+GTCACTAACTTGGGATGCTCCAGTTTATGATGGTGGTTCAGAAGTTACTGGATTCCATGTTGAAAAGAAA
+GAAAGAAATAGCATCCTCTGGCAAAAAGTTAATACATCACCAATCTCTGGAAGAGAATATAGAGCCACTG
+GACTGGTAGAAGGTCTGGATTACCAATTCCGTGTATATGCTGAAAATTCTGCTGGCCTAAGCTCACCTAG
+TGACCCAAGCAAATTTACCTTAGCTGTTTCTCCAGTAGACCCACCTGGCACTCCTGACTACATTGATGTC
+ACCCGGGAAACCATCACACTTAAATGGAACCCACCATTGCGTGATGGAGGCAGTAAGATTGTGGGCTATA
+GCATTGAGAAACGGCAAGGAAATGAACGCTGGGTGAGATGCAACTTTACTGACGTCAGTGAATGTCAGTA
+CACAGTTACAGGACTCAGTCCTGGGGATCGCTATGAGTTCAGAATAATTGCAAGAAATGCTGTTGGAACT
+ATAAGCCCGCCCTCACAGTCTTCTGGCATTATTATGACAAGAGATGAAAATGTTCCACCAATAGTAGAGT
+TTGGCCCTGAATACTTTGATGGTCTCATTATTAAGTCCGGAGAGAGCCTTAGAATTAAAGCTTTGGTACA
+AGGAAGACCAGTGCCTCGAGTAACTTGGTTCAAAGATGGAGTGGAAATCGAAAAGAGGATGAATATGGAA
+ATAACCGACGTACTTGGATCCACCAGCCTATTTGTTAGAGATGCTACTCGGGACCATCGTGGTGTATACA
+CAGTGGAAGCCAAAAATGCATCTGGTTCTGCAAAAGCAGAAATTAAAGTGAAAGTACAAGATACACCAGG
+AAAAGTAGTTGGGCCAATAAGATTCACCAATATTACTGGGGAGAAGATGACTCTGTGGTGGGATGCCCCA
+CTCAATGACGGTTGTGCTCCCATAACCCACTACATCATTGAAAAACGGGAAACCAGCAGACTTGCCTGGG
+CACTAATTGAGGATAAATGTGAAGCCCAAAGTTACACTGCCATTAAACTAATAAACGGCAATGAATACCA
+ATTCCGTGTTTCTGCAGTTAACAAGTTTGGTGTTGGCAGGCCACTTGATTCTGATCCAGTGGTTGCTCAA
+ATACAATATACTGTTCCTGATGCCCCTGGCATTCCAGAACCTAGCAACATAACAGGCAACAGCATTACCC
+TGACATGGGCAAGGCCAGAATCAGATGGTGGCAGTGAAATTCAACAGTATATCCTTGAAAGAAGAGAAAA
+GAAAAGCACAAGATGGGTAAAAGTGATCAGCAAACGACCAATCTCTGAAACAAGATTCAAAGTCACTGGT
+CTGACAGAAGGCAATGAGTATGAATTCCATGTCATGGCTGAAAATGCTGCAGGAGTTGGACCTGCAAGTG
+GCATCTCAAGACTCATTAAATGTAGAGAGCCCGTCAACCCACCAGGTCCTCCCACAGTGGTCAAAGTAAC
+AGACACATCAAAGACAACTGTGAGCTTAGAATGGTCCAAACCAGTGTTTGATGGTGGCATGGAAATAATT
+GGGTATATTATTGAAATGTGTAAGGCCGACTTAGGAGACTGGCACAAGGTGAATGCAGAGGCATGTGTGA
+AAACAAGATATACAGTCACTGATCTACAAGCAGGTGAAGAATACAAATTCCGAGTTAGTGCTATCAATGG
+TGCTGGAAAAGGCGACAGCTGTGAAGTGACTGGCACAATTAAAGCAGTTGACCGGTTAACAGCTCCTGAG
+TTAGACATAGATGCAAACTTCAAACAGACTCATGTTGTTAGAGCTGGGGCCAGTATTCGCCTCTTCATTG
+CCTACCAAGGTAGACCTACTCCTACAGCTGTGTGGAGCAAACCAGACTCTAACCTTAGCCTTCGGGCTGA
+TATCCATACAACAGATTCCTTCAGCACCCTCACTGTGGAAAACTGCAACAGAAATGATGCAGGGAAATAT
+ACCCTTACTGTGGAAAACAACAGTGGTAGTAAGTCAATCACATTCACCGTGAAAGTGCTAGACACTCCAG
+GCCCACCTGGCCCAATTACCTTCAAAGATGTGACCCGGGGATCTGCTACATTGATGTGGGATGCCCCTCT
+TCTTGACGGTGGTGCCCGAATCCATCATTATGTGGTAGAGAAACGAGAGGCAAGTCGCCGTAGTTGGCAG
+GTTATCAGTGAAAAATGCACTCGTCAGATCTTCAAGGTCAATGACCTGGCCGAAGGTGTTCCGTACTATT
+TCCGTGTTTCTGCAGTAAATGAGTATGGTGTTGGTGAGCCCTATGAAATGCCAGAACCAATTGTAGCCAC
+AGAACAGCCTGCTCCACCTAGGAGACTTGATGTTGTTGATACTAGCAAATCCTCCGCAGTCTTAGCTTGG
+CTTAAACCTGACCACGATGGAGGCAGCCGGATCACTGGCTACCTGCTTGAAATGAGACAAAAGGGATCTG
+ACTTCTGGGTTGAAGCTGGTCACACCAAACAGCTAACTTTCACAGTAGAGCGTCTTGTTGAGAAAACTGA
+ATATGAATTCCGTGTGAAGGCCAAGAATGATGCTGGCTATAGTGAACCCAGAGAAGCCTTCTCTTCTGTC
+ATCATTAAGGAGCCTCAAATCGAGCCCACTGCTGACCTCACTGGAATTACCAATCAGCTTATAACTTGCA
+AAGCAGGAAGCCCATTTACCATTGACGTACCAATCAGTGGTCGTCCTGCCCCCAAAGTAACATGGAAACT
+GGAAGAAATGAGACTTAAAGAGACAGATCGAGTGAGCATTACAACAACAAAAGACAGAACCACACTGACT
+GTAAAGGACAGCATGAGAGGTGACTCTGGAAGATACTTCTTGACCCTGGAAAATACAGCTGGTGTTAAAA
+CATTTAGCGTCACAGTTGTGGTCATTGGAAGGCCAGGTCCAGTAACCGGCCCCATTGAGGTCTCATCTGT
+CTCAGCTGAATCGTGTGTCCTGTCATGGGGAGAACCTAAAGATGGAGGAGGCACTGAAATTACTAATTAC
+ATAGTTGAAAAGCGTGAATCGGGTACAACAGCTTGGCAGCTTGTCAATTCCAGTGTCAAGCGCACTCAAA
+TTAAAGTCACTCATCTCACAAAATACATGGAATATTCTTTCCGTGTCAGTTCAGAGAACAGATTTGGTGT
+CAGCAAACCTCTAGAATCAGCACCAATAATTGCTGAACATCCATTTGTCCCACCAAGCGCTCCTACCAGA
+CCTGAGGTCTACCATGTGTCTGCCAATGCCATGTCTATTCGTTGGGAAGAACCCTACCACGATGGTGGCA
+GTAAAATCATTGGCTACTGGGTTGAGAAGAAAGAACGTAATACAATTCTTTGGGTGAAAGAAAACAAAGT
+GCCATGCTTAGAGTGCAACTACAAAGTAACTGGTTTAGTAGAAGGACTGGAATATCAGTTCAGAACTTAT
+GCACTCAATGCTGCAGGTGTTAGCAAGGCCAGCGAAGCTTCAAGACCTATAATGGCTCAAAATCCAGTTG
+ATGCACCAGGCAGACCAGAGGTGACAGATGTCACAAGATCAACAGTATCACTGATTTGGTCTGCCCCAGC
+GTATGATGGAGGCAGCAAGGTTGTGGGCTACATCATAGAGCGTAAGCCAGTCAGTGAGGTAGGAGATGGT
+CGCTGGCTGAAGTGCAACTACACCATTGTATCTGACAATTTCTTCACCGTGACTGCTCTCAGTGAAGGAG
+ACACTTATGAGTTCCGTGTGTTAGCCAAGAATGCAGCAGGCGTAATTAGCAAAGGGTCTGAATCTACAGG
+CCCTGTCACTTGCCGAGATGAATACGCTCCACCCAAAGCCGAACTGGATGCCCGATTACACGGTGATCTG
+GTTACCATCAGAGCAGGTTCTGATCTTGTTCTGGATGCTGCAGTTGGTGGCAAACCTGAACCCAAAATTA
+TCTGGACCAAAGGAGACAAGGAGCTAGATCTCTGTGAAAAAGTCTCTTTGCAGTATACTGGCAAACGAGC
+AACTGCTGTGATCAAGTTCTGTGACAGAAGTGACAGTGGAAAATACACTTTAACAGTGAAAAATGCCAGC
+GGGACCAAGGCCGTGTCTGTCATGGTCAAAGTGCTTGATTCCCCTGGCCCATGTGGAAAGCTCACCGTCA
+GCAGAGTAACACAGGAGAAGTGCACTTTAGCCTGGAGCCTTCCGCAGGAAGACGGAGGAGCAGAAATCAC
+TCACTACATCGTGGAAAGACGCGAGACTAGCAGGCTCAACTGGGTGATTGTTGAAGGCGAATGCCCAACC
+CTATCCTATGTCGTTACCAGGCTCATCAAGAACAATGAGTACATATTCCGAGTGAGGGCAGTAAACAAAT
+ATGGCCCTGGTGTGCCTGTTGAATCAGAGCCAATTGTAGCCAGAAACTCATTCACTATTCCATCACCACC
+CGGCATACCTGAAGAAGTTGGGACTGGCAAAGAGCATATCATCATTCAGTGGACAAAACCTGAATCTGAT
+GGTGGCAATGAAATCAGCAACTACCTAGTAGACAAACGTGAGAAGAAGAGCCTGCGCTGGACACGTGTCA
+ACAAAGACTATGTGGTGTATGATACCAGGCTGAAGGTGACCAGCCTGATGGAGGGTTGTGATTACCAGTT
+CCGGGTGACCGCAGTGAATGCAGCTGGTAACAGTGAGCCCAGCGAAGCTTCCAACTTCATCTCATGCAGA
+GAACCATCATATACCCCTGGACCACCTTCTGCTCCAAGAGTTGTGGATACCACCAAACACAGCATTAGTT
+TGGCATGGACCAAACCCATGTACGATGGTGGTACTGACATTGTAGGATATGTTCTGGAAATGCAAGAGAA
+GGACACTGATCAGTGGTACCGAGTGCATACCAATGCCACAATAAGAAATACTGAATTCACTGTGCCAGAC
+CTTAAAATGGGCCAGAAATATTCCTTCAGAGTTGCTGCCGTGAACGTGAAGGGTATGAGCGAATACAGCG
+AATCAATTGCTGAAATTGAGCCCGTGGAAAGAATAGAAATACCAGATCTTGAGCTTGCAGATGATCTAAA
+GAAGACTGTGACCATCAGGGCTGGGGCCTCCTTGCGCTTGATGGTGTCTGTATCTGGAAGACCACCTCCT
+GTCATAACGTGGAGCAAGCAGGGCATTGACCTTGCAAGCCGGGCAATTATTGACACCACTGAGAGCTACT
+CATTGCTAATAGTGGACAAAGTTAATCGGTACGATGCTGGAAAATACACAATTGAAGCTGAAAACCAATC
+TGGCAAGAAATCAGCAACAGTCCTTGTTAAAGTCTATGATACTCCTGGTCCCTGTCCTTCAGTGAAAGTT
+AAGGAAGTATCAAGAGATTCTGTGACTATAACTTGGGAAATTCCCACGATTGATGGTGGAGCTCCAGTCA
+ACAATTACATCGTTGAGAAGCGTGAAGCTGCTATGAGAGCATTCAAAACAGTAACTACCAAATGCAGCAA
+GACACTTTACAGAATTTCTGGACTTGTAGAAGGAACCATGTACTATTTCAGAGTGCTGCCAGAAAATATT
+TATGGCATTGGAGAACCTTGTGAAACATCTGATGCAGTACTGGTCTCAGAAGTGCCTTTGGTGCCTGCAA
+AGCTAGAAGTGGTCGATGTCACCAAATCCACTGTTACCCTTGCCTGGGAAAAACCACTCTACGATGGTGG
+TAGCCGACTCACTGGATATGTTCTCGAGGCCTGCAAAGCTGGCACAGAGAGATGGATGAAGGTTGTCACC
+TTAAAACCCACAGTCCTAGAGCACACTGTTACTTCCTTAAATGAAGGTGAACAATACTTATTTAGAATAA
+GGGCACAAAATGAGAAAGGTGTGTCAGAACCAAGAGAGACTGTCACAGCCGTGACTGTACAAGACCTCAG
+AGTGTTGCCAACAATCGATCTTTCTACAATGCCTCAGAAGACCATCCATGTCCCAGCTGGCAGACCAGTA
+GAGCTGGTGATACCTATTGCTGGCCGTCCACCTCCTGCTGCTTCCTGGTTCTTTGCTGGTTCTAAACTGA
+GAGAATCAGAGCGTGTCACAGTTGAAACTCACACTAAAGTAGCTAAATTAACCATCCGTGAAACCACTAT
+CAGAGATACTGGAGAATACACACTTGAATTGAAGAATGTTACCGGAACTACTTCAGAAACCATTAAAGTT
+ATCATTCTTGACAAGCCTGGTCCACCAACAGGACCTATTAAGATTGATGAAATTGATGCTACATCAATTA
+CCATTTCCTGGGAACCACCTGAATTGGACGGTGGTGCTCCACTGAGTGGTTATGTGGTAGAACAACGTGA
+CGCTCATCGTCCAGGATGGCTGCCCGTTTCTGAATCAGTGACTAGGTCCACGTTTAAGTTTACCAGACTC
+ACCGAAGGAAATGAGTATGTGTTCCGTGTGGCTGCAACAAACCGCTTCGGGATTGGCTCTTACTTGCAGT
+CTGAGGTCATAGAGTGTCGCAGCAGCATCCGTATTCCTGGACCCCCAGAAACATTACAGATATTTGATGT
+TTCCCGTGATGGCATGACACTTACTTGGTACCCACCAGAGGATGACGGTGGCTCCCAAGTGACTGGATAT
+ATTGTGGAGCGCAAAGAAGTGAGAGCAGATCGATGGGTCCGTGTAAATAAAGTACCTGTGACAATGACAC
+GGTACCGCTCCACTGGCCTTACTGAAGGCTTAGAATATGAACACCGTGTCACAGCCATTAATGCAAGAGG
+GTCTGGGAAACCAAGTCGTCCTTCCAAACCCATCGTTGCCATGGATCCAATTGCTCCTCCAGGAAAGCCA
+CAAAACCCAAGAGTTACTGATACAACAAGGACATCAGTCTCCCTGGCCTGGAGTGTTCCAGAAGATGAAG
+GAGGATCTAAAGTCACAGGCTACTTGATTGAAATGCAAAAAGTAGATCAACATGAATGGACCAAGTGTAA
+CACCACTCCAACCAAGATTCGAGAGTATACTCTAACACACCTACCTCAGGGTGCAGAATACAGGTTCCGC
+GTCCTAGCTTGTAATGCTGGTGGACCTGGTGAGCCTGCTGAGGTACCAGGAACAGTCAAAGTCACTGAAA
+TGCTTGAATATCCTGATTATGAACTTGATGAAAGATACCAAGAAGGTATCTTTGTAAGGCAAGGTGGCGT
+CATCAGACTTACCATACCAATCAAAGGAAAACCATTCCCAATATGTAAATGGACCAAGGAAGGCCAGGAT
+ATTAGTAAGCGTGCCATGATTGCAACATCTGAAACACACACTGAGCTTGTGATCAAAGAAGCAGACAGGG
+GTGATTCTGGCACTTATGACCTGGTTCTGGAAAATAAATGTGGCAAGAAGGCTGTCTACATCAAGGTCAG
+GGTGATAGGAAGTCCCAACAGTCCAGAAGGGCCACTGGAATATGATGACATCCAAGTCCGCTCTGTGAGG
+GTCAGCTGGAGACCTCCTGCTGATGATGGTGGTGCTGACATCTTAGGCTACATCCTCGAGAGACGAGAAG
+TGCCTAAAGCCGCCTGGTATACCATTGATTCCAGAGTCCGAGGTACATCTCTGGTGGTAAAAGGCCTCAA
+AGAGAATGTAGAATACCATTTCCGTGTTTCAGCAGAAAACCAGTTTGGCATAAGCAAACCCTTGAAATCT
+GAGGAACCAGTCACACCAAAAACACCATTGAATCCTCCAGAACCTCCAAGCAATCCTCCAGAAGTACTCG
+ATGTAACCAAGAGTTCTGTTAGCTTGTCCTGGTCCCGGCCCAAAGATGATGGTGGTTCTAGAGTCACAGG
+CTACTACATCGAACGCAAAGAGACATCCACTGACAAGTGGGTCAGACACAACAAGACTCAGATCACCACC
+ACAATGTACACTGTCACAGGGCTTGTTCCCGATGCTGAGTATCAGTTCCGCATCATCGCACAGAATGATG
+TTGGCCTGAGTGAGACCAGCCCTGCTTCTGAACCAGTTGTTTGCAAAGATCCATTTGATAAACCAAGCCA
+ACCAGGAGAACTTGAGATTCTTTCAATATCCAAAGATAGTGTCACTCTACAGTGGGAGAAACCTGAATGT
+GATGGTGGTAAAGAAATTCTTGGATACTGGGTTGAATATAGACAGTCTGGAGACAGTGCCTGGAAGAAGA
+GCAATAAGGAACGTATTAAGGACAAGCAATTCACAATAGGAGGTTTGCTGGAAGCTACTGAGTATGAATT
+CAGGGTTTTTGCTGAGAATGAGACTGGGCTGAGCAGACCTCGCAGAACTGCTATGTCTATAAAGACTAAA
+CTCACATCTGGAGAGGCCCCAGGAATACGCAAAGAAATGAAGGATGTTACCACAAAATTGGGTGAAGCTG
+CTCAACTCTCATGCCAGATTGTTGGAAGGCCTCTTCCTGACATTAAATGGTACAGATTTGGTAAAGAGCT
+CATACAAAGCCGGAAATACAAAATGTCTTCAGATGGACGCACACACACTCTTACAGTAATGACAGAGGAA
+CAGGAAGATGAAGGTGTTTATACCTGCATAGCCACCAATGAGGTTGGAGAAGTAGAAACCAGTAGTAAGC
+TTCTCCTGCAAGCAACACCGCAGTTCCATCCTGGTTACCCACTGAAAGAGAAATATTATGGAGCTGTGGG
+TTCCACACTTCGGCTTCATGTTATGTACATTGGTCGTCCAGTACCTGCCATGACTTGGTTCCATGGTCAG
+AAACTTTTGCAAAACTCAGAAAACATTACTATTGAAAACACTGAGCACTATACTCATCTTGTCATGAAGA
+ATGTCCAACGTAAGACTCATGCTGGGAAATACAAAGTCCAGCTCAGCAATGTTTTTGGAACAGTTGATGC
+CATCCTTGATGTGGAAATACAAGATAAACCAGACAAACCTACAGGACCAATTGTGATCGAAGCTCTATTG
+AAGAACTCCGCAGTGATCAGCTGGAAACCACCCGCAGATGACGGAGGCTCCTGGATCACCAACTATGTGG
+TGGAAAAATGTGAGGCCAAGGAGGGGGCTGAATGGCAATTGGTGTCTTCAGCCATCTCAGTGACAACCTG
+TAGAATTGTGAACCTCACAGAAAATGCTGGCTATTACTTCCGGGTTTCAGCTCAGAACACTTTCGGCATC
+AGTGACCCTCTAGAAGTGTCCTCAGTTGTGATCATTAAGAGTCCATTTGAAAAGCCAGGTGCTCCTGGCA
+AACCAACTATTACTGCTGTCACAAAAGATTCTTGTGTTGTGGCCTGGAAGCCACCTGCCAGTGATGGAGG
+TGCAAAGATTAGAAATTACTACCTTGAGAAGCGTGAGAAGAAGCAGAATAAATGGATTTCTGTGACAACA
+GAAGAAATTCGAGAAACTGTCTTTTCAGTGAAAAACCTTATTGAAGGTCTTGAATACGAGTTTCGTGTGA
+AATGTGAAAATCTAGGTGGGGAAAGTGAATGGAGTGAAATATCAGAACCCATCACTCCCAAATCTGATGT
+CCCAATTCAGGCACCACACTTTAAAGAGGAACTGAGAAATCTAAATGTCAGATATCAGAGCAATGCTACC
+TTGGTCTGCAAAGTGACTGGTCATCCAAAACCTATCGTCAAATGGTACAGACAAGGCAAAGAAATCATTG
+CAGATGGATTAAAATATAGGATTCAAGAATTTAAGGGTGGCTACCACCAGCTCATCATTGCAAGTGTCAC
+AGATGATGATGCCACAGTTTACCAAGTCAGAGCTACCAACCAAGGGGGATCTGTGTCTGGCACTGCCTCC
+TTGGAAGTGGAAGTTCCAGCTAAGATACACTTACCTAAAACTCTTGAAGGCATGGGAGCAGTTCATGCTC
+TCCGAGGTGAAGTGGTCAGCATCAAGATTCCTTTCAGTGGCAAACCAGATCCTGTGATCACCTGGCAGAA
+AGGACAAGATCTCATTGACAATAATGGCCACTACCAAGTTATTGTCACAAGATCCTTCACATCACTTGTT
+TTCCCCAATGGGGTAGAGAGAAAAGATGCTGGTTTCTATGTGGTCTGTGCTAAAAACAGATTTGGAATTG
+ATCAGAAGACAGTTGAACTGGATGTGGCTGATGTTCCTGACCCACCCAGAGGAGTCAAAGTTAGTGATGT
+CTCACGAGATTCTGTCAACTTAACATGGACTGAGCCAGCCTCTGATGGTGGCAGCAAAATCACCAACTAC
+ATTGTTGAAAAATGTGCAACTACTGCAGAAAGATGGCTCCGTGTAGGACAGGCCCGAGAAACACGTTATA
+CCGTGATCAACTTATTTGGAAAAACAAGTTACCAGTTCCGGGTAATAGCTGAAAATAAATTTGGTCTGAG
+CAAGCCTTCAGAGCCTTCAGAACCAACCATAACCAAAGAAGATAAGACCAGAGCTATGAACTATGATGAA
+GAGGTAGATGAAACCAGGGAAGTCTCCATGACTAAAGCATCTCACTCTTCAACCAAGGAACTCTATGAGA
+AATATATGATTGCTGAAGATCTTGGGCGTGGTGAGTTTGGAATTGTCCATCGTTGTGTTGAAACATCCTC
+AAAGAAGACATACATGGCCAAATTTGTTAAAGTCAAAGGGACTGATCAGGTTTTGGTAAAGAAGGAAATT
+TCCATTCTGAATATTGCTAGGCATAGAAACATCTTACACCTCCATGAATCATTTGAAAGCATGGAAGAAT
+TAGTTATGATCTTTGAGTTTATATCAGGACTTGACATATTTGAGCGCATTAACACAAGTGCTTTTGAACT
+TAATGAAAGAGAAATTGTAAGTTATGTTCACCAGGTCTGTGAAGCACTTCAGTTTTTACACAGTCATAAT
+ATTGGACACTTTGACATTAGACCAGAAAATATCATTTACCAAACCAGAAGAAGCTCTACCATTAAAATCA
+TAGAATTTGGTCAAGCCCGTCAGCTGAAACCAGGGGACAACTTCAGGCTTCTATTCACTGCCCCAGAATA
+CTATGCACCTGAAGTCCACCAGCATGATGTTGTCAGCACAGCCACAGACATGTGGTCACTTGGAACACTG
+GTATATGTGCTATTGAGTGGTATCAACCCATTCCTGGCTGAAACTAACCAACAGATCATTGAGAATATCA
+TGAATGCTGAATATACTTTCGATGAGGAAGCATTCAAAGAGATTAGCATTGAAGCCATGGATTTTGTTGA
+CCGGTTGTTAGTGAAAGAGAGGAAATCTCGCATGACAGCATCGGAGGCTCTCCAGCACCCATGGTTGAAG
+CAGAAGATAGAAAGAGTCAGTACTAAAGTTATCAGAACATTAAAACACCGGCGTTATTACCACACCCTGA
+TCAAGAAAGACCTCAACATGGTTGTGTCAGCAGCCCGGATCTCCTGTGGTGGTGCAATTCGATCTCAGAA
+GGGAGTGAGTGTTGCTAAAGTTAAAGTGGCATCCATTGAAATTGGCCCAGTTTCTGGGCAGATAATGCAT
+GCAGTTGGTGAAGAAGGAGGACATGTCAAATATGTATGCAAAATTGAAAATTATGATCAGTCTACCCAAG
+TGACTTGGTACTTTGGCGTCCGACAGCTGGAGAACAGTGAGAAATACGAAATCACCTACGAAGATGGAGT
+GGCCATCCTCTATGTCAAAGACATTACCAAATTAGATGATGGTACCTACAGATGCAAAGTAGTCAATGAC
+TATGGTGAAGACAGTTCTTATGCAGAGCTATTTGTTAAAGGTGTGAGAGAAGTCTATGACTATTACTGCC
+GTAGAACCATGAAGAAAATTAAGCGCAGAACAGACACAATGAGACTCCTGGAAAGGCCACCAGAATTTAC
+CCTGCCTCTCTATAATAAGACAGCTTATGTAGGTGAAAATGTCCGGTTTGGAGTAACTATAACTGTCCAC
+CCAGAGCCTCATGTAACATGGTATAAATCAGGTCAGAAAATCAAACCAGGTGACAATGACAAGAAGTACA
+CATTTGAGTCAGACAAGGGTCTTTACCAATTAACAATCAACAGTGTCACTACAGATGATGACGCTGAATA
+TACTGTTGTGGCAAGGAACAAATATGGTGAAGACAGCTGTAAAGCAAAGCTGACAGTAACCCTACACCCA
+CCTCCAACAGATAGTACCTTAAGACCCATGTTCAAAAGGTTACTGGCAAATGCAGAATGCCAAGAAGGCC
+AAAGTGTCTGCTTTGAGATCAGAGTGTCTGGCATCCCCCCACCAACATTAAAATGGGAGAAAGATGGTCA
+GCCACTGTCCCTCGGGCCTAACATTGAAATTATCCATGAAGGCTTGGATTATTATGCTCTGCACATCAGG
+GACACTTTGCCTGAAGACACGGGTTATTATAGAGTCACAGCCACTAACACAGCTGGGTCCACCAGCTGCC
+AGGCTCACCTACAAGTGGAACGCCTGAGGTACAAGAAACAGGAATTCAAGAGTAAGGAGGAGCATGAGCG
+ACACGTACAAAAACAAATTGACAAAACCCTCAGAATGGCTGAAATTCTTTCTGGAACTGAAAGTGTACCA
+CTGACACAGGTAGCTAAAGAGGCTCTGAGAGAAGCTGCTGTCCTTTATAAACCGGCTGTAAGCACCAAGA
+CTGTAAAAGGGGAATTCAGACTTGAGATAGAAGAAAAGAAGGAGGAGAGAAAACTCCGGATGCCTTATGA
+TGTACCAGAGCCACGCAAGTATAAGCAGACTACCATAGAAGAAGACCAACGCATCAAGCAGTTCGTGCCC
+ATGTCTGACATGAAGTGGTATAAAAAGATACGTGATCAGTATGAAATGCCTGGGAAACTTGACAGAGTTG
+TACAGAAACGACCCAAGCGCATCCGCCTTTCAAGATGGGAACAGTTCTATGTGATGCCTCTTCCACGCAT
+TACAGATCAATACAGACCTAAATGGCGTATTCCTAAACTGTCCCAAGATGATCTTGAGATAGTGAGACCA
+GCCCGCCGGCGTACACCTTCTCCTGATTATGACTTTTACTACCGACCTAGAAGACGTTCTCTTGGGGACA
+TCTCTGATGAAGAATTACTCCTCCCCATTGATGACTACTTAGCAATGAAAAGAACAGAGGAAGAGAGGCT
+GCGTCTTGAAGAAGAGCTTGAGTTAGGTTTTTCAGCTTCACCCCCAAGTCGAAGCCCTCCACACTTTGAG
+CTTTCTAGCCTACGTTACTCTTCACCACAAGCTCATGTCAAGGTGGAGGAAACAAGAAAAGACTTCAGGT
+ATTCAACCTATCACATCCCAACGAAGGCTGAAGCTAGTACAAGTTATGCAGAACTGAGGGAACGGCATGC
+CCAGGCTGCGTACAGACAGCCAAAGCAACGGCAAAGAATCATGGCTGAGAGGGAGGATGAAGAGTTGCTT
+CGCCCAGTTACGACCACCCAGCATCTCTCAGAATACAAAAGCGAACTTGACTTCATGTCAAAGGAGGAAA
+AGTCTAGAAAGAAATCAAGGCGACAAAGAGAAGTGACAGAAATAACAGAAATTGAGGAAGAATACGAAAT
+CTCAAAACATGCTCAAAGAGAATCATCCTCATCTGCGTCTAGACTACTGAGACGACGGCGCTCCCTGTCT
+CCAACTTATATTGAGTTAATGAGGCCAGTGTCTGAGCTGATCCGGTCACGTCCACAACCGGCTGAGGAAT
+ACGAAGATGACACAGAAAGAAGGTCACCTACTCCAGAGAGAACTCGCCCACGATCCCCCAGCCCTGTGTC
+TAGTGAGAGATCACTCTCGAGATTTGAGAGGTCTGCAAGATTTGATATCTTTTCCAGGTATGAGTCCATG
+AAAGCTGCTTTAAAAACTCAGAAGACATCAGAAAGGAAGTATGAAGTTTTGAGTCAGCAGCCTTTCACAC
+TGGACCATGCCCCTCGAATCACACTGAGAATGCGCTCGCACAGGGTACCATGTGGCCAAAATACACGTTT
+TATTTTAAATGTTCAGTCTAAGCCAACTGCCGAGGTTAAATGGTACCACAATGGTGTGGAACTCCAAGAA
+AGCAGTAAGATTCATTACACCAACACGAGTGGAGTCCTCACCCTGGAAATTCTGGACTGTCATACTGATG
+ACAGTGGAACCTACCGTGCTGTGTGCACCAACTACAAGGGCGAAGCTTCTGACTATGCAACGTTGGACGT
+GACAGGAGGGGATTATACCACCTATGCTTCCCAACGCAGAGATGAAGAGGTCCCCAGATCTGTTTTCCCT
+GAGCTGACAAGAACAGAGGCGTATGCTGTTTCATCATTTAAGAAAACATCTGAGATGGAAGCTTCGTCTT
+CTGTCAGGGAAGTGAAATCACAGATGACGGAGACAAGGGAAAGTCTCTCCTCATATGAACACTCTGCATC
+TGCAGAAATGAAAAGTGCTGCATTAGAAGAAAAGTCACTGGAAGAAAAATCCACAACCAGAAAGATCAAG
+ACGACTTTGGCAGCAAGAATTCTAACAAAGCCACGGTCCATGACCGTCTACGAGGGCGAGTCTGCAAGGT
+TTTCTTGTGACACCGATGGTGAGCCGGTACCAACTGTGACCTGGCTGCGTAAAGGACAAGTGCTAAGTAC
+TTCTGCCCGCCACCAAGTGACCACCACAAAGTACAAATCAACCTTTGAGATCTCTTCAGTCCAGGCTTCC
+GATGAGGGCAATTACAGCGTGGTGGTAGAAAACAGTGAAGGGAAACAAGAAGCAGAGTTCACTCTGACTA
+TTCAAAAGGCCAGGGTAACTGAAAAGGCTGTGACATCACCACCAAGAGTCAAATCCCCAGAGCCTCGGGT
+GAAATCCCCAGAAGCAGTTAAGTCTCCAAAACGAGTGAAATCTCCAGAACCTTCTCACCCGAAAGCCGTA
+TCACCCACAGAGACAAAACCAACACCAACAGAGAAAGTTCAGCACCTCCCAGTCTCTGCCCCACCAAAGA
+TTACTCAGTTCCTGAAAGCAGAAGCTTCTAAAGAGATTGCAAAACTGACCTGTGTGGTTGAAAGCAGTGT
+ATTAAGGGCAAAAGAGGTCACCTGGTATAAAGATGGCAAGAAACTGAAGGAAAATGGGCATTTCCAGTTT
+CATTATTCAGCAGATGGTACCTATGAGCTCAAAATCAATAACCTCACTGAATCTGATCAAGGAGAATATG
+TTTGTGAGATTTCTGGTGAAGGTGGAACGTCTAAAACCAACTTACAATTTATGGGGCAAGCCTTTAAGAG
+TATCCATGAGAAGGTATCAAAAATATCAGAAACTAAGAAATCAGATCAGAAAACCACTGAGTCAACAGTA
+ACCAGAAAAACTGAACCAAAAGCTCCTGAACCAATTTCCTCAAAACCAGTAATTGTTACTGGGTTGCAGG
+ATACAACTGTTTCTTCAGACAGTGTTGCTAAATTTGCAGTTAAGGCTACTGGAGAACCCCGGCCAACTGC
+CATCTGGACAAAAGATGGAAAGGCCATTACACAAGGAGGTAAATATAAACTCTCTGAAGACAAGGGAGGG
+TTCTTCTTAGAAATTCATAAGACTGATACTTCTGACAGTGGACTTTATACTTGTACAGTAAAAAATTCAG
+CTGGATCTGTGTCCTCTAGCTGCAAATTAACAATAAAAGCTATAAAAGATACTGAGGCACAGAAAGTCTC
+TACACAAAAGACTTCTGAAATTACACCTCAGAAGAAAGCTGTTGTCCAAGAGGAAATTTCCCAAAAAGCC
+CTAAGGTCTGAAGAAATTAAGATGTCAGAGGCAAAATCTCAAGAAAAGTTAGCCCTCAAAGAGGAAGCTT
+CAAAGGTTCTGATTTCTGAAGAAGTCAAGAAATCAGCAGCAACCTCCCTGGAAAAATCCATTGTCCATGA
+GGAAATCACTAAAACATCACAGGCATCAGAAGAAGTCAGAACTCATGCTGAGATTAAAGCATTTTCTACT
+CAGATGAGCATAAACGAAGGTCAAAGACTGGTTTTAAAAGCCAACATTGCTGGTGCCACTGATGTGAAAT
+GGGTACTGAATGGCGTAGAGCTTACCAACTCTGAGGAGTACCGATATGGTGTCTCAGGCAGCGATCAGAC
+CCTAACCATCAAGCAAGCCAGTCACAGAGATGAAGGAATCCTCACCTGCATAAGCAAAACCAAGGAAGGA
+ATCGTCAAGTGTCAGTATGATTTGACACTGAGCAAAGAACTCTCAGATGCTCCAGCCTTCATCTCACAGC
+CTAGATCTCAAAATATTAATGAAGGACAAAATGTTCTCTTTACTTGTGAAATCAGTGGCGAGCCATCCCC
+TGAAATCGAATGGTTTAAAAACAACCTGCCAATTTCTATTTCTTCAAATGTCAGCATAAGCCGCTCCAGA
+AATGTATACTCCCTTGAAATCCGAAATGCATCAGTCAGCGACAGTGGAAAGTACACAATTAAGGCCAAAA
+ATTTCCGTGGCCAGTGTTCAGCTACAGCTTCCTTAATGGTCCTTCCTCTAGTTGAAGAACCTTCCAGAGA
+GGTAGTATTGAGAACAAGTGGTGACACAAGCTTGCAAGGAAGCTTCTCGTCTCAGTCAGTCCAAATGTCT
+GCCTCCAAGCAGGAGGCCTCCTTCAGCAGTTTCAGCAGCAGCAGTGCTAGCAGCATGACTGAGATGAAAT
+TTGCAAGCATGTCTGCCCAAAGCATGTCCTCCATGCAAGAGTCCTTTGTAGAAATGAGTTCCAGCAGCTT
+TATGGGAATATCTAATATGACACAACTGGAAAGCTCAACTAGTAAAATGCTTAAAGCAGGCATAAGAGGA
+ATTCCGCCTAAAATTGAAGCTCTTCCATCTGATATCAGCATTGATGAAGGCAAAGTTCTAACAGTAGCCT
+GTGCTTTCACGGGTGAGCCTACCCCAGAAGTAACATGGTCCTGTGGTGGAAGAAAAATCCACAGTCAAGA
+ACAGGGGAGGTTCCACATTGAAAACACAGATGACCTGACAACCCTGATCATCATGGACGTACAGAAACAA
+GATGGTGGACTTTATACCCTGAGTTTAGGGAATGAATTTGGATCTGACTCTGCCACTGTGAATATACATA
+TTCGATCCATTTAAGAGGGCCTGTGCCCTTATACTCTACACTCATTCTTAACTTTTCGCAAACGTTTCAC
+ACGGACTAATCTTTCTGAACTGTAAATATTTAAAGAAAAAAAAGTAGTTTTGTATCAACCTAAATGAGTC
+AAAGTTCAAAAATATTCATTTCAATCTTTTCATAATTGTTGACCTAAGAATATAATACATTTGCTAGTGA
+CATGTACATACTGTATATAGCCGGATTAACGGTTATAAAGTTTTGTACCATTTATTTTATGACATTTTAC
+AATGTAAGTTTTGAAACTAACTGTTGGTAGGAGAAAGTTTCTTATGGAACGAATACCCTGCTCAACATTT
+AATCAATCTTTGTGCCTCAACATACTGTTGATGTCTAAGTATGCCTCAGTGGGTTGAGAAAATCCCCATT
+GAAGATGTCCTGTCCACCTAAAAGAGAATGATGCTGTGCATATCACTTGATATGTGCACCAATACCTACT
+GAATCAGAAATGTAAGGCATTGGTGATGTTTGCATTTACCCTCCTGTAAGCAACACTTTAACGTCTTACA
+TTTTCTCTGATGATGTCACACAAAATTATCATGACAAATATTACCAGAGCAAAGTGTAACGGCCAACACT
+TTGTTCGCTCATTTTACGCTGTCTCTGACATAAGGAGTGCCTGAATAGCTTGGAAAAGTAACATCTCCTG
+GCCATCCCTTCATTTAACCAAGCTATTCAAGTATTCCTATGCCAGAGCAGTGCCAACTCTTGGAGGTCCC
+AGAGTGCAGCCAATGCCTTTGTGTGGTAGTTCTAAATTTTAATTGCACCTGAAAAACCTGGGCACCTAAG
+CAATGAGCCACAGCAAAAAGTAAAGAACAACAACAAAATAAAGCTGTTGTTAAATTTTAAACAATATTAC
+TAATTGCCCAAAATGTCAATTTGATGTAGTTCTTTTCATGCAAGTATAAATTCAATTGTTAGTTATAATT
+GTTGGACCTCCTTGAGATAGTAACAACAAAATAAAGCAAGCTATCTGCACCTCAAAA
+
diff --git a/seq/xurt8c.aa b/seq/xurt8c.aa
new file mode 100644
index 0000000..1cd68f0
--- /dev/null
+++ b/seq/xurt8c.aa
@@ -0,0 +1,5 @@
+>XURT8C | 40001 | glutathione transferase (EC 2.5.1.18) 8, cytosolic - rat
+MEVKPKLYYFQGRGRMEVIRWLLATAGVEFEEEFLETREQYEKLQKDDCLLFGQVPLVEIDGMLLTQTRA
+ILSYLAAKYNLYGKDLKERVRIDMYADGTQDLMMMIIGAPFKAPQEKEESLALAVKRAKNRYFPVFEKIL
+KDHGEAFLVGNQLSWADIQLLEAILMVEEVSAPVLSDFPLLQAFKTRISNIPTIKKFLQPGSQRKPPPDG
+HYVDVVRTVLKF 
diff --git a/seq/xurt8c.lc b/seq/xurt8c.lc
new file mode 100644
index 0000000..5a161b3
--- /dev/null
+++ b/seq/xurt8c.lc
@@ -0,0 +1,5 @@
+>XURT8C | 40001 | glutathione transferase (EC 2.5.1.18) 8, cytosolic - rat
+MEVKPKLYYFQGRGRMEVIRWLLATAGVEFEEEFLETREQYEKLQKDDCLLFGQVPLVEIDGMLLTQTRA
+ilsylaakynlygkdlkervridmyadgtqdlmmmiigapfkapqekeeslalavkraknryfpvfekil
+KDHGEAFLVGNQLSWADIQLLEAILMVEEVSAPVLSDFPLLQAFKTRISNIPTIKKFLQPGSQRKPPPDG
+HYVDVVRTVLKF 
diff --git a/seq/xurtg.aa b/seq/xurtg.aa
new file mode 100644
index 0000000..84c3414
--- /dev/null
+++ b/seq/xurtg.aa
@@ -0,0 +1,5 @@
+>XURTG glutathione transferase (EC 2.5.1.18) Ya - rat
+MSGKPVLHYFNARGRMECIRWLLAAAGVEFDEKFIQSPEDLEKLKKDGNLMFDQVPMVEIDGMKLAQTRA
+ILNYIATKYDLYGKDMKERALIDMYTEGILDLTEMIMQLVICPPDQKEAKTALAKDRTKNRYLPAFEKVL
+KSHGQDYLVGNRLTRVDIHLLELLLYVEEFDASLLTSFPLLKAFKSRISSLPNVKKFLQPGSQRKLPMDA
+KQIEEARKIFKF 
diff --git a/sql/README b/sql/README
new file mode 100644
index 0000000..d0bede0
--- /dev/null
+++ b/sql/README
@@ -0,0 +1,26 @@
+
+22-Jan-2014
+
+fasta36/sql
+
+================
+Perl and SQL for creating and searching mySQL and pgSQL sequence
+databases.
+
+See A. J. Mackey and W. R. Pearson (2004) "Using SQL databases for
+sequence similarity searching and analysis" Current Protocols in
+Bioinformatics L. Stein, Ed. Wiley, New York, pp 9.4.1-9.4.25
+
+create_seq_demo.sql
+mysql_demo1.sql   -- SQL file for fasta36 format 16 to search seqdb_demo database
+mysql_demo_pv.sql -- SQL file for searching 50000 Swissprot sequences using database format 16 (mysql)
+		     fasta36 ../seq/mgstm1.aa "../sql/mysql_demo_pv.sql 16"
+nr_to_sql.pl	  -- perl script to create mysql seqdb_demo sequence database
+pirpsd.sql        -- SQL file for fasta36 format 16 to search custom PIR database (obsolete)
+psql_demo1.sql    -- pgSQL version of mysql_demo1.sql
+psql_demo_pv.sql  -- pgSQL version of mysql_demo_pv.sql
+
+================
+alternate expansion strategy
+
+join_up50.pl      -- more sophisticated way to generate expanded sequences using SQL
diff --git a/sql/create_seq_demo.sql b/sql/create_seq_demo.sql
new file mode 100644
index 0000000..35188a0
--- /dev/null
+++ b/sql/create_seq_demo.sql
@@ -0,0 +1,30 @@
+-- (re)create the seq_demo database; IF EXISTS lets a first run succeed when it does not exist yet
+DROP DATABASE IF EXISTS seq_demo;
+CREATE DATABASE seq_demo;
+
+USE seq_demo;
+-- prot: one row per sequence; nr_to_sql.pl stores the residues (seq), their packed codes (bin) and the length (len)
+CREATE TABLE prot (
+id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
+seq TEXT NOT NULL,
+bin BLOB NOT NULL,
+len INT UNSIGNED NOT NULL
+);
+-- annot: one row per gi; nr_to_sql.pl sets prot_id to the prot.id of the sequence it just inserted
+CREATE TABLE annot (
+prot_id INT UNSIGNED NOT NULL,
+gi INT UNSIGNED NOT NULL PRIMARY KEY,
+db ENUM("gb","emb","dbj","prf","ref","pdb","pir","sp") NOT NULL,
+descr TEXT NOT NULL,
+
+INDEX (prot_id),
+INDEX (db)
+);
+-- sp: accession and name; nr_to_sql.pl fills it only for entries whose db is "sp"
+CREATE TABLE sp (
+    gi INT UNSIGNED NOT NULL,
+    acc   VARCHAR(10),
+    name  VARCHAR(10),
+
+    PRIMARY KEY (gi)
+);
diff --git a/sql/join_up50.pl b/sql/join_up50.pl
new file mode 100755
index 0000000..e0e5145
--- /dev/null
+++ b/sql/join_up50.pl
@@ -0,0 +1,99 @@
+#!/usr/bin/perl -w
+
+################################################################
+# copyright (c) 2004, 2014 by William R. Pearson and The Rector &
+# Visitors of the University of Virginia */
+################################################################
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under this License is distributed on an "AS
+# IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+# express or implied.  See the License for the specific language
+# governing permissions and limitations under the License. 
+################################################################
+
+# usage - join_up50.pl link_acc_XYZ12 > "link_acc_XYZ12.sql 16" file
+#
+# this program is designed to read a link_acc file from fasta36 -e "expand.sh" and
+# (1) extract the uniref accessions
+# (2) build a mySQL fasta_tmp table using those accessions
+# (3) write out a link_mysql.sql file that can be used to produce the
+#     sequences using format 16
+#
+
+use strict;
+use DBI;
+
+my $in_file = $ARGV[0];
+open (FIN, $in_file) || die "cannot open $in_file\n";
+# $db is the database named in the emitted query file; $db_tmp holds the accession table created below
+my $db = 'uniprot';
+my $db_tmp = 'fasta_tmp';
+# placeholder credentials; NOTE(review): $host is only printed at the end, both DSNs hard-code host xdb2
+my $host='host';
+my $user = 'user';
+my $password = 'password';
+
+my $dbh = DBI->connect("dbi:mysql:host=xdb2:$db",
+		       $user, $password,
+                       { RaiseError => 1, AutoCommit => 1}
+    ) or die $DBI::errstr;
+
+my $dbh_tmp = DBI->connect("dbi:mysql:host=xdb2:$db_tmp",
+			   $user, $password,
+			   { RaiseError => 1, AutoCommit => 1}
+    ) or die $DBI::errstr;
+
+my %up_sth = ( 
+    ur50_to_upacc => "SELECT uniprot_acc FROM  uniref50link WHERE uniref50_acc=?",
+    upacc_to_seq => "SELECT * FROM  trFull WHERE acc=?",
+    );
+
+# replace each SQL string with its prepared handle; NOTE(review): neither handle is executed in this script
+for my $sth (keys(%up_sth)) {
+  $up_sth{$sth} = $dbh->prepare($up_sth{$sth});
+}
+
+my %q_acc_uniq = ();	# accession => 1; the hash keys give the unique set
+# read "descr<TAB>score" lines and keep the <acc> part of each UniRefNN_<acc> identifier
+while (my $line = <FIN>) {
+  next if ($line =~ m/^UniRef50_UPI/);
+  chomp($line);
+  my ($descr, $score) = split(/\t/,$line);
+  # a line without a UniRef id (e.g. a blank line) must not add an undef/empty accession
+  my ($acc) = ((defined($descr) ? $descr : '') =~ m/UniRef\d+_(\w+)/i);
+  $q_acc_uniq{$acc} = 1 if (defined($acc));
+}
+close FIN;
+
+$dbh->disconnect();
+
+# now we have a hash of unique accessions
+# make a table (named after the input file) and put them in
+
+$dbh_tmp->do(qq{DROP TABLE IF EXISTS $in_file;}) or die $DBI::errstr;
+$dbh_tmp->do(qq{ CREATE TABLE $in_file ( acc CHAR(10) PRIMARY KEY );})
+    or die $DBI::errstr;
+
+my @acc_list = sort keys(%q_acc_uniq);
+# one multi-row INSERT: VALUES ("acc1"),("acc2"),...  (the list argument is @acc_list, restored from the archive's " at " mangling)
+my $sql_insert = "INSERT INTO $in_file (acc) VALUES (\"" . join(q/"),("/,@acc_list) . "\");\n" ;
+$dbh_tmp->do($sql_insert);
+$dbh_tmp->disconnect();
+
+# now the $in_file table should be full; write out the SQL join to produce the sequences we need.
+# output: a "host db user password;" line, three SELECTs, then a DROP of the accession table ('#' is presumably an accession placeholder filled in by the reader of this file)
+print "$host $db $user $password;\n";
+print qq{SELECT up.acc, protein.seq
+ FROM $db_tmp.$in_file AS fa_acc JOIN uniref50link on(fa_acc.acc=uniref50_acc)
+ JOIN annot AS up ON(uniprot_acc = up.acc AND fa_acc.acc != up.acc) JOIN protein USING(prot_id);
+SELECT acc, concat('up|',acc,'|',name,' ',descr) FROM annot WHERE acc='#';
+SELECT acc,protein.seq FROM protein INNER JOIN annot USING(prot_id)
+ WHERE annot.acc='#';
+DROP TABLE $db_tmp.$in_file;
+};
diff --git a/sql/mysql_demo1.sql b/sql/mysql_demo1.sql
new file mode 100644
index 0000000..07fa46d
--- /dev/null
+++ b/sql/mysql_demo1.sql
@@ -0,0 +1,6 @@
+seqdb_host seqdb_demo seqdb_user password;
+SELECT acc, protein.seq, sp_name
+ FROM annot INNER JOIN protein USING(prot_id) WHERE annot.db='sp' LIMIT 50000;
+SELECT acc, concat('sp|',acc,'|',sp_name,' ',descr) FROM annot WHERE acc='#' AND db='sp';
+SELECT acc,protein.seq FROM protein INNER JOIN annot USING(prot_id)
+ WHERE annot.acc='#' AND db='sp';
diff --git a/sql/mysql_demo_pv.sql b/sql/mysql_demo_pv.sql
new file mode 100644
index 0000000..70dcff3
--- /dev/null
+++ b/sql/mysql_demo_pv.sql
@@ -0,0 +1,6 @@
+seqdb_host seqdb_demo seqdb_user password;
+SELECT acc, protein.seq, sp_name, concat('sp|',acc,'|',sp_name,' ',descr)
+ FROM annot INNER JOIN protein USING(prot_id) WHERE annot.db='sp' LIMIT 50000;
+SELECT acc, concat('sp|',acc,'|',sp_name,' ',descr) FROM annot WHERE acc='#' AND db='sp';
+SELECT acc,protein.seq FROM protein INNER JOIN annot USING(prot_id)
+ WHERE annot.acc='#' AND db='sp';
diff --git a/sql/nr_to_sql.pl b/sql/nr_to_sql.pl
new file mode 100755
index 0000000..d64d897
--- /dev/null
+++ b/sql/nr_to_sql.pl
@@ -0,0 +1,103 @@
+#!/usr/bin/perl -w
+
+################################################################
+# copyright (c) 2004, 2014 by William R. Pearson and The Rector &
+# Visitors of the University of Virginia */
+################################################################
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under this License is distributed on an "AS
+# IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+# express or implied.  See the License for the specific language
+# governing permissions and limitations under the License. 
+################################################################
+
+use DBI;
+# make every warning fatal: the handler dies with the warning text
+$SIG{__WARN__} = sub { die @_ };
+
+my $mysql = DBI->connect("DBI:mysql:database=seq_demo;user=seq_demo;password=demo_pass");
+# write-lock the three tables that are filled below
+$mysql->do(q{LOCK TABLES prot WRITE,
+	     annot WRITE,
+	     sp WRITE });
+# codes for bytes that are not residues: $EL is used for byte values 0, 10 and 13, $NA for every other non-residue
+my $EL = 125;
+my $NA = 123;
+# @aatrans maps a byte value (0-127) to the code packed into prot.bin; upper- and lower-case letters share codes
+my @aatrans = ($EL,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$EL,$NA,$NA,$EL,$NA,$NA,
+	       $NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,
+	       $NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA, 24,$NA,$NA,$NA,$NA,$NA,
+	       $NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,
+	       $NA,  1, 21,  5,  4,  7, 14,  8,  9, 10,$NA, 12, 11, 13,  3,$NA,
+	        15,  6,  2, 16, 17,$NA, 20, 18, 23, 19, 22,$NA,$NA,$NA,$NA,$NA,
+	       $NA,  1, 21,  5,  4,  7, 14,  8,  9, 10,$NA, 12, 11, 13,  3,$NA,
+	        15,  6,  2, 16, 17,$NA, 20, 18, 23, 19, 22,$NA,$NA,$NA,$NA,$NA
+	      );
+
+my $ins_prot = $mysql->prepare(q{
+    INSERT INTO prot (seq,bin,len) VALUES (?, ?, ?)
+    });
+
+my $ins_annot = $mysql->prepare(q{
+    INSERT INTO annot (gi, prot_id, db, descr) VALUES (?, ?, ?, ?)
+    });
+
+my $ins_sp = $mysql->prepare(q{
+    INSERT INTO sp (gi, acc, name) VALUES (?, ?, ?)
+    });
+# package globals used by the load loop (some names are declared more than once)
+use vars qw( $seq $bin $tot_seq $tot_annot $tot_sp );
+use vars qw( $gi $prot_id $db $desc $sp_acc $sp_name );
+use vars qw( $header $seq @entries );
+use vars qw( $gi $db $db_acc $db_name $desc);
+
+$tot_seq = $tot_annot = $tot_sp = 0;
+
+for my $db_file ( @ARGV ) {
+    open(DATA, "<$db_file") or die $!;
+    local $/ = "\n>";
+    while (<DATA>) {
+	chomp; # strip the trailing "\n>" ($/), i.e. the start of the next record
+	($header, $seq) = $_ =~ m/^>?  # record separator (first entry)
+	    ( [^\n]* ) \n  # header line
+		(     .* )     # the sequence
+		    /osx; # o: compile once; s: "." also matches newline; x: whitespace and comments allowed in the pattern
+	# drop every non-word character and digit, then pack one @aatrans code per remaining byte
+	$seq =~ s/\W|\d//sg;
+	$bin = pack('C*', map { $aatrans[unpack('C', $_)] } split(//, $seq));
+	$ins_prot->execute($seq,$bin,length($seq));
+	$prot_id = $ins_prot->{mysql_insertid};
+
+	$tot_seq++;
+
+#	print STDERR "Inserted $prot_id: ". length($seq)."\n";
+	# the header line may hold several \001-separated entries for the same sequence
+	@entries = split(/\001/, $header);
+	# each entry is expected as "gi|<gi>|<db>|<acc>|<name> <description>"; NOTE(review): a non-matching entry leaves all five variables undef
+	for ( @entries ) {
+	    ($gi,$db,$db_acc,$db_name,$desc)= 
+		$_ =~ /^gi\|(\d+)\|([a-z]+)\|(\S*)\|(\S*) (.*)$/o;
+#	    print "$prot_id: $gi\t$db\t$db_acc\t$desc\n";
+	    $ins_annot->execute($gi,$prot_id,$db,$desc);
+
+	    $tot_annot++;
+
+	    if ($db eq "sp") {
+		$ins_sp->execute($gi,$db_acc,$db_name);
+		$tot_sp++;
+	    }
+	}
+    }
+    close(DATA);
+}
+
+print "Inserted $tot_seq sequences; $tot_annot annotations; $tot_sp swissprot\n";
+
+
+
diff --git a/sql/pirpsd.sql b/sql/pirpsd.sql
new file mode 100644
index 0000000..8507733
--- /dev/null
+++ b/sql/pirpsd.sql
@@ -0,0 +1,8 @@
+xdb.wrplab PIRPSD seq_demo demo_pass;
+SELECT PIRID, SEQUENCES, PIRID
+ FROM c_psdsequence;
+SELECT PIRID, concat(PIRID," ",TITLE) FROM c_psdmain
+ WHERE PIRID='#';
+SELECT PIRID, SEQUENCES, PIRID
+ FROM c_psdsequence
+ WHERE PIRID='#';
diff --git a/sql/psql_demo.sql b/sql/psql_demo.sql
new file mode 100644
index 0000000..8f2ec58
--- /dev/null
+++ b/sql/psql_demo.sql
@@ -0,0 +1,7 @@
+@ seqdb_demo seqdb_demo @;
+SELECT acc, protein.seq, sp_name
+ FROM annot INNER JOIN protein USING(prot_id) WHERE annot.db='sp';
+SELECT acc, 'sp|'||acc||'|'||sp_name||' '||descr FROM annot WHERE acc='#' AND db='sp';
+SELECT acc,protein.seq FROM protein INNER JOIN annot USING(prot_id)
+ WHERE annot.acc='#' AND db='sp';
+
diff --git a/sql/psql_demo1.sql b/sql/psql_demo1.sql
new file mode 100644
index 0000000..a80e38a
--- /dev/null
+++ b/sql/psql_demo1.sql
@@ -0,0 +1,6 @@
+seqdb_host seqdb_demo seqdb_user password;
+SELECT acc, protein.seq, 'sp|'||acc||'|'||sp_name||' '||descr
+ FROM annot INNER JOIN protein USING(prot_id) WHERE annot.db='sp' LIMIT 50000;
+SELECT acc, 'sp|'||acc||'|'||sp_name||' '||descr FROM annot WHERE acc='#' AND db='sp';
+SELECT acc,protein.seq FROM protein INNER JOIN annot USING(prot_id)
+ WHERE annot.acc='#' AND db='sp';
diff --git a/sql/psql_demo_pv.sql b/sql/psql_demo_pv.sql
new file mode 100644
index 0000000..1b6a5ec
--- /dev/null
+++ b/sql/psql_demo_pv.sql
@@ -0,0 +1,7 @@
+seqdb_host seqdb_demo seqdb_user password;
+SELECT acc, protein.seq, 'sp|'||acc||'|'||sp_name||' '||descr
+ FROM annot INNER JOIN protein USING(prot_id) WHERE annot.db='sp' LIMIT 50000;
+SELECT acc, descr FROM annot WHERE acc='#' AND db='sp';
+SELECT acc,protein.seq FROM protein INNER JOIN annot USING(prot_id)
+ WHERE annot.acc='#' AND db='sp';
+
diff --git a/src/a_mark.h b/src/a_mark.h
new file mode 100644
index 0000000..9396f34
--- /dev/null
+++ b/src/a_mark.h
@@ -0,0 +1,51 @@
+/* a_mark.h - symbols used to indicate match/mismatch alignment code */
+
+/* $Id: a_mark.h 1024 2012-08-07 18:08:45Z wrp $ */
+
+/* copyright (c) 2003 by William R. Pearson and The Rector & Visitors
+   of the University of Virginia */
+
+/* Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under this License is distributed on an "AS
+   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+   express or implied.  See the License for the specific language
+   governing permissions and limitations under the License. 
+*/
+
+/* character types in aln_code strings */
+#define M_BLANK 0
+#define M_NEG 1
+#define M_ZERO 2
+#define M_POS 3
+#define M_IDENT 4
+#define M_DEL 5
+
+#define MX_A0 0
+#define MX_A1 1
+#define MX_A2 2
+#ifdef M10_CONS_L
+#define MX_A10 3
+#else
+#define MX_A10 4
+#endif
+#define MX_ACC 5
+#define MX_ABLAST 6
+
+static char *	/* one six-character row per MX_* style; presumably indexed [MX_*][M_BLANK..M_DEL] -- confirm against callers */
+aln_map_sym[] = {"  ..: ",	/* 0: MX_A0 */
+		 " Xxx  ",	/* 1: MX_A1 */
+		 "    . ",	/* 2: MX_A2 */
+		 " mzp=-",	/* 3: 10a (MX_A10 when M10_CONS_L is defined) */
+		 "  ..:-",	/* 4: 10b (MX_A10 otherwise) */
+		 " <z>=-",	/* 5: MX_ACC, calc_code */
+		 "   += "	/* 6: MX_ABLAST, blast */
+		  };
+
+
+
diff --git a/src/aamap.h b/src/aamap.h
new file mode 100644
index 0000000..019d70f
--- /dev/null
+++ b/src/aamap.h
@@ -0,0 +1,17 @@
+
+/*	aamap.gbl	character and number translations */
+
+/* $Id: aamap.h 625 2011-03-23 17:21:38Z wrp $ */
+/* $Revision: 625 $  */
+
+char aacmap[64]={
+	'K','N','K','N','T','T','T','T','R','S','R','S','I','I','M','I',
+	'Q','H','Q','H','P','P','P','P','R','R','R','R','L','L','L','L',
+	'E','D','E','D','A','A','A','A','G','G','G','G','V','V','V','V',
+	'X','Y','X','Y','S','S','S','S','X','C','W','C','L','F','L','F'
+	};
+
+int aamap[64];	/* integer aa values */
+int aamapr[64]; /* reverse sequence map */
+
+
diff --git a/src/ag_stats.c b/src/ag_stats.c
new file mode 100644
index 0000000..ff7d3cc
--- /dev/null
+++ b/src/ag_stats.c
@@ -0,0 +1,129 @@
+/* $Id: ag_stats.c $ */
+
+/* this procedure implements Altschul's pre-calculated values for lambda, K */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+#include "alt_parms.h"
+
+static double K, Lambda, H;
+extern int
+look_p(struct alt_p parm[], int gap, int ext, 
+       double *K, double *Lambda, double *H);
+
+int	/* returns look_p()'s result: 1 if parameters were found, 0 if not (or if pam_type is not recognized) */
+ag_parm(char *pam_type, int gdelval, int ggapval)	/* select the table for pam_type and load K, Lambda, H for these gap penalties */
+{
+  int r_v, t_gdelval, t_ggapval;
+
+#ifdef OLD_FASTA_GAP
+  t_gdelval = gdelval;	/* old convention: gdelval is passed through unchanged */
+  t_ggapval = ggapval;
+#else
+  t_gdelval = gdelval+ggapval;	/* otherwise the lookup key is open + extend */
+  t_ggapval = ggapval;
+#endif
+
+  /* each branch stores its result in the file-static K, Lambda, H used by E1_to_s() and s_to_E4() */
+  if (strcmp(pam_type,"BL50")==0 || strcmp(pam_type,"BLOSUM50")==0)
+      r_v = look_p(bl50_p,t_gdelval,t_ggapval,&K,&Lambda,&H);
+  else if (strcmp(pam_type,"BL62")==0 || strcmp(pam_type,"BLOSUM62")==0)
+      r_v = look_p(bl62_p,t_gdelval,t_ggapval,&K,&Lambda,&H);
+  else if (strcmp(pam_type,"P250")==0)
+      r_v = look_p(p250_p,t_gdelval,t_ggapval,&K,&Lambda,&H);
+  else if (strcmp(pam_type,"P120")==0)
+      r_v = look_p(p120_p,t_gdelval,t_ggapval,&K,&Lambda,&H);
+  else if (strcmp(pam_type,"MD10")==0)
+      r_v = look_p(md10_p,t_gdelval,t_ggapval,&K,&Lambda,&H);
+  else if (strcmp(pam_type,"MD20")==0)
+      r_v = look_p(md20_p,t_gdelval,t_ggapval,&K,&Lambda,&H);
+  else if (strcmp(pam_type,"MD40")==0)
+      r_v = look_p(md40_p,t_gdelval,t_ggapval,&K,&Lambda,&H);
+  else if (strcmp(pam_type,"DNA")==0 || strcmp(pam_type,"+5/-4")==0)
+      r_v = look_p(nt54_p,t_gdelval,t_ggapval,&K,&Lambda,&H);
+  else r_v = 0;	/* no pre-calculated table for this matrix name; K, Lambda, H are left untouched */
+
+  return r_v;
+}
+
+int	/* 1 when a row was found and the K, Lambda and H outputs were written, 0 otherwise */
+look_p(struct alt_p parm[], int gap, int ext,
+       double *K, double *Lambda, double *H)	/* look up the (gap, ext) row of parm[] */
+{
+  int i;
+
+  gap = -gap;	/* table rows hold positive numbers, so both arguments are sign-flipped (presumably callers pass negative penalties -- confirm) */
+  ext = -ext;
+
+  if (gap > parm[1].gap) {	/* larger than the first real row: use row 0, the "(inf,inf)" entry */
+    *K = parm[0].K;
+    *Lambda = parm[0].Lambda;
+    *H = parm[0].H;
+    return 1;
+  }
+
+  for (i=1; parm[i].gap > 0; i++) {	/* stops at the {-1,...} sentinel row; parm[] must have one */
+    if (parm[i].gap > gap) continue;	/* skip rows with a larger gap value */
+    else if (parm[i].gap == gap && parm[i].ext > ext ) continue;	/* same gap, larger ext: keep going */
+    else if (parm[i].gap == gap && parm[i].ext == ext) {	/* exact match */
+      *K = parm[i].K;
+      *Lambda = parm[i].Lambda;
+      *H = parm[i].H;
+      return 1;
+    }
+    else break;	/* already past where the row would be; relies on rows sorted by descending gap, then descending ext */
+  }
+  return 0;
+}
+
+int E1_to_s(double e_val, int n0, int n1) {	/* score whose expectation is e_val for lengths n0, n1; never negative */
+  double mp, np, a_n0, a_n0f, a_n1, a_n1f;	/* uses the file-static K, Lambda, H stored by ag_parm() */
+  int score;
+
+  a_n0 = (double)n0;
+  a_n0f = log(a_n0)/H;	/* length correction term for n0 */
+
+  a_n1 = (double)n1;
+  a_n1f = log(a_n1)/H;	/* length correction term for n1 */
+
+  mp = a_n0 - a_n0f - a_n1f;	/* corrected lengths: both correction terms are subtracted from each */
+  np = a_n1 - a_n0f - a_n1f;
+
+  if (np < 1.0) np = 1.0;	/* corrected lengths are clamped to at least 1 */
+  if (mp < 1.0) mp = 1.0;
+
+  /*
+  e_val = K * np * mp * exp ( - Lambda * score);
+  log(e_val) = log(K np mp) - Lambda * score;
+  (log(K np mp)-log(e_val)) / Lambda = score;
+  */
+  score = (int)((log( K * mp * np) - log(e_val))/Lambda +0.5);	/* +0.5 rounds to nearest before truncation */
+  if (score < 0) score = 0;
+  return score;
+}
+
+double s_to_E4(int score, int n0, int  n1)	/* statistical significance of score for lengths n0, n1, scaled by 10000 */
+{
+  double p_val;	/* starts as the expectation K*m'*n'*exp(-Lambda*score) */
+  double mp, np, a_n0, a_n0f, a_n1, a_n1f;	/* uses the file-static K, Lambda, H stored by ag_parm() */
+  
+  a_n0 = (double)n0;
+  a_n0f = log(a_n0)/H;	/* length correction term for n0 */
+
+  a_n1 = (double)n1;
+  a_n1f = log(a_n1)/H;	/* length correction term for n1 */
+
+  mp = a_n0 - a_n0f - a_n1f;	/* corrected lengths: both correction terms are subtracted from each */
+  np = a_n1 - a_n0f - a_n1f;
+
+  if (np < 1.0) np = 1.0;	/* corrected lengths are clamped to at least 1 */
+  if (mp < 1.0) mp = 1.0;
+
+  p_val = K * np * mp * exp ( - Lambda * score);
+
+  if (p_val > 0.01) p_val = 1.0 - exp(-p_val);	/* above 0.01 the value is converted with 1 - exp(-x) */
+
+  return p_val * 10000.0;
+}
+
diff --git a/src/aln_structs.h b/src/aln_structs.h
new file mode 100644
index 0000000..9c69711
--- /dev/null
+++ b/src/aln_structs.h
@@ -0,0 +1,61 @@
+/* $Id: aln_structs.h 1139 2013-04-16 01:00:09Z wrp $ */
+
+#ifndef A_STRUCT
+#define A_STRUCT
+
+struct a_struct {
+  int smin0;		/* coordinate of display start in seqc0 */
+  int smin1;		/* coordinate of display start in seqc1 */
+  int amin0, amax0;	/* coordinate of alignment start in seqc0 */
+  int amin1, amax1;	/* coordinate of alignment start in seqc1 */
+  int calc_last_set; 	/* boolean that indicates structure was set by
+			   calc_code, calc_cons, etc */
+
+  int llen;
+  int llcntx, llcntx_set, showall;
+
+  int qlrev, qlfact;
+  int llrev, llfact, llmult;
+  int frame;
+
+  int nident, nsim, npos, nmismatch, lc, ngap_q, ngap_l, nfs;	/* number of identities, gaps in q, l */
+  long q_start_off, q_end_off;	/* used in -m 9 calculate for full segment offset */
+  long l_start_off, l_end_off;	
+  long q_offset, l_offset;	/* offsets that include everything */
+  long d_start0,d_stop0;
+  long d_start1,d_stop1;
+};
+
+struct annot_var_str {
+  long v_pos;
+  unsigned char o_res;
+  unsigned char v_res;
+};
+
+struct a_res_str {
+  int sw_score;		/* do_walign() score */
+  struct rstruct rst;
+  int score_delta;	/* variant score change */
+  int min0, max0;	/* boundaries of alignment in aa0 */
+  int min1, max1;	/* boundaries of alignment in aa1 */
+  int v_start, v_len;	/* virtual start, length */
+  int *res;		/* encoded alignment */
+  int nres;		/* length of decoded alignment */
+  int mres;		/* length of encoding in res[] */
+  int n1;		/* length of library sequence used for this (sub) alignment */
+  struct a_res_str *next;	/* pointer to next alignment */
+
+  int index;		/* position in a_res chain */
+  /* encoded alignment/annotation information */
+  char *aln_code;
+  int aln_code_n;
+
+  char *annot_code;	/* annotation info written by calc_code() */
+  int annot_code_n;
+
+  char *annot_var_s;	/* annotation info written by calc_cons_a() */
+  char *annot_var_id; 	/* annotation info written by calc_id() */
+  char *annot_var_idd; 	/* annotation info written by calc_idd() */
+  struct a_struct aln;
+};
+#endif
diff --git a/src/alt_parms.h b/src/alt_parms.h
new file mode 100644
index 0000000..055025a
--- /dev/null
+++ b/src/alt_parms.h
@@ -0,0 +1,399 @@
+/* tables of Altschul-Gish parameters */
+
+/* $Id: alt_parms.h 625 2011-03-23 17:21:38Z wrp $ */
+
+/* first entry must be for (inf,inf) penalty */
+
+struct alt_p {	/* one table row; initializers below are written as {gap, ext, Lambda, K, H} */
+  int gap;	/* gap penalty key, stored as a positive number; 0 in the first row, -1 in the end-of-table row */
+  int ext;	/* gap extension key, stored as a positive number */
+  float Lambda;	/* statistical parameter Lambda for this (gap, ext) */
+  float K;	/* statistical parameter K for this (gap, ext) */
+  float H;	/* statistical parameter H for this (gap, ext) */
+};
+
+/* BL80 1/2 bit */
+struct alt_p bl80_p[] = {	/* rows are {gap, ext, Lambda, K, H}, sorted by descending gap then descending ext */
+  {0, 0, 0.343, 0.177, 0.66},	/* first row: (inf,inf) penalty */
+  {14, 2, 0.336, 0.150, 0.62},
+  {12, 2, 0.328, 0.130, 0.54},
+  {12, 1, 0.314, 0.096, 0.41},
+  {11, 2, 0.320, 0.110, 0.51},
+  {11, 1, 0.296, 0.066, 0.36},
+  {10, 2, 0.311, 0.097, 0.46},
+  {10, 1, 0.282, 0.052, 0.29},
+  { 9, 2, 0.292, 0.069, 0.33},
+  { 9, 1, 0.248, 0.026, 0.18},
+  { 8, 2, 0.271, 0.050, 0.27},
+  { 8, 1, 0.189, 0.0071, 0.07},
+  {-1, -1, -1.0, -1.0, -1.0} };	/* end-of-table row: a scan that runs while gap > 0 stops here instead of reading past the array */
+
+/* BL62 1/2 bit */
+struct alt_p bl62_p[] = {	/* rows are {gap, ext, Lambda, K, H}, sorted by descending gap then descending ext */
+  {0, 0, 0.318, 0.13, 0.40},	/* first row: (inf,inf) penalty */
+  {12, 3, 0.305, 0.10, 0.38},
+  {12, 2, 0.300, 0.09, 0.34},
+  {12, 1, 0.275, 0.05, 0.25},
+  {11, 3, 0.301, 0.09, 0.36},
+  {11, 2, 0.286, 0.07, 0.29},
+  {11, 1, 0.255, 0.035, 0.19},
+  {10, 4, 0.293, 0.08, 0.33},
+  {10, 3, 0.281, 0.06, 0.29},
+  {10, 2, 0.266, 0.04, 0.24},
+  {10, 1, 0.216, 0.014, 0.12},
+  {9, 5, 0.286, 0.08, 0.29},
+  {9, 4, 0.273, 0.06, 0.25},
+  {9, 4, 0.273, 0.06, 0.25},	/* NOTE(review): exact duplicate of the row above, and the table has no {9, 3} row -- possibly meant {9, 3, ...}; confirm key and values against the published table */
+  {9, 2, 0.244, 0.030, 0.18},
+  {9, 1, 0.176, 0.008, 0.06},
+  {8, 8, 0.270, 0.06, 0.25},
+  {8, 7, 0.270, 0.06, 0.25},
+  {8, 6, 0.262, 0.05, 0.23},
+  {8, 5, 0.262, 0.05, 0.23},
+  {8, 4, 0.262, 0.05, 0.23},
+  {8, 3, 0.243, 0.035, 0.18},
+  {8, 2, 0.215, 0.021, 0.12},
+  {7, 7, 0.247, 0.05, 0.18},
+  {7, 6, 0.247, 0.05, 0.18},
+  {7, 5, 0.230, 0.030, 0.15},
+  {7, 4, 0.230, 0.030, 0.15},
+  {7, 3, 0.208, 0.021, 0.11},
+  {7, 2, 0.164, 0.009, 0.06},
+  {6, 6, 0.200, 0.021, 0.10},
+  {6, 5, 0.200, 0.021, 0.10},
+  {6, 4, 0.179, 0.014, 0.08},
+  {6, 3, 0.153, 0.010, 0.05},
+  {5, 5, 0.131, 0.009, 0.04},
+  {-1, -1, -1.0, -1.0, -1.0},	/* end-of-table row */
+};
+
+/* BL50 1/3 bit */
+
+struct alt_p bl50_p[] = {
+  {0, 0, 0.232, 0.11, 0.34},
+  {16, 4, 0.222, 0.08, 0.31},
+  {16, 3, 0.213, 0.06, 0.27},
+  {16, 2, 0.207, 0.05, 0.24},
+  {16, 1, 0.180, 0.024, 0.15},
+  {15, 8, 0.222, 0.09, 0.31},
+  {15, 7, 0.219, 0.08, 0.29},
+  {15, 6, 0.219, 0.08, 0.29},
+  {15, 5, 0.216, 0.07, 0.28},
+  {15, 4, 0.216, 0.07, 0.28},
+  {15, 3, 0.210, 0.06, 0.25},
+  {15, 2, 0.202, 0.05, 0.22},
+  {15, 1, 0.166, 0.018, 0.11},
+  {14, 8, 0.218, 0.08, 0.29},
+  {14, 7, 0.214, 0.07, 0.27},
+  {14, 6, 0.214, 0.07, 0.27},
+  {14, 5, 0.214, 0.07, 0.27},
+  {14, 4, 0.205, 0.05, 0.24},
+  {14, 3, 0.201, 0.05, 0.22},
+  {14, 2, 0.188, 0.034, 0.17},
+  {14, 1, 0.140, 0.009, 0.07},
+  {13, 8, 0.211, 0.06, 0.27},
+  {13, 7, 0.205, 0.05, 0.24},
+  {13, 6, 0.205, 0.05, 0.24},
+  {13, 5, 0.205, 0.05, 0.24},
+  {13, 4, 0.202, 0.05, 0.22},
+  {13, 3, 0.188, 0.034, 0.18},
+  {13, 2, 0.174, 0.025, 0.13},
+  {13, 1, 0.114, 0.006, 0.04},
+  {12, 7, 0.205, 0.06, 0.24},
+  {12, 6, 0.197, 0.05, 0.21},
+  {12, 5, 0.197, 0.05, 0.21},
+  {12, 4, 0.192, 0.04, 0.18},
+  {12, 3, 0.178, 0.028, 0.15},
+  {12, 2, 0.158, 0.019, 0.10},
+  {11, 8, 0.197, 0.05, 0.21},
+  {11, 7, 0.190, 0.04, 0.19},
+  {11, 6, 0.190, 0.04, 0.19},
+  {11, 5, 0.184, 0.04, 0.17},
+  {11, 4, 0.177, 0.031, 0.15},
+  {11, 3, 0.167, 0.028, 0.11},
+  {11, 2, 0.130, 0.009, 0.06},
+  {10, 8, 0.183, 0.04, 0.17},
+  {10, 7, 0.178, 0.035, 0.16},
+  {10, 6, 0.178, 0.035, 0.16},
+  {10, 5, 0.168, 0.026, 0.13},
+  {10, 4, 0.156, 0.020, 0.10},
+  {10, 3, 0.139, 0.013, 0.07},
+  {10, 2, 0.099, 0.007, 0.03},
+  {9, 7, 0.164, 0.029, 0.13},
+  {9, 6, 0.152, 0.021, 0.10},
+  {9, 5, 0.152, 0.021, 0.10},
+  {9, 4, 0.134, 0.014, 0.07},
+  {9, 3, 0.107, 0.008, 0.04},
+  {8, 8, 0.139, 0.017, 0.08},
+  {8, 7, 0.134, 0.015, 0.07},
+  {8, 6, 0.127, 0.013, 0.06},
+  {8, 5, 0.117, 0.011, 0.05},
+  {8, 4, 0.101, 0.009, 0.03},
+  {7, 7, 0.100, 0.010, 0.04},
+  {7, 6, 0.094, 0.010, 0.03},
+  {-1, -1, -1.0, -1.0, -1.0},
+};
+
+struct alt_p p250_p[] = {
+  {0, 0, 0.229, 0.09, 0.23},
+  {16, 4, 0.217, 0.07, 0.21},
+  {16, 3, 0.208, 0.05, 0.18},
+  {16, 2, 0.200, 0.04, 0.16},
+  {16, 1, 0.172, 0.018, 0.09},
+  {15, 5, 0.215, 0.06, 0.20},
+  {15, 4, 0.208, 0.05, 0.18},
+  {15, 3, 0.203, 0.04, 0.16},
+  {15, 2, 0.193, 0.035, 0.14},
+  {15, 1, 0.154, 0.012, 0.07},
+  {14, 6, 0.212, 0.06, 0.19},
+  {14, 5, 0.204, 0.05, 0.17},
+  {14, 4, 0.204, 0.05, 0.17},
+  {14, 3, 0.194, 0.035, 0.14},
+  {14, 2, 0.180, 0.025, 0.11},
+  {14, 1, 0.131, 0.008, 0.04},
+  {13, 6, 0.206, 0.06, 0.17},
+  {13, 5, 0.196, 0.04, 0.14},
+  {13, 4, 0.196, 0.04, 0.14},
+  {13, 3, 0.184, 0.029, 0.12},
+  {13, 2, 0.163, 0.016, 0.08},
+  {13, 1, 0.110, 0.008, 0.03},
+  {12, 7, 0.199, 0.05, 0.15},
+  {12, 6, 0.191, 0.04, 0.13},
+  {12, 5, 0.191, 0.04, 0.13},
+  {12, 4, 0.181, 0.029, 0.12},
+  {12, 3, 0.170, 0.022, 0.10},
+  {12, 2, 0.145, 0.012, 0.06},
+  {11, 7, 0.186, 0.04, 0.13},
+  {11, 6, 0.180, 0.034, 0.11},
+  {11, 5, 0.180, 0.034, 0.11},
+  {11, 4, 0.165, 0.021, 0.09},
+  {11, 3, 0.153, 0.017, 0.07},
+  {11, 2, 0.122, 0.009, 0.04},
+  {10, 8, 0.175, 0.031, 0.11},
+  {10, 7, 0.171, 0.029, 0.10},
+  {10, 6, 0.165, 0.024, 0.09},
+  {10, 5, 0.158, 0.020, 0.08},
+  {10, 4, 0.148, 0.017, 0.07},
+  {10, 3, 0.129, 0.012, 0.05},
+  {9, 7, 0.151, 0.020, 0.07},
+  {9, 6, 0.146, 0.019, 0.06},
+  {9, 5, 0.137, 0.015, 0.05},
+  {9, 4, 0.121, 0.011, 0.04},
+  {9, 3, 0.102, 0.010, 0.03},
+  {8, 8, 0.123, 0.014, 0.05},
+  {8, 7, 0.123, 0.014, 0.05},
+  {8, 6, 0.115, 0.012, 0.04},
+  {8, 5, 0.107, 0.011, 0.03},
+  {7, 7, 0.090, 0.014, 0.02},
+  {-1, -1, -1.0, -1.0, -1.0},
+};
+
+struct alt_p p120_p[] = {
+  {0, 0, 0.342, 0.19, 0.63},
+  {12, 4, 0.334, 0.14, 0.60},
+  {12, 3, 0.330, 0.13, 0.57},
+  {12, 2, 0.330, 0.13, 0.57},
+  {12, 1, 0.219, 0.11, 0.46},
+  {11, 3, 0.330, 0.13, 0.57},
+  {11, 2, 0.323, 0.12, 0.51},
+  {11, 1, 0.296, 0.06, 0.38},
+  {10, 5, 0.323, 0.12, 0.54},
+  {10, 4, 0.314, 0.09, 0.50},
+  {10, 3, 0.314, 0.09, 0.50},
+  {10, 2, 0.301, 0.07, 0.42},
+  {10, 1, 0.273, 0.04, 0.28},
+  {9, 5, 0.316, 0.11, 0.49},
+  {9, 4, 0.311, 0.10, 0.45},
+  {9, 3, 0.311, 0.10, 0.45},
+  {9, 2, 0.284, 0.05, 0.35},
+  {9, 1, 0.239, 0.023, 0.18},
+  {8, 6, 0.307, 0.10, 0.43},
+  {8, 5, 0.295, 0.08, 0.39},
+  {8, 4, 0.295, 0.08, 0.39},
+  {8, 3, 0.284, 0.06, 0.34},
+  {8, 2, 0.262, 0.04, 0.26},
+  {8, 1, 0.183, 0.009, 0.08},
+  {7, 7, 0.286, 0.08, 0.34},
+  {7, 6, 0.286, 0.08, 0.34},
+  {7, 5, 0.276, 0.06, 0.31},
+  {7, 4, 0.276, 0.06, 0.31},
+  {7, 3, 0.255, 0.04, 0.24},
+  {7, 2, 0.224, 0.023, 0.16},
+  {6, 6, 0.248, 0.04, 0.23},
+  {6, 5, 0.248, 0.04, 0.23},
+  {6, 4, 0.234, 0.033, 0.19},
+  {6, 3, 0.216, 0.025, 0.15},
+  {6, 2, 0.160, 0.009, 0.06},
+  {5, 5, 0.191, 0.019, 0.11},
+  {5, 4, 0.173, 0.013, 0.09},
+  {5, 3, 0.134, 0.006, 0.05},
+  {-1, -1, -1.0, -1.0, -1.0}
+};
+
+struct alt_p bl55_p[] = {
+  {0, 0, 0.224, 0.12, 0.36},
+  {16, 4, 0.213, 0.08, 0.32},
+  {16, 3, 0.205, 0.07, 0.28},
+  {16, 2, 0.198, 0.06, 0.23},
+  {16, 1, 0.164, 0.020, 0.12},
+  {15, 8, 0.212, 0.09, 0.31},
+  {15, 7, 0.209, 0.08, 0.30},
+  {15, 6, 0.209, 0.08, 0.30},
+  {15, 5, 0.205, 0.07, 0.28},
+  {15, 4, 0.205, 0.07, 0.28},
+  {15, 3, 0.199, 0.06, 0.25},
+  {15, 2, 0.190, 0.05, 0.20},
+  {15, 1, 0.146, 0.013, 0.09},
+  {14, 7, 0.207, 0.08, 0.29},
+  {14, 6, 0.203, 0.07, 0.27},
+  {14, 5, 0.203, 0.07, 0.27},
+  {14, 4, 0.195, 0.05, 0.24},
+  {14, 3, 0.189, 0.04, 0.21},
+  {14, 2, 0.175, 0.030, 0.16},
+  {14, 1, 0.119, 0.006, 0.05},
+  {13, 8, 0.201, 0.07, 0.27},
+  {13, 7, 0.196, 0.06, 0.24},
+  {13, 6, 0.196, 0.06, 0.24},
+  {13, 5, 0.196, 0.06, 0.24},
+  {13, 4, 0.191, 0.05, 0.21},
+  {13, 3, 0.176, 0.032, 0.17},
+  {13, 2, 0.158, 0.020, 0.12},
+  {12, 8, 0.195, 0.06, 0.24},
+  {12, 7, 0.188, 0.05, 0.21},
+  {12, 6, 0.188, 0.05, 0.21},
+  {12, 5, 0.188, 0.05, 0.21},
+  {12, 4, 0.180, 0.04, 0.18},
+  {12, 3, 0.165, 0.026, 0.14},
+  {12, 2, 0.140, 0.014, 0.08},
+  {11, 8, 0.185, 0.05, 0.20},
+  {11, 7, 0.179, 0.04, 0.18},
+  {11, 6, 0.179, 0.04, 0.18},
+  {11, 5, 0.171, 0.033, 0.16},
+  {11, 4, 0.163, 0.027, 0.13},
+  {11, 3, 0.151, 0.022, 0.10},
+  {11, 2, 0.110, 0.008, 0.04},
+  {10, 10, 0.173, 0.04, 0.16},
+  {10, 9, 0.173, 0.04, 0.16},
+  {10, 8, 0.167, 0.035, 0.15},
+  {10, 7, 0.167, 0.035, 0.15},
+  {10, 6, 0.167, 0.035, 0.15},
+  {10, 5, 0.155, 0.025, 0.12},
+  {10, 4, 0.142, 0.017, 0.09},
+  {10, 3, 0.121, 0.011, 0.06},
+  {9, 9, 0.152, 0.026, 0.11},
+  {9, 8, 0.152, 0.026, 0.11},
+  {9, 7, 0.152, 0.026, 0.11},
+  {9, 6, 0.137, 0.018, 0.08},
+  {9, 5, 0.137, 0.018, 0.08},
+  {9, 4, 0.117, 0.011, 0.05},
+  {9, 3, 0.090, 0.007, 0.03},
+  {8, 8, 0.125, 0.014, 0.07},
+  {8, 7, 0.119, 0.013, 0.06},
+  {8, 6, 0.113, 0.012, 0.05},
+  {8, 5, 0.102, 0.010, 0.04},
+  {8, 4, 0.085, 0.009, 0.03},
+  {7, 7, 0.087, 0.010, 0.03},
+  {-1, -1, -1.0, -1.0, -1.0}
+};
+
+struct alt_p opt5_p[] = 
+{
+  {  0, 0, 0.1432, 0.111, 0.316},
+  { 22, 4, 0.128, 0.055, 0.16},
+  { 21, 4, 0.124, 0.044, 0.14},
+  { 20, 4, 0.121, 0.039, 0.13},
+  { 19, 4, 0.117, 0.033, 0.11},
+  { 18, 4, 0.112, 0.026, 0.09},
+  { 25, 3, 0.128, 0.051, 0.16},
+  { 24, 3, 0.124, 0.041, 0.14},
+  { 23, 3, 0.122, 0.038, 0.13},
+  { 22, 3, 0.118, 0.032, 0.11},
+  { 21, 3, 0.113, 0.025, 0.09},
+  { 29, 2, 0.129, 0.051, 0.16},
+  { 28, 2, 0.127, 0.047, 0.14},
+  { 27, 2, 0.124, 0.041, 0.13},
+  { 26, 2, 0.121, 0.035, 0.11},
+  { 25, 2, 0.117, 0.029, 0.10},
+  { 24, 2, 0.111, 0.021, 0.08},
+  { -1,-2, -1.0, -1.0, -1.0}
+};
+
+struct alt_p nt54_p[] =
+{
+  {0, 0, 0.192, 0.173, 0.36},
+  {16, 4, 0.192, 0.177, 0.36},
+  {-1, -1, -1.0, -1.0, -1.0}
+};
+
+struct alt_p rnt54_p[] =
+{
+  {0, 0, 0.192, 0.173, 0.36},
+  {16, 4, 0.192, 0.177, 0.36},
+  {-1, -1, -1.0, -1.0, -1.0}
+};
+
+struct alt_p nt32_p[] = {
+  {0,  0, 0.2712, 0.131, 0.22},
+  {18, 2, 0.2620, 0.100, 0.22},
+  {16, 4, 0.2600, 0.098, 0.22},
+  {16, 2, 0.2540, 0.081, 0.19},
+  {12, 4, 0.2340, 0.054, 0.15},
+  {-1, -1, -1.0, -1.0, -1.0}
+};
+
+struct alt_p nt13_p[] = {
+  {0,  0,  1.374, 0.711, 1.31},
+  {4,  1,  1.36,  0.67,  1.30},
+  {3,  1,  1.34,  0.58,  1.19},
+  {2,  1,  1.21,  0.34,  0.77},
+  {-1, -1, -1.0, -1.0, -1.0}
+};
+
+/* PAM-10 (1/10 Hartley ~ 1/3 bit scale) */
+
+struct alt_p md10_p[] = {
+  {0, 0, 0.2299, 0.309, 3.45},
+  {20, 4, 0.222, 0.21, 3.1},
+  {20, 2, 0.218, 0.18, 2.9},
+  {18, 4, 0.220, 0.20, 2.9},
+  {18, 2, 0.217, 0.18, 2.7},
+  {16, 4, 0.217, 0.19, 2.8},
+  {16, 2, 0.212, 0.17, 2.3},
+  {14, 4, 0.212, 0.17, 2.5},
+  {14, 2, 0.205, 0.15, 1.9},
+  {12, 4, 0.206, 0.16, 2.1},
+  {12, 2, 0.190, 0.11, 1.3},
+  {-1, -1, -1.0, -1.0, -1.0}
+};
+
+/* PAM-20 (1/10 Hartley ~ 1/3 bit scale) */
+struct alt_p md20_p[] = {
+  {0, 0, 0.230, 0.287, 2.94},
+  {20, 4, 0.221, 0.19, 2.6},
+  {20, 2, 0.219, 0.18, 2.5},
+  {18, 4, 0.220, 0.19, 2.5},
+  {18, 2, 0.218, 0.18, 2.3},
+  {16, 4, 0.218, 0.18, 2.4},
+  {16, 2, 0.213, 0.17, 2.0},
+  {14, 4, 0.213, 0.17, 2.1},
+  {14, 2, 0.204, 0.14, 1.6},
+  {12, 4, 0.207, 0.17, 1.8},
+  {12, 2, 0.187, 0.10, 1.1},
+  {-1, -1, -1.0, -1.0, -1.0}
+};
+
+/* PAM-40 (1/10 Hartley ~ 1/3 bit scale) */
+struct alt_p md40_p[] = {
+  {0,  0, 0.2293, 0.257, 2.22},
+  {20, 4, 0.225, 0.22, 2.1},
+  {20, 2, 0.222, 0.20, 1.9},
+  {18, 4, 0.224, 0.22, 2.0},
+  {18, 2, 0.220, 0.20, 1.8},
+  {16, 4, 0.219, 0.19, 1.8},
+  {16, 2, 0.212, 0.16, 1.5},
+  {14, 4, 0.211, 0.15, 1.6},
+  {14, 2, 0.199, 0.11, 1.2},
+  {12, 4, 0.203, 0.14, 1.3},
+  {12, 2, 0.177, 0.064, 0.7},
+  {-1, -1, -1.0, -1.0, -1.0}
+};
diff --git a/src/altlib.h b/src/altlib.h
new file mode 100644
index 0000000..b48e3d4
--- /dev/null
+++ b/src/altlib.h
@@ -0,0 +1,142 @@
+
+/* $Id: altlib.h 905 2012-01-30 17:33:06Z wrp $ */
+/* $Revision: 905 $  */
+
+/* #ifdef UNIX */
+/* ncbi blast 1.3 format */
+/*
+#define NCBIBL13 11
+extern int ncbl_getliba();
+extern void ncbl_ranlib();
+void ncbl_closelib();
+*/
+#define NCBIBL20 12
+/* #endif */
+
+#ifdef MYSQL_DB
+#define MYSQL_LIB 16
+#define LASTLIB (MYSQL_LIB+1)	/* parenthesized so the sum survives inside any larger expression */
+#endif
+
+#ifdef PGSQL_DB	/* NOTE(review): redefines LASTLIB if MYSQL_DB is also defined -- confirm the two are never enabled together */
+#define PGSQL_LIB 17
+#define LASTLIB (PGSQL_LIB+1)
+#endif
+
+#if !defined (LASTLIB) && defined(NCBIBL20)
+#define LASTLIB (NCBIBL20+1)	/* no SQL library type: the last type is NCBIBL20 */
+#endif
+#if !defined (LASTLIB)
+#define LASTLIB 10	/* fallback when none of the above applies */
+#endif
+
+#define FASTA_F 0
+#define DEFAULT 0
+#define FULLGB 1
+#define UNIXPIR 2
+#define EMBLSWISS 3
+#define INTELLIG 4
+#define VMSPIR 5
+#define GCGBIN 6
+#define FASTQ 7
+#define LASTTXT 7
+#define ACC_LIST 10
+
+#include "mm_file.h"
+
+/* pearson fasta format */
+int agetlib(unsigned char *, int, char *, int, fseek_t *, int *,
+	    struct lmf_str *, long *);
+void aranlib(char *, int, fseek_t, char *, struct lmf_str *);
+/* full uncompressed GB FULLGB*/
+extern int lgetlib(unsigned char *, int, char *, int, fseek_t *, int *,
+	    struct lmf_str *, long *);
+extern void lranlib(char *, int, fseek_t, char *, struct lmf_str *);
+/* PIR UNIX protein UNIXPIR */
+extern int pgetlib(unsigned char *, int, char *, int, fseek_t *, int *,
+	    struct lmf_str *, long *);
+extern void pranlib(char *, int, fseek_t, char *, struct lmf_str *);
+/* EMBL/SWISS-PROT EMBLSWISS */
+extern int egetlib(unsigned char *, int, char *, int, fseek_t *, int *,
+	    struct lmf_str *, long *);
+extern void eranlib(char *, int, fseek_t, char *, struct lmf_str *);
+
+/* Intelligenetics INTELLIG */
+extern int igetlib(unsigned char *, int, char *, int, fseek_t *, int *,
+	    struct lmf_str *, long *);
+extern void iranlib(char *, int, fseek_t, char *, struct lmf_str *);
+/* PIR VMS format */
+extern int vgetlib(unsigned char *, int, char *, int, fseek_t *, int *,
+	    struct lmf_str *, long *);
+extern void vranlib(char *, int, fseek_t, char *, struct lmf_str *);
+/* GCG 2bit format */
+extern int gcg_getlib(unsigned char *, int, char *, int, fseek_t *, int *,
+	    struct lmf_str *, long *);
+extern void gcg_ranlib(char *, int, fseek_t, char *, struct lmf_str *);
+
+/* FASTQ format (ignoring quality scores) */
+int qgetlib(unsigned char *, int, char *, int, fseek_t *, int *,
+	    struct lmf_str *, long *);
+void qranlib(char *, int, fseek_t, char *, struct lmf_str *);
+
+#ifdef NCBIBL20
+/* ncbi blast 2.0 format */
+extern int ncbl2_getliba(unsigned char *, int, char *, int, fseek_t *, int *,
+	    struct lmf_str *, long *);
+extern void ncbl2_ranlib(char *, int, fseek_t, char *, struct lmf_str *);
+void ncbl2_closelib();
+#endif
+
+#ifdef MYSQL_DB
+extern int mysql_getlib(unsigned char *, int, char *, int, fseek_t *, int *,
+	    struct lmf_str *, long *);
+extern void mysql_ranlib(char *, int, fseek_t, char *, struct lmf_str *);
+int mysql_closelib();
+#endif
+
+int (*getliba[LASTLIB])(unsigned char *, int, char *, int, fseek_t *, int *,
+	    struct lmf_str *, long *)={
+  agetlib,lgetlib,pgetlib,egetlib,
+  igetlib,vgetlib,gcg_getlib,qgetlib,
+  agetlib,agetlib
+#ifdef UNIX
+  ,agetlib
+#ifdef NCBIBL13
+  ,ncbl_getliba
+#else
+  ,ncbl2_getliba
+#endif
+#ifdef NCBIBL20
+  ,ncbl2_getliba
+#endif
+#ifdef MYSQL_DB
+  ,agetlib
+  ,agetlib
+  ,agetlib
+  ,mysql_getlib
+#endif
+#endif
+};
+
+void (*ranliba[LASTLIB])(char *, int, fseek_t, char *, struct lmf_str *)={
+  aranlib,lranlib,pranlib,eranlib,
+  iranlib,vranlib,gcg_ranlib,qranlib,
+  aranlib,aranlib
+#ifdef UNIX
+  ,aranlib
+#ifdef NCBIBL13
+  ,ncbl_ranlib
+#else
+  ,ncbl2_ranlib
+#endif
+#ifdef NCBIBL20
+  ,ncbl2_ranlib
+#endif
+#ifdef MYSQL_DB
+  ,aranlib
+  ,aranlib
+  ,aranlib
+  ,mysql_ranlib
+#endif
+#endif
+};
diff --git a/src/apam.c b/src/apam.c
new file mode 100644
index 0000000..a4f51d6
--- /dev/null
+++ b/src/apam.c
@@ -0,0 +1,502 @@
+/* apam.c	19-June-86 */
+
+/* $Id: apam.c 1281 2014-08-21 17:32:06Z wrp $ */
+
+/* copyright (c) 1987, 2014 by William R. Pearson and The Rector &
+   Visitors of the University of Virginia */
+
+/* Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under this License is distributed on an "AS
+   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+   express or implied.  See the License for the specific language
+   governing permissions and limitations under the License. 
+*/
+
+/*
+	read in the alphabet and pam matrix data
+	designed for universal matcher
+
+	This version reads BLAST format (square) PAM files
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "defs.h"
+#include "param.h"
+
+#define XTERNAL
+#include "uascii.h"
+#include "upam.h"
+#undef XTERNAL
+
+extern void alloc_pam (int d1, int d2, struct pstruct *ppst);
+extern void init_altpam(struct pstruct *ppst);
+
+/* pam_opts -- modify PAM matrix (pamoff, -MS) if -MS or +off is part
+   of PAM matrix name, e.g. MD20-MS or BL50+2 
+*/
+
+void
+pam_opts(char *smstr, struct pstruct *ppst) {
+  char *minus_p = strchr(smstr, '-');	/* first '-': start of a "-MS" or "-<offset>" suffix */
+  char *plus_p = (minus_p == NULL) ? strchr(smstr, '+') : NULL;	/* '+' is only looked for when there is no '-' */
+
+  ppst->pam_ms = 0;	/* defaults: no -MS modification, no offset */
+  ppst->pamoff = 0;
+
+  if (minus_p != NULL) {
+    if (strncmp(minus_p+1, "MS", 2) == 0 || strncmp(minus_p+1, "ms", 2) == 0) {
+      ppst->pam_ms = 1;
+    }
+    else {
+      ppst->pamoff = atoi(minus_p+1);	/* "-N" stores +N */
+    }
+    *minus_p = '\0';	/* cut the suffix off smstr in place */
+  }
+  else if (plus_p != NULL) {
+    ppst->pamoff = -atoi(plus_p+1);	/* "+N" stores -N */
+    *plus_p = '\0';	/* cut the suffix off smstr in place */
+  }
+}
+
+/* modified 13-Oct-2005 to accommodate asymmetrical matrices */
+/* modified 15-Jul-2010 to ensure constant NCBIstdaa encoding */
+/* ensure that all entries in NCBIstdaa have values */
+
+int	/* 1 on success; 0 if the file cannot be opened or its alphabet line is too long; exits on a short or missing matrix row */
+initpam (char *mfname, struct pstruct *ppst)	/* read the BLAST-format (square) matrix file mfname into ppst->pam2[0] */
+{
+   char    line[512] = "", *lp;	/* starts empty so an empty file is not parsed as uninitialized data */
+   int     i, j, iaa, pval, p_i, p_j;
+   int l_nsq;	/* 1 + number of residues in the alphabet line */
+   unsigned char l_sq[MAXSQ+1];	/* residues in matrix-file order, 1-based; l_sq[0] is NUL */
+   int max_val, min_val;
+   /* max_val/min_val track the score range that ends up in ppst->pam_h/pam_l */
+   FILE   *fmat;
+
+   pam_opts(mfname, ppst);
+
+   if ((fmat = fopen (mfname, "r")) == NULL)
+   {
+      printf ("***WARNING*** cannot open scoring matrix file %s\n", mfname);
+      fprintf (stderr,"***WARNING*** cannot open scoring matrix file %s\n", mfname);
+      return 0;
+   }
+
+/* removed because redundant, and causes crash under MacOSX -- because copying on top of itself */
+/*
+   SAFE_STRNCPY (ppst->pamfile, mfname, MAX_FN);
+*/
+   SAFE_STRNCPY(ppst->pam_name, ppst->pamfile, MAX_FN);
+
+   if (ppst->pam_ms) {
+     SAFE_STRNCAT(ppst->pam_name,"-MS",MAX_FN-strlen(ppst->pam_name));
+   }
+
+   /* 
+      the size of the alphabet is determined in advance 
+   */
+   ppst->nt_align = (ppst->dnaseq == SEQT_DNA || ppst->dnaseq == SEQT_RNA);
+
+   /* 
+     look for alphabet line, skipping the comments, alphabet ends up in line[]
+   */
+   while (fgets (line, sizeof(line), fmat) != NULL && line[0]=='#');
+
+   /* transfer the residue line into l_sq[] */
+   l_nsq = 1;
+   l_sq[0] = '\0';
+   for (i=0; line[i] != '\0'; i++) {
+     if (!isalpha((unsigned char)line[i]) && line[i] != '*') continue;	/* only letters and '*' are residues */
+     if (l_nsq > MAXSQ) { fprintf(stderr,"*** error [%s:%d] - more than %d residues in %s\n",__FILE__,__LINE__,MAXSQ,mfname); fclose(fmat); return 0; }
+     l_sq[l_nsq++] = line[i];
+   }
+
+   /* if we have a DNA matrix, various defaults must be updated,
+      particularly pascii, which is used to map the residue ordering
+      in the matrix file to the residue ordering used by the
+      program */
+
+   if (l_nsq < 20) {
+     if (ppst->dnaseq <= SEQT_PROT) {
+       ppst->dnaseq = SEQT_DNA;
+     }
+     ppst->nt_align=1;
+     pascii = nascii;	/* use correct DNA mapping, NCBIstdaa by default */
+   }
+
+   /* we no-longer re-initialize sascii[], we either use NCBIstdaa
+      mapping for protein, or nascii for DNA */
+
+   /* 11-July-2014 -- need to check that alphabet is consistent with pascii */
+   /* 
+   for (i=0; i < l_nsq; i++) {
+   }
+   */
+
+   /* check for 2D pam  - if not found, allocate it */
+   if (!ppst->have_pam2) {
+     alloc_pam (MAXSQ+1, MAXSQ+1, ppst);
+     ppst->have_pam2 = 1;
+   }
+
+   max_val = -1;
+   min_val =  1;
+   ppst->pam2[0][0][0] = -BIGNUM;
+   /* make certain the [0] boundaries are -BIGNUM */
+   for (j=1; j < l_nsq; j++) {
+     p_j = pascii[l_sq[j]];	/* NOTE(review): unlike p_i below, p_j is not range-checked against MAXSQ */
+     ppst->pam2[0][0][p_j] = ppst->pam2[0][p_j][0] = -BIGNUM;
+   }
+
+   /*  read the scoring matrix values */
+   for (iaa = 1; iaa < l_nsq; iaa++) {	/* read pam value line */
+     p_i = pascii[l_sq[iaa]];
+     if (p_i > MAXSQ) {
+       fprintf(stderr,"*** error [%s:%d] - residue character %c out of range %d\n",
+	       __FILE__, __LINE__, l_sq[iaa], p_i);
+       p_i = pascii['X'];
+     }
+     if (fgets(line,sizeof(line),fmat)==NULL) {
+       fprintf (stderr," error reading pam line: %s\n",line);
+       exit (1);
+     }
+     /*     fprintf(stderr,"%d/%d %s",iaa,nsq,line); */
+     strtok(line," \t\n");		/* skip the letter (residue) */
+
+     for (j = 1; j < l_nsq; j++) {
+       p_j = pascii[l_sq[j]];
+       if ((lp=strtok(NULL," \t\n"))==NULL) { fprintf (stderr," error reading pam line: too few values for %c\n",l_sq[iaa]); exit (1); }
+       pval=ppst->pam2[0][p_i][p_j]=atoi(lp);	/* convert to integer */
+       if (pval > max_val) max_val = pval;
+       if (pval < min_val) min_val = pval;
+     }
+   }
+   ppst->pam_h = max_val;
+   ppst->pam_l = min_val;
+
+   if (ppst->dnaseq==0) {
+     pam_sq = apam_sq;
+     pam_sq_n = apam_sq_n;
+     init_altpam(ppst);
+   }
+   else {
+     pam_sq = npam_sq;
+     pam_sq_n = npam_sq_n;
+   }
+
+   /* is protein but do not have '*' in alphabet*/
+   p_i = pascii['*'];
+   p_j = pascii['X'];
+   if (!ppst->nt_align && strchr((char *)l_sq,'*')==NULL) {	/* NOTE(review): l_sq[0] is NUL, so strchr() sees an empty string and never finds '*' -- confirm whether always copying X is intended */
+      /* add it */
+     for (i=0; i< l_nsq; i++) {
+       ppst->pam2[0][p_i][i] = ppst->pam2[0][p_j][i];
+       ppst->pam2[0][i][p_i] = ppst->pam2[0][i][p_j];
+     }
+   }
+
+   /* make sure that X:X is < 0 if -S */
+   if (ppst->ext_sq_set && ppst->pam2[0][p_j][p_j] >= 0) {
+     ppst->pam2[0][p_j][p_j] = -1;
+   }
+
+   fclose (fmat);
+   return 1;
+}
+
+/* make a DNA scoring from +match/-mismatch values */
+
+void mk_n_pam(int *arr,int siz, int mat, int mis)	/* rescale arr[] in place from the default +5/-4 to mat/mis; siz is not used here */
+{
+  int i, j, k;
+  /* current default match/mismatch values */
+  int max_mat = +5;
+  int min_mis = -4;
+  float f_val, f_scale;
+  
+  f_scale = (float)(mat - mis)/(float)(max_mat - min_mis);	/* ratio of the new score range to the default range */
+  
+  k = 0;	/* running index into arr[], walked as a packed triangle: row i has i+1 entries */
+  for (i = 0; i<nnt-1; i++)
+    for (j = 0; j <= i; j++ ) {
+      if (arr[k] == max_mat) arr[k] = mat;	/* default match score becomes mat */
+      else if (arr[k] == min_mis) arr[k] = mis;	/* default mismatch score becomes mis */
+      else if (arr[k] != -1) { 	/* entries equal to -1 are left unchanged */
+	f_val = (arr[k] - min_mis)*f_scale + 0.5;	/* offset from the default mismatch score, scaled, plus 0.5 */
+	arr[k] = f_val + mis;	/* float-to-int conversion on assignment */
+      }
+      k++;
+    }
+}
+
+int
+standard_pam(char *smstr, struct pstruct *ppst, int del_set, int gap_set) {
+  struct std_pam_str *entry;	/* cursor over the built-in std_pams[] table */
+
+  /* split any "-MS" / offset suffix off smstr before comparing abbreviations */
+  pam_opts(smstr, ppst);
+
+  for (entry = std_pams; entry->abbrev[0] != '\0'; entry++ ) {
+    if (strcmp(smstr,entry->abbrev) != 0) continue;	/* not this matrix */
+
+    pam = entry->pam;
+    strncpy(ppst->pam_name,entry->name,MAX_FN);
+    ppst->pam_name[MAX_FN-1]='\0';
+    if (ppst->pam_ms) {
+      strncat(ppst->pam_name,"-MS",MAX_FN-strlen(ppst->pam_name)-1);
+    }
+    ppst->pam_name[MAX_FN-1]='\0';
+#ifdef OLD_FASTA_GAP
+    if (!del_set) ppst->gdelval = entry->gdel+entry->ggap;
+#else
+    if (!del_set) ppst->gdelval = entry->gdel;
+#endif
+    if (!gap_set) ppst->ggapval = entry->ggap;	/* matrix defaults apply only when the caller did not set penalties */
+    ppst->pamscale = entry->scale;
+    return 1;	/* found a standard matrix */
+  }
+  return 0;	/* smstr is not a standard matrix abbreviation */
+}
+
+/* scan through the sorted (by decreasing entropy) std_pams[] array
+   for a scoring matrix with enough entropy
+
+   The search starts at the std_pams[] entry whose abbrev matches
+   ppst->pamfile (or at the end-of-table entry if there is none) and
+   moves towards index 1; index 0 is never examined.  Entries whose
+   name does not start with "VT" are skipped, except for the current
+   matrix itself.  The first entry examined with
+   n0_eff * entropy >= bit_thresh is selected.
+
+   On success the global pam pointer, ppst->pam_name (with "-MS"
+   appended when ppst->pam_ms is set) and ppst->pamscale are taken
+   from the selected entry, as are the gap penalties unless del_set /
+   gap_set say the caller already set them.
+
+   returns 1 if a matrix was selected, 0 otherwise
+*/
+int
+min_pam_bits(int n0_eff, double bit_thresh, struct pstruct *ppst, int del_set, int gap_set) {
+  struct std_pam_str *std_pam_p;
+  int curr_pam_idx = 0;
+
+  pam_opts(ppst->pamfile, ppst);
+
+  /* get the index for the current (standard) pam file */
+  for (curr_pam_idx = 0; std_pams[curr_pam_idx].abbrev[0]; curr_pam_idx++) {
+    if (strcmp(ppst->pamfile,std_pams[curr_pam_idx].abbrev)==0) break;
+  }
+
+  /* only use matrices from the VT series */
+  for ( ; curr_pam_idx > 0 ; curr_pam_idx-- ) {
+    if ((strncmp(std_pams[curr_pam_idx].name,"VT",2)!=0) &&
+	(strcmp(ppst->pamfile, std_pams[curr_pam_idx].abbrev) != 0)) continue;
+    if (n0_eff * std_pams[curr_pam_idx].entropy >= bit_thresh) break;
+  }
+  /* the loop ran out of candidates without finding enough entropy */
+  if (curr_pam_idx <= 0) return 0;
+
+  std_pam_p = &std_pams[curr_pam_idx];
+
+  pam = std_pam_p->pam;
+  /* strncpy() does not terminate a full-length name, so terminate
+     before strncat() measures the string (as standard_pam() does) */
+  strncpy(ppst->pam_name,std_pam_p->name,MAX_FN);
+  ppst->pam_name[MAX_FN-1]='\0';
+  if (ppst->pam_ms) {
+    /* the room left is a property of pam_name, not of pamfile */
+    strncat(ppst->pam_name,"-MS",MAX_FN-strlen(ppst->pam_name)-1);
+  }
+  ppst->pam_name[MAX_FN-1]='\0';
+#ifdef OLD_FASTA_GAP
+  if (!del_set) ppst->gdelval = std_pam_p->gdel+std_pam_p->ggap;
+#else
+  if (!del_set) ppst->gdelval = std_pam_p->gdel;
+#endif
+  if (!gap_set) ppst->ggapval = std_pam_p->ggap;
+  ppst->pamscale = std_pam_p->scale;
+  return 1;
+}
+
+/* build_xascii is only used for SEQT_UNK.  It replaces the default
+   input mapping in qascii[] with one in which every letter found in
+   NCBIstdaa_ext[] or ntx[] maps to itself, i.e. the letter is
+   preserved rather than encoded.  Letters that used to need a
+   separate othx[] alphabet (e.g. 'OoUu') are now part of NCBIstdaa.
+
+   The mappings of ',' and '*', and of every character in save_str[]
+   (at most MAX_SSTR of them), are kept as they were on entry;
+   newline and carriage return map to EL and qascii[0] to ES.
+
+   Because the resulting qascii[] holds ascii characters rather than
+   residue codes, these values must later be replaced using either
+   aascii[] or nascii[] and the initial query sequence re-coded.
+ */
+void
+build_xascii(int *qascii, char *save_str) {
+  int kept_val[MAX_SSTR];
+  int n_kept, k;
+  const int kept_comma = qascii[','];
+  const int kept_term = qascii['*'];
+
+  /* preserve special characters */
+  n_kept = 0;
+  while (n_kept < MAX_SSTR && save_str[n_kept]) {
+    kept_val[n_kept] = qascii[save_str[n_kept]];
+    n_kept++;
+  }
+
+  /* everything except qascii[0] starts out as "not a residue" */
+  for (k = 127; k >= 1; k--) {
+    qascii[k] = NA;
+  }
+
+  /* range of values in the alphabets is from 1..n-1 - entry 0 is
+     not mapped (9 Oct 2002) */
+  k = 1;
+  while (k < NCBIstdaa_ext_n) {
+    qascii[NCBIstdaa_ext[k]] = NCBIstdaa_ext[k];
+    k++;
+  }
+
+  k = 1;
+  while (k < nntx) {
+    qascii[ntx[k]] = ntx[k];
+    k++;
+  }
+
+  /* line ends */
+  qascii['\r'] = EL;
+  qascii['\n'] = qascii['\r'];
+
+  /* put back what was saved on entry */
+  qascii[','] = kept_comma;
+  qascii['*'] = kept_term;
+  qascii[0] = ES;
+
+  for (k = 0; k < n_kept; k++) {
+    qascii[save_str[k]] = kept_val[k];
+  }
+}
+
+
+/* init_ascii0 -- initializes an ascii mapping from a sequence
+   ordering
+
+   xascii[0..127] is first set to NA; then each letter sq_map[i],
+   i = 1..n_sq_map-1, is mapped to its index i.  When the alphabet
+   has no more than MAXUC entries, the lower-case form of each letter
+   gets the same index.  The alphabet is also copied to ppst->sq[]
+   (with ppst->sq[0] = 0) and its size to ppst->nsq.  Finally 0 maps
+   to ES, and 10 and 13 (line feed, carriage return) map to EL.
+*/
+void 
+init_ascii0(int *xascii, char *sq_map, int n_sq_map, struct pstruct *ppst) {
+  int code;
+  const int fold_case = (n_sq_map <= MAXUC);	/* only uppercase */
+
+  /* first map everything as non-sequence */
+  for (code = 127; code >= 0; code--) {
+    xascii[code] = NA;
+  }
+
+  /* then map the actual sequence letters, recording each one in
+     ppst->sq[] as we go */
+  ppst->nsq = n_sq_map;
+  for (code = 1; code < n_sq_map; code++) {
+    xascii[sq_map[code]] = code;
+    if (fold_case) {
+      xascii[tolower(sq_map[code])] = code;	/* map lowercase */
+    }
+    ppst->sq[code] = sq_map[code];
+  }
+  ppst->sq[0] = 0;
+
+  /* then map the other stuff, EL  etc */
+  xascii[13] = EL;
+  xascii[10] = EL;
+  xascii[0] = ES;
+}
+
+/* init_ascii()
+
+   maps every letter of the selected alphabet to its index in
+   xascii[]:
+     is_dna is SEQT_DNA or SEQT_RNA:  ntx[] if is_ext, else nt[]
+     otherwise:              NCBIstdaa_ext[] if is_ext, else NCBIstdaa[]
+   (nothing is done for SEQT_UNK)
+
+   checks for lower case letters in the alphabet;
+   if not present, map lowercase to upper
+
+   xascii['*'] is restored to the value it had on entry and
+   xascii[0] is set to ES.  p_nsq is not used here.
+
+*/
+void
+init_ascii(int is_ext, int *xascii, int p_nsq, int is_dna) {
+
+  char *alphabet, saved_term;
+  int n_alpha, idx;
+  int nucleotide, lc_seen;
+
+  if (is_dna==SEQT_UNK) return;
+
+  saved_term = xascii['*'];
+
+  nucleotide = (is_dna==SEQT_DNA || is_dna == SEQT_RNA);
+  if (nucleotide && is_ext) { alphabet = &ntx[0]; n_alpha = nntx; }
+  else if (nucleotide) { alphabet = &nt[0]; n_alpha = nnt; }
+  else if (is_ext) { alphabet = NCBIstdaa_ext; n_alpha = NCBIstdaa_ext_n; }
+  else { alphabet = NCBIstdaa; n_alpha = NCBIstdaa_n; }
+
+  /* this pass guarantees that all characters in the alphabet are
+     represented in xascii[], but it does not guarantee that
+     everything else in xascii[], particularly xascii[O,U,J], has an
+     appropriate value.  Note that index n_alpha itself is included. */
+  lc_seen = 0;
+  for (idx = 1; idx <= n_alpha; idx++) {
+    xascii[alphabet[idx]] = idx;
+    if (alphabet[idx] >= 'a' && alphabet[idx] <= 'z') { lc_seen = 1; }
+  }
+
+  if (!lc_seen) {
+    /* no lower case letters in alphabet, map lower case to upper */
+    for (idx = 1; idx <= n_alpha; idx++) {
+      if (alphabet[idx] < 'A' || alphabet[idx] > 'Z') { continue; }
+      xascii[alphabet[idx]-'A'+'a'] = idx;
+    }
+    if (is_dna==1) { xascii['u'] = xascii['t']; }
+  }
+
+  xascii['*']=saved_term;
+  xascii[0] = ES;
+}
+
+/* validate_novel_aa() -- make sure that no letter 'A'..'Z' maps to a
+   residue code beyond p_nsq.
+
+   For non-DNA/RNA sequences, 'O', 'U' and 'J' (and their lower-case
+   forms) are first re-mapped to the codes of 'K', 'C' and 'L' if
+   either case currently maps above p_nsq.  Then any upper-case
+   letter with a code below NA but above p_nsq is reported on
+   stderr, and the program exits with status 1 if there was one.
+
+   these checks need to be done after xascii[] has been
+   re-initialized
+*/
+void
+validate_novel_aa(int *xascii, int p_nsq, int dnaseq) {
+  /* novel residue -> substitute, upper and lower case in parallel */
+  static const int novel_uc[3] = {'O', 'U', 'J'};
+  static const int novel_lc[3] = {'o', 'u', 'j'};
+  static const int subst_uc[3] = {'K', 'C', 'L'};
+  static const int subst_lc[3] = {'k', 'c', 'l'};
+  int k, ch, n_bad;
+
+  if (!(dnaseq == SEQT_DNA || dnaseq == SEQT_RNA)) {
+    for (k = 0; k < 3; k++) {
+      if (xascii[novel_uc[k]] > p_nsq || xascii[novel_lc[k]] > p_nsq) {
+        xascii[novel_uc[k]] = xascii[subst_uc[k]];
+        xascii[novel_lc[k]] = xascii[subst_lc[k]];
+      }
+    }
+  }
+
+  /* one final check for characters out of range (> nsq)*/
+  n_bad = 0;
+  for (ch = 'A'; ch <= 'Z'; ch++) {
+    if (xascii[ch] >= NA || xascii[ch] <= p_nsq) { continue; }
+    fprintf(stderr, " *** ERROR *** xascii['%c']:%d > %d\n",ch, xascii[ch], p_nsq);
+    n_bad++;
+  }
+  if (n_bad > 0) {exit(1);}
+}
+
+
+/* print_pam() -- debugging dump, to stderr, of the alphabet and the
+   diagonal (self-match) scores of the scoring matrix.
+
+   ppst->sq[] / ppst->nsq and ppst->pam2[0] are used, unless
+   ppst->ext_sq_set, when the diagonal is taken from ppst->sqx[] /
+   ppst->nsqx and ppst->pam2[1] (both alphabets are then printed).
+*/
+void 
+print_pam(struct pstruct *ppst) {
+  unsigned char *alphabet = ppst->sq;
+  int n_alpha = ppst->nsq;
+  int pam_idx = 0;
+  int r;
+
+  fprintf(stderr," ext_sq_set: %d\n",ppst->ext_sq_set);
+  fprintf(stderr," sq[%d]: %s\n",n_alpha, alphabet);
+
+  if (ppst->ext_sq_set) {
+    alphabet = ppst->sqx;
+    n_alpha = ppst->nsqx;
+    pam_idx = 1;
+    fprintf(stderr," sq[%d]: %s\n",n_alpha, alphabet);
+  }
+
+  /* one line per residue: letter, letter, score against itself */
+  r = 1;
+  while (r <= n_alpha) {
+    fprintf(stderr," %c:%c - %3d\n",alphabet[r], alphabet[r], ppst->pam2[pam_idx][r][r]);
+    r++;
+  }
+}
diff --git a/src/best_stats.h b/src/best_stats.h
new file mode 100644
index 0000000..4fc29d4
--- /dev/null
+++ b/src/best_stats.h
@@ -0,0 +1,52 @@
+/* Concurrent read version */
+
+/* $Id: best_stats.h 808 2011-07-19 20:05:24Z wrp $ */
+/* $Revision: 808 $  */
+
+#include "param.h"
+
+/*
+#ifndef FSEEK_T_DEF
+#define FSEEK_T_DEF
+#ifndef USE_FSEEKO
+typedef long fseek_t;
+#else
+typedef off_t fseek_t;
+#endif
+#endif
+*/
+
+struct beststr {	/* per-sequence result record: sequence pointers, scores, and alignment chain */
+  struct seq_record *seq;	/* sequence info */
+  struct mseq_record *mseq;	/* sequence meta-info */
+  struct beststr *bbp_link;	/* link to a previous beststr entry with the same sequence */
+  struct rstruct rst;		/* results info */
+
+  int n1;		/* duplicate of seq.n1, used for error checking/debugging */
+#ifdef DEBUG
+  long adler32_crc;	/* duplicate of seq.adler32_crc for error checking/debugging */
+#endif
+  int frame;		/* in buf2_str */
+  int repeat_thresh;	/* threshold for additional alignments */
+  double zscore;	/* the z-score mostly exists for sorting best scores */
+  double zscore2;	/* z-score - from high-scoring shuffles  */
+  double bit_score;	/* move to bit-scores for consistency */
+  double bit_score2;	/* bit-score for second shuffle */
+
+  int a_res_cnt;	/* presumably the number of entries in the a_res chain -- TODO confirm */
+  struct a_res_str *a_res;	/* need only a_res, not a_res[2], because different frames
+				   for the same sequence are stored separately */
+  int have_ares;	/* NOTE(review): looks like a flag word saying whether a_res is available -- confirm bit meanings */
+  float percent, gpercent;	/* presumably percent identity without/with gaps -- TODO confirm */
+};
+
+struct stat_str {	/* plain-value record, one per score; NOTE(review): field meanings below are assumptions to confirm */
+  int score;		/* presumably the raw similarity score -- TODO confirm */
+  int n1;		/* presumably the library sequence length (cf. beststr.n1) -- TODO confirm */
+  double comp;		/* presumably a composition statistic -- TODO confirm */
+  double H;		/* presumably an entropy value -- TODO confirm */
+  double escore;	/* presumably an expectation-based score -- TODO confirm */
+  int segnum;		/* presumably a number of segments -- TODO confirm */
+  int seglen;		/* presumably a segment length -- TODO confirm */
+};
+
diff --git a/src/build_ares.c b/src/build_ares.c
new file mode 100644
index 0000000..ffe26ff
--- /dev/null
+++ b/src/build_ares.c
@@ -0,0 +1,248 @@
+/* $Id: build_ares.c $ */
+
+/* copyright (c) 2010, 2014 by William R. Pearson and The Rector &
+   Visitors of the University of Virginia */
+
+/* Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under this License is distributed on an "AS
+   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+   express or implied.  See the License for the specific language
+   governing permissions and limitations under the License. 
+*/
+
+/* build_ares_code is called by showbest() (in threaded/serial code) or
+   p2_workcomp in PCOMPLIB code to produce the cur_ares-> chain that
+   is displayed in showbest().
+
+   For PCOMPLIB, the cur_ares->chain is passed to bbp->a_res by
+   do_stage2(), where it is available to showbest();
+
+   By using this code, the a_res chain used in either mode will be the
+   same, so the code required to display an a_res should be the same.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "defs.h"
+#include "structs.h"
+#include "param.h"
+
+/* #include "mm_file.h" */
+#include "best_stats.h"
+#include "drop_func.h"
+
+extern void calc_coord(int n0, int n1, long qoffset, long loffset,
+		      struct a_struct *aln);
+
+extern void calc_astruct(struct a_struct *aln_p, struct a_res_str *a_res_p, void *f_str);
+
+/* in build_ares_code, *aa1 is separate from *seq because *seq has
+   permanent information about aa1, but aa1 may be temporary
+
+   build_ares_code() calculates various annotation strings, depending
+   on what kinds of annotations are requested with -m "F# file" and -m #
+
+   There are three fundamentally different annotation formats:
+   (1) annot_var_s -- the long version shown with alignments
+   (2) annot_code -- a very compact version, shown with -m 9[cC] and -m 8CC
+   (3) annot_id -- a different compact version
+
+   These need to be saved separately, and used at the right time.
+   They are NOT exclusive (which older code assumed).
+
+   arguments:
+     aa0, n0       - query sequence and the length passed to do_walign()
+     aa1, seq      - library residues and the permanent record for them
+                     (seq->n1 is the length, seq->annot_p the annotations)
+     frame         - passed to do_walign()/pre_cons()/aln_func_vals()
+     have_ares     - if bit 0x1 is already set, no alignment is made:
+                     only pre_cons() is called and NULL is returned;
+                     otherwise it is handed to do_walign()
+     repeat_thresh - passed to do_walign()
+     m_msp         - m_msp->tot_show_code selects which encodings are
+                     built for each alignment
+     ppst, f_str   - passed through to the alignment/encoding functions
+
+   returns the head of the a_res chain from do_walign() (or NULL, see
+   have_ares).  Each entry has its aln_code, annot_code, annot_var_id
+   and annot_var_idd strings set (or NULL), and its scores increased
+   by score_delta when that is positive.
+*/
+
+struct a_res_str *
+build_ares_code(unsigned char *aa0, int n0, 
+		unsigned char *aa1, struct seq_record *seq,
+		int frame, int *have_ares, int repeat_thresh,
+		const struct mngmsg *m_msp, struct pstruct *ppst,
+		void *f_str
+		)
+{
+  unsigned char *aa1_ann;
+  struct a_res_str *my_ares_p, *cur_ares_p;
+  struct a_struct *aln_p;
+  struct dyn_string_str *annot_str_dyn, *align_code_dyn;
+  long loffset;		/* loffset is offset from beginning of real sequence */
+  long l_off;		/* l_off is the virtual coordinate of residue 1 */
+  int seqc_max;
+  char *seq_code;
+  int seq_code_len, annot_str_len;
+  int score_delta;
+  int variant_calc_done;
+
+  align_code_dyn = init_dyn_string(2048, 2048);
+  annot_str_dyn = init_dyn_string(2048, 2048);
+
+  if (seq->annot_p) {aa1_ann = seq->annot_p->aa1_ann;}
+  else aa1_ann = NULL;
+  loffset = seq->l_offset;
+  l_off = seq->l_off;
+
+  if (! (*have_ares & 0x1)) {	/* we don't have an a_res, and we need one */
+
+      my_ares_p = do_walign(aa0, n0, aa1, seq->n1,
+                            frame, 
+                            repeat_thresh, ppst, f_str,
+                            have_ares);
+  }
+  else {	/* we already have the a_res */
+      pre_cons(aa1,seq->n1,frame,f_str);
+      my_ares_p = NULL;
+  }
+
+  /* here, we need to loop through all the alignments, and produce
+     the statistics/codes for each */
+
+  for (cur_ares_p = my_ares_p; cur_ares_p != NULL; cur_ares_p = cur_ares_p->next) {
+
+    /* start every alignment with a clean slate, so that nothing from
+       the previous alignment can be applied to this one */
+    variant_calc_done = 0;
+    score_delta = 0;
+
+    /* seq_code is only a placeholder: the encoding itself is built
+       in align_code_dyn and copied into a right-sized buffer below */
+    seqc_max = my_ares_p->nres + 4*m_msp->aln.llen+4;
+    cur_ares_p->aln_code = seq_code = NULL;
+    cur_ares_p->aln_code_n = seq_code_len = 0;
+    cur_ares_p->annot_code = NULL;
+    cur_ares_p->annot_code_n = 0;
+    cur_ares_p->annot_var_s = NULL;
+    cur_ares_p->annot_var_id = NULL;
+    cur_ares_p->annot_var_idd = NULL;
+
+    aln_p = &cur_ares_p->aln;
+
+    /* this sets a number of constants, from the alignment function
+       and frame, and only needs to be called once */
+    aln_func_vals(frame, aln_p);
+
+    if (m_msp->tot_show_code & (SHOW_CODE_ALIGN+SHOW_CODE_CIGAR+SHOW_CODE_EXT)) {
+      cur_ares_p->aln_code = seq_code=(char *)calloc(seqc_max,sizeof(char));
+      /* if we have an annotation string, allocate space for the
+	 encoded annotation */
+
+      if (seq_code != NULL) {
+
+	calc_astruct(aln_p, cur_ares_p, f_str);
+
+	/* we need this for offset information for calc_code, but it is
+	 incomplete so we must do it again */
+	
+	calc_coord(m_msp->n0,seq->n1,
+		 m_msp->q_offset + (m_msp->q_off-1) + (m_msp->sq0off-1),
+		 loffset + (l_off-1) + (m_msp->sq1off-1),
+		 aln_p);
+
+	aln_p->lc=calc_code(aa0, m_msp->n0,
+			    aa1,seq->n1, 
+			    aln_p,cur_ares_p,
+			    ppst,
+			    align_code_dyn,
+			    m_msp->ann_arr,
+			    m_msp->aa0a, m_msp->annot_p,
+			    aa1_ann, seq->annot_p,
+			    annot_str_dyn,
+			    &score_delta,
+			    f_str, m_msp->pstat_void,
+			    m_msp->tot_show_code);
+
+	cur_ares_p->aln_code_n = seq_code_len = strlen(seq_code);
+	if (seq_code[1] == '0' && seq_code[0] == '=') {
+	  fprintf(stderr," code begins with 0: %s\n", seq_code);
+	}
+
+	if (align_code_dyn != NULL) {
+	  if (seq_code && cur_ares_p->aln_code == seq_code) {
+	    free(seq_code);	/* free it since it is replaced below */
+	    seq_code = NULL;
+	  }
+	  seq_code_len = strlen(align_code_dyn->string);
+	  cur_ares_p->aln_code = (char *)calloc(seq_code_len+2,sizeof(char));
+	  if (cur_ares_p->aln_code != NULL) {
+	    cur_ares_p->aln_code_n = seq_code_len+2;
+	    SAFE_STRNCPY(cur_ares_p->aln_code,align_code_dyn->string, seq_code_len+2);
+	  }
+	  else {
+	    fprintf(stderr,"*** ERROR *** [%s/%d] cannot allocate cur_ares_p->aln_code [%d]\n",
+		    __FILE__, __LINE__, seq_code_len+2);
+	    cur_ares_p->aln_code_n = 0;
+	  }
+	  reset_dyn_string(align_code_dyn);
+	}
+
+	if (annot_str_dyn != NULL) {
+	  annot_str_len = strlen(annot_str_dyn->string);
+	  cur_ares_p->annot_code = (char *)calloc(annot_str_len+2,sizeof(char));
+	  if (cur_ares_p->annot_code != NULL) {
+	    SAFE_STRNCPY(cur_ares_p->annot_code,annot_str_dyn->string, annot_str_len+2);
+	  }
+	  else {
+	    fprintf(stderr,"*** ERROR *** [%s/%d] cannot allocate cur_ares_p->annot_code [%d]\n",
+		    __FILE__, __LINE__, annot_str_len+2);
+	    annot_str_len = 0;
+	  }
+	  reset_dyn_string(annot_str_dyn);
+	}
+	else {annot_str_len = 0;}
+	cur_ares_p->annot_code_n = annot_str_len;
+	variant_calc_done = 1;
+      }
+    }
+
+    if ((m_msp->tot_show_code & SHOW_CODE_IDD) == SHOW_CODE_IDD) {
+      aln_p->lc=calc_idd(aa0,m_msp->n0,aa1,seq->n1,
+			 aln_p, cur_ares_p,
+			 ppst,
+			 m_msp->annot_p, seq->annot_p,
+			 &score_delta,
+			 annot_str_dyn, f_str);
+      variant_calc_done = 1;
+
+      if (annot_str_dyn != NULL) {
+	annot_str_len = strlen(annot_str_dyn->string);
+	cur_ares_p->annot_var_idd = (char *)calloc(annot_str_len+2,sizeof(char));
+	if (cur_ares_p->annot_var_idd != NULL) {
+	  SAFE_STRNCPY(cur_ares_p->annot_var_idd,annot_str_dyn->string, annot_str_len+2);
+	}
+	else {
+	  fprintf(stderr,"*** ERROR *** [%s/%d] cannot allocate cur_ares_p->annot_var_idd [%d]\n",
+		  __FILE__, __LINE__, annot_str_len+2);
+	}
+      }
+      else {annot_str_len = 0;}
+
+    }
+
+    /* ensure that calc_id (or something else) is ALWAYS done to set score_delta */
+    if (!variant_calc_done || (m_msp->tot_show_code & SHOW_CODE_ID) == SHOW_CODE_ID) {
+      aln_p->lc=calc_id(aa0,m_msp->n0,aa1,seq->n1,
+			aln_p, cur_ares_p,
+			ppst,
+			m_msp->annot_p, seq->annot_p,
+			&score_delta,
+			annot_str_dyn, f_str);
+      
+      if ((m_msp->tot_show_code & SHOW_CODE_ID)==SHOW_CODE_ID &&
+	  annot_str_dyn != NULL && (annot_str_dyn->string[0] != '\0')) {
+	if ((cur_ares_p->annot_var_id = (char *)calloc(strlen(annot_str_dyn->string)+2, sizeof(char)))==NULL) {
+	  fprintf(stderr,"*** ERROR *** [%s/%d] cannot allocate cur_ares_p->annot_var_id [%d]\n",
+		  __FILE__, __LINE__, (int)strlen(annot_str_dyn->string)+2);
+	}
+	else {
+	  strncpy(cur_ares_p->annot_var_id,annot_str_dyn->string,strlen(annot_str_dyn->string)+2);
+	}
+      }
+    }
+
+    /* only a positive delta is applied to the scores */
+    if (score_delta > 0) {
+      cur_ares_p->rst.score[0] += score_delta;
+      cur_ares_p->rst.score[1] += score_delta;
+      cur_ares_p->rst.score[2] += score_delta;
+      cur_ares_p->sw_score += score_delta;
+      cur_ares_p->score_delta = score_delta;
+    }
+    else {
+      cur_ares_p->score_delta = 0;
+    }
+
+    /*
+    if (annot_str_dyn->string[0] != '\0') {
+      if ((cur_ares_p->annot_var_s = (char *)calloc(strlen(annot_str_dyn->string)+2, sizeof(char)))==NULL) {
+	fprintf(stderr,"*** ERROR *** [%s/%d] cannot allocate cur_ares_p->annot_var_s [%d]\n",
+		__FILE__, __LINE__, (int)strlen(annot_str_dyn->string)+2);
+      }
+      else {
+	strncpy(cur_ares_p->annot_var_s,annot_str_dyn->string,strlen(annot_str_dyn->string)+2);
+      }
+    }
+    */
+	
+    /* this should be all the information we need on the alignment */
+  } /* end for (cur_ares_p;) */
+  free_dyn_string(annot_str_dyn);
+  free_dyn_string(align_code_dyn);
+  return my_ares_p;
+}
diff --git a/src/c_dispn.c b/src/c_dispn.c
new file mode 100644
index 0000000..5c9741e
--- /dev/null
+++ b/src/c_dispn.c
@@ -0,0 +1,573 @@
+/*	dispn.c	associated subroutines for matching sequences */
+
+/* $Id: c_dispn.c 1124 2013-03-13 20:24:57Z wrp $ */
+/* $Revision: 1124 $  */
+
+/* copyright (c) 1988, 1995, 1996, 2008, 2013, 2014 by William R. Pearson and 
+   The Rector and Visitors of the University of Virginia */
+
+/* Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under this License is distributed on an "AS
+   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+   express or implied.  See the License for the specific language
+   governing permissions and limitations under the License. 
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "defs.h"
+#include "structs.h"
+#include "param.h"
+
+#define XTERNAL
+
+#define YES 1
+#define NO 0
+
+#define MAXOUT 201
+
+/* the seqca[] array has the following codes:
+   0 - no alignment symbol
+   1 - align; pam < 0
+   2 - align; pam == 0
+   3 - align; pam > 0
+   4 - align; ident
+   5 - align; del
+
+   the map_sym arrays determine the value to be displayed with each
+   type of aligned residue
+*/
+
+#include "a_mark.h"
+
+/* discons() -- write one pairwise alignment to fd.
+
+   seqc0/seqc1 are the two aligned sequences as display characters
+   ('-', '/', '\' and ' ' mark columns with no residue); seqc0a and
+   seqc1a are the optional annotation characters for each; seqca has
+   the per-column similarity codes listed earlier in this file; nc is
+   the number of alignment columns.  name0/name1 label the two
+   sequences, nml is the width of the name column, and n0/n1 are the
+   lengths reported in the MX_M10FORM output.  aln supplies the
+   offsets, strand and scaling information for the coordinates.
+
+   m_msp->markx selects the output format:
+     MX_AMAP with (markx & MX_ATYPE)==7 -- nothing is written
+     MX_ASEP            -- each sequence as a ">name .." record,
+                           50 residues per line
+     MX_M10FORM         -- ">name .." records with "; key: value" lines
+     MX_RES_ALIGN_SCORE -- one tab-separated line per column, using
+                           cumm_seq_score[]
+     otherwise          -- blocks of up to l_llen columns with
+                           coordinate lines above/below, or BLAST-like
+                           lines with start/end coordinates when
+                           MX_MBLAST is set
+*/
+void
+discons(FILE *fd, const struct mngmsg *m_msp, 
+	char *seqc0, char *seqc0a, 
+	char *seqc1, char *seqc1a,
+	char *seqca, int *cumm_seq_score, int nc,
+	int n0, int n1, char *name0, char *name1, int nml,
+	struct a_struct *aln)
+{
+  char line[3][MAXOUT];	/* alignment lines [0,2], similarity code [1] */
+  char cline[2][MAXOUT+10], *clinep[2];	/* coordinate line */
+  int il, i, lend, loff, id, tot_score;
+  int del0, del1, ic, ll0, ll1, ll01, cl0, cl1, rl0, rl1;
+  int ic_save;
+  char *map_sym_p;
+  int l_llen;
+  int ioff0, ioff00, ioff1, ioff10;
+  long q_start, q_end, qf_flag, s_start, s_end, sf_flag;
+  long qqoff, lloff;
+  int llsgn, llfact, qlsgn, qlfact, qfx0, qfxn, lfx0, lfxn;
+  long s_digit_max, q_digit_max;
+  char digit_tmp[32];
+  int digit_len;
+  int have_res;
+  char *name01;
+  char blank[32], afmt[32], afmt0[32];
+  int blank_len;
+  int disp_dna_align = ((m_msp->qdnaseq>0) && (m_msp->ldb_info.ldnaseq > 0));
+
+  memset(blank,' ',sizeof(blank)-1);
+  blank[sizeof(blank)-1]='\0';
+
+  /* coordinates are written starting one column to the left when a
+     sequence is reversed (qfxn/lfxn == 1), so leave room at cline[][0] */
+  clinep[0]=cline[0]+1;
+  clinep[1]=cline[1]+1;
+
+  if (aln->qlfact == 0) {qlfact = 1;}
+  else qlfact = aln->qlfact;
+  if (aln->qlrev == 1) {
+    qlsgn = -1;
+    qfx0 = 0;
+    qfxn = 1;
+  }
+  else {
+    qlsgn = 1;
+    qfx0 = 1;
+    qfxn = 0;
+  }
+
+  if (aln->llfact == 0) {llfact = 1;}
+  else llfact = aln->llfact;
+
+  if (aln->llrev == 1) {
+    llsgn = -1;
+    lfx0 = 0;
+    lfxn = 1;
+  }
+  else {
+    llsgn = 1;
+    lfx0 = 1;
+    lfxn = 0;
+  }
+
+  l_llen = aln->llen;
+  if ((m_msp->markx & MX_M9SUMM) && m_msp->show_code != 1) { l_llen += 40; }
+  /* line[][] holds at most MAXOUT-1 columns plus the terminator */
+  if (l_llen > MAXOUT-1) { l_llen = MAXOUT-1; }
+
+  if ((m_msp->markx & MX_ATYPE)==2) name01=name1;
+  else name01 = "\0";
+
+  ioff0=aln->smin0;
+  ioff00 = ioff0;
+  ioff1=aln->smin1;
+  ioff10 = ioff1;
+  
+  if (m_msp->markx& MX_AMAP && (m_msp->markx & MX_ATYPE)==7) return;
+
+  /* set *map_sym_p to correct match symbol */
+  map_sym_p = aln_map_sym[MX_A0];
+  if ((m_msp->markx&MX_ATYPE)==1) {map_sym_p = aln_map_sym[MX_A1];}
+  else if ((m_msp->markx&MX_ATYPE)==2) {map_sym_p = aln_map_sym[MX_A2];}
+  else if (m_msp->markx&MX_M10FORM) { map_sym_p = aln_map_sym[MX_A10]; }
+  if (m_msp->markx & MX_MBLAST) { map_sym_p = aln_map_sym[MX_ABLAST];}
+
+  if (m_msp->markx & MX_ASEP) {
+    fprintf(fd,">%s ..\n",name0);
+    for (i=0; i<nc && seqc0[i]; i++) {
+   /* if (seqc0[i]=='-') fputc('.',fd); else */
+      fputc(seqc0[i],fd);
+      if (i%50 == 49) fputc('\n',fd);
+    }
+    if ((i-1)%50 != 49) fputc('\n',fd);
+    fprintf(fd,">%s ..\n",name1);
+    for (i=0; i<nc && seqc1[i]; i++) {
+    /* if (seqc1[i]=='-') fputc('.',fd); else */
+      fputc(seqc1[i],fd);
+      if (i%50 == 49) fputc('\n',fd);
+    }
+    if ((i-1)%50 != 49) fputc('\n',fd);
+    return;
+  }
+
+  if (m_msp->markx & MX_M10FORM) {
+    fprintf(fd,">%s ..\n",name0);
+    fprintf(fd,"; sq_len: %d\n",n0);
+    fprintf(fd,"; sq_offset: %ld\n",aln->q_offset+1);
+    fprintf(fd,"; sq_type: %c\n",m_msp->sqtype[0]);
+    fprintf(fd,"; al_start: %ld\n",aln->d_start0);
+    fprintf(fd,"; al_stop: %ld\n",aln->d_stop0);
+    /* in the past, this al_display_start does not include sq0off */
+    fprintf(fd,"; al_display_start: %ld\n",
+	    aln->q_offset+qlsgn*ioff0*aln->llmult+qfx0);
+
+    /* leading blanks are shown as '-'; the first blank after a
+       residue ends the sequence */
+    have_res = 0;
+    for (i=0; i<nc && seqc0[i]; i++) {
+      if (!have_res && seqc0[i]==' ') fputc('-',fd);
+      else if (seqc0[i]==' ') break;
+      else {
+	have_res = 1;
+	fputc(seqc0[i],fd);
+      }
+      if (i%50 == 49) fputc('\n',fd);
+    }
+    if ((i-1)%50!=49 || seqc0[i-1]==' ') fputc('\n',fd);
+    fprintf(fd,">%s ..\n",name1);
+    fprintf(fd,"; sq_len: %d\n",n1);
+    fprintf(fd,"; sq_offset: %ld\n",aln->l_offset+1);
+    fprintf(fd,"; sq_type: %c\n",m_msp->sqtype[0]);
+    fprintf(fd,"; al_start: %ld\n",aln->d_start1);
+    fprintf(fd,"; al_stop: %ld\n",aln->d_stop1);
+    /* in the past, this al_display_start does not include sq1off */
+    fprintf(fd,"; al_display_start: %ld\n",aln->l_offset+llsgn*ioff1+lfx0);
+
+    have_res = 0;
+    for (i=0; i<nc && seqc1[i]; i++) {
+      if (!have_res && seqc1[i]==' ') fputc('-',fd);
+      else if (seqc1[i]==' ') break;
+      else {
+	have_res = 1;
+	fputc(seqc1[i],fd);
+      }
+      if (i%50 == 49) fputc('\n',fd);
+    }
+    if ((i-1)%50!=49 || seqc1[i-1]==' ') fputc('\n',fd);
+#ifdef M10_CONS
+    fprintf(fd,"; al_cons:\n");
+    for (i=0,del0=0,id=ioff0; id-del0<aln->amax0 && i < nc; i++,id++) {
+      if (seqc0[i] == '\0' || seqc1[i] == '\0') break;
+      if (seqc0[i]=='-' || seqc0[i]==' ' || seqc0[i]=='\\') del0++;
+      else if (seqc0[i]=='/') del0++;
+      if (id-del0<aln->amin0) fputc(' ',fd);
+      else if (seqc0[i]=='-'||seqc1[i]=='-') fputc('-',fd);
+      else fputc(map_sym_p[seqca[i]],fd);
+
+      if (i%50 == 49) fputc('\n',fd);
+    }
+    if ((i-1)%50!=49 || seqc1[i-1]==' ') fputc('\n',fd);
+#endif
+    return;
+  }
+  else if (m_msp->markx & MX_RES_ALIGN_SCORE) {
+    have_res = 0;
+    tot_score = 0;
+    del0 = del1 = 0;
+    fprintf(fd,">%s\t%s\t%s0\t%s1\tscore\ttotal\n",
+	    name0,name1,m_msp->sqnam,m_msp->sqnam);
+    for (ic=0; ic<nc; ic++, ioff0++, ioff1++) {
+      if (seqc0[ic] == ' ' || seqc0[ic] == '-' || seqc0[ic] == '/' || seqc0[ic] == '\\') {
+	del0++;
+      }
+      if (seqc1[ic] == ' ' || seqc1[ic] == '-' || seqc1[ic] == '/' || seqc1[ic] == '\\') {
+	del1++;
+      }
+
+      tot_score += cumm_seq_score[ic];
+      fprintf(fd,"%ld\t%ld\t%c\t%c\t%d\t%d\n",
+	      aln->q_offset+qlsgn*(ioff0-del0)*aln->llmult+qfx0,
+	      aln->l_offset+llsgn*(ioff1-del1)+lfx0,	      
+	      seqc0[ic], seqc1[ic], cumm_seq_score[ic], tot_score);
+    }
+    return;
+  }
+
+  memset(line[0],' ',MAXOUT);
+  memset(line[1],' ',MAXOUT);
+  memset(line[2],' ',MAXOUT);
+
+  /* cl0 indicates whether a coordinate should be printed over the first
+     sequence; cl1 indicates a coordinate for the second;
+  */
+
+  ic = 0; del0=del1=0;
+
+  /* we set afmt/afmt0 here, rather than at the start of discons, so
+     we can have accurate values for the max and min query/subject
+     start/end using qlsgn and qlfact
+  */
+
+  if (!(m_msp->markx & MX_MBLAST)) {
+    if (nml > 6) {
+      /* blank[] indents the coordinate lines; keep its terminator
+	 inside the array even for very wide name columns */
+      blank_len = nml-6;
+      if (blank_len > (int)sizeof(blank)-1) { blank_len = (int)sizeof(blank)-1; }
+      blank[blank_len]='\0';
+      sprintf(afmt,"%%-%ds %%s\n",nml);
+    }
+    else {
+      blank[0]='\0';
+      SAFE_STRNCPY(afmt,"%-6s %s\n",sizeof(afmt));
+    }
+  }
+  else {
+    /* for MX_MBLAST format, the size of the numbers is a function of
+       the largest numbers in the first or last coordinate - which
+       could be either the query or the library sequence.
+    */
+    if (qlsgn > 0) {q_digit_max = aln->smin0 + (aln->amax0 - aln->amin0); }
+    else {q_digit_max = aln->smin0;}
+    q_digit_max = aln->q_offset + qlsgn*q_digit_max + 1l;
+
+    if (llsgn > 0) {s_digit_max = aln->smin1 + (aln->amax1 - aln->amin1); }
+    else {s_digit_max = aln->smin1;}
+    s_digit_max = aln->l_offset + aln->frame + llsgn*aln->llmult*s_digit_max + 1l;
+
+    sprintf(digit_tmp,"%ld",max(q_digit_max, s_digit_max));
+    digit_len = strlen(digit_tmp);
+    if (digit_len < 4) digit_len = 4;
+
+    sprintf(afmt,"%%-5s  %%-%dld %%s %%-ld\n",digit_len);
+    blank[digit_len+6]='\0';
+    SAFE_STRNCPY(afmt0,"%-10s  %s\n",sizeof(afmt0));
+  }
+
+  if (aln->d_start0 < aln->d_stop0) qf_flag = 1; else qf_flag = 0;
+  if (aln->d_start1 < aln->d_stop1) sf_flag = 1; else sf_flag = 0;
+
+  for (il=0; il<(nc+l_llen-1)/l_llen; il++) {
+    loff=il*l_llen;
+    lend=min(l_llen,nc-loff);
+
+    ll0 = NO; ll1 = NO;
+
+    memset(cline[0],' ',MAXOUT+1);
+    memset(cline[1],' ',MAXOUT+1);
+
+    ic_save = ic;
+
+    q_start = aln->q_offset + (long)qlsgn*ioff00 +
+      (long)qlsgn*qlfact*(ioff0-del0-ioff00) + qf_flag;
+    s_start = aln->l_offset + /* aln->frame + */
+      (long)llsgn*aln->llmult*ioff10 +
+      (long)llsgn*llfact*(ioff1-del1-ioff10) + sf_flag;
+
+    for (i=0; i<lend; i++, ic++,ioff0++,ioff1++) {
+      cl0 =  cl1 = rl0 = rl1 = YES;
+      if ((line[0][i]=seqc0[ic])=='-' || seqc0[ic]=='\\') {
+	del0++; cl0=rl0=NO;
+      }
+      else if (seqc0[ic]=='/') {
+	del0++; cl0=rl0=NO;
+      }
+      if ((line[2][i]=seqc1[ic])=='-' || seqc1[ic]=='\\') {
+	del1++; cl1=rl1=NO;
+      }
+      else if (seqc1[ic]=='/') {
+	del1++; cl1=rl1=NO;
+      }
+
+      if (seqc0[ic]==' ') {del0++; cl0=rl0=NO;}
+      else ll0 = YES;
+      if (seqc1[ic]==' ') {del1++; cl1=rl1=NO;}
+      else ll1 = YES;
+
+      /* the old version used qoffset, this version uses q_offset+q_off */
+      qqoff = aln->q_offset + (long)qlsgn*ioff00 +
+	(long)qlsgn*qlfact*(ioff0-del0-ioff00);
+      /* sprintf() leaves a '\0' after the 8-character number; it is
+	 replaced by a blank so the coordinate line is not cut short */
+      if (cl0 && qqoff%10 == 9)  {
+	sprintf(&clinep[0][i-qfxn],"%8ld",qqoff+1l);
+	clinep[0][i+8-qfxn]=' ';
+	rl0 = NO;
+      }
+      else if (cl0 && qqoff== -1) {
+	sprintf(&clinep[0][i-qfxn],"%8ld",0l);
+	clinep[0][i+8-qfxn]=' ';
+	rl0 = NO;
+      }
+      else if (rl0 && (qqoff+1)%10 == 0) {
+	sprintf(&clinep[0][i-qfxn],"%8ld",qqoff+1);
+	clinep[0][i+8-qfxn]=' ';
+      }
+      
+      /* the lloff coordinate of a residue is the sum of:
+	 m_msp->sq1off-1	 - the user defined coordinate
+	 aln->l_offset	- the offset into the library sequence
+	 llsgn*ioff10	- the offset into the beginning of the alignment
+	 (given in the "natural" coordinate system,
+	 except for tfasta3 which provides context)
+	 llsgn*llfact*(ioff1-del1-ioff10)
+	 - the position in the consensus alignment, -gaps
+      */
+
+      /* it seems like this should be done in calc_coord() */
+
+      lloff = aln->l_offset + /* aln->frame  + */
+	(long)llsgn*aln->llmult*ioff10 +
+	(long)llsgn*llfact*(ioff1-del1-ioff10);
+
+      if (cl1 && lloff%10 == 9)  {
+	sprintf(&clinep[1][i-lfxn],"%8ld",lloff+1l);
+	clinep[1][i+8-lfxn]=' ';
+	rl1 = NO;
+      }
+      else if (cl1 && lloff== -1) {
+	/* write at i-lfxn, like every other coordinate, so that the
+	   blank below replaces the terminator and not the digit */
+	sprintf(&clinep[1][i-lfxn],"%8ld",0l);
+	clinep[1][i+8-lfxn]=' ';
+	rl1 = NO;
+      }
+      else if (rl1 && (lloff+1)%10 == 0) {
+	sprintf(&clinep[1][i-lfxn],"%8ld",lloff+1);
+	clinep[1][i+8-lfxn]=' ';
+      }
+
+      line[1][i] = ' ';
+      if (ioff0-del0 >= aln->amin0 && ioff0-del0 <= aln->amax0) {
+	if (m_msp->markx & MX_MBLAST) {
+	  if (disp_dna_align) {
+	    if (seqca[ic]==4) {line[1][i] = '|';}
+	    else {line[1][i] = ' ';}
+	  }
+	  else {
+	    if (seqca[ic]==4) {line[1][i]=line[0][i];}
+	    else {line[1][i] = map_sym_p[seqca[ic]];}
+	  }
+	}
+	else {
+	  if (seqca[ic]==4) {line[1][i]=map_sym_p[4];}
+	  else if ((m_msp->markx&MX_ATYPE)==2) line[1][i]=line[2][i];
+	  else line[1][i] = map_sym_p[seqca[ic]];
+	}
+      }
+      else if ((m_msp->markx&MX_ATYPE)==2) line[1][i]=line[2][i];
+    }
+
+    /* end coordinates for the MX_MBLAST lines: the last residue spans
+       qlfact/llfact positions in the direction of its own strand */
+    q_end = qqoff + qf_flag + (aln->qlfact-1)*(aln->qlrev > 0 ? -1 : 1);
+    s_end = lloff + sf_flag + (aln->llfact-1)*(aln->llrev > 0 ? -1 : 1);
+
+    if (m_msp->ann_flg) {
+      for (ic=ic_save,i=0; i<lend; ic++,i++) {
+	if (m_msp->markx&MX_ANNOT_MID) {
+	  if (seqc0a && seqc0a[ic]!= ' ') {line[1][i] = seqc0a[ic];}
+	  if (seqc1a && seqc1a[ic]!= ' ') {line[1][i] = seqc1a[ic];}
+	}
+	if (m_msp->markx&MX_ANNOT_COORD) {
+	  if (seqc0a && seqc0a[ic]!= ' ') clinep[0][i+7-qfxn] = seqc0a[ic];
+	  if (seqc1a && seqc1a[ic]!= ' ') clinep[1][i+7-lfxn] = seqc1a[ic];
+	}
+      }
+    }
+
+    line[0][lend]=line[1][lend]=line[2][lend]=0;
+    clinep[0][lend+7]=clinep[1][lend+7]=0;
+    
+    ll01 = ll0&&ll1;
+    if ((m_msp->markx&MX_ATYPE)==2 && (!aln->showall || ll0)) ll1=0;
+    fprintf(fd,"\n");
+    if (!(m_msp->markx & MX_MBLAST)) {
+      if (ll0) fprintf(fd,"%s%s\n",blank,clinep[0]);
+      if (ll0) fprintf(fd,afmt,name0,line[0]);
+      if (ll01) fprintf(fd,afmt,name01,line[1]);
+      if (ll1) fprintf(fd,afmt,name1,line[2]);
+      if (ll1) fprintf(fd,"%s%s\n",blank,clinep[1]);
+    }
+    else {
+      /* this code emulates BLAST output, but currently only for
+	 coordinates < 10000) coordinates > 10000 will require that
+	 the offset start and end coordinates across the entire
+	 alignment are checked, and then the format is modified to
+	 ensure that they will fit.  A simple %-d does not work,
+	 because the other sequence (query/library) may have the large
+	 coordinate.
+
+	 thus, afmt and afmt0 must be modified for each sequence,
+	 based on the boundaries of the alignment
+      */
+
+      if (ll0) fprintf(fd,afmt,name0,q_start,line[0],q_end);
+      if (ll01) fprintf(fd,afmt0,blank,line[1]);
+      if (ll1) fprintf(fd,afmt,name1,s_start,line[2],s_end);
+    }
+  }
+}
+
+static float gscale= -1.0;
+
+/* disgraph() -- write a one-line "map" of where an alignment falls
+   on the query.
+
+   The bar is mlen-10 columns wide.  The columns-per-residue scale is
+   kept in the file-level static gscale: it is computed from n0 the
+   first time through (while gscale is still negative) and re-used by
+   later calls.  On that first call, if (markx & MX_ATYPE)==7, a
+   header line for name0 with a blank bar is written as well.
+
+   '-' marks the columns from min0 to max0 (scaled), and ':' is
+   placed (n1-max1) scaled columns beyond the end of the dashes.  The
+   line itself is only written when MX_AMAP is set in markx; score
+   and percent appear only in the (markx & MX_ATYPE)==7 form.
+   min1 is not used.
+*/
+void
+disgraph(FILE *fd, int n0,int n1, float percent, int score,
+	 int min0, int min1, int max0, int max1, long sq0off,
+	 char *name0, char *name1, int nml,
+	 int mlen, int markx)
+{
+  char bar[MAXOUT+1];
+  char name_fmt[16], row_fmt[64];
+  int width, col;
+  int dash_from, dash_to, colon_at;
+
+  /* the name column is left-justified and at least 6 wide */
+  if (nml <= 6) {
+    SAFE_STRNCPY(name_fmt,"%-6s",sizeof(name_fmt));
+  }
+  else {
+    sprintf(name_fmt,"%%-%ds",nml);
+  }
+
+  width = mlen - 10;
+  memset(bar,' ',width);
+  bar[width-1]='\0';
+
+  if (gscale < 0.0) {
+    gscale = (float)width/(float)n0;
+    if ((markx&MX_ATYPE) == 7 ) {
+      SAFE_STRNCPY(row_fmt,name_fmt,sizeof(row_fmt));
+      SAFE_STRNCAT(row_fmt," %4ld-%4ld:     %5.1f%%:%s:\n",sizeof(row_fmt));
+      fprintf(fd,row_fmt,name0,sq0off,sq0off+n0-1,100.0,bar);
+    }
+  }
+
+  dash_from = (int)(gscale*(float)min0+0.5);
+  dash_to = (int)(gscale*(float)max0+0.5);
+  /* the ':' position is measured from the unclipped end of the dashes */
+  colon_at = dash_to+(int)(gscale*(float)(n1-max1));
+
+  if (dash_to >= width) { dash_to = width-1; }
+  if (colon_at >= width) { colon_at = width-1; }
+
+  /* blanks, then dashes, then blanks again out to the full width */
+  col = 0;
+  while (col < dash_from) { bar[col++]=' '; }
+  while (col < dash_to) { bar[col++]='-'; }
+  while (col < width) { bar[col++]=' '; }
+
+  bar[colon_at]=':';
+  bar[width]='\0';
+
+  if (!(markx & MX_AMAP)) { return; }
+
+  if ((markx & MX_ATYPE)==7) {	/* markx==4 - no alignment */
+    SAFE_STRNCPY(row_fmt,name_fmt,sizeof(row_fmt));
+    SAFE_STRNCAT(row_fmt," %4ld-%4ld:%4d %5.1f%%:%s\n",sizeof(row_fmt));
+    fprintf(fd,row_fmt,name1,min0+sq0off,max0+sq0off-1,score,percent,bar);
+  }
+  else {
+    SAFE_STRNCPY(row_fmt,">",sizeof(row_fmt));
+    SAFE_STRNCAT(row_fmt,name_fmt,sizeof(row_fmt));
+    SAFE_STRNCAT(row_fmt," %4ld-%4ld:%s\n",sizeof(row_fmt));
+    fprintf(fd,row_fmt, name1,min0+sq0off,max0+sq0off-1,bar);
+  }
+}
+
+/* aancpy() -- copy an encoded sequence, converting residue codes to
+   letters.
+
+   At most count bytes are copied from from[] to to[], stopping early
+   at a 0 byte; to[] is always '\0' terminated, so it must have room
+   for count+1 bytes.  A byte whose value is <= ppst->nsqx is taken
+   to be a residue code and is replaced by the corresponding letter
+   (from ppst->sqx[] if ppst->ext_sq_set, else ppst->sq[]); any other
+   byte is copied unchanged.
+
+   Each byte is read as an unsigned char, so that bytes >= 0x80 are
+   copied unchanged rather than used as a negative index on
+   platforms where plain char is signed.
+*/
+void
+aancpy(char *to, char *from, int count, struct pstruct *ppst)
+{
+  char *tp;
+  unsigned char *sq;
+  unsigned char code;
+  int nsq;
+
+  nsq = ppst->nsqx;
+  if (ppst->ext_sq_set) {
+    sq = ppst->sqx;
+  }
+  else {
+    sq = ppst->sq;
+  }
+
+  tp=to;
+  while (count-- && *from) {
+    code = (unsigned char)*from++;
+    if (code <= nsq) *tp++ = sq[code];
+    else *tp++ = (char)code;
+  }
+  *tp='\0';
+}
+
+/* calc_coord transfers alignment boundary coordinates (aln->amin0,
+   amin1, amax0, amax1) into display coordinates (aln->d_start0,
+   d_stop0, d_start1, d_stop1), and records the sequence start/end
+   offsets (q_start_off, q_end_off, l_start_off, l_end_off) and the
+   working offsets (aln->q_offset, aln->l_offset).
+
+   For a reversed sequence (aln->qlrev == 1 / aln->llrev == 1) the
+   offset is moved to the far end of the sequence (+n0 / +n1) and
+   the alignment boundaries are subtracted from it; library
+   boundaries are multiplied by aln->llmult.
+
+   this routine now indexes from 1 (rather than 0) because sq starts
+   with a 0
+*/
+
+void
+calc_coord(int n0, int n1,
+	  long q_offset,
+	  long l_offset,
+	  struct a_struct *aln)
+{
+  /* 1 when the sequence is displayed reversed, else 0 */
+  const int q_rev = (aln->qlrev == 1);
+  const int l_rev = (aln->llrev == 1);
+  /* direction of travel along each sequence */
+  const int q_step = q_rev ? -1 : 1;
+  const int l_step = l_rev ? -1 : 1;
+  /* the +1 (1-based) correction goes on the start of a forward
+     sequence, but on the stop of a reversed one */
+  const int q_fwd = !q_rev;
+  const int l_fwd = !l_rev;
+
+  if (q_rev) {
+    aln->q_end_off = q_offset+1;
+    q_offset += n0;
+    aln->q_start_off = q_offset;
+  }
+  else {
+    aln->q_start_off = q_offset + 1;
+    aln->q_end_off = q_offset+n0;
+  }
+
+  if (l_rev) {
+    aln->l_end_off = l_offset+1;
+    l_offset += n1;
+    aln->l_start_off = l_offset;
+  }
+  else {
+    aln->l_start_off = l_offset + 1;
+    aln->l_end_off = l_offset+n1;
+  }
+
+  aln->q_offset = q_offset;
+  aln->l_offset = l_offset;
+
+  aln->d_start0 = q_offset+q_step*aln->amin0+q_fwd;
+  aln->d_stop0  = q_offset+q_step*aln->amax0+q_rev;
+  aln->d_start1 = l_offset+l_step*aln->amin1*aln->llmult+l_fwd;
+  aln->d_stop1  = l_offset+l_step*aln->amax1*aln->llmult+l_rev;
+}
diff --git a/src/cal_cons.c b/src/cal_cons.c
new file mode 100644
index 0000000..e61fd60
--- /dev/null
+++ b/src/cal_cons.c
@@ -0,0 +1,1226 @@
+/* cal_cons.c - routines for printing translated alignments for
+   fasta, ssearch, ggsearch, glsearch  */
+
+/* $Id: cal_cons.c 1280 2014-08-21 00:47:55Z wrp $ */
+/* $Revision: 1280 $  */
+
+/* copyright (c) 1998, 1999, 2007, 2014 by William R. Pearson and The
+   Rector & Visitors of the University of Virginia */
+
+/* Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under this License is distributed on an "AS
+   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+   express or implied.  See the License for the specific language
+   governing permissions and limitations under the License. 
+*/
+
+
+/* removed from dropgsw2.c, dropnfa.c April, 2007 */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "defs.h"
+#include "param.h"
+#include "dyn_string.h"
+
+#if defined(FASTA) || defined(TFASTA)
+#include "dropnfa.h"
+#endif
+
+#if defined(SSEARCH) || defined(OSEARCH)
+#include "dropgsw2.h"
+#endif
+
+#ifdef LALIGN
+#include "dropgsw2.h"
+#endif
+
+#include "a_mark.h"
+
+struct update_code_str {	/* run-length encoder state passed to update_code() and close_update_data() */
+  int p_op_idx;	/* op index of the pending run, used to index op_map; init_update_data() sets it to -1 */
+  int p_op_cnt;	/* length of the pending run; 0 until update_code() has seen a position */
+  int show_code;	/* the show_code flags given to init_update_data() */
+  int cigar_order;	/* 1: runs are written count-then-character ("%d%c"); 0: character-then-count ("%c%d") */
+  int show_ext;	/* 1 when SHOW_CODE_EXT is set in show_code, else 0 */
+  char *op_map;	/* op index -> output character; points at cigar_code or ori_code */
+};
+
+static char *ori_code = "=-+*x";	/* legacy op characters by op index: 0 match, 1 delete, 2 insert, 3 term-match, 4 mismatch */
+static char *cigar_code = "MDIMX";	/* CIGAR op characters, indexed the same way */
+
+static struct update_code_str *
+init_update_data(int show_code);
+
+static void
+sprintf_code(char *tmp_str, struct update_code_str *, int op_idx, int op_cnt);
+
+static void 
+update_code(char *al_str, int al_str_max, 
+	    struct update_code_str *update_data, int op, 
+	    int sim_code, unsigned char sp0, unsigned char sp1);
+
+static void
+close_update_data(char *al_str, int al_str_max,
+		  struct update_code_str *update_data);
+
+extern void aancpy(char *to, char *from, int count, struct pstruct *ppst);
+extern void *init_stack(int, int);
+extern void push_stack(void *, void *);
+extern void *pop_stack(void *);
+extern void *free_stack(void *);
+
+/* returns M_NEG, M_ZERO, M_POS, M_IDENT, M_DEL (a_mark.h)
+   updates *aln->nsim, npos, nident, nmismatch */
+extern int
+align_type(int score, char sp0, char sp1, int nt_align, struct a_struct *aln, int pam_x_id_sim);
+
+extern void
+process_annot_match(int *itmp, int *pam2aa0v, 
+		    long ip, long ia, char *sp1, char *sp1a, const unsigned char *sq,
+		    struct annot_entry *annot_arr_p, char **ann_comment,
+		    void *annot_stack, int *have_push_features, int *v_delta,
+		    int *d_score_p, int *d_ident_p, int *d_alen_p, struct domfeat_link **left_domain_p,
+		    long *left_end_p, int init_score);
+
+extern int
+next_annot_match(int *itmp, int *pam2aa0v, 
+		 long ip, long ia, char *sp1, char *sp1a, const unsigned char *sq,
+		 int i_annot, int n_annot, struct annot_entry **annot_arr, char **ann_comment,
+		 void *annot_stack, int *have_push_features, int *v_delta,
+		 int *d_score_p, int *d_ident_p, int *d_alen_p, struct domfeat_link **left_domain,
+		 long *left_domain_end, int init_score);
+
+extern void
+close_annot_match (int ia, void *annot_stack, int *have_push_features,
+		   int *d_score_p, int *d_ident_p, int *d_alen_p,
+		   struct domfeat_link **left_domain_p,
+		   long *left_end_p, int init_score);
+
+extern void
+comment_var(long i0, char sp0, long i1, char sp1, char o_sp1, char sim_char,
+	    const char *ann_comment, struct dyn_string_str *annot_var_dyn,
+	    int target, int d_type);
+
+void
+display_push_features(void *annot_stack, struct dyn_string_str *annot_var_dyn,
+		      long i0_pos, char sp0, long i1_pos, char sp1, char sym, 
+		      int score, double comp, int n0, int n1,
+		      void *pstat_void, int d_type);
+
+#define DP_FULL_FMT 1	/* Region: score: bits: id: ... */
+
+extern int seq_pos(int pos, int rev, int off);
+
+int calc_cons_a(const unsigned char *aa0, int n0,	/* sequence 0 (query side) residue codes, length */
+		const unsigned char *aa1, int n1,	/* sequence 1 (library side) residue codes, length */
+		int *nc,	/* out: number of alignment columns, flanking context not included */
+		struct a_struct *aln,	/* in: offsets, llen, llcntx, showall, qlrev/llrev; out: amin/amax, smin0/1, counts */
+		struct a_res_str *a_res,	/* alignment bounds (min0,max0,min1,max1), edit script res[], rst scores */
+		struct pstruct *ppst,	/* alphabet tables, pam2/pam2p score matrices, gdelval/ggapval */
+		char *seqc0, char *seqc1, char *seqca, int *cumm_seq_score,	/* out: text rows, per-column codes, per-column scores (may be NULL) */
+		const unsigned char *ann_arr,	/* annotation code -> display character */
+		const unsigned char *aa0a, const struct annot_str *annot0_p, char *seqc0a,	/* sequence 0 annotation codes, list, output row */
+		const unsigned char *aa1a, const struct annot_str *annot1_p, char *seqc1a,	/* sequence 1 annotation codes, list, output row */
+		int *score_delta, 	/* out: v_delta as accumulated by the annotation helpers */
+		struct dyn_string_str *annot_var_dyn,	/* out: annotation text; reset on entry */
+		struct f_struct *f_str,	/* only read in TFASTA builds (aa1x, n10) */
+		void *pstat_void)	/* passed through to display_push_features() */
+{	/* builds the display rows for one alignment; returns the total number of columns (context + alignment) */
+  int i0, i1, nn1;
+  int op, lenc, nd, ns, itmp, p_match;
+  const unsigned char *aa1p;
+  char *sp0, *sp0a, *sp1, *sp1a, *spa, t_spa;
+  int *i_spa;
+  const unsigned char *sq;
+  int *rp;
+  int smins, mins, ntmp;
+  int have_ann = 0;
+  void *annot_stack;
+
+  /* variables for variant changes */
+  int *aa0_pam2_p;
+  char *sim_sym = aln_map_sym[5];
+  struct annot_entry **s_annot0_arr_p, **s_annot1_arr_p;
+
+  char *ann_comment;
+  int i0_annot, i1_annot;	/* i0_annot, i1_annot, count through
+				   the list of annotations */
+  long i0_left_end, i1_left_end; /* left-most coordinate of domain end */
+
+  int v_delta, v_tmp;
+  int d1_score, d1_ident, d1_alen;
+  int d0_score, d0_ident, d0_alen;
+  int have_push_features;
+  struct domfeat_link *left_domain_list1, *left_domain_list0;
+
+  /* variables for handling coordinate offsets */
+  long q_offset, l_offset;
+
+  *score_delta = 0;
+  i0_left_end = i1_left_end = -1;
+  left_domain_list0 = left_domain_list1 = NULL;
+  d1_score = d1_ident = d1_alen = 0;
+  d0_score = d0_ident = d0_alen = 0;
+
+  NULL_dyn_string(annot_var_dyn);
+  have_ann = (seqc0a != NULL); 
+  /* pick the extended (sqx) or the standard (sq) code -> character table */
+  if (ppst->ext_sq_set) {
+    sq = ppst->sqx;
+  }
+  else {
+    sq = ppst->sq;
+  }
+  /* TFASTA builds take the sequence 1 residues and length from f_str instead of aa1/n1 */
+#ifndef TFASTA
+  aa1p = aa1;
+  nn1 = n1;
+#else
+  aa1p = f_str->aa1x;
+  nn1 = f_str->n10;
+#endif
+  /* record the alignment boundaries in *aln */
+  aln->amin0 = a_res->min0;
+  aln->amax0 = a_res->max0;
+  aln->amin1 = a_res->min1;
+  aln->amax1 = a_res->max1;
+  aln->calc_last_set = 1;
+
+  q_offset = aln->q_offset;
+  l_offset = aln->l_offset;
+  /* left flank: copy mins residues preceding the alignment start into seqc0/seqc1 */
+#ifndef LCAL_CONS
+  /* will we show all the start ?*/
+  if (min(a_res->min0,a_res->min1)<aln->llen || aln->showall==1)
+    if (a_res->min0 >= a_res->min1) {              /* aa0 extends more to left */
+      smins=0;
+      if (aln->showall==1) mins = a_res->min0;
+      else mins = min(a_res->min0,aln->llcntx);
+      aancpy(seqc0,(char *)aa0+a_res->min0-mins,mins,ppst);
+      aln->smin0 = a_res->min0-mins;
+      if ((mins-a_res->min1)>0) {
+	memset(seqc1,' ',mins-a_res->min1);
+	aancpy(seqc1+mins-a_res->min1,(char *)aa1p,a_res->min1,ppst);
+	aln->smin1 = 0;
+      }
+      else {
+	aancpy(seqc1,(char *)aa1p+a_res->min1-mins,mins,ppst);
+	aln->smin1 = a_res->min1-mins;
+      }
+    }
+    else {
+      smins=0;
+      if (aln->showall == 1) mins=a_res->min1;
+      else mins = min(a_res->min1,aln->llcntx);
+      aancpy(seqc1,(char *)(aa1p+a_res->min1-mins),mins,ppst);
+      aln->smin1 = a_res->min1-mins;
+      if ((mins-a_res->min0)>0) {
+	memset(seqc0,' ',mins-a_res->min0);
+	aancpy(seqc0+mins-a_res->min0,(char *)aa0,a_res->min0,ppst);
+	aln->smin0 = 0;
+      }
+      else {
+	aancpy(seqc0,(char *)aa0+a_res->min0-mins,mins,ppst);
+	aln->smin0 = a_res->min0-mins;
+      }
+    }
+  else {
+    mins= min(aln->llcntx,min(a_res->min0,a_res->min1));
+    smins=mins;
+    aln->smin0=a_res->min0 - smins;
+    aln->smin1=a_res->min1 - smins;
+    aancpy(seqc0,(char *)aa0+a_res->min0-mins,mins,ppst);
+    aancpy(seqc1,(char *)aa1p+a_res->min1-mins,mins,ppst);
+  }
+  /* set the alignment code to zero for context */
+  memset(seqca,0,mins);
+  if (have_ann) {
+    memset(seqc0a,' ',mins);
+    memset(seqc1a,' ',mins);
+  }
+#else		/* no flanking context */
+  smins = mins = 0;
+  aln->smin0=a_res->min0;
+  aln->smin1=a_res->min1;
+#endif
+
+  /* now get the middle */
+  /* the output cursors start just past the mins columns of left context */
+  spa = seqca+mins;
+  if (cumm_seq_score) i_spa = cumm_seq_score+mins;
+  sp0 = seqc0+mins;
+  sp1 = seqc1+mins;
+
+  if (have_ann) {
+    sp0a = seqc0a+mins;
+    sp1a = seqc1a+mins;
+  }
+  /* rp walks the edit script a_res->res; the counters in *aln are reset */
+  rp = a_res->res;
+  lenc = aln->nident = aln->nmismatch =
+    aln->npos = aln->nsim = aln->ngap_q = aln->ngap_l = aln->nfs = op = 0;
+  p_match = 1;
+  i0 = a_res->min0;
+  i1 = a_res->min1;
+
+  v_delta = 0;
+  i0_annot = i1_annot = 0;
+  annot_stack = NULL;
+  s_annot0_arr_p = s_annot1_arr_p = NULL;
+  have_push_features=0;
+  if (have_ann) {	/* handle '-' labelled annotations that start before the alignment and reach into it */
+    if ((annot1_p && annot1_p->n_annot>0) || (annot0_p && annot0_p->n_annot > 0)) {annot_stack = init_stack(64,64);}
+    if (annot1_p && annot1_p->n_annot > 0) {
+      s_annot1_arr_p = annot1_p->s_annot_arr_p;
+
+      while (i1_annot < annot1_p->n_annot) {
+	if (s_annot1_arr_p[i1_annot]->pos >= i1 + l_offset) {break;}
+	if (s_annot1_arr_p[i1_annot]->end < i1 + l_offset) {i1_annot++; continue;}
+	/* NOTE(review): aa0_pam2_p has not been assigned yet when it is passed below - confirm the callee ignores it */
+	if (s_annot1_arr_p[i1_annot]->label == '-') {
+	  process_annot_match(&itmp, aa0_pam2_p, l_offset+seq_pos(i1,aln->llrev,0), q_offset + seq_pos(i0,aln->qlrev,0),
+			      sp1, sp1a, sq, s_annot1_arr_p[i1_annot],  &ann_comment, 
+			      annot_stack, &have_push_features, &v_delta,
+			      &d1_score, &d1_ident, &d1_alen, &left_domain_list1, &i1_left_end, 0);
+	}
+	i1_annot++;
+      }
+    }
+
+    if (annot0_p && annot0_p->n_annot>0) {
+      s_annot0_arr_p = annot0_p->s_annot_arr_p;
+
+      while (i0_annot < annot0_p->n_annot && s_annot0_arr_p[i0_annot]->pos < i0 + q_offset) {
+	if (s_annot0_arr_p[i0_annot]->pos >= i0 + q_offset) {break;}
+	if (s_annot0_arr_p[i0_annot]->end < i0 + q_offset) {i0_annot++; continue;}
+
+	if (s_annot0_arr_p[i0_annot]->label == '-') {
+	  process_annot_match(&itmp, aa0_pam2_p, q_offset+seq_pos(i0,aln->qlrev,0), l_offset + seq_pos(i1,aln->llrev,0),
+			      sp0, sp0a, sq, s_annot0_arr_p[i0_annot], &ann_comment, 
+			      annot_stack, &have_push_features, &v_delta,
+			      &d0_score, &d0_ident, &d0_alen, &left_domain_list0, &i0_left_end, 0);
+	}
+	i0_annot++;
+      }
+    }
+  }
+  /* walk the edit script until both sequences have reached their alignment ends */
+  while (i0 < a_res->max0 || i1 < a_res->max1) {
+    /* match/mismatch (aligned residues */
+    /* here, op is the "current" encoding, and *rp is the next one */
+    if (op == 0 && *rp == 0) {
+      op = *rp++;
+      lenc++;
+      /* score row for the sequence 0 residue: position specific (pam2p) or per residue (pam2) */
+      if (ppst->pam_pssm) {aa0_pam2_p = ppst->pam2p[0][i0];}
+      else {aa0_pam2_p = ppst->pam2[0][aa0[i0]];}
+
+      itmp=aa0_pam2_p[aa1p[i1]];
+
+      *sp0 = sq[aa0[i0]];
+      *sp1 = sq[aa1p[i1]];
+
+      if (have_ann) {
+	have_push_features = 0;
+	*sp0a = *sp1a = ' ';
+	if (aa0a) {*sp0a = ann_arr[aa0a[i0]];}
+	if (aa1a) {*sp1a = ann_arr[aa1a[i1]];}
+	if (s_annot1_arr_p) {
+	  if (i1+l_offset == s_annot1_arr_p[i1_annot]->pos || i1+l_offset == i1_left_end) {
+
+	    i1_annot = next_annot_match(&itmp, aa0_pam2_p, l_offset+seq_pos(i1,aln->llrev,0),
+					q_offset + seq_pos(i0,aln->qlrev,0), sp1, sp1a, sq, 
+					i1_annot, annot1_p->n_annot, s_annot1_arr_p,
+					&ann_comment, annot_stack, &have_push_features, &v_delta,
+					&d1_score, &d1_ident, &d1_alen, &left_domain_list1, &i1_left_end, 0);
+
+	    /* must be out of the loop to capture the last value */
+	    if (sq[aa1p[i1]] != *sp1) {
+	      t_spa = align_type(itmp, *sp0, *sp1, ppst->nt_align, NULL, ppst->pam_x_id_sim);
+
+	      comment_var(q_offset+seq_pos(i0,aln->qlrev,0), *sp0,
+			  l_offset+seq_pos(i1,aln->llrev,0), *sp1,
+			  sq[aa1p[i1]], sim_sym[t_spa], ann_comment,
+			  annot_var_dyn, 1, 1);
+	    }
+	  }
+	  d1_score += itmp;
+	}
+
+	if (s_annot0_arr_p) {
+	  if (i0 + q_offset == s_annot0_arr_p[i0_annot]->pos || i0 + q_offset == i0_left_end) {
+
+	    i0_annot = next_annot_match(&itmp, aa0_pam2_p, q_offset+seq_pos(i0,aln->qlrev,0),
+					l_offset + seq_pos(i1,aln->llrev,0), sp0, sp0a, sq, 
+					i0_annot, annot0_p->n_annot, s_annot0_arr_p,
+					&ann_comment, annot_stack, &have_push_features, &v_delta,
+					&d0_score, &d0_ident, &d0_alen, &left_domain_list0, &i0_left_end, 0);
+
+	    /* must be out of the loop to capture the last value */
+	    if (sq[aa0[i0]] != *sp0) {
+	      t_spa = align_type(itmp, *sp0, *sp1, ppst->nt_align, NULL, ppst->pam_x_id_sim);
+
+	      comment_var(q_offset+seq_pos(i0,aln->qlrev,0), *sp0,
+			  l_offset+seq_pos(i1,aln->llrev,0), *sp1,
+			  sq[aa0[i0]], sim_sym[t_spa], ann_comment,
+			  annot_var_dyn, 0, 1);
+	    }
+	  }
+	  d0_score += itmp;
+	}
+	sp0a++; sp1a++;
+      }
+      /* classify the column; align_type() also updates the counts in *aln */
+      *spa = align_type(itmp, *sp0, *sp1, ppst->nt_align, aln, ppst->pam_x_id_sim);
+
+      d1_alen++;
+      d0_alen++;
+      if (*spa == M_IDENT) {
+	d1_ident++;
+	d0_ident++;
+      }
+
+      /* now we have done all the ?modified identity checks, display
+	 potential site annotations */
+      if (have_ann && have_push_features) {
+	display_push_features(annot_stack, annot_var_dyn,
+			      q_offset+seq_pos(i0,aln->qlrev,0), *sp0,
+			      l_offset+seq_pos(i1,aln->llrev,0), *sp1,
+			      sim_sym[*spa],
+			      a_res->rst.score[ppst->score_ix], a_res->rst.comp, n0, n1,
+			      pstat_void, DP_FULL_FMT);
+	have_push_features = 0;
+      }
+
+      if (cumm_seq_score) *i_spa++ = itmp;
+      i0++; i1++;
+      sp0++; sp1++; spa++;
+    }
+    else {	/* indel */
+      if (op==0) {	/* first column of a gap: fetch the run length and charge gdelval once */
+	op = *rp++;
+	if (cumm_seq_score) *i_spa = ppst->gdelval;
+	d1_score +=  ppst->gdelval;
+	d0_score +=  ppst->gdelval;
+      }
+      if (cumm_seq_score) *i_spa++ += ppst->ggapval;
+      d1_score +=  ppst->ggapval; d1_alen++;
+      d0_score +=  ppst->ggapval; d0_alen++;
+
+      if (op>0) {	/* insertion in aa0 */
+	*sp1 = sq[aa1p[i1]];
+	*sp0 = '-';
+	*spa++ = M_DEL;
+	if (have_ann) {
+	  have_push_features = 0;
+	  *sp0a++ = ' ';
+	  if (aa1a) *sp1a = ann_arr[aa1a[i1]];
+	  else *sp1a = ' ';
+	  if (s_annot1_arr_p) {
+	    if (i1+l_offset == s_annot1_arr_p[i1_annot]->pos || i1+l_offset == i1_left_end) {
+	      i1_annot = next_annot_match(&itmp, aa0_pam2_p, l_offset+seq_pos(i1,aln->llrev,0),
+					  q_offset+seq_pos(i0,aln->qlrev,0), sp1, sp1a, sq, 
+					  i1_annot, annot1_p->n_annot, s_annot1_arr_p,
+					  &ann_comment, annot_stack, &have_push_features, &v_delta,
+					  &d1_score, &d1_ident, &d1_alen, &left_domain_list1, &i1_left_end,
+					  ppst->ggapval+ppst->gdelval);
+	    }
+	  }
+
+	  if (have_push_features) {
+	    display_push_features(annot_stack, annot_var_dyn,
+				  q_offset+seq_pos(i0,aln->qlrev,0), *sp0,
+				  l_offset+seq_pos(i1,aln->llrev,0), *sp1,
+				  sim_sym[*spa],
+				  a_res->rst.score[ppst->score_ix], a_res->rst.comp, n0, n1,
+				  pstat_void, DP_FULL_FMT);
+	    have_push_features = 0;
+	  }
+	  sp1a++;
+	}
+
+	sp0++;
+	sp1++;
+	i1++;
+	op--;
+	lenc++;
+	aln->ngap_q++;
+      }
+      else {		/* insertion in aa1 */
+	*sp1 = '-';
+	*spa++ = M_DEL;
+	*sp0 = sq[aa0[i0]];
+	if (have_ann) {
+	  have_push_features = 0;
+	  *sp1a++ = ' ';
+	  if (aa0a) *sp0a = ann_arr[aa0a[i0]];
+	  else *sp0a = ' ';
+	  if (s_annot0_arr_p) {
+	    if (i0+q_offset == s_annot0_arr_p[i0_annot]->pos || i0+q_offset == i0_left_end) {
+	      i0_annot = next_annot_match(&itmp, ppst->pam2[0][aa1[i1]], q_offset+seq_pos(i0,aln->qlrev,0),
+					  l_offset+seq_pos(i1,aln->llrev,0), sp0, sp0a, sq, 
+					  i0_annot, annot0_p->n_annot, s_annot0_arr_p,
+					  &ann_comment, annot_stack, &have_push_features, &v_delta,
+					  &d0_score, &d0_ident, &d0_alen, &left_domain_list0, &i0_left_end,
+					  ppst->ggapval+ppst->gdelval);
+	      /* NOTE(review): the call above indexes pam2 with aa1[i1], not aa1p[i1] as elsewhere - confirm for TFASTA */
+	    }
+	  }
+
+	  if (have_push_features) {
+	    display_push_features(annot_stack, annot_var_dyn,
+				  q_offset+seq_pos(i0,aln->qlrev,0), *sp0,
+				  l_offset+seq_pos(i1,aln->llrev,0), *sp1,
+				  sim_sym[*spa],
+				  a_res->rst.score[ppst->score_ix], a_res->rst.comp, n0, n1, pstat_void, DP_FULL_FMT);
+	    have_push_features = 0;
+	  }
+	  sp0a++;
+	}
+
+	i0++;
+	sp0++;
+	sp1++;
+	op++;
+	lenc++;
+	aln->ngap_l++;
+      }
+    }
+  }
+  /* report the accumulated score change and the number of alignment columns */
+  *score_delta = v_delta;
+
+  *nc = lenc;
+  if (have_ann) {
+    *sp0a = *sp1a = '\0';
+    have_push_features = 0;
+    /* check for left ends after alignment */
+    if (annot1_p && i1_left_end > 0) {
+      close_annot_match(-1, annot_stack, &have_push_features,
+			&d1_score, &d1_ident, &d1_alen, &left_domain_list1, &i1_left_end,
+			0);
+    }
+
+    if (annot0_p && i0_left_end > 0) {
+      close_annot_match(-1, annot_stack, &have_push_features,
+			&d0_score, &d0_ident, &d0_alen, &left_domain_list0, &i0_left_end,
+			0);
+    }
+
+    if (have_push_features) {
+      display_push_features(annot_stack, annot_var_dyn,
+			    a_res->max0-1 + q_offset, *sp0, 
+			    a_res->max1-1 + l_offset, *sp1,
+			    sim_sym[*spa],
+			    a_res->rst.score[ppst->score_ix], a_res->rst.comp, n0, n1, pstat_void, DP_FULL_FMT);
+    }
+  }
+
+  *spa = '\0';
+
+#ifndef LCAL_CONS	/* have context around alignment */
+  /*	now we have the middle, get the right end */
+  if (!aln->llcntx_set) {
+    ns = mins + lenc + aln->llen;	/* show an extra line? */
+    ns -= (itmp = ns %aln->llen);	/* itmp = left over on last line */
+    if (itmp>aln->llen/2) ns += aln->llen;  /* more than 1/2 , use another*/
+    nd = ns - (mins+lenc);		/* this much extra */
+  }
+  else nd = aln->llcntx;
+
+  if (nd > max(n0-a_res->max0,nn1-a_res->max1)) 
+    nd = max(n0-a_res->max0,nn1-a_res->max1);
+  
+  if (aln->showall==1) {
+    nd = max(n0-a_res->max0,nn1-a_res->max1);	/* reset for showall=1 */
+    /* get right end */
+    aancpy(seqc0+mins+lenc,(char *)aa0+a_res->max0,n0-a_res->max0,ppst);
+    aancpy(seqc1+mins+lenc,(char *)aa1p+a_res->max1,nn1-a_res->max1,ppst);
+    /* fill with blanks - this is required to use one 'nc' */
+    memset(seqc0+mins+lenc+n0-a_res->max0,' ',nd-(n0-a_res->max0));
+    memset(seqc1+mins+lenc+nn1-a_res->max1,' ',nd-(nn1-a_res->max1));
+  }
+  else {
+    if ((nd-(n0-a_res->max0))>0) {
+      aancpy(seqc0+mins+lenc,(char *)aa0+a_res->max0,(n0-a_res->max0),ppst);
+      memset(seqc0+mins+lenc+n0-a_res->max0,' ',nd-(n0-a_res->max0));
+    }
+    else {
+      aancpy(seqc0+mins+lenc,(char *)aa0+a_res->max0,nd,ppst);
+    }
+
+    if ((nd-(nn1-a_res->max1))>0) {
+      aancpy(seqc1+mins+lenc,(char *)aa1p+a_res->max1,nn1-a_res->max1,ppst);
+      memset(seqc1+mins+lenc+nn1-a_res->max1,' ',nd-(nn1-a_res->max1));
+    }
+    else {
+      aancpy(seqc1+mins+lenc,(char *)aa1p+a_res->max1,nd,ppst);
+    }
+  }
+  if (have_ann) {
+    memset(seqc0a+mins+lenc,' ',nd);
+    memset(seqc1a+mins+lenc,' ',nd);
+    /*
+      ntmp = nd-(n0-a_res->max0);
+      if (ntmp > 0) memset(seqc0a+mins+lenc+n0-a_res->max0,' ',ntmp);
+      ntmp = nd-(nn1-a_res->max1);
+      if (ntmp > 0) memset(seqc1a+mins+lenc+nn1-a_res->max1,' ',ntmp);
+    */
+  }
+#else
+  nd = 0;
+#endif
+
+  /*  fprintf(stderr,"%d\n",mins+lenc+nd); */
+  /* total columns: left context + alignment + right context */
+  lenc = mins + lenc + nd;
+  /* NOTE(review): annot_stack is still NULL here when have_ann is 0 or both lists are empty - confirm free_stack() accepts NULL */
+  if (annot0_p || annot1_p) free_stack(annot_stack);
+
+  return lenc;
+}
+
+/* calc_astruct() - copy the alignment start/end coordinates of both
+   sequences from *a_res_p into *aln_p.  aln_p->calc_last_set is neither
+   read nor written here, and f_str is not used. */
+void
+calc_astruct(struct a_struct *aln_p, struct a_res_str *a_res_p, struct f_struct *f_str) {
+  /* sequence 0: start, then end */
+  aln_p->amin0 = a_res_p->min0;
+  aln_p->amax0 = a_res_p->max0;
+  /* sequence 1: start, then end */
+  aln_p->amin1 = a_res_p->min1;
+  aln_p->amax1 = a_res_p->max1;
+}
+
+
+
+/* init_update_data() - allocate and initialise the run-length encoder
+   state used by update_code() and close_update_data().
+   show_code - display flags: (show_code & SHOW_CODE_MASK)==SHOW_CODE_CIGAR
+     selects cigar_code with the count first, anything else ori_code with
+     the count last; SHOW_CODE_EXT sets show_ext.
+   Returns the calloc()ed state, or NULL (after a message on stderr) when
+     the allocation fails. */
+static struct update_code_str *
+init_update_data(int show_code) {	/* typed to match the prototype; the untyped K&R form relied on implicit int */
+  struct update_code_str *update_data_p;
+
+  if ((update_data_p = (struct update_code_str *)calloc(1,sizeof(struct update_code_str)))==NULL) {
+    fprintf(stderr,"*** error [%s:%d] - init_update_data(): cannot allocate update_code_str\n",
+	      __FILE__, __LINE__);
+    return NULL;
+  }
+
+  update_data_p->p_op_idx = -1;		/* no pending run yet */
+  update_data_p->p_op_cnt = 0;
+  update_data_p->show_code = show_code;
+
+  if ((show_code & SHOW_CODE_MASK) == SHOW_CODE_CIGAR) {
+    update_data_p->op_map = cigar_code;
+    update_data_p->cigar_order = 1;
+  }
+  else {
+    update_data_p->op_map = ori_code;
+    update_data_p->cigar_order = 0;
+  }
+
+  update_data_p->show_ext = ((show_code & SHOW_CODE_EXT) == SHOW_CODE_EXT) ? 1 : 0;
+  return update_data_p;
+}
+
+/* close_update_data() - append the pending run (if any) to al_str and free up_dp (NULL is accepted).
+   al_str_max - space left in al_str, '\0' included (callers pass al_str_n-strlen(al_str)). */
+static void
+close_update_data(char *al_str, int al_str_max, struct update_code_str *up_dp) {
+  char tmp_cnt[MAX_SSTR];
+  if (up_dp && up_dp->p_op_cnt > 0) {	/* with no run recorded p_op_idx is -1, not a valid op_map index */
+    sprintf_code(tmp_cnt,up_dp, up_dp->p_op_idx, up_dp->p_op_cnt);
+    if (al_str_max > 1) strncat(al_str,tmp_cnt,al_str_max-1);	/* strncat() adds a '\0' after its n characters */
+  }
+  free(up_dp);	/* free(NULL) does nothing */
+}
+
+/* update_code() has been modified to work more correctly with
+   ggsearch/glsearch, which, because alignments can start with either
+   insertions or deletions, can produce an initial code of "0=".  When
+   that happens, it is ignored and no code is added.
+
+   *al_str - alignment string [al_str_max] - not dynamic
+   op -- encoded operation, currently 0=match, 1-delete, 2-insert, 3-term-match, 4-mismatch
+   op_cnt -- length of run
+   show_code -- SHOW_CODE_CIGAR uses cigar_code, otherwise legacy
+*/
+
+/* sprintf_code() - format one run into tmp_str: count then op character
+   when up_dp->cigar_order is set, op character then count otherwise */
+static void
+sprintf_code(char *tmp_str, struct update_code_str *up_dp, int op_idx, int op_cnt) {
+  const char op_ch = up_dp->op_map[op_idx];
+
+  if (!up_dp->cigar_order) { sprintf(tmp_str,"%c%d",op_ch,op_cnt); return; }
+  sprintf(tmp_str,"%d%c",op_cnt,op_ch);
+}
+
+/* update_code() - add one alignment position to the run-length encoded
+   alignment string.
+
+   al_str, al_str_max - output string and the space left in it (callers
+     pass al_str_n-strlen(al_str), so the terminating '\0' is included)
+   up_dp - encoder state from init_update_data()
+   op - 0: match (may involve termination codons), 1: deletion,
+     2: insertion, 3: *:* ; any other value is ignored
+   sim_code - align_type() code for the position (only compared with M_IDENT)
+   sp0, sp1 - the two aligned characters
+
+   Runs are kept pending in up_dp; a finished run is appended to al_str
+   only when the operation changes.  With show_ext, non-identical
+   matches are recoded as op 4 (mismatch). */
+static void
+update_code(char *al_str, int al_str_max, 
+	    struct update_code_str *up_dp, int op, 
+	    int sim_code,  unsigned char sp0, unsigned char sp1)
+{
+  char tmp_cnt[MAX_SSTR];
+
+  /* the first position only starts a run.
+     NOTE(review): op is stored here before the mismatch/termination
+     recoding below is applied - confirm that is intended */
+  if (up_dp->p_op_cnt == 0) {
+    up_dp->p_op_idx = op;
+    up_dp->p_op_cnt = 1;
+    return;
+  }
+
+  if (op == 0 || op == 3) {
+    if (sp0 != '*' && sp1 != '*') {	/* default case, not termination */
+      if (up_dp->show_ext && sim_code != M_IDENT) { op = 4;}
+    }
+    else if (!up_dp->cigar_order) { op = 3;}	/* termination codon, legacy codes only */
+    else if (up_dp->show_ext && (sp0 != sp1)) { op = 4;}
+  }
+  else if (op != 1 && op != 2) return;
+
+  if (op == up_dp->p_op_idx) {	/* same operation: extend the pending run */
+    up_dp->p_op_cnt++;
+    return;
+  }
+
+  /* operation changed: flush the finished run, then start a new one */
+  sprintf_code(tmp_cnt,up_dp, up_dp->p_op_idx,up_dp->p_op_cnt);
+  /* strncat() appends up to n characters AND a '\0', so n must be one
+     less than the space left; a non-positive n must never reach it,
+     since it would be converted to a huge size_t */
+  if (al_str_max > 1) strncat(al_str,tmp_cnt,al_str_max-1);
+  up_dp->p_op_idx = op;
+  up_dp->p_op_cnt = 1;
+}
+
+/* build an array of match/ins/del - length strings */
+/* 5-June-2014 - modified to split "match" encoding into identical (=)
+   and mismatch (X)
+
+   To support domain-based scoring, this function iterates through
+   every aligned position, including insertions and deletions (which
+   are encoded as runs).
+ */
+
+int calc_code(const unsigned char *aa0, int n0,
+	      const unsigned char *aa1, int n1,
+	      struct a_struct *aln,
+	      struct a_res_str *a_res,
+	      struct pstruct *ppst,
+	      char *al_str, int al_str_n, 
+	      const unsigned char *ann_arr, 
+	      const unsigned char *aa0a,
+	      const struct annot_str *annot0_p,
+	      const unsigned char *aa1a,
+	      const struct annot_str *annot1_p,
+	      struct dyn_string_str *ann_code_dyn,
+	      int *score_delta,
+	      struct f_struct *f_str,
+	      void *pstat_void,
+	      int display_code)
+{
+  int i0, i1;
+  int op, lenc;
+  int p_op, op_cnt;
+  int match, p_match, match_cnt;
+  int mis, p_mis, mis_cnt;
+  const unsigned char *aa1p;
+  char sp0, sp1;
+  struct update_code_str *update_data_p;
+  unsigned char *sq;
+  int *rp;
+  int have_ann=0;
+  char ann_ch0, ann_ch1;
+  char tmp_astr[MAX_STR];
+  int sim_code, t_spa;
+  char *sim_sym = aln_map_sym[MX_ACC];
+  int aa0c, aa1c, itmp;
+  int show_code, annot_fmt, start_flag;
+
+  /* variables for variant changes, regions */
+  int *aa0_pam2_p;
+  void *annot_stack;
+  struct annot_entry **s_annot0_arr_p;
+  struct annot_entry **s_annot1_arr_p;
+  int i0_annot, i1_annot, v_delta, v_tmp;
+  long i0_left_end, i1_left_end;
+  int d1_score, d1_ident, d1_alen;
+  int d0_score, d0_ident, d0_alen;
+  struct domfeat_link *left_domain_list1, *left_domain_list0;
+  int have_push_features;
+  long q_offset, l_offset;
+
+  *score_delta = 0;
+  i0_left_end = i1_left_end = -1;
+  left_domain_list0 = left_domain_list1 = NULL;
+  d1_score = d1_ident = d1_alen = 0;
+  d0_score = d0_ident = d0_alen = 0;
+
+  show_code = (display_code & (SHOW_CODE_MASK+SHOW_CODE_EXT));	/* see defs.h; SHOW_CODE_ALIGN=2,_CIGAR=3,_CIGAR_EXT=4 */
+  annot_fmt = 2;
+  if (display_code & SHOW_ANNOT_FULL) {
+    annot_fmt = 1;
+  }
+  
+  if (aa0a != NULL && aa1a != NULL) { have_ann = 2;}
+  else if (aa0a != NULL || aa1a != NULL) { have_ann = 1;}
+  else {have_ann = 0;}
+
+  if (ppst->ext_sq_set) { sq = ppst->sqx;}
+  else {sq = ppst->sq;}
+
+#ifndef TFASTA
+  aa1p = aa1;
+#else
+  aa1p = f_str->aa1x;
+#endif
+
+  rp = a_res->res;
+  lenc = aln->nident = aln->nmismatch = aln->nsim = aln->npos = aln->ngap_q = aln->ngap_l = aln->nfs = 0;
+
+  update_data_p = init_update_data(show_code);
+
+  i0 = a_res->min0;
+  i1 = a_res->min1;
+
+  q_offset = aln->q_offset;
+  l_offset = aln->l_offset;
+
+  op = p_op = 0;
+  op_cnt = match_cnt = mis_cnt = 0;
+  start_flag = 1;
+
+  v_delta = 0;
+  i0_annot = i1_annot = 0;
+  annot_stack = NULL;
+  s_annot0_arr_p = s_annot1_arr_p = NULL;
+  if (have_ann) {
+    have_push_features = 0;
+    if (annot0_p || annot1_p) annot_stack = init_stack(64,64);
+
+    if (annot1_p && annot1_p->n_annot > 0) {
+      s_annot1_arr_p = annot1_p->s_annot_arr_p;
+      while (i1_annot < annot1_p->n_annot) {
+	if (s_annot1_arr_p[i1_annot]->pos >= i1 + l_offset) {break;}
+	if (s_annot1_arr_p[i1_annot]->end < i1 + l_offset) {i1_annot++; continue;}
+
+	if (s_annot1_arr_p[i1_annot]->label == '-') {
+	  process_annot_match(&itmp, aa0_pam2_p, l_offset+seq_pos(i1,aln->llrev,0), q_offset + seq_pos(i0,aln->qlrev,0),
+			      &sp1, NULL, sq, s_annot1_arr_p[i1_annot], NULL, 
+			      annot_stack, &have_push_features, &v_delta,
+			      &d1_score, &d1_ident, &d1_alen, &left_domain_list1, &i1_left_end, 0);
+	}
+	i1_annot++;
+      }
+    }
+
+    if (annot0_p && annot0_p->n_annot > 0) {
+      s_annot0_arr_p = annot0_p->s_annot_arr_p;
+      while (i0_annot < annot0_p->n_annot && s_annot0_arr_p[i0_annot]->pos < i0+q_offset) {
+	if (s_annot0_arr_p[i0_annot]->pos >= i0 + q_offset) {break;}
+	if (s_annot0_arr_p[i0_annot]->end < i0 + q_offset) {i0_annot++; continue;}
+
+	if (s_annot0_arr_p[i0_annot]->label == '-') {
+	  process_annot_match(&itmp, aa0_pam2_p, q_offset+seq_pos(i0,aln->qlrev,0), l_offset + seq_pos(i1,aln->llrev,0),
+			      &sp0, NULL, sq, s_annot0_arr_p[i0_annot], NULL, 
+			      annot_stack, &have_push_features, &v_delta,
+			      &d0_score, &d0_ident, &d0_alen, &left_domain_list0, &i0_left_end, 0);
+	}
+	i0_annot++;
+      }
+    }
+  }
+
+  while (i0 < a_res->max0 || i1 < a_res->max1) {
+    /* match/mismatch (aligned residues */
+    /* here, op is the "current" encoding, and *rp is the next one */
+
+    /* there is an op==0 for every aligned/matched residue.
+       insertions in aa0 > 0,
+       deletions < 0
+
+       for enhanced CIGAR, need run lengths for identities, mismatches
+     */
+
+    if (ppst->pam_pssm) {aa0_pam2_p = ppst->pam2p[0][i0];}
+    else {aa0_pam2_p = ppst->pam2[0][aa0[i0]];}
+
+    if (op == 0 && *rp == 0) {
+      aa0c = aa0[i0];
+      aa1c = aa1p[i1];
+      itmp = aa0_pam2_p[aa1c];
+      sp0 = sq[aa0c];
+      sp1 = sq[aa1c];
+
+      /* variant annot1_p annotations can cause substitution */
+      if (s_annot1_arr_p) {
+	if (i1+l_offset == s_annot1_arr_p[i1_annot]->pos || i1+l_offset == i1_left_end) {
+
+	  i1_annot = next_annot_match(&itmp, aa0_pam2_p, l_offset+seq_pos(i1,aln->llrev,0),
+				      q_offset+seq_pos(i0,aln->qlrev,0), &sp1, NULL, sq,
+				      i1_annot, annot1_p->n_annot, s_annot1_arr_p,
+				      NULL, annot_stack, &have_push_features, &v_delta,
+				      &d1_score, &d1_ident, &d1_alen, &left_domain_list1, &i1_left_end, 0);
+
+	  if (sq[aa1c] != sp1) {
+	    t_spa = align_type(itmp, sp0, sp1, ppst->nt_align, NULL, ppst->pam_x_id_sim);
+	    comment_var(q_offset+seq_pos(i0,aln->qlrev,0), sp0,
+			l_offset+seq_pos(i1,aln->llrev,0), sp1,
+			sq[aa1c], sim_sym[t_spa], NULL,
+			ann_code_dyn, 1, annot_fmt);
+	  }
+	}
+	d1_score += itmp;
+      }
+
+      if (s_annot0_arr_p) {
+	if (i0+q_offset == s_annot0_arr_p[i0_annot]->pos || i0+q_offset == i0_left_end) {
+
+	  i0_annot = next_annot_match(&itmp, aa0_pam2_p, q_offset+seq_pos(i0,aln->qlrev,0),
+				      l_offset+seq_pos(i1,aln->llrev,0), &sp0, NULL, sq,
+				      i0_annot, annot0_p->n_annot, s_annot0_arr_p,
+				      NULL, annot_stack, &have_push_features, &v_delta,
+				      &d0_score, &d0_ident, &d0_alen, &left_domain_list0, &i0_left_end, 0);
+
+	  /* check for sequence change from variant */
+	  if (sq[aa0c] != sp0) {
+	    t_spa = align_type(itmp, sp0, sp1, ppst->nt_align, NULL, ppst->pam_x_id_sim);
+	    comment_var(q_offset+seq_pos(i0,aln->qlrev,0), sp0,
+			l_offset+seq_pos(i1,aln->llrev,0), sp1,
+			sq[aa0c], sim_sym[t_spa], NULL,
+			ann_code_dyn, 0, annot_fmt);
+	  }
+	}
+	d0_score += itmp;
+      }
+
+      d0_alen++;
+      d1_alen++;
+      if ((sim_code == align_type(itmp, sp0, sp1, ppst->nt_align, aln, ppst->pam_x_id_sim)) == M_IDENT) {
+	d0_ident++;
+	d1_ident++;
+      }
+
+      update_code(al_str, al_str_n-strlen(al_str), update_data_p, op, sim_code, sp0, sp1);
+
+      /* update op to the next encoded position */
+      op = *rp++;
+      lenc++;
+      
+      /* check for an annotation */
+      if (have_ann) {
+	ann_ch0 = ann_ch1 = '\0';
+	/* conventional annotations */
+	if (have_ann == 2 && (ann_arr[aa0a[i0]] != ' ' || ann_arr[aa1a[i1]] != ' ')) {
+	  ann_ch0 = ann_arr[aa0a[i0]];
+	  if (ann_ch0 == ' ') ann_ch0 = 'X';
+	  ann_ch1 = ann_arr[aa1a[i1]];
+	  if (ann_ch1 == ' ') ann_ch1 = 'X';
+	}
+	else if (aa0a != NULL && ann_arr[aa0a[i0]]!=' ') {
+	  ann_ch0 = ann_arr[aa0a[i0]];
+	  ann_ch1 = 'X';
+	}
+	else if (aa1a != NULL && ann_arr[aa1a[i1]]!=' ') {
+	  ann_ch0 = 'X';
+	  ann_ch1 = ann_arr[aa1a[i1]];
+	}
+
+	/* ann_ch0 only works below because ann_ch0=='X' if ann_ch1 */
+	if ( ann_ch0 && !(ann_ch1 == '['  || ann_ch1 == ']' || ann_ch0 == '[' || ann_ch0 == ']')) {
+	  sprintf(tmp_astr, "|%c%c:%ld%c%c%ld%c",
+		  ann_ch0,ann_ch1, q_offset+seq_pos(i0,aln->qlrev,0)+1,sp0,
+		  sim_sym[sim_code], l_offset+seq_pos(i1,aln->llrev,0)+1,sp1);
+	  /* SAFE_STRNCAT(ann_code_s, tmp_astr, n_ann_code_s); */
+	  dyn_strcat(ann_code_dyn, tmp_astr);
+	}
+	  
+	if ((s_annot1_arr_p || s_annot0_arr_p) && have_push_features) {
+	  display_push_features(annot_stack, ann_code_dyn,
+				q_offset+seq_pos(i0,aln->qlrev,0), sp0,
+				l_offset+seq_pos(i1,aln->llrev,0), sp1,
+				sim_sym[sim_code], 
+				a_res->rst.score[ppst->score_ix], a_res->rst.comp, n0, n1, pstat_void, annot_fmt);
+	  have_push_features = 0;
+	}
+      }
+      i0++; i1++;
+    }
+    else {	/* not in match run, in a gap */
+      if (op == 0) {
+	/* at a transition from match (previous) to indel (current) */
+	d1_score += ppst->gdelval;
+	d0_score += ppst->gdelval;
+	op = *rp++;
+      }
+      d1_score += ppst->ggapval; d1_alen++;
+      d0_score += ppst->ggapval; d0_alen++;
+
+      if (op > 0) {
+	update_code(al_str, al_str_n-strlen(al_str), update_data_p, 2, sim_code,'-','-');
+
+	if (s_annot1_arr_p) {
+	  if (i1+l_offset == s_annot1_arr_p[i1_annot]->pos || i1+l_offset == i1_left_end) {
+	    i1_annot = next_annot_match(&itmp, aa0_pam2_p, l_offset+seq_pos(i1,aln->llrev,0),
+					q_offset+seq_pos(i0,aln->qlrev,0), &sp1, NULL, sq, 
+					i1_annot, annot1_p->n_annot, s_annot1_arr_p,
+					NULL, annot_stack, &have_push_features, &v_delta,
+					&d1_score, &d1_ident, &d1_alen, &left_domain_list1, &i1_left_end, 0);
+
+	    if (have_push_features) {
+	      display_push_features(annot_stack, ann_code_dyn,
+				    q_offset+seq_pos(i0,aln->qlrev,0), sp0,
+				    l_offset+seq_pos(i1,aln->llrev,0), sp1,
+				    sim_sym[sim_code],
+				    a_res->rst.score[ppst->score_ix], a_res->rst.comp, n0, n1, pstat_void, annot_fmt);
+	      have_push_features = 0;
+	    }
+	  }
+	}
+	op--; lenc++; i1++; aln->ngap_q++;
+      }
+      else {  /* (op < 0) */
+
+	update_code(al_str, al_str_n-strlen(al_str), update_data_p, 1, sim_code,'-','-');
+
+	if (s_annot0_arr_p) {
+	  if (i0+q_offset == s_annot0_arr_p[i0_annot]->pos || i0+q_offset == i0_left_end) {
+
+	    i0_annot = next_annot_match(&itmp, aa0_pam2_p, q_offset+seq_pos(i0,aln->qlrev,0),
+					l_offset+seq_pos(i1,aln->llrev,0), &sp0, NULL, sq, 
+					i0_annot, annot0_p->n_annot, s_annot0_arr_p,
+					NULL, annot_stack, &have_push_features, &v_delta,
+					&d0_score, &d0_ident, &d0_alen, &left_domain_list0, &i0_left_end, 0);
+
+	    if (have_push_features) {
+	      display_push_features(annot_stack, ann_code_dyn,
+				    q_offset+seq_pos(i0,aln->qlrev,0), sp0,
+				    l_offset+seq_pos(i1,aln->llrev,0), sp1,
+				    sim_sym[sim_code],
+				    a_res->rst.score[ppst->score_ix], a_res->rst.comp, n0, n1, pstat_void, annot_fmt);
+	      have_push_features = 0;
+	    }
+	  }
+	}
+	op++; lenc++; i0++; aln->ngap_l++;
+      }
+    }
+  }
+
+  /* all done, clean things up */
+
+  close_update_data(al_str, al_str_n-strlen(al_str), update_data_p);
+
+  if (have_ann) {
+    have_push_features = 0;
+    /* also check for regions after alignment */
+
+    if (s_annot1_arr_p && i1_left_end > 0) {
+      close_annot_match(-1, annot_stack, &have_push_features,
+			&d1_score, &d1_ident, &d1_alen, &left_domain_list1, &i1_left_end,
+			0);
+    }
+    if (s_annot0_arr_p && i0_left_end > 0) {
+      close_annot_match(-1, annot_stack, &have_push_features,
+			&d0_score, &d0_ident, &d0_alen, &left_domain_list0, &i0_left_end,
+			0);
+    }
+
+    if (have_push_features) {
+      display_push_features(annot_stack, ann_code_dyn,
+			    q_offset+a_res->max0-1, sp0,
+			    l_offset+a_res->max1-1, sp1,
+			    sim_sym[sim_code],
+			    a_res->rst.score[ppst->score_ix], a_res->rst.comp, n0, n1, pstat_void, annot_fmt);
+    }
+  }
+
+  if (annot0_p || annot1_p) free_stack(annot_stack);
+
+  *score_delta = v_delta;
+  return lenc;
+}
+
+int calc_id(const unsigned char *aa0, int n0,
+	    const unsigned char *aa1, int n1,
+	    struct a_struct *aln, 
+	    struct a_res_str *a_res,
+	    struct pstruct *ppst,
+	    const struct annot_str *annot0_p,
+	    const struct annot_str *annot1_p,
+	    int *score_delta,
+	    struct dyn_string_str *annot_var_dyn,
+	    struct f_struct *f_str)
+{
+  /* calc_id: walk the alignment encoding in a_res->res from
+     (min0,min1) to (max0,max1), counting alignment columns (lenc)
+     and gaps (aln->ngap_q/ngap_l); align_type() classifies each
+     aligned pair.  Residues replaced by next_annot_match() are
+     appended to annot_var_dyn as "<old><pos><new>;" (sequence 1) or
+     "q<old><pos><new>;" (sequence 0).  On return *score_delta holds
+     v_delta as accumulated by next_annot_match(); the return value
+     is the number of alignment columns (lenc). */
+  int i0, i1;		/* current positions in aa0[] and aa1p[] */
+  int op, lenc;		/* remaining indel run (0 in a match run); columns so far */
+  char sp0, sp1;
+  char tmp_str[MAX_SSTR];
+  const unsigned char *aa1p;
+  int *rp;
+  unsigned char *sq;
+
+  /* variables for variant changes */
+  int *aa0_pam2_p;
+  struct annot_entry **s_annot0_arr_p;
+  struct annot_entry **s_annot1_arr_p;
+  int  itmp, i0_annot, i1_annot, v_delta;
+  long q_offset, l_offset;
+  long i0_left_end, i1_left_end;
+  int d1_score, d1_ident, d1_alen;
+  int d0_score, d0_ident, d0_alen;
+  struct domfeat_link *left_domain_list1, *left_domain_list0;
+
+  *score_delta = 0;
+  i0_left_end = i1_left_end = -1;
+  left_domain_list0 = left_domain_list1 = NULL;
+
+  NULL_dyn_string(annot_var_dyn);
+
+  if (ppst->ext_sq_set) { sq = ppst->sqx; }
+  else { sq = ppst->sq; }
+
+#ifndef TFASTA
+  aa1p = aa1;
+#else
+  aa1p = f_str->aa1x;
+#endif
+
+  aln->amin0 = a_res->min0;
+  aln->amax0 = a_res->max0;
+  aln->amin1 = a_res->min1;
+  aln->amax1 = a_res->max1;
+  aln->calc_last_set = 1;
+
+  q_offset = aln->q_offset;
+  l_offset = aln->l_offset;
+
+  rp = a_res->res;
+  lenc = aln->nident = aln->nmismatch = aln->nsim = aln->ngap_q = aln->ngap_l = aln->nfs = op = 0;
+  i0 = a_res->min0;
+  i1 = a_res->min1;
+
+  v_delta = 0;
+  i0_annot = i1_annot = 0;
+
+  d1_score = d1_ident = d1_alen = 0;
+  d0_score = d0_ident = d0_alen = 0;
+
+  if (annot1_p && annot1_p->n_annot > 0) s_annot1_arr_p = annot1_p->s_annot_arr_p;
+  else s_annot1_arr_p = NULL;
+  if (annot0_p && annot0_p->n_annot > 0) s_annot0_arr_p = annot0_p->s_annot_arr_p;
+  else s_annot0_arr_p = NULL;
+
+  while (i0 < a_res->max0 || i1 < a_res->max1) {
+    if (op == 0 && *rp == 0) {
+      /* op==0 -> we are in a match run, and current code is a match */
+      op = *rp++;
+      lenc++;
+
+      /* look up the query scoring row only for aligned columns: in an
+	 indel step i0 can already equal a_res->max0, so indexing
+	 aa0[]/pam2p[] there would read past the aligned region */
+      if (ppst->pam_pssm) {aa0_pam2_p = ppst->pam2p[0][i0];}
+      else {aa0_pam2_p = ppst->pam2[0][aa0[i0]];}
+
+      itmp = ppst->pam2[0][aa0[i0]][aa1p[i1]];
+      sp0 = sq[aa0[i0]];
+      sp1 = sq[aa1p[i1]];
+
+      if (s_annot1_arr_p && (i1 + l_offset == s_annot1_arr_p[i1_annot]->pos || i1+l_offset == i1_left_end)) {
+	i1_annot = next_annot_match(&itmp, aa0_pam2_p, l_offset+seq_pos(i1,aln->llrev,0),
+				    q_offset+seq_pos(i0,aln->qlrev,0), &sp1, NULL, sq,
+				    i1_annot, annot1_p->n_annot, s_annot1_arr_p,
+				    NULL, NULL, NULL, &v_delta, 
+				    &d1_score, &d1_ident, &d1_alen, &left_domain_list1, &i1_left_end, itmp);
+
+	/* must be out of the loop to capture the last value */
+	if (sq[aa1p[i1]] != sp1) {
+	  sprintf(tmp_str,"%c%d%c;",sq[aa1p[i1]],i1+1,sp1);
+	  /*   SAFE_STRNCAT(annot_var_s,tmp_str,n_annot_var_s); */
+	  dyn_strcat(annot_var_dyn, tmp_str);
+	}
+	d1_score += itmp;
+      }
+
+      if (s_annot0_arr_p && (i0 + q_offset == s_annot0_arr_p[i0_annot]->pos || i0+q_offset == i0_left_end)) {
+	i0_annot = next_annot_match(&itmp, aa0_pam2_p, q_offset+seq_pos(i0,aln->qlrev,0),
+				    l_offset+seq_pos(i1,aln->llrev,0), &sp0, NULL, sq,
+				    i0_annot, annot0_p->n_annot, s_annot0_arr_p,
+				    NULL, NULL, NULL, &v_delta, 
+				    &d0_score, &d0_ident, &d0_alen, &left_domain_list0, &i0_left_end, itmp);
+
+	if (sq[aa0[i0]] != sp0) {
+	  sprintf(tmp_str,"q%c%d%c;",sq[aa0[i0]],i0+1,sp0);
+	  /*  SAFE_STRNCAT(annot_var_s,tmp_str,n_annot_var_s); */
+	  dyn_strcat(annot_var_dyn, tmp_str);
+	}
+	d0_score += itmp;
+      }
+
+      /* updates nident, nsim, npos */
+      d0_alen++;
+      d1_alen++;
+      if (align_type(itmp, sp0, sp1, ppst->nt_align, aln, ppst->pam_x_id_sim) == M_IDENT) {
+	d0_ident++;
+	d1_ident++;
+      }
+
+      i0++; i1++;
+    }
+    else {
+      /* indel: load the next run length if needed, then consume one column */
+      if (op==0) op = *rp++;
+      if (op>0) {	/* inserts in seq0 */
+	op--; lenc++; i1++; aln->ngap_q++;
+      }
+      else {		/* inserts in seq 1 */
+	op++; lenc++; i0++; aln->ngap_l++;
+      }
+    }
+  }
+  *score_delta = v_delta;
+
+  return lenc;
+}
diff --git a/src/cal_cons2.c b/src/cal_cons2.c
new file mode 100644
index 0000000..ba86330
--- /dev/null
+++ b/src/cal_cons2.c
@@ -0,0 +1,1164 @@
+/* cal_cons2.c - routines for printing translated alignments for
+   fasta, ssearch, ggsearch, glsearch  */
+
+/* $Id: cal_cons.c 1280 2014-08-21 00:47:55Z wrp $ */
+
+/* copyright (c) 1998, 1999, 2007, 2014 by William R. Pearson and The
+   Rector & Visitors of the University of Virginia */
+
+/* Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under this License is distributed on an "AS
+   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+   express or implied.  See the License for the specific language
+   governing permissions and limitations under the License. 
+*/
+
+/* removed from dropgsw2.c, dropnfa.c April, 2007 */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "defs.h"
+#include "param.h"
+#include "dyn_string.h"
+
+#if defined(FASTA) || defined(TFASTA)
+#include "dropnfa.h"
+#endif
+
+#if defined(SSEARCH) || defined(OSEARCH)
+#include "dropgsw2.h"
+#endif
+
+#ifdef LALIGN
+#include "dropgsw2.h"
+#endif
+
+#include "a_mark.h"
+
+struct update_code_str {	/* state shared by init_update_data()/update_code()/close_update_data() */
+  int p_op_idx;		/* presumably the previous op index -- TODO confirm in update_code() */
+  int p_op_cnt;		/* presumably the run length of that op -- TODO confirm */
+  int btop_enc;
+  int show_code;	/* presumably from init_update_data(show_code) -- TODO confirm */
+  int cigar_order;
+  int show_ext;
+  char *op_map;		/* presumably ori_code or cigar_code -- TODO confirm */
+};
+
+static char *ori_code = "=-+*x";
+static char *cigar_code = "MDIMX";
+
+static struct update_code_str *
+init_update_data(int show_code);
+
+static void
+sprintf_code(char *tmp_str, struct update_code_str *, int op_idx, int op_cnt);
+
+static void 
+update_code(struct dyn_string_str *align_code_dyn,
+	    struct update_code_str *update_data, int op, 
+	    int sim_code, unsigned char sp0, unsigned char sp1);
+
+static void
+close_update_data(struct dyn_string_str *align_code_dyn,
+		  struct update_code_str *update_data);
+
+extern void aancpy(char *to, char *from, int count, struct pstruct *ppst);
+extern void *init_stack(int, int);
+extern void push_stack(void *, void *);
+extern void *pop_stack(void *);
+extern void *free_stack(void *);
+extern struct domfeat_data * init_domfeat_data(const struct annot_str *annot_p);
+
+/* returns M_NEG, M_ZERO, M_POS, M_IDENT, M_DEL (a_mark.h)
+   updates *aln->nsim, npos, nident, nmismatch */
+extern int
+align_type(int score, char sp0, char sp1, int nt_align, struct a_struct *aln, int pam_x_id_sim);
+
+extern void	/* in compacc2e.c */
+process_annot_match(int *itmp, int *pam2aa0v, 
+		    long ip, long ia, char *sp1, char *sp1a, const unsigned char *sq,
+		    struct annot_entry *annot_arr_p, int n_domains, char **ann_comment,
+		    void *annot_stack, int *have_push_features_p, int *v_delta,
+		    int *d_score_p, int *d_ident_p, int *d_alen_p, int *d_gaplen_p,
+		    struct domfeat_data **left_domain_head_p,
+		    struct domfeat_data *left_domain_p,
+		    long *left_end_p, int init_score);
+
+extern int	/* in compacc2e.c */
+next_annot_match(int *itmp, int *pam2aa0v, 
+		 long ip, long ia, char *sp1, char *sp1a, const unsigned char *sq,
+		 int i_annot, int n_annot, struct annot_entry **annot_arr, char **ann_comment,
+		 void *annot_stack, int *have_push_features_p, int *v_delta,
+		 int *d_score_p, int *d_ident_p, int *d_alen_p, int *d_gaplen_p,
+		  struct domfeat_data **left_domain_head_p, struct domfeat_data *left_domain_p,
+		 long *left_domain_end, int init_score);
+
+extern void	/* in compacc2e.c */
+close_annot_match (int ia, void *annot_stack, int *have_push_features_p,
+		   int *d_score_p, int *d_ident_p, int *d_alen_p, int *d_gaplen_p,
+		   struct domfeat_data **left_domain_p,
+		   long *left_end_p, int init_score);
+
+extern void	/* in compacc2e.c */
+comment_var(long i0, char sp0, long i1, char sp1, char o_sp1, char sim_char,
+	    const char *ann_comment, struct dyn_string_str *annot_var_dyn,
+	    int target, int d_type);
+
+extern void		/* in compacc2e.c */
+display_push_features(void *annot_stack, struct dyn_string_str *annot_var_dyn,
+		      long i0_pos, char sp0, long i1_pos, char sp1, char sym, 
+		      int score, double comp, int sw_score, int n0, int n1,
+		      void *pstat_void, int d_type);
+
+#define DP_FULL_FMT 1	/* Region: score: bits: id: ... */
+
+extern int seq_pos(int pos, int rev, int off);
+
+/* values of calc_func_mode (selects what calc_cons_u() produces) */
+#define CALC_CONS 1	/* fills the seqc0/seqc1/seqca display buffers */
+#define CALC_CODE 2	/* builds align_code_dyn through update_code() */
+#define CALC_ID   3	/* scratch chars, no buffers; display_push_features() is not called */
+#define CALC_ID_DOM   4	/* set up like CALC_ID, but display_push_features() is still called */
+
+int
+pre_fill_cons(const unsigned char *aa0, const unsigned char *aa1p,
+	      struct a_res_str *a_res,
+	      struct pstruct *ppst,
+	      struct a_struct *aln,
+	      char *seqc0, char *seqc1, char *seqca, int *smins,
+	      char *seqc0a, char *seqc1a) {
+  int mins;
+  /* copy the context left of the alignment start (min0/min1) into seqc0/seqc1,
+     set aln->smin0/smin1 and *smins; returns mins, the context columns written */
+  if (min(a_res->min0,a_res->min1)<aln->llen || aln->showall==1) { /* will we show all the start ? */
+    *smins=0;
+    if (a_res->min0 >= a_res->min1) {              /* aa0 extends more to left */
+      if (aln->showall==1) mins = a_res->min0;
+      else mins = min(a_res->min0,aln->llcntx);
+      aancpy(seqc0,(char *)aa0+a_res->min0-mins,mins,ppst);
+      aln->smin0 = a_res->min0-mins;
+      if ((mins-a_res->min1)>0) {	/* aa1p has fewer than mins residues: blank-pad */
+	memset(seqc1,' ',mins-a_res->min1);
+	aancpy(seqc1+mins-a_res->min1,(char *)aa1p,a_res->min1,ppst);
+	aln->smin1 = 0;
+      }
+      else {
+	aancpy(seqc1,(char *)aa1p+a_res->min1-mins,mins,ppst);
+	aln->smin1 = a_res->min1-mins;
+      }
+    }
+    else {					/* aa1p extends more to left */
+      if (aln->showall == 1) mins=a_res->min1;
+      else mins = min(a_res->min1,aln->llcntx);
+      aancpy(seqc1,(char *)(aa1p+a_res->min1-mins),mins,ppst);
+      aln->smin1 = a_res->min1-mins;
+      if ((mins-a_res->min0)>0) {	/* aa0 has fewer than mins residues: blank-pad */
+	memset(seqc0,' ',mins-a_res->min0);
+	aancpy(seqc0+mins-a_res->min0,(char *)aa0,a_res->min0,ppst);
+	aln->smin0 = 0;
+      }
+      else {
+	aancpy(seqc0,(char *)aa0+a_res->min0-mins,mins,ppst);
+	aln->smin0 = a_res->min0-mins;
+      }
+    }
+  }
+  else {	/* same number of context residues (at most llcntx) on both rows */
+    mins= min(aln->llcntx,min(a_res->min0,a_res->min1));
+    *smins=mins;
+    aln->smin0=a_res->min0 - *smins;
+    aln->smin1=a_res->min1 - *smins;
+    aancpy(seqc0,(char *)aa0+a_res->min0-mins,mins,ppst);
+    aancpy(seqc1,(char *)aa1p+a_res->min1-mins,mins,ppst);
+  }
+  /* set the alignment code to zero for context */
+  memset(seqca,0,mins);
+  if (seqc0a) {	/* only seqc0a is tested; seqc1a is written whenever seqc0a is non-NULL */
+    memset(seqc0a,' ',mins);
+    memset(seqc1a,' ',mins);
+  }
+  return mins;
+}
+
+int 
+post_fill_cons(const unsigned char *aa0, int n0,
+	       const unsigned char *aa1p, int nn1,
+	       struct a_res_str *a_res,
+	       struct pstruct *ppst,
+	       int mins, int lenc,
+	       struct a_struct *aln,
+	       char *seqc0, char *seqc1,
+	       char *seqc0a, char *seqc1a) {
+  /* post_fill_cons: append right-flanking context after the mins+lenc columns already in seqc0/seqc1 */
+  int ns, nd, itmp;
+  /* returns nd, the number of context columns written after the alignment */
+  /*	now we have the middle, get the right end */
+  if (!aln->llcntx_set) {
+    ns = mins + lenc + aln->llen;	/* show an extra line? */
+    ns -= (itmp = ns %aln->llen);	/* itmp = left over on last line */
+    if (itmp>aln->llen/2) ns += aln->llen;  /* more than 1/2 , use another*/
+    nd = ns - (mins+lenc);		/* this much extra */
+  }
+  else nd = aln->llcntx;	/* llcntx_set: use the stored context length */
+  /* never show more context than the longer of the two remaining tails */
+  if (nd > max(n0-a_res->max0,nn1-a_res->max1)) 
+    nd = max(n0-a_res->max0,nn1-a_res->max1);
+  
+  if (aln->showall==1) {
+    nd = max(n0-a_res->max0,nn1-a_res->max1);	/* reset for showall=1 */
+    /* get right end */
+    aancpy(seqc0+mins+lenc,(char *)aa0+a_res->max0,n0-a_res->max0,ppst);
+    aancpy(seqc1+mins+lenc,(char *)aa1p+a_res->max1,nn1-a_res->max1,ppst);
+    /* fill with blanks - this is required to use one 'nc' */
+    memset(seqc0+mins+lenc+n0-a_res->max0,' ',nd-(n0-a_res->max0));
+    memset(seqc1+mins+lenc+nn1-a_res->max1,' ',nd-(nn1-a_res->max1));
+  }
+  else {
+    if ((nd-(n0-a_res->max0))>0) {	/* aa0 tail is shorter than nd: copy it all, then blank-pad */
+      aancpy(seqc0+mins+lenc,(char *)aa0+a_res->max0,(n0-a_res->max0),ppst);
+      memset(seqc0+mins+lenc+n0-a_res->max0,' ',nd-(n0-a_res->max0));
+    }
+    else {
+      aancpy(seqc0+mins+lenc,(char *)aa0+a_res->max0,nd,ppst);
+    }
+
+    if ((nd-(nn1-a_res->max1))>0) {	/* aa1p tail is shorter than nd: copy it all, then blank-pad */
+      aancpy(seqc1+mins+lenc,(char *)aa1p+a_res->max1,nn1-a_res->max1,ppst);
+      memset(seqc1+mins+lenc+nn1-a_res->max1,' ',nd-(nn1-a_res->max1));
+    }
+    else {
+      aancpy(seqc1+mins+lenc,(char *)aa1p+a_res->max1,nd,ppst);
+    }
+  }
+  if (seqc0a) {	/* annotation rows: blank all nd context columns */
+    memset(seqc0a+mins+lenc,' ',nd);
+    memset(seqc1a+mins+lenc,' ',nd);
+    /*
+      ntmp = nd-(n0-a_res->max0);
+      if (ntmp > 0) memset(seqc0a+mins+lenc+n0-a_res->max0,' ',ntmp);
+      ntmp = nd-(nn1-a_res->max1);
+      if (ntmp > 0) memset(seqc1a+mins+lenc+nn1-a_res->max1,' ',ntmp);
+    */
+  }
+  return nd;
+}
+
+/* add_annot_code: appends at most one site-annotation entry for the current column to struct dyn_string_str ann_code_dyn */
+void
+add_annot_code(int have_ann, char sp0, char sp1, 
+	       char ann_aa0_i0, char ann_aa1_i1,
+	       long q_off_pos, long l_off_pos, char sim_sym_code,
+	       struct dyn_string_str *ann_code_dyn)
+{
+  char ann_ch0, ann_ch1;	/* annotation characters to print; '\0' means nothing to print */
+  char tmp_astr[MAX_STR];
+
+  ann_ch0 = ann_ch1 = '\0';
+
+  /* conventional annotations; have_ann: bit 1 = sequence 0 annotated, bit 2 = sequence 1 (set in calc_cons_u) */
+  if (have_ann == 3 && (ann_aa0_i0 != ' ' || ann_aa1_i1 != ' ')) {
+    ann_ch0 = ann_aa0_i0;
+    if (ann_ch0 == ' ') ann_ch0 = 'X';	/* 'X' stands in for the side without a mark */
+    ann_ch1 = ann_aa1_i1;
+    if (ann_ch1 == ' ') ann_ch1 = 'X';
+  }
+  else if ((have_ann&2)==0 && ann_aa0_i0 != ' ') {	/* sequence 1 carries no annotation */
+    ann_ch0 = ann_aa0_i0;
+    ann_ch1 = 'X';
+  }
+  else if ((have_ann&1)==0 && ann_aa1_i1 != ' ') {	/* sequence 0 carries no annotation */
+    ann_ch0 = 'X';
+    ann_ch1 = ann_aa1_i1;
+  }
+
+  /* ann_ch0 only works below because ann_ch0=='X' if ann_ch1 */
+  /*
+     The entry written is '|', ann_ch0, ann_ch1, ':', q_off_pos+1, sp0,
+     sim_sym_code, l_off_pos+1, sp1.  The positions are used as passed
+     in (plus one); no offset or reverse-strand mapping is applied
+     here.  Nothing is written when either annotation character is
+     '[' or ']'.
+  */
+  if ( ann_ch0 && !(ann_ch1 == '['  || ann_ch1 == ']' || ann_ch0 == '[' || ann_ch0 == ']')) {
+    sprintf(tmp_astr, "|%c%c:%ld%c%c%ld%c",
+	    ann_ch0,ann_ch1, q_off_pos+1,sp0,
+	    sim_sym_code, l_off_pos+1,sp1);
+    dyn_strcat(ann_code_dyn, tmp_astr);
+  }
+}
+
+/* calc_cons_u - combines calc_cons_a/calc_code/ calc_id */
+int
+calc_cons_u( /* inputs */
+	    const unsigned char *aa0, int n0,
+	    const unsigned char *aa1, int n1,
+	    struct a_res_str *a_res,	/* alignment encoding */
+	    struct pstruct *ppst,
+	    struct f_struct *f_str,
+	    void *pstat_void,
+	    /* annotation stuff */
+	    const unsigned char *ann_arr,
+	    const unsigned char *aa0a, const struct annot_str *annot0_p,
+	    const unsigned char *aa1a, const struct annot_str *annot1_p,
+	    int calc_func_mode, 	/* CALC_CONS, CALC_CODE, CALC_ID */
+	    int display_code, 	/* used only by CALC_CODE */
+	    /* outputs */
+	    int *nc,
+	    char *seqc0, char *seqc1, char *seqca, int *cumm_seq_score,
+	    char *seqc0a, char *seqc1a,
+	    struct a_struct *aln,
+	    int *score_delta, 
+	    struct dyn_string_str *annot_var_dyn,
+	    struct dyn_string_str *align_code_dyn)
+{
+  int i0, i1, nn1;
+  int op, lenc, nd, ns, itmp;
+  const unsigned char *aa1p;
+  char *sp0_p, *sp0a_p, *sp1_p, *sp1a_p, *spa_p, t_spa;
+  char sp0_c, sp1_c, spa_c;	/* used for CALC_ID, CALC_CODE */
+  char sp0a_c, sp1a_c;		/* used for CALC_CODE */
+  char tmp_str[MAX_SSTR];
+  int *i_spa;
+  const unsigned char *sq;
+  int *rp;
+  int smins, mins, ntmp;
+  int have_ann;
+  void *annot_stack = NULL;
+  struct update_code_str *update_data_p;
+
+  /* variables for variant changes */
+  int *aa0_pam2_p;
+  char *sim_sym = aln_map_sym[5];
+  struct annot_entry **s_annot0_arr_p, **s_annot1_arr_p;
+
+  char *ann_comment;
+  int i0_annot, i1_annot;	/* i0_annot, i1_annot, count through
+				   the list of annotations */
+  long i0_left_end, i1_left_end; /* left-most coordinate of domain end */
+
+  int show_code, annot_fmt, start_flag;
+
+  int v_delta, v_tmp;
+  int d1_score, d1_ident, d1_alen, d1_gaplen;
+  int d0_score, d0_ident, d0_alen, d0_gaplen;
+  int have_push_features;
+  int *have_push_features_p;
+
+  /* struct domfeat_data is used to capture score, coordinate, and
+     identity information for possibly overlapping sub-alignment
+     scores -- each domfeat_data entry is associated with an
+     annot_p->annot_arr_p entry */
+  struct domfeat_data *left_domain_head1, *left_domain_head0;
+  struct domfeat_data *left_domain_list1, *left_domain_list0;
+
+  /* variables for handling coordinate offsets */
+  long q_offset, l_offset;
+  long i0_off, i1_off;
+
+  if (ppst->ext_sq_set) { sq = ppst->sqx;}
+  else {sq = ppst->sq;}
+
+#ifndef TFASTA
+  aa1p = aa1;
+  nn1 = n1;
+#else
+  aa1p = f_str->aa1x;
+  nn1 = f_str->n10;
+#endif
+
+  aln->amin0 = a_res->min0;
+  aln->amax0 = a_res->max0;
+  aln->amin1 = a_res->min1;
+  aln->amax1 = a_res->max1;
+  aln->calc_last_set = 1;
+
+  q_offset = aln->q_offset;
+  l_offset = aln->l_offset;
+
+#ifndef LCAL_CONS	/* use for local context */
+  if (calc_func_mode == CALC_CONS) {
+    mins = pre_fill_cons(aa0, aa1p, a_res,ppst,  aln, seqc0, seqc1, seqca, &smins,
+			 seqc0a, seqc1a);
+  }
+#else		/* no flanking context */
+  smins = mins = 0;
+  aln->smin0=a_res->min0;
+  aln->smin1=a_res->min1;
+#endif
+
+  /* now get the middle */
+  have_ann = 0;	/* default no annotation */
+  left_domain_head0 = left_domain_head1 = NULL;
+  left_domain_list0 = left_domain_list1 = NULL;
+
+  /* have_ann encodes which sequences are annotated */
+  if ((annot0_p && annot0_p->n_annot > 0) || (aa0a != NULL)) { have_ann |= 1;}
+  if ((annot1_p && annot1_p->n_annot > 0) || (aa1a != NULL)) { have_ann |= 2;}
+
+  if (calc_func_mode == CALC_CONS) {
+    spa_p = seqca+mins;	/* pointer to alignment symbol */
+    if (cumm_seq_score) i_spa = cumm_seq_score+mins;	/* set index for cumm_seq_score */
+    sp0_p = seqc0+mins;
+    sp1_p = seqc1+mins;
+    /*     have_ann = (seqc0a != NULL);  */
+    annot_fmt = DP_FULL_FMT;
+  }
+  else if (calc_func_mode == CALC_ID || calc_func_mode == CALC_ID_DOM) {
+    spa_p = &spa_c;
+    sp0_p = &sp0_c;
+    sp1_p = &sp1_c;
+    /* does not require aa0a/aa1a, only for variants */
+    /*     have_ann = ((annot1_p && annot1_p->n_annot > 0) || (annot0_p && annot0_p->n_annot > 0)); */
+    annot_fmt = 3;
+  }
+  else if (calc_func_mode == CALC_CODE) {
+    spa_p = &spa_c;
+    sp0_p = &sp0_c;
+    sp1_p = &sp1_c;
+
+    show_code = (display_code & (SHOW_CODE_MASK+SHOW_CODE_EXT)); /* see defs.h; SHOW_CODE_ALIGN=4,_CIGAR=8,_CIGAR_EXT=24, _BTOP_EXT=16 */
+    annot_fmt = 2;
+    if (display_code & SHOW_ANNOT_FULL) {
+      annot_fmt = 1;
+    }
+
+    update_data_p = init_update_data(show_code);
+  }
+  else {
+    fprintf(stderr,"*** error [%s:%d] --- cal_cons_u() invalid calc_func_mode: %d\n",
+	    __FILE__, __LINE__, calc_func_mode);
+    exit(1);
+  }
+
+  have_push_features=0;
+
+  if (have_ann) {  /* initialize annotation variables */
+    if (calc_func_mode == CALC_CONS) {
+      sp0a_p = seqc0a+mins;
+      sp1a_p = seqc1a+mins;
+      annot_stack = init_stack(64,64);
+      have_push_features_p = &have_push_features;
+    }
+    else if (calc_func_mode == CALC_ID || calc_func_mode == CALC_ID_DOM) {
+      sp0a_p = &sp0a_c;
+      sp1a_p = &sp1a_c;
+      have_push_features_p = &have_push_features;
+      /*      ann_comment = NULL; */
+      annot_stack = init_stack(64,64);
+    }
+    else if (calc_func_mode == CALC_CODE) {
+      annot_stack = init_stack(64,64);
+      sp0a_p = &sp0a_c;
+      sp1a_p = &sp1a_c;
+      have_push_features_p = &have_push_features;
+    }
+
+    *score_delta = 0;
+    i0_left_end = i1_left_end = -1;
+    NULL_dyn_string(annot_var_dyn);
+  }
+  /* always initialize, updated with no annotations */
+  d1_score = d1_ident = d1_alen = d1_gaplen = 0;
+  d0_score = d0_ident = d0_alen = d0_gaplen = 0;
+
+  lenc = aln->nident = aln->nmismatch =
+    aln->npos = aln->nsim = aln->ngap_q = aln->ngap_l = aln->nfs = op = 0;
+
+  i0 = a_res->min0;	/* start in aa0[] */
+  i1 = a_res->min1;	/* start in aa1[] */
+
+  /* handle region annotations outside alignment */
+  v_delta = 0;
+  i0_annot = i1_annot = 0;
+  s_annot0_arr_p = s_annot1_arr_p = NULL;
+  if (have_ann) {
+    i1_off = seq_pos(i1, aln->llrev,0) + l_offset;
+    i0_off = seq_pos(i0, aln->qlrev,0) + q_offset;
+
+    if (annot1_p && annot1_p->n_annot > 0) {
+
+	left_domain_list1 = init_domfeat_data(annot1_p);
+	s_annot1_arr_p = annot1_p->s_annot_arr_p;
+
+	while (i1_annot < annot1_p->n_annot) {
+	  if (s_annot1_arr_p[i1_annot]->pos >= i1_off) {break;}
+	  if (s_annot1_arr_p[i1_annot]->end <= i1_off) {i1_annot++; continue;}
+
+	  if (s_annot1_arr_p[i1_annot]->label == '-') {
+	    process_annot_match(&itmp, aa0_pam2_p, i1_off, i0_off,
+				sp1_p, sp1a_p, sq, s_annot1_arr_p[i1_annot],  annot1_p->n_domains, &ann_comment, 
+				annot_stack, have_push_features_p, &v_delta,
+				&d1_score, &d1_ident, &d1_alen, &d1_gaplen,
+				&left_domain_head1,
+				&left_domain_list1[i1_annot], &i1_left_end, 0);
+	  }
+	  i1_annot++;
+	}
+    }
+
+    /* do not need have_ann here, because domain only */
+    if (annot0_p && annot0_p->n_annot>0) {
+
+      if (calc_func_mode == CALC_CONS || calc_func_mode == CALC_CODE) {
+
+	/* inefficient -- the same initiation is done for every
+	  query/subj alignment, even though it is always the same --
+	  should be done in the build_ares() loop */
+	left_domain_list0 = init_domfeat_data(annot0_p);
+	s_annot0_arr_p = annot0_p->s_annot_arr_p;
+
+	while (i0_annot < annot0_p->n_annot) {
+	  if (s_annot0_arr_p[i0_annot]->pos >= i0_off) {break;}
+	  if (s_annot0_arr_p[i0_annot]->end <= i0_off) {i0_annot++; continue;}
+
+	  if (s_annot0_arr_p[i0_annot]->label == '-') {
+	    process_annot_match(&itmp, NULL, i0_off, i1_off,
+				sp0_p, sp0a_p, sq, s_annot0_arr_p[i0_annot], annot0_p->n_domains,  &ann_comment, 
+				annot_stack, have_push_features_p, &v_delta,
+				&d0_score, &d0_ident, &d0_alen, &d0_gaplen,
+				&left_domain_head0,
+				&left_domain_list0[i0_annot], &i0_left_end, 0);
+	  }
+	  i0_annot++;
+	}
+      }
+    }
+  }
+  /* done with domains starting before alignment */
+
+  /* handle alignment encoding */
+  rp = a_res->res;	/* alignment encoding array */
+  while (i0 < a_res->max0 || i1 < a_res->max1) {
+    /* match/mismatch (aligned residues */
+    /* here, op is the "current" encoding, and *rp is the next one */
+    if (op == 0 && *rp == 0) {
+      op = *rp++;
+      lenc++;
+
+      if (ppst->pam_pssm) {aa0_pam2_p = ppst->pam2p[0][i0];}
+      else {aa0_pam2_p = ppst->pam2[0][aa0[i0]];}
+
+      itmp=aa0_pam2_p[aa1p[i1]];
+
+      *sp0_p = sq[aa0[i0]];
+      *sp1_p = sq[aa1p[i1]];
+
+      if (have_ann) {
+	have_push_features = 0;
+	*sp0a_p = *sp1a_p = ' ';
+	if (aa0a) *sp0a_p = ann_arr[aa0a[i0]];
+	if (aa1a) *sp1a_p = ann_arr[aa1a[i1]];
+
+	if (s_annot1_arr_p) {
+	  if (i1+l_offset == s_annot1_arr_p[i1_annot]->pos) {
+
+	    i1_annot = next_annot_match(&itmp, aa0_pam2_p, l_offset+seq_pos(i1,aln->llrev,0),
+					q_offset + seq_pos(i0,aln->qlrev,0), sp1_p, sp1a_p, sq, 
+					i1_annot, annot1_p->n_annot, s_annot1_arr_p,
+					&ann_comment, annot_stack, have_push_features_p, &v_delta,
+					&d1_score, &d1_ident, &d1_alen, &d1_gaplen,
+					&left_domain_head1, left_domain_list1, &i1_left_end, 0);
+
+	    /* must be out of the loop to capture the last value */
+	    if (sq[aa1p[i1]] != *sp1_p) {
+	      t_spa = align_type(itmp, *sp0_p, *sp1_p, ppst->nt_align, NULL, ppst->pam_x_id_sim);
+	      if (calc_func_mode != CALC_ID && calc_func_mode != CALC_ID_DOM) {
+		comment_var(q_offset+seq_pos(i0,aln->qlrev,0), *sp0_p,
+			    l_offset+seq_pos(i1,aln->llrev,0), *sp1_p,
+			    sq[aa1p[i1]], sim_sym[t_spa], ann_comment,
+			    annot_var_dyn, 1, annot_fmt);
+	      }
+	      else {
+		sprintf(tmp_str,"%c%d%c;",sq[aa1p[i1]],i1+1,*sp1_p);
+		/*   SAFE_STRNCAT(annot_var_s,tmp_str,n_annot_var_s); */
+		dyn_strcat(annot_var_dyn, tmp_str);
+	      }
+	    }
+	  }
+	  d1_score += itmp;
+	}
+
+	if (s_annot0_arr_p) {
+	  if (i0 + q_offset == s_annot0_arr_p[i0_annot]->pos) {
+
+	    i0_annot = next_annot_match(&itmp, aa0_pam2_p, q_offset+seq_pos(i0,aln->qlrev,0),
+					l_offset + seq_pos(i1,aln->llrev,0), sp0_p, sp0a_p, sq, 
+					i0_annot, annot0_p->n_annot, s_annot0_arr_p,
+					&ann_comment, annot_stack, have_push_features_p, &v_delta,
+					&d0_score, &d0_ident, &d0_alen, &d0_gaplen,
+					&left_domain_head0, left_domain_list0, &i0_left_end, 0);
+
+	    /* must be out of the loop to capture the last value */
+	    if (sq[aa0[i0]] != *sp0_p) {
+	      t_spa = align_type(itmp, *sp0_p, *sp1_p, ppst->nt_align, NULL, ppst->pam_x_id_sim);
+	      if (calc_func_mode != CALC_ID && calc_func_mode != CALC_ID_DOM) {
+
+	      comment_var(q_offset+seq_pos(i0,aln->qlrev,0), *sp0_p,
+			  l_offset+seq_pos(i1,aln->llrev,0), *sp1_p,
+			  sq[aa0[i0]], sim_sym[t_spa], ann_comment,
+			  annot_var_dyn, 0, annot_fmt);
+	      }
+	      else {
+		sprintf(tmp_str,"q%c%d%c;",sq[aa0[i0]],i0+1,*sp0_p);
+		/*  SAFE_STRNCAT(annot_var_s,tmp_str,n_annot_var_s); */
+		dyn_strcat(annot_var_dyn, tmp_str);
+	      }
+	    }
+	  }
+	  d0_score += itmp;
+	}
+	if (calc_func_mode == CALC_CONS) {sp0a_p++; sp1a_p++;}
+      }
+
+      *spa_p = align_type(itmp, *sp0_p, *sp1_p, ppst->nt_align, aln, ppst->pam_x_id_sim);
+
+      d1_alen++;
+      d0_alen++;
+      if (*spa_p == M_IDENT) {
+	d1_ident++;
+	d0_ident++;
+      }
+
+      if (s_annot1_arr_p && (i1 + l_offset == i1_left_end)) {
+	i1_annot = next_annot_match(&itmp, aa0_pam2_p, l_offset+seq_pos(i1,aln->llrev,0),
+				    q_offset + seq_pos(i0,aln->qlrev,0), sp1_p, sp1a_p, sq, 
+				    i1_annot, annot1_p->n_annot, s_annot1_arr_p,
+				    &ann_comment, annot_stack, have_push_features_p, &v_delta,
+				    &d1_score, &d1_ident, &d1_alen, &d1_gaplen,
+				    &left_domain_head1, left_domain_list1, &i1_left_end, 0);
+      }
+
+      if (s_annot0_arr_p && (i0 + q_offset == i0_left_end)) {
+	i0_annot = next_annot_match(&itmp, aa0_pam2_p, q_offset+seq_pos(i0,aln->qlrev,0),
+				    l_offset + seq_pos(i1,aln->llrev,0), sp0_p, sp0a_p, sq, 
+				    i0_annot, annot0_p->n_annot, s_annot0_arr_p,
+				    &ann_comment, annot_stack, have_push_features_p, &v_delta,
+				    &d0_score, &d0_ident, &d0_alen, &d0_gaplen,
+				    &left_domain_head0, left_domain_list0, &i0_left_end, 0);
+      }
+
+      if (calc_func_mode == CALC_CODE) {
+	update_code(align_code_dyn, update_data_p, op, *spa_p, *sp0_p, *sp1_p);
+      }
+
+      /* now we have done all the ?modified identity checks, display
+	 potential site annotations */
+      if (have_ann && calc_func_mode == CALC_CODE) {
+	  add_annot_code(have_ann, *sp0_p, *sp1_p, *sp0a_p, *sp1a_p,
+			 q_offset + seq_pos(i0,aln->qlrev,0), l_offset+seq_pos(i1,aln->llrev,0),
+			 sim_sym[*spa_p], annot_var_dyn);
+      }
+
+      if (have_push_features && calc_func_mode != CALC_ID) {
+	display_push_features(annot_stack, annot_var_dyn,
+			      q_offset+seq_pos(i0,aln->qlrev,0), *sp0_p,
+			      l_offset+seq_pos(i1,aln->llrev,0), *sp1_p,
+			      sim_sym[*spa_p],
+			      a_res->rst.score[ppst->score_ix], a_res->rst.comp, a_res->sw_score,
+			      n0, n1,
+			      pstat_void, annot_fmt);
+	have_push_features = 0;
+      }
+
+      if (cumm_seq_score) *i_spa++ = itmp;	/* update cummulative score */
+      i0++; i1++;
+      if (calc_func_mode == CALC_CONS) {
+	sp0_p++; sp1_p++; spa_p++;
+      }
+    }
+    else {	/* indel */
+      /* include all calc_func_mode's, because i_annot must be incremented in indels */
+      if (op==0) {
+	op = *rp++;
+	if (cumm_seq_score) *i_spa = ppst->gdelval;
+	d1_score +=  ppst->gdelval;
+	d0_score +=  ppst->gdelval;
+      }
+      if (cumm_seq_score) *i_spa++ += ppst->ggapval;
+      d1_score +=  ppst->ggapval; d1_alen++; d1_gaplen++;
+      d0_score +=  ppst->ggapval; d0_alen++; d0_gaplen++;
+
+      if (op > 0) {	/* insertion in aa0 */
+	*sp1_p = sq[aa1p[i1]];
+	*sp0_p = '-';
+	*spa_p = M_DEL;
+
+	if (calc_func_mode == CALC_CODE) {
+	  update_code(align_code_dyn, update_data_p, 2, *spa_p,*sp0_p,*sp1_p);
+	}
+
+	if (have_ann) {
+	  have_push_features = 0;
+	  *sp0a_p = ' ';
+	  if (aa1a) *sp1a_p = ann_arr[aa1a[i1]];
+	  else *sp1a_p = ' ';
+	  if (s_annot1_arr_p) {
+	    if (i1+l_offset == s_annot1_arr_p[i1_annot]->pos || i1+l_offset == i1_left_end) {
+	      i1_annot = next_annot_match(&itmp, aa0_pam2_p, l_offset+seq_pos(i1,aln->llrev,0),
+					  q_offset+seq_pos(i0,aln->qlrev,0), sp1_p, sp1a_p, sq, 
+					  i1_annot, annot1_p->n_annot, s_annot1_arr_p,
+					  &ann_comment, annot_stack, have_push_features_p, &v_delta,
+					  &d1_score, &d1_ident, &d1_alen, &d1_gaplen,
+					  &left_domain_head1, left_domain_list1, &i1_left_end,
+					  ppst->ggapval+ppst->gdelval);
+	    }
+	  }
+
+	  if (have_ann && calc_func_mode == CALC_CODE) {
+	    add_annot_code(have_ann, *sp0_p, *sp1_p, *sp0a_p, *sp1a_p,
+			   q_offset + seq_pos(i0,aln->qlrev,0), l_offset+seq_pos(i1,aln->llrev,0),
+			   '-', annot_var_dyn);
+	  }
+
+	  if (have_push_features && calc_func_mode != CALC_ID) {
+	    display_push_features(annot_stack, annot_var_dyn,
+				  q_offset+seq_pos(i0,aln->qlrev,0), *sp0_p,
+				  l_offset+seq_pos(i1,aln->llrev,0), *sp1_p,
+				  sim_sym[*spa_p],
+				  a_res->rst.score[ppst->score_ix], a_res->rst.comp, a_res->sw_score,
+				  n0, n1,
+				  pstat_void, annot_fmt);
+	    have_push_features = 0;
+	  }
+	  if (calc_func_mode == CALC_CONS) {
+	    sp0a_p++;
+	    sp1a_p++;
+	  }
+	}
+	if (calc_func_mode == CALC_CONS) {
+	  spa_p++;
+	  sp0_p++;
+	  sp1_p++;
+	}
+	i1++;
+	op--;
+	lenc++;
+	aln->ngap_q++;
+      }
+      else {		/* (op < 0),  insertion in aa1 */
+	*sp1_p = '-';
+	*spa_p = M_DEL;
+	*sp0_p = sq[aa0[i0]];
+
+	if (calc_func_mode == CALC_CODE) {
+	  update_code(align_code_dyn, update_data_p, 1, *spa_p,*sp0_p,*sp1_p);
+	}
+
+	if (have_ann) {
+	  have_push_features = 0;
+	  *sp1a_p = ' ';
+	  if (aa0a) *sp0a_p = ann_arr[aa0a[i0]];
+	  else *sp0a_p = ' ';
+	  if (s_annot0_arr_p) {
+	    if (i0+q_offset == s_annot0_arr_p[i0_annot]->pos || i0+q_offset == i0_left_end) {
+	      i0_annot = next_annot_match(&itmp, ppst->pam2[0][aa1[i1]], q_offset+seq_pos(i0,aln->qlrev,0),
+					  l_offset+seq_pos(i1,aln->llrev,0), sp0_p, sp0a_p, sq, 
+					  i0_annot, annot0_p->n_annot, s_annot0_arr_p,
+					  &ann_comment, annot_stack, have_push_features_p, &v_delta,
+					  &d0_score, &d0_ident, &d0_alen, &d0_gaplen,
+					  &left_domain_head0, left_domain_list0, &i0_left_end,
+					  ppst->ggapval+ppst->gdelval);
+
+	    }
+	  }
+
+	  if (calc_func_mode == CALC_CODE) {
+	    add_annot_code(have_ann, *sp0_p, *sp1_p, *sp0a_p, *sp1a_p,
+			   q_offset + seq_pos(i0,aln->qlrev,0), l_offset+seq_pos(i1,aln->llrev,0),
+			   '-', annot_var_dyn);
+	  }
+
+	  if (have_push_features && calc_func_mode != CALC_ID) {
+	    display_push_features(annot_stack, annot_var_dyn,
+				  q_offset+seq_pos(i0,aln->qlrev,0), *sp0_p,
+				  l_offset+seq_pos(i1,aln->llrev,0), *sp1_p,
+				  sim_sym[*spa_p],
+				  a_res->rst.score[ppst->score_ix], a_res->rst.comp, a_res->sw_score,
+				  n0, n1, pstat_void, annot_fmt);
+	    have_push_features = 0;
+	  }
+	  if (calc_func_mode == CALC_CONS) {
+	    sp0a_p++;
+	    sp1a_p++;
+	  }
+	}
+
+	i0++;
+	if (calc_func_mode == CALC_CONS) {
+	  spa_p++;
+	  sp0_p++;
+	  sp1_p++;
+	}
+	op++;
+	lenc++;
+	aln->ngap_l++;
+      }
+    }
+  }
+
+  if (calc_func_mode == CALC_CODE) {
+    close_update_data(align_code_dyn, update_data_p);
+  }
+
+  *score_delta = v_delta;
+
+  *nc = lenc;
+  if (have_ann) {
+    have_push_features = 0;
+    /* check for left ends after alignment */
+    if (annot1_p && i1_left_end > 0) {
+      close_annot_match(-1, annot_stack, have_push_features_p,
+			&d1_score, &d1_ident, &d1_alen, &d1_gaplen,
+			&left_domain_head1, &i1_left_end, 0);
+    }
+
+    if (annot0_p && i0_left_end > 0) {
+      close_annot_match(-1, annot_stack, have_push_features_p,
+			&d0_score, &d0_ident, &d0_alen, &d0_gaplen,
+			&left_domain_head0, &i0_left_end, 0);
+    }
+
+    if (have_push_features && calc_func_mode != CALC_ID) {
+      display_push_features(annot_stack, annot_var_dyn,
+			    a_res->max0-1 + q_offset, *sp0_p,
+			    a_res->max1-1 + l_offset, *sp1_p,
+			    sim_sym[*spa_p],
+			    a_res->rst.score[ppst->score_ix], a_res->rst.comp, a_res->sw_score,
+			    n0, n1, pstat_void, annot_fmt);
+    }
+  }
+
+  *spa_p = '\0';
+  if (have_ann) {
+    *sp0a_p = *sp1a_p = '\0';
+  }
+  if (calc_func_mode == CALC_CONS) {
+#ifndef LCAL_CONS	/* have context around alignment */
+    nd = post_fill_cons(aa0, n0, aa1p, nn1,
+			a_res, ppst, mins, lenc, aln,
+			seqc0, seqc1, seqc0a, seqc1a);
+#else
+    nd = 0;
+#endif
+    lenc = mins + lenc + nd;
+  }
+
+  if (have_ann) {
+    if (left_domain_list0) free(left_domain_list0);
+    if (left_domain_list1) free(left_domain_list1);
+    annot_stack = free_stack(annot_stack);
+  }
+
+  return lenc;
+}
+/* calc_cons_a(): wrapper that runs calc_cons_u() in CALC_CONS mode and returns its result */
+int calc_cons_a(const unsigned char *aa0, int n0,
+		const unsigned char *aa1, int n1,
+		int *nc,
+		struct a_struct *aln,
+		struct a_res_str *a_res,
+		struct pstruct *ppst,
+		char *seqc0, char *seqc1, char *seqca, int *cumm_seq_score,
+		const unsigned char *ann_arr,
+		const unsigned char *aa0a, const struct annot_str *annot0_p, char *seqc0a,
+		const unsigned char *aa1a, const struct annot_str *annot1_p, char *seqc1a,
+		int *score_delta, 
+		struct dyn_string_str *annot_var_dyn,
+		struct f_struct *f_str,
+		void *pstat_void)
+{
+  return calc_cons_u(
+		     aa0, n0, aa1, n1,
+		     a_res, ppst, f_str, pstat_void,
+		     ann_arr, aa0a, annot0_p, aa1a, annot1_p, CALC_CONS, 0,	/* 0: the slot where calc_code() passes display_code */
+		     nc, seqc0, seqc1, seqca, cumm_seq_score,
+		     seqc0a, seqc1a, aln, score_delta, annot_var_dyn, NULL	/* NULL: the slot where calc_code() passes align_code_dyn */
+		     );
+}
+/* calc_astruct(): copy the alignment boundaries min0/max0/min1/max1 from a_res_p into aln_p->amin0/amax0/amin1/amax1 */
+void
+calc_astruct(struct a_struct *aln_p, struct a_res_str *a_res_p, struct f_struct *f_str) {
+  /* f_str is accepted but not referenced in this function */
+  /* we do not pay attention to aln_p->calc_last_set, because all the
+     functions (calc_astruct, calc_cons_a, calc_code) use exactly the same
+     assignment */
+
+  aln_p->amin0 = a_res_p->min0;
+  aln_p->amax0 = a_res_p->max0;
+  aln_p->amin1 = a_res_p->min1;
+  aln_p->amax1 = a_res_p->max1;
+}
+/* init_update_data(): calloc() and initialize the run-tracking state read by update_code() and freed by close_update_data() */
+static struct update_code_str *
+init_update_data(int show_code) {
+  /* show_code holds SHOW_CODE_* bit flags; it is declared int explicitly (it was an untyped, implicit-int parameter) */
+  struct update_code_str *update_data_p;
+
+  if ((update_data_p = (struct update_code_str *)calloc(1,sizeof(struct update_code_str)))==NULL) {
+    fprintf(stderr,"*** error [%s:%d] - init_update_data(): cannot allocate update_code_str\n",
+	      __FILE__, __LINE__);
+    return NULL;	/* allocation failed: the caller gets NULL */
+  }
+  /* start with no pending run */
+  update_data_p->p_op_idx = -1;
+  update_data_p->p_op_cnt = 0;
+  update_data_p->show_code = show_code;
+  update_data_p->btop_enc = 0;
+  /* pick the op->character map and the count/code order for the requested encoding */
+  if ((show_code & SHOW_CODE_CIGAR) == SHOW_CODE_CIGAR) {	/* CIGAR enc */
+    update_data_p->op_map = cigar_code;
+    update_data_p->cigar_order = 1;
+  }
+  else if ((show_code & SHOW_CODE_BTOP) == SHOW_CODE_BTOP) {	/* btop_enc */
+    update_data_p->op_map = ori_code;
+    update_data_p->cigar_order = 0;
+    update_data_p->btop_enc = 1;
+  }    
+  else {   /* orig (ALIGN) enc */
+    update_data_p->op_map = ori_code;
+    update_data_p->cigar_order = 0;
+  }
+
+  if ((show_code & SHOW_CODE_EXT) == SHOW_CODE_EXT) {	/* set for CIGAR/ALIGN, BTOP already set */
+    update_data_p->show_ext = 1;
+  }
+  else {
+    update_data_p->show_ext = 0;
+  }
+
+  return update_data_p;
+}
+
+/* close_update_data(): append any pending run to align_code_dyn, then free up_dp; returns at once if up_dp is NULL */
+static void
+close_update_data(struct dyn_string_str *align_code_dyn,
+		  struct update_code_str *up_dp) {
+  char tmp_cnt[MAX_SSTR];
+  tmp_cnt[0] = '\0';
+
+  if (!up_dp) return;
+
+  if (up_dp->p_op_cnt) {
+    if (up_dp->btop_enc) { /* btop_enc always has a p_op_cnt == 0 unless in run of identical match */
+      sprintf(tmp_cnt,"%d",up_dp->p_op_cnt);
+      up_dp->p_op_cnt = 0;
+    }
+    else {
+      sprintf_code(tmp_cnt,up_dp, up_dp->p_op_idx, up_dp->p_op_cnt);
+    }
+    dyn_strcat(align_code_dyn,tmp_cnt);
+  }
+
+  free(up_dp);
+}
+
+/* update_code() has been modified to work more correctly with
+   ggsearch/glsearch, which, because alignments can start with either
+   insertions or deletions, can produce an initial code of "0=".  When
+   that happens, it is ignored and no code is added.
+
+   *align_code_dyn - alignment string (dynamic)
+   op -- encoded operation, currently 0=match, 1-delete, 2-insert, 3-term-match, 4-mismatch
+   op_cnt -- length of run
+   show_code -- SHOW_CODE_CIGAR uses cigar_code, otherwise legacy
+*/
+/* sprintf_code(): write one run into tmp_str as "<count><code>" when cigar_order is set, else "<code><count>"; callers in this file pass a MAX_SSTR buffer */
+static void
+sprintf_code(char *tmp_str, struct update_code_str *up_dp, int op_idx, int op_cnt) {
+  if (up_dp->cigar_order) {
+    sprintf(tmp_str,"%d%c",op_cnt,up_dp->op_map[op_idx]);
+  }
+  else {
+    sprintf(tmp_str,"%c%d",up_dp->op_map[op_idx],op_cnt);
+  }
+}
+
+/* only called for btop alignment encoding: for an identity (op==0 and
+   sim_code == M_IDENT) just increment up_dp->p_op_cnt and leave tmp_str
+   empty; otherwise write the pending identity count (if > 0) followed by
+   the residue pair sp0,sp1 into tmp_str and reset up_dp->p_op_cnt to 0 */
+
+static void
+sprintf_btop(char *tmp_str, 
+	     struct update_code_str *up_dp, 
+	     int op, int sim_code,
+	     unsigned char sp0, unsigned char sp1)
+{
+  char local_str[MAX_SSTR];
+  local_str[0]='\0';
+
+  tmp_str[0] = '\0';
+
+  if (op==0 && sim_code == M_IDENT) {
+    up_dp->p_op_cnt++;
+    return;
+  }
+  else {
+    if (up_dp->p_op_cnt > 0) {
+      sprintf(local_str,"%d",up_dp->p_op_cnt);
+    }
+    up_dp->p_op_cnt = 0;
+    sprintf(tmp_str,"%s%c%c",local_str,sp0,sp1);
+  }
+}
+/* update_code(): fold one aligned position (op, sim_code, residues sp0/sp1) into the run state in up_dp, appending finished runs to align_code_dyn */
+static void
+update_code(struct dyn_string_str *align_code_dyn, 
+	    struct update_code_str *up_dp, int op, 
+	    int sim_code,  unsigned char sp0, unsigned char sp1)
+{
+  char tmp_cnt[MAX_SSTR];
+  tmp_cnt[0]='\0';
+
+  /* op == 0 : match state (could involve termination codons);
+     op == 1 : deletion
+     op == 2 : insertion
+     op == 3 : *:*
+     op == 4 : mismatch state (assigned below when show_ext is set)
+  */
+
+  if (up_dp->btop_enc) {
+    sprintf_btop(tmp_cnt, up_dp, op, sim_code, sp0, sp1);
+    dyn_strcat(align_code_dyn, tmp_cnt);
+    return;
+  }
+
+  /* not btop_enc: with no pending run (p_op_cnt == 0), this op just starts one */
+  if (up_dp->p_op_cnt == 0) {
+    up_dp->p_op_idx = op;
+    up_dp->p_op_cnt = 1;
+    return;
+  }
+  /* NOTE(review): the early return above stores op as passed, before the op 3/4 reclassification below -- confirm intended */
+  if (op == 1 || op == 2) {
+    if (up_dp->p_op_idx == op) { up_dp->p_op_cnt++;}
+    else {
+      sprintf_code(tmp_cnt,up_dp, up_dp->p_op_idx,up_dp->p_op_cnt);
+      dyn_strcat(align_code_dyn, tmp_cnt);
+      up_dp->p_op_idx = op;
+      up_dp->p_op_cnt = 1;
+    }
+  }
+  else if (op==0 || op == 3) {
+    if (sp0 != '*' && sp1 != '*') {	/* default case, not termination */
+      if (up_dp->show_ext) {
+	if (sim_code != M_IDENT) { op = 4;}
+      }
+    }
+    else {	/* have a termination codon, output for !SHOW_CODE_CIGAR */
+      if (!up_dp->cigar_order) {
+	if (sp0 == '*' || sp1 == '*') { op = 3;}
+      }
+      else if (up_dp->show_ext && (sp0 != sp1)) { op = 4;}
+    }
+    /* op may have been remapped to 3 or 4 above; flush the previous run if it differs */
+    if (op != up_dp->p_op_idx) {
+      sprintf_code(tmp_cnt,up_dp, up_dp->p_op_idx,up_dp->p_op_cnt);
+      dyn_strcat(align_code_dyn, tmp_cnt);
+      up_dp->p_op_idx = op;
+      up_dp->p_op_cnt = 1;
+    }
+    else {
+      up_dp->p_op_cnt++;
+    }
+  }
+}
+
+/* build an array of match/ins/del - length strings */
+/* 5-June-2014 - modified to split "match" encoding into identical (=)
+   and mismatch (X)
+
+   To support domain-based scoring, this function iterates through
+   every aligned position, including insertions and deletions (which
+   are encoded as runs).
+ */
+/* calc_code(): wrapper that runs calc_cons_u() in CALC_CODE mode with display_code and align_code_dyn; returns its result */
+int calc_code(const unsigned char *aa0, int n0,
+	      const unsigned char *aa1, int n1,
+	      struct a_struct *aln,
+	      struct a_res_str *a_res,
+	      struct pstruct *ppst,
+	      struct dyn_string_str *align_code_dyn,
+	      /* char *al_str, int al_str_n,  */
+	      const unsigned char *ann_arr, 
+	      const unsigned char *aa0a,
+	      const struct annot_str *annot0_p,
+	      const unsigned char *aa1a,
+	      const struct annot_str *annot1_p,
+	      struct dyn_string_str *annot_code_dyn,
+	      int *score_delta,
+	      struct f_struct *f_str,
+	      void *pstat_void,
+	      int display_code)
+{
+  int nc;	/* passed as &nc to calc_cons_u(); not read afterwards */
+
+  return calc_cons_u(
+		     aa0, n0, aa1, n1,
+		     a_res, ppst, f_str, pstat_void,
+		     ann_arr, aa0a, annot0_p, aa1a, annot1_p, CALC_CODE, 
+		     display_code,
+		     &nc, NULL, NULL, NULL, NULL,
+		     NULL, NULL, aln, score_delta, annot_code_dyn,
+		     align_code_dyn
+		     );
+}
+
+/* calc_id never looks at domains or features, only variation */
+/* calc_id(): wrapper that runs calc_cons_u() in CALC_ID mode with no sequence/annotation output buffers; returns its result */
+int calc_id(const unsigned char *aa0, int n0,
+	    const unsigned char *aa1, int n1,
+	    struct a_struct *aln, 
+	    struct a_res_str *a_res,
+	    struct pstruct *ppst,
+	    const struct annot_str *annot0_p,
+	    const struct annot_str *annot1_p,
+	    int *score_delta,
+	    struct dyn_string_str *annot_var_dyn,
+	    struct f_struct *f_str)
+{
+  int nc;	/* passed as &nc to calc_cons_u(); not read afterwards */
+
+  return calc_cons_u(
+		     aa0, n0, aa1, n1,
+		     a_res, ppst, f_str, NULL,
+		     NULL, NULL, annot0_p, NULL, annot1_p, CALC_ID, 0,
+		     &nc, NULL, NULL, NULL, NULL,
+		     NULL, NULL, aln, score_delta, annot_var_dyn,
+		     NULL
+		     );
+}
+
+/* calc_idd: same call as calc_id but with CALC_ID_DOM; NOTE(review): the comment here was copied from calc_id ("never looks at domains") -- confirm */
+
+int calc_idd(const unsigned char *aa0, int n0,
+	     const unsigned char *aa1, int n1,
+	     struct a_struct *aln, 
+	     struct a_res_str *a_res,
+	     struct pstruct *ppst,
+	     const struct annot_str *annot0_p,
+	     const struct annot_str *annot1_p,
+	     int *score_delta,
+	     struct dyn_string_str *annot_var_dyn,
+	     struct f_struct *f_str)
+{
+  int nc;	/* passed as &nc to calc_cons_u(); not read afterwards */
+
+  return calc_cons_u(
+		     aa0, n0, aa1, n1,
+		     a_res, ppst, f_str, NULL,
+		     NULL, NULL, annot0_p, NULL, annot1_p, CALC_ID_DOM, 0,
+		     &nc, NULL, NULL, NULL, NULL,
+		     NULL, NULL, aln, score_delta, annot_var_dyn,
+		     NULL
+		     );
+}
diff --git a/src/cal_consf.c b/src/cal_consf.c
new file mode 100644
index 0000000..2adbea2
--- /dev/null
+++ b/src/cal_consf.c
@@ -0,0 +1,591 @@
+/* cal_consf.c - routines for printing translated alignments for [t]fast[sf] */
+
+/* copyright (c) 1998, 1999, 2007, 2014 by William R. Pearson and The
+   Rector & Visitors of the University of Virginia */
+
+/*  $Id: cal_consf.c 1263 2014-06-25 10:40:39Z wrp $ */
+
+/* Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under this License is distributed on an "AS
+   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+   express or implied.  See the License for the specific language
+   governing permissions and limitations under the License. 
+*/
+
+/* removed from dropfs2.c, dropff2.c April, 2007 */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "defs.h"
+#include "param.h"
+
+#include "tatstats.h"
+
+#include "a_mark.h"
+
+#define DROP_INTERN
+#include "drop_func.h"
+
+void update_code(struct dyn_string_str *align_code_dyn, int op, int op_cnt, int fnum, int show_code);
+extern void aancpy(char *to, char *from, int count, const struct pstruct *ppst);
+/* calc_cons_a(): build the display strings seqc0/seqc1/seqca (plus seqc0a/seqc1a when annotated) for a_res; returns the total display length */
+int
+calc_cons_a(const unsigned char *aa0, int n0,
+	    const unsigned char *aa1, int n1,
+	    int *nc,
+	    struct a_struct *aln,
+	    struct a_res_str *a_res,
+	    struct pstruct *ppst,
+	    char *seqc0, char *seqc1, char *seqca, int *cumm_seq_score,
+	    const unsigned char *ann_arr,
+	    const unsigned char *aa0a, const struct annot_str *annot0_p, char *seqc0a, 
+	    const unsigned char *aa1a, const struct annot_str *annot1_p, char *seqc1a, 
+	    int *score_delta,
+	    struct dyn_string_str *annot_var_dyn,
+	    struct f_struct *f_str,
+	    void *pstat_void)
+{
+  int i0, i1, nn1, n0t;
+  int op, lenc, len_gap, nd, ns, itmp, p_ac, fnum, o_fnum;
+  int *i_spa;
+  const unsigned char *aa1p;
+  const unsigned char *aa0ap;
+  char *sp0, *sp0a, *sp1, *sp1a, *spa;
+  int *rp;
+  int mins, smins, ntmp;	/* smins is assigned but never read; ntmp appears only in commented-out code */
+  int have_ann = 0;
+
+  *score_delta = 0;
+  NULL_dyn_string(annot_var_dyn);
+  have_ann = (seqc0a != NULL && (aa0a != NULL || aa1a != NULL));
+  
+#ifndef TFAST
+  aa1p = aa1;
+  nn1 = n1;
+#else
+  aa1p = f_str->aa1x;
+  nn1 = f_str->n10;
+#endif
+
+  aln->amin0 = a_res->min0 + f_str->aa0t_off;
+  aln->amax0 = a_res->max0 + f_str->aa0t_off;;
+  aln->amin1 = a_res->min1;
+  aln->amax1 = a_res->max1;
+  aln->calc_last_set = 1;
+
+  /* first fill in the ends */
+  n0 -= (f_str->nm0-1);	/* NOTE(review): n0 is not read again in live code below */
+  /* choose mins, the amount of left context copied before the alignment start */
+  if (min(a_res->min0,a_res->min1)<aln->llen || aln->showall==1)
+    			/* will we show all the start ?*/
+    if (a_res->min0>=a_res->min1) {                        /* aa0 extends more to left */
+      smins=0;
+      if (aln->showall==1) mins=a_res->min0;
+      else mins = min(a_res->min0,aln->llen/2);
+      aancpy(seqc0,(char *)f_str->aa0t+a_res->min0-mins,mins,ppst);
+      aln->smin0 = a_res->min0-mins;
+      if ((mins-a_res->min1)>0) {
+	memset(seqc1,' ',mins-a_res->min1);
+	aancpy(seqc1+mins-a_res->min1,(char *)aa1p,a_res->min1,ppst);
+	aln->smin1 = 0;
+      }
+      else {
+	aancpy(seqc1,(char *)aa1p+a_res->min1-mins,mins,ppst);
+	aln->smin1 = a_res->min1-mins;
+      }
+    }
+    else {
+      smins=0;
+      if (aln->showall == 1) mins=a_res->min1;
+      else mins = min(a_res->min1,aln->llen/2);
+      aancpy(seqc1,(char *)(aa1p+a_res->min1-mins),mins,ppst);
+      aln->smin1 = a_res->min1-mins;
+      if ((mins-a_res->min0)>0) {
+	memset(seqc0,' ',mins-a_res->min0);
+	aancpy(seqc0+mins-a_res->min0,(char *)f_str->aa0t,a_res->min0,ppst);
+	aln->smin0 = 0;
+      }
+      else {
+	aancpy(seqc0,(char *)f_str->aa0t+a_res->min0-mins,mins,ppst);
+	aln->smin0 = a_res->min0-mins;
+      }
+    }
+  else {
+    mins= min(aln->llen/2,min(a_res->min0,a_res->min1));
+    smins=mins;
+    aln->smin0=a_res->min0;
+    aln->smin1=a_res->min1;
+    aancpy(seqc0,(char *)f_str->aa0t+a_res->min0-mins,mins,ppst);
+    aancpy(seqc1,(char *)aa1p+a_res->min1-mins,mins,ppst);
+  }
+
+  memset(seqca,M_BLANK,mins);
+  if (have_ann) {
+    /* pad annotation before alignment - this strategy means no
+       annotation before alignment */
+    memset(seqc0a,' ', mins);
+    memset(seqc1a,' ', mins);
+  }
+
+/* now get the middle */
+
+  spa = seqca+mins;
+  if (cumm_seq_score) i_spa = cumm_seq_score+mins;
+  sp0 = seqc0+mins;
+  sp0a = seqc0a+mins;
+  sp1 = seqc1+mins;
+  sp1a = seqc1a+mins;
+  rp = a_res->res;
+  n0t=lenc=len_gap=aln->nident=aln->nmismatch=aln->nsim=aln->npos=aln->ngap_q=aln->ngap_l=op=p_ac= 0;
+  i0 = a_res->min0;
+  i1 = a_res->min1;
+  
+  /* op is the previous "match/insert" operator; *rp is the current
+     operator or repeat count */
+
+#if defined(FASTS) || defined(FASTM)
+  o_fnum = f_str->aa0ti[i0];
+  if (aa0a) aa0ap = &aa0a[f_str->nmoff[o_fnum]+i0];
+#endif
+
+  while (i0 < a_res->max0 || i1 < a_res->max1) {
+#if defined(FASTS) || defined(FASTM)
+    fnum = f_str->aa0ti[i0];
+#endif
+
+    if (op == 0 && *rp == 0) {	/* previous was match (or start), current is match */
+
+#if defined(FASTS) || defined(FASTM)
+      if (p_ac == 0) { /* previous code was a match */
+	if (fnum != o_fnum) { /* continuing a match, but with a different fragment */
+	  if (have_ann) { aa0ap = &aa0a[f_str->nmoff[fnum]];}
+	  o_fnum = fnum;
+	}
+      }
+      else {
+	p_ac = 0; o_fnum = fnum = f_str->aa0ti[i0];
+	if (have_ann) {aa0ap = &aa0a[f_str->nmoff[fnum]];}
+      }
+#endif
+      op = *rp++;		/* get the next match/insert operator */
+
+      /* get the alignment symbol */
+      if ((itmp=ppst->pam2[0][f_str->aa0t[i0]][aa1p[i1]])<0) { *spa = M_NEG; }
+      else if (itmp == 0) { *spa = M_ZERO;}
+      else {*spa = M_POS;}
+      if (*spa == M_POS)  { aln->npos++;}
+      if (*spa == M_ZERO || *spa == M_POS) { aln->nsim++;}
+
+      if (cumm_seq_score) *i_spa++ += itmp;
+
+      *sp0 = ppst->sq[f_str->aa0t[i0++]];	/* get the residues for the consensus */
+
+      if (have_ann) {
+	if (aa0a) {*sp0a++ = ann_arr[*aa0ap++];}
+	else {*sp0a++ = ' ';}
+	if (aa1a) {*sp1a++ = ann_arr[aa1a[i1]];}
+	else {*sp1a++ = ' ';}
+      }
+
+      *sp1 = ppst->sq[aa1p[i1++]];
+      n0t++;
+      lenc++;
+      if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}
+      else {aln->nmismatch++;}
+      sp0++; sp1++; spa++;
+    }
+    else {	/* either op != 0 (previous was insert) or *rp != 0
+		   (current is insert) */
+      if (op==0) { op = *rp++;}	/* previous was match, start insert */
+      				/* previous was insert - count through gap */
+#if defined(FASTS) || defined(FASTM)
+      if (p_ac != 1) {
+	p_ac = 1;
+	fnum = f_str->aa0ti[i0];
+      }
+#endif
+      if (have_ann) {
+	*sp0a++ = ' ';
+	if (aa1a) {*sp1a++ = ann_arr[aa1a[i1]];}
+	else {*sp1a++ = ' ';}
+      }
+      *sp0++ = '-';
+      *sp1++ = ppst->sq[aa1p[i1++]];
+      *spa++ = M_DEL;
+      op--;
+      len_gap++;
+      lenc++;
+    }
+  }	/* end alignment while() */
+
+  if (have_ann) {*sp0a = *sp1a = '\0';}
+  *spa = '\0';
+  *nc = lenc-len_gap;	/* aligned columns, not counting gap columns */
+
+  /* now we have the middle, get the right end */
+
+  /* ns should be the length of alignment display in seqc0/0a/1/1a */
+  ns = mins + lenc + aln->llen;
+  /* adjust for the last line */
+  ns -= (itmp = ns %aln->llen);
+  /* add another full line */
+  if (itmp>aln->llen/2) ns += aln->llen;
+  /* the amount to display at the end, now (after lenc) */
+  nd = ns - (mins+lenc);
+  if (nd > max(n0t-a_res->max0,nn1-a_res->max1)) nd = max(n0t-a_res->max0,nn1-a_res->max1);
+  
+  if (aln->showall==1) {
+    nd = max(n0t-a_res->max0,nn1-a_res->max1);	/* reset for showall=1 */
+    /* get right end */
+    aancpy(seqc0+mins+lenc,(char *)f_str->aa0t+a_res->max0,n0t-a_res->max0,ppst);
+    aancpy(seqc1+mins+lenc,(char *)aa1p+a_res->max1,nn1-a_res->max1,ppst);
+    /* fill with blanks - this is required to use one 'nc' */
+    memset(seqc0+mins+lenc+n0t-a_res->max0,' ',nd-(n0t-a_res->max0));
+    memset(seqc1+mins+lenc+nn1-a_res->max1,' ',nd-(nn1-a_res->max1));
+  }
+  else {
+    if ((nd-(n0t-a_res->max0))>0) {
+      /* finish copying out the sequence */
+      aancpy(seqc0+mins+lenc,(char *)f_str->aa0t+a_res->max0,
+	     n0t-a_res->max0,ppst);
+      /* add blanks to pad */
+      memset(seqc0+mins+lenc+n0t-a_res->max0,' ',nd-(n0t-a_res->max0));
+    }
+    else {
+      /* just use up some sequence */
+      aancpy(seqc0+mins+lenc,(char *)f_str->aa0t+a_res->max0,nd,ppst);
+    }
+    if ((nd-(nn1-a_res->max1))>0) {
+      /* finish copying out the sequence */
+      aancpy(seqc1+mins+lenc,(char *)aa1p+a_res->max1,nn1-a_res->max1,ppst);
+      /* add blanks to pad */
+      memset(seqc1+mins+lenc+nn1-a_res->max1,' ',nd-(nn1-a_res->max1));
+    }
+    else {
+      /* just use up some sequence */
+      aancpy(seqc1+mins+lenc,(char *)aa1p+a_res->max1,nd,ppst);
+    }
+  }
+  if (have_ann) {
+    /* also pad the annotation -- this strategy means no annotations
+       in the unaligned region*/
+    memset(seqc0a+mins+lenc,' ',nd);
+    memset(seqc1a+mins+lenc,' ',nd);
+    /* 
+    ntmp = nd-(n0t-a_res->max0);
+    if (ntmp > 0) memset(seqc0a+mins+lenc+n0-a_res->max0,' ',ntmp);
+    ntmp = nd-(nn1-a_res->max1);
+    if (ntmp > 0) memset(seqc1a+mins+lenc+nn1-a_res->max1,' ',ntmp);
+    */
+  }
+
+  aln->smin0 = f_str->aa0t_off;	/* replaces the smin0 value set while filling the left end */
+  /* total display length: left context + alignment columns + right context */
+  return mins+lenc+nd;
+}
+/* calc_astruct(): copy the alignment boundaries from a_res_p into aln_p, adding f_str->aa0t_off to the two aa0 coordinates */
+void
+calc_astruct(struct a_struct *aln_p, struct a_res_str *a_res_p, struct f_struct *f_str) {
+
+  /* we do not pay attention to aln_p->calc_last_set, because all the
+     functions (calc_astruct, calc_cons_a, calc_code) use exactly the same
+     assignment */
+
+  aln_p->amin0 = a_res_p->min0 + f_str->aa0t_off;
+  aln_p->amax0 = a_res_p->max0 + f_str->aa0t_off;
+  aln_p->amin1 = a_res_p->min1;
+  aln_p->amax1 = a_res_p->max1;
+}
+
+/* build an array of match/ins/del - length strings in align_code_dyn; returns lenc - len_gap (alignment columns that are not gaps) */
+int
+calc_code(const unsigned char *aa0, const int n0,
+	  const unsigned char *aa1, const int n1,
+	  struct a_struct *aln,
+	  struct a_res_str *a_res,
+	  struct pstruct *ppst,
+	  struct dyn_string_str *align_code_dyn,
+	  const unsigned char *ann_arr, 
+	  const unsigned char *aa0a,
+	  const struct annot_str *annot0_p,
+	  const unsigned char *aa1a, 
+	  const struct annot_str *annot1_p,
+	  struct dyn_string_str *annot_code_dyn,
+	  int *score_delta,
+	  struct f_struct *f_str,
+	  void *pstat_void,
+	  int display_code)
+{
+  int i0, i1, nn1;
+  int op, lenc, len_gap;
+  int p_ac, op_cnt;
+  const unsigned char *aa1p;
+  const unsigned char *aa0ap;
+  char tmp_cnt[20];	/* only tmp_cnt[0] is set below; otherwise unused */
+  char sp0, sp1, spa;
+  unsigned char *sq;
+  int *rp;
+  int mins, smins;	/* not used in this function */
+  int o_fnum,fnum = 0;
+
+  int have_ann = 0;
+  char ann_ch0, ann_ch1;
+  char tmp_astr[MAX_STR];
+  int sim_code;
+  int show_code, annot_fmt;
+  char *sim_sym= aln_map_sym[MX_ACC];
+
+  *score_delta = 0;
+
+  show_code = (display_code & SHOW_CODE_MASK);
+  annot_fmt = 2;
+  if (display_code & SHOW_ANNOT_FULL) {
+    annot_fmt = 1;
+  }
+
+  if (aa0a != NULL && aa1a != NULL) { have_ann = 2;}
+  else if (aa0a != NULL || aa1a != NULL) { have_ann = 1;}
+  else {have_ann = 0;}
+
+  if (ppst->ext_sq_set) {sq = ppst->sqx;}
+  else {sq = ppst->sq;}
+
+#ifndef TFAST
+  aa1p = aa1;
+  nn1 = n1;
+#else
+  aa1p = f_str->aa1x;
+  nn1 = f_str->n10;
+#endif
+
+  /* 
+  aln->amin0 = a_res->min0;
+  aln->amin1 = a_res->min1;
+  aln->amax0 = a_res->max0;
+  aln->amax1 = a_res->max1;
+  */
+
+  rp = a_res->res;
+  lenc = len_gap =aln->nident=aln->nmismatch=aln->nsim=aln->npos=aln->ngap_q=aln->ngap_l=aln->nfs=op=p_ac = 0;
+  op_cnt = 0;
+
+  i0 = a_res->min0;	/* start in aa0 (f_str->aa0t) */
+  i1 = a_res->min1;	/* start in aa1 */
+  tmp_cnt[0]='\0';
+  
+#if defined(FASTS) || defined(FASTM)
+  o_fnum = f_str->aa0ti[i0]+1;
+  if (aa0a) { aa0ap = &aa0a[f_str->nmoff[o_fnum]+i0]; }
+#endif
+
+  while (i0 < a_res->max0 || i1 < a_res->max1) {
+    fnum = f_str->aa0ti[i0]+1;
+    if (op == 0 && *rp == 0) {	/* previous was match, this is match */
+#if defined(FASTS) || defined(FASTM)
+      if (p_ac == 0) {	/* previous code was a match */
+	if (fnum == o_fnum) { op_cnt++;	}
+	else {		/* continuing a match, but with a different fragment */
+	  update_code(align_code_dyn, p_ac, op_cnt, o_fnum, show_code);
+	  if (have_ann) aa0ap = &aa0a[f_str->nmoff[fnum]];
+	  o_fnum = fnum;
+	  op_cnt=1;
+	}
+      }
+      else {
+	update_code(align_code_dyn,p_ac,op_cnt,o_fnum, show_code);
+	op_cnt = 1; p_ac = 0; o_fnum = fnum = f_str->aa0ti[i0] + 1;
+	if (have_ann) {aa0ap = &aa0a[f_str->nmoff[fnum]];}
+      }
+#endif
+      op = *rp++;
+      lenc++;
+      sim_code = M_NEG;
+      if (ppst->pam2[0][f_str->aa0t[i0]][aa1p[i1]]>0) {
+	sim_code = M_POS;
+	aln->npos++;
+	aln->nsim++;
+      }
+      else if (ppst->pam2[0][f_str->aa0t[i0]][aa1p[i1]]==0) {
+	sim_code = M_ZERO;
+	aln->nsim++;
+      }
+
+      sp0 = ppst->sq[f_str->aa0t[i0]];
+      sp1 = ppst->sq[aa1p[i1]];
+      if (toupper(sp0) == toupper(sp1)) {
+	sim_code = M_IDENT;
+	aln->nident++;
+      }
+      else {aln->nmismatch++;}
+
+      /* check for an annotation */
+      if (have_ann) {
+	ann_ch0 = ann_ch1 = '\0';
+	if (have_ann == 2 && (ann_arr[*aa0ap] != ' ' || ann_arr[aa1a[i1]] != ' ')) {
+	  ann_ch0 = ann_arr[*aa0ap];
+	  if (ann_ch0 == ' ') ann_ch0 = 'X';
+	  ann_ch1 = ann_arr[aa1a[i1]];
+	  if (ann_ch1 == ' ') ann_ch1 = 'X';
+	}
+	else if (aa0a != NULL && ann_arr[*aa0ap]!=' ') {
+	  ann_ch0 = ann_arr[*aa0ap];
+	  ann_ch1 = 'X';
+	}
+	else if (aa1a != NULL && ann_arr[aa1a[i1]]!=' ') {
+	  ann_ch0 = 'X';
+	  ann_ch1 = ann_arr[aa1a[i1]];
+	}
+	aa0ap++;
+	if (ann_ch0) {
+	  sprintf(tmp_astr, "|%ld:%ld:%c%c:%c%c%c",
+		  aln->q_offset+i0+1,aln->l_offset+i1+1,
+		  ann_ch0,ann_ch1,sim_sym[sim_code],sp0,sp1);
+	  /* strncat(ann_str, tmp_astr, ann_str_n - strlen(ann_str) - 1); */
+	  dyn_strcat(annot_code_dyn, tmp_astr);
+	}
+      }
+      i0++;
+      i1++;
+    }
+    else {
+      if (op==0) op = *rp++;
+      if (p_ac == 1) { op_cnt++;}
+      else {
+	update_code(align_code_dyn,p_ac,op_cnt,o_fnum, show_code);
+#if defined(FASTS) || defined(FASTM)
+	p_ac = 1;
+	fnum = f_str->aa0ti[i0];	/* overwritten two lines below with aa0ti[i0] + 1 */
+#endif
+	op_cnt = 1; fnum = f_str->aa0ti[i0] + 1;
+      }
+      op--; lenc++; i1++; len_gap++;
+    }
+  }
+  update_code(align_code_dyn,p_ac,op_cnt,o_fnum, show_code);	/* emit the last pending run */
+
+  return lenc - len_gap;
+}
+
+/* update_code(): append one run to align_code_dyn.  If "op" == 0, this is
+   the end of a match of length "op_cnt" involving fragment "fnum";
+   op==1 is written as '-' (CIGAR 'D') and op==2 as '+' (CIGAR 'I').
+   Nothing is appended when op_cnt == 0. */
+
+void
+update_code(struct dyn_string_str *align_code_dyn, int op, int op_cnt, int fnum, int show_code) {
+  /* op indexes the 3-entry tables below, so it must be 0, 1 or 2 */
+  char align_char[4]={"=-+"};
+  char cigar_char[4]={"MDI"};
+  char tmp_cnt[64];	/* was 20: smaller than the worst case of "%c%d[%d]" (up to 26 bytes with 32-bit int) */
+
+  if (op_cnt == 0) return;
+
+  if (show_code == SHOW_CODE_CIGAR) {
+    sprintf(tmp_cnt,"%d%c",op_cnt,cigar_char[op]);
+  }
+  else {
+    if (op == 0)
+      sprintf(tmp_cnt,"%c%d[%d]",align_char[op],op_cnt,fnum);
+    else
+      sprintf(tmp_cnt,"%c%d",align_char[op],op_cnt);
+  }
+  dyn_strcat(align_code_dyn, tmp_cnt);
+}
+/* calc_id(): walk a_res->res and count identical/mismatched/similar/positive columns into aln; returns lenc - len_gap */
+int
+calc_id(const unsigned char *aa0, int n0,
+	const unsigned char *aa1, int n1,
+	struct a_struct *aln,
+	struct a_res_str *a_res,
+	struct pstruct *ppst,
+	const struct annot_str *annot0_p,
+	const struct annot_str *annot1_p,
+	int *score_delta,
+	struct dyn_string_str *annot_var_dyn,
+	struct f_struct *f_str)
+{
+  int i0, i1, nn1;
+  int op, lenc, len_gap;
+  const unsigned char *aa1p;
+  int sp0, sp1;
+  int *rp;
+  int mins, smins;	/* not used in this function */
+  
+  *score_delta = 0;
+  NULL_dyn_string(annot_var_dyn);
+
+#ifndef TFAST
+  aa1p = aa1;
+  nn1 = n1;
+#else
+  aa1p = f_str->aa1x;
+  nn1 = f_str->n10;
+#endif
+
+  aln->amin0 = a_res->min0 + f_str->aa0t_off;
+  aln->amax0 = a_res->max0 + f_str->aa0t_off;
+  aln->amin1 = a_res->min1;
+  aln->amax1 = a_res->max1;
+  aln->calc_last_set = 1;
+
+  /* first fill in the ends */
+  n0 -= (f_str->nm0-1);	/* NOTE(review): n0 is not read again below */
+
+  /* now get the middle */
+  rp = a_res->res;
+  lenc=len_gap=aln->nident=aln->nmismatch=aln->nsim=aln->npos=aln->ngap_q = aln->ngap_l = aln->nfs = op = 0;
+  i0 = a_res->min0;
+  i1 = a_res->min1;
+  
+  while (i0 < a_res->max0 || i1 < a_res->max1) {
+    if (op == 0 && *rp == 0) {
+      op = *rp++;
+
+      if (ppst->pam2[0][f_str->aa0t[i0]][aa1p[i1]]>0) {
+	aln->nsim++;
+	aln->npos++;
+      }
+      else if (ppst->pam2[0][f_str->aa0t[i0]][aa1p[i1]]==0) {
+	aln->nsim++;
+      }
+
+      sp0 = ppst->sq[f_str->aa0t[i0++]];
+      sp1 = ppst->sq[aa1p[i1++]];
+      lenc++;
+      if (toupper(sp0) == toupper(sp1)) aln->nident++;
+      else {aln->nmismatch++;}
+    }
+    else {	/* gap column: only i1 advances */
+      if (op==0) { op = *rp++;}
+      i1++;
+      op--;
+      len_gap++;
+      lenc++;
+    }
+  }
+  return lenc-len_gap;
+}
+/* calc_idd(): forwards all of its arguments unchanged to calc_id() and returns that result */
+int
+calc_idd(const unsigned char *aa0, int n0,
+	const unsigned char *aa1, int n1,
+	struct a_struct *aln,
+	struct a_res_str *a_res,
+	struct pstruct *ppst,
+	const struct annot_str *annot0_p,
+	const struct annot_str *annot1_p,
+	int *score_delta,
+	struct dyn_string_str *annot_var_dyn,
+	struct f_struct *f_str)
+{
+  return calc_id(aa0,n0,aa1,n1,aln, a_res, ppst, annot0_p, annot1_p, score_delta, annot_var_dyn, f_str);
+}
diff --git a/src/comp_lib9.c b/src/comp_lib9.c
new file mode 100644
index 0000000..1f894b5
--- /dev/null
+++ b/src/comp_lib9.c
@@ -0,0 +1,3052 @@
+/*  $Id: comp_lib9.c 1291 2014-08-28 18:32:58Z wrp $ */
+
+/* copyright (c) 1996, 1997, 1998, 1999, 2002, 2014 by William R. Pearson
+   and The Rector & Visitors of the University of Virginia */
+
+/* Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under this License is distributed on an "AS
+   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+   express or implied.  See the License for the specific language
+   governing permissions and limitations under the License. 
+*/
+
+/*
+ * Jan 17, 2007 - remove #ifdef PRSS - begin better statistics in place
+ * for small libraries, related libraries
+ *
+ * Concurrent read version
+ *
+ *	Feb 20, 1998 modifications for prss3
+ *
+ *	December, 1998 - DNA searches are now done with forward and reverse
+ *			 strands
+ */
+
+/* (22-Jan-2011) comp_lib7.c is an extension of comp_lib6.c.  Both
+   programs read the library in its entirety before doing the search.
+   comp_lib7.c provides full seqr_chain buffers of results, while
+   comp_lib6.c works with individual seq_records
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <time.h>
+
+#include <limits.h>
+#include <float.h>
+#include <math.h>
+
+#ifdef UNIX
+#include <unistd.h>
+#include <sys/types.h>
+#include <signal.h>
+#endif
+
+#ifdef MPI_SRC
+#include "mpi.h"
+#endif
+
+#include "defs.h"
+
+#include "structs.h"		/* mngmsg, libstruct */
+#include "mm_file.h"
+#include "best_stats.h"		/* defines beststr */
+
+#include "thr_buf_structs.h"
+#include "drop_func.h"
+
+#define XTERNAL
+#include "uascii.h"
+
+char *mp_verstr="";
+
+/********************************/
+/* extern variable declarations */
+/********************************/
+extern int fa_max_workers;
+extern char *prog_func;		/* function label */
+extern char *verstr, *iprompt0, *iprompt1, *iprompt2, *refstr;
+
+/********************************/
+/*extern function declarations  */
+/********************************/
+struct lmf_str *open_lib(struct lib_struct *lib_p, int dnaseq, int *sascii, int quiet);
+
+void
+close_lib_list(struct lib_struct *lib_list_p, int free_flag, int mm_force);
+
+int closelib(struct lmf_str *m_fptr, int force);
+
+void *my_srand();
+unsigned int my_nrand(int, void *);
+
+struct seqr_chain *
+new_seqr_chain(int max_chain_seqs, int aa1b_size, struct seqr_chain *old_chain, int maxn, long *lost_memK, int alloc_buf_flg);
+void end_seqr_chain(struct seqr_chain *last_seqr);
+void free_seqr_chain(struct seqr_chain *this_seqr);
+
+struct getlib_str *
+init_getlib_info(struct lib_struct *lib_list_p, int maxn, long max_memK);
+/* void free_getlib_info(struct getlib_str *); */
+
+struct seqr_chain *
+next_seqr_chain(const struct mng_thr *m_bufi, struct getlib_str *getlib_info,
+		struct buf_head *lib_bhead_p,
+		struct mngmsg *m_msp, const struct pstruct *ppst);
+
+struct seq_record *
+next_sequence_p(struct mseq_record **cur_mseq_p, struct seq_record *prev_seq_p,
+		struct seqr_chain *cur_seqr_chain, int maxn);
+
+void reset_seqr_chain(struct seqr_chain *seqr_base);
+
+void
+seqr_chain_work(unsigned char **aa0, unsigned char *aa0s,
+		struct buf_head *lib_bhead_p, struct getlib_str *getlib_info,
+		const struct mng_thr *m_bufi_p, struct mngmsg *m_msp, struct pstruct *ppst,
+		void *pstat_void, struct db_str *ldb, struct hist_str *histp, struct score_count_s *s_info,
+#if !defined(COMP_THR) && !defined(PCOMPLIB)
+		unsigned char *aa1shuff, void *f_str, void *qf_str,
+#endif
+		struct seq_record *best_seqs, struct mseq_record *best_mseqs, struct beststr *best,
+		FILE *fdata);
+
+void init_aa0(unsigned char **aa0, int n0, int nm0,
+	      unsigned char **aa0s, unsigned char **aa1s, 
+	      int qframe, int qshuffle_flg, int max_tot,
+	      struct pstruct *ppst, void **f_str, void **qf_str,
+	      void *my_rand_state);
+
+extern int ann_scan(unsigned char *, int, unsigned char **, int);
+extern int get_annot(char *sname, struct mngmsg *m_msp, char *bline, long q_offset, int n1,
+		     struct annot_str **annot_p, int target, int debug);
+extern int scanseq(unsigned char *seq, int n, char *str);
+extern void re_ascii(int *qascii, int *sascii, int max_ann_arr);
+extern int recode(unsigned char *seq, int n, int *qascii, int nsq);
+extern void revcomp(unsigned char *seq, int n, int *c_nt);
+
+extern void init_ascii(int is_ext, int *sascii, int nsq, int is_dna);
+extern void validate_novel_aa(int *sascii, int nsq, int is_dna);
+extern void qshuffle(unsigned char *aa0, int n0, int nm0, void *);
+extern void free_pam2p(int **);
+
+#ifdef DEBUG
+void check_rbuf(struct buf_head *cur_buf);
+int check_seq_range(unsigned char *aa1b, int n1, int nsq, char *);
+unsigned long adler32(unsigned long, const unsigned char *, unsigned int);
+#endif
+
+/* initialize environment (doinit.c) */
+extern void initenv (int argc, char **argv, struct mngmsg *m_msg,
+		     struct pstruct *ppst, unsigned char **aa0);
+
+void fset_vars(struct mngmsg *m_msp, struct pstruct *ppst);
+
+/* print timing information */
+extern void ptime (FILE *, long);
+
+#ifdef COMP_MLIB 
+#define QGETLIB (q_file_p->getlib)
+#endif
+
+#define GETLIB (m_file_p->getlib)
+
+int samp_stats_idx (int *pre_nstats, int nstats, void *rand_state);
+
+void
+save_best(struct buf_head *lib_buf, const struct mngmsg *, struct pstruct *ppst,
+	  struct db_str *, FILE *fdata, struct hist_str *, void **,
+	  struct score_count_s *);
+void
+save_best2(struct buf_head *lib_buf, const struct mngmsg *, struct pstruct *ppst,
+	   struct db_str *, FILE *fdata, struct hist_str *, void **,
+	   struct score_count_s *);
+
+void
+save_shuf(struct buf_head *lib_buf, int nitt, int shuff_max, int score_ix,
+	  struct score_count_s *);
+
+int
+save_align(struct buf_head *lib_bhead_p, struct beststr **bestp_arr);
+
+void
+init_beststats(struct beststr **best, struct beststr ***bestp_arr,
+	       struct seq_record **best_seqs, struct mseq_record **best_mseqs,
+	       struct stat_str **stats, struct stat_str **rstats,
+	       int shuff_max, int link_flag);
+void
+preserve_seq(struct buf2_data_s *, struct seq_record *, struct mseq_record *, struct beststr *);
+
+void
+preserve_seq2(struct beststr *, struct seq_record *, struct mseq_record *, struct beststr *);
+
+void
+buf_do_work(unsigned char **aa0, int n0, struct buf_head *lib_bhead_p,
+	    int max_frame, struct pstruct *ppst, void **f_str);
+void
+buf_qshuf_work(unsigned char *aa0s, int n0, struct buf_head *lib_bhead_p,
+	       int max_frame, struct pstruct *ppst, void *qf_str, int score_ix);
+void
+buf_shuf_work(unsigned char **aa0, int n0, unsigned char *aa1s, struct buf_head *lib_bhead_p,
+	      int max_frame, struct pstruct *ppst, void **f_str,
+	      int score_ix, void *rand_state);
+void
+buf_shuf_seq(unsigned char **aa0, int n0,
+	     unsigned char **aa1_shuff_b, unsigned char *aa1save, int maxn,
+	     struct beststr **bestp_arr, int nbest, 
+	     struct pstruct *pst, struct mngmsg *m_msp,
+	     struct mng_thr *m_thr_p
+#if !defined(COMP_THR) && !defined(PCOMPLIB)
+	     , void **f_str
+#endif
+	     , struct score_count_s *s_info);
+
+void
+buf_align_seq(unsigned char **aa0, int n0,
+	      struct beststr **bestp_arr, int nbest,
+	      struct pstruct *ppst, struct mngmsg *m_msp,
+	      struct mng_thr *m_bufi_p
+#if !defined(COMP_THR) && !defined(PCOMPLIB)
+	      , void **f_str
+#endif
+	      );
+
+void
+buf_do_align(unsigned char **aa0,  int n0,
+	     struct buf_head *lib_bhead_p, 
+	     struct pstruct *ppst, const struct mngmsg *m_msp,
+	     void **f_str);
+
+struct buf_head *
+alloc_comp_bufs (struct mng_thr *m_bufi_p, struct mngmsg *m_msp,
+		 int ave_seq_len);
+
+/* statistics functions */
+extern int
+process_hist(struct stat_str *sptr, int nstats, 
+	     const struct mngmsg *m_msg,
+	     struct pstruct *ppst,
+	     struct hist_str *hist, void **pstat_void, struct score_count_s *s_info, int do_hist);
+
+extern double find_z(int score, double escore, int length, double comp,void *);
+extern double zs_to_E(double zs,int n1, int dnaseq, long entries, struct db_str db);
+
+void last_stats(const unsigned char *, int, 
+		struct stat_str *sptr, int nstats,
+		struct beststr **bestp_arr, int nbest,
+		const struct mngmsg *m_msg, struct pstruct *ppst, 
+		struct hist_str *histp, void *);
+
+int last_calc( unsigned char **aa0, unsigned char *aa1, int maxn,
+	       struct beststr **bestp_arr, int nbest,
+	       const struct mngmsg *m_msg, struct pstruct *ppst, 
+	       void **f_str, void *rs_str);
+
+void scale_scores(struct beststr **bestp_arr, int nbest,
+		  struct db_str,struct pstruct *ppst, void *);
+
+int E1_to_s(double e_val, int n0, int n1, int db_size, void *pu);
+
+extern void pstat_info(char *, int, char *, void *);
+
+extern int shuffle(unsigned char *, unsigned char *, int, void *);
+extern int shuffle3(unsigned char *, unsigned char *, int, void *);
+extern int rshuffle(unsigned char *, unsigned char *, int);
+extern int wshuffle(unsigned char *, unsigned char *, int, int, void *);
+
+extern void set_db_size(int, struct db_str *, struct hist_str *);
+
+extern void /* pre-alignment */
+pre_load_best(unsigned char *aa1, int maxn,struct beststr **bbp_arr, int nbest,
+	      struct mngmsg *m_msp, int debug);
+
+extern char *	/* run a command to produce a fasta file */
+build_lib_db(char *script_file);
+
+extern char *	/* run link file */
+build_link_data(char **, struct mngmsg *, struct beststr **, int);
+
+/* display functions */
+extern void
+showbest (FILE *fp, unsigned char **aa0, unsigned char *aa1, int maxn,
+	  struct beststr **bestp_arr, int nbest,
+	  int qlib, struct mngmsg *m_msg,struct pstruct *ppst,
+	  struct db_str db, char **gstring2p, void **f_str);
+
+extern void
+showalign (FILE *fp, unsigned char **aa0, unsigned char *aa1, int maxn,
+	   struct beststr **bestp_arr, int nbest, int qlib, 
+	   const struct mngmsg *m_msg, const struct pstruct *ppst,
+	   char **gstring2p, void **f_str, struct mng_thr *m_bufi_p);
+
+/* misc functions */
+void h_init(struct pstruct *, struct mngmsg *, char *);		/* doinit.c */
+void last_init(struct mngmsg *, struct pstruct *); /* initfa/sw.c */
+void last_params(unsigned char *, int, struct mngmsg *, struct pstruct *);
+int validate_params(const unsigned char *, int, const struct mngmsg *,
+		    const struct pstruct *,
+		    const int *lascii, const int *pascii);
+
+void s_abort(char *, char *);		/* compacc.c */
+
+/* initfa/sw.c */
+void resetp(struct mngmsg *, struct pstruct *); 
+
+void gettitle(char *, char *, int);	/* nxgetaa.c */
+void lib_choice(char *lname, int nln, char *flstr, int ldnaseq);	/* lib_sel.c */
+struct lib_struct *
+lib_select(char *lname, char *ltitle, const char *flstr, int ldnaseq);	/* lib_sel.c */
+
+void query_parm(struct mngmsg *, struct pstruct *); /* initfa/sw.c */
+
+/* doinit.c */
+void markx_to_m_msp(struct mngmsg *m_msp, struct markx_str *markx);
+void m_msp_to_markx(struct markx_str *markx, struct mngmsg *m_msp);
+
+/* compacc.c */
+void print_header1(FILE *fd, const char *argv_line,
+		   const struct mngmsg *m_msp, const struct pstruct *ppst);
+void print_header2(FILE *fd, int qlib, char *info_qlabel, unsigned char **aa0,
+		   const struct mngmsg *m_msp, const struct pstruct *ppst, char *info_lib_range_p);
+void print_header3(FILE *fd, int qlib, struct mngmsg *m_msp, struct pstruct *ppst);
+
+void print_header4(FILE *fd, char *info_qlabel, char *argv_line, char *info_gstring3,
+		   char *info_hstring_p[2], struct mngmsg *m_msp, struct pstruct *ppst);
+void print_header4a(FILE *fd, struct mngmsg *m_msp);
+
+void print_header5(FILE *fd, int qlib, struct db_str *qtt,
+		   struct mngmsg *m_msp, struct pstruct *ppst, int in_mem, long tot_memK);
+
+void print_annot_header(FILE *fd, struct mngmsg *m_msp);
+
+void prhist(FILE *, const struct mngmsg *, struct pstruct *, struct hist_str hist,
+	    int nstats, int sstats, struct db_str, char *, char *, char **, char **, long);
+
+void print_sum(FILE *, struct db_str *qtt, struct db_str *ntt, int in_mem, long tot_memK);
+int reset_maxn(struct mngmsg *, int, int);	/* set m_msg.maxt, maxn from maxl */
+
+FILE *outfd;			/* Output file; main() points it at stdout */
+
+/* this information is global for fsigint() */
+extern long s_time();			/* fetches time */
+long tstart, tscan, tprev, tdone;	/* Timing; main() sets tstart, tscan and tprev from s_time() */
+#ifdef COMP_MLIB
+long ttscan, ttdisp;	/* both zeroed in main() just before tstart/tscan are set */
+#endif
+time_t tdstart, tddone;	/* tdstart is set from time(NULL) in main() */
+
+static struct db_str qtt = {0l, 0l, 0};	/* query totals: per query, main() adds m_msg.n0 to length and increments entries */
+#ifdef DEBUG
+char ext_qtitle[MAX_STR];	/* DEBUG only: main() copies m_msg.qtitle (cut at the first space) here */
+#endif
+
+#if defined(COMP_THR) || defined(PCOMPLIB)
+/***************************************/
+/* thread global variable declarations */
+/***************************************/
+
+/* functions for getting/sending buffers to threads (thr_sub.c) */
+#ifndef PCOMPLIB
+extern void init_thr(int , struct thr_str *, const struct mngmsg *, struct pstruct *,
+		     unsigned char *, struct mng_thr *m_bufi_p);
+extern void start_thr(void);
+#define RESULTS_BUF reader_buf
+#else
+extern void init_thr(int , char *, const struct mngmsg *, struct pstruct *,  unsigned char *, struct mng_thr *m_bufi_p);
+extern void work_comp(int);
+#define RESULTS_BUF worker_buf
+#endif
+extern void get_rbuf(struct buf_head **lib_buf, int max_work_buf);
+extern void put_rbuf(struct buf_head *lib_buf, int max_work_buf);
+extern void wait_rbuf(int max_work_buf);
+extern void rbuf_done(int nthreads);
+extern void put_rbuf_done(int nthreads, struct buf_head *lib_buf, 
+			  int max_work_buf);
+#ifndef PCOMPLIB
+#undef XTERNAL
+#include "thr_bufs2.h"
+#else
+#include "pcomp_bufs.h"
+#endif
+#endif
+
+struct buf_head *lib_buf2_list;	/* set in main() from alloc_comp_bufs() */
+
+/* these variables must be global for comp_thr.c so that save_best()
+   can use them */
+struct beststr **bestp_arr;	/* array of pointers; main() re-points each entry at best[] for every query */
+int nbest;	/* number of best scores; reset to 0 in main() for every query */
+
+struct stat_str *stats; /* array of scores for statistics from real
+			   (or shuffled) sequences*/
+struct stat_str *qstats;	/* array of scores for shuffled query stats; allocated in main() when m_msg.qshuffle is set */
+struct stat_str *rstats;	/* array of scores from shuffled library */
+
+  /* these variables are global so they can be set both by the main()
+     program and save_best() in threaded mode.
+  */
+
+int nstats, nqstats, nrstats, pre_nstats, kstats, shuff_tot, sstats;
+double zbestcut;		/* cut off for best z-score; main() resets it to -FLT_MAX for every query */
+int bestfull;		/* index for selectbest() */
+int stats_done=0;	/* flag for z-value processing; cleared per query, set to 1 after process_hist() when m_msg.escore_flg */
+void *rand_state;	/* set in main() from my_srand() */
+
+static int seq_index=0;	/* reset to 0 in main() before the query loop; other uses not visible here */
+
+void fsigint();
+
+/* **************************************************************** */
+/* start of main() program                                          */
+/* **************************************************************** */
+int
+main (int argc, char *argv[]) 
+{
+  unsigned char *aa0[6], *aa0s;
+  unsigned char *aa1save;	/* aa1shuff and aa1save must be distinct */
+  unsigned char *aa1shuff, *aa1shuff_b=NULL;	/* for new unthreaded version */
+  char *lib_db_file;
+
+  char tmp_str[MAX_SSTR];
+  char link_title[MAX_LSTR];
+  char *link_lib_str;
+  char *link_lib_file;
+
+  double zs_off_save;
+  int n_sig;
+
+  struct a_res_str *next_ares_p, *cur_ares_p; /* used to free-up old a_res */
+
+  /* status/parameter information */
+  char info_lib_range[MAX_FN];
+  char *info_lib_range_p;
+  char info_pgm_abbr[MAX_SSTR];
+  char info_qlabel[MAX_STR];
+  char *info_gstring2p[2];
+  char info_gstring3[MAX_STR];
+  char *info_hstring_p[2];
+  char fdata_pstat_info[MAX_STR];
+
+#ifdef COMP_MLIB
+  fseek_t qseek;
+  int qlib;
+  struct lib_struct *q_lib_p;
+  struct lmf_str *q_file_p;
+  int sstart, sstop, is;
+#endif
+
+  long next_q_offset;
+  int id;
+  struct lib_struct *lib_list_p, *link_lib_list_p;
+  struct getlib_str *getlib_info, *link_getlib_info;
+
+  struct db_str link_ldb={0,0,0};
+  struct score_count_s link_s_info={0,0,0,0};
+  int utmp;		/* user input tmp */
+
+  struct pstruct pst;
+  void *f_str[6], *qf_str;	/* different f_str[]'s for forward,reverse */
+  int have_f_str=0;
+
+  /* these variables track buffers of library sequences */
+  struct buf_head *lib_bhead_p;
+
+  struct mng_thr m_bufi;	/* has max_work_buf, max_buf2_res,
+				   max_chain_seqs, nframe */
+  int ave_seq_len;
+  /*   int empty_reader_bufs; */
+#ifdef COMP_THR
+  /*   int t_reader_buf_readp; */
+  struct thr_str *work_info;
+#endif
+#ifdef MPI_SRC
+  int mpi_tid;
+#endif
+  /* end of library sequence buffers */
+
+  struct mngmsg m_msg;		/* saves most non-param information
+				   about library alignment */
+  struct markx_str markx_save;	/* saves m_msg values for markx */
+  struct markx_str *cur_markx;	/* follow m_msg.markx_list */
+
+  struct hist_str hist2;	/* hist str for zsflag > 2 */
+  int zsflag_save;		/* save zsflag > 20 */
+  char rline[MAX_FN];
+  char argv_line[MAX_LSTR];
+  int t_quiet;
+
+  int i;
+  FILE *fdata=NULL;		/* file for full results */
+  struct beststr *best, *bbp;	/* array of best scores */
+
+  /* save sequence meta info for sequences that are not currently available */
+  struct seq_record *best_seqs;
+  struct mseq_record *best_mseqs;
+
+  int leng;			/* leng is length of the descriptive line */
+  int maxn;			/* size of the library sequence examined */
+  int qlcont;			/* continued query sequence */
+  char *bp;			/* general purpose string ptr */
+  
+  /* this is necessary because of an SGI Irix 64 issue */
+  info_gstring2p[0] = calloc(MAX_STR,sizeof(char));
+  info_gstring2p[1] = calloc(MAX_STR,sizeof(char));
+  info_hstring_p[0] = calloc(MAX_STR,sizeof(char));
+  info_hstring_p[1] = calloc(MAX_STR,sizeof(char));
+
+  if ((bp = strrchr(argv[0],'/'))!=NULL) {
+    strncpy(m_msg.pgm_name,bp+1,sizeof(m_msg.pgm_name));
+  }
+  else {
+    strncpy(m_msg.pgm_name,argv[0],sizeof(m_msg.pgm_name));
+  }
+
+  /* Initialization */
+
+  m_msg.s_info.s_cnt[0] = m_msg.s_info.s_cnt[1] =
+    m_msg.s_info.s_cnt[2] =   m_msg.s_info.tot_scores = 0;
+  m_msg.ss_info.s_cnt[0] = m_msg.ss_info.s_cnt[1] =
+    m_msg.ss_info.s_cnt[2] =   m_msg.ss_info.tot_scores = 0;
+
+#ifndef SHOW_HELP
+#if defined(UNIX)
+  m_msg.quiet= !isatty(1);
+#else
+  m_msg.quiet = 0;
+#endif
+#else
+  m_msg.quiet = 1;
+#endif
+
+#ifdef MPI_SRC
+  MPI_Init(&argc, &argv);
+  MPI_Comm_rank(MPI_COMM_WORLD,&mpi_tid);
+  if (mpi_tid > 0) {
+    work_comp(mpi_tid); 
+    MPI_Finalize();
+    exit(0);
+  }
+#endif
+
+#ifdef PGM_DOC
+  /* document command line */
+  argv_line[0]='\0';
+  for (i=0; i<argc; i++) {
+    SAFE_STRNCAT(argv_line," ",sizeof(argv_line));
+    if (strchr(argv[i],' ')) {
+      SAFE_STRNCAT(argv_line,"\"",sizeof(argv_line));
+      SAFE_STRNCAT(argv_line,argv[i],sizeof(argv_line));
+      SAFE_STRNCAT(argv_line,"\"",sizeof(argv_line));
+    }
+    else {
+      SAFE_STRNCAT(argv_line,argv[i],sizeof(argv_line));
+    }
+  }
+  argv_line[sizeof(argv_line)-1]='\0';
+#endif
+
+  /* first initialization routine - nothing is known */
+  h_init(&pst, &m_msg, info_pgm_abbr);
+  
+  m_msg.db.length = m_msg.ldb.length = qtt.length = 0l;
+  m_msg.db.entries = m_msg.db.carry = 
+    m_msg.ldb.entries = m_msg.ldb.carry = qtt.entries = qtt.carry = 0;
+  m_msg.pstat_void = m_msg.pstat_void2 = NULL;
+  m_msg.hist.entries = 0;
+
+  f_str[5] = f_str[4] = f_str[3] = f_str[2] = f_str[1] = f_str[0] = NULL;
+  aa0[0] = NULL;
+  rand_state = my_srand();
+
+  /* initialize values in comp_lib9 that are thread/serial specific */
+  fset_vars(&m_msg, &pst);
+
+  /* second initialization - get command line arguments */
+  initenv (argc, argv, &m_msg, &pst, &aa0[0]);
+
+#ifndef PCOMPLIB
+#ifdef COMP_THR
+  if ((work_info=
+       (struct thr_str *)calloc(fa_max_workers,sizeof(struct thr_str)))==NULL) {
+    fprintf(stderr, " cannot allocate work_info[%d]\n",fa_max_workers);
+    exit(1);
+  }
+#else
+  fa_max_workers = 1;
+#endif
+#endif
+
+  ttscan = ttdisp = 0;
+  tstart = tscan = s_time();
+  tdstart = time(NULL);
+
+  /* Allocate space for the query and library sequences */
+  /* pad aa0[] with an extra SEQ_PAD chars for ALTIVEC padding */
+  if (aa0[0]==NULL) {
+    if ((aa0[0] = (unsigned char *)malloc((m_msg.max_tot+1+SEQ_PAD)*sizeof(unsigned char)))
+	== NULL)
+      s_abort ("Unable to allocate query sequence", "");
+    *aa0[0]=0;
+    aa0[0]++;
+  }
+  aa0[5]=aa0[4]=aa0[3]=aa0[2]=aa0[1]=aa0[0];
+
+  if ((aa1save = (unsigned char *)malloc((m_msg.max_tot+1)*sizeof (char))) == NULL) {
+    s_abort ("Unable to allocate library overlap", "");
+  }
+  *aa1save=0;
+  aa1save++;
+
+  /* print argv_line, program, version */
+  print_header1(stdout, argv_line, &m_msg, &pst);
+
+  /* get query information */
+  if (m_msg.tname[0] == '\0') {
+      if (m_msg.quiet == 1)
+	s_abort("Query sequence undefined","");
+    l1:	fputs (iprompt1, stdout);
+      fflush  (stdout);
+      if (fgets (m_msg.tname, MAX_FN, stdin) == NULL)
+	s_abort ("Unable to read query library name","");
+      m_msg.tname[MAX_FN-1]='\0';
+      if ((bp=strchr(m_msg.tname,'\n'))!=NULL) *bp='\0';
+      if (m_msg.tname[0] == '\0') goto l1;
+  }
+
+  /* **************************************************************** */
+  /* (1) open the query library; 
+     (2) get a sequence;
+     (3) check for annotations */
+
+  /* we need a q_lib_p before opening the library */
+  if ((q_lib_p = (struct lib_struct *)calloc(1,sizeof(struct lib_struct)))==NULL) {
+    s_abort(" cannot allocate q_lib_p","");
+  }
+  else {
+    q_lib_p->file_name = m_msg.tname;
+  }
+
+  /* Open query library */
+  if ((q_file_p= open_lib(q_lib_p, m_msg.qdnaseq,qascii,!m_msg.quiet))==NULL) {
+    s_abort(" cannot open library ",m_msg.tname);
+  }
+  /* Fetch first sequence */
+  qlib = 0;
+  m_msg.q_offset = next_q_offset = 0l;
+  qlcont = 0;
+  m_msg.n0 = 
+    QGETLIB (aa0[0], MAXTST, m_msg.qtitle, sizeof(m_msg.qtitle),
+	     &qseek, &qlcont,q_file_p,&m_msg.q_off);
+  if ((bp=strchr(m_msg.qtitle,' '))!=NULL) *bp='\0';
+  strncpy(info_qlabel,m_msg.qtitle,sizeof(info_qlabel));
+#ifdef DEBUG
+  SAFE_STRNCPY(ext_qtitle, m_msg.qtitle,sizeof(ext_qtitle));
+#endif
+  if (bp != NULL) *bp = ' ';
+  info_qlabel[sizeof(info_qlabel)-1]='\0';
+
+  /* if annotations are included in sequence, remove them */
+  if (m_msg.ann_flg) {
+    m_msg.n0 = ann_scan(aa0[0],m_msg.n0,&m_msg.aa0a,m_msg.qdnaseq);
+    /* cannot do get_annot() here because lascii not initialized */
+  }
+
+  /* if protein and ldb_info.term_code set, add '*' if not there */
+  if (m_msg.ldb_info.term_code && !(m_msg.qdnaseq==SEQT_DNA || m_msg.qdnaseq==SEQT_RNA) &&
+      aa0[0][m_msg.n0-1]!='*') {
+    aa0[0][m_msg.n0++]='*';
+    aa0[0][m_msg.n0]=0;
+  }
+
+  /* if ends with ESS, remove terminal ESS */
+  if (aa0[0][m_msg.n0-1] == ESS) { m_msg.n0--; aa0[0][m_msg.n0]= '\0';}
+
+  /* check for subset */
+  if (q_file_p->opt_text[0]!='\0') {
+    if (q_file_p->opt_text[0]=='-') {
+      sstart=0; sscanf(&q_file_p->opt_text[1],"%d",&sstop);
+    }
+    else {
+      sstart = 0; sstop = -1;
+      sscanf(&q_file_p->opt_text[0],"%d-%d",&sstart,&sstop);
+      sstart--;
+      if (sstop <= 0 ) sstop = BIGNUM;
+    }
+
+    for (id=0,is=sstart; is<min(m_msg.n0,sstop); ) {
+      aa0[0][id++]=aa0[0][is++];
+    }
+    aa0[0][id]=0;
+    m_msg.n0 = min(m_msg.n0,sstop)-sstart;
+    m_msg.q_off += sstart;
+  }
+
+  /* check to see if query has been segmented */
+  if (qlcont) {
+    next_q_offset = m_msg.q_offset + m_msg.n0 - m_msg.q_overlap;
+  }
+  else {
+    next_q_offset = 0l;
+  }
+
+  /* this probably cannot happen any more */
+  if (m_msg.n0 > MAXTST) {
+    fprintf(stderr," sequence truncated to %d\n %s\n",MAXTST,m_msg.sqnam);
+    aa0[0][MAXTST]='\0';
+    m_msg.n0=MAXTST;
+  }
+
+  /* check for protein/DNA alphabet type */
+  if (m_msg.qdnaseq == SEQT_UNK) {
+  /* cannot change the alphabet mapping if a matrix has been set */
+  /* do automatic sequence recognition,but only for sequences > 20 residues */
+    if ( !pst.pam_set && m_msg.n0 > 20 &&
+	(float)scanseq(aa0[0],m_msg.n0,"ACGTUNacgtun")/(float)m_msg.n0 >0.85) {
+      pascii = nascii;
+      m_msg.qdnaseq = SEQT_DNA;
+    }
+    else {	/* its protein */
+      pascii = aascii;
+      m_msg.qdnaseq = SEQT_PROT;
+    }
+    /* modify qascii to use encoded version 
+       cannot use memcpy() because it loses annotations 
+    */
+    re_ascii(qascii,pascii,strlen((char *)&m_msg.ann_arr[1]));
+    init_ascii(pst.ext_sq_set,qascii,pst.nsq,m_msg.qdnaseq);
+    validate_novel_aa(qascii, pst.nsq, m_msg.qdnaseq);
+    m_msg.n0 = recode(aa0[0],m_msg.n0,qascii, pst.nsqx);
+  }
+
+   /* check sequence length -- cannot do before now because query
+      alphabet may change */
+  if (m_msg.n0 <= 0)
+    s_abort ("Query sequence length <= 0: ", m_msg.tname);
+
+   /* reset algorithm parameters for alphabet */
+  resetp (&m_msg, &pst);
+
+#ifndef COMP_MLIB
+  gettitle(m_msg.tname,m_msg.qtitle,sizeof(m_msg.qtitle));
+  if (m_msg.tname[0]=='-' || m_msg.tname[0]=='@') {
+    strncmp(m_msg.tname,m_msg.qtitle,sizeof(m_msg.tname));
+    if ((bp=strchr(m_msg.tname,' '))!=NULL) *bp='\0';
+  }
+#endif
+
+   /* get library file names from argv[2] or by prompting */
+  if (strlen (m_msg.lname) == 0) {
+    if (m_msg.quiet == 1) s_abort("Library name undefined","");
+    lib_choice(m_msg.lname,sizeof(m_msg.lname),m_msg.flstr, m_msg.ldb_info.ldnaseq);
+  }
+  
+  if (m_msg.lname[0] == '!') {
+    if ((lib_db_file = build_lib_db(&m_msg.lname[1]))==NULL) {
+      fprintf(stderr,"***[comp_lib9.c] Cannot open/execute %s script\n",&m_msg.lname[1]);
+      exit(1);
+    }
+
+    /* get a list of files to search */
+    lib_list_p = lib_select(lib_db_file, m_msg.ltitle, m_msg.flstr,
+			    m_msg.ldb_info.ldnaseq);
+  }
+  else {
+    /* get a list of files to search */
+    lib_list_p = lib_select(m_msg.lname, m_msg.ltitle, m_msg.flstr, m_msg.ldb_info.ldnaseq);
+  }
+
+  /* Get additional parameters here */
+  if (!m_msg.quiet) query_parm (&m_msg, &pst);
+  
+  /* set up zsflag, labels for output, thr_fact, shuffle params, pam matrix */
+  last_init(&m_msg, &pst);
+
+  /* allocate beststr best, beststr **bestp_arr, best_seqs/mseqs
+     structures for preserving seq_records, stats structures, */
+  init_beststats(&best, &bestp_arr,
+		 &best_seqs, &best_mseqs,
+		 &stats, &rstats, m_msg.shuff_max,
+		 m_msg.link_lname[0]);
+
+#ifdef UNIX
+  /* set up signals now that input is done */
+  signal(SIGHUP,SIG_IGN);
+#endif
+
+  /* **************************************************************** */
+  /* begin setting things up for threads */
+  /* **************************************************************** */
+  /* 
+     This section defines m_bufi.max_chain_seqs, the average number of entries
+     per buffer, and m_bufi.max_work_buf, the total number of buffers
+
+     Use a 2 Mbyte (DEF_WORKER_BUF) buffer for each worker.  For
+     proteins, that means 5,000 sequences of length 400 (average).
+     For DNA, that means 2,000 sequences of length 1000.
+
+     To accommodate larger libraries in memory, use more buffers, not
+     bigger buffers.
+
+     Once m_bufi.max_chain_seqs/max_work_buf and ave_seq_len are set,
+     allocate all the communication buffers with alloc_comp_bufs();
+  */
+
+  if (m_msg.ldb_info.ldnaseq== SEQT_DNA) {
+    ave_seq_len = AVE_NT_LEN;
+    m_bufi.max_chain_seqs = DEF_WORKER_BUF/AVE_NT_LEN;
+  }
+  else {
+    ave_seq_len = AVE_AA_LEN;
+    m_bufi.max_chain_seqs = DEF_WORKER_BUF/AVE_AA_LEN;
+  }
+
+  m_bufi.max_chain_seqs /= m_msg.thr_fact;
+  /* make certain all buffers have at least 600 sequences */
+  m_bufi.max_chain_seqs = max(m_bufi.max_chain_seqs,600);
+
+  /* max_work_buf is the number of buffers - if the worker buffers are
+     small, then make lots more buffers */
+
+#ifdef PCOMPLIB	/* PCOMPLIB -- one buffer per worker */
+  m_bufi.max_work_buf = fa_max_workers;
+#else	/* !PCOMPLIB */
+  m_bufi.max_work_buf = (DEF_WORKER_BUF * 2 * fa_max_workers)/(ave_seq_len * m_bufi.max_chain_seqs);
+  if (m_bufi.max_work_buf < 2*fa_max_workers) m_bufi.max_work_buf = 2*fa_max_workers;
+  if (m_bufi.max_work_buf > 4*fa_max_workers) m_bufi.max_work_buf = 4*fa_max_workers;
+  m_bufi.max_work_buf -= (m_bufi.max_work_buf%fa_max_workers);
+#ifndef COMP_THR
+  /* if not threaded, only one (larger) buffer */
+  m_bufi.max_chain_seqs *= m_bufi.max_work_buf;
+  m_bufi.max_work_buf = 1;
+#endif	/* !COMP_THR */
+#endif
+  m_bufi.max_buf2_res = m_bufi.max_chain_seqs * (m_msg.nitt1+1 - m_msg.revcomp);
+
+  /* allocate lib_buf2_lib[] and the associated data and results buffers,
+     as well as reader_buf[] and worker_buf[] */
+
+  lib_buf2_list = alloc_comp_bufs(&m_bufi, &m_msg, ave_seq_len);
+
+  /* initialization of global variables for threads/buffers */
+
+#if defined(COMP_THR) || defined(PCOMPLIB)
+#ifdef DEBUG
+  /*   fprintf(stderr," max_work_buf: %d\n", m_bufi.max_work_buf); */
+#endif
+  num_reader_bufs = m_bufi.max_work_buf;
+#endif
+
+#ifdef COMP_THR
+  num_worker_bufs = 0;
+  reader_done = 0;
+  worker_buf_workp = 0;
+  worker_buf_readp = 0;
+  reader_buf_workp = 0;
+  reader_buf_readp = 0;
+
+  start_thread = 1;	/* keeps threads from starting */
+#endif
+
+  /* Label the output */
+  if ((bp = (char *) strchr (m_msg.lname, ' ')) != NULL) *bp = '\0';
+  if (m_msg.ltitle[0] == '\0') {
+    strncpy(m_msg.ltitle,m_msg.lname,sizeof(m_msg.ltitle));
+    m_msg.ltitle[sizeof(m_msg.ltitle)-1]='\0';
+  }
+
+  if (m_msg.dfile[0]) {
+    fdata=fopen(m_msg.dfile,"w");
+    fprintf(fdata, "#%s\n",argv_line);
+  }
+
+  /* pre-load the library into a [m]seq_record array */
+  /* initialize outside while(1) { query loop } */
+  m_msg.db.length = 0l;
+  m_msg.db.entries = m_msg.db.carry = 0;
+
+  /* also sets ldb_info.l_overlap, use a fixed 150 residue overlap */
+  m_msg.ldb_info.maxn = maxn = reset_maxn(&m_msg, 150, m_msg.max_tot);
+  pst.maxlen = maxn;
+
+  seq_index = 0;
+
+  outfd = stdout;  
+
+  /* in comp_lib9.c, getlib_info saves all the state information
+     required to read the next sequence from the library. */
+  getlib_info = init_getlib_info(lib_list_p, m_msg.ldb_info.maxn,m_msg.max_memK);
+  m_msg.cur_seqr_cnt = 0;
+
+  /* main loop for doing a search, getting the next query */
+  while(1) {
+
+    /* Initialize bestp_arr */
+    for (nbest = 0; nbest < MAX_BEST; nbest++)
+      bestp_arr[nbest] = &best[nbest];
+    nbest = 0;
+
+    qlib++;
+    stats_done = 0;
+
+    zbestcut = -FLT_MAX;
+    nstats = nrstats = pre_nstats = shuff_tot = sstats = 0;
+
+    /* ensure that link_list is NULL for no result */
+    link_lib_list_p = NULL;	
+
+    /* get the last parameters */
+    last_params(aa0[0],m_msg.n0, &m_msg, &pst);
+
+    if (!validate_params(aa0[0],m_msg.n0, &m_msg, &pst,
+			 lascii, pascii)) {
+      fprintf(stderr," *** ERROR *** validate_params() failed:\n -- %s\n", argv_line);
+      exit(1);
+    }
+
+    /*  When approx. E()-scores are calculated (FASTS,FASTM), we still
+      need statistics structures; get them immediately.  In this case,
+      find_z() must produce a z_score (large positive is good) from an
+      e_score.  */
+
+    if (m_msg.escore_flg) {
+      pst.zsflag_f = process_hist(stats,nstats,&m_msg,&pst,
+				  &m_msg.hist,&m_msg.pstat_void,&m_msg.s_info,0);
+      stats_done=1;
+    }
+
+#ifdef COMP_THR    
+    init_thr(fa_max_workers, work_info, &m_msg, &pst, aa0[0], &m_bufi);
+#endif
+#ifdef PCOMPLIB
+    info_lib_range_p = &info_lib_range[0];
+    init_thr(fa_max_workers, info_lib_range_p, &m_msg, &pst, aa0[0], &m_bufi);
+#endif
+
+    /* always have qstats available; allocate space for shuffled query scores (if needed) */
+    if (m_msg.qshuffle && qstats==NULL) {
+      if ((qstats =
+	   (struct stat_str *)calloc(m_msg.shuff_max+1,sizeof(struct stat_str)))==NULL)
+	s_abort ("Cannot allocate qstats struct","");
+    }
+    nqstats = 0;
+
+    /* format query title */
+    leng = (int)strlen(m_msg.qtitle);
+    if (!(m_msg.markx & MX_M9SUMM) && leng > m_msg.aln.llen) leng -= 10;
+    if (leng > sizeof(m_msg.qtitle)-20) leng -= 20;
+
+    if (!(m_msg.markx & MX_MBLAST2)) {
+      if (m_msg.nm0 <= 1) {
+	sprintf(tmp_str," - %d %s", m_msg.n0, m_msg.sqnam);
+      }
+      else {
+	sprintf(tmp_str," - %d %s in %d fragments", m_msg.n0 - (m_msg.nm0-1), m_msg.sqnam, m_msg.nm0);
+      }
+
+      if (strlen(tmp_str) + leng + 1> sizeof(m_msg.qtitle)) {
+	leng = sizeof(m_msg.qtitle) - strlen(tmp_str) - 1;
+	SAFE_STRNCAT((m_msg.qtitle+leng), tmp_str, sizeof(m_msg.qtitle));
+      }
+      else {SAFE_STRNCAT(m_msg.qtitle, tmp_str, sizeof(m_msg.qtitle));}
+    }
+    if (fdata) {
+      fprintf(fdata,">>>%ld %3d\t%-50s\n",qtt.entries,m_msg.n0,m_msg.qtitle);
+    }
+
+    tprev = s_time();
+
+    qtt.length += m_msg.n0;
+    qtt.entries++;
+
+#if !defined(COMP_THR) && !defined(PCOMPLIB)
+    have_f_str=1;
+
+    /* allocates aa0 arrays, calls init_work, only for non-threaded */
+    init_aa0(aa0, m_msg.n0, m_msg.nm0, &aa0s, &aa1shuff,
+	     m_msg.qframe, m_msg.qshuffle, m_msg.max_tot,
+	     &pst,  &f_str[0], &qf_str, rand_state);
+    aa1shuff_b = aa1shuff-1;
+
+    /* label library size limits -- must be called after init_aa0(),
+       which calls init_work(), which can reset the n1_high/n1_low
+       limits */
+    if (pst.n1_low > 0 && pst.n1_high < BIGNUM) 
+      sprintf(info_lib_range," (range: %d-%d)",pst.n1_low,pst.n1_high);
+    else if (pst.n1_low > 0) 
+      sprintf(info_lib_range," (range: >%d)",pst.n1_low);
+    else if (pst.n1_high < BIGNUM)
+      sprintf(info_lib_range," (range: <%d)",pst.n1_high);
+    else
+      info_lib_range[0]='\0';
+    info_lib_range[sizeof(info_lib_range)-1]='\0';
+    info_lib_range_p = info_lib_range;
+#endif
+
+#if !defined(COMP_THR) && !defined(PCOMPLIB)
+    lib_bhead_p = lib_buf2_list;	/* equivalent to un-threaded get_rbuf() */
+#else	/* COMP_THR/PCOMPLIB */
+#ifndef PCOMPLIB
+    start_thr();
+#endif
+    /* now open the library and start reading */
+    /* get a buffer and fill it up */
+    get_rbuf(&lib_bhead_p,m_bufi.max_work_buf);
+#endif
+
+    /* **************************************************************** */
+    /* do the sequence comparison library scan                          */
+    /* **************************************************************** */
+    seqr_chain_work(aa0, aa0s, lib_bhead_p, getlib_info,  &m_bufi, &m_msg, &pst,
+		    &m_msg.pstat_void, &m_msg.ldb, &m_msg.hist, &m_msg.s_info,
+#if !defined(COMP_THR) && !defined(PCOMPLIB)
+		    aa1shuff, f_str, qf_str,
+#endif
+		    best_seqs, best_mseqs, best, fdata);
+
+    /* the initial search is done, do statistics, possible expansion,
+       and alignments */
+
+#if defined(COMP_THR) && !defined(PCOMPLIB)
+    info_lib_range_p = work_info[0].info_lib_range;
+#endif
+
+    m_msg.nbr_seq = m_msg.db.entries;
+    get_param(&pst, info_gstring2p,info_gstring3, &m_msg.s_info);
+
+    /* *************************** */
+    /* analyze the last results    */
+    /* *************************** */
+    
+#ifndef SAMP_STATS
+    if (!stats_done && nstats > 0) {
+#endif
+      /* we ALWAYS do this if SAMP_STATS, because the statistics may have changed */
+      /* the new incremental sampling produces an nstats that is much
+	 too large */
+
+      zsflag_save = pst.zsflag;
+      if (pst.zsflag > 20) {
+	pst.zsflag -= 20;
+      }
+      pst.zsflag_f = process_hist(stats,nstats,&m_msg, &pst,&m_msg.hist,
+				  &m_msg.pstat_void, &m_msg.s_info, stats_done);
+      pst.zsflag = zsflag_save;
+
+      if (m_msg.pstat_void != NULL) {
+	stats_done = 1;
+	for (i = 0; i < nbest; i++) {
+	  bestp_arr[i]->zscore =
+	    find_z(bestp_arr[i]->rst.score[pst.score_ix],
+		   bestp_arr[i]->rst.escore, bestp_arr[i]->seq->n1, 
+		   bestp_arr[i]->rst.comp, m_msg.pstat_void);
+	}
+#ifndef SAMP_STATS
+      }
+      else pst.zsflag = -1;
+#endif
+    }
+
+    /* **************************************************************** */
+    /* do shuffles if too few library sequences or for second estimate  */
+    /* **************************************************************** */
+
+    /* if there are not many scores, produce better statistics by shuffling */
+    /* but only if statistics are enabled (7-July-2008) */
+    if (pst.zsflag != 3 && pst.zsflag > -1 && 
+	nbest > 0 && nbest < m_msg.shuff_max) {
+
+      buf_shuf_seq(aa0, m_msg.n0, &aa1shuff_b, aa1save, maxn,
+		   bestp_arr, nbest, &pst, &m_msg, &m_bufi
+#if !defined(COMP_THR) && !defined(PCOMPLIB)
+		   , f_str
+#endif
+		   , &m_msg.ss_info);
+
+      /* (4) analyze rstats */
+      if (pst.zsflag < 10) pst.zsflag += 10;
+      if (pst.zsflag > 20) pst.zsflag -= 10;
+      pst.zsflag_f = process_hist(rstats,nrstats,&m_msg, &pst,&m_msg.hist,
+				  &m_msg.pstat_void,&m_msg.ss_info,0);
+    }
+
+    /* **************************************************************** */
+    /* done with shuffling for small sample size                        */
+    /* **************************************************************** */
+
+    if (!pst.zdb_size_set) pst.zdb_size = m_msg.ldb.entries;
+
+#if defined(COMP_THR) || defined(PCOMPLIB)
+    /* before I call last_calc/showbest/showalign, I need init_work() to
+       get an f_str. This duplicates some code above, which is used in
+       the non-threaded version.
+
+       I have tried to get an f_str from one of the threads, but on
+       some architectures, that f_str is not available to the main thread.
+    */
+
+    if (!have_f_str) {
+      init_work(aa0[0],m_msg.n0,&pst,&f_str[0]);
+      /* f_str[0] = work_info[0].f_str_ap[0]; */
+      have_f_str = 1;
+      f_str[5] = f_str[4] = f_str[3] = f_str[2] = f_str[1] =  f_str[0];
+
+      if (m_msg.qframe == 2) {
+	if ((aa0[1]=(unsigned char *)calloc((size_t)m_msg.n0+2+SEQ_PAD,
+					    sizeof(unsigned char)))==NULL) {
+	  fprintf(stderr," cannot allocate aa0[1][%d] for alignments\n",
+		  m_msg.n0+2+SEQ_PAD);
+	}
+	*aa0[1]='\0';
+	aa0[1]++;
+	memcpy(aa0[1],aa0[0],m_msg.n0+1);
+	/* for ALTIVEC/SSE2, must pad with 16 NULL's, but not necessary after calloc() */
+	for (id=0; id<SEQ_PAD; id++) {aa0[1][m_msg.n0+id]=0;}
+
+	revcomp(aa0[1],m_msg.n0,&pst.c_nt[0]);
+	init_work(aa0[1],m_msg.n0,&pst,&f_str[1]);
+	/* f_str[1] = work_info[0].f_str_ap[1]; */
+      }
+    }
+#endif
+
+    /* now we have one set of scaled scores in bestp_arr  -
+       for FASTS/F, we need to do some additional processing */
+
+    if (!m_msg.qshuffle) {
+      last_stats(aa0[0], m_msg.n0, stats,nstats, bestp_arr,nbest,
+		 &m_msg, &pst, &m_msg.hist, &m_msg.pstat_void);
+    }
+    else {
+      last_stats(aa0[0], m_msg.n0,
+		 qstats,nqstats, bestp_arr,nbest, &m_msg, &pst, 
+		 &m_msg.hist, &m_msg.pstat_void);
+    }
+
+    /* here is a contradiction: if pst.zsflag < 0, then m_msg.pstat_void
+       should be NULL; if it is not, then process_hist() has been called */
+    if (pst.zsflag < 0 && m_msg.pstat_void != NULL) pst.zsflag = 1;
+
+    if (m_msg.last_calc_flg) {
+      /* last_calc may need coefficients from last_stats() */
+      nbest = last_calc(aa0, aa1save, maxn, bestp_arr, nbest, &m_msg, &pst,
+			f_str, m_msg.pstat_void);
+    }
+
+    /* in addition to scaling scores, this sorts bestp_arr[nbest] */
+    scale_scores(bestp_arr,nbest,m_msg.db, &pst,m_msg.pstat_void);
+
+#ifdef DEBUG
+      /* check for bestp_arr corruption */
+      for (i=0; i<nbest; i++) {
+	if (bestp_arr[i]->n1 != bestp_arr[i]->seq->n1) {
+	  fprintf(stderr," *** [comp_lib9.c:1120] *** n1 conflict[%d]: n1: %d != seq->n1: %d\n",
+		  i, bestp_arr[i]->n1, bestp_arr[i]->seq->n1);
+	}
+      }
+#endif
+
+    /* For large databases, we have good zscores for all the MAX_BEST
+       sequences.  Thus, we can sort the scores, and get a list of all
+       sequences with scores better than the E() threshold.  If zsflag
+       > 20, then we should shuffle those guys to get an alternative
+       estimate of lambda and K */
+    
+    if (pst.zsflag > 20 && nbest >= m_msg.shuff_max) {
+      n_sig = nbest;
+      for (i=0; i<nbest; i++) {
+	if (bestp_arr[i]->rst.escore > pst.e_cut) {
+	  n_sig = i;
+	  break;
+	}
+      }
+
+      /* if there are no significant hits, shuffle the top 10 */
+      if (n_sig < 10) n_sig = 10;
+
+      /* check to see how many significant sequences there are, and
+	 ensure that every sequence is shuffled at least 5 times */
+      if (n_sig * 5 > m_msg.shuff_max) {
+	m_msg.shuff_max = n_sig*5;
+	if (m_msg.shuff_max > MAX_BEST/2) m_msg.shuff_max = MAX_BEST/2;
+	if ((rstats = (struct stat_str *)realloc(rstats, m_msg.shuff_max * sizeof(struct stat_str)))==NULL) {
+	  fprintf(stderr, " *** Cannot reallocate rstats[%d] ***\n",m_msg.shuff_max);
+	  exit(1);
+	}
+      }
+
+      buf_shuf_seq(aa0, m_msg.n0, &aa1shuff_b, aa1save, maxn,
+		   bestp_arr, n_sig, &pst, &m_msg, &m_bufi
+#if !defined(COMP_THR) && !defined(PCOMPLIB)
+		   , f_str
+#endif
+		   , &m_msg.ss_info);
+
+      zs_off_save = pst.zs_off;
+      /* ensure that hist2.hist_a is initialized properly */
+      hist2.hist_a = NULL;
+      pst.zsflag_f = process_hist(rstats,nrstats,&m_msg, &pst,&hist2,
+				  &m_msg.pstat_void2,&m_msg.ss_info, 0);
+      pst.zs_off = zs_off_save;
+
+      for (i=0; i<nbest; i++) {
+	bestp_arr[i]->zscore2 =
+	  find_z(bestp_arr[i]->rst.score[pst.score_ix],
+		 bestp_arr[i]->rst.escore, bestp_arr[i]->seq->n1,
+		 bestp_arr[i]->rst.comp,m_msg.pstat_void2);
+      }
+    }
+
+    get_param(&pst, info_gstring2p,info_gstring3, &m_msg.s_info);
+
+    /* **************************************************************** */
+    /* label Library: output                                            */
+    /* **************************************************************** */
+
+    tscan = s_time();
+
+    /* get query annotations here, before print_header2() */
+    if (m_msg.annot0_sname[0]) {
+      if (get_annot(m_msg.annot0_sname, &m_msg, m_msg.qtitle, m_msg.q_offset+m_msg.q_off-1,m_msg.n0, &m_msg.annot_p, 0, pst.debug_lib) < 0) {
+	fprintf(stderr,"*** error [%s:%d] - %s did not produce annotations\n",__FILE__, __LINE__, m_msg.annot0_sname);
+	m_msg.annot0_sname[0] = '\0';
+      }
+      if (m_msg.annot_p && m_msg.annot_p->n_annot > 0) {
+	m_msg.aa0a = m_msg.annot_p->aa1_ann;
+      }
+      if (!m_msg.ann_arr[0]) {m_msg.ann_arr[0] = ' '; m_msg.ann_arr[1] = '\0';}
+    }
+
+    /* header2 print Query:, Annotation:, Library: */
+    print_header2(stdout, qlib, info_qlabel, aa0, &m_msg, &pst, info_lib_range_p);
+
+    if (m_msg.std_output) {
+      prhist (stdout, &m_msg, &pst, m_msg.hist, nstats, sstats, m_msg.ldb,
+	      (pst.zsflag > 20? hist2.stat_info:NULL),info_lib_range_p,
+	      info_gstring2p, info_hstring_p, tscan-tprev) ;
+
+      /* print annot header if it cannot be changed by script */
+      if (!(m_msg.markx & (MX_MBLAST2+MX_M8OUT)) && 
+	  !(m_msg.annot1_sname[0] || m_msg.annot0_sname[0]))
+	print_annot_header(stdout, &m_msg);
+    }
+#ifdef COMP_MLIB
+    ttscan += tscan-tprev;
+#endif
+
+    /* check to see if there are alternate output files */
+  l3:
+    if (!m_msg.quiet) {
+      printf("Enter filename for results [%s]: ", m_msg.outfile);
+      fflush(stdout);
+      rline[0]='\0';
+      if (fgets(rline,sizeof(rline),stdin)==NULL) goto end_l;
+      else {	/* parse rline input */
+	if ((bp=strchr(rline,'\n'))!=NULL) *bp = '\0';
+	if (rline[0]!='\0') strncpy(m_msg.outfile,rline,sizeof(m_msg.outfile));
+      }
+    }
+
+    if (m_msg.outfile[0]!='\0') {	/* have an output file name */
+      if ((outfd=fopen(m_msg.outfile,"w"))==NULL) {
+	fprintf(stderr," could not open %s\n",m_msg.outfile);
+	if (!m_msg.quiet) goto l3;
+	else goto l4;		/* skip output file */
+      }
+
+      if (m_msg.markx_list==NULL) {	/* no -m 9 options, need one */
+	if ((m_msg.markx_list = (struct markx_str *)calloc(1,sizeof(struct markx_str)))==NULL) {
+	  fprintf(stderr,"cannot allocate m_msg.markx_list\n");
+	  goto l4;
+	}
+	else {
+	  m_msp_to_markx(m_msg.markx_list, &m_msg);
+	}
+      }
+      m_msg.markx_list->out_file = m_msg.outfile;
+      m_msg.markx_list->out_fd = m_msg.outfd = outfd;
+      /* save permanent m_msg marks info */
+    }
+      
+  l4:   
+    m_msp_to_markx(&markx_save, &m_msg);
+    /* for each entry in markx_list */
+    for (cur_markx = m_msg.markx_list; cur_markx;
+	 cur_markx = cur_markx->next) {
+
+      if (cur_markx->out_file && cur_markx->out_file[0] && cur_markx->out_fd == NULL) {
+	if ((cur_markx->out_fd=fopen(cur_markx->out_file,"w"))==NULL) {
+	  fprintf(stderr," could not open %s\n",cur_markx->out_file);
+	}
+      }
+      if (cur_markx->out_fd == NULL) continue;
+
+      markx_to_m_msp(&m_msg, cur_markx);
+
+      if (qlib==1) print_header1(cur_markx->out_fd, argv_line, &m_msg, &pst);
+      print_header2(cur_markx->out_fd, qlib, info_qlabel, aa0, &m_msg, &pst, info_lib_range_p);
+
+      if (m_msg.std_output) {
+	prhist(cur_markx->out_fd, &m_msg, &pst,m_msg.hist, nstats, sstats, m_msg.db,
+	       (pst.zsflag > 20? hist2.stat_info:NULL), info_lib_range_p,
+	       info_gstring2p, info_hstring_p, tscan-tprev);
+
+	/* print annot header if it cannot be changed by script */
+	if (!(m_msg.markx & (MX_MBLAST2+MX_M8OUT)) &&
+	    !(m_msg.annot1_sname[0] || m_msg.annot0_sname[0]))
+	  print_annot_header(cur_markx->out_fd, &m_msg);
+      }
+
+      /* done with this output, restore m_msg */
+    }
+    markx_to_m_msp(&m_msg,&markx_save);
+
+    /* find the lowest scoring alignment to be displayed */
+
+    /* m_msg.nshow always provides the number of alignments to be
+       displayed in quiet mode */
+
+    /* skip entries if -F e_low specified */
+    if (pst.zsflag >= 0) {
+      for (i=0; i<nbest && bestp_arr[i]->rst.escore < m_msg.e_low; i++) {};
+      m_msg.nskip = i;
+    }
+    else {
+      m_msg.nskip = 0;
+    }
+
+    if (m_msg.quiet || m_msg.tot_markx & MX_M9SUMM) {
+
+      /* to determine how many sequences to re-align (either for
+	 do_opt() or calc_id() we need to set m_msg.nshow based on
+	 e_cut, m_msg.mshow to display the correct number of
+	 alignments */
+
+      /* the logic in this section is:
+	 -b '$' - show all the results in the database after -F e_top
+	 -b "123" - show min(123, e_cut_count)
+	 -b "=123" - show min(123,nbest)
+	 -b ">123" - show max(123,e_cut_count)
+	 m_msg.mshow_set==1 -- has a value been entered "-b 123", "-b =123", "-b >123"
+	 m_msg.mshow_min==1  -- the mshow value is the minimum number to display, set with "-b =,>123"
+      */
+
+      if (pst.zsflag >= 0) {	/* do we have E()-values? */
+
+	for (i=m_msg.nskip; i<nbest && bestp_arr[i]->rst.escore < m_msg.e_cut; i++) {}
+
+	if (m_msg.mshow_set <= 0) {	/* no -b 123 */
+	  m_msg.nshow = min(i - m_msg.nskip, nbest-m_msg.nskip);
+	}
+	else {	/* mshow_set>0 */
+	  if (m_msg.mshow_min == 1) {	/* -b '=123" must show exactly m_msg.mshow results */
+	    m_msg.nshow = m_msg.mshow;
+	  }
+	  else if (m_msg.mshow_min == 2 ) {	/* -b '>123' show 123 unless e_cut > 123 */
+	    m_msg.nshow = max(m_msg.mshow, i-m_msg.nskip);
+	  }
+	  else {
+	    /* -b '123' show no more than 123, limited by e_cut */
+	    if (m_msg.mshow >= 0) m_msg.nshow = min(m_msg.mshow, i-m_msg.nskip);
+	    else { m_msg.nshow = nbest-m_msg.nskip;}	/* -b '$' sets m_msg.mshow == -1 */
+	  }
+	}
+      }
+      else {	/* we do not have E()-values */
+	if (m_msg.mshow >= 0) { m_msg.nshow = min(m_msg.mshow,nbest); }
+	else { m_msg.nshow = nbest;}
+      }
+
+      if (m_msg.nshow <= 0) { /* no results to display */
+	if (m_msg.std_output) fprintf(outfd,"!! No sequences with E() < %0.5g\n",m_msg.e_cut);
+	m_msg.nshow = 0;
+	goto end_l;
+      }
+    }
+
+    m_msg.pre_load_done = 0;
+
+    /* ******************************************************************/
+    /* check for additional expansion sequences via link script         */
+    /* **************************************************************** */
+    if (m_msg.link_lname[0]) {
+      /* guarantee that we have the bline's for the best sequences */
+      pre_load_best(aa1save, maxn, &bestp_arr[m_msg.nskip], m_msg.nshow, &m_msg, pst.debug_lib);
+
+      if ((link_lib_str = build_link_data(&link_lib_file, &m_msg, bestp_arr,pst.debug_lib))==NULL) {
+	goto no_links;
+      };
+
+      /* get a list of files */
+      link_lib_list_p = lib_select(link_lib_str, link_title, m_msg.flstr, m_msg.ldb_info.ldnaseq);
+      if (link_lib_str != NULL) free(link_lib_str);
+
+      link_getlib_info = init_getlib_info(link_lib_list_p, m_msg.ldb_info.maxn,m_msg.max_memK/16);
+      m_msg.cur_seqr_cnt = 0;
+
+      /* before searching with another getlib_info, we need to be
+	 certain that all the best_str entries that will be used after
+	 the link search have been saved (only necessary if buffers
+	 may be reused) */
+      if (getlib_info->use_memory <= 0 && getlib_info->lib_list_p->m_file_p->get_mmap_chain==NULL) {
+	for (i=0; i < m_msg.nshow; i++) {
+	  if (bestp_arr[i]->seq->aa1b != NULL) {
+	    preserve_seq2(bestp_arr[i],best_seqs, best_mseqs, best);
+	    if (bestp_arr[i]->n1 != bestp_arr[i]->seq->n1) {
+	      fprintf(stderr,"[*** comp_lib9.c:1334***] -n1:%d != seq->n1:%d\n",
+		      bestp_arr[i]->n1, bestp_arr[i]->seq->n1);
+	    }
+	  }
+	}
+      }
+
+      /* calculate scores for the sequences */
+#if defined(COMP_THR) || defined(PCOMPLIB)
+    for (i=0; i<m_bufi.max_work_buf; i++) {
+      memset(lib_buf2_list[i].buf2_data,0,
+	     (size_t)(m_bufi.max_buf2_res+1)*sizeof(struct buf2_data_s));
+      lib_buf2_list[i].hdr.buf2_cnt=
+	lib_buf2_list[i].hdr.have_results=
+	lib_buf2_list[i].hdr.have_best_save = 
+	lib_buf2_list[i].hdr.aa1b_used = 0;
+    }
+    num_reader_bufs = m_bufi.max_work_buf;
+#endif
+
+#if defined(COMP_THR) || defined(PCOMPLIB)
+      get_rbuf(&lib_bhead_p,m_bufi.max_work_buf);
+#else
+      lib_bhead_p = lib_buf2_list;
+#endif
+
+      /* **************************************************************** */
+      /* do the sequence comparison scan on the expanded sequences        */
+      /* **************************************************************** */
+      seqr_chain_work(aa0, aa0s, lib_bhead_p, link_getlib_info,
+		      &m_bufi, &m_msg, &pst, &m_msg.pstat_void,
+		      &link_ldb, NULL, &link_s_info, 
+#if !defined(COMP_THR) && !defined(PCOMPLIB)
+		      aa1shuff, f_str, qf_str,
+#endif
+		      best_seqs, best_mseqs, best, NULL);
+
+      m_msg.pre_load_done = 0;	/* pre-load has not been done for link library sequences */
+
+#ifdef DEBUG
+      /* check for bestp_arr corruption */
+      for (i=0; i<m_msg.nshow; i++) {
+	if (bestp_arr[i]->n1 != bestp_arr[i]->seq->n1) {
+	  fprintf(stderr," *** [comp_lib9.c:1377] *** n1 conflict[%d]: n1: %d != seq->n1: %d\n",
+		  i, bestp_arr[i]->n1, bestp_arr[i]->seq->n1);
+	}
+      }
+#endif
+      /* need to resort results, and re-check how many should be displayed */
+      scale_scores(bestp_arr,nbest,m_msg.db, &pst,m_msg.pstat_void);
+
+#ifdef DEBUG
+      /* check for bestp_arr corruption */
+      for (i=0; i<m_msg.nshow; i++) {
+	if (bestp_arr[i]->n1 != bestp_arr[i]->seq->n1) {
+	  fprintf(stderr," *** [comp_lib9.c:1389] *** n1 conflict[%d]: n1: %d != seq->n1: %d\n",
+		  i, bestp_arr[i]->n1, bestp_arr[i]->seq->n1);
+	}
+      }
+#endif
+
+      if (pst.zsflag >= 0) {
+	/* skip entries if -F e_low specified */
+	for (i=0; i<nbest && bestp_arr[i]->rst.escore < m_msg.e_low; i++) {};
+	m_msg.nskip = i;
+      }
+      else {
+	/* no statistics, just use the same score */
+	m_msg.nskip = 0;
+      }
+
+      if (m_msg.quiet || m_msg.tot_markx & MX_M9SUMM) {
+
+	/* to determine how many sequences to re-align (either for
+	   do_opt() or calc_id() we need to modify m_msg.mshow to get
+	   the correct number of alignments */
+
+	if (pst.zsflag >= 0) {	/* do we have e_values? */
+	  for (i=m_msg.nskip; i<nbest && bestp_arr[i]->rst.escore < m_msg.e_cut; i++) {}
+
+	  if (m_msg.mshow_set != 1) {
+	    m_msg.nshow = min(i - m_msg.nskip, nbest-m_msg.nskip);
+	  }
+	  else {
+	    if (m_msg.mshow_min) {	/* must show at least m_msg.mshow results */
+	      m_msg.nshow = max(m_msg.mshow, i-m_msg.nskip);
+	    }
+	    else {	/* limit by e_cut */
+	      m_msg.nshow = min(m_msg.mshow, i-m_msg.nskip);
+	    }
+	  }
+	}
+      }
+    }
+    /* done with -e link_lname */
+
+  no_links:
+    /* **************************************************************** */
+    /* if we need alignment info now, pre-load and pre-calculate it     */
+    /* **************************************************************** */
+    /* the list of conditions is greatly expanded to allow for variant scores */
+    if (m_msg.quiet && 
+	((m_msg.stages > 1) || 
+	 m_msg.annot0_sname[0] || m_msg.annot1_sname[0] ||
+	 (m_msg.tot_markx & (MX_M9SUMM + MX_MBLAST + MX_MBLAST2 + MX_M8OUT + MX_M8COMMENT)))) {
+
+      /* pre-load sequence data for  alignments for showbest, showalign */
+      pre_load_best(aa1save, maxn, &bestp_arr[m_msg.nskip], m_msg.nshow, &m_msg, pst.debug_lib);
+
+      /* must calculate repeat_thresh before buf_align_seq */
+      if (pst.do_rep) {
+	if (pst.zsflag >= 0) {
+	  for (i=m_msg.nskip; i < m_msg.nskip + m_msg.nshow;  i++) {
+	    bestp_arr[i]->repeat_thresh = 
+	      min(E1_to_s(pst.e_cut_r, m_msg.n0, bestp_arr[i]->seq->n1,
+			  pst.zdb_size, m_msg.pstat_void),bestp_arr[i]->rst.score[pst.score_ix]);
+	  }
+	}
+	else {
+	  for (i=m_msg.nskip; i < nbest; i++) {
+	    bestp_arr[i]->repeat_thresh = bestp_arr[i]->rst.score[pst.score_ix];
+	  }
+	}
+      }
+
+      buf_align_seq(aa0, m_msg.n0, &bestp_arr[m_msg.nskip], m_msg.nshow,
+		    &pst, &m_msg, &m_bufi
+#if !defined(COMP_THR) && !defined(PCOMPLIB)
+		    , f_str
+#endif
+		    );
+
+      /* buf_align_seq can produce higher a_res->rst.score[]s, partly
+	 because of variation but also because of a wider optimization
+	 band.  In earlier versions, the original rst.score was always
+	 displayed and sorted on.  With variant re-scoring, the
+	 alignment score can improve the rst.score, so the rst.score
+	 must be updated, and the scores re-sorted */
+
+      for (i=m_msg.nskip; i < m_msg.nskip + m_msg.nshow;  i++) {
+	bbp = bestp_arr[i];
+	if (bbp->a_res == NULL) continue;
+	if (bbp->a_res->score_delta > 0) {
+	  bbp->rst.score[0] += bbp->a_res->score_delta;
+	  bbp->rst.score[1] += bbp->a_res->score_delta;
+	  bbp->rst.score[2] += bbp->a_res->score_delta;
+	}
+      }
+      scale_scores(&bestp_arr[m_msg.nskip],m_msg.nshow,m_msg.db, &pst,m_msg.pstat_void);
+    }
+
+    if (!(m_msg.markx & (MX_MBLAST2+MX_M8OUT)) &&
+	(m_msg.annot1_sname[0] || m_msg.annot0_sname[0])) print_annot_header(stdout, &m_msg);
+
+
+    /* **************************************************************** */
+    /* BLAST header output: Database:\n %ld sequences; %ld total letters */
+    /*                     Query: %s length=%d                          */
+    /* **************************************************************** */
+    print_header3(stdout, qlib, &m_msg, &pst);
+
+    showbest(stdout, aa0, aa1save, maxn, &bestp_arr[m_msg.nskip], nbest-m_msg.nskip,
+	     qtt.entries, &m_msg, &pst,m_msg.db, info_gstring2p, f_str);
+
+    m_msp_to_markx(&markx_save, &m_msg);
+    t_quiet = m_msg.quiet;
+    m_msg.quiet = -1;	/* should guarantee 1..m_msg.nshow shown */
+
+    /* set copies of showbest to alternative files */
+    for (cur_markx = m_msg.markx_list; cur_markx; cur_markx = cur_markx->next) {
+      if (cur_markx->out_fd == NULL) continue;
+      markx_to_m_msp(&m_msg, cur_markx);
+      if (!(m_msg.markx & (MX_MBLAST2+MX_M8OUT)) && 
+	  (m_msg.annot1_sname[0] || m_msg.annot0_sname[0])) 
+	print_annot_header(cur_markx->out_fd, &m_msg);
+      print_header3(cur_markx->out_fd, qlib, &m_msg, &pst);
+      showbest(cur_markx->out_fd, aa0, aa1save, maxn, &bestp_arr[m_msg.nskip], nbest-m_msg.nskip,
+	       qtt.entries, &m_msg, &pst, m_msg.db, info_gstring2p, f_str);
+    }
+    m_msg.quiet = t_quiet;
+    markx_to_m_msp(&m_msg, &markx_save);
+
+    /* m_msg.ashow can be -1 or > 0 to show results */
+    if (m_msg.nshow > 0 && m_msg.ashow != 0) {
+
+      rline[0]='N';
+      if (!m_msg.quiet){
+	printf(" Display alignments also? (y/n) [n] "); fflush(stdout);
+	if (fgets(rline,sizeof(rline),stdin)==NULL) goto end_l;
+      }
+      else rline[0]='Y';
+
+      if (toupper((int)rline[0])=='Y') {
+	if (!m_msg.quiet && m_msg.do_showbest) {
+	  printf(" number of alignments [%d]? ",m_msg.nshow);
+	  fflush(stdout);
+	  if (fgets(rline,sizeof(rline),stdin)==NULL) goto end_l;
+	  if (rline[0]!=0) sscanf(rline,"%d",&utmp);
+	  if (utmp == 0) utmp = -1;
+	  m_msg.ashow=min(utmp,m_msg.nshow);
+	}
+      }
+
+      /* **************************************************************** */
+      /* print_header4() : showalign alignment transition                 */
+      /* >>>query vs library for -m 9, -m 10                              */
+      /*  "\n" for MX_MBLAST                                              */
+      /* ; pg_name, other info for -m 10                                  */
+      /*  not sent to stdout if outfile specified                         */
+      /* **************************************************************** */
+      print_header4(outfd, info_qlabel, argv_line, info_gstring3, info_hstring_p, &m_msg, &pst);
+      showalign (outfd, aa0, aa1save, maxn,
+		 &bestp_arr[m_msg.nskip], nbest-m_msg.nskip,
+		 qtt.entries, &m_msg, &pst, info_gstring2p, f_str, &m_bufi);
+
+      fflush(outfd);
+    }
+
+    m_msp_to_markx(&markx_save, &m_msg);
+    for (cur_markx = m_msg.markx_list; cur_markx; cur_markx=cur_markx->next) {
+      if (cur_markx->out_fd == NULL) continue;
+      if (cur_markx->out_fd == outfd) continue;
+      markx_to_m_msp(&m_msg, cur_markx);
+      print_header4(cur_markx->out_fd, info_qlabel, argv_line, info_gstring3, info_hstring_p, &m_msg, &pst);
+      showalign (cur_markx->out_fd, aa0, aa1save, maxn,
+		 &bestp_arr[m_msg.nskip], nbest-m_msg.nskip,
+		 qtt.entries, &m_msg, &pst, info_gstring2p, f_str, &m_bufi);
+      fflush(cur_markx->out_fd);
+    }
+    markx_to_m_msp(&m_msg, &markx_save);
+
+  end_l:
+
+    if (m_msg.nshow==0 &&  m_msg.markx & MX_M8COMMENT) {
+      fprintf(outfd,"# %d hits found\n",m_msg.nshow);
+    }
+
+    /* print >>><<< for correct -m 9 */
+    print_header4a(outfd, &m_msg);
+    for (cur_markx = m_msg.markx_list; cur_markx; cur_markx=cur_markx->next) {
+      if (cur_markx->out_fd == NULL) continue;
+      if (cur_markx->out_fd == outfd) continue;
+      if (m_msg.nshow==0 &&  cur_markx->markx & MX_M8COMMENT) {
+        fprintf(cur_markx->out_fd,"# %d hits found\n",m_msg.nshow);
+      }
+      print_header4a(cur_markx->out_fd, &m_msg);
+    }
+
+    /* display info, statistics parameters for fdata res file */
+    if (fdata) {
+      fprintf(fdata,"#Algorithm : %s\n",info_gstring2p[0]);
+      fprintf(fdata,"#Parameters : %s\n",info_gstring2p[1]);
+      fprintf(fdata,"#Query: %3ld>>>%-50s\n",qtt.entries-1,m_msg.qtitle);
+      pstat_info(fdata_pstat_info, sizeof(fdata_pstat_info), "#Stat:",m_msg.pstat_void);
+      fputs(fdata_pstat_info,fdata);
+      fflush(fdata);
+    }
+    
+#if defined(COMP_THR) || defined(PCOMPLIB)
+    rbuf_done(fa_max_workers);
+#endif
+
+    /* **************************************************************** */
+    /* completely finished with previous query                          */
+    /* **************************************************************** */
+
+    /* clean up/reinitialize the threads buffers */
+#if defined(COMP_THR) || defined(PCOMPLIB)
+    for (i=0; i<m_bufi.max_work_buf; i++) {
+      lib_buf2_list[i].hdr.buf2_cnt=
+	lib_buf2_list[i].hdr.have_results=
+	lib_buf2_list[i].hdr.have_best_save = 
+	lib_buf2_list[i].hdr.aa1b_used = 0;
+    }
+
+    num_reader_bufs = m_bufi.max_work_buf;
+#endif
+
+#if defined(COMP_THR)
+    num_worker_bufs = 0;
+    reader_done = 0;
+    reader_wait = 1;
+    worker_buf_workp = 0;
+    worker_buf_readp = 0;
+    reader_buf_workp = 0;
+    reader_buf_readp = 0;
+
+    start_thread = 1;	/* stop thread from starting again */
+#endif
+
+    /* clean up best_seqs */
+    memset(best_seqs,0,(MAX_BEST+1)*sizeof(struct seq_record));
+
+    /* re-initialize lib_buf2_list buffers */
+    for (lib_bhead_p = lib_buf2_list; 
+	 lib_bhead_p < lib_buf2_list+m_bufi.max_work_buf; lib_bhead_p++) {
+
+      /* this wipes out lib_bhead_p->hdr.buf2[0].seq, .mseq */
+      memset(lib_bhead_p->buf2_data,0,(size_t)(m_bufi.max_buf2_res+1)*sizeof(struct buf2_data_s));
+      /* replace it */
+      lib_bhead_p->hdr.have_results = 0;
+    }
+
+    /* re-initialize library counts */
+    m_msg.ldb.length = 0l;
+    m_msg.ldb.entries = m_msg.ldb.carry = 0;
+
+    /* **************************************************************** */
+    /* free allocated alignment encodings associated with bestp_arr[]   */
+    /* needs to deallocate aln_code, ann_code                           */
+    /* bestp_arr[i]->a_res is a pointer, so it must be free()'d         */
+    /* **************************************************************** */
+    for (i=m_msg.nskip; i < m_msg.nskip+m_msg.nshow; i++) {
+      if (bestp_arr[i]->have_ares & 0x2) {
+	cur_ares_p = bestp_arr[i]->a_res;
+	while (cur_ares_p) {
+	  if (cur_ares_p->aln_code) free(cur_ares_p->aln_code);
+	  if (cur_ares_p->annot_code) free(cur_ares_p->annot_code);
+	  if (cur_ares_p->annot_var_s) free(cur_ares_p->annot_var_s);
+	  if (cur_ares_p->annot_var_id) free(cur_ares_p->annot_var_id);
+	  if (cur_ares_p->annot_var_idd) free(cur_ares_p->annot_var_idd);
+	  if (bestp_arr[i]->have_ares & 0x1 && cur_ares_p->res) free(cur_ares_p->res);
+	  next_ares_p = cur_ares_p->next;
+	  free(cur_ares_p);
+	  cur_ares_p = next_ares_p;
+	}
+	bestp_arr[i]->a_res = NULL;
+      }
+      bestp_arr[i]->have_ares = 0;
+      bestp_arr[i]->mseq->bline = NULL;
+      bestp_arr[i]->mseq->bline_max = 0;
+    }
+
+#ifdef DEBUG
+    /* check to see if there are ANY un-reset have_ares */
+    for (i=0; i< nbest; i++) {
+      if (bestp_arr[i]->have_ares) {
+	fprintf(stderr," Un-reset have_ares[%d]: %d\n",i,bestp_arr[i]->have_ares);
+	bestp_arr[i]->have_ares = 0;
+      }
+    }
+#endif
+
+    /* reset align_done flag for next search */
+    m_msg.align_done = 0;
+
+    if (m_msg.qframe == 2) free(aa0[1]-1);
+
+    if (have_f_str) {
+      have_f_str = 0;
+      if (f_str[1]!=f_str[0]) {
+	close_work (aa0[1], m_msg.n0, &pst, &f_str[1]);
+      }
+      close_work (aa0[0], m_msg.n0, &pst, &f_str[0]);
+    }
+#if !defined(COMP_THR) && !defined(PCOMPLIB)
+    if (m_msg.qshuffle) close_work (aa0s, m_msg.n0, &pst, &qf_str);
+#endif
+    if (pst.pam_pssm) {
+      free_pam2p(pst.pam2p[0]);
+      free_pam2p(pst.pam2p[1]);
+    }
+
+    if (aa1shuff_b != NULL) {
+      free(aa1shuff_b);
+      aa1shuff_b = NULL;
+    }
+
+    if (m_msg.aa1save_buf_b != NULL) {
+      free(m_msg.aa1save_buf_b);
+      m_msg.aa1save_buf_b = NULL;
+    }
+
+    if (m_msg.bline_buf_b != NULL) {
+      free(m_msg.bline_buf_b);
+      m_msg.bline_buf_b = NULL;
+    }
+
+    if (link_lib_list_p) {
+      close_lib_list(link_lib_list_p,1,1);
+      free_seqr_chain(link_getlib_info->start_seqr_chain);
+      /* delete the library file */
+      if (!pst.debug_lib) {
+#ifdef UNIX
+	unlink(link_lib_file);
+#else
+	_unlink(link_lib_file);
+#endif
+      }
+      if (link_lib_file) free(link_lib_file);
+    }
+
+    tddone = time(NULL);
+    tdone = s_time();
+    fflush(outfd);
+
+    ttdisp += tdone-tscan;
+
+    /* reset pst parameters to original */
+    pst.zsflag = m_msg.zsflag;
+    pst.zsflag2 = m_msg.zsflag2;
+    pst.n1_low = m_msg.n1_low;
+    pst.n1_high = m_msg.n1_high;
+
+    /* **************************************************************** */
+    /* start the next query                                             */
+    /* **************************************************************** */
+  next_query:
+    m_msg.q_offset = next_q_offset;
+
+    m_msg.n0 = 
+      QGETLIB (aa0[0], MAXTST, m_msg.qtitle, sizeof(m_msg.qtitle),
+	       &qseek, &qlcont,q_file_p,&m_msg.q_off);
+    if (m_msg.n0 < 0) break;
+    if (m_msg.n0 == 0) { 
+      next_q_offset = 0;
+      goto next_query;
+    }
+
+    if ((bp=strchr(m_msg.qtitle,' '))!=NULL) *bp='\0';
+    strncpy(info_qlabel, m_msg.qtitle,sizeof(info_qlabel));
+#ifdef DEBUG
+  SAFE_STRNCPY(ext_qtitle, m_msg.qtitle,sizeof(ext_qtitle));
+#endif
+    if (bp != NULL) *bp=' ';
+    info_qlabel[sizeof(info_qlabel)-1]='\0';
+
+    if (m_msg.ann_flg) {
+      m_msg.n0 = ann_scan(aa0[0],m_msg.n0,&m_msg.aa0a,m_msg.qdnaseq);
+    }
+
+    if (m_msg.ldb_info.term_code && m_msg.qdnaseq==SEQT_PROT &&
+	aa0[0][m_msg.n0-1]!=m_msg.ldb_info.term_code) {
+      aa0[0][m_msg.n0++]=m_msg.ldb_info.term_code;
+      aa0[0][m_msg.n0]=0;
+    }
+
+    /* if ends with ESS, remove terminal ESS */
+    if (aa0[0][m_msg.n0-1] == ESS) { m_msg.n0--; aa0[0][m_msg.n0]= '\0';}
+
+    if (m_msg.outfd) {fputc('\n',stdout);}
+
+    if (qlcont) {
+      next_q_offset = m_msg.q_offset + m_msg.n0 - m_msg.q_overlap;
+    }
+    else {
+      next_q_offset = 0l;
+    }
+
+    /* **************************************************************** */
+    /* have the query, reset the seqr_chain for another scan            */
+    /* **************************************************************** */
+    if (getlib_info->use_memory <= 0) {
+      /* re-use the buffer, but re-open the library */
+      getlib_info->eof = 0;
+      getlib_info->lib_list_p = lib_list_p;
+      close_lib_list(getlib_info->lib_list_p,0,0);
+    }
+    else {
+      reset_seqr_chain(getlib_info->start_seqr_chain);
+    }
+  }	/* end of while(1) for multiple queries */
+
+  /* **************************************************************** */
+  /* ALL done -- all queries read, all results shown, clean up        */  
+  /* **************************************************************** */
+#ifdef PCOMPLIB
+  /* tell workers to quit */
+  init_thr(fa_max_workers, NULL, NULL, NULL, NULL, NULL);
+#endif
+
+  close_lib_list(lib_list_p,1,1);
+
+  if (m_msg.lname[0] == '!' && !pst.debug_lib) {
+#ifdef UNIX
+    unlink(lib_db_file);
+#else
+    _unlink(lib_db_file);
+#endif
+  }
+
+  tdone = s_time();
+
+#ifdef DEBUG
+  fprintf(stderr,"[*** comp_lib9.c***] seqr_chains: %d [%d]; aa1b_buffer: %ld [%ld] blocks (lost: %ld); using memory:%d\n",
+	 m_msg.cur_seqr_cnt, m_bufi.max_work_buf,
+	 getlib_info->tot_memK,getlib_info->max_memK, getlib_info->lost_memK,
+	 getlib_info->use_memory);
+#endif
+
+  /* **************************************************************** */
+  /* final summary text                                               */
+  /* 218 residues in 1 query   sequences                              */
+  /* 5190103 residues in 13351 library sequences                      */
+  /* Tcomplib [36.3.5 Apr, 2011(preload7)] (2 proc)                   */
+  /* goes to stdout regardless of outfd                               */
+  /* **************************************************************** */
+  print_header5(stdout, qlib, &qtt, &m_msg, &pst, 
+		getlib_info->use_memory, getlib_info->tot_memK);
+
+  m_msp_to_markx(&markx_save, &m_msg);
+  for (cur_markx = m_msg.markx_list; cur_markx; cur_markx=cur_markx->next) {
+    if (cur_markx->out_fd == NULL) continue;
+    markx_to_m_msp(&m_msg, cur_markx);
+    print_header5(cur_markx->out_fd, qlib, &qtt, &m_msg, &pst, 
+		  getlib_info->use_memory, getlib_info->tot_memK);
+    fflush(cur_markx->out_fd);
+    fclose(cur_markx->out_fd);
+  }
+  markx_to_m_msp(&m_msg, &markx_save);
+
+#ifdef PCOMPLIB
+#ifdef MPI_SRC
+  MPI_Finalize();
+#endif
+#endif
+  exit(0);
+}
+/* **************************************************************** */
+/* end of main() program                                            */
+/* **************************************************************** */
+
+/* fsigint() -- called when the run is interrupted (presumably installed
+   as the SIGINT handler -- confirm at the signal() call).  Records the
+   stop times, writes an "interrupted" notice to stdout, to outfd (when
+   it is not stdout) and to stderr, prints the summary with an all-zero
+   library count, and exits with status 1. */
+void fsigint()
+{
+  struct db_str empty_db;
+  int to_outfd;
+
+  /* no library totals are available here, report zeros */
+  empty_db.carry = 0;
+  empty_db.length = 0;
+  empty_db.entries = 0;
+
+  tdone = s_time();
+  tddone = time(NULL);
+
+  /* only write a second copy when outfd is a different stream */
+  to_outfd = (outfd != stdout);
+
+  printf(" /*** interrupted ***/\n");
+  if (to_outfd) { fprintf(outfd,"/*** interrupted ***/\n"); }
+  fprintf(stderr,"/*** interrupted ***/\n");
+
+  print_sum(stdout, &qtt, &empty_db, 0, 0);
+  if (to_outfd) { print_sum(outfd, &qtt, &empty_db, 0, 0); }
+
+  exit(1);
+}
+
+/* ******************************************************************* */
+/* alloc_comp_bufs() -- allocate the array of thread (work) buffers    */
+/*                                                                     */
+/* m_bufi_p->max_work_buf buf_head entries are allocated; each one     */
+/* gets max_buf2_res+1 buf2_data, buf2_res and buf2_ares entries, so   */
+/* a thread buffer can hold all the results from a seqr_chain buffer:  */
+/* m_bufi_p->max_buf2_res = nframe * m_bufi_p->max_chain_seqs (preset) */
+/*                                                                     */
+/* side effects: resets m_msp->cur_seqr_cnt to 0, sets                 */
+/*   m_bufi_p->seq_buf_size, and (threaded/parallel builds) allocates  */
+/*   worker_buf/reader_buf and fills RESULTS_BUF[]                     */
+/* returns: the buf_head array; exits if any allocation fails          */
+/* ******************************************************************* */
+struct buf_head *
+alloc_comp_bufs (struct mng_thr *m_bufi_p, struct mngmsg *m_msp,
+		 int ave_seq_len) {
+  struct buf_head *buf_list, *bhead_p;
+  int ib, seq_buf_siz, n_res;
+  size_t n_work_buf;
+
+  /* m_msp->cur_seqr_cnt tracks the number of seqr chains allocated.
+     There must be at least max_work_buf seqr_chains. */
+  m_msp->cur_seqr_cnt = 0;
+
+  n_work_buf = (size_t)(m_bufi_p->max_work_buf);
+
+  buf_list = (struct buf_head *)calloc(n_work_buf, sizeof(struct buf_head));
+  if (buf_list == NULL) {
+    fprintf(stderr," cannot allocate lib_buf2_list[%d]\n", m_bufi_p->max_work_buf);
+    exit(1);
+  }
+
+#if defined(COMP_THR) || defined(PCOMPLIB)
+  worker_buf = (struct buf_head **)calloc(n_work_buf, sizeof(struct buf_head *));
+  if (worker_buf == NULL) {
+    fprintf(stderr," cannot allocate **worker_buf[%d]\n", m_bufi_p->max_work_buf);
+    exit(1);
+  }
+#endif
+
+#ifdef COMP_THR
+  reader_buf = (struct buf_head **)calloc(n_work_buf, sizeof(struct buf_head *));
+  if (reader_buf == NULL) {
+    fprintf(stderr," cannot allocate **reader_buf[%d]\n", m_bufi_p->max_work_buf);
+    exit(1);
+  }
+#endif
+
+  /* **************************************************************** */
+  /* four structures/buffers keep track of sequences/results:         */
+  /* (1) buf_list[] (the returned buf_head array), whose hdr records  */
+  /*     whether results are ready, how many are available, and       */
+  /*     information about the seq_record buffer and aa1b buffer      */
+  /* (2) buf2_data[] -> seq_record/mseq_record arrays                 */
+  /* (3) buf2_res[] -> score results                                  */
+  /* (4) buf2_ares[] -> alignment results                             */
+  /* **************************************************************** */
+
+  /* size of the sequence buffer used later for each seqr_chain */
+  seq_buf_siz = max(m_bufi_p->max_chain_seqs*ave_seq_len, m_msp->max_tot * 2);
+  if (seq_buf_siz < m_msp->max_tot) { seq_buf_siz = m_msp->max_tot; }
+  m_bufi_p->seq_buf_size = seq_buf_siz;
+
+  /* one more entry than max_buf2_res in every per-buffer array */
+  n_res = m_bufi_p->max_buf2_res+1;
+
+  for (ib = 0; ib < m_bufi_p->max_work_buf; ib++) {
+    bhead_p = buf_list + ib;
+
+    bhead_p->buf2_ares =
+      (struct buf2_ares_s *)calloc(n_res, sizeof(struct buf2_ares_s));
+    if (bhead_p->buf2_ares == NULL) {
+      fprintf(stderr," cannot allocate ares buffer struct %d %d\n", ib, n_res);
+      exit(1);
+    }
+
+    bhead_p->buf2_res =
+      (struct buf2_res_s *)calloc(n_res, sizeof(struct buf2_res_s));
+    if (bhead_p->buf2_res == NULL) {
+      fprintf(stderr," cannot allocate res buffer struct %d %d\n", ib, n_res);
+      exit(1);
+    }
+
+    bhead_p->buf2_data =
+      (struct buf2_data_s *)calloc(n_res, sizeof(struct buf2_data_s));
+    if (bhead_p->buf2_data == NULL) {
+      fprintf(stderr," cannot allocate buffer struct %d %d\n", ib, n_res);
+      exit(1);
+    }
+
+    bhead_p->hdr.my_id = ib;
+    bhead_p->hdr.have_results = 0;
+    bhead_p->hdr.seq_record_continuous = 0;	/* should be 1 */
+
+    /* these values are not used with seqr_chains */
+    bhead_p->hdr.aa1b_size = 0;
+    bhead_p->hdr.aa1b_used = 0;
+
+#if defined(COMP_THR) || defined(PCOMPLIB)
+    RESULTS_BUF[ib] = bhead_p;
+#endif
+  }
+
+  return buf_list;
+}
+
+/* **************************************************************** */
+/* new_seqr_chain() allocates a zeroed seqr_chain with space for:
+     struct seq_record seqr_base[max_chain_seqs];
+     struct mseq_record mseqr_base[max_chain_seqs];
+     if alloc_buf_flg, unsigned char aa1b_base[this_aa1b_size]
+
+   inputs: max_chain_seqs -- number of seq_record/mseq_record slots
+           aa1b_size -- requested sequence buffer size; replaced by
+	     old_seqr_chain->aa1b_size when that is > 0
+	   old_seqr_chain -- previous link (or NULL); its ->next is set
+	     to the new chain
+	   maxn -- only referenced by the disabled shrink heuristic
+	   alloc_buf_flg -- allocate the aa1b sequence buffer if non-zero
+
+   modifies: *lost_memK -- set to 0 when there is no old_seqr_chain;
+	     when a buffer is allocated, incremented by the unused part
+	     (in K) of the old chain's buffer
+
+   returns: the new chain; exits if an allocation fails
+
+   comp_lib9.c tracks how much space is being used, and, if the space
+   is too large, reuses existing buffers.
+*/
+/* **************************************************************** */
+struct seqr_chain *
+new_seqr_chain(int max_chain_seqs, int aa1b_size, struct seqr_chain *old_seqr_chain,
+	       int maxn, long *lost_memK, int alloc_buf_flg) {
+  struct seqr_chain *my_seqr_chain;
+  int this_aa1b_size;
+
+  /* allocate the chain */
+  if ((my_seqr_chain = (struct seqr_chain *)calloc(1,sizeof(struct seqr_chain)))==NULL) {
+    fprintf(stderr," Cannot allocate library seqr_chain\n");
+    exit(1);
+  }
+
+  /* link the new chain after the old one and inherit its buffer size */
+  this_aa1b_size = aa1b_size;
+  if (old_seqr_chain) {
+    old_seqr_chain->next = my_seqr_chain;
+    if (old_seqr_chain->aa1b_size > 0) {
+      this_aa1b_size = old_seqr_chain->aa1b_size;
+    }
+  }
+  else {
+    /* first chain: start the wasted-memory count from zero */
+    *lost_memK = 0l;
+  }
+
+  my_seqr_chain->max_chain_seqs = max_chain_seqs;
+  my_seqr_chain->cur_seq_cnt = 0;
+
+  /* now allocate the seq_record, mseq_record buffers, and the space for the sequences */
+  if ((my_seqr_chain->seqr_base = (struct seq_record *)
+       calloc((size_t)max_chain_seqs,sizeof(struct seq_record))) == NULL) {
+    fprintf(stderr," cannot allocate seq_record buffer %d\n",max_chain_seqs);
+    exit(1);
+  }
+
+  if ((my_seqr_chain->mseqr_base = (struct mseq_record *)
+       calloc((size_t)max_chain_seqs,sizeof(struct mseq_record))) == NULL) {
+    fprintf(stderr," cannot allocate mseq_record buffer %d\n",max_chain_seqs);
+    exit(1);
+  }
+
+  if (alloc_buf_flg) {
+    /* try to adjust aa1b_size to a sensible value based on past history */
+    if (old_seqr_chain != NULL && (old_seqr_chain->aa1b_size > 0)) {
+      /* unused tail of the old buffer, in K (>>10) */
+      *lost_memK += ((old_seqr_chain->aa1b_size - old_seqr_chain->aa1b_next)>>10);
+      /* if sequences are shorter than expected */
+      /*
+	if ((old_seqr_chain->cur_seq_cnt >= (max_chain_seqs-10)) && 
+	max(old_seqr_chain->aa1b_next,maxn) < old_seqr_chain->aa1b_size/2) {
+	this_aa1b_size -= (this_aa1b_size/4);
+	this_aa1b_size = max(this_aa1b_size, maxn * 2);
+	}
+      */
+      /* if not enough sequence slots are being used (sequences longer
+	 than expected), double the buffer -- but only while
+	 2*this_aa1b_size+1 still fits in an int; repeated doubling
+	 would otherwise overflow (undefined behavior) */
+      if (old_seqr_chain->cur_seq_cnt < old_seqr_chain->max_chain_seqs/4
+	  && this_aa1b_size <= INT_MAX/2) {
+	this_aa1b_size *= 2;
+      }
+    }
+
+    /* finally, we need a big sequence buffer */
+    if ((my_seqr_chain->aa1b_base =
+	 (unsigned char *)calloc((size_t)(this_aa1b_size+1),sizeof(unsigned char)))
+	==NULL) {
+      fprintf(stderr," cannot allocate buffer %d\n",this_aa1b_size+1);
+      exit(1);
+    }
+
+    /* skip the first byte so that aa1b_base[-1] is a valid '\0';
+       free_seqr_chain()/reinit_seqr_chain() undo this offset */
+    my_seqr_chain->aa1b_base++;
+    my_seqr_chain->aa1b_size = this_aa1b_size;
+  }
+
+  return my_seqr_chain;
+}
+
+/* **************************************************************** */
+/* reinit_seqr_chain() -- clear a seqr_chain for re-use: zero its   */
+/* seq_records, mseq_records and sequence buffer, and restore its   */
+/* slot count to max_chain_seqs.  ->next is left untouched.         */
+/* **************************************************************** */
+void
+reinit_seqr_chain(struct seqr_chain *my_seqr_chain, int max_chain_seqs) {
+  /* clear only the slots counted by the chain's current max_chain_seqs */
+  size_t n_slots = (size_t)my_seqr_chain->max_chain_seqs;
+  /* the buffer was allocated with one extra byte in front of aa1b_base */
+  size_t n_seq_bytes = (size_t)(my_seqr_chain->aa1b_size+1);
+
+  memset(my_seqr_chain->seqr_base, 0, n_slots*sizeof(struct seq_record));
+  memset(my_seqr_chain->mseqr_base, 0, n_slots*sizeof(struct mseq_record));
+  memset(my_seqr_chain->aa1b_base-1, 0, n_seq_bytes);
+
+  /* restoring the full count is possible because every seqr_chain
+     has max_chain_seqs seq_records; some have a smaller
+     my_seqr_chain->aa1b_size */
+  my_seqr_chain->max_chain_seqs = max_chain_seqs;
+
+  my_seqr_chain->aa1b_next = 0;
+  my_seqr_chain->contiguous = 0;
+  my_seqr_chain->cur_seq_cnt = 0;
+  /* my_seqr_chain->next cannot be reset; it is needed for the chain */
+}
+
+/* **************************************************************** */
+/* end_seqr_chain() -- finalizes the counts of a finished seqr_chain.
+
+   inputs: *last_seqr (the finished seqr_chain)
+
+   modifies: cur_seq_cnt (decremented by one) and max_chain_seqs
+             (set to the corrected cur_seq_cnt, so that later users
+             see the correct number of seq_records) */
+/* **************************************************************** */
+void
+end_seqr_chain(struct seqr_chain *last_seqr) {
+  /* the count is always one past the last usable seq_record, so step
+     back and make the result the chain's limit as well; this also
+     prevents other threads from using this link */
+  last_seqr->max_chain_seqs = --last_seqr->cur_seq_cnt;
+}
+
+/* **************************************************************** */
+/* free_seqr_chain() -- releases every link of a seqr_chain list:
+   the aa1b buffer (when present), the mseq_records, the seq_records
+   and the link itself.
+   inputs: this_seqr_chain -- first link to free (may be NULL)
+*/
+/* **************************************************************** */
+void
+free_seqr_chain(struct seqr_chain *this_seqr_chain) {
+  struct seqr_chain *link_p, *after_p;
+
+  link_p = this_seqr_chain;
+  while (link_p != NULL) {
+    /* remember the successor before the link is freed */
+    after_p = link_p->next;
+
+    /* with get_mmap_chain, not all chains have aa1b_base allocation;
+       when present, the allocation starts one byte before aa1b_base */
+    if (link_p->aa1b_base != NULL) { free(link_p->aa1b_base - 1); }
+    free(link_p->mseqr_base);
+    free(link_p->seqr_base);
+    free(link_p);
+
+    link_p = after_p;
+  }
+}
+
+/* **************************************************************** */
+/* next_sequence_p() hands out the next seq_record/mseq_record slot
+   of a seqr_chain, or returns NULL when all slots are used or fewer
+   than maxn bytes remain in the sequence buffer.
+
+   inputs: *cur_seqr_chain (provides the slots and sequence buffer)
+	   *old_seq_p (previous sequence, or NULL for the first call)
+	   maxn (space that must still be free in the buffer)
+   modifies: **cur_mseq_p (on success), cur_seqr_chain->aa1b_next,
+	   cur_seqr_chain->cur_seq_cnt (incremented even on failure)
+   returns: the new seq_record, with ->aa1b pointing at its space
+*/
+/* **************************************************************** */
+
+struct seq_record *
+next_sequence_p(struct mseq_record **cur_mseq_p, struct seq_record *old_seq_p, 
+		struct seqr_chain *cur_seqr_chain, int maxn) {
+  struct seq_record *slot_p;
+
+  /* every call but the first has a previous sequence; step aa1b_next
+     past it and its terminating byte */
+  if (old_seq_p != NULL) {
+    cur_seqr_chain->aa1b_next += old_seq_p->n1+1;
+  }
+
+  /* out of seq_records, or out of room in aa1b_base */
+  if (cur_seqr_chain->cur_seq_cnt >= cur_seqr_chain->max_chain_seqs
+      || cur_seqr_chain->aa1b_next + maxn >= cur_seqr_chain->aa1b_size) {
+    /* the count is advanced anyway; end_seqr_chain() steps it back */
+    cur_seqr_chain->cur_seq_cnt++;
+    return NULL;
+  }
+
+  /* the mseq_record and seq_record share the same slot index */
+  *cur_mseq_p = &cur_seqr_chain->mseqr_base[cur_seqr_chain->cur_seq_cnt];
+  slot_p = &cur_seqr_chain->seqr_base[cur_seqr_chain->cur_seq_cnt];
+
+  /* the sequence goes at the next free position in the buffer */
+  slot_p->aa1b = &cur_seqr_chain->aa1b_base[cur_seqr_chain->aa1b_next];
+
+  cur_seqr_chain->cur_seq_cnt++;
+  return slot_p;
+}
+
+/* **************************************************************** */
+/* init_getlib_info():
+   inputs: lib_list_p - the list of library files
+           maxn -- max size of sequence read (sizes the overlap buffer)
+	   max_memK -- memory block counter max
+
+   output: a zero-initialized struct getlib_str with the inputs and
+	   the overlap buffer filled in
+
+   The structure maintains the state information required for
+   successive, asynchronous getlib() calls */
+/* **************************************************************** */
+struct getlib_str *
+init_getlib_info(struct lib_struct *lib_list_p, int maxn,long max_memK) {
+  struct getlib_str *info_p;
+  unsigned char *overlap_buf;
+
+  info_p = (struct getlib_str *)calloc(1,sizeof(struct getlib_str));
+  if (info_p == NULL) {
+    s_abort ("Unable to allocate getlib_info", "");
+  }
+
+  overlap_buf = (unsigned char *)calloc((maxn+1),sizeof (char));
+  if (overlap_buf == NULL) {
+    s_abort ("Unable to allocate library overlap", "");
+  }
+
+  /* keep a '\0' in front of the saved sequence: aa1save points one
+     byte past the start of the allocation */
+  overlap_buf[0] = '\0';
+  info_p->aa1save = overlap_buf + 1;
+
+  info_p->lib_list_p = lib_list_p;
+  info_p->max_memK = max_memK;
+  info_p->use_memory = 0;
+  info_p->n_libstr = sizeof(info_p->libstr);
+
+  /* no continued sequence yet */
+  info_p->lcont = 0;
+  info_p->ocont = 0;
+  info_p->loffset = 0l;
+
+  return info_p;
+}
+
+/* **************************************************************** */
+/* next_seqr_chain() reads a library in seqr_chain-sized chunks,
+   returning each time a chunk is full.
+
+   8-July-2011 -- modified to directly return memory mapped encoded
+   		  sequences (BLASTDB)
+
+   next_seqr_chain saves enough of the getlib() state to be able to
+   continue reading in getlib_str *getlib_info, which also keeps the
+   information about the current seqr_chain record being used.
+
+   (1) get data structure to save one seq_chain buffer
+       new_seqr_chain(number of buffers= max_chain_seqs, seq_buf_size+1,...)
+       thus, a single seqr_chain() is guaranteed to be contiguous
+   (2) then get a current_seq_p, current_mseq_p 
+   (3) getlib() to put the stuff in the current_seq_p (and current_mseq_p)
+   (4) keep doing this until the seq_chain buffer is filled
+
+   inputs: m_bufi_p -- buffer sizes (max_chain_seqs, seq_buf_size,
+	     max_work_buf)
+	   getlib_info -- reader state, updated on every call
+	   lib_bhead_p -- thread buffer; hdr.my_chain remembers the
+	     seqr_chain associated with it
+	   m_msp -- library parameters (ldb_info) and running totals (db)
+	   ppst -- only used for DEBUG checks
+
+   returns: the filled (or in-memory) seqr_chain, or NULL when there
+	   is nothing more to return; on NULL,
+	   getlib_info->cur_seqr_chain is reset to start_seqr_chain
+*/
+/* **************************************************************** */
+struct seqr_chain *
+next_seqr_chain(const struct mng_thr *m_bufi_p, struct getlib_str *getlib_info,
+		struct buf_head *lib_bhead_p,
+		struct mngmsg *m_msp, const struct pstruct *ppst)
+{
+  struct seqr_chain *my_seqr_chain=NULL;
+  struct lib_struct *cur_lib_p;
+  struct seq_record *current_seq_p, *old_seq_p;
+  struct mseq_record *current_mseq_p;
+  struct lmf_str *m_fd;
+
+  int maxt;	/* maximum length passed to GETLIB() */
+  int sstart, sstop, is, id, i;
+  struct lmf_str *m_file_p;
+  unsigned char *aa1ptr, *aa1;
+  char *bp;
+  int n1;
+  /* int cont, ocont; */
+  /* long loffset; */
+
+  /* Are the sequences in memory?  if so, just return the next one.  */
+  if (getlib_info->use_memory>0)  {
+    my_seqr_chain = getlib_info->cur_seqr_chain;
+    if (my_seqr_chain == NULL) {
+      goto return_null;
+    }
+    else {
+      getlib_info->cur_seqr_chain = getlib_info->cur_seqr_chain->next;
+#ifdef DEBUG
+      lib_bhead_p->hdr.my_chain = my_seqr_chain;
+#endif
+      return my_seqr_chain;
+    }
+  }
+
+  /* we do not have complete seqr_chains, so we must do things one
+     record/sequence at a time */
+
+  current_seq_p = old_seq_p = NULL;
+  /* check for eof on current library; return NULL if EOF, or start
+     next library in chain */
+  if (getlib_info->eof) {
+    if (getlib_info->use_memory < 0) {
+      closelib(getlib_info->lib_list_p->m_file_p,1);
+    }
+    if (getlib_info->lib_list_p->next == NULL) {
+      /* finished with everything, use memory next time */
+      if (getlib_info->use_memory >= 0) getlib_info->use_memory = 1;
+      goto return_null;	/* ensures that cur_seqr_chain is reset */
+    }
+    else {
+      getlib_info->eof = 0;
+      getlib_info->lib_list_p = getlib_info->lib_list_p->next;
+    }
+  }
+
+  if (getlib_info->lib_list_p == NULL) goto return_null;
+
+  /* check to see if a library is open; if not get one and open it */
+  if (getlib_info->lib_list_p->m_file_p == NULL) {
+  next_lib:
+    cur_lib_p = getlib_info->lib_list_p;
+    if ((cur_lib_p->m_file_p = 
+	 open_lib(cur_lib_p, m_msp->ldb_info.ldnaseq, lascii, !m_msp->quiet))
+	==NULL) {
+      /* skip libraries that cannot be opened; give up after the last one */
+      fprintf(stderr," cannot open library %s\n",cur_lib_p->file_name);
+      getlib_info->lib_list_p = getlib_info->lib_list_p->next;
+      if (getlib_info->lib_list_p == NULL) {
+	goto return_null;
+      }
+      else {
+	goto next_lib;
+      }
+    }
+    /* these values must be reset for new databases, but otherwise
+       should not be reset, because a sequence can span more than one
+       seqr_chain */
+    getlib_info->loffset = 0l;
+    getlib_info->ocont = 0;
+  }
+
+  /* here we have an open library */
+
+  /* if the library is NCBIBL20 and memory mapped, simply return
+     pointers to the memory map */
+  m_fd = getlib_info->lib_list_p->m_file_p;
+  if (m_fd->get_mmap_chain) {
+    /* get a new seqr_chain, without a sequence buffer (last arg 0) */
+    my_seqr_chain = 
+      new_seqr_chain(m_bufi_p->max_chain_seqs,(m_bufi_p->seq_buf_size+1),
+		     getlib_info->cur_seqr_chain, m_msp->ldb_info.maxn, 
+		     &getlib_info->lost_memK, 0);
+
+    /* keep track of number of seqr_chains */
+    m_msp->cur_seqr_cnt++;
+
+    /* fill it with memory mapped pointers */
+    if (m_fd->get_mmap_chain(my_seqr_chain, m_fd, &m_msp->db)==EOF) {
+      getlib_info->eof = 1;
+    }
+
+    lib_bhead_p->hdr.my_chain = my_seqr_chain;
+
+    getlib_info->cur_seqr_chain = my_seqr_chain;
+    if (getlib_info->start_seqr_chain == NULL) {
+      getlib_info->start_seqr_chain = my_seqr_chain;
+    }
+    my_seqr_chain->max_chain_seqs = my_seqr_chain->cur_seq_cnt;
+    return my_seqr_chain;
+  }
+
+  /* a seqr_chain contains an array for seq_records, mseq_records, and
+     the aa1b_base buffer for sequences */
+
+  /* this section transitions between the comp_lib6,7 strategy, which
+     kept all the sequence database in memory, simply allocating
+     additional seqr_chains's as required, and the comp_lib5 strategy,
+     which did not use seqr_chains, but linked the sequence buffers
+     with the thread buffers
+
+     seqr_chain buffers are allocated while memory is available and
+     there are fewer than max_work_buf (thread_buf) seqr chains. In
+     addition, each seqr_chain is associated with the thread buffer
+     that will use it.
+
+     If the memory limit is hit, then getlib_info->use_memory is set
+     negative, AND the seqr_chains linked to the thread buffers are
+     used -- each thread buffer is permanently associated with the
+     last seqr_chain that it used.  Linking seqr_chains to thread
+     buffers (as is effectively done in comp_lib5) ensures that the
+     same seqr_chain is never used by more than one thread
+     simultaneously. */
+
+  /* if (a) we don't have m_bufi_p->max_work_buf chains or (b) memory
+     is still available, then get a new_seqr_chain */
+
+  if ((getlib_info->use_memory >= 0) && 
+      (getlib_info->start_seqr_chain == NULL || m_msp->cur_seqr_cnt < m_bufi_p->max_work_buf ||
+       getlib_info->tot_memK + ((m_bufi_p->seq_buf_size+1)>>10) < getlib_info->max_memK)	/* memK is in 1024 bytes thus >>10 */
+      ) {
+    my_seqr_chain = 
+      new_seqr_chain(m_bufi_p->max_chain_seqs,(m_bufi_p->seq_buf_size+1),
+		     getlib_info->cur_seqr_chain, m_msp->ldb_info.maxn, 
+		     &getlib_info->lost_memK, 1);
+    getlib_info->tot_memK += (my_seqr_chain->aa1b_size >> 10);
+    m_msp->cur_seqr_cnt++;
+    lib_bhead_p->hdr.my_chain = my_seqr_chain;
+  }
+  /* if the memory limit has been reached, then keep using the chain
+     already linked to this thread buffer (after re-initializing) */
+  else {
+    my_seqr_chain = lib_bhead_p->hdr.my_chain;
+    if (!my_seqr_chain) {
+      fprintf(stderr,"[***%s/next_seqr_chain***] lib_bhead_p->hdr.my_chain==NULL\n",prog_func);
+      exit(1);
+    }
+    reinit_seqr_chain(my_seqr_chain,m_bufi_p->max_chain_seqs);
+    getlib_info->use_memory = -1;
+  }
+
+  getlib_info->cur_seqr_chain = my_seqr_chain;
+
+  if (getlib_info->start_seqr_chain == NULL) {
+    getlib_info->start_seqr_chain = my_seqr_chain;
+  }
+
+  /* (re-)initialize variables from getlib_info */
+  m_file_p = getlib_info->lib_list_p->m_file_p;
+
+  /* then we can count through the seq_records in the
+     seqr_chain and get a getlib() sequence for each one */
+
+  while ((current_seq_p = 
+	  next_sequence_p(&current_mseq_p, old_seq_p, my_seqr_chain, 
+			m_msp->ldb_info.maxn))) {
+      
+    aa1 = current_seq_p->aa1b;
+
+    if (getlib_info->lcont) {
+      /* continuing a long sequence: start with the saved overlap */
+      maxt = m_msp->ldb_info.maxt3;
+      memcpy(aa1,getlib_info->aa1save,m_msp->ldb_info.l_overlap);
+      aa1ptr= &aa1[m_msp->ldb_info.l_overlap];	/* aa1ptr is where the next GETLIB sequence goes */
+      current_mseq_p->lseek = getlib_info->lseek;
+    }
+    else {
+      maxt = m_msp->ldb_info.maxn;
+      aa1ptr = aa1;
+    }
+
+    n1=GETLIB(aa1ptr, maxt, getlib_info->libstr, getlib_info->n_libstr,
+	      &(current_mseq_p->lseek), &getlib_info->lcont, m_file_p, &(current_seq_p->l_off));
+
+    /* if the library is empty, check for another library */
+    if (n1 < 0) {
+      /* set EOF for this file */
+      getlib_info->eof = 1;
+      /* reduce the seqr_chain count */
+      end_seqr_chain(my_seqr_chain);
+      return my_seqr_chain;
+    }
+
+    old_seq_p = current_seq_p;
+
+#ifdef DEBUG
+    /* check for out of range sequence */
+    /*
+    for (id=0; id<n1; id++) {
+      if (aa1[id] > ppst->nsq_e) {
+	fprintf(stderr," *** ERROR *** %s[%d] = %d > %d out of range\n",libstr, id, aa1[id], ppst->nsq_e);
+	aa1[id] = 1;
+      }
+    }
+    */
+#endif
+
+    /* which sequence is it in the entire library? used for debugging */
+    current_seq_p->index = seq_index;
+    current_mseq_p->index = seq_index++;
+    current_mseq_p->m_file_p = (void *)m_file_p;
+    current_mseq_p->cont = getlib_info->ocont+1;
+    current_seq_p->l_offset = getlib_info->loffset;
+
+    /* keep only the identifier (text before the first space) */
+    if ((bp=strchr(getlib_info->libstr,' '))!=NULL) *bp='\0';
+    strncpy(current_mseq_p->libstr,getlib_info->libstr,MAX_UID);	/* get old libstr for lcont>0 */
+    /* strncpy() does not terminate the copy when the identifier has
+       MAX_UID or more characters */
+    current_mseq_p->libstr[MAX_UID-1]='\0';
+
+    /* add termination code for FASTX/FASTY if necessary */
+    if (m_msp->ldb_info.term_code && !getlib_info->lcont &&  
+	m_msp->ldb_info.ldnaseq==SEQT_PROT && 
+	aa1ptr[n1-1]!=m_msp->ldb_info.term_code) {
+      aa1ptr[n1++]=m_msp->ldb_info.term_code;
+      aa1ptr[n1]=0;
+    }
+
+    /* check for subset ("-stop" or "start-stop" in opt_text) */
+    if (m_file_p->opt_text[0]!='\0') {
+      if (m_file_p->opt_text[0]=='-') {
+	/* default keeps the whole sequence if the number cannot be read */
+	sstart=0; sstop = BIGNUM;
+	sscanf(&m_file_p->opt_text[1],"%d",&sstop);
+      }
+      else {
+	sstart = 0; sstop = -1;
+	sscanf(&m_file_p->opt_text[0],"%d-%d",&sstart,&sstop);
+	sstart--;	/* convert 1-based start to an array offset */
+	/* a start of 0 (or an unreadable range) must not index
+	   before aa1ptr[0] */
+	if (sstart < 0) sstart = 0;
+	if (sstop <= 0 ) sstop = BIGNUM;
+      }
+
+      /* shift the requested region down to the start of aa1ptr */
+      n1 = min(n1, sstop);
+      for (id=0,is=sstart; is<n1; ) {
+	aa1ptr[id++]=aa1ptr[is++];
+      }
+      aa1ptr[id]='\0';
+      n1 -= sstart;
+      current_seq_p->l_off += sstart;
+    }
+
+    /* update n1 after possible changes */
+    current_seq_p->n1 = n1;
+
+#ifdef DEBUG
+    if (getlib_info->n_libstr <= MAX_UID) {
+      if ((bp=strchr(current_mseq_p->libstr,' '))!=NULL) *bp='\0';
+    }
+    if (aa1[-1]!='\0' || aa1ptr[n1]!='\0') {
+      fprintf(stderr,"%s: aa1[%d] at %ld:%lld  missing NULL boundaries: %d %d\n",
+	      current_mseq_p->libstr,n1, m_msp->db.entries+1,current_mseq_p->lseek,
+	      aa1[-1],aa1ptr[n1]);
+    }
+#endif
+    /* check for a continued sequence and provide a pointer to 
+       the n1_tot array if lcont || ocont */
+    getlib_info->n1tot_v += n1;
+    if (getlib_info->lcont && !getlib_info->ocont) {	/* get a new pointer */
+      /* n1tot entries are handed out from blocks of 1000 ints */
+      if (getlib_info->n1tot_cnt <= 0) {
+	if ((getlib_info->n1tot_ptr=calloc(1000,sizeof(int)))==NULL) {
+	  fprintf(stderr," cannot allocate n1tot_ptr\n");
+	  exit(1);
+	}
+	else {getlib_info->n1tot_cnt=1000;}
+      }
+      getlib_info->n1tot_cnt--;
+      getlib_info->n1tot_cur = getlib_info->n1tot_ptr++;
+    }
+    current_mseq_p->n1tot_p = getlib_info->n1tot_cur;
+
+    m_msp->db.entries++;
+    m_msp->db.length += n1;
+    if (m_msp->db.length > LONG_MAX) {
+      m_msp->db.length -= LONG_MAX; m_msp->db.carry++;
+    }
+
+    /* don't count long sequences more than once */
+    if (aa1!=aa1ptr) {	/* this is a continuation */
+      current_seq_p->n1 = n1 += m_msp->ldb_info.l_overlap;	/* corrected 28-June-2008 */
+      m_msp->db.entries--;
+    }
+
+#ifdef DEBUG
+    current_seq_p->adler32_crc =
+      current_mseq_p->adler32_crc = adler32(1L,current_seq_p->aa1b,current_seq_p->n1);
+
+    /* This finds most reasons for core dumps */
+    if (ppst->debug_lib)
+      for (i=0; i<n1; i++) {
+	if (aa1[i]>ppst->nsqx || aa1[i] <= 0) {
+	  fprintf(stderr,
+		  "%s residue[%d/%d] %d range (%d) lcont/ocont: %d/%d\n%s\n",
+		  current_mseq_p->libstr,i,current_seq_p->n1,aa1[i],ppst->nsq,
+		  getlib_info->lcont,getlib_info->ocont,aa1ptr+i);
+	  aa1[i]=0;
+	  n1=i-1;
+	  break;
+	}
+      }
+#endif
+
+    if ( n1 <= 1) {goto loop2;}
+
+    /* save the end of this piece as the overlap for the next one */
+    if (getlib_info->lcont) {
+      memcpy(getlib_info->aa1save,&aa1[n1-m_msp->ldb_info.l_overlap],m_msp->ldb_info.l_overlap);
+    }
+
+    /* all done with the previous getlib(), now get a new
+       current_seq_p (current_mseq_p) and reset aa1 */
+    /* next_sequence_p knows if there is space in the aa1b buffer  */
+
+  loop2: 
+    if (getlib_info->lcont) {
+      /* this must be n1, which is the old value, not current_seq_p->n1 */ 
+      getlib_info->loffset += n1 - m_msp->ldb_info.l_overlap;
+
+      /* ocont must be preserved across seqr_chains */
+      getlib_info->ocont = getlib_info->lcont;
+
+      /* if lcont>0, then lseek must be preserved; getlib() functions
+	 do not set lseek (loffset) if lcont>0 */
+      getlib_info->lseek = current_mseq_p->lseek;
+    }
+    else {
+      /* end of a (possibly continued) sequence: record its total
+	 length and clear the continuation state */
+      if (getlib_info->ocont) {
+	*getlib_info->n1tot_cur = getlib_info->n1tot_v;
+      }
+      getlib_info->ocont = 0;
+      getlib_info->loffset = 0l;
+      getlib_info->n1tot_v = 0;
+      getlib_info->n1tot_cur = NULL;
+    }
+  } /* end while (next_sequence_p()) */
+
+  /* the chain is full */
+  end_seqr_chain(my_seqr_chain);
+  return my_seqr_chain;
+
+ return_null:
+  getlib_info->cur_seqr_chain = getlib_info->start_seqr_chain;
+  return NULL;
+}
+
+/* reset_seqr_chain() prepares the seqr_chain list (in memory) for
+   another search: in every link, starting at seqr_base, cur_seq_cnt
+   is set to that link's max_chain_seqs */
+void
+reset_seqr_chain(struct seqr_chain *seqr_base) {
+  struct seqr_chain *link_p = seqr_base;
+
+  while (link_p != NULL) {
+    link_p->cur_seq_cnt = link_p->max_chain_seqs;
+    link_p = link_p->next;
+  }
+}
+
+/* close_lib_list() -- closes every open library in a lib_struct list.
+
+   inputs: lib_list_p -- first entry of the list (may be NULL)
+	   free_flag -- if non-zero, also free each entry and its
+	     file_name
+	   mm_force -- passed to closelib(); if non-zero the m_file_p
+	     structure is always freed, otherwise only when its libf
+	     is NULL after closelib()
+*/
+void
+close_lib_list(struct lib_struct *lib_list_p, int free_flag, int mm_force) {
+  struct lib_struct *entry_p, *after_p;
+  struct lmf_str *m_fd;
+
+  entry_p = lib_list_p;
+  while (entry_p != NULL) {
+    /* remember the successor; the entry may be freed below */
+    after_p = entry_p->next;
+
+    m_fd = entry_p->m_file_p;
+    if (m_fd != NULL) {
+      closelib(m_fd, mm_force);
+      if (mm_force || m_fd->libf == NULL) {
+	free(m_fd->lline);
+	free(m_fd);
+	entry_p->m_file_p = NULL;
+      }
+    }
+
+    if (free_flag) {
+      free(entry_p->file_name);
+      free(entry_p);
+    }
+
+    entry_p = after_p;
+  }
+}
+
+/* preserve_seq() -- copy the seq/mseq records of a thread buffer
+   entry into best_seqs[]/best_mseqs[], so that the thread buffer
+   seq/mseq can be re-used.
+
+   The destination slot has the same index as the entry's beststr
+   (lib_buf2_dp->best_save) has in best[].  That beststr, and the
+   following bbp_link entries that still refer to the same sequence
+   (same seq pointer and n1), are re-pointed at the copies.  The
+   copied seq_record does not keep the sequence pointer (aa1b=NULL).
+*/
+
+void preserve_seq(struct buf2_data_s *lib_buf2_dp,
+		  struct seq_record *best_seqs,
+		  struct mseq_record *best_mseqs,
+		  struct beststr *best) {
+  struct beststr *owner_bbp, *link_bbp;
+  struct seq_record *buf_seq_p, *copy_seq_p;
+  struct mseq_record *copy_mseq_p;
+
+  /* the thread buffer entry knows where its result is saved in best[] */
+  owner_bbp = lib_buf2_dp->best_save;
+  buf_seq_p = owner_bbp->seq;
+
+  copy_seq_p = best_seqs + (owner_bbp - best);
+  copy_mseq_p = best_mseqs + (owner_bbp - best);
+
+  owner_bbp->seq = copy_seq_p;
+  owner_bbp->mseq = copy_mseq_p;
+
+  /* re-point linked results for the same sequence; stop at the first
+     link that refers to something else */
+  link_bbp = owner_bbp->bbp_link;
+  while ((link_bbp != NULL) && (link_bbp->seq == buf_seq_p)
+	 && (link_bbp->n1 == buf_seq_p->n1)) {
+    link_bbp->seq = copy_seq_p;
+    link_bbp->mseq = copy_mseq_p;
+    link_bbp = link_bbp->bbp_link;
+  }
+
+  memcpy(copy_seq_p, lib_buf2_dp->seq, sizeof(struct seq_record));
+  memcpy(copy_mseq_p, lib_buf2_dp->mseq, sizeof(struct mseq_record));
+  copy_seq_p->aa1b = NULL;
+}
+
+/* preserve_seq2() -- copy the seq/mseq records referenced by a
+   beststr entry into best_seqs[]/best_mseqs[], so that the thread
+   buffer seq/mseq can be re-used.
+
+   The destination slot has the same index as bbp has in best[].
+   The following bbp_link entries that still refer to the same
+   sequence (same seq pointer and n1) are re-pointed at the copies,
+   then the records are copied (without the sequence pointer,
+   aa1b=NULL) and bbp itself is re-pointed.
+*/
+
+void preserve_seq2(struct beststr *bbp,
+		   struct seq_record *best_seqs,
+		   struct mseq_record *best_mseqs,
+		   struct beststr *best) {
+  struct beststr *link_bbp;
+  struct seq_record *buf_seq_p, *copy_seq_p;
+  struct mseq_record *copy_mseq_p;
+
+  buf_seq_p = bbp->seq;
+
+  /* the copies go in the slot matching bbp's position in best[] */
+  copy_seq_p = best_seqs + (bbp - best);
+  copy_mseq_p = best_mseqs + (bbp - best);
+
+  /* re-point linked results for the same sequence; stop at the first
+     link that refers to something else */
+  link_bbp = bbp->bbp_link;
+  while ((link_bbp != NULL) && (link_bbp->seq == buf_seq_p)
+	 && (link_bbp->n1 == buf_seq_p->n1)) {
+    link_bbp->seq = copy_seq_p;
+    link_bbp->mseq = copy_mseq_p;
+    link_bbp = link_bbp->bbp_link;
+  }
+
+  /* bbp still points at the originals here, so copy before re-pointing */
+  memcpy(copy_seq_p, bbp->seq, sizeof(struct seq_record));
+  memcpy(copy_mseq_p, bbp->mseq, sizeof(struct mseq_record));
+  copy_seq_p->aa1b = NULL;
+
+  bbp->seq = copy_seq_p;
+  bbp->mseq = copy_mseq_p;
+}
+
+/* **************************************************************** */
+/* seqr_chain_work() (comp_lib8/9.c, comp_lib7e.c) corresponds to     */
+/* comp_lib5e.c/getlib_buf_work()                                   */
+/* in current versions, these functions do all the initial library  */
+/* sequence similarity scan -- they exist to be used with expansion */
+/* link libraries                                                   */
+/* **************************************************************** */
+/* For each seqr_chain returned by next_seqr_chain(), fill lib_bhead_p
+   with one buf2 entry per (sequence, frame), then either hand the
+   buffer to the workers (COMP_THR/PCOMPLIB) or run the comparisons
+   directly, and pass finished buffers to save_best2().
+
+   pre_nstats, nstats, rand_state, num_reader_bufs and RESULTS_BUF are
+   not declared in this function; they come from the enclosing file.
+*/
+void
+seqr_chain_work(unsigned char **aa0, unsigned char *aa0s, struct buf_head *lib_bhead_p,
+		struct getlib_str *getlib_info,	const struct mng_thr *m_bufi_p,
+		struct mngmsg *m_msp, struct pstruct *ppst, void *pstat_void,
+		struct db_str *ldb, struct hist_str *histp, struct score_count_s *s_info,
+#if !defined(COMP_THR) && !defined(PCOMPLIB)
+		unsigned char *aa1shuff, void *f_str, void *qf_str,
+#endif
+		struct seq_record *best_seqs, struct mseq_record *best_mseqs,
+		struct beststr *best, FILE *fdata) {
+
+  struct seqr_chain *current_seqr_chain_p;
+  struct buf2_data_s *lib_buf2_dp;
+  struct seq_record *current_seq_p;
+  struct mseq_record *current_mseq_p;
+  int i, itt, jstats;
+  int seqr_rec_cnt;	/* count through sequences in seqr_chain */
+  int buf2_shuff_mask;		/* pre-calculate when shuffles requested */
+
+  /* set shuffle mask for -z > 10 */
+  buf2_shuff_mask = 0;
+  if (ppst->zsflag >= 10 && ppst->zsflag < 20) {
+    buf2_shuff_mask = BUF2_DOSHUF;
+  }
+
+  /* must have a valid lib_bhead_p starting into this loop */
+  /* start the search */
+
+  while ((current_seqr_chain_p 
+	 = next_seqr_chain(m_bufi_p, getlib_info, lib_bhead_p,
+			   m_msp, ppst))) {
+
+    /* to enable the transition from everything in memory (comp_lib7e)
+       to buffers read as needed (comp_lib5e), comp_lib9.c requires that
+       lib_bhead_p accommodate a full seqr_chain */
+
+    /* reset the buffer header and attach it to this chain */
+    lib_bhead_p->hdr.buf2_cnt = 0;
+    lib_bhead_p->hdr.have_results = 0;
+    lib_bhead_p->hdr.stop_work = 0;
+    lib_bhead_p->hdr.buf2_type= (BUF2_DOWORK | buf2_shuff_mask);
+    lib_buf2_dp = lib_bhead_p->buf2_data;
+    lib_bhead_p->hdr.aa1b_start = current_seqr_chain_p->aa1b_base;
+    lib_bhead_p->hdr.aa1b_used = 0;
+    lib_bhead_p->hdr.seqr_cnt = 0;
+    lib_bhead_p->hdr.seq_b = current_seqr_chain_p->seqr_base;
+
+    current_seq_p = current_seqr_chain_p->seqr_base;
+    current_mseq_p = current_seqr_chain_p->mseqr_base;
+    seqr_rec_cnt = current_seqr_chain_p->cur_seq_cnt;
+
+    /* one pass per sequence record in the chain */
+    while (seqr_rec_cnt-- > 0) {
+      lib_buf2_dp->seq = current_seq_p;
+      lib_buf2_dp->mseq = current_mseq_p;
+      lib_bhead_p->hdr.aa1b_used += current_seq_p->n1+1;
+	
+#ifdef DEBUG
+      if (current_seq_p->aa1b[-1] != '\0') {
+	fprintf(stderr," invalid current_seq_p->aa1b[-1] = %d\n",current_seq_p->aa1b[-1]);
+      }	
+#endif
+      /* check to see whether this score (or a shuff score) should
+	 be included in statistics */
+
+      jstats = samp_stats_idx(&pre_nstats, nstats, rand_state);
+
+#ifdef PCOMPLIB
+      lib_buf2_dp->seq_dup = 0;	/* mark first ->seq as original, not duplicate */
+#endif
+      /* one buf2 entry per frame; all frames of a sequence share the
+	 same stats_idx */
+      /* NOTE(review): lib_buf2_dp is advanced before ->seq/->mseq are
+	 stored, so the entry following the last frame is written as
+	 well -- confirm buf2_data always has room for it */
+      for (itt=m_msp->revcomp; itt<=m_msp->nitt1; itt++) {
+	lib_buf2_dp->frame = itt;
+	lib_buf2_dp->stats_idx = jstats;
+	lib_buf2_dp++;		/* point to next buf2 */
+	lib_bhead_p->hdr.buf2_cnt++;
+
+	/* point to the current sequence */
+#ifdef PCOMPLIB
+	lib_buf2_dp->seq_dup = 1;	/* mark duplicates */
+#endif
+	lib_buf2_dp->seq = current_seq_p;
+	lib_buf2_dp->mseq = current_mseq_p;
+      } /* for (itt .. */
+      lib_bhead_p->hdr.seqr_cnt++;
+      current_seq_p++;	/* ready for the next seq_p, necessary
+			   to re-initialize lib_bhead_p->aa1b_base */
+      current_mseq_p++;
+    }
+
+    /* now I have a full lib_bhead_p buffer, which corresponds to a
+       seqr_chain;  send it off and get a new one */
+
+#if defined(COMP_THR) || defined(PCOMPLIB)  /* if COMP_THR/PCOMPLIB - fill and empty buffers */
+    /* provide filled buffer to workers */
+    lib_bhead_p->hdr.have_data = 1;
+    lib_bhead_p->hdr.seq_record_continuous = 1;
+    put_rbuf(lib_bhead_p,m_bufi_p->max_work_buf);
+    get_rbuf(&lib_bhead_p,m_bufi_p->max_work_buf);	/* get an empty buffer to fill */
+#else	/* just do the searches */
+    if (lib_bhead_p->hdr.buf2_type & BUF2_DOWORK) {
+      buf_do_work(aa0, m_msp->n0, lib_bhead_p, m_msp->nitt1, ppst, f_str);
+      if (m_msp->qshuffle)
+	buf_qshuf_work(aa0s,m_msp->n0, lib_bhead_p, m_msp->nitt1, ppst, qf_str, ppst->score_ix);
+    }
+    if (lib_bhead_p->hdr.buf2_type & BUF2_DOSHUF) {
+      buf_shuf_work(aa0,m_msp->n0, aa1shuff, lib_bhead_p, m_msp->nitt1, ppst,
+		    f_str, ppst->score_ix,rand_state);
+    }
+#endif
+
+    /* "empty" buffers have results that must be processed */
+    if (lib_bhead_p->hdr.buf2_cnt && lib_bhead_p->hdr.have_results) {
+      save_best2(lib_bhead_p,m_msp, ppst, ldb, fdata,
+		histp, pstat_void, s_info);
+
+      if (getlib_info->use_memory <= 0 && getlib_info->lib_list_p->m_file_p->get_mmap_chain==NULL) {
+	/* this section of code is only used for re-cycled buffers */
+	if (lib_bhead_p->hdr.have_best_save) {
+	  /* copy every record that a beststr entry still points to
+	     out of the buffer before the buffer is refilled */
+	  lib_buf2_dp = lib_bhead_p->buf2_data;
+	  while (lib_bhead_p->hdr.buf2_cnt--) {
+	    if (lib_buf2_dp->best_save != NULL) {
+	      preserve_seq(lib_buf2_dp, best_seqs, best_mseqs, best);
+	    }
+	    lib_buf2_dp->best_save = NULL;
+	    lib_buf2_dp++;
+	  }
+	  lib_bhead_p->hdr.have_best_save = 0;
+	}
+      }
+    }
+    lib_bhead_p->hdr.buf2_cnt = 0;
+    lib_bhead_p->hdr.have_data=0;
+  } /* end seqr_chain loop */
+    
+#if defined(COMP_THR) || defined(PCOMPLIB)
+  /* send off the final data buffer */
+  lib_bhead_p->hdr.have_data = 1;	/* ignored if buf2_cnt <= 0 */
+  put_rbuf(lib_bhead_p,m_bufi_p->max_work_buf);
+
+  /* wait for the threads to finish */
+  wait_rbuf(m_bufi_p->max_work_buf);
+  /* wait_rbuf(m_bufi_p->max_work_buf - empty_reader_bufs); */
+
+  /* save the final results */
+  /* this loop assumes that the seq_record/mseq_record buffers pointed
+     to by RESULTS_BUF[i] are independent; thus there must be at least
+     num_reader_bufs/m_bufi.max_work_buf seqr chains.
+   */
+  for (i=0; i < num_reader_bufs; i++) {
+    save_best2(RESULTS_BUF[i],m_msp, ppst, ldb, fdata,
+	      histp, pstat_void, s_info);
+
+    /* NOTE(review): save_best2() above is given RESULTS_BUF[i], but
+       the have_best_save test and the preserve_seq() walk below use
+       lib_bhead_p (and decrement lib_bhead_p->hdr.buf2_cnt), unlike
+       the equivalent code inside the seqr_chain loop -- confirm
+       whether RESULTS_BUF[i] was intended here */
+    if (getlib_info->use_memory <= 0 && getlib_info->lib_list_p && getlib_info->lib_list_p->m_file_p->get_mmap_chain==NULL) {
+      /* this section of code is only used for re-cycled buffers */
+      if (lib_bhead_p->hdr.have_best_save) {
+	lib_buf2_dp = lib_bhead_p->buf2_data;
+	while (lib_bhead_p->hdr.buf2_cnt--) {
+	  if (lib_buf2_dp->best_save != NULL) {
+	    preserve_seq(lib_buf2_dp, best_seqs, best_mseqs, best);
+	  }
+	  lib_buf2_dp->best_save = NULL;
+	  lib_buf2_dp++;
+	}
+	lib_bhead_p->hdr.have_best_save = 0;
+      }
+    }
+    RESULTS_BUF[i]->hdr.buf2_cnt = RESULTS_BUF[i]->hdr.have_results = 0;
+  }
+#else	/* !defined(COMP_THR || PCOMPLIB) */
+  /* unthreaded: run the comparisons once more on lib_bhead_p after
+     the loop; buf2_cnt was reset to 0 at the end of the last
+     iteration */
+  if (lib_bhead_p->hdr.buf2_type & BUF2_DOWORK) {
+    buf_do_work(aa0, m_msp->n0, lib_bhead_p, m_msp->nitt1, ppst, f_str);
+    if (m_msp->qshuffle)
+      buf_qshuf_work(aa0s,m_msp->n0, lib_bhead_p, m_msp->nitt1, ppst, qf_str, ppst->score_ix);
+  }
+
+  if (lib_bhead_p->hdr.buf2_type & BUF2_DOSHUF) {
+    buf_shuf_work(aa0, m_msp->n0, aa1shuff, lib_bhead_p, m_msp->nitt1,
+		  ppst, f_str, ppst->score_ix, rand_state);
+  }
+
+  save_best2(lib_bhead_p,m_msp, ppst, ldb, fdata,
+	    histp, pstat_void, s_info);
+
+  lib_bhead_p->hdr.buf2_cnt = lib_bhead_p->hdr.have_results = 0;
+#endif
+}
+
+/* **************************************************************** */
+/* display_result() allows more flexible definitions of whether a
+   result is displayed.  Originally, it was simply
+   bbp->rst.escore<m_msg.e_low, but we may want to require at least
+   one score display.
+
+   lt_flag != 0 : returns 1 when bbp->rst.escore <= m_msp->e_cut
+   lt_flag == 0 : returns 1 when bbp->rst.escore >  m_msp->e_low
+   returns 0 otherwise                                              */
+/* **************************************************************** */
+int
+display_result(struct beststr *bbp, struct mngmsg *m_msp, int lt_flag) {
+  int show_it;
+
+  show_it = lt_flag ? (bbp->rst.escore <= m_msp->e_cut)
+                    : (bbp->rst.escore > m_msp->e_low);
+
+  return show_it ? 1 : 0;
+}
+
+/* **************************************************************** */
+/* fset_vars() is similar to initenv(), but sets some m_msp, ppst
+   values based on threaded/unthreaded variables
+   initfa.c and doinit.c do not know about COMP_THR/PCOMPLIB.
+   MAX_MEMK varies depending on COMP_THR in defs.h
+
+   m_msp->max_memK is set to MAX_MEMK unless the LIB_MEMK environment
+   variable is set.  LIB_MEMK is read as an integer with an optional
+   one-character suffix:
+     value <= 0       -> max_memK = BIGNUM
+     value            -> max_memK = value * 1024
+     value + 'G'      -> max_memK = value * 1024 * 1024
+   If LIB_MEMK does not start with a number, the MAX_MEMK default is
+   kept.  ppst is not used here.
+ */
+/* **************************************************************** */
+void fset_vars(struct mngmsg *m_msp, struct pstruct *ppst) {
+  long l_tmp;
+  char *cptr, ctmp;
+
+  m_msp->max_memK = MAX_MEMK;
+
+  if ((cptr=getenv("LIB_MEMK"))==NULL) return;
+
+  l_tmp = 0;
+  ctmp = '\0';
+  /* sscanf() returns the number of fields converted; with fewer than
+     one, l_tmp was never written and must not be used */
+  if (sscanf(cptr,"%ld%c",&l_tmp, &ctmp) < 1) return;
+
+  if (l_tmp <= 0) {
+    m_msp->max_memK = BIGNUM;
+  }
+  else {
+    l_tmp *= 1024;
+    if (ctmp == 'G') l_tmp *= 1024;
+    m_msp->max_memK = l_tmp;
+  }
+}
+
+/* **************************************************************** */
+/* one-time initialization of best, bestp_arr, best_seq, best_mseq,
+   stats, rstats
+
+   best, bestp_arr  - receive arrays of MAX_BEST+1 entries; entry 0 is
+                      filled with extreme scores and both pointers are
+                      then advanced by one, so the extreme entry ends
+                      up at index -1 of the returned arrays
+   best_seqs, best_mseqs - receive zeroed arrays of MAX_BEST+1 records
+   stats            - receives a zeroed array of MAX_STATS entries
+   rstats           - receives a zeroed array of shuff_max entries
+   link_flag        - not used in this function
+
+   Any allocation failure is reported through s_abort().             */
+/* **************************************************************** */
+void
+init_beststats(struct beststr **best, struct beststr ***bestp_arr,
+	       struct seq_record **best_seqs, struct mseq_record **best_mseqs,
+	       struct stat_str **stats, struct stat_str **rstats,
+	       int shuff_max, int link_flag) {
+
+  /* Allocate space for saved scores */
+  if ((*best = 
+       (struct beststr *)calloc((MAX_BEST+1),sizeof(struct beststr)))==NULL) {
+    s_abort("Cannot allocate best struct","");
+  }
+  if ((*bestp_arr = 
+       (struct beststr **)malloc((MAX_BEST+1)*sizeof(struct beststr *)))==NULL) {
+    s_abort("Cannot allocate bestp_arr","");
+  }
+
+  /* initialize high score boundary */
+  (*bestp_arr)[0] = *best;
+  (*best)[0].rst.score[0]=(*best)[0].rst.score[1]=(*best)[0].rst.score[2]= INT_MAX;
+  (*best)[0].rst.escore=FLT_MIN;	/* for E()-values, lower is best */
+  (*best)[0].zscore=FLT_MAX;	/* for Z-scores, bigger is best */
+
+  (*best)++; (*bestp_arr)++;	/* ensures that the 0-th value is extreme */
+
+  /* save best score sequence info -- used if there is not enough memory for library */
+  if ((*best_seqs =
+       (struct seq_record *)calloc((MAX_BEST+1),sizeof(struct seq_record)))==NULL) {
+    s_abort("Cannot allocate best_seqs","");
+  }
+
+  /* report the array that actually failed (was "best_seqs") */
+  if ((*best_mseqs =
+       (struct mseq_record *)calloc((MAX_BEST+1),sizeof(struct mseq_record)))==NULL) {
+    s_abort("Cannot allocate best_mseqs","");
+  }
+
+  /* allocate space for sampled scores */
+  if ((*stats =
+       (struct stat_str *)calloc(MAX_STATS,sizeof(struct stat_str)))==NULL) {
+    s_abort ("Cannot allocate stats struct","");
+  }
+
+  /* allocate space for shuffled library scores */
+  if ((*rstats =
+       (struct stat_str *)calloc(shuff_max,sizeof(struct stat_str)))==NULL) {
+    s_abort ("Cannot allocate rstats struct","");
+  }
+}
+
+#ifdef DEBUG
+/* check_rbuf() (DEBUG only) - sanity check the first hdr.buf2_cnt
+   entries of cur_buf->buf2_data.  Reports, on stderr, entries with a
+   NULL ->seq or ->seq->aa1b, and entries whose stored adler32_crc
+   does not match the CRC of the n1 bytes at aa1b.  Nothing is
+   modified. */
+void
+check_rbuf(struct buf_head *cur_buf) {
+  int buf_cnt, index;
+  struct buf2_data_s *cur_buf2_dp;
+
+  buf_cnt = cur_buf->hdr.buf2_cnt;
+  cur_buf2_dp = cur_buf->buf2_data;
+
+  for (index = 0; index < buf_cnt; index++, cur_buf2_dp++) {
+    if (cur_buf2_dp->seq == NULL || cur_buf2_dp->seq->aa1b == NULL) {
+      fprintf(stderr, "*** [%s/check_rbuf] NULL buffer->seq entry: at %d\n",prog_func, index);
+      /* nothing to checksum -- the CRC test would dereference NULL */
+      continue;
+    }
+    if (cur_buf2_dp->seq->adler32_crc != adler32(1L,cur_buf2_dp->seq->aa1b, cur_buf2_dp->seq->n1)) {
+      fprintf(stderr, "*** [%s/check_rbuf] CRC mismatch at %d (%d)\n",prog_func, index,cur_buf2_dp->seq->n1);
+    }
+  }
+}
+#endif
diff --git a/src/compacc2.c b/src/compacc2.c
new file mode 100644
index 0000000..2f06072
--- /dev/null
+++ b/src/compacc2.c
@@ -0,0 +1,4119 @@
+/* $Id: compacc2.c 1280 2014-08-21 00:47:55Z wrp $ */
+
+/* copyright (c) 1996, 1997, 1998, 1999, 2014 by William R. Pearson and
+   The Rector & Visitors of the University of Virginia */
+
+/* Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under this License is distributed on an "AS
+   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+   express or implied.  See the License for the specific language
+   governing permissions and limitations under the License. 
+*/
+
+/* Concurrent read version */
+
+#include <stdio.h>
+#include <stdlib.h>
+#if defined(UNIX)
+#include <unistd.h>
+#endif
+#if defined(UNIX) || defined(WIN32)
+#include <sys/types.h>
+#endif
+
+#include <limits.h>
+#include <ctype.h>
+#include <float.h>
+
+#include <string.h>
+#include <time.h>
+#include <math.h>
+
+#include "defs.h"
+#include "param.h"
+#include "structs.h"
+
+#include "mm_file.h"
+#include "best_stats.h"
+
+#define XTERNAL
+#include "uascii.h"
+#include "upam.h"
+#undef XTERNAL
+
+#ifdef DEBUG
+extern char ext_qtitle[];
+#endif
+
+extern void abort ();
+
+#include "drop_func.h"	/* get init_work() */
+/* drop_func.h includes dyn_string.h */
+
+void revcomp(unsigned char *seq, int n, int *c_nt);
+extern void qshuffle(unsigned char *aa0, int n0, int nm0, void *);
+#ifdef DEBUG
+unsigned long adler32(unsigned long, const unsigned char *, unsigned int);
+#endif
+
+void
+s_annot_to_aa1a(long offset, int n1, struct annot_str *annot_p, unsigned char *ann_arr, char *tmp_line);
+
+extern void add_annot_def(struct mngmsg *m_msp, char *line, int qa_flag);
+int add_annot_char(unsigned char *ann_arr, char ctmp_label);
+
+int get_annot(char *sname, struct mngmsg *, char *bline, long offset, int n1, 
+	      struct annot_str **annot_p,int target, int debug);
+int
+get_annot_list(char *sname, struct mngmsg *m_msp, struct beststr **bestp_arr,
+	       int nbest,int target, int debug);
+void
+print_sum(FILE *fd, struct db_str *qtt, struct db_str *ntt, int in_mem, long mem_use);
+int
+check_seq_range(unsigned char *aa1b, int n1, int nsq, char *str);
+/* print timing information */
+extern void ptime (FILE *, long);
+
+/* this function consolidates code in comp_lib4.c for non-threaded, and in
+   work_thr2.c (threads) and work_comp2.c (worker nodes)
+*/
+
+/* init_aa0() - prepare the query sequence(s) and work structures.
+
+   aa0[0]       - query residues (n0 of them); SEQ_PAD zero bytes are
+                  written starting at aa0[0][n0], so the caller's
+                  buffer must have room for them
+   aa0[1..5]    - set to aa0[0]; aa0[1] is replaced by a newly
+                  allocated reverse-complement copy when qframe == 2
+   f_str[0..5]  - f_str[0] is set by init_work(); the others alias it,
+                  except f_str[1] when qframe == 2
+   aa0s/qf_str  - when qshuffle_flg is set, *aa0s receives a newly
+                  allocated copy of the query passed through
+                  qshuffle(), and init_work() is run on it into qf_str
+   aa1s         - always receives a newly allocated max_tot+1 byte
+                  buffer; the returned pointer is one past a leading 0
+
+   Every allocated buffer starts with a '\0' byte and the stored
+   pointer is advanced past it.  Allocation failure prints a message
+   to stderr and exits.
+*/
+void
+init_aa0(unsigned char **aa0, int n0, int nm0,
+	 unsigned char **aa0s, unsigned char **aa1s,
+	 int qframe, int qshuffle_flg, int max_tot,
+	 struct pstruct *ppst, void **f_str, void **qf_str,
+	 void *my_rand_state) {
+  int id;
+
+  /* note that aa[5,4,3,2] are never used, but are provided so that frame
+     can range from 0 .. 5; likewise for f_str[5..2] */
+
+  aa0[5] = aa0[4] = aa0[3] = aa0[2] = aa0[1] = aa0[0];
+
+  /* zero out for SSE2/ALTIVEC -- make sure this is ALWAYS done */
+  for (id=0; id < SEQ_PAD; id++) aa0[0][n0+id] = '\0';
+
+  init_work (aa0[0], n0, ppst, &f_str[0]);
+  f_str[5] = f_str[4] = f_str[3] = f_str[2] = f_str[1] = f_str[0];
+
+  if (qframe == 2) {
+    if ((aa0[1]=(unsigned char *)calloc((size_t)n0+2+SEQ_PAD,sizeof(unsigned char)))==NULL) {
+      fprintf(stderr,"*** error [%s:%d] - cannot allocate aa01[%d]\n", __FILE__, __LINE__, n0);
+      /* aa0[1] is dereferenced immediately below; stop here, as the
+	 other allocations in this function do */
+      exit(1);
+    }
+    *aa0[1]='\0';
+    aa0[1]++;
+    memcpy(aa0[1],aa0[0],n0+1);
+    /* for ALTIVEC/SSE2, must pad with 16 NULL's */
+    for (id=0; id<SEQ_PAD; id++) {aa0[1][n0+id]=0;}
+    revcomp(aa0[1],n0,ppst->c_nt);
+    init_work (aa0[1], n0, ppst, &f_str[1]);
+  }
+
+  if (qshuffle_flg) {
+    if ((*aa0s=(unsigned char *)calloc(n0+2+SEQ_PAD,sizeof(char)))==NULL) {
+      fprintf(stderr,"*** error [%s:%d] - cannot allocate aa0s[%d]\n",__FILE__, __LINE__, n0+2);
+      exit(1);
+    }
+    **aa0s='\0';
+    (*aa0s)++;
+    memcpy(*aa0s,aa0[0],n0);
+    qshuffle(*aa0s,n0,nm0, my_rand_state);
+    /* for SSE2/ALTIVEC, must pad with 16 NULL's */
+    for (id=0; id<SEQ_PAD; id++) {(*aa0s)[n0+id]=0;}
+    init_work (*aa0s, n0, ppst, qf_str);
+  }
+
+  /* always allocate shuffle space */
+  if((*aa1s=calloc(max_tot+1,sizeof(char))) == NULL) {
+    fprintf(stderr,"*** error [%s:%d] - unable to allocate shuffled library sequence [%d]\n", __FILE__, __LINE__, max_tot);
+    exit(1);
+  }
+  else {
+    **aa1s=0;
+    (*aa1s)++;
+  }
+}
+
+/* because it is used to pre-allocate space, maxn has various
+   constraints.  For "simple" comparisons, it is simply the length of
+   the longest library sequence.  But for translated comparisons, it
+   must be 3 or 6X the length of the query sequence.
+
+   In addition, however, it can be reduced to make certain that
+   sequences are read in smaller chunks.  And, maxn affects how large
+   overlaps must be when sequences are read in chunks.
+
+   reset_maxn() returns the (possibly reduced) maxn and sets
+   m_msp->ldb_info.l_overlap and m_msp->ldb_info.maxt3 (and, in one
+   TFAST case, m_msp->ldb_info.maxn).  It exits if the query is too
+   long.
+*/
+
+int
+reset_maxn(struct mngmsg *m_msp, int over_len, int maxn) {
+
+  /* reduce maxn if requested */
+  if (m_msp->ldb_info.maxn > 0 && m_msp->ldb_info.maxn < maxn) maxn = m_msp->ldb_info.maxn;
+
+  if (m_msp->qdnaseq==m_msp->ldb_info.ldnaseq || m_msp->qdnaseq==SEQT_DNA ||
+      m_msp->qdnaseq == SEQT_RNA) {/* !TFAST - either FASTA or FASTX */
+
+    /* the query must fit in max_tot less the dupn allowance */
+    if (m_msp->n0 > m_msp->max_tot - m_msp->ldb_info.dupn) {
+      fprintf(stderr,"*** error [%s:%d] -  query sequence is too long %d > %d - %d %s\n",
+	      __FILE__, __LINE__,
+	      m_msp->n0,
+	      m_msp->max_tot, m_msp->ldb_info.dupn,
+	      m_msp->sqnam);
+      exit(1);
+    }
+
+    m_msp->ldb_info.l_overlap = over_len;
+    m_msp->ldb_info.maxt3 = maxn-m_msp->ldb_info.l_overlap;
+  }
+  else {	/* is TFAST */
+    if (m_msp->n0 > MAXTST) {
+      fprintf(stderr,"*** error [%s:%d] -  query sequence is too long %d %s\n",
+	      __FILE__, __LINE__, m_msp->n0,m_msp->sqnam);
+      exit(1);
+    }
+
+    if (m_msp->n0*3 > maxn ) {	/* n0*3 for the three frames - this
+				   will only happen if maxn has been
+				   set low manually */
+
+      /* raise maxn to n0*3 when there is room for it; not fatal,
+	 although the message is labelled as an error */
+      if (m_msp->n0*4+2 < m_msp->max_tot) { /* m_msg0*3 + m_msg0 */
+	fprintf(stderr,
+		"*** error [%s:%d] - query sequence too long for library segment: %d - resetting to %d\n",
+		__FILE__, __LINE__,
+		maxn,m_msp->n0*3);
+	maxn = m_msp->ldb_info.maxn = m_msp->n0*3;
+      }
+      else {
+	/* NOTE(review): the failed test compared n0*4+2 with
+	   m_msp->max_tot, but the message prints maxn -- confirm
+	   which value was meant */
+	fprintf(stderr,"*** error [%s:%d] -  query sequence too long for translated search: %d * 4 > %d %s\n",
+		__FILE__, __LINE__, m_msp->n0,maxn, m_msp->sqnam);
+	exit(1);
+      }
+    }
+
+    /* set up some constants for overlaps */
+    /* maxt3 and maxn are each reduced by 3, rounded down to a
+       multiple of 3, then incremented by 1 */
+    m_msp->ldb_info.l_overlap = 3*over_len;
+    m_msp->ldb_info.maxt3 = maxn-m_msp->ldb_info.l_overlap-3;
+    m_msp->ldb_info.maxt3 -= m_msp->ldb_info.maxt3%3;
+    m_msp->ldb_info.maxt3++;
+
+    maxn = maxn - 3; maxn -= maxn%3; maxn++;
+  }
+  return maxn;
+}
+
+
+/* scanseq() - count the residues in seq[0..n-1] whose code matches
+   one of the characters in str.
+
+   Each character of str is mapped to a residue code through qascii[]
+   (declared elsewhere); the return value is the number of seq[]
+   entries equal to one of those codes.  Codes outside 0..127, and
+   characters of str with the high bit set, are ignored rather than
+   used as array indices.
+   NOTE(review): assumes qascii[] has at least 128 entries -- confirm
+   against its declaration. */
+int
+scanseq(unsigned char *seq, int n, char *str) {
+  int tot,i;
+  int code;
+  unsigned char uc;
+  char aaray[128];		/* this must be set > nsq */
+
+  memset(aaray, 0, sizeof(aaray));
+
+  /* mark the residue codes of interest */
+  for ( ; *str != '\0'; str++) {
+    uc = (unsigned char)*str;
+    if (uc >= 128) continue;	/* never index qascii[] with a negative/large value */
+    code = qascii[uc];
+    if (code >= 0 && code < (int)sizeof(aaray)) aaray[code]=1;
+  }
+
+  /* count marked residues; seq[i] can be as large as 255 */
+  for (i=tot=0; i<n; i++) {
+    if (seq[i] < sizeof(aaray)) tot += aaray[seq[i]];
+  }
+  return tot;
+}
+
+/* subs_env takes a string, possibly with ${ENV}, and looks up all the
+   potential environment variables and substitutes them into the
+   string
+
+   dest      - receives the expanded, '\0'-terminated string
+   src       - input string; it is modified temporarily while a
+               variable name is looked up (so it must be writable) but
+               is always restored before returning
+   dest_size - size of dest in bytes; output is truncated to
+               dest_size-1 characters.  Nothing is written if
+               dest_size <= 0.
+
+   ${NAME} is replaced by getenv(NAME), or by nothing if NAME is not
+   set.  A '$' that is not followed by '{' is copied unchanged.  An
+   unterminated "${" is reported on stderr and the rest of src is
+   copied unchanged.
+*/
+
+void subs_env(char *dest, char *src, int dest_size) {
+  char *last_src, *bp, *bp1, *env_val;
+  size_t room, seg_len;
+
+  if (dest == NULL || dest_size <= 0) return;
+  *dest = '\0';
+  if (src == NULL) return;
+
+  last_src = src;
+  bp = strchr(src,'$');
+
+  while (bp != NULL && strlen(dest) < (size_t)(dest_size-1)) {
+    /* append (not overwrite) the literal text that precedes '$' */
+    room = (size_t)(dest_size - 1) - strlen(dest);
+    seg_len = (size_t)(bp - last_src);
+    strncat(dest, last_src, (seg_len < room) ? seg_len : room);
+
+    if (*(bp+1) != '{') {	/* a lone '$' - keep it */
+      strncat(dest, "$", (size_t)(dest_size - 1) - strlen(dest));
+      bp += 1;
+    }
+    else if ((bp1 = strchr(bp+2,'}'))==NULL) {
+      fprintf(stderr, "*** error [%s:%d] - Unterminated ENV: %s\n",
+	      __FILE__, __LINE__, src);
+      /* the text before '$' is already in dest; copy the remainder,
+	 starting at '$', literally below */
+      last_src = bp;
+      break;
+    }
+    else {	/* have  ${ENV} - put it in */
+      *bp1 = '\0';
+      env_val = getenv(bp+2);
+      *bp1 = '}';		/* always restore src */
+      if (env_val != NULL) {
+	strncat(dest, env_val, (size_t)(dest_size - 1) - strlen(dest));
+      }
+      bp = bp1+1;	/* bump bp even if getenv == NULL */
+    }
+    last_src = bp;
+
+    /* now get the next ${ENV} if present */
+    bp = strchr(last_src,'$');
+  }
+
+  /* now copy the last stuff; strncat() always terminates dest */
+  strncat(dest, last_src, (size_t)(dest_size - 1) - strlen(dest));
+}
+
+
+/* selectbest() - partially order bptr[0..n-1] by decreasing
+   rst.score[0].  The array is repeatedly partitioned around the score
+   of bptr[r], and [l,r] is narrowed towards index k until r <= l.
+   Only the pointers in bptr[] are rearranged.
+
+   NOTE(review): the inner "--j" scan has no lower bound; it appears
+   to rely on the element before bptr[l] stopping it (presumably the
+   extreme-score entry that init_beststats() leaves at index -1) --
+   confirm for every caller. */
+void
+selectbest(struct beststr **bptr, int k, int n)	/* k is rank in array */
+{
+  int v, i, j, l, r;
+  struct beststr *tmptr;
+
+  l=0; r=n-1;
+
+  while ( r > l ) {
+    v = bptr[r]->rst.score[0];	/* pivot score */
+    i = l-1;
+    j = r;
+    do {
+      while (bptr[++i]->rst.score[0] > v) ;	/* skip entries already left of the pivot */
+      while (bptr[--j]->rst.score[0] < v) ;	/* skip entries already right of the pivot */
+      tmptr = bptr[i]; bptr[i]=bptr[j]; bptr[j]=tmptr;
+    } while (j > i);
+    /* undo the last (crossed) swap and move the pivot to position i;
+       tmptr still holds the entry that was at bptr[i] */
+    bptr[j]=bptr[i]; bptr[i]=bptr[r]; bptr[r]=tmptr;
+    /* continue on the side that contains k; when i==k both bounds
+       move and the loop ends */
+    if (i>=k) r = i-1;
+    if (i<=k) l = i+1;
+  }
+}
+
+/* selectbestz() - same partial ordering as selectbest(), but keyed on
+   decreasing ->zscore.  The array is repeatedly partitioned around
+   the zscore of bptr[r], and [l,r] is narrowed towards index k until
+   r <= l.  Only the pointers in bptr[] are rearranged.
+
+   NOTE(review): the inner "--j" scan has no lower bound; it appears
+   to rely on the element before bptr[l] stopping it (presumably the
+   extreme-zscore entry that init_beststats() leaves at index -1) --
+   confirm for every caller. */
+void
+selectbestz(struct beststr **bptr, int k, int n)	/* k is rank in array */
+{
+  int i, j, l, r;
+  struct beststr *tmptr;
+  double v;
+
+  l=0; r=n-1;
+
+  while ( r > l ) {
+    v = bptr[r]->zscore;	/* pivot zscore */
+    i = l-1;
+    j = r;
+    do {
+      while (bptr[++i]->zscore > v) ;	/* skip entries already left of the pivot */
+      while (bptr[--j]->zscore < v) ;	/* skip entries already right of the pivot */
+      tmptr = bptr[i]; bptr[i]=bptr[j]; bptr[j]=tmptr;
+    } while (j > i);
+    /* undo the last (crossed) swap and move the pivot to position i;
+       tmptr still holds the entry that was at bptr[i] */
+    bptr[j]=bptr[i]; bptr[i]=bptr[r]; bptr[r]=tmptr;
+    /* continue on the side that contains k; when i==k both bounds
+       move and the loop ends */
+    if (i>=k) r = i-1;
+    if (i<=k) l = i+1;
+  }
+}
+
+/* improved shellsort with high-performance increments */
+/*
+shellsort(itemType a[], int l, int r)
+{ int i, j, k, h; itemType v;
+ int incs[16] = { 1391376, 463792, 198768, 86961, 33936,
+		  13776, 4592, 1968, 861, 336,
+		  112, 48, 21, 7, 3, 1 };
+ for ( k = 0; k < 16; k++)
+   for (h = incs[k], i = l+h; i <= r; i++) {
+       v = a[i]; j = i;
+       while (j > h && a[j-h] > v) {
+         a[j] = a[j-h]; j -= h;
+       }
+       a[j] = v;
+     }
+}
+*/
+
+/* sortbestz() - shell sort of bptr[0..nbest-1] into decreasing
+   ->zscore order, using a fixed table of increments and moving
+   (rather than exchanging) entries.  Only the pointers are moved. */
+void sortbestz(struct beststr **bptr, int nbest)
+{
+  static const int gap_seq[] = { 198768, 86961, 33936,
+                                 13776, 4592, 1968, 861, 336,
+                                 112, 48, 21, 7, 3, 1 };
+  const int n_gaps = (int)(sizeof(gap_seq)/sizeof(gap_seq[0]));
+  int g, step, pos, slot;
+  struct beststr *held;
+  double held_z;
+
+  for (g = 0; g < n_gaps; g++) {
+    step = gap_seq[g];
+    for (pos = step; pos < nbest; pos++) {
+      /* lift out the entry at pos and slide smaller-zscore entries
+         up by one step until its place is found */
+      held = bptr[pos];
+      held_z = held->zscore;
+      for (slot = pos;
+           slot >= step && bptr[slot-step]->zscore < held_z;
+           slot -= step) {
+        bptr[slot] = bptr[slot-step];
+      }
+      bptr[slot] = held;
+    }
+  }
+}
+
+
+/* sortbesti() - shell sort of bptr[0..nbest-1] on ->seq->index,
+   largest index first.  Only the pointers are moved. */
+void sortbesti(struct beststr **bptr, int nbest)
+{
+  static const int gap_seq[] = { 33936, 13776, 4592, 1968, 861, 336,
+                                 112, 48, 21, 7, 3, 1 };
+  const int n_gaps = (int)(sizeof(gap_seq)/sizeof(gap_seq[0]));
+  int g, step, pos, slot;
+  struct beststr *held;
+  double held_idx;	/* the index is compared as a double */
+
+  for (g = 0; g < n_gaps; g++) {
+    step = gap_seq[g];
+    for (pos = step; pos < nbest; pos++) {
+      held = bptr[pos];
+      held_idx = held->seq->index;
+      for (slot = pos;
+           slot >= step && bptr[slot-step]->seq->index < held_idx;
+           slot -= step) {
+        bptr[slot] = bptr[slot-step];
+      }
+      bptr[slot] = held;
+    }
+  }
+}
+
+/* sortbeste() - shell sort of bptr[0..nbest-1] into increasing
+   ->rst.escore order; the leading run of entries with
+   escore <= 2.0*DBL_MIN is then re-sorted with sortbestz(). */
+void
+sortbeste(struct beststr **bptr, int nbest)
+{
+  static const int gap_seq[] = { 198768, 86961, 33936,
+                                 13776, 4592, 1968, 861, 336,
+                                 112, 48, 21, 7, 3, 1 };
+  const int n_gaps = (int)(sizeof(gap_seq)/sizeof(gap_seq[0]));
+  int g, step, pos, slot, n_zero;
+  struct beststr *held;
+  double held_e;
+
+  for (g = 0; g < n_gaps; g++) {
+    step = gap_seq[g];
+    for (pos = step; pos < nbest; pos++) {
+      held = bptr[pos];
+      held_e = held->rst.escore;
+      for (slot = pos;
+           slot >= step && bptr[slot-step]->rst.escore > held_e;
+           slot -= step) {
+        bptr[slot] = bptr[slot-step];
+      }
+      bptr[slot] = held;
+    }
+  }
+
+  /* sometimes there are many high scores with E()==0.0, sort
+     those by z() score */
+
+  for (n_zero = 0;
+       n_zero < nbest && bptr[n_zero]->rst.escore <= 2.0*DBL_MIN;
+       n_zero++) { }
+
+  if (n_zero > 1) sortbestz(bptr,n_zero);
+}
+
+extern char *prog_func;
+extern char *verstr, *iprompt0, *refstr, *mp_verstr;
+extern long tstart, tscan, tprev, tdone;	/* Timing */
+#ifdef COMP_MLIB
+extern long ttscan, ttdisp;
+#endif
+extern time_t tdstart, tddone;
+
+/* ****************************************************************
+   print_header1() - start-of-run banner, written to fd:
+     the command line (argv_line) as a '#' comment (PGM_DOC builds,
+       unless MX_M8OUT or MX_MBLAST2 is set)
+     a "#:lav" header for MX_M11OUT
+     the opening HTML for MX_HTML (<html>... only with HTML_HEAD)
+     !BLAST (std_output)
+       prompt, version, please cite
+     BLAST (MX_MBLAST2)
+       program, version, Reference
+   ppst is not used here.
+**************************************************************** */
+void
+print_header1(FILE *fd, const char *argv_line,
+	      const struct mngmsg *m_msp, const struct pstruct *ppst) {
+  /* a citation is shown only when refstr is non-empty */
+  const int have_ref = (refstr != NULL && refstr[0] != '\0');
+
+#ifdef PGM_DOC
+  if ((m_msp->markx & (MX_M8OUT+MX_MBLAST2)) == 0) {
+    fprintf(fd, "#%s\n",argv_line);
+  }
+#endif
+
+  if (m_msp->markx & MX_M11OUT) {
+    /* the first character of argv_line is skipped here */
+    fprintf(fd, "#:lav\n\nd {\n   \"%s\"\n}\n",argv_line+1);
+  }
+
+  if (m_msp->markx & MX_HTML) {
+#ifdef HTML_HEAD
+    fprintf(fd,"<html>\n<head>\n<title>%s Results</title>\n</head>\n<body>\n",prog_func);
+#endif
+    fprintf(fd,"<pre>\n");
+  }
+
+  if (m_msp->std_output) {
+    fprintf(fd,"%s\n",iprompt0);
+    if (!have_ref) {
+      fprintf(fd," version %s%s\n",verstr,mp_verstr);
+    }
+    else {
+      fprintf(fd," version %s%s\nPlease cite:\n %s\n",verstr,mp_verstr,refstr);
+    }
+  }
+
+  if (m_msp->markx & MX_MBLAST2) {
+    if (!have_ref) {
+      fprintf(fd,"%s %s%s\n\n", prog_func, verstr, mp_verstr);
+    }
+    else {
+      fprintf(fd,"%s %s%s\n\nReference: %s\n\n", prog_func, verstr, mp_verstr, refstr);
+    }
+  }
+
+  fflush(fd);
+}
+
+/* ****************************************************************
+   print_header2() - per-query header, written to fd
+   std_output:
+     Query:        (first query only)
+     1>>>accession description # aa
+     Annotation:   (when the query has annotations)
+     Library:      title, residue and sequence counts
+   otherwise, for MX_M8OUT+MX_M8COMMENT:
+     # program/version, # Query:, # Database:
+   MX_HTML: closes with </pre>
+   info_qlabel is not used here.
+**************************************************************** */
+void
+print_header2(FILE *fd, int qlib, char *info_qlabel, unsigned char **aa0,
+	      const struct mngmsg *m_msp, const struct pstruct *ppst,
+	      const char * info_lib_range_p) {
+  int pos;
+  int nt_query;
+  char strand_note[MAX_STR];
+  double total_residues;
+
+  /* if (m_msp->markx & MX_HTML) fputs("<pre>\n",fd); */
+
+  if (!m_msp->std_output) {
+    if ((m_msp->markx & (MX_M8OUT + MX_M8COMMENT)) == (MX_M8OUT+MX_M8COMMENT)) {
+      fprintf(fd,"# %s %s%s\n",prog_func,verstr,mp_verstr);
+      fprintf(fd,"# Query: %s\n",m_msp->qtitle);
+      fprintf(fd,"# Database: %s\n",m_msp->ltitle);
+    }
+  }
+  else {
+    if (qlib==1) {
+      fprintf(fd,"Query: %s\n", m_msp->tname);
+    }
+
+    /* nucleotide queries searched in one direction get a note */
+    strand_note[0]='\0';
+    nt_query = (m_msp->qdnaseq == SEQT_DNA || m_msp->qdnaseq == SEQT_RNA);
+    if (nt_query && m_msp->qframe==1) {
+      strncpy(strand_note," (forward-only)",sizeof(strand_note));
+      strand_note[sizeof(strand_note)-1]='\0';
+    }
+
+    if (m_msp->revcomp) {
+      fprintf(fd,"%3d>>>%s%s\n", qlib, m_msp->qtitle, " (reverse complement)");
+    }
+    else {
+      fprintf(fd,"%3d>>>%s%s\n", qlib, m_msp->qtitle, strand_note);
+    }
+
+    /* check for annotation */
+    if (m_msp->ann_flg && m_msp->aa0a != NULL) {
+      fprintf(fd,"Annotation: ");
+      for (pos=0; pos<m_msp->n0; pos++) {
+        if (!m_msp->aa0a[pos]) continue;
+        if (m_msp->ann_arr[m_msp->aa0a[pos]] == ' ') continue;
+        fprintf(fd,"|%ld:%c%c",
+                pos+m_msp->q_off,m_msp->ann_arr[m_msp->aa0a[pos]],ppst->sq[aa0[0][pos]]);
+      }
+      fprintf(fd,"\n");
+    }
+
+    fprintf(fd,"Library: %s%s\n", m_msp->ltitle,info_lib_range_p);
+
+    if (m_msp->db.carry!=0) {
+      /* residue count = carry * LONG_MAX + length, shown as a double */
+      total_residues = (double)m_msp->db.carry*(double)LONG_MAX + (double)m_msp->db.length;
+      fprintf(fd, "  %.0f residues in %5ld library sequences\n", total_residues, m_msp->db.entries);
+    }
+    else {
+      fprintf(fd, "  %7ld residues in %5ld sequences\n", m_msp->db.length, m_msp->db.entries);
+    }
+  }
+
+  if (m_msp->markx & MX_HTML) fputs("</pre>\n",fd);
+  fflush(fd);
+}
+
+/* **************************************************************** */
+/*   print_header3() - written before showbest; only produces       */
+/*   output for MX_MBLAST2: the Database: summary (first query      */
+/*   only), then Query= and Length=.  ppst is not used here.        */
+/* **************************************************************** */
+void print_header3(FILE *fd, int qlib, struct mngmsg *m_msp, struct pstruct *ppst) {
+
+  if (!(m_msp->markx & MX_MBLAST2)) return;
+
+  if (qlib == 1) {
+    fprintf(fd, "\nDatabase: %s\n     %12ld sequences; %ld total letters\n\n\n",
+            m_msp->ltitle, m_msp->db.entries, m_msp->db.length);
+  }
+
+  fprintf(fd, "\nQuery= %s\nLength=%d\n", m_msp->qtitle, m_msp->n0);
+}
+
+
+/* **************************************************************** */
+/* print_header4() - alignment transition                           */
+/*   MX_M10FORM: ">>>query, n vs library" line followed by the      */
+/*     "; pg_*" lines and the info strings                          */
+/*   otherwise, std_output with MX_AMAP/MX_HTML/MX_M9SUMM: only     */
+/*     the ">>>query, n vs library" line                            */
+/*   ppst is not used here.                                         */
+/* **************************************************************** */
+void print_header4(FILE *fd, char *info_qlabel, char *argv_line, char *info_gstring3, char *info_hstring_p[2],
+		   struct mngmsg *m_msp, struct pstruct *ppst) {
+
+  if (m_msp->markx & MX_M10FORM) {
+    fprintf(fd,"\n>>>%s%s, %d %s vs %s library\n",
+            info_qlabel,(m_msp->revcomp ? "-":"\0"), m_msp->n0, m_msp->sqnam,
+            m_msp->lname);
+    fprintf(fd,"; pg_name: %s\n",m_msp->pgm_name);
+    fprintf(fd,"; pg_ver: %s%s\n",verstr,mp_verstr);
+    fprintf(fd,"; pg_argv: %s",argv_line);
+    fputs(info_gstring3,fd);
+    fputs(info_hstring_p[0],fd);
+    fputs(info_hstring_p[1],fd);
+  }
+  else if (m_msp->std_output && (m_msp->markx & (MX_AMAP+ MX_HTML + MX_M9SUMM))) {
+    fprintf(fd,"\n>>>%s%s, %d %s vs %s library\n",
+            info_qlabel,(m_msp->revcomp ? "_rev":"\0"), m_msp->n0,
+            m_msp->sqnam,m_msp->lname);
+  }
+}
+
+/* print_header4a() - write the ">>><<<" marker to outfd.  It is only
+   written for MX_M10FORM or MX_M9SUMM output, and not when MX_M8OUT
+   is set or show_code is SHOW_CODE_ID. */
+void print_header4a(FILE *outfd, struct mngmsg *m_msp) {
+  if (m_msp->markx & MX_M8OUT) return;
+  if (!(m_msp->markx & (MX_M10FORM+MX_M9SUMM))) return;
+  if (m_msp->show_code == SHOW_CODE_ID) return;
+
+  fprintf(outfd,">>><<<\n");
+}
+
+/* print_header5() - end-of-run trailer, written to fd:
+     MX_MBLAST2: Lambda/K/H block and "Effective search space"
+     MX_M8COMMENT: "# ... processed n queries"
+     MX_M10FORM/MX_M9SUMM (without MX_M8OUT and MX_HTML): ">>>///"
+     std_output: print_sum() summary, wrapped in <pre> for MX_HTML
+     HTML_HEAD builds with MX_HTML: closing </body></html>
+     MX_MBLAST2: database, matrix and gap penalty summary
+*/
+void print_header5(FILE *fd, int qlib, struct db_str *qtt,
+		   struct mngmsg *m_msp, struct pstruct *ppst,
+		   int in_mem, long tot_memK) {
+  const int blast_fmt = ((m_msp->markx & MX_MBLAST2) != 0);
+  const int html_fmt = ((m_msp->markx & MX_HTML) != 0);
+
+  /* for MX_MBLAST2, show some statistics results */
+  if (blast_fmt) {
+    fprintf(fd,"\n\nLambda      K     H\n");
+    fprintf(fd," %6.3f  %6.3f  %6.3f\n\n",ppst->pLambda,ppst->pK,ppst->pH);
+    fprintf(fd,"\nGapped\nLambda\n");
+    fprintf(fd," %6.3f  %6.3f  %6.3f\n",ppst->pLambda,ppst->pK,ppst->pH);
+    fprintf(fd,"\nEffective search space used: %ld\n\n",m_msp->db.entries);
+  }
+
+  if (m_msp->markx & MX_M8COMMENT) {
+    fprintf(fd, "# %s processed %d queries\n",prog_func,qlib);
+  }
+
+  if (!(m_msp->markx & MX_M8OUT) && !html_fmt
+      && (m_msp->markx & (MX_M10FORM+MX_M9SUMM))) {
+    fprintf(fd,">>>///\n");
+  }
+
+  if (html_fmt) fputs("<pre>",fd);
+  if (m_msp->std_output) {
+    print_sum(fd, qtt, &m_msp->db, in_mem, tot_memK);
+  }
+  if (html_fmt) fputs("</pre>\n",fd);
+#ifdef HTML_HEAD
+  if (html_fmt) fprintf(fd,"</body>\n</html>\n");
+#endif
+
+  if (blast_fmt) {
+    fprintf(fd,"\n  Database: %s\n",m_msp->ltitle);
+    fprintf(fd,"  Number of letters in database: %ld\n",m_msp->db.length);
+    fprintf(fd,"  Number of sequences in database: %ld\n",m_msp->db.entries);
+    fprintf(fd,"\n\n\nMatrix: %s\n",ppst->pam_name);
+    fprintf(fd,"Gap Penalties: Existence: %d, Extension: %d\n",ppst->gdelval, ppst->ggapval);
+  }
+}
+
+/* print_annot_header() - list the annotation symbols that have a
+   definition, one " c : definition" line each.  Nothing is written
+   unless m_msp->ann_arr_def[1] is set.  The scan starts at index 1
+   and stops at the first zero entry of ann_arr[].  For MX_HTML the
+   list is wrapped in <pre> ... </pre><hr />. */
+void
+print_annot_header(FILE *fd, struct mngmsg *m_msp) {
+  int sym;
+  int html_fmt;
+
+  if (!m_msp->ann_arr_def[1]) return;
+
+  html_fmt = ((m_msp->markx & MX_HTML) != 0);
+
+  if (html_fmt) {fprintf(fd,"<pre>");}
+  fprintf(fd, "Annotation symbols:\n");
+
+  sym = 1;
+  while (m_msp->ann_arr[sym]) {
+    if (m_msp->ann_arr_def[sym]) {
+      fprintf(fd, " %c : %s\n",m_msp->ann_arr[sym], m_msp->ann_arr_def[sym]);
+    }
+    sym++;
+  }
+
+  if (html_fmt) {fputs("</pre><hr />\n",fd);}
+}
+
+extern int fa_max_workers;
+
+/* print_sum() - write the run summary to fd: query and library
+   residue/sequence totals, program version, start/done wall-clock
+   times, scan/display timing, and the function that was used.
+
+   qtt, ntt      - query and library totals (length, entries; ntt->carry
+                   counts LONG_MAX overflows of ntt->length)
+   in_mem, tot_memK - when both are non-zero, an " in memory [nG]" note
+                   is added to the threaded/parallel version line
+*/
+void
+print_sum(FILE *fd, struct db_str *qtt, struct db_str *ntt, int in_mem, long tot_memK)
+{
+  char start_str[26], done_str[26];
+  char mem_note[256];
+  double lib_residues;
+
+  /* ctime() re-uses one static buffer, so copy each result before the
+     next call; index 24 holds the trailing '\n', which is dropped */
+  strncpy(start_str,ctime(&tdstart),sizeof(start_str));
+  strncpy(done_str,ctime(&tddone),sizeof(done_str));
+  start_str[24]='\0';
+  done_str[24]='\0';
+
+  /* Print timing to output file as well */
+
+  fprintf(fd, "\n%ld residues in %ld query   sequences\n", qtt->length, qtt->entries);
+  if (ntt->carry != 0) {
+    /* library length overflowed a long: rebuild the total as a double */
+    lib_residues = (double)ntt->carry*(double)LONG_MAX + (double)ntt->length;
+    fprintf(fd, "%.0f residues in %ld library sequences\n", lib_residues, ntt->entries);
+  }
+  else {
+    fprintf(fd, "%ld residues in %ld library sequences\n", ntt->length, ntt->entries);
+  }
+
+  mem_note[0]='\0';
+  if (tot_memK && in_mem != 0) {
+    sprintf(mem_note," in memory [%ldG]",(tot_memK >> 20));
+  }
+
+#if !defined(COMP_THR) && !defined(PCOMPLIB)
+  fprintf(fd," Scomplib [%s%s]\n start: %s done: %s\n",verstr,mp_verstr,start_str,done_str);
+#endif
+#if defined(COMP_THR)
+  fprintf(fd," Tcomplib [%s%s] (%d proc%s)\n start: %s done: %s\n", verstr, mp_verstr,
+	  fa_max_workers, mem_note, start_str,done_str);
+#endif
+#if defined(PCOMPLIB)
+  fprintf(fd," Pcomplib [%s%s] (%d proc%s)\n start: %s done: %s\n", verstr, mp_verstr,
+	  fa_max_workers, mem_note, start_str,done_str);
+#endif
+
+#ifdef COMP_MLIB
+  fprintf(fd," Total Scan time: ");
+  ptime(fd, ttscan);
+  fprintf (fd," Total Display time: ");
+  ptime (fd, ttdisp);
+#else
+  fprintf(fd," Scan time: ");
+  ptime(fd, tscan - tprev);
+  fprintf (fd," Display time: ");
+  ptime (fd, tdone - tscan);
+#endif
+  fprintf (fd,"\n");
+  fprintf (fd, "\nFunction used was %s [%s%s]\n", prog_func,verstr,mp_verstr);
+}
+
+extern double zs_to_Ec(double zs, long entries);
+extern double zs_to_bit(double zs, int n0, int n1);
+extern double zs_to_p(double zs);
+
+#include "aln_structs.h"
+
+/* prhist() - print the score histogram (unless suppressed) followed by
+   the statistics / algorithm summary for one search.
+
+   fd            - output stream
+   m_msp         - output options (markx flags, nohist, annot1_sname)
+   ppst          - scoring parameters (zsflag_f, zs_off, other_info)
+   hist          - histogram counts (hist_a[], maxh, min_hist, histint,
+                   entries, stat_info); passed by value
+   nstats,sstats - number of scores used for statistics (sstats is only
+                   printed in SAMP_STATS builds)
+   ntt           - library totals (length, entries, carry)
+   stat_info2    - optional second statistics line (may be NULL)
+   lib_range     - text appended to the library size line
+   info_gstring2 - [0] algorithm description, [1] parameter description
+   info_hstring  - for MX_M10FORM, [0] and [1] receive "; mp_..." summary
+                   text via sprintf()
+   tscan         - scan time handed to ptime()
+
+   NOTE(review): the capacity of info_hstring[0]/[1] is not visible in
+   this function; the sprintf() calls assume the caller's buffers are
+   large enough - confirm.
+*/
+void
+prhist(FILE *fd, const struct mngmsg *m_msp,
+       struct pstruct *ppst,
+       struct hist_str hist,
+       int nstats, int sstats,
+       struct db_str ntt,
+       char *stat_info2,
+       char *lib_range,
+       char **info_gstring2,
+       char **info_hstring,
+       long tscan)
+{
+  int i,j,hl,hll, el, ell, ev;
+  char hline[80], pch, *bp;
+  int mh1, mht;
+  int maxval, maxvalt, dotsiz, ddotsiz,doinset;
+  double cur_e, prev_e, f_int;
+  double max_dev, x_tmp;
+  double db_tt;
+  int n_chi_sq, cum_hl=0, max_i=0, max_dev_i;
+  double zs10_off;
+
+
+  if (m_msp->markx & MX_HTML) fputs("<pre>\n",fd);
+  else {fprintf(fd,"\n");}
+
+  /* no statistics requested: print only the library size and the
+     algorithm/parameter lines */
+  /* NOTE(review): this early return does not write a closing </pre> for
+     MX_HTML output - presumably closed by the caller; confirm */
+  if (ppst->zsflag_f < 0) {
+    if (!m_msp->nohist) {
+      fprintf(fd, "  %7ld residues in %5ld sequences", ntt.length,ntt.entries);
+      fprintf(fd, "%s\n",lib_range);
+    }
+    fprintf(fd,"Algorithm: %s\nParameters: %s\n",info_gstring2[0],info_gstring2[1]);
+    return;
+  }
+
+  /* from here on zsflag_f >= 0, so the (zsflag_f>=0) tests below always
+     take their first branch */
+  if (nstats > 20) {
+    zs10_off = ppst->zs_off * 10.0;
+
+    max_dev = 0.0;
+    mh1 = hist.maxh-1;			/* max value for histogram */
+    mht = (3*hist.maxh-3)/4 - 1;	/* x-coordinate for expansion */
+    n_chi_sq = 0;
+
+    if (!m_msp->nohist && mh1 > 0) {
+      /* maxval: largest bin overall; maxvalt: largest bin in the tail
+	 (i >= mht), used to scale the inset */
+      for (i=0,maxval=0,maxvalt=0; i<hist.maxh; i++) {
+	if (hist.hist_a[i] > maxval) maxval = hist.hist_a[i];
+	if (i >= mht &&  hist.hist_a[i]>maxvalt) maxvalt = hist.hist_a[i];
+      }
+      cum_hl = -hist.hist_a[0];
+      /* dotsiz/ddotsiz: sequences represented by one '=' in the main
+	 histogram (at most 60 columns) and in the inset */
+      dotsiz = (maxval-1)/60+1;
+      ddotsiz = (maxvalt-1)/50+1;
+      doinset = (ddotsiz < dotsiz && dotsiz > 2);
+
+      if (ppst->zsflag_f>=0)
+	fprintf(fd,"       opt      E()\n");
+      else
+	fprintf(fd,"     opt\n");
+
+      prev_e =  zs_to_Ec((double)(hist.min_hist-hist.histint/2)-zs10_off,hist.entries);
+      /* one output line per histogram bin */
+      for (i=0; i<=mh1; i++) {
+	/* first bin is flagged '<', last bin '>' */
+	pch = (i==mh1) ? '>' : ' ';
+	pch = (i==0) ? '<' : pch;
+	hll = hl = hist.hist_a[i];
+	if (ppst->zsflag_f>=0) {
+	  cum_hl += hl;
+	  f_int = (double)(i*hist.histint+hist.min_hist)+(double)hist.histint/2.0;
+	  cur_e = zs_to_Ec(f_int-zs10_off,hist.entries);
+	  /* expected count for this bin: difference between successive
+	     zs_to_Ec() values, rounded */
+	  ev = el = ell = (int)(cur_e - prev_e + 0.5);
+	  /* track the largest |observed cumulative - cur_e| deviation over
+	     the selected bins; reported below as Kolmogorov-Smirnov */
+	  if (hl > 0  && i > 5 && i < (90-hist.min_hist)/hist.histint) {
+	    x_tmp  = fabs(cum_hl - cur_e);
+	    if ( x_tmp > max_dev) {
+	      max_dev = x_tmp;
+	      max_i = i;
+	    }
+	    n_chi_sq++;
+	  }
+	  /* scale expected counts to columns, clamped to the line widths */
+	  if ((el=(el+dotsiz-1)/dotsiz) > 60) el = 60;
+	  if ((ell=(ell+ddotsiz-1)/ddotsiz) > 40) ell = 40;
+	  fprintf(fd,"%c%3d %5d %5d:",
+		  pch,(i<mh1)?(i)*hist.histint+hist.min_hist :
+		  mh1*hist.histint+hist.min_hist,hl,ev);
+	}
+	else fprintf(fd,"%c%3d %5d :",
+		     pch,(i<mh1)?(i)*hist.histint+hist.min_hist :
+		     mh1*hist.histint+hist.min_hist,hl);
+
+	/* scale observed counts to columns and draw them as '=' */
+	if ((hl=(hl+dotsiz-1)/dotsiz) > 60) hl = 60;
+	if ((hll=(hll+ddotsiz-1)/ddotsiz) > 40) hll = 40;
+	for (j=0; j<hl; j++) hline[j]='=';
+	if (ppst->zsflag_f>=0) {
+	  /* mark the expected count with '*', padding with blanks when it
+	     lies beyond the observed bar */
+	  if (el <= hl ) {
+	    if (el > 0) hline[el-1]='*';
+	    hline[hl]='\0';
+	  }
+	  else {
+	    for (j = hl; j < el; j++) hline[j]=' ';
+	    hline[el-1]='*';
+	    hline[hl=el]='\0';
+	  }
+	}
+	else hline[hl] = 0;
+	/* legends are written starting at column 10 of the line buffer */
+	if (i==1) {
+	  for (j=hl; j<10; j++) hline[j]=' ';
+	  sprintf(&hline[10]," one = represents %d library sequences",dotsiz);
+	}
+	if (doinset && i == mht-2) {
+	  for (j = hl; j < 10; j++) hline[j]=' ';
+	  sprintf(&hline[10]," inset = represents %d library sequences",ddotsiz);
+	}
+	/* inset: redraw the tail bins after a ':' in column 10, using the
+	   finer ddotsiz scale */
+	if (i >= mht&& doinset ) {
+	  for (j = hl; j < 10; j++) hline[j]=' ';
+	  hline[10]=':';
+	  for (j = 11; j<11+hll; j++) hline[j]='=';
+	  hline[11+hll]='\0';
+	  if (ppst->zsflag_f>=0) {
+	    if (ell <= hll) hline[10+ell]='*';
+	    else {
+	      for (j = 11+hll; j < 10+ell; j++) hline[j]=' ';
+	      hline[10+ell] = '*';
+	      hline[11+ell] = '\0';
+	    }
+	  }
+	}
+
+	fprintf(fd,"%s\n",hline);
+	prev_e = cur_e;
+      }
+    }
+    /* score at which the largest deviation was seen */
+    max_dev_i = max_i*hist.histint+hist.min_hist;
+  }
+  else {
+    /* too few scores for a histogram */
+    max_dev = 0.0;
+    n_chi_sq = 0;
+    max_i = 0;
+    max_dev_i = 0;
+  }
+
+  if (ppst->zsflag_f >=0 ) {
+    if (!m_msp->nohist) {
+      if (ntt.carry==0) {
+	fprintf(fd, "  %7ld residues in %5ld sequences", ntt.length, ntt.entries);
+      }
+      else {
+	/* library length overflowed a long: rebuild the total as a double */
+	db_tt = (double)ntt.carry*(double)LONG_MAX + (double)ntt.length;
+	fprintf(fd, "  %.0f residues in %5ld library sequences", db_tt, ntt.entries);
+      }
+      fprintf(fd, "%s\n",lib_range);
+    }
+    fprintf(fd,"Statistics: %s\n",hist.stat_info);
+    if (stat_info2) {
+      fprintf(fd," Statistics E2: %s\n",stat_info2);
+    }
+
+#ifdef SAMP_STATS
+    fprintf(fd," statistics sampled from %ld (%d) to %ld sequences\n",
+	    (hist.entries > nstats ? nstats : hist.entries),sstats, hist.entries);
+#else
+    fprintf(fd," statistics extrapolated from %ld to %ld sequences\n",
+	    (hist.entries > nstats ? nstats : hist.entries),hist.entries);
+#endif
+
+    /* cum_hl stays 0 unless the histogram loop above ran */
+    if (!m_msp->nohist && cum_hl > 0) {
+      fprintf(fd," Kolmogorov-Smirnov  statistic: %6.4f (N=%d) at %3d\n",
+	      max_dev/(float)cum_hl, n_chi_sq,max_dev_i);
+    }
+    if (m_msp->markx & MX_M10FORM) {
+      /* newlines in stat_info are replaced by blanks in place and not
+	 restored */
+      while ((bp=strchr(hist.stat_info,'\n'))!=NULL) *bp=' ';
+      /* avoid dividing by zero below */
+      if (cum_hl <= 0) cum_hl = -1;
+      sprintf(info_hstring[0],"; mp_extrap: %d %ld\n; mp_stats: %s\n; mp_KS: %6.4f (N=%d) at %3d\n",
+	      MAX_STATS,hist.entries,hist.stat_info,max_dev/(float)cum_hl,
+	      n_chi_sq,max_dev_i);
+    }
+  }
+
+  if (m_msp->markx & MX_M10FORM) {
+    /* temporarily blank the first newline in the parameter string while
+       it is formatted, then put it back */
+    if ((bp = strchr(info_gstring2[1],'\n'))!=NULL) *bp = ' ';
+    sprintf(info_hstring[1],"; mp_Algorithm: %s\n; mp_Parameters: %s\n",info_gstring2[0],info_gstring2[1]);
+    if (bp != NULL ) *bp = '\n';
+  }
+
+  if (ppst->other_info != NULL) {
+    fputs(ppst->other_info, fd);
+  }
+
+  fprintf(fd,"Algorithm: %s\nParameters: %s\n",info_gstring2[0],info_gstring2[1]);
+
+  fprintf (fd," Scan time: ");
+  ptime(fd,tscan);
+  fprintf(fd,"\n");
+  /* the <pre> block is closed here only when no library annotation
+     script (annot1_sname) is set */
+  if (!m_msp->annot1_sname[0] &&  m_msp->markx & MX_HTML) {
+    fputs("</pre>\n<hr />\n",fd);
+  }
+
+  fflush(fd);
+}
+
+extern char prog_name[], *verstr;
+
+#ifdef PCOMPLIB
+#include "mpi.h"
+#endif
+
+/* s_abort() - report a fatal error and terminate.
+
+   Writes "***[prog_name] <p><p1>***" to stderr and exits with status 1.
+   In PCOMPLIB (MPI) builds MPI_Abort()/MPI_Finalize() are called before
+   exit().  Never returns.
+*/
+void s_abort (char *p,  char *p1)
+{
+  fprintf (stderr, "\n***[%s] %s%s***\n", prog_name, p, p1);
+#ifdef PCOMPLIB
+  MPI_Abort(MPI_COMM_WORLD,1);
+  MPI_Finalize();
+#endif
+  exit (1);
+}
+
+/* w_abort() - print "***[prog_name] <p><p1>***" (followed by a blank
+   line) on stderr, then leave the program with exit status 1.
+   Never returns. */
+void w_abort (char *p, char *p1)
+{
+  fprintf (stderr,
+	   "\n***[%s] %s%s***\n\n",
+	   prog_name, p, p1);
+
+  exit (1);
+}
+
+extern struct a_res_str *
+build_ares_code(unsigned char *aa0, int n0,
+		unsigned char *aa1, struct seq_record *seq,
+		int frame, int *have_ares, int repeat_thresh,
+		const struct mngmsg *m_msp, struct pstruct *ppst,
+		void *f_str
+		);
+
+extern struct lmf_str *
+re_openlib(struct lmf_str *, int outtty);
+
+#define MAX_BLINE 2048
+#define RANLIB (m_fptr->ranlib)
+
+extern int
+re_getlib(unsigned char *, struct annot_str **,
+	  int, int, int, int, int, long *, long *,
+	  struct lmf_str *m_fptr);
+
+/*
+   pre_load_best loads a set of sequences using re_getlib
+
+   it should be used for getting sequences for shuffling, and for showbest() if m_msg->quiet
+
+   it both opens the m_file_p buffer, gets the bline[] descriptions,
+   and reads the actual sequences.  In reading the sequences, it
+   should first allocate one large buffer so that individual buffers do not need to be freed.
+*/
+
+/* pre_load_best() - load descriptions (and, where missing, sequences and
+   annotations) for the nbest entries of bbp_arr[].
+
+   aa1save  - scratch buffer that re_getlib() reads each sequence into
+   maxn     - passed through to re_getlib()
+   bbp_arr  - best-hit entries; ->mseq->bline and, when NULL,
+              ->seq->aa1b are filled in
+   nbest    - number of entries in bbp_arr[]
+   m_msp    - run options; aa1save_buf_b and bline_buf_b receive the two
+              shared allocations, pre_load_done is set on exit
+   debug    - passed to get_annot_list()
+
+   Does nothing if m_msp->pre_load_done is already set.  Exits the
+   program on allocation or library re-open failure.
+*/
+void
+pre_load_best(unsigned char *aa1save, int maxn,
+	      struct beststr **bbp_arr, int nbest,
+	      struct mngmsg *m_msp, int debug)
+{
+  int i, n1, bl_len, tmp_bline_len, l_llen;
+  int seq_buf_len;
+  char bline[MAX_BLINE];
+  unsigned char  *seq_buf_p;
+  char *bline_buf_p;
+
+  struct beststr *bbp;
+  struct lmf_str *m_fptr;
+
+  /*
+     calculate how much room we need for sequences and blines
+  */
+
+  if (m_msp->pre_load_done) return;
+
+  /* start at 1 for the leading '\0' placed before the first sequence */
+  seq_buf_len = 1;
+  for (i=0; i<nbest; i++) {
+    /* we are not (currently) allocating more than n1+1, because alignment is not vectorized,
+       if it were vectorized, we would need n+16
+    */
+#ifdef DEBUG
+    if (bbp_arr[i]->n1 != bbp_arr[i]->seq->n1) {
+      fprintf(stderr,"*** error [%s:%d] - n1 (%d) != seq->n1 (%d)\n",
+	      __FILE__, __LINE__, bbp_arr[i]->n1, bbp_arr[i]->seq->n1);
+    }
+#endif
+
+    /* only sequences that are not already in memory need space */
+    if (bbp_arr[i]->seq->aa1b == NULL) {
+      seq_buf_len += bbp_arr[i]->seq->n1 + 1;
+    }
+  }
+
+  /* have required sequence space (seq_buf_len), allocate it */
+
+  if ((m_msp->aa1save_buf_b=(unsigned char *)calloc(seq_buf_len, sizeof(char)))==NULL) {
+    fprintf(stderr, "*** error [%s:%d] - cannot allocate space[%d] for sequence encoding\n",
+	    __FILE__, __LINE__, seq_buf_len);
+    exit(1);
+  }
+  else {
+    seq_buf_p = m_msp->aa1save_buf_b+1;		/* ensure there is an initial '\0' */
+  }
+
+  /* adjust description line length */
+  l_llen = m_msp->aln.llen;
+  if ((m_msp->markx & MX_M9SUMM) && m_msp->show_code != SHOW_CODE_ID) {
+    l_llen += 40;
+    if (l_llen > 200) l_llen=200;
+  }
+
+  /* full-length descriptions for -m 10 / long_info, otherwise trimmed to
+     the output line length */
+  tmp_bline_len = sizeof(bline)-1;
+  if (!(m_msp->markx & MX_M10FORM) && !m_msp->long_info) {tmp_bline_len = l_llen-5;}
+
+  /* allocate more bline than we need for simplicity */
+  /* NOTE(review): nbest*tmp_bline_len is an int product; presumably small
+     enough not to overflow - confirm for very large nbest */
+  if ((bline_buf_p=m_msp->bline_buf_b=(char *)calloc(nbest*tmp_bline_len, sizeof(char)))==NULL) {
+    fprintf(stderr, "*** error [%s:%d] - cannot allocate space[%d] for bline descriptions\n",
+	    __FILE__, __LINE__,  nbest*tmp_bline_len);
+    exit(1);
+  }
+
+  for (i=0; i<nbest; i++) {
+    bbp = bbp_arr[i];
+
+    if ((m_fptr=re_openlib(bbp->mseq->m_file_p,!m_msp->quiet))==NULL) {
+      fprintf(stderr,"*** error [%s:%d] - cannot re-open %s\n",
+	      __FILE__, __LINE__, bbp->mseq->m_file_p->lb_name);
+      exit(1);
+    }
+    /* fetch the description line for this entry into bline[] */
+    RANLIB(bline,tmp_bline_len,bbp->mseq->lseek,bbp->mseq->libstr,m_fptr);
+    bl_len = strlen(bline);
+    bbp->mseq->bline = bline_buf_p;
+    bbp->mseq->bline_max = m_msp->aln.llen;
+    /* strncpy() copies no terminator here; the calloc()ed (zero-filled)
+       buffer supplies the '\0' that the +1 below steps over */
+    strncpy(bbp->mseq->bline, bline, bl_len);
+    bline_buf_p += bl_len+1;
+
+    /* make sure we get annotation if present, and sequence if necessary */
+    if (bbp->seq->aa1b==NULL || (m_msp->ann_flg==1 && bbp->seq->annot_p==NULL)) {
+      n1 = re_getlib(aa1save, (m_msp->ann_flg==1) ? &(bbp->seq->annot_p) : NULL,
+		     maxn,m_msp->ldb_info.maxt3, m_msp->ldb_info.l_overlap,
+		     bbp->mseq->cont,m_msp->ldb_info.term_code,
+		     &bbp->seq->l_offset,&bbp->seq->l_off,bbp->mseq->m_file_p);
+      /* a length mismatch is reported but processing continues */
+      if (n1 != bbp->seq->n1) {
+	fprintf(stderr,"*** error [%s:%d] - n1[%d/%d] != n1[%d] from re_getlib() at %s [maxn:%d/maxt3:%d]\n",
+		__FILE__, __LINE__,
+		bbp->n1, bbp->seq->n1, n1, bbp->mseq->libstr, maxn, m_msp->ldb_info.maxt3);
+      }
+
+#ifdef DEBUG
+      if (adler32(1L,aa1save,n1)!=bbp->adler32_crc) {
+	fprintf(stderr,"*** error [%s:%d] - adler32_crc from re_getlib() at %d(%d):  %s\n",
+		__FILE__, __LINE__,
+		bbp->mseq->index,bbp->n1, bline);
+      }
+#endif
+
+      /* if we don't have the sequence in the aa1b buffer, copy it from re_getlib */
+      if (bbp->seq->aa1b == NULL)  {
+	bbp->seq->aa1b = seq_buf_p;
+	/* n1+1 bytes: the residues plus the byte that follows them */
+	memcpy(bbp->seq->aa1b, aa1save, bbp->seq->n1+1);
+	seq_buf_p += bbp->seq->n1+1;
+      }
+    }
+  }
+
+  /* here, we are getting query annots after all the bptr[]s have been processed */
+  /* moved to comp_lib9.c */
+  /*
+  if (m_msp->annot0_sname[0]) {
+    if (get_annot(m_msp->annot0_sname, m_msp, m_msp->qtitle, m_msp->q_offset+m_msp->q_off-1,m_msp->n0, &m_msp->annot_p, 0, debug) < 0) {
+      fprintf(stderr,"*** error [%s:%d] - %s did not produce annotations\n",__FILE__, __LINE__, m_msp->annot0_sname);
+      m_msp->annot0_sname[0] = '\0';
+    }
+    if (m_msp->annot_p && m_msp->annot_p->n_annot > 0) {
+      m_msp->aa0a = m_msp->annot_p->aa1_ann;
+    }
+    if (!m_msp->ann_arr[0]) {m_msp->ann_arr[0] = ' '; m_msp->ann_arr[1] = '\0';}
+  }
+  */
+
+  /* if we have an variant annotation script, execute it and capture the output */
+  /* must do after bline is set */
+  if (m_msp->annot1_sname[0]) {
+    if (get_annot_list(m_msp->annot1_sname, m_msp, bbp_arr, nbest, 1, debug)< 0) {
+      fprintf(stderr,"*** error [%s:%d] - %s did not produce annotations for %s\n",__FILE__, __LINE__, m_msp->annot1_sname,m_msp->qtitle);
+      /* disable the script so it is not tried again */
+      m_msp->annot1_sname[0] = '\0';
+    };
+    /* make sure the annotation symbol list is at least " " */
+    if (!m_msp->ann_arr[0]) {m_msp->ann_arr[0] = ' '; m_msp->ann_arr[1] = '\0';}
+  }
+
+  m_msp->pre_load_done = 1;
+}
+
+/*  merge_ares_chains()
+
+    seeks to merge two ares chains, producing a single chain that is
+    sorted by sw_score.
+
+    Strategy -- choose the chain with the highest score, and go down
+    it until the head of the other chain has higher score, then link
+    the other chain to the main chain, breaking the first, and
+    continue the process.
+
+    The two pointers, max_next and alt_next, keep track of the best
+    and the alternate chain
+ */
+
+
+#undef SHOW_MERGE_CHAIN
+
+/* merge_ares_chains() - merge two score-sorted a_res_str chains into
+   one chain ordered by decreasing rst.score[score_ix].
+
+   cur_ares, tmp_ares - heads of the two chains (either may be NULL)
+   score_ix           - index into rst.score[] used for ordering
+   msg                - label used only in SHOW_MERGE_CHAIN debug output
+
+   The chains are re-linked in place (no nodes are allocated or freed).
+   Returns the head of the merged chain; if one chain is empty the
+   other is returned unchanged.
+*/
+struct a_res_str *
+merge_ares_chains(struct a_res_str *cur_ares,
+		  struct a_res_str *tmp_ares,
+		  int score_ix,
+		  const char *msg)
+{
+  struct a_res_str *max_next, *max_ares, *alt_ares, *prev_next;
+
+  if (!tmp_ares) return cur_ares;
+  /* an empty current chain merges to the other chain; without this
+     check cur_ares->rst below would dereference NULL */
+  if (!cur_ares) return tmp_ares;
+
+#ifdef SHOW_MERGE_CHAIN
+  fprintf(stderr,"cur_ares->");
+  for (max_next = cur_ares; max_next; max_next = max_next->next) {
+    fprintf(stderr,"%d->",max_next->rst.score[score_ix]);
+  }
+
+  fprintf(stderr,"||\n");
+  fprintf(stderr,"tmp_ares->");
+  for (max_next = tmp_ares; max_next; max_next = max_next->next) {
+    fprintf(stderr,"%d->",max_next->rst.score[score_ix]);
+  }
+  fprintf(stderr,"||\n");
+#endif
+
+  /* start with the maximum score */
+
+  if (cur_ares->rst.score[score_ix] >= tmp_ares->rst.score[score_ix]) {
+    max_ares = max_next = prev_next = cur_ares;
+    alt_ares = tmp_ares;
+  }
+  else {
+    max_ares = max_next = prev_next = tmp_ares;
+    alt_ares = cur_ares;
+  }
+
+  while (max_next && alt_ares) {
+    /* this is guaranteed true for the first iteration */
+    /* walk down the main chain while it still scores at least as high
+       as the head of the alternate chain */
+    while (max_next && max_next->rst.score[score_ix] >= alt_ares->rst.score[score_ix]) {
+      prev_next = max_next;
+      max_next = max_next->next;
+    }
+    if (max_next==NULL) break;
+    else {	/* max_next->rst.score[score_ix] no longer greater, switch
+		   pointers */
+      /* splice the alternate chain in after prev_next; the remainder of
+	 the old main chain becomes the new alternate */
+      prev_next->next = alt_ares;
+      alt_ares = max_next;
+      max_next = prev_next->next;
+    }
+  }
+
+  /* we quit whenever max_next or alt_ares == NULL; if
+     (max_next==NULL), then continue adding the rest of alt_ares */
+
+  if (max_next==NULL) {
+    prev_next->next = alt_ares;
+  }
+
+
+#ifdef SHOW_MERGE_CHAIN
+  fprintf(stderr,"[%s] merge_ares->",msg);
+  for (max_next = max_ares; max_next; max_next = max_next->next) {
+    fprintf(stderr,"%d->",max_next->rst.score[score_ix]);
+  }
+  fprintf(stderr,"||\n\n");
+#endif
+
+  return max_ares;
+}
+
+/* shuffle() copies the sequence in from[] to to[], then shuffles to[] in place */
+
+extern int my_nrand(int, void *);
+
+/* shuffle() - produce a random permutation of from[0..n-1] in to[].
+
+   The residues are copied to to[] (unless from == to) and then
+   permuted in place: working from the last position down to the first,
+   each position is exchanged with one chosen by my_nrand() among the
+   positions not yet fixed.  to[n] is set to 0, so to[] must have room
+   for n+1 bytes.
+*/
+void
+shuffle(unsigned char *from, unsigned char *to, int n, void *rand_state)
+{
+  int last, pick;
+  unsigned char held;
+
+  if (from != to) memcpy((void *)to,(void *)from,n);
+
+  last = n - 1;
+  while (last >= 0) {
+    /* pick among the last+1 positions that are still unfixed */
+    pick = my_nrand(last + 1, rand_state);
+    held = to[pick];
+    to[pick] = to[last];
+    to[last] = held;
+    last--;
+  }
+  to[n] = 0;
+}
+
+/* shuffles DNA sequences as codons */
+/* shuffle3() - shuffle a DNA sequence codon by codon.
+
+   from[0..n-1] is copied to to[] (unless from == to) and the n/3
+   complete codons are then permuted in place, each codon moving as an
+   intact unit of three residues.  Up to two residues left over at the
+   end (n%3) stay where they are.  to[n] is set to 0, so to[] must have
+   room for n+1 bytes.
+
+   my_nrand(i, rand_state) supplies the index of the codon exchanged
+   with codon i-1.
+*/
+void
+shuffle3(unsigned char *from, unsigned char *to, int n, void *rand_state)
+{
+  int i, j, k, i3, j3; unsigned char tmp;
+  int n3;
+
+  if (from != to) memcpy((void *)to,(void *)from,n);
+
+  n3 = n/3;
+
+  for (i=n3; i>0; i--) {
+    j = my_nrand(i, rand_state);
+    /* codon i-1 occupies to[3*(i-1) .. 3*(i-1)+2]; starting at 3*i-1
+       instead would straddle two codons and, for the last codon, touch
+       to[n] and to[n+1] */
+    i3 = (i-1)*3;
+    j3 = j*3;
+    for (k=0; k<3; k++) {
+      tmp = to[j3+k];
+      to[j3+k] = to[i3+k];
+      to[i3+k] = tmp;
+    }
+  }
+  to[n] = 0;
+}
+
+/* "shuffles" by reversing the sequence */
+/* rshuffle() - "shuffle" by reversal: write from[0..n-1] into to[] in
+   reverse order and terminate it with '\0'.
+
+   to[] must have room for n+1 bytes and must not overlap from[].
+   For n <= 0 only the terminator is written.
+*/
+void
+rshuffle(unsigned char *from, unsigned char *to, int n)
+{
+  int i;
+
+  /* start at the last residue, from[n-1]; starting at from[n] would copy
+     the terminator first and never reach from[0] */
+  for (i = n-1; i >= 0; i--) {
+    *to++ = from[i];
+  }
+  *to = '\0';
+}
+
+/* selects the window layout used by the next wshuffle() call; toggled on
+   every call.  NOTE(review): file-scope state shared by all callers -
+   not protected against concurrent use. */
+static int ieven = 0;
+
+/* shuffle the len bytes at top[] in place, drawing positions from
+   my_nrand() */
+static void
+wshuffle_window(unsigned char *top, int len, void *rand_state)
+{
+  int i, j;
+  unsigned char tmp;
+
+  for (i=len; i>0; i--) {
+    j = my_nrand(i, rand_state);
+    tmp = top[j];
+    top[j] = top[i-1];
+    top[i-1] = tmp;
+  }
+}
+
+/* wshuffle() - window shuffle: copy from[0..n-1] to to[] and shuffle
+   the residues only within consecutive windows of wsiz residues.
+
+   Successive calls alternate the window layout: when ieven is set the
+   full windows start at to[0] and the n%wsiz leftover residues form a
+   short window at the end; otherwise the full windows end at to[n-1]
+   and the short window is at the start.  to[n] is set to 0, so to[]
+   must have room for n+1 bytes.  wsiz must be > 0.
+*/
+void
+wshuffle(unsigned char *from, unsigned char *to, int n, int wsiz, void *rand_state)
+{
+  int k, mm;
+
+  memcpy((void *)to,(void *)from,n);
+
+  mm = n%wsiz;		/* size of the short window */
+
+  if (ieven) {
+    /* "<=" so that the last full window is also shuffled when n is an
+       exact multiple of wsiz */
+    for (k=0; k<=(n-wsiz); k += wsiz) {
+      wshuffle_window(&to[k], wsiz, rand_state);
+    }
+    wshuffle_window(&to[n-mm], mm, rand_state);
+    ieven = 0;
+  }
+  else {
+    for (k=n; k>=wsiz; k -= wsiz) {
+      wshuffle_window(&to[k-wsiz], wsiz, rand_state);
+    }
+    wshuffle_window(&to[0], mm, rand_state);
+    ieven = 1;
+  }
+  to[n] = 0;
+}
+
+/* sfn_cmp() - look for a value common to two zero-terminated int
+   lists.
+
+   Both lists are walked in step, always advancing the one whose current
+   value is smaller (which finds every shared value only if both lists
+   are in ascending order).  Returns the first shared value found, or 0
+   when either list ends first.
+*/
+int
+sfn_cmp(int *q, int *s)
+{
+  while (*q != 0 && *s != 0) {
+    if (*q < *s) {
+      q++;
+    }
+    else if (*q > *s) {
+      s++;
+    }
+    else {
+      return *q;
+    }
+  }
+  return 0;
+}
+
+#ifndef MPI_SRC
+/* revcomp() - reverse-complement seq[0..n-1] in place.
+
+   c_nt[] maps each residue code to its complement.  The two ends are
+   exchanged and complemented while moving inwards; with an odd length
+   the middle residue is complemented where it stands.  seq[n] is set
+   to 0.
+*/
+void
+revcomp(unsigned char *seq, int n, int *c_nt)
+{
+  unsigned char left_comp;
+  int lo, hi;
+
+  lo = 0;
+  hi = n - 1;
+  while (lo < hi) {
+    left_comp = c_nt[seq[lo]];
+    seq[lo] = c_nt[seq[hi]];
+    seq[hi] = left_comp;
+    lo++;
+    hi--;
+  }
+  /* odd length: one unpaired residue remains in the middle */
+  if (lo == hi) {
+    seq[lo] = c_nt[seq[lo]];
+  }
+  seq[n]=0;
+}
+#endif
+
+/* check to see whether this score (or a shuff score) should
+   be included in statistics */
+/* samp_stats_idx() - choose the statistics-buffer slot for the next score.
+
+   pre_nstats - running count of candidate scores; incremented here
+   nstats     - number of scores stored so far (only read in
+                SAMP_STATS_LESS builds)
+   rand_state - state handed to my_nrand()
+
+   Returns a slot index below MAX_STATS, or -1 when this score should
+   not be stored.
+*/
+int samp_stats_idx (int *pre_nstats, int nstats, void *rand_state) {
+  int jstats = -1;
+
+  /* this code works when every score can be used for statistics
+     estimates, but fails for fasta/[t]fast[xy] where only a fraction
+     of scores are used */
+
+  /* the first MAX_STATS candidates are stored in arrival order */
+  if (*pre_nstats < MAX_STATS) {
+    jstats = (*pre_nstats)++;
+  }
+
+  /* here, the problem is that while we may have pre_nstats
+     possible samplings, in some cases (-M subsets, fasta,
+     [t]fast[xy] we don't have MAX_STATS samples yet.  Until we
+     have MAX_STATS, we want more.  But the stats_idx strategy
+     means that there may be additional samples in the buffers
+     that are not reflected in nstats.
+  */
+
+  else {
+#ifdef SAMP_STATS_LESS
+    /* now we have MAX_STATS samples
+       we want to sample 1/2 of 60K - 120K, 1/3 of 120K - 180K, etc */
+    /* check every 15K to see if we have gone past the next threshold */
+
+    /* pre_nstats cannot be incremented before the % to ensure
+       that stats_inc is incremented exactly at 60000, 120000, etc.
+       use ">=" in case increment comes later
+       tests suggest the first 60K are sampled about 25% more
+       than the rest
+    */
+    /* NOTE(review): stats_inc is not declared in this function;
+       presumably a file-scope counter - confirm */
+    if (nstats < MAX_STATS) {
+      jstats = MAX_STATS - my_nrand(MAX_STATS - nstats, rand_state)-1;
+    }
+    /* the post-increment happens only when the nstats test above is
+       false, and before *pre_nstats is compared with the threshold */
+    else if (((*pre_nstats)++ % (MAX_STATS/4)) == 0 &&
+	     *pre_nstats >= stats_inc * MAX_STATS) {
+      stats_inc = (*pre_nstats / MAX_STATS) + 1;
+    }
+    /* this test runs for every call that reaches the else branch and can
+       overwrite the slot chosen above */
+    if ((*pre_nstats % stats_inc) == 0) {
+      jstats = my_nrand(MAX_STATS, rand_state);
+    }
+#else
+    /* this sampling strategy calls my_nrand() for every
+       sequence > 60K, but provides a very uniform sampling */
+    /* draw a slot among all candidates seen so far; draws at or beyond
+       MAX_STATS mean "do not store" */
+    jstats = my_nrand(++(*pre_nstats), rand_state);
+    if (jstats >= MAX_STATS) { jstats = -1;}
+#endif
+  }
+  return jstats;
+}
+
+/* **************************************************************** */
+/* build_link_data -- produces fasta file from m_msp->
+   (1) generate a temporary file name
+   (2) write out accessions \t expects to the temporary file
+   (3) run script against temporary file, producing fasta_file_expansion_file
+   (4) return fasta expansion filename for standard fasta openlib().
+
+   returns: the expansion library file string (NULL on failure)
+            *link_lib_file_p is set to the name of the temporary file
+              holding the data, which will be removed.
+*/
+/* **************************************************************** */
+/* build_link_data() - run the link script (m_msp->link_lname) on the
+   accessions of the displayed hits and return a library string naming
+   the script's output.
+
+   link_lib_file_p - out: name of the temporary library file (allocated
+                     here), or NULL when no library was produced
+   m_msp           - link_lname gives "[!][@]script[ libtype]"; nskip +
+                     nshow entries of bestp_arr[] are written out
+   bestp_arr       - hits; the first word of mseq->bline and rst.escore
+                     are written, one hit per line, tab separated
+   debug           - when non-zero the temporary files are kept
+
+   Returns a newly allocated "[@]file[ libtype]" string, or NULL if the
+   script reported no links, failed, or a resource could not be
+   obtained.  On non-UNIX builds this always returns NULL.
+*/
+char *
+build_link_data(char **link_lib_file_p,
+		struct mngmsg *m_msp, struct beststr **bestp_arr,
+		int debug) {
+  int i, status;
+  char tmp_line[MAX_SSTR];
+  char link_acc_file[MAX_STR];
+  int link_acc_fd;
+  char *link_lib_file = NULL;
+  char *link_lib_str;
+  char link_script[MAX_LSTR];
+  int link_lib_type = 0;	/* stays 0 if no numeric library type is given */
+  int script_ran = 0;		/* set once the script may have created link_lib_file */
+  char *bp, *link_bp;
+  FILE *link_fd=NULL;		/* file for link accessions */
+
+#ifndef UNIX
+  return NULL;
+#else
+  /* get two tmpfiles, one for accessions, one for library */
+  link_acc_file[0] = '\0';
+
+  if ((link_lib_file=(char *)calloc(MAX_STR,sizeof(char)))==NULL) {
+    fprintf(stderr,"*** error [%s:%d] - [build_link_data] Cannot allocate link_lib_file",
+	    __FILE__, __LINE__);
+    goto no_links;	/* do not write through the NULL pointer */
+  }
+
+  if ((bp=getenv("TMP_DIR"))!=NULL) {
+    strncpy(link_acc_file,bp,sizeof(link_acc_file));
+    link_acc_file[sizeof(link_acc_file)-1] = '\0';
+    SAFE_STRNCAT(link_acc_file,"/",sizeof(link_acc_file));
+  }
+
+  SAFE_STRNCAT(link_acc_file,"link_acc_XXXXXX",sizeof(link_acc_file));
+  link_acc_fd = mkstemp(link_acc_file);
+
+  /* the library file is the accession file name with ".lib" appended */
+  strncpy(link_lib_file,link_acc_file,MAX_STR);
+  link_lib_file[MAX_STR-1] = '\0';
+  SAFE_STRNCAT(link_lib_file,".lib",MAX_STR);
+
+  /* write out accessions to link_acc_file */
+  if (link_acc_fd < 0 || (link_fd =fdopen(link_acc_fd,"w"))==NULL) {
+    fprintf(stderr,"*** error [%s:%d] - Cannot open link_acc_file: %s\n",
+	    __FILE__, __LINE__, link_acc_file);
+    goto no_links;
+  }
+
+  for (i=0; i<m_msp->nskip + m_msp->nshow; i++) {
+    /* temporarily cut the description at the first blank so that only
+       the accession is written */
+    if ((bp=strchr(bestp_arr[i]->mseq->bline,' '))!=NULL) {
+      *bp = '\0';
+    }
+    fprintf(link_fd,"%s\t%.3g\n",bestp_arr[i]->mseq->bline,bestp_arr[i]->rst.escore);
+    if (bp != NULL) *bp=' ';
+  }
+  fclose(link_fd);
+
+  /* build link_script link_acc_file > link_lib_file */
+  /* check for indirect */
+  link_bp = &m_msp->link_lname[0];
+  if (*link_bp == '!') {
+    link_bp++;
+  }
+  if (*link_bp == '@') {
+    link_bp++;
+  }
+
+  /* remove library type */
+  if ((bp=strchr(link_bp,' '))!=NULL) {
+    *bp = '\0';
+    /* link_lib_type keeps its default of 0 if no number follows */
+    sscanf(bp+1,"%d",&link_lib_type);
+  }
+
+  strncpy(link_script,link_bp,sizeof(link_script));
+  link_script[sizeof(link_script)-1] = '\0';
+  SAFE_STRNCAT(link_script," ",sizeof(link_script));
+  SAFE_STRNCAT(link_script,link_acc_file,sizeof(link_script));
+  SAFE_STRNCAT(link_script," >",sizeof(link_script));
+  SAFE_STRNCAT(link_script,link_lib_file,sizeof(link_script));
+
+  /* un-edit m_msp->link_lname */
+  if (bp != NULL) *bp = ' ';
+
+  /* run link_script link_acc_file > link_lib_file */
+  status = system(link_script);
+  script_ran = 1;
+  if (!debug) {
+    unlink(link_acc_file);
+  }
+
+  if (status == NO_FILE_EXIT) {	/* my specific return for no links */
+    goto no_links;
+  }
+
+  if (status < 0 || status == 127) {
+    fprintf(stderr,"*** error [%s:%d] - script: %s failed\n",
+	    __FILE__, __LINE__,link_script);
+    goto no_links;
+  }
+
+  /* make sure the script produced a readable library file */
+  if ((link_fd=fopen(link_lib_file,"r"))==NULL) {
+    goto no_links;
+  }
+  else fclose(link_fd);
+
+  if ((link_lib_str=(char *)calloc(MAX_STR,sizeof(char)))==NULL) {
+    fprintf(stderr,"*** error [%s:%d] - [build_link_data] Cannot allocate link_lib_str",
+	    __FILE__, __LINE__);
+    goto no_links;	/* do not write through the NULL pointer */
+  }
+
+  /* build the file string (possibly @link_lib_file libtype) */
+  link_lib_str[0]='\0';
+  if (m_msp->link_lname[0] == '@') {
+    SAFE_STRNCAT(link_lib_str,"@",MAX_STR);
+  }
+  SAFE_STRNCAT(link_lib_str,link_lib_file,MAX_STR);
+  if (link_lib_type > 0) {
+    sprintf(tmp_line," %d",link_lib_type);
+    SAFE_STRNCAT(link_lib_str,tmp_line,MAX_STR);
+  }
+
+  *link_lib_file_p = link_lib_file;
+  return link_lib_str;
+
+ no_links:
+  /* the shell redirection creates link_lib_file even when the script
+     fails; its name is about to be discarded, so remove it here */
+  if (script_ran && link_lib_file != NULL && !debug) {
+    unlink(link_lib_file);
+  }
+  free(link_lib_file);
+  *link_lib_file_p = NULL;
+  return NULL;
+#endif
+}
+
+/* **************************************************************** */
+/* build_lib_db -- produces fasta file from script
+   (1) generate a temporary file name lib_db_file
+   (2) run script producing data in lib_db_file
+
+   returns: a newly allocated library string, "[@]lib_db_file[ libtype]",
+            or NULL on failure.  (There is no db_str_file_p argument; the
+              temporary data file is not removed by this function.)
+*/
+/* **************************************************************** */
+/* build_lib_db() - run a script that writes a sequence library to a
+   temporary file and return a library string naming that file.
+
+   script_file - "[@]script[ libtype]"; the string is edited in place
+                 while the command is built and restored before the
+                 script is run
+
+   The temporary file is created in $TMP_DIR when that is set, otherwise
+   in the current directory.  Returns a newly allocated
+   "[@]tmpfile[ libtype]" string, or NULL if the script reported no
+   data, failed, or memory could not be allocated.
+*/
+char *
+build_lib_db(char *script_file) {
+  int status;
+  char tmp_line[MAX_SSTR];
+  char *lib_db_file, *lib_db_str;
+  char lib_db_script[MAX_LSTR];
+  int lib_db_type = 0;		/* stays 0 if no numeric library type is given */
+  int lib_db_str_len;
+  char *bp, *lib_bp;
+
+  if ((lib_db_file=(char *)calloc(MAX_STR,sizeof(char)))==NULL) {
+    fprintf(stderr,"*** error [%s:%d] - [build_lib_db] Cannot allocate lib_db_file",
+	    __FILE__, __LINE__);
+    goto no_lib;
+  }
+
+  /* lib_db_file is a pointer, so its capacity is MAX_STR;
+     sizeof(lib_db_file) would be the size of the pointer and cut
+     TMP_DIR off after a few characters */
+  if ((bp=getenv("TMP_DIR"))!=NULL) {
+    strncpy(lib_db_file,bp,MAX_STR);
+    lib_db_file[MAX_STR-1] = '\0';
+    SAFE_STRNCAT(lib_db_file,"/",MAX_STR);
+  }
+
+  SAFE_STRNCAT(lib_db_file,"lib_db_XXXXXX",MAX_STR);
+  /* NOTE(review): mktemp() only generates a name, so another process
+     could create the file first; kept because the name is handed to
+     the shell for redirection */
+  mktemp(lib_db_file);
+  if (lib_db_file[0] == '\0') {	/* mktemp() could not make a unique name */
+    fprintf(stderr,"*** error [%s:%d] - [build_lib_db] cannot create temporary file name\n",
+	    __FILE__, __LINE__);
+    goto no_lib;
+  }
+  lib_db_str_len = strlen(lib_db_file)+1;
+
+  /* check for indirect */
+  lib_bp = script_file;
+  if (*lib_bp == '@') {
+    lib_bp++;
+    lib_db_str_len++;
+  }
+  /* remove library type */
+  if ((bp=strchr(lib_bp,' '))!=NULL) {
+    *bp = '\0';
+    /* lib_db_type keeps its default of 0 if no number follows */
+    sscanf(bp+1,"%d",&lib_db_type);
+    lib_db_str_len += (strlen(bp+1)+1);
+  }
+
+  strncpy(lib_db_script,lib_bp,sizeof(lib_db_script));
+  lib_db_script[sizeof(lib_db_script)-1] = '\0';
+  SAFE_STRNCAT(lib_db_script," >",sizeof(lib_db_script));
+  SAFE_STRNCAT(lib_db_script,lib_db_file,sizeof(lib_db_script));
+
+  /* un-edit script_file */
+  if (bp != NULL) *bp = ' ';
+
+  /* run lib_db_script > lib_db_file */
+  status = system(lib_db_script);
+
+  if (status == NO_FILE_EXIT) {	/* my specific return for no links */
+    goto no_lib;
+  }
+
+  if (status < 0 || status == 127) {
+    fprintf(stderr,"*** error [%s:%d] - [build_lib_db] script: %s failed\n",
+	    __FILE__, __LINE__, lib_db_script);
+    goto no_lib;
+  }
+
+  /* build the file string (possibly @lib_db_str libtype) */
+  if ((lib_db_str=calloc(lib_db_str_len+1,sizeof(char)))==NULL) {
+    fprintf(stderr,"*** error [%s:%d] - [build_lib_db] cannot allocate lib_db_str[%d]\n",
+	    __FILE__, __LINE__, lib_db_str_len+1);
+    goto no_lib;
+  }
+  /* appends are bounded by the size actually allocated for lib_db_str */
+  lib_db_str[0]='\0';
+  if (*script_file == '@') {
+    SAFE_STRNCAT(lib_db_str,"@",lib_db_str_len+1);
+  }
+  SAFE_STRNCAT(lib_db_str,lib_db_file,lib_db_str_len+1);
+  if (lib_db_type > 0) {
+    sprintf(tmp_line," %d",lib_db_type);
+    SAFE_STRNCAT(lib_db_str,tmp_line,lib_db_str_len+1);
+  }
+
+  /* the name now lives in lib_db_str */
+  free(lib_db_file);
+  return lib_db_str;
+
+ no_lib:
+  free(lib_db_file);		/* free(NULL) is harmless */
+  return NULL;
+}
+
+/* used to temporarily allocate annotation array in next_annot_entry()*/
+/* growable scratch array of annotation entries; set up by
+   init_tmp_annot() and enlarged by update_tmp_annot() */
+struct annot_mstr {
+  int max_annot;			/* number of entries allocated in tmp_arr_p */
+  struct annot_entry *tmp_arr_p;	/* the array itself; NULL until allocated */
+};
+
+/* init_tmp_annot(this, size) initializes the structure used to track annots */
+/* init_tmp_annot() - make sure *this has an annotation array.
+
+   When this->tmp_arr_p is NULL or this->max_annot is not positive, a
+   zeroed array of 32 entries is allocated; an existing array is left
+   untouched.  The size argument is accepted for the callers' benefit
+   but is not used.
+
+   Returns 1 on success, 0 if the allocation failed.
+*/
+int
+init_tmp_annot(struct annot_mstr *this, int size) {
+
+  /* only reset if array is NULL */
+  if (this->tmp_arr_p == NULL || this->max_annot <= 0) {
+    this->max_annot = 32;
+    if ((this->tmp_arr_p=(struct annot_entry *)calloc(this->max_annot, sizeof(struct annot_entry)))==NULL) {
+      fprintf(stderr,"*** error [%s:%d] - cannot allocate annot_entry[%d]\n",
+		__FILE__,__LINE__,this->max_annot);
+      return 0;
+    }
+  }
+  return 1;
+}
+
+/* update_tmp_annot() - enlarge the annotation array in *this by half
+   its current size (and by at least one entry).
+
+   Returns 1 on success.  On failure returns 0 and leaves both
+   this->tmp_arr_p and this->max_annot as they were, so the existing
+   entries remain valid and can still be freed by the owner.
+*/
+int
+update_tmp_annot(struct annot_mstr *this) {
+  int new_max;
+  struct annot_entry *new_arr;
+
+  new_max = this->max_annot + (this->max_annot/2);
+  if (new_max <= this->max_annot) {	/* max_annot/2 is 0 below 2 entries */
+    new_max = this->max_annot + 1;
+  }
+
+  /* realloc() leaves the old block allocated when it fails, so the
+     result goes into a temporary rather than over the only pointer */
+  if ((new_arr = (struct annot_entry *)realloc(this->tmp_arr_p, (size_t)new_max*sizeof(struct annot_entry)))==NULL) {
+    fprintf(stderr,"[*** error [%s:%d] - cannot reallocate tmp_ann_astr[%d]\n",
+	    __FILE__, __LINE__, new_max);
+    return 0;
+  }
+
+  this->tmp_arr_p = new_arr;
+  this->max_annot = new_max;
+  return 1;
+}
+
+struct annot_str *
+next_annot_entry(FILE *annot_fd, char *tmp_line, int n_tmp_line,
+		 struct annot_str *annot_p,
+		 struct annot_mstr *mtmp_annot_p,
+		 struct mngmsg *m_msp, int target);
+
+/* **************************************************************** */
+/* get_annot_list -- produces fasta file from sname
+   if sname[0]=='<', read the file directly, goto (4)
+   if sname[0]=='!', run a script
+   (1) generate a temporary file name
+   (2) write out list of blines
+   (3) run m_msp->annot1_sname[] script against temporary file, producing table of annotations
+
+   (4) read in the annotations and merge them into beststr
+   (5) return number of annotations
+*/
+/* **************************************************************** */
+
+/* get_annot_list -- attach annotations to the sequences in bestp_arr[].
+
+   sname[0] selects the annotation source:
+     '!'  sname+1 is a script.  The accession part of each bline plus a
+          length is written to a mkstemp() file, and the script is run as
+          "script bline_file >bline_file.annot".
+     '<'  sname+1 is the name of an existing annotation file.
+
+   The annotation file is read with next_annot_entry(); entries must be
+   in the same order as bestp_arr[].
+
+   Returns the number of sequences that received annotations, or -1 on
+   any failure (n_annot of existing annotations is reset to 0).
+   When UNIX is not defined the function does nothing and returns 0.
+*/
+int
+get_annot_list(char *sname, struct mngmsg *m_msp, struct beststr **bestp_arr, int nbest,
+	       int target, int debug) {
+  int i, status;
+  long l_offset;
+  char tmp_line[MAX_STR];
+  char annot_bline_file[MAX_STR];
+  int annot_bline_fd;
+  char *annot_descr_file = NULL;	/* heap buffer for '!', points into sname for '<' */
+  int descr_is_tmp = 0;		/* set once annot_descr_file may exist as a temp file */
+  char annot_script[MAX_LSTR];
+  struct annot_str *annot_p;
+  char *bp;
+  int annot_seq_cnt;
+  FILE *annot_fd=NULL;		/* file for annot accessions */
+  struct annot_mstr mtmp_annot;
+
+#ifndef UNIX
+  return 0;
+#else
+
+  /* initialized up front so that the no_annots path can always free it */
+  mtmp_annot.tmp_arr_p = NULL;
+  mtmp_annot.max_annot = 0;
+
+  if (sname[0] == '!') {
+
+    /* get two tmpfiles, one for bline, one for returned annotations
+       (but it would make more sense to use popen() to get the
+       annotations back)
+    */
+
+    annot_bline_file[0] = '\0';
+
+    /* calloc() zero-fills, so the buffer starts out as an empty string */
+    if ((annot_descr_file=(char *)calloc(MAX_STR,sizeof(char)))==NULL) {
+      fprintf(stderr,"*** error [%s:%d] - [get_annot_list] Cannot allocate annot_file\n",
+	      __FILE__, __LINE__);
+      goto no_annots;
+    }
+
+    if ((bp=getenv("TMP_DIR"))!=NULL) {
+      strncpy(annot_bline_file,bp,sizeof(annot_bline_file));
+      annot_bline_file[sizeof(annot_bline_file)-1] = '\0';
+      SAFE_STRNCAT(annot_bline_file,"/",sizeof(annot_bline_file));
+    }
+
+    SAFE_STRNCAT(annot_bline_file,"annot_bline_XXXXXX",sizeof(annot_bline_file));
+    if ((annot_bline_fd = mkstemp(annot_bline_file)) < 0) {
+      fprintf(stderr,"*** error [%s:%d] - Cannot open annot_bline_file: %s\n",__FILE__, __LINE__, annot_bline_file);
+      goto no_annots;
+    }
+
+    /* descr file name is the bline file name + ".annot";
+       terminate the destination buffer, not the source */
+    strncpy(annot_descr_file,annot_bline_file,MAX_STR);
+    annot_descr_file[MAX_STR-1] = '\0';
+    SAFE_STRNCAT(annot_descr_file,".annot",MAX_STR);
+
+    /* write out accessions to annot_bline_file */
+    if ((annot_fd =fdopen(annot_bline_fd,"w"))==NULL) {
+      fprintf(stderr,"*** error [%s:%d] - Cannot open annot_bline_file: %s\n",__FILE__, __LINE__, annot_bline_file);
+      /* close() comes from the same header as unlink() */
+      close(annot_bline_fd);
+      if (!debug) { unlink(annot_bline_file); }
+      goto no_annots;
+    }
+
+    for (i=0; i<nbest; i++) {
+      if (bestp_arr[i]->mseq->annot_req_flag) {	continue; }
+      /* temporarily cut bline at the first blank after DESCR_OFFSET */
+      if ((strlen(bestp_arr[i]->mseq->bline) > DESCR_OFFSET) &&
+	  (bp=strchr(bestp_arr[i]->mseq->bline+DESCR_OFFSET,' '))!=NULL) {*bp = '\0';}
+      else {bp = NULL;}
+      /* provide sequence length with offset, but only if offset is positive */
+      l_offset = bestp_arr[i]->seq->l_offset+bestp_arr[i]->seq->l_off -1;
+      if (l_offset < 0) { l_offset = 0;}
+      fprintf(annot_fd,"%s\t%ld\n",bestp_arr[i]->mseq->bline,
+	      l_offset + bestp_arr[i]->seq->n1);
+      if (bp != NULL) *bp=' ';	/* restore bline */
+      bestp_arr[i]->mseq->annot_req_flag = 1;
+    }
+    fclose(annot_fd);
+    annot_fd = NULL;		/* so no_annots does not close it twice */
+
+    subs_env(annot_script, sname+1, sizeof(annot_script));
+    annot_script[sizeof(annot_script)-1] = '\0';
+    SAFE_STRNCAT(annot_script," ",sizeof(annot_script));
+    SAFE_STRNCAT(annot_script,annot_bline_file,sizeof(annot_script));
+    SAFE_STRNCAT(annot_script," >",sizeof(annot_script));
+    SAFE_STRNCAT(annot_script,annot_descr_file,sizeof(annot_script));
+
+    /* run annot_script annot_bline_file > annot_descr_file */
+    descr_is_tmp = 1;	/* the shell redirection creates annot_descr_file */
+    status = system(annot_script);
+    if (!debug) {
+      unlink(annot_bline_file);
+    }
+
+    /* NOTE(review): status is the raw system() return value; confirm
+       that NO_FILE_EXIT and 127 are meant to be compared unshifted */
+    if (status == NO_FILE_EXIT) {	/* my specific return for no annots */
+      goto no_annots;
+    }
+
+    if (status < 0 || status == 127) {
+      fprintf(stderr,"*** error [%s:%d] - script: %s failed\n",
+	      __FILE__, __LINE__, annot_script);
+      goto no_annots;
+    }
+  }
+  else if (sname[0] == '<') {
+    annot_descr_file = sname+1;
+  }
+  else {
+    fprintf(stderr,"*** error [%s:%d] - %s not script (!) or file (<)\n",__FILE__, __LINE__, sname);
+    goto no_annots;
+  }
+
+  /* read annotation file */
+
+  if ((annot_fd=fopen(annot_descr_file,"r"))==NULL) {
+    goto no_annots;
+  }
+
+  /* be sure to ask for annotation once */
+  for (i=0; i<nbest; i++) {
+    bestp_arr[i]->mseq->annot_req_flag = 1;
+  }
+  /* we have some annotations */
+  /* the annotation script MUST return the annotations ordered as in annot_descr_file,
+     in "fasta" form:
+
+     >bline_descr
+     pos<tab>label<tab>value?<tab>comment (which is not read in this version)
+     1 *
+     11 V N
+  */
+
+  /* now read the annotation/variant file */
+
+  /* read #comments, =annot_defs at beginning of file */
+  tmp_line[0] = '#';
+  while (tmp_line[0] == '#' || tmp_line[0] == '=') {
+    if (tmp_line[0] == '=') add_annot_def(m_msp, tmp_line+1,1);
+    if (fgets(tmp_line, sizeof(tmp_line), annot_fd)==NULL) {
+      fprintf(stderr,"*** error [%s:%d] - premature annotation file end (%s)\n",
+	      __FILE__,__LINE__, annot_descr_file);
+      goto no_annots;
+    }
+  }
+
+  annot_seq_cnt = 0;
+
+  /* now read in the annotations, but only the first time if asked multiple times */
+  for (i=0; i<nbest; i++) {
+    if (!bestp_arr[i]->mseq->annot_req_flag) {
+      continue;
+    }
+    bestp_arr[i]->mseq->annot_req_flag = 0;
+
+    /* tmp_line holds the pending ">descr" line; cut it at EOL / first tab */
+    if ((bp=strchr(tmp_line,'\n'))!=NULL) *bp = '\0';
+    if ((bp=strchr(tmp_line,'\t'))!=NULL) *bp = '\0';
+    if (tmp_line[0] != '>' || strncmp(&tmp_line[1], bestp_arr[i]->mseq->bline, strlen(&tmp_line[1])) != 0) {
+      fprintf(stderr,"*** error [%s:%d] - %s description mismatch (%s:%s)\n",
+	      __FILE__,__LINE__,annot_descr_file, tmp_line, bestp_arr[i]->mseq->bline);
+      goto no_annots;
+    }
+
+    annot_p = next_annot_entry(annot_fd, tmp_line, sizeof(tmp_line), bestp_arr[i]->seq->annot_p, &mtmp_annot, m_msp, target);
+
+    if (annot_p) {
+      bestp_arr[i]->seq->annot_p = annot_p;
+      s_annot_to_aa1a(bestp_arr[i]->seq->l_offset + bestp_arr[i]->seq->l_off - 1,
+		      bestp_arr[i]->seq->n1, annot_p,m_msp->ann_arr, bestp_arr[i]->mseq->libstr);
+      annot_seq_cnt++;
+      /* the entry array now belongs to annot_p; force a fresh one next time */
+      mtmp_annot.tmp_arr_p = NULL;
+    }
+    else {
+      if (bestp_arr[i]->seq->annot_p) {
+	bestp_arr[i]->seq->annot_p->n_annot = 0;
+      }
+    }
+  }
+
+  if (mtmp_annot.tmp_arr_p) free(mtmp_annot.tmp_arr_p);
+
+  fclose(annot_fd);
+  if (sname[0]=='!') {
+    if (!debug) {
+      unlink(annot_descr_file);
+    }
+    free(annot_descr_file);
+  }
+  return annot_seq_cnt;
+
+ no_annots:
+  /* release whatever was acquired before the failure */
+  if (annot_fd) fclose(annot_fd);
+  if (mtmp_annot.tmp_arr_p) free(mtmp_annot.tmp_arr_p);
+
+  for (i=0; i<nbest; i++) {
+    if (bestp_arr[i]->seq->annot_p) {
+      if (bestp_arr[i]->seq->annot_p->n_annot > 0) {
+	bestp_arr[i]->seq->annot_p->n_annot = 0;
+      }
+    }
+  }
+  if (sname[0] == '!') {
+    /* annot_descr_file is heap-allocated only in the script case */
+    if (descr_is_tmp && !debug) { unlink(annot_descr_file); }
+    free(annot_descr_file);	/* free(NULL) is a no-op */
+  }
+  return -1;
+#endif
+}
+
+/* sort_annots -- shell sort of an array of annot_entry pointers into
+   ascending ->pos order, using a fixed gap sequence.  Only the pointer
+   array s_annot[0..n_annot-1] is permuted.
+*/
+void sort_annots(struct annot_entry **s_annot, int n_annot)
+{
+  int shell_gaps[6] = { 112, 48, 21, 7, 3, 1 };
+  int pass, step, cur, slot;
+  int held_pos;
+  struct annot_entry *held;
+
+  pass = 0;
+  while (pass < 6) {
+    step = shell_gaps[pass++];
+
+    /* gapped insertion sort for this step */
+    for (cur = step; cur < n_annot; cur++) {
+      held = s_annot[cur];
+      held_pos = held->pos;
+
+      /* slide larger entries up by one step until held fits */
+      for (slot = cur; slot >= step; slot -= step) {
+	if (!(s_annot[slot-step]->pos > held_pos)) break;
+	s_annot[slot] = s_annot[slot-step];
+      }
+      s_annot[slot] = held;
+    }
+  }
+}
+
+/* next_annot_entry -- read the annotation lines for one sequence.
+
+   On entry tmp_line holds the ">descr" line of the current sequence.
+   Lines are read from annot_fd until the next '>' line (left in
+   tmp_line for the caller) or EOF.  '#' lines are skipped, '=' lines go
+   to add_annot_def(), and every other line must be tab delimited:
+
+     pos<tab>label<tab>value-or-end<tab>comment
+
+   A '-' label (pos .. end range) is stored as a '[' entry followed by
+   a ']' entry.
+
+   Returns annot_p (allocated if NULL on entry) with the entry array,
+   the pos-sorted pointer array, n_annot and n_domains filled in, or
+   NULL if no annotation was read or an allocation failed.
+*/
+struct annot_str *
+next_annot_entry(FILE *annot_fd, char *tmp_line, int n_tmp_line, struct annot_str *annot_p,
+		 struct annot_mstr *mtmp_annot_p, struct mngmsg *m_msp, int target) {
+
+  char ctmp_label, ctmp_value, tmp_comment[MAX_STR], annot_acc[MAX_STR];
+  char *bp;
+  int f_pos, f_end;
+  int i_ann, l_doms, r_doms;
+  int n_annot = 0;
+
+  struct annot_entry *tmp_ann_entry_arr, *shrunk_arr, **s_tmp_ann_entry_arr;
+
+  SAFE_STRNCPY(annot_acc, tmp_line, sizeof(annot_acc));
+
+  if (init_tmp_annot(mtmp_annot_p, 32)==0) return NULL;
+
+  tmp_ann_entry_arr = mtmp_annot_p->tmp_arr_p;
+
+  /* read through each annotation in file */
+  while (fgets(tmp_line, n_tmp_line, annot_fd)!=NULL ) {
+    if (tmp_line[0] == '>') goto next_bline;	/* start of new annotation */
+    if (tmp_line[0] == '#') continue;		/* ignore comments */
+    if (tmp_line[0] == '=') {	/* symbol definition */
+      add_annot_def(m_msp, tmp_line+1,1);
+      continue;
+    }
+
+    if (n_annot >= mtmp_annot_p->max_annot - 1) {
+      /* try to expand annotation array */
+      if (update_tmp_annot(mtmp_annot_p)==0) {
+	return NULL;
+      }
+      tmp_ann_entry_arr = mtmp_annot_p->tmp_arr_p;
+    }
+
+    /* sscanf cannot give strings with blanks */
+    /* sscanf(tmp_line,"%d %c %c %s", &f_pos, &ctmp_label, &ctmp_value, tmp_comment); */
+    tmp_comment[0] = '\0';
+    ctmp_value = '\0';	/* default when the line has no third field */
+    if ((bp=strchr(tmp_line,'\r')) || (bp=strchr(tmp_line,'\n'))) *bp='\0';	/* clean up EOL */
+    if ((bp=strchr(tmp_line,'\t'))==NULL) {	/* fields MUST be tab delimited */
+      continue;
+    }
+
+    f_pos=atoi(tmp_line) - 1;	/* get first field -- f_pos, converted to 0-offset  */
+    f_end = f_pos;		/* default: single position */
+    ctmp_label = bp[1];	/* get second field -- ctmp_label */
+    if ((bp=strchr(bp+1,'\t'))!=NULL) {	/* next field could be f_end or ctmp_value */
+      if (ctmp_label == '-') { f_end = atoi(bp+1) -1; }
+      else {ctmp_value = bp[1];} 	/* have variant, not coordinate */
+      if ((bp=strchr(bp+1,'\t'))!=NULL) {		/* if last <tab>, get comment */
+	strncpy(tmp_comment,bp+1,sizeof(tmp_comment)-1);
+	tmp_comment[sizeof(tmp_comment)-1] = '\0';
+      }
+    }
+
+    tmp_ann_entry_arr[n_annot].pos = f_pos;
+    tmp_ann_entry_arr[n_annot].end = f_end;
+    tmp_ann_entry_arr[n_annot].label=ctmp_label;
+    tmp_ann_entry_arr[n_annot].value=ctmp_value;
+    tmp_ann_entry_arr[n_annot].comment = NULL;
+    tmp_ann_entry_arr[n_annot].target = target;
+    tmp_ann_entry_arr[n_annot].start = NULL;
+
+    if (tmp_comment[0]) {
+      if ((tmp_ann_entry_arr[n_annot].comment=(char *)calloc(strlen(tmp_comment)+1,sizeof(char)))!=NULL) {
+	strncpy(tmp_ann_entry_arr[n_annot].comment,tmp_comment,strlen(tmp_comment));
+      }
+    }
+    if (ctmp_label== 'V') {
+      /* map the .value from ascii to encoded residue */
+      /* this must be lascii, not qascii, because the script
+	 describes the library, not the query, and for FASTX/TFASTX,
+	 those are different */
+      /* index as unsigned char so a high-bit byte cannot index backwards */
+      tmp_ann_entry_arr[n_annot].value = lascii[(unsigned char)tmp_ann_entry_arr[n_annot].value];
+    }
+    else if (ctmp_label == '-') {	/* if ctmp_label == '-', have f_end, which must be added with ']' */
+      /* make sure start/stop characters are in annotation alphabet */
+      i_ann = add_annot_char(m_msp->ann_arr, '[');
+      if (i_ann > 0) {
+	qascii['['] = NANN + i_ann;
+	m_msp->ann_arr_def[i_ann] = NULL;
+      }
+
+      tmp_ann_entry_arr[n_annot].label='[';
+      n_annot++;
+
+      if (n_annot >= mtmp_annot_p->max_annot - 1) {
+	if (update_tmp_annot(mtmp_annot_p)==0) {
+	  return NULL;
+	}
+	tmp_ann_entry_arr = mtmp_annot_p->tmp_arr_p;
+      }
+
+      /* only required for start - stop annotations; requires sort
+	 later (which is not currently used) */
+      tmp_ann_entry_arr[n_annot].pos = f_end;
+      tmp_ann_entry_arr[n_annot].end = f_end;
+      tmp_ann_entry_arr[n_annot].label=']';
+      tmp_ann_entry_arr[n_annot].value=ctmp_value;
+      tmp_ann_entry_arr[n_annot].comment = NULL;
+      tmp_ann_entry_arr[n_annot].target = target;
+      /* provisional: re-linked below once the array has its final address */
+      tmp_ann_entry_arr[n_annot].start = &tmp_ann_entry_arr[n_annot-1];
+
+      i_ann = add_annot_char(m_msp->ann_arr, ']');
+      if (i_ann > 0) {
+	qascii[']'] = NANN + i_ann;
+	m_msp->ann_arr_def[i_ann] = NULL;
+      }
+    }
+    else {
+      /* every other label (including explicit '[' and ']') is simply
+	 registered in the annotation alphabet */
+      i_ann = add_annot_char(m_msp->ann_arr, ctmp_label);
+      if (i_ann > 0) {
+	qascii[(unsigned char)ctmp_label] = NANN + i_ann;
+	m_msp->ann_arr_def[i_ann] = NULL;
+      }
+    }
+    n_annot++;
+  }
+
+ next_bline:
+  if (n_annot == 0) {
+    return NULL;
+  }
+
+  /* we have annotations: trim the array to size.  If realloc() fails
+     the original (larger) block is still valid, so keep using it. */
+  shrunk_arr = (struct annot_entry *)realloc(tmp_ann_entry_arr, (n_annot+1)*sizeof(struct annot_entry));
+  if (shrunk_arr != NULL) { tmp_ann_entry_arr = shrunk_arr; }
+
+  /* the array may have moved (here or in update_tmp_annot()), so
+     re-link each generated ']' entry to the '[' entry just before it */
+  for (i_ann=1; i_ann < n_annot; i_ann++) {
+    if (tmp_ann_entry_arr[i_ann].start != NULL) {
+      tmp_ann_entry_arr[i_ann].start = &tmp_ann_entry_arr[i_ann-1];
+    }
+  }
+
+  if ((s_tmp_ann_entry_arr = (struct annot_entry **)calloc((n_annot+1),sizeof(struct annot_entry *)))==NULL) {
+    fprintf(stderr,"*** error [%s:%d] - cannot alloc s_tmp_ann_entry_arr[%d]",
+	    __FILE__,__LINE__, n_annot+1);
+    exit(1);
+  }
+
+  /* pair every domain start/stop */
+  /* (1) count number of domains/check for consistency */
+  l_doms = r_doms = 0;
+  for (i_ann=0; i_ann < n_annot; i_ann++) {
+    if (tmp_ann_entry_arr[i_ann].label == '[') l_doms++;
+    if (tmp_ann_entry_arr[i_ann].label == ']') r_doms++;
+  }
+  if (l_doms != r_doms) {
+    fprintf(stderr,"*** error [%s:%d] - unpaired domains: %s %d != %d\n",
+	    __FILE__,__LINE__, annot_acc, l_doms, r_doms);
+#ifdef DEBUG
+    for (i_ann=0; i_ann < n_annot; i_ann++) {
+      if (tmp_ann_entry_arr[i_ann].label == '[')
+	fprintf(stderr, "%ld %c %s\n",tmp_ann_entry_arr[i_ann].pos,tmp_ann_entry_arr[i_ann].label,tmp_ann_entry_arr[i_ann].comment);
+      if (tmp_ann_entry_arr[i_ann].label == ']')
+	fprintf(stderr, "%ld %c\n",tmp_ann_entry_arr[i_ann].pos,tmp_ann_entry_arr[i_ann].label);
+    }
+#endif
+  }
+  else if (l_doms > 0) {
+    /* NOTE(review): tmp_domain_entry_arr is not declared in this
+       function and is not stored in annot_p below -- confirm where it
+       is declared and who owns/frees it */
+    if ((tmp_domain_entry_arr = (struct annot_entry *)calloc((l_doms+1),sizeof(struct annot_entry)))==NULL) {
+      fprintf(stderr,"*** error [%s:%d] - cannot alloc s_tmp_ann_entry_arr[%d]",
+	      __FILE__,__LINE__, l_doms+1);
+    }
+    else {
+      l_doms = 0;
+      /* only the n_annot filled entries are examined; slot [n_annot]
+	 was never written */
+      for (i_ann=0; i_ann < n_annot; i_ann++) {
+	if (tmp_ann_entry_arr[i_ann].label == '[') {
+	  tmp_domain_entry_arr[l_doms].pos = tmp_ann_entry_arr[i_ann].pos;
+	  tmp_domain_entry_arr[l_doms].label = '-';
+	  tmp_domain_entry_arr[l_doms].comment = tmp_ann_entry_arr[i_ann].comment;
+	}
+	else if (tmp_ann_entry_arr[i_ann].label == ']') {
+	  tmp_domain_entry_arr[l_doms].end = tmp_ann_entry_arr[i_ann].pos;
+	  l_doms++;
+	}
+      }
+    }
+  }
+
+  /* pointer array used for sorting; the entries themselves stay in file order */
+  for (i_ann=0; i_ann < n_annot+1; i_ann++) {
+    s_tmp_ann_entry_arr[i_ann] = &tmp_ann_entry_arr[i_ann];
+  }
+
+  sort_annots(s_tmp_ann_entry_arr,n_annot);
+
+  /* now allocate an annot_p if necessary, and link tmp_ann_entry_arr to it */
+  if (annot_p || (annot_p = calloc(1,sizeof(struct annot_str)))!=NULL) {
+    annot_p->annot_arr_p = tmp_ann_entry_arr;
+    annot_p->s_annot_arr_p = s_tmp_ann_entry_arr;
+    annot_p->n_annot = n_annot;
+    annot_p->n_domains = l_doms;
+    /* the caller sets mtmp_annot_p->tmp_arr_p to NULL to re-initialize */
+  }
+  return annot_p;
+}
+
+
+/* **************************************************************** */
+/* add_annot_char(ann_arr, ctmp_label) --
+
+   (1) add annotation character to ann_arr if not present
+   (2) return i_ann (the character's index in ann_arr) if added;
+       return 0 if it was already present or ann_arr is full
+*/
+/* **************************************************************** */
+
+int
+add_annot_char(unsigned char *ann_arr, char ctmp_label) {
+  int n_used;
+
+  /* an empty alphabet is seeded with a single blank, so index 0 is
+     never handed out for a real annotation character */
+  if (ann_arr[0] == '\0') {
+    ann_arr[0] = ' ';
+    ann_arr[1] = '\0';
+  }
+
+  /* already known: nothing to add */
+  if (strchr((char *)ann_arr,ctmp_label)!=NULL) {
+    return 0;
+  }
+
+  /* no room for another character */
+  if (strlen((char *)ann_arr) >= MAX_FN) {
+    fprintf(stderr,"*** error [%s:%d] -  too many annotation characters: len(%s) + %c > %d\n",
+	    __FILE__, __LINE__, ann_arr, ctmp_label, MAX_FN-1);
+    return 0;
+  }
+
+  /* append the character and keep the string terminated */
+  n_used = strlen((char *)ann_arr);
+  ann_arr[n_used] = ctmp_label;
+  ann_arr[n_used+1] = '\0';
+  return n_used;
+}
+
+/* **************************************************************** */
+/* get_annot -- produces fasta file from m_msp->sname script
+   (modified 20-Sept-2012 to not use intermediate file)
+
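+   Steps marked '#' describe the older intermediate-file version and are
+   no longer performed; they are kept for reference:
+   # (1) generate a temporary file name
+   # (2) write out one bline (or the portion that includes the accession)
+   # (3) run sname[] script against temporary file, producing table of annotations
+   Current steps:
+   (1) run script bline_id (replaces the three '#' steps above)
+   (4) read in the annotations and put them in struct annot_entry;
+   (5) modify *annot_p to point to structure
+   (6) return number of annotations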
+*/
+/* **************************************************************** */
+
+/* get_annot -- read the annotations for a single sequence.
+
+   sname[0] selects the source:
+     '!'  run  sname+1 "bline_descr" length  through popen() and read
+          its output
+     '<'  read the file sname+1
+
+   On success *annot_p points to the annotation structure produced by
+   next_annot_entry() and s_annot_to_aa1a(offset, n1, ...) has been
+   applied to it.
+
+   Returns the number of annotations, 0 if none were found, or -1 on
+   error.  When UNIX is not defined the function returns 0.
+*/
+int
+get_annot(char *sname, struct mngmsg *m_msp, char *bline, long offset, int n1, struct annot_str **annot_p,
+	  int target, int debug) {
+
+  char tmp_line[MAX_STR];
+  char bline_descr[MAX_STR];
+  char annot_data_file[MAX_LSTR];
+  char annot_script[MAX_LSTR];
+  long q_offset;
+  int n_script;
+
+  char *bp;
+  FILE *annot_fd=NULL;		/* file for annot accessions */
+  struct annot_mstr mtmp_annot;
+
+#ifndef UNIX
+  return 0;
+#else
+
+  if (sname[0] == '!') {
+    /* popen implementation */
+
+    annot_data_file[0] = '\0';	/* only used in error messages here */
+
+    if (bline[0] == '>') {
+      SAFE_STRNCPY(bline_descr, bline+1,sizeof(bline_descr));
+    }
+    else {
+      SAFE_STRNCPY(bline_descr, bline,sizeof(bline_descr));
+    }
+    /* keep only the part up to the first blank after DESCR_OFFSET */
+    if ((strlen(bline_descr) > DESCR_OFFSET) &&
+	(bp=strchr(bline_descr+DESCR_OFFSET,' '))!=NULL) {*bp = '\0';}
+
+    q_offset = m_msp->q_offset + m_msp->q_off - 1;
+    if (q_offset < 0) { q_offset = 0;}
+
+    /* NOTE(review): bline_descr is passed to the shell inside double
+       quotes; a description containing ", ` or $ would be interpreted
+       by the shell -- confirm descriptions are trusted */
+    n_script = snprintf(annot_script, sizeof(annot_script), "%s \"%s\" %ld",
+			sname+1, bline_descr,q_offset+m_msp->n0);
+    if (n_script < 0 || n_script >= (int)sizeof(annot_script)) {
+      /* never run a truncated command line */
+      fprintf(stderr,"*** error [%s:%d] - annotation command too long: %s\n",
+	      __FILE__, __LINE__, sname+1);
+      goto no_annots;
+    }
+
+    annot_fd = popen(annot_script,"r");
+  }
+  else if (sname[0] == '<') {
+    SAFE_STRNCPY(annot_data_file,sname+1,sizeof(annot_data_file));
+    annot_fd=fopen(annot_data_file,"r");
+  }
+  else {
+    fprintf(stderr,"*** error [%s:%d] - %s not script (!) or file (<)\n",__FILE__, __LINE__, sname);
+    goto no_annots;
+  }
+
+  if (!annot_fd) {
+    goto no_annots;
+  }
+
+  /* read the annotations into the array */
+
+  /* read #comments, =annot_defs at beginning of file */
+  tmp_line[0] = '#';
+  while (tmp_line[0] == '#' || tmp_line[0] == '=') {
+    if (tmp_line[0] == '=') add_annot_def(m_msp, tmp_line+1,1);
+    if (fgets(tmp_line, sizeof(tmp_line), annot_fd)==NULL) {
+      fprintf(stderr,"*** error [%s:%d] - premature annotation file end (%s)\n",
+	      __FILE__,__LINE__, annot_data_file);
+      goto no_annots;
+    }
+  }
+
+  /* set mtmp_annot to be initialized */
+  mtmp_annot.tmp_arr_p = NULL;
+  mtmp_annot.max_annot = 0;
+
+  /* the first non-comment line must be the ">descr" line */
+  if (tmp_line[0] != '>') {
+    fprintf(stderr,"*** error [%s:%d] - no %s description: [%s]\n",
+	    __FILE__,__LINE__,annot_data_file, tmp_line);
+    goto no_annots;
+  }
+
+  *annot_p = next_annot_entry(annot_fd, tmp_line, sizeof(tmp_line), *annot_p, &mtmp_annot, m_msp, target);
+
+  /* a popen() stream must be closed with pclose() */
+  if (sname[0] == '!') {
+    pclose(annot_fd);
+  }
+  else {
+    fclose(annot_fd);
+  }
+
+  /* now allocate an annot_p if necessary, and link tmp_ann_entry_arr to it */
+  if (*annot_p) {
+    s_annot_to_aa1a(offset, n1, (*annot_p),m_msp->ann_arr,"get_annot");
+    return (*annot_p)->n_annot;
+  }
+  else {
+    if (mtmp_annot.tmp_arr_p) free(mtmp_annot.tmp_arr_p);
+    return 0;
+  }
+
+ no_annots:
+  /* close the stream if a failure happened after it was opened */
+  if (annot_fd) {
+    if (sname[0] == '!') { pclose(annot_fd); }
+    else { fclose(annot_fd); }
+  }
+  return -1;
+#endif
+}
+
+/* s_annot_to_aa1a -- takes an annot_entry[] and converts it to an *aa1_ann
+ */
+/* Builds a per-residue annotation array for annot_p.
+
+   A zero-filled array of n1+2 bytes is allocated.  For every entry in
+   annot_p->annot_arr_p whose label is in ann_arr, the code
+   qascii[label] - NANN is stored at index (pos - offset).  'V' entries
+   are skipped.  '-' entries mark pos and end with the '[' and ']'
+   codes.  The array is stored in annot_p->aa1_ann.
+
+   tmp_line is only used to identify the sequence in error messages.
+   If the allocation fails, annot_p->aa1_ann is left unchanged.
+
+   NOTE(review): any previous annot_p->aa1_ann is overwritten without
+   being freed -- confirm ownership with the callers.
+*/
+void
+s_annot_to_aa1a(long offset, int n1, struct annot_str *annot_p, unsigned char *ann_arr, char *tmp_line) {
+  unsigned char *aa1a_tmp;
+  int i;
+  struct annot_entry *this_annot;
+
+  if ((aa1a_tmp = (unsigned char *)calloc(n1+2,sizeof(char)))==NULL) {
+    fprintf(stderr,"*** error [%s:%d] - cannot allocate aa1a_ann[%d] array\n",
+	    __FILE__, __LINE__, n1);
+    return;
+  }
+
+  if (offset < 0) offset++;
+
+  for (i=0; i < annot_p->n_annot; i++) {
+    this_annot = &annot_p->annot_arr_p[i];
+    /* skip VAR labels */
+    if (this_annot->label == 'V') { continue; }
+    if (this_annot->label == '-') {
+      /* indices are used as stored (no offset); only write inside the
+	 n1+2 bytes that were allocated */
+      if (this_annot->pos >= 0 && this_annot->pos < n1+2) {
+	aa1a_tmp[this_annot->pos]=qascii['['] - NANN;
+      }
+      if (this_annot->end >= 0 && this_annot->end < n1+2) {
+	aa1a_tmp[this_annot->end]=qascii[']'] - NANN;
+      }
+      continue;
+    }
+    if (strchr((char *)ann_arr, this_annot->label)==NULL) {continue;}
+    if (this_annot->pos - offset < n1) {
+      if (this_annot->pos >= offset) {	/* not an error, but annotation must be in range */
+	/* index qascii as unsigned char so a high-bit label cannot index backwards */
+	aa1a_tmp[this_annot->pos - offset]=qascii[(unsigned char)this_annot->label] - NANN;
+      }
+    }
+    else {
+      fprintf(stderr, "*** error [%s:%d] - this_annot->pos:[%ld - %ld] out of range: %d : %s\n",
+	      __FILE__, __LINE__, this_annot->pos,offset, n1, tmp_line);
+    }
+  }
+  annot_p->aa1_ann = aa1a_tmp;
+}
+
+/* save_best captures much of the complexity of saving the best scores
+   and appropriately sampling the scores for statistical analysis. It
+   does the following:
+
+   (1) update s_info counts for functions like fasta/x/y that don't
+       optimize every score
+
+   (2) for every result in the buffer:
+       (a) decide if it should be used for statistical sampling
+       (b) if the number of samples > MAX_STATS, then run
+           process_hist() and update all the zscores
+       (c) reset everything for next sequence
+
+   (3) must ensure that -BIGNUM are never in best[]
+
+*/
+
+#include "thr_buf_structs.h"
+#ifndef PCOMPLIB
+#define RESULTS_BUF reader_buf
+#define XTERNAL
+#include "thr_bufs2.h"
+#else
+#define RESULTS_BUF worker_buf
+#include "pcomp_bufs.h"
+#endif
+
+extern char *prog_func;		/* function label */
+extern int fa_max_workers;
+extern struct buf_head *lib_buf2_list;
+#ifdef DEBUG
+void check_rbuf(struct buf_head *cur_buf);
+#endif
+extern void get_rbuf(struct buf_head **lib_buf, int max_work_buf);
+extern void put_rbuf(struct buf_head *lib_buf, int max_work_buf);
+extern void wait_rbuf(int max_work_buf);
+extern void rbuf_done(int nthreads);
+extern void put_rbuf_done(int nthreads, struct buf_head *lib_buf,
+			  int max_work_buf);
+extern int
+process_hist(struct stat_str *sptr, int nstats,
+	     const struct mngmsg *m_msg,
+	     struct pstruct *ppst,
+	     struct hist_str *hist, void **pstat_void, struct score_count_s *s_info, int do_hist);
+
+extern void addhistz(double, struct hist_str *); /* scaleswn.c */
+void selectbestz(struct beststr **, int, int );
+extern double find_z(int score, double escore, int length, double comp, void *);
+extern double zs_to_E(double zs,int n1, int dnaseq, long entries, struct db_str db);
+extern struct beststr **bestp_arr;	/* array of pointers */
+extern int nbest;
+extern int nstats, nqstats, nrstats, pre_nstats, kstats, shuff_tot, sstats;
+extern double zbestcut;	/* cut off for best z-score */
+extern int bestfull;	/* index for selectbest() */
+extern int stats_done;	/* flag for z-value processing */
+extern void *rand_state;
+extern struct stat_str *stats; /* array of scores for statistics from real
+			   (or shuffled) sequences*/
+extern struct stat_str *qstats;	/* array of scores for shuffled query stats */
+extern struct stat_str *rstats;	/* array of scores from shuffled library */
+
+/* in the current version (fasta_35_01) save_best is used by both
+   threaded and unthreaded versions */
+
+/* COPY_RST_P(d,s) -- copy the score fields of s->rst into d->rst.
+   d and s are pointers to structures with an rst member.  Both
+   arguments are parenthesized so that expression arguments expand
+   correctly; each is evaluated several times, so they must have no
+   side effects.  The braces-only form is kept so that existing uses
+   written with or without a trailing ';' still compile. */
+#define COPY_RST_P(d,s) 		\
+{ (d)->rst.score[0] = (s)->rst.score[0];	\
+  (d)->rst.score[1] = (s)->rst.score[1];	\
+  (d)->rst.score[2] = (s)->rst.score[2];	\
+  (d)->rst.valid_stat = (s)->rst.valid_stat; \
+  (d)->rst.comp = (s)->rst.comp;		\
+  (d)->rst.H = (s)->rst.H;			\
+  (d)->rst.escore = (s)->rst.escore;	\
+  (d)->rst.segnum = (s)->rst.segnum;	\
+  (d)->rst.seglen = (s)->rst.seglen;	\
+}
+
+/* save_best -- process one buffer of results (lib_bhead_p).
+
+   For each result that has a score (rst.score[0] != -BIGNUM):
+     - the best real/shuffled scores are tracked until the entry whose
+       frame equals m_msp->nitt1 is seen, at which point ldb counts are
+       updated, a stats[] sample may be saved, and the trackers are reset
+     - once nstats reaches MAX_STATS, process_hist() is run (once) and
+       zscores are assigned to bestp_arr[0..nstats-1]
+     - results with zscore > zbestcut are copied into bestp_arr[]
+
+   Updates the file-level nbest, nstats, nqstats, sstats, kstats,
+   shuff_tot, zbestcut, bestfull and stats_done variables.  If fdata is
+   non-NULL, one line per result is written to it.
+*/
+void
+save_best(struct buf_head *lib_bhead_p,
+	  const struct mngmsg *m_msp, struct pstruct *ppst,
+	  struct db_str *ldb, FILE *fdata,
+	  struct hist_str *histp, void **pstat_voidp,
+	  struct score_count_s *s_info)
+{
+  double zscore;
+  int i_score;
+  struct beststr *bbp;
+  struct buf2_data_s *rbuf_dp, *lib_buf2_dp;
+  struct buf2_res_s *rbuf_rp, *lib_buf2_rp;
+  int i, t_best, t_rbest, t_qrbest, tm_best, t_n1, sc_ix;
+  int t_valid_stat, tr_valid_stat, use_shuff, zsflag_save;
+  double e_score, tm_escore, t_rescore, t_qrescore;
+  int buf2_cnt;
+
+  /* nothing to do for a buffer without results */
+  if (!lib_bhead_p->hdr.have_results) return;
+  if ((buf2_cnt=lib_bhead_p->hdr.buf2_cnt) <= 0) return;
+  lib_buf2_dp = lib_bhead_p->buf2_data;
+  lib_buf2_rp = lib_bhead_p->buf2_res;
+
+  /* accumulate this buffer's counters into the running totals */
+  shuff_tot += lib_bhead_p->hdr.shuff_cnt;
+  s_info->s_cnt[0] += lib_bhead_p->s_cnt_info.s_cnt[0];
+  s_info->s_cnt[1] += lib_bhead_p->s_cnt_info.s_cnt[1];
+  s_info->s_cnt[2] += lib_bhead_p->s_cnt_info.s_cnt[2];
+  s_info->tot_scores += lib_bhead_p->s_cnt_info.tot_scores;;
+
+  sc_ix = ppst->score_ix;
+
+  /* trackers for the best scores seen since the last reset */
+  t_best = t_rbest = t_qrbest = -BIGNUM;
+  tm_escore = t_rescore = t_qrescore = FLT_MAX;
+  t_valid_stat = tr_valid_stat = 0;
+  /* zsflag in [10,20) selects the shuffled (r_rst) scores for stats[] */
+  if (ppst->zsflag >= 10 && ppst->zsflag < 20) { use_shuff = 1;}
+  else { use_shuff = 0;}
+
+#ifdef DEBUG
+  if (fdata) {
+    fprintf(fdata,">save_best: %d\n",buf2_cnt);
+  }
+#endif
+
+  /* buf2_cnt already holds this value from the check above */
+  buf2_cnt = lib_bhead_p->hdr.buf2_cnt;
+  while (buf2_cnt--) { /* count down the number of results */
+    rbuf_rp = lib_buf2_rp++;	/* step through the results buffer */
+    rbuf_dp = lib_buf2_dp++;	/* step through the data buffer */
+
+    /* perhaps should use explicit flag to indicate no score */
+    if (rbuf_rp->rst.score[0] == -BIGNUM) continue;
+
+    /* i_score: current raw sorting score */
+    i_score = rbuf_rp->rst.score[sc_ix];
+    /* e_score, current escore */
+    e_score = rbuf_rp->rst.escore;
+
+    /* this should be done in the thread, and a sorted set of indexes
+       should be produced by the thread, so we just go down the list
+       to the zscore threshold */
+    /* until statistics are available, the raw score stands in for the zscore */
+    zscore = (double)i_score;
+    if (stats_done) {
+      zscore=find_z(i_score, e_score, rbuf_dp->seq->n1,(double)rbuf_rp->rst.comp,
+			  *pstat_voidp);
+    }
+
+    /* we have complex logic to decide:
+       (a) for multiframe results, which is the best
+       (b) information about valid stats
+       we should simply return a stats array where all this is figured
+       out in the thread.
+     */
+    t_n1 = rbuf_dp->seq->n1;
+    if (i_score > t_best) tm_best = t_best = i_score;
+    if (e_score < tm_escore) tm_escore = e_score;
+    if (rbuf_rp->rst.valid_stat > t_valid_stat) {
+      t_valid_stat = 1;
+    }
+
+    /* this stuff happens only for fasts/fastm/fastf
+       again, the t_qrbest stuff should be done in the thread
+       rather than check for every hit, run through the loop
+       only if necessary.
+     */
+    if (m_msp->qshuffle) {
+      if (rbuf_rp->qr_score > t_qrbest)
+	t_qrbest = rbuf_rp->qr_score;
+      if (rbuf_rp->qr_escore < t_qrescore)
+	t_qrescore = rbuf_rp->qr_escore;
+
+      if (rbuf_dp->frame == m_msp->nitt1 && t_qrbest > 0 && nqstats < m_msp->shuff_max) {
+	qstats[nqstats].n1 = rbuf_dp->seq->n1;	/* save the best score */
+	qstats[nqstats].comp =  rbuf_rp->rst.comp;
+	qstats[nqstats].H = rbuf_rp->rst.H;
+	qstats[nqstats].escore = t_qrescore;
+	qstats[nqstats++].score = t_qrbest;
+	t_qrbest = -BIGNUM;	/* reset t_qrbest, t_qrescore */
+	t_qrescore = FLT_MAX;
+      }
+    }	/* m_msp->qshuffle */
+
+    if (use_shuff) {
+      /* this check is required because some sequences scheduled to be
+	 used for statistics may not in fact be returning a score (if
+	 they are outside the -M range, for example.
+       */
+      if (rbuf_rp->r_rst.score[0] == -BIGNUM) { tr_valid_stat = 0; }
+      if (rbuf_rp->r_rst.valid_stat > tr_valid_stat) {
+	tr_valid_stat = 1;
+      }
+      if (rbuf_rp->r_rst.score[sc_ix] > t_rbest) {
+	t_rbest = rbuf_rp->r_rst.score[sc_ix];
+	t_rescore = rbuf_rp->r_rst.escore;
+      }
+    }
+
+    /* need to look for frame 0 if TFASTA, then save stats at frame 6 */
+    /* dump one line of raw scores per result; negative shuffled scores are written as -1 */
+    if (fdata) {
+      fprintf(fdata,
+	      "%-12s %6d %d %.5f %.5f %4d %4d %4d %2d %2d %4d %4d %4d %2d %2d %5d %8lld\n",
+	      rbuf_dp->mseq->libstr, rbuf_dp->seq->n1,rbuf_dp->frame,rbuf_rp->rst.comp,rbuf_rp->rst.H,
+	      rbuf_rp->rst.score[0],rbuf_rp->rst.score[1],rbuf_rp->rst.score[2],
+	      t_valid_stat, rbuf_rp->rst.alg_info,
+	      (rbuf_rp->r_rst.score[0]<0 ? -1 : rbuf_rp->r_rst.score[0]),
+	      (rbuf_rp->r_rst.score[1]<0 ? -1 : rbuf_rp->r_rst.score[1]),
+	      (rbuf_rp->r_rst.score[2]<0 ? -1 : rbuf_rp->r_rst.score[2]),
+	      tr_valid_stat, rbuf_rp->r_rst.alg_info,
+	      rbuf_dp->stats_idx, rbuf_dp->mseq->lseek);
+    }
+
+    /* statistics done for best score of set */
+
+    /* the entry with frame == nitt1 is counted once in the library totals */
+    if (rbuf_dp->frame == m_msp->nitt1) {
+      ldb->entries++;
+      ldb->length += t_n1;
+      /* NOTE(review): this can only be true if ldb->length is wider than long -- confirm its type */
+      if (ldb->length > LONG_MAX) {
+	ldb->length -= LONG_MAX; ldb->carry++;
+      }
+    }
+
+    if (rbuf_dp->frame == m_msp->nitt1 && ppst->zsflag >= 0) {
+      /* if this sample should be used for statistics */
+      if (use_shuff) t_valid_stat = tr_valid_stat;
+      if (t_valid_stat) {
+	/* we've got our initial MAX_STATS values */
+	if (nstats >= MAX_STATS) {
+	  if (!stats_done) {
+	    /* process_hist() is called with zsflag reduced by 20 when it is > 20;
+	       the original value is restored afterwards */
+	    zsflag_save = ppst->zsflag;
+	    if (ppst->zsflag > 20) {
+	      ppst->zsflag -= 20;
+	    }
+	    ppst->zsflag_f = process_hist(stats,nstats,m_msp, ppst,
+					  histp, pstat_voidp,s_info, 0);
+	    ppst->zsflag = zsflag_save;
+	    kstats = nstats;
+	    if (ppst->zsflag >= 0) {	/* this is redundant, but rare */
+	      stats_done = 1;
+	      /* NOTE(review): loops over nstats entries of bestp_arr[] -- confirm
+		 that at least nstats entries are populated at this point */
+	      for (i=0; i< nstats; i++) {
+		bestp_arr[i]->zscore =
+		  find_z(bestp_arr[i]->rst.score[ppst->score_ix],
+			 bestp_arr[i]->rst.escore, bestp_arr[i]->seq->n1,
+			 bestp_arr[i]->rst.comp, *pstat_voidp);
+	      }
+	    }
+	  }
+	}
+	else {
+	  /* this logic allows stats_idx to be over-ruled for searches
+	     where every query does not generate a score */
+	  rbuf_dp->stats_idx = nstats;
+	  nstats++;
+	}
+      }
+
+      if (rbuf_dp->stats_idx >= 0 && t_valid_stat) {
+	if (rbuf_dp->stats_idx >= MAX_STATS || nstats > MAX_STATS) {
+	  fprintf(stderr, "*** error [%s:%d] - nstats index [%d] out of range [%d,%d]\n",
+		  __FILE__, __LINE__,
+		  rbuf_dp->stats_idx, nstats,MAX_STATS);
+	}
+	else {  /* stats_idx is in range */
+	  sstats++;
+	  stats[rbuf_dp->stats_idx].n1 = t_n1;
+	  stats[rbuf_dp->stats_idx].comp = rbuf_rp->rst.comp;
+	  stats[rbuf_dp->stats_idx].H = rbuf_rp->rst.H;
+	  if (use_shuff) { /* use shuffled score */
+	    stats[rbuf_dp->stats_idx].escore  = t_rescore;
+	    stats[rbuf_dp->stats_idx].score = t_rbest;
+	  }
+	  else { /* real score, not shuffled */
+	    stats[rbuf_dp->stats_idx].escore  = tm_escore;
+	    stats[rbuf_dp->stats_idx].score = tm_best;
+	  }
+	} /* end stats_idx in range */
+      }	/* end have valid stats_idx */
+
+      if (t_valid_stat && stats_done && histp) {
+	addhistz(find_z(t_best, tm_escore, rbuf_dp->seq->n1, (double) rbuf_rp->rst.comp,
+			    *pstat_voidp), histp);
+      }
+      /* reset best scores */
+      /* (tm_best is not reset here; it is reassigned together with t_best above) */
+      t_best = t_rbest = -BIGNUM;
+      tm_escore = t_rescore = FLT_MAX;
+      t_valid_stat = tr_valid_stat = 0;
+    }
+
+    /*
+    if (rbuf_rp->rst.score[ppst->score_ix] > 200) {
+      fprintf(stderr, "high score[%d]: %s %d: %d\n", rbuf_dp->seq->index,
+	      rbuf_dp->mseq->libstr, rbuf_dp->seq->n1, rbuf_rp->rst.score[ppst->score_ix]);
+    }
+    */
+
+    if (zscore > zbestcut) {
+      /* bestp_arr[] is full: select on zscore, drop MAX_BEST/4 entries
+	 and raise the cutoff to the zscore at the new boundary */
+      if (nbest >= MAX_BEST) {
+	bestfull = nbest-MAX_BEST/4;
+	selectbestz(bestp_arr,bestfull-1,nbest);
+	zbestcut = bestp_arr[bestfull-1]->zscore;
+	nbest = bestfull;
+      }
+      bbp = bestp_arr[nbest++];
+
+      COPY_RST_P(bbp, rbuf_rp);
+
+      bbp->seq = rbuf_dp->seq;
+      bbp->mseq = rbuf_dp->mseq;
+      bbp->n1 = rbuf_dp->seq->n1;
+#ifdef DEBUG
+      bbp->adler32_crc = rbuf_dp->seq->adler32_crc;
+#endif
+      /* rbuf_dp->best_save is set after a rbuf_dp is entered into best_str */
+      /* NOTE(review): when best_save is NULL, bbp->bbp_link is left at
+	 whatever value the bestp_arr[] slot already held -- confirm intended */
+      if (rbuf_dp->best_save) {
+	/* a previous rbuf_dp->seq is in best_str at best_save */
+	if (rbuf_dp->best_save->seq == rbuf_dp->seq) {
+	  /* the best_save->seq matches the rbuf_dp->seq */
+	  bbp->bbp_link = rbuf_dp->best_save;
+	  /* bbp_link tells where this ->seq can be found */
+	}
+	else {
+	  bbp->bbp_link = NULL;
+	}
+      }
+      rbuf_dp->best_save = bbp;
+      lib_bhead_p->hdr.have_best_save = 1;
+      bbp->zscore = zscore;
+      bbp->frame = rbuf_dp->frame;
+    }
+  }
+}
+
+void
+save_best2(struct buf_head *lib_bhead_p,
+	   const struct mngmsg *m_msp, struct pstruct *ppst,
+	   struct db_str *ldb, FILE *fdata,
+	   struct hist_str *histp, void **pstat_voidp,
+	   struct score_count_s *s_info)
+{
+  double zscore;
+  int i_score;
+  struct beststr *bbp;
+  struct buf2_data_s *rbuf_dp, *lib_buf2_dp;
+  struct buf2_res_s *rbuf_rp, *lib_buf2_rp;
+  int i, sc_ix;
+  int t_valid_stat, use_shuff, zsflag_save;
+  double e_score;
+  int buf2_cnt;
+
+  if (!lib_bhead_p->hdr.have_results) return;
+  if ((buf2_cnt = lib_bhead_p->hdr.buf2_cnt) <= 0) return;
+
+  /*
+#ifdef DEBUG
+  fprintf(stderr," save_best2: lib_bhead_p->buf2_data[0]->mseq->index/lseek: %d,%lld\n",
+	  lib_bhead_p->buf2_data[0].mseq->index,lib_bhead_p->buf2_data[0].mseq->lseek);
+#endif
+  */
+  if (ppst->zsflag >= 10 && ppst->zsflag < 20) { use_shuff = 1;}
+  else {use_shuff = 0;}
+
+  shuff_tot += lib_bhead_p->hdr.shuff_cnt;
+  s_info->s_cnt[0] += lib_bhead_p->s_cnt_info.s_cnt[0];
+  s_info->s_cnt[1] += lib_bhead_p->s_cnt_info.s_cnt[1];
+  s_info->s_cnt[2] += lib_bhead_p->s_cnt_info.s_cnt[2];
+  s_info->tot_scores += lib_bhead_p->s_cnt_info.tot_scores;;
+  sc_ix = ppst->score_ix;
+
+  /* save the raw data if requested */
+  if (fdata) {
+#ifdef DEBUG
+    fprintf(fdata,">save_best: %d\n",buf2_cnt);
+#endif
+    lib_buf2_dp = lib_bhead_p->buf2_data;
+    lib_buf2_rp = lib_bhead_p->buf2_res;
+    buf2_cnt = lib_bhead_p->hdr.buf2_cnt;
+    while (buf2_cnt--) { /* count down the number of results */
+      
+      rbuf_rp = lib_buf2_rp++;	/* step through the results buffer */
+      rbuf_dp = lib_buf2_dp++;	/* step through the data buffer */
+
+      /* perhaps should use explicit flag to indicate no score */
+      if (rbuf_rp->rst.score[0] == -BIGNUM) continue;
+
+      fprintf(fdata,
+	      "%-12s %6d %d %.5f %.5f %4d %4d %4d %2d %2d %4d %4d %4d %2d %2d %5d %8lld\n",
+	      rbuf_dp->mseq->libstr, rbuf_dp->seq->n1,rbuf_dp->frame,rbuf_rp->rst.comp,rbuf_rp->rst.H,
+	      rbuf_rp->rst.score[0],rbuf_rp->rst.score[1],rbuf_rp->rst.score[2],
+	      rbuf_rp->is_valid_stat, rbuf_rp->rst.alg_info,
+	      (rbuf_rp->r_rst.score[0]<0 ? -1 : rbuf_rp->r_rst.score[0]),
+	      (rbuf_rp->r_rst.score[1]<0 ? -1 : rbuf_rp->r_rst.score[1]),
+	      (rbuf_rp->r_rst.score[2]<0 ? -1 : rbuf_rp->r_rst.score[2]),
+	      rbuf_rp->is_valid_stat, rbuf_rp->r_rst.alg_info,
+	      rbuf_dp->stats_idx, rbuf_dp->mseq->lseek);
+    }
+  }
+
+  /* save the high-scoring data */
+  lib_buf2_rp = lib_bhead_p->buf2_res;
+  lib_buf2_dp = lib_bhead_p->buf2_data;
+  buf2_cnt = lib_bhead_p->hdr.buf2_cnt;
+  while (buf2_cnt--) {
+    rbuf_rp = lib_buf2_rp++;
+    rbuf_dp = lib_buf2_dp++;
+
+    /* perhaps should use explicit flag to indicate no score */
+    if (rbuf_rp->rst.score[0] == -BIGNUM) continue;
+
+    /* i_score: current raw sorting score */
+    i_score = rbuf_rp->rst.score[sc_ix];
+    /* e_score, current escore */
+    e_score = rbuf_rp->rst.escore;
+    /* this should be done in the thread, and a sorted set of indexes
+       should be produced by the thread, so we just go down the list
+       to the zscore threshold */
+    zscore = (double)i_score;
+    if (stats_done) {
+      zscore=find_z(i_score, e_score, rbuf_dp->seq->n1,(double)rbuf_rp->rst.comp,
+			  *pstat_voidp);
+    }
+
+    if (rbuf_dp->frame == m_msp->nitt1) {
+      ldb->entries++;
+      ldb->length += rbuf_dp->seq->n1;
+      if (ldb->length > LONG_MAX) {
+	ldb->length -= LONG_MAX; ldb->carry++;
+      }
+    }
+
+    if (zscore > zbestcut) {
+      if (nbest >= MAX_BEST) {
+	bestfull = nbest-MAX_BEST/4;
+	selectbestz(bestp_arr,bestfull-1,nbest);
+	zbestcut = bestp_arr[bestfull-1]->zscore;
+	nbest = bestfull;
+      }
+      bbp = bestp_arr[nbest++];
+
+      COPY_RST_P(bbp, rbuf_rp);
+
+      bbp->seq = rbuf_dp->seq;
+      bbp->mseq = rbuf_dp->mseq;
+      bbp->n1 = rbuf_dp->seq->n1;
+#ifdef DEBUG
+      bbp->adler32_crc = rbuf_dp->seq->adler32_crc;
+#endif
+      /* rbuf_dp->best_save is set after a rbuf_dp is entered into best_str */
+      if (rbuf_dp->best_save) {
+	/* a previous rbuf_dp->seq is in best_str at best_save */
+	if (rbuf_dp->best_save->seq == rbuf_dp->seq) {
+	  /* the best_save->seq matches the rbuf_dp->seq */
+	  bbp->bbp_link = rbuf_dp->best_save;
+	  /* bbp_link tells where this ->seq can be found */
+	}
+	else {
+	  bbp->bbp_link = NULL;
+	}
+      }
+      rbuf_dp->best_save = bbp;
+      lib_bhead_p->hdr.have_best_save = 1;
+      bbp->zscore = zscore;
+      bbp->frame = rbuf_dp->frame;
+    }
+  }
+
+  /* process results for statistics */
+  lib_buf2_dp = lib_bhead_p->buf2_data;
+  lib_buf2_rp = lib_bhead_p->buf2_res;
+  buf2_cnt = lib_bhead_p->hdr.buf2_cnt;
+  while (buf2_cnt--) { /* count down the number of results */
+    rbuf_dp = lib_buf2_dp++;	/* step through the results buffer */
+    rbuf_rp = lib_buf2_rp++;	/* step through the results buffer */
+
+    if (!rbuf_rp->is_valid_stat) { continue;}
+
+
+    if (use_shuff) {
+      i_score = rbuf_rp->r_rst.score[sc_ix];
+    e_score = rbuf_rp->r_rst.escore;
+    }
+    else {
+      i_score = rbuf_rp->rst.score[sc_ix];
+      e_score = rbuf_rp->rst.escore;
+    }
+
+    if (rbuf_dp->stats_idx >= MAX_STATS || nstats > MAX_STATS) {
+      fprintf(stderr, "*** error [%s:%d] - nstats index [%d] out of range [%d,%d]\n",
+	      __FILE__, __LINE__,
+	      rbuf_dp->stats_idx, nstats,MAX_STATS);
+      continue;
+    }
+
+    if (nstats < MAX_STATS) {
+      /* this logic allows stats_idx to be over-ruled for searches
+	 where every query does not generate a score */
+      rbuf_dp->stats_idx = nstats;
+      nstats++;
+    }
+
+    if (stats_done && histp) {
+      addhistz(find_z(i_score, e_score, rbuf_dp->seq->n1, (double) rbuf_rp->rst.comp,
+		      *pstat_voidp), histp);
+    }
+
+    if (rbuf_dp->stats_idx < 0) {
+      continue;
+    }
+
+    sstats++;
+    stats[rbuf_dp->stats_idx].n1 = rbuf_dp->seq->n1;
+    stats[rbuf_dp->stats_idx].comp = rbuf_rp->rst.comp;
+    stats[rbuf_dp->stats_idx].H = rbuf_rp->rst.H;
+    stats[rbuf_dp->stats_idx].escore  = e_score;
+    stats[rbuf_dp->stats_idx].score = i_score;
+  }
+
+
+  /* fill the qstats[] array if m_msp->qshuffle */
+  if (m_msp->qshuffle && nqstats < m_msp->shuff_max) {
+    lib_buf2_dp = lib_bhead_p->buf2_data;
+    lib_buf2_rp = lib_bhead_p->buf2_res;
+    buf2_cnt = lib_bhead_p->hdr.buf2_cnt;
+    while (buf2_cnt--) {
+      rbuf_rp = lib_buf2_rp++;	/* step through the results buffer */
+      rbuf_dp = lib_buf2_dp++;
+
+      if (rbuf_rp->is_valid_stat && rbuf_rp->qr_score > 0
+	  && nqstats < m_msp->shuff_max) {
+	qstats[nqstats].n1 = rbuf_dp->seq->n1;	/* save the best score */
+	qstats[nqstats].comp =  rbuf_rp->rst.comp;
+	qstats[nqstats].H = rbuf_rp->rst.H;
+	qstats[nqstats].escore = rbuf_rp->qr_escore;
+	qstats[nqstats++].score = rbuf_rp->qr_score;
+      }
+    }	/* m_msp->qshuffle */
+  }
+
+  /* check if we have enough data to do stats */
+  if (!stats_done && nstats >= MAX_STATS) {
+    zsflag_save = ppst->zsflag;
+    if (ppst->zsflag > 20) {
+      ppst->zsflag -= 20;
+    }
+    ppst->zsflag_f = process_hist(stats,nstats,m_msp, ppst,
+				  histp, pstat_voidp,s_info, 0);
+    ppst->zsflag = zsflag_save;
+    kstats = nstats;
+    stats_done = 1;
+    for (i=0; i< nstats; i++) {
+      bestp_arr[i]->zscore =
+	find_z(bestp_arr[i]->rst.score[ppst->score_ix],
+	       bestp_arr[i]->rst.escore, bestp_arr[i]->seq->n1,
+	       bestp_arr[i]->rst.comp, *pstat_voidp);
+    }
+  }
+
+}
+
+/* save_shuf() - collect shuffled-sequence scores from one results buffer
+   into rstats[] (rstats[], nrstats and rand_state are defined outside
+   this function).
+
+   lib_bhead_p: buffer whose buf2_data[]/buf2_res[] entries are scanned
+   nitt1:       frame value that triggers saving the tracked best score
+   shuff_max:   number of rstats[] slots that may be filled
+   sc_ix:       index into r_rst.score[] of the score being tracked
+   s_info:      running score counts; this buffer's counts are added in
+
+   The best r_rst.score[sc_ix] (with the escore of that same entry) seen
+   since the last reset is stored when an entry with frame == nitt1 is
+   reached.  Once nrstats has reached shuff_max, a slot is chosen with
+   my_nrand() and the score is dropped when the chosen slot is
+   >= shuff_max (presumably reservoir sampling -- confirm against
+   my_nrand()).
+*/
+void
+save_shuf(struct buf_head *lib_bhead_p, int nitt1, int shuff_max, int sc_ix,
+	  struct score_count_s *s_info)
+{
+  struct buf2_data_s *rbuf_dp, *lib_buf2_dp;
+  struct buf2_res_s *rbuf_rp, *lib_buf2_rp;
+  int t_valid_stat;
+  int t_rbest;
+  double t_rescore;
+  int buf2_cnt, jstats;
+  /* number of candidates offered so far; kept across calls */
+  static int kstats=0;
+
+
+  lib_buf2_dp = lib_bhead_p->buf2_data;
+  lib_buf2_rp = lib_bhead_p->buf2_res;
+  buf2_cnt = lib_bhead_p->hdr.buf2_cnt;
+
+  s_info->s_cnt[0] += lib_bhead_p->s_cnt_info.s_cnt[0];
+  s_info->s_cnt[1] += lib_bhead_p->s_cnt_info.s_cnt[1];
+  s_info->s_cnt[2] += lib_bhead_p->s_cnt_info.s_cnt[2];
+
+  s_info->tot_scores += lib_bhead_p->s_cnt_info.tot_scores;
+  /* this is done because we are not using r_rst->valid_stat to limit selection of scores */
+  /*   s_info->s_cnt[sc_ix] = s_info->tot_scores; */
+
+  t_rbest = -BIGNUM;
+  /* give t_rescore a defined value: it is only assigned below when a
+     score exceeds t_rbest, but it is always copied into rstats[];
+     1000.0 is the "no score yet" escore used by the buf_*_work()
+     functions in this file */
+  t_rescore = 1000.0;
+  /* note: t_valid_stat is never cleared again once it has been set */
+  t_valid_stat = 0;
+
+  while (buf2_cnt--) { /* count down the number of results */
+    rbuf_dp = lib_buf2_dp++;	/* step through the data buffer */
+    rbuf_rp = lib_buf2_rp++;	/* step through the results buffer */
+
+    /* perhaps should use explicit flag to indicate no score */
+    if (rbuf_rp->r_rst.score[0] == -BIGNUM) continue;
+
+    if (rbuf_rp->r_rst.score[sc_ix] > t_rbest) {
+      t_rbest = rbuf_rp->r_rst.score[sc_ix];
+      t_rescore = rbuf_rp->r_rst.escore;
+    }
+
+    if (rbuf_rp->r_rst.valid_stat > t_valid_stat) {
+      t_valid_stat = 1;
+    }
+
+    /* statistics done for best score of set */
+    /* currently no check for rst->valid_stat, which causes
+       over-estimates of shuffles */
+
+    if (rbuf_dp->frame == nitt1) {
+      if (t_valid_stat) {
+	if (nrstats < shuff_max ) { kstats = jstats = nrstats++; }
+	else {	/* randomly replace */
+	  jstats = my_nrand(++kstats,rand_state);
+	  if (jstats >= shuff_max) goto done;
+	}
+
+	rstats[jstats].n1 = rbuf_dp->seq->n1;
+	rstats[jstats].comp = rbuf_rp->r_rst.comp;
+	rstats[jstats].H = rbuf_rp->r_rst.H;
+	rstats[jstats].escore  = t_rescore;
+	rstats[jstats].score = t_rbest;
+      done:
+	/* restart the best-score search for the next set */
+	t_rbest = -BIGNUM;
+      }
+    }
+  }
+}
+
+/* save_align() - move the alignment results held in a buffer's
+   buf2_ares[] array into the bestp_arr[] entries they belong to
+   (selected by each result's best_idx).
+
+   An entry that already has a non-NULL a_res is left untouched (and
+   reported to stderr in DEBUG builds).
+
+   Returns the number of results that were in the buffer, or 0 when the
+   buffer has no results; on return the buffer's have_results and
+   buf2_cnt are both cleared.
+*/
+int
+save_align(struct buf_head *lib_bhead_p, struct beststr **bestp_arr)
+{
+  struct buf2_ares_s *ares_p;
+  struct beststr *best_p;
+  int n_res, ix;
+
+  if (!lib_bhead_p->hdr.have_results) return 0;
+
+  n_res = lib_bhead_p->hdr.buf2_cnt;
+  if (n_res <= 0) return 0;
+
+  ares_p = lib_bhead_p->buf2_ares;
+  for (ix = 0; ix < n_res; ix++, ares_p++) {
+    best_p = bestp_arr[ares_p->best_idx];
+
+    if (best_p->a_res != NULL) {
+      /* never overwrite an alignment that was saved earlier */
+#ifdef DEBUG
+      fprintf(stderr,"*** error [%s:%d] - attempt to re-save a_res for [%d]: %s\n",
+	      __FILE__, __LINE__, ares_p->best_idx, best_p->mseq->bline);
+#endif
+      continue;
+    }
+
+    best_p->have_ares = ares_p->have_ares;
+    best_p->a_res = ares_p->a_res;
+  }
+
+  /* mark the buffer as consumed */
+  lib_bhead_p->hdr.have_results = 0;
+  lib_bhead_p->hdr.buf2_cnt = 0;
+  return n_res;
+}
+
+/* buf_do_work() fills lib_bhead_p->buf2_res[] with do_work() results,
+   one per entry in lib_bhead_p->buf2_data[].
+
+   inputs:  aa0[], n0 (query; aa0[] is indexed by the entry's frame)
+            lib_bhead_p->buf2_data[], lib_bhead_p->hdr.buf2_cnt
+              (library sequences)
+            max_frame (frame value that ends a set of entries; used to
+              decide which entry carries the statistics score)
+            ppst (score_ix, n1_low and n1_high are read here)
+            f_str[] (per-frame work structures passed to do_work();
+              prepared by init_work() according to the original note)
+
+   results: lib_bhead_p->buf2_res[], lib_bhead_p->s_cnt_info, and
+            lib_bhead_p->hdr.have_results = 1
+
+            Each buf2_res[] entry has is_valid_stat, which captures the
+            logic required to decide whether a value should be saved
+            in the stats[] buffer.  This complexity mostly arises
+            because there can be more scores than sequences, but there
+            can only be one statistics score per sequence (the best
+            score).
+*/
+void
+buf_do_work(unsigned char **aa0,  int n0,
+	    struct buf_head *lib_bhead_p,
+	    int max_frame,
+	    struct pstruct *ppst, void **f_str) {
+
+  int n_left;
+  unsigned long atmp;
+  struct buf2_data_s *dp;
+  struct buf2_res_s *rp, *grp_best_rp;
+  int grp_best, sc_ix;
+  double grp_escore;
+
+  sc_ix = ppst->score_ix;
+
+  /* this buffer's score counters start from zero */
+  lib_bhead_p->s_cnt_info.tot_scores = 0;
+  lib_bhead_p->s_cnt_info.s_cnt[2] = 0;
+  lib_bhead_p->s_cnt_info.s_cnt[1] = 0;
+  lib_bhead_p->s_cnt_info.s_cnt[0] = 0;
+
+  /* best-of-set tracking state */
+  grp_best_rp = NULL;
+  grp_best = -BIGNUM;
+  grp_escore = 1000.0;
+
+  n_left = lib_bhead_p->hdr.buf2_cnt;
+  dp = lib_bhead_p->buf2_data;
+  rp = lib_bhead_p->buf2_res;
+
+  /* dp and rp advance together, also when an entry is skipped */
+  for ( ; n_left-- > 0; dp++, rp++) {
+
+    /* -BIGNUM in score[] means "no score" to the save functions */
+    rp->rst.score[2] = -BIGNUM;
+    rp->rst.score[1] = -BIGNUM;
+    rp->rst.score[0] = -BIGNUM;
+
+    rp->is_valid_stat = 0;
+
+    if (dp->seq->n1 < ppst->n1_low || dp->seq->n1 > ppst->n1_high) {
+      /* tells save_best() there is no stats score here -- not
+	 necessary as -BIGNUM indicates no score */
+      dp->stats_idx = -1;
+      continue;
+    }
+
+#ifdef DEBUG
+    if (check_seq_range(dp->seq->aa1b, dp->seq->n1,
+			ppst->nsqx, "buf_do_work()")) {
+      fprintf(stderr, "*** error [%s:%d] - [%s/buf_do_work] range error at: %d/%d (n1:%d)\n",
+	      __FILE__, __LINE__,
+	      prog_func,lib_bhead_p->hdr.buf2_cnt - (n_left+1),
+	      lib_bhead_p->hdr.buf2_cnt, dp->seq->n1);
+      continue;
+    }
+
+    /* also check for adler32_crc match */
+    atmp = adler32(1L,dp->seq->aa1b, dp->seq->n1);
+    if (dp->seq->adler32_crc != atmp) {
+      fprintf(stderr, "*** error [%s:%d] - [%s/buf_do_work] CRC error [%lu!=%lu] at: %d/%d (n1:%d/l_offset:%ld)\n",
+	      __FILE__, __LINE__,
+	      prog_func,dp->seq->adler32_crc, atmp,
+	      lib_bhead_p->hdr.buf2_cnt - (n_left+1),
+	      lib_bhead_p->hdr.buf2_cnt,dp->seq->n1,
+	      dp->seq->l_offset);
+      continue;
+    }
+#endif
+
+    do_work (aa0[dp->frame], n0,
+	     dp->seq->aa1b, dp->seq->n1,
+	     dp->frame, ppst, f_str[dp->frame], 0, 0,
+	     &(rp->rst), &(lib_bhead_p->s_cnt_info));
+
+    if (rp->rst.valid_stat) {
+      /* either a lower escore or a higher raw score makes this entry
+	 the current candidate for the set */
+      if (rp->rst.escore < grp_escore) {
+	grp_escore = rp->rst.escore;
+	grp_best_rp = rp;
+      }
+      if (rp->rst.score[sc_ix] > grp_best) {
+	grp_best = rp->rst.score[sc_ix];
+	grp_best_rp = rp;
+      }
+    }
+
+    if (dp->frame == max_frame) {
+      /* end of a set: flag the candidate (if any) and start over */
+      if (grp_best_rp != NULL) {
+	grp_best_rp->is_valid_stat = 1;
+	grp_best_rp = NULL;
+      }
+      grp_best = -BIGNUM;
+      grp_escore = 1000.0;
+    }
+  }
+
+  /* place to produce z_scores */
+  /* place to produce sorted array */
+
+  lib_bhead_p->hdr.have_results = 1;
+}
+
+/* buf_do_align() - build an alignment encoding for every entry of a
+   buffer.
+
+   For each of the lib_bhead_p->hdr.buf2_cnt entries in buf2_data[],
+   build_ares_code() is called and its result is stored in the matching
+   buf2_ares[] entry (a_res and have_ares).  When m_msp->stages > 1,
+   do_opt() is run first and its score[2] is copied into the matching
+   buf2_res[] entry.
+
+   aa0[] and f_str[] are indexed by the entry's frame.  n0 is passed to
+   do_opt(); build_ares_code() is given m_msp->n0.
+
+   Sets lib_bhead_p->hdr.have_results = 1 before returning.
+*/
+void
+buf_do_align(unsigned char **aa0,  int n0,
+	     struct buf_head *lib_bhead_p,
+	     struct pstruct *ppst, const struct mngmsg *m_msp,
+	     void **f_str) {
+
+  int n_left;
+  struct buf2_data_s *dp;
+  struct buf2_res_s *rp;
+  struct buf2_ares_s *ap;
+  struct rstruct opt_rst;
+
+  dp = lib_bhead_p->buf2_data;
+  rp = lib_bhead_p->buf2_res;
+  ap = lib_bhead_p->buf2_ares;
+
+  /* the three arrays are walked in step */
+  for (n_left = lib_bhead_p->hdr.buf2_cnt; n_left-- > 0; dp++, ap++, rp++) {
+
+    /* the optimized score is only recomputed for multi-stage searches */
+    if (m_msp->stages > 1) {
+      do_opt (aa0[dp->frame], n0, dp->seq->aa1b, dp->seq->n1,
+	      dp->frame, ppst, f_str[dp->frame], &opt_rst);
+      rp->rst.score[2] = opt_rst.score[2];
+    }
+
+#ifdef DEBUG
+    if (dp->seq->aa1b == NULL) {
+      fprintf(stderr,"*** error [%s:%d] - [buf_do_align] null aa1b\n",__FILE__, __LINE__);
+      ap->a_res = NULL;
+      break;
+    }
+
+    if (check_seq_range(dp->seq->aa1b, dp->seq->n1,
+			ppst->nsqx, "buf_do_align()")) {
+      fprintf(stderr, "*** error [%s:%d] - [%s/buf_do_align] range error at: %d/%d (n1:%d)\n",
+	      __FILE__, __LINE__,
+	      prog_func,lib_bhead_p->hdr.buf2_cnt - (n_left+1),
+	      lib_bhead_p->hdr.buf2_cnt, dp->seq->n1);
+    }
+
+    /* also check for adler32_crc match */
+    if (dp->seq->adler32_crc != adler32(1L,dp->seq->aa1b, dp->seq->n1)) {
+      fprintf(stderr, "*** error [%s:%d] - [%s/buf_do_align] CRC error at: %d/%d (n1:%d)\n",
+	      __FILE__, __LINE__,
+	      prog_func,lib_bhead_p->hdr.buf2_cnt - (n_left+1),
+	      lib_bhead_p->hdr.buf2_cnt, dp->seq->n1);
+    }
+#endif
+
+    ap->a_res = build_ares_code(aa0[dp->frame], m_msp->n0,
+				dp->seq->aa1b, dp->seq,
+				dp->frame, &ap->have_ares,
+				dp->repeat_thresh, m_msp, ppst, f_str[dp->frame] );
+  }
+
+  lib_bhead_p->hdr.have_results = 1;
+}
+
+/* buf_qshuf_work() - score every library entry of a buffer against the
+   sequence aa0s (the shuffled query, judging by the function and
+   parameter names) and record the best score of each set of entries.
+
+   For each set (closed by an entry with frame == max_frame) the highest
+   score[ix_score] and the lowest escore returned by do_work() are
+   written to qr_score / qr_escore of the buf2_res[] entry in that set
+   that has is_valid_stat set.  is_valid_stat itself is not modified
+   here; buf_do_work() sets it beforehand.
+
+   Entries whose length is outside [ppst->n1_low, ppst->n1_high] are
+   skipped and reset the per-set state.
+*/
+void
+buf_qshuf_work(unsigned char *aa0s,  int n0,
+	       struct buf_head *lib_bhead_p,
+	       int max_frame,
+	       struct pstruct *ppst, void *qf_str,
+	       int ix_score)
+{
+  int n_left;
+  struct buf2_data_s *dp;
+  struct buf2_res_s *rp, *grp_rp;
+  struct rstruct shuf_rst;
+  struct score_count_s q_scnt_info;	/* counts from do_work(); not used afterwards */
+  int grp_best;
+  double grp_escore;
+
+  dp = lib_bhead_p->buf2_data;
+  rp = lib_bhead_p->buf2_res;
+
+  grp_rp = NULL;
+  grp_best = -BIGNUM;
+  grp_escore = 1000.0;
+
+  for (n_left = lib_bhead_p->hdr.buf2_cnt; n_left-- > 0; dp++, rp++) {
+    shuf_rst.score[2] = -BIGNUM;
+    shuf_rst.score[1] = -BIGNUM;
+    shuf_rst.score[0] = -BIGNUM;
+    shuf_rst.valid_stat = 0;
+
+    if (dp->seq->n1 < ppst->n1_low || dp->seq->n1 > ppst->n1_high) {
+      /* out-of-range length: forget anything gathered for this set */
+      grp_rp = NULL;
+      grp_best = -BIGNUM;
+      grp_escore = 1000.0;
+      continue;
+    }
+
+    do_work (aa0s, n0,
+	     dp->seq->aa1b, dp->seq->n1,
+	     dp->frame, ppst, qf_str, 1, 0,
+	     &shuf_rst, &q_scnt_info);
+
+    /* buf_qshuf_work() is always called after buf_do_work(), which
+       sets rp->is_valid_stat */
+    if (rp->is_valid_stat) {
+      grp_rp = rp;
+    }
+
+    if (shuf_rst.escore < grp_escore) {
+      grp_escore = shuf_rst.escore;
+    }
+    if (shuf_rst.score[ix_score] > grp_best) {
+      grp_best = shuf_rst.score[ix_score];
+    }
+
+    if (dp->frame == max_frame) {
+      /* end of a set: store the best values on the flagged entry */
+      if (grp_rp != NULL) {
+	grp_rp->qr_score = grp_best;
+	grp_rp->qr_escore = grp_escore;
+	grp_rp = NULL;
+      }
+#ifdef DEBUG
+      else {
+	fprintf(stderr,"*** error [%s:%d] - tq_best_rp NULL at: %ld\n",
+		__FILE__, __LINE__, rp - lib_bhead_p->buf2_res);
+      }
+#endif
+      grp_best = -BIGNUM;
+      grp_escore = 1000.0;
+    }
+  }
+}
+
+/* buf_shuf_work() - shuffle every eligible library sequence of a buffer
+   and score the shuffled copy against the query.
+
+   aa0[], n0:    query (aa0[] is indexed by the entry's frame)
+   aa1s:         scratch space that receives each shuffled sequence
+   lib_bhead_p:  buffer; buf2_res[].r_rst and .is_valid_stat are written
+   max_frame:    frame value that ends a set of entries
+   ppst:         zs_win selects wshuffle(); otherwise shuffle_dna3
+                 selects shuffle3() over shuffle()
+   f_str[]:      per-frame work structures passed to do_work()
+   ix_score:     not referenced; ppst->score_ix is used instead
+   rand_state:   passed through to the shuffle functions
+
+   Entries with stats_idx < 0, or with a length outside
+   [ppst->n1_low, ppst->n1_high], are skipped.  Within each set, the
+   entry with the best valid shuffled score is flagged with
+   is_valid_stat = 1.
+
+   On return hdr.shuff_cnt holds the number of sequences shuffled and
+   hdr.have_results is 1.
+*/
+void
+buf_shuf_work(unsigned char **aa0,  int n0, unsigned char *aa1s, struct buf_head *lib_bhead_p,
+	      int max_frame, struct pstruct *ppst, void **f_str,
+	      int ix_score, void *rand_state)
+{
+  int buf2_cnt;
+  int shuff_cnt;
+  struct buf2_data_s *lib_buf2_dp;
+  struct buf2_res_s *lib_buf2_rp, *tr_best_rp;
+  int tr_best, sc_ix;
+  double tr_escore;
+
+  sc_ix = ppst->score_ix;
+
+  /* this buffer's score counters start from zero */
+  lib_bhead_p->s_cnt_info.s_cnt[0] = lib_bhead_p->s_cnt_info.s_cnt[1] =
+    lib_bhead_p->s_cnt_info.s_cnt[2] = lib_bhead_p->s_cnt_info.tot_scores = 0;
+
+  shuff_cnt = 0;
+  buf2_cnt = lib_bhead_p->hdr.buf2_cnt;
+  lib_buf2_dp = lib_bhead_p->buf2_data;
+  lib_buf2_rp = lib_bhead_p->buf2_res;
+
+  tr_best_rp = NULL;
+  tr_best = -BIGNUM;
+  tr_escore = 1000.0;
+
+  /* the data and result pointers advance together on every pass */
+  for ( ; buf2_cnt-- > 0; lib_buf2_dp++, lib_buf2_rp++) {
+    lib_buf2_rp->r_rst.score[0] = lib_buf2_rp->r_rst.score[1] =
+      lib_buf2_rp->r_rst.score[2] = -BIGNUM;
+    lib_buf2_rp->r_rst.valid_stat = lib_buf2_rp->is_valid_stat = 0;
+
+    if ((lib_buf2_dp->stats_idx < 0) || lib_buf2_dp->seq->n1 < ppst->n1_low ||
+	lib_buf2_dp->seq->n1 > ppst->n1_high ) {
+      /* not eligible: forget anything gathered for this set */
+      tr_best_rp = NULL;
+      tr_best = -BIGNUM;
+      tr_escore = 1000.0;
+      continue;
+    }
+
+    shuff_cnt++;
+    if (ppst->zs_win > 0) {
+      wshuffle(lib_buf2_dp->seq->aa1b,aa1s,lib_buf2_dp->seq->n1,ppst->zs_win, rand_state);
+    }
+    else {
+      if (ppst->shuffle_dna3) {shuffle3(lib_buf2_dp->seq->aa1b,aa1s,lib_buf2_dp->seq->n1, rand_state);}
+      else {shuffle(lib_buf2_dp->seq->aa1b,aa1s,lib_buf2_dp->seq->n1, rand_state);}
+    }
+
+    /* rshuffle(lib_buf2_dp->seq->aa1b,aa1s,lib_buf2_dp->seq->n1); */
+
+#ifdef DEBUG
+    /* the label names this function (it used to say "buf_do_align()") */
+    if (check_seq_range(aa1s, lib_buf2_dp->seq->n1,
+			ppst->nsqx, "buf_shuf_work()")) {
+      fprintf(stderr, "*** error [%s:%d] - [%s/buf_do_shuff] range error at: %d/%d (n1:%d)\n",
+	      __FILE__, __LINE__,
+	      prog_func,lib_bhead_p->hdr.buf2_cnt - (buf2_cnt+1),
+	      lib_bhead_p->hdr.buf2_cnt, lib_buf2_dp->seq->n1);
+    }
+#endif
+
+    do_work (aa0[lib_buf2_dp->frame], n0,
+	     aa1s, lib_buf2_dp->seq->n1,
+	     lib_buf2_dp->frame, ppst, f_str[lib_buf2_dp->frame], 0, 1,
+	     &lib_buf2_rp->r_rst, &(lib_bhead_p->s_cnt_info));
+
+    if (lib_buf2_rp->r_rst.valid_stat) {
+      /* either a lower escore or a higher raw score makes this entry
+	 the current candidate for the set */
+      if (lib_buf2_rp->r_rst.escore < tr_escore) {
+	tr_escore = lib_buf2_rp->r_rst.escore;
+	tr_best_rp = lib_buf2_rp;
+      }
+      if (lib_buf2_rp->r_rst.score[sc_ix] > tr_best) {
+	tr_best = lib_buf2_rp->r_rst.score[sc_ix];
+	tr_best_rp = lib_buf2_rp;
+      }
+    }
+
+    if (lib_buf2_dp->frame == max_frame) {
+      /* end of a set: flag the candidate (if any) and start over */
+      if (tr_best_rp!=NULL) {
+	tr_best_rp->is_valid_stat = 1;
+	tr_best_rp = NULL;
+      }
+      tr_best = -BIGNUM;
+      tr_escore = 1000.0;
+    }
+  }
+  lib_bhead_p->hdr.shuff_cnt = shuff_cnt;
+  lib_bhead_p->hdr.have_results = 1;
+}
+
+/* buf_shuf_seq is designed to:
+   (1) take a list of sequences (specified by bestp_arr[])
+   (2) collect them from the database if they are not already available
+   (3) send them to the threads or shuffle them directly and calculate scores
+
+   aa0[], n0:    query; the non-threaded build passes m_msp->n0 (not n0)
+                 to buf_shuf_work()
+   aa1shuff_b:   in/out pointer to the buffer that holds the library
+                 sequences fetched here (re-allocated or allocated here)
+   aa1save:      scratch sequence buffer used by re_getlib() and, in the
+                 non-threaded build, as the shuffle target
+   maxn:         size passed to re_getlib(); also the size below which the
+                 existing *aa1shuff_b is re-used in the non-threaded build
+   bestp_arr[], nbest: the sequences to be shuffled
+   s_info:       score counts accumulated by save_shuf()
+
+   Results are stored by save_shuf().  The function exits the program
+   when memory cannot be obtained or when there is nothing to shuffle.
+*/
+
+void
+buf_shuf_seq(unsigned char **aa0, int n0,
+	     unsigned char **aa1shuff_b, unsigned char *aa1save, int maxn,
+	     struct beststr **bestp_arr, int nbest,
+	     struct pstruct *ppst, struct mngmsg *m_msp,
+	     struct mng_thr *m_bufi_p
+#if !defined(COMP_THR) && !defined(PCOMPLIB)
+	     , void **f_str
+#endif
+	     , struct score_count_s *s_info)
+{
+  unsigned char *aa1shuff;
+  struct beststr *bbp, **tmp_bestp;
+  char l_bline[MAX_SSTR];
+  int n1lib_req, shuff_mult;
+  long loffset, l_off;
+  int n1, itt;
+  int max_do_cnt, ndiff, prev_index;
+  int istats;
+  int i, j;
+
+  /* these variables track buffers of library sequences */
+  int cur_buf_size, max_buf_size;
+  struct buf_head *lib_bhead_p;
+  struct buf2_data_s *lib_buf2_dp;
+  struct buf2_res_s *lib_buf2_rp;
+
+/* (1) get the sequences into a buffer - the sequence information is
+   currently in the bestp_arr - find out how many we have, and how
+   many we will need - the number to shuffle */
+
+/* figure out how much space we need, first checking whether we have
+   dups */
+  if ((tmp_bestp = (struct beststr **)calloc(nbest, sizeof(struct beststr *)))==NULL) {
+    fprintf(stderr,"*** error [%s:%d] - %s/buf_shuf_seq() *** cannot allocate tmp_bestp[%d]\n",
+	    __FILE__, __LINE__, prog_name, nbest);
+    exit(1);
+  }
+  for (i = 0; i < nbest; i++) {
+    tmp_bestp[i] = bestp_arr[i];
+  }
+
+  /* sort tmp_bestp[] by sequence index, so duplicates are adjacent */
+  sortbesti(tmp_bestp, nbest);
+
+  /* count number of different sequence indices, get required space
+     without dups */
+  prev_index = -1;
+  n1lib_req = ndiff = 0;
+  for (i = 0; i < nbest; i++) {
+    if (tmp_bestp[i]->seq->index > prev_index) {
+      prev_index = tmp_bestp[i]->seq->index;
+      n1lib_req += tmp_bestp[i]->n1+ 2;
+      ndiff++;
+    }
+  }
+
+#if !defined(COMP_THR) && !defined(PCOMPLIB)
+      /* without at least one sequence there is nothing to shuffle, and
+	 the division by ndiff below would be a division by zero; the
+	 threaded branch has its own check on n1lib_req */
+      if (ndiff < 1) {
+	fprintf(stderr,"*** error [%s:%d] - %s/buf_shuf_seq() *** no sequences to shuffle: %d (%d)\n",
+		__FILE__, __LINE__, prog_name, n1lib_req, ndiff);
+	exit(1);
+      }
+
+      if (n1lib_req >= maxn) { /* we need new space, aa1shuff is too small */
+	if ((*aa1shuff_b = aa1shuff =
+	     (unsigned char *)realloc(*aa1shuff_b, n1lib_req*sizeof(char)))==NULL) {
+	  fprintf(stderr,"*** error [%s:%d] - cannot realloc aa1shuff[%d]\n",
+		  __FILE__, __LINE__, n1lib_req);
+	  exit(1);
+	}
+      }
+      else { aa1shuff = *aa1shuff_b;}
+      /* the first byte is a '\0' placed before the first sequence */
+      *aa1shuff = '\0';
+      aa1shuff++;
+
+#else
+      if (n1lib_req < 2) {
+	fprintf(stderr,"*** error [%s:%d] - [%s/buf_shuf_seq] no residues to shuffle: %d (%d)\n",
+		__FILE__, __LINE__,
+		prog_func,n1lib_req,ndiff);
+	exit(1);
+      }
+
+      if ((*aa1shuff_b = aa1shuff =
+	   (unsigned char *)calloc(n1lib_req,sizeof(char)))==NULL) {
+	fprintf(stderr,"*** error [%s:%d] - cannot calloc aa1shuff[%d]\n",
+		__FILE__, __LINE__, n1lib_req);
+	exit(1);
+      }
+      /* the first byte is a '\0' placed before the first sequence */
+      *aa1shuff = '\0';
+      aa1shuff++;
+#endif
+
+      /* number of times each different sequence is entered */
+      shuff_mult = (m_msp->shuff_max+1)/ndiff;
+      istats = 0;
+
+      /* setup lib_bhead buffers for shuffle comparisons */
+#if defined(COMP_THR) || defined(PCOMPLIB)	/* threaded/parallel */
+      /* max_do_cnt can be smaller than max_buf2_cnt, but not larger */
+      max_do_cnt = min(m_bufi_p->max_buf2_res,
+		       m_msp->shuff_max / (2 * fa_max_workers));
+      /* we don't have a left over one, so we need one */
+      get_rbuf(&lib_bhead_p,m_bufi_p->max_work_buf);
+#else	/* not threaded */
+      max_do_cnt = m_bufi_p->max_buf2_res;
+      lib_bhead_p = lib_buf2_list;  /* equivalent to un-threaded get_rbuf() */
+#endif
+      max_buf_size = n1lib_req;
+      cur_buf_size = 0;
+      lib_bhead_p->hdr.buf2_cnt = 0;
+      lib_bhead_p->hdr.have_results = 0;
+      lib_bhead_p->hdr.stop_work = 0;
+      lib_bhead_p->hdr.buf2_type=BUF2_DOSHUF;
+      lib_bhead_p->hdr.seq_record_continuous = 0;
+      lib_buf2_dp = lib_bhead_p->buf2_data;
+      lib_buf2_rp = lib_bhead_p->buf2_res;
+
+      /* read sequences into shuffle buffer */
+
+      /* NOTE(review): this walks the first ndiff entries of the sorted
+	 tmp_bestp[], which may include duplicates of one sequence when
+	 nbest > ndiff -- confirm that this is intended */
+      for (i = 0; i < ndiff; i++) {
+	bbp = tmp_bestp[i];
+	if (bbp->seq->aa1b == NULL) {
+	  /* get the sequence */
+	  (bbp->mseq->m_file_p->ranlib)(l_bline, sizeof(l_bline),
+				       bbp->mseq->lseek,bbp->mseq->libstr,bbp->mseq->m_file_p);
+	  n1 = re_getlib(aa1save,NULL, maxn,m_msp->ldb_info.maxt3,
+			 m_msp->ldb_info.l_overlap,bbp->mseq->cont,m_msp->ldb_info.term_code,
+			 &loffset,&l_off,bbp->mseq->m_file_p);
+
+	  /* fprintf(stderr, " %d gets %d %d\n",i,tmp_bestp[i]->seq->n1,n1); */
+
+	  /* keep a private copy (n1 residues plus one more byte) in aa1shuff */
+	  memcpy(aa1shuff, aa1save, n1+1);
+	  bbp->seq->aa1b = aa1shuff;
+	  aa1shuff += n1 + 1;
+	}
+
+	/* lib_buf2_dp is used up by scores, the sequence is not sent multiple times */
+	cur_buf_size += bbp->seq->n1+1;
+	for (j = 0; j < shuff_mult; j++ ) {
+	  for (itt = m_msp->revcomp; itt <= m_msp->nitt1; itt++) {
+#ifdef PCOMPLIB
+	    lib_buf2_dp->seq_dup = 0;	/* mark first ->seq as original, not duplicate */
+#endif
+	    lib_buf2_dp->seq = bbp->seq;
+	    /* this invalidates lib_buf2_p->seq */
+	    lib_buf2_dp->stats_idx = istats++;
+	    lib_buf2_dp->frame = itt;
+	    lib_buf2_dp++;		/* point to next buf2 */
+	    lib_buf2_rp++;		/* point to next buf2 */
+	    lib_bhead_p->hdr.buf2_cnt++;
+
+	    if (lib_bhead_p->hdr.buf2_cnt >= max_do_cnt ||
+		cur_buf_size >= max_buf_size) {
+/* (2) send sequences for shuffling */
+#if defined(COMP_THR) || defined(PCOMPLIB)	/* threaded - fill and empty buffers */
+	      /* provide empty buffer to workers */
+	      lib_bhead_p->hdr.aa1b_used = cur_buf_size;
+	      lib_bhead_p->hdr.have_data = 1;
+	      lib_bhead_p->hdr.seq_record_continuous = 0;
+	      put_rbuf(lib_bhead_p,m_bufi_p->max_work_buf);
+	      get_rbuf(&lib_bhead_p,m_bufi_p->max_work_buf);
+#else		/* non-thread - just do the searches */
+	      if (lib_bhead_p->hdr.buf2_type & BUF2_DOSHUF) {
+		buf_shuf_work(aa0,m_msp->n0, aa1save, lib_bhead_p,
+			      m_msp->nitt1, ppst, f_str, ppst->score_ix, rand_state);
+	      }
+#endif
+/* (3) save results in the rstats structure */
+	      if (lib_bhead_p->hdr.buf2_cnt > 0 && lib_bhead_p->hdr.have_results) {
+		save_shuf(lib_bhead_p,m_msp->nitt1,m_msp->shuff_max,ppst->score_ix,s_info);
+	      }
+
+	      lib_bhead_p->s_cnt_info.s_cnt[0] = lib_bhead_p->s_cnt_info.s_cnt[1] =
+		lib_bhead_p->s_cnt_info.s_cnt[2] = lib_bhead_p->s_cnt_info.tot_scores = 0;
+
+	      lib_bhead_p->hdr.buf2_cnt = 0;
+	      cur_buf_size = 0;
+	      lib_bhead_p->hdr.have_results = 0;
+	      lib_bhead_p->hdr.buf2_type=BUF2_DOSHUF;
+	      lib_bhead_p->hdr.seq_record_continuous = 0; /* seq_records are coming from bestptr in any order */
+	      lib_bhead_p->hdr.stop_work = 0;
+	      /* rewind both cursors to the start of the (new) buffer */
+	      lib_buf2_dp = lib_bhead_p->buf2_data;
+	      lib_buf2_rp = lib_bhead_p->buf2_res;
+	    }
+	  } /* for (itt .. */
+	}
+      }			/* done with tmp_bestp[] */
+      free(tmp_bestp);
+
+#if defined(COMP_THR) || defined(PCOMPLIB)	/* if COMP_THR/PCOMPLIB - fill and empty buffers */
+      /* check last buffers for any results */
+      lib_bhead_p->hdr.seq_record_continuous = 0;
+      put_rbuf(lib_bhead_p,m_bufi_p->max_work_buf);
+
+      /* wait for the threads to finish */
+
+      wait_rbuf(m_bufi_p->max_work_buf);
+      /*
+      fprintf(stderr, " num_reader[%d]-empty[%d]: %d\tnrstats: %d\n",
+	      num_reader_bufs,empty_reader_bufs,
+	      num_reader_bufs-empty_reader_bufs, nrstats);
+      */
+
+      for (i=0; i < num_reader_bufs; i++) {
+	if (RESULTS_BUF[i]->hdr.buf2_cnt > 0 && RESULTS_BUF[i]->hdr.have_results) {
+	  save_shuf(RESULTS_BUF[i],m_msp->nitt1, m_msp->shuff_max, ppst->score_ix, s_info);
+	  RESULTS_BUF[i]->hdr.buf2_cnt = RESULTS_BUF[i]->hdr.have_results = 0;
+	}
+      }
+#else	/* just do the searches */
+      /* aa1save is used for shuffles, not aa1shuf, because aa1shuf
+	 has library sequences */
+      buf_shuf_work(aa0,m_msp->n0, aa1save, lib_bhead_p,
+		    m_msp->nitt1, ppst, f_str, ppst->score_ix, rand_state);
+
+      save_shuf(lib_bhead_p,m_msp->nitt1,m_msp->shuff_max, ppst->score_ix, s_info);
+      lib_bhead_p->hdr.buf2_cnt = lib_bhead_p->hdr.have_results = 0;
+#endif
+}
+
+/* buf_align_seq is structurally almost identical to buf_shuf_seq,
+   except that the appropriate sequences are pre-loaded into bbp->seq
+   (and ->bline), and it gets bbp->a_res, rather than scores
+
+   One alignment request is queued for each of bestp_arr[0..nbest-1].
+   Requests are processed in batches: through put_rbuf()/get_rbuf() in
+   COMP_THR/PCOMPLIB builds, or by calling buf_do_align() directly
+   otherwise.  save_align() copies the results back into bestp_arr[].
+
+   n0 is not referenced here; m_msp->n0 is what is passed to
+   buf_do_align().  f_str is only a parameter in the non-threaded build.
+
+   On return m_msp->align_done is 1.  A message is written to stderr if
+   the number of saved alignments differs from nbest, and for every
+   bestp_arr[] entry whose a_res is still NULL. */
+
+void
+buf_align_seq(unsigned char **aa0, int n0,
+	      struct beststr **bestp_arr, int nbest,
+	      struct pstruct *ppst, struct mngmsg *m_msp,
+	      struct mng_thr *m_bufi_p
+#if !defined(COMP_THR) && !defined(PCOMPLIB)
+	      , void **f_str
+#endif
+	     )
+{
+  struct beststr *bbp;
+  int max_align_cnt;
+  int i, n_pre_align;
+  int cur_buf_size, max_buf_size;
+  struct buf_head *lib_bhead_p;
+  struct buf2_data_s *lib_buf2_dp;
+  struct buf2_ares_s *lib_buf2_ap;
+
+  /* setup lib_bhead buffers for alignments */
+#if defined(COMP_THR) || defined(PCOMPLIB)	/* threaded */
+  /* max_do_cnt can be smaller than max_buf2_res, but not larger */
+#ifdef COMP_THR
+  max_align_cnt = min(m_bufi_p->max_buf2_res,
+		      nbest / (4 * fa_max_workers));
+#else
+  max_align_cnt = min(m_bufi_p->max_buf2_res, nbest / fa_max_workers);
+#endif
+  /* the integer divisions above can yield 0; use batches of at least one */
+  if (max_align_cnt < 1) max_align_cnt = 1;
+
+  get_rbuf(&lib_bhead_p,m_bufi_p->max_work_buf);
+#else	/* not threaded */
+  max_align_cnt = m_bufi_p->max_buf2_res;
+  lib_bhead_p = lib_buf2_list;  /* equivalent to un-threaded get_rbuf() */
+#endif
+
+  max_buf_size = lib_bhead_p->hdr.aa1b_size;
+  lib_bhead_p->hdr.buf2_cnt = 0;
+  lib_bhead_p->hdr.have_results = 0;
+  lib_bhead_p->hdr.stop_work = 0;
+  lib_bhead_p->hdr.buf2_type=BUF2_DOALIGN;
+  lib_buf2_dp = lib_bhead_p->buf2_data;
+  lib_buf2_ap = lib_bhead_p->buf2_ares;
+
+  /* read sequences into align buffer */
+
+  n_pre_align = 0;	/* running total of the counts returned by save_align() */
+  cur_buf_size = 0;	/* sum of (n1+1) over the entries queued in this batch */
+  for (i = 0; i < nbest; i++) {
+    bbp = bestp_arr[i];
+
+    /* this invalidates lib_buf2_p->seq */
+    lib_buf2_dp->seq = bbp->seq;
+    cur_buf_size += bbp->seq->n1+1;
+    lib_buf2_dp->frame = bbp->frame;
+    lib_buf2_dp->repeat_thresh = bbp->repeat_thresh;
+#ifdef PCOMPLIB
+    lib_buf2_dp->seq_dup = 0;
+#endif
+    lib_buf2_ap->have_ares = 0;
+    lib_buf2_ap->a_res = NULL;
+    /* best_idx lets save_align() find the bestp_arr[] entry again */
+    lib_buf2_ap->best_idx = i;
+    lib_buf2_dp++;		/* point to next buf2_data */
+    lib_buf2_ap++;		/* point to next buf2_ares */
+    lib_bhead_p->hdr.buf2_cnt++;
+
+    /* flush the batch when it has enough entries or enough residues */
+    if (lib_bhead_p->hdr.buf2_cnt >= max_align_cnt ||
+	cur_buf_size >= max_buf_size - m_msp->ldb_info.maxn) {
+/* (2) send sequences for alignment */
+#if defined(COMP_THR) || defined(PCOMPLIB)	/* threaded - fill and empty buffers */
+      /* provide empty buffer to workers */
+      lib_bhead_p->hdr.seqr_cnt = lib_bhead_p->hdr.buf2_cnt;	/* for alignments, they are the same */
+      lib_bhead_p->hdr.have_data = 1;
+      lib_bhead_p->hdr.aa1b_used = cur_buf_size;
+      lib_bhead_p->hdr.seq_record_continuous = 0;
+      put_rbuf(lib_bhead_p,m_bufi_p->max_work_buf);
+      get_rbuf(&lib_bhead_p,m_bufi_p->max_work_buf);
+#else		/* non-thread - just do the searches */
+      buf_do_align(aa0, m_msp->n0, lib_bhead_p, ppst, m_msp, f_str);
+#endif
+
+/* (3) save alignments */
+      if (lib_bhead_p->hdr.buf2_cnt > 0 && lib_bhead_p->hdr.have_results) {
+	n_pre_align += save_align(lib_bhead_p,bestp_arr);
+      }
+
+      /* start a new batch in the buffer now held in lib_bhead_p */
+      cur_buf_size = 0;
+      max_buf_size = lib_bhead_p->hdr.aa1b_size;
+      lib_bhead_p->hdr.buf2_cnt = 0;
+      lib_bhead_p->hdr.have_results = 0;
+      lib_bhead_p->hdr.buf2_type=BUF2_DOALIGN;
+      lib_bhead_p->hdr.stop_work = 0;
+      lib_buf2_dp = lib_bhead_p->buf2_data;
+      lib_buf2_ap = lib_bhead_p->buf2_ares;
+    }
+  }			/* done with bestp_arr[] */
+
+#if defined(COMP_THR) || defined(PCOMPLIB)	/* if COMP_THR - fill and empty buffers */
+  /* check last buffers for any results */
+  lib_bhead_p->hdr.seqr_cnt = lib_bhead_p->hdr.buf2_cnt;	/* for alignments, they are the same */
+  lib_bhead_p->hdr.have_data = 1;
+  lib_bhead_p->hdr.aa1b_used = cur_buf_size;
+  lib_bhead_p->hdr.seq_record_continuous = 0;
+  put_rbuf(lib_bhead_p,m_bufi_p->max_work_buf);
+
+  /* wait for the threads to finish */
+
+  wait_rbuf(m_bufi_p->max_work_buf);
+
+  /* collect whatever results are still sitting in the result buffers */
+  for (i=0; i < num_reader_bufs; i++) {
+    if (RESULTS_BUF[i]->hdr.buf2_cnt > 0 && RESULTS_BUF[i]->hdr.have_results) {
+      n_pre_align += save_align(RESULTS_BUF[i],bestp_arr);
+      RESULTS_BUF[i]->hdr.buf2_cnt = RESULTS_BUF[i]->hdr.have_results = 0;
+    }
+  }
+#else	/* just do the searches */
+  /* process the final, partially filled batch */
+  buf_do_align(aa0, m_msp->n0, lib_bhead_p, ppst, m_msp, f_str);
+  n_pre_align += save_align(lib_bhead_p,bestp_arr);
+  lib_bhead_p->hdr.buf2_cnt = lib_bhead_p->hdr.have_results = 0;
+#endif
+
+  m_msp->align_done = 1;
+
+  /* consistency checks: these only report, they do not abort */
+  if (n_pre_align != nbest) {
+    fprintf(stderr,"*** error [%s:%d] -  n_pre_align:%d != nbest: %d\n",
+	    __FILE__, __LINE__, n_pre_align, nbest);
+  }
+  for (i=0; i < nbest; i++) {
+    if (bestp_arr[i]->a_res == NULL) {
+      fprintf(stderr, "*** error [%s:%d] - have NULL a_res: %d\n",
+	      __FILE__, __LINE__, i);
+    }
+  }
+}
+
+int
+check_seq_range(unsigned char *aa1b, int n1, int nsq, char *str) {
+  /* Scan the first n1 residue codes of aa1b[]; the result is 1 if any
+     code is larger than nsq and 0 otherwise.  str is used only by
+     this diagnostic, which remains disabled:
+       fprintf(stderr, "%s seq %d (%c) out of range at %d\n",
+	       str, aa1b[pos], aa1b[pos], pos);
+  */
+  int pos;
+  for (pos = 0; pos < n1; pos++) {
+    if (aa1b[pos] > nsq) {
+      return 1;	/* one offender is enough to decide */
+    }
+  }
+  return 0;
+}
+
+struct stack_str {	/* growable LIFO stack of void pointers */
+  void **stack;	/* array of stored pointers */
+  int size;	/* number of slots currently allocated in stack[] */
+  int inc;	/* number of slots push_stack() adds when stack[] is full */
+  int top;	/* index of the next free slot (== number of stored entries) */
+};
+
+struct stack_str *init_stack(int size, int inc) {
+  struct stack_str *stack;
+  /* build an empty stack: size zeroed pointer slots now, grown by inc slots in push_stack(); NULL (with an stderr message) if either calloc() fails */
+  if ((stack=(struct stack_str *)calloc(1,sizeof(struct stack_str)))==NULL) {
+    fprintf(stderr,"*** error [%s:%d] - cannot allocate stack\n",
+	    __FILE__, __LINE__);
+    return NULL;
+  }
+
+  if ((stack->stack=(void *)calloc(size,sizeof(void *)))==NULL) {
+    fprintf(stderr,"*** error [%s:%d] - cannot allocate stack->stack[%d]\n",
+	    __FILE__, __LINE__,size);
+    free(stack);	/* do not leak the header when the slot array cannot be allocated */
+    return NULL;
+  }
+
+  stack->size = size;
+  stack->inc = inc;
+  stack->top = 0;	/* empty */
+  return stack;
+}
+
+void push_stack(struct stack_str *stack, void *value) {	/* append value as the newest entry */
+  void **new_stack; int new_size;	/* staged, so that a failed realloc() cannot lose the old array */
+  if (!stack) return;	/* a push onto a NULL stack is silently ignored */
+  if (stack->top >= stack->size) {	/* full: grow by ->inc slots, but always by at least one */
+    new_size = stack->size + ((stack->inc > 0) ? stack->inc : 1);
+    if ((new_stack = (void **)realloc(stack->stack, new_size*sizeof(void *)))==NULL) {
+      fprintf(stderr,"*** error [%s:%d] - cannot re-allocate stack to [%d]\n", __FILE__, __LINE__, new_size);
+      return;	/* the stack (array, size, top) is unchanged; value is not stored */
+    }
+    stack->stack = new_stack; stack->size = new_size;
+  }
+  stack->stack[stack->top++] = value;
+}
+
+void * pop_stack(struct stack_str *stack) {
+  if (stack == NULL) {	/* no stack at all: nothing to pop */
+#ifdef DEBUG
+    fprintf(stderr," *** error [%s:%d] - pop_stack NULL stack\n",__FILE__, __LINE__);
+#endif
+    return NULL;
+  }
+
+  /* empty stack (top is zero, or has gone negative): pin top at 0 and report nothing to pop */
+  if (stack->top <= 0) {
+    stack->top = 0;
+    return NULL;
+  }
+  stack->top -= 1;	/* top now indexes the newest entry, which becomes the next free slot */
+  return stack->stack[stack->top];
+}
+
+void * free_stack(struct stack_str *stack) {
+  /* releases the slot array and then the stack header; a NULL stack is
+     accepted (free(NULL) is a no-op).  The return value is always NULL. */
+  if (stack != NULL) { free(stack->stack); free(stack); }
+  return NULL;
+}
+
+struct dyn_string_str *
+init_dyn_string(int size, int inc) {
+  struct dyn_string_str *dyn_string;
+  /* build an empty (zero-filled) growable string: c_size 0, mx_size size, growth step inc; NULL (with an stderr message) if either calloc() fails */
+  if ((dyn_string=(struct dyn_string_str *)calloc(1,sizeof(struct dyn_string_str)))==NULL) {
+    fprintf(stderr,"*** error [%s:%d] - cannot allocate dyn_string\n",
+	    __FILE__, __LINE__);
+    return NULL;
+  }
+
+  if ((dyn_string->string=(void *)calloc(size,sizeof(void *)))==NULL) {	/* NOTE(review): allocates size*sizeof(void *) bytes although mx_size below records only size -- sizeof(char) looks intended; confirm */
+    fprintf(stderr,"*** error [%s:%d] - cannot allocate dyn_string->string[%d]\n",
+	    __FILE__, __LINE__,size);
+    free(dyn_string);	/* do not leak the header when the text buffer cannot be allocated */
+    return NULL;
+  }
+
+  dyn_string->c_size = 0;	/* no text stored yet */
+  dyn_string->inc = inc;	/* NOTE(review): dyn_strcat()/dyn_strcpy() double inc in a loop, which cannot terminate if inc is 0 */
+  dyn_string->mx_size = size;
+  return dyn_string;
+}
+
+void
+dyn_strcat(struct dyn_string_str *dyn_string, char *value) {	/* append value, growing the buffer as needed */
+  size_t add_len;
+  void *new_buf;	/* realloc() result is staged so that a failure cannot lose the existing text */
+  if (!dyn_string || !value) return;	/* nothing to append to, or nothing to append */
+  add_len = strlen(value);
+  if (add_len + dyn_string->c_size + 1 >= dyn_string->mx_size) {
+    /* grow the step until it exceeds add_len; the (inc < 1) test keeps inc==0 from looping forever */
+    while (dyn_string->inc < 1 || dyn_string->inc <= add_len) { dyn_string->inc = (dyn_string->inc > 0) ? 2*dyn_string->inc : 1; }
+    dyn_string->mx_size += dyn_string->inc;
+    if ((new_buf = realloc(dyn_string->string, dyn_string->mx_size))==NULL) {
+      fprintf(stderr,"*** error [%s:%d] - cannot re-allocate dyn_string to [%d]\n", __FILE__, __LINE__, dyn_string->mx_size);
+      dyn_string->mx_size -= dyn_string->inc;	/* old buffer and old size stay valid; value is not appended */
+      return;
+    }
+    if (dyn_string->string == NULL) { *(char *)new_buf = '\0'; dyn_string->c_size = 0; }	/* brand-new buffer starts as "" */
+    dyn_string->string = new_buf;
+  }
+  SAFE_STRNCAT(dyn_string->string,value,dyn_string->mx_size);
+  dyn_string->c_size += add_len;
+}
+
+void dyn_strcpy(struct dyn_string_str *dyn_string, char *value) {	/* replace the stored text with value */
+  size_t add_len;
+  void *new_buf;	/* realloc() result is staged so that a failure cannot lose the existing text */
+  if (!dyn_string || !value) return;	/* nothing to copy into, or nothing to copy */
+  add_len = strlen(value);
+  if (add_len + 1>= dyn_string->mx_size) {
+    /* grow the step until it exceeds add_len; the (inc < 1) test keeps inc==0 from looping forever */
+    while (dyn_string->inc < 1 || dyn_string->inc <= add_len) { dyn_string->inc = (dyn_string->inc > 0) ? 2*dyn_string->inc : 1; }
+    dyn_string->mx_size += dyn_string->inc;
+    if ((new_buf = realloc(dyn_string->string, dyn_string->mx_size))==NULL) {
+      fprintf(stderr,"*** error [%s:%d] - cannot re-allocate dyn_string to [%d]\n", __FILE__, __LINE__, dyn_string->mx_size);
+      dyn_string->mx_size -= dyn_string->inc;	/* old buffer, size and text stay valid; value is not copied */
+      return;
+    }
+    dyn_string->string = new_buf;
+  }
+  SAFE_STRNCPY(dyn_string->string,value,dyn_string->mx_size);
+  dyn_string->c_size = add_len;	/* keep the length in step, so a following dyn_strcat() sizes the buffer correctly */
+}
+
+void free_dyn_string(struct dyn_string_str *dyn_string) {	/* NULL is accepted and ignored */
+  if (!dyn_string) { return; }
+  free(dyn_string->string);	/* text buffer first; free(NULL) is a no-op */
+  free(dyn_string);	/* then the header itself */
+}
+
+#include "a_mark.h"
+
+/* *itmp has the current alignment score, if *annot_arr[i_annot].label='V',
+     this can be increased (total increase in *v_delta)
+   *pam2aa0v[.value] gives possibly better pam score for variant
+   *ip is position in annotated sequence (&i0 for annot0_p)
+   *ia is position in aligned sequence (&i1 for annot0_p)
+   sp1 is the array for the (possibly modified) displayed sequence
+   sp1a is the array for the associated annotation
+   sq maps encoded residues to displayed characters
+   i_annot -- current annotation index in annot0_p->annot_arr_p[i_annot]
+   annot_arr = annot0/1_p->annot_arr_p
+   annot_stack = save current annotation
+   *have_push_features = set for annotations pushed in stack (not 'V')
+   *v_delta = change in score from variant at this position
+   **region_p = set for '[' region start
+   init_score -- used to initialize tmp_region_p->score.
+
+*/
+
+int
+next_annot_match(int *itmp, int *pam2aa0v, long ip, long ia, char *sp1, char *sp1a, const unsigned char *sq,
+		 int i_annot, int n_annot, struct annot_entry **annot_arr, char **ann_comment,
+		 void *annot_stack, int *have_push_features, int *v_delta,
+		 struct annot_entry **region_p, struct annot_entry *tmp_region_p,
+		 int init_score)  {
+  int v_tmp;
+
+  if (ann_comment) *ann_comment = NULL;
+
+  /* count through the annotations at this position (long ip) */
+  /* returns the index of the first annotation that is not at position ip (or n_annot) */
+  while (i_annot < n_annot && ip == annot_arr[i_annot]->pos) {
+    if (annot_arr[i_annot]->label == 'V') { /* label == 'V' */
+      v_tmp = pam2aa0v[annot_arr[i_annot]->value];
+      if (v_tmp > *itmp) {	/* the variant scores better: take it */
+	*v_delta += (v_tmp- *itmp);
+	*itmp = v_tmp;
+	*sp1 = sq[annot_arr[i_annot]->value];
+	if (sp1a) *sp1a = 'V';
+	if (ann_comment) *ann_comment = annot_arr[i_annot]->comment;
+      }
+    }
+    else if (annot_arr[i_annot]->label == '[') {
+      /* region_p needs to point to a more sophisticated data
+	 structure that keeps track of all the current regions being
+	 updated
+
+	 to start, region_p could include a linked list and a pointer to
+	 the current left-most region, which would be used for ']'
+	 detection
+
+	 for efficiency, update the ->score only when a new
+	 (overlapping) region is started or stopped
+
+	 same for n_ident, n_aln
+      */
+
+      if (region_p) {	/* NOTE(review): tmp_region_p is written without a NULL check whenever region_p is non-NULL */
+	memcpy(tmp_region_p, annot_arr[i_annot],sizeof(struct annot_entry));
+        tmp_region_p->a_pos = ia;
+	tmp_region_p->score = init_score;
+	tmp_region_p->n_ident = tmp_region_p->n_aln = 0;
+	*region_p = tmp_region_p;
+      }
+    }
+    else if (annot_arr[i_annot]->label == ']') {	/* region end: pushed even if annot_stack is NULL (push_stack() ignores a NULL stack) */
+      if (have_push_features) *have_push_features = 1;
+      push_stack(annot_stack, annot_arr[i_annot]);
+    }
+    else if (annot_stack) {	/* any other label: saved only when a stack was supplied */
+      if (have_push_features) *have_push_features = 1;
+      push_stack(annot_stack, annot_arr[i_annot]);
+    }
+    i_annot++;
+  }  /* everything at this alignment position is checked */
+  return i_annot;
+}
+
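+/* returns M_NEG, M_ZERO, M_POS, or M_IDENT (a_mark.h); M_DEL is never
+   returned here.  If aln is non-NULL, updates aln->nsim, npos,
+   nident, nmismatch and, for nt_align mismatches with 'N', ngap_q/ngap_l.
+*/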
+int align_type(int score, char sp0, char sp1, int nt_align, struct a_struct *aln, int pam_x_id_sim) {
+  int spa_val;
+
+  if (score<0) {
+    spa_val = M_NEG;
+  }
+  else if (score == 0) {
+    spa_val = M_ZERO;
+    if (aln) aln->nsim++;
+  }
+  else {
+    spa_val = M_POS;
+    if (aln) {aln->nsim++; aln->npos++;}
+  }
+
+  /* correct for score < 0 with 'N:N'/'X:X' */
+  if (pam_x_id_sim > 0) {	/* > 0 -> identical, similar */
+    if ((nt_align && toupper(sp0)=='N' && toupper(sp1)=='N') ||
+	(!nt_align && toupper(sp0)=='X' && toupper(sp1)=='X')) {
+      spa_val = M_POS;
+      if (aln) {
+	aln->nsim++;	/* NOTE(review): score is not re-tested here, so nsim is incremented a second time when score >= 0 -- confirm */
+      }
+    }
+  }
+
+  if (aln) aln->nmismatch++;	/* counted as a mismatch first; taken back below if the pair is identical */
+  if (toupper(sp0) == toupper(sp1)) {	/* NOTE(review): toupper() is given a plain char; assumes sp0/sp1 are never negative */
+    spa_val = M_IDENT;
+    if (aln) {
+      aln->nident++;
+      aln->nmismatch--;
+    }
+  }
+  else if (nt_align) {
+    if ((toupper(sp0) == 'T' && toupper(sp1) == 'U') ||
+	(toupper(sp0)=='U' && toupper(sp1)=='T')) {	/* T and U are treated as the same nucleotide */
+      spa_val = M_IDENT;
+      if (aln) {
+	aln->nident++;
+	aln->nmismatch--;
+      }
+    }
+    /* add to gap count for 'N' matches ?? */
+    else if (aln && toupper(sp0) == 'N') aln->ngap_q++;
+    else if (aln && toupper(sp1) == 'N') aln->ngap_l++;
+  }
+
+  /* correct nident, nmismatch for N:N / X:X */
+  if (pam_x_id_sim < 0) {	/* < 0 -> N:N / X:X are counted as mismatches, not identities */
+    if ((nt_align && toupper(sp0)=='N' && toupper(sp1)=='N') ||
+	(!nt_align && toupper(sp0)=='X' && toupper(sp1)=='X')) {
+      if (aln) {	/* undo the identity counted above; spa_val itself stays M_IDENT */
+	aln->nident--;
+	aln->nmismatch++;
+      }
+    }
+  }
+
+  return spa_val;
+}
+
+/* seq_pos works with comment_var()/display_push_features()/do_url1() where
+   i_offset = nn for reversed sequences
+   off = 0 for 0 based offsets, 1 for 1-based offsets
+ */
+int
+seq_pos(int pos, int rev, int off) {
+  /* forward coordinates pass through unchanged; off is not applied */
+  if (!rev) {
+    return pos;
+  }
+
+  /* reversed: negate, step back by one, then add the 0/1-based offset */
+  return -pos-1 + off;
+}
+
+/* target = 0 (aa0), 1 (aa1)
+   appends the text describing one variant position to annot_var_dyn
+   d_type = display_type (annot_fmt in cal_cons.c):
+            1 (long text),   d1_fmt = " Variant: %ld%c%c%ld%c : %c%ld%c";
+            2 (-m 9c code)   sprintf(tmp_str, "|%c%c:%ld%c%c%ld%c",
+   any other d_type appends nothing
+   i0_pos/i1_pos have already been converted to reverse coordinate if necessary
+*/
+void comment_var (long i0_pos, char sp0, long i1_pos, char sp1, char o_sp1,
+		  char sim_char, const char *ann_comment,
+		  struct dyn_string_str *annot_var_dyn, int target, int d_type)
+{
+  char tmp_str[MAX_LSTR], ann_ch0, ann_ch1;
+  char *d1_fmt;
+
+  if (d_type == 1) {
+    if (target ==1) {
+      d1_fmt = " Variant: %ld%c%c%ld%c : %c%ld%c";
+      sprintf(tmp_str,d1_fmt,
+	      i0_pos+1, sp0, sim_char, i1_pos+1,sp1, o_sp1,i1_pos+1,sp1);
+    }
+    else {
+      d1_fmt = " qVariant: %ld%c%c%ld%c : %c%ld%c";
+      sprintf(tmp_str,d1_fmt,
+	      i0_pos+1, sp0, sim_char, i1_pos+1,sp1, o_sp1,i1_pos+1,sp0);
+    }
+
+    /* i0_pos/i1_pos are long, hence %ld above; only this short, bounded text goes through tmp_str[] */
+    dyn_strcat(annot_var_dyn, tmp_str);
+
+    if (ann_comment) {
+      dyn_strcat(annot_var_dyn, " : ");
+      /* the comment has no length bound, so it is appended directly instead of being formatted into tmp_str[] */
+      dyn_strcat(annot_var_dyn, (char *)ann_comment);
+    }
+
+    /* SAFE_STRNCAT(annot_var_s,"\n",n_annot_var_s); */
+    dyn_strcat(annot_var_dyn, "\n");
+  }
+  else if (d_type == 2) {
+    if (target == 1) {
+      ann_ch0 = 'X';
+      ann_ch1 = 'V';
+    }
+    else {
+      ann_ch0 = 'V';
+      ann_ch1 = 'X';
+    }
+
+    sprintf(tmp_str, "|%c%c:%ld%c%c%ld%c",
+	    ann_ch0,ann_ch1,
+	    i0_pos+1,sp0, sim_char,i1_pos+1,sp1);
+    /* SAFE_STRNCAT(annot_var_s, tmp_str, n_annot_var_s); */
+    dyn_strcat(annot_var_dyn, tmp_str);
+  }
+}
+
+void
+display_push_features(void *annot_stack, struct dyn_string_str *annot_var_dyn,
+		      long i0_pos, char sp0, long i1_pos, char sp1, char sym,
+		      struct annot_entry **region0_p,
+		      struct annot_entry **region1_p,
+		      int tot_score, double comp, int n0, int n1,
+		      void *pstat_void, int d_type) {
+  struct annot_entry *this_annot_p;
+  double lbits, total_bits, zscore, lprob, lpercid;
+  char *ann_comment, *bp;
+  struct annot_entry *region_p;
+  char tmp_lstr[MAX_LSTR], tmp_sstr[MAX_SSTR];
+  int q_min, l_min;
+  char *dt1_fmt, *dt2_fmt;
+  /* pops every annotation saved on annot_stack and appends its description to annot_var_dyn: ']' entries close a region (score, bits, identity, Q), other entries with a comment are sites */
+  zscore = find_z(tot_score, 1.0, n1, comp, pstat_void);
+  total_bits = zs_to_bit(zscore, n0, n1);
+
+  while ((this_annot_p = (struct annot_entry *)pop_stack(annot_stack))!=NULL) {
+
+    if (this_annot_p->label == ']') {
+      if (this_annot_p->target == 1) {
+	region_p = *region1_p;
+	if (!region_p) {
+	  fprintf(stderr,"*** error [%s:%d] *** -- target==1 but region1_p is null\n",__FILE__, __LINE__);
+#ifdef DEBUG
+	  fprintf(stderr,"*** qtitle: %s\n",ext_qtitle);
+#endif
+	  continue;
+	}
+	q_min = region_p->a_pos+1;
+	l_min = region_p->pos+1;
+	dt2_fmt = "|XR:%d-%ld:%d-%ld:s=%d;b=%.1f;I=%.3f;Q=%.1f";
+      }
+      else {
+	region_p = *region0_p;
+	if (!region_p) {
+	  fprintf(stderr,"*** error [%s:%d] *** -- target==0 but region0_p is null\n",__FILE__, __LINE__);
+#ifdef DEBUG
+	  fprintf(stderr,"*** qtitle: %s\n",ext_qtitle);
+#endif
+	  continue;
+	}
+	q_min = region_p->pos+1;
+	l_min = region_p->a_pos+1;
+	dt2_fmt = "|RX:%d-%ld:%d-%ld:s=%d;b=%.1f;I=%.3f;Q=%.1f";
+      }
+
+      if (region_p->score < 0) {
+	lbits = 0.0;
+	lprob = 1.0;
+      }
+      else {
+	lbits = total_bits * (double)region_p->score/tot_score;
+	zscore = find_z(region_p->score, 1.0, n1, comp, pstat_void);
+	lprob = zs_to_p(zscore);
+      }
+
+      if (lprob > 0.99) lprob = 0.0;
+      else if (lprob < 1e-300) lprob = 3000.0;
+      else lprob = -10.0*log(lprob)/log(10.0);
+
+      if (region_p->n_aln > 0) {
+	lpercid = ((double)region_p->n_ident)/(double)region_p->n_aln;
+      }
+      else lpercid = -1.0;
+
+      if (d_type == 1) {	/* i0_pos/i1_pos are long, so the end coordinates use %ld */
+	if (this_annot_p->target == 0) {dt1_fmt = " qRegion: %d-%ld:%d-%ld : score=%d; bits=%.1f; Id=%.3f; Q=%.1f :  ";}
+	else {dt1_fmt = " Region: %d-%ld:%d-%ld : score=%d; bits=%.1f; Id=%.3f; Q=%.1f :  ";}
+	sprintf(tmp_lstr, dt1_fmt, q_min, i0_pos+1,
+		l_min, i1_pos+1, region_p->score, lbits, lpercid, lprob);
+	dyn_strcat(annot_var_dyn, tmp_lstr);
+	/* the comment has no length bound: append it directly rather than through tmp_lstr[]; a NULL comment adds nothing */
+	if (region_p->comment) dyn_strcat(annot_var_dyn, region_p->comment);
+	dyn_strcat(annot_var_dyn, "\n");
+      }
+      else if (d_type == 2) {
+	sprintf(tmp_lstr,dt2_fmt,
+		q_min, i0_pos+1,
+		l_min, i1_pos+1, region_p->score, lbits,lpercid, lprob);
+	if (region_p->comment) {	/* only the first word of the comment, length-limited by tmp_sstr[] */
+	  SAFE_STRNCPY(tmp_sstr,region_p->comment,sizeof(tmp_sstr));
+	  if ((bp=strchr(tmp_sstr,' '))!=NULL) { *bp = '\0';}
+	  SAFE_STRNCAT(tmp_lstr,";C=",sizeof(tmp_lstr));
+	  SAFE_STRNCAT(tmp_lstr,tmp_sstr,sizeof(tmp_lstr));
+	}
+	dyn_strcat(annot_var_dyn, tmp_lstr);
+      }	/* any other d_type appends nothing (tmp_lstr[] is never read uninitialized) */
+      region_p->score = 0;
+      region_p = NULL;
+    }
+    else if ((ann_comment = this_annot_p->comment)) {
+      if (d_type == 1 ) {
+	if (this_annot_p->target == 0) {dt1_fmt = " qSite:%c : %ld%c%c%ld%c : ";}
+	else {dt1_fmt = " Site:%c : %ld%c%c%ld%c : ";}
+	sprintf(tmp_lstr,dt1_fmt, this_annot_p->label,i0_pos+1, sp0, sym, i1_pos+1, sp1);
+	dyn_strcat(annot_var_dyn, tmp_lstr);
+	dyn_strcat(annot_var_dyn, ann_comment);	/* unbounded text bypasses tmp_lstr[] */
+	dyn_strcat(annot_var_dyn, "\n");
+      }
+    }
+  }
+}
diff --git a/src/compacc2e.c b/src/compacc2e.c
new file mode 100644
index 0000000..594b930
--- /dev/null
+++ b/src/compacc2e.c
@@ -0,0 +1,4316 @@
+/* $Id: compacc2.c 1280 2014-08-21 00:47:55Z wrp $ */
+/* $Revision: 1280 $  */
+
+/* copyright (c) 1996, 1997, 1998, 1999, 2014 by William R. Pearson and
+   The Rector & Visitors of the University of Virginia */
+
+/* Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under this License is distributed on an "AS
+   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+   express or implied.  See the License for the specific language
+   governing permissions and limitations under the License. 
+*/
+
+/* Concurrent read version */
+
+#include <stdio.h>
+#include <stdlib.h>
+#if defined(UNIX)
+#include <unistd.h>
+#endif
+#if defined(UNIX) || defined(WIN32)
+#include <sys/types.h>
+#endif
+
+#include <limits.h>
+#include <ctype.h>
+#include <float.h>
+
+#include <string.h>
+#include <time.h>
+#include <math.h>
+
+#include "defs.h"
+#include "param.h"
+#include "structs.h"
+
+#include "mm_file.h"
+#include "best_stats.h"
+
+#define XTERNAL
+#include "uascii.h"
+#include "upam.h"
+#undef XTERNAL
+
+#ifdef DEBUG
+extern char ext_qtitle[];
+#endif
+
+extern void abort ();
+
+#include "drop_func.h"	/* get init_work() */
+/* drop_func.h includes dyn_string.h */
+
+void revcomp(unsigned char *seq, int n, int *c_nt);
+extern void qshuffle(unsigned char *aa0, int n0, int nm0, void *);
+#ifdef DEBUG
+unsigned long adler32(unsigned long, const unsigned char *, unsigned int);
+#endif
+
+void
+s_annot_to_aa1a(long offset, int n1, struct annot_str *annot_p, unsigned char *ann_arr, char *tmp_line);
+
+extern void add_annot_def(struct mngmsg *m_msp, char *line, int qa_flag);
+int add_annot_char(unsigned char *ann_arr, char ctmp_label);
+
+int get_annot(char *sname, struct mngmsg *, char *bline, long offset, int n1, 
+	      struct annot_str **annot_p,int target, int debug);
+int
+get_annot_list(char *sname, struct mngmsg *m_msp, struct beststr **bestp_arr,
+	       int nbest,int target, int debug);
+void
+print_sum(FILE *fd, struct db_str *qtt, struct db_str *ntt, int in_mem, long mem_use);
+int
+check_seq_range(unsigned char *aa1b, int n1, int nsq, char *str);
+/* print timing information */
+extern void ptime (FILE *, long);
+
+/* this function consolidates code in comp_lib4.c for non-threaded, and in
+   work_thr2.c (threads) and work_comp2.c (worker nodes)
+*/
+
+void
+init_aa0(unsigned char **aa0, int n0, int nm0,
+	 unsigned char **aa0s, unsigned char **aa1s,
+	 int qframe, int qshuffle_flg, int max_tot,
+	 struct pstruct *ppst, void **f_str, void **qf_str,
+	 void *my_rand_state) {
+  int id;
+
+  /* note that aa[5,4,3,2] are never used, but are provided so that frame
+     can range from 0 .. 5; likewise for f_str[5..2] */
+
+  aa0[5] = aa0[4] = aa0[3] = aa0[2] = aa0[1] = aa0[0];
+
+  /* zero out for SSE2/ALTIVEC -- make sure this is ALWAYS done */
+  for (id=0; id < SEQ_PAD; id++) aa0[0][n0+id] = '\0';
+
+  init_work (aa0[0], n0, ppst, &f_str[0]);
+  f_str[5] = f_str[4] = f_str[3] = f_str[2] = f_str[1] = f_str[0];
+
+  if (qframe == 2) {	/* second frame: a private reverse-complement copy of the query */
+    if ((aa0[1]=(unsigned char *)calloc((size_t)n0+2+SEQ_PAD,sizeof(unsigned char)))==NULL) {
+      fprintf(stderr,"*** error [%s:%d] - cannot allocate aa01[%d]\n", __FILE__, __LINE__, n0);
+      exit(1);	/* the next statement writes through aa0[1]; stop here like the other allocations below */
+    }
+    *aa0[1]='\0';
+    aa0[1]++;
+    memcpy(aa0[1],aa0[0],n0+1);
+    for (id=0; id<SEQ_PAD; id++) {aa0[1][n0+id]=0;}	/* for ALTIVEC/SSE2, must pad with 16 NULL's */
+    revcomp(aa0[1],n0,ppst->c_nt);
+    init_work (aa0[1], n0, ppst, &f_str[1]);
+  }
+
+  if (qshuffle_flg) {
+    if ((*aa0s=(unsigned char *)calloc(n0+2+SEQ_PAD,sizeof(char)))==NULL) {
+      fprintf(stderr,"*** error [%s:%d] - cannot allocate aa0s[%d]\n",__FILE__, __LINE__, n0+2);
+      exit(1);
+    }
+    **aa0s='\0';
+    (*aa0s)++;
+    memcpy(*aa0s,aa0[0],n0);
+    qshuffle(*aa0s,n0,nm0, my_rand_state);
+    /* for SSE2/ALTIVEC, must pad with 16 NULL's */
+    for (id=0; id<SEQ_PAD; id++) {(*aa0s)[n0+id]=0;}
+    init_work (*aa0s, n0, ppst, qf_str);
+  }
+
+  /* always allocate shuffle space */
+  if((*aa1s=calloc(max_tot+1,sizeof(char))) == NULL) {
+    fprintf(stderr,"*** error [%s:%d] - unable to allocate shuffled library sequence [%d]\n", __FILE__, __LINE__, max_tot);
+    exit(1);
+  }
+  else {
+    **aa1s=0;
+    (*aa1s)++;
+  }
+}
+
+/* because it is used to pre-allocate space, maxn has various
+   constraints.  For "simple" comparisons, it is simply the length of
+   the longest library sequence.  But for translated comparisons, it
+   must be 3 or 6X the length of the query sequence.
+
+   In addition, however, it can be reduced to make certain that
+   sequences are read in smaller chunks.  And, maxn affects how large
+   overlaps must be when sequences are read in chunks.
+*/
+
+int
+reset_maxn(struct mngmsg *m_msp, int over_len, int maxn) {
+  /* returns the (possibly reduced) maxn and sets m_msp->ldb_info.l_overlap / maxt3 from over_len; exits if the query is too long */
+  /* reduce maxn if requested */
+  if (m_msp->ldb_info.maxn > 0 && m_msp->ldb_info.maxn < maxn) maxn = m_msp->ldb_info.maxn;
+
+  if (m_msp->qdnaseq==m_msp->ldb_info.ldnaseq || m_msp->qdnaseq==SEQT_DNA ||
+      m_msp->qdnaseq == SEQT_RNA) {/* !TFAST - either FASTA or FASTX */
+
+    if (m_msp->n0 > m_msp->max_tot - m_msp->ldb_info.dupn) {
+      fprintf(stderr,"*** error [%s:%d] -  query sequence is too long %d > %d - %d %s\n",
+	      __FILE__, __LINE__,
+	      m_msp->n0,
+	      m_msp->max_tot, m_msp->ldb_info.dupn,
+	      m_msp->sqnam);
+      exit(1);
+    }
+
+    m_msp->ldb_info.l_overlap = over_len;
+    m_msp->ldb_info.maxt3 = maxn-m_msp->ldb_info.l_overlap;
+  }
+  else {	/* is TFAST */
+    if (m_msp->n0 > MAXTST) {
+      fprintf(stderr,"*** error [%s:%d] -  query sequence is too long %d %s\n",
+	      __FILE__, __LINE__, m_msp->n0,m_msp->sqnam);
+      exit(1);
+    }
+
+    if (m_msp->n0*3 > maxn ) {	/* n0*3 for the three frames - this
+				   will only happen if maxn has been
+				   set low manually */
+
+      if (m_msp->n0*4+2 < m_msp->max_tot) { /* m_msg0*3 + m_msg0 */
+	fprintf(stderr,	/* reported as "*** error" but not fatal: maxn is raised to n0*3 and processing continues */
+		"*** error [%s:%d] - query sequence too long for library segment: %d - resetting to %d\n",
+		__FILE__, __LINE__,
+		maxn,m_msp->n0*3);
+	maxn = m_msp->ldb_info.maxn = m_msp->n0*3;
+      }
+      else {	/* NOTE(review): the test above compared against max_tot, but the message below prints maxn after the '>' */
+	fprintf(stderr,"*** error [%s:%d] -  query sequence too long for translated search: %d * 4 > %d %s\n",
+		__FILE__, __LINE__, m_msp->n0,maxn, m_msp->sqnam);
+	exit(1);
+      }
+    }
+
+    /* set up some constants for overlaps */
+    m_msp->ldb_info.l_overlap = 3*over_len;
+    m_msp->ldb_info.maxt3 = maxn-m_msp->ldb_info.l_overlap-3;
+    m_msp->ldb_info.maxt3 -= m_msp->ldb_info.maxt3%3;	/* round down to a multiple of 3 ... */
+    m_msp->ldb_info.maxt3++;	/* ... plus one */
+
+    maxn = maxn - 3; maxn -= maxn%3; maxn++;	/* same rounding for the returned maxn */
+  }
+  return maxn;
+}
+
+
+int
+scanseq(unsigned char *seq, int n, char *str) {
+  /* count the residues in seq[0..n-1] whose code equals qascii[] of some character of str */
+  char is_member[128] = {0};	/* indexed by residue code; this must be set > nsq */
+  int hits = 0, k;
+  char *cp;
+  for (cp = str; *cp != '\0'; cp++) { is_member[qascii[*cp]] = 1; }
+  for (k = 0; k < n; k++) { hits += is_member[seq[k]]; }
+  return hits;
+}
+
+/* subs_env takes a string, possibly with ${ENV}, and looks up all the
+   potential environment variables and substitutes them into the
+   string */
+
+void subs_env(char *dest, char *src, int dest_size) {
+  char *last_src, *bp, *bp1, *env_val;
+
+  /* Copies src into dest[dest_size], replacing every ${NAME} with the
+     value of environment variable NAME (nothing if NAME is unset).  A
+     '$' that is not followed by '{' is copied literally.  The result is
+     always NUL-terminated and silently truncated to dest_size-1
+     characters.  src is modified temporarily while it is scanned, but
+     is restored before returning. */
+
+  if (dest == NULL || dest_size <= 0) return;	/* no room even for the terminator */
+  *dest = '\0';
+  if (src == NULL) return;
+
+  last_src = src;
+  bp = strchr(src,'$');
+  while (bp != NULL && strlen(dest) < (size_t)(dest_size-1)) {
+    /* append (not copy) the literal text before this '$', so that
+       text accumulated from earlier ${ENV}'s is kept */
+    *bp = '\0';
+    strncat(dest, last_src, dest_size - strlen(dest) - 1);
+    *bp = '$';
+
+    if (*(bp+1) != '{') {	/* lone '$' - keep it */
+      strncat(dest, "$", dest_size - strlen(dest) - 1);
+      last_src = bp + 1;
+    }
+    else if ((bp1 = strchr(bp+2,'}'))==NULL) {
+      fprintf(stderr, "*** error [%s:%d] - Unterminated ENV: %s\n",
+	      __FILE__, __LINE__, src);
+      last_src = bp;	/* copy the unterminated "${..." through unchanged, once */
+      break;
+    }
+    else {	/* have  ${ENV} - put it in */
+      *bp1 = '\0';
+      env_val = getenv(bp+2);
+      *bp1 = '}';	/* always restore src, also when ENV is unset */
+      if (env_val != NULL) {
+	strncat(dest, env_val, dest_size - strlen(dest) - 1);
+      }
+      last_src = bp1+1;	/* skip the ${ENV} even if getenv == NULL */
+    }
+
+    /* now get the next ${ENV} if present */
+    bp = strchr(last_src,'$');
+  }
+  /* now copy the last stuff */
+  strncat(dest, last_src, dest_size - strlen(dest) - 1);
+}
+
+
+void
+selectbest(struct beststr **bptr, int k, int n)	/* k is rank in array */
+{
+  int v, i, j, l, r;
+  struct beststr *tmptr;
+  /* partition-based selection on rst.score[0], larger scores first: bptr[l..r] is partitioned around the score of bptr[r], then narrowed to the side that holds rank k */
+  l=0; r=n-1;
+
+  while ( r > l ) {
+    v = bptr[r]->rst.score[0];	/* pivot score */
+    i = l-1;
+    j = r;
+    do {
+      while (bptr[++i]->rst.score[0] > v) ;	/* stops at r at the latest, where the score equals v */
+      while (bptr[--j]->rst.score[0] < v) ;	/* NOTE(review): no lower bound on j; presumably relies on a sentinel entry before bptr[0] -- confirm against the caller */
+      tmptr = bptr[i]; bptr[i]=bptr[j]; bptr[j]=tmptr;
+    } while (j > i);
+    bptr[j]=bptr[i]; bptr[i]=bptr[r]; bptr[r]=tmptr;	/* undo the last (crossed) swap and drop the pivot at i */
+    if (i>=k) r = i-1;
+    if (i<=k) l = i+1;
+  }
+}
+
+void
+selectbestz(struct beststr **bptr, int k, int n)	/* k is rank in array */
+{
+  int i, j, l, r;
+  struct beststr *tmptr;
+  double v;
+  /* same partition-based selection as selectbest(), keyed on ->zscore (larger z-scores first) */
+  l=0; r=n-1;
+
+  while ( r > l ) {
+    v = bptr[r]->zscore;	/* pivot z-score */
+    i = l-1;
+    j = r;
+    do {
+      while (bptr[++i]->zscore > v) ;	/* stops at r at the latest, where the z-score equals v */
+      while (bptr[--j]->zscore < v) ;	/* NOTE(review): no lower bound on j; presumably relies on a sentinel entry before bptr[0] -- confirm against the caller */
+      tmptr = bptr[i]; bptr[i]=bptr[j]; bptr[j]=tmptr;
+    } while (j > i);
+    bptr[j]=bptr[i]; bptr[i]=bptr[r]; bptr[r]=tmptr;	/* undo the last (crossed) swap and drop the pivot at i */
+    if (i>=k) r = i-1;
+    if (i<=k) l = i+1;
+  }
+}
+
+/* improved shellsort with high-performance increments */
+/*
+shellsort(itemType a[], int l, int r)
+{ int i, j, k, h; itemType v;
+ int incs[16] = { 1391376, 463792, 198768, 86961, 33936,
+		  13776, 4592, 1968, 861, 336,
+		  112, 48, 21, 7, 3, 1 };
+ for ( k = 0; k < 16; k++)
+   for (h = incs[k], i = l+h; i <= r; i++) {
+       v = a[i]; j = i;
+       while (j > h && a[j-h] > v) {
+         a[j] = a[j-h]; j -= h;
+       }
+       a[j] = v;
+     }
+}
+*/
+
+/* ?improved? version of sortbestz using optimal increments and fewer
+   exchanges */
+void sortbestz(struct beststr **bptr, int nbest)
+{
+  int gap, i, j, k;
+  struct beststr *tmp;
+  double v;
+  int incs[14] = { 198768, 86961, 33936,
+		   13776, 4592, 1968, 861, 336,
+		   112, 48, 21, 7, 3, 1 };
+  /* shell sort of bptr[0..nbest-1] by ->zscore, largest first; gaps larger than nbest do nothing */
+  for ( k = 0; k < 14; k++) {
+    gap = incs[k];
+    for (i=gap; i < nbest; i++) {
+      tmp = bptr[i];
+      j = i;
+      v = bptr[i]->zscore;
+      while ( j >= gap && bptr[j-gap]->zscore < v) {	/* shift smaller z-scores towards the end */
+	bptr[j] = bptr[j - gap];
+	j -= gap;
+      }
+      bptr[j] = tmp;
+    }
+  }
+}
+
+
+/* sort based on sequence index */
+void sortbesti(struct beststr **bptr, int nbest)
+{
+  int gap, i, j, k;
+  struct beststr *tmp;
+  double v;	/* holds seq->index converted to double */
+  int incs[12] = { 33936, 13776, 4592, 1968, 861, 336,
+		   112, 48, 21, 7, 3, 1 };
+  /* shell sort of bptr[0..nbest-1] by ->seq->index; as written, larger indexes are placed first */
+  for ( k = 0; k < 12; k++) {
+    gap = incs[k];
+    for (i=gap; i < nbest; i++) {
+      tmp = bptr[i];
+      j = i;
+      v = bptr[i]->seq->index;
+      while ( j >= gap && bptr[j-gap]->seq->index < v) {	/* shift smaller indexes towards the end */
+	bptr[j] = bptr[j - gap];
+	j -= gap;
+      }
+      bptr[j] = tmp;
+    }
+  }
+}
+
+void
+sortbeste(struct beststr **bptr, int nbest)
+{
+  int gap, i, j, k;
+  struct beststr *tmp;
+  double v;
+  int incs[14] = { 198768, 86961, 33936,
+		   13776, 4592, 1968, 861, 336,
+		   112, 48, 21, 7, 3, 1 };
+  /* shell sort of bptr[0..nbest-1] by ->rst.escore, smallest first */
+  for ( k = 0; k < 14; k++) {
+    gap = incs[k];
+    for (i=gap; i < nbest; i++) {
+      j = i;
+      tmp = bptr[i];
+      v = tmp->rst.escore;
+      while ( j >= gap && bptr[j-gap]->rst.escore > v) {	/* shift larger E()-values towards the end */
+	bptr[j] = bptr[j - gap];
+	j -= gap;
+      }
+      bptr[j] = tmp;
+    }
+  }
+
+  /* sometimes there are many high scores with E()==0.0, sort
+     those by z() score */
+
+  j = 0;
+  while (j < nbest && bptr[j]->rst.escore <= 2.0*DBL_MIN ) {j++;}	/* j = number of leading entries with E() <= 2*DBL_MIN */
+  if (j > 1) sortbestz(bptr,j);
+}
+
+extern char *prog_func;
+extern char *verstr, *iprompt0, *refstr, *mp_verstr;
+extern long tstart, tscan, tprev, tdone;	/* Timing */
+#ifdef COMP_MLIB
+extern long ttscan, ttdisp;
+#endif
+extern time_t tdstart, tddone;
+
+/* ****************************************************************
+   print command line arguments (argv_line)
+   possibly HTML header
+   !BLAST
+     please cite
+     version
+   BLAST
+     Reference version
+**************************************************************** */
+void
+print_header1(FILE *fd, const char *argv_line,
+	      const struct mngmsg *m_msp, const struct pstruct *ppst) {
+  int i;	/* not referenced in this function */
+
+#ifdef PGM_DOC
+  if (!(m_msp->markx & (MX_M8OUT+MX_MBLAST2)) || (m_msp->markx & MX_M8COMMENT)) fprintf(fd, "#%s\n",argv_line);
+#endif
+
+  if (m_msp->markx & MX_M11OUT) {	/* lav header; argv_line+1 leaves out the first character of argv_line */
+    fprintf(fd, "#:lav\n\nd {\n   \"%s\"\n}\n",argv_line+1);
+  }
+
+  if (m_msp->markx & MX_HTML) {
+#ifdef HTML_HEAD
+    fprintf(fd,"<html>\n<head>\n<title>%s Results</title>\n</head>\n<body>\n",prog_func);
+#endif
+    fprintf(fd,"<pre>\n");
+  }
+
+  if (m_msp->std_output) {	/* program prompt, version, and citation when refstr is non-empty */
+    fprintf(fd,"%s\n",iprompt0);
+    if (refstr != NULL && refstr[0] != '\0') {
+      fprintf(fd," version %s%s\nPlease cite:\n %s\n",verstr,mp_verstr,refstr);
+    }
+    else {
+      fprintf(fd," version %s%s\n",verstr,mp_verstr);
+    }
+  }
+
+  if (m_msp->markx & MX_MBLAST2) {	/* BLAST-like banner: program, version, optional reference */
+    if (refstr != NULL && refstr[0] != '\0') {
+      fprintf(fd,"%s %s%s\n\nReference: %s\n\n", prog_func, verstr, mp_verstr, refstr);
+    }
+    else {
+      fprintf(fd,"%s %s%s\n\n", prog_func, verstr, mp_verstr);
+    }
+  }
+
+  fflush(fd);
+}
+
+/* ****************************************************************
+   MX_HTML: <pre>
+   Query:
+     1>>>accession description # aa
+   Annotation:
+   Library:
+**************************************************************** */
+void
+print_header2(FILE *fd, int qlib, char *info_qlabel, unsigned char **aa0,
+	      const struct mngmsg *m_msp, const struct pstruct *ppst,
+	      const char * info_lib_range_p) {
+  int j;
+  char tmp_str[MAX_STR];	/* strand note printed after the query title */
+  double db_tt;
+
+  /* if (m_msp->markx & MX_HTML) fputs("<pre>\n",fd); */
+
+  if (m_msp->std_output) {
+    if (qlib==1) {	/* the query file name is printed for the first query only */
+      fprintf(fd,"Query: %s\n", m_msp->tname);
+    }
+
+    if (m_msp->qdnaseq == SEQT_DNA || m_msp->qdnaseq == SEQT_RNA) {
+      strncpy(tmp_str,(m_msp->qframe==1)? " (forward-only)" : "\0",sizeof(tmp_str));
+      tmp_str[sizeof(tmp_str)-1]='\0';
+    }
+    else tmp_str[0]='\0';
+
+    fprintf(fd,"%3d>>>%s%s\n", qlib,
+	   m_msp->qtitle,
+	   (m_msp->revcomp ? " (reverse complement)" : tmp_str));
+
+    /* check for annotation */
+    if (m_msp->ann_flg && m_msp->aa0a != NULL) {
+      fprintf(fd,"Annotation: ");
+      for (j=0; j<m_msp->n0; j++) {	/* one |position:symbol+residue entry per annotated, non-blank position */
+	if (m_msp->aa0a[j] && m_msp->ann_arr[m_msp->aa0a[j]] != ' ' ) {
+	  fprintf(fd,"|%ld:%c%c",
+		  j+m_msp->q_off,m_msp->ann_arr[m_msp->aa0a[j]],ppst->sq[aa0[0][j]]);
+	}
+      }
+    fprintf(fd,"\n");
+    }
+
+    fprintf(fd,"Library: %s%s\n", m_msp->ltitle,info_lib_range_p);	/* info_lib_range_p is printed with %s, so it must not be NULL */
+
+    if (m_msp->db.carry==0) {
+      fprintf(fd, "  %7ld residues in %5ld sequences\n", m_msp->db.length, m_msp->db.entries);
+    }
+    else {	/* total = carry*LONG_MAX + length, computed and printed as a double */
+      db_tt = (double)m_msp->db.carry*(double)LONG_MAX + (double)m_msp->db.length;
+      fprintf(fd, "  %.0f residues in %5ld library sequences\n", db_tt, m_msp->db.entries);
+    }
+
+  }
+  else {
+    if ((m_msp->markx & (MX_M8OUT + MX_M8COMMENT)) == (MX_M8OUT+MX_M8COMMENT)) {	/* both flags must be set */
+      fprintf(fd,"# %s %s%s\n",prog_func,verstr,mp_verstr);
+      fprintf(fd,"# Query: %s\n",m_msp->qtitle);
+      fprintf(fd,"# Database: %s\n",m_msp->ltitle);
+    }
+  }
+  if (m_msp->markx & MX_HTML) fputs("</pre>\n",fd);
+  fflush(fd);
+}
+
+/* **************************************************************** */
+/*   before showbest                                                */
+/* **************************************************************** */
+void print_header3(FILE *fd, int qlib, struct mngmsg *m_msp, struct pstruct *ppst) {
+  /* only the MX_MBLAST2 output format prints anything here */
+  if (!(m_msp->markx & MX_MBLAST2)) { return; }
+
+  if (qlib == 1) {	/* database line, for qlib 1 only */
+    fprintf(fd, "\nDatabase: %s\n     %12ld sequences; %ld total letters\n\n\n",
+	    m_msp->ltitle, m_msp->db.entries, m_msp->db.length);
+  }
+  fprintf(fd, "\nQuery= %s\nLength=%d\n", m_msp->qtitle, m_msp->n0);
+}
+
+
+/* **************************************************************** */
+/* alignment transition                                             */
+/* **************************************************************** */
+void print_header4(FILE *fd, char *info_qlabel, char *argv_line, char *info_gstring3, char *info_hstring_p[2],
+		   struct mngmsg *m_msp, struct pstruct *ppst) {
+	/* prints the ">>>query, length type vs library" line; the MX_M10FORM variant also prints the "; pg_*" lines and the three info strings */
+	if (m_msp->std_output && (m_msp->markx & (MX_AMAP+ MX_HTML + MX_M9SUMM)) && !(m_msp->markx & MX_M10FORM)) {
+	  fprintf(fd,"\n>>>%s%s, %d %s vs %s library\n",
+		  info_qlabel,(m_msp->revcomp ? "_rev":"\0"), m_msp->n0,
+		  m_msp->sqnam,m_msp->lname);
+	}
+
+	if (m_msp->markx & MX_M10FORM) {	/* reverse complement is marked with "-" here, "_rev" above */
+	  fprintf(fd,"\n>>>%s%s, %d %s vs %s library\n",
+		  info_qlabel,(m_msp->revcomp ? "-":"\0"), m_msp->n0, m_msp->sqnam,
+		  m_msp->lname);
+	  fprintf(fd,"; pg_name: %s\n",m_msp->pgm_name);
+	  fprintf(fd,"; pg_ver: %s%s\n",verstr,mp_verstr);
+	  fprintf(fd,"; pg_argv: %s",argv_line);	/* no newline is added after argv_line */
+	  fputs(info_gstring3,fd);
+	  fputs(info_hstring_p[0],fd);
+	  fputs(info_hstring_p[1],fd);
+	}
+}
+
+void print_header4a(FILE *outfd, struct mngmsg *m_msp) {	/* prints the ">>><<<" marker line when the format test below holds */
+  if (!(m_msp->markx & MX_M8OUT) && (m_msp->markx & (MX_M10FORM+MX_M9SUMM)) && m_msp->show_code != SHOW_CODE_ID && m_msp->show_code != SHOW_CODE_IDD) {
+    fprintf(outfd,">>><<<\n");	/* not MX_M8OUT, MX_M10FORM or MX_M9SUMM set, show_code neither SHOW_CODE_ID nor SHOW_CODE_IDD */
+  }
+}
+
+/* Write the closing section of a report.
+ *
+ *   fd       - output stream
+ *   qlib     - value reported as the number of processed queries (MX_M8COMMENT)
+ *   qtt      - query totals, forwarded to print_sum()
+ *   m_msp    - supplies markx, std_output, db and ltitle
+ *   ppst     - supplies pLambda, pK, pH, pam_name, gdelval, ggapval (MX_MBLAST2)
+ *   in_mem, tot_memK - forwarded to print_sum()
+ *
+ * Output order: BLAST-like statistics, the "# ... processed" comment,
+ * the ">>>///" terminator, the print_sum() summary (wrapped in <pre> for
+ * HTML), the optional HTML document tail, and the BLAST-like database
+ * and scoring-parameter summary.
+ */
+void print_header5(FILE *fd, int qlib, struct db_str *qtt,
+		   struct mngmsg *m_msp, struct pstruct *ppst,
+		   int in_mem, long tot_memK) {
+  const int blast2_out = ((m_msp->markx & MX_MBLAST2) != 0);
+  const int html_out = ((m_msp->markx & MX_HTML) != 0);
+
+  /* for MX_MBLAST2, show some statistics results */
+  if (blast2_out) {
+    fprintf(fd,"\n\nLambda      K     H\n");
+    fprintf(fd," %6.3f  %6.3f  %6.3f\n\n",ppst->pLambda,ppst->pK,ppst->pH);
+    fprintf(fd,"\nGapped\nLambda\n");
+    fprintf(fd," %6.3f  %6.3f  %6.3f\n",ppst->pLambda,ppst->pK,ppst->pH);
+    fprintf(fd,"\nEffective search space used: %ld\n\n",m_msp->db.entries);
+  }
+
+  if (m_msp->markx & MX_M8COMMENT) {
+    fprintf(fd, "# %s processed %d queries\n",prog_func,qlib);
+  }
+
+  /* terminator for -m 9 / -m 10 style output, suppressed for -m 8 and HTML */
+  if (!(m_msp->markx & MX_M8OUT) && !html_out
+      && (m_msp->markx & (MX_M10FORM+MX_M9SUMM))) {
+    fprintf(fd,">>>///\n");
+  }
+
+  if (html_out) {
+    fputs("<pre>",fd);
+  }
+  if (m_msp->std_output) {
+    print_sum(fd, qtt, &m_msp->db, in_mem, tot_memK);
+  }
+  if (html_out) {
+    fputs("</pre>\n",fd);
+  }
+#ifdef HTML_HEAD
+  if (html_out) {
+    fprintf(fd,"</body>\n</html>\n");
+  }
+#endif
+
+  if (blast2_out) {
+    fprintf(fd,"\n  Database: %s\n",m_msp->ltitle);
+    fprintf(fd,"  Number of letters in database: %ld\n",m_msp->db.length);
+    fprintf(fd,"  Number of sequences in database: %ld\n",m_msp->db.entries);
+    fprintf(fd,"\n\n\nMatrix: %s\n",ppst->pam_name);
+    fprintf(fd,"Gap Penalties: Existence: %d, Extension: %d\n",ppst->gdelval, ppst->ggapval);
+  }
+}
+
+/* List the annotation symbols that have a definition.
+ *
+ * Nothing is written unless m_msp->ann_arr_def[1] is set.  Otherwise the
+ * symbols in m_msp->ann_arr[] are scanned from index 1 up to the first
+ * zero entry, and each one with a non-NULL ann_arr_def[] entry is printed
+ * as " <symbol> : <definition>".  For HTML output the list is wrapped in
+ * <pre> ... </pre><hr />.
+ */
+void
+print_annot_header(FILE *fd, struct mngmsg *m_msp) {
+  int sym_ix;
+  int html_out;
+
+  if (!m_msp->ann_arr_def[1]) {
+    return;
+  }
+
+  html_out = ((m_msp->markx & MX_HTML) != 0);
+
+  if (html_out) {
+    fputs("<pre>",fd);
+  }
+  fputs("Annotation symbols:\n", fd);
+
+  sym_ix = 1;
+  while (m_msp->ann_arr[sym_ix]) {
+    if (m_msp->ann_arr_def[sym_ix]) {
+      fprintf(fd, " %c : %s\n",m_msp->ann_arr[sym_ix], m_msp->ann_arr_def[sym_ix]);
+    }
+    sym_ix++;
+  }
+
+  if (html_out) {
+    fputs("</pre><hr />\n",fd);
+  }
+}
+
+extern int fa_max_workers;
+
+/* Write the end-of-run summary: residue and sequence totals, program
+ * version, start/done wall-clock times, scan/display times and the
+ * function used.
+ *
+ *   fd       - output stream
+ *   qtt      - query totals (length, entries)
+ *   ntt      - library totals; when ntt->carry is non-zero the residue count
+ *              is rebuilt as carry*LONG_MAX + length and printed as a double
+ *   in_mem   - with tot_memK, selects the " in memory [..G]" note
+ *   tot_memK - memory figure; printed as (tot_memK >> 20) followed by "G"
+ *
+ * Reads the file-level timing values tdstart, tddone, tscan, tprev, tdone
+ * (or ttscan, ttdisp when COMP_MLIB is defined).
+ */
+void
+print_sum(FILE *fd, struct db_str *qtt, struct db_str *ntt, int in_mem, long tot_memK)
+{
+  char start_str[26], done_str[26];
+  char mem_note[256];
+  double lib_residues;
+
+  /* ctime() text ends in '\n' at offset 24; cut it off */
+  strncpy(start_str,ctime(&tdstart),sizeof(start_str));
+  start_str[24]='\0';
+  strncpy(done_str,ctime(&tddone),sizeof(done_str));
+  done_str[24]='\0';
+
+  /* Print timing to output file as well */
+
+  fprintf(fd, "\n%ld residues in %ld query   sequences\n", qtt->length, qtt->entries);
+  if (ntt->carry != 0) {
+    lib_residues = (double)ntt->carry*(double)LONG_MAX + (double)ntt->length;
+    fprintf(fd, "%.0f residues in %ld library sequences\n", lib_residues, ntt->entries);
+  }
+  else {
+    fprintf(fd, "%ld residues in %ld library sequences\n", ntt->length, ntt->entries);
+  }
+
+  /* the memory note stays empty unless both values are non-zero */
+  mem_note[0]='\0';
+  if (in_mem != 0 && tot_memK) {
+    sprintf(mem_note," in memory [%ldG]",(tot_memK >> 20));
+  }
+
+#if !defined(COMP_THR) && !defined(PCOMPLIB)
+  fprintf(fd," Scomplib [%s%s]\n start: %s done: %s\n",verstr,mp_verstr,start_str,done_str);
+#endif
+#if defined(COMP_THR)
+  fprintf(fd," Tcomplib [%s%s] (%d proc%s)\n start: %s done: %s\n", verstr, mp_verstr,
+          fa_max_workers, mem_note, start_str, done_str);
+#endif
+#if defined(PCOMPLIB)
+  fprintf(fd," Pcomplib [%s%s] (%d proc%s)\n start: %s done: %s\n", verstr, mp_verstr,
+          fa_max_workers, mem_note, start_str, done_str);
+#endif
+
+#ifdef COMP_MLIB
+  fprintf(fd," Total Scan time: ");
+  ptime(fd, ttscan);
+  fprintf (fd," Total Display time: ");
+  ptime (fd, ttdisp);
+#else
+  fprintf(fd," Scan time: ");
+  ptime(fd, tscan - tprev);
+  fprintf (fd," Display time: ");
+  ptime (fd, tdone - tscan);
+#endif
+  fprintf (fd,"\n");
+  fprintf (fd, "\nFunction used was %s [%s%s]\n", prog_func,verstr,mp_verstr);
+}
+
+extern double zs_to_Ec(double zs, long entries);
+extern double zs_to_bit(double zs, int n0, int n1);
+extern double zs_to_p(double zs);
+
+#include "aln_structs.h"
+
+/* prhist() - print the score histogram and the statistics summary.
+ *
+ *   fd            - output stream
+ *   m_msp         - reads markx, nohist and annot1_sname
+ *   ppst          - reads zsflag_f, zs_off and other_info
+ *   hist          - histogram (passed by value): hist_a[], maxh, min_hist,
+ *                   histint, entries, stat_info
+ *   nstats        - number of scores used for statistics; the histogram is
+ *                   only considered when nstats > 20
+ *   sstats        - printed only when SAMP_STATS is defined
+ *   ntt           - library totals (passed by value): length, entries, carry
+ *   stat_info2    - optional second statistics line (may be NULL)
+ *   lib_range     - text appended to the residue/sequence count line
+ *   info_gstring2 - [0] algorithm description, [1] parameter description
+ *   info_hstring  - two output buffers filled with sprintf() when MX_M10FORM
+ *                   is set; their capacity is not visible here
+ *   tscan         - scan time passed to ptime()
+ *
+ * When ppst->zsflag_f < 0 only the counts and the Algorithm/Parameters
+ * lines are written.
+ */
+void
+prhist(FILE *fd, const struct mngmsg *m_msp,
+       struct pstruct *ppst,
+       struct hist_str hist,
+       int nstats, int sstats,
+       struct db_str ntt,
+       char *stat_info2,
+       char *lib_range,
+       char **info_gstring2,
+       char **info_hstring,
+       long tscan)
+{
+  int i,j,hl,hll, el, ell, ev;
+  char hline[80], pch, *bp;
+  int mh1, mht;
+  int maxval, maxvalt, dotsiz, ddotsiz,doinset;
+  double cur_e, prev_e, f_int;
+  double max_dev, x_tmp;
+  double db_tt;
+  int n_chi_sq, cum_hl=0, max_i=0, max_dev_i;
+  double zs10_off;
+
+
+  if (m_msp->markx & MX_HTML) fputs("<pre>\n",fd);
+  else {fprintf(fd,"\n");}
+
+  /* zsflag_f < 0: counts (unless nohist) plus Algorithm/Parameters, then done */
+  if (ppst->zsflag_f < 0) {
+    if (!m_msp->nohist) {
+      fprintf(fd, "  %7ld residues in %5ld sequences", ntt.length,ntt.entries);
+      fprintf(fd, "%s\n",lib_range);
+    }
+    fprintf(fd,"Algorithm: %s\nParameters: %s\n",info_gstring2[0],info_gstring2[1]);
+    return;
+  }
+
+  if (nstats > 20) {
+    zs10_off = ppst->zs_off * 10.0;
+
+    max_dev = 0.0;
+    mh1 = hist.maxh-1;			/* max value for histogram */
+    mht = (3*hist.maxh-3)/4 - 1;	/* x-coordinate for expansion */
+    n_chi_sq = 0;
+
+    if (!m_msp->nohist && mh1 > 0) {
+      /* maxval: largest bin overall; maxvalt: largest bin at or beyond mht */
+      for (i=0,maxval=0,maxvalt=0; i<hist.maxh; i++) {
+	if (hist.hist_a[i] > maxval) maxval = hist.hist_a[i];
+	if (i >= mht &&  hist.hist_a[i]>maxvalt) maxvalt = hist.hist_a[i];
+      }
+      /* starting at -hist_a[0] leaves the first bin out of the running total */
+      cum_hl = -hist.hist_a[0];
+      /* sequences per '=': main plot scaled to 60 columns, inset to 50 */
+      dotsiz = (maxval-1)/60+1;
+      ddotsiz = (maxvalt-1)/50+1;
+      /* the inset is drawn only if it has a finer scale than the main plot */
+      doinset = (ddotsiz < dotsiz && dotsiz > 2);
+
+      /* zsflag_f < 0 returned above, so the else branch is not reached here */
+      if (ppst->zsflag_f>=0)
+	fprintf(fd,"       opt      E()\n");
+      else
+	fprintf(fd,"     opt\n");
+
+      prev_e =  zs_to_Ec((double)(hist.min_hist-hist.histint/2)-zs10_off,hist.entries);
+      for (i=0; i<=mh1; i++) {
+	/* '<' marks the first bin, '>' the last */
+	pch = (i==mh1) ? '>' : ' ';
+	pch = (i==0) ? '<' : pch;
+	hll = hl = hist.hist_a[i];
+	if (ppst->zsflag_f>=0) {
+	  cum_hl += hl;
+	  f_int = (double)(i*hist.histint+hist.min_hist)+(double)hist.histint/2.0;
+	  cur_e = zs_to_Ec(f_int-zs10_off,hist.entries);
+	  /* expected count for this bin: difference of successive zs_to_Ec() values */
+	  ev = el = ell = (int)(cur_e - prev_e + 0.5);
+	  /* track the largest |observed cumulative - expected| over the selected bins */
+	  if (hl > 0  && i > 5 && i < (90-hist.min_hist)/hist.histint) {
+	    x_tmp  = fabs(cum_hl - cur_e);
+	    if ( x_tmp > max_dev) {
+	      max_dev = x_tmp;
+	      max_i = i;
+	    }
+	    n_chi_sq++;
+	  }
+	  /* convert expected counts to column positions, capped at 60 / 40 */
+	  if ((el=(el+dotsiz-1)/dotsiz) > 60) el = 60;
+	  if ((ell=(ell+ddotsiz-1)/ddotsiz) > 40) ell = 40;
+	  fprintf(fd,"%c%3d %5d %5d:",
+		  pch,(i<mh1)?(i)*hist.histint+hist.min_hist :
+		  mh1*hist.histint+hist.min_hist,hl,ev);
+	}
+	else fprintf(fd,"%c%3d %5d :",
+		     pch,(i<mh1)?(i)*hist.histint+hist.min_hist :
+		     mh1*hist.histint+hist.min_hist,hl);
+
+	/* convert observed counts to bar lengths, capped at 60 / 40 */
+	if ((hl=(hl+dotsiz-1)/dotsiz) > 60) hl = 60;
+	if ((hll=(hll+ddotsiz-1)/ddotsiz) > 40) hll = 40;
+	for (j=0; j<hl; j++) hline[j]='=';
+	if (ppst->zsflag_f>=0) {
+	  /* '*' marks the expected value, inside the bar or to its right */
+	  if (el <= hl ) {
+	    if (el > 0) hline[el-1]='*';
+	    hline[hl]='\0';
+	  }
+	  else {
+	    for (j = hl; j < el; j++) hline[j]=' ';
+	    hline[el-1]='*';
+	    hline[hl=el]='\0';
+	  }
+	}
+	else hline[hl] = 0;
+	/* second line carries the scale legend, written from column 10 */
+	if (i==1) {
+	  for (j=hl; j<10; j++) hline[j]=' ';
+	  sprintf(&hline[10]," one = represents %d library sequences",dotsiz);
+	}
+	/* inset legend, two lines before the inset starts */
+	if (doinset && i == mht-2) {
+	  for (j = hl; j < 10; j++) hline[j]=' ';
+	  sprintf(&hline[10]," inset = represents %d library sequences",ddotsiz);
+	}
+	/* inset: a second bar after a ':' at column 10, using the ddotsiz scale */
+	if (i >= mht&& doinset ) {
+	  for (j = hl; j < 10; j++) hline[j]=' ';
+	  hline[10]=':';
+	  for (j = 11; j<11+hll; j++) hline[j]='=';
+	  hline[11+hll]='\0';
+	  if (ppst->zsflag_f>=0) {
+	    if (ell <= hll) hline[10+ell]='*';
+	    else {
+	      for (j = 11+hll; j < 10+ell; j++) hline[j]=' ';
+	      hline[10+ell] = '*';
+	      hline[11+ell] = '\0';
+	    }
+	  }
+	}
+
+	fprintf(fd,"%s\n",hline);
+	prev_e = cur_e;
+      }
+    }
+    /* score at the bin with the largest deviation */
+    max_dev_i = max_i*hist.histint+hist.min_hist;
+  }
+  else {
+    /* too few scores for a histogram */
+    max_dev = 0.0;
+    n_chi_sq = 0;
+    max_i = 0;
+    max_dev_i = 0;
+  }
+
+  if (ppst->zsflag_f >=0 ) {
+    if (!m_msp->nohist) {
+      if (ntt.carry==0) {
+	fprintf(fd, "  %7ld residues in %5ld sequences", ntt.length, ntt.entries);
+      }
+      else {
+	/* rebuild the residue count from carry and length */
+	db_tt = (double)ntt.carry*(double)LONG_MAX + (double)ntt.length;
+	fprintf(fd, "  %.0f residues in %5ld library sequences", db_tt, ntt.entries);
+      }
+      fprintf(fd, "%s\n",lib_range);
+    }
+    fprintf(fd,"Statistics: %s\n",hist.stat_info);
+    if (stat_info2) {
+      fprintf(fd," Statistics E2: %s\n",stat_info2);
+    }
+
+#ifdef SAMP_STATS
+    fprintf(fd," statistics sampled from %ld (%d) to %ld sequences\n",
+	    (hist.entries > nstats ? nstats : hist.entries),sstats, hist.entries);
+#else
+    fprintf(fd," statistics extrapolated from %ld to %ld sequences\n",
+	    (hist.entries > nstats ? nstats : hist.entries),hist.entries);
+#endif
+
+    /* cum_hl stays 0 when no histogram was accumulated */
+    if (!m_msp->nohist && cum_hl > 0) {
+      fprintf(fd," Kolmogorov-Smirnov  statistic: %6.4f (N=%d) at %3d\n",
+	      max_dev/(float)cum_hl, n_chi_sq,max_dev_i);
+    }
+    if (m_msp->markx & MX_M10FORM) {
+      /* newlines in stat_info are replaced by spaces before formatting */
+      while ((bp=strchr(hist.stat_info,'\n'))!=NULL) *bp=' ';
+      /* avoid dividing by zero (or a negative total) below */
+      if (cum_hl <= 0) cum_hl = -1;
+      sprintf(info_hstring[0],"; mp_extrap: %d %ld\n; mp_stats: %s\n; mp_KS: %6.4f (N=%d) at %3d\n",
+	      MAX_STATS,hist.entries,hist.stat_info,max_dev/(float)cum_hl,
+	      n_chi_sq,max_dev_i);
+    }
+  }
+
+  if (m_msp->markx & MX_M10FORM) {
+    /* hide the first newline in the parameter text while formatting, then restore it */
+    if ((bp = strchr(info_gstring2[1],'\n'))!=NULL) *bp = ' ';
+    sprintf(info_hstring[1],"; mp_Algorithm: %s\n; mp_Parameters: %s\n",info_gstring2[0],info_gstring2[1]);
+    if (bp != NULL ) *bp = '\n';
+  }
+
+  if (ppst->other_info != NULL) {
+    fputs(ppst->other_info, fd);
+  }
+
+  fprintf(fd,"Algorithm: %s\nParameters: %s\n",info_gstring2[0],info_gstring2[1]);
+
+  fprintf (fd," Scan time: ");
+  ptime(fd,tscan);
+  fprintf(fd,"\n");
+  /* the <pre> is closed here only when no annot1_sname script is set */
+  if (!m_msp->annot1_sname[0] &&  m_msp->markx & MX_HTML) {
+    fputs("</pre>\n<hr />\n",fd);
+  }
+
+  fflush(fd);
+}
+
+extern char prog_name[], *verstr;
+
+#ifdef PCOMPLIB
+#include "mpi.h"
+#endif
+
+/* Report a fatal error on stderr and terminate with exit status 1.
+ *
+ *   p, p1 - two message fragments, printed back to back
+ *
+ * The message is tagged with the global prog_name.  When built with
+ * PCOMPLIB, MPI_Abort() and MPI_Finalize() are called before exit().
+ */
+void s_abort (char *p,  char *p1)
+{
+  fprintf (stderr, "\n***[%s] %s%s***\n", prog_name, p, p1);
+#ifdef PCOMPLIB
+  MPI_Abort(MPI_COMM_WORLD,1);
+  MPI_Finalize();
+#endif
+  exit (1);
+}
+
+/* Print the two message fragments p and p1 to stderr, tagged with the
+ * global prog_name and followed by a blank line, then exit with status 1.
+ */
+void
+w_abort (char *p, char *p1)
+{
+  FILE *err_fd = stderr;
+
+  fprintf (err_fd, "\n***[%s] %s%s***\n\n", prog_name, p, p1);
+  exit (1);
+}
+
+extern struct a_res_str *
+build_ares_code(unsigned char *aa0, int n0,
+		unsigned char *aa1, struct seq_record *seq,
+		int frame, int *have_ares, int repeat_thresh,
+		const struct mngmsg *m_msp, struct pstruct *ppst,
+		void *f_str
+		);
+
+extern struct lmf_str *
+re_openlib(struct lmf_str *, int outtty);
+
+#define MAX_BLINE 2048
+#define RANLIB (m_fptr->ranlib)
+
+extern int
+re_getlib(unsigned char *, struct annot_str **,
+	  int, int, int, int, int, long *, long *,
+	  struct lmf_str *m_fptr);
+
+/*
+   pre_load_best loads a set of sequences using re_getlib
+
+   it should be used for getting sequences for shuffling, and for showbest() if m_msg->quiet
+
+   it both opens the m_file_p buffer, gets the bline[] descriptions,
+   and reads the actual sequences.  In reading the sequences, it
+   should first allocate one large buffer so that individual buffers do not need to be freed.
+*/
+
+/* pre_load_best() - fetch the description line and, where missing, the
+ * sequence (and annotation) for each of the nbest entries in bbp_arr[].
+ *
+ *   aa1save - scratch buffer that re_getlib() reads each sequence into
+ *   maxn    - passed to re_getlib()
+ *   bbp_arr - best-hit array; each entry's mseq->bline and seq->aa1b are filled in
+ *   nbest   - number of entries in bbp_arr
+ *   m_msp   - receives aa1save_buf_b, bline_buf_b and pre_load_done
+ *   debug   - passed to get_annot_list()
+ *
+ * The work is done once: the function returns immediately when
+ * m_msp->pre_load_done is already set.  Allocation failures and a
+ * library that cannot be re-opened terminate the program with exit(1).
+ */
+void
+pre_load_best(unsigned char *aa1save, int maxn,
+	      struct beststr **bbp_arr, int nbest,
+	      struct mngmsg *m_msp, int debug)
+{
+  int i, n1, bl_len, tmp_bline_len, l_llen;
+  int seq_buf_len;
+  char bline[MAX_BLINE];
+  unsigned char  *seq_buf_p;
+  char *bline_buf_p;
+
+  struct beststr *bbp;
+  struct lmf_str *m_fptr;
+
+  /*
+     calculate how much room we need for sequences and blines
+  */
+
+  if (m_msp->pre_load_done) return;
+
+  /* one extra byte for the leading '\0' in front of the first sequence */
+  seq_buf_len = 1;
+  for (i=0; i<nbest; i++) {
+    /* we are not (currently) allocating more than n1+1, because alignment is not vectorized,
+       if it were vectorized, we would need n+16
+    */
+#ifdef DEBUG
+    if (bbp_arr[i]->n1 != bbp_arr[i]->seq->n1) {
+      fprintf(stderr,"*** error [%s:%d] - n1 (%d) != seq->n1 (%d)\n",
+	      __FILE__, __LINE__, bbp_arr[i]->n1, bbp_arr[i]->seq->n1);
+    }
+#endif
+
+    /* only entries without a sequence buffer need space */
+    if (bbp_arr[i]->seq->aa1b == NULL) {
+      seq_buf_len += bbp_arr[i]->seq->n1 + 1;
+    }
+  }
+
+  /* have required sequence space (seq_buf_len), allocate it */
+
+  if ((m_msp->aa1save_buf_b=(unsigned char *)calloc(seq_buf_len, sizeof(char)))==NULL) {
+    fprintf(stderr, "*** error [%s:%d] - cannot allocate space[%d] for sequence encoding\n",
+	    __FILE__, __LINE__, seq_buf_len);
+    exit(1);
+  }
+  else {
+    seq_buf_p = m_msp->aa1save_buf_b+1;		/* ensure there is an initial '\0' */
+  }
+
+  /* adjust description line length */
+  l_llen = m_msp->aln.llen;
+  if ((m_msp->markx & MX_M9SUMM) && m_msp->show_code != SHOW_CODE_ID && m_msp->show_code != SHOW_CODE_IDD) {
+    l_llen += 40;
+    if (l_llen > 200) l_llen=200;
+  }
+
+  /* full-length descriptions for -m 10 or long_info, otherwise l_llen-5 */
+  tmp_bline_len = sizeof(bline)-1;
+  if (!(m_msp->markx & MX_M10FORM) && !m_msp->long_info) {tmp_bline_len = l_llen-5;}
+
+  /* allocate more bline than we need for simplicity */
+  /* NOTE(review): this assumes strlen(bline)+1 <= tmp_bline_len for every
+     entry, which depends on RANLIB honoring the length it is given -- confirm */
+  if ((bline_buf_p=m_msp->bline_buf_b=(char *)calloc(nbest*tmp_bline_len, sizeof(char)))==NULL) {
+    fprintf(stderr, "*** error [%s:%d] - cannot allocate space[%d] for bline descriptions\n",
+	    __FILE__, __LINE__,  nbest*tmp_bline_len);
+    exit(1);
+  }
+
+  for (i=0; i<nbest; i++) {
+    bbp = bbp_arr[i];
+
+    if ((m_fptr=re_openlib(bbp->mseq->m_file_p,!m_msp->quiet))==NULL) {
+      fprintf(stderr,"*** error [%s:%d] - cannot re-open %s\n",
+	      __FILE__, __LINE__, bbp->mseq->m_file_p->lb_name);
+      exit(1);
+    }
+    /* read the description into bline[], then store it in the shared buffer */
+    RANLIB(bline,tmp_bline_len,bbp->mseq->lseek,bbp->mseq->libstr,m_fptr);
+    bl_len = strlen(bline);
+    bbp->mseq->bline = bline_buf_p;
+    bbp->mseq->bline_max = m_msp->aln.llen;
+    /* strncpy() copies no terminator here; the calloc()ed buffer supplies it */
+    strncpy(bbp->mseq->bline, bline, bl_len);
+    bline_buf_p += bl_len+1;
+
+    /* make sure we get annotation if present, and sequence if necessary */
+    if (bbp->seq->aa1b==NULL || (m_msp->ann_flg==1 && bbp->seq->annot_p==NULL)) {
+      n1 = re_getlib(aa1save, (m_msp->ann_flg==1) ? &(bbp->seq->annot_p) : NULL,
+		     maxn,m_msp->ldb_info.maxt3, m_msp->ldb_info.l_overlap,
+		     bbp->mseq->cont,m_msp->ldb_info.term_code,
+		     &bbp->seq->l_offset,&bbp->seq->l_off,bbp->mseq->m_file_p);
+      /* a length mismatch is reported but processing continues */
+      if (n1 != bbp->seq->n1) {
+	fprintf(stderr,"*** error [%s:%d] - n1[%d/%d] != n1[%d] from re_getlib() at %s [maxn:%d/maxt3:%d]\n",
+		__FILE__, __LINE__,
+		bbp->n1, bbp->seq->n1, n1, bbp->mseq->libstr, maxn, m_msp->ldb_info.maxt3);
+      }
+
+#ifdef DEBUG
+      if (adler32(1L,aa1save,n1)!=bbp->adler32_crc) {
+	fprintf(stderr,"*** error [%s:%d] - adler32_crc from re_getlib() at %d(%d):  %s\n",
+		__FILE__, __LINE__,
+		bbp->mseq->index,bbp->n1, bline);
+      }
+#endif
+
+      /* if we don't have the sequence in the aa1b buffer, copy it from re_getlib */
+      if (bbp->seq->aa1b == NULL)  {
+	bbp->seq->aa1b = seq_buf_p;
+	/* n1+1 bytes: the sequence plus the byte that follows it */
+	memcpy(bbp->seq->aa1b, aa1save, bbp->seq->n1+1);
+	seq_buf_p += bbp->seq->n1+1;
+      }
+    }
+  }
+
+  /* here, we are getting query annots after all the bptr[]s have been processed */
+  /* moved to comp_lib9.c */
+  /*
+  if (m_msp->annot0_sname[0]) {
+    if (get_annot(m_msp->annot0_sname, m_msp, m_msp->qtitle, m_msp->q_offset+m_msp->q_off-1,m_msp->n0, &m_msp->annot_p, 0, debug) < 0) {
+      fprintf(stderr,"*** error [%s:%d] - %s did not produce annotations\n",__FILE__, __LINE__, m_msp->annot0_sname);
+      m_msp->annot0_sname[0] = '\0';
+    }
+    if (m_msp->annot_p && m_msp->annot_p->n_annot > 0) {
+      m_msp->aa0a = m_msp->annot_p->aa1_ann;
+    }
+    if (!m_msp->ann_arr[0]) {m_msp->ann_arr[0] = ' '; m_msp->ann_arr[1] = '\0';}
+  }
+  */
+
+  /* if we have a variant annotation script, execute it and capture the output */
+  /* must do after bline is set */
+  if (m_msp->annot1_sname[0]) {
+    /* on failure the script name is cleared so that it is not tried again */
+    if (get_annot_list(m_msp->annot1_sname, m_msp, bbp_arr, nbest, 1, debug)< 0) {
+      fprintf(stderr,"*** error [%s:%d] - %s did not produce annotations for %s\n",__FILE__, __LINE__, m_msp->annot1_sname,m_msp->qtitle);
+      m_msp->annot1_sname[0] = '\0';
+    };
+    if (!m_msp->ann_arr[0]) {m_msp->ann_arr[0] = ' '; m_msp->ann_arr[1] = '\0';}
+  }
+
+  m_msp->pre_load_done = 1;
+}
+
+/*  merge_ares_chains()
+
+    seeks to merge two ares chains, producing a single chain that is
+    sorted by sw_score.
+
+    Strategy -- choose the chain with the highest score, and go down
+    it until the head of the other chain has higher score, then link
+    the other chain to the main chain, breaking the first, and
+    continue the process.
+
+    The two pointers, max_next and alt_next, keep track of the best
+    and the alternate chain
+ */
+
+
+#undef SHOW_MERGE_CHAIN
+
+/* Merge two score-ordered a_res_str chains into a single chain ordered
+ * by rst.score[score_ix] (highest first) and return its head.
+ *
+ *   cur_ares, tmp_ares - heads of the two chains; either may be NULL, in
+ *                        which case the other chain is returned unchanged
+ *   score_ix           - index into rst.score[] used for ordering
+ *   msg                - label used only in SHOW_MERGE_CHAIN debug output
+ *
+ * The chains are re-linked in place through their ->next pointers; no
+ * nodes are allocated or freed.
+ */
+struct a_res_str *
+merge_ares_chains(struct a_res_str *cur_ares,
+		  struct a_res_str *tmp_ares,
+		  int score_ix,
+		  const char *msg)
+{
+  struct a_res_str *max_next, *max_ares, *alt_ares, *prev_next;
+
+  if (!tmp_ares) return cur_ares;
+  /* an empty current chain would otherwise be dereferenced below */
+  if (!cur_ares) return tmp_ares;
+
+#ifdef SHOW_MERGE_CHAIN
+  fprintf(stderr,"cur_ares->");
+  for (max_next = cur_ares; max_next; max_next = max_next->next) {
+    fprintf(stderr,"%d->",max_next->rst.score[score_ix]);
+  }
+
+  fprintf(stderr,"||\n");
+  fprintf(stderr,"tmp_ares->");
+  for (max_next = tmp_ares; max_next; max_next = max_next->next) {
+    fprintf(stderr,"%d->",max_next->rst.score[score_ix]);
+  }
+  fprintf(stderr,"||\n");
+#endif
+
+  /* start with the chain whose head has the maximum score */
+
+  if (cur_ares->rst.score[score_ix] >= tmp_ares->rst.score[score_ix]) {
+    max_ares = max_next = prev_next = cur_ares;
+    alt_ares = tmp_ares;
+  }
+  else {
+    max_ares = max_next = prev_next = tmp_ares;
+    alt_ares = cur_ares;
+  }
+
+  while (max_next && alt_ares) {
+    /* walk the main chain while it still scores at least as high as the
+       head of the alternate chain; true for the first iteration */
+    while (max_next && max_next->rst.score[score_ix] >= alt_ares->rst.score[score_ix]) {
+      prev_next = max_next;
+      max_next = max_next->next;
+    }
+    if (max_next==NULL) break;
+    else {	/* the alternate head now scores higher: splice it in and
+		   make the remainder of the main chain the new alternate */
+      prev_next->next = alt_ares;
+      alt_ares = max_next;
+      max_next = prev_next->next;
+    }
+  }
+
+  /* the main chain ran out first: append what is left of the alternate */
+
+  if (max_next==NULL) {
+    prev_next->next = alt_ares;
+  }
+
+
+#ifdef SHOW_MERGE_CHAIN
+  fprintf(stderr,"[%s] merge_ares->",msg);
+  for (max_next = max_ares; max_next; max_next = max_next->next) {
+    fprintf(stderr,"%d->",max_next->rst.score[score_ix]);
+  }
+  fprintf(stderr,"||\n\n");
+#endif
+
+  return max_ares;
+}
+
+/* copies the sequence in "from" into "to", shuffling it */
+
+extern int my_nrand(int, void *);
+
+/* Copy the n residues of from[] into to[] (skipped when they are the same
+ * buffer), permute to[] in place, and write a 0 at to[n].
+ *
+ * Each step draws pick = my_nrand(remaining, rand_state) and swaps to[pick]
+ * with the last element of the not-yet-fixed prefix.  to[] must have room
+ * for n+1 bytes.
+ */
+void
+shuffle(unsigned char *from, unsigned char *to, int n, void *rand_state)
+{
+  int remaining, pick;
+  unsigned char held;
+
+  if (to != from) {
+    memcpy((void *)to,(void *)from,n);
+  }
+
+  remaining = n;
+  while (remaining > 0) {
+    pick = my_nrand(remaining, rand_state);
+    remaining--;
+    held = to[pick];
+    to[pick] = to[remaining];
+    to[remaining] = held;
+  }
+
+  to[n] = 0;
+}
+
+/* shuffles DNA sequences as codons
+ *
+ * Copies the n residues of from[] into to[] (skipped when they are the
+ * same buffer) and permutes the n/3 complete codons; residues inside a
+ * codon keep their order.  The n%3 trailing residues stay where they are,
+ * and a 0 is written at to[n].  to[] must have room for n+1 bytes.
+ *
+ * Codon c occupies to[3*c] .. to[3*c+2].  At step i the randomly chosen
+ * codon j = my_nrand(i, rand_state) is exchanged with codon i-1, the last
+ * codon of the not-yet-fixed prefix, so every access stays inside
+ * to[0 .. 3*(n/3)-1].
+ */
+void
+shuffle3(unsigned char *from, unsigned char *to, int n, void *rand_state)
+{
+  int i, j, k, i3, j3; unsigned char tmp;
+  int n3;
+
+  if (from != to) memcpy((void *)to,(void *)from,n);
+
+  n3 = n/3;
+
+  for (i=n3; i>0; i--) {
+    j = my_nrand(i, rand_state);
+    i3 = (i-1)*3;		/* first residue of the last unfixed codon */
+    j3 = j*3;			/* first residue of the chosen codon */
+    for (k=0; k<3; k++) {
+      tmp = to[j3+k];
+      to[j3+k] = to[i3+k];
+      to[i3+k] = tmp;
+    }
+  }
+  to[n] = 0;
+}
+
+/* "shuffles" by reversing the sequence
+ *
+ * Writes from[n-1], from[n-2], ... from[0] into to[0 .. n-1] and a '\0'
+ * at to[n].  from[] and to[] must not overlap, and to[] must have room
+ * for n+1 bytes.  With n <= 0 only the terminator is written.
+ */
+void
+rshuffle(unsigned char *from, unsigned char *to, int n)
+{
+  /* start one past the last residue and pre-decrement, so that the first
+     byte copied is from[n-1] and the last is from[0] */
+  const unsigned char *ptr = from + n;
+
+  while (n-- > 0) {
+    *to++ = *--ptr;
+  }
+  *to = '\0';
+}
+
+/* selects the window layout for the next wshuffle() call; toggled on every call */
+static int ieven = 0;
+
+/* permute the len bytes starting at win, swapping from the end (helper for wshuffle) */
+static void
+wshuffle_window(unsigned char *win, int len, void *rand_state)
+{
+  int i, j;
+  unsigned char tmp;
+
+  for (i=len; i>0; i--) {
+    j = my_nrand(i, rand_state);
+    tmp = win[j];
+    win[j] = win[i-1];
+    win[i-1] = tmp;
+  }
+}
+
+/* Copy the n residues of from[] into to[] and shuffle them inside
+ * consecutive windows of wsiz residues, then write a 0 at to[n].
+ *
+ * Successive calls alternate (through the file-level ieven flag) between
+ * two window layouts:
+ *   ieven != 0 : full windows start at 0, the n%wsiz leftover residues
+ *                at the end form a final short window
+ *   ieven == 0 : full windows end at n, the n%wsiz leftover residues at
+ *                the start form a short window
+ *
+ * In the first layout every full window is shuffled, including the last
+ * one when n is an exact multiple of wsiz.
+ *
+ * to[] must have room for n+1 bytes; wsiz must be > 0.
+ * NOTE(review): ieven is shared, unsynchronized state -- confirm how
+ * threaded callers are expected to use this.
+ */
+void
+wshuffle(unsigned char *from, unsigned char *to, int n, int wsiz, void *rand_state)
+{
+  int k, mm;
+
+  memcpy((void *)to,(void *)from,n);
+
+  mm = n%wsiz;
+
+  if (ieven) {
+    /* k <= n-wsiz covers every window that fits entirely inside to[0..n-1] */
+    for (k=0; k<=(n-wsiz); k += wsiz) {
+      wshuffle_window(&to[k], wsiz, rand_state);
+    }
+    wshuffle_window(&to[n-mm], mm, rand_state);
+    ieven = 0;
+  }
+  else {
+    for (k=n; k>=wsiz; k -= wsiz) {
+      wshuffle_window(&to[k-wsiz], wsiz, rand_state);
+    }
+    wshuffle_window(&to[0], mm, rand_state);
+    ieven = 1;
+  }
+  to[n] = 0;
+}
+
+/* Return the first value found in both q[] and s[], or 0 if there is none.
+ *
+ * Both arrays are terminated by a 0 entry.  The two lists are walked in
+ * step, always advancing the one whose current value is smaller, so a
+ * common value is only guaranteed to be found when both lists are in
+ * ascending order (presumably the case for callers -- confirm).
+ */
+int
+sfn_cmp(int *q, int *s)
+{
+  const int *q_cur = q;
+  const int *s_cur = s;
+
+  while (*q_cur != 0 && *s_cur != 0) {
+    if (*q_cur < *s_cur) {
+      q_cur++;
+    }
+    else if (*s_cur < *q_cur) {
+      s_cur++;
+    }
+    else {
+      return *q_cur;
+    }
+  }
+  return 0;
+}
+
+#ifndef MPI_SRC
+/* Reverse-complement seq[0 .. n-1] in place and write a 0 at seq[n].
+ *
+ *   seq  - residue codes; must have room for n+1 bytes
+ *   n    - number of residues
+ *   c_nt - complement table indexed by residue code
+ *
+ * Residues are exchanged pairwise from both ends, each one replaced by
+ * its complement; for odd n the middle residue is complemented in place.
+ */
+void
+revcomp(unsigned char *seq, int n, int *c_nt)
+{
+  unsigned char swapped;
+  int left, right;
+
+  left = 0;
+  right = n-1;
+  while (left < right) {
+    swapped = c_nt[seq[left]];
+    seq[left] = c_nt[seq[right]];
+    seq[right] = swapped;
+    left++;
+    right--;
+  }
+
+  /* the indexes only meet when n is odd */
+  if (left == right) {
+    seq[left] = c_nt[seq[left]];
+  }
+
+  seq[n]=0;
+}
+#endif
+
+/* check to see whether this score (or a shuff score) should
+   be included in statistics
+
+   pre_nstats - running count of candidate scores; incremented here
+   nstats     - number of scores already held (read only under SAMP_STATS_LESS)
+   rand_state - state passed to my_nrand()
+
+   returns: the slot (0 .. MAX_STATS-1) in which the score should be
+            stored, or -1 if the score should not be kept
+*/
+int samp_stats_idx (int *pre_nstats, int nstats, void *rand_state) {
+  int jstats = -1;
+
+  /* this code works when every score can be used for statistics
+     estimates, but fails for fasta/[t]fast[xy] where only a fraction
+     of scores are used */
+
+  /* the first MAX_STATS candidates take consecutive slots */
+  if (*pre_nstats < MAX_STATS) {
+    jstats = (*pre_nstats)++;
+  }
+
+  /* here, the problem is that while we may have pre_nstats
+     possible samplings, in some cases (-M subsets, fasta,
+     [t]fast[xy] we don't have MAX_STATS samples yet.  Until we
+     have MAX_STATS, we want more.  But the stats_idx strategy
+     means that there may be additional samples in the buffers
+     that are not reflected in nstats.
+  */
+
+  else {
+#ifdef SAMP_STATS_LESS
+    /* now we have MAX_STATS samples
+       we want to sample 1/2 of 60K - 120K, 1/3 of 120K - 180K, etc */
+    /* check every 15K to see if we have gone past the next threshold */
+
+    /* pre_nstats cannot be incremented before the % to ensure
+       that stats_inc is incremented exactly at 60000, 120000, etc.
+       use ">=" in case increment comes later
+       tests suggest the first 60K are sampled about 25% more
+       than the rest
+    */
+    /* note: *pre_nstats is incremented only when the else-if condition is evaluated */
+    if (nstats < MAX_STATS) {
+      jstats = MAX_STATS - my_nrand(MAX_STATS - nstats, rand_state)-1;
+    }
+    else if (((*pre_nstats)++ % (MAX_STATS/4)) == 0 &&
+	     *pre_nstats >= stats_inc * MAX_STATS) {
+      stats_inc = (*pre_nstats / MAX_STATS) + 1;
+    }
+    if ((*pre_nstats % stats_inc) == 0) {
+      jstats = my_nrand(MAX_STATS, rand_state);
+    }
+#else
+    /* this sampling strategy calls my_nrand() for every
+       sequence > 60K, but provides a very uniform sampling */
+    /* draw against the updated count; a draw at or beyond MAX_STATS means
+       "do not keep" (presumably my_nrand(n) yields 0..n-1 -- confirm) */
+    jstats = my_nrand(++(*pre_nstats), rand_state);
+    if (jstats >= MAX_STATS) { jstats = -1;}
+#endif
+  }
+  return jstats;
+}
+
+/* **************************************************************** */
+/* build_link_data -- produces a fasta library file by running the
+   m_msp->link_lname script
+   (1) generate a temporary file name
+   (2) write out accessions \t expects to the temporary file
+   (3) run script against temporary file, producing fasta_file_expansion_file
+   (4) return fasta expansion filename for standard fasta openlib().
+
+   link_lib_file_p - receives the calloc()ed name of the file that holds
+                     the script output (to be removed by the caller);
+                     set to NULL on failure
+   m_msp           - supplies link_lname, nskip and nshow
+   bestp_arr       - the first nskip+nshow entries supply accession
+                     (mseq->bline up to the first blank) and rst.escore
+   debug           - when non-zero, the accession file is not removed
+
+   returns: the calloc()ed expansion library string
+            ("[@]file[ libtype]"), or NULL when no links are available
+            or anything fails.  Always NULL when UNIX is not defined.
+*/
+/* **************************************************************** */
+char *
+build_link_data(char **link_lib_file_p,
+		struct mngmsg *m_msp, struct beststr **bestp_arr,
+		int debug) {
+  int i, status;
+  char tmp_line[MAX_SSTR];
+  char link_acc_file[MAX_STR];
+  int link_acc_fd;
+  char *link_lib_file;
+  char *link_lib_str;
+  char link_script[MAX_LSTR];
+  int link_lib_type;
+  char *bp, *link_bp;
+  FILE *link_fd=NULL;		/* file for link accessions */
+
+#ifndef UNIX
+  return NULL;
+#else
+  /* get two tmpfiles, one for accessions, one for library */
+  link_acc_file[0] = '\0';
+
+  if ((link_lib_file=(char *)calloc(MAX_STR,sizeof(char)))==NULL) {
+    fprintf(stderr,"*** error [%s:%d] - [build_link_data] Cannot allocate link_lib_file",
+	    __FILE__, __LINE__);
+    /* nothing to work with; free(NULL) at no_links is harmless */
+    goto no_links;
+  }
+
+  if ((bp=getenv("TMP_DIR"))!=NULL) {
+    strncpy(link_acc_file,bp,sizeof(link_acc_file));
+    link_acc_file[sizeof(link_acc_file)-1] = '\0';
+    SAFE_STRNCAT(link_acc_file,"/",sizeof(link_acc_file));
+  }
+
+  SAFE_STRNCAT(link_acc_file,"link_acc_XXXXXX",sizeof(link_acc_file));
+  if ((link_acc_fd = mkstemp(link_acc_file)) < 0) {
+    fprintf(stderr,"*** error [%s:%d] - Cannot open link_acc_file: %s\n",
+	    __FILE__, __LINE__, link_acc_file);
+    goto no_links;
+  }
+
+  /* the library file name is the accession file name plus ".lib" */
+  strncpy(link_lib_file,link_acc_file,MAX_STR);
+  link_lib_file[MAX_STR-1] = '\0';	/* strncpy() may leave the copy unterminated */
+  SAFE_STRNCAT(link_lib_file,".lib",MAX_STR);
+
+  /* write out accessions to link_acc_file */
+  if ((link_fd =fdopen(link_acc_fd,"w"))==NULL) {
+    fprintf(stderr,"*** error [%s:%d] - Cannot open link_acc_file: %s\n",
+	    __FILE__, __LINE__, link_acc_file);
+    goto no_links;
+  }
+
+  for (i=0; i<m_msp->nskip + m_msp->nshow; i++) {
+    /* temporarily cut the description at the first blank */
+    if ((bp=strchr(bestp_arr[i]->mseq->bline,' '))!=NULL) {
+      *bp = '\0';
+    }
+    fprintf(link_fd,"%s\t%.3g\n",bestp_arr[i]->mseq->bline,bestp_arr[i]->rst.escore);
+    if (bp != NULL) *bp=' ';
+  }
+  fclose(link_fd);
+
+  /* build link_script link_acc_file > link_lib_file */
+  /* skip the script ('!') and indirect ('@') markers */
+  link_bp = &m_msp->link_lname[0];
+  if (*link_bp == '!') {
+    link_bp++;
+  }
+  if (*link_bp == '@') {
+    link_bp++;
+  }
+
+  /* remove library type; it stays 0 when absent or not a number */
+  link_lib_type = 0;
+  if ((bp=strchr(link_bp,' '))!=NULL) {
+    *bp = '\0';
+    sscanf(bp+1,"%d",&link_lib_type);
+  }
+
+  strncpy(link_script,link_bp,sizeof(link_script));
+  link_script[sizeof(link_script)-1] = '\0';
+  SAFE_STRNCAT(link_script," ",sizeof(link_script));
+  SAFE_STRNCAT(link_script,link_acc_file,sizeof(link_script));
+  SAFE_STRNCAT(link_script," >",sizeof(link_script));
+  SAFE_STRNCAT(link_script,link_lib_file,sizeof(link_script));
+
+  /* un-edit m_msp->link_lname */
+  if (bp != NULL) *bp = ' ';
+
+  /* run link_script link_acc_file > link_lib_file */
+  status = system(link_script);
+  if (!debug) {
+    /* this code is only compiled when UNIX is defined */
+    unlink(link_acc_file);
+  }
+
+  if (status == NO_FILE_EXIT) {	/* my specific return for no links */
+    goto no_links;
+  }
+
+  if (status < 0 || status == 127) {
+    fprintf(stderr,"*** error [%s:%d] - script: %s failed\n",
+	    __FILE__, __LINE__,link_script);
+    goto no_links;
+  }
+
+  /* make sure the script output can be read */
+  if ((link_fd=fopen(link_lib_file,"r"))==NULL) {
+    goto no_links;
+  }
+  else fclose(link_fd);
+
+  if ((link_lib_str=(char *)calloc(MAX_STR,sizeof(char)))==NULL) {
+    fprintf(stderr,"*** error [%s:%d] - [build_link_data] Cannot allocate link_lib_str",
+	    __FILE__, __LINE__);
+    goto no_links;
+  }
+
+  /* build the file string (possibly @link_lib_file libtype) */
+  link_lib_str[0]='\0';
+  if (m_msp->link_lname[0] == '@') {
+    SAFE_STRNCAT(link_lib_str,"@",MAX_STR);
+  }
+  SAFE_STRNCAT(link_lib_str,link_lib_file,MAX_STR);
+  if (link_lib_type > 0) {
+    sprintf(tmp_line," %d",link_lib_type);
+    SAFE_STRNCAT(link_lib_str,tmp_line,MAX_STR);
+  }
+
+  *link_lib_file_p = link_lib_file;
+  return link_lib_str;
+
+ no_links:
+  free(link_lib_file);
+  *link_lib_file_p = NULL;
+  return NULL;
+#endif
+}
+
+/* **************************************************************** */
+/* build_lib_db -- produces fasta file from script
+   (1) generate a temporary file name lib_db_file
+       (in $TMP_DIR when that is set)
+   (2) run script producing data in lib_db_file
+
+   script_file - "[@]script[ libtype]"; edited in place while the command
+                 line is built, and restored before the script is run
+
+   returns: the calloc()ed expansion library string
+            ("[@]lib_db_file[ libtype]"), or NULL on failure.
+            The temporary file itself is not removed here.
+*/
+/* **************************************************************** */
+char *
+build_lib_db(char *script_file) {
+  int status;
+  char tmp_line[MAX_SSTR];
+  char *lib_db_file, *lib_db_str;
+  char lib_db_script[MAX_LSTR];
+  int lib_db_type;
+  int lib_db_str_len;
+  char *bp, *lib_bp;
+
+  if ((lib_db_file=(char *)calloc(MAX_STR,sizeof(char)))==NULL) {
+    fprintf(stderr,"*** error [%s:%d] - [build_lib_db] Cannot allocate lib_db_file",
+	    __FILE__, __LINE__);
+    goto no_lib;
+  }
+
+  /* lib_db_file is a pointer, so its capacity is MAX_STR, not sizeof() */
+  if ((bp=getenv("TMP_DIR"))!=NULL) {
+    strncpy(lib_db_file,bp,MAX_STR);
+    lib_db_file[MAX_STR-1] = '\0';
+    SAFE_STRNCAT(lib_db_file,"/",MAX_STR);
+  }
+
+  SAFE_STRNCAT(lib_db_file,"lib_db_XXXXXX",MAX_STR);
+  /* NOTE(review): mktemp() only generates a name and is open to races;
+     consider mkstemp() as in build_link_data() if portability allows */
+  mktemp(lib_db_file);
+  if (lib_db_file[0] == '\0') {	/* mktemp() could not generate a name */
+    fprintf(stderr,"*** error [%s:%d] - [build_lib_db] cannot create temporary file name\n",
+	    __FILE__, __LINE__);
+    goto no_lib;
+  }
+
+  /* skip the indirect ('@') marker */
+  lib_bp = script_file;
+  if (*lib_bp == '@') {
+    lib_bp++;
+  }
+
+  /* remove library type; it stays 0 when absent or not a number */
+  lib_db_type = 0;
+  if ((bp=strchr(lib_bp,' '))!=NULL) {
+    *bp = '\0';
+    sscanf(bp+1,"%d",&lib_db_type);
+  }
+
+  strncpy(lib_db_script,lib_bp,sizeof(lib_db_script));
+  lib_db_script[sizeof(lib_db_script)-1] = '\0';
+  SAFE_STRNCAT(lib_db_script," >",sizeof(lib_db_script));
+  SAFE_STRNCAT(lib_db_script,lib_db_file,sizeof(lib_db_script));
+
+  /* un-edit script_file */
+  if (bp != NULL) *bp = ' ';
+
+  /* run lib_db_script > lib_db_file */
+  status = system(lib_db_script);
+
+  if (status == NO_FILE_EXIT) {	/* my specific return for no links */
+    goto no_lib;
+  }
+
+  if (status < 0 || status == 127) {
+    fprintf(stderr,"*** error [%s:%d] - [build_lib_db] script: %s failed\n",
+	    __FILE__, __LINE__, lib_db_script);
+    goto no_lib;
+  }
+
+  /* format the optional " libtype" suffix first so the result can be sized exactly */
+  tmp_line[0] = '\0';
+  if (lib_db_type > 0) {
+    sprintf(tmp_line," %d",lib_db_type);
+  }
+
+  /* '@' + file name + suffix + terminating '\0' */
+  lib_db_str_len = (int)(strlen(lib_db_file) + strlen(tmp_line)) + 2;
+
+  /* build the file string (possibly @lib_db_str libtype) */
+  if ((lib_db_str=calloc(lib_db_str_len,sizeof(char)))==NULL) {
+    fprintf(stderr,"*** error [%s:%d] - [build_lib_db] cannot allocate lib_db_str[%d]\n",
+	    __FILE__, __LINE__, lib_db_str_len);
+    goto no_lib;
+  }
+  lib_db_str[0]='\0';
+  if (*script_file == '@') {
+    SAFE_STRNCAT(lib_db_str,"@",lib_db_str_len);
+  }
+  SAFE_STRNCAT(lib_db_str,lib_db_file,lib_db_str_len);
+  SAFE_STRNCAT(lib_db_str,tmp_line,lib_db_str_len);
+
+  /* the name now lives in lib_db_str; the work buffer is no longer needed */
+  free(lib_db_file);
+  return lib_db_str;
+
+ no_lib:
+  free(lib_db_file);		/* free(NULL) is harmless */
+  return NULL;
+}
+
+/* used to temporarily allocate annotation array in next_annot_entry()
+   (set up by init_tmp_annot(), enlarged by update_tmp_annot()) */
+struct annot_mstr {
+  int max_annot;			/* number of entries allocated in tmp_arr_p */
+  struct annot_entry *tmp_arr_p;	/* heap array of max_annot entries, or NULL */
+};
+
+/* init_tmp_annot(this, size) initializes the structure used to track annots
+ *
+ *   this - tracker to set up; an existing array is kept when tmp_arr_p is
+ *          non-NULL and max_annot > 0
+ *   size - currently ignored: a fresh array always gets 32 zeroed entries
+ *
+ *   returns: 1 on success (or when nothing had to be done),
+ *            0 if the array could not be allocated
+ */
+int
+init_tmp_annot(struct annot_mstr *this, int size) {
+
+  /* only reset if array is NULL */
+  if (this->tmp_arr_p == NULL || this->max_annot <= 0) {
+    this->max_annot = 32;
+    if ((this->tmp_arr_p=(struct annot_entry *)calloc(this->max_annot, sizeof(struct annot_entry)))==NULL) {
+      fprintf(stderr,"*** error [%s:%d] - cannot allocate annot_entry[%d]\n",
+              __FILE__,__LINE__,this->max_annot);
+      return 0;
+    }
+  }
+  return 1;
+}
+
+/* update_tmp_annot(this) enlarges the annotation array by half
+ *
+ *   returns: 1 on success; this->tmp_arr_p and this->max_annot describe
+ *              the enlarged array
+ *            0 if realloc() fails; the previous array and max_annot are
+ *              left untouched, so the caller can still use or free them
+ */
+int
+update_tmp_annot(struct annot_mstr *this) {
+  int new_max;
+  struct annot_entry *new_arr_p;
+
+  new_max = this->max_annot + (this->max_annot/2);
+  /* max_annot/2 is 0 for sizes below 2; always grow by at least one entry */
+  if (new_max <= this->max_annot) new_max = this->max_annot + 1;
+
+  /* keep the old pointer until realloc() has succeeded, otherwise a failed
+     call would lose (and leak) the existing array */
+  if ((new_arr_p = (struct annot_entry *)realloc(this->tmp_arr_p, (size_t)new_max*sizeof(struct annot_entry)))==NULL) {
+    fprintf(stderr,"[*** error [%s:%d] - cannot reallocate tmp_ann_astr[%d]\n",
+	    __FILE__, __LINE__, new_max);
+    return 0;
+  }
+
+  this->tmp_arr_p = new_arr_p;
+  this->max_annot = new_max;
+  return 1;
+}
+
+struct annot_str *
+next_annot_entry(FILE *annot_fd, char *tmp_line, int n_tmp_line,
+		 struct annot_str *annot_p,		
+		 struct annot_mstr *mtmp_annot_p,	/* this structure will end up in the seq_record */
+		 struct mngmsg *m_msp, int target);
+
+/* **************************************************************** */
+/* get_annot_list -- read annotations for the sequences in bestp_arr[]
+   if sname[0]=='<', read the file sname+1 directly, goto (4)
+   if sname[0]=='!', run the script sname+1
+   (1) generate a temporary file name (under $TMP_DIR if it is set)
+   (2) write out list of blines, one "description<tab>offset+length"
+       line per sequence (the description is cut at the first blank
+       found past DESCR_OFFSET)
+   (3) run the script against the temporary file, with its output
+       redirected to <temporary file>.annot
+
+   (4) read in the annotations and merge them into bestp_arr[]
+   (5) return number of sequences for which annotations were read
+
+   returns -1 if the annotations cannot be produced or parsed (n_annot
+   of already attached annotations is then reset to 0), and 0 on
+   non-UNIX builds.  The temporary files are removed unless debug is set.
+*/
+/* **************************************************************** */
+
+int
+get_annot_list(char *sname, struct mngmsg *m_msp, struct beststr **bestp_arr, int nbest,
+	       int target, int debug) {
+  int i, status;
+  long l_offset;
+  char tmp_line[MAX_STR];
+  char annot_bline_file[MAX_STR];
+  int annot_bline_fd;
+  char *annot_descr_file = NULL;
+  char annot_script[MAX_LSTR];
+  struct annot_str *annot_p;
+  char *bp;
+  int annot_seq_cnt;
+  int script_ran = 0;		/* set once the script may have created annot_descr_file */
+  FILE *annot_fd=NULL;		/* file for annot accessions */
+  struct annot_mstr mtmp_annot;	/* allows annot_arr_p to be expanded */
+
+#ifndef UNIX
+  return 0;
+#else
+
+  /* set mtmp_annot to be initialized; done first so that every
+     no_annots exit can safely test/free it */
+  mtmp_annot.tmp_arr_p = NULL;
+  mtmp_annot.max_annot = 0;
+
+  if (sname[0] == '!') {
+
+    /* get two tmpfiles, one for bline, one for returned annotations
+       (but it would make more sense to use popen() to get the
+       annotations back
+    */
+
+    annot_bline_file[0] = '\0';
+
+    if ((annot_descr_file=(char *)calloc(MAX_STR,sizeof(char)))==NULL) {
+      fprintf(stderr,"*** error [%s:%d] - [get_annot_list] Cannot allocate annot_file\n",
+	      __FILE__, __LINE__);
+      goto no_annots;		/* do not write through the NULL pointer */
+    }
+    annot_descr_file[0] = '\0';
+
+    if ((bp=getenv("TMP_DIR"))!=NULL) {
+      strncpy(annot_bline_file,bp,sizeof(annot_bline_file));
+      annot_bline_file[sizeof(annot_bline_file)-1] = '\0';
+      SAFE_STRNCAT(annot_bline_file,"/",sizeof(annot_bline_file));
+    }
+
+    SAFE_STRNCAT(annot_bline_file,"annot_bline_XXXXXX",sizeof(annot_bline_file));
+    if ((annot_bline_fd = mkstemp(annot_bline_file)) < 0) {
+      fprintf(stderr,"*** error [%s:%d] - Cannot open annot_bline_file: %s\n",__FILE__, __LINE__, annot_bline_file);
+      goto no_annots;
+    }
+    strncpy(annot_descr_file,annot_bline_file,MAX_STR);
+    annot_descr_file[MAX_STR-1] = '\0';	/* terminate the copy, not its source */
+    SAFE_STRNCAT(annot_descr_file,".annot",MAX_STR);
+
+    /* write out accessions to annot_bline_file */
+    if ((annot_fd =fdopen(annot_bline_fd,"w"))==NULL) {
+      fprintf(stderr,"*** error [%s:%d] - Cannot open annot_bline_file: %s\n",__FILE__, __LINE__, annot_bline_file);
+      close(annot_bline_fd);	/* the descriptor is still ours when fdopen() fails */
+      if (!debug) { unlink(annot_bline_file); }
+      goto no_annots;
+    }
+
+    for (i=0; i<nbest; i++) {
+      /* a sequence that is already flagged was written earlier in this loop */
+      if (bestp_arr[i]->mseq->annot_req_flag) {	continue; }
+      if ((strlen(bestp_arr[i]->mseq->bline) > DESCR_OFFSET) &&
+	  (bp=strchr(bestp_arr[i]->mseq->bline+DESCR_OFFSET,' '))!=NULL) {*bp = '\0';}
+      else {bp = NULL;}
+      /* provide sequence length with offset, but only if offset is positive */
+      l_offset = bestp_arr[i]->seq->l_offset+bestp_arr[i]->seq->l_off -1;
+      if (l_offset < 0) { l_offset = 0;}
+      fprintf(annot_fd,"%s\t%ld\n",bestp_arr[i]->mseq->bline,
+	      l_offset + bestp_arr[i]->seq->n1);
+      if (bp != NULL) *bp=' ';	/* restore the blank that was overwritten above */
+      bestp_arr[i]->mseq->annot_req_flag = 1;
+    }
+    fclose(annot_fd);
+    annot_fd = NULL;		/* closed: no_annots must not close it again */
+
+    subs_env(annot_script, sname+1, sizeof(annot_script));
+    annot_script[sizeof(annot_script)-1] = '\0';
+    SAFE_STRNCAT(annot_script," ",sizeof(annot_script));
+    SAFE_STRNCAT(annot_script,annot_bline_file,sizeof(annot_script));
+    SAFE_STRNCAT(annot_script," >",sizeof(annot_script));
+    SAFE_STRNCAT(annot_script,annot_descr_file,sizeof(annot_script));
+
+    /* run annot_script annot_bline_file > annot_descr_file */
+    status = system(annot_script);
+    script_ran = 1;		/* the shell redirection may have created annot_descr_file */
+    if (!debug) {
+      unlink(annot_bline_file);
+    }
+
+    /* NOTE(review): system() returns a wait status, not the script's exit
+       code -- confirm that NO_FILE_EXIT and 127 are meant to be compared
+       against the raw status */
+    if (status == NO_FILE_EXIT) {	/* my specific return for no annots */
+      goto no_annots;
+    }
+
+    if (status < 0 || status == 127) {
+      fprintf(stderr,"*** error [%s:%d] - script: %s failed\n",
+	      __FILE__, __LINE__, annot_script);
+      goto no_annots;
+    }
+  }
+  else if (sname[0] == '<') {
+    annot_descr_file = sname+1;	/* points into sname: never freed or unlinked */
+  }
+  else {
+    fprintf(stderr,"*** error [%s:%d] - %s not script (!) or file (<)\n",__FILE__, __LINE__, sname);
+    goto no_annots;
+  }
+
+  /* read annotation file */
+
+  if ((annot_fd=fopen(annot_descr_file,"r"))==NULL) {
+    goto no_annots;
+  }
+
+  /* be sure to ask for annotation once */
+  for (i=0; i<nbest; i++) {
+    bestp_arr[i]->mseq->annot_req_flag = 1;
+  }
+  /* we have some annotations */
+  /* the annotation script MUST return the annotations ordered as in annot_descr_file,
+     in "fasta" form:
+
+     >bline_descr
+     pos<tab>label<tab>value?<tab>comment (which is not read in this version)
+     1 *
+     11 V N
+  */
+
+  /* now read the annotation/variant file */
+
+  /* read #comments, =annot_defs at beginning of file */
+  tmp_line[0] = '#';
+  while (tmp_line[0] == '#' || tmp_line[0] == '=') {
+    if (tmp_line[0] == '=') add_annot_def(m_msp, tmp_line+1,1);
+    if (fgets(tmp_line, sizeof(tmp_line), annot_fd)==NULL) {
+      fprintf(stderr,"*** error [%s:%d] - premature annotation file end (%s)\n",
+	      __FILE__,__LINE__, annot_descr_file);
+      goto no_annots;
+    }
+  }
+
+  annot_seq_cnt = 0;
+
+  /* now read in the annotations, but only the first time if asked multiple times */
+  for (i=0; i<nbest; i++) {
+    if (!bestp_arr[i]->mseq->annot_req_flag) {
+      continue;
+    }
+    bestp_arr[i]->mseq->annot_req_flag = 0;
+
+    /* tmp_line holds the '>' line of the next entry; it must match this bline */
+    if ((bp=strchr(tmp_line,'\n'))!=NULL) *bp = '\0';
+    if ((bp=strchr(tmp_line,'\t'))!=NULL) *bp = '\0';
+    if (tmp_line[0] != '>' || strncmp(&tmp_line[1], bestp_arr[i]->mseq->bline, strlen(&tmp_line[1])) != 0) {
+      fprintf(stderr,"*** error [%s:%d] - %s description mismatch (%s:%s)\n",
+	      __FILE__,__LINE__,annot_descr_file, tmp_line, bestp_arr[i]->mseq->bline);
+      goto no_annots;
+    }
+
+    annot_p = next_annot_entry(annot_fd, tmp_line, sizeof(tmp_line), bestp_arr[i]->seq->annot_p, &mtmp_annot, m_msp, target);
+
+    if (annot_p) {
+      bestp_arr[i]->seq->annot_p = annot_p;
+      s_annot_to_aa1a(bestp_arr[i]->seq->l_offset + bestp_arr[i]->seq->l_off - 1,
+		      bestp_arr[i]->seq->n1, annot_p,m_msp->ann_arr, bestp_arr[i]->mseq->libstr);
+      annot_seq_cnt++;
+      mtmp_annot.tmp_arr_p = NULL;	/* prevents tmp_arr_p from being freed */
+    }
+    else {
+      if (bestp_arr[i]->seq->annot_p) {
+	bestp_arr[i]->seq->annot_p->n_annot = 0;
+      }
+    }
+  }
+
+  if (mtmp_annot.tmp_arr_p) free(mtmp_annot.tmp_arr_p);
+
+  fclose(annot_fd);
+  if (sname[0]=='!') {
+    if (!debug) {
+      unlink(annot_descr_file);
+    }
+    free(annot_descr_file);
+  }
+  return annot_seq_cnt;
+
+ no_annots:
+  /* release whatever was acquired before the failure */
+  if (annot_fd) fclose(annot_fd);
+  if (mtmp_annot.tmp_arr_p) free(mtmp_annot.tmp_arr_p);
+
+  for (i=0; i<nbest; i++) {
+    if (bestp_arr[i]->seq->annot_p) {
+      if (bestp_arr[i]->seq->annot_p->n_annot > 0) {
+	bestp_arr[i]->seq->annot_p->n_annot = 0;
+      }
+    }
+  }
+  if (sname[0] == '!') {
+    /* only the script branch owns annot_descr_file (name and file) */
+    if (script_ran && !debug && annot_descr_file) { unlink(annot_descr_file); }
+    free(annot_descr_file);
+  }
+  return -1;
+#endif
+}
+
+/* sort_annots -- Shell sort of the n_annot pointers in s_annot[] into
+   ascending ->pos order, using the fixed gap sequence 112,48,21,7,3,1.
+   Only the pointers are moved; the entries themselves stay in place. */
+void sort_annots(struct annot_entry **s_annot, int n_annot)
+{
+  int gap_seq[6] = { 112, 48, 21, 7, 3, 1 };
+  int pass, step, ix, hole;
+  int key_pos;
+  struct annot_entry *moving;
+
+  for (pass = 0; pass < 6; pass++) {
+    step = gap_seq[pass];
+    for (ix = step; ix < n_annot; ix++) {
+      moving = s_annot[ix];
+      key_pos = moving->pos;
+      /* slide larger elements of this gap-chain up until moving's slot is found */
+      for (hole = ix; hole >= step; hole -= step) {
+	if (s_annot[hole - step]->pos <= key_pos) break;
+	s_annot[hole] = s_annot[hole - step];
+      }
+      s_annot[hole] = moving;
+    }
+  }
+}
+
+/* next_annot_entry -- reads and parses one annotation entry
+
+  on entry, tmp_line holds the '>' line of the entry (kept in annot_acc
+  for error messages); lines are then read from *annot_fd until the next
+  '>' line or end of file:
+    #...   comment, ignored
+    =...   symbol definition, passed to add_annot_def()
+    pos<tab>label[<tab>value-or-end[<tab>comment]]   one annotation
+  lines without a <tab> are ignored.  pos (and end, for label '-') are
+  converted to 0-offset.  A '[' entry followed by a ']' line is turned
+  into a single '-' (domain) entry.
+
+  entries are collected in mtmp_annot_p->tmp_arr_p (allocated and
+  expanded as needed).  When at least one entry was read, that array is
+  trimmed to n_annot+1 entries and linked into annot_p (allocated here
+  if NULL) together with a pos-sorted pointer array (s_annot_arr_p),
+  n_annot and n_domains.  mtmp_annot_p always describes the array as it
+  currently exists, so the caller may free it when NULL is returned.
+
+  returns annot_p, or NULL if no entries were read, an unpaired '[' was
+  found, or memory could not be allocated.  On return tmp_line holds the
+  last line read (the next '>' line, if there is one).
+*/
+
+struct annot_str *
+next_annot_entry(FILE *annot_fd, char *tmp_line, int n_tmp_line, struct annot_str *annot_p,
+		 struct annot_mstr *mtmp_annot_p, struct mngmsg *m_msp, int target) {
+
+  char ctmp_label, ctmp_value, tmp_comment[MAX_STR], annot_acc[MAX_STR];
+  char *bp;
+  int f_pos, f_end;
+  int i_ann, l_doms;
+  int n_annot = 0;
+  int last_left_bracket = -1;
+  struct annot_entry *tmp_ann_entry_arr, **s_tmp_ann_entry_arr;
+  struct annot_entry *exact_arr;
+
+  int *t_ascii = lascii;	/* generally, annotation target is library */
+  if (target != 1) t_ascii = qascii;	/* for TFAST, target is only query */
+
+  SAFE_STRNCPY(annot_acc, tmp_line, sizeof(annot_acc));
+
+  /* initialize a 32-entry annot_entry array */
+  /* temporary, expandable place for annotations */
+  if (init_tmp_annot(mtmp_annot_p, 32)==0) return NULL;
+  tmp_ann_entry_arr = mtmp_annot_p->tmp_arr_p;
+
+  l_doms = 0;
+
+  /* read through each annotation in file */
+  while (fgets(tmp_line, n_tmp_line, annot_fd)!=NULL ) {
+    if (tmp_line[0] == '>') goto next_bline;	/* start of new annotation */
+    if (tmp_line[0] == '#') continue;		/* ignore comments */
+    if (tmp_line[0] == '=') {	/* symbol definition */
+      add_annot_def(m_msp, tmp_line+1,1);
+      continue;
+    }
+
+    if (n_annot >= mtmp_annot_p->max_annot - 1) {
+      /* try to expand annotation array */
+      if (update_tmp_annot(mtmp_annot_p)==0) {
+	return NULL;
+      }
+      tmp_ann_entry_arr = mtmp_annot_p->tmp_arr_p;
+    }
+
+    /* sscanf cannot give strings with blanks */
+    /* sscanf(tmp_line,"%d %c %c %s", &f_pos, &ctmp_label, &ctmp_value, tmp_comment); */
+    tmp_comment[0] = '\0';
+    if ((bp=strchr(tmp_line,'\r')) || (bp=strchr(tmp_line,'\n'))) *bp='\0';	/* clean up EOL */
+    if ((bp=strchr(tmp_line,'\t'))!=NULL) {	/* fields MUST be tab delimited */
+      f_pos=atoi(tmp_line) - 1;	/* get first field -- f_pos, converted to 0-offset  */
+      ctmp_label = bp[1];	/* get second field -- ctmp_label */
+      /* defaults for a line that stops after the label field, so that
+	 f_end/ctmp_value are never used uninitialized */
+      f_end = f_pos;
+      ctmp_value = '\0';
+      if ((bp=strchr(bp+1,'\t'))!=NULL) {	/* next field could be f_end or ctmp_value */
+	if (ctmp_label == '-') { f_end = atoi(bp+1) -1; ctmp_value = '\0';}
+	else {ctmp_value = bp[1]; f_end = f_pos;} 	/* have variant, not coordinate */
+	if ((bp=strchr(bp+1,'\t'))!=NULL) {		/* if last <tab>, get comment */
+	  strncpy(tmp_comment,bp+1,sizeof(tmp_comment));
+	  tmp_comment[sizeof(tmp_comment)-1] = '\0';	/* strncpy() does not terminate a full buffer */
+	}
+      }
+    }
+    else {	/* no tab */
+      continue;
+    }
+
+    if (ctmp_label != ']') {	/* anything except ']' needs to be recorded */
+      tmp_ann_entry_arr[n_annot].pos = f_pos;
+      tmp_ann_entry_arr[n_annot].end = f_end;
+      tmp_ann_entry_arr[n_annot].label=ctmp_label;
+      tmp_ann_entry_arr[n_annot].value=ctmp_value;
+      tmp_ann_entry_arr[n_annot].comment = NULL;
+      tmp_ann_entry_arr[n_annot].target = target;	/* query (0) or library (1) */
+    }
+    else {	/* ctmp_label == ']' -- closing domain */
+      if (last_left_bracket < 0) {
+	fprintf(stderr,"*** error [%s:%d] -- next_annot_entry(%s) - ']' without '[': %s\n",
+		__FILE__,__LINE__, annot_acc, tmp_line);
+	continue;
+      }
+      /* close the pending '[' entry: it becomes a '-' domain ending here */
+      tmp_ann_entry_arr[last_left_bracket].end = f_pos;
+      tmp_ann_entry_arr[last_left_bracket].label = '-';
+      last_left_bracket = -1;
+    }
+
+    if (f_end < f_pos) {
+	fprintf(stderr,"*** error [%s:%d] -- %s: domain start (%d) > domain end (%d)\n",
+		__FILE__,__LINE__, annot_acc, f_pos+1, f_end+1);
+	continue;
+    }
+
+    /* a ']' line does not get a slot of its own, so a comment on it is
+       not stored (storing it in the unused slot would leak it) */
+    if (tmp_comment[0] && ctmp_label != ']') {
+      if ((tmp_ann_entry_arr[n_annot].comment=(char *)calloc(strlen(tmp_comment)+1,sizeof(char)))!=NULL) {
+	strncpy(tmp_ann_entry_arr[n_annot].comment,tmp_comment,strlen(tmp_comment));
+      }
+    }
+
+    /* fill in other information if appropriate */
+    if (ctmp_label== 'V') {
+      /* map the .value from ascii to encoded residue */
+      /* in general, it is lascii == qascii, but for TFAST, lascii != qascii */
+      tmp_ann_entry_arr[n_annot].value = t_ascii[tmp_ann_entry_arr[n_annot].value];
+    }
+    else if (ctmp_label == '-') {	/* have beginning and end of domain */
+      l_doms++;
+      i_ann = add_annot_char(m_msp->ann_arr, '[');
+      if (i_ann > 0) {
+	qascii['['] = NANN + i_ann;
+	m_msp->ann_arr_def[i_ann] = NULL;	/* set definition string NULL */
+      }
+      i_ann = add_annot_char(m_msp->ann_arr, ']');
+      if (i_ann > 0) {
+	qascii[']'] = NANN + i_ann;
+	m_msp->ann_arr_def[i_ann] = NULL;
+      }
+    }
+    else if (ctmp_label == '[') {	/* beginning of domain */
+      l_doms++;
+      last_left_bracket = n_annot;
+      i_ann = add_annot_char(m_msp->ann_arr, ctmp_label);
+      if (i_ann > 0) {
+	qascii['['] = NANN + i_ann;
+	m_msp->ann_arr_def[i_ann] = NULL;
+      }
+    }
+    else if (ctmp_label == ']') {	/* end of domain */
+      i_ann = add_annot_char(m_msp->ann_arr, ctmp_label);
+      if (i_ann > 0) {
+	qascii[']'] = NANN + i_ann;
+	m_msp->ann_arr_def[i_ann] = NULL;
+      }
+      continue;	/* no n_annot++, */
+    }
+    else if ((i_ann = add_annot_char(m_msp->ann_arr, ctmp_label)) > 0) {	/* not V[], active site mark */
+      m_msp->ann_arr_def[i_ann] = NULL;
+      qascii[ctmp_label] = NANN + i_ann;
+    }
+    n_annot++;
+  }
+
+ next_bline:
+  if (n_annot) {  /* if we have annotations, save them and set tmp_ann_entry_arr = NULL */
+
+    /* check for unpaired '['; unpaired ']' was checked earlier */
+
+    for (i_ann=0; i_ann < n_annot; i_ann++) {
+      if (tmp_ann_entry_arr[i_ann].label == '[') {
+	fprintf(stderr,"*** error [%s:%d] -- next_annot_entry(%s) -  unpaired '[' %d:%s\n",
+		__FILE__,__LINE__, annot_acc, i_ann, tmp_ann_entry_arr[i_ann].comment);
+	return NULL;
+      }
+    }
+
+    /* everything is paired properly */
+    /* re-allocate to exact space; if that fails the (larger) original
+       array is still valid and is simply kept.  mtmp_annot_p is kept in
+       step so the caller never sees a stale pointer or size */
+    exact_arr = (struct annot_entry *)realloc(tmp_ann_entry_arr, (n_annot+1)*sizeof(struct annot_entry));
+    if (exact_arr != NULL) {
+      tmp_ann_entry_arr = exact_arr;
+      mtmp_annot_p->tmp_arr_p = exact_arr;
+      mtmp_annot_p->max_annot = n_annot+1;
+    }
+
+    /* provide sorted array */
+    if ((s_tmp_ann_entry_arr = (struct annot_entry **)calloc((n_annot+1),sizeof(struct annot_entry *)))==NULL) {
+      fprintf(stderr,"*** error [%s:%d] -- next_annot_entry(%s) -  cannot alloc s_tmp_ann_entry_arr[%d]",
+	      __FILE__,__LINE__, annot_acc, n_annot+1);
+      return NULL;
+    }
+
+    for (i_ann=0; i_ann < n_annot+1; i_ann++) {
+      s_tmp_ann_entry_arr[i_ann] = &tmp_ann_entry_arr[i_ann];
+    }
+
+    sort_annots(s_tmp_ann_entry_arr,n_annot);
+
+    /* now allocate an annot_p if necessary, and link tmp_ann_entry_arr to it */
+    if (annot_p || (annot_p = calloc(1,sizeof(struct annot_str)))!=NULL) {
+      annot_p->annot_arr_p = tmp_ann_entry_arr;
+      annot_p->s_annot_arr_p = s_tmp_ann_entry_arr;
+      annot_p->n_annot = n_annot;
+      annot_p->n_domains = l_doms;
+      /* set to NULL to re-initialize */
+    }
+    else {
+      /* no annot_str to hang the arrays on: NULL is returned, the entry
+	 array stays with mtmp_annot_p, and the sorted array is released */
+      free(s_tmp_ann_entry_arr);
+    }
+  }
+  else {
+    annot_p = NULL;
+  }
+  return annot_p;
+}
+
+
+/* **************************************************************** */
+/* add_annot_char(ann_arr, ctmp_label) --
+
+   ann_arr is a '\0'-terminated list of annotation characters; an
+   empty list is first seeded with a single blank.
+
+   (1) append ctmp_label to ann_arr if it is not already present
+   (2) return its index if it was added, 0 if it was already there or
+       the list already holds MAX_FN characters
+*/
+/* **************************************************************** */
+
+int
+add_annot_char(unsigned char *ann_arr, char ctmp_label) {
+  int i_ann;
+  size_t used;
+
+  /* slot 0 is always a blank */
+  if (ann_arr[0] == '\0') {
+    ann_arr[0] = ' ';
+    ann_arr[1] = '\0';
+  }
+
+  /* already known: nothing to add */
+  if (strchr((char *)ann_arr,ctmp_label)!=NULL) {
+    return 0;
+  }
+
+  /* check for room for another character */
+  used = strlen((char *)ann_arr);
+  if (used >= MAX_FN) {
+    fprintf(stderr,"*** error [%s:%d] -- add_annot_char - too many annotation characters: len(%s) + %c > %d\n",
+	    __FILE__, __LINE__, ann_arr, ctmp_label, MAX_FN-1);
+    return 0;
+  }
+
+  i_ann = used;
+  ann_arr[i_ann] = ctmp_label;	/* add the character */
+  ann_arr[i_ann+1] = '\0';	/* guarantee null termination */
+  return i_ann;
+}
+
+/* **************************************************************** */
+/* get_annot -- reads the annotations for one sequence
+   (modified 20-Sept-2012 to not use intermediate file)
+
+   if sname[0]=='!', run the script sname+1 through popen() as
+       script "bline_descr" length
+     where bline_descr is bline without a leading '>' and cut at the
+     first blank found past DESCR_OFFSET, and length is
+     max(m_msp->q_offset + m_msp->q_off - 1, 0) + m_msp->n0
+   if sname[0]=='<', read the file sname+1
+
+   (1) skip leading #comments, register =annot_defs
+   (2) read in the annotations and put them in struct annot_entry
+   (3) modify *annot_p to point to structure
+   (4) return number of annotations (0 if the entry has none)
+
+   returns -1 if sname is neither form, the script/file cannot be
+   opened, or the data does not start with a '>' line; returns 0 on
+   non-UNIX builds.  debug is not used.
+*/
+/* **************************************************************** */
+
+int
+get_annot(char *sname, struct mngmsg *m_msp, char *bline, long offset, int n1, struct annot_str **annot_p,
+	  int target, int debug) {
+
+  char tmp_line[MAX_STR];
+  char bline_descr[MAX_STR];
+  char annot_data_file[MAX_LSTR];
+  char annot_script[MAX_LSTR];
+  long q_offset;
+
+  char *bp;
+  FILE *annot_fd=NULL;		/* file for annot accessions */
+  struct annot_mstr mtmp_annot;
+
+#ifndef UNIX
+  /* need pipes, system() */
+  return 0;
+#else
+
+  annot_data_file[0] = '\0';	/* stays empty for scripts; only used in messages then */
+
+  if (sname[0] == '!') {
+    /* popen implementation */
+
+    if (bline[0] == '>') {
+      SAFE_STRNCPY(bline_descr, bline+1,sizeof(bline_descr));
+    }
+    else {
+      SAFE_STRNCPY(bline_descr, bline,sizeof(bline_descr));
+    }
+    if ((strlen(bline_descr) > DESCR_OFFSET) && 
+	(bp=strchr(bline_descr+DESCR_OFFSET,' '))!=NULL) {*bp = '\0';}
+    else {bp = NULL;}
+
+    q_offset = m_msp->q_offset + m_msp->q_off - 1;
+    if (q_offset < 0) { q_offset = 0;}
+    /* bounded: sname and bline_descr together may exceed annot_script */
+    /* NOTE(review): bline_descr is passed to the shell inside double quotes;
+       characters such as '"', '$' or '`' in it are interpreted by the shell */
+    snprintf(annot_script,sizeof(annot_script),"%s \"%s\" %ld",sname+1, bline_descr,q_offset+m_msp->n0);
+    annot_script[sizeof(annot_script)-1] = '\0';
+
+    annot_fd = popen(annot_script,"r");
+  }
+  else if (sname[0] == '<') {
+    SAFE_STRNCPY(annot_data_file,sname+1,sizeof(annot_data_file));
+    annot_fd=fopen(annot_data_file,"r");
+  }
+  else {
+    fprintf(stderr,"*** error [%s:%d] -- get_annot() - %s not script (!) or file (<)\n",__FILE__, __LINE__, sname);
+    goto no_annots;
+  }
+
+  if (!annot_fd) {
+    goto no_annots;
+  }
+  else {	/* read the annotations into the array */
+
+    /* read #comments, =annot_defs at beginning of file */
+    tmp_line[0] = '#';
+    while (tmp_line[0] == '#' || tmp_line[0] == '=') {
+      if (tmp_line[0] == '=') add_annot_def(m_msp, tmp_line+1,1);
+      if (fgets(tmp_line, sizeof(tmp_line), annot_fd)==NULL) {
+	fprintf(stderr,"*** error [%s:%d] -- get_annot() -  premature annotation file end (%s)\n",
+		__FILE__,__LINE__, annot_data_file);
+	goto no_annots;
+      }
+    }
+
+    /* set mtmp_annot to be initialized */
+    mtmp_annot.tmp_arr_p = NULL;
+    mtmp_annot.max_annot = 0;
+
+    /* the first non-comment line must be the '>' description line */
+    if (tmp_line[0] != '>') {
+      fprintf(stderr,"*** error [%s:%d] -- get_annot() - no %s description: [%s]\n",
+	      __FILE__,__LINE__,annot_data_file, tmp_line);
+      goto no_annots;
+    }
+
+    *annot_p = next_annot_entry(annot_fd, tmp_line, sizeof(tmp_line), *annot_p, &mtmp_annot, m_msp, target);
+
+    if (sname[0] == '!') {
+      pclose(annot_fd);
+    }
+    else {
+      fclose(annot_fd);
+    }
+
+    /* now allocate an annot_p if necessary, and link tmp_ann_entry_arr to it */
+    if (*annot_p) {
+      s_annot_to_aa1a(offset, n1, (*annot_p),m_msp->ann_arr,"get_annot");
+      return (*annot_p)->n_annot;
+    }
+    else {
+      if (mtmp_annot.tmp_arr_p) free(mtmp_annot.tmp_arr_p);
+      return 0;
+    }
+  }
+
+ no_annots:
+  /* the stream is still open when reading failed part-way: close it the
+     same way it was opened (pclose() also reaps the script) */
+  if (annot_fd) {
+    if (sname[0] == '!') {
+      pclose(annot_fd);
+    }
+    else {
+      fclose(annot_fd);
+    }
+  }
+  return -1;
+#endif
+}
+
+/* s_annot_to_aa1a -- takes an annot_entry[] and converts it to an *aa1_ann
+
+   Allocates a zero-filled array of n1+2 bytes and, for each of the
+   annot_p->n_annot entries in annot_p->annot_arr_p, stores the
+   annotation code (qascii[label] - NANN) at index (pos - offset):
+     'V' entries are skipped;
+     '-' (domain) entries store the '[' code at their start and the ']'
+         code at their end.  A domain that starts before offset is
+         clipped to index 0 (and skipped if it also ends before offset);
+         one that ends at or past n1 is clipped to index n1-1; the
+         entry's pos/end are rewritten when clipped.  A domain that
+         starts at or past n1 is reported and skipped;
+     other entries are stored only if their label is in ann_arr and
+         their position falls inside 0..n1-1 (past the end is reported).
+   The new array is stored in annot_p->aa1_ann.  If calloc() fails an
+   error is printed and annot_p->aa1_ann is not modified.
+   tmp_line is only used to label error messages.
+ */
+void
+s_annot_to_aa1a(long offset, int n1, struct annot_str *annot_p, unsigned char *ann_arr, char *tmp_line) {
+  unsigned char *aa1a_tmp;
+  int i, ic, n_annot;	/* ic and n_annot are not used below */
+  struct annot_entry *this_annot;
+  char *bp;	/* not used below */
+
+  if ((aa1a_tmp = (unsigned char *)calloc(n1+2,sizeof(char)))==NULL) {
+    fprintf(stderr,"*** error [%s:%d] -- s_annot_to_aa1a() - cannot allocate aa1a_ann[%d] array\n",
+	    __FILE__, __LINE__, n1);
+    return;
+  }
+
+  if (offset < 0) offset++;	/* NOTE(review): negative offsets are shifted by one -- presumably because there is no coordinate 0; confirm */
+
+  for (i=0; i < annot_p->n_annot; i++) {
+    this_annot = &annot_p->annot_arr_p[i];
+    /* skip VAR labels */
+    if (this_annot->label == 'V') { continue; }
+    if (this_annot->label == '-') {	/* domain: mark both ends */
+      if (this_annot->pos - offset >= 0) {
+	if (this_annot->pos - offset < n1) {
+	  aa1a_tmp[this_annot->pos-offset] = qascii['['] - NANN;
+	}
+	else {
+	  fprintf(stderr,"*** error [%s:%d] -- s_annot_to_aa1a() - attempt to write off end of aa1a_tmp[%d]: %ld -- %s\n",
+		  __FILE__,__LINE__, n1, this_annot->pos - offset, tmp_line);
+	  continue;
+	}
+      }
+      else {	/* domain starts before the first residue of this sequence */
+	if (this_annot->end - offset < 0) continue;	/* ... and ends before it too: nothing to mark */
+	/*
+	fprintf(stderr,"*** error [%s:%d] --- s_annot_to_aa1a[%ld:%d] out of range\n",
+		__FILE__, __LINE__, this_annot->pos - offset, 0);
+	*/
+	aa1a_tmp[0] = qascii['['] - NANN;
+	this_annot->pos = offset;	/* clip the stored start to the first residue */
+      }
+      if (this_annot->end - offset < n1) {aa1a_tmp[this_annot->end-offset]=qascii[']'] - NANN;}
+      else {	/* domain runs past the last residue: clip the end */
+	/*
+	fprintf(stderr,"*** error [%s:%d] --- s_annot_to_aa1a[%ld:%d] out of range\n",
+		__FILE__, __LINE__, this_annot->end - offset, n1);
+	
+	*/
+	aa1a_tmp[n1-1] = qascii[']'] - NANN;	/* NOTE(review): index is -1 when n1 == 0 -- confirm n1 > 0 is guaranteed by callers */
+	this_annot->end = offset+n1-1;
+      }
+      continue;
+    }
+    if (strchr((char *)ann_arr, this_annot->label)==NULL) {continue;}	/* label was never registered in ann_arr */
+    if (this_annot->pos - offset < n1) {
+      if (this_annot->pos >= offset) {	/* not an error, but annotation must be in range */
+	aa1a_tmp[this_annot->pos - offset]=qascii[this_annot->label] - NANN;
+      }
+    }
+    else {
+      fprintf(stderr, "*** error [%s:%d] -- s_annot_to_aa1() - athis_annot->pos:[%ld - %ld] out of range: %d : %s\n",
+	      __FILE__, __LINE__, this_annot->pos,offset, n1, tmp_line);
+    }
+  }
+  annot_p->aa1_ann = aa1a_tmp;	/* any array previously stored here is not freed by this function */
+}
+
+/* save_best captures much of the complexity of saving the best scores
+   and appropriately sampling the scores for statistical analysis. It
+   does the following:
+
+   (1) update s_info counts for functions like fasta/x/y that don't
+       optimize every score
+
+   (2) for every result in the buffer:
+       (a) decide if it should be used for statistical sampling
+       (b) if the number of samples > MAX_STATS, then run
+           process_hist() and update all the zscores
+       (c) reset everything for next sequence
+
+   (3) must ensure that -BIGNUM are never in best[]
+
+*/
+
+#include "thr_buf_structs.h"
+#ifndef PCOMPLIB
+#define RESULTS_BUF reader_buf
+#define XTERNAL
+#include "thr_bufs2.h"
+#else
+#define RESULTS_BUF worker_buf
+#include "pcomp_bufs.h"
+#endif
+
+extern char *prog_func;		/* function label */
+extern int fa_max_workers;
+extern struct buf_head *lib_buf2_list;
+#ifdef DEBUG
+void check_rbuf(struct buf_head *cur_buf);
+#endif
+extern void get_rbuf(struct buf_head **lib_buf, int max_work_buf);
+extern void put_rbuf(struct buf_head *lib_buf, int max_work_buf);
+extern void wait_rbuf(int max_work_buf);
+extern void rbuf_done(int nthreads);
+extern void put_rbuf_done(int nthreads, struct buf_head *lib_buf,
+			  int max_work_buf);
+extern int
+process_hist(struct stat_str *sptr, int nstats,
+	     const struct mngmsg *m_msg,
+	     struct pstruct *ppst,
+	     struct hist_str *hist, void **pstat_void, struct score_count_s *s_info, int do_hist);
+
+extern void addhistz(double, struct hist_str *); /* scaleswn.c */
+void selectbestz(struct beststr **, int, int );
+extern double find_z(int score, double escore, int length, double comp, void *);
+extern double zs_to_E(double zs,int n1, int dnaseq, long entries, struct db_str db);
+extern struct beststr **bestp_arr;	/* array of pointers */
+extern int nbest;
+extern int nstats, nqstats, nrstats, pre_nstats, kstats, shuff_tot, sstats;
+extern double zbestcut;	/* cut off for best z-score */
+extern int bestfull;	/* index for selectbest() */
+extern int stats_done;	/* flag for z-value processing */
+extern void *rand_state;
+extern struct stat_str *stats; /* array of scores for statistics from real
+			   (or shuffled) sequences*/
+extern struct stat_str *qstats;	/* array of scores for shuffled query stats */
+extern struct stat_str *rstats;	/* array of scores from shuffled library */
+
+/* in the current version (fasta_35_01) save_best is used by both
+   threaded and unthreaded versions */
+
+/* COPY_RST_P(d,s) -- copy the score fields of s->rst into d->rst
+   (score[0..2], valid_stat, comp, H, escore, segnum, seglen).
+   d and s are pointers to structures that have an rst member.
+   Expands to a brace-enclosed block.  The arguments are parenthesized
+   so that expressions such as p+1 or *pp can be passed; each argument
+   is evaluated several times, so it must not have side effects. */
+#define COPY_RST_P(d,s) 		\
+{ (d)->rst.score[0] = (s)->rst.score[0];	\
+  (d)->rst.score[1] = (s)->rst.score[1];	\
+  (d)->rst.score[2] = (s)->rst.score[2];	\
+  (d)->rst.valid_stat = (s)->rst.valid_stat; \
+  (d)->rst.comp = (s)->rst.comp;		\
+  (d)->rst.H = (s)->rst.H;			\
+  (d)->rst.escore = (s)->rst.escore;	\
+  (d)->rst.segnum = (s)->rst.segnum;	\
+  (d)->rst.seglen = (s)->rst.seglen;	\
+}
+
+void
+save_best(struct buf_head *lib_bhead_p,
+	  const struct mngmsg *m_msp, struct pstruct *ppst,
+	  struct db_str *ldb, FILE *fdata,
+	  struct hist_str *histp, void **pstat_voidp,
+	  struct score_count_s *s_info)
+{ /* save_best -- fold one buffer of results (lib_bhead_p) into the global
+     statistics and best-score arrays:
+       - adds the buffer's shuffle/score counts to shuff_tot and *s_info
+       - for every result with a score: updates ldb->entries/length,
+         optionally writes a line to fdata, records sampled scores in
+         stats[] (and qstats[] when m_msp->qshuffle is set), and runs
+         process_hist() once nstats has reached MAX_STATS
+       - appends results whose zscore exceeds zbestcut to bestp_arr[],
+         pruning bestp_arr[] with selectbestz() when it holds MAX_BEST
+     Returns immediately if the buffer has no results. */
+  double zscore;
+  int i_score;
+  struct beststr *bbp;
+  struct buf2_data_s *rbuf_dp, *lib_buf2_dp;
+  struct buf2_res_s *rbuf_rp, *lib_buf2_rp;
+  int i, t_best, t_rbest, t_qrbest, tm_best, t_n1, sc_ix;
+  int t_valid_stat, tr_valid_stat, use_shuff, zsflag_save;
+  double e_score, tm_escore, t_rescore, t_qrescore;
+  int buf2_cnt;
+
+  if (!lib_bhead_p->hdr.have_results) return;
+  if ((buf2_cnt=lib_bhead_p->hdr.buf2_cnt) <= 0) return;
+  lib_buf2_dp = lib_bhead_p->buf2_data;
+  lib_buf2_rp = lib_bhead_p->buf2_res;
+
+  shuff_tot += lib_bhead_p->hdr.shuff_cnt;
+  s_info->s_cnt[0] += lib_bhead_p->s_cnt_info.s_cnt[0];
+  s_info->s_cnt[1] += lib_bhead_p->s_cnt_info.s_cnt[1];
+  s_info->s_cnt[2] += lib_bhead_p->s_cnt_info.s_cnt[2];
+  s_info->tot_scores += lib_bhead_p->s_cnt_info.tot_scores;;
+
+  sc_ix = ppst->score_ix;
+
+  t_best = t_rbest = t_qrbest = -BIGNUM;
+  tm_escore = t_rescore = t_qrescore = FLT_MAX;
+  t_valid_stat = tr_valid_stat = 0;
+  if (ppst->zsflag >= 10 && ppst->zsflag < 20) { use_shuff = 1;}	/* zsflag 10..19: statistics use the shuffled (r_rst) scores */
+  else { use_shuff = 0;}
+
+#ifdef DEBUG
+  if (fdata) {
+    fprintf(fdata,">save_best: %d\n",buf2_cnt);
+  }
+#endif
+
+  buf2_cnt = lib_bhead_p->hdr.buf2_cnt;	/* same value as assigned in the check above */
+  while (buf2_cnt--) { /* count down the number of results */
+    rbuf_rp = lib_buf2_rp++;	/* step through the results buffer */
+    rbuf_dp = lib_buf2_dp++;	/* step through the data buffer */
+
+    /* perhaps should use explicit flag to indicate no score */
+    if (rbuf_rp->rst.score[0] == -BIGNUM) continue;
+
+    /* i_score: current raw sorting score */
+    i_score = rbuf_rp->rst.score[sc_ix];
+    /* e_score, current escore */
+    e_score = rbuf_rp->rst.escore;
+
+    /* this should be done in the thread, and a sorted set of indexes
+       should be produced by the thread, so we just go down the list
+       to the zscore threshold */
+    zscore = (double)i_score;	/* until statistics are available the raw score stands in for the z-score */
+    if (stats_done) {
+      zscore=find_z(i_score, e_score, rbuf_dp->seq->n1,(double)rbuf_rp->rst.comp,
+			  *pstat_voidp);
+    }
+
+    /* we have complex logic to decide:
+       (a) for multiframe results, which is the best
+       (b) information about valid stats
+       we should simply return a stats array where all this is figured
+       out in the thread.
+     */
+    t_n1 = rbuf_dp->seq->n1;
+    if (i_score > t_best) tm_best = t_best = i_score;	/* tm_best is assigned only here */
+    if (e_score < tm_escore) tm_escore = e_score;
+    if (rbuf_rp->rst.valid_stat > t_valid_stat) {
+      t_valid_stat = 1;
+    }
+
+    /* this stuff happens only for fasts/fastm/fastf
+       again, the t_qrbest stuff should be done in the thread
+       rather than check for every hit, run through the loop
+       only if necessary.
+     */
+    if (m_msp->qshuffle) {
+      if (rbuf_rp->qr_score > t_qrbest)
+	t_qrbest = rbuf_rp->qr_score;
+      if (rbuf_rp->qr_escore < t_qrescore)
+	t_qrescore = rbuf_rp->qr_escore;
+
+      if (rbuf_dp->frame == m_msp->nitt1 && t_qrbest > 0 && nqstats < m_msp->shuff_max) {
+	qstats[nqstats].n1 = rbuf_dp->seq->n1;	/* save the best score */
+	qstats[nqstats].comp =  rbuf_rp->rst.comp;
+	qstats[nqstats].H = rbuf_rp->rst.H;
+	qstats[nqstats].escore = t_qrescore;
+	qstats[nqstats++].score = t_qrbest;
+	t_qrbest = -BIGNUM;	/* reset t_qrbest, t_qrescore */
+	t_qrescore = FLT_MAX;
+      }
+    }	/* m_msp->qshuffle */
+
+    if (use_shuff) {
+      /* this check is required because some sequences scheduled to be
+	 used for statistics may not in fact be returning a score (if
+	 they are outside the -M range, for example.
+       */
+      if (rbuf_rp->r_rst.score[0] == -BIGNUM) { tr_valid_stat = 0; }
+      if (rbuf_rp->r_rst.valid_stat > tr_valid_stat) {
+	tr_valid_stat = 1;
+      }
+      if (rbuf_rp->r_rst.score[sc_ix] > t_rbest) {
+	t_rbest = rbuf_rp->r_rst.score[sc_ix];
+	t_rescore = rbuf_rp->r_rst.escore;
+      }
+    }
+
+    /* need to look for frame 0 if TFASTA, then save stats at frame 6 */
+    if (fdata) {
+      fprintf(fdata,
+	      "%-12s %6d %d %.5f %.5f %4d %4d %4d %2d %2d %4d %4d %4d %2d %2d %5d %8lld\n",
+	      rbuf_dp->mseq->libstr, rbuf_dp->seq->n1,rbuf_dp->frame,rbuf_rp->rst.comp,rbuf_rp->rst.H,
+	      rbuf_rp->rst.score[0],rbuf_rp->rst.score[1],rbuf_rp->rst.score[2],
+	      t_valid_stat, rbuf_rp->rst.alg_info,
+	      (rbuf_rp->r_rst.score[0]<0 ? -1 : rbuf_rp->r_rst.score[0]),
+	      (rbuf_rp->r_rst.score[1]<0 ? -1 : rbuf_rp->r_rst.score[1]),
+	      (rbuf_rp->r_rst.score[2]<0 ? -1 : rbuf_rp->r_rst.score[2]),
+	      tr_valid_stat, rbuf_rp->r_rst.alg_info,
+	      rbuf_dp->stats_idx, rbuf_dp->mseq->lseek);
+    }
+
+    /* statistics done for best score of set */
+
+    if (rbuf_dp->frame == m_msp->nitt1) {	/* count each library entry once, at frame nitt1 */
+      ldb->entries++;
+      ldb->length += t_n1;
+      if (ldb->length > LONG_MAX) {	/* NOTE(review): can only be true if ldb->length is wider than long -- confirm its type */
+	ldb->length -= LONG_MAX; ldb->carry++;
+      }
+    }
+
+    if (rbuf_dp->frame == m_msp->nitt1 && ppst->zsflag >= 0) {
+      /* if this sample should be used for statistics */
+      if (use_shuff) t_valid_stat = tr_valid_stat;
+      if (t_valid_stat) {
+	/* we've got our initial MAX_STATS values */
+	if (nstats >= MAX_STATS) {
+	  if (!stats_done) {
+	    zsflag_save = ppst->zsflag;	/* zsflag is restored after process_hist() */
+	    if (ppst->zsflag > 20) {
+	      ppst->zsflag -= 20;
+	    }
+	    ppst->zsflag_f = process_hist(stats,nstats,m_msp, ppst,
+					  histp, pstat_voidp,s_info, 0);
+	    ppst->zsflag = zsflag_save;
+	    kstats = nstats;
+	    if (ppst->zsflag >= 0) {	/* this is redundant, but rare */
+	      stats_done = 1;
+	      for (i=0; i< nstats; i++) {	/* NOTE(review): bestp_arr[] is walked up to nstats, not nbest -- confirm intended */
+		bestp_arr[i]->zscore =
+		  find_z(bestp_arr[i]->rst.score[ppst->score_ix],
+			 bestp_arr[i]->rst.escore, bestp_arr[i]->seq->n1,
+			 bestp_arr[i]->rst.comp, *pstat_voidp);
+	      }
+	    }
+	  }
+	}
+	else {
+	  /* this logic allows stats_idx to be over-ruled for searches
+	     where every query does not generate a score */
+	  rbuf_dp->stats_idx = nstats;
+	  nstats++;
+	}
+      }
+
+      if (rbuf_dp->stats_idx >= 0 && t_valid_stat) {
+	if (rbuf_dp->stats_idx >= MAX_STATS || nstats > MAX_STATS) {
+	  fprintf(stderr, "*** error [%s:%d] - nstats index [%d] out of range [%d,%d]\n",
+		  __FILE__, __LINE__,
+		  rbuf_dp->stats_idx, nstats,MAX_STATS);
+	}
+	else {  /* stats_idx is in range */
+	  sstats++;
+	  stats[rbuf_dp->stats_idx].n1 = t_n1;
+	  stats[rbuf_dp->stats_idx].comp = rbuf_rp->rst.comp;
+	  stats[rbuf_dp->stats_idx].H = rbuf_rp->rst.H;
+	  if (use_shuff) { /* use shuffled score */
+	    stats[rbuf_dp->stats_idx].escore  = t_rescore;
+	    stats[rbuf_dp->stats_idx].score = t_rbest;
+	  }
+	  else { /* real score, not shuffled */
+	    stats[rbuf_dp->stats_idx].escore  = tm_escore;
+	    stats[rbuf_dp->stats_idx].score = tm_best;
+	  }
+	} /* end stats_idx in range */
+      }	/* end have valid stats_idx */
+
+      if (t_valid_stat && stats_done && histp) {
+	addhistz(find_z(t_best, tm_escore, rbuf_dp->seq->n1, (double) rbuf_rp->rst.comp,
+			    *pstat_voidp), histp);
+      }
+      /* reset best scores */
+      t_best = t_rbest = -BIGNUM;
+      tm_escore = t_rescore = FLT_MAX;
+      t_valid_stat = tr_valid_stat = 0;
+    }
+
+    /*
+    if (rbuf_rp->rst.score[ppst->score_ix] > 200) {
+      fprintf(stderr, "high score[%d]: %s %d: %d\n", rbuf_dp->seq->index,
+	      rbuf_dp->mseq->libstr, rbuf_dp->seq->n1, rbuf_rp->rst.score[ppst->score_ix]);
+    }
+    */
+
+    if (zscore > zbestcut) {	/* good enough to be kept in bestp_arr[] */
+      if (nbest >= MAX_BEST) {	/* array full: keep the best nbest-MAX_BEST/4 and raise the cutoff */
+	bestfull = nbest-MAX_BEST/4;
+	selectbestz(bestp_arr,bestfull-1,nbest);
+	zbestcut = bestp_arr[bestfull-1]->zscore;
+	nbest = bestfull;
+      }
+      bbp = bestp_arr[nbest++];
+
+      COPY_RST_P(bbp, rbuf_rp);
+
+      bbp->seq = rbuf_dp->seq;
+      bbp->mseq = rbuf_dp->mseq;
+      bbp->n1 = rbuf_dp->seq->n1;
+#ifdef DEBUG
+      bbp->adler32_crc = rbuf_dp->seq->adler32_crc;
+#endif
+      /* rbuf_dp->best_save is set after a rbuf_dp is entered into best_str;
+	 note that bbp->bbp_link is not assigned here when best_save is NULL */
+      if (rbuf_dp->best_save) {
+	/* a previous rbuf_dp->seq is in best_str at best_save */
+	if (rbuf_dp->best_save->seq == rbuf_dp->seq) {
+	  /* the best_save->seq matches the rbuf_dp->seq */
+	  bbp->bbp_link = rbuf_dp->best_save;
+	  /* bbp_link tells where this ->seq can be found */
+	}
+	else {
+	  bbp->bbp_link = NULL;
+	}
+      }
+      rbuf_dp->best_save = bbp;
+      lib_bhead_p->hdr.have_best_save = 1;
+      bbp->zscore = zscore;
+      bbp->frame = rbuf_dp->frame;
+    }
+  }
+}
+
+/* save_best2() folds one buffer of scored library sequences into the
+   file-scope result tables.  It makes four passes over the buffer:
+
+   (1) optionally dump every score to fdata;
+   (2) copy each result whose z-score exceeds zbestcut into bestp_arr[];
+   (3) copy each result flagged is_valid_stat into stats[];
+   (4) if m_msp->qshuffle, copy query-shuffle scores into qstats[].
+
+   Once MAX_STATS statistics samples have been collected it calls
+   process_hist() and re-scores the entries already in bestp_arr[].
+
+   inputs:  lib_bhead_p - buffer with buf2_data[]/buf2_res[] filled in
+	    m_msp, ppst - search parameters (ppst->zsflag selects whether
+			  shuffled scores feed the statistics)
+	    ldb	        - running library entry/length totals (updated)
+	    fdata       - if non-NULL, raw scores are written here
+	    histp, pstat_voidp - histogram and statistics state
+	    s_info      - running score counts (updated)
+
+   Returns immediately if the buffer has no results.
+*/
+void
+save_best2(struct buf_head *lib_bhead_p,
+	   const struct mngmsg *m_msp, struct pstruct *ppst,
+	   struct db_str *ldb, FILE *fdata,
+	   struct hist_str *histp, void **pstat_voidp,
+	   struct score_count_s *s_info)
+{
+  double zscore;
+  int i_score;
+  struct beststr *bbp;
+  struct buf2_data_s *rbuf_dp, *lib_buf2_dp;
+  struct buf2_res_s *rbuf_rp, *lib_buf2_rp;
+  int i, sc_ix;
+  int use_shuff, zsflag_save;
+  double e_score;
+  int buf2_cnt;
+
+  if (!lib_bhead_p->hdr.have_results) return;
+  if ((buf2_cnt = lib_bhead_p->hdr.buf2_cnt) <= 0) return;
+
+  /*
+#ifdef DEBUG
+  fprintf(stderr," save_best2: lib_bhead_p->buf2_data[0]->mseq->index/lseek: %d,%lld\n",
+	  lib_bhead_p->buf2_data[0].mseq->index,lib_bhead_p->buf2_data[0].mseq->lseek);
+#endif
+  */
+
+  /* zsflag in [10,20) means statistics come from the shuffled scores */
+  if (ppst->zsflag >= 10 && ppst->zsflag < 20) { use_shuff = 1;}
+  else {use_shuff = 0;}
+
+  /* accumulate the per-buffer counters into the running totals */
+  shuff_tot += lib_bhead_p->hdr.shuff_cnt;
+  s_info->s_cnt[0] += lib_bhead_p->s_cnt_info.s_cnt[0];
+  s_info->s_cnt[1] += lib_bhead_p->s_cnt_info.s_cnt[1];
+  s_info->s_cnt[2] += lib_bhead_p->s_cnt_info.s_cnt[2];
+  s_info->tot_scores += lib_bhead_p->s_cnt_info.tot_scores;
+  sc_ix = ppst->score_ix;
+
+  /* save the raw data if requested */
+  if (fdata) {
+#ifdef DEBUG
+    fprintf(fdata,">save_best: %d\n",buf2_cnt);
+#endif
+    lib_buf2_dp = lib_bhead_p->buf2_data;
+    lib_buf2_rp = lib_bhead_p->buf2_res;
+    buf2_cnt = lib_bhead_p->hdr.buf2_cnt;
+    while (buf2_cnt--) { /* count down the number of results */
+
+      rbuf_rp = lib_buf2_rp++;	/* step through the results buffer */
+      rbuf_dp = lib_buf2_dp++;	/* step through the data buffer */
+
+      /* perhaps should use explicit flag to indicate no score */
+      if (rbuf_rp->rst.score[0] == -BIGNUM) continue;
+
+      /* negative shuffled scores are written as -1 */
+      fprintf(fdata,
+	      "%-12s %6d %d %.5f %.5f %4d %4d %4d %2d %2d %4d %4d %4d %2d %2d %5d %8lld\n",
+	      rbuf_dp->mseq->libstr, rbuf_dp->seq->n1,rbuf_dp->frame,rbuf_rp->rst.comp,rbuf_rp->rst.H,
+	      rbuf_rp->rst.score[0],rbuf_rp->rst.score[1],rbuf_rp->rst.score[2],
+	      rbuf_rp->is_valid_stat, rbuf_rp->rst.alg_info,
+	      (rbuf_rp->r_rst.score[0]<0 ? -1 : rbuf_rp->r_rst.score[0]),
+	      (rbuf_rp->r_rst.score[1]<0 ? -1 : rbuf_rp->r_rst.score[1]),
+	      (rbuf_rp->r_rst.score[2]<0 ? -1 : rbuf_rp->r_rst.score[2]),
+	      rbuf_rp->is_valid_stat, rbuf_rp->r_rst.alg_info,
+	      rbuf_dp->stats_idx, rbuf_dp->mseq->lseek);
+    }
+  }
+
+  /* save the high-scoring data */
+  lib_buf2_rp = lib_bhead_p->buf2_res;
+  lib_buf2_dp = lib_bhead_p->buf2_data;
+  buf2_cnt = lib_bhead_p->hdr.buf2_cnt;
+  while (buf2_cnt--) {
+    rbuf_rp = lib_buf2_rp++;
+    rbuf_dp = lib_buf2_dp++;
+
+    /* perhaps should use explicit flag to indicate no score */
+    if (rbuf_rp->rst.score[0] == -BIGNUM) continue;
+
+    /* i_score: current raw sorting score */
+    i_score = rbuf_rp->rst.score[sc_ix];
+    /* e_score, current escore */
+    e_score = rbuf_rp->rst.escore;
+    /* this should be done in the thread, and a sorted set of indexes
+       should be produced by the thread, so we just go down the list
+       to the zscore threshold */
+    /* until statistics exist, the raw score stands in for the z-score */
+    zscore = (double)i_score;
+    if (stats_done) {
+      zscore=find_z(i_score, e_score, rbuf_dp->seq->n1,(double)rbuf_rp->rst.comp,
+			  *pstat_voidp);
+    }
+
+    /* count each library sequence once, on the frame equal to nitt1 */
+    if (rbuf_dp->frame == m_msp->nitt1) {
+      ldb->entries++;
+      ldb->length += rbuf_dp->seq->n1;
+      if (ldb->length > LONG_MAX) {
+	ldb->length -= LONG_MAX; ldb->carry++;
+      }
+    }
+
+    if (zscore > zbestcut) {
+      if (nbest >= MAX_BEST) {
+	/* table full: keep the top three quarters and raise the cutoff
+	   to the z-score at the new boundary */
+	bestfull = nbest-MAX_BEST/4;
+	selectbestz(bestp_arr,bestfull-1,nbest);
+	zbestcut = bestp_arr[bestfull-1]->zscore;
+	nbest = bestfull;
+      }
+      bbp = bestp_arr[nbest++];
+
+      COPY_RST_P(bbp, rbuf_rp);
+
+      bbp->seq = rbuf_dp->seq;
+      bbp->mseq = rbuf_dp->mseq;
+      bbp->n1 = rbuf_dp->seq->n1;
+#ifdef DEBUG
+      bbp->adler32_crc = rbuf_dp->seq->adler32_crc;
+#endif
+      /* rbuf_dp->best_save is set after a rbuf_dp is entered into best_str */
+      if (rbuf_dp->best_save) {
+	/* a previous rbuf_dp->seq is in best_str at best_save */
+	if (rbuf_dp->best_save->seq == rbuf_dp->seq) {
+	  /* the best_save->seq matches the rbuf_dp->seq */
+	  bbp->bbp_link = rbuf_dp->best_save;
+	  /* bbp_link tells where this ->seq can be found */
+	}
+	else {
+	  bbp->bbp_link = NULL;
+	}
+      }
+      rbuf_dp->best_save = bbp;
+      lib_bhead_p->hdr.have_best_save = 1;
+      bbp->zscore = zscore;
+      bbp->frame = rbuf_dp->frame;
+    }
+  }
+
+  /* process results for statistics */
+  lib_buf2_dp = lib_bhead_p->buf2_data;
+  lib_buf2_rp = lib_bhead_p->buf2_res;
+  buf2_cnt = lib_bhead_p->hdr.buf2_cnt;
+  while (buf2_cnt--) { /* count down the number of results */
+    rbuf_dp = lib_buf2_dp++;	/* step through the data buffer */
+    rbuf_rp = lib_buf2_rp++;	/* step through the results buffer */
+
+    /* only results flagged is_valid_stat contribute to stats[] */
+    if (!rbuf_rp->is_valid_stat) { continue;}
+
+    if (use_shuff) {
+      i_score = rbuf_rp->r_rst.score[sc_ix];
+      e_score = rbuf_rp->r_rst.escore;
+    }
+    else {
+      i_score = rbuf_rp->rst.score[sc_ix];
+      e_score = rbuf_rp->rst.escore;
+    }
+
+    if (rbuf_dp->stats_idx >= MAX_STATS || nstats > MAX_STATS) {
+      fprintf(stderr, "*** error [%s:%d] - nstats index [%d] out of range [%d,%d]\n",
+	      __FILE__, __LINE__,
+	      rbuf_dp->stats_idx, nstats,MAX_STATS);
+      continue;
+    }
+
+    if (nstats < MAX_STATS) {
+      /* this logic allows stats_idx to be over-ruled for searches
+	 where every query does not generate a score */
+      rbuf_dp->stats_idx = nstats;
+      nstats++;
+    }
+
+    if (stats_done && histp) {
+      addhistz(find_z(i_score, e_score, rbuf_dp->seq->n1, (double) rbuf_rp->rst.comp,
+		      *pstat_voidp), histp);
+    }
+
+    /* a negative stats_idx means there is no stats[] slot for this score */
+    if (rbuf_dp->stats_idx < 0) {
+      continue;
+    }
+
+    sstats++;
+    stats[rbuf_dp->stats_idx].n1 = rbuf_dp->seq->n1;
+    stats[rbuf_dp->stats_idx].comp = rbuf_rp->rst.comp;
+    stats[rbuf_dp->stats_idx].H = rbuf_rp->rst.H;
+    stats[rbuf_dp->stats_idx].escore  = e_score;
+    stats[rbuf_dp->stats_idx].score = i_score;
+  }
+
+
+  /* fill the qstats[] array if m_msp->qshuffle */
+  if (m_msp->qshuffle && nqstats < m_msp->shuff_max) {
+    lib_buf2_dp = lib_bhead_p->buf2_data;
+    lib_buf2_rp = lib_bhead_p->buf2_res;
+    buf2_cnt = lib_bhead_p->hdr.buf2_cnt;
+    while (buf2_cnt--) {
+      rbuf_rp = lib_buf2_rp++;	/* step through the results buffer */
+      rbuf_dp = lib_buf2_dp++;
+
+      if (rbuf_rp->is_valid_stat && rbuf_rp->qr_score > 0
+	  && nqstats < m_msp->shuff_max) {
+	qstats[nqstats].n1 = rbuf_dp->seq->n1;	/* save the best score */
+	qstats[nqstats].comp =  rbuf_rp->rst.comp;
+	qstats[nqstats].H = rbuf_rp->rst.H;
+	qstats[nqstats].escore = rbuf_rp->qr_escore;
+	qstats[nqstats++].score = rbuf_rp->qr_score;
+      }
+    }	/* m_msp->qshuffle */
+  }
+
+  /* check if we have enough data to do stats */
+  if (!stats_done && nstats >= MAX_STATS) {
+    /* zsflag values above 20 are reduced by 20 for the process_hist()
+       call only; the caller's value is restored afterwards */
+    zsflag_save = ppst->zsflag;
+    if (ppst->zsflag > 20) {
+      ppst->zsflag -= 20;
+    }
+    ppst->zsflag_f = process_hist(stats,nstats,m_msp, ppst,
+				  histp, pstat_voidp,s_info, 0);
+    ppst->zsflag = zsflag_save;
+    kstats = nstats;
+    stats_done = 1;
+    /* Re-score what is already saved.  bestp_arr[] holds nbest
+       populated entries (see the nbest++ above), so the bound is
+       nbest; the number of statistics samples (nstats) is unrelated
+       to how many best entries exist. */
+    for (i=0; i< nbest; i++) {
+      bestp_arr[i]->zscore =
+	find_z(bestp_arr[i]->rst.score[ppst->score_ix],
+	       bestp_arr[i]->rst.escore, bestp_arr[i]->seq->n1,
+	       bestp_arr[i]->rst.comp, *pstat_voidp);
+    }
+  }
+
+}
+
+/* save_shuf() copies shuffled-sequence scores from one results buffer
+   into the file-scope rstats[] table.
+
+   For each set of frames belonging to one library sequence (the set
+   ends on the entry whose frame equals nitt1) the best r_rst score and
+   its escore are saved.  While fewer than shuff_max samples have been
+   saved they are appended; after that a slot is chosen with
+   my_nrand(++kstats, rand_state) and the sample is dropped if the
+   chosen slot is >= shuff_max.
+
+   inputs:  lib_bhead_p - buffer with buf2_data[]/buf2_res[] filled in
+	    nitt1       - frame value that ends a set of scores
+	    shuff_max   - capacity of rstats[]
+	    sc_ix       - index of the score used for ranking
+	    s_info      - running score counts (updated)
+*/
+void
+save_shuf(struct buf_head *lib_bhead_p, int nitt1, int shuff_max, int sc_ix,
+	  struct score_count_s *s_info)
+{
+  struct buf2_data_s *rbuf_dp, *lib_buf2_dp;
+  struct buf2_res_s *rbuf_rp, *lib_buf2_rp;
+  int t_valid_stat;
+  int t_rbest;
+  double t_rescore;
+  int buf2_cnt, jstats;
+  /* number of sets offered so far; kept across calls */
+  static int kstats=0;
+
+
+  lib_buf2_dp = lib_bhead_p->buf2_data;
+  lib_buf2_rp = lib_bhead_p->buf2_res;
+  buf2_cnt = lib_bhead_p->hdr.buf2_cnt;
+
+  s_info->s_cnt[0] += lib_bhead_p->s_cnt_info.s_cnt[0];
+  s_info->s_cnt[1] += lib_bhead_p->s_cnt_info.s_cnt[1];
+  s_info->s_cnt[2] += lib_bhead_p->s_cnt_info.s_cnt[2];
+
+  s_info->tot_scores += lib_bhead_p->s_cnt_info.tot_scores;
+  /* this is done because we are not using r_rst->valid_stat to limit selection of scores */
+  /*   s_info->s_cnt[sc_ix] = s_info->tot_scores; */
+
+  t_rbest = -BIGNUM;
+  /* t_rescore is only assigned when a score beats t_rbest; give it a
+     defined starting value so rstats[] can never receive an
+     indeterminate escore */
+  t_rescore = FLT_MAX;
+  t_valid_stat = 0;
+
+  /* "> 0" so that a negative count cannot run the loop */
+  while (buf2_cnt-- > 0) { /* count down the number of results */
+    rbuf_dp = lib_buf2_dp++;	/* step through the data buffer */
+    rbuf_rp = lib_buf2_rp++;	/* step through the results buffer */
+
+    /* perhaps should use explicit flag to indicate no score */
+    if (rbuf_rp->r_rst.score[0] == -BIGNUM) continue;
+
+    if (rbuf_rp->r_rst.score[sc_ix] > t_rbest) {
+      t_rbest = rbuf_rp->r_rst.score[sc_ix];
+      t_rescore = rbuf_rp->r_rst.escore;
+    }
+
+    /* NOTE(review): t_valid_stat is never cleared once set, so it
+       stays 1 for the rest of this buffer - confirm this is intended */
+    if (rbuf_rp->r_rst.valid_stat > t_valid_stat) {
+      t_valid_stat = 1;
+    }
+
+    /* statistics done for best score of set */
+    /* currently no check for rst->valid_stat, which causes
+       over-estimates of shuffles */
+
+    if (rbuf_dp->frame == nitt1) {
+      if (t_valid_stat) {
+	if (nrstats < shuff_max ) { kstats = jstats = nrstats++; }
+	else {	/* randomly replace */
+	  jstats = my_nrand(++kstats,rand_state);
+	  if (jstats >= shuff_max) goto done;
+	}
+
+	rstats[jstats].n1 = rbuf_dp->seq->n1;
+	rstats[jstats].comp = rbuf_rp->r_rst.comp;
+	rstats[jstats].H = rbuf_rp->r_rst.H;
+	rstats[jstats].escore  = t_rescore;
+	rstats[jstats].score = t_rbest;
+      done:
+	/* start the next set of frames with no best score */
+	t_rbest = -BIGNUM;
+      }
+    }
+  }
+}
+
+/* save_align() hands the alignment results in lib_bhead_p->buf2_ares[]
+   over to the bestp_arr[] entries they belong to (selected by
+   best_idx).  An entry that already has an a_res is left untouched.
+   The buffer is then marked empty.
+
+   returns: the number of buffer entries examined, or 0 if the buffer
+	    held no results.
+*/
+int
+save_align(struct buf_head *lib_bhead_p, struct beststr **bestp_arr)
+{
+  struct buf2_ares_s *ares_p;
+  struct beststr *best_p;
+  int n_res, ix;
+
+  if (!lib_bhead_p->hdr.have_results) return 0;
+
+  n_res = lib_bhead_p->hdr.buf2_cnt;
+  if (n_res <= 0) return 0;
+
+  ares_p = lib_bhead_p->buf2_ares;
+  for (ix = 0; ix < n_res; ix++, ares_p++) {
+    best_p = bestp_arr[ares_p->best_idx];
+    if (best_p->a_res == NULL) {
+      /* first alignment seen for this entry - take it */
+      best_p->have_ares = ares_p->have_ares;
+      best_p->a_res = ares_p->a_res;
+    }
+#ifdef DEBUG
+    else {
+      fprintf(stderr,"*** error [%s:%d] - attempt to re-save a_res for [%d]: %s\n",
+	      __FILE__, __LINE__, ares_p->best_idx, best_p->mseq->bline);
+    }
+#endif
+  }
+
+  /* buffer has been consumed */
+  lib_bhead_p->hdr.have_results = 0;
+  lib_bhead_p->hdr.buf2_cnt = 0;
+  return n_res;
+}
+
+/* buf_do_work fills in the lib_bhead_p->buf2_res[] array with the
+   do_work() results,
+
+   inputs:  **aa0, n0 (query)
+            lib_bhead_p->buf2_data lib_bhead_p->hdr.buf2_cnt library sequences
+	    max_frame (used to set statistics info)
+	    ppst,
+	    void *f_struct prepared by init_work()
+
+   results: lib_bhead_p->buf2_res[]
+
+            included in buf2_res[] is use_stat, which captures the
+            logic required to decide whether a value should be saved
+            in the stats[] buffer.  This complexity mostly arises
+            because there can be more scores than sequences, but there
+            can be only one statistics score per sequence (the best score).
+*/
+/* Scores every entry of lib_bhead_p->buf2_data[] against the query with
+   do_work(), writing into the matching buf2_res[] entry, and flags one
+   result per set of frames with is_valid_stat.  A set ends on the
+   entry whose frame equals max_frame. */
+void
+buf_do_work(unsigned char **aa0,  int n0,
+	    struct buf_head *lib_bhead_p,
+	    int max_frame,
+	    struct pstruct *ppst, void **f_str) {
+
+  int buf2_cnt;
+#ifdef DEBUG
+  unsigned long atmp;	/* CRC recomputed from the sequence bytes */
+#endif
+  struct buf2_data_s *lib_buf2_dp;
+  struct buf2_res_s *lib_buf2_rp, *t_best_rp;
+  int t_best, sc_ix;
+  double t_escore;
+
+  sc_ix = ppst->score_ix;
+
+  /* the score counters handed to do_work() start from zero for each buffer */
+  lib_bhead_p->s_cnt_info.s_cnt[0] = lib_bhead_p->s_cnt_info.s_cnt[1] =
+    lib_bhead_p->s_cnt_info.s_cnt[2] = lib_bhead_p->s_cnt_info.tot_scores = 0;
+
+  buf2_cnt = lib_bhead_p->hdr.buf2_cnt;
+  lib_buf2_dp = lib_bhead_p->buf2_data;
+  lib_buf2_rp = lib_bhead_p->buf2_res;
+
+  /* best-so-far state for the current set of frames */
+  t_best_rp = NULL;
+  t_best = -BIGNUM;
+  t_escore = 1000.0;
+
+  while (buf2_cnt-- > 0) {
+
+    /* -BIGNUM marks "no score" until do_work() fills the entry in */
+    lib_buf2_rp->rst.score[0] =
+      lib_buf2_rp->rst.score[1] =
+      lib_buf2_rp->rst.score[2] = -BIGNUM;
+
+    lib_buf2_rp->is_valid_stat = 0;
+
+    if (lib_buf2_dp->seq->n1 < ppst->n1_low ||
+	lib_buf2_dp->seq->n1 > ppst->n1_high ) {
+      /* tells save_best() there is no stats score here -- not
+	 necessary as -BIGNUM indicates no score */
+      lib_buf2_dp->stats_idx = -1;
+      goto next_seq;
+    }
+
+#ifdef DEBUG
+    if (check_seq_range(lib_buf2_dp->seq->aa1b, lib_buf2_dp->seq->n1,
+			ppst->nsqx, "buf_do_work()")) {
+      fprintf(stderr, "*** error [%s:%d] - [%s/buf_do_work] range error at: %d/%d (n1:%d)\n",
+	      __FILE__, __LINE__,
+	      prog_func,lib_bhead_p->hdr.buf2_cnt - (buf2_cnt+1),
+	      lib_bhead_p->hdr.buf2_cnt, lib_buf2_dp->seq->n1);
+      goto next_seq;
+    };
+
+    /* also check for adler32_crc match */
+    if (lib_buf2_dp->seq->adler32_crc != (atmp=adler32(1L,lib_buf2_dp->seq->aa1b, lib_buf2_dp->seq->n1))) {
+      fprintf(stderr, "*** error [%s:%d] - [%s/buf_do_work] CRC error [%lu!=%lu] at: %d/%d (n1:%d/l_offset:%ld)\n",
+	      __FILE__, __LINE__,
+	      prog_func,lib_buf2_dp->seq->adler32_crc, atmp,
+	      lib_bhead_p->hdr.buf2_cnt - (buf2_cnt+1),
+	      lib_bhead_p->hdr.buf2_cnt,lib_buf2_dp->seq->n1,
+	      lib_buf2_dp->seq->l_offset);
+
+      goto next_seq;
+    }
+#endif
+
+    do_work (aa0[lib_buf2_dp->frame], n0,
+	     lib_buf2_dp->seq->aa1b, lib_buf2_dp->seq->n1,
+	     lib_buf2_dp->frame, ppst, f_str[lib_buf2_dp->frame], 0, 0,
+	     &(lib_buf2_rp->rst), &(lib_bhead_p->s_cnt_info));
+
+    /* remember this result if it has the lowest escore or the highest
+       score seen so far in the current set */
+    if (lib_buf2_rp->rst.valid_stat) {
+      if (lib_buf2_rp->rst.escore < t_escore) {
+	t_escore = lib_buf2_rp->rst.escore;
+	t_best_rp = lib_buf2_rp;
+      }
+      if (lib_buf2_rp->rst.score[sc_ix] > t_best) {
+	t_best = lib_buf2_rp->rst.score[sc_ix];
+	t_best_rp = lib_buf2_rp;
+      }
+    }
+
+    /* end of a set: flag the remembered result and start over */
+    if (lib_buf2_dp->frame == max_frame) {
+      if (t_best_rp!=NULL) {
+	t_best_rp->is_valid_stat = 1;
+	t_best_rp = NULL;
+      }
+      t_best = -BIGNUM;
+      t_escore = 1000.0;
+    }
+
+  next_seq:
+    lib_buf2_dp++;
+    lib_buf2_rp++;
+  }
+
+  /* place to produce z_scores */
+  /* place to produce sorted array */
+
+  lib_bhead_p->hdr.have_results = 1;
+}
+
+/* buf_do_align() builds an alignment encoding for every entry of
+   lib_bhead_p->buf2_data[] with build_ares_code() and stores it in the
+   matching buf2_ares[] entry.  When m_msp->stages > 1 it first calls
+   do_opt() and copies score[2] into the matching buf2_res[] entry.
+
+   inputs:  aa0[]  - query, indexed by frame
+	    n0     - query length passed to do_opt() (build_ares_code()
+		     is given m_msp->n0)
+	    lib_bhead_p - buffer of sequences to align
+	    ppst, m_msp - search parameters
+	    f_str[] - per-frame function state
+
+   results: lib_bhead_p->buf2_ares[], hdr.have_results = 1
+*/
+void
+buf_do_align(unsigned char **aa0,  int n0,
+	     struct buf_head *lib_bhead_p,
+	     struct pstruct *ppst, const struct mngmsg *m_msp,
+	     void **f_str) {
+
+  int buf2_cnt;
+  struct buf2_data_s *lib_buf2_dp;
+  struct buf2_res_s *lib_buf2_rp;
+  struct buf2_ares_s *lib_buf2_ap;
+  struct rstruct rst;
+
+  buf2_cnt = lib_bhead_p->hdr.buf2_cnt;
+  lib_buf2_dp = lib_bhead_p->buf2_data;
+  lib_buf2_rp = lib_bhead_p->buf2_res;
+  lib_buf2_ap = lib_bhead_p->buf2_ares;
+
+  while (buf2_cnt-- > 0) {
+#ifdef DEBUG
+    /* check for a missing sequence before do_opt() is handed the pointer */
+    if (lib_buf2_dp->seq->aa1b == NULL) {
+      fprintf(stderr,"*** error [%s:%d] - [buf_do_align] null aa1b\n",__FILE__, __LINE__);
+      lib_buf2_ap->a_res = NULL;
+      break;
+    }
+#endif
+
+    if ( m_msp->stages > 1) {
+      /* this is not typically done unless m_msp->stages > 1 */
+      do_opt (aa0[lib_buf2_dp->frame], n0, lib_buf2_dp->seq->aa1b, lib_buf2_dp->seq->n1,
+	      lib_buf2_dp->frame, ppst, f_str[lib_buf2_dp->frame], &rst);
+      lib_buf2_rp->rst.score[2]=rst.score[2];
+    }
+
+#ifdef DEBUG
+    /* these checks only report; the alignment is still attempted */
+    if (check_seq_range(lib_buf2_dp->seq->aa1b, lib_buf2_dp->seq->n1,
+			ppst->nsqx, "buf_do_align()")) {
+      fprintf(stderr, "*** error [%s:%d] - [%s/buf_do_align] range error at: %d/%d (n1:%d)\n",
+	      __FILE__, __LINE__,
+	      prog_func,lib_bhead_p->hdr.buf2_cnt - (buf2_cnt+1),
+	      lib_bhead_p->hdr.buf2_cnt, lib_buf2_dp->seq->n1);
+    };
+
+    /* also check for adler32_crc match */
+    if (lib_buf2_dp->seq->adler32_crc != adler32(1L,lib_buf2_dp->seq->aa1b, lib_buf2_dp->seq->n1)) {
+      fprintf(stderr, "*** error [%s:%d] - [%s/buf_do_align] CRC error at: %d/%d (n1:%d)\n",
+	      __FILE__, __LINE__,
+	      prog_func,lib_bhead_p->hdr.buf2_cnt - (buf2_cnt+1),
+	      lib_bhead_p->hdr.buf2_cnt, lib_buf2_dp->seq->n1);
+    }
+#endif
+
+    lib_buf2_ap->a_res = build_ares_code(aa0[lib_buf2_dp->frame], m_msp->n0,
+					 lib_buf2_dp->seq->aa1b, lib_buf2_dp->seq,
+					 lib_buf2_dp->frame, &lib_buf2_ap->have_ares,
+					 lib_buf2_dp->repeat_thresh, m_msp, ppst, f_str[lib_buf2_dp->frame] );
+
+    lib_buf2_dp++;
+    lib_buf2_ap++;
+    lib_buf2_rp++;
+  }
+  lib_bhead_p->hdr.have_results = 1;
+}
+
+/* buf_qshuf_work() scores every entry of lib_bhead_p->buf2_data[]
+   against a shuffled query (aa0s) and, for each set of frames, stores
+   the best score and lowest escore in the qr_score/qr_escore fields of
+   the buf2_res[] entry that has is_valid_stat set.  A set ends on the
+   entry whose frame equals max_frame.
+
+   inputs:  aa0s, n0  - shuffled query and its length
+	    lib_bhead_p - buffer already processed by buf_do_work()
+	    max_frame - frame value that ends a set
+	    ppst, qf_str - search parameters and function state
+	    ix_score  - index of the score used for ranking
+
+   results: qr_score, qr_escore in lib_bhead_p->buf2_res[]
+*/
+void
+buf_qshuf_work(unsigned char *aa0s,  int n0,
+	       struct buf_head *lib_bhead_p,
+	       int max_frame,
+	       struct pstruct *ppst, void *qf_str,
+	       int ix_score)
+{
+  int buf2_cnt;
+  struct buf2_data_s *lib_buf2_dp;
+  struct buf2_res_s *lib_buf2_rp, *tq_best_rp;
+  struct rstruct rrst;
+  struct score_count_s q_scnt_info;
+  int tq_best;
+  double tq_escore;
+
+  buf2_cnt = lib_bhead_p->hdr.buf2_cnt;
+  lib_buf2_dp = lib_bhead_p->buf2_data;
+  lib_buf2_rp = lib_bhead_p->buf2_res;
+
+  /* q_scnt_info is a throw-away counter block for do_work(); zero it
+     first, as the other buf_*_work() functions do with theirs, so
+     do_work() is never handed indeterminate stack contents */
+  q_scnt_info.s_cnt[0] = q_scnt_info.s_cnt[1] =
+    q_scnt_info.s_cnt[2] = q_scnt_info.tot_scores = 0;
+
+  tq_best_rp = NULL;
+  tq_best = -BIGNUM;
+  tq_escore = 1000.0;
+
+  while (buf2_cnt-- > 0) {
+    rrst.score[0] = rrst.score[1] = rrst.score[2] = -BIGNUM;
+    rrst.valid_stat = 0;
+
+    /* sequences outside the length window are not scored; drop any
+       partial state for the current set */
+    if (lib_buf2_dp->seq->n1 < ppst->n1_low ||
+	lib_buf2_dp->seq->n1 > ppst->n1_high ) {
+      lib_buf2_dp++;
+      lib_buf2_rp++;
+      tq_best_rp = NULL;
+      tq_best = -BIGNUM;
+      tq_escore = 1000.0;
+      continue;
+    }
+
+    do_work (aa0s, n0,
+	     lib_buf2_dp->seq->aa1b, lib_buf2_dp->seq->n1,
+	     lib_buf2_dp->frame, ppst, qf_str, 1, 0,
+	     &rrst, &q_scnt_info);
+
+    /* buf_qshuf_work() is always called after buf_do_work(), which
+       sets rp->is_valid_stat */
+    if (lib_buf2_rp->is_valid_stat) {
+      tq_best_rp = lib_buf2_rp;
+    }
+
+    if (rrst.escore < tq_escore) {
+      tq_escore = rrst.escore;
+    }
+    if (rrst.score[ix_score] > tq_best) {
+      tq_best = rrst.score[ix_score];
+    }
+
+    /* end of a set: store the best values and start over */
+    if (lib_buf2_dp->frame == max_frame) {
+      if (tq_best_rp!=NULL) {
+	tq_best_rp->qr_score = tq_best;
+	tq_best_rp->qr_escore = tq_escore;
+	tq_best_rp = NULL;
+      }
+#ifdef DEBUG
+      else {
+	fprintf(stderr,"*** error [%s:%d] - tq_best_rp NULL at: %ld\n",
+		__FILE__, __LINE__, lib_buf2_rp - lib_bhead_p->buf2_res);
+      }
+#endif
+      tq_best = -BIGNUM;
+      tq_escore = 1000.0;
+    }
+    lib_buf2_dp++;
+    lib_buf2_rp++;
+  }
+}
+
+/* buf_shuf_work() shuffles each library sequence in lib_bhead_p into
+   aa1s, scores the shuffled copy with do_work() into the r_rst field
+   of the matching buf2_res[] entry, and flags one result per set of
+   frames with is_valid_stat.  A set ends on the entry whose frame
+   equals max_frame.
+
+   inputs:  aa0[], n0 - query (indexed by frame) and its length
+	    aa1s      - scratch buffer that receives the shuffled sequence
+	    lib_bhead_p - buffer of sequences to shuffle
+	    max_frame - frame value that ends a set
+	    ppst, f_str[] - search parameters and per-frame state
+	    ix_score  - not used here; ppst->score_ix selects the score
+	    rand_state - passed through to the shuffle routines
+
+   results: r_rst, is_valid_stat in lib_bhead_p->buf2_res[];
+	    hdr.shuff_cnt = number of sequences shuffled;
+	    hdr.have_results = 1
+*/
+void
+buf_shuf_work(unsigned char **aa0,  int n0, unsigned char *aa1s, struct buf_head *lib_bhead_p,
+	      int max_frame, struct pstruct *ppst, void **f_str,
+	      int ix_score, void *rand_state)
+{
+  int buf2_cnt;
+  int shuff_cnt;
+  struct buf2_data_s *lib_buf2_dp;
+  struct buf2_res_s *lib_buf2_rp, *tr_best_rp;
+  int tr_best, sc_ix;
+  double tr_escore;
+
+  sc_ix = ppst->score_ix;
+
+  /* the score counters handed to do_work() start from zero for each buffer */
+  lib_bhead_p->s_cnt_info.s_cnt[0] = lib_bhead_p->s_cnt_info.s_cnt[1] =
+    lib_bhead_p->s_cnt_info.s_cnt[2] = lib_bhead_p->s_cnt_info.tot_scores = 0;
+
+  shuff_cnt = 0;
+  buf2_cnt = lib_bhead_p->hdr.buf2_cnt;
+  lib_buf2_dp = lib_bhead_p->buf2_data;
+  lib_buf2_rp = lib_bhead_p->buf2_res;
+
+  /* best-so-far state for the current set of frames */
+  tr_best_rp = NULL;
+  tr_best = -BIGNUM;
+  tr_escore = 1000.0;
+
+  while (buf2_cnt-- > 0) {
+    lib_buf2_rp->r_rst.score[0] = lib_buf2_rp->r_rst.score[1] =
+      lib_buf2_rp->r_rst.score[2] = -BIGNUM;
+    lib_buf2_rp->r_rst.valid_stat = lib_buf2_rp->is_valid_stat = 0;
+
+    /* skip entries with no stats slot or outside the length window,
+       dropping any partial state for the current set */
+    if ((lib_buf2_dp->stats_idx < 0) || lib_buf2_dp->seq->n1 < ppst->n1_low ||
+	lib_buf2_dp->seq->n1 > ppst->n1_high ) {
+      lib_buf2_dp++;
+      lib_buf2_rp++;
+      tr_best_rp = NULL;
+      tr_best = -BIGNUM;
+      tr_escore = 1000.0;
+      continue;
+    }
+
+    shuff_cnt++;
+    /* zs_win > 0 selects the windowed shuffle; otherwise shuffle3() or
+       shuffle() according to ppst->shuffle_dna3 */
+    if (ppst->zs_win > 0) {
+      wshuffle(lib_buf2_dp->seq->aa1b,aa1s,lib_buf2_dp->seq->n1,ppst->zs_win, rand_state);
+    }
+    else {
+      if (ppst->shuffle_dna3) {shuffle3(lib_buf2_dp->seq->aa1b,aa1s,lib_buf2_dp->seq->n1, rand_state);}
+      else {shuffle(lib_buf2_dp->seq->aa1b,aa1s,lib_buf2_dp->seq->n1, rand_state);}
+    }
+
+    /* rshuffle(lib_buf2_dp->seq->aa1b,aa1s,lib_buf2_dp->seq->n1); */
+
+#ifdef DEBUG
+    /* the label names this function, not buf_do_align() */
+    if (check_seq_range(aa1s, lib_buf2_dp->seq->n1,
+			ppst->nsqx, "buf_shuf_work()")) {
+      fprintf(stderr, "*** error [%s:%d] - [%s/buf_do_shuff] range error at: %d/%d (n1:%d)\n",
+	      __FILE__, __LINE__,
+	      prog_func,lib_bhead_p->hdr.buf2_cnt - (buf2_cnt+1),
+	      lib_bhead_p->hdr.buf2_cnt, lib_buf2_dp->seq->n1);
+    };
+#endif
+
+    do_work (aa0[lib_buf2_dp->frame], n0,
+	     aa1s, lib_buf2_dp->seq->n1,
+	     lib_buf2_dp->frame, ppst, f_str[lib_buf2_dp->frame], 0, 1,
+	     &lib_buf2_rp->r_rst, &(lib_bhead_p->s_cnt_info));
+
+    /* remember this result if it has the lowest escore or the highest
+       score seen so far in the current set */
+    if (lib_buf2_rp->r_rst.valid_stat) {
+      if (lib_buf2_rp->r_rst.escore < tr_escore) {
+	tr_escore = lib_buf2_rp->r_rst.escore;
+	tr_best_rp = lib_buf2_rp;
+      }
+      if (lib_buf2_rp->r_rst.score[sc_ix] > tr_best) {
+	tr_best = lib_buf2_rp->r_rst.score[sc_ix];
+	tr_best_rp = lib_buf2_rp;
+      }
+    }
+
+    /* end of a set: flag the remembered result and start over */
+    if (lib_buf2_dp->frame == max_frame) {
+      if (tr_best_rp!=NULL) {
+	tr_best_rp->is_valid_stat = 1;
+	tr_best_rp = NULL;
+      }
+      tr_best = -BIGNUM;
+      tr_escore = 1000.0;
+    }
+
+    lib_buf2_dp++;
+    lib_buf2_rp++;
+  }
+  lib_bhead_p->hdr.shuff_cnt = shuff_cnt;
+  lib_bhead_p->hdr.have_results = 1;
+}
+
+/* buf_shuf_seq is designed to:
+   (1) take a list of sequences (specified by bptr[])
+   (2) collect them from the database if they are not already available
+   (3) send them to the threads or shuffle them directly and calculate scores
+*/
+
+/* inputs:  aa0[], n0   - query (indexed by frame) and its length
+	    aa1shuff_b  - address of the buffer that holds the library
+			  sequences to be shuffled; (re)allocated here
+	    aa1save, maxn - scratch buffer used to re-read a sequence and,
+			  in the un-threaded build, to hold each shuffle
+	    bestp_arr[], nbest - the sequences to be shuffled
+	    ppst, m_msp - search parameters
+	    m_bufi_p    - buffer limits (max_buf2_res, max_work_buf)
+	    f_str[]     - per-frame state (un-threaded build only)
+	    s_info      - running score counts, updated via save_shuf()
+
+   results are stored by save_shuf().  Exits the program if memory
+   cannot be allocated.
+*/
+void
+buf_shuf_seq(unsigned char **aa0, int n0,
+	     unsigned char **aa1shuff_b, unsigned char *aa1save, int maxn,
+	     struct beststr **bestp_arr, int nbest,
+	     struct pstruct *ppst, struct mngmsg *m_msp,
+	     struct mng_thr *m_bufi_p
+#if !defined(COMP_THR) && !defined(PCOMPLIB)
+	     , void **f_str
+#endif
+	     , struct score_count_s *s_info)
+{
+  unsigned char *aa1shuff;
+  struct beststr *bbp, **tmp_bestp;
+  char l_bline[MAX_SSTR];
+  int n1lib_req, shuff_mult;
+  long loffset, l_off;
+  int n1, itt;
+  int max_do_cnt, ndiff, prev_index;
+  int istats;
+  int i, j;
+
+  /* these variables track buffers of library sequences */
+  int cur_buf_size, max_buf_size;
+  struct buf_head *lib_bhead_p;
+  struct buf2_data_s *lib_buf2_dp;
+  struct buf2_res_s *lib_buf2_rp;
+
+/* (1) get the sequences into a buffer - the sequence information is
+   currently in the bestp_arr - find out how many we have, and how
+   many we will need - the number to shuffle */
+
+/* figure out how much space we need, first checking whether we have
+   dups */
+  /* work on a copy of the pointer array so that bestp_arr[] keeps its order */
+  if ((tmp_bestp = (struct beststr **)calloc(nbest, sizeof(struct beststr *)))==NULL) {
+    fprintf(stderr,"*** error [%s:%d] - %s/buf_shuf_seq() *** cannot allocate tmp_bestp[%d]\n",
+	    __FILE__, __LINE__, prog_name, nbest);
+    exit(1);
+  }
+  for (i = 0; i < nbest; i++) {
+    tmp_bestp[i] = bestp_arr[i];
+  }
+
+  /* sort tmp_bestp[] by sequence index, so duplicates are adjacent */
+  sortbesti(tmp_bestp, nbest);
+
+  /* count number of different sequence indices, get required space
+     without dups */
+  /* each distinct sequence is budgeted n1 + 2 bytes */
+  prev_index = -1;
+  n1lib_req = ndiff = 0;
+  for (i = 0; i < nbest; i++) {
+    if (tmp_bestp[i]->seq->index > prev_index) {
+      prev_index = tmp_bestp[i]->seq->index;
+      n1lib_req += tmp_bestp[i]->n1+ 2;
+      ndiff++;
+    }
+  }
+
+#if !defined(COMP_THR) && !defined(PCOMPLIB)
+      /* un-threaded: grow the caller's buffer only when it is too small */
+      if (n1lib_req >= maxn) { /* we need new space, aa1shuff is too small */
+	if ((*aa1shuff_b = aa1shuff =
+	     (unsigned char *)realloc(*aa1shuff_b, n1lib_req*sizeof(char)))==NULL) {
+	  fprintf(stderr,"*** error [%s:%d] - cannot realloc aa1shuff[%d]\n",
+		  __FILE__, __LINE__, n1lib_req);
+	  exit(1);
+	}
+      }
+      else { aa1shuff = *aa1shuff_b;}
+      /* the first byte is set to '\0' and sequences start after it */
+      *aa1shuff = '\0';
+      aa1shuff++;
+
+#else
+      /* threaded/parallel: always allocate a fresh, zeroed buffer */
+      if (n1lib_req < 2) {
+	fprintf(stderr,"*** error [%s:%d] - [%s/buf_shuf_seq] no residues to shuffle: %d (%d)\n",
+		__FILE__, __LINE__,
+		prog_func,n1lib_req,ndiff);
+	exit(1);
+      }
+
+      if ((*aa1shuff_b = aa1shuff =
+	   (unsigned char *)calloc(n1lib_req,sizeof(char)))==NULL) {
+	fprintf(stderr,"*** error [%s:%d] - cannot calloc aa1shuff[%d]\n",
+		__FILE__, __LINE__, n1lib_req);
+	exit(1);
+      }
+      *aa1shuff = '\0';
+      aa1shuff++;
+#endif
+
+      /* number of shuffles requested per distinct sequence */
+      /* NOTE(review): ndiff is 0 when nbest is 0; only the
+	 threaded/parallel branch above exits in that case, so the
+	 un-threaded build would divide by zero here - confirm callers
+	 never pass nbest == 0 */
+      shuff_mult = (m_msp->shuff_max+1)/ndiff;
+      istats = 0;
+
+      /* setup lib_bhead buffers for shuffle comparisons */
+#if defined(COMP_THR) || defined(PCOMPLIB)	/* threaded/parallel */
+      /* max_do_cnt can be smaller than max_buf2_cnt, but not larger */
+      max_do_cnt = min(m_bufi_p->max_buf2_res,
+		       m_msp->shuff_max / (2 * fa_max_workers));
+      /* we don't have a left over one, so we need one */
+      get_rbuf(&lib_bhead_p,m_bufi_p->max_work_buf);
+#else	/* not threaded */
+      max_do_cnt = m_bufi_p->max_buf2_res;
+      lib_bhead_p = lib_buf2_list;  /* equivalent to un-threaded get_rbuf() */
+#endif
+      /* start with an empty buffer marked for shuffling */
+      max_buf_size = n1lib_req;
+      cur_buf_size = 0;
+      lib_bhead_p->hdr.buf2_cnt = 0;
+      lib_bhead_p->hdr.have_results = 0;
+      lib_bhead_p->hdr.stop_work = 0;
+      lib_bhead_p->hdr.buf2_type=BUF2_DOSHUF;
+      lib_bhead_p->hdr.seq_record_continuous = 0;
+      lib_buf2_dp = lib_bhead_p->buf2_data;
+      lib_buf2_rp = lib_bhead_p->buf2_res;
+
+      /* read sequences into shuffle buffer */
+
+      /* NOTE(review): this walks the first ndiff entries of the sorted
+	 tmp_bestp[]; because duplicates are adjacent after sorting,
+	 those are not necessarily the ndiff distinct sequences counted
+	 above - confirm */
+      for (i = 0; i < ndiff; i++) {
+	bbp = tmp_bestp[i];
+	if (bbp->seq->aa1b == NULL) {
+	  /* get the sequence */
+	  (bbp->mseq->m_file_p->ranlib)(l_bline, sizeof(l_bline),
+				       bbp->mseq->lseek,bbp->mseq->libstr,bbp->mseq->m_file_p);
+	  n1 = re_getlib(aa1save,NULL, maxn,m_msp->ldb_info.maxt3,
+			 m_msp->ldb_info.l_overlap,bbp->mseq->cont,m_msp->ldb_info.term_code,
+			 &loffset,&l_off,bbp->mseq->m_file_p);
+
+	  /* fprintf(stderr, " %d gets %d %d\n",i,tmp_bestp[i]->seq->n1,n1); */
+
+	  /* copy n1+1 bytes from aa1save into the shuffle buffer and
+	     point the sequence record at the copy */
+	  memcpy(aa1shuff, aa1save, n1+1);
+	  bbp->seq->aa1b = aa1shuff;
+	  aa1shuff += n1 + 1;
+	}
+
+	/* lib_buf2_dp is used up by scores, the sequence is not sent multiple times */
+	cur_buf_size += bbp->seq->n1+1;
+	/* queue shuff_mult entries per frame for this sequence; every
+	   entry gets its own stats_idx */
+	for (j = 0; j < shuff_mult; j++ ) {
+	  for (itt = m_msp->revcomp; itt <= m_msp->nitt1; itt++) {
+#ifdef PCOMPLIB
+	    lib_buf2_dp->seq_dup = 0;	/* mark first ->seq as original, not duplicate */
+#endif
+	    lib_buf2_dp->seq = bbp->seq;
+	    /* this invalidates lib_buf2_p->seq */
+	    lib_buf2_dp->stats_idx = istats++;
+	    lib_buf2_dp->frame = itt;
+	    lib_buf2_dp++;		/* point to next buf2 */
+	    lib_buf2_rp++;		/* point to next buf2 */
+	    lib_bhead_p->hdr.buf2_cnt++;
+
+	    /* buffer full (by entry count or by residues): process it */
+	    if (lib_bhead_p->hdr.buf2_cnt >= max_do_cnt ||
+		cur_buf_size >= max_buf_size) {
+/* (2) send sequences for shuffling */
+#if defined(COMP_THR) || defined(PCOMPLIB)	/* threaded - fill and empty buffers */
+	      /* provide empty buffer to workers */
+	      lib_bhead_p->hdr.aa1b_used = cur_buf_size;
+	      lib_bhead_p->hdr.have_data = 1;
+	      lib_bhead_p->hdr.seq_record_continuous = 0;
+	      put_rbuf(lib_bhead_p,m_bufi_p->max_work_buf);
+	      get_rbuf(&lib_bhead_p,m_bufi_p->max_work_buf);
+#else		/* non-thread - just do the searches */
+	      if (lib_bhead_p->hdr.buf2_type & BUF2_DOSHUF) {
+		buf_shuf_work(aa0,m_msp->n0, aa1save, lib_bhead_p,
+			      m_msp->nitt1, ppst, f_str, ppst->score_ix, rand_state);
+	      }
+#endif
+/* (3) save results in the rstats structure */
+	      if (lib_bhead_p->hdr.buf2_cnt > 0 && lib_bhead_p->hdr.have_results) {
+		save_shuf(lib_bhead_p,m_msp->nitt1,m_msp->shuff_max,ppst->score_ix,s_info);
+	      }
+
+	      /* reset the buffer header so it can be filled again */
+	      lib_bhead_p->s_cnt_info.s_cnt[0] = lib_bhead_p->s_cnt_info.s_cnt[1] =
+		lib_bhead_p->s_cnt_info.s_cnt[2] = lib_bhead_p->s_cnt_info.tot_scores = 0;
+
+	      lib_bhead_p->hdr.buf2_cnt = 0;
+	      cur_buf_size = 0;
+	      lib_bhead_p->hdr.have_results = 0;
+	      lib_bhead_p->hdr.buf2_type=BUF2_DOSHUF;
+	      lib_bhead_p->hdr.seq_record_continuous = 0; /* seq_records are coming from bestptr in any order */
+	      lib_bhead_p->hdr.stop_work = 0;
+	      lib_buf2_dp = lib_bhead_p->buf2_data;
+	    }
+	  } /* for (itt .. */
+	}
+      }			/* done with tmp_bestp[] */
+      free(tmp_bestp);
+
+#if defined(COMP_THR) || defined(PCOMPLIB)	/* if COMP_THR/PCOMPLIB - fill and empty buffers */
+      /* check last buffers for any results */
+      lib_bhead_p->hdr.seq_record_continuous = 0;
+      put_rbuf(lib_bhead_p,m_bufi_p->max_work_buf);
+
+      /* wait for the threads to finish */
+
+      wait_rbuf(m_bufi_p->max_work_buf);
+      /*
+      fprintf(stderr, " num_reader[%d]-empty[%d]: %d\tnrstats: %d\n",
+	      num_reader_bufs,empty_reader_bufs,
+	      num_reader_bufs-empty_reader_bufs, nrstats);
+      */
+
+      /* collect whatever results are still sitting in the reader buffers */
+      for (i=0; i < num_reader_bufs; i++) {
+	if (RESULTS_BUF[i]->hdr.buf2_cnt > 0 && RESULTS_BUF[i]->hdr.have_results) {
+	  save_shuf(RESULTS_BUF[i],m_msp->nitt1, m_msp->shuff_max, ppst->score_ix, s_info);
+	  RESULTS_BUF[i]->hdr.buf2_cnt = RESULTS_BUF[i]->hdr.have_results = 0;
+	}
+      }
+#else	/* just do the searches */
+      /* aa1save is used for shuffles, not aa1shuf, because aa1shuf
+	 has library sequences */
+      /* process the final, partially filled buffer */
+      buf_shuf_work(aa0,m_msp->n0, aa1save, lib_bhead_p,
+		    m_msp->nitt1, ppst, f_str, ppst->score_ix, rand_state);
+
+      save_shuf(lib_bhead_p,m_msp->nitt1,m_msp->shuff_max, ppst->score_ix, s_info);
+      lib_bhead_p->hdr.buf2_cnt = lib_bhead_p->hdr.have_results = 0;
+#endif
+}
+
+/* buf_align_seq is structurally almost identical to buf_shuf_seq,
+   except that the appropriate sequences are pre-loaded into bbp->seq
+   (and ->bline), and it gets bbp->a_res, rather than scores.
+
+   For each of the nbest entries in bestp_arr[], the entry's seq,
+   frame and repeat_thresh are copied into the next buf2_data slot of
+   the current work buffer, and the matching buf2_ares slot is
+   cleared and tagged with best_idx = i (the index into bestp_arr[]).
+   When the buffer holds max_align_cnt entries, or cur_buf_size comes
+   within m_msp->ldb_info.maxn of the buffer's aa1b_size, the buffer
+   is dispatched:
+     threaded (COMP_THR/PCOMPLIB): put_rbuf() queues it and
+       get_rbuf() fetches a replacement buffer;
+     unthreaded: buf_do_align() is called directly.
+   Buffers that have have_results set are handed to save_align(),
+   whose return values are summed in n_pre_align.
+
+   On return m_msp->align_done is 1.  Messages are written to stderr
+   (the function still returns normally) if n_pre_align != nbest or
+   if any bestp_arr[i]->a_res is still NULL.
+
+   n0 is not referenced in this function (m_msp->n0 is passed to
+   buf_do_align() instead); aa0 and f_str are used only by the
+   unthreaded build, and f_str is only a parameter in that build.
+*/
+
+void
+buf_align_seq(unsigned char **aa0, int n0,
+	      struct beststr **bestp_arr, int nbest,
+	      struct pstruct *ppst, struct mngmsg *m_msp,
+	      struct mng_thr *m_bufi_p
+#if !defined(COMP_THR) && !defined(PCOMPLIB)
+	      , void **f_str
+#endif
+	     )
+{
+  struct beststr *bbp;
+  int max_align_cnt;
+  int i, n_pre_align;
+  int cur_buf_size, max_buf_size;
+  struct buf_head *lib_bhead_p;
+  struct buf2_data_s *lib_buf2_dp;
+  struct buf2_ares_s *lib_buf2_ap;
+
+  /* setup lib_bhead buffers for alignments */
+#if defined(COMP_THR) || defined(PCOMPLIB)	/* threaded */
+  /* max_do_cnt can be smaller than max_buf2_res, but not larger */
+#ifdef COMP_THR
+  max_align_cnt = min(m_bufi_p->max_buf2_res,
+		      nbest / (4 * fa_max_workers));
+#else
+  max_align_cnt = min(m_bufi_p->max_buf2_res, nbest / fa_max_workers);
+#endif
+  /* the integer divisions above can yield 0; always send at least one */
+  if (max_align_cnt < 1) max_align_cnt = 1;
+
+  get_rbuf(&lib_bhead_p,m_bufi_p->max_work_buf);
+#else	/* not threaded */
+  max_align_cnt = m_bufi_p->max_buf2_res;
+  lib_bhead_p = lib_buf2_list;  /* equivalent to un-threaded get_rbuf() */
+#endif
+
+  /* reset the buffer header and point at its first data/result slots */
+  max_buf_size = lib_bhead_p->hdr.aa1b_size;
+  lib_bhead_p->hdr.buf2_cnt = 0;
+  lib_bhead_p->hdr.have_results = 0;
+  lib_bhead_p->hdr.stop_work = 0;
+  lib_bhead_p->hdr.buf2_type=BUF2_DOALIGN;
+  lib_buf2_dp = lib_bhead_p->buf2_data;
+  lib_buf2_ap = lib_bhead_p->buf2_ares;
+
+  /* read sequences into align buffer */
+
+  n_pre_align = 0;
+  cur_buf_size = 0;
+  for (i = 0; i < nbest; i++) {
+    bbp = bestp_arr[i];
+
+    /* this invalidates lib_buf2_p->seq */
+    lib_buf2_dp->seq = bbp->seq;
+    /* running total of sequence lengths (n1 + 1 per entry) in this buffer */
+    cur_buf_size += bbp->seq->n1+1;
+    lib_buf2_dp->frame = bbp->frame;
+    lib_buf2_dp->repeat_thresh = bbp->repeat_thresh;
+#ifdef PCOMPLIB
+    lib_buf2_dp->seq_dup = 0;
+#endif
+    /* no alignment yet; best_idx records which bestp_arr[] entry this is */
+    lib_buf2_ap->have_ares = 0;
+    lib_buf2_ap->a_res = NULL;
+    lib_buf2_ap->best_idx = i;
+    lib_buf2_dp++;		/* point to next buf2_data */
+    lib_buf2_ap++;		/* point to next buf2_ares */
+    lib_bhead_p->hdr.buf2_cnt++;
+
+    /* dispatch when the entry count or the size budget is reached */
+    if (lib_bhead_p->hdr.buf2_cnt >= max_align_cnt ||
+	cur_buf_size >= max_buf_size - m_msp->ldb_info.maxn) {
+/* (2) send sequences for alignment */
+#if defined(COMP_THR) || defined(PCOMPLIB)	/* threaded - fill and empty buffers */
+      /* provide empty buffer to workers */
+      lib_bhead_p->hdr.seqr_cnt = lib_bhead_p->hdr.buf2_cnt;	/* for alignments, they are the same */
+      lib_bhead_p->hdr.have_data = 1;
+      lib_bhead_p->hdr.aa1b_used = cur_buf_size;
+      lib_bhead_p->hdr.seq_record_continuous = 0;
+      put_rbuf(lib_bhead_p,m_bufi_p->max_work_buf);
+      /* lib_bhead_p is replaced by whatever buffer get_rbuf() hands back */
+      get_rbuf(&lib_bhead_p,m_bufi_p->max_work_buf);
+#else		/* non-thread - just do the searches */
+      buf_do_align(aa0, m_msp->n0, lib_bhead_p, ppst, m_msp, f_str);
+#endif
+
+/* (3) save alignments */
+      /* in the threaded build this examines the buffer just obtained
+	 from get_rbuf(), not the one that was just queued */
+      if (lib_bhead_p->hdr.buf2_cnt > 0 && lib_bhead_p->hdr.have_results) {
+	n_pre_align += save_align(lib_bhead_p,bestp_arr);
+      }
+
+      /* re-initialize the (possibly different) buffer for the next batch */
+      cur_buf_size = 0;
+      max_buf_size = lib_bhead_p->hdr.aa1b_size;
+      lib_bhead_p->hdr.buf2_cnt = 0;
+      lib_bhead_p->hdr.have_results = 0;
+      lib_bhead_p->hdr.buf2_type=BUF2_DOALIGN;
+      lib_bhead_p->hdr.stop_work = 0;
+      lib_buf2_dp = lib_bhead_p->buf2_data;
+      lib_buf2_ap = lib_bhead_p->buf2_ares;
+    }
+  }			/* done with bestp_arr[] */
+
+#if defined(COMP_THR) || defined(PCOMPLIB)	/* if COMP_THR - fill and empty buffers */
+  /* check last buffers for any results */
+  /* the final, possibly partially filled (or empty) buffer is queued too */
+  lib_bhead_p->hdr.seqr_cnt = lib_bhead_p->hdr.buf2_cnt;	/* for alignments, they are the same */
+  lib_bhead_p->hdr.have_data = 1;
+  lib_bhead_p->hdr.aa1b_used = cur_buf_size;
+  lib_bhead_p->hdr.seq_record_continuous = 0;
+  put_rbuf(lib_bhead_p,m_bufi_p->max_work_buf);
+
+  /* wait for the threads to finish */
+
+  wait_rbuf(m_bufi_p->max_work_buf);
+
+  /* harvest every buffer that still holds unsaved results */
+  for (i=0; i < num_reader_bufs; i++) {
+    if (RESULTS_BUF[i]->hdr.buf2_cnt > 0 && RESULTS_BUF[i]->hdr.have_results) {
+      n_pre_align += save_align(RESULTS_BUF[i],bestp_arr);
+      RESULTS_BUF[i]->hdr.buf2_cnt = RESULTS_BUF[i]->hdr.have_results = 0;
+    }
+  }
+#else	/* just do the searches */
+  /* called unconditionally, even when the last batch left buf2_cnt == 0 */
+  buf_do_align(aa0, m_msp->n0, lib_bhead_p, ppst, m_msp, f_str);
+  n_pre_align += save_align(lib_bhead_p,bestp_arr);
+  lib_bhead_p->hdr.buf2_cnt = lib_bhead_p->hdr.have_results = 0;
+#endif
+
+  m_msp->align_done = 1;
+
+  /* sanity checks: report, but do not abort, on missing alignments */
+  if (n_pre_align != nbest) {
+    fprintf(stderr,"*** error [%s:%d] -  n_pre_align:%d != nbest: %d\n",
+	    __FILE__, __LINE__, n_pre_align, nbest);
+  }
+  for (i=0; i < nbest; i++) {
+    if (bestp_arr[i]->a_res == NULL) {
+      fprintf(stderr, "*** error [%s:%d] - have NULL a_res: %d\n",
+	      __FILE__, __LINE__, i);
+    }
+  }
+}
+
+/* check_seq_range() reports whether any of the first n1 residue codes
+   in aa1b[] is larger than nsq.
+
+   returns 1 if at least one code exceeds nsq, 0 otherwise (including
+   n1 <= 0).  The whole range is always scanned.  str is accepted for
+   a diagnostic message that is currently disabled, so it is unused.
+*/
+int
+check_seq_range(unsigned char *aa1b, int n1, int nsq, char *str) {
+  int pos;
+  int out_of_range = 0;
+
+  for (pos = 0; pos < n1; pos++) {
+    if (aa1b[pos] > nsq) {
+      out_of_range = 1;
+    }
+  }
+
+  return out_of_range;
+}
+
+/* growable LIFO stack of untyped pointers; see init_stack(),
+   push_stack(), pop_stack(), get_stack_len(), free_stack() */
+struct stack_str {
+  void **stack;	/* array of stored pointers */
+  int size;	/* number of slots currently allocated in stack[] */
+  int inc;	/* number of slots added when push_stack() must grow stack[] */
+  int top;	/* number of entries in use; index of the next free slot */
+};
+
+/* init_stack() creates an empty pointer stack.
+
+   size -- number of slots to allocate initially
+   inc  -- number of slots push_stack() adds each time the stack is full
+
+   returns the new stack, or NULL (after a message on stderr) if either
+   allocation fails.  Release with free_stack().
+*/
+struct stack_str *init_stack(int size, int inc) {
+  struct stack_str *new_stack;
+
+  new_stack = (struct stack_str *)calloc(1,sizeof(struct stack_str));
+  if (new_stack == NULL) {
+    fprintf(stderr,"*** error [%s:%d] - cannot allocate stack\n",
+            __FILE__, __LINE__);
+    return NULL;
+  }
+
+  new_stack->stack = (void *)calloc(size,sizeof(void *));
+  if (new_stack->stack == NULL) {
+    fprintf(stderr,"*** error [%s:%d] - cannot allocate stack->stack[%d]\n",
+            __FILE__, __LINE__,size);
+    /* do not leak the header when the slot array cannot be allocated */
+    free(new_stack);
+    return NULL;
+  }
+
+  new_stack->top = 0;
+  new_stack->inc = inc;
+  new_stack->size = size;
+  return new_stack;
+}
+
+/* push_stack() stores value on top of stack, growing the slot array
+   when it is full.
+
+   stack -- stack from init_stack(); a NULL stack is silently ignored
+   value -- pointer to store (not copied, not owned by the stack)
+
+   If the array cannot be enlarged, a message is written to stderr and
+   the value is NOT pushed; the stack keeps its previous contents and
+   capacity, so it remains usable and can still be freed.
+*/
+void push_stack(struct stack_str *stack, void *value) {
+  void **grown;
+  int new_size;
+
+  if (!stack) return;
+
+  if (stack->top >= stack->size) {
+    /* a non-positive increment would leave the array too small for the
+       store below, so always grow by at least one slot */
+    new_size = stack->size + ((stack->inc > 0) ? stack->inc : 1);
+    if (new_size <= stack->top) { new_size = stack->top + 1; }
+
+    /* realloc() into a temporary: on failure the original block is still
+       valid and must not be overwritten with NULL */
+    grown = (void **)realloc(stack->stack, (size_t)new_size*sizeof(void *));
+    if (grown == NULL) {
+      fprintf(stderr,"*** error [%s:%d] - cannot re-allocate stack to [%d]\n",
+              __FILE__, __LINE__, new_size);
+      return;
+    }
+    stack->stack = grown;
+    stack->size = new_size;
+  }
+
+  stack->stack[stack->top++] = value;
+}
+
+/* get_stack_len() returns the number of entries currently on stack
+   (its top index); a NULL stack has length 0 */
+int get_stack_len(struct stack_str *stack) {
+  return (stack != NULL) ? stack->top : 0;
+}
+
+
+/* pop_stack() removes and returns the most recently pushed pointer.
+
+   returns NULL when stack is NULL or empty.  An empty stack has its
+   top index forced back to 0.  With DEBUG defined, a NULL stack is
+   also reported on stderr.
+*/
+void * pop_stack(struct stack_str *stack) {
+  if (stack == NULL) {
+#ifdef DEBUG
+    fprintf(stderr," *** error [%s:%d] - pop_stack NULL stack\n",__FILE__, __LINE__);
+#endif
+    return NULL;
+  }
+
+  /* nothing stored: normalize top and report "empty" */
+  if (stack->top <= 0) {
+    stack->top = 0;
+    return NULL;
+  }
+
+  stack->top -= 1;
+  return stack->stack[stack->top];
+}
+
+/* free_stack() releases the slot array and the stack header.  The
+   pointers that were stored on the stack are not freed.
+
+   always returns NULL, so callers can write  stk = free_stack(stk);
+*/
+void * free_stack(struct stack_str *stack) {
+  if (stack != NULL) {
+    free(stack->stack);		/* free(NULL) is a no-op */
+    free(stack);
+  }
+  return NULL;
+}
+
+/* init_dyn_string() allocates an empty, growable character buffer.
+
+   size -- initial capacity in bytes (stored in ->mx_size); values
+           below 1 are raised to 1 so there is always room for '\0'
+   inc  -- initial growth increment (stored in ->inc); values below 1
+           are raised to 1 so that the increment-doubling loops in
+           dyn_strcat()/dyn_strcpy() terminate
+
+   returns the new dyn_string_str with an empty string and c_size 0,
+   or NULL (after a message on stderr) if either allocation fails.
+   Release with free_dyn_string().
+*/
+struct dyn_string_str *
+init_dyn_string(int size, int inc) {
+  struct dyn_string_str *dyn_string;
+
+  if (size < 1) { size = 1; }
+  if (inc < 1) { inc = 1; }
+
+  if ((dyn_string=(struct dyn_string_str *)calloc(1,sizeof(struct dyn_string_str)))==NULL) {
+    fprintf(stderr,"*** error [%s:%d] - cannot allocate dyn_string\n",
+            __FILE__, __LINE__);
+    return NULL;
+  }
+
+  /* ->string holds characters and ->mx_size counts bytes, so the
+     element size is sizeof(char), not sizeof(void *) */
+  if ((dyn_string->string=(void *)calloc((size_t)size,sizeof(char)))==NULL) {
+    fprintf(stderr,"*** error [%s:%d] - cannot allocate dyn_string->string[%d]\n",
+            __FILE__, __LINE__,size);
+    free(dyn_string);
+    return NULL;
+  }
+
+  dyn_string->c_size = 0;
+  dyn_string->inc = inc;
+  dyn_string->mx_size = size;
+  return dyn_string;
+}
+
+/* reset_dyn_string() empties dyn_string without releasing its buffer.
+
+   The first c_size bytes (never more than mx_size) are zeroed and
+   the string is explicitly '\0'-terminated at offset 0, so it is
+   empty even if c_size did not reflect the stored text (dyn_strcpy()
+   stores text without going through dyn_strcat()).  A NULL
+   dyn_string, or one whose buffer is NULL, is tolerated.
+*/
+void
+reset_dyn_string(struct dyn_string_str *dyn_string) {
+  size_t n_clear;
+
+  if (dyn_string == NULL) return;
+
+  if (dyn_string->string != NULL) {
+    n_clear = (size_t)dyn_string->c_size;
+    if (n_clear > (size_t)dyn_string->mx_size) { n_clear = (size_t)dyn_string->mx_size; }
+    memset(dyn_string->string,0,n_clear);
+    if (dyn_string->mx_size > 0) { dyn_string->string[0] = '\0'; }
+  }
+  dyn_string->c_size = 0;
+}
+
+/* dyn_strcat() appends value to dyn_string, enlarging the buffer
+   when the result (including '\0') would reach mx_size.
+
+   Growth policy: ->inc is doubled until it is at least strlen(value),
+   then added to mx_size.
+
+   A NULL dyn_string or NULL value is ignored.  If the buffer cannot
+   be enlarged, a message is written to stderr and nothing is
+   appended; the existing text and capacity are left untouched.
+*/
+void
+dyn_strcat(struct dyn_string_str *dyn_string, char *value) {
+  size_t add_len, need, new_size;
+  int was_unallocated;
+  void *grown;
+
+  if (!dyn_string || !value) return;
+
+  add_len = strlen(value);
+  need = add_len + (size_t)dyn_string->c_size + 1;	/* text + '\0' */
+  was_unallocated = (dyn_string->string == NULL);
+
+  if (was_unallocated || need >= (size_t)dyn_string->mx_size) {
+    /* a zero/negative increment would never reach add_len by doubling */
+    if (dyn_string->inc < 1) { dyn_string->inc = 1; }
+    while ((size_t)dyn_string->inc < add_len) { dyn_string->inc *= 2; }
+
+    new_size = (size_t)dyn_string->mx_size + (size_t)dyn_string->inc;
+    if (new_size < need) { new_size = need; }
+
+    /* realloc() into a temporary so a failure does not lose the buffer */
+    if ((grown = realloc(dyn_string->string, new_size))==NULL) {
+      fprintf(stderr,"*** error [%s:%d] - cannot re-allocate dyn_string to [%d]\n",
+              __FILE__, __LINE__, (int)new_size);
+      return;
+    }
+    if (was_unallocated) {	/* fresh memory: start from an empty string */
+      ((char *)grown)[0] = '\0';
+      dyn_string->c_size = 0;
+    }
+    dyn_string->string = grown;
+    dyn_string->mx_size = new_size;
+  }
+
+  SAFE_STRNCAT(dyn_string->string,value,dyn_string->mx_size);
+  dyn_string->c_size += add_len;
+}
+
+/* dyn_strcpy() replaces the contents of dyn_string with value,
+   enlarging the buffer when strlen(value)+1 would reach mx_size.
+
+   c_size is set to the length of the stored text so that a later
+   dyn_strcat() or reset_dyn_string() sees the correct length.
+
+   A NULL dyn_string or NULL value is ignored.  If the buffer cannot
+   be enlarged, a message is written to stderr and the previous
+   contents are left untouched.
+*/
+void dyn_strcpy(struct dyn_string_str *dyn_string, char *value) {
+  size_t add_len, need, new_size;
+  void *grown;
+
+  if (!dyn_string || !value) return;
+
+  add_len = strlen(value);
+  need = add_len + 1;		/* text + '\0' */
+
+  if (dyn_string->string == NULL || need >= (size_t)dyn_string->mx_size) {
+    /* a zero/negative increment would never reach add_len by doubling */
+    if (dyn_string->inc < 1) { dyn_string->inc = 1; }
+    while ((size_t)dyn_string->inc < add_len) { dyn_string->inc *= 2; }
+
+    new_size = (size_t)dyn_string->mx_size + (size_t)dyn_string->inc;
+    if (new_size < need) { new_size = need; }
+
+    /* realloc() into a temporary so a failure does not lose the buffer */
+    if ((grown = realloc(dyn_string->string, new_size))==NULL) {
+      fprintf(stderr,"*** error [%s:%d] - cannot re-allocate dyn_string to [%d]\n",
+              __FILE__, __LINE__, (int)new_size);
+      return;
+    }
+    dyn_string->string = grown;
+    dyn_string->mx_size = new_size;
+  }
+
+  SAFE_STRNCPY(dyn_string->string,value,dyn_string->mx_size);
+  dyn_string->c_size = add_len;
+}
+
+/* free_dyn_string() releases the character buffer and the
+   dyn_string_str header; a NULL argument is ignored */
+void free_dyn_string(struct dyn_string_str *dyn_string) {
+  if (dyn_string != NULL) {
+    free(dyn_string->string);	/* free(NULL) is a no-op */
+    free(dyn_string);
+  }
+}
+
+#include "a_mark.h"
+
+/* init_domfeat_data() allocates one zeroed domfeat_data record per
+   annotation in annot_p (plus one spare at the end) and ties record i
+   to annot_p->annot_arr_p[i].
+
+   Each of the first n_annot records starts unlinked (next == NULL)
+   with end_pos == -1.
+
+   returns the array, or NULL (after a message on stderr) if the
+   allocation fails.
+*/
+struct domfeat_data *
+init_domfeat_data(const struct annot_str *annot_p) {
+  struct domfeat_data *dom_arr, *dom_p;
+  int idx;
+
+  dom_arr = (struct domfeat_data *)calloc(annot_p->n_annot+1, sizeof(struct domfeat_data));
+  if (dom_arr == NULL) {
+    fprintf(stderr,"*** error [%s:%d] - calc_cons_u(): cannot allocate left_domain_list [%d]\n", __FILE__, __LINE__, annot_p->n_annot+1);
+    return NULL;
+  }
+
+  /* here we link potentially overlapping domains */
+  idx = 0;
+  while (idx < annot_p->n_annot) {
+    dom_p = &dom_arr[idx];
+    dom_p->annot_entry_p = annot_p->annot_arr_p+idx;
+    dom_p->end_pos = -1;
+    dom_p->next = NULL;
+    idx++;
+  }
+
+  return dom_arr;
+}
+
+/* close_annot_match() closes the domains at the front of the
+   *left_domain_head_p chain.
+
+   Starting at the head, every domain is closed until one is found
+   whose end_pos is beyond ia (only when ia > 0; with ia <= 0 the
+   whole chain is closed).  Closing a domain adds the pending
+   *d_score_p, *d_ident_p, *d_alen_p and *d_gaplen_p to its totals,
+   sets *have_push_features_p (when the pointer is not NULL) and
+   pushes the domain on annot_stack.
+
+   On return *left_domain_head_p is the first domain left open (or
+   NULL) and *left_end_p is its end_pos (or -1).  The d_* values are
+   read but not reset here; init_score is not used.
+*/
+void
+close_annot_match (int ia, void *annot_stack, int *have_push_features_p,
+		   int *d_score_p, int *d_ident_p, int *d_alen_p, int *d_gaplen_p,
+		   struct domfeat_data **left_domain_head_p,
+		   long *left_end_p, int init_score) {
+
+  struct domfeat_data *dom_p;
+
+  dom_p = *left_domain_head_p;
+  while (dom_p != NULL && (ia <= 0 || dom_p->end_pos <= ia)) {
+    dom_p->score += *d_score_p;
+    dom_p->n_ident += *d_ident_p;
+    dom_p->n_alen += *d_alen_p;
+    dom_p->n_gaplen += *d_gaplen_p;
+    if (have_push_features_p) { *have_push_features_p = 1; }
+    push_stack(annot_stack, dom_p);
+    dom_p = dom_p->next;
+  }
+
+  /* dom_p is now the first domain that is still open, if any */
+  if (dom_p == NULL) {
+    *left_end_p = -1;
+  }
+  else {
+    *left_end_p = dom_p->end_pos;
+  }
+
+  *left_domain_head_p = dom_p;
+}
+
+/* next_annot_match()/process_annot_match() */
+
+/*
+   *itmp has the current alignment score, if *annot_arr[i_annot].label='V',
+     this can be increased (total increase in *v_delta)
+   *pam2aa0v[.value] gives possibly better pam score for variant
+   *ip is position in annotated sequence (&i0 for annot0_p)
+   *ia is position in aligned sequence (&i1 for annot0_p)
+   sp1 is the array for the (possibly modified) displayed sequence
+   sp1a is the array for the associated annotation
+   sq maps encoded residues to displayed characters
+   i_annot -- current annotation index in annot0_p->annot_arr_p[i_annot]
+   annot_arr = annot0/1_p->annot_arr_p
+   annot_stack = save current annotation
+   *have_push_features_p = set for annotations pushed in stack (not 'V')
+   *v_delta = change in score from variant at this position
+   **region_p = set for '[' region start
+   init_score -- used to initialize tmp_region_p->score.
+
+   currently, next_annot_match() is not called before alignment starts or after it ends
+*/
+
+/* process_annot_match processes a single annot_p->pos == ip entry,
+   returning 0 for a left_end == ip match (not using i_annot), or a 1
+   if an i_annot was consumed
+*/
+/* 
+   within the alignment, next_annot_match() provides the while() loop to
+   process several matches at the same location
+
+   before/after the alignment, different while() loops call
+   process_annot_match()
+*/
+
+/* process_annot_match() handles one event at position ip: either the
+   end of the domain at the head of the *left_domain_head_p chain, or
+   the single annotation entry annot_arr_p.
+
+   returns 0 if ip matched the current left end (open domains were
+           closed with close_annot_match(); annot_arr_p was not used),
+           1 if annot_arr_p was consumed.
+
+   When an annotation is consumed, left_domain_p (the domfeat_data
+   record for this annotation) is initialized and then, by label:
+     'V'   -- if pam2aa0v[value] is greater than *itmp, the score,
+              *v_delta, *sp1, *sp1a and *ann_comment are updated;
+     '-'   -- the record is linked into the open-domain chain and
+              *left_end_p is set to the end_pos of the chain's head;
+     other -- the record is pushed on annot_stack (if not NULL).
+
+   n_annots is not referenced in this function.
+*/
+int
+process_annot_match(int *itmp, int *pam2aa0v, 
+		    long ip, long ia, char *sp1, char *sp1a, const unsigned char *sq,
+		    struct annot_entry *annot_arr_p, int n_annots, char **ann_comment,
+		    void *annot_stack, int *have_push_features_p, int *v_delta,
+		    int *d_score_p, int *d_ident_p, int *d_alen_p, int *d_gaplen_p,
+		    struct domfeat_data **left_domain_head_p,
+		    struct domfeat_data *left_domain_p,
+		    long *left_end_p, int init_score) {
+  int v_tmp;
+  int new_left_domain_end;
+  struct domfeat_data *this_dom, *prev_dom, *new_dom;
+
+  /* refresh the caller's left end from the chain head before testing it */
+  if (*left_domain_head_p) {	/* do we have a domain_link_chain? */
+    *left_end_p = (*left_domain_head_p)->end_pos;
+  }
+    
+  if (ip == *left_end_p) { /* do this first before starting any new domains */
+    close_annot_match(ip, annot_stack, have_push_features_p, 
+		      d_score_p, d_ident_p, d_alen_p, d_gaplen_p,
+		      left_domain_head_p, left_end_p, init_score);
+    /* the pending deltas have been credited; start a new segment */
+    *d_ident_p = *d_alen_p = *d_gaplen_p = 0;
+    *d_score_p = init_score;
+    return 0;
+  }
+  else {
+    /* initialize domfeat_data (scoring, boundary) information */
+    /* NOTE(review): n_gaplen is not reset here, although
+       close_annot_match() accumulates into it -- presumably relies on
+       the record being zero-allocated; confirm for reused records */
+    left_domain_p->next = NULL;
+    left_domain_p->score = 0;
+    left_domain_p->n_ident = 0;
+    left_domain_p->n_alen = 0;
+    left_domain_p->pos = ip;
+    left_domain_p->a_pos = ia;
+
+    if (annot_arr_p->label == 'V') { /* label == 'V' */
+      /* use the variant residue only if it scores better */
+      v_tmp = pam2aa0v[annot_arr_p->value];
+      if (v_tmp > *itmp) {
+	*v_delta += (v_tmp- *itmp);
+	*itmp = v_tmp;
+	*sp1 = sq[annot_arr_p->value];
+	if (sp1a) *sp1a = 'V';
+	if (ann_comment) *ann_comment = annot_arr_p->comment;
+      }
+    }
+    else if (annot_arr_p->label == '-') {
+
+      /* initialize this dom_entry */
+      left_domain_p->score = init_score;
+      left_domain_p->n_ident = left_domain_p->n_alen = 0;
+      *left_end_p = left_domain_p->end_pos = annot_arr_p->end;
+
+      if (*left_domain_head_p == NULL) {
+	/* no open domains: this one becomes the whole chain */
+	*left_domain_head_p = left_domain_p;
+	*d_score_p = init_score;
+      }
+      else { 
+	/* we already have a domain list - update scores for "live"
+	   domains and insert new domain */
+
+	new_left_domain_end = annot_arr_p->end;
+	new_dom = prev_dom = NULL;
+	
+	/* this loop tries to do two things:
+	   (1) update the scores for all the currently active domains
+	   (2) find the place to insert the new domain
+	*/
+
+	for (this_dom = *left_domain_head_p; this_dom; this_dom = this_dom->next) {
+	  /* here we update the scores */
+	  this_dom->score += *d_score_p;
+	  this_dom->n_ident += *d_ident_p;
+	  this_dom->n_alen += *d_alen_p;
+	  this_dom->n_gaplen += *d_gaplen_p;
+
+	  /* then we check for an insertion location */
+	  /* NOTE(review): new_dom is overwritten for every node whose
+	     end_pos exceeds the new end, so it finishes as the
+	     predecessor of the LAST such node, not the first -- confirm
+	     this is intended if the chain is meant to stay ordered by
+	     end_pos */
+	  if (this_dom->end_pos > new_left_domain_end) {
+	    /* this_dom is beyond the new_left_domain_end, so link it to the previous domain */
+	    new_dom = prev_dom;
+	  }
+	  prev_dom = this_dom;
+	}
+	/* all the scores are updated and new_dom is NULL (for beginning/end) or insertion location */
+
+	*d_ident_p = *d_alen_p = *d_gaplen_p = 0;
+	*d_score_p = init_score;
+
+	/* initialize this dom_entry */
+	left_domain_p->score = init_score;
+	left_domain_p->n_ident = left_domain_p->n_alen = 0;
+	left_domain_p->end_pos = annot_arr_p->end;
+
+	if (new_dom) {	/* left_dom is null if it is first/last */
+	  /* splice the new record in directly after new_dom */
+	  left_domain_p->next = new_dom->next;
+	  new_dom->next = left_domain_p;
+	}
+	else { /* left_dom is NULL for start OR end, prev_dom has end of list */
+	  /* prev_dom is non-NULL here: the chain had at least one node */
+	  if (prev_dom->end_pos < new_left_domain_end) { /* goes into the end of the list */
+	    prev_dom->next = left_domain_p;
+	  }
+	  else {
+	    /* place at start of list */
+	    left_domain_p->next = *left_domain_head_p;
+	    *left_domain_head_p = left_domain_p;
+	  }
+	}
+      } /* done with domain insertion/ domain_end update */
+      *left_end_p = (*left_domain_head_p)->end_pos;
+    }/* all done with '-' (domain) entry */
+    else if (annot_stack) {	/* not [-]V -- residue feature */
+      if (have_push_features_p) { *have_push_features_p = 1; }
+	/* despite its indentation, this push is unconditional within
+	   the annot_stack branch */
+	push_stack(annot_stack, left_domain_p);
+    }
+    return 1;
+  }
+}
+
+/* next_annot_match() processes everything that happens at position
+   ip: all annotations annot_arr[i_annot...] whose pos equals ip, and
+   every domain end for which ip equals *left_end_p.
+
+   Each event is delegated to process_annot_match(), which returns 1
+   when it consumed annot_arr[i_annot] and 0 when it closed a domain
+   instead.  *ann_comment (if the pointer is not NULL) is cleared
+   before processing starts.
+
+   returns the index of the first annotation that was not consumed.
+*/
+int
+next_annot_match(int *itmp, int *pam2aa0v, 
+		 long ip, long ia, char *sp1, char *sp1a, const unsigned char *sq,
+		 int i_annot, int n_annot, struct annot_entry **annot_arr, char **ann_comment,
+		 void *annot_stack, int *have_push_features_p, int *v_delta,
+		 int *d_score_p, int *d_ident_p, int *d_alen_p, int *d_gaplen_p,
+		 struct domfeat_data **left_domain_head_p,
+		 struct domfeat_data *left_domain_p,
+		 long *left_end_p, int init_score)  {
+  int annot_here, n_consumed;
+
+  if (ann_comment) { *ann_comment = NULL; }
+
+  /* count through the annotations at this position (long ip) */
+  for (;;) {
+    annot_here = (i_annot < n_annot && ip == annot_arr[i_annot]->pos);
+    if (!annot_here && ip != *left_end_p) {
+      break;	/* neither an annotation nor a domain end at ip */
+    }
+
+    n_consumed = process_annot_match(itmp, pam2aa0v, ip, ia, sp1, sp1a, sq,
+                                     annot_arr[i_annot], n_annot, ann_comment,
+                                     annot_stack, have_push_features_p, v_delta,
+                                     d_score_p, d_ident_p, d_alen_p, d_gaplen_p,
+                                     left_domain_head_p, &left_domain_p[i_annot],
+                                     left_end_p, init_score);
+    i_annot += n_consumed;
+  }
+
+  return i_annot;
+}
+
+/* align_type() classifies one aligned residue pair and updates the
+   alignment counters.
+
+   score        -- substitution score of the pair
+   sp0, sp1     -- the two displayed residues (compared case-insensitively)
+   nt_align     -- non-zero for nucleotide alignments (ambiguity code 'N',
+                   T/U treated as identical); zero for protein ('X')
+   aln          -- counters to update (nsim, npos, nident, nmismatch,
+                   ngap_q, ngap_l); may be NULL to classify only
+   pam_x_id_sim -- > 0: N:N / X:X pairs count as similar (M_POS, nsim++);
+                   < 0: N:N / X:X pairs are not counted as identities
+                        (nident--, nmismatch++), although the return
+                        value for them is still M_IDENT;
+                   0: no special treatment
+
+   returns M_NEG, M_ZERO, M_POS or M_IDENT (a_mark.h)
+*/
+int align_type(int score, char sp0, char sp1, int nt_align, struct a_struct *aln, int pam_x_id_sim) {
+  int spa_val;
+  int up0, up1;		/* upper-cased residues */
+  int ambig_pair;	/* N:N (nucleotide) or X:X (protein) */
+
+  /* toupper() is only defined for unsigned char values and EOF, so
+     convert before calling it (plain char may be negative) */
+  up0 = toupper((unsigned char)sp0);
+  up1 = toupper((unsigned char)sp1);
+  ambig_pair = ((nt_align && up0=='N' && up1=='N') ||
+                (!nt_align && up0=='X' && up1=='X'));
+
+  if (score<0) {
+    spa_val = M_NEG;
+  }
+  else if (score == 0) {
+    spa_val = M_ZERO;
+    if (aln) aln->nsim++;
+  }
+  else {
+    spa_val = M_POS;
+    if (aln) {aln->nsim++; aln->npos++;}
+  }
+
+  /* correct for score < 0 with 'N:N'/'X:X' */
+  if (pam_x_id_sim > 0 && ambig_pair) {	/* > 0 -> identical, similar */
+    spa_val = M_POS;
+    if (aln) {
+      aln->nsim++;
+    }
+  }
+
+  /* assume a mismatch, then undo it if the pair turns out identical */
+  if (aln) aln->nmismatch++;
+  if (up0 == up1) {
+    spa_val = M_IDENT;
+    if (aln) {
+      aln->nident++;
+      aln->nmismatch--;
+    }
+  }
+  else if (nt_align) {
+    if ((up0 == 'T' && up1 == 'U') || (up0 == 'U' && up1 == 'T')) {
+      spa_val = M_IDENT;
+      if (aln) {
+        aln->nident++;
+        aln->nmismatch--;
+      }
+    }
+    /* add to gap count for 'N' matches ?? */
+    else if (aln && up0 == 'N') aln->ngap_q++;
+    else if (aln && up1 == 'N') aln->ngap_l++;
+  }
+
+  /* correct nident, nmismatch for N:N / X:X */
+  if (pam_x_id_sim < 0 && ambig_pair) {	/* < 0 -> not counted as identical */
+    if (aln) {
+      aln->nident--;
+      aln->nmismatch++;
+    }
+  }
+
+  return spa_val;
+}
+
+/* seq_pos works with comment_var()/display_push_features()/do_url1()
+   to convert a position for display.
+
+   pos -- position to convert
+   rev -- non-zero for reversed sequences
+   off -- 0 for 0 based offsets, 1 for 1-based offsets (only used
+          when rev is set)
+
+   returns pos unchanged for forward sequences, and -pos-1 + off for
+   reversed sequences
+ */
+int
+seq_pos(int pos, int rev, int off) {
+
+  if (!rev) {
+    return pos;
+  }
+  return (-pos - 1) + off;
+}
+
+/* comment_var() appends the description of one variant residue to
+   annot_var_dyn.
+
+   target = 0 (aa0), 1 (aa1)
+
+   d_type = display_type (annot_fmt in cal_cons.c):
+            1 (long text),   " Variant: %ld%c%c%ld%c : %c%ld%c" (target 1)
+                             " qVariant: ..."                   (otherwise),
+                             followed by " : <ann_comment>" when
+                             ann_comment is not NULL, then "\n"
+            2 (-m 9c code)   "|%c%c:%ld%c%c%ld%c"
+            any other value appends nothing
+
+   i0_pos/i1_pos have already been converted to reverse coordinate if necessary
+
+   The positions are long, so they are formatted with %ld.  The
+   annotation comment is appended directly with dyn_strcat() rather
+   than through the fixed-size scratch buffer, so a long comment can
+   neither overflow the buffer nor be truncated.
+*/
+void comment_var (long i0_pos, char sp0, long i1_pos, char sp1, char o_sp1,
+		  char sim_char, const char *ann_comment,
+		  struct dyn_string_str *annot_var_dyn, int target, int d_type)
+{
+  char tmp_str[MAX_LSTR], ann_ch0, ann_ch1;
+
+  if (d_type == 1) {
+    if (target ==1) {
+      snprintf(tmp_str, sizeof(tmp_str), " Variant: %ld%c%c%ld%c : %c%ld%c",
+               i0_pos+1, sp0, sim_char, i1_pos+1, sp1, o_sp1, i1_pos+1, sp1);
+    }
+    else {
+      /* target is normally 0 here; any other non-zero value still
+         selects the aa1 position, as before */
+      snprintf(tmp_str, sizeof(tmp_str), " qVariant: %ld%c%c%ld%c : %c%ld%c",
+               i0_pos+1, sp0, sim_char, i1_pos+1, sp1, o_sp1,
+               (target ? i1_pos+1 : i0_pos+1), sp0);
+    }
+    dyn_strcat(annot_var_dyn, tmp_str);
+
+    if (ann_comment) {
+      dyn_strcat(annot_var_dyn, " : ");
+      /* dyn_strcat() takes char * but only reads its argument */
+      dyn_strcat(annot_var_dyn, (char *)ann_comment);
+    }
+
+    dyn_strcat(annot_var_dyn, "\n");
+  }
+  else if (d_type == 2) {
+    if (target == 1) {
+      ann_ch0 = 'X';
+      ann_ch1 = 'V';
+    }
+    else {
+      ann_ch0 = 'V';
+      ann_ch1 = 'X';
+    }
+
+    snprintf(tmp_str, sizeof(tmp_str), "|%c%c:%ld%c%c%ld%c",
+             ann_ch0,ann_ch1,
+             i0_pos+1,sp0, sim_char,i1_pos+1,sp1);
+    dyn_strcat(annot_var_dyn, tmp_str);
+  }
+}
+
+/* display push features is designed to display both individual
+   residue features (active site, variant, modification site) and
+   domain boundaries.  Domain boundary information is displayed when
+   the domain is closed or the alignment boundary has been exceeded
+   (for open domains).  Thus, it uses the current site for the end,
+   and the domfeat_data information in annot_stack.
+
+   Every entry is popped from annot_stack (which is empty on return)
+   and its text is appended to annot_var_dyn.
+
+   d_type == 1 : full display ([q]Region: %d-%d:%d-%d : score=%d ... ),
+                 and [q]Site: lines for residue features with a comment
+   d_type == 2 : -m9C/-m8CC calc_code "|RX:%d-%d:%d-%d:s=%d;b=%.1f;I=%.3f;Q=%.1f";
+   d_type == 3 : -m9I CALC_ID_DOM short domains, variants (only
+                 domains with target == 1 are written)
+   Any other combination appends nothing for that entry.
+
+   A domain without a comment is displayed with an empty comment.
+   Comments are appended with dyn_strcat() instead of being formatted
+   into the fixed-size scratch buffers, so long comments cannot
+   overflow them.
+
+   NOTE(review): sw_score is used as a divisor without a zero check.
+*/
+
+void
+display_push_features(void *annot_stack, struct dyn_string_str *annot_var_dyn,
+		      long i0_pos, char sp0, long i1_pos, char sp1, char sym,
+		      int tot_score, double comp, int sw_score, int n0, int n1,
+		      void *pstat_void, int d_type) {
+  struct domfeat_data *this_dom_p;
+  double lbits, total_bits, zscore, lprob, lpercid;
+  char *ann_comment, *bp;
+  const char *dom_name;
+  char tmp_lstr[MAX_LSTR], tmp_str[MAX_STR];
+  int q_min, l_min;
+  int q_end, l_end;	/* 1-based end positions (the current site) */
+  char *dt1_fmt, *dt2_fmt;
+  double tot_sw_norm;
+
+  tot_sw_norm = (double)tot_score/(double)sw_score;
+
+  zscore = find_z(tot_score, 1.0, n1, comp, pstat_void);
+  total_bits = zs_to_bit(zscore, n0, n1);
+
+  /* the positions are long but the formats (like q_min/l_min) use %d */
+  q_end = (int)(i0_pos+1);
+  l_end = (int)(i1_pos+1);
+
+  while ((this_dom_p = (struct domfeat_data *)pop_stack(annot_stack))!=NULL) {
+
+    if (this_dom_p->annot_entry_p->label == '-') {
+      /* domain: pos is in the annotated sequence, a_pos in the other one */
+      if (this_dom_p->annot_entry_p->target == 1) {
+        q_min = this_dom_p->a_pos+1;
+        l_min = this_dom_p->pos+1;
+        dt2_fmt = "|XR:%d-%d:%d-%d:s=%d;b=%.1f;I=%.3f;Q=%.1f";
+      }
+      else {
+        q_min = this_dom_p->pos+1;
+        l_min = this_dom_p->a_pos+1;
+        dt2_fmt = "|RX:%d-%d:%d-%d:s=%d;b=%.1f;I=%.3f;Q=%.1f";
+      }
+
+      if (this_dom_p->score < 0) {
+        lbits = 0.0;
+        lprob = 1.0;
+      }
+      else {
+        lbits = total_bits * (double)this_dom_p->score/sw_score;
+        zscore = find_z(this_dom_p->score * tot_sw_norm, 1.0, n1, comp, pstat_void);
+        lprob = zs_to_p(zscore);
+      }
+
+      /* convert the probability to a -10*log10() quality value */
+      if (lprob > 0.99) lprob = 0.0;
+      else if (lprob < 1e-300) lprob = 3000.0;
+      else lprob = -10.0*log(lprob)/log(10.0);
+
+      if (this_dom_p->n_alen - this_dom_p->n_gaplen > 0) {
+        lpercid = ((double)this_dom_p->n_ident)/(double)(this_dom_p->n_alen-this_dom_p->n_gaplen);
+      }
+      else lpercid = 0.0;	/* was -1.0, but 0.0 for consistency with annot_blast_btop2.pl */
+
+      /* a missing comment is shown as an empty string */
+      dom_name = (this_dom_p->annot_entry_p->comment) ? this_dom_p->annot_entry_p->comment : "";
+
+      if (d_type == 1) {
+        if (this_dom_p->annot_entry_p->target == 0) {
+          dt1_fmt = " qRegion: %d-%d:%d-%d : score=%d; bits=%.1f; Id=%.3f; Q=%.1f :  ";
+        } else {
+          dt1_fmt = " Region: %d-%d:%d-%d : score=%d; bits=%.1f; Id=%.3f; Q=%.1f :  ";
+        }
+        snprintf(tmp_lstr, sizeof(tmp_lstr), dt1_fmt, q_min, q_end,
+                 l_min, l_end, this_dom_p->score, lbits, lpercid, lprob);
+        dyn_strcat(annot_var_dyn, tmp_lstr);
+        dyn_strcat(annot_var_dyn, (char *)dom_name);
+        dyn_strcat(annot_var_dyn, "\n");
+      }
+      else if (d_type == 2) {
+        snprintf(tmp_lstr, sizeof(tmp_lstr), dt2_fmt,
+                 q_min, q_end,
+                 l_min, l_end, this_dom_p->score, lbits,lpercid, lprob);
+
+        if (this_dom_p->annot_entry_p->comment) {
+          /* only the first word of the comment is used */
+          SAFE_STRNCPY(tmp_str,this_dom_p->annot_entry_p->comment,sizeof(tmp_str));
+          if ((bp=strchr(tmp_str,' '))!=NULL) { *bp = '\0';}
+          SAFE_STRNCAT(tmp_lstr,";C=",sizeof(tmp_lstr));
+          SAFE_STRNCAT(tmp_lstr,tmp_str,sizeof(tmp_lstr));
+        }
+        dyn_strcat(annot_var_dyn, tmp_lstr);
+      }
+      else if (d_type == 3 && this_dom_p->annot_entry_p->target == 1) {	/* CALC_ID_DOM domain names */
+        SAFE_STRNCPY(tmp_str,dom_name,sizeof(tmp_str));
+        /* comment out to allow spaces in domain names */
+        if ((bp=strchr(tmp_str,' ')) != NULL) { *bp='\0';}
+        snprintf(tmp_lstr, sizeof(tmp_lstr), "%s;",tmp_str);
+        dyn_strcat(annot_var_dyn, tmp_lstr);
+      }
+      /* any other d_type/target combination: nothing to display */
+    }
+    else if ((ann_comment = this_dom_p->annot_entry_p->comment)) {
+      /* residue feature: only shown in the long format, and only
+         when it carries a comment */
+      if (d_type == 1 ) {
+        if (this_dom_p->annot_entry_p->target == 0) {dt1_fmt = " qSite:%c : %d%c%c%d%c : ";}
+        else {dt1_fmt = " Site:%c : %d%c%c%d%c : ";}
+        snprintf(tmp_lstr, sizeof(tmp_lstr), dt1_fmt,
+                 this_dom_p->annot_entry_p->label, q_end, sp0,
+                 sym, l_end, sp1);
+        dyn_strcat(annot_var_dyn, tmp_lstr);
+        dyn_strcat(annot_var_dyn, ann_comment);
+        dyn_strcat(annot_var_dyn, "\n");
+      }
+    }
+  }
+}
diff --git a/src/dec_pthr_subs.c b/src/dec_pthr_subs.c
new file mode 100644
index 0000000..f9e2ad7
--- /dev/null
+++ b/src/dec_pthr_subs.c
@@ -0,0 +1,246 @@
+/* $Id: dec_pthr_subs.c 625 2011-03-23 17:21:38Z wrp $ */
+
+/* copyright (c) 1996, 1997, 1998, 1999 by William R. Pearson and the
+   Rector & Visitors of the University of Virginia */
+
+/* Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under this License is distributed on an "AS
+   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+   express or implied.  See the License for the specific language
+   governing permissions and limitations under the License. 
+*/
+
+
+/* this file isolates the pthreads calls from the main program */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <sys/types.h>
+#include <signal.h>
+
+#include "param.h"
+
+#include <pthread.h>
+#define XTERNAL
+#include "thr.h"
+#undef XTERNAL
+#include "pthr_subs.h"
+
+extern void work_thread (struct thr_str *work_info);
+
+/* start the threads working */
+
+/* init_thr() initializes the shared mutexes and condition variables
+   and creates nthreads worker threads, each running work_thread()
+   with the same work_info pointer.  The thread handles are stored in
+   threads[].
+
+   The process exits with status 1 if nthreads > MAX_WORKERS.  Other
+   failures are only reported through check().
+
+   NOTE(review): pthread_attr_create(), pthread_mutexattr_default,
+   pthread_startroutine_t and pthread_addr_t are not part of the
+   standard POSIX threads interface -- presumably this file targets an
+   older vendor (DEC) pthreads library; confirm before porting.
+*/
+void init_thr(int nthreads, struct thr_str *work_info)
+{
+  int status, i;	/* i is not used */
+  pthread_attr_t thread_attr;
+
+  if (nthreads > MAX_WORKERS) {
+    fprintf ( stderr," cannot start %d threads, max: %d\n",
+	      nthreads, MAX_WORKERS);
+    exit(1);
+  }
+
+  /* mutex and condition variable initialisation */
+
+  status = pthread_mutex_init(&reader_mutex, pthread_mutexattr_default);
+  check(status,"Reader_mutex init bad status\n");
+   
+  status = pthread_mutex_init(&worker_mutex, pthread_mutexattr_default);
+  check(status,"Worker_mutex init bad status\n");
+
+  status = pthread_cond_init(&reader_cond_var, pthread_condattr_default);
+  check(status,"Reader_cond_var init bad status\n");
+
+  status = pthread_cond_init(&worker_cond_var, pthread_condattr_default);
+  check(status,"Worker_cond_var init bad status\n");
+
+  status = pthread_mutex_init(&start_mutex, pthread_mutexattr_default);
+  check(status,"Start_mutex init bad status\n");
+
+  status = pthread_cond_init(&start_cond_var, pthread_condattr_default);
+  check(status,"Start_cond_var init bad status\n");
+
+  /* change stacksize on threads */    /***************************/
+
+  status = pthread_attr_create( &thread_attr );
+  check(status,"attribute create bad status\n");
+
+  status = pthread_attr_setstacksize( &thread_attr, 1000000);
+  check(status,"stacksize change bad status\n");
+
+  /* start the worker threads */
+
+  /* work_info->worker itself is the loop counter, so it holds the index
+     of the thread being created.
+     NOTE(review): every thread receives the same work_info pointer while
+     ->worker keeps changing -- confirm how work_thread() reads it */
+  for (work_info->worker=0; work_info->worker < nthreads;
+       work_info->worker++) {
+    /**********************/
+    status=pthread_create(&threads[work_info->worker],thread_attr,
+			  (pthread_startroutine_t)&work_thread,
+			  (pthread_addr_t)work_info);
+    check(status,"Pthread_create failed\n");
+  }
+}
+
+/* start_thr() releases the worker threads blocked in wait_thr(): with
+   start_mutex held it clears start_thread and broadcasts on
+   start_cond_var. */
+void start_thr()
+{
+  int status;
+
+  /* tell threads to proceed */
+
+  status = pthread_mutex_lock(&start_mutex);
+  check(status,"Start_mutex lock bad status in main\n");
+
+  start_thread = 0;  /* lower predicate */
+
+  /* the broadcast status is overwritten by the unlock status below
+     before it is checked */
+  status = pthread_cond_broadcast(&start_cond_var);
+  status = pthread_mutex_unlock(&start_mutex);
+  check(status,"Start_mutex unlock bad status in main\n");
+}
+
+/* get_rbuf() takes the next buffer from the reader queue, blocking on
+   reader_cond_var while the queue is empty.
+
+   cur_buf      -- receives the buffer address
+   max_work_buf -- length of the circular reader_buf[] array (the
+                   read index wraps modulo this value)
+*/
+void get_rbuf(struct buf_head **cur_buf, int max_work_buf)
+{
+  int status;
+
+  status = pthread_mutex_lock(&reader_mutex);  /* lock reader_buf structure */
+
+  check(status,"Reader_mutex lock in master bad status\n");
+
+  /* no reader bufs:  wait for signal to proceed */
+  /* (the return value of pthread_cond_wait() is not checked) */
+  while (num_reader_bufs == 0) {
+    pthread_cond_wait(&reader_cond_var,&reader_mutex);
+  }
+
+  *cur_buf = reader_buf[reader_buf_readp];  /* get the buffer address */
+  reader_buf_readp = (reader_buf_readp+1)%(max_work_buf);  /* increment index */
+  num_reader_bufs--;
+
+  status = pthread_mutex_unlock(&reader_mutex);  /* unlock structure */
+  check(status,"Reader_mutex unlock in master bad status\n");
+}
+
+/* put_rbuf() appends cur_buf to the worker queue and signals one
+   waiter on worker_cond_var.
+
+   cur_buf      -- buffer to hand over
+   max_work_buf -- length of the circular worker_buf[] array (the
+                   write index wraps modulo this value)
+*/
+void put_rbuf(struct buf_head *cur_buf, int max_work_buf)
+{
+  int status;
+
+  /* give the buffer to a thread, and wait for more */
+  status = pthread_mutex_lock(&worker_mutex);  /* lock worker_buf_structure */
+  check(status,"Worker_mutex lock in master bad status\n");
+
+  /*  Put buffer onto available for workers list */
+  worker_buf[worker_buf_readp] = cur_buf;
+  worker_buf_readp = (worker_buf_readp+1)%(max_work_buf);
+  num_worker_bufs++;   /* increment number of buffers available to workers */
+
+  /*  Signal one worker to wake and start work */
+  /* (the signal status is overwritten by the unlock status below) */
+  status = pthread_cond_signal(&worker_cond_var);
+
+  status = pthread_mutex_unlock(&worker_mutex);
+  check(status,"Worker_mutex unlock in master bad status\n"); 
+}
+
+/* put_rbuf_done() queues the final buffer, sets reader_done, wakes
+   all waiters on worker_cond_var and then joins each of the nthreads
+   worker threads.
+
+   nthreads     -- number of entries of threads[] to join
+   cur_buf      -- last buffer to hand over
+   max_work_buf -- length of the circular worker_buf[] array
+*/
+void put_rbuf_done(int nthreads, struct buf_head *cur_buf, int max_work_buf)
+{
+  int status, i;
+  void *exit_value;
+
+  /* give the last buffer to the workers */
+  status = pthread_mutex_lock(&worker_mutex);  /* lock worker_buf_structure */
+  check(status,"Worker_mutex lock in master bad status\n");
+
+  /*  Put buffer onto available for workers list */
+  worker_buf[worker_buf_readp] = cur_buf;
+  worker_buf_readp = (worker_buf_readp+1)%(max_work_buf);
+  num_worker_bufs++;   /* increment number of buffers available to workers */
+
+  /*  Mark the reader as finished and wake ALL waiting workers, so that
+      get_wbuf() can return 0 once the queue is empty */
+
+  reader_done = 1;
+  status = pthread_cond_broadcast(&worker_cond_var);
+
+  status = pthread_mutex_unlock(&worker_mutex);
+  check(status,"Worker_mutex unlock in master bad status\n"); 
+
+  /* wait for every worker thread to exit */
+ 
+  for (i=0; i < nthreads; i++) {
+    status = pthread_join( threads[i], &exit_value);
+    check(status,"Pthread_join bad status\n");
+
+    /* NOTE(review): detach after join, with a pointer argument --
+       presumably required by the older pthreads interface this file
+       targets; confirm */
+    status = pthread_detach( &threads[i]);
+    check(status,"Pthread_detach bad status\n");
+  } 
+}
+
+/* wait_thr() blocks the calling thread on start_cond_var until
+   start_thread has been cleared (see start_thr()). */
+void wait_thr()
+{
+  int status;
+
+  /* Wait on master to give start signal */
+  status = pthread_mutex_lock(&start_mutex);
+  check(status,"Start_mutex lock bad status in worker\n");
+
+  /* re-test the predicate after every wake-up */
+  while (start_thread) {
+         status = pthread_cond_wait(&start_cond_var, &start_mutex);
+         check(status,"Start_cond_wait bad status in worker\n");
+  }
+
+  status = pthread_mutex_unlock(&start_mutex);
+  check(status,"Start_mutex unlock bad status in worker\n");
+}
+
+/* get_wbuf() takes the next buffer from the worker queue, blocking on
+   worker_cond_var while the queue is empty.
+
+   cur_buf      -- receives the buffer address
+   max_work_buf -- length of the circular worker_buf[] array
+
+   returns 1 with *cur_buf set, or 0 (with *cur_buf untouched) when
+   the queue is empty and reader_done is set.
+*/
+int get_wbuf(struct buf_head **cur_buf, int max_work_buf)
+{
+  int status;
+
+  /* get a buffer to work on */
+  status = pthread_mutex_lock(&worker_mutex);
+  check(status,"First worker_mutex lock in worker bad status\n");
+
+  /*  No worker_bufs available:  wait for reader to produce some */
+  while (num_worker_bufs == 0) {
+    /*  Exit if reader has finished */
+    if (reader_done) {
+      pthread_mutex_unlock(&worker_mutex);
+      return 0;
+    }
+    pthread_cond_wait(&worker_cond_var,&worker_mutex);
+  } /* end while */
+
+  /*  Get the buffer from list */
+  *cur_buf = worker_buf[worker_buf_workp];
+  worker_buf_workp = (worker_buf_workp+1)%(max_work_buf);
+  num_worker_bufs--;
+
+  status = pthread_mutex_unlock(&worker_mutex);
+  check(status,"First worker_mutex unlock in worker bad status\n");
+  return 1;
+}
+
+/* put_wbuf() returns cur_buf to the reader queue.
+
+   cur_buf      -- buffer being returned
+   max_work_buf -- length of the circular reader_buf[] array
+*/
+void put_wbuf(struct buf_head *cur_buf, int max_work_buf)
+{
+  int status;
+
+  /* put buffer back on list for reader */
+  status = pthread_mutex_lock(&reader_mutex);
+  check(status,"Reader_mutex lock in worker bad status\n");
+    
+  reader_buf[reader_buf_workp] = cur_buf;
+  reader_buf_workp = (reader_buf_workp+1)%(max_work_buf);
+  num_reader_bufs++;
+
+     /* the queue was empty before this buffer was added:  wake reader */
+  if (num_reader_bufs == 1) {
+    pthread_cond_signal(&reader_cond_var);
+  }
+
+  status = pthread_mutex_unlock(&reader_mutex);
+  check(status,"Reader_mutex unlock in worker bad status\n");
+}
diff --git a/src/dec_pthr_subs.h b/src/dec_pthr_subs.h
new file mode 100644
index 0000000..fd2b8e4
--- /dev/null
+++ b/src/dec_pthr_subs.h
@@ -0,0 +1,42 @@
+
+/* $Id: dec_pthr_subs.h 625 2011-03-23 17:21:38Z wrp $ */
+/* $Revision: 625 $  */
+
+#include <pthread.h>
+
+#define check(status,string) \
+     do { if ((status) != 0) perror(string); } while (0)  /* pthread calls return 0 or an error number, never -1 */
+
+#ifndef XTERNAL
+pthread_t threads[MAX_WORKERS];
+
+/* mutex stuff */
+
+pthread_mutex_t reader_mutex;      /* empty buffer pointer structure lock */
+pthread_mutex_t worker_mutex;      /* full buffer pointer structure lock */
+
+/* condition variable stuff */
+
+pthread_cond_t reader_cond_var;    /* condition variable for reader */
+pthread_cond_t worker_cond_var;    /* condition variable for workers */
+
+pthread_mutex_t start_mutex;       /* start-up synchronisation lock */
+pthread_cond_t start_cond_var;     /* start-up synchronisation condition variable */
+/* NOTE(review): no #else here, so the externs below are compiled with the definitions above and nothing is declared when XTERNAL is defined -- confirm intent */
+extern pthread_t threads[];
+
+/* mutex stuff */
+
+extern pthread_mutex_t reader_mutex;
+extern pthread_mutex_t worker_mutex;
+
+/* condition variable stuff */
+
+extern pthread_cond_t reader_cond_var;
+extern pthread_cond_t worker_cond_var;
+
+extern pthread_mutex_t start_mutex;
+extern pthread_cond_t start_cond_var;
+extern int start_thread;	/* declared only; no definition appears in this header */
+
+#endif
diff --git a/src/defs.h b/src/defs.h
new file mode 100644
index 0000000..aae77e1
--- /dev/null
+++ b/src/defs.h
@@ -0,0 +1,171 @@
+/* Concurrent read version */
+
+/* $Id: defs.h 1261 2014-06-11 19:38:36Z wrp $ */
+/* $Revision: 1261 $  */
+
+#ifdef SUNOS
+#include <sys/stdtypes.h>
+#endif
+
+#ifndef IS_BIG_ENDIAN
+#if defined(__BIG_ENDIAN__) || defined(_BIG_ENDIAN)
+#define IS_BIG_ENDIAN
+#else
+#undef IS_BIG_ENDIAN
+#endif
+#endif
+
+#if !defined(MAX_WORKERS) && !defined(PCOMPLIB)
+#define MAX_WORKERS 1
+#endif
+#if defined(PCOMPLIB) && !defined(MAXWRKR)
+#define MAXWRKR	64
+#endif
+/* bounded copy/append helpers; each is one expression, and dest/dest_len (str) are evaluated more than once */
+#define SAFE_STRNCPY(dest,src,dest_len) (strncpy((dest),(src),(dest_len)), (dest)[(dest_len)-1]='\0')
+#define SAFE_STRNCAT(str,cat,str_len) strncat((str),(cat),(str_len)-strlen(str)-1)
+
+/* constants associated with displaying annotation links */
+#ifndef DESCR_OFFSET
+#define DESCR_OFFSET 20
+#endif
+
+#define NO_FILE_EXIT 4
+
+
+/* 3-Oct-2003 - we can now have 2 nucleotide query types, DNA
+   and RNA.  pst.dnaseq can also be SEQT_RNA.
+   ldnaseq can only be DNA */
+
+#define SEQT_DNA 1
+#define SEQT_RNA 3	/* DNA and RNA seqtypes must be odd */
+
+#define SEQT_PROT 0
+#define SEQT_UNK -1
+#define SEQT_OTHER 2
+
+#ifndef DEF_NMLEN
+#define DEF_NMLEN 6
+#endif
+
+#define DEF_MIN_BITS 40	/* minimum number of bits required, appropriate for swissprot */
+
+/* unfortunately, there is an important relationship between MAXTRN and
+   MAXLIB embedded here.  MAXTRN must be >= (MAXLIB)/3
+   or it will be possible for a translated DNA sequence to be longer
+   than the translation space available */
+
+#define MAX_STR	512 /* standard label/message buffer */
+#define MAX_SSTR 32 /* short string */
+#define MAX_LSTR 4096 /* long label/message buffer */
+#define MAX_FN  120 /* maximum size of a file name */
+#define MAX_CH	40 /* maximum number of library choices */
+#ifndef SMALLMEM
+#define MAX_LF  2000 /* maximum number of library files */
+#else
+#define MAX_LF  80 /* maximum number of library files */
+#endif
+
+#ifndef MAX_MEMK
+#if defined(BIG_LIB64) && (defined(COMP_THR) || defined(PCOMPLIB))
+#define MAX_MEMK (8*1024*1024)	/* 8 GB (<<10) for library in memory */
+#else
+#define MAX_MEMK (2*1024*1024)	/* 2 GB (<<10) for library in memory */
+#endif
+#endif
+
+/* padding at the end of sequences for ALTIVEC, other vector
+   processors */
+#define SEQ_PAD 16
+
+#define MAX_UID 20 /* length of libstr, used for character keys with SQL */
+
+#define BUF_MULT 2	/* increase to increase the number of buffers */
+#define DEF_WORKER_BUF 6000000
+#define AVE_AA_LEN 400
+#define AVE_NT_LEN 1200
+
+#define MAX_RSTATS 500		/* number of random shuffle stats */
+#define MIN_LOCAL_LEN 33	/* minimum length for addn'l local alignments
+				   (should be in pstruct)*/
+#ifndef SMALLMEM
+#define MAXTST	40000		/* longest query */
+#define MAXLIB	150000		/* longest library sequence*/
+#define MAXLIB_P 45000
+#define MIN_RES 2000		/* minimum amount allocated for alignment */
+#ifndef TFAST
+#define MAXTRN  45000		/* buffer for fastx translation */
+#else
+#define MAXTRN 165000		/* buffer for tfastx translation, must be > 3 * MAXTST */
+#endif
+#define SEQDUP	150		/* future - overlap */
+#ifndef PCOMPLIB
+#ifndef MAX_BEST
+#define MAX_BEST	60000	/* max number of best scores */
+#endif
+#define MAX_STATS 60000
+#else
+#ifndef MAX_BEST
+#define MAX_BEST	60000	/* max number of best scores */
+#endif
+#define MAX_STATS 60000
+#endif
+#define BIGNUM  1000000000
+#ifndef MAXINT
+#define MAXINT 2147483647
+#endif
+#define MAXLN	120	/* size of a library name */
+#else
+#define MAXTST	1500
+#define MAXLIB	10000
+#define MAXLIB_P MAXLIB
+#define MIN_RES 1000
+#ifndef TFAST
+#define MAXTRN  4500
+#else
+#define MAXTRN 11500
+#endif
+#define SEQDUP	300
+#define MAX_BEST 2000
+#define MAX_STATS 20000
+#define BIGNUM  32767
+#define MAXINT  32767
+#define MAXLN	40	/* size of a library name */
+#endif
+#if !defined(TFAST)
+#define MAXDIAG	(MAXTST+MAXLIB)
+#else
+#define MAXDIAG	(MAXTST+MAXTRN)
+#endif
+
+#define MAXPAM	600	/* maximum allowable size of the pam matrix */
+#define PROF_MAX 500
+#define ALF_MAX 30
+
+#define max(a,b) (((a) > (b)) ? (a) : (b))
+#define min(a,b) (((a) < (b)) ? (a) : (b))
+
+#define MX_ATYPE 7	/* markx==0,1,2 7=> no alignment */
+#define MX_ASEP  8	/* markx==3  - separate lines */
+#define MX_AMAP  16	/* markx==4,5 - graphic map */
+#define MX_HTML  32	/* markx==6  - HTML */
+#define MX_M9SUMM 64	/* markx==9(c) */
+#define MX_M10FORM 128	/* markx==10 - verbose output */
+#define MX_M11OUT 256	/* markx==11 - lalign lav */
+#define MX_M8OUT 512	/* markx==8 blast8 output */
+#define MX_M8COMMENT 1024	/* markx==8 blast8 output */
+#define MX_MBLAST 2048	/* markx=B blast output */
+#define MX_MBLAST2 4096	/* markx=BB more blast output */
+#define MX_ANNOT_COORD 16384 /* -m 0, use -m 0B for both */
+#define MX_ANNOT_MID  32768 /* markx 0M, 1M, 2M annotations in middle */
+#define MX_RES_ALIGN_SCORE (1<<20)  /* show residue alignment score, not alignment */
+
+/* codes for -m 9 (and the -m 8C? variants), stored in show_code */
+#define SHOW_CODE_ID	1	/* identity only */
+#define SHOW_CODE_IDD   2	/* identity with domains */
+#define SHOW_CODE_ALIGN 4	/* encoded alignment */
+#define SHOW_CODE_CIGAR 8	/* CIGAR vs old encoded alignment */
+#define SHOW_CODE_BTOP	16	/* BLAST BTOP encoding; NOTE(review): same value as SHOW_CODE_EXT -- confirm */
+#define SHOW_CODE_MASK  12	/* use higher bits for annotation format; covers ALIGN|CIGAR only */
+#define SHOW_CODE_EXT   16	/* encode identity, mismatch state */
+#define SHOW_ANNOT_FULL 32	/* show full-length annot in calc_code */
diff --git a/src/doinit.c b/src/doinit.c
new file mode 100644
index 0000000..f64b3a0
--- /dev/null
+++ b/src/doinit.c
@@ -0,0 +1,975 @@
+/*	doinit.c	general and function-specific initializations */
+
+/* $Id: doinit.c 1267 2014-07-29 13:50:40Z wrp $ */
+
+/* copyright (c) 1996, 1997, 1998, 2014 by William R. Pearson and the
+   Rector & Visitors of the University of Virginia */
+
+/* Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under this License is distributed on an "AS
+   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+   express or implied.  See the License for the specific language
+   governing permissions and limitations under the License. 
+*/
+
+/* this file performs general initializations of search parameters
+
+   In addition, it calls several functions in init??.c that provide
+   program-specific initializations:
+
+   f_initenv()	- called from initenv()
+   f_getopt()	- called from initenv() during a getopt() scan
+   f_getarg()	- called from initenv() after the getopt() scan
+
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#if defined(UNIX) || defined(_MACH)
+#include <unistd.h>
+#endif
+#ifndef PCOMPLIB
+#ifdef IRIX
+#include <sys/sysmp.h>
+#endif
+#else
+#include "msg.h"	/* need for FIRSTNODE */
+#ifdef MPI_SRC
+#include "mpi.h"
+#endif
+#endif
+
+#ifdef WIN32
+#include <windows.h>
+#endif
+
+#include "defs.h"
+#include "param.h"
+#include "upam.h"	/* required for 'U' option change of nascii */
+
+#include "structs.h"
+
+#define XTERNAL
+#include "uascii.h"
+#undef XTERNAL
+
+#ifdef UNIX
+#include <getopt.h>
+#else
+extern int optind;		/* used by getopt() */
+extern char *optarg;
+#endif
+
+char prog_name[MAX_FN];
+
+extern void f_initenv(struct mngmsg *, struct pstruct *, unsigned char **);
+extern void f_lastenv(struct mngmsg *, struct pstruct *);
+extern void f_getopt(char, char *, struct mngmsg *, struct pstruct *);
+extern void f_getarg(int, char **, int, struct mngmsg *, struct pstruct *);
+extern void show_help(char *, int pgm_id);
+extern void show_all_help(char *pgm_name, int pgm_id);
+void g_init_opts(struct mngmsg *, struct pstruct *);
+void subs_env(char *dest, char *src, int dest_size);
+
+void add_ascii_ann(int *qascii, unsigned char *ann_arr);
+static int set_markx(int markx, int val, char c);
+static void pre_parse_markx(char *opt_arg, struct mngmsg *m_msp);
+static void parse_markx(char *opt_arg, struct markx_str *this_markx);
+static void get_annot_def_file(struct mngmsg *m_msp, char *fa_annot_env);
+void markx_to_m_msp(struct mngmsg *m_msp, struct markx_str *this_markx);
+void m_msp_to_markx(struct markx_str *this_markx, struct mngmsg *m_msp);
+
+int optcnt;
+int fa_max_workers=MAX_WORKERS;
+#ifdef PCOMPLIB
+int worker_1=0;
+int worker_n=0;
+#endif
+
+extern struct opt_def_str f_options[];
+
+void set_opt_disp_defs(char opt_char, struct opt_def_str *options, int type,
+		       int i_param1, int i_param2,
+		       double d_param1, double d_param2, char *s_param);
+
+/* ****************************************************************
+   The option/-help system has been substantially restructured to
+   allow more consistent -h/-help messages.
+
+   There are now two global arrays, opt_def_str g_options (global
+   options, parsed in doinit.c), and opt_def_str f_options
+   (function-specific options, parsed in initfa.c)
+
+   struct opt_def_str {
+     char opt_char;	# getopt single character option letter
+     int has_arg;	# does it have an option?
+     char *opt_str;	# getopt_long (future) long option name
+     char *opt_descr_s;	# short description of option
+     char *opt_descr_l; # long description of option (if NULL, use opt_descr_s)
+     int opt_rank;	# rank of option (not used)
+     int fmt_type;	# fmt type (for defaults): 1,2 ints, 3,4 doubles
+     int i_param1;	# int default1
+     int i_param2;	
+     double d_param1;	# double default1
+     double d_param2;
+   };
+
+   the g_opt_string and f_opt_string's parsed by getopt() are built
+   from these structures, guaranteeing that the options and help
+   messages are kept in sync.
+
+   long options descriptions (opt_descr_l) are saved in static arrays
+   (e.g. m_opt_descr[] in doinit.c, z_opt_descr[], s_opt_descr[] in
+   initfa.c
+
+   The default option values, which are displayed from i_param[1,2],
+   d_param[1,2], are set by g_init_opts() and f_init_opts() using
+   set_opt_disp_defs().  g_init_opts()/f_init_opts() should be called
+   as late as possible in the program.
+
+   **************************************************************** */
+
+static char m_opt_descr[] ="Output/alignment format;\n      0 - standard \":. \" alignment; 1 - \" xX\"; 2 - \".MS..\"; 3 - separate >fasta entries;\n      4 - \"---\" alignment map; 5 - 0+4; 6 - <html>;\n      8 - BLAST tabular; 8C commented BLAST tabular; 8CC BLAST tab CIGAR, 8CD BLAST tab CIGAR ext; 8CB BLAST tab BTOP\n      B - BLAST Query/Sbjct alignments; BB - complete BLAST output;\n      9 - FASTA tabular; 9c - FASTA tabular encoded; 9C FASTA tabular CIGAR encoded; 9B FASTA tabul [...]
+
+struct opt_def_str g_options[] = {
+  {'C', 1, "aname_length", "length of the query/sbjct name in alignments", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
+  {'D', 0, "debug", "enable debugging output", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
+  {'e', 1, "expand", "expand_script to extend hits", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
+  {'F', 1, "evalue_min", "min E()-value displayed", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
+#if defined(PCOMPLIB) || !defined(SHOW_HIST)
+  {'H', 0, "histogram", "show histogram", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
+#else
+  {'H', 0, "nohist", "no histogram", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
+#endif
+  {'i', 0, "revcomp", "search with reverse-complement", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
+#ifdef SHOW_HELP
+  {'I', 0, "interact", "interactive mode", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
+#endif
+  {'l', 1, "fastlibs", "FASTLIBS abbreviation file", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
+  {'L', 0, "long_info", "long library descriptions", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
+  {'m', 1, "outfmt", "output format", &m_opt_descr[0], 0, 0, 0, 0, 0.0, 0.0, NULL},
+  {'N', 1, "lib_length", "max library length before overlapping", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
+  {'o', 1, "offsets", "offset coordinates of query/subject", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
+  {'O', 1, "out", "write results to file", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
+#ifndef SHOW_HELP
+  {'q', 0, "quiet", "quiet -- do not prompt", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
+  {'Q', 0, "\0", "quiet -- do not prompt", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
+#else
+  {'q', 0, "quiet", "quiet [default] -- do not prompt", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
+  {'Q', 0, "\0", "quiet [default] -- do not prompt", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
+#endif
+  {'R', 1, "results_file", "raw score file", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
+  {'T', 1, "threads", "max threads/workers", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
+  {'v', 1, "shuffle_window", "shuffle window size", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
+  {'V', 1, "annotation", "annotation characters in query/library for aligments", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
+  {'w', 1, "aln_width", "width of alignment display", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
+  {'Z', 1, "db_size", "database size for E()-value", "[library entries] database size for E()-value", 0, 0, 0, 0, 0.0, 0.0, NULL},
+  {'\0', 0, "", "", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL}
+};
+
+/* g_init_opts() - copy the current global option values into g_options[] (via set_opt_disp_defs) as the defaults shown by help */
+void g_init_opts(struct mngmsg *m_msp, struct pstruct *ppst) {
+  set_opt_disp_defs('C', g_options, 1, m_msp->nmlen, 0, 0.0, 0.0, NULL);
+  set_opt_disp_defs('F', g_options, 3, 0, 0, m_msp->e_low, 0.0, NULL);
+  set_opt_disp_defs('m', g_options, 1, m_msp->markx, 0, 0.0, 0.0, NULL);
+  set_opt_disp_defs('o', g_options, 2, (int)m_msp->sq0off, (int)m_msp->sq1off, 0.0, 0.0,NULL);
+  set_opt_disp_defs('T', g_options, 1, fa_max_workers, 0, 0.0, 0.0, NULL);
+  set_opt_disp_defs('v', g_options, 1, ppst->zs_win, 0, 0.0, 0.0, NULL);
+  set_opt_disp_defs('w', g_options, 1, m_msp->aln.llen, 0, 0.0, 0.0, NULL);
+}
+
+void
+build_optstr(char *opt_str, int opt_len, struct opt_def_str *opt_defs);
+
+static int long_info_set=0;
+static int llen_set = 0;
+static int markx_set = 0;
+
+/* initenv() - set m_msp/ppst defaults, then apply the command-line options (g_options here, f_options via f_getopt()) */
+void initenv (int argc, char **argv, struct mngmsg *m_msp, 
+		 struct pstruct *ppst, unsigned char **aa0)
+{
+  char *cptr;
+  int  copt;
+#ifdef WIN32
+  SYSTEM_INFO siSysInfo;
+#endif
+
+   /* options for all search functions */
+   /* char   *g_optstr = "b:BC:d:DE:F:HiK:l:Lm:N:O:QqR:T:v:V:w:W:X:Z:"; */
+
+   char g_optstring[MAX_STR];
+   char f_optstring[MAX_STR];
+   char optstring[MAX_STR];
+
+   /* help functions exit(); try first */
+   if (argc == 1) {
+     show_help(m_msp->pgm_name, ppst->pgm_id);
+   }
+   if (argc > 1 && (strcmp(argv[1],"-help")==0 || strcmp(argv[1],"--help")==0)) {
+     show_all_help(m_msp->pgm_name, ppst->pgm_id);
+   }
+
+   build_optstr(g_optstring, sizeof(g_optstring), g_options);
+   build_optstr(f_optstring, sizeof(f_optstring), f_options);
+
+/*  these initializations will be used by all functions */
+
+   /* prog_name[] is only used for error messages */
+   strncpy(prog_name,argv[0],sizeof(prog_name));
+   prog_name[sizeof(prog_name)-1]='\0';
+
+#ifdef PCOMPLIB
+#ifdef MPI_SRC
+  MPI_Comm_size(MPI_COMM_WORLD,&fa_max_workers);
+  if (fa_max_workers <= 1) {
+    fprintf(stderr," nnodes = %d; no workers available\n",fa_max_workers);
+    exit(1);
+  }
+  else {
+    fa_max_workers -= FIRSTNODE;
+    fprintf(stderr," have %d workers\n",fa_max_workers);
+  }
+#endif
+#else	/* not PCOMPLIB */
+#if defined(IRIX)
+   fa_max_workers = sysmp(MP_NPROCS);
+#else
+#if defined(WIN32)
+   GetSystemInfo(&siSysInfo);
+   fa_max_workers = siSysInfo.dwNumberOfProcessors;
+#endif
+#if defined(UNIX) || defined(HAVE_SYSCONF)
+   fa_max_workers = sysconf(_SC_NPROCESSORS_CONF);
+#endif	/* UNIX || SYSCONF */
+#endif  /* !IRIX */
+#endif  /* !PCOMPLIB */
+
+   m_msp->ltitle[0] = '\0';
+
+   if ((cptr=getenv("FASTLIBS"))!=NULL) {
+     strncpy(m_msp->flstr,cptr,MAX_FN);
+     m_msp->flstr[MAX_FN-1] = '\0';
+   }
+   else m_msp->flstr[0]='\0';
+
+   m_msp->std_output = 1;
+   m_msp->hist.hist_a = NULL;
+   m_msp->outfile[0] = '\0';
+   m_msp->outfd = NULL;
+   m_msp->ldb_info.ldnaseq = SEQT_PROT;	/* library is protein */
+   m_msp->n1_low = ppst->n1_low = 0;
+   m_msp->n1_high = ppst->n1_high = BIGNUM;
+   m_msp->ql_start = 1;	/* start with first query sequence */
+   m_msp->ql_stop = BIGNUM;	/* end with the last query sequence */
+   m_msp->aa1save_buf_b = NULL;
+   m_msp->bline_buf_b = NULL;
+
+   m_msp->pamd1 = MAXSQ;
+   m_msp->pamd2 = MAXSQ;
+
+   m_msp->ldb_info.term_code = 0;
+
+   ppst->tr_type = 0;
+   ppst->debug_lib = 0;
+   m_msp->nshow = 20;
+   ppst->max_repeat = 50;
+   m_msp->nohist = 1;
+#if defined(PCOMPLIB)
+   m_msp->mshow = 20;
+#else
+#ifdef SHOW_HIST
+   m_msp->nohist = 0;
+#endif
+   m_msp->mshow = 50;
+#endif
+   m_msp->do_showbest = 1;
+   m_msp->ashow = -1;
+   m_msp->ashow_set = 0;
+   m_msp->nmlen = DEF_NMLEN;
+   m_msp->z_bits = 1;
+   m_msp->tot_ident = 0;
+   m_msp->mshow_set = 0;
+   m_msp->mshow_min = 0;
+   m_msp->aln.llen = 60;
+   m_msp->aln.llcntx = 30;
+   m_msp->aln.llcntx_set = 0;
+   m_msp->e_low = 0.0;
+   m_msp->e_cut_set = 0;
+   m_msp->revcomp = 0;
+   m_msp->long_info = 0;
+   m_msp->ldb_info.maxn = 0;
+   m_msp->ldb_info.dupn = SEQDUP;
+   m_msp->dfile[0] = '\0';
+   m_msp->tname[0] = '\0';
+   m_msp->lname[0] = '\0';
+   m_msp->link_lname[0] = '\0';
+   m_msp->show_code = 0;
+   m_msp->tot_show_code = 0;
+   m_msp->aln.showall = 0;
+   m_msp->markx = 0;
+   m_msp->tot_markx = 0;
+   m_msp->markx_list = NULL;
+   m_msp->align_done = 0;
+   m_msp->sq0off = m_msp->sq1off = 1;
+   strncpy(m_msp->sqnam,"aa",4);
+   strncpy(m_msp->sqtype,"protein",10);
+
+   /* annotation info */
+   m_msp->ann_flg = 0;
+   memset(m_msp->ann_arr,'\0',MAX_FN);
+   m_msp->ann_arr_def[0] = NULL;
+   m_msp->ann_arr_def[1] = NULL;
+   m_msp->annot0_sname[0]='\0';
+   m_msp->annot1_sname[0]='\0';
+   m_msp->annot_p = NULL;
+   m_msp->aa0a = NULL;
+   
+   ppst->LK_set = 0;
+   ppst->e_cut = m_msp->e_cut = 10.0;
+   ppst->e_cut_r = ppst->e_cut / 10.0;
+   ppst->do_rep = 1;
+   ppst->zs_win = 0;
+   ppst->show_ident = 0;
+
+   ppst->zdb_size = -1;
+   ppst->zdb_size_set = 0;
+   ppst->dnaseq = SEQT_PROT;	/* default is protein */
+   ppst->nt_align = 0;
+
+   ppst->other_info = NULL;
+
+   g_init_opts(m_msp, ppst);
+
+   f_initenv (m_msp, ppst, aa0);
+
+   SAFE_STRNCPY (optstring, g_optstring, sizeof (optstring));
+   SAFE_STRNCAT (optstring, f_optstring, sizeof (optstring));
+
+   while ((copt = getopt (argc, argv, optstring)) != EOF)
+   {
+      if (strchr (g_optstring, copt) != NULL)
+      {
+	switch (copt) {  /* switches for all options */
+	case 'C': 
+	  sscanf(optarg,"%d",&m_msp->nmlen);
+	  if (m_msp->nmlen > MAX_UID-1) m_msp->nmlen = MAX_UID-1;
+	  break;
+	case 'D': ppst->debug_lib = 1;
+	  break;
+	case 'e': 
+	  SAFE_STRNCPY(m_msp->link_lname, optarg, MAX_LSTR);	/* copy and terminate */
+	  break;
+	case 'F':
+	  sscanf(optarg,"%lg",&m_msp->e_low);
+	  m_msp->e_cut_set = 1;
+	  break;
+#if defined(PCOMPLIB) || !defined(SHOW_HIST)
+	case 'H':
+	  m_msp->nohist = 0; break;
+#else
+	case 'H':
+	  m_msp->nohist = 1; break;
+#endif
+	case 'i':
+	  m_msp->revcomp = 1; break;
+	case 'I':
+	  m_msp->quiet = 0; break;
+	case 'l':
+	  strncpy(m_msp->flstr,optarg,MAX_FN);
+	  m_msp->flstr[MAX_FN-1]='\0';
+	  break;
+	case 'L':
+	  m_msp->long_info = 1;
+	  long_info_set = 1;
+	  break;
+	case 'm':
+	  pre_parse_markx(optarg, m_msp);
+	  markx_set = 1;
+	  break;
+	case 'N':
+	  sscanf(optarg,"%d",&m_msp->ldb_info.maxn);
+	  break;
+	case 'o':
+	  sscanf (optarg,"%ld %ld",&m_msp->sq0off,&m_msp->sq1off); break;
+	case 'O':
+	  strncpy(m_msp->outfile,optarg,MAX_FN);
+	  m_msp->outfile[MAX_FN-1]='\0';
+	  break;
+	case 'q':
+	case 'Q':
+	  m_msp->quiet = 1;
+	  break;
+	case 'R':
+	  strncpy (m_msp->dfile, optarg, MAX_FN);
+	  m_msp->dfile[MAX_FN-1]='\0';
+	  break;
+	case 'T':
+#ifdef PCOMPLIB
+	  if (strchr(optarg,'-') != NULL) {
+	    sscanf(optarg,"%d-%d",&worker_1,&worker_n);
+	    if (worker_1 > worker_n) {
+	      worker_1 = worker_n = 0;
+	    }
+	  }
+	  else 
+#endif
+	    sscanf (optarg, "%d", &fa_max_workers);
+	  if (fa_max_workers < 0) fa_max_workers=1;
+	  break;
+	case 'v':
+	  sscanf (optarg,"%d",&ppst->zs_win);
+	  break;
+	case 'V':
+	  if (optarg[0] == '=') {
+	    get_annot_def_file(m_msp, optarg+1);
+	  }
+	  else if ((cptr = getenv("FA_ANNOT_DEF"))) {
+	    get_annot_def_file(m_msp, cptr);
+	  }
+	  else if (optarg[0] == 'q' && (optarg[1]=='!' || optarg[1]=='<')) {
+	    SAFE_STRNCPY(m_msp->annot0_sname,optarg+1,MAX_LSTR);
+	    m_msp->ann_flg = 2;
+	  }
+	  else if (optarg[0]=='!' || optarg[0]=='<') {
+	    SAFE_STRNCPY(m_msp->annot1_sname,optarg,MAX_LSTR);
+	    m_msp->ann_flg = 2;
+	  }
+	  else {
+	      strncpy((char *)m_msp->ann_arr+1,optarg,MAX_FN-2);
+	      m_msp->ann_arr[0]='\0';
+	      m_msp->ann_arr[MAX_FN-2]='\0';
+	      m_msp->ann_arr_n = strlen((char *)m_msp->ann_arr+1);
+	      if (m_msp->ann_flg ==0) m_msp->ann_flg = 1;
+	  }
+
+	  if (strlen((char *)m_msp->ann_arr) > 0) {
+	    add_ascii_ann(qascii, m_msp->ann_arr);
+	  }
+
+	  break;
+/*
+	case 'V':
+	  fprintf(stderr," -V option not currently supported in parallel\n");
+	  break;
+*/
+	case 'w':
+	  sscanf (optarg,"%d",&m_msp->aln.llen);
+	  if (m_msp->aln.llen < 10) m_msp->aln.llen = 10;
+	  if (m_msp->aln.llen > 200) m_msp->aln.llen = 200;
+	  if (!m_msp->aln.llcntx_set) m_msp->aln.llcntx = m_msp->aln.llen/2;
+	  llen_set = 1;
+	  break;
+	case 'Z':
+	  sscanf(optarg,"%ld",&ppst->zdb_size);
+	  ppst->zdb_size_set = 1;
+	  break;
+	}
+      }
+      else if (strchr (f_optstring, copt))
+	 f_getopt (copt, optarg, m_msp, ppst);
+   }
+   optind--;
+
+   if (!markx_set || !(m_msp->markx & (MX_ATYPE+MX_ANNOT_COORD+MX_ANNOT_MID))) {
+     m_msp->markx = set_markx(m_msp->markx, 0, '\0');
+   }
+
+   /* done with options, check for initializations in initfa.c
+      (set sascii alphabet) */
+   f_lastenv (m_msp, ppst);
+
+   if (argc - optind < 3) return;
+   m_msp->tnamesize = sizeof (m_msp->tname);
+   if (argc - optind > 1) {SAFE_STRNCPY (m_msp->tname, argv[optind + 1],MAX_FN);}
+   if (argc - optind > 2) {SAFE_STRNCPY (m_msp->lname, argv[optind + 2],MAX_LSTR);}
+   f_getarg (argc, argv, optind, m_msp, ppst);
+}
+
+/* ann_scan() removes annotation characters from aa0[0..n0-1] in place and
+   returns the residue count n_n0; when any were present, *aa0a_p gets a
+   calloc()ed array of n_n0+2 bytes of per-residue annotation codes (else NULL) */
+
+int
+ann_scan(unsigned char *aa0, int n0, unsigned char **aa0a_p, int seqtype)
+{
+  unsigned char *aa0p, *aa0d, *aa0ad;
+  int n_n0;
+
+  /* count how many "real" residues */
+
+  if (seqtype==SEQT_UNK) {
+    /* with SEQT_UNK, annotation characters are all < @, 
+       while sequence chars are all > @ */
+    for (n_n0=0, aa0p = aa0; aa0p < aa0+n0; aa0p++) {
+      if (*aa0p > '@' || *aa0p == ESS ) n_n0++;		/* ESS captures ',' in sequence */
+    }
+  }
+  else {
+    /* if the sequence type is known, residues are < NANN; anything else is annotation */
+    for (n_n0=0, aa0p = aa0; aa0p < aa0+n0; aa0p++) {
+      if (*aa0p < NANN ) n_n0++;
+    }
+  }
+
+  if (n_n0 == n0) {
+    *aa0a_p = NULL;
+    return n_n0;
+  }
+
+  aa0d = aa0;
+  /* n_n0 has the real sequence length */
+  if ((*aa0a_p = calloc(n_n0+2, sizeof(char)))==NULL) {
+    fprintf(stderr," cannot allocate annotation sequence: %d\n",n_n0);
+
+    /* this section is for failure, simply copy the correct sequence
+       and ignore the annotations */
+    if (seqtype==SEQT_UNK) {
+      for (aa0p = aa0; aa0p < aa0+n0; aa0p++) {
+	if (*aa0p > '@' || *aa0p == ESS) {*aa0d++ = *aa0p;}
+      }
+    }
+    else {
+      for (aa0p = aa0; aa0p < aa0+n0; aa0p++) {
+	if (*aa0p < NANN) {*aa0d++ = *aa0p;}
+      }
+    }
+    *aa0d = '\0';
+    return n_n0;
+  }
+
+  /* have aa0a_p annotation array allocated; an annotation character is
+     stored (as *aa0p - NANN) in the slot of the residue copied just before it */
+  aa0ad = *aa0a_p;
+  if (seqtype==SEQT_UNK) {
+    for (aa0p = aa0; aa0p<aa0+n0; aa0p++) {
+      if (*aa0p > '@' || *aa0p == ESS) {*aa0d++ = *aa0p; *aa0ad++='\0';}
+      else if (aa0ad > *aa0a_p) { aa0ad[-1] = *aa0p - NANN;}
+    }
+  }
+  else {
+    for (aa0p = aa0; aa0p<aa0+n0; aa0p++) {
+      if (*aa0p < NANN) {*aa0d++ = *aa0p; *aa0ad++='\0';}
+      else if (aa0ad > *aa0a_p) { aa0ad[-1] = *aa0p - NANN;}
+    }
+  }
+  *aa0ad = *aa0d = '\0';
+  return n_n0;
+}
+
+/* renamed from ann_ascii() Feb, 2008 to allow ann_ascii[]; gives each char in ann_arr+1 a qascii[] code */
+void
+add_ascii_ann(int *qascii, unsigned char *ann_arr)
+{
+  unsigned char *ann_p;
+  int ann_ix = NANN+1;	/* first code handed out below */
+
+  if (ann_arr[0] == '\0' && ann_arr[1]=='\0') {	/* no annotation characters */
+    ann_arr[0] = ' ';
+    ann_arr[0] = '\0';	/* NOTE(review): overwrites the ' ' stored on the line above -- was ann_arr[1] intended? */
+    return;
+  }
+
+  ann_arr[0] = ' ';
+  
+  if (strchr((char *)ann_arr+1,'*')) {qascii['*'] = NA;}	/* reset '*' so the loop below gives it a code */
+
+  for (ann_p = ann_arr+1; *ann_p; ann_p++) {	/* only entries currently NA get the next code */
+    if (qascii[*ann_p] == NA) { qascii[*ann_p] = ann_ix++;}
+  }
+}
+
+/* parse one annotation description line: line[0] is the symbol, the text after the first ':' its description */
+void add_annot_def(struct mngmsg *m_msp, char *line, int qa_flag) {
+  char *bp;
+  int i_ann;
+
+  if ((bp=strchr(line,'\r')) !=NULL || (bp=strchr(line,'\n')) != NULL) {
+    *bp = '\0';
+  }
+
+  if (m_msp->ann_arr[0]=='\0') {
+    m_msp->ann_arr[0] = ' ';
+    m_msp->ann_arr[1] = '\0';
+  }
+  if (line[0] == '\0') return;	/* blank line: strchr() below would match the terminator */
+
+  /* set the character */
+  i_ann = strlen((char *)m_msp->ann_arr);
+  if ((bp = strchr((char *)m_msp->ann_arr,line[0]))!=NULL) {
+    i_ann = (unsigned char *)bp - m_msp->ann_arr;
+  }
+  else {
+    if (i_ann >= MAX_FN-1) return;	/* ann_arr full: initenv() clears MAX_FN bytes of it */
+    m_msp->ann_arr[i_ann] = line[0];
+    m_msp->ann_arr[i_ann+1] = '\0';	/* required for strchr(ann_arr) to work */
+    if (qa_flag) qascii[(unsigned char)line[0]] = NANN + i_ann;	/* never index with a negative char */
+  }
+
+  if ((bp=strchr(line,':'))!=NULL) {
+    /* allocate space for definitions */
+    if ((m_msp->ann_arr_def[i_ann]=(char *)calloc(strlen(bp+1)+1,sizeof(char)))!=NULL) {
+      /* read in the definitions and associate with symbol */
+      strncpy(m_msp->ann_arr_def[i_ann], bp+1,strlen(bp+1));
+    }
+  }
+  else {
+    m_msp->ann_arr_def[i_ann] = NULL;
+  }
+}
+
+/* load annotation symbol definitions, one per line, from a file */
+static void
+get_annot_def_file(struct mngmsg *m_msp, char *fa_annot_env) {
+  char def_fname[MAX_STR], def_line[MAX_STR];
+  char *blank_p;
+  FILE *fp;
+
+  /* the name ends at the first blank: cut the string there for now;
+     the blank is put back before returning */
+  blank_p = strchr(fa_annot_env,' ');
+  if (blank_p != NULL) *blank_p = '\0';
+  subs_env(def_fname, fa_annot_env, sizeof(def_fname));
+  fp = fopen(def_fname,"r");
+  if (fp == NULL) {
+    fprintf(stderr,"*** error *** annotation definition file: %s not found\n",
+	    def_fname);
+    if (blank_p != NULL) *blank_p = ' ';
+    return;
+  }
+
+  /* each line goes to add_annot_def() with qa_flag == 0 */
+  while (fgets(def_line, sizeof(def_line), fp) != NULL) add_annot_def(m_msp, def_line, 0);
+  fclose(fp);
+  if (blank_p != NULL) *blank_p = ' ';
+
+  if (strlen((char *)m_msp->ann_arr) > 1) m_msp->ann_flg = 1;
+}
+
+/* annotation-placement bits selected by the modifier letter c */
+static int
+sm_annot_bits(int markx, char c) {
+  switch (c) {
+  case 'M':	/* middle-line annotation only */
+    return (markx | MX_ANNOT_MID) & (~MX_ANNOT_COORD);
+  case 'B':	/* both coordinate and middle-line annotation */
+    return markx | MX_ANNOT_COORD | MX_ANNOT_MID;
+  default:
+    return markx | MX_ANNOT_COORD;
+  }
+}
+
+/* fold one -m format number (val) and its modifier letter (c) into
+   the markx bit mask; returns the updated mask */
+int 
+set_markx(int markx, int val, char c) {
+
+  if (val < 3) {	/* alignment type goes into the MX_ATYPE bits */
+    markx = sm_annot_bits(markx, c);
+    if (c=='H') markx |= MX_HTML;
+    return markx | (MX_ATYPE & val);
+  }
+
+  /* HTML: val 6, or an 'H' modifier on any val above 5 */
+  if (val > 5 && (val == 6 || c=='H')) {
+    return sm_annot_bits(markx | MX_HTML, c);
+  }
+
+  switch (val) {
+  case 3:
+    markx |= (MX_ATYPE + MX_ASEP);
+    break;
+  case 4:
+    markx |= (MX_ATYPE + MX_AMAP);
+    break;
+  case 5:
+    markx |= MX_AMAP;
+    break;
+  case 8:
+    markx |= MX_M9SUMM+MX_M8OUT;
+    break;
+  case 9:
+    markx |= MX_M9SUMM;
+    break;
+  case 10:
+    markx |= MX_M10FORM;
+    break;
+  case 11:
+    markx |= MX_M11OUT;
+    break;
+  default:
+    /* 7 and anything above 11: markx is returned unchanged */
+    break;
+  }
+
+  return markx;
+}
+
+void
+pre_parse_markx(char *opt_arg, struct mngmsg *m_msp) {
+  char *bp, *last_bp;
+  struct markx_str *tmp_markx, *cur_markx, *last_markx;
+  /* parse one -m argument: an "F file" form always gets a new markx_str, any other form updates the head of m_msp->markx_list */
+  if (opt_arg[0] != 'F' && m_msp->markx_list != NULL) {
+    tmp_markx = m_msp->markx_list;
+  }
+  else {
+    if ((tmp_markx = (struct markx_str *)calloc(1,sizeof(struct markx_str)))==NULL) {
+      fprintf(stderr,"[error] Cannot allocate markx_list\n");
+      return;
+    }
+
+    /* initialize markx to m_msg defaults -- we do not use m_msp
+       directly, because it might have been changed by an earlier -m
+       out_fmt */
+
+    tmp_markx->nohist = 1;
+    if (m_msp->ashow_set) {tmp_markx->ashow = m_msp->ashow;}
+    else {tmp_markx->ashow = -1;}
+
+    tmp_markx->show_code = 0;
+    if (long_info_set) tmp_markx->long_info = 1;
+    else tmp_markx->long_info = 0;
+    if (llen_set) {
+      tmp_markx->aln_llen = m_msp->aln.llen;
+      tmp_markx->aln_llcntx = m_msp->aln.llcntx;
+      tmp_markx->aln_llcntx_set = m_msp->aln.llcntx_set;
+    }
+    else {
+      tmp_markx->aln_llen = 60;
+      if (m_msp->aln.llcntx_set) {
+	tmp_markx->aln_llcntx = m_msp->aln.llcntx;
+	tmp_markx->aln_llcntx_set = m_msp->aln.llcntx_set;
+      }
+      else {
+	tmp_markx->aln_llcntx = 30;
+	tmp_markx->aln_llcntx_set = 0;
+      }
+    }
+    tmp_markx->std_output = 1;
+  }
+
+  /* first check for -m "F file" format; use the opt_arg parameter
+     throughout rather than the getopt() global */
+  if (opt_arg[0] == 'F') {
+    if ((bp=strchr(opt_arg+1,' '))==NULL) {
+      fprintf(stderr,"-m F missing file name: %s\n",opt_arg);
+      free(tmp_markx); return;	/* 'F' entries are always new and not yet in the list */
+    }
+    /* allocate space for file name */
+    if ((tmp_markx->out_file = calloc(strlen(bp+1)+1,sizeof(char)))==NULL) {
+      fprintf(stderr,"[error] Cannot allocate markx->out_file\n");
+      free(tmp_markx); return;
+    }
+    strncpy(tmp_markx->out_file, bp+1, strlen(bp+1));
+    *bp = '\0';
+
+    last_bp = opt_arg+1;
+  }
+  else {
+    last_bp = opt_arg;
+  }
+
+  if (opt_arg[0] != 'F') {
+    m_msp_to_markx(tmp_markx, m_msp);
+  }
+
+  /* each comma-separated field is parsed on its own; the ',' is restored */
+  while ((bp=strchr(last_bp,','))!=NULL) {
+    *bp = '\0';
+    parse_markx(last_bp, tmp_markx);
+    *bp = ',';
+    last_bp = bp+1;
+  }
+  if (*last_bp) parse_markx(last_bp, tmp_markx);
+
+  if (m_msp->markx_list!=NULL) {
+    if (opt_arg[0] == 'F') {
+      /* if file name, add this to the end of the list */
+      last_markx = m_msp->markx_list;
+      for (cur_markx=m_msp->markx_list->next; cur_markx; cur_markx = cur_markx->next) {
+	last_markx = cur_markx;
+      }
+      last_markx->next = tmp_markx;
+    }
+    else if (tmp_markx != m_msp->markx_list) {
+      /* if no file name, then make this the first in the list,
+	 unless it is already there */
+      cur_markx = m_msp->markx_list;
+      m_msp->markx_list = tmp_markx;
+      tmp_markx->next = cur_markx;
+    }
+  }
+  else {
+    m_msp->markx_list = tmp_markx;
+  }
+
+  m_msp->tot_markx |= tmp_markx->markx;
+  m_msp->tot_show_code |= tmp_markx->show_code;
+
+  /* if no -m F, save options into m_msp */
+  if (opt_arg[0] != 'F') {
+    markx_to_m_msp(m_msp, tmp_markx);
+  }
+
+  return;
+}
+
+void
+parse_markx(char *optarg, struct markx_str *this) {
+  int itmp;
+  char ctmp, ctmp2;
+  /* apply one -m field ("B", "BB", "A", or <number>[letter[letter]]) to *this; flag bits are OR-ed in so a repeated field cannot carry into another bit */
+  itmp = 0;
+  ctmp = ctmp2 = '\0';
+
+  if (optarg[0] == 'B') {	/* BLAST alignment output */
+    this->markx = MX_MBLAST;
+    this->aln_llcntx = 0;
+    this->aln_llcntx_set = 1;
+    this->long_info=1;
+    this->ashow = -1;
+    if (optarg[1] == 'B') {	/* complete BLAST output */
+      this->markx |= MX_MBLAST2;
+      this->nohist = 1;
+      this->aln_llen = 65;
+      this->std_output = 0;
+      return;
+    }
+    else if (optarg[1] == '8') {
+      sscanf(optarg,"%d%c%c",&itmp,&ctmp,&ctmp2);	/* NOTE(review): optarg[0] is 'B' here, so %d cannot match -- optarg+1 intended? */
+    }
+    else {return;}		/* done with BLAST aligment output */
+  }
+  else if (optarg[0] == 'A') {
+    this->markx |= MX_RES_ALIGN_SCORE;
+    this->aln_llcntx = 0;
+    this->aln_llcntx_set = 1;
+    return;
+  }
+  else {
+    sscanf(optarg,"%d%c%c",&itmp,&ctmp,&ctmp2);
+  }
+  if (itmp==9) {
+    if (ctmp=='c') {this->show_code = SHOW_CODE_ALIGN;}
+    else if (ctmp=='d') {this->show_code = SHOW_CODE_ALIGN + SHOW_CODE_EXT;}
+    else if (ctmp=='C') {this->show_code = SHOW_CODE_CIGAR;}
+    else if (ctmp=='D') {this->show_code = SHOW_CODE_CIGAR + SHOW_CODE_EXT;}
+    else if (ctmp=='B') {this->show_code = SHOW_CODE_BTOP;}
+    else if (ctmp=='i') {this->show_code = SHOW_CODE_ID;}
+    else if (ctmp=='I') {this->show_code = SHOW_CODE_IDD;}
+  }
+  if (itmp > 6 && itmp != 11 && itmp != 10 && itmp != 9 && itmp != 8) itmp = 0;
+  this->markx = set_markx(this->markx,itmp,ctmp);
+  if (itmp == 11 ) { this->std_output = 0;}
+  if (itmp == 8) {
+    this->std_output = 0;
+    this->ashow = 0;
+    if (ctmp=='C') { this->markx |= MX_M8COMMENT;}
+    if (ctmp2 == 'c') { this->show_code = SHOW_CODE_ALIGN;}
+    else if (ctmp2 == 'd') {this->show_code = SHOW_CODE_ALIGN + SHOW_CODE_EXT;}
+    else if (ctmp2 == 'C') {this->show_code = SHOW_CODE_CIGAR;}
+    else if (ctmp2 == 'D') {this->show_code = SHOW_CODE_CIGAR + SHOW_CODE_EXT;}
+    else if (ctmp2 == 'B') {this->show_code = SHOW_CODE_BTOP;}
+  }
+}
+
+/* markx_to_m_msp() - copy the settings held in one markx_str (*this)
+   into the corresponding fields of *m_msp */
+void
+markx_to_m_msp(struct mngmsg *m_msp, struct markx_str *this) {
+
+  /* alignment line settings, kept in m_msp->aln */
+  m_msp->aln.llcntx_set = this->aln_llcntx_set;
+  m_msp->aln.llcntx = this->aln_llcntx;
+  m_msp->aln.llen = this->aln_llen;
+
+  /* scalar settings, same field name on both sides */
+  m_msp->std_output = this->std_output;
+  m_msp->long_info = this->long_info;
+  m_msp->show_code = this->show_code;
+  m_msp->ashow = this->ashow;
+  m_msp->nohist = this->nohist;
+  m_msp->markx = this->markx;
+}
+
+/* m_msp_to_markx() - copy the markx related settings currently in
+   *m_msp into the markx_str *this (the inverse of markx_to_m_msp()) */
+void
+m_msp_to_markx(struct markx_str *this, struct mngmsg *m_msp) {
+
+  /* alignment line settings, read from m_msp->aln */
+  this->aln_llcntx_set = m_msp->aln.llcntx_set;
+  this->aln_llcntx = m_msp->aln.llcntx;
+  this->aln_llen = m_msp->aln.llen;
+
+  /* scalar settings, same field name on both sides */
+  this->std_output = m_msp->std_output;
+  this->long_info = m_msp->long_info;
+  this->show_code = m_msp->show_code;
+  this->ashow = m_msp->ashow;
+  this->nohist = m_msp->nohist;
+  this->markx = m_msp->markx;
+}
+
+/* build_optstr() - put options from the option table
+   [struct opt_def_str *opt_defs] into char *opt_str for getopt().
+
+   The table ends at the first entry with opt_char == '\0'.  Each
+   entry contributes its option character, followed by ':' when
+   has_arg is set.
+
+   max_len is treated as the size of opt_str, including the
+   terminating '\0'.  When the table does not fit, an error is written
+   to stderr and opt_str holds the (terminated) options that did fit.
+*/
+
+void
+build_optstr(char *opt_str, int max_len, struct opt_def_str *opt_defs) {
+  int i, n_add, opt_len = 0;
+  char *opt_pos;
+
+  if (max_len < 1) {		/* no room, not even for the '\0' */
+    fprintf(stderr," *** error -- options too long %d >= %d\n", opt_len, max_len);
+    return;
+  }
+
+  opt_pos = opt_str;
+  for (i=0; opt_defs[i].opt_char != '\0'; i++) {
+    n_add = (opt_defs[i].has_arg) ? 2 : 1;
+    /* always keep one position free for the terminating '\0'; the
+       earlier test (opt_len + 2 > max_len) let an option with an
+       argument fill the buffer exactly, so the '\0' was written at
+       opt_str[max_len] */
+    if (opt_len + n_add >= max_len) {
+      fprintf(stderr," *** error -- options too long %d >= %d\n", opt_len, max_len);
+      break;
+    }
+    *opt_pos++ = opt_defs[i].opt_char;
+    opt_len++;
+    if (opt_defs[i].has_arg) {
+      *opt_pos++ = ':';
+      opt_len++;
+    }
+  }
+  *opt_pos = '\0';
+}
+
+/* set_opt_disp_defs() - store display/default values in the option
+   table entries for one option character.
+
+   Every entry of options[] (terminated by opt_char == '\0') whose
+   opt_char equals opt_char gets fmt_type = type, plus the values
+   selected by type:
+     1: i_param1            2: i_param1, i_param2
+     3: d_param1            4: d_param1, d_param2
+     5: s_param (a private copy of the string, or NULL)
+   Other values of type only set fmt_type.
+
+   If the copy for type 5 cannot be allocated, an error is written to
+   stderr and the entry's s_param is left NULL.
+*/
+void
+set_opt_disp_defs(char opt_char, struct opt_def_str *options, int type,
+		  int i_param1, int i_param2,
+		  double d_param1, double d_param2,
+		  char *s_param) {
+  struct opt_def_str *this_opt;
+
+  for (this_opt = options; this_opt->opt_char != '\0'; this_opt++) {
+    if (this_opt->opt_char != opt_char) continue;
+
+    this_opt->fmt_type = type;
+    switch (type) {
+    case 1:
+      this_opt->i_param1 = i_param1;
+      break;
+    case 2:
+      this_opt->i_param1 = i_param1;
+      this_opt->i_param2 = i_param2;
+      break;
+    case 3:
+      this_opt->d_param1 = d_param1;
+      break;
+    case 4:
+      this_opt->d_param1 = d_param1;
+      this_opt->d_param2 = d_param2;
+      break;
+    case 5:
+      if (s_param != NULL) {
+	size_t s_len = strlen(s_param);
+
+	/* calloc() zero-fills, so the copy is '\0' terminated */
+	this_opt->s_param = (char *)calloc(s_len+1,sizeof(char));
+	if (this_opt->s_param == NULL) {
+	  /* previously the NULL result was passed straight to strncpy() */
+	  fprintf(stderr,"[error] Cannot allocate s_param for option -%c\n",opt_char);
+	}
+	else {
+	  memcpy(this_opt->s_param,s_param,s_len);
+	}
+      }
+      else this_opt->s_param = NULL;
+      break;
+    }
+  }
+}
diff --git a/src/drop_func.h b/src/drop_func.h
new file mode 100644
index 0000000..f9e8611
--- /dev/null
+++ b/src/drop_func.h
@@ -0,0 +1,185 @@
+/* drop_func.h */
+
+/* $Id: drop_func.h 1196 2013-07-19 20:18:21Z wrp $ */
+/* $Revision: 1196 $  */
+
+/* copyright (c) 2005, 2014 by William R. Pearson and The Rector & Visitors
+   of the University of Virginia */
+
+/* Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under this License is distributed on an "AS
+   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+   express or implied.  See the License for the specific language
+   governing permissions and limitations under the License. 
+*/
+
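+/* functions provided by each of the drop files
+
+   A file that defines DROP_INTERN before including this header sees
+   the work area argument (f_arg) as struct f_struct; all other files
+   see it as an opaque void pointer.
+*/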
+
+#ifdef DEBUG
+unsigned long adler32(unsigned long, const unsigned char *, unsigned int);
+#endif
+
+void	/* initializes f_struct **f_arg */
+init_work (unsigned char *aa0, int n0,
+	   struct pstruct *ppst,
+#ifndef DROP_INTERN
+	   void **f_arg
+#else
+	   struct f_struct **f_arg
+#endif
+);
+
+
+void	/* frees memory allocated in f_struct */
+close_work (const unsigned char *aa0, int n0,
+	    struct pstruct *ppst,
+#ifndef DROP_INTERN
+	   void **f_arg
+#else
+	   struct f_struct **f_arg
+#endif
+);
+
+void	/* documents search function, parameters */
+get_param (const struct pstruct *pstr, 
+	   char **pstring1, char *pstring2,
+	   struct score_count_s *);
+
+void	/* calculates alignment score(s), returns them in rst */
+do_work (const unsigned char *aa0, int n0,
+	 const unsigned char *aa1, int n1,
+	 int frame,
+	 const struct pstruct *ppst,
+#ifndef DROP_INTERN
+	 void *f_arg,
+#else
+	 struct f_struct *f_arg,
+#endif
+	 int qr_flg, int shuff_flg, struct rstruct *rst,
+	 struct score_count_s *);
+
+void	/* calculates optimal alignment score */
+do_opt (const unsigned char *aa0, int n0,
+	const unsigned char *aa1, int n1,
+	int frame,
+	struct pstruct *ppst,
+#ifndef DROP_INTERN
+	void *f_arg,
+#else
+	struct f_struct *f_arg,
+#endif
+	struct rstruct *rst
+	);
+
+struct a_res_str *	/* produces encoding of alignment */
+do_walign (const unsigned char *aa0, int n0,
+	   const unsigned char *aa1, int n1,
+	   int frame, int repeat_thresh,
+	   struct pstruct *ppst, 
+#ifndef DROP_INTERN
+	   void *f_arg,
+#else
+	   struct f_struct *f_arg,
+#endif
+	   int *have_ares);
+
+void
+pre_cons(const unsigned char *aa, int n, int frame, 
+#ifndef DROP_INTERN
+	   void *f_arg
+#else
+	   struct f_struct *f_arg
+#endif
+	);
+
+void 
+aln_func_vals(int frame, struct a_struct *aln);
+
+#include "dyn_string.h"
+
+/* calc_cons_a - takes aa0, aa1, a_res, and produces seqc0, seqc1, 
+ *             and seqc0a, seqc1a - the annotated sequences 
+ */
+int
+calc_cons_a(const unsigned char *aa0, int n0,
+	    const unsigned char *aa1, int n1,
+	    int *nc,
+	    struct a_struct *aln,
+	    struct a_res_str *a_res,
+	    struct pstruct *ppst,
+	    char *seqc0, char *seqc1, char *seqca, int *seqc_score,
+	    const unsigned char *ann_arr,
+	    const unsigned char *aa0a, const struct annot_str *annot0_p, char *seqc0a, 
+	    const unsigned char *aa1a, const struct annot_str *annot1_p, char *seqc1a, 
+	    int *score_delta,
+	    struct dyn_string_str *annot_var_dyn,
+#ifndef DROP_INTERN
+	    void *f_arg,
+#else
+	    struct f_struct *f_arg,
+#endif
+	    void *pstat_void
+	    );
+
+int	/* returns lenc - length of alignment */
+calc_code(const unsigned char *aa0, int n0,
+	  const unsigned char *aa1, int n1,
+	  struct a_struct *aln,
+	  struct a_res_str *a_res,
+	  struct pstruct *ppst,
+	  struct dyn_string_str *align_code_dyn,
+	  const unsigned char *ann_arr,
+	  const unsigned char *aa0a,
+	  const struct annot_str *annot0_p,
+	  const unsigned char *aa1a, 
+	  const struct annot_str *annot1_p,
+	  struct dyn_string_str *ann_str_dyn,
+	  int *score_delta,
+#ifndef DROP_INTERN
+	  void *f_arg,
+#else
+	  struct f_struct *f_arg,
+#endif
+	  void *pstat_void,
+	  int code_fmt
+	  );
+
+int 	/* returns lenc - length of alignment */
+calc_id(const unsigned char *aa0, int n0,
+	const unsigned char *aa1, int n1,
+	struct a_struct *aln, 
+	struct a_res_str *a_res,
+	struct pstruct *ppst,
+	const struct annot_str *annot0_p,
+	const struct annot_str *annot1_p,
+	int *score_delta,
+	struct dyn_string_str *annot_var_dyn,
+#ifndef DROP_INTERN
+	void *f_arg
+#else
+	struct f_struct *f_arg
+#endif
+	);
+
+int 	/* returns lenc - length of alignment */
+calc_idd(const unsigned char *aa0, int n0,
+	const unsigned char *aa1, int n1,
+	struct a_struct *aln, 
+	struct a_res_str *a_res,
+	struct pstruct *ppst,
+	const struct annot_str *annot0_p,
+	const struct annot_str *annot1_p,
+	int *score_delta,
+	struct dyn_string_str *annot_var_dyn,
+#ifndef DROP_INTERN
+	void *f_arg
+#else
+	struct f_struct *f_arg
+#endif
+	);
diff --git a/src/dropff2.c b/src/dropff2.c
new file mode 100644
index 0000000..cd2145c
--- /dev/null
+++ b/src/dropff2.c
@@ -0,0 +1,1394 @@
+/* $Id: dropff2.c 989 2012-07-24 19:37:38Z wrp $ */
+
+/* copyright (c) 1998, 1999, 2014 by William R. Pearson and The Rector &
+   Visitors of the University of Virginia */
+
+/* Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under this License is distributed on an "AS
+   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+   express or implied.  See the License for the specific language
+   governing permissions and limitations under the License. 
+*/
+
+/* this code implements the "fastf" algorithm, which is designed to
+   deconvolve mixtures of protein sequences derived from mixed-peptide
+   Edman sequencing.  The expected input is:
+
+   >test | 40001 90043 | mgstm1
+   MGCEN,
+   MIDYP,
+   MLLAY,
+   MLLGY
+
+   Where the ','s indicate the length/end of the sequencing cycle
+   data.  Thus, in this example, the sequence is from a mixture of 4
+   peptides, M was found in the first position, G,I, and L(2) at the second,
+   C,D, L(2) at the third, etc.
+
+   Because the sequences are derived from mixtures, there need not be
+   any partial sequence "MGCEN", the actual deconvolved sequence might be
+   "MLDGN".
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdlib.h>
+#include <math.h>
+#include <ctype.h>
+
+#include "defs.h"
+#include "param.h"
+#include "structs.h"
+#include "tatstats.h"
+
+#define EOSEQ 0 
+#define ESS 59		/* query code that separates the peptide fragments;
+			   init_work() rewrites it to 0 */
+#define MAXHASH 32
+/* parenthesized so that NMAP expands as a single value inside any
+   larger expression (2*NMAP, NMAP<<1, ...) */
+#define NMAP (MAXHASH+1)
+#define NMAP_X 23	/* re-code NMAP for 'X' */
+#define NMAP_Z 24	/* re-code NMAP for '*' */
+
+#ifndef MAXSAV
+#define MAXSAV 10
+#endif
+
+#define DROP_INTERN
+#include "drop_func.h"
+
+static char *verstr="4.21 May 2006 (ajm/wrp)";
+
+int shscore(unsigned char *aa0, const int n0, int **pam2, int nsq);
+
+#ifdef TFAST
+extern int aatran(const unsigned char *ntseq, unsigned char *aaseq, 
+		  const int maxs, const int frame);
+#endif
+
+struct hlstr { int next, pos;};
+
+void savemax(struct dstruct *, struct f_struct *);
+
+static int m0_spam(unsigned char *, const unsigned char *, int, struct savestr *,
+	    int **, struct f_struct *);
+static int m1_spam(unsigned char *, int,
+		   const unsigned char *, int,
+		   struct savestr *, int **, int, struct f_struct *);
+
+int sconn(struct savestr **v, int nsave, int cgap,
+	  struct f_struct *, struct rstruct *, const struct pstruct *,
+	  const unsigned char *aa0, int n0,
+	  const unsigned char *aa1, int n1,
+	  int opt_prob);
+
+void kpsort(struct savestr **, int);
+void kssort(struct savestr **, int);
+void kpsort(struct savestr **, int);
+
+int
+sconn_a(unsigned char *, int, int, struct f_struct *,
+	struct a_res_str *);
+
+/* init_work() - initialize for fastf/tfastf: allocate the work area
+   (struct f_struct) for one query and build the query hash table.
+
+   aa0[0..n0-1]  encoded query; peptide fragments are separated by
+                 ESS (or 0).  The caller's array is only read; the
+                 function works on a private copy, f_str->aa0, in
+                 which every ESS is replaced by 0.
+   ppst          scoring parameters.  Modified: sw_flag is cleared,
+                 hsq[NMAP_X] and hsq[NMAP_Z] are set to 1 if they
+                 held NMAP, and param_u.fa.cgap is recomputed.
+   f_arg         receives the pointer to the new work area.
+
+   Any allocation failure is reported on stderr and ends the program
+   with exit(1).
+*/
+
+void
+init_work (unsigned char *aa0, int n0,
+	   struct pstruct *ppst,
+	   struct f_struct **f_arg)
+{
+   int mhv, phv;
+   int hmax;
+   int i0, ii0, hv;
+   struct f_struct *f_str;
+
+   int maxn0;
+   int i;
+   struct savestr *vmptr;
+   int *res;
+   int nsq;
+
+   nsq = ppst->nsqx;
+
+   f_str = (struct f_struct *) calloc(1, sizeof(struct f_struct));
+   if(f_str == NULL) {
+     fprintf(stderr, "Couldn't calloc f_str\n");
+     exit(1);
+   }
+
+   ppst->sw_flag = 0;
+
+   /* fastf3 cannot work with lowercase symbols as low complexity;
+      thus, NMAP must be disabled; this depends on aascii['X']  */
+   if (ppst->hsq[NMAP_X] == NMAP ) {ppst->hsq[NMAP_X]=1;}
+   if (ppst->hsq[NMAP_Z] == NMAP ) {ppst->hsq[NMAP_Z]=1;}
+
+   /*   this does not work for share ppst structs, as in threads */
+   /*else {fprintf(stderr," cannot find 'X'==NMAP\n");} */
+
+   /* mhv: largest hash value below NMAP */
+   for (i0 = 1, mhv = -1; i0 <= ppst->nsq; i0++)
+      if (ppst->hsq[i0] < NMAP && ppst->hsq[i0] > mhv) mhv = ppst->hsq[i0];
+
+   if (mhv <= 0) {
+      fprintf (stderr, " maximum hsq <=0 %d\n", mhv);
+      exit (1);
+   }
+
+   /* kshft: number of bits needed to hold mhv */
+   for (f_str->kshft = 0; mhv > 0; mhv /= 2)
+      f_str->kshft++;
+
+/*      kshft = 2;	*/
+   hmax = hv = (1 << f_str->kshft);
+   /* NOTE(review): hmax >> kshft is always 1, so hmask is always 0;
+      in this function it is only used in the error message below */
+   f_str->hmask = (hmax >> f_str->kshft) - 1;
+
+   /* private, modifiable copy of the query (one spare, zeroed byte) */
+   if ((f_str->aa0 = (unsigned char *) calloc(n0+1, sizeof(char))) == NULL) {
+     fprintf (stderr, " cannot allocate f_str->aa0 array; %d\n",n0+1);
+     exit (1);
+   }
+   for (i=0; i<n0; i++) f_str->aa0[i] = aa0[i];
+   aa0 = f_str->aa0;
+
+   if ((f_str->aa0t = (unsigned char *) calloc(n0+1, sizeof(char))) == NULL) {
+     fprintf (stderr, " cannot allocate f_str0->aa0t array; %d\n",n0+1);
+     exit (1);
+   }
+   f_str->aa0ix = 0;
+
+   if ((f_str->harr = (struct hlstr *) calloc (hmax, sizeof (struct hlstr))) == NULL) {
+     fprintf (stderr, " cannot allocate hash array; hmax: %d hmask: %d\n",
+	      hmax,f_str->hmask);
+     exit (1);
+   }
+   if ((f_str->pamh1 = (int *) calloc (nsq+1, sizeof (int))) == NULL) {
+     fprintf (stderr, " cannot allocate pamh1 array\n");
+     exit (1);
+   }
+   if ((f_str->pamh2 = (int *) calloc (hmax, sizeof (int))) == NULL) {
+     fprintf (stderr, " cannot allocate pamh2 array\n");
+     exit (1);
+   }
+   if ((f_str->link = (struct hlstr *) calloc (n0, sizeof (struct hlstr))) == NULL) {
+     fprintf (stderr, " cannot allocate hash link array");
+     exit (1);
+   }
+
+   /* -1 marks "no entry" in both the hash heads and the links */
+   for (i0 = 0; i0 < hmax; i0++) {
+      f_str->harr[i0].next = -1;
+      f_str->harr[i0].pos = -1;
+   }
+
+   for (i0 = 0; i0 < n0; i0++) {
+      f_str->link[i0].next = -1;
+      f_str->link[i0].pos = -1;
+   }
+
+   /* encode the aa0 array */
+   /*
+     this code has been modified to allow for mixed peptide sequences
+      aa0[] = 5 8 9 3 4 NULL 5 12 3 7 2 NULL
+      the 'NULL' character resets the hash position counter, to indicate that
+      any of several residues can be in the same position.
+      We also need to keep track of the number of times this has happened, so that
+      we can redivide the sequence later
+
+      i0 counts through the sequence
+      ii0 counts through the hashed sequence
+
+      */
+
+   f_str->nm0 = 1;
+   f_str->nmoff = -1;
+   phv = hv = 0;
+   for (i0= ii0 = 0; i0 < n0; i0++, ii0++) {
+     /* reset the counter and start hashing again */
+     if (aa0[i0] == ESS || aa0[i0] == 0) {
+       aa0[i0] = 0;	/* set ESS to 0 */
+       /*       fprintf(stderr," converted ',' to 0\n");*/
+       i0++;	/* skip over the blank */
+       f_str->nm0++;
+       if (f_str->nmoff < 0) f_str->nmoff = i0;
+       phv = hv = 0;
+       ii0 = 0;
+       /* a separator in the last position leaves nothing to hash;
+	  without this test link[n0] (one past the n0 allocated
+	  entries) was written and position n0 was entered in harr[] */
+       if (i0 >= n0) break;
+     }
+     hv = ppst->hsq[aa0[i0]];
+     /* push position i0 on the front of the chain for hv */
+     f_str->link[i0].next = f_str->harr[hv].next;
+     f_str->link[i0].pos = f_str->harr[hv].pos;
+     f_str->harr[hv].next = i0;
+     f_str->harr[hv].pos = ii0;
+     f_str->pamh2[hv] = ppst->pam2[0][aa0[i0]][aa0[i0]];
+   }
+   if (f_str-> nmoff < 0) f_str->nmoff = n0;
+
+
+#ifdef DEBUG
+   /*
+   fprintf(stderr," nmoff: %d/%d nm0: %d\n", f_str->nmoff, n0,f_str->nm0);
+   */
+#endif
+
+/*
+#ifdef DEBUG
+   fprintf(stderr," hmax: %d\n",hmax);
+   for ( hv=0; hv<hmax; hv++)
+       fprintf(stderr,"%2d %c %3d %3d\n",hv,
+	       (hv > 0 && hv < ppst->nsq ) ? ppst->sq[ppst->hsq[hv]] : ' ',
+	       f_str->harr[hv].pos,f_str->harr[hv].next);
+   fprintf(stderr,"----\n");
+   for ( hv=0; hv<n0; hv++)
+       fprintf(stderr,"%2d: %3d %3d\n",hv,
+	       f_str->link[hv].pos,f_str->link[hv].next);
+#endif
+*/
+
+   /* table of the MAXSAV best saved segments, and pointers for sorting */
+   f_str->maxsav = MAXSAV;
+   if ((f_str->vmax = (struct savestr *)
+	calloc(MAXSAV,sizeof(struct savestr)))==NULL) {
+     fprintf(stderr, "Couldn't allocate vmax[%d].\n",f_str->maxsav);
+     exit(1);
+   }
+
+   if ((f_str->vptr = (struct savestr **)
+	calloc(MAXSAV,sizeof(struct savestr *)))==NULL) {
+     fprintf(stderr, "Couldn't allocate vptr[%d].\n",f_str->maxsav);
+     exit(1);
+   }
+
+   for (vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++) {
+     vmptr->used = (int *) calloc(n0, sizeof(int));
+     if(vmptr->used == NULL) {
+       fprintf(stderr, "Couldn't alloc vmptr->used\n");
+       exit(1);
+     }
+   }
+
+/* this has been modified from 0..<ppst->nsq to 1..<=ppst->nsq because the
+   pam2[0][0] is now undefined for consistency with blast
+*/
+
+   for (i0 = 1; i0 <= ppst->nsq; i0++)
+     f_str->pamh1[i0] = ppst->pam2[0][i0][i0];
+
+   /* joining penalty: a third of shscore() over the first fragment,
+      but no more than bestmax/4 */
+   ppst->param_u.fa.cgap = shscore(aa0,f_str->nmoff-1,ppst->pam2[0],ppst->nsq)/3;
+   if (ppst->param_u.fa.cgap > ppst->param_u.fa.bestmax/4)
+     ppst->param_u.fa.cgap = ppst->param_u.fa.bestmax/4;
+
+   f_str->ndo = 0;
+   f_str->noff = n0-1;
+   if (f_str->diag==NULL)
+     f_str->diag = (struct dstruct *) calloc ((size_t)MAXDIAG,
+					      sizeof (struct dstruct));
+
+   if (f_str->diag == NULL)
+   {
+      fprintf (stderr, " cannot allocate diagonal arrays: %ld\n",
+	      (long) MAXDIAG * (long) (sizeof (struct dstruct)));
+      exit (1);
+   }
+
+#ifdef TFAST
+   if ((f_str->aa1x =(unsigned char *)calloc((size_t)ppst->maxlen+2,
+					     sizeof(unsigned char)))
+       == NULL) {
+     fprintf (stderr, "cannot allocate aa1x array %d\n", ppst->maxlen+2);
+     exit (1);
+   }
+   f_str->aa1x++;	/* close_work() frees aa1x - 1 */
+#endif
+
+   /* allocate space for the scoring arrays */
+   maxn0 = max(3*n0/2,MIN_RES);
+   if ((res = (int *)calloc((size_t)maxn0,sizeof(int)))==NULL) {
+     fprintf(stderr,"cannot allocate alignment results array %d\n",maxn0);
+     exit(1);
+   }
+   f_str->res = res;
+   f_str->max_res = maxn0;
+
+   /* Tatusov Statistics Setup */
+
+   /* initialize priors array. */
+   if((f_str->priors = (double *)calloc(ppst->nsq+1, sizeof(double))) == NULL) {
+     fprintf(stderr, "Couldn't allocate priors array.\n");
+     exit(1);
+   }
+   calc_priors(f_str->priors, ppst, f_str, NULL, 0, ppst->pseudocts);
+
+   f_str->dotat = 0;
+   f_str->shuff_cnt = ppst->shuff_node;
+
+   /* End of Tatusov Statistics Setup */
+
+   *f_arg = f_str;
+}
+
+
+/* get_param() - describe the search function and its parameters.
+
+   pstring1[0] gets the program name and version (with " init1"
+   appended when param_u.fa.iniflag is set), pstring1[1] the matrix
+   and join values; pstring1 is a message to the manager, currently
+   512.  pstring2, when not NULL, gets the same information in a
+   markx==10 format.  s_info is not used here.
+*/
+void
+get_param (const struct pstruct *ppstr,
+	   char **pstring1, char *pstring2,
+	   struct score_count_s *s_info)
+{
+#ifdef TFAST
+  char *pg_str="TFASTF";
+#else
+  char *pg_str="FASTF";
+#endif
+
+  sprintf (pstring1[0], "%s (%s)",pg_str,verstr);
+  sprintf (pstring1[1], "%s matrix (%d:%d), join: %d",
+	   ppstr->pam_name, ppstr->pam_h,ppstr->pam_l,ppstr->param_u.fa.cgap);
+
+  if (ppstr->param_u.fa.iniflag) strcat(pstring1[0]," init1");
+
+  if (pstring2 == NULL) return;
+
+  /* one "; name: value" line per item */
+  sprintf (pstring2,
+	   "; pg_name_alg: %s\n"
+	   "; pg_ver_rel: %s\n"
+	   "; pg_matrix: %s (%d:%d)\n"
+	   "; pg_join: %d\n",
+	   pg_str,verstr, ppstr->pam_name, ppstr->pam_h,ppstr->pam_l,
+	   ppstr->param_u.fa.cgap);
+}
+
+/* close_work() - free the work area allocated by init_work() and set
+   *f_arg to NULL.  Does nothing when *f_arg is already NULL.
+   aa0, n0 and ppst are not used. */
+void
+close_work (const unsigned char *aa0, const int n0,
+	    struct pstruct *ppst,
+	    struct f_struct **f_arg)
+{
+  struct f_struct *work = *f_arg;
+  int isav;
+
+  if (work == NULL) return;
+
+  /* per-segment "used" arrays first, the table itself further down */
+  for (isav = 0; isav < MAXSAV; isav++) {
+    free(work->vmax[isav].used);
+  }
+
+  free(work->res);
+#ifdef TFAST
+  free(work->aa1x - 1); /* allocated, then aa1x++'ed */
+#endif
+  free(work->diag);
+  free(work->link);
+  free(work->pamh2);
+  free(work->pamh1);
+  free(work->harr);
+  free(work->aa0t);
+  free(work->aa0);
+  free(work->priors);
+  free(work->vmax);
+  free(work->vptr);
+  free(work);
+
+  *f_arg = NULL;
+}
+/* do_fastf() - compare the query aa0[0..n0-1] with one library
+   sequence aa1[0..n1-1].
+
+   Steps, in order:
+   1. every residue of aa1 is looked up in the query hash table
+      (f_str->harr / f_str->link) and extends, or restarts, a run on
+      its diagonal in f_str->diag; runs are kept with savemax()
+   2. each saved run is rescored with m0_spam() and the runs are
+      sorted with kssort()
+   3. each run is rescored again with m1_spam(), which marks the query
+      residues it uses by adding 32; the marks are removed afterwards
+   4. the runs are sorted again, entries at the end of the list with
+      score <= 0 are dropped, and the remainder is joined by sconn()
+
+   Results: rst->score[1] is the best single run score, rst->score[0]
+   and rst->score[2] the larger of that and the sconn() result.
+
+   Returns 1, or -1 when n0+n1+1 >= MAXDIAG.
+
+   hoff is not referenced in this function; the locals tmp and cmps
+   are unused, and im appears only in commented-out debugging code.
+ */
+int do_fastf (unsigned char *aa0, int n0,
+	      const unsigned char *aa1, int n1,
+	      const struct pstruct *ppst, struct f_struct *f_str,
+	      struct rstruct *rst, int *hoff, int opt_prob)
+{
+   int     nd;		/* diagonal array size */
+   int     lhval;
+   int     kfact;
+   register struct dstruct *dptr;
+   register int tscor;
+   register struct dstruct *diagp;
+   struct dstruct *dpmax;
+   register int lpos;
+   int     tpos, npos;
+   struct savestr *vmptr;
+   int     scor, tmp;
+   int     im, ib, nsave;
+   int     cmps ();		/* comparison routine for ksort */
+   const int *hsq;
+
+   hsq = ppst->hsq;
+
+   if (n1 < 1) {	/* empty library sequence: all scores 0 */
+     rst->score[0] = rst->score[1] = rst->score[2] = 0;
+     rst->escore = 1.0;
+     rst->segnum = 0;
+     rst->seglen = 0;
+     return 1;
+   }
+
+   if (n0+n1+1 >= MAXDIAG) {	/* more diagonals than the MAXDIAG limit */
+     fprintf(stderr,"n0,n1 too large: %d, %d\n",n0,n1);
+     rst->score[0] = rst->score[1] = rst->score[2] = -1;
+     rst->escore = 2.0;
+     rst->segnum = 0;
+     rst->seglen = 0;
+     return -1;
+   }
+
+   nd = n0 + n1;
+
+   dpmax = &f_str->diag[nd];
+   /* entries below ndo were already reset at the end of the last call */
+   for (dptr = &f_str->diag[f_str->ndo]; dptr < dpmax;) {
+      dptr->stop = -1;
+      dptr->dmax = NULL;
+      dptr++->score = 0;
+   }
+
+   /* initialize the saved segment structures */
+   for (vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++) {
+      vmptr->score = 0;
+      memset(vmptr->used, 0, n0 * sizeof(int));
+   }
+
+   f_str->lowmax = f_str->vmax;
+   f_str->lowscor = 0;
+
+   /* start hashing */
+
+   diagp = &f_str->diag[f_str->noff];
+   for (lhval = lpos = 0; lpos < n1; lpos++, diagp++) {
+     if (hsq[aa1[lpos]]>=NMAP) {	/* skip residues with hash value >= NMAP */
+       lpos++ ; diagp++;
+       while (lpos < n1 && hsq[aa1[lpos]]>=NMAP) {lpos++; diagp++;}
+       if (lpos >= n1) break;
+       lhval = 0;
+     }
+     lhval = hsq[aa1[lpos]];
+     /* walk the chain of query positions with this hash value */
+     for (tpos = f_str->harr[lhval].pos, npos = f_str->harr[lhval].next;
+	  tpos >= 0; tpos = f_str->link[npos].pos, npos = f_str->link[npos].next) {
+       /* tscor gets position of end of current lpos diag run */
+       if ((tscor = (dptr = &diagp[-tpos])->stop) >= 0) {
+	 tscor++;		 /* move forward one */
+	 if ((tscor -= lpos) <= 0) { /* check for size of gap to this hit - */
+	                             /* includes implicit -1 mismatch penalty */
+	   scor = dptr->score;	     /* current score of this run */
+	   if ((tscor += (kfact = f_str->pamh2[lhval])) < 0 &&
+	       f_str->lowscor < scor)	/* if updating tscor makes run worse, */
+	     savemax (dptr, f_str);     /* save it */
+
+	   if ((tscor += scor) >= kfact) {  /* add to current run if continuing */
+	     				    /* is better than restart (kfact) */
+	       dptr->score = tscor;
+	       dptr->stop = lpos;
+	     }
+	     else {
+	       dptr->score = kfact;	/* starting over is better */
+	       dptr->start = (dptr->stop = lpos);
+	     }
+	 }
+	 else {				/* continue current run */
+	   dptr->score += f_str->pamh1[aa0[tpos]]; 
+	   dptr->stop = lpos;
+	 }
+       }
+       else {				/* no diagonal run yet */
+	 dptr->score = f_str->pamh2[lhval];
+	 dptr->start = (dptr->stop = lpos);
+       }
+     }				/* end tpos */
+   }				/* end lpos */
+
+   /* save the runs still open, and reset every diagonal for the next call */
+   for (dptr = f_str->diag; dptr < dpmax;) {
+     if (dptr->score > f_str->lowscor) savemax (dptr, f_str);
+     dptr->stop = -1;
+     dptr->dmax = NULL;
+     dptr++->score = 0;
+   }
+   f_str->ndo = nd;
+
+/*
+        at this point all of the elements of aa1[lpos]
+        have been searched for elements of aa0[tpos]
+        with the results in diag[dpos]
+*/
+
+   /* set up pointers for sorting */
+
+   for (nsave = 0, vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++) {
+     if (vmptr->score > 0) {
+       vmptr->score = m0_spam (aa0, aa1, n1, vmptr, ppst->pam2[0], f_str);
+       f_str->vptr[nsave++] = vmptr;
+     }
+   }
+
+   /* sort them */
+   kssort (f_str->vptr, nsave);
+
+   
+#ifdef DEBUG
+   /*
+   for (ib=0; ib<nsave; ib++) {
+     fprintf(stderr,"0: %4d-%4d  1: %4d-%4d  dp: %d score: %d\n",
+  	     f_str->noff+f_str->vptr[ib]->start-f_str->vptr[ib]->dp,
+  	     f_str->noff+f_str->vptr[ib]->stop-f_str->vptr[ib]->dp,
+  	     f_str->vptr[ib]->start,f_str->vptr[ib]->stop,
+  	     f_str->vptr[ib]->dp,f_str->vptr[ib]->score);
+       for (im=f_str->vptr[ib]->start; im<=f_str->vptr[ib]->stop; im++)
+         fprintf(stderr," %c:%c",ppst->sq[aa0[f_str->noff+im-f_str->vptr[ib]->dp]],
+		 ppst->sq[aa1[im]]);
+       fputc('\n',stderr);
+   }
+   fprintf(stderr,"---\n");
+   */
+   /* now use m_spam to re-evaluate */
+   /*
+   for (tpos = 0; tpos < n0; tpos++) {
+     fprintf(stderr,"%c:%2d ",ppst->sq[aa0[tpos]],aa0[tpos]);
+     if (tpos %10 == 9) fputc('\n',stderr);
+   }
+   fputc('\n',stderr);
+   */
+#endif   
+   
+   f_str->aa0ix = 0;
+   for (ib=0; ib < nsave; ib++) {
+     if ((vmptr=f_str->vptr[ib])->score > 0) {
+       vmptr->score = m1_spam (aa0, n0, aa1, n1, vmptr,
+			       ppst->pam2[0], ppst->pam_l, f_str);
+     }
+   }
+   /* reset aa0 - modified by m1_spam */
+   for (tpos = 0; tpos < n0; tpos++) {
+     if (aa0[tpos] >= 32) aa0[tpos] -= 32;
+   }
+
+   kssort(f_str->vptr,nsave);
+
+   /* drop entries at the end of the list whose score is <= 0 */
+   for ( ; nsave > 0; nsave--) 
+     if (f_str->vptr[nsave-1]->score >0) break;
+
+   if (nsave <= 0) {
+     f_str->nsave = 0;
+     rst->score[0] = rst->score[1] = rst->score[2] = 0;
+     rst->escore = 1.0;
+
+     return 1;
+   }
+   else f_str->nsave = nsave;
+
+   
+#ifdef DEBUG
+   /*
+   fprintf(stderr,"n0: %d; n1: %d; noff: %d\n",n0,n1,f_str->noff);
+   for (ib=0; ib<nsave; ib++) {
+     fprintf(stderr,"0: %4d-%4d  1: %4d-%4d  dp: %d score: %d\n",
+  	     f_str->noff+f_str->vptr[ib]->start-f_str->vptr[ib]->dp,
+  	     f_str->noff+f_str->vptr[ib]->stop-f_str->vptr[ib]->dp,
+  	     f_str->vptr[ib]->start,f_str->vptr[ib]->stop,
+  	     f_str->vptr[ib]->dp,f_str->vptr[ib]->score);
+     for (im=f_str->vptr[ib]->start; im<=f_str->vptr[ib]->stop; im++)
+       fprintf(stderr," %c:%c",ppst->sq[aa0[f_str->noff+im-f_str->vptr[ib]->dp]],
+  	       ppst->sq[aa1[im]]);
+     fputc('\n',stderr);
+   }
+
+   fprintf(stderr,"---\n");
+   */
+#endif   
+
+   scor = sconn (f_str->vptr, nsave, ppst->param_u.fa.cgap, f_str,
+		 rst, ppst, aa0, n0, aa1, n1, opt_prob);
+
+   /* vmptr: the saved run with the highest score */
+   for (vmptr=f_str->vptr[0],ib=1; ib<nsave; ib++)
+     if (f_str->vptr[ib]->score > vmptr->score) vmptr=f_str->vptr[ib];
+
+   rst->score[1] = vmptr->score;
+   rst->score[0] = rst->score[2] = max (scor, vmptr->score);
+
+   return 1;
+}
+
+/* do_work() - calculate the score(s) for one library sequence and
+   return them in *rst.
+
+   The query is taken from the work area (f_str->aa0); the aa0
+   argument is not referenced.  With TFAST defined, aa1 is first
+   translated with aatran() for the given frame.
+
+   A shuffled comparison (qr_flg == 1) is refused, with scores of -1
+   and valid_stat 0, once f_str->shuff_cnt has reached 0.  shuff_flg
+   is not used here.
+*/
+void do_work (const unsigned char *aa0, int n0,
+	      const unsigned char *aa1, int n1,
+	      int frame,
+	      const struct pstruct *ppst, struct f_struct *f_str,
+	      int qr_flg, int shuff_flg, struct rstruct *rst,
+	      struct score_count_s *s_info)
+{
+  int opt_prob;
+  int hoff, n10, i;
+
+  if (qr_flg==1 && f_str->shuff_cnt <= 0) {
+    rst->score[0] = rst->score[1] = rst->score[2] = -1;
+    rst->escore = 2.0;
+    rst->valid_stat = 0;
+    return;
+  }
+
+  /* count this comparison */
+  s_info->tot_scores++;
+  s_info->s_cnt[ppst->score_ix]++;
+
+  rst->valid_stat = 1;
+
+  /* opt_prob: on for dotat and zsflag 4/14, always off for zsflag
+     2/12, always on for a shuffled comparison */
+  opt_prob = (f_str->dotat || ppst->zsflag == 4 || ppst->zsflag == 14) ? 1 : 0;
+  if (ppst->zsflag == 2 || ppst->zsflag == 12) {
+    opt_prob = 0;
+  }
+  if (qr_flg) {
+    /*    if (frame==1) */
+    f_str->shuff_cnt--;
+    opt_prob = 1;
+  }
+
+  if (n1 < 1) {
+    rst->escore = 2.0;
+    rst->score[0] = rst->score[1] = rst->score[2] = -1;
+    return;
+  }
+
+#ifdef TFAST
+  n10 = aatran(aa1,f_str->aa1x,n1,frame);
+  if (ppst->debug_lib) {
+    /* report residues above nsq and cut the sequence there */
+    for (i=0; i<n10; i++) {
+      if (f_str->aa1x[i]>ppst->nsq) {
+	fprintf(stderr,
+		"residue[%d/%d] %d range (%d)\n",i,n1,
+		f_str->aa1x[i],ppst->nsq);
+	f_str->aa1x[i]=0;
+	n10=i-1;
+      }
+    }
+  }
+
+  do_fastf (f_str->aa0, n0, f_str->aa1x, n10, ppst, f_str, rst, &hoff, opt_prob);
+#else	/* FASTF */
+  do_fastf (f_str->aa0, n0, aa1, n1, ppst, f_str, rst, &hoff, opt_prob);
+#endif
+
+  rst->comp = rst->H = -1.0;
+}
+
+/* do_opt() - rerun do_fastf() for one library sequence with
+   param_u.fa.optflag forced to 1 and opt_prob set to 1; optflag is
+   restored before returning.  The query is taken from f_str->aa0;
+   the aa0 argument is not referenced. */
+void do_opt (const unsigned char *aa0, int n0,
+	     const unsigned char *aa1, int n1,
+	     int frame,
+	     struct pstruct *ppst,
+	     struct f_struct *f_str,
+	     struct rstruct *rst)
+{
+  int hoff, n10;
+  int saved_optflag = ppst->param_u.fa.optflag;
+
+  ppst->param_u.fa.optflag = 1;
+
+#ifdef TFAST
+  n10 = aatran(aa1,f_str->aa1x,n1,frame);
+  do_fastf (f_str->aa0, n0, f_str->aa1x, n10, ppst, f_str, rst, &hoff, 1);
+#else	/* FASTA */
+  do_fastf (f_str->aa0, n0, aa1, n1, ppst, f_str, rst, &hoff, 1);
+#endif
+
+  ppst->param_u.fa.optflag = saved_optflag;
+}
+
+/* savemax() - enter the run on diagonal *dptr in the table of saved
+   segments, f_str->vmax[0..MAXSAV-1].
+
+   If dptr->dmax already points at a saved segment with the same
+   diagonal and start, that segment is extended (stop, and score when
+   it improved).  Otherwise the run replaces the lowest scoring saved
+   segment, f_str->lowmax.  Whenever the lowest segment may have
+   changed, lowmax and lowscor are recomputed.
+
+   Defined with a prototype (matching the declaration near the top of
+   the file) rather than in the old identifier-list form, which is no
+   longer valid in C23.
+*/
+void
+savemax (struct dstruct *dptr, struct f_struct *f_str)
+{
+   register int dpos;
+   register struct savestr *vmptr;
+   register int i;
+
+   dpos = (int) (dptr - f_str->diag);	/* index of this diagonal */
+
+/* check to see if this is the continuation of a run that is already saved */
+
+   if ((vmptr = dptr->dmax) != NULL && vmptr->dp == dpos &&
+	 vmptr->start == dptr->start)
+   {
+      vmptr->stop = dptr->stop;
+      if ((i = dptr->score) <= vmptr->score)
+	 return;
+      vmptr->score = i;
+      /* only a change to the lowest entry can move lowmax/lowscor */
+      if (vmptr != f_str->lowmax)
+	 return;
+   }
+   else
+   {
+      /* new run: overwrite the lowest scoring saved segment */
+      i = f_str->lowmax->score = dptr->score;
+      f_str->lowmax->dp = dpos;
+      f_str->lowmax->start = dptr->start;
+      f_str->lowmax->stop = dptr->stop;
+      dptr->dmax = f_str->lowmax;
+   }
+
+   /* find the new lowest scoring saved segment */
+   for (vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++)
+      if (vmptr->score < i)
+      {
+	 i = vmptr->score;
+	 f_str->lowmax = vmptr;
+      }
+   f_str->lowscor = i;
+}
+
+/* this version of spam() is designed to work with a collection of
+   subfragments, selecting the best amino acid at each position so
+   that, from each subfragment, each position is only used once.
+
+   As a result, m_spam needs to know the number of fragments.
+
+   In addition, it now requires a global alignment to the fragment
+   and resets the start and stop positions
+
+   aa0 is the query (f_str->nm0 fragments, f_str->nmoff apart), aa1
+   the library sequence of length n1, dmax the saved segment to
+   rescore, pam2 the scoring matrix and pam_l the score used when no
+   fragment residue scores higher.  n0 is not used.
+
+   For every library position from dmax->start to dmax->stop the
+   unused query residue (value < 32) with the highest pam2 score is
+   chosen; it is marked as used by adding 32 to it in aa0 and by
+   setting the matching dmax->used[] entry.  do_fastf() removes the
+   +32 marks again after all segments have been rescored.
+
+   Returns the sum of the per-position scores; dmax->start and
+   dmax->stop are updated.
+
+   NOTE(review): dmax->stop is clamped to n1 and the loop bound is
+   inclusive, so aa1[n1] can be read - confirm that aa1 always has an
+   entry at index n1.
+   NOTE(review): of curv and maxv only .score is assigned; neither is
+   read.
+
+   */
+
+static int
+m1_spam (unsigned char *aa0, int n0,
+	 const unsigned char *aa1, int n1,
+	 struct savestr *dmax, int **pam2, int pam_l,
+	 struct f_struct *f_str)
+{
+  int     tpos, lpos, im, ii, nm, ci;
+  int     tot, ctot, pv;
+
+  struct {
+    int     start, stop, score;
+  } curv, maxv;
+  unsigned char *aa0p;
+  const unsigned char *aa1p;
+
+  lpos = dmax->start;                   /* position in library sequence */
+   tpos = lpos - dmax->dp + f_str->noff; /* position in query sequence */
+   /* force global alignment, reset start*/
+   if (tpos < lpos) {
+     lpos = dmax->start -= tpos;
+     tpos = 0;
+   }
+   else {
+     tpos -= lpos;
+     lpos = dmax->start = 0;
+   }
+
+   dmax->stop = dmax->start + (f_str->nmoff -2 - tpos);
+   if (dmax->stop > n1) dmax->stop = n1;
+
+   /*
+   if (dmax->start < 0) {
+     tpos = -dmax->start;
+     lpos = dmax->start=0;
+   }
+   else tpos = 0;
+   */
+
+   aa1p = &aa1[lpos];
+   aa0p = &aa0[tpos];
+
+   nm = f_str->nm0;
+
+   tot = curv.score = maxv.score = 0;
+   for (; lpos <= dmax->stop; lpos++,aa0p++,aa1p++) {
+     ctot = pam_l;	/* score to beat */
+     ci = -1;		/* offset of the chosen residue, -1 for none */
+     for (im = 0, ii=0; im < nm; im++,ii+=f_str->nmoff) {
+       if (aa0p[ii] < 32 && (pv = pam2[aa0p[ii]][*aa1p]) > ctot) {
+	 ctot = pv;
+	 ci = ii;
+/*  	 fprintf(stderr, "lpos: %d im: %d ii: %d ci: %d ctot: %d pi: %d pv: %d\n", lpos, im, ii, ci, ctot, aa0p[ii], pam2[aa0p[ii]][*aa1p]); */
+       }
+     }
+     tot += ctot;
+     if (ci >= 0 && aa0p[ci] < 32) {
+#ifdef DEBUG
+/*         fprintf(stderr, "used: lpos: %d ci: %d : %c\n", lpos, ci, sq[aa0p[ci]]); */
+#endif
+       aa0p[ci] +=  32;	/* mark this query residue as used */
+       dmax->used[&aa0p[ci] - aa0] = 1;
+     }
+   }
+   return tot;
+}
+
+/* ma_spam() - rescore one saved region (dmax) against the query
+   fragments and, at the same time, build the matching query string.
+
+   For each library position from dmax->start to dmax->stop
+   (inclusive) the best-scoring unused residue among the f_str->nm0
+   fragments (spaced f_str->nmoff apart in aa0[]) is chosen; the
+   column contributes that score, or ppst->pam_l if nothing beats it.
+
+   Side effects:
+   - each chosen residue is copied to f_str->aa0t[] (starting at
+     f_str->aa0ix) and then has 32 added to it in aa0[] so that it is
+     not chosen again; the caller has to undo the +32 afterwards
+   - if at least one residue was copied, dmax->dp is reduced by
+     f_str->aa0ix and f_str->aa0ix is advanced by f_str->nmoff
+
+   n0 is only used for the two bounds warnings written to stderr.
+
+   returns the sum of the per-column scores.
+   */
+int ma_spam (unsigned char *aa0, int n0, const unsigned char *aa1,
+	     struct savestr *dmax, struct pstruct *ppst,
+	     struct f_struct *f_str)
+{
+  int **pam2;
+  int     tpos, lpos, im, ii, nm, ci, lp0;
+  int     tot, ctot, pv;
+  /* curv/maxv: assigned below but never read in this function */
+  struct {
+    int     start, stop, score;
+  } curv, maxv;
+   const unsigned char *aa1p;
+   unsigned char *aa0p, *aa0pt;
+   int aa0t_flg;		/* set once a residue has been copied to aa0t */
+
+   pam2 = ppst->pam2[0];
+   aa0t_flg = 0;
+
+   lpos = dmax->start;			/* position in library sequence */
+   tpos = lpos - dmax->dp + f_str->noff; /* position in query sequence */
+   lp0 = lpos = dmax->start;
+   aa1p = &aa1[lpos];
+   aa0p = &aa0[tpos];			/* real aa0 sequence */
+
+			/* the destination aa0 sequence (without nulls) */
+   aa0pt = &f_str->aa0t[f_str->aa0ix];
+
+   curv.start = lpos;
+   nm = f_str->nm0;
+
+   /* sometimes, tpos may be > 0, with lpos = 0 - fill with 'X' */
+   if (lpos == 0 && tpos > 0)
+     for (ii = 0; ii < tpos; ii++) *aa0pt++ = 31;  /* filler character */
+
+   tot = curv.score = maxv.score = 0;
+   for (; lpos <= dmax->stop; lpos++) {
+     ctot = ppst->pam_l;	/* floor for this column */
+     ci = -1;			/* offset of the chosen residue, -1 = none */
+     for (im = 0, ii=0; im < nm; im++,ii+=f_str->nmoff) {
+       /* residues >= 32 have already been used and are skipped */
+       if (aa0p[ii] < 32 && (pv = pam2[aa0p[ii]][*aa1p]) > ctot) {
+	 ctot = pv;
+	 ci = ii;
+       }
+     }
+     tot += ctot;
+     if (ci >= 0) {
+       /* NOTE(review): ci is an offset from aa0p, not from aa0, so
+	  this test does not bound tpos+ci - confirm intent */
+       if (ci >= n0) {fprintf(stderr," warning - ci off end %d/%d\n",ci,n0);}
+       else {
+	 *aa0pt++ = aa0p[ci];		/* save the residue before marking */
+	 aa0p[ci] +=  32;		/* mark it as used */
+	 aa0t_flg=1;
+       }
+     }
+     aa0p++; aa1p++;
+   }
+
+   if (aa0t_flg) {
+     dmax->dp -= f_str->aa0ix;		/* shift ->dp for aa0t */
+     if ((ci=(int)(aa0pt-f_str->aa0t)) > n0) {
+       fprintf(stderr," warning - aapt off %d/%d end\n",ci,n0);
+     }
+     else 
+       *aa0pt++ = 0;			/* skip over NULL */
+
+     /* aa0pt/aa1p are reset only for the disabled debug output below */
+     aa0pt = &f_str->aa0t[f_str->aa0ix];
+     aa1p = &aa1[lp0];
+
+     /*
+     for (im = 0; im < f_str->nmoff; im++)
+       fprintf(stderr,"%c:%c,",ppst->sq[aa0pt[im]],ppst->sq[aa1p[im]]);
+     fprintf(stderr,"- %3d (%3d:%3d)\n",dmax->score,f_str->aa0ix,lp0);
+     */
+
+     f_str->aa0ix += f_str->nmoff;	/* update offset into aa0t */
+   }
+   /*
+      fprintf(stderr," ma_spam returning: %d\n",tot);
+   */
+   return tot;
+}
+
+/* m0_spam() - rescore one saved region (dmax) against the query
+   fragments without marking any residue as used.
+
+   For each library position the column score is the largest
+   pam2[query residue][library residue] over the f_str->nm0 fragments
+   (spaced f_str->nmoff apart in aa0[]); aa0[] and aa1[] are only read.
+
+   dmax->start is moved back to force a global alignment when the
+   query position is > 0; dmax->stop is rewritten only when the
+   region would run past n1.
+
+   returns the sum of the per-column scores.
+   */
+static int
+m0_spam (unsigned char *aa0, const unsigned char *aa1, int n1,
+	 struct savestr *dmax, int **pam2,
+	 struct f_struct *f_str)
+{
+   int tpos, lpos, lend, im, ii, nm;
+   int     tot, ctot, pv;
+   /* curv/maxv: assigned below but never read in this function */
+   struct {
+     int     start, stop, score;
+   } curv, maxv;
+   const unsigned char *aa0p, *aa1p;
+
+   lpos = dmax->start;			/* position in library sequence */
+   tpos = lpos - dmax->dp + f_str->noff; /* position in query sequence */
+   if (tpos > 0) {
+     if (lpos-tpos >= 0) {
+       lpos = dmax->start -= tpos;    /* force global alignment, reset start*/
+       tpos = 0;
+     }
+     else {
+       /* not enough library sequence to the left: start the library
+	  at 0 and keep the remaining query offset */
+       tpos -= lpos;
+       lpos = dmax->start = 0;
+     }
+   }
+
+   nm = f_str->nm0;
+   lend = dmax->stop;
+   /* recompute the end only when lpos + nmoff-2 lies beyond n1; lend
+      is then clamped to the last valid library index, but dmax->stop
+      keeps the unclamped value */
+   if (n1 - (lpos + f_str->nmoff-2) < 0 ) {
+     lend = dmax->stop = (lpos - tpos) + f_str->nmoff-2;
+     if (lend >= n1) lend = n1-1;
+   }
+
+   aa1p = &aa1[lpos];
+   aa0p = &aa0[tpos];
+
+   curv.start = lpos;
+
+   tot = curv.score = maxv.score = 0;
+   for (; lpos <= lend; lpos++) {
+     ctot = -10000;	/* lower than any matrix score, so the first wins */
+     for (im = 0, ii=0; im < nm; im++,ii+=f_str->nmoff) {
+       if ((pv = pam2[aa0p[ii]][*aa1p]) > ctot) {
+	 ctot = pv;
+       }
+     }
+     tot += ctot;
+     aa0p++; aa1p++;
+   }
+
+   /* reset dmax if necessary */
+
+   return tot;
+}
+
+/* sconn links up non-overlapping alignments and calculates the score
+
+   v, n      - the saved regions; v[] is re-sorted in place by
+               ascending ->start (kpsort)
+   cgap      - when opt_prob is 0, regions scoring less than cgap
+               are ignored
+   f_str, ppst, aa0/n0, aa1/n1
+             - passed through to calc_tatusov() (opt_prob != 0 only)
+   rst       - receives ->escore, ->segnum and ->seglen
+   opt_prob  - 0: chain regions by summed score;
+               non-zero: chain regions by calc_tatusov() probability
+
+   returns the score of the best chain, or 0 if no region was used.
+
+   NOTE(review): sarr[] holds MAXSAV entries and si is never checked
+   against MAXSAV here - confirm that callers keep n <= MAXSAV.
+*/
+
+int sconn (struct savestr **v, int n, int cgap, struct f_struct *f_str,
+	   struct rstruct *rst, const struct pstruct *ppst,
+	   const unsigned char *aa0, int n0,
+	   const unsigned char *aa1, int n1,
+	   int opt_prob)
+{
+   int     i, si, cmpp ();	/* cmpp is declared but not called here */
+   struct slink *start, *sl, *sj, *so, sarr[MAXSAV];
+   int     lstart, plstop;
+   double tatprob;
+
+   /* sarr[] saves each alignment score/position, and provides a link
+      back to the previous alignment that maximizes the score */
+
+   /*	sort the score left to right in lib pos */
+   kpsort (v, n);
+
+   /* start heads the list (linked through ->next) of chains built so
+      far, best first */
+   start = NULL;
+
+   /* for the remaining runs, see if they fit */
+   for (i = 0, si = 0; i < n; i++) {
+
+     /* if the score is less than the gap penalty, it never helps */
+     if (!opt_prob && (v[i]->score < cgap) ){ continue; }
+
+     lstart = v[i]->start;
+
+     /* put the run in the group */
+     sarr[si].vp = v[i];
+     sarr[si].score = v[i]->score;
+     sarr[si].next = NULL;
+     sarr[si].prev = NULL;
+     sarr[si].tat = NULL;
+     
+     if(opt_prob) {
+       /* probability of this run on its own; calc_tatusov() is
+	  expected to leave a new tat struct in ->newtat, which this
+	  entry now owns through ->tat */
+       sarr[si].tatprob = 
+	 calc_tatusov(NULL, &sarr[si], aa0, n0, aa1, n1,
+		      ppst->pam2[0],ppst->nsq, f_str,
+		      ppst->pseudocts, opt_prob,ppst->zsflag);
+       sarr[si].tat = sarr[si].newtat;
+     }
+
+     /* if it fits, then increase the score */
+     for (sl = start; sl != NULL; sl = sl->next) {
+       plstop = sl->vp->stop;
+       /* if end < start or start > end, add score */
+       if (plstop < lstart ) {
+	 if(!opt_prob) {
+	   /* the list is sorted best first, so the first chain that
+	      fits is the one to extend */
+	   sarr[si].score = sl->score + v[i]->score;
+	   sarr[si].prev = sl;
+	   /*
+	     fprintf(stderr,"sconn %d added %d/%d getting %d; si: %d, tat: %g\n",
+	     i,v[i]->start, v[i]->score,sarr[si].score,si, 2.0);
+	   */
+	   break;
+	 } else {
+	   tatprob = 
+	     calc_tatusov(sl, &sarr[si], aa0, n0, aa1, n1,
+			  ppst->pam2[0], ppst->nsq, f_str,
+			  ppst->pseudocts, opt_prob, ppst->zsflag);
+	   /* if our tatprob gets worse when we add this, forget it */
+	   if(tatprob > sarr[si].tatprob) {
+	     free(sarr[si].newtat->probs); /* get rid of new tat struct */
+	     free(sarr[si].newtat);
+	     continue;
+	   } else {
+	     sarr[si].tatprob = tatprob;
+	     free(sarr[si].tat->probs); /* get rid of old tat struct */
+	     free(sarr[si].tat);
+	     sarr[si].tat = sarr[si].newtat;
+	     sarr[si].prev = sl;
+	     sarr[si].score = sl->score + v[i]->score;
+	     /*
+	       fprintf(stderr,"sconn TAT %d added %d/%d getting %d; si: %d, tat: %g\n",
+	       i,v[i]->start, v[i]->score,sarr[si].score,si, tatprob);
+	     */
+	     break;
+	   }
+	 }
+       }
+     }
+
+     /*	now recalculate where the score fits - resort the scores */
+     /* the new entry is inserted in front of the first list member it
+	beats; if it beats none of them it is left off the list */
+     if (start == NULL) {
+       start = &sarr[si];
+     } else {
+       if(!opt_prob) { /* sort by scores */
+	 for (sj = start, so = NULL; sj != NULL; sj = sj->next) {
+	   if (sarr[si].score > sj->score) { /* if new score > best score */
+	     sarr[si].next = sj;	     /* previous best linked to best */
+	     if (so != NULL)		
+	       so->next = &sarr[si];	     /* old best points to new best */
+	     else
+	       start = &sarr[si];
+	     break;
+	   }
+	   so = sj;			     /* old-best saved in so */
+	 }
+       } else { /* sort by tatprobs */
+	 /* smaller tatprob first; equal tatprobs are ordered by score */
+	 for (sj = start, so = NULL; sj != NULL; sj = sj->next) {
+	   if ( sarr[si].tatprob < sj->tatprob ||
+		((sarr[si].tatprob == sj->tatprob) && sarr[si].score > sj->score) ) {
+	     sarr[si].next = sj;
+	     if (so != NULL)
+	       so->next = &sarr[si];
+	     else
+	       start = &sarr[si];
+	     break;
+	   }
+	   so = sj;
+	 }
+       }
+     }
+     si++;
+   }
+   
+   /* every entry created in opt_prob mode still owns one tat struct */
+   if(opt_prob) {
+     for (i = 0 ; i < si ; i++) {
+       free(sarr[i].tat->probs);
+       free(sarr[i].tat);
+     }
+   }
+
+   if (start != NULL) {
+
+     if(opt_prob)
+       rst->escore = start->tatprob;
+     else
+       rst->escore = 2.0;
+
+     /* walk the best chain backwards through ->prev, counting its
+	segments and their total length in the library */
+     rst->segnum = rst->seglen = 0;
+     for(sj = start ; sj != NULL; sj = sj->prev) {
+       rst->segnum++;
+       rst->seglen += sj->vp->stop - sj->vp->start + 1;
+     }
+     return (start->score);
+   } else {
+
+     if(opt_prob)
+       rst->escore = 1.0;
+     else
+       rst->escore = 2.0;
+
+     rst->segnum = rst->seglen = 0;
+     return (0);
+   }
+}
+
+/* kssort() - Shell sort of the n pointers in v[] by ->score, highest
+   score first.  Only the pointers are moved. */
+void
+kssort (struct savestr **v, int n)
+{
+   int     step, hi, lo;
+   struct savestr *held;
+
+   for (step = n / 2; step > 0; step /= 2) {
+      for (hi = step; hi < n; hi++) {
+	 /* bubble the entry at hi back through its step-spaced chain
+	    for as long as it outscores the entry in front of it */
+	 lo = hi - step;
+	 while (lo >= 0 && v[lo]->score < v[lo + step]->score) {
+	    held = v[lo + step];
+	    v[lo + step] = v[lo];
+	    v[lo] = held;
+	    lo -= step;
+	 }
+      }
+   }
+}
+/* kpsort() - Shell sort of the n pointers in v[] by ->start, smallest
+   start first (left to right in the library).  Only the pointers are
+   moved.
+
+   Defined with a prototype, like kssort(): K&R-style parameter
+   declarations are obsolescent and no longer valid in C23. */
+void
+kpsort (struct savestr **v, int n)
+{
+   int     gap, i, j;
+   struct savestr *tmp;
+
+   for (gap = n / 2; gap > 0; gap /= 2)
+      for (i = gap; i < n; i++)
+	 for (j = i - gap; j >= 0; j -= gap)
+	 {
+	    /* this gap-spaced pair is already in order */
+	    if (v[j]->start <= v[j + gap]->start)
+	       break;
+	    tmp = v[j];
+	    v[j] = v[j + gap];
+	    v[j + gap] = tmp;
+	 }
+}
+
+/* sorts alignments from right to left (back to front) based on stop:
+   after the call v[0] has the largest ->stop.  Shell sort; only the
+   n pointers in v[] are moved.
+
+   Defined with a prototype, like kssort(): K&R-style parameter
+   declarations are obsolescent and no longer valid in C23. */
+
+void
+krsort (struct savestr **v, int n)
+{
+   int     gap, i, j;
+   struct savestr *tmp;
+
+   for (gap = n / 2; gap > 0; gap /= 2)
+      for (i = gap; i < n; i++)
+	 for (j = i - gap; j >= 0; j -= gap)
+	 {
+	    /* strictly greater: entries with equal ->stop are swapped */
+	    if (v[j]->stop > v[j + gap]->stop)
+	       break;
+	    tmp = v[j];
+	    v[j] = v[j + gap];
+	    v[j + gap] = tmp;
+	 }
+}
+
+/* do_walign() - rerun the search for one library sequence and build
+   the alignment result.
+
+   aa0, n0        - query; note that the search and rescoring below
+                    use f_str->aa0, not the aa0 argument
+   aa1, n1        - library sequence; with TFAST it is first
+                    translated into f_str->aa1x in the given frame
+   frame          - only used with TFAST
+   repeat_thresh  - not referenced
+   ppst, f_str    - parameters and per-query work space
+   have_ares      - set to 0x2 on entry
+
+   returns a calloc'ed a_res_str, or NULL if that allocation fails
+   (the process exits if the temporary aa0t buffer cannot be
+   allocated).  a_res->res points at f_str->res, it is not a copy.
+*/
+struct a_res_str *
+do_walign (const unsigned char *aa0, int n0,
+	   const unsigned char *aa1, int n1,
+	   int frame, int repeat_thresh,
+	   struct pstruct *ppst, 
+	   struct f_struct *f_str, 
+	   int *have_ares)
+{
+  struct a_res_str *a_res;
+  int hoff, n10;
+  int ib;
+  unsigned char *aa0t;
+  const unsigned char *aa1p;
+
+  *have_ares = 0x2;	/* set 0x2 bit to indicate local copy */
+
+  if ((a_res = (struct a_res_str *)calloc(1, sizeof(struct a_res_str)))==NULL) {
+    fprintf(stderr," [do_walign] Cannot allocate a_res");
+    return NULL;
+  }
+
+#ifdef TFAST
+  f_str->n10 = n10 = aatran(aa1,f_str->aa1x,n1,frame);
+  aa1p = f_str->aa1x;
+#else
+  n10 = n1;
+  aa1p = aa1;
+#endif
+
+  do_fastf(f_str->aa0, n0, aa1p, n10, ppst, f_str, &a_res->rst, &hoff, 1);
+
+  /* the alignment portion takes advantage of the information left
+     over in f_str after do_fastf is done.  in particular, it is
+     easy to run a modified sconn() to produce the alignments.
+
+     unfortunately, the alignment display routine wants to have
+     things encoded as with bd_align and sw_align, so we need to do that.
+     */
+
+  /* scratch query buffer that sconn_a() fills in; freed below */
+  if ((aa0t = (unsigned char *)calloc(n0+1,sizeof(unsigned char)))==NULL) {
+    fprintf(stderr," cannot allocate aa0t %d\n",n0+1);
+    exit(1);
+  }
+
+   /* best regions first, then rescore them with ma_spam(), which
+      also appends the matched residues to f_str->aa0t */
+   kssort (f_str->vptr, f_str->nsave);
+   f_str->aa0ix = 0;
+   if (f_str->nsave > f_str->nm0) f_str->nsave = f_str->nm0;
+   /* NOTE(review): this loop runs to nm0 even when nsave < nm0;
+      confirm that vptr[nsave..nm0-1] are always valid pointers */
+   for (ib=0; ib < f_str->nm0; ib++) {
+     if (f_str->vptr[ib]->score > 0) {
+       f_str->vptr[ib]->score = 
+	 ma_spam (f_str->aa0, n0, aa1p, f_str->vptr[ib], ppst, f_str);
+     }
+   }
+
+   /* after ma_spam is over, we need to reset aa0 */
+   /* (ma_spam marks used residues by adding 32 to them) */
+   for (ib = 0; ib < n0; ib++) {
+     if (f_str->aa0[ib] >= 32) f_str->aa0[ib] -= 32;
+   }
+
+   kssort(f_str->vptr,f_str->nsave);
+
+   /* drop trailing regions whose rescored value is not positive */
+   for ( ; f_str->nsave > 0; f_str->nsave--) 
+     if (f_str->vptr[f_str->nsave-1]->score >0) break;
+
+  a_res->nres = sconn_a (aa0t,n0, ppst->param_u.fa.cgap, f_str,a_res);
+  free(aa0t);
+
+  a_res->res = f_str->res;
+  a_res->sw_score = a_res->rst.score[0];
+  return a_res;
+}
+
+/* this version of sconn is modified to provide alignment information
+
+   aa0, n0   - scratch buffer (n0 bytes are copied out of it at the
+               end) that receives the aligned query residues
+   cgap      - regions scoring less than cgap are ignored
+   f_str     - supplies the saved regions (->vptr, ->nsave), ->noff,
+               the residues in ->aa0t and the op buffer ->res;
+               ->vptr is re-sorted, each used region's ->dp is
+               adjusted and ->aa0t is overwritten from aa0
+   a_res     - receives ->min0, ->max0, ->min1, ->max1
+
+   returns the alignment length counted in tres (aligned positions
+   plus inserted gap lengths), or 0 if no region was used.
+
+   NOTE(review): nothing here bounds nres against the size of
+   f_str->res, or si against MAXSAV - confirm the callers guarantee it.
+*/
+
+int sconn_a (unsigned char *aa0, int n0, int cgap, 
+	     struct f_struct *f_str,
+	     struct a_res_str *a_res)
+{
+   int     i, si, cmpp (), n;	/* cmpp is declared but not called here */
+   unsigned char *aa0p;
+   int sx, dx, doff;
+
+   struct savestr **v;
+   /* local definition: its members (snext/aprev) differ from the
+      struct slink used by sconn() */
+   struct slink {
+     int     score;
+     struct savestr *vp;
+     struct slink *snext;
+     struct slink *aprev;
+   } *start, *sl, *sj, *so, sarr[MAXSAV];
+   int     lstop, plstart;
+   int *res, nres, tres;
+
+/*	sort the regions right to left in lib pos (largest stop first) */
+
+   v = f_str->vptr;
+   n = f_str->nsave;
+
+   krsort (v, n);	/* sort from right to left in library */
+
+   start = NULL;
+
+/*	for each alignment, see if it fits */
+
+   for (i = 0, si = 0; i < n; i++) {
+
+/*	if the score is less than the join threshold, skip it */
+     if (v[i]->score < cgap) continue;
+
+     lstop = v[i]->stop;		/* have right-most lstart */
+
+/*	put the alignment in the group */
+
+     sarr[si].vp = v[i];
+     sarr[si].score = v[i]->score;
+     sarr[si].snext = NULL;
+     sarr[si].aprev = NULL;
+
+/* 	if it fits, then increase the score */
+/* start points to a sorted (by total score) list of candidate
+   overlaps */
+
+     /* a chain fits when its first region starts after this region
+	stops; ->aprev then points at the region that follows this one
+	in the library */
+     for (sl = start; sl != NULL; sl = sl->snext) { 
+       plstart = sl->vp->start;
+       if (plstart > lstop ) {
+	 sarr[si].score = sl->score + v[i]->score;
+	 sarr[si].aprev = sl;
+	 break;		/* quit as soon as the alignment has been added */
+       }
+     }
+
+/* now recalculate the list of best scores */
+     /* an entry that beats no list member is left off the list */
+     if (start == NULL)
+       start = &sarr[si];	/* put the first one in the list */
+     else
+       for (sj = start, so = NULL; sj != NULL; sj = sj->snext) {
+	 if (sarr[si].score > sj->score) { /* new score better than old */
+	   sarr[si].snext = sj;		/* snext best after new score */
+	   if (so != NULL)
+	     so->snext = &sarr[si];	/* prev_best->snext points to best */
+	   else  start = &sarr[si];	/* start points to best */
+	   break;			/* stop looking */
+	 }
+	 so = sj;		/* previous candidate best */
+       }
+     si++;				/* increment to snext alignment */
+   }
+
+   /* we have the best set of alignments, write them to *res */
+   if (start != NULL) {
+     res = f_str->res;	/* set a destination for the alignment ops */
+     tres = nres = 0;	/* alignment op length = 0 */
+     aa0p = aa0;	/* point into query (needed for calcons later) */
+     a_res->min1 = start->vp->start;	/* start in library */
+     a_res->min0 = 0;			/* start in query */
+     /* following ->aprev visits the chain from left to right */
+     for (sj = start; sj != NULL; sj = sj->aprev ) {
+       /* doff: how far this block moves when it is packed into aa0[] */
+       doff = (int)(aa0p-aa0) - (sj->vp->start-sj->vp->dp+f_str->noff);
+       /*
+       fprintf(stderr,"doff: %3d\n",doff);
+       */
+       for (dx=sj->vp->start,sx=sj->vp->start-sj->vp->dp+f_str->noff;
+	    dx <= sj->vp->stop; dx++) {
+	 *aa0p++ = f_str->aa0t[sx++];	/* copy residue into aa0 */
+	 tres++;			/* bump alignment counter */
+	 res[nres++] = 0;		/* put 0-op in res */
+       }
+       sj->vp->dp -= doff;		/* keep ->dp valid for the packed query */
+       if (sj->aprev != NULL) {
+	 if (sj->aprev->vp->start - sj->vp->stop - 1 > 0 )
+	 /* put an insert op into res to get to next aligned block */
+	   tres += res[nres++] = (sj->aprev->vp->start - sj->vp->stop - 1);
+       }
+       /*
+       fprintf(stderr,"t0: %3d, tx: %3d, l0: %3d, lx: %3d, dp: %3d noff: %3d, score: %3d\n",
+	       sj->vp->start - sj->vp->dp + f_str->noff,
+	       sj->vp->stop - sj->vp->dp + f_str->noff,
+	       sj->vp->start,sj->vp->stop,sj->vp->dp,
+	       f_str->noff,sj->vp->score);
+       fprintf(stderr,"%3d - %3d: %3d\n",
+	       sj->vp->start,sj->vp->stop,sj->vp->score);
+       */
+       /* overwritten on every pass; the last block in the chain wins */
+       a_res->max1 = sj->vp->stop;
+       a_res->max0 = a_res->max1 - sj->vp->dp + f_str->noff;
+     }
+
+     /*
+     fprintf(stderr,"(%3d - %3d):(%3d - %3d)\n",
+     a_res->min0,a_res->max0,a_res->min1,a_res->max1);
+     */
+
+     /* now replace f_str->aa0t with aa0 */
+     for (i=0; i<n0; i++) f_str->aa0t[i] = aa0[i];
+
+     return tres;
+   }
+   else return (0);
+}
+
+/* shscore() - score of the sequence aligned against itself (the 100%
+   identical score): the sum of pam2[r][r] over aa0[0..n0-1], leaving
+   out residues that are 0 or larger than nsq. */
+int
+shscore(unsigned char *aa0, int n0, int **pam2, int nsq)
+{
+  int pos = 0, total = 0;
+  int res;
+
+  while (pos < n0) {
+    res = aa0[pos++];
+    if (res == 0 || res > nsq) continue;	/* terminator or out of range */
+    total += pam2[res][res];
+  }
+  return total;
+}
+
+/* pre_cons() - with TFAST, translate the library sequence aa1 (length
+   n1) in the given frame into f_str->aa1x and record the value
+   aatran() returns in f_str->n10.  Without TFAST it does nothing. */
+void
+pre_cons(const unsigned char *aa1, int n1, int frame, struct f_struct *f_str)
+{
+#ifdef TFAST
+  int n_trans;
+
+  n_trans = aatran(aa1, f_str->aa1x, n1, frame);
+  f_str->n10 = n_trans;
+#endif
+}
+
+/* aln_func_vals - set up aln.qlfact, qlrev, llfact, llmult, frame, llrev */
+/* call from calcons, calc_id, calc_code */
+/* With TFAST the library coordinates use a factor/multiplier of 3 and
+   llrev is 1 for frames > 3, 0 otherwise; without TFAST every factor
+   is 1 and nothing is reversed.  aln->frame is always set to 0.
+
+   llrev is assigned on every call: previously the TFAST branch only
+   set it for frame > 3, so a reused a_struct kept a stale llrev=1
+   when a later call had frame <= 3. */
+void 
+aln_func_vals(int frame, struct a_struct *aln) {
+
+#ifdef TFAST
+  aln->qlrev = 0;
+  aln->qlfact = 1;
+  aln->llfact = aln->llmult = 3;
+  aln->frame = 0;
+  if (frame > 3) aln->llrev = 1;
+  else aln->llrev = 0;
+#else	/* FASTF */
+  aln->llfact = aln->qlfact = aln->llmult = 1;
+  aln->llrev = aln->qlrev = 0;
+  aln->frame = 0;
+#endif
+}
+
+/* aa0shuffle() - despite the name, this reverses the column order of
+   the query fragments in place (a random column shuffle was the
+   earlier variant): for each of the f_str->nm0 fragments, spaced
+   f_str->nmoff apart in aa0[], columns 0 .. nmoff-2 are mirrored
+   about the middle.  n0 is not referenced. */
+void aa0shuffle(unsigned char *aa0, int n0, struct f_struct *f_str) {
+
+  int lo, hi, row, base;
+  unsigned char held;
+
+  /* walk inward from both ends; stop when the indexes meet or cross */
+  for (lo = 0, hi = f_str->nmoff - 2; lo < hi; lo++, hi--) {
+
+    /* exchange column lo and column hi in every fragment */
+    for (row = 0, base = 0; row < f_str->nm0; row++, base += f_str->nmoff) {
+      held = aa0[base + hi];
+      aa0[base + hi] = aa0[base + lo];
+      aa0[base + lo] = held;
+    }
+  }
+}
diff --git a/src/dropfs2.c b/src/dropfs2.c
new file mode 100644
index 0000000..516037e
--- /dev/null
+++ b/src/dropfs2.c
@@ -0,0 +1,1681 @@
+/*   $Id: dropfs2.c 1254 2014-01-29 16:03:40Z wrp $ */
+/* $Revision: 1254 $  */
+
+/* copyright (c) 1998, 1999, 2014 by William R. Pearson and The Rector &
+   Visitors of the University of Virginia */
+
+/* Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under this License is distributed on an "AS
+   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+   express or implied.  See the License for the specific language
+   governing permissions and limitations under the License. 
+*/
+
+/* changed to return 2.0, rather than -1.0, for failure */
+
+/* Feb 4, 2005 - modifications to allow searches with ktup=2 for very
+   long queries.  This is a temporary solution to savemax(), spam()
+   which do not preserve exact matches
+
+   do_fasts() has been modified to allow higher maxsav for do_walign
+   than for do_work (2*nsegs, 6*nsegs)
+ */
+
+/* this code implements the "fasts" algorithm, which compares a set of
+   protein fragments to a protein sequence.  Comma's are used to separate
+   the sequence fragments, which need not be the same length.
+
+   The expected input is:
+
+   >mgstm1
+   MGDAPDFD,
+   MILGYW,
+   MLLEYTDS
+
+   The fragments do not need to be in the correct order (which is
+   presumably unknown from the peptide sequencing).
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <math.h>
+
+#include "defs.h"
+#include "param.h"
+#include "tatstats.h"
+
+#define EOSEQ 0
+#define ESS 59
+#define NMAP_X 21 /* for 'X' - changed for NCBI */
+#define NMAP_Z 25 /* for '*' - changed for NCBI */
+#define MAXHASH 32
+#define NMAP MAXHASH+1
+
+static char *verstr="4.32 Feb 2007";
+
+#define DROP_INTERN
+#include "drop_func.h"
+
+int shscore(const unsigned char *aa0, const int n0, int **pam2, int nsq);
+extern void aancpy(char *to, char *from, int count, struct pstruct *ppst);
+
+#ifdef TFAST
+extern int aatran(const unsigned char *ntseq, unsigned char *aaseq, const int maxs, const int frame);
+#endif
+
+void savemax(struct dstruct *, struct f_struct *, int maxsav, int exact,int t_end);
+
+int spam(const unsigned char *, const unsigned char *, int, struct savestr *, int **, struct f_struct *);
+int sconn(struct savestr **v,
+	  int nsave,
+	  struct f_struct *,
+	  struct rstruct *,
+	  const struct pstruct *,
+	  const unsigned char *aa0, int n0,
+	  const unsigned char *aa1, int n1,
+	  int opt_prob);
+
+void kpsort(struct savestr **, int);
+void kssort(struct savestr **, int);	/* sort by score */
+int sconn_a(unsigned char *, int, 
+	    const unsigned char *, int,
+	    struct f_struct *, 
+	    struct a_res_str *,
+	    struct pstruct *);
+void kpsort(struct savestr **, int);
+
+/* initialize for fasta */
+
+/* init_work() - allocate and fill the per-query work space.
+
+   aa0, n0  - query: peptides separated by ESS (or 0).  aa0[] IS
+              modified: each separator is replaced by EOSEQ.
+   ppst     - search parameters; param_u.fa.pgap is set, ktup may be
+              reduced to param_u.fa.bktup, and hsq[NMAP_X]/hsq[NMAP_Z]
+              are changed from NMAP to 1
+   f_arg    - receives the newly allocated f_struct
+
+   Any allocation failure that is checked ends the process with
+   exit(1).
+
+   NOTE(review): several locals (pamfact, old_hv, kt1-independent
+   ones such as fact) are set or declared but not used afterwards.
+*/
+void
+init_work (unsigned char *aa0, const int n0, 
+	   struct pstruct *ppst,
+	   struct f_struct **f_arg
+	   )
+{
+   int mhv, phv;
+   int hmax, nsegs;
+   int i0, ib, hv, old_hv;
+   int pamfact;
+   struct f_struct *f_str;
+   /* these used to be globals, but do not need to be */
+   int ktup, fact, kt1;
+
+   int maxn0;
+   int stmp;	/* temporary score */
+   int tmp_zsflag;
+   int i, j, q;
+   int tat_size;
+   int *res;
+
+   unsigned char *query;
+   int k, l, m, n, N, length, index;
+
+   double *tatprobptr;
+
+   /* NOTE(review): the result of this calloc is not checked before
+      f_str is dereferenced below */
+   f_str = (struct f_struct *)calloc(1,sizeof(struct f_struct));
+
+   ppst->param_u.fa.pgap = ppst->gdelval + ppst->ggapval;
+   ktup = ppst->param_u.fa.ktup;
+   if ( ktup  > ppst->param_u.fa.bktup ) {
+     ktup = ppst->param_u.fa.ktup = ppst->param_u.fa.bktup;
+   }
+   fact = ppst->param_u.fa.scfact;
+
+   /* fasts3 cannot work with lowercase symbols as low complexity;
+      thus, NMAP must be disabled; this depends on aascii['X']  */
+   if (ppst->hsq[NMAP_X] == NMAP ) {ppst->hsq[NMAP_X]=1;}
+   if (ppst->hsq[NMAP_Z] == NMAP ) {ppst->hsq[NMAP_Z]=1;}
+   /* this does not work in a threaded environment */
+   /*    else {fprintf(stderr," cannot find 'X'==NMAP\n");} */
+
+   /* mhv: largest hash value below NMAP used by any residue */
+   for (i0 = 1, mhv = -1; i0 < ppst->nsq; i0++)
+      if (ppst->hsq[i0] < NMAP && ppst->hsq[i0] > mhv)  mhv = ppst->hsq[i0];
+
+   if (mhv <= 0) {
+      fprintf (stderr, " maximum hsq <=0 %d\n", mhv);
+      exit (1);
+   }
+
+   /* kshft: number of bits needed to hold mhv */
+   for (f_str->kshft = 0; mhv > 0; mhv /= 2) f_str->kshft++;
+
+/*      kshft = 2;	*/
+   kt1 = ktup-1;
+   /* hmax = 2^(kshft*ktup): size of the ktup-word hash table;
+      hmask keeps the hash bits of the previous ktup-1 residues */
+   hv = 1;
+   for (i0 = 0; i0 < ktup; i0++) hv = hv << f_str->kshft;
+   hmax = hv;
+   f_str->hmask = (hmax >> f_str->kshft) - 1;
+
+   /* per-position arrays, all n0+1 long */
+   if ((f_str->aa0t = (unsigned char *) calloc(n0+1, sizeof(char))) == NULL) {
+     fprintf (stderr, " cannot allocate f_str0->aa0t array; %d\n",n0+1);
+     exit (1);
+   }
+
+   if ((f_str->aa0ti = (int *) calloc(n0+1, sizeof(int))) == NULL) {
+     fprintf (stderr, " cannot allocate f_str0->aa0ti array; %d\n",n0+1);
+     exit (1);
+   }
+
+   if ((f_str->aa0b = (int *) calloc(n0+1, sizeof(int))) == NULL) {
+     fprintf (stderr, " cannot allocate f_str0->aa0b array; %d\n",n0+1);
+     exit (1);
+   }
+
+   if ((f_str->aa0e = (int *) calloc(n0+1, sizeof(int))) == NULL) {
+     fprintf (stderr, " cannot allocate f_str0->aa0e array; %d\n",n0+1);
+     exit (1);
+   }
+
+   if ((f_str->aa0i = (int *) calloc(n0+1, sizeof(int))) == NULL) {
+     fprintf (stderr, " cannot allocate f_str0->aa0i array; %d\n",n0+1);
+     exit (1);
+   }
+
+   if ((f_str->aa0s = (int *) calloc(n0+1, sizeof(int))) == NULL) {
+     fprintf (stderr, " cannot allocate f_str0->aa0s array; %d\n",n0+1);
+     exit (1);
+   }
+
+   if ((f_str->aa0l = (int *) calloc(n0+1, sizeof(int))) == NULL) {
+     fprintf (stderr, " cannot allocate f_str0->aa0l array; %d\n",n0+1);
+     exit (1);
+   }
+
+   /* hash tables: harr[] heads, pamh1[]/pamh2[] self-scores, link[]
+      chains */
+   if ((f_str->harr = (int *) calloc (hmax, sizeof (int))) == NULL) {
+     fprintf (stderr, " cannot allocate hash array: hmax: %d hmask: %d\n",
+	      hmax, f_str->hmask);
+     exit (1);
+   }
+   if ((f_str->pamh1 = (int *) calloc (ppst->nsq+1, sizeof (int))) == NULL) {
+     fprintf (stderr, " cannot allocate pamh1 array\n");
+     exit (1);
+   }
+   if ((f_str->pamh2 = (int *) calloc (hmax, sizeof (int))) == NULL) {
+     fprintf (stderr, " cannot allocate pamh2 array\n");
+     exit (1);
+   }
+
+   if ((f_str->link = (int *) calloc (n0, sizeof (int))) == NULL) {
+     fprintf (stderr, " cannot allocate hash link array");
+     exit (1);
+   }
+
+   /* for FASTS/FASTM, we want to know when we get to the end of a peptide,
+      so we can ensure that we set the end and restart */
+
+   if ((f_str->l_end = (int *) calloc (n0, sizeof (int))) == NULL) {
+     fprintf (stderr, " cannot allocate link end array");
+     exit (1);
+   }
+
+   /* -1 marks an empty hash bucket / end of a chain */
+   for (i0 = 0; i0 < hmax; i0++) f_str->harr[i0] = -1;
+   for (i0 = 0; i0 < n0; i0++) f_str->link[i0] = -1;
+   for (i0 = 0; i0 < n0; i0++) f_str->l_end[i0] = 0;
+
+   /* count the number of peptides */
+   nsegs = 1;
+   for (i0 = 0; i0 < n0; i0++) {
+     if (aa0[i0] == ESS || aa0[i0] == 0) nsegs++;
+   }
+
+   /* allocate space for peptides offsets, nm_u */
+   if ((f_str->nmoff = (int *)calloc(nsegs+1, sizeof(int)))==NULL) {
+     fprintf(stderr, " cannot allocat nmoff array: %d\n", nsegs);
+     exit(1);
+   }
+
+   if ((f_str->nm_u = (int *)calloc(nsegs+1, sizeof(int)))==NULL) {
+     fprintf(stderr, " cannot allocat nm_u array: %d\n", nsegs);
+     exit(1);
+   }
+
+   /* nmoff[k] is the offset in aa0[] of the first residue of peptide
+      k; nm0 counts the peptides found so far */
+   phv = hv = 0;
+   f_str->nmoff[0] = 0;
+   f_str->nm0 = 1;
+
+   /* encode the aa0 array */
+   /* with ktup > 1 the first residue primes the hash and self-score */
+   if (kt1 > 0) {
+     hv = ppst->hsq[aa0[0]];
+     phv = ppst->pam2[0][aa0[0]][aa0[0]];
+   }
+
+   for (i0=kt1 ; i0 < n0; i0++) {
+     if (aa0[i0] == ESS || aa0[i0] == 0) {
+       /*       fprintf(stderr," converted %d to 0\n",aa0[i0]); */
+       aa0[i0] = EOSEQ;	/* set ESS to 0 */
+       f_str->nmoff[f_str->nm0++] = i0+1; 
+       /* NOTE(review): if i0 == 0 here (ktup == 1 and the query
+	  starts with a separator) this writes l_end[-1] - confirm
+	  that input is excluded earlier */
+       f_str->l_end[i0-1] = 1;
+       phv = hv = 0;
+       if (kt1 > 0) {
+	 /* re-prime with the first residue of the next peptide.
+	    NOTE(review): aa0[i0] is read after the increment without
+	    checking i0 < n0 - confirm aa0 is terminated */
+	 i0++;
+	 hv = ppst->hsq[aa0[i0]];
+	 phv = ppst->pam2[0][aa0[i0]][aa0[i0]];
+       }
+       continue;
+     }
+
+     /* shift the new residue into the word hash and push position i0
+	onto the front of that word's chain */
+     hv = ((hv & f_str->hmask) << f_str->kshft) + ppst->hsq[aa0[i0]];
+     f_str->link[i0] = f_str->harr[hv];
+     f_str->harr[hv] = i0;
+     /* pamh2[hv]: self-score of the word ending at i0; the residue
+	leaving the ktup window is subtracted for the next position */
+     f_str->pamh2[hv] = (phv += ppst->pam2[0][aa0[i0]][aa0[i0]]);
+     phv -= ppst->pam2[0][aa0[i0 - kt1]][aa0[i0 - kt1]];
+   }
+   f_str->l_end[n0-1] = 1;
+
+   /* sentinel offset, as if one more peptide started after the end */
+   f_str->nmoff[f_str->nm0] = n0+1;
+
+   /*
+#ifdef DEBUG
+   fprintf(stderr, ">>%s\n",qtitle);
+   for (j=0; j<f_str->nm0; j++) {
+     for (i=f_str->nmoff[j]; i < f_str->nmoff[j+1]-1; i++) {
+       fprintf(stderr,"%c",ppst->sq[aa0[i]]);
+     }
+     fprintf(stderr," %d\n",aa0[i]);
+   }
+
+   for (j=1; j<=ppst->nsq; j++) {
+     fprintf(stderr, "%c %d\n", ppst->sq[j], f_str->harr[j]);
+   }
+
+   for (j=0; j<=n0; j++) {
+     fprintf(stderr, "%c %d\n", ppst->sq[aa0[j]], f_str->link[j]);
+   }
+
+#endif
+   */
+
+   /* build an integer array of the max score that can be achieved
+      from that position - use in savemax to mark some segments as
+      fixed */
+
+   /* setup aa0b[], aa0e[], which specify the begining and end of each
+      segment */
+
+   /* aa0l[i0]: 1-based position inside the peptide; aa0i[i0]: peptide
+      number; aa0s[]: self-score of the whole peptide, stored at the
+      peptide's last residue */
+   stmp = 0;
+   q = -1;	/* index of the most recent EOSEQ */
+   for (ib = i0 = 0; i0 < n0; i0++) {
+     f_str->aa0l[i0] = i0 - q;
+     if (aa0[i0]==EOSEQ) {
+       /* NOTE(review): the -1 stored in aa0b/aa0e/aa0i here is
+	  overwritten by the unconditional assignments below (with
+	  the values for the NEXT peptide, since ib has already been
+	  incremented); only aa0l keeps -1 - confirm intent */
+       f_str->aa0b[i0] = -1;
+       f_str->aa0e[i0] = -1;
+       f_str->aa0i[i0] = -1;
+       f_str->aa0l[i0] = -1;
+       q = i0;
+       if (i0 > 0)f_str->aa0s[i0-1] = stmp;
+       stmp = 0;
+       ib++;
+     }
+     else {
+       stmp += ppst->pam2[0][aa0[i0]][aa0[i0]];
+     }
+
+     f_str->aa0b[i0] =  f_str->nmoff[ib];
+     f_str->aa0e[i0] =  f_str->nmoff[ib+1]-2;
+     f_str->aa0i[i0] =  ib;
+
+     /*
+     fprintf(stderr,"%2d %c: %2d %2d %2d\n",i0,ppst->sq[aa0[i0]],
+	     f_str->aa0b[i0],f_str->aa0e[i0],f_str->aa0i[i0]);
+     */
+   }
+   f_str->aa0s[n0-1]=stmp;	/* save last best possible score */
+
+   /* maxsav - maximum number of peptide alignments saved in search */
+   /* maxsav_w - maximum number of peptide alignments saved in
+      alignment */
+
+   f_str->maxsav = max(MAXSAV,2*f_str->nm0);
+   f_str->maxsav_w = max(MAXSAV,6*f_str->nm0);
+
+   /* all three arrays are sized for the larger (alignment) limit */
+   if ((f_str->vmax = (struct savestr *)
+	calloc(f_str->maxsav_w,sizeof(struct savestr)))==NULL) {
+     fprintf(stderr, "Couldn't allocate vmax[%d].\n",f_str->maxsav_w);
+     exit(1);
+   }
+
+   if ((f_str->vptr = (struct savestr **)
+	calloc(f_str->maxsav_w,sizeof(struct savestr *)))==NULL) {
+     fprintf(stderr, "Couldn't allocate vptr[%d].\n",f_str->maxsav_w);
+     exit(1);
+   }
+
+   if ((f_str->sarr = (struct slink *)
+	calloc(f_str->maxsav_w,sizeof(struct slink)))==NULL) {
+     fprintf(stderr, "Couldn't allocate sarr[%d].\n",f_str->maxsav_w);
+     exit(1);
+   }
+
+   /* Tatusov Statistics Setup */
+
+   /* initialize priors array. */
+   if((f_str->priors = (double *)calloc(ppst->nsq+1, sizeof(double))) == NULL) {
+     fprintf(stderr, "Couldn't allocate priors array.\n");
+     exit(1);
+   }
+
+   calc_priors(f_str->priors, ppst, f_str, NULL, 0, ppst->pseudocts);
+
+   /* pre-calculate the Tatusov probability array for each full segment */
+
+   /* reduce zsflag to its last digit range (strip +10 / +20); the
+      tables are only built for zsflag 1..3 and at most 10 peptides */
+   tmp_zsflag = ppst->zsflag;
+   if (tmp_zsflag > 20) tmp_zsflag -= 20;
+   if (tmp_zsflag > 10) tmp_zsflag -= 10;
+   if (tmp_zsflag >= 1 && tmp_zsflag <= 3 && f_str->nm0 <= 10) {
+
+     /* one table per non-empty subset of the nm0 peptides */
+     tat_size = (1<<f_str->nm0) -1;
+     f_str->dotat = 1;
+     f_str->tatprobs = (struct tat_str **) malloc((size_t)tat_size*sizeof(struct tat_str *));
+     if (f_str->tatprobs == NULL) {
+       /* NOTE(review): the argument has type size_t but the format
+	  is %ld */
+       fprintf (stderr, " cannot allocate tatprobs array: %ld\n",
+		tat_size * sizeof(struct tat_str *));
+       exit (1);
+     }
+
+     f_str->intprobs = (double **) malloc((size_t)tat_size * sizeof(double *));
+     if(f_str->intprobs == NULL) {
+       fprintf(stderr, "Couldn't allocate intprobs array.\n");
+       exit(1);
+     }
+
+     for(k = 0, l = f_str->nm0 ; k < l ; k++) {
+       query = &(aa0[f_str->nmoff[k]]);
+       /* nmoff[] steps include the separator, hence the -1 */
+       length = f_str->nmoff[k+1] - f_str->nmoff[k] - 1;
+
+       /* this segment alone */
+       index = (1 << k) - 1;
+       generate_tatprobs(query, 0, length - 1, f_str->priors, ppst->pam2[0], ppst->nsq, &(f_str->tatprobs[index]), NULL);
+
+       /* integrate the probabilities */
+       /* intprobs[index][i] = sum of probs[i..N] */
+       N = f_str->tatprobs[index]->highscore - f_str->tatprobs[index]->lowscore;
+       tatprobptr = (double *) calloc(N+1, sizeof(double));
+       if(tatprobptr == NULL) {
+	 fprintf(stderr, "Couldn't calloc tatprobptr.\n");
+	 exit(1);
+       }
+       f_str->intprobs[index] = tatprobptr;
+
+       for (i = 0; i <= N ; i++ ) {
+	 tatprobptr[i] = f_str->tatprobs[index]->probs[i];
+	 for (j = i + 1 ; j <= N ; j++ ) {
+	   tatprobptr[i] += f_str->tatprobs[index]->probs[j];
+	 }
+       }
+
+       /* this segment built on top of all other subcombinations */
+       /* tables 0 .. (1<<k)-2 hold the combinations of peptides
+	  0..k-1; each is extended with peptide k and stored at
+	  (1<<k)+i */
+       for(i = 0, j = (1 << k) - 1 ; i < j ; i++) {
+	 index = (1 << k) + i;
+	 generate_tatprobs(query, 0, length - 1, f_str->priors, ppst->pam2[0], ppst->nsq, &(f_str->tatprobs[index]), f_str->tatprobs[i]);
+
+	 /* integrate the probabilities */
+	 N = f_str->tatprobs[index]->highscore - f_str->tatprobs[index]->lowscore;
+	 tatprobptr = (double *) calloc(N+1, sizeof(double));
+	 if(tatprobptr == NULL) {
+	   fprintf(stderr, "Couldn't calloc tatprobptr.\n");
+	   exit(1);
+	 }
+	 f_str->intprobs[index] = tatprobptr;
+       
+	 for (m = 0; m <= N ; m++ ) {
+	   tatprobptr[m] = f_str->tatprobs[index]->probs[m];
+	   for (n = m + 1 ; n <= N ; n++ ) {
+	     tatprobptr[m] += f_str->tatprobs[index]->probs[n];
+	   }
+	 }
+       }
+     }
+   } else {
+     f_str->dotat = 0;
+     f_str->shuff_cnt = ppst->shuff_node;
+   }
+
+   /* End of Tatusov Statistics Setup */
+
+   /*
+   for (i0=1; i0<=ppst->nsq; i0++) {
+     fprintf(stderr," %c: %2d ",ppst->sq[i0],f_str->harr[i0]);
+     hv = f_str->harr[i0];
+     while (hv >= 0) {
+       fprintf(stderr," %2d",f_str->link[hv]);
+       hv = f_str->link[hv];
+     }
+     fprintf(stderr,"\n");
+   }
+   */
+
+/* this has been modified from 0..<ppst->nsq to 1..<=ppst->nsq because the
+   pam2[0][0] is now undefined for consistency with blast
+*/
+   /* pamh1[r]: self-score of the single residue r */
+   for (i0 = 1; i0 <= ppst->nsq; i0++)
+     f_str->pamh1[i0] = ppst->pam2[0][i0][i0];
+
+   f_str->ndo = 0;
+   f_str->noff = n0-1;
+   /* f_str was calloc'ed above, so diag is still zeroed here */
+   if (f_str->diag==NULL) 
+     f_str->diag = (struct dstruct *) calloc ((size_t)MAXDIAG,
+					      sizeof (struct dstruct));
+   if (f_str->diag == NULL) {
+      fprintf (stderr, " cannot allocate diagonal arrays: %ld\n",
+	      (long) MAXDIAG * (long) (sizeof (struct dstruct)));
+      exit (1);
+   }
+
+#ifdef TFAST
+   if ((f_str->aa1x =(unsigned char *)calloc((size_t)ppst->maxlen+2,
+					     sizeof(unsigned char)))
+       == NULL) {
+     fprintf (stderr, "cannot allocate aa1x array %d\n", ppst->maxlen+2);
+     exit (1);
+   }
+   /* leave one zeroed byte in front of the sequence; whoever frees
+      this buffer has to pass aa1x - 1 */
+   f_str->aa1x++;
+#endif
+
+   /* buffer for the alignment ops: 1.5 * n0 entries, at least MIN_RES */
+   maxn0 = max(3*n0/2,MIN_RES);
+   if ((res = (int *)calloc((size_t)maxn0,sizeof(int)))==NULL) {
+     fprintf(stderr,"cannot allocate alignment results array %d\n",maxn0);
+     exit(1);
+   }
+   f_str->res = res;
+   f_str->max_res = maxn0;
+
+   *f_arg = f_str;
+}
+
+
+/* get_param() - describe the program and its scoring parameters.
+
+   pstring1[0] receives "<program> (<version>)", followed by " init1"
+   when ppstr->param_u.fa.iniflag is set.
+   pstring1[1] receives the matrix name, its (pam_h:pam_l) values and
+   ktup.
+   pstring2, when not NULL, receives the same information (plus the
+   gap penalties) as "; pg_..." lines, the markx==10 format.
+
+   The buffers belong to the caller (the message to the manager is
+   currently 512 bytes).  s_info is not used.
+*/
+void
+get_param (const struct pstruct *ppstr,
+           char **pstring1, char *pstring2,
+           struct score_count_s *s_info)
+{
+  /* program name: FASTS/FASTM, with a leading T for the translated build */
+#ifdef FASTS
+#ifdef TFAST
+  char *pg_str="TFASTS";
+#else
+  char *pg_str="FASTS";
+#endif
+#endif
+
+#ifdef FASTM
+#ifdef TFAST
+  char *pg_str="TFASTM";
+#else
+  char *pg_str="FASTM";
+#endif
+#endif
+
+  sprintf (pstring1[0], "%s (%s)", pg_str, verstr);
+  sprintf (pstring1[1], "%s matrix (%d:%d), ktup=%d",
+           ppstr->pam_name, ppstr->pam_h, ppstr->pam_l,
+           ppstr->param_u.fa.ktup);
+
+  if (ppstr->param_u.fa.iniflag) {
+    strcat(pstring1[0], " init1");
+  }
+
+  if (pstring2 == NULL) {
+    return;
+  }
+
+  sprintf (pstring2,
+           "; pg_name_alg: %s\n"
+           "; pg_ver_rel: %s\n"
+           "; pg_matrix: %s (%d:%d)\n"
+           "; pg_gap-pen: %d %d\n"
+           "; pg_ktup: %d\n",
+           pg_str, verstr,
+           ppstr->pam_name, ppstr->pam_h, ppstr->pam_l,
+           ppstr->gdelval, ppstr->ggapval,
+           ppstr->param_u.fa.ktup);
+}
+
+/* close_work() - release every buffer owned by *f_arg, then the
+   structure itself, and set *f_arg to NULL.  A NULL *f_arg is left
+   alone.  aa0, n0 and ppst are not used.
+*/
+void
+close_work (const unsigned char *aa0, const int n0,
+            struct pstruct *ppst,
+            struct f_struct **f_arg)
+{
+  struct f_struct *fs = *f_arg;
+  int ix, n_tat;
+
+  if (fs == NULL) {
+    return;
+  }
+
+  free(fs->res);
+#ifdef TFAST
+  /* aa1x was incremented after it was allocated; step back to the
+     address calloc() returned */
+  free(fs->aa1x - 1);
+#endif
+  free(fs->diag);
+  free(fs->l_end);
+  free(fs->link);
+  free(fs->pamh2);
+  free(fs->pamh1);
+  free(fs->harr);
+  free(fs->vmax);
+  free(fs->vptr);
+  free(fs->sarr);
+  free(fs->aa0l);
+  free(fs->aa0s);
+  free(fs->aa0i);
+  free(fs->aa0e);
+  free(fs->aa0b);
+  free(fs->aa0ti);
+  free(fs->aa0t);
+  free(fs->nmoff);
+  free(fs->nm_u);
+
+  if (fs->dotat) {
+    /* one tatprobs/intprobs entry for each of the (2^nm0)-1 slots */
+    n_tat = (1 << fs->nm0) - 1;
+    for (ix = 0; ix < n_tat; ix++) {
+      free(fs->tatprobs[ix]->probs);
+      free(fs->tatprobs[ix]);
+      free(fs->intprobs[ix]);
+    }
+    free(fs->tatprobs);
+    free(fs->intprobs);
+  }
+
+  free(fs->priors);
+  free(fs);
+  *f_arg = NULL;
+}
+
+/* do_fasts() - score library sequence aa1[0..n1-1] against the query.
+
+   1. Reset the diagonal array (entries f_str->ndo .. n0+n1-1) and the
+      maxsav slots of f_str->vmax[].
+   2. Scan aa1 with the ktup hash (f_str->harr[]/link[]); every query
+      position that matches updates the run on its diagonal, and runs
+      worth keeping are stored with savemax().
+   3. Re-score each saved run with spam() and collect the positive ones
+      in f_str->vptr[].
+   4. Sort by score, keep only the best run for each query segment
+      (f_str->aa0i[] gives the segment index), and join the survivors
+      with sconn().
+
+   Results: rst->score[], rst->escore, rst->segnum, rst->seglen, and
+   f_str->nsave / f_str->vptr[], which do_walign() reads afterwards.
+   Every early return sets f_str->nsave = 0 so that a caller can never
+   pick up runs left over from a previous library sequence.
+
+   hoff is not used.
+*/
+void do_fasts (const unsigned char *aa0, const int n0,
+	       const unsigned char *aa1, const int n1,
+	       const struct pstruct *ppst, struct f_struct *f_str,
+	       struct rstruct *rst, int *hoff, int opt_prob,
+	       int maxsav)
+{
+   int     nd;		/* diagonal array size */
+   int     lhval;
+   int     kfact;
+   register struct dstruct *dptr;
+   register int tscor;
+   register struct dstruct *diagp;
+   struct dstruct *dpmax;
+   register int lpos;
+   int     tpos;
+   struct savestr *vmptr, *vmaxmax;
+   int     scor;
+   int     ib, nsave;
+   int ktup;
+   int doffset;
+
+
+   vmaxmax = &f_str->vmax[maxsav];
+
+   ktup = ppst->param_u.fa.ktup;
+
+   /* library sequence shorter than one word: nothing can match */
+   if (n1 < ktup) {
+     rst->score[0] = rst->score[1] = rst->score[2] = 0;
+     rst->escore = 1.0;
+     rst->segnum = 0;
+     rst->seglen = 0;
+     f_str->nsave = 0;	/* no runs saved for this sequence */
+     return;
+   }
+
+   /* the diagonal array cannot hold n0+n1 entries */
+   if (n0+n1+1 >= MAXDIAG) {
+     fprintf(stderr,"n0,n1 too large: %d, %d\n",n0,n1);
+     rst->score[0] = rst->score[1] = rst->score[2] = -1;
+     rst->escore = 2.0;
+     rst->segnum = 0;
+     rst->seglen = 0;
+     f_str->nsave = 0;	/* no runs saved for this sequence */
+     return;
+   }
+
+   nd = n0 + n1;
+
+   /* entries below ndo were already cleared at the end of the last call */
+   dpmax = &f_str->diag[nd];
+   for (dptr = &f_str->diag[f_str->ndo]; dptr < dpmax;)
+   {
+      dptr->stop = -1;
+      dptr->dmax = NULL;
+      dptr++->score = 0;
+   }
+
+   for (vmptr = f_str->vmax; vmptr < vmaxmax; vmptr++) {
+      vmptr->score = 0;
+      vmptr->exact = 0;
+   }
+   f_str->lowmax = f_str->vmax;
+   f_str->lowscor = 0;
+
+   /* start hashing */
+   diagp = &f_str->diag[f_str->noff];
+   for (lhval=lpos=0; lpos < n1; lpos++, diagp++) {
+     if (ppst->hsq[aa1[lpos]]>=NMAP) {	/* skip residue */
+       lpos++ ; diagp++;
+       while (lpos < n1 && ppst->hsq[aa1[lpos]]>=NMAP) {lpos++; diagp++;}
+       if (lpos >= n1) break;
+       lhval = 0;
+     }
+
+     lhval = ((lhval & f_str->hmask) << f_str->kshft) + ppst->hsq[aa1[lpos]];
+
+     /* walk every query position that has this hash value */
+     for (tpos = f_str->harr[lhval]; tpos >= 0; tpos = f_str->link[tpos]) {
+
+       dptr = &diagp[-tpos];
+
+       if (f_str->l_end[tpos]) {
+	 if (dptr->score + f_str->pamh1[aa0[tpos]] == f_str->aa0s[tpos]) {
+	   dptr->stop = lpos;
+	   dptr->score = f_str->aa0s[tpos];
+	   savemax(dptr, f_str, maxsav, 1, tpos);
+	   dptr->dmax = NULL;
+	 }
+
+	 else if (dptr->score + f_str->pamh1[aa0[tpos]] > f_str->aa0s[tpos]) {
+	   /*
+	   fprintf(stderr,"exact match score too high: %d:%d %d < %d + %d - %d:%d - %d > %d\n",
+		   tpos, lpos, f_str->aa0s[tpos],dptr->score, f_str->pamh1[aa0[tpos]],
+		   dptr->start, dptr->stop,
+		   dptr->stop - dptr->start, f_str->aa0l[tpos]);
+	   */
+	   dptr->stop = lpos;
+	   dptr->start = lpos - f_str->aa0l[tpos];
+	   dptr->score = f_str->aa0s[tpos];
+	   savemax(dptr, f_str, maxsav, 1, tpos);
+	   dptr->dmax = NULL;
+	 }
+       }
+       else if ((tscor = dptr->stop) >= 0) {
+	 tscor++;	/* tscor is stop of current, increment it */
+	 if ((tscor -= lpos) <= 0) {  /* tscor, the end of the current
+					 match, is before lpos, so there
+					 is a mismatch - this is also the
+					 mismatch cost */
+	   tscor *= 2;
+	   scor = dptr->score;	/* save the run score on the diag */
+	   if ((tscor += (kfact = f_str->pamh2[lhval])) < 0 
+	       && f_str->lowscor < scor) {
+	     /* if what we will get (tscor + kfact) is < 0 and the
+		score is better than the worst savemax() score, save
+		it */
+	     savemax (dptr, f_str, maxsav,0,-1);
+	   }
+
+	   /* if extending is better than starting over, extend */
+	   if ((tscor += scor) >= kfact) {
+	     dptr->score = tscor;
+	     dptr->stop = lpos;
+	     if (f_str->l_end[tpos]) {
+	       if (dptr->score == f_str->aa0s[tpos]) {
+		 savemax(dptr, f_str, maxsav,1,tpos);
+		 dptr->dmax = NULL;
+	       }
+	       else if (dptr->score > f_str->lowscor)
+		 savemax(dptr, f_str, maxsav,0,tpos);
+	     }
+	   }
+	   else {     /* otherwise, start new */
+	     dptr->score = kfact;
+	     dptr->start = dptr->stop = lpos;
+	   }
+	 } 
+	 else { /* tscor is after lpos, so extend one residue */
+	   dptr->score += f_str->pamh1[aa0[tpos]];
+	   dptr->stop = lpos;
+	   if (f_str->l_end[tpos]) {
+	     if (dptr->score == f_str->aa0s[tpos]) {
+	       savemax(dptr, f_str, maxsav,1,tpos);
+	       dptr->dmax = NULL;
+	     }
+	     else if (dptr->score > f_str->lowscor)
+	       savemax(dptr, f_str, maxsav,0,tpos);
+	   }
+	 }
+       }
+       else {	/* start new */
+	 dptr->score = f_str->pamh2[lhval];
+	 dptr->start = dptr->stop = lpos;
+       }
+     }				/* end tpos */
+   }				/* end lpos */
+
+   /* save any run still open on a diagonal, and clear the array for
+      the next call */
+   for (dptr = f_str->diag; dptr < dpmax;) {
+     if (dptr->score > f_str->lowscor) savemax (dptr, f_str, maxsav,0,-1);
+     dptr->stop = -1;
+     dptr->dmax = NULL;
+     dptr++->score = 0;
+   }
+   f_str->ndo = nd;
+
+/*
+        at this point all of the elements of aa1[lpos]
+        have been searched for elements of aa0[tpos]
+        with the results in diag[dpos]
+*/
+
+   for (nsave=0, vmptr=f_str->vmax; vmptr< vmaxmax; vmptr++) {
+      if (vmptr->score > 0) {
+	/*
+
+	fprintf(stderr,"%c 0: %4d-%4d  1: %4d-%4d  dp: %d score: %d",
+		(vmptr->exact ? 'x' : ' '),
+		f_str->noff+vmptr->start-vmptr->dp,
+		f_str->noff+vmptr->stop-vmptr->dp,
+		vmptr->start,vmptr->stop,
+		vmptr->dp,vmptr->score);
+	*/
+	vmptr->score = spam (aa0, aa1, n1, vmptr, ppst->pam2[0], f_str);
+	/*
+	fprintf(stderr,"  sscore: %d %d-%d\n",vmptr->score,vmptr->start,vmptr->stop);
+	*/
+	if (vmptr->score > 0) f_str->vptr[nsave++] = vmptr;
+      }
+   }
+
+   if (nsave <= 0) {
+     rst->score[0] = rst->score[1] = rst->score[2] = 0;
+     rst->escore = 1.0;
+     rst->segnum = 0;
+     rst->seglen = 0;
+     f_str->nsave = 0;
+     return;
+   }
+
+   /*
+   fprintf(stderr,"n0: %d; n1: %d; noff: %d\n",n0,n1,f_str->noff);
+   for (ib=0; ib<nsave; ib++) {
+     fprintf(stderr,"%c 0: %4d-%4d  1: %4d-%4d  dp: %d score: %d\n",
+	     f_str->vptr[ib]->exact ? 'x' : ' ',
+	     f_str->noff+f_str->vptr[ib]->start-f_str->vptr[ib]->dp,
+	     f_str->noff+f_str->vptr[ib]->stop-f_str->vptr[ib]->dp,
+	     f_str->vptr[ib]->start,f_str->vptr[ib]->stop,
+	     f_str->vptr[ib]->dp,f_str->vptr[ib]->score);
+   }
+
+   fprintf(stderr,"---\n");
+   */
+   kssort(f_str->vptr,nsave);
+
+   /* make certain each seg is used only once */
+
+   for (ib=0; ib<f_str->nm0; ib++) f_str->nm_u[ib]=0;
+   for (ib=0; ib < nsave; ib++) {
+     doffset = f_str->vptr[ib]->dp - f_str->noff;
+     tpos=f_str->aa0i[f_str->vptr[ib]->start - doffset];
+     if (f_str->nm_u[tpos] == 0) {
+       f_str->nm_u[tpos]=1;
+     } else {
+       f_str->vptr[ib]->score = -1;
+     }
+   }
+
+   /* the runs marked -1 sort to the end; trim them off */
+   kssort(f_str->vptr,nsave);
+   for (ib = nsave-1; ib >= 0; ib--) {
+     if (f_str->vptr[ib]->score > -1) break;
+   }
+   nsave = ib+1;
+
+#ifdef DEBUG
+   /*
+   for (ib = 0; ib < nsave; ib++) {
+     if (f_str->vptr[ib]->score > 1000) {
+       fprintf(stderr," score[%d] too high: %d\n",ib, f_str->vptr[ib]->score);
+       for (i=0; i< 10; i++) {
+	 fprintf(stderr, "%c:%d ",ppst->sq[aa1[i]],aa1[i]);
+       }
+       fprintf(stderr,"\n");
+
+       f_str->vptr[ib]->score = 0;
+     }
+   }
+   */
+#endif
+
+   scor = sconn (f_str->vptr, nsave, 
+		 f_str, rst, ppst, aa0, n0, aa1, n1,
+		 opt_prob);
+
+   if (rst->escore < 0.0) rst->escore = 2.0;
+   kssort(f_str->vptr,nsave);	/* sconn() re-ordered vptr[] by position */
+
+   /* here we should use an nsave that is consistent with sconn and nm0 */
+
+   f_str->nsave = nsave;
+   if (nsave > f_str->nm0) f_str->nsave = f_str->nm0;
+
+   rst->score[1] = f_str->vptr[0]->score;	/* best single run */
+   rst->score[0] = rst->score[2] = max(scor, f_str->vptr[0]->score);
+
+}
+
+/* do_work() - score one library sequence for the search pass.
+
+   A shuffled-sequence request (qr_flg == 1) is refused once
+   f_str->shuff_cnt has run out: rst->valid_stat is cleared and the
+   scores are set to -1.  Otherwise the score counters in s_info are
+   bumped, opt_prob is chosen from f_str->dotat, ppst->zsflag and
+   qr_flg, and the sequence (translated first in the TFAST build) is
+   handed to do_fasts().  shuff_flg is not used.
+*/
+void do_work (const unsigned char *aa0, const int n0,
+              const unsigned char *aa1, const int n1,
+              int frame,
+              const struct pstruct *ppst, struct f_struct *f_str,
+              int qr_flg, int shuff_flg, struct rstruct *rst,
+              struct score_count_s *s_info)
+{
+  int opt_prob;
+  int hoff, n10, i;
+
+  if (qr_flg==1 && f_str->shuff_cnt <= 0) {
+    rst->valid_stat = 0;
+    rst->escore = 2.0;
+    rst->score[2] = -1;
+    rst->score[1] = -1;
+    rst->score[0] = -1;
+    return;
+  }
+  rst->valid_stat = 1;
+
+  s_info->s_cnt[ppst->score_ix]++;
+  s_info->tot_scores++;
+
+  if (qr_flg) {
+    /* shuffled sequences always get a probability, and use up one
+       unit of the shuffle budget */
+    opt_prob = 1;
+    f_str->shuff_cnt--;
+  }
+  else if (ppst->zsflag == 2 || ppst->zsflag == 12) {
+    opt_prob = 0;
+  }
+  else if (f_str->dotat || ppst->zsflag == 4 || ppst->zsflag == 14) {
+    opt_prob = 1;
+  }
+  else {
+    opt_prob = 0;
+  }
+
+  if (n1 < ppst->param_u.fa.ktup) {
+    rst->score[2] = -1;
+    rst->score[1] = -1;
+    rst->score[0] = -1;
+    rst->escore = 2.0;
+    return;
+  }
+
+#ifdef TFAST
+  n10=aatran(aa1,f_str->aa1x,n1,frame);
+  if (ppst->debug_lib) {
+    for (i=0; i<n10; i++) {
+      if (f_str->aa1x[i]>ppst->nsq) {
+        fprintf(stderr,
+                "residue[%d/%d] %d range (%d)\n",i,n1,
+                f_str->aa1x[i],ppst->nsq);
+        f_str->aa1x[i]=0;
+        n10=i-1;
+      }
+    }
+  }
+
+  do_fasts (aa0, n0, f_str->aa1x, n10, ppst, f_str, rst, &hoff, opt_prob, f_str->maxsav);
+#else	/* FASTA */
+  do_fasts (aa0, n0, aa1, n1, ppst, f_str, rst, &hoff, opt_prob, f_str->maxsav);
+#endif
+
+  rst->H = -1.0;
+  rst->comp = rst->H;
+}
+
+/* do_opt() - re-score one library sequence with opt_prob forced on;
+   otherwise the same call into do_fasts() that do_work() makes.
+   In the TFAST build the sequence is translated with aatran() first.
+*/
+void do_opt (const unsigned char *aa0, const int n0,
+             const unsigned char *aa1, const int n1,
+             int frame,
+             struct pstruct *ppst, struct f_struct *f_str,
+             struct rstruct *rst)
+{
+  int hoff;
+#ifdef TFAST
+  int n10;
+
+  n10 = aatran(aa1, f_str->aa1x, n1, frame);
+  do_fasts (aa0, n0, f_str->aa1x, n10, ppst, f_str, rst,
+            &hoff, 1, f_str->maxsav);
+#else	/* FASTA */
+  do_fasts (aa0, n0, aa1, n1, ppst, f_str, rst,
+            &hoff, 1, f_str->maxsav);
+#endif
+}
+
+
+/* modify savemax() so that full length 100% matches are marked
+   so that they cannot be removed - if we have a 100% match, mark "exact"
+
+   modify savemax() to split alignments that include a comma
+*/
+
+/* savemax(dptr, f_str, maxsav) takes a current diagonal run (saved in dptr),
+   and places it in the set of runs to be saved (in  f_str->vmax[])
+*/
+
+/* savemax() - record the run on diagonal entry dptr in f_str->vmax[].
+
+   If dptr->dmax already points at a slot for this diagonal with the
+   same start, that slot's stop is refreshed and, when the run's score
+   has improved, its score and exact flag as well.  Otherwise the run
+   overwrites the slot f_str->lowmax points at.  Whenever the lowest
+   slot may have changed, the first maxsav slots are rescanned so that
+   f_str->lowmax / f_str->lowscor name the lowest-scoring slot that is
+   not flagged exact.  tpos is not used.
+*/
+void 
+savemax (struct dstruct *dptr, struct f_struct *f_str, int maxsav,
+         int exact, int tpos)
+{
+  const int diag_ix = (int) (dptr - f_str->diag);	/* current diagonal */
+  struct savestr *const slot_end = &f_str->vmax[maxsav];
+  struct savestr *slot = dptr->dmax;
+  int bound;		/* running minimum for the rescan */
+
+  if (slot != NULL && slot->dp == diag_ix && slot->start == dptr->start) {
+    /* continuation of a run that is already saved */
+    slot->stop = dptr->stop;
+
+    /* not better than what is stored: leave the slot alone; if the
+       run gets bad enough it restarts in the diagonal scan */
+    bound = dptr->score;
+    if (bound <= slot->score) {
+      return;
+    }
+
+    slot->score = bound;
+    slot->exact = exact;
+
+    /* only an improvement to the lowest slot can move the minimum */
+    if (slot != f_str->lowmax) {
+      return;
+    }
+  }
+  else {
+    /* a new run: it takes over the lowest slot */
+    slot = f_str->lowmax;
+
+    slot->exact = exact;
+    slot->score = dptr->score;
+    bound = slot->score;
+    slot->dp = diag_ix;
+    slot->start = dptr->start;
+    slot->stop = dptr->stop;
+    dptr->dmax = slot;
+  }
+
+  /* find the new lowest score among the slots not marked exact */
+  for (slot = f_str->vmax; slot < slot_end; slot++) {
+    if (!slot->exact && slot->score < bound) {
+      bound = slot->score;
+      f_str->lowmax = slot;
+    }
+  }
+
+  f_str->lowscor = bound;
+}
+
+/* this version of spam scans the diagonal to find the best local score,
+   then resets the boundaries for a global alignment and re-scans */
+
+/* NOOVERHANG allows one to score any overhanging alignment as zero.
+   Useful for SAGE alignments.  Normally, one allows overhangs because
+   of the possibility of partial sequences.
+*/
+
+#undef NOOVERHANG
+
+/* 
+   May, 2005 - spam() has an interesting bug that occurs when two
+   peptides match in order, separated by one position (the comma).  In
+   this case, spam() splits the match, and only returns the better of
+   the two matches.  So, if spam splits an alignment at a comma, it
+   needs the ability to insert the missing match.
+
+*/
+
+/* spam() - re-score the saved run dmax along its diagonal.
+
+   Pass 1 scans library positions dmax->start..dmax->stop and finds the
+   best-scoring local stretch (the running total restarts whenever it
+   drops below zero).  Returns 0 if no stretch scores above zero.
+
+   Pass 2 widens that stretch to the whole query segment containing its
+   end point, using f_str->aa0b[] / f_str->aa0e[], clips the result to
+   the library sequence, and re-scores it end to end.
+
+   dmax->start and dmax->stop are updated to the widened, clipped
+   range; both are inclusive library positions.  The pass-2 score is
+   returned and may be <= 0.
+*/
+int spam (const unsigned char *aa0, const unsigned char *aa1,int n1,
+	  struct savestr *dmax, int **pam2,
+	  struct f_struct *f_str)
+{
+   int     lpos, doffset;
+   int     tot;
+   struct {
+     int  start, stop, score;
+   } curv, maxv;
+   register const unsigned char *aa0p, *aa1p;
+
+   /* stop/start are only read after a positive score has set them;
+      initialize them anyway so that is visible to the compiler */
+   curv.start = curv.stop = dmax->start;
+   maxv.start = maxv.stop = dmax->start;
+   aa1p = &aa1[dmax->start];
+   doffset = dmax->dp - f_str->noff;	/* library pos - query pos */
+   aa0p = &aa0[dmax->start - doffset];
+
+   tot = curv.score = maxv.score = 0;
+   for (lpos = dmax->start; lpos <= dmax->stop; lpos++) {
+     tot += pam2[*aa0p++][*aa1p++];
+     if (tot > curv.score) {
+       curv.stop = lpos;	/* here, curv.stop is actually curv.max */
+       curv.score = tot;
+     }
+     else if (tot < 0) {
+       if (curv.score > maxv.score) {
+	 maxv.start = curv.start;
+	 maxv.stop = curv.stop;
+	 maxv.score = curv.score;
+       }
+       tot = curv.score = 0;
+       curv.start = lpos+1;
+     }
+   }
+
+   if (curv.score > maxv.score) {
+     maxv.start = curv.start;
+     maxv.stop = curv.stop;
+     maxv.score = curv.score;
+   }
+
+   if (maxv.score <= 0) return 0;
+
+   /* now, reset the boundaries of the alignment using aa0b[]
+      and aa0e[], which specify the residues that start and end
+      the segment */
+      
+   maxv.start = f_str->aa0b[maxv.stop-doffset] + doffset;
+   if (maxv.start < 0) {
+     maxv.start = 0;
+#ifdef NOOVERHANG
+     return 0;
+#endif
+   }
+
+   /* maxv.stop is an inclusive index, so n1 itself is already one past
+      the last library residue and must be clipped too */
+   maxv.stop = f_str->aa0e[maxv.stop-doffset] + doffset;
+   if (maxv.stop >= n1) {
+     maxv.stop = n1-1;
+#ifdef NOOVERHANG
+     return 0;
+#endif
+   }
+   aa1p = &aa1[lpos = maxv.start];
+   aa0p = &aa0[lpos - doffset];
+
+   for (tot=0; lpos <= maxv.stop; lpos++) {
+     tot += pam2[*aa0p++][*aa1p++];
+   }
+
+   maxv.score = tot;
+
+/*	if (maxv.start != dmax->start || maxv.stop != dmax->stop)
+		printf(" new region: %3d %3d %3d %3d\n",maxv.start,
+			dmax->start,maxv.stop,dmax->stop);
+*/
+   dmax->start = maxv.start;
+   dmax->stop = maxv.stop;
+
+   return maxv.score;
+}
+/* sconn() - join the n saved runs in v[] into the best consistent chain.
+
+   v[] is re-sorted by library start (kpsort).  Each run is copied into
+   f_str->sarr[] and offered, in list order, to the candidates already
+   on the list headed by 'start' (linked through ->next): a candidate
+   is acceptable when it ends in the library before the new run starts
+   and the two do not collide in the query.  Without opt_prob the first
+   acceptable candidate becomes ->prev and the scores are summed.  With
+   opt_prob the link is kept only if calc_tatusov() reports a
+   probability no worse than the run on its own; otherwise the next
+   candidate is tried.  The new entry is then placed on the list, which
+   is ordered by score (or by tatprob, then score, with opt_prob).
+
+   On return rst->escore, rst->segnum and rst->seglen describe the
+   chain at the head of the list, and that chain's score is returned
+   (0, with escore 1.0, when n is 0).  aa0/n0 and aa1/n1 are only
+   passed on to calc_tatusov().  The cmpp() declaration and 'dotat'
+   are not used.
+*/
+int sconn (struct savestr **v, int n, 
+	   struct f_struct *f_str,
+	   struct rstruct *rst, const struct pstruct *ppst,
+	   const unsigned char *aa0, int n0,
+	   const unsigned char *aa1, int n1, int opt_prob)
+{
+   int     i, si, cmpp ();
+   struct slink *start, *sl, *sj, *so, *sarr;
+   int     lstart, ltmp, tstart, plstop, ptstop, ptstart, tstop;
+   double  tatprob;
+   int     dotat;
+
+   sarr = f_str->sarr;
+
+   /*  sort the score left to right in lib pos */
+   kpsort (v, n);
+
+   start = NULL;
+   rst->score[0] = 0;
+   rst->escore = 2.0;
+
+/*  for the remaining runs, see if they fit */
+/*  lstart/lstop -> start/stop in library sequence
+    tstart/tstop -> start/stop in query sequence
+    plstop, ptstart/ptstop -> the same positions for a run that is
+    already on the list (the "previous" run)
+*/
+
+   for (i = 0, si = 0; i < n; i++) {
+
+     /* the segment is worth adding; find out where? */
+     lstart = v[i]->start;
+     ltmp = v[i]->stop;
+     tstart = lstart - v[i]->dp + f_str->noff;
+     tstop = ltmp - v[i]->dp + f_str->noff;
+
+     /*	put the run in the group */
+     sarr[si].vp = v[i];
+     sarr[si].score = v[i]->score;
+     sarr[si].next = NULL;
+     sarr[si].prev = NULL;
+     sarr[si].tat = NULL;
+
+/*
+  opt_prob for FASTS only has to do with using aa1 for priors,
+  i.e. we always calculate tatprobs for segments in FASTS (unlike
+  FASTF)
+*/
+     if(opt_prob) {
+       sarr[si].tatprob = 
+	 calc_tatusov(NULL, &sarr[si], aa0, n0, aa1, n1, 
+		      ppst->pam2[0], ppst->nsq, f_str, 
+		      ppst->pseudocts, opt_prob, ppst->zsflag);
+       if (sarr[si].tatprob < 0.0) {
+	 fprintf(stderr," negative tatprob: %lg\n",sarr[si].tatprob);
+	 sarr[si].tatprob = 1.0;
+       }
+       sarr[si].tat = sarr[si].newtat;	/* presumably newtat is filled in by calc_tatusov() - TODO confirm */
+     }
+
+/*  if it fits, then increase the score
+
+    start points to the highest scoring run
+    -> next is the second highest, etc.
+    put the segment into the highest scoring run that it fits into
+*/
+     for (sl = start; sl != NULL; sl = sl->next) {
+       ltmp = sl->vp->start;
+ /* plstop -> previous lstop */
+       plstop = sl->vp->stop;
+ /* ptstart -> previous t(query) start */
+       ptstart = ltmp - sl->vp->dp + f_str->noff;
+ /* ptstop -> previous t(query) stop */
+       ptstop = plstop - sl->vp->dp + f_str->noff;
+#ifndef FASTM
+ /* if the previous library stop is before the current library start,
+    and the two runs do not overlap in the query (either order) */
+       if (plstop < lstart && ( ptstop < tstart || ptstart > tstop))
+#else
+ /* if the previous library stop is before the current library start,
+    and the previous query stop is before the current query start */
+       if (plstop < lstart && ptstop < tstart)
+#endif
+       {
+	 if(!opt_prob) {
+	    sarr[si].score = sl->score + v[i]->score;
+	    sarr[si].prev = sl;
+	    break;
+	  } else {
+	    tatprob = calc_tatusov(sl, &sarr[si], aa0, n0, aa1, n1, 
+				   ppst->pam2[0], ppst->nsq, f_str, 
+				   ppst->pseudocts, opt_prob, ppst->zsflag);
+	    /* if our tatprob gets worse when we add this, forget it */
+	    if(tatprob > sarr[si].tatprob) {
+	      free(sarr[si].newtat->probs); /* get rid of new tat struct */
+	      free(sarr[si].newtat);
+	      continue; /* reuse this sarr[si] */
+	    } else {
+	      sarr[si].tatprob = tatprob;
+	      free(sarr[si].tat->probs); /* get rid of old tat struct */
+	      free(sarr[si].tat);
+	      sarr[si].tat = sarr[si].newtat;
+	      sarr[si].prev = sl;
+	      sarr[si].score = sl->score + v[i]->score;
+	      /*
+		fprintf(stderr,"sconn %d added %d:%d getting %d; si: %d, tat: %g\n",
+		i,v[i]->start, v[i]->score,sarr[si].score,si, tatprob);
+	      */
+	      break;
+	    }
+	  }
+	}
+      }
+      
+      /* now recalculate where the score fits.
+	 NOTE(review): an entry that is not better than any entry
+	 already on the list runs off the end of the loop below without
+	 being linked in, so later runs cannot chain onto it - confirm
+	 that this is intended. */
+      if (start == NULL) start = &sarr[si];
+      else {
+	if(!opt_prob) {
+	  for (sj = start, so = NULL; sj != NULL; sj = sj->next) {
+	    if (sarr[si].score > sj->score) {
+	      sarr[si].next = sj;
+	      if (so != NULL)
+		so->next = &sarr[si];
+	      else
+		start = &sarr[si];
+	      break;
+	    }
+	    so = sj;
+	  }
+	} else {
+	  for (sj = start, so = NULL; sj != NULL; sj = sj->next) {
+	    if ( sarr[si].tatprob < sj->tatprob ||
+		 ((sarr[si].tatprob == sj->tatprob) && sarr[si].score > sj->score) ) {
+	      sarr[si].next = sj;
+	      if (so != NULL)
+		so->next = &sarr[si];
+	      else
+		start = &sarr[si];
+	      break;
+	    }
+	    so = sj;
+	  }
+	}
+      }
+
+      si++;
+   }
+      
+   if(opt_prob) {	/* every sarr[] entry used above still owns one tat struct */
+     for (i = 0 ; i < si ; i++) {
+       free(sarr[i].tat->probs);
+       free(sarr[i].tat);
+     }
+   }
+
+   if (start != NULL) {
+     if(opt_prob) {
+       rst->escore = start->tatprob;
+     } else {
+       rst->escore = 2.0;
+     }
+
+     rst->segnum = rst->seglen = 0;
+     for(sj = start ; sj != NULL; sj = sj->prev) {	/* walk the best chain */
+       rst->segnum++;
+       rst->seglen += sj->vp->stop - sj->vp->start + 1;
+     }
+     return (start->score);
+   } else {
+     rst->escore = 1.0;
+   }
+
+   rst->segnum = rst->seglen = 0;
+   return (0);
+}
+
+/* kssort() - Shell sort of the n pointers in v[] by ->score, highest
+   score first.  Written with a prototype-style parameter list, like the
+   rest of this file; old-style (K&R) definitions are no longer valid
+   in C23.
+*/
+void
+kssort (struct savestr *v[], int n)
+{
+   int     gap, i, j;
+   struct savestr *tmp;
+
+   for (gap = n / 2; gap > 0; gap /= 2)
+      for (i = gap; i < n; i++)
+	 for (j = i - gap; j >= 0; j -= gap)
+	 {
+	    /* pair already in order: this chain is done */
+	    if (v[j]->score >= v[j + gap]->score)
+	       break;
+	    tmp = v[j];
+	    v[j] = v[j + gap];
+	    v[j + gap] = tmp;
+	 }
+}
+
+/* kpsort() - Shell sort of the n pointers in v[] by ->start, lowest
+   start first.  Written with a prototype-style parameter list, like the
+   rest of this file; old-style (K&R) definitions are no longer valid
+   in C23.
+*/
+void
+kpsort (struct savestr *v[], int n)
+{
+   int     gap, i, j;
+   struct savestr *tmp;
+
+   for (gap = n / 2; gap > 0; gap /= 2)
+      for (i = gap; i < n; i++)
+	 for (j = i - gap; j >= 0; j -= gap)
+	 {
+	    /* pair already in order: this chain is done */
+	    if (v[j]->start <= v[j + gap]->start)
+	       break;
+	    tmp = v[j];
+	    v[j] = v[j + gap];
+	    v[j + gap] = tmp;
+	 }
+}
+
+/* shscore() - score of aa0[0..n0-1] aligned against itself (the 100%
+   identical score): the sum of pam2[r][r] over every residue r that is
+   not EOSEQ and is <= nsq.
+*/
+int
+shscore(const unsigned char *aa0, const int n0, int **pam2, int nsq)
+{
+  int pos;
+  int total = 0;
+  unsigned char r;
+
+  for (pos = 0; pos < n0; pos++) {
+    r = aa0[pos];
+    if (r == EOSEQ || r > nsq) {
+      continue;		/* separators and out-of-alphabet codes add nothing */
+    }
+    total += pam2[r][r];
+  }
+  return total;
+}
+
+/* krsort() - Shell sort of the n pointers in v[] by ->stop, highest
+   stop first, i.e. alignments are ordered from right to left (back to
+   front).  Entries with equal ->stop are still swapped, because the
+   comparison is strict.  Written with a prototype-style parameter list,
+   like the rest of this file; old-style (K&R) definitions are no
+   longer valid in C23.
+*/
+void
+krsort (struct savestr *v[], int n)
+{
+   int     gap, i, j;
+   struct savestr *tmp;
+
+   for (gap = n / 2; gap > 0; gap /= 2)
+      for (i = gap; i < n; i++)
+	 for (j = i - gap; j >= 0; j -= gap)
+	 {
+	    /* pair already in order: this chain is done */
+	    if (v[j]->stop > v[j + gap]->stop)
+	       break;
+	    tmp = v[j];
+	    v[j] = v[j + gap];
+	    v[j + gap] = tmp;
+	 }
+}
+
+/* do_walign() - build the alignment result for one library sequence.
+
+   The sequence is scored again with do_fasts() (using
+   f_str->maxsav_w), and the runs do_fasts() leaves in f_str are turned
+   into an alignment encoding by sconn_a().
+
+   Returns a freshly calloc()ed a_res_str, or NULL if that allocation
+   fails.  *have_ares is set to 0x02 (local copy) in every case.
+   a_res->res is NULL when do_fasts() saved no runs; otherwise it
+   points at f_str->res.  repeat_thresh is not used.
+*/
+struct a_res_str *
+do_walign (const unsigned char *aa0, int n0,
+           const unsigned char *aa1, int n1,
+           int frame, int repeat_thresh,
+           struct pstruct *ppst, 
+           struct f_struct *f_str, 
+           int *have_ares)
+{
+  struct a_res_str *a_res;
+  struct rstruct rst;
+  const unsigned char *aa1p;
+  unsigned char *aa0t;
+  int hoff, n10, i;
+
+  /* the a_res for this function must always be re-calculated */
+  *have_ares = 0x02;	/* set 0x2 bit to indicate local copy */
+
+  a_res = (struct a_res_str *)calloc(1, sizeof(struct a_res_str));
+  if (a_res == NULL) {
+    fprintf(stderr," [do_walign] Cannot allocate a_res");
+    return NULL;
+  }
+
+#ifdef TFAST
+  n10 = aatran(aa1,f_str->aa1x,n1,frame);
+  f_str->n10 = n10;
+  aa1p = f_str->aa1x;
+#else
+  n10 = n1;
+  aa1p = aa1;
+#endif
+
+  do_fasts(aa0, n0, aa1p, n10, ppst, f_str, &rst, &hoff, 1, f_str->maxsav_w);
+  a_res->sw_score = rst.score[0];
+  memcpy(&a_res->rst, &rst, sizeof(rst));
+
+  /* the alignment is built from what do_fasts() left in f_str; the
+     display code expects the same encoding bd_align and sw_align
+     produce, which sconn_a() generates */
+
+  a_res->max1 = 0;
+  a_res->max0 = 0;
+  a_res->min1 = 0;
+  a_res->min0 = 0;
+
+  if (f_str->nsave <= 0) {
+    a_res->res = NULL;
+    return a_res;
+  }
+
+  /* drop trailing runs whose score is not positive */
+  while (f_str->nsave > 0 && f_str->vptr[f_str->nsave-1]->score <= 0) {
+    f_str->nsave--;
+  }
+
+  aa0t = (unsigned char *)calloc(n0+1,sizeof(unsigned char));
+  if (aa0t == NULL) {
+    fprintf(stderr," cannot allocate aa0t %d\n",n0+1);
+    exit(1);
+  }
+
+  /* copy aa0[] into both the scratch buffer and f_str->aa0t[] */
+  i = 0;
+  while (i < n0) {
+    f_str->aa0t[i] = aa0t[i] = aa0[i];
+    i++;
+  }
+  f_str->aa0t[i] = aa0t[i] = '\0';
+
+  a_res->nres = sconn_a (aa0t,n0,aa1p,n10,f_str, a_res, ppst);
+
+  free(aa0t);
+
+  a_res->res = f_str->res;
+  return a_res;
+}
+
+/* sconn_a() - the sconn() chaining, extended to produce the alignment.
+
+   Works on the runs in f_str->vptr[0 .. f_str->nsave-1]; returns 0 at
+   once when there are none or when vptr[0]->score is not positive.
+   The runs are sorted back to front (krsort) and chained exactly as in
+   sconn() with probabilistic scoring, so following ->prev from the
+   best chain's head walks the runs from left to right in the library.
+
+   For that chain the function
+     - writes the encoding into f_str->res[]: a 0 for every aligned
+       position and the gap length between consecutive runs,
+     - packs the query residues of the runs together at the front of
+       aa0[] and their segment indexes into f_str->aa0ti[],
+     - shifts each run's ->dp to match the packed query,
+     - sets a_res->min0/min1/max0/max1 and f_str->aa0t_off,
+     - copies the first n0 bytes of the packed aa0[] back into
+       f_str->aa0t[].
+
+   Returns the total alignment length (aligned positions plus gaps).
+   aa0 must be a writable scratch copy of the query holding n0+1 bytes.
+*/
+int sconn_a (unsigned char *aa0, int n0,
+	     const unsigned char *aa1, int n1,
+	     struct f_struct *f_str,
+	     struct a_res_str *a_res,
+	     struct pstruct *ppst)
+{
+   int     i, si, n;
+   unsigned char *aa0p;
+   int sx, dx, doff, *aa0tip;
+
+   struct savestr **v;
+   struct slink *start, *sl, *sj, *so, *sarr;
+   int     lstart, lstop, plstart, tstart, plstop, ptstop, ptstart, tstop;
+
+   int *res, nres, tres;
+
+   double tatprob;
+
+   v = f_str->vptr;
+   n = f_str->nsave;
+   sarr = f_str->sarr;
+
+   /* nothing to align.  Past this point v[0]->score > 0, so at least
+      one run enters the list below and 'start' cannot stay NULL */
+   if (n <=0 || v[0]->score <= 0) return 0;
+
+   krsort (v, n);	/* sort from right to left (by stop) in library */
+
+   start = NULL;
+
+   /*	for each alignment, see if it fits */
+
+   for (i = 0, si = 0; i < n; i++) {
+     /*	if the score is less than the join threshold, skip it */
+
+     if (v[i]->score < 0) continue;
+
+     lstart = v[i]->start;
+     lstop = v[i]->stop;
+     tstart = lstart - v[i]->dp + f_str->noff;
+     tstop = lstop - v[i]->dp + f_str->noff;
+
+     /*	put the alignment in the group */
+
+     sarr[si].vp = v[i];
+     sarr[si].score = v[i]->score;
+     sarr[si].next = NULL;
+     sarr[si].prev = NULL;
+     sarr[si].tat = NULL;
+
+     sarr[si].tatprob = 
+       calc_tatusov(NULL, &sarr[si], aa0, n0, aa1, n1, 
+		    ppst->pam2[0], ppst->nsq, f_str, 
+		    ppst->pseudocts, 1, ppst->zsflag);
+     sarr[si].tat = sarr[si].newtat;
+
+
+     /* 	if it fits, then increase the score */
+     /* start points to a sorted (by total score) list of candidate
+	overlaps */
+
+     for (sl = start; sl != NULL; sl = sl->next) { 
+       plstart = sl->vp->start;
+       plstop = sl->vp->stop;
+       ptstart = plstart - sl->vp->dp + f_str->noff;
+       ptstop = plstop - sl->vp->dp + f_str->noff;
+       /* NOTE(review): the FASTM test compares plstop with lstart,
+	  where the FASTS test compares plstart with lstop - confirm
+	  that the difference is intended */
+#ifndef FASTM
+       if (plstart > lstop && (ptstop < tstart || ptstart > tstop)) {
+#else
+       if (plstop > lstart && ptstart > tstop) {
+#endif
+	 /* alignment always uses probabilistic scoring ... */
+
+	 tatprob = calc_tatusov(sl, &sarr[si], aa0, n0, aa1, n1, 
+				ppst->pam2[0], ppst->nsq, f_str, 
+				ppst->pseudocts, 1, ppst->zsflag);
+	 /* if our tatprob gets worse when we add this, forget it */
+	 if(tatprob > sarr[si].tatprob) {
+	   free(sarr[si].newtat->probs); /* get rid of new tat struct */
+	   free(sarr[si].newtat);
+	   continue; /* reuse this sarr[si] */
+	 } else {
+	   sarr[si].tatprob = tatprob;
+	   free(sarr[si].tat->probs); /* get rid of old tat struct */
+	   free(sarr[si].tat);
+	   sarr[si].tat = sarr[si].newtat;
+	   sarr[si].prev = sl;
+	   sarr[si].score = sl->score + v[i]->score;
+	   /*
+	     fprintf(stderr,"sconn %d added %d/%d getting %d; si: %d, tat: %g\n",
+	     i,v[i]->start, v[i]->score,sarr[si].score,si, tatprob);
+	   */
+	   break;
+	 }
+       }
+     }
+
+     /* now recalculate the list of best scores */
+     if (start == NULL)
+       start = &sarr[si];	/* put the first one in the list */
+     else
+       for (sj = start, so = NULL; sj != NULL; sj = sj->next) {
+	 /* if (sarr[si].score > sj->score) { */ /* new score better than old */
+	 if ( sarr[si].tatprob < sj->tatprob ||
+	      ((sarr[si].tatprob == sj->tatprob) && sarr[si].score > sj->score) ) {
+	   sarr[si].next = sj;		/* next best after new score */
+	   if (so != NULL)
+	     so->next = &sarr[si];	/* prev_best->next points to best */
+	   else  start = &sarr[si];	/* start points to best */
+	   break;			/* stop looking */
+	 }
+	 so = sj;		/* previous candidate best */
+       }
+     si++;				/* increment to next alignment */
+   }
+
+   /* every sarr[] entry used above still owns one tat struct */
+   for (i = 0 ; i < si ; i++) {
+     free(sarr[i].tat->probs);
+     free(sarr[i].tat);
+   }
+
+   res = f_str->res;
+   tres = nres = 0;
+   aa0p = aa0;
+   aa0tip = f_str->aa0ti;	/* point to temporary index */
+   a_res->min1 = start->vp->start;
+   a_res->min0 = 0;
+
+   sx=start->vp->start-start->vp->dp+f_str->noff;
+   f_str->aa0t_off = sx - f_str->aa0b[sx];
+
+   for (sj = start; sj != NULL; sj = sj->prev ) {
+     /* how far this run's query residues move when packed into aa0[] */
+     doff = (int)(aa0p-aa0) - (sj->vp->start-sj->vp->dp+f_str->noff);
+     
+     /* fprintf(stderr,"doff: %3d\n",doff); */
+     
+     for (dx=sj->vp->start,sx=sj->vp->start-sj->vp->dp+f_str->noff;
+	  dx <= sj->vp->stop; dx++) {
+       *aa0tip++ = f_str->aa0i[sx];	/* save index */
+       *aa0p++ = f_str->aa0t[sx++];	/* save sequence at index */
+       tres++;
+       res[nres++] = 0;
+     }
+     sj->vp->dp -= doff;
+     if (sj->prev != NULL) {
+       /* library residues skipped between this run and the next */
+       if (sj->prev->vp->start - sj->vp->stop - 1 > 0 )
+	 tres += res[nres++] = (sj->prev->vp->start - sj->vp->stop - 1);
+     }
+
+     /*
+     fprintf(stderr,"t0: %3d, tx: %3d, l0: %3d, lx: %3d, dp: %3d noff: %3d, score: %3d\n",
+       sj->vp->start - sj->vp->dp + f_str->noff,
+       sj->vp->stop - sj->vp->dp + f_str->noff,
+       sj->vp->start,sj->vp->stop,sj->vp->dp,
+       f_str->noff,sj->vp->score);
+
+       fprintf(stderr,"%3d - %3d: %3d\n",
+       sj->vp->start,sj->vp->stop,sj->vp->score);
+     */
+     a_res->max1 = sj->vp->stop+1;
+     a_res->max0 = a_res->max1 - sj->vp->dp + f_str->noff;
+   }
+   *aa0p = '\0';	/* be sure to terminate the string */
+
+   /*
+   fprintf(stderr,"(%3d - %3d):(%3d - %3d)\n",
+	   a_res->min0,a_res->max0,a_res->min1,a_res->max1);
+   */
+   
+   /* now replace f_str->aa0t with aa0
+      (f_str->aa0t is permanent, aa0 is not)*/
+   for (i=0; i<n0; i++) f_str->aa0t[i] = aa0[i];
+
+   return tres;
+}
+
+/* pre_cons() - preparation step before the consensus/alignment
+   display.  For fasts (and fastf) the only work is the translation
+   needed by the TFAST build: aa1 is translated in the given frame into
+   f_str->aa1x and the translated length is stored in f_str->n10.
+   Without TFAST the function does nothing.
+*/
+void
+pre_cons(const unsigned char *aa1, int n1, int frame, struct f_struct *f_str)
+{
+#ifdef TFAST
+  f_str->n10 = aatran(aa1, f_str->aa1x, n1, frame);
+#endif
+}
+
+/* aln_func_vals() - fill in aln->qlfact, qlrev, llfact, llmult, llrev
+   and frame.  Called from calcons, calc_id, calc_code.
+
+   TFAST build: library factors are 3, and the library is marked
+   reversed for frame > 3.  Otherwise every factor is 1 and nothing is
+   reversed.  aln->frame is always set to 0.
+*/
+void 
+aln_func_vals(int frame, struct a_struct *aln)
+{
+  aln->frame = 0;
+  aln->qlrev = 0;
+  aln->qlfact = 1;
+
+#ifdef TFAST
+  aln->llmult = 3;
+  aln->llfact = aln->llmult;
+  aln->llrev = (frame > 3) ? 1 : 0;
+#else	/* FASTS */
+  aln->llmult = 1;
+  aln->llfact = aln->llmult;
+  aln->llrev = 0;
+#endif
+}
+
+/* aaptrshuffle() - reverse res[0..n-1] in place.  (The random shuffle
+   this is named after is disabled; only the reversal is done.)
+   Nothing is changed when n <= 1.
+*/
+void aaptrshuffle(unsigned char *res, int n) {
+
+  int hi, lo;
+  unsigned char held;
+
+  /* hi walks down from the last element, lo is its mirror image */
+  for (hi = n - 1; hi != 0; hi--) {
+
+    lo = (n - 1) - hi;
+    if (hi <= lo) {
+      break;		/* the two ends have met */
+    }
+
+    held = res[hi];
+    res[hi] = res[lo];
+    res[lo] = held;
+  }
+}
+
+/* aa0shuffle() - apply aaptrshuffle() to each of the f_str->nm0 query
+   fragments.  Fragment k starts at aa0[f_str->nmoff[k]]; its length is
+   taken as nmoff[k+1] - nmoff[k] - 1.  n0 is not used.
+*/
+void aa0shuffle(unsigned char *aa0, int n0, struct f_struct *f_str) {
+
+  int frag;
+  int frag_off, frag_len;
+
+  for (frag = 0; frag < f_str->nm0; frag++) {
+    frag_off = f_str->nmoff[frag];
+    frag_len = f_str->nmoff[frag+1] - f_str->nmoff[frag] - 1;
+
+    aaptrshuffle(&(aa0[frag_off]), frag_len);
+  }
+
+}
diff --git a/src/dropfx.c b/src/dropfx.c
new file mode 100644
index 0000000..fcd9799
--- /dev/null
+++ b/src/dropfx.c
@@ -0,0 +1,4072 @@
+/*  $Id: dropfx.c 1280 2014-08-21 00:47:55Z wrp $ */
+
+/* copyright (c) 1998, 1999, 2014 by William R. Pearson and The Rector &
+   Visitors of the University of Virginia */
+
+/* Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under this License is distributed on an "AS
+   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+   express or implied.  See the License for the specific language
+   governing permissions and limitations under the License. 
+*/
+
+/* implements the fastx algorithm, see:
+
+   W. R. Pearson, T. Wood, Z. Zhang, and W. Miller (1997) "Comparison of
+   DNA sequences with protein sequences" Genomics 46:24-36
+
+   see dropnfa.c for better variable descriptions and comments
+*/
+   
+/* 17-Sept-2008 - modified for multiple non-overlapping alignments */
+
+/* 18-Sept-2006 - remove global variables used for alignment */
+
+/* 22-June-2006 - correct incorrect alignment coordinates generated
+   after pro_dna() on projected DNA region.  
+*/
+
+/* 9-May-2003 -> 3.46 changed lx_band to use projected protein
+   boundary end.  this fixes some addressing issues on MacOSX, and
+   speeds up alignment on very long proteins
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <math.h>
+
+#include "defs.h"
+#include "param.h"
+#define XTERNAL
+#include "upam.h"
+
+/* this must be consistent with upam.h */
+#define MAXHASH 32
+#define NMAP MAXHASH+1	/* NOTE(review): expands unparenthesized; keep it to simple comparisons */
+
+/* globals for fasta */
+#define MAXWINDOW 64
+
+#ifndef MAXSAV	/* may be overridden at compile time */
+#define MAXSAV 10	/* size of vmax[]/vptr[] in do_fastx() and sarr[] in sconn() */
+#endif
+
+#ifndef ALLOCN0	/* version text reported by get_param(); the ALLOCN0 build is tagged "an0" */
+static char *verstr="3.8 June 2014";
+#else
+static char *verstr="3.8an0 June 2014";
+#endif
+
+struct dstruct		/* diagonal structure for saving current run */
+{			
+   int     score;	/* hash score of the run currently open on this diagonal */
+   int     start;	/* library position where that run starts */
+   int     stop;	/* library position of its last hit; -1 when the diagonal has no run */
+   struct savestr *dmax;   /* vmax[] entry the run was saved to by savemax(), or NULL */
+};
+
+struct savestr		/* one saved run; do_fastx() keeps MAXSAV of these in vmax[] */
+{
+   int     score;		/* run score: hash score from savemax(), later the spam() rescoring */
+   int     score0;		/* pam score of best single segment */
+   int     gscore;		/* score from global match */
+   int     dp;			/* diagonal of match (the dpos given to savemax()) */
+   int     start;		/* start of match in lib seq */
+   int     stop;		/* end of match in lib seq */
+};
+
+struct swstr { int H, E;};
+/* struct bdstr { int CC, DD, CP, DP;}; */
+
+#define SGW1 100	/* first dimension of the smgl_str tables (SGW1+1 rows) */
+#define SGW2 300	/* second dimension of the smgl_str tables (SGW2+1 columns) */
+struct smgl_str {	/* fixed-size work arrays, embedded in f_struct as smgl_s */
+  int C[SGW1+1][SGW2+1];
+  int st[SGW1+1][SGW2+1];
+  int D[SGW2+7], I[SGW2+1];
+};
+
+struct update_code_str {	/* state passed to update_code(), sprintf_code(), close_update_data() */
+  int p_op_idx;
+  int p_op_cnt;
+  int show_code;
+  int cigar_order;
+  int show_ext;
+  char *op_map;
+};
+
+#ifndef TFAST	/* two 7-character op tables per build */
+static char *ori_code = "-x/=\\+*";	/* FASTX */
+static char *cigar_code = "DXFMRI*";
+#else	/* relative to FASTX, '-'/'+' and 'D'/'I' trade places */
+static char *ori_code = "+x/=\\-*";	/* TFASTX */
+static char *cigar_code = "IXFMRD*";
+#endif
+
+static struct update_code_str *
+init_update_data(int show_code);
+
+static void
+sprintf_code(char *tmp_str, struct update_code_str *, int op_idx, int op_cnt);
+
+static void 
+update_code(char *al_str, int al_str_max, 
+	    struct update_code_str *update_data, int op, 
+	    int sim_code, unsigned char sp0, unsigned char sp1);
+
+static void
+close_update_data(char *al_str, int al_str_max,
+		  struct update_code_str *update_data);
+
+void kpsort (struct savestr **v, int n);
+extern void *init_stack(int, int);
+extern void push_stack(void *, void *);
+extern void *pop_stack(void *);
+extern void *free_stack(void *);
+
+struct sx_s {int C1, C2, C3, I1, I2, I3, flag; };
+
+struct f_struct {	/* per-query work area: built by init_work(), released by close_work() */
+  struct dstruct *diag;		/* one entry per diagonal */
+  int ndo;			/* diag[] entries already reset by the previous do_fastx() call */
+  int noff;			/* set to n0 - 1 in do_fastx() */
+  int hmask;			/* hash constants */
+  int *pamh1;			/* pam based array */
+  int *pamh2;			/* pam based kfact array */
+  int *link, *harr;		/* hash arrays */
+  int kshft;			/* shift width */
+  int nsav;		/* number of saved runs, worst saved run */
+#ifndef TFAST
+  unsigned char *aa0x;		/* contains translated codons 111222333*/
+  unsigned char *aa0y;		/* contains translated codons 123123123*/
+#else
+  unsigned char *aa1x;		/* contains translated codons 111222333 */
+  unsigned char *aa1y;		/* contains translated codons 123123123 */
+  int have_yaa;			/* flag if translation is done */
+#endif
+  struct sx_s *cur;		/* freed in close_work() when not NULL */
+  int cur_sp_size;
+  int *waa0;			/* pam2[ip][residue][aa0[j]] table filled in init_work() */
+  int *waa1;			/* pam2[0][residue][aa0[j]] table filled in init_work() */
+  struct smgl_str smgl_s;
+  int *res;			/* alignment results array */
+  int max_res;			/* number of ints allocated for res */
+};
+
+#define DROP_INTERN
+#include "drop_func.h"
+
+int shscore(unsigned char *aa0, int n0, int **pam2);
+int saatran(const unsigned char *ntseq, unsigned char *aaseq, int maxs, int frame);
+extern int ELK_to_s(double E_join, int n0, int n1, double Lambda, double K, double H);
+
+int savemax (struct dstruct *dptr, int dpos, 
+	     struct savestr *vmax, struct savestr **lowmax);
+
+int spam (const unsigned char *aa0, const unsigned char *aa1, 
+	  struct savestr *dmax, int **pam2,
+	  struct f_struct *f_str);
+int sconn (struct savestr **v, int n,int cgap, int pgap, struct f_struct *f_str);
+int lx_band(const unsigned char *prot_seq, int len_prot,
+	    const unsigned char *dna_prot_seq, int len_dna_prot,
+	    int **pam_matrix, int gopen, int gext,
+	    int gshift, int start_diag, int width, struct f_struct *f_str);
+
+void fx_walign (const unsigned char *aa0, int n0,
+		const unsigned char *xaa, int n1, unsigned char *yaa,
+		int frame, int max_res,
+		struct pstruct *ppst, 
+		struct f_struct *f_str, 
+		struct a_res_str *a_res,
+		int score_thresh);
+
+struct a_res_str *
+merge_ares_chains(struct a_res_str *cur_ares, 
+		  struct a_res_str *tmpl_ares,
+		  int score_ix, const char *msg);
+
+extern void w_abort (char *p, char *p1);
+
+/* init_work - allocate *f_arg and build the query hash/score tables used by do_fastx() */
+
+void
+init_work (unsigned char *aa0, int n0, 
+	   struct pstruct *ppst,
+	   struct f_struct **f_arg)
+{
+   int mhv, phv;
+   int hmax;
+   int i0, hv;
+   int pamfact;
+   int btemp;
+   struct f_struct *f_str;
+   int ktup;		/* word size examined */
+   int fact;		/* factor used to scale ktup match value */
+   int kt1;		/* ktup-1 */
+   int lkt;		/* last ktup - initially kt1, but can be increased
+			   for hsq >= NMAP */
+
+   int maxn0;
+   int *pwaa;
+   int i, j, q;
+   struct swstr *ss, *r_ss;
+   int *waa;
+   int *res;
+   int nsq, ip, *hsq;
+#ifndef TFAST
+   int last_n0, itemp;
+   unsigned char *fd, *fs, *aa0x, *aa0y, *aa0s;
+   int n0x, n0x3;
+#endif
+
+  if (ppst->ext_sq_set) {
+    nsq = ppst->nsqx; ip = 1;
+    hsq = ppst->hsqx;
+  }
+  else {
+    nsq = ppst->nsqx; ip = 0;
+    hsq = ppst->hsq;
+  }
+   if ((f_str = (struct f_struct *)calloc(1,sizeof(struct f_struct))) == NULL) {
+     fprintf (stderr, " cannot allocate f_struct\n"); exit (1);
+   }
+   if (!ppst->param_u.fa.use_E_thresholds) {
+     btemp = 2 * ppst->param_u.fa.bestoff / 3 +
+       n0 / ppst->param_u.fa.bestscale +
+       ppst->param_u.fa.bkfact *
+       (ppst->param_u.fa.bktup - ppst->param_u.fa.ktup);
+     btemp = min (btemp, ppst->param_u.fa.bestmax);
+     if (btemp > 3 * n0) btemp = 3 * shscore(aa0,n0,ppst->pam2[0]) / 5;
+
+     ppst->param_u.fa.cgap = btemp + ppst->param_u.fa.bestoff / 3;
+     if (ppst->param_u.fa.optcut_set != 1) {
+#ifndef TFAST
+       ppst->param_u.fa.optcut = (btemp*5)/4;
+#else
+       ppst->param_u.fa.optcut = (btemp*4)/3;
+#endif
+     }
+   }
+
+#ifdef OLD_FASTA_GAP
+   ppst->param_u.fa.pgap = ppst->gdelval + ppst->ggapval;
+#else
+   ppst->param_u.fa.pgap = ppst->gdelval + 2*ppst->ggapval;
+#endif
+
+   ppst->param_u.fa.cgap = max(ppst->param_u.fa.cgap, -ppst->param_u.fa.pgap);
+
+   pamfact = ppst->param_u.fa.pamfact;
+   ktup = ppst->param_u.fa.ktup;
+   fact = ppst->param_u.fa.scfact * ktup;
+
+   if (pamfact == -1)
+      pamfact = 0;
+   else if (pamfact == -2)
+      pamfact = 1;
+
+   for (i0 = 1, mhv = -1; i0 <=ppst->nsq; i0++)
+      if (hsq[i0] < NMAP && hsq[i0] > mhv) mhv = hsq[i0];
+
+   if (mhv <= 0) {
+      fprintf (stderr, " maximum hsq <=0 %d\n", mhv);
+      exit (1);
+   }
+
+   for (f_str->kshft = 0; mhv > 0; mhv /= 2)
+      f_str->kshft++;
+
+/*      kshft = 2;	*/
+   kt1 = ktup - 1;
+   hv = 1;
+   for (i0 = 0; i0 < ktup; i0++) {
+     hv = hv << f_str->kshft;
+   }
+   hmax = hv;
+   f_str->hmask = (hmax >> f_str->kshft) - 1;
+
+   if ((f_str->harr = (int *) calloc (hmax, sizeof (int))) == NULL) {
+     fprintf (stderr, " cannot allocate hash array\n");
+     exit (1);
+   }
+   if ((f_str->pamh1 = (int *) calloc (ppst->nsq+1, sizeof (int))) == NULL) {
+     fprintf (stderr, " cannot allocate pamh1 array\n");
+     exit (1);
+   }
+   if ((f_str->pamh2 = (int *) calloc (hmax, sizeof (int))) == NULL) {
+     fprintf (stderr, " cannot allocate pamh2 array\n");
+     exit (1);
+   }
+   if ((f_str->link = (int *) calloc (n0, sizeof (int))) == NULL) {
+     fprintf (stderr, " cannot allocate hash link array");
+     exit (1);
+   }
+
+#ifdef TFAST
+   if ((f_str->aa1x =(unsigned char *)calloc((size_t)ppst->maxlen+2,
+					     sizeof(unsigned char)))
+       == NULL) {
+     fprintf (stderr, "cannot allocate aa1x array %d\n", ppst->maxlen+2);
+     exit (1);
+   }
+   f_str->aa1x++;	/* close_work() steps back by one before free() */
+
+   if ((f_str->aa1y =(unsigned char *)calloc((size_t)ppst->maxlen+2,
+					     sizeof(unsigned char)))
+       == NULL) {
+     fprintf (stderr, "cannot allocate aa1y array %d\n", ppst->maxlen+2);
+     exit (1);
+   }
+   f_str->aa1y++;
+#else	/* FASTX */
+   maxn0 = n0 + 2;
+   if ((aa0x =(unsigned char *)calloc((size_t)maxn0,sizeof(unsigned char)))
+       == NULL) {
+     fprintf (stderr, "cannot allocate aa0x array %d\n", maxn0);
+     exit (1);
+   }
+   aa0x++;	/* close_work() steps back by one before free() */
+   f_str->aa0x = aa0x;
+
+   if ((aa0y =(unsigned char *)calloc((size_t)maxn0,sizeof(unsigned char)))
+       == NULL) {
+     fprintf (stderr, "cannot allocate aa0y array %d\n", maxn0);
+     exit (1);
+   }
+   aa0y++;
+   f_str->aa0y = aa0y;
+
+   last_n0 = 0;
+   for (itemp=0; itemp<3; itemp++) {
+     n0x = saatran(aa0,&aa0x[last_n0],n0,itemp);
+
+     /*
+       for (i=0; i<n0x; i++) {
+       fprintf(stderr,"%c",aa[aa0x[last_n0+i]]);
+       if ((i%60)==59) fprintf(stderr,"\n");
+       }
+       fprintf(stderr,"\n");
+     */
+     last_n0 += n0x+1;
+   }
+   /*
+        fprintf(stderr,"\n");
+   */
+   for (itemp=0, fs=aa0x; itemp <3; itemp++,fs++) {
+     for (fd = &aa0y[itemp]; *fs!=EOSEQ; fd += 3, fs++) *fd = *fs;
+     *fd=EOSEQ;
+   }
+
+   /* now switch aa0 and aa0x for hashing functions */
+   /* this seems dangerous in threaded code, but only the pointer is changed,
+      not the data itself */
+
+   fs = aa0;
+   aa0 = aa0x;
+   aa0x = fs;
+					 
+#endif
+
+   for (i0 = 0; i0 < hmax; i0++)
+      f_str->harr[i0] = -1;
+   for (i0 = 0; i0 < n0; i0++)
+      f_str->link[i0] = -1;
+
+   /* encode the aa0 array */
+
+   phv = hv = 0;
+   lkt = kt1;
+   for (i0 = 0; i0 < min(lkt,n0); i0++) {
+     if (hsq[aa0[i0]] >= NMAP) {hv=phv=0; lkt=i0+ktup; continue;}
+     hv = (hv << f_str->kshft) + hsq[aa0[i0]];
+     phv += ppst->pam2[ip][aa0[i0]][aa0[i0]] * ktup;
+   }
+
+   for (; i0 < n0; i0++) {
+     if (hsq[aa0[i0]] >= NMAP) {
+       hv=phv=0; 
+       lkt = i0+ktup;
+       /* restart hv, phv calculation */
+       for (; (i0 < lkt || hsq[aa0[i0]]>=NMAP) && i0<n0; i0++) {
+	 if (hsq[aa0[i0]] >= NMAP) {hv=phv=0; lkt = i0+ktup; continue;}
+	 hv = (hv << f_str->kshft) + hsq[aa0[i0]];
+	 phv += ppst->pam2[ip][aa0[i0]][aa0[i0]] * ktup;
+       }
+     }
+     if (i0 >= n0) break;
+     hv = ((hv & f_str->hmask) << f_str->kshft) + hsq[aa0[i0]];
+     f_str->link[i0] = f_str->harr[hv];	/* chain to the previous position with this hash */
+     f_str->harr[hv] = i0;
+     if (pamfact) {
+       f_str->pamh2[hv] = (phv += ppst->pam2[ip][aa0[i0]][aa0[i0]] * ktup);
+       /* this check should always be true, but just in case */
+       if (hsq[aa0[i0-kt1]]<NMAP)
+	 phv -= ppst->pam2[ip][aa0[i0 - kt1]][aa0[i0 - kt1]] * ktup;
+     }
+     else f_str->pamh2[hv] = fact * ktup;
+   }
+
+#ifndef TFAST
+   /* done hashing, now switch aa0, aa0x back */
+   fs = aa0;
+   aa0 = aa0x;
+   aa0x = fs;
+#endif
+
+   if (pamfact)
+      for (i0 = 1; i0 < ppst->nsq; i0++)
+	 f_str->pamh1[i0] = ppst->pam2[ip][i0][i0] * ktup;
+   else
+      for (i0 = 1; i0 < ppst->nsq; i0++)
+	 f_str->pamh1[i0] = fact;
+
+   f_str->ndo = 0;	/* used to save time on diagonals with long queries */
+
+#ifndef ALLOCN0
+   if ((f_str->diag = (struct dstruct *) calloc ((size_t)MAXDIAG,
+						 sizeof (struct dstruct)))==NULL) {
+      fprintf (stderr," cannot allocate diagonal arrays: %ld\n",
+	      (long) MAXDIAG *sizeof (struct dstruct));
+      exit (1);
+     };
+#else
+   if ((f_str->diag = (struct dstruct *) calloc ((size_t)n0,
+					      sizeof (struct dstruct)))==NULL) {
+      fprintf (stderr," cannot allocate diagonal arrays: %ld\n",
+	      (long)n0*sizeof (struct dstruct));
+      exit (1);
+     };
+#endif
+
+
+   if ((waa= (int *)malloc (sizeof(int)*(nsq+1)*n0)) == NULL) {
+     fprintf(stderr,"cannot allocate waa struct %3d\n",nsq*n0);
+     exit(1);
+   }
+
+   pwaa = waa;
+   for (i=0; i<nsq; i++) {
+     for (j=0;j<n0; j++) {
+       *pwaa = ppst->pam2[ip][i][aa0[j]];
+       pwaa++;
+     }
+   }
+   f_str->waa0 = waa;
+
+   if ((waa= (int *)malloc (sizeof(int)*(nsq+1)*n0)) == NULL) {
+     fprintf(stderr,"cannot allocate waa struct %3d\n",nsq*n0);
+     exit(1);
+   }
+
+   pwaa = waa;
+   for (i=0; i<nsq; i++) {
+     for (j=0;j<n0; j++) {
+       *pwaa = ppst->pam2[0][i][aa0[j]];
+       pwaa++;
+     }
+   }
+   f_str->waa1 = waa;
+
+#ifndef TFAST
+   maxn0 = max(2*n0,MIN_RES);
+#else
+   /* maxn0 needs to be large enough to accommodate introns
+      for TFASTX.  For all other functions, it will be
+      more reasonable. */
+   maxn0 = max(4*n0,MIN_RES);
+#endif
+   if ((res = (int *)calloc((size_t)maxn0,sizeof(int)))==NULL) {
+     fprintf(stderr,"cannot allocate alignment results array %d\n",maxn0);
+     exit(1);
+   }
+   f_str->res = res;
+   f_str->max_res = maxn0;
+
+   *f_arg = f_str;
+}
+
+
+/* get_param - pstring1[0]/[1] get the banner and settings message for the manager (currently 512) */
+/* pstring2 is the same information, but in a markx==10 format; skipped when pstring2 is NULL */
+void
+get_param (const struct pstruct *ppst,
+	   char **pstring1, char *pstring2,
+	   struct score_count_s *s_info)
+{
+  char options_str1[128];	/* join/opt thresholds as shown in pstring1[1] */
+  char options_str2[128];	/* the same thresholds as "pg_..." text for pstring2 */
+#ifndef TFAST
+  char *pg_str="FASTX";
+#else
+  char *pg_str="TFASTX";
+#endif
+
+  if (!ppst->param_u.fa.use_E_thresholds) {	/* NOTE(review): the ratios below have no guard for tot_scores == 0 */
+    sprintf(options_str1,"join: %d (%.3g), opt: %d (%.3g)",
+	    ppst->param_u.fa.cgap, (double)s_info->s_cnt[0]/(double)s_info->tot_scores,
+	    ppst->param_u.fa.optcut, (double)s_info->s_cnt[2]/(double)s_info->tot_scores);
+    sprintf(options_str2,"pg_join: %d (%.3g)\n; pg_optcut: %d (%.3g)",
+	    ppst->param_u.fa.cgap, (double)s_info->s_cnt[0]/(double)s_info->tot_scores,
+	    ppst->param_u.fa.optcut, (double)s_info->s_cnt[2]/(double)s_info->tot_scores);
+  }
+  else {
+    sprintf(options_str1,"E-join: %.2g (%.3g), E-opt: %.2g (%.3g)",
+	    ppst->param_u.fa.E_join, (double)s_info->s_cnt[0]/(double)s_info->tot_scores,
+	    ppst->param_u.fa.E_band_opt, (double)s_info->s_cnt[2]/(double)s_info->tot_scores);
+    sprintf(options_str2,"pg_join_E(): %.2g (%.3g)\n; pg_optcut_E(): %.2g (%.3g)",
+	    ppst->param_u.fa.E_join, (double)s_info->s_cnt[0]/(double)s_info->tot_scores,
+	    ppst->param_u.fa.E_band_opt, (double)s_info->s_cnt[2]/(double)s_info->tot_scores);
+  }
+
+  if (!ppst->param_u.fa.optflag) {
+    sprintf (pstring1[0], "%s (%s)",pg_str,verstr);
+  }
+  else {
+    sprintf (pstring1[0], "%s (%s) [optimized]",pg_str,verstr);
+  }
+
+#ifdef OLD_FASTA_GAP
+  sprintf (pstring1[1], "%s matrix (%d:%d)%s, gap-pen: %d/%d, shift: %d\n ktup: %d, %s, width: %3d",
+#else
+  sprintf (pstring1[1], "%s matrix (%d:%d)%s, open/ext: %d/%d, shift: %d\n ktup: %d, %s, width: %3d",
+#endif
+	   ppst->pam_name, ppst->pam_h,ppst->pam_l,
+	   (ppst->ext_sq_set) ? "xS":"\0",
+	   ppst->gdelval, ppst->ggapval, ppst->gshift,
+	   ppst->param_u.fa.ktup, options_str1, ppst->param_u.fa.optwid);
+
+  if (ppst->param_u.fa.iniflag) strcat(pstring1[0]," init1");	/* appended to the banner written above */
+
+  if (pstring2 != NULL) {
+#ifdef OLD_FASTA_GAP
+    sprintf (pstring2, "; pg_name_alg: %s\n; pg_ver_rel: %s\n; pg_matrix: %s (%d:%d)%s\n\
+; pg_gap-pen: %d %d\n; pg_ktup: %d\n; %s\n",
+#else
+     sprintf (pstring2, "; pg_name_alg: %s\n; pg_ver_rel: %s\n; pg_matrix: %s (%d:%d)%s\n\
+; pg_open_ext: %d %d\n; pg_ktup: %d\n; %s\n",
+#endif
+	      pg_str,verstr,ppst->pam_name, ppst->pam_h,ppst->pam_l, 
+	      (ppst->ext_sq_set) ? "xS":"\0", ppst->gdelval,
+              ppst->ggapval,ppst->param_u.fa.ktup,options_str2);
+   }
+}
+
+void
+close_work (const unsigned char *aa0, int n0,
+	    struct pstruct *ppst,
+	    struct f_struct **f_arg)
+{
+  struct f_struct *work = *f_arg;
+
+  /* nothing was allocated (or it was already released) */
+  if (work == NULL) return;
+
+  if (work->cur != NULL) free(work->cur);
+
+  /* init_work() advanced the translation buffers by one element after
+     calloc(), so step back to the address calloc() returned */
+#ifndef TFAST
+  free(work->aa0y - 1);
+  free(work->aa0x - 1);
+#else
+  free(work->aa1y - 1);
+  free(work->aa1x - 1);
+#endif
+
+  /* remaining arrays, in reverse order of allocation */
+  free(work->res);
+  free(work->waa1);
+  free(work->waa0);
+  free(work->diag);
+  free(work->link);
+  free(work->pamh2);
+  free(work->pamh1);
+  free(work->harr);
+  free(work);
+  *f_arg = NULL;
+}
+
+/* do_fastx() always compares a (possibly translated) protein query
+   sequence to another protein sequence. 
+
+   #ifndef TFAST (e.g. FASTX), then the hash table was built from the
+   translated (amino-acid) version of the query.
+   #ifdef TFAST, then aa0 is already a protein sequence
+
+   Args:
+   aa0, n0 query sequence
+   aa1, n1 library sequence
+   yaa translated DNA sequence (from either aa0 or aa1)
+   *ppst -> param struct
+   *f_str -> function structure set in init_work()
+   *rst -> scores (results struct): score[0] initn, score[1] init1, score[2] opt
+   *hoff -> offset of query in library sequence; not written on the early returns
+   shuff_flg -> only referenced from commented-out code
+   *s_info -> counters updated here: tot_scores, s_cnt[0], s_cnt[2]
+ */
+void do_fastx (const unsigned char *aa0, int n0,
+	       const unsigned char *aa1, int n1,
+	       unsigned char *yaa, 	/* translated 123123... */
+	       const struct pstruct *ppst, struct f_struct *f_str,
+	       struct rstruct *rst, int *hoff, int shuff_flg,
+	       struct score_count_s *s_info)
+{
+   int     nd;		/* diagonal array size */
+   int     lhval;
+   int     kfact;
+   int i;
+   int my_hoff;
+   int c_gap, opt_cut;
+   const unsigned char *aa_prot, *aa_trans_prot;
+   int n_aap, n_taap;
+   register struct dstruct *dptr;
+   struct savestr vmax[MAXSAV];	/* best matches saved for one sequence */
+   struct savestr *vptr[MAXSAV];
+   struct savestr *lowmax;
+   int lowscor;
+   register int tscor;
+
+#ifndef ALLOCN0
+   register struct dstruct *diagp;
+#else
+   register int dpos;
+   int     lposn0;
+#endif
+   struct dstruct *dpmax;
+   register int lpos;
+   int     tpos;
+   struct savestr *vmptr;
+   int     scor, tmp;
+   int     im, ib, nsave;
+   int ktup, kt1, ip, lkt, ktup_sq;
+   const int *hsq;
+   int n0_eff;
+#ifndef TFAST
+   int n0x31, n0x32;
+   n0x31 = (n0-2)/3;
+   n0x32 = n0x31+1+(n0-n0x31-1)/2;
+#else
+   const unsigned char *fs;
+   unsigned char *fd;
+   int n1x31, n1x32, itemp;
+   n1x31 = (n1-2)/3;
+   n1x32 = n1x31+1+(n1-n1x31-1)/2;
+#endif
+
+   if (ppst->ext_sq_set) {
+     ip = 1;
+     hsq = ppst->hsqx;
+   }
+   else {
+     ip = 0;
+     hsq = ppst->hsq;
+   }
+
+   ktup = ppst->param_u.fa.ktup;
+   kt1 = ktup-1;
+   if (ktup <= 3) {
+     ktup_sq = ktup*ktup;
+   }
+   else {
+     ktup_sq = ktup;
+   }
+   if (ktup == 1) ktup_sq *= 2;
+
+   if (n1 < ktup) {
+     rst->score[0] = rst->score[1] = rst->score[2] = 0;
+     return;
+   }
+
+   if (n0+n1+1 >= MAXDIAG) {
+     fprintf(stderr,"n0,n1 too large: %d, %d\n",n0,n1);
+     rst->score[0] = rst->score[1] = rst->score[2] = -1;
+     return;
+   }
+
+   if (ppst->param_u.fa.use_E_thresholds) {
+     rst->valid_stat = 0;
+     n0_eff = n0;
+     if (n0 > 120) n0_eff = (n0+2)/3;
+     c_gap = ELK_to_s(ppst->param_u.fa.E_join*ktup_sq, n0_eff, n1, ppst->pLambda, ppst->pK, ppst->pH);
+     opt_cut = ELK_to_s(ppst->param_u.fa.E_band_opt*ktup_sq, n0_eff, n1, ppst->pLambda, ppst->pK, ppst->pH);
+   }
+   else {
+     c_gap = ppst->param_u.fa.cgap;
+     opt_cut = ppst->param_u.fa.optcut;
+     rst->valid_stat = 1;
+   }
+   /* if (shuff_flg) rst->valid_stat = 1; */
+
+   f_str->noff = n0 - 1;	/* added to (lpos - tpos) when indexing diag[] */
+
+#ifdef ALLOCN0
+   nd = n0;
+#endif
+
+#ifndef ALLOCN0
+   nd = n0 + n1;
+#endif
+
+   dpmax = &f_str->diag[nd];
+   for (dptr = &f_str->diag[f_str->ndo]; dptr < dpmax;)
+   {
+      dptr->stop = -1;
+      dptr->dmax = NULL;
+      dptr++->score = 0;
+   }
+
+   for (vmptr = vmax; vmptr < &vmax[MAXSAV]; vmptr++)
+      vmptr->score = 0;
+   lowmax = vmax;
+   lowscor = 0;
+
+   /* start hashing */
+   lhval = 0;
+   lkt = kt1;
+   for (lpos = 0; (lpos < lkt || hsq[aa1[lpos]]>=NMAP) && lpos<n1; lpos++) {
+     if (hsq[aa1[lpos]]>=NMAP) {
+       lhval = 0; lkt=lpos+ktup; continue;
+#ifdef ALLOCN0		/* reinitialize dptr - NOTE(review): unreachable, it follows the continue above */
+       dptr = &f_str->diag[lpos % nd];
+       dptr->stop = -1;
+       dptr->dmax = NULL;
+       dptr->score = 0;
+#endif
+     }
+     lhval = ((lhval & f_str->hmask) << f_str->kshft) + hsq[aa1[lpos]];
+   }
+
+#ifndef ALLOCN0
+   diagp = &f_str->diag[f_str->noff + lkt];
+   for (; lpos < n1; lpos++, diagp++) {
+     /*     if (hsq[aa1[lpos]]>=NMAP) {lhval = 0; continue;} */
+     if (hsq[aa1[lpos]]>=NMAP) {
+       lpos++ ; diagp++;
+       while (lpos < n1 && hsq[aa1[lpos]]>=NMAP) {lpos++; diagp++;}
+       if (lpos >= n1) break;
+       lhval = 0;
+     }
+     lhval = ((lhval & f_str->hmask) << f_str->kshft) + hsq[aa1[lpos]];
+     for (tpos = f_str->harr[lhval]; tpos >= 0; tpos = f_str->link[tpos]) {
+       if ((tscor = (dptr = &diagp[-tpos])->stop) >= 0) {
+#else
+   lposn0 = f_str->noff + lpos;
+   for (; lpos < n1; lpos++, lposn0++) {
+     if (hsq[aa1[lpos]]>=NMAP) {lhval = 0; goto loopl;}
+     lhval = ((lhval & f_str->hmask) << f_str->kshft) + hsq[aa1[lpos]];
+     for (tpos = f_str->harr[lhval]; tpos >= 0; tpos = f_str->link[tpos]) {
+       dpos = lposn0 - tpos;
+       if ((tscor = (dptr = &f_str->diag[dpos % nd])->stop) >= 0) {
+#endif
+	 tscor += ktup;
+	 if ((tscor -= lpos) <= 0) {	/* better to start over */
+	   scor = dptr->score;
+	   if ((tscor += (kfact = f_str->pamh2[lhval])) < 0 && lowscor < scor) {
+#ifdef ALLOCN0
+	     lowscor = savemax (dptr, dpos, vmax, &lowmax);
+#else
+	     lowscor = savemax (dptr, dptr - f_str->diag, vmax, &lowmax);
+#endif
+	   }
+	   if ((tscor += scor) >= kfact) {
+	     dptr->score = tscor;
+	     dptr->stop = lpos;
+	   }
+	   else {
+	     dptr->score = kfact;
+	     dptr->start = (dptr->stop = lpos) - kt1;
+	   }
+	 }				/* continue current run in diagonal */
+	 else {
+	   dptr->score += f_str->pamh1[aa0[tpos]];
+	   dptr->stop = lpos;
+	 }
+       }
+       else {
+	 dptr->score = f_str->pamh2[lhval];
+	 dptr->start = (dptr->stop = lpos) - kt1;
+       }
+     }				/* end tpos */
+
+#ifdef ALLOCN0
+      /* reinitialize diag structure */
+   loopl:
+     if ((dptr = &f_str->diag[lpos % nd])->score > lowscor) {
+       lowscor = savemax (dptr, lpos, vmax, &lowmax);
+     }
+     dptr->stop = -1;
+     dptr->dmax = NULL;
+     dptr->score = 0;
+#endif
+   }				/* end lpos */
+
+#ifdef ALLOCN0
+   for (tpos = 0, dpos = f_str->noff + n1 - 1; tpos < n0; tpos++, dpos--) {
+     if ((dptr = &f_str->diag[dpos % nd])->score > lowscor) {
+       lowscor = savemax (dptr, dpos, vmax, &lowmax);
+     }
+   }
+#else
+   for (dptr = f_str->diag; dptr < dpmax;) {
+     if (dptr->score > lowscor) {
+       lowscor = savemax (dptr, dptr - f_str->diag, vmax, &lowmax);
+     }
+     dptr->stop = -1;
+     dptr->dmax = NULL;
+     dptr++->score = 0;
+   }
+   f_str->ndo = nd;
+#endif
+
+/*
+        at this point all of the elements of aa1[lpos]
+        have been searched for elements of aa0[tpos]
+        with the results in diag[dpos]
+*/
+
+   for (nsave = 0, vmptr = vmax; vmptr < &vmax[MAXSAV]; vmptr++)
+   {
+     /*
+       fprintf(stderr,"0: %4d-%4d  1: %4d-%4d  dp: %d score: %d\n",
+	       f_str->noff+vmptr->start-vmptr->dp,
+	       f_str->noff+vmptr->stop-vmptr->dp,
+	       vmptr->start,vmptr->stop,
+	       vmptr->dp,vmptr->score);
+     */
+      if (vmptr->score > 0) {
+	 vmptr->score = spam (aa0, aa1, vmptr, ppst->pam2[ip], f_str);
+	 vptr[nsave++] = vmptr;
+      }
+   }
+
+   if (nsave <= 0) {
+     rst->score[0] = rst->score[1] = rst->score[2] = 0;
+     return;
+   }
+       
+#ifndef TFAST
+   /* FASTX code here to modify the start, stop points for 
+      the three phases of the translated protein sequence
+      */
+   /*
+     fprintf(stderr,"n0x: %d; n0x31:%d; n0x32: %d\n",n0,n0x31,n0x32);
+     for (ib=0; ib<nsave; ib++) {
+       fprintf(stderr,"0: %4d-%4d  1: %4d-%4d  dp: %d score: %d\n",
+	       f_str->noff+vptr[ib]->start-vptr[ib]->dp,
+	       f_str->noff+vptr[ib]->stop-vptr[ib]->dp,
+	       vptr[ib]->start,vptr[ib]->stop,
+	       vptr[ib]->dp,vptr[ib]->score);
+     }
+
+     fprintf(stderr,"---\n");
+   */
+   for (ib=0; ib<nsave; ib++) {
+     if (f_str->noff-vptr[ib]->dp+vptr[ib]->start >= n0x32)
+       vptr[ib]->dp += n0x32;
+     if (f_str->noff-vptr[ib]->dp +vptr[ib]->start >= n0x31)
+       vptr[ib]->dp += n0x31;
+   }
+#else
+   /* TFASTX code here to modify the start, stop points for 
+      the three phases of the translated protein sequence
+      TFASTX modifies library start points, rather than 
+      query start points
+   */
+
+   for (ib=0; ib<nsave; ib++) {
+     if (vptr[ib]->start >= n1x32) {
+       vptr[ib]->start -= n1x32;
+       vptr[ib]->stop -= n1x32;
+       vptr[ib]->dp -= n1x32;
+     }
+     if (vptr[ib]->start >= n1x31) {
+       vptr[ib]->start -= n1x31;
+       vptr[ib]->stop -= n1x31;
+       vptr[ib]->dp -= n1x31;
+     }
+   }
+	    
+#endif /* TFASTX */
+
+   scor = sconn (vptr, nsave, c_gap, 
+		 ppst->param_u.fa.pgap, f_str);
+
+   for (vmptr=vptr[0],ib=1; ib<nsave; ib++)
+     if (vptr[ib]->score > vmptr->score) vmptr=vptr[ib];
+
+/*  kssort (vptr, nsave); */
+
+   rst->score[1] = vmptr->score;	/* best single score - init1*/
+   rst->score[0] = max (scor, vmptr->score);  /* initn */
+   rst->score[2] = rst->score[0];		/* initn */
+
+#ifndef TFAST /* FASTX */
+   *hoff = my_hoff=f_str->noff - vmptr->dp;
+#else
+   *hoff = my_hoff = vmptr->dp-f_str->noff;
+#endif
+
+   /*
+   if (n1 > 5000) {
+     fprintf(stderr," Long n1: %d\n",n1);
+   }
+   */
+
+   s_info->tot_scores++;
+   if (rst->score[0] >= c_gap) {s_info->s_cnt[0]++;}
+   if (ppst->param_u.fa.optflag) {
+#ifdef TFAST
+     if ( /* shuff_flg || */ rst->score[0] > opt_cut) {
+/* generate f_str->aa1y only if it is not there */
+       if ( !f_str->have_yaa ) {
+	 for (fs=aa1,itemp=0; itemp <3; itemp++,fs++) {
+	   for (fd= yaa+itemp; *fs!=EOSEQ; fd += 3, fs++) {*fd = *fs;}
+	   *fd=EOSEQ;
+	 }
+       }
+     }
+     aa_prot = aa0;
+     n_aap = n0;
+     aa_trans_prot= yaa;
+     n_taap = n1;
+#else 
+     aa_prot = aa1;
+     n_aap = n1;
+     aa_trans_prot= yaa;
+     n_taap = n0;
+#endif
+     if ( /* shuff_flg || */ rst->score[0] > opt_cut) {
+       s_info->s_cnt[2]++;
+       rst->valid_stat = 1;
+       rst->score[2] = lx_band(aa_prot,n_aap,aa_trans_prot,n_taap,
+			       ppst->pam2[ip],
+			       -ppst->gdelval,
+			       -ppst->ggapval,-ppst->gshift,
+			       my_hoff-ppst->param_u.fa.optwid/2,ppst->param_u.fa.optwid,
+			       f_str);
+     }
+   }
+}
+
+/* do_work - score library sequence aa1 against the query; on return
+	   rst->score[0] - initn
+	   rst->score[1] - init1
+	   rst->score[2] - opt
+*/
+
+void do_work (const unsigned char *aa0, int n0,
+	      const unsigned char *aa1, int n1,
+	      int frame,
+	      const struct pstruct *ppst, struct f_struct *f_str,
+	      int qr_flg, int shuff_flg, struct rstruct *rst,
+	      struct score_count_s *s_info)
+{
+  int hoff;		/* filled in by do_fastx(), not used afterwards */
+
+#ifdef TFAST
+  /* aa0 holds the protein query, aa1 the raw DNA library sequence */
+  unsigned char *xbuf = f_str->aa1x;
+  int used = 0;		/* slots of xbuf consumed so far */
+  int k, n_tr, n10;
+
+  /* translate the three frames 3*frame .. 3*frame+2 back to back into
+     xbuf; each translation is followed by one extra slot */
+  for (k = 0; k < 3; k++) {
+    n_tr = saatran(aa1, &xbuf[used], n1, frame*3 + k);
+    used += n_tr + 1;
+  }
+  n10 = used - 1;	/* combined length, without the last extra slot */
+  f_str->have_yaa = 0;	/* do_fastx() fills its yaa argument while this is 0 */
+#endif
+
+  /* defaults, in case do_fastx() returns early */
+  rst->score[2] = 0;
+  rst->score[1] = 0;
+  rst->score[0] = 0;
+  rst->escore = 1.0;
+  rst->seglen = 1;
+  rst->segnum = 1;
+
+#ifdef TFAST
+  /* tfastx: protein query against the translated library sequence */
+  do_fastx (aa0, n0, f_str->aa1x, n10, f_str->aa1y, ppst, f_str, rst, &hoff, shuff_flg, s_info);
+#else
+  /* fastx: translated query (built in init_work) against the library sequence */
+  do_fastx (f_str->aa0x, n0, aa1, n1, f_str->aa0y, ppst, f_str, rst, &hoff, shuff_flg, s_info);
+#endif
+
+  /* these two are only given placeholder values here */
+  rst->H = -1.0;
+  rst->comp = -1.0;
+}
+
+/* do_opt - rerun do_fastx() with optflag forced on; ppst's optflag is restored afterwards */
+void do_opt (const unsigned char *aa0, int n0,
+	     const unsigned char *aa1, int n1,
+	     int frame,
+	     struct pstruct *ppst,
+	     struct f_struct *f_str,
+	     struct rstruct *rst)
+{
+  int optflag, hoff;
+  struct score_count_s s_info;	/* scratch counters, discarded on return */
+#ifdef TFAST
+  int last_n1, itx, itt, n10;
+  unsigned char *xaa;
+
+  /* aa0 has a protein sequence, aa1 has a raw DNA sequence */
+  itt = frame;
+  last_n1 = 0;
+  xaa = f_str->aa1x;
+  for (itx= itt*3; itx< itt*3+3; itx++) {
+    n10  = saatran(aa1,&xaa[last_n1],n1,itx);
+    last_n1 += n10+1;
+  }
+  n10 = last_n1-1;
+  f_str->have_yaa = 0;
+#endif
+
+  /* do_fastx() increments these counters, so they must start from defined values */
+  memset(&s_info, 0, sizeof(s_info));
+
+  optflag = ppst->param_u.fa.optflag;
+  ppst->param_u.fa.optflag = 1;
+
+#ifndef TFAST
+  do_fastx (f_str->aa0x, n0, aa1, n1, f_str->aa0y, ppst, f_str, rst, &hoff, 0, &s_info);
+#else	/* TFASTX */
+  do_fastx (aa0, n0, xaa, n10, f_str->aa1y, ppst, f_str, rst, &hoff, 0, &s_info);
+#endif
+  ppst->param_u.fa.optflag = optflag;
+}
+
+int
+savemax (struct dstruct *dptr, int dpos,
+	 struct savestr *vmax, struct savestr **lowmax)
+{
+   struct savestr *slot = dptr->dmax;	/* entry already tied to this diagonal, if any */
+   struct savestr *scan;
+   int floor_score;
+
+   if (slot == NULL || slot->dp != dpos || slot->start != dptr->start) {
+     /* a new run: overwrite the current lowest-scoring entry */
+     slot = *lowmax;
+     floor_score = slot->score = dptr->score;
+     slot->dp = dpos;
+     slot->start = dptr->start;
+     slot->stop = dptr->stop;
+     dptr->dmax = slot;
+   }
+   else {
+     /* continuation of a run that is already saved: extend it */
+     slot->stop = dptr->stop;
+     floor_score = dptr->score;
+     if (floor_score <= slot->score) return (*lowmax)->score;
+     slot->score = floor_score;
+     if (slot != *lowmax) return (*lowmax)->score;
+   }
+
+   /* the lowest entry was rewritten: find the new lowest and return its score */
+   for (scan = vmax; scan < vmax+MAXSAV; scan++) {
+     if (scan->score < floor_score) { floor_score = scan->score; *lowmax = scan; }
+   }
+   return floor_score;
+}
+
+/* spam - rescore the run saved in *dmax residue by residue with pam2 and
+   narrow dmax->start/stop to its best-scoring stretch; returns that score.
+   If no stretch scores above 0, *dmax keeps its limits and 0 is returned. */
+int spam (const unsigned char *aa0, const unsigned char *aa1,
+	  struct savestr *dmax, int **pam2,
+	  struct f_struct *f_str)
+{
+   int     lpos;
+   int     tot;
+   struct {
+     int     start, stop, score;
+   } curv, maxv;
+   const unsigned char *aa0p, *aa1p;
+
+   aa1p = &aa1[lpos = dmax->start];
+   aa0p = &aa0[lpos - dmax->dp + f_str->noff];	/* query position on this diagonal */
+   curv.start = curv.stop = lpos;
+
+   /* seed with the unchanged region so dmax never receives indeterminate values */
+   maxv.start = dmax->start;
+   maxv.stop = dmax->stop;
+   tot = curv.score = maxv.score = 0;
+   for (; lpos <= dmax->stop; lpos++) {
+     tot += pam2[*aa0p++][*aa1p++];
+     if (tot > curv.score) {		/* new best end for the current stretch */
+       curv.stop = lpos;
+       curv.score = tot;
+     }
+     else if (tot < 0) {		/* stretch went negative: bank it and restart */
+       if (curv.score > maxv.score) {
+	 maxv.start = curv.start;
+	 maxv.stop = curv.stop;
+	 maxv.score = curv.score;
+       }
+       tot = curv.score = 0;
+       curv.start = lpos+1;
+     }
+   }
+
+   if (curv.score > maxv.score) {	/* the last stretch may be the best one */
+     maxv.start = curv.start;
+     maxv.stop = curv.stop;
+     maxv.score = curv.score;
+   }
+   dmax->start = maxv.start;
+   dmax->stop = maxv.stop;
+   return maxv.score;
+}
+
+#define XFACT 10	/* overlap tolerated (in both sequences) when two runs are joined */
+/* sconn - chain the saved runs v[0..n-1] (n <= MAXSAV); returns the best chain score, 0 if no run reaches cgap */
+int sconn (struct savestr **v, int n, 
+       int cgap, int pgap, struct f_struct *f_str)
+{
+  int     i, si;
+  struct slink {
+    int     score;
+    struct savestr *vp;
+    struct slink *next;
+  }      *start, *sl, *sj, *so, sarr[MAXSAV];
+  int     lstart, tstart, plstop, ptstop;
+
+/*	sort the score left to right in lib pos */
+
+  kpsort (v, n);
+
+  start = NULL;
+
+/*	for the remaining runs, see if they fit */
+
+   for (i = 0, si = 0; i < n; i++)
+   {
+
+/*	if the score is less than the gap penalty, it never helps */
+      if (v[i]->score < cgap)
+	 continue;
+      lstart = v[i]->start;
+      tstart = lstart - v[i]->dp + f_str->noff;
+
+/*	put the run in the group */
+      sarr[si].vp = v[i];
+      sarr[si].score = v[i]->score;
+      sarr[si].next = NULL;
+
+/* 	if it fits, then increase the score */
+      for (sl = start; sl != NULL; sl = sl->next)
+      {
+	 plstop = sl->vp->stop;
+	 ptstop = plstop - sl->vp->dp + f_str->noff;
+	 if (plstop < lstart+XFACT && ptstop < tstart+XFACT) {
+	   sarr[si].score = sl->score + v[i]->score + pgap;
+	   break;
+	 }
+      }
+
+/*	insert by score; NOTE(review): a run that beats no listed score is never linked in */
+      if (start == NULL)
+	 start = &sarr[si];
+      else
+	 for (sj = start, so = NULL; sj != NULL; sj = sj->next)
+	 {
+	    if (sarr[si].score > sj->score)
+	    {
+	       sarr[si].next = sj;
+	       if (so != NULL)
+		  so->next = &sarr[si];
+	       else
+		  start = &sarr[si];
+	       break;
+	    }
+	    so = sj;
+	 }
+      si++;
+   }
+
+   if (start != NULL)
+      return (start->score);
+   else
+      return (0);
+}
+
+/*
+ * kssort - Shell sort of v[0..n-1] into descending ->score order.
+ *
+ * v  array of pointers to saved runs; the pointers are permuted in place
+ * n  number of entries in v
+ *
+ * Written with a prototype-style parameter list (matching kpsort below)
+ * instead of the old K&R form, which is no longer valid C as of C23.
+ */
+void
+kssort (struct savestr *v[], int n)
+{
+   int     gap, i, j;
+   struct savestr *tmp;
+
+   for (gap = n / 2; gap > 0; gap /= 2)
+      for (i = gap; i < n; i++)
+	 for (j = i - gap; j >= 0; j -= gap)
+	 {
+	    /* stop bubbling once the gap-pair is already in order */
+	    if (v[j]->score >= v[j + gap]->score)
+	       break;
+	    tmp = v[j];
+	    v[j] = v[j + gap];
+	    v[j + gap] = tmp;
+	 }
+}
+
+/*
+ * kpsort - order v[0..n-1] by ascending ->start.
+ *
+ * Gapped insertion sort run with the fixed increments 21, 7, 3, 1;
+ * only the pointers in v are moved.
+ */
+void
+kpsort (struct savestr **v, int n) {
+  static const int gap_seq[4] = { 21, 7, 3, 1 };
+  int pass, step, pos, slot;
+  int key;
+  struct savestr *moving;
+
+  for (pass = 0; pass < 4; pass++) {
+    step = gap_seq[pass];
+    for (pos = step; pos < n; pos++) {
+      moving = v[pos];
+      key = moving->start;
+      /* slide entries with a larger start up by one step until the
+	 slot for 'moving' is reached */
+      for (slot = pos; slot >= step; slot -= step) {
+	if (v[slot - step]->start <= key) break;
+	v[slot] = v[slot - step];
+      }
+      v[slot] = moving;
+    }
+  }
+}
+
+/* init_row - zero the C1..C3, I1..I3 and flag fields of the first sp
+   cells of row */
+static void
+init_row(struct sx_s *row, int sp) {
+  int left;
+
+  for (left = sp; left > 0; left--, row++) {
+    row->I1 = 0; row->C1 = 0;
+    row->I2 = 0; row->C2 = 0;
+    row->I3 = 0; row->C3 = 0;
+    row->flag = 0;
+  }
+}
+
+/*
+ * lx_band - score-only banded local alignment of a protein against a
+ * translated DNA sequence.
+ *
+ * One row of cells (f_str->cur, width+7 entries) is reused for every
+ * protein residue.  Each cell holds three match states (C1, C2, C3)
+ * with matching gap states (I1, I2, I3); three consecutive
+ * dna_prot_seq entries are consumed per cell, one per state, and moves
+ * between states are charged gshift.  The row buffer is cached in
+ * f_str and only reallocated when the required size changes.
+ *
+ * Returns best+gopen+gext, where best is the largest value of
+ * (cell score - gopen - gext) seen, floored at 0.
+ */
+int
+lx_band(const unsigned char *prot_seq,  /* array with protein sequence numbers*/
+	int len_prot,    /* length of prot. seq */
+	const unsigned char *dna_prot_seq, /* translated DNA sequence numbers*/
+	int len_dna_prot,   /* length trans. seq. */
+	int **pam_matrix,   /* scoring matrix */
+	int gopen, int gext, /* gap open, gap extend penalties */
+	int gshift,         /* frame-shift penalty */
+	int start_diag,     /* start diagonal of band */
+	int width,         /* width for band alignment */
+	struct f_struct *f_str)
+{
+  void *ckalloc();
+  int i, j, bd, bd1, x1, sp, p1=0, p2=0, end_prot;
+  int sc, del, best = 0, cd,ci, e1, e2, e3, cd1, cd2, cd3, f, gg;
+  register int *wt;
+  const unsigned char *dp;
+  register struct sx_s *ap, *aq;
+
+  sp = width+7;	
+  gg = gopen+gext;
+  /*  sp = sp/3; */
+  /* (re)allocate the cached row only when its size has changed */
+  if (f_str->cur == NULL) {
+    f_str->cur = (struct sx_s *) ckalloc(sizeof(struct sx_s)*sp);
+    f_str->cur_sp_size = sp;
+  }
+  else if (f_str->cur_sp_size != sp) {
+    free(f_str->cur);
+    f_str->cur = (struct sx_s *) ckalloc(sizeof(struct sx_s)*sp);
+    f_str->cur_sp_size = sp;
+  }
+
+  init_row(f_str->cur, sp);
+
+  /*
+  if (start_diag %3 !=0) start_diag = start_diag/3-1;
+  else start_diag = start_diag/3;
+  */
+
+  /*
+  if (width % 3 != 0) width = width/3+1;
+  else width = width /3;
+  */
+
+  /* currently, this code assumes that the DNA sequence is longer than the
+     protein sequence. This is not always true.  len_prot in the loop below
+     should be decreased to the projection of the DNA on the protein */
+
+  x1 = start_diag; 		/* x1 = lower bound of DNA */
+
+  
+  end_prot = max(0,-width-start_diag) + (len_dna_prot+5)/3 + width;
+  end_prot = min(end_prot,len_prot);
+
+  /* i counts through protein sequence, x1 through DNAp */
+
+  for (i = max(0, -width-start_diag), x1+=i; i < end_prot; i++, x1++) {
+      bd = min(x1+width, len_dna_prot/3);	/* upper bound of band */
+      bd1 = max(0,x1);	                /* lower bound of band */
+      wt = pam_matrix[prot_seq[i]];
+      /* shift band coordinates into row-buffer indices; because del
+	 drops by one per row, aq (= ap+1) still holds the previous
+	 row's values for the column ap is about to overwrite */
+      del = 1-x1;   /*adjustment*/
+      bd += del; 
+      bd1 +=del;
+
+      ap = &f_str->cur[bd1];
+      aq = ap+1;
+      e1 = f_str->cur[bd1-1].C3;
+      e2 = ap->C1;
+      cd1 = cd2= cd3= 0;
+
+      for (dp = &dna_prot_seq[(bd1-del)*3]; ap < &f_str->cur[bd]; ap++) {
+	  /* ---- state 1: e2 is the in-state predecessor, e1/e3 cost
+	     gshift; cd1 carries the gap running along this row ---- */
+	  sc = max(max(e1, (e3=ap->C2))-gshift, e2)+wt[*dp++];
+	  if (cd1 > sc) sc = cd1;
+	  cd1 -= gext;
+	  if ((ci = aq->I1) > 0) {
+	      if (sc < ci) { ap->C1 = ci; ap->I1 = ci-gext;}
+	      else {
+		  ap->C1 = sc;
+		  sc -= gg;
+		  if (sc > 0) {
+		      if (sc > best) best =sc;
+		      if (cd1 < sc) cd1 = sc;
+		      ap->I1 = max(ci-gext, sc);
+		  } else ap->I1 = ci-gext;
+	      }
+	  } else {
+	      if (sc <= 0) {
+		  ap->I1 = ap->C1 = 0;
+	      } else {
+		  ap->C1 = sc; sc-=gg;
+		  if (sc >0) {
+		      if (sc > best) best =sc;
+		      if (cd1 < sc) cd1 = sc;
+		      ap->I1 = sc;
+		  } else ap->I1 = 0;
+	      }
+	  }
+	  /* ---- state 2 ---- */
+	  sc = max(max(e2, (e1=ap->C3))-gshift, e3)+wt[*dp++];
+	  if (cd2 > sc) sc = cd2;
+	  cd2 -= gext;
+	  if ((ci = aq->I2) > 0) {
+	      if (sc < ci) { ap->C2 = ci; ap->I2 = ci-gext;}
+	      else {
+		  ap->C2 = sc;
+		  sc -= gg;
+		  /* NOTE(review): unlike state 1 there is no
+		     "else ap->I2 = ci-gext" here, so ap->I2 keeps its
+		     old contents when sc <= 0 - confirm intended */
+		  if (sc > 0) {
+		      if (sc > best) best =sc;
+		      if (cd2 < sc) cd2 = sc;
+		      ap->I2 = max(ci-gext, sc);
+		  }
+	      }
+	  } else {
+	      if (sc <= 0) {
+		  ap->I2 = ap->C2 = 0;
+	      } else {
+		  ap->C2 = sc; sc-=gg;
+		  if (sc >0) {
+		      if (sc > best) best =sc;
+		      if (cd2 < sc) cd2 = sc;
+		      ap->I2 = sc;
+		  } else ap->I2 = 0;
+	      }
+	  }
+	  /* ---- state 3; aq advances to the next column here ---- */
+	  sc = max(max(e3, (e2=aq->C1))-gshift, e1)+wt[*dp++];
+	  if (cd3 > sc) sc = cd3;
+	  cd3 -= gext;
+	  if ((ci = aq++->I3) > 0) {
+	      if (sc < ci) { ap->C3 = ci; ap->I3 = ci-gext;}
+	      else {
+		  ap->C3 = sc;
+		  sc -= gg;
+		  /* NOTE(review): same missing else as in state 2 */
+		  if (sc > 0) {
+		      if (sc > best) best =sc;
+		      if (cd3 < sc) cd3 = sc;
+		      ap->I3 = max(ci-gext, sc);
+		  }
+	      }
+	  } else {
+	      if (sc <= 0) {
+		  ap->I3 = ap->C3 = 0;
+	      } else {
+		  ap->C3 = sc; sc-=gg;
+		  if (sc >0) {
+		      if (sc > best) best =sc;
+		      if (cd3 < sc) cd3 = sc;
+		      ap->I3 = sc;
+		  } else ap->I3 = 0;
+	      }
+	  }
+      }
+  }
+  /*  printf("The best score is %d\n", best); */
+  /* best was recorded after subtracting gg; add it back */
+  return best+gopen+gext;
+}
+
+/* ckalloc - malloc() wrapper: hands the request to malloc and calls
+   w_abort() with an out-of-memory message when it returns NULL */
+void *ckalloc(size_t amount)
+{
+  void *mem = (void *)malloc( (size_t)amount);
+
+  if (mem == NULL) {
+    w_abort("Ran out of memory.","");
+  }
+  return mem;
+}
+
+/* shscore - self-comparison score: the sum of pam2[r][r] over the
+   first n0 residues r of aa0 (the score of a 100% identical match) */
+int
+shscore(unsigned char *aa0, int n0, int **pam2)
+{
+  const unsigned char *res_p = aa0;
+  int left, self_score = 0;
+
+  for (left = n0; left > 0; left--, res_p++) {
+    self_score += pam2[*res_p][*res_p];
+  }
+  return self_score;
+}
+
+/* number of alignment columns display_alig() prints per output line */
+#define WIDTH 60
+
+/* code above is to convert sequence into numbers */
+
+typedef struct mat *match_ptr;
+
+/* one edge of an alignment path, kept as a singly linked list:
+   l is the edge type (the operation code stored in a_res->res by
+   pro_dna), i and j give the end position of the edge */
+typedef struct mat {
+	int i, j, l;
+	match_ptr next;
+} match_node;
+
+/* a pair of coordinates; local_align() uses arrays of these to carry
+   the start position of the alignment that reaches each column */
+typedef struct {
+	int i,j;
+} state;
+
+typedef state *state_ptr;
+
+/* one dynamic-programming cell: C is the best score ending at the
+   cell, I and D the scores of the two gap states */
+typedef struct st_s { int C, I, D;} *st_ptr;
+
+/* static st_ptr up=NULL, down, tp; */
+/* static int *st_up; */
+/* static int gop, gext, shift; */
+
+/* forward declarations for the routines defined further down */
+void *ckalloc(size_t);
+static match_ptr small_global(), global();
+static int local_align(), find_best();
+static void init_row2(),  init_ROW();
+
+/*
+ * pro_dna - align a protein to a translated DNA sequence and record
+ * the alignment operations.
+ *
+ * local_align() locates the best local alignment and its end points,
+ * which are stored in a_res->min0/max0 (protein) and min1/max1 (DNA);
+ * global() then rebuilds the path between those end points as a list
+ * of edges whose ->l codes are copied into a_res->res.
+ *
+ * max_res  capacity of a_res->res; codes beyond it are dropped and a
+ *          warning is written to stderr
+ *
+ * Returns the score reported by local_align().  a_res->nres is set to
+ * the full number of edges, which may exceed max_res.
+ */
+int
+pro_dna(const unsigned char *prot_seq,	/* array with prot. seq. numbers*/
+	int len_prot,	 		/* length of prot. seq */
+	const unsigned char *dna_prot_seq, /* trans. DNA seq. numbers*/
+	int len_dna_prot,		/* length trans. seq. */
+	int **pam_matrix,		/* scoring matrix */
+	int gopen, int gex,		/* gap open, gap extend penalties */
+	int gshift,        		/* frame-shift penalty */
+	struct smgl_str *smgl_sp,
+	int max_res,
+	struct a_res_str *a_res)	/* alignment info */
+{
+  match_ptr align, ap, aq;
+  int x, y, ex, ey, i, score;
+  int *alignment;
+  st_ptr up, down, tp;
+
+  /* these globals removed */
+  /*   gext = gex; gop = gopen; shift = gshift; */
+
+  /* for fastx (but not tfastx), these could be moved into init_work(),
+     and done only once */
+
+  up = (st_ptr) ckalloc(sizeof(struct st_s)*(len_dna_prot+10));
+  down = (st_ptr) ckalloc(sizeof(struct st_s)*(len_dna_prot+10));
+  tp = (st_ptr) ckalloc(sizeof(struct st_s)*(len_dna_prot+10));
+
+  /*local alignment find the best local alignment x (prot) and y (DNA)
+    is the starting position of the best local alignment
+    and ex (prot) ey (DNA) is the ending position */
+  score= local_align(&x, &y, &ex, &ey, pam_matrix,
+		     gopen, gex, gshift,
+		     dna_prot_seq, len_dna_prot,
+		     prot_seq, len_prot, up, down);
+
+  /* this is very strange, since local_align initialized up, down */
+  /* the rows are handed to global() offset by 3 because global_down()
+     writes cur[j-3] for j down to 0 */
+  up += 3; down += 3; tp += 3;
+
+  /* x, y - start in prot, dna_prot */
+  a_res->min0 = x;	/* prot */
+  a_res->max0 = ex;	/* prot */
+
+  a_res->min1 = y;	/* DNA-prot */
+  a_res->max1 = ey;	/* DNA-prot */
+
+  align = global(x, y, ex, ey, pam_matrix, gopen, gex, gshift, 
+		 dna_prot_seq, prot_seq, 0, 0, &up, &down, &tp,
+		 smgl_sp);
+
+  alignment = a_res->res;
+
+  /* from earlier version */
+  /* alignment[0] = x; */ /* start of alignment in prot */
+  /* alignment[1] = y; */ /* start of alignment in DNA */
+
+  /* copy the edge codes that fit, freeing every node as we go */
+  for (ap = align, i= 0; ap; i++) {
+    if (i < max_res) {alignment[i] = ap->l;}
+    aq = ap->next; free(ap); ap = aq;
+  }
+
+  /* i == max_res means the alignment fitted exactly; only a longer
+     one has actually lost codes (the test used to be >=) */
+  if (i > max_res) {
+    fprintf(stderr," alignment truncated: %d/%d\n", max_res,i);
+  }
+
+  /* global() may have exchanged the three row pointers among
+     themselves, but each still carries the +3 offset */
+  up = &up[-3]; down = &down[-3]; tp = &tp[-3];
+  free(up); free(tp); free(down);
+  /* free(st_up); */ /*  moved into local align */
+
+  /* NOTE(review): nres is not clamped to max_res - confirm that
+     readers of a_res->res bound their loops by the buffer size */
+  a_res->nres = i;	/* i has the length of the alignment */
+  return score;
+}
+
+/* swap - exchange the two pointers stored at *a and *b */
+static void
+swap(void **a, void **b) {
+  void *held = *b;
+
+  *b = *a;
+  *a = held;
+}
+
+/*
+   local alignment find the best local alignment x and y
+   is the starting position of the best local alignment
+   and ex ey is the ending position 
+*/
+/*
+ * local_align - find the best-scoring local alignment of protein pro
+ * (length lp) against translated DNA dnap (length ld).
+ *
+ * x, y    receive the start of the best alignment (protein, DNA)
+ * ex, ey  receive its end (protein, DNA)
+ * wgts    scoring matrix, indexed wgts[pro residue][dnap entry]
+ * gop, gext, shift  gap-open, gap-extend and frame-shift penalties
+ * up, down  caller-supplied work rows; the loops touch entries up to
+ *           index ld+5
+ *
+ * Returns the best score.  When no cell scores above zero the score
+ * is 0 and all four coordinates are reported as 0.
+ */
+static int
+local_align(int *x, int *y, int *ex, int *ey,
+	    int **wgts, int gop, int gext, int shift,
+	    const unsigned char *dnap, int ld,
+	    const unsigned char *pro,  int lp,
+	    st_ptr up, st_ptr down) {
+
+  /* x1..x4 start at 0: they used to be copied out uninitialized when
+     "sc > best" never fired */
+  int i, j,  score, x1 = 0, x2 = 0, x3 = 0, x4 = 0, e1, e2 = 0, e3,
+    sc, del,  e, best = 0, *wt, cd, ci;
+  state_ptr cur_st, last_st, cur_i_st;
+  st_ptr cur, last;
+  const unsigned char *dp;
+  int *st_up, *cur_d_st;
+
+/*      
+   Array rowiC store the best scores of alignment ending at a position
+   Arrays rowiD, and rowiI store the best scores of alignment ending
+                 at a position with a deletion or insrtion
+   Arrays sti stores the starting position of the best alignment whose
+              score stored in the corresponding row array.
+   The program stores two rows to complete the computation, same is
+        for the global alignment routine.
+*/
+
+  /* for fastx (but not tfastx), this could be moved into init_work(),
+     and done only once */
+  st_up = (int *) ckalloc(sizeof(int)*(ld+10));
+  init_row2(st_up, ld+5);
+
+  ld += 2;
+  init_ROW(up, ld+1);	/* set to zero */
+  init_ROW(down, ld+1);	/* set to zero */
+
+
+  cur = up+1;
+  last = down+1; 
+
+  /* for fastx (but not tfastx), these could be moved into init_work(),
+     and done only once */
+  cur_st = (state_ptr) ckalloc(sizeof(state)*(ld+1));
+  last_st = (state_ptr) ckalloc(sizeof(state)*(ld+1));
+  cur_i_st = (state_ptr) ckalloc(sizeof(state)*(ld+1));
+
+  cur_d_st = st_up; 
+
+  /* columns run from 2, so dp[j] reads dnap[j-2] */
+  dp = dnap-2;
+  for (i = 0; i < lp; i++) {
+    wt = &wgts[pro[i]][0];
+    for (j = 0; j < 2; j++) {
+      cur_st[j].i = i+1;
+      cur_st[j].j = j+1;
+    }
+    for (j = 2; j < ld; j++) {
+      score = wt[dp[j]];
+      /* del records which predecessor won: 2/3/4 = diagonal step of
+	 that many DNA positions, 0 = I state, 5 = D state, -1 = a
+	 fresh start at this cell */
+      del = -1;
+      if (j >= 3) {
+	sc = -score;
+	e3 = e2-shift; e2 = last[j-3].C;
+	e1 = last[j-2].C-shift; 
+	if (e1 > sc) {sc = e1; del = 2;}
+	if (e2 > sc) {sc = e2; del = 3;}
+	if (e3 > sc) {sc = e3; del = 4;} 
+      } else {
+	sc = e2  = 0;
+	if (sc < -score) sc=-score;
+	else del = 3;
+      }
+      sc += score;
+      if (sc < (ci=last[j].I)) {
+	sc = ci; del = 0;
+      }
+      if (sc < (cd=cur[j].D)) {
+	sc = cd; del = 5;
+      }
+      cur[j].C = sc;
+      e = sc  - gop;
+      /* push the D state three columns ahead; cur_d_st holds the
+	 distance back to the column where that gap was opened */
+      if (e > cd) {
+	cur[j+3].D = e-gext;
+	cur_d_st[j+3] = 3;
+      } else {
+	cur[j+3].D = cd-gext;
+	cur_d_st[j+3] = cur_d_st[j]+3;
+      }
+      /* inherit the alignment start from whichever predecessor won */
+      switch(del) {
+      case 5:
+	e1 = cur_d_st[j];
+	cur_st[j].i = cur_st[j-e1].i;
+	cur_st[j].j = cur_st[j-e1].j;
+	break;
+      case 0:
+	cur_st[j].i = cur_i_st[j].i;
+	cur_st[j].j = cur_i_st[j].j;
+	break;
+      case 2:
+      case 3:
+      case 4:
+	if (i) {
+	  if (j-del >= 0) {
+	    cur_st[j].i = last_st[j-del].i;
+	    cur_st[j].j = last_st[j-del].j;
+	  } else {
+	    cur_st[j].i = i;
+	    cur_st[j].j = 0;
+	  }
+	} else {
+	  cur_st[j].i = 0;
+	  cur_st[j].j = max(0, j-del+1);
+	}
+	break;
+      case -1:
+	cur_st[j].i = i+1;
+	cur_st[j].j = j+1;
+	break;
+      }
+      if (e > ci) {
+	cur[j].I  = e -gext;
+	cur_i_st[j].i = cur_st[j].i;
+	cur_i_st[j].j = cur_st[j].j;
+      } else {
+	cur[j].I  = ci- gext;
+      }
+      if (sc > best) {
+	x1 = cur_st[j].i;
+	x2 = cur_st[j].j;
+	best =sc;
+	x3 = i;
+	x4 = j;
+      }
+    }
+    swap((void **)&last, (void **)&cur);
+    swap((void **)&cur_st, (void **)&last_st);
+  }
+  /*	printf("The best score is %d\n", best); */
+  *x = x1; *y = x2; *ex = x3; *ey = x4;
+  free(cur_st); free(last_st); free(cur_i_st); 
+  free(st_up);
+  return best;
+}
+
+/* 
+   Both global_up and global_down do linear space score only global 
+   alignments on subsequence pro[x]...pro[ex], and dna[y]...dna[ey].
+   global_up does the algorithm upwards, from row x towards row y.
+   global_down does the algorithm downwards, from row y towards x.
+*/
+
+/*
+ * global_up - score-only global pass over protein rows x..ex and DNA
+ * columns y..ey, keeping just two rows.
+ *
+ * row1, row2  the two work rows; on return *row1 is the row holding
+ *             the final C scores, with the final I scores copied in
+ * N           non-zero makes the boundary I value -gext instead of
+ *             -gop-gext
+ *
+ * -10000 is used as the "unreachable" score.
+ */
+static void
+global_up(st_ptr *row1, st_ptr *row2,
+	  int x, int y, int ex, int ey, 
+	  int **wgts, int gop, int gext, int shift,
+	  unsigned char *dnap,
+	  unsigned char *pro,
+	  int N) {
+  int i, j, k, sc, e, e1, e2, e3, t, ci, cd, score, *wt;
+  st_ptr cur, last;
+
+  cur = *row1; last = *row2;
+
+  sc = -gop-gext;
+
+  /* boundary row: only columns that are a multiple of 3 get a gap
+     score, all others are unreachable */
+  for (j = 1; j <= ey-y+1; j++) {
+    if (j % 3 == 0) {last[j].C = sc; sc -= gext; last[j].I = sc-gop;}
+    else { last[j].I = last[j].C = -10000;}
+    cur[j].I = -10000;
+  }  
+
+  last[0].C = 0; cur[0].D = cur[1].D = cur[2].D = -10000;
+  last[0].D = last[1].D = last[2].D = -10000;
+
+  if (N) last[0].I = -gext;
+  else last[0].I = -gop-gext;
+
+  for (i = 1; i <= ex-x+1; i++) {
+    wt = &wgts[pro[i+x-1]][0]; e2 = last[0].C; e1 = -10000;
+    for (j = 0; j <= ey-y+1; j++) {
+      t = j+y;
+      sc = -10000; 
+      if (t < 3) score = -10000;
+      else score = wt[dnap[t-3]]; 
+      /* e1, e2, e3 slide along the previous row's C values; the
+	 outer two are charged the frame-shift penalty */
+      if (j < 4) {
+	if (j == 3) sc = e2;
+	else if (j == 2) sc = e2-shift;
+      } 
+      else {
+	e3 = e2; e2 = e1;
+	e1 = last[j-2].C;
+	sc = max(max(e1, e3)-shift, e2);
+      }
+      sc += score;
+      sc = max(sc, max(ci=last[j].I, cd = cur[j].D));
+      cur[j].C = sc;
+      /* the D state is written three columns ahead */
+      cur[j+3].D = max(cd, sc-gop)-gext;
+      cur[j].I = max(ci, sc-gop)-gext;
+    }
+    swap((void **)&last, (void **)&cur);
+  }
+  /* after the final swap 'last' holds the finished row; its I values
+     are overwritten with those of the other row */
+  for (i = 0; i <= ey-y+1; i++) last[i].I = cur[i].I;
+  if (*row1 != last) swap((void **)row1, (void **)row2);
+}
+
+/*
+ * global_down - mirror image of global_up: score-only global pass
+ * that starts at (ex, ey) and works back to row x, column y.
+ *
+ * row1, row2  the two work rows; on return *row1 is the row holding
+ *             the final C scores, with the final I scores copied in
+ * N           non-zero makes the boundary I value -gext instead of
+ *             -gop-gext
+ *
+ * The D state is written at cur[j-3] for j down to 0, so both rows
+ * must be addressable at indices -3..-1 (pro_dna advances its buffers
+ * by 3 before calling global for this reason).
+ */
+static void
+global_down(st_ptr *row1, st_ptr *row2,
+	    int x, int y, int ex, int ey,
+	    int **wgts, int gop, int gext, int shift,
+	    unsigned char *dnap, unsigned char *pro,
+	    int N) {
+  int i, j, k, sc, del, *tmp, e,  t, e1,e2,e3, ci,cd, s1, s2, s3, *wt;
+  st_ptr cur, last;
+
+  cur = (*row1); last = *row2;
+
+  sc = -gop-gext;
+
+  /* NOTE(review): the test below is truthy when the distance from the
+     end is NOT a multiple of 3, the opposite sense of the "j % 3 == 0"
+     test in global_up - confirm this is intended */
+  for (j = ey-y; j >= 0; j--) {
+    if ((ey-y+1-j) % 3) {last[j].C = sc; sc-=gext; last[j].I = sc-gop;}
+    else  last[j].I =  last[j].C = -10000;
+  } 
+
+  last[ey-y+1].C = 0;
+  cur[ey-y+1].D = cur[ey-y].D = cur[ey-y-1].D = -10000;
+  last[ey-y+1].D = last[ey-y].D = last[ey-y-1].D = -10000;
+
+  if (N) last[ey-y+1].I = -gext;
+  else last[ey-y+1].I = -gop-gext;
+
+  for (i = ex-x; i >= 0; i--) {
+    wt = &wgts[pro[i+x]][0]; e2 = last[ey-y+1].C; 
+    e1 = s2 = s3 = -10000; 
+    for (j = ey-y+1; j >= 0; j--) {
+      t = j+y;
+      /* NOTE(review): t-1 is -1 when j == 0 and y == 0 - confirm
+	 callers never reach that case */
+      s1 = wt[dnap[t-1]];
+      sc = -10000;
+      /* s1, s2, s3 are the substitution scores at this and the two
+	 previously visited columns; e1..e3 slide along last[].C */
+      if (t+3 > ey) {
+	if (t+2==ey) sc = e2+s2;
+	else if (t+1==ey) sc = e2-shift+s1;
+      } else {
+	e3 = e2; e2 = e1;
+	e1 = last[j+2].C;
+	sc = max(max(e1+s1, e3+s3)-shift, e2+s2);
+      }
+      if (sc < (cd= cur[j].D)) {
+	sc = cd; 
+	cur[j-3].D = cd-gext;
+      } else cur[j-3].D =max(cd, sc-gop)-gext;
+      if (sc < (ci= last[j].I)) {
+	sc = ci; del = 0;
+	cur[j].I = ci - gext;
+      } else cur[j].I = max(sc-gop,ci)-gext;
+      cur[j].C = sc;
+      s3 = s2; s2 = s1;
+    }
+    swap((void **)&last, (void **)&cur);
+  }
+  /* after the final swap 'last' holds the finished row; its I values
+     are overwritten with those of the other row */
+  for (i = 0; i <= ey-y+1; i++) last[i].I = cur[i].I;
+  if (*row1 != last) swap((void **)row1, (void **)row2);
+}
+
+/* init_row2 - set the first ld ints of row to zero */
+static void
+init_row2(int *row, int ld) {
+  int left;
+
+  for (left = ld; left > 0; left--) {
+    *row++ = 0;
+  }
+}
+
+/* init_ROW - zero the C, D and I scores of the first ld cells of row */
+static void
+init_ROW(st_ptr row, int ld) {
+  int left;
+
+  for (left = ld; left > 0; left--, row++) {
+    row->C = 0;
+    row->D = 0;
+    row->I = 0;
+  }
+}
+
+/*
+ * combine - append edge list x2 to the end of x1 and return the head
+ * of the joined list (x2 itself when x1 is empty).
+ *
+ * st  when non-zero and x1 is non-empty, the leading edges of x2 have
+ *     ->j incremented, up to and including the first edge of type 3
+ *     or 4, and that edge's type is reduced by one
+ */
+static match_ptr
+combine(match_ptr x1, match_ptr x2, int st) {
+  match_ptr x;
+
+  if (x1 == NULL) return x2;
+  for (x = x1; x->next; x = x->next);
+  x->next = x2;
+  if (st) {
+    for (x = x2; x; x = x->next) {
+      x->j++;
+      if (x->l == 3 || x->l == 4) break;
+    }
+    /* x is NULL when x2 is empty or holds no type-3/4 edge; the
+       unconditional x->l-- used to dereference NULL in that case */
+    if (x != NULL) x->l--;
+  }
+  return x1;
+}
+
+/*
+   global use the two upwards and downwards score only linear
+   space global alignment subroutine to recursively build the
+   alignment.
+*/
+
+/*
+ * global - build the edge list for the alignment of protein rows
+ * x..ex against DNA columns y..ey.
+ *
+ * Degenerate regions are emitted directly, regions below the
+ * SGW1/SGW2 limits go to small_global(), and anything larger is split
+ * at the middle row using global_up()/global_down()/find_best() and
+ * solved recursively.
+ *
+ * N1, N2                  passed on as the N flag of global_up /
+ *                         global_down and to small_global()
+ * up_stp, dn_stp, tp_stp  work rows; the pointers may be exchanged
+ *                         by the row routines
+ *
+ * Returns the head of a list allocated with ckalloc (NULL if empty).
+ * The nodes built in the two degenerate cases only have ->l and
+ * ->next set.
+ */
+match_ptr
+global(int x, int y, int ex, int ey, 
+       int **wgts, int gop, int gext, int shift,
+       unsigned char *dnap, 
+       unsigned char *pro,
+       int N1, int N2,
+       st_ptr *up_stp, st_ptr *dn_stp, st_ptr *tp_stp,
+       struct smgl_str *smgl_sp
+       )
+{
+  int m;
+  int m1, m2;
+  match_ptr x1, x2, mm1, mm2;
+  /*printf("%d %d %d %d\n", x,y, ex, ey);*/
+  /*
+    if the space required is limited, we can do a quadratic space
+    algorithm to find the alignment.
+  */
+  /* no more than one protein row left: one type-5 edge per three DNA
+     positions; nodes are prepended, so mm2 (the first one made) is
+     the tail of the list */
+  if (ex <= x) {
+    mm1  = NULL; mm2= NULL;
+    for (m = y+3; m <= ey; m+=3) {
+      x1 = (match_ptr) ckalloc(sizeof(match_node));
+      x1->l = 5; x1->next = mm1; 
+      if (mm1== NULL) mm2 = x1;
+      mm1 = x1;
+    }
+    if (ex == x) {
+      if ((ey-y) % 3 != 0) {
+	x1 = (match_ptr) ckalloc(sizeof(match_node));
+	x1->l = ((ey-y) % 3) +1; x1->next = NULL;
+	if (mm2) mm2->next = x1;
+	else mm1 = x1;
+      } else {
+	if (mm2) mm2->l = 4;
+      }
+    }
+    return mm1;
+  }
+  /* no DNA left: one type-0 edge per protein row */
+  if (ey <= y) {
+    mm1  = NULL;
+    for (m = x; m <= ex; m++) {
+      x1 = (match_ptr) ckalloc(sizeof(match_node));
+      x1->l = 0; x1->next = mm1; mm1 = x1;
+    }
+    return mm1;
+  }
+  if (ex -x < SGW1-1 && ey-y < SGW2-1) 
+    return small_global(x,y,ex,ey,
+			wgts, gop, gext, shift,
+			dnap, pro, N1, N2, smgl_sp);
+  m = (x+ex)/2;
+  /*     
+	 Do the score only global alignment from row x to row m, m is
+	 the middle row of x and ex. Store the information of row m in
+	 upC, upD, and upI.
+  */
+  global_up(up_stp, tp_stp, x, y, m, ey, 
+	    wgts, gop, gext, shift,
+	    dnap, pro, N1);
+
+  /* 
+     Do the score only global alignment downwards from row ex
+     to row m+1, store information of row m+1 in downC downI and downD
+  */
+  global_down(dn_stp, tp_stp,  m+1, y, ex, ey, 
+	      wgts, gop, gext, shift,
+	      dnap, pro, N2);
+
+  /*
+    Use these information of row m and m+1, to find the crossing
+    point of the best alignment with the middle row. The crossing
+    point is given by m1 and m2. Then we recursively call global
+    itself to compute alignments in two smaller regions found by
+    the crossing point and combine the two alignments to form a
+    whole alignment. Return that alignment.
+  */
+  if (find_best(*up_stp, *dn_stp, &m1, &m2, ey-y+1, y, gop)) {
+    /* the best crossing joins two C states */
+    x1 = global(x, y, m, m1, wgts, gop, gext, shift, dnap, pro, N1, 0,
+		up_stp, dn_stp, tp_stp, smgl_sp);
+    x2 = global(m+1, m2, ex, ey, wgts, gop, gext, shift, dnap, pro, 0, N2,
+		up_stp, dn_stp, tp_stp, smgl_sp);
+    if (m1 == m2) x1 = combine(x1,x2,1);
+    else x1 = combine(x1, x2,0);
+  } else {
+    /* the best crossing joins two I states: rows m and m+1 become
+       explicit type-0 edges and the halves shrink to x..m-1 and
+       m+2..ex */
+    x1 = global(x, y, m-1, m1, wgts, gop, gext, shift,  dnap, pro, N1, 1,
+		up_stp, dn_stp, tp_stp, smgl_sp);
+    x2 = global(m+2, m2, ex, ey, wgts, gop, gext, shift, dnap, pro, 1, N2,
+		up_stp, dn_stp, tp_stp, smgl_sp);
+    mm1 = (match_ptr) ckalloc(sizeof(match_node));
+    mm1->i = m; mm1->l = 0; mm1->j = m1;
+    mm2 = (match_ptr) ckalloc(sizeof(match_node));
+    mm2->i = m+1; mm2->l = 0; mm2->j = m1;
+    mm1->next = mm2; mm2->next = x2;
+    x1 = combine(x1, mm1, 0);
+  }
+  return x1;
+}
+
+/*
+ * find_best - choose the column at which the best alignment crosses
+ * between the row scored by global_up (up) and the row scored by
+ * global_down (down).
+ *
+ * For i = 1..ld-1 the candidates are up[i].C + down[i].C and
+ * up[i].I + down[i].I + gop.
+ *
+ * m1, m2  receive j-1+y and j+y for the winning column j
+ *
+ * Returns 1 when the winner joins two C states and 0 when it joins
+ * two I states.  If no candidate beats the -100000 sentinel (or
+ * ld < 2) the result is 1 with j = 0; st used to be returned
+ * uninitialized in that case.
+ */
+static int
+find_best(st_ptr up, st_ptr down,
+	  int *m1, int *m2,
+	  int ld, int y, int gop) {
+  int i, best = -100000, j = 0, s2, s4, st = 1;
+  up++;		/* up[i-1] below is therefore the caller's up[i] */
+  for (i = 1; i < ld; i++) {
+    s2 = up[i-1].C + down[i].C;
+    s4 = up[i-1].I + down[i].I + gop;
+    if (best < s2) {
+      best = s2; j = i; st = 1;
+    }
+    if (best < s4) {
+      best = s4; j = i; st = 0;
+    }
+  }
+  *m1 = j-1+y;
+  *m2 = j+y;
+  /*printf("find best score =%d\n", best);*/
+  return st;
+} 
+
+/*
+   An alignment is represented as a linked list whose element
+   is of type match_node. Each element represent an edge in the
+   path of the alignment graph. The fields of match_node are
+   l ---  gives the type of the edge.
+   i, j --- give the end position.
+*/
+
+/*
+ * small_global - full-matrix global alignment of protein rows x..ex
+ * against DNA columns y..ey, using the C, st, I and D tables in
+ * smgl_sp, followed by a traceback that builds the edge list.
+ *
+ * N1  non-zero makes the initial I value -gext instead of -gop-gext
+ * N2  non-zero lets the alignment end in the I state when that
+ *     scores higher
+ *
+ * Each st cell stores the winning move (0, 2, 3, 4 or 5) plus 10 if
+ * the D state was extended rather than opened and plus 20 likewise
+ * for the I state; the traceback decodes this with %10, /10 and /20.
+ *
+ * Returns the head of a ckalloc'd list of match_node edges.
+ */
+static match_ptr
+small_global(int x, int y, int ex, int ey,
+	     int **wgts, int gop, int gext, int shift,
+	     unsigned char *dnap, unsigned char *pro,
+	     int N1, int N2, struct smgl_str *smgl_sp) {
+
+  /* int C[SGW1+1][SGW2+1], st[SGW1+1][SGW2+1], D[SGW2+7], I[SGW2+1]; */
+
+  int i, j, e, sc, score, del, k, t, *wt, ci, cd;
+  int *cI, *cD, *cC, *lC, *cst, e2, e3, e4;
+  match_ptr mp, first;
+
+  /*printf("small_global %d %d %d %d\n", x, y, ex, ey);*/
+  sc = -gop-gext; smgl_sp->C[0][0] = 0;
+
+  /* boundary row: only columns that are a multiple of 3 get a gap
+     score, and every boundary cell traces back as move 5 */
+  cI = smgl_sp->I;
+  if (N1) cI[0] = -gext; else cI[0] = sc;
+  for (j = 1; j <= ey-y+1; j++) {
+    if (j % 3== 0) {
+      smgl_sp->C[0][j] = sc;
+      sc -= gext;
+      cI[j] = sc-gop;
+    } 
+    else {cI[j] = smgl_sp->C[0][j] = -10000;}
+    smgl_sp->st[0][j] = 5;
+  }
+
+  lC = &smgl_sp->C[0][0];
+  cD = smgl_sp->D; cD[0] = cD[1] = cD[2] = -10000;
+
+  for (i = 1; i <= ex-x+1; i++) {
+    cC = &smgl_sp->C[i][0];	
+    wt = &wgts[pro[i+x-1]][0]; cst = &smgl_sp->st[i][0];
+    for (j = 0; j <=ey-y+1; j++) {
+      sc = -10000; del = 0;
+      ci = cI[j];
+      cd= cD[j];
+      t = j+y;
+      if (t < 3) score = -10000;
+      else score = wt[dnap[t-3]];
+      /* diagonal step of 2, 3 or 4 DNA positions; the 2- and
+	 4-position steps are charged the frame-shift penalty */
+      if (j >= 4) {
+	e2 = lC[j-2]-shift; sc = lC[j-3]; e4 = lC[j-4]-shift;
+	del = 3;
+	if (e2 > sc) { sc = e2; del = 2;}
+	if (e4 >= sc) { sc = e4; del = 4;}
+      } else {
+	if (j ==3) {sc= lC[0]; del = 3;}
+	else if (j == 2) {sc = lC[0]-shift; del = 2;}
+      }
+      sc = sc+score;
+      if (sc < ci) {
+	sc = ci; del = 0; 
+      }
+      if (sc <= cd) {
+	sc = cd;
+	del = 5;
+      }
+      cC[j] = sc;
+      sc -= gop;
+      /* update the gap states, flagging extension (+10 for D, +20
+	 for I) for the traceback; D is written three columns ahead */
+      if (sc < cd) {
+	del += 10;
+	cD[j+3] = cd - gext;
+      } else cD[j+3] = sc -gext;
+      if (sc < ci) {
+	del += 20;
+	cI[j] = ci-gext;
+      } else cI[j] = sc-gext;
+      *(cst++) = del;
+    }
+    lC = cC;
+  }
+  /* ci still holds the I value read for the last cell of the last row */
+  if (N2 && ci +gop > cC[ey-y+1]) {
+    smgl_sp->st[ex-x+1][ey-y+1] = 0;
+    /*printf("small score = %d\n", ci+gop);*/
+  } /*else printf("small score =%d\n", cC[ey-y+1]);*/
+  /* traceback from the end corner, prepending one edge per step; e
+     remembers whether the previous step was in the D (5) or I (0)
+     state so that a flagged extension keeps following that state */
+  first = NULL; e = 1;
+  for (i = ex+1, j = ey+1; i > x || j > y; i--) {
+    mp = (match_ptr) ckalloc(sizeof(match_node));
+    mp->i = i-1;
+    k  = (t=smgl_sp->st[i-x][j-y])%10;
+    mp->j = j-1;
+    if (e == 5 && (t/10)%2 == 1) k = 5;
+    if (e == 0 && (t/20)== 1) k = 0;
+    /* move 5 uses three DNA positions and no protein row, so the
+       loop's i-- is cancelled */
+    if (k == 5) { j -= 3; i++; e=5;}
+    else {j -= k;if (k==0) e= 0; else e = 1;}
+    mp->l = k;
+    mp->next = first;
+    first = mp;
+  }
+
+  /*	for (i = 0; i <= ex-x; i++) {
+	for (j = 0; j <= ey-y; j++) 
+	printf("%d ", C[i][j]);
+	printf("\n");
+	}
+  */
+  return first;	
+}
+
+#define XTERNAL
+#include "upam.h"
+
+/*
+ * display_alig - print an alignment on stdout in WIDTH-column blocks
+ * of three lines: the DNA, a line of connector characters, and the
+ * protein.
+ *
+ * a       a[0] and a[1] are the start positions in pro and dna;
+ *         a[2..length-1] are the operation codes (0, 2, 3, 4, 5)
+ * dna     encoded DNA, ld entries, mapped to letters through NCBIstdaa
+ * pro     encoded protein, mapped to letters through NCBIstdaa
+ * length  number of entries in a
+ * ld      number of entries in dna
+ */
+extern void
+display_alig(int *a, unsigned char *dna, unsigned char * pro, int length, int ld)
+{
+  int len = 0, i, j, x, y, lines, k;
+  char line1[100], line2[100], line3[100],
+    tmp[10] = "         ";
+  unsigned char *dna1, c1, c2, c3, *st;
+
+  dna1 = ckalloc((size_t)ld);
+  for (st = dna, i = 0; i < ld; i++, st++) dna1[i] = NCBIstdaa[*st];
+  line1[0] = line2[0] = line3[0] = '\0'; x= a[0]; y = a[1]-1;
+ 
+  for (len = 0, j = 2, lines = 0; j < length; j++) {
+    i = a[j];
+    /*printf("%d %d %d\n", i, len, b->j);*/
+    if (i > 0 && i < 5) tmp[i-2] = NCBIstdaa[pro[x++]];
+    if (i == 5) {
+      i = 3; tmp[0] = tmp[1] = tmp[2] = '-';
+      /* only look at the following code when there is one; a[j+1]
+	 used to be read past the end of the array for a final 5 */
+      if (j+1 < length && a[j+1] == 2) tmp[2] = ' ';
+    }
+    if (i > 0) {
+      strncpy(&line1[len], (const char *)&dna1[y], i); y+=i;
+    } else {line1[len] = '-'; i = 1; tmp[0] = NCBIstdaa[pro[x++]];}
+    strncpy(&line2[len], tmp, i);
+    /* turn the protein letters in tmp into connector characters */
+    for (k = 0; k < i; k++) {
+      if (tmp[k] != ' ' && tmp[k] != '-') {
+	if (k == 2) tmp[k] = '\\';
+	else if (k == 1) tmp[k] = '|';
+	else tmp[k] = '/';
+      } else tmp[k] = ' ';
+    }
+    if (i == 1) tmp[0] = ' ';
+    strncpy(&line3[len], tmp, i); 
+    tmp[0] = tmp[1] =  tmp[2] = ' ';
+    len += i;
+    line1[len] = line2[len] =line3[len]  = '\0'; 
+    if (len >= WIDTH) {
+      printf("\n%5d", WIDTH*lines++);
+      for (k = 10; k <= WIDTH; k+=10) 
+	printf("    .    :");
+      if (k-5 < WIDTH) printf("    .");
+      /* cut the lines at WIDTH for printing, then restore the byte */
+      c1 = line1[WIDTH]; c2 = line2[WIDTH]; c3 = line3[WIDTH];
+      line1[WIDTH] = line2[WIDTH] = line3[WIDTH] = '\0';
+      printf("\n     %s\n     %s\n     %s\n", line1, line3, line2);
+      line1[WIDTH] = c1; line2[WIDTH] = c2; line3[WIDTH] = c3;
+      /* move the unprinted tail (including its NUL) to the front;
+	 source and destination share a buffer, so this must be
+	 memmove rather than strncpy */
+      memmove(line1, &line1[WIDTH], (size_t)(len - WIDTH + 1));
+      memmove(line2, &line2[WIDTH], (size_t)(len - WIDTH + 1));
+      memmove(line3, &line3[WIDTH], (size_t)(len - WIDTH + 1));
+      len = len - WIDTH;
+    }
+  }
+  printf("\n%5d", WIDTH*lines);
+  for (k = 10; k < len; k+=10) 
+    printf("    .    :");
+  if (k-5 < len) printf("    .");
+  printf("\n     %s\n     %s\n     %s\n", line1, line3, line2);
+
+  /* the letter copy of the DNA was previously never released */
+  free(dna1);
+}
+
+
+/* alignment store the operation that align the protein and dna sequence.
+   The code of the number in the array is as follows:
+   0:     delete of an amino acid.
+   2:     frame shift, 2 nucleotides match with an amino acid
+   3:     match an  amino acid with a codon
+   4:     the other type of frame shift
+   5:     delete of a codon
+   
+
+   Also, the first two elements of the array store the starting points
+   in the protein and DNA sequences of the local alignment.
+
+   Display looks like where WIDTH is assumed to be divisible by 10.
+
+    0    .    :    .    :    .    :    .    :    .    :    .    :
+     CCTATGATACTGGGATACTGGAACGTCCGCGGACTGACACACCCGATCCGCATGCTCCTG
+      P  M  I  L  G  Y  W  N  V  R  G  L  T  H  P  I  R  M  L  L 
+
+   60    .    :    .    :    .    :    .    :    .    :    .    :
+     GAATACACAGACTCAAGCTATGATGAGAAGAGATACACCATGGGTGACGCTCCCGACTTT
+      E  Y  T  D  S  S  Y  D  E  K  R  Y  T  M  G  D  A  P  D  F 
+*/
+
+
+/* fatal - print msg followed by a newline on stderr and exit with
+   status 1.  Written with a prototype-style parameter list; the old
+   K&R form is no longer valid C as of C23. */
+void fatal(char *msg)
+{
+	fprintf(stderr, "%s\n", msg);
+	exit(1);
+}
+
+/*
+ * fx_walign - run do_fastx() to get a score and offset and, if the
+ * score exceeds score_thresh, produce a full alignment with pro_dna().
+ *
+ * aa0, n0   query sequence and its length
+ * xaa, n1   library sequence and its length
+ * yaa       second form of the DNA sequence, passed to do_fastx() and
+ *           pro_dna()
+ * a_res     receives the do_fastx() scores (rst), the pro_dna() score
+ *           (sw_score) and the alignment itself
+ *
+ * Without TFAST the protein is xaa and the DNA is the query; with
+ * TFAST the protein is aa0 and the DNA is the library.  When only a
+ * window of the library sequence is needed it is copied into a
+ * temporary zero-filled buffer, and the coordinates returned by
+ * pro_dna() are shifted back by l_min afterwards.
+ *
+ * If the score does not exceed score_thresh, sw_score is set to 0 and
+ * no alignment is attempted.
+ */
+void
+fx_walign (const unsigned char *aa0, int n0,
+	   const unsigned char *xaa, int n1, unsigned char *yaa,
+	   int frame, int max_res,
+	   struct pstruct *ppst, 
+	   struct f_struct *f_str, 
+	   struct a_res_str *a_res,
+	   int score_thresh
+	   )
+{
+  unsigned char *local_xaa, *local_yaa;
+  int score;
+  int i, last_n1, itemp, n10;
+  int hoff, l_min, l_max, n_nt, n_aa, w_fact;
+  int score_ix, window;
+  int aa1_min_s, aa1_max_s;
+  unsigned char *fs, *fd;
+  struct score_count_s s_info;
+  int itx;
+  
+  memset(&s_info,0,sizeof(s_info));
+
+  score_ix = ppst->score_ix;
+
+  /*  check for large differences in sequence length - if there is a
+      large difference, use do_fastx() to get the offset. */
+
+#ifndef TFAST	/* FASTX */
+  n_nt = n0;
+  n_aa = n1;
+#else		/* TFASTX */
+  n_nt = n1;
+  n_aa = n0;
+#endif
+
+  do_fastx(aa0, n0, xaa, n1, yaa, ppst, f_str, &a_res->rst, &hoff,1, &s_info);
+
+  if (a_res->rst.score[score_ix] <= score_thresh) {
+    a_res->sw_score = 0;
+    a_res->n1 = n1;
+    return;
+  }
+
+  /* now we will do an alignment, but we need to be certain to do the
+     alignment in the region mapped by hoff to include the
+     high-scoring region */
+
+  /* if initn > 2 * init1, use wider window */
+  if (a_res->rst.score[0] > 2 * a_res->rst.score[1]) {w_fact = 4;} 
+  else w_fact = 2;
+
+  /* Here we need to use different strategies depending on whether we
+     have DNA or protein.  For a DNA query (protein library, FASTX), the
+     strategy is simple -- NULL bound the library protein sequence and
+     do the alignment.  For a protein query (TFASTX), things are more complex.
+     Moreover, the mapping must be calculated differently in each case.
+  */
+
+
+#ifndef TFAST /* map onto the protein (aa1) sequence */  
+  window = min(n1, ppst->param_u.fa.optwid);
+  l_min = max(0, -window - hoff);
+  l_max = min(n1, n0-hoff+window);
+
+  local_yaa = yaa;
+  local_xaa = (unsigned char *)xaa;
+  if (l_min > 0 || l_max < n1 -1 ) {
+    /* zero-filled copy with one spare byte on either side of the
+       window */
+    local_xaa = (unsigned char *)calloc(l_max - l_min+2,sizeof(char));
+    if (local_xaa == NULL) {
+      fprintf(stderr," *** cannot allocate local sequence copy %d\n",
+	      l_max - l_min+2);
+      exit(1);
+    }
+    local_xaa++;
+    memcpy(local_xaa, xaa+l_min, l_max - l_min);
+  }
+/*
+  if (l_min > 0) {
+    aa1_min_s = xaa[l_min-1];
+    local_xaa[l_min-1] = '\0';
+  }
+  if (l_max < n1 - 1) {
+    aa1_max_s = xaa[l_max];
+    xaa[l_max] = '\0';
+  }
+*/
+#else
+  window = min(n0, ppst->param_u.fa.optwid);
+  l_min = max(0,(hoff-window)*3);
+  l_max = min((hoff+window+n0)*3,n_nt);
+  local_xaa = (unsigned char *)xaa;
+  local_yaa = yaa;
+  if (l_min > 0 || l_max <n_nt -1) {
+    /* zero-filled copy with one spare byte on either side of the
+       window */
+    local_yaa = (unsigned char*)calloc(l_max - l_min + 2,sizeof(unsigned char));
+    if (local_yaa == NULL) {
+      fprintf(stderr," *** cannot allocate local sequence copy %d\n",
+	      l_max - l_min + 2);
+      exit(1);
+    }
+    local_yaa++;
+    memcpy(local_yaa, yaa+l_min, l_max - l_min);
+  }
+
+  /*
+  if (l_min > 0)
+    aa1_min_s = yaa[l_min-1];
+    yaa[l_min-1] = '\0';
+  }
+  if (l_max < n_nt-1) {
+
+    aa1_max_s = yaa[l_max];
+    yaa[l_max] = '\0';
+  }
+  */
+#endif
+
+  /* A second copy of the score_thresh test used to sit here.  Nothing
+     between the first test and this point changes a_res->rst, so it
+     could never fire - and had it fired it would have returned
+     without releasing the local copy made above.  It has been
+     removed. */
+
+  /* pro_dna always compares protein to DNA, and returns protein
+     coordinates in a_res->min0,max0 */
+
+  a_res->sw_score = 
+    pro_dna(
+#ifndef TFAST	/* FASTX */
+	    local_xaa, l_max - l_min, 	 /* true protein is in aa1/xaa */
+	    yaa, n_nt,
+#else 		/* TFASTX */
+	    aa0, n0, 		/* true protein is in aa0 */
+	    local_yaa, l_max - l_min,
+#endif
+	    ppst->pam2[0],
+#ifdef OLD_FASTA_GAP
+	    -(ppst->gdelval - ppst->ggapval),
+#else
+	    -ppst->gdelval,
+#endif
+	    -ppst->ggapval,
+	    -ppst->gshift,
+	    &f_str->smgl_s,
+	    max_res, a_res);
+
+  /*
+  if (a_res->rst.score[0] < a_res->sw_score) {
+    a_res->rst.score[0] = a_res->sw_score;
+    a_res->rst.score[ppst->score_ix] = a_res->sw_score;
+  }
+  */
+
+#ifndef TFAST
+  /* same condition as the allocation above; undo the +1 before free */
+  if (l_min > 0 || l_max < n1-1) free(--local_xaa);
+/*
+  if (l_min > 0) {
+    xaa[l_min-1] = aa1_min_s;
+  }
+  if (l_max < n1 - 1) {
+    xaa[l_max] = aa1_max_s;
+  }
+*/
+  /* pro_dna saw the window only; shift back to full coordinates */
+  a_res->min0 += l_min;
+  a_res->max0 += l_min;
+#else
+  /* n_nt == n1 here, so this matches the allocation condition */
+  if (l_min > 0 || l_max < n1-1) free(--local_yaa);
+  /*
+  if (l_min > 0) {
+    yaa[l_min-1] = aa1_min_s;
+  }
+  if (l_max < n1 - 1) {
+    yaa[l_max] = aa1_max_s;
+  }
+  */
+  a_res->n1 = n1;
+  /* pro_dna saw the window only; shift back to full coordinates */
+  a_res->min1 += l_min;
+  a_res->max1 += l_min;
+#endif
+
+}
+
+/*
+  fx_malign is a recursive interface to fx_walign() that is called
+  from do_walign(). fx_malign() first does an alignment, then checks
+  to see if the score is greater than the threshold. If so, it tries
+  doing a left and right alignment.
+
+  In this implementation, the translation required for f_str->aa1x and
+  f_str->aa1y is done at each recursive level. A better implementation
+  would do the translation once, and then be more sophisticated about
+  the boundaries on f_str->aa1x,y.  This is challenging, however,
+  because there is no easy way to subset aa1x [111112222233333],
+  though it is possible to subset aa1y cleanly.  The current solution
+  is to re-generate xaa from yaa.
+
+  21-Nov-2010 -- like do_walign(), fx_malign() uses a const xaa, to
+  ensure that threads do not interfere with each other.  If a
+  sub-range is needed, a new sequence is produced.
+
+  Arguments (as used in this function):
+    aa0, n0       passed unchanged to fx_walign() and to the recursive
+                  calls; n0 also sets the minimum flank length (min_alen)
+    xaa           FASTX: the sequence that is aligned and sub-ranged;
+                  TFASTX: not read -- a private copy (my_xaa) is rebuilt
+                  from yaa instead
+    n1            length used for the yaa->xaa conversion (TFASTX) and
+                  for computing the size of the right-hand flank
+    yaa           TFASTX: source of my_xaa and of the sub-range copies;
+                  FASTX: passed through unchanged
+    frame, max_res, ppst, f_str
+                  passed through to fx_walign()
+    score_thresh  a flank alignment is kept only if its score
+                  (rst.score[ppst->score_ix]) is > score_thresh
+    cur_ares      caller-supplied result node; cur_ares->res is
+                  calloc()'ed here (max_res ints) and cur_ares->next is
+                  reset to NULL
+    first_align   non-zero on the outermost call: 1 is passed to
+                  fx_walign() in place of score_thresh
+
+  Returns cur_ares when no flank alignment is kept, otherwise the
+  value returned by merge_ares_chains() after merging the left and
+  right chains.  Calls exit(1) if cur_ares->res cannot be allocated.
+
+  NOTE(review): the calloc() results for my_xaa, local_xaa/local_yaa
+  and tmpl_ares/tmpr_ares are not checked for NULL.
+ */
+struct a_res_str *
+fx_malign (const unsigned char *aa0, int n0,
+	   const unsigned char *xaa, int n1, unsigned char *yaa,
+	   int frame, 
+	   int score_thresh, int max_res,
+	   struct pstruct *ppst,
+	   struct f_struct *f_str,
+	   struct a_res_str *cur_ares,
+	   int first_align)
+{
+  struct a_res_str *tmpl_ares, *tmpr_ares, *this_ares;
+  struct a_res_str *mtmpl_ares, *mtmpr_ares, *mt_next;
+  unsigned char *my_xaa;
+  unsigned char *local_xaa, *local_yaa;
+  int nxyaa;
+  int hoff, score_ix;
+  int min_alen;
+  struct rstruct rst;
+  /*   char save_res; */
+  int iphase, i;
+  unsigned char *fd;
+  int max_sub_score = -1;	/* best score among the kept flank alignments */
+
+  score_ix = ppst->score_ix;
+
+  /* now we need alignment storage - get it */
+  if ((cur_ares->res = (int *)calloc((size_t)max_res,sizeof(int)))==NULL) {
+    fprintf(stderr," *** cannot allocate alignment results array %d\n",max_res);
+    exit(1);
+  }
+
+  cur_ares->next = NULL;
+    
+#ifdef TFAST  
+  min_alen = min(n0,MIN_LOCAL_LEN)*3;	/* n0 in aa, min_alen in nt */
+#else 
+  min_alen = min(n0/3,MIN_LOCAL_LEN);	/* n0 in nt, min_alen in aa */
+#endif
+
+#ifdef TFAST
+  /* convert yaa to xaa -- cannot use *fs to stop because subset
+     does not have '\0' in all three frames */
+  my_xaa = (unsigned char *)calloc(n1+2,sizeof(unsigned char));
+  my_xaa++;	/* leave a zero byte in front of the sequence */
+  for (fd=my_xaa, iphase = 0; iphase < 3; iphase++) {
+    for (i=iphase; i<n1; i+=3,fd++) *fd = yaa[i];
+  }
+  *fd=EOSEQ;
+#else
+  my_xaa = (unsigned char *)xaa;
+#endif
+
+  /* align the full range; the outermost call passes 1 rather than
+     score_thresh as the final argument */
+  fx_walign(aa0, n0, my_xaa, n1, yaa, frame, max_res, 
+	    ppst, f_str, cur_ares,(first_align ? 1 : score_thresh));
+
+  /* in cur_ares, min0,max0 are always protein, min1,max1 are always
+     DNA, but n0 could be protein or DNA, depending on
+     FASTX/TFASTX */
+
+  /* stop here when repeats are not requested or this alignment did
+     not exceed the threshold */
+  if (!ppst->do_rep || cur_ares->rst.score[ppst->score_ix] <= score_thresh) {
+#ifdef TFAST
+    free(--my_xaa);
+#endif
+    return cur_ares;
+  }
+
+  /* we are going to do a recursive edit, so we need a local copy of
+     xaa (fastx) or yaa (tfastx) */
+
+  /* nxyaa = number of residues to the left of the current alignment */
+#ifdef TFAST	/* TFASTX, n1 is nt */
+    nxyaa = cur_ares->min1;
+#else		/* FASTX n1 is aa */
+    nxyaa = cur_ares->min0;
+#endif
+
+  if (nxyaa >= min_alen) { /* try the left  */
+    /* allocate a_res */	
+    tmpl_ares = (struct a_res_str *)calloc(1, sizeof(struct a_res_str));
+
+#ifdef TFAST	/* TFASTX, no xaa */
+    local_xaa = my_xaa;	/* my_xaa is calloc'ed for TFAST */
+    local_yaa = (unsigned char *)calloc(nxyaa+2, sizeof(unsigned char));
+    local_yaa++;  /* skip the initial zero */
+    memcpy(local_yaa, yaa, nxyaa);
+/*
+    save_res = yaa[cur_ares->min1]; 
+    yaa[cur_ares->min1] = '\0';
+*/
+#else
+    local_yaa = yaa;
+    local_xaa = (unsigned char *)calloc(nxyaa+2, sizeof(unsigned char));
+    local_xaa++;  /* skip the initial zero */
+    memcpy(local_xaa, xaa, nxyaa);
+/*
+    save_res = xaa[cur_ares->min0];
+    xaa[cur_ares->min0] = '\0';
+*/
+#endif
+    tmpl_ares = fx_malign(aa0, n0, local_xaa, nxyaa,
+			  local_yaa,
+			  frame, score_thresh, max_res, 
+			  ppst, f_str, tmpl_ares, 0);
+
+#ifdef TFAST
+    free(--local_yaa);	/* local_yaa, allocated above */
+#else
+    free(--local_xaa);	/* FASTX - local_xaa allocated above */
+#endif
+
+    /* keep the left chain only if its head exceeds the threshold */
+    if (tmpl_ares->rst.score[ppst->score_ix] > score_thresh) {
+      max_sub_score = tmpl_ares->rst.score[ppst->score_ix];
+    }
+    else {
+      if (tmpl_ares->res) free(tmpl_ares->res);
+      free(tmpl_ares);
+      tmpl_ares = NULL;
+    }
+  }
+  else {tmpl_ares = NULL;}
+
+  /* do the right */
+#ifdef TFAST	/* TFASTX - n0 is aa, n1 nt */
+  nxyaa = n1 - cur_ares->max1 - 1;
+#else		/* FASTX - n1 is aa, n0 nt */
+  /* this is counter-intuitive, because n1 is the length of the DNA
+     sequence in both cases */
+  nxyaa = n1 - cur_ares->max0 - 1;
+#endif
+
+  if (nxyaa >= min_alen) {    /* try the right  */      
+    /* allocate a_res */	
+    tmpr_ares = (struct a_res_str *)calloc(1, sizeof(struct a_res_str));
+
+    /* find boundaries */
+#ifdef TFAST	/* TFASTX, no xaa */
+    local_xaa = my_xaa;
+    local_yaa = (unsigned char *)calloc(nxyaa+2, sizeof(unsigned char));
+    local_yaa++;  /* skip the initial zero */
+    memcpy(local_yaa, yaa+cur_ares->max1+1,nxyaa);
+/*
+    save_res = yaa[cur_ares->max1];
+    yaa[cur_ares->max1] = '\0';
+*/
+#else
+    local_yaa = yaa;
+    local_xaa = (unsigned char *)calloc(nxyaa+2, sizeof(unsigned char));
+    local_xaa++;  /* skip the initial zero */
+    memcpy(local_xaa, xaa+cur_ares->max0+1,nxyaa);
+/*
+    save_res = xaa[cur_ares->max0];
+    xaa[cur_ares->max0] = '\0';
+*/
+#endif
+    tmpr_ares = fx_malign(aa0, n0, 
+			  local_xaa, nxyaa, local_yaa,
+			  frame,
+			  score_thresh, max_res,
+			  ppst, f_str, tmpr_ares,0);
+#ifdef TFAST	/* TFASTX, no xaa */
+    free(--local_yaa);
+#else
+    free(--local_xaa);
+#endif
+/*    yaa[cur_ares->max1] = save_res;*/
+
+    if (tmpr_ares->rst.score[ppst->score_ix] > score_thresh) {
+      /* adjust the left boundary: the right flank was aligned as a
+	 copy starting at offset max+1, so shift every node of the
+	 chain back into the coordinates of the full sequence */
+      for (this_ares = tmpr_ares; this_ares; this_ares = this_ares->next) {
+#ifdef TFAST
+	this_ares->min1 += cur_ares->max1+1;
+	this_ares->max1 += cur_ares->max1+1;
+#else
+	this_ares->min0 += cur_ares->max0+1;
+	this_ares->max0 += cur_ares->max0+1;
+#endif
+      }
+
+      if (tmpr_ares->rst.score[ppst->score_ix] > max_sub_score) {
+	max_sub_score = tmpr_ares->rst.score[ppst->score_ix];
+      }
+    }
+    else {
+      if (tmpr_ares->res) free(tmpr_ares->res);
+      free(tmpr_ares);
+      tmpr_ares = NULL;
+    }
+  }
+  else {tmpr_ares = NULL;}
+
+#ifdef TFAST
+    free(--my_xaa);	/* release the translation built at the top of this call */
+#endif
+
+  /* neither flank produced a score above the threshold: discard
+     whatever is left and return the single alignment */
+  if (max_sub_score <= score_thresh) {
+    if (tmpl_ares) {
+      if (tmpl_ares->res) free(tmpl_ares->res);
+      free(tmpl_ares);
+    }
+    if (tmpr_ares) {
+      if (tmpr_ares->res) free(tmpr_ares->res);
+      free(tmpr_ares);
+    }
+    return cur_ares;
+  }
+
+  cur_ares = merge_ares_chains(cur_ares, tmpl_ares, score_ix, "left");
+  cur_ares = merge_ares_chains(cur_ares, tmpr_ares, score_ix, "right");
+
+  return cur_ares;
+}
+
+/* do_walign() can be called with aa0,n0 as nt (FASTX) or 
+   aa0,n0 as aa (TFASTX).  if aa0 is nt, then f_str->aa0x,y have the
+   translations already.  if aa0 is aa, then f_str->aa1x,y must be
+   generated.
+
+   This is the last time that aa0 can be nt or aa; in all lower
+   functions (fx_malign, do_fastx, fx_walign), both aa0, n0 and aa1,
+   n1 are amino acids; though one or the other may be translated.
+
+   In the lower functions, yaa can be aa0y (FASTX) or aa1y (TFASTX).
+   If it is aa1y, there may be no translation available.
+
+   21-Nov-2010 With fasta-36.3.1, do_walign() uses const aa0, aa1.  If aa1 needs
+   modification for recursive alignment, a copy is made.
+
+   Arguments:
+     aa0, n0        query sequence and length
+     aa1, n1        library sequence and length (raw DNA for TFASTX)
+     frame          translation frame; TFASTX translates frames
+                    3*frame .. 3*frame+2 with saatran()
+     repeat_thresh  score threshold handed to fx_malign()
+     ppst           parameters; use_E_thresholds, optflag, optcut and
+                    optwid are overridden during the call and restored
+                    before returning
+     f_str          work space (aa0x/aa0y or aa1x/aa1y, max_res)
+     have_ares      set to 0x3 on success, 0 if allocation fails
+
+   Returns the head of the a_res chain (nodes numbered 0,1,2,... in
+   ->index), or NULL if the first a_res_str cannot be allocated.
+*/
+
+struct a_res_str *
+do_walign (const unsigned char *aa0, int n0,
+	   const unsigned char *aa1, int n1,
+	   int frame, int repeat_thresh,
+	   struct pstruct *ppst, 
+	   struct f_struct *f_str, 
+	   int *have_ares)
+{
+  int use_E_thresholds_s, optflag_s, optcut_s, optwid_s;
+  struct a_res_str *a_res, *tmp_a_res;
+  int a_res_index;
+#ifdef TFAST
+  int last_n1, itx, itt, n10, iphase;
+  unsigned char *xaa, *fs, *fd;
+#endif
+#ifdef DEBUG
+  unsigned long adler32_crc;
+#endif
+
+  *have_ares = 0x3;	/* set 0x2 bit to indicate local copy */
+
+  if ((a_res = (struct a_res_str *)calloc(1, sizeof(struct a_res_str)))==NULL) {
+    fprintf(stderr," [do_walign] Cannot allocate a_res\n");
+    *have_ares = 0;	/* nothing was allocated, so nothing is available */
+    return NULL;
+  }
+
+#ifdef DEBUG
+  adler32_crc = adler32(1L,aa1,n1);
+#endif
+
+  /* save the search parameters that are overridden for the alignment
+     pass; they are restored just before returning */
+  use_E_thresholds_s = ppst->param_u.fa.use_E_thresholds;
+  optflag_s = ppst->param_u.fa.optflag;
+  optcut_s = ppst->param_u.fa.optcut;
+  optwid_s = ppst->param_u.fa.optwid;
+  ppst->param_u.fa.use_E_thresholds = 0;
+  ppst->param_u.fa.optflag = 1;
+  ppst->param_u.fa.optcut = 0;
+  if (!ppst->param_u.fa.optwid_set) {
+    ppst->param_u.fa.optwid *= 2;
+  }
+
+#ifndef TFAST	/* FASTX */
+  a_res = fx_malign(f_str->aa0x, n0, aa1, n1, f_str->aa0y, frame,
+		    repeat_thresh, f_str->max_res,
+		    ppst, f_str, a_res, 1);
+#else		/* TFASTX */
+  /* aa0 has a protein sequence */
+  /* aa1 has a raw DNA sequence */
+
+  /* translate the three frames of this strand back to back into
+     aa1x, leaving one slot between consecutive frames */
+  itt = frame;
+  last_n1 = 0;
+  xaa = f_str->aa1x;
+  for (itx= itt*3; itx< itt*3+3; itx++) {
+    n10  = saatran(aa1,&xaa[last_n1],n1,itx);
+    last_n1 += n10+1;
+  }
+  n10 = last_n1-1;
+  /* create aa1y from xaa: frame k of xaa goes to positions
+     k, k+3, k+6, ... of aa1y */
+  for (fs=xaa,iphase=0; iphase <3; iphase++,fs++) {
+    for (fd= &f_str->aa1y[iphase]; *fs!=EOSEQ; fd += 3, fs++) *fd = *fs;
+    *fd=EOSEQ;
+  }
+  f_str->have_yaa = 1;
+
+  a_res = fx_malign(aa0, n0, xaa, n10, f_str->aa1y, frame,
+		    repeat_thresh, f_str->max_res,
+		    ppst, f_str, a_res, 1);
+#endif
+  /*
+  if (a_res->res[0] != 3) {
+    fprintf(stderr, "*** alignment does not start with match: %d\n",a_res->res[0]);
+  }
+  */
+
+#ifdef DEBUG
+  if (adler32(1L,aa1,n1) != adler32_crc) {
+    fprintf(stderr,"[dropfx.c/do_walign] adler32_crc mismatch n1: %d\n",n1);
+  }
+#endif
+
+  /* number the alignments in chain order */
+  a_res_index = 0;
+  for (tmp_a_res=a_res; tmp_a_res; tmp_a_res = tmp_a_res->next) {
+    tmp_a_res->index = a_res_index++;
+  }
+
+  ppst->param_u.fa.use_E_thresholds = use_E_thresholds_s;
+  ppst->param_u.fa.optflag = optflag_s;
+  ppst->param_u.fa.optcut = optcut_s;
+  ppst->param_u.fa.optwid = optwid_s;
+  return a_res;
+}
+
+/* aln_func_vals - fill in the coordinate-scaling fields of *aln:
+   qlfact, qlrev, llfact, llmult, llrev and frame.
+
+   The side that carries DNA gets a length factor of 3 and is flagged
+   as reversed when frame > 0: the query (ql*) for FASTX, the library
+   (ll*) for TFASTX.  The other side gets factor 1 and is never
+   reversed. */
+/* call from calcons, calc_id, calc_code */
+void 
+aln_func_vals(int frame, struct a_struct *aln) {
+  int dna_rev = (frame > 0) ? 1 : 0;
+
+  /* identical for both programs */
+  aln->frame = frame;
+  aln->llmult = 1;
+
+#ifdef TFAST	/* TFASTX */
+  aln->qlfact = 1;
+  aln->llfact = 3;
+  aln->qlrev = 0;
+  aln->llrev = dna_rev;
+#else		/* FASTX */
+  aln->qlfact = 3;
+  aln->llfact = 1;
+  aln->qlrev = dna_rev;
+  aln->llrev = 0;
+#endif
+}
+
+/* pre_cons() is needed by programs such as tfastx/y/s that translate
+   the DNA library sequence and keep the result in f_str->aa1??.
+
+   With TFAST defined it translates frames 3*frame .. 3*frame+2 of
+   aa1[0..n1) with saatran() into consecutive sections of f_str->aa1x,
+   interleaves those sections into f_str->aa1y (section k goes to
+   positions k, k+3, k+6, ...), and sets f_str->have_yaa.  Without
+   TFAST the function does nothing.
+*/
+
+void
+pre_cons(const unsigned char *aa1, int n1, int frame, struct f_struct *f_str) {
+#ifdef TFAST
+  int tx_frame, phase, n_trans, x_off;
+  unsigned char *src, *dst;
+
+  /* each translation starts one slot past the end of the previous one */
+  x_off = 0;
+  tx_frame = 3*frame;
+  while (tx_frame < 3+3*frame) {
+    n_trans = saatran(aa1,&f_str->aa1x[x_off],n1,tx_frame);
+    x_off += n_trans+1;
+    tx_frame++;
+  }
+
+  /* create aa1y from aa1x; every section is read up to its EOSEQ */
+  src = f_str->aa1x;
+  for (phase = 0; phase < 3; phase++) {
+    dst = &f_str->aa1y[phase];
+    while (*src != EOSEQ) {
+      *dst = *src;
+      dst += 3;
+      src++;
+    }
+    *dst = EOSEQ;
+    src++;	/* step over the EOSEQ that ended this section */
+  }
+  f_str->have_yaa = 1;
+#endif
+}
+
+/*
+   Alignment: stores the operations that align the protein and DNA
+   sequences.  The codes stored in the array are as follows:
+   0:     deletion of an amino acid
+   2:     frameshift, 2 nucleotides match an amino acid
+   3:     match of an amino acid with a codon
+   4:     the other type of frameshift
+   5:     deletion of a codon
+
+   The first two elements of the array store the starting points
+   in the protein and DNA sequences of the local alignment.
+*/
+
+#include "a_mark.h"
+
+extern int align_type(int score, char sp0, char sp1, int nt_align, struct a_struct *aln, int pam_x_id_sim);
+
+extern void
+process_annot_match(int *itmp, int *pam2aa0v, 
+		    long ip, long ia, char *sp1, char *sp1a, const unsigned char *sq,
+		    struct annot_entry *annot_arr_p, char **ann_comment,
+		    void *annot_stack, int *have_push_features, int *v_delta,
+		    int *d_score_p, int *d_ident_p, int *d_alen_p, struct domfeat_link **left_domain_p,
+		    long *left_end_p, int init_score);
+
+extern int
+next_annot_match(int *itmp, int *pam2aa0v, 
+		 long ip, long ia, char *sp1, char *sp1a, const unsigned char *sq,
+		 int i_annot, int n_annot, struct annot_entry **annot_arr, char **ann_comment,
+		 void *annot_stack, int *have_push_features, int *v_delta,
+		 int *d_score_p, int *d_ident_p, int *d_alen_p, struct domfeat_link **left_domain,
+		 long *left_domain_end, int init_score);
+
+extern void
+close_annot_match (int ia, void *annot_stack, int *have_push_features,
+		   int *d_score_p, int *d_ident_p, int *d_alen_p,
+		   struct domfeat_link **left_domain_p,
+		   long *left_end_p, int init_score);
+
+extern void
+comment_var(long i0, char sp0, long i1, char sp1, char o_sp1, char sim_char,
+	    const char *ann_comment, struct dyn_string_str *annot_var_dyn,
+	    int target, int d_type);
+
+void
+display_push_features(void *annot_stack, struct dyn_string_str *annot_var_dyn,
+		      long i0_pos, char sp0, long i1_pos, char sp1, char sym, 
+		      int score, double comp, int n0, int n1,
+		      void *pstat_void, int d_type);
+
+#define DP_FULL_FMT 1	/* Region: score: bits: id: ... */
+#define Q_TARGET 0
+#define L_TARGET 1
+
+int seq_pos(int pos, int rev, int off);
+/* calc_cons_a() -- replay the edit script in a_res->res[0..nres) and
+   build the displayed alignment.
+
+   For every operation it writes one column (two for a frameshift) to
+   sp0 (translated DNA), sp1 (protein) and spa (match code); which of
+   seqc0/seqc1 is sp0 or sp1 depends on FASTX vs TFASTX.  Operations:
+     0  aa insertion        ('-' in the DNA row)
+     2  -1 frameshift       ('/' column, then a match column)
+     3  codon/aa match
+     4  +1 frameshift       ('\\' column, then a match column)
+     5  codon insertion     ('-' in the protein row)
+
+   Outputs:
+     seqc0, seqc1     aligned residue rows (not NUL-terminated here)
+     seqca            match codes, NUL-terminated
+     seqc0a, seqc1a   annotation rows, written and NUL-terminated only
+                      when have_ann is true
+     cumm_seq_score   if non-NULL, receives per-column scores
+     *nc              number of alignment columns excluding the extra
+                      frameshift marker columns (lenc)
+     *score_delta     v_delta as updated through the annotation helpers
+     aln              amin0/1, smin0/1, amax0/1, ngap_q, ngap_l, nfs,
+                      calc_last_set are set; nident, nmismatch, nsim
+                      and npos are zeroed before the loop and aln is
+                      handed to align_type() for each match column
+     annot_var_dyn    reset with NULL_dyn_string(), then passed to
+                      comment_var()/display_push_features()
+
+   annot0_p is not referenced in this function; only annot1_p and the
+   protein annotation array (ap1a) are used.
+
+   Returns lenc + not_c, i.e. the total number of columns written.
+*/
+int
+calc_cons_a(const unsigned char *aa0, int n0,
+	    const unsigned char *aa1, int n1,
+	    int *nc,
+	    struct a_struct *aln,
+	    struct a_res_str *a_res, 
+	    struct pstruct *ppst,
+	    char *seqc0, char *seqc1, char *seqca, int *cumm_seq_score,
+	    const unsigned char *ann_arr,
+	    const unsigned char *aa0a, const struct annot_str *annot0_p, char *seqc0a,
+	    const unsigned char *aa1a, const struct annot_str *annot1_p, char *seqc1a,
+	    int *score_delta,
+	    struct dyn_string_str *annot_var_dyn,
+	    struct f_struct *f_str,
+	    void *pstat_void)
+{
+  int i0, i1, i, j;
+  int lenc, not_c, itmp, ngap_p, ngap_d, nfs;
+  int *i_spa;
+  char *sp0, *sp0a, *sp1, *sp1a, *spa, t_spa;
+  const unsigned char *sq;
+
+  const unsigned char *ap0, *ap1;
+  const unsigned char *ap1a;	/* ap1 always points to protein, and
+				   only protein has annotations */
+  int *rp, *rpmax;
+  int have_ann = 0;
+
+  /* variables for variant changes/region scores */
+  char tmp_str[MAX_LSTR];
+  void *annot_stack;
+  int have_push_features, prev_match;
+  char *sim_sym = aln_map_sym[MX_ACC];
+  struct annot_entry **s_annot1_arr_p;
+  int  i1_annot, v_delta, v_tmp;
+  long i0_offset, i1_offset;
+
+  long i1_left_end;
+  int d1_score, d1_ident, d1_alen;
+  struct domfeat_link *left_domain_list1;
+
+  char *ann_comment;
+
+  *score_delta = 0;
+  d1_score = d1_ident = d1_alen = 0;
+  i1_left_end = -1;
+  left_domain_list1 = NULL;
+
+  NULL_dyn_string(annot_var_dyn);
+
+  if (ppst->ext_sq_set) {sq = ppst->sqx;}
+  else {sq = ppst->sq;}
+
+  /* choose the protein view (ap1/sp1/ap1a/sp1a) and the translated
+     DNA view (ap0/sp0/sp0a) for this program */
+#ifndef TFAST	/* FASTX */
+  aln->amin1 = aln->smin1 = a_res->min0;	/* prot */
+  aln->amin0 = aln->smin0 = a_res->min1;	/* DNA */
+
+  i0_offset = aln->q_offset;
+  i1_offset = aln->l_offset;
+
+  ap0 = f_str->aa0y;	/* translated DNA */
+  ap1 = aa1;		/* protein */
+
+  sp0 = seqc0;	/* translated DNA */
+  sp1 = seqc1;	/* protein */
+
+  have_ann = (seqc0a !=NULL && aa1a != NULL);
+  ap1a = aa1a;
+  sp1a = seqc1a;	/* protein library can have annotation */
+  sp0a = seqc0a;	/* sp0a is always ' ' - no translated
+			   annotation */
+#else		/* TFASTX */
+  aln->amin0 = aln->smin0 = a_res->min0;	/* DNA */
+  aln->amin1 = aln->smin1 = a_res->min1;	/* prot */
+
+  i1_offset = aln->q_offset;
+  i0_offset = aln->l_offset;
+
+  ap1 = aa0;		/* aa0 is protein */
+  /* with fx_malign(), there is no guarantee that we have a valid f_str->aa1y, so make one */
+  pre_cons(aa1,n1,aln->frame, f_str);
+  ap0 = f_str->aa1y;	/* aa1 is DNA */
+  sp1 = seqc0;		/* sp1 points to protein query */
+  sp0 = seqc1;		/* sp0 points to DNA */
+
+  have_ann = (seqc0a !=NULL && aa0a != NULL);
+  ap1a = aa0a;
+  sp1a = seqc0a;	/* protein query can have annotation */
+  sp0a = seqc1a;	/* sp0a is always ' ' - no translated
+			   annotation */
+#endif
+  spa = seqca;
+  if (cumm_seq_score) i_spa = cumm_seq_score;
+
+  rp = a_res->res;
+  rpmax = &a_res->res[a_res->nres];
+
+  lenc = not_c = aln->nident = aln->nmismatch = aln->nsim = aln->npos = ngap_p = ngap_d = nfs= 0;
+  i0 = a_res->min1;	/* i0 indexes ap0 (DNA side) */
+  i1 = a_res->min0;	/* i1 indexes ap1 (protein side) */
+
+  v_delta = 0;
+  i1_annot = 0;
+  annot_stack = NULL;
+  s_annot1_arr_p = NULL;
+  have_push_features = prev_match = 0;
+  if (have_ann) {
+    if (annot1_p && annot1_p->n_annot > 0) annot_stack = init_stack(64,64);
+    if (annot1_p && annot1_p->n_annot > 0) {
+      s_annot1_arr_p = annot1_p->s_annot_arr_p;
+
+      /* skip annotations that end before the alignment start; entries
+	 labelled '-' that span the start are handed to
+	 process_annot_match() */
+      while (i1_annot < annot1_p->n_annot) {
+	if (s_annot1_arr_p[i1_annot]->pos >= i1+i1_offset) {break;}
+	if (s_annot1_arr_p[i1_annot]->end < i1+i1_offset) {i1_annot++; continue;}
+
+	if (s_annot1_arr_p[i1_annot]->label == '-') {
+	  process_annot_match(&itmp, NULL, i1_offset+seq_pos(i1,aln->llrev,0), i0_offset + seq_pos(i0,aln->qlrev,0),
+			      sp1, sp1a, sq, s_annot1_arr_p[i1_annot],  &ann_comment, 
+			      annot_stack, &have_push_features, &v_delta,
+			      &d1_score, &d1_ident, &d1_alen, &left_domain_list1, &i1_left_end, 0);
+	}
+	i1_annot++;
+      }
+    }
+  }
+
+  /* replay the edit script, one operation per iteration */
+  while (rp < rpmax) {
+    /*    fprintf(stderr,"%d %d %d (%c) %d (%c)\n"
+	  ,(int)(rp-res),*rp,i0,sq[ap0[i0]],i1,sq[ap1[i1]]);
+    */
+    switch (*rp++) {
+    case 0: 	/* aa insertion */
+      *sp0++ = '-';
+      *sp1++ = sq[ap1[i1]];
+      *spa++ = M_DEL;
+      if (cumm_seq_score) {	/* NOTE(review): uses += here but = in case 5; when prev_match is 0 the slot is not assigned first -- confirm intent */
+	if (prev_match) *i_spa = ppst->gdelval;
+	*i_spa++ += ppst->ggapval;
+      }
+
+      if (have_ann) {
+	*sp0a = ' ';
+	*sp1a = ann_arr[ap1a[i1]];
+	if (s_annot1_arr_p) {	/* NOTE(review): i1_annot is not compared with n_annot before indexing below (also in cases 2, 3, 4) -- confirm the array cannot be overrun */
+	  if (i1+i1_offset == s_annot1_arr_p[i1_annot]->pos || i1+i1_offset == i1_left_end) {
+
+	    i1_annot = next_annot_match(&itmp, ppst->pam2[0][ap0[i0]], i1_offset+seq_pos(i1,aln->llrev,0),
+					i0_offset+seq_pos(i0,aln->qlrev,0), sp1, sp1a, sq, 
+					i1_annot, annot1_p->n_annot, s_annot1_arr_p,
+					&ann_comment, annot_stack, &have_push_features, &v_delta,
+					&d1_score, &d1_ident, &d1_alen, &left_domain_list1, &i1_left_end,
+					ppst->ggapval+ppst->gdelval
+					);
+	  }
+
+	  if (prev_match) d1_score += ppst->gdelval;
+	  d1_score += ppst->ggapval;
+	  d1_alen++;
+	  prev_match = 0;
+	}
+	sp0a++; sp1a++;
+      }
+
+      if (have_ann && have_push_features) {	/* NOTE(review): sp0, sp1 and spa were already advanced above, so *sp0, *sp1, *spa read the next, not-yet-written column */
+	display_push_features(annot_stack, annot_var_dyn,
+			      i0_offset+seq_pos(i0,aln->qlrev,0), *sp0,
+			      i1_offset+seq_pos(i1,aln->llrev,0), *sp1,
+			      sim_sym[*spa], 
+			      a_res->rst.score[ppst->score_ix], a_res->rst.comp, n0, n1, pstat_void, DP_FULL_FMT);
+	have_push_features = 0;
+      }
+
+      i1++;
+      lenc++;
+      ngap_d++;
+      break;
+    case 2:	/* -1 frameshift, which is treated as an insertion/match for annotations */
+      nfs++;
+      /* frameshifts produce a two-character alignment string */
+      /* first annotate the frameshift  (first character) */
+      *sp0++ = '/';
+      i0 -= 1;
+      *sp1++ = '-';
+      *spa++ = M_DEL;
+
+      if (have_ann) {*sp0a++ = *sp1a++ = ' ';}
+      not_c++;	/* the marker column is counted separately from lenc */
+
+      /* then annotate the match after the frameshift */
+
+      itmp=ppst->pam2[0][ap0[i0]][ap1[i1]];
+      *sp0 = sq[ap0[i0]];
+      *sp1 = sq[ap1[i1]];
+
+      if (cumm_seq_score) *i_spa++ = ppst->gshift;
+
+      if (have_ann) {
+	have_push_features = 0;
+	/* this simple strategy works because the coordinate system
+	   for the alignment is reversed appropriately */
+	*sp1a = ann_arr[ap1a[i1]];
+	*sp0a = ' ';
+	if (s_annot1_arr_p) {
+	  /* coordinates are much more complex for next_annot_match,
+	     and comment_var, because they may need to be reversed */
+
+	  if (i1+i1_offset == s_annot1_arr_p[i1_annot]->pos || i1+i1_offset == i1_left_end) {
+	    i1_annot = next_annot_match(&itmp, ppst->pam2[0][ap0[i0]], i1_offset+seq_pos(i1,aln->llrev,0),
+					i0_offset+seq_pos(i0,aln->qlrev,0), sp1, sp1a, sq, 
+					i1_annot, annot1_p->n_annot, s_annot1_arr_p,
+					&ann_comment, annot_stack, &have_push_features, &v_delta,
+					&d1_score, &d1_ident, &d1_alen, &left_domain_list1, &i1_left_end,0);
+
+	    if (sq[ap1[i1]] != *sp1) {	/* the displayed residue differs from the sequence residue after next_annot_match(): record the variant */
+	      t_spa = align_type(itmp, *sp0, *sp1, 0, NULL, ppst->pam_x_id_sim);
+
+	      comment_var(i0_offset+seq_pos(i0,aln->qlrev,0), *sp0,
+			  i1_offset+seq_pos(i1,aln->llrev,0), *sp1,
+			  sq[ap1[i1]], sim_sym[t_spa], ann_comment,
+			  annot_var_dyn,1,1);
+	    }
+	  }
+	  d1_score += ppst->gshift;
+	  d1_score += itmp;
+	  prev_match = 1;
+	}
+	sp0a++; sp1a++;
+      }
+
+      *spa = align_type(itmp, *sp0, *sp1, 0, aln, ppst->pam_x_id_sim);
+
+      d1_alen++;
+      if (*spa == M_IDENT) {d1_ident++;}
+
+      if (have_ann && have_push_features) {
+	display_push_features(annot_stack, annot_var_dyn,
+			      i0_offset+seq_pos(i0,aln->qlrev,0), *sp0,
+			      i1_offset+seq_pos(i1,aln->llrev,0), *sp1,
+			      sim_sym[*spa],
+			      a_res->rst.score[ppst->score_ix], a_res->rst.comp, n0, n1, pstat_void, DP_FULL_FMT);
+	have_push_features = 0;
+      }
+
+      if (cumm_seq_score) *i_spa++ = itmp;
+      i0 += 3;
+      i1++;
+
+      sp0++; sp1++; spa++;
+      lenc++;
+      break;
+    case 3:	/* codon/aa match */
+      itmp=ppst->pam2[0][ap0[i0]][ap1[i1]];
+      *sp0 = sq[ap0[i0]];
+      *sp1 = sq[ap1[i1]];
+
+      if (have_ann) {
+	*sp1a = ann_arr[ap1a[i1]];
+	*sp0a = ' ';
+	if (s_annot1_arr_p) {
+	  if (i1+i1_offset == s_annot1_arr_p[i1_annot]->pos || i1+i1_offset == i1_left_end) {
+	    i1_annot = next_annot_match(&itmp, ppst->pam2[0][ap0[i0]], i1_offset+seq_pos(i1,aln->llrev,0),
+					i0_offset+seq_pos(i0,aln->qlrev,0), sp1, sp1a, sq, 
+					i1_annot, annot1_p->n_annot, s_annot1_arr_p,
+					&ann_comment, annot_stack, &have_push_features, &v_delta,
+					&d1_score, &d1_ident, &d1_alen, &left_domain_list1, &i1_left_end,0);
+
+	    if (sq[ap1[i1]] != *sp1) {
+	      t_spa = align_type(itmp, *sp0, *sp1, 0, NULL, ppst->pam_x_id_sim);
+
+	      comment_var(i0_offset+seq_pos(i0,aln->qlrev,0), *sp0,
+			  i1_offset+seq_pos(i1,aln->llrev,0), *sp1,
+			  sq[ap1[i1]], sim_sym[t_spa], ann_comment,
+			  annot_var_dyn, 1, 1);
+	    }
+	  }
+	  prev_match = 1;
+	  d1_score += itmp;
+	}
+	sp0a++; sp1a++;
+      }
+
+      *spa = align_type(itmp, *sp0, *sp1, 0, aln, ppst->pam_x_id_sim);
+      d1_alen++;
+      if (*spa == M_IDENT) {d1_ident++;}
+  
+      if (cumm_seq_score) *i_spa++ = itmp;
+
+      if (have_ann && have_push_features) {
+	display_push_features(annot_stack, annot_var_dyn,
+			      i0_offset+seq_pos(i0,aln->qlrev,0), *sp0,
+			      i1_offset+seq_pos(i1,aln->llrev,0), *sp1,
+			      sim_sym[*spa],
+			      a_res->rst.score[ppst->score_ix], a_res->rst.comp, n0, n1, pstat_void, DP_FULL_FMT);
+	have_push_features = 0;
+      }
+
+      i0 += 3;
+      i1++;
+
+      sp0++; sp1++; spa++;
+      lenc++;
+      break;
+    case 4:	/* +1 frameshift */
+      nfs++;
+      /* frameshift produces two alignment characters */
+      /* first frameshift */
+      *sp0++ = '\\';
+      i0 += 1;
+      *sp1++ = '-';
+      *spa++ = M_DEL;
+      if (cumm_seq_score) *i_spa++ = ppst->gshift;
+
+      if (have_ann) {*sp1a++ = *sp0a++ = ' ';}
+      not_c++;
+
+      /* then alignment */
+      itmp=ppst->pam2[0][ap0[i0]][ap1[i1]];
+      *sp0 = sq[ap0[i0]];
+      *sp1 = sq[ap1[i1]];
+
+      if (have_ann) {
+	*sp1a = ann_arr[ap1a[i1]];
+	*sp0a = ' ';
+	if (s_annot1_arr_p) {
+	  if (i1+i1_offset == s_annot1_arr_p[i1_annot]->pos || i1+i1_offset == i1_left_end) {
+	    i1_annot = next_annot_match(&itmp, ppst->pam2[0][ap0[i0]], i1_offset+seq_pos(i1,aln->llrev,0),
+					i0_offset+seq_pos(i0,aln->qlrev,0), sp1, sp1a, sq, 
+					i1_annot, annot1_p->n_annot, s_annot1_arr_p,
+					&ann_comment, annot_stack, &have_push_features, &v_delta,
+					&d1_score, &d1_ident, &d1_alen, &left_domain_list1, &i1_left_end,0);
+
+	    if (sq[ap1[i1]] != *sp1) {
+	      t_spa = align_type(itmp, *sp0, *sp1, 0, NULL, ppst->pam_x_id_sim);
+
+	      comment_var(i0_offset+seq_pos(i0,aln->qlrev,0), *sp0,
+			  i1_offset+seq_pos(i1,aln->llrev,0), *sp1,
+			  sq[ap1[i1]], sim_sym[t_spa], ann_comment,
+			  annot_var_dyn, 1, DP_FULL_FMT);
+	    }
+	  }
+	  d1_score += ppst->gshift;
+	  d1_score += itmp;
+	  prev_match = 1;
+	}
+	sp0a++; sp1a++;
+      }
+
+      *spa = align_type(itmp, *sp0, *sp1, 0, aln, ppst->pam_x_id_sim);
+      d1_alen++;
+      if (*spa == M_IDENT) {d1_ident++;}
+
+      if (cumm_seq_score) *i_spa++ = itmp;
+
+      /* now we have done all the ?modified identity checks, display
+	 potential site annotations */
+      if (have_ann && have_push_features) {
+	display_push_features(annot_stack, annot_var_dyn,
+			      i0_offset+seq_pos(i0,aln->qlrev,0), *sp0,
+			      i1_offset+seq_pos(i1,aln->llrev,0), *sp1,
+			      sim_sym[*spa], 
+			      a_res->rst.score[ppst->score_ix], a_res->rst.comp, n0, n1, pstat_void, DP_FULL_FMT);
+	have_push_features = 0;
+      }
+
+      i0 += 3;
+      i1++;
+
+      sp0++; sp1++; spa++;
+      lenc++;
+      break;
+    case 5:	/* codon insertion */
+      if (have_ann) {
+	*sp1a++ = *sp0a++ = ' ';
+      }
+
+      if (cumm_seq_score) {	/* NOTE(review): the gdelval store is overwritten by the = on the next statement (case 0 uses +=) -- confirm intent */
+	if (prev_match) *i_spa = ppst->gdelval;
+	*i_spa++ = ppst->ggapval;
+      }
+
+      if (prev_match) d1_score += ppst->gdelval;
+      d1_score += ppst->ggapval;
+
+      prev_match = 0;
+
+      *sp0++ = sq[ap0[i0]];
+      i0 += 3;
+      *sp1++ = '-';
+      *spa++ = M_DEL;
+      lenc++;
+      ngap_p++;
+      break;
+    }
+  }
+
+  /* done with alignment loop */
+
+  if (have_ann) {
+    *sp0a = *sp1a = '\0';
+    if (s_annot1_arr_p) {
+      have_push_features = 0;
+
+      if (s_annot1_arr_p && i1_left_end > 0) {
+	close_annot_match(-1, annot_stack, &have_push_features,
+			  &d1_score, &d1_ident, &d1_alen, &left_domain_list1, &i1_left_end,
+			  0);
+      }
+
+      if (have_push_features) {
+	display_push_features(annot_stack, annot_var_dyn,
+			      i0_offset+seq_pos(i0,aln->qlrev,0), *sp0,
+			      i1_offset+seq_pos(i1,aln->llrev,0), *sp1,
+			      sim_sym[*spa], 
+			      a_res->rst.score[ppst->score_ix], a_res->rst.comp, n0, n1, pstat_void, DP_FULL_FMT);
+	have_push_features = 0;
+      }
+    }
+  }
+  *spa = '\0';
+
+  /* record the end coordinates and gap counts; query/library roles
+     are swapped between FASTX and TFASTX */
+#ifndef TFAST
+  aln->amax0 = i0;
+  aln->amax1 = i1;
+  aln->ngap_q = ngap_d;
+  aln->ngap_l = ngap_p;
+#else
+  aln->amax1 = i0;
+  aln->amax0 = i1;
+  aln->ngap_q = ngap_p;
+  aln->ngap_l = ngap_d;
+#endif
+  aln->calc_last_set = 1;
+
+  aln->nfs = nfs;
+
+  *score_delta = v_delta;
+
+  if (have_ann) {	/* NOTE(review): this repeats the close_annot_match()/display_push_features() step done right after the loop -- confirm both are needed */
+    *sp0a = *sp1a = '\0';
+    have_push_features = 0;
+    /* check for left ends after alignment */
+    if (annot1_p && i1_left_end > 0) {
+      close_annot_match(-1, annot_stack, &have_push_features,
+			&d1_score, &d1_ident, &d1_alen, &left_domain_list1, &i1_left_end,
+			0);
+    }
+
+    if (have_push_features) {
+      display_push_features(annot_stack, annot_var_dyn,
+			    i0_offset + seq_pos(aln->amax0-1,aln->qlrev,0), *sp0, 
+			    i1_offset + seq_pos(aln->amax1-1,aln->llrev,0), *sp1,
+			    sim_sym[*spa],
+			    a_res->rst.score[ppst->score_ix], a_res->rst.comp, n0, n1, pstat_void, DP_FULL_FMT);
+    }
+
+    free_stack(annot_stack);	/* annot_stack may still be NULL here if annot1_p had no entries */
+  }
+
+  if (lenc < 0) lenc = 1;
+  *nc = lenc;
+/*	now we have the middle, get the right end */
+  return lenc+not_c;
+}
+
+/* calc_astruct() -- copy the alignment boundaries of *a_res_p into
+   *aln_p and clear aln_p->calc_last_set.
+
+   amin0/amax0 take the query coordinates and amin1/amax1 the library
+   coordinates, so the a_res_p min0/max0 and min1/max1 pairs are
+   crossed for FASTX and copied straight across for TFASTX.  f_str is
+   not used. */
+void
+calc_astruct(struct a_struct *aln_p, struct a_res_str *a_res_p, struct f_struct *f_str) {
+
+#ifdef TFAST	/* TFASTX: a_res pairs map straight across */
+  aln_p->amin0 = a_res_p->min0;
+  aln_p->amax0 = a_res_p->max0;
+  aln_p->amin1 = a_res_p->min1;
+  aln_p->amax1 = a_res_p->max1;
+#else		/* FASTX: a_res pairs are crossed */
+  aln_p->amin0 = a_res_p->min1;
+  aln_p->amax0 = a_res_p->max1;
+  aln_p->amin1 = a_res_p->min0;
+  aln_p->amax1 = a_res_p->max0;
+#endif
+
+  aln_p->calc_last_set = 0;
+}
+
+/* build an array of match/ins/del - length strings */
+
+/* modified 10-June-2014 to distinguish matches from mismatches, op=1
+   (previously unused) indicates an aligned non-identity */
+
+/* op_codes are:  0 - aa insertion
+   		  1 - (now) aligned non-identity
+   		  2 - -1 frameshift
+   		  3 - aligned identity
+   		  4 - +1 frameshift
+   		  5 - codon insertion
+*/
+
+/* init_update_data() -- allocate and initialise the run-length
+   encoder state used by update_code()/close_update_data().
+
+   show_code selects the output format:
+     (show_code & SHOW_CODE_MASK) == SHOW_CODE_CIGAR
+         use cigar_code and count-before-symbol order
+     otherwise
+         use ori_code and symbol-before-count order
+     (show_code & SHOW_CODE_EXT) == SHOW_CODE_EXT
+         set show_ext (update_code() then separates identities from
+         non-identities)
+
+   Returns the new structure, or NULL (after printing a message to
+   stderr) if it cannot be allocated.  The caller releases it with
+   close_update_data().
+*/
+static struct update_code_str *
+init_update_data(int show_code) {
+
+  struct update_code_str *update_data_p;
+
+  if ((update_data_p = (struct update_code_str *)calloc(1,sizeof(struct update_code_str)))==NULL) {
+    fprintf(stderr,"*** error [%s:%d] - init_update_data(): cannot allocate update_code_str\n",
+	      __FILE__, __LINE__);
+    return NULL;
+  }
+
+  update_data_p->p_op_cnt = 0;
+  update_data_p->show_code = show_code;
+
+  if ((show_code & SHOW_CODE_MASK) == SHOW_CODE_CIGAR) {
+    update_data_p->op_map = cigar_code;
+    update_data_p->cigar_order = 1;
+  }
+  else {
+    update_data_p->op_map = ori_code;
+    update_data_p->cigar_order = 0;
+  }
+
+  if ((show_code & SHOW_CODE_EXT) == SHOW_CODE_EXT) {
+    update_data_p->show_ext = 1;
+  }
+  else {
+    update_data_p->show_ext = 0;
+  }
+
+  return update_data_p;
+}
+
+/* close_update_data() -- append the last pending run (if any) to
+   al_str and free the encoder state.
+
+   al_str      NUL-terminated alignment code string to append to
+   al_str_max  passed to strncat() as the maximum number of characters
+               to append
+   up_dp       encoder state from init_update_data(); NULL is ignored
+
+   NOTE(review): strncat() writes a terminating NUL in addition to at
+   most al_str_max characters -- confirm that callers leave room.
+*/
+static void
+close_update_data(char *al_str, int al_str_max, 
+		  struct update_code_str *up_dp) {
+  char tmp_cnt[MAX_SSTR];
+
+  if (!up_dp) return;
+
+  /* sprintf_code() writes nothing for an empty run (p_op_cnt == 0);
+     start from an empty string so that strncat() never reads an
+     uninitialised buffer */
+  tmp_cnt[0] = '\0';
+  sprintf_code(tmp_cnt,up_dp, up_dp->p_op_idx, up_dp->p_op_cnt);
+  strncat(al_str,tmp_cnt,al_str_max);
+
+  free(up_dp);
+}
+
+/* update_indel_code() has been modified to work more correctly with
+   ggsearch/glsearch, which, because alignments can start with either
+   insertions or deletions, can produce an initial code of "0=".  When
+   that happens, it is ignored and no code is added.
+
+   *al_str - alignment string [al_str_max] - not dynamic
+   op -- encoded operation, currently 0=match, 1-delete, 2-insert, 3-term-match, 4-mismatch
+   op_cnt -- length of run
+   show_code -- SHOW_CODE_CIGAR uses cigar_code, otherwise legacy
+*/
+
+/* update_indel_code() is called for insertions and deletions
+   update_match_code() is called for every match
+*/
+
+/* sprintf_code() -- format one run of op_cnt operations of type
+   op_idx into tmp_str.
+
+   With up_dp->cigar_order set the result is "<count><symbol>",
+   otherwise "<symbol><count>"; the symbol is up_dp->op_map[op_idx].
+   An empty run (op_cnt == 0) produces the empty string, so the
+   result can always be handed to strncat().
+*/
+static void
+sprintf_code(char *tmp_str, struct update_code_str *up_dp, int op_idx, int op_cnt) {
+
+  if (op_cnt == 0) {
+    /* callers pass a fresh local buffer and append it unconditionally */
+    tmp_str[0] = '\0';
+    return;
+  }
+
+  if (up_dp->cigar_order) {
+    sprintf(tmp_str,"%d%c",op_cnt,up_dp->op_map[op_idx]);
+  }
+  else {
+    sprintf(tmp_str,"%c%d",up_dp->op_map[op_idx],op_cnt);
+  }
+}
+
+/* update_code() -- feed one alignment operation into the run-length
+   encoder, appending finished runs to al_str.
+
+   al_str      NUL-terminated alignment code string to append to
+   al_str_max  passed to strncat() as the maximum number of characters
+               to append
+   up_dp       encoder state (pending run in p_op_idx/p_op_cnt)
+   op          operation code: 0 aa insertion, 1 aligned non-identity,
+               2 -1 frameshift, 3 aligned identity, 4 +1 frameshift,
+               5 codon insertion; any other value is ignored
+   sim_code    match code for op 1/3 (compared with M_IDENT)
+   sp0, sp1    the two aligned residue characters for op 1/3
+
+   NOTE(review): strncat() writes a terminating NUL in addition to at
+   most al_str_max characters, and al_str_max is not reduced between
+   the two appends in the frameshift case -- confirm that callers
+   leave room.
+*/
+static void
+update_code(char *al_str, int al_str_max, 
+	    struct update_code_str *up_dp, int op, 
+	    int sim_code,  unsigned char sp0, unsigned char sp1)
+{
+  char tmp_cnt[MAX_SSTR];
+
+  /* sprintf_code() writes nothing for an empty pending run; start
+     from an empty string so that strncat() never reads an
+     uninitialised buffer */
+  tmp_cnt[0] = '\0';
+
+  /* there are two kinds of "op's", one time and accumulating */
+  /* op == 2, 4 are one-time: */
+
+  switch (op) {
+  case 2:
+  case 4:
+    /* flush the pending run, then emit the frameshift as a run of 1 */
+    sprintf_code(tmp_cnt,up_dp, up_dp->p_op_idx,up_dp->p_op_cnt);
+    strncat(al_str,tmp_cnt,al_str_max);
+    sprintf_code(tmp_cnt,up_dp, op, 1);
+    strncat(al_str,tmp_cnt,al_str_max);
+    up_dp->p_op_cnt = 0;
+    break;
+  case 0:
+  case 5:
+    if (op == up_dp->p_op_idx) {
+      up_dp->p_op_cnt++;
+    }
+    else {
+      sprintf_code(tmp_cnt,up_dp, up_dp->p_op_idx,up_dp->p_op_cnt);
+      strncat(al_str,tmp_cnt,al_str_max);
+      up_dp->p_op_idx = op;
+      up_dp->p_op_cnt = 1;
+    }
+    break;
+  case 1:
+  case 3:
+    if (sp0 != '*' && sp1 != '*') {	/* default case, not termination */
+      if (up_dp->show_ext) {
+	if (sim_code != M_IDENT) { op = 1;}
+      }
+    }
+    else {	/* have a termination codon, output for !SHOW_CODE_CIGAR */
+      if (!up_dp->cigar_order) {
+	/* at least one of sp0, sp1 is '*' in this branch */
+	op = 6;
+      }
+      else if (up_dp->show_ext && (sp0 != sp1)) { op = 1;}
+    }
+
+    if (up_dp->p_op_cnt == 0) {
+      up_dp->p_op_idx = op;
+      up_dp->p_op_cnt = 1;
+    }
+    else if (op != up_dp->p_op_idx) {
+      sprintf_code(tmp_cnt,up_dp, up_dp->p_op_idx,up_dp->p_op_cnt);
+      strncat(al_str,tmp_cnt,al_str_max);
+      up_dp->p_op_idx = op;
+      up_dp->p_op_cnt = 1;
+    }
+    else {
+      up_dp->p_op_cnt++;
+    }
+    break;
+  }
+  return;
+}
+
+/* calc_code() walks the encoded alignment in a_res->res[] and produces
+   (1) the run-length alignment encoding in al_str (via update_code()),
+   (2) the annotation encoding in annot_code_dyn, and
+   (3) the alignment boundaries and identity/mismatch/gap/frameshift
+       counts in *aln.
+
+   a_res->res[] operations handled:
+     0 - aa insertion, 2 - -1 frameshift, 3 - codon/aa match,
+     4 - +1 frameshift, 5 - codon insertion
+
+   aa0,n0 / aa1,n1 -- query / library sequence and length
+   al_str, al_str_n -- output string and its size
+   ann_arr, aa0a, aa1a -- annotation characters and per-residue
+                          annotation indices
+   annot0_p, annot1_p -- annotation entries (only annot1_p's entries are
+                         walked here)
+   score_delta -- receives the v_delta accumulated from variant annotations
+   f_str -- supplies the translated sequence (aa0y for FASTX, aa1y for TFASTX)
+   display_code -- SHOW_CODE_* / SHOW_ANNOT_FULL flags
+
+   returns the number of alignment columns processed (lenc).
+*/
+int calc_code(const unsigned char *aa0, int n0,
+	      const unsigned char *aa1, int n1,
+	      struct a_struct *aln,
+	      struct a_res_str *a_res, 
+	      struct pstruct *ppst,
+	      char *al_str, int al_str_n, 
+	      const unsigned char *ann_arr,
+	      const unsigned char *aa0a,
+	      const struct annot_str *annot0_p,
+	      const unsigned char *aa1a, 
+	      const struct annot_str *annot1_p,
+	      struct dyn_string_str *annot_code_dyn,
+	      int *score_delta,
+	      struct f_struct *f_str,
+	      void *pstat_void,
+	      int display_code)
+{
+  int i0, i1, i, j;
+  int lenc, not_c, itmp, ngap_p, ngap_d, nfs;
+  char op_char[10], ann_ch0, ann_ch1;
+  char sp0, sp1;
+  struct update_code_str *update_data_p;
+  unsigned char *sq;
+  const unsigned char *ap0, *ap1, *ap1a;
+  int *rp, *rpmax;
+
+  int have_ann = 0;
+  char tmp_astr[MAX_STR];
+  int sim_code, t_spa;
+  int show_code, annot_fmt;
+  char *sim_sym= aln_map_sym[MX_ACC];
+  int aa1c;
+  /* variables for variant changes */
+  char tmp_str[MAX_SSTR];
+
+  /* variables for variant changes, regions */
+  void *annot_stack;
+  struct annot_entry **s_annot1_arr_p;
+  int  i1_annot, v_delta, v_tmp;
+  long i0_offset, i1_offset;
+  int have_push_features, prev_match;
+
+  long i1_left_end;
+  int d1_score, d1_ident, d1_alen;
+  struct domfeat_link *left_domain_list1;
+  
+  *score_delta = 0;
+
+  /* annot_stack is only allocated when annotations are present; start
+     from NULL so the cleanup at the end can test it directly */
+  annot_stack = NULL;
+
+  /* placeholder values: sim_code/sp0/sp1 are read (passed to
+     update_code(), sprintf(), display_push_features()) on paths that
+     can be reached before the first match/frameshift assigns them */
+  sim_code = 0;
+  sp0 = sp1 = '-';
+
+  show_code = (display_code & (SHOW_CODE_MASK + SHOW_CODE_EXT));
+  annot_fmt = 2;
+  if (display_code & SHOW_ANNOT_FULL) {
+    annot_fmt = 1;
+  }
+
+  if (ppst->ext_sq_set) {sq = ppst->sqx;}
+  else {sq = ppst->sq;}
+
+#ifndef TFAST	/* FASTX */
+  i0_offset = aln->q_offset;
+  i1_offset = aln->l_offset;
+
+  aln->amin1 = a_res->min0;	/* prot, i1 */
+  aln->amin0 = a_res->min1;	/* DNA,  i0 */
+
+  ap0 = f_str->aa0y;
+  ap1 = aa1;
+  have_ann = (ann_arr[0] != '\0' && aa1a != NULL);
+  ap1a = aa1a;
+#else		/* TFASTX */
+  i1_offset = aln->q_offset;
+  i0_offset = aln->l_offset;
+
+  aln->amin0 = a_res->min0;	/* DNA,  i1 */
+  aln->amin1 = a_res->min1;	/* prot, i0 */
+
+  ap1 = aa0;
+  /* with fx_malign(), there is no guarantee that we have a valid f_str->aa1y, so make one */
+  pre_cons(aa1,n1,aln->frame, f_str);
+  ap0 = f_str->aa1y;
+
+  have_ann = (ann_arr[0] != '\0' && aa0a != NULL);
+  ap1a = aa0a;
+#endif
+
+  rp = a_res->res;
+  rpmax = &a_res->res[a_res->nres];
+
+  lenc = not_c = aln->nident = aln->nmismatch = aln->nsim = aln->npos = ngap_p = ngap_d = nfs = 0;
+  
+  update_data_p = init_update_data(show_code);
+
+  i0 = a_res->min1;
+  i1 = a_res->min0;
+
+  v_delta = 0;
+  i1_annot = 0;
+  s_annot1_arr_p = NULL;
+  have_push_features = prev_match = 0;
+
+  i1_left_end = 0;
+  left_domain_list1 = NULL;
+  d1_score = d1_ident = d1_alen = 0;
+
+  if (have_ann) {
+    
+    if (annot0_p || annot1_p) annot_stack = init_stack(64,64);
+    if (annot1_p && annot1_p->n_annot > 0) {
+      s_annot1_arr_p = annot1_p->s_annot_arr_p;
+      /* skip annotations that end before the alignment start; process
+	 '-' entries that are already open at the start */
+      while (i1_annot < annot1_p->n_annot) {
+	if (s_annot1_arr_p[i1_annot]->pos >= i1+i1_offset) {break;}
+	if (s_annot1_arr_p[i1_annot]->end < i1+i1_offset) {i1_annot++; continue;}
+
+	if (s_annot1_arr_p[i1_annot]->label == '-') {
+	  process_annot_match(&itmp, NULL, i1_offset+seq_pos(i1,aln->llrev,0), i0_offset + seq_pos(i0,aln->qlrev,0),
+			      &sp1, NULL, sq, s_annot1_arr_p[i1_annot],  NULL, 
+			      annot_stack, &have_push_features, &v_delta,
+			      &d1_score, &d1_ident, &d1_alen, &left_domain_list1, &i1_left_end, 0);
+	}
+	i1_annot++;
+      }
+    }
+  }
+
+  while (rp < rpmax) {
+    switch (*rp++) {
+    case 0: 	/* aa insertion */
+      sim_code = 5; /* indel code */
+      update_code(al_str, al_str_n-strlen(al_str), update_data_p, 0, sim_code,'-','-');
+
+      if (s_annot1_arr_p) {
+	if (i1+i1_offset == s_annot1_arr_p[i1_annot]->pos) {
+	  i1_annot = next_annot_match(&itmp, ppst->pam2[0][ap0[i0]], i1_offset+seq_pos(i1,aln->llrev,0),
+				      i0_offset+seq_pos(i0,aln->qlrev,0), &sp1, NULL, sq,
+				      i1_annot, annot1_p->n_annot, s_annot1_arr_p,
+				      NULL, annot_stack, &have_push_features, &v_delta, 
+				      &d1_score, &d1_ident, &d1_alen, &left_domain_list1, &i1_left_end,0);
+	}
+
+	if (prev_match) d1_score += ppst->gdelval;
+	d1_score += ppst->ggapval;
+	d1_alen++;
+	prev_match = 0;
+      }
+
+      /* check for an annotation */
+      if (have_ann && !(ann_arr[ap1a[i1]] == ' ' || ann_arr[ap1a[i1]]=='[' || ann_arr[ap1a[i1]]==']')) {
+#ifndef TFAST
+	sprintf(tmp_astr, "|%ld:%ld:X%c:%c%c%c",
+		i0_offset+seq_pos(i0,aln->qlrev,0)+1,i1_offset+seq_pos(i1,aln->llrev,0)+1,ann_arr[ap1a[i1]],sim_sym[sim_code],sp0,sp1);
+#else
+	sprintf(tmp_astr, "|%ld:%ld:%cX:%c%c%c",
+		i0_offset+seq_pos(i1,aln->llrev,0)+1,i1_offset+seq_pos(i0,aln->qlrev,0)+1,ann_arr[ap1a[i1]],sim_sym[sim_code],sp0,sp1);
+#endif
+	/* SAFE_STRNCAT(annot_code_s, tmp_astr, n_annot_code_s); */
+	dyn_strcat(annot_code_dyn, tmp_astr);
+      }
+
+      if (s_annot1_arr_p && have_push_features) {
+	display_push_features(annot_stack, annot_code_dyn,
+			      i0_offset+seq_pos(i0,aln->qlrev,0), sp0,
+			      i1_offset+seq_pos(i1,aln->llrev,0), sp1,
+			      sim_sym[sim_code],
+			      a_res->rst.score[ppst->score_ix], a_res->rst.comp, n0, n1, pstat_void, annot_fmt);
+	have_push_features = 0;
+      }
+
+      i1++;
+      lenc++;
+      ngap_d++;
+      break;
+    case 2:	/* -1 frameshift */
+      /* close previous run */
+      update_code(al_str, al_str_n-strlen(al_str), update_data_p, 2, sim_code,'-','-');
+
+      nfs++;
+      i0 -= 1;
+      not_c++;
+
+      aa1c = ap1[i1];
+      itmp = ppst->pam2[0][ap0[i0]][aa1c];
+      /* i0 indexes the translated sequence ap0[], as in the codon match
+	 and +1 frameshift cases below */
+      sp0 = sq[ap0[i0]];
+      sp1 = sq[aa1c];
+
+      /* variant annot_p annotations can cause substitution */
+      if (s_annot1_arr_p) {
+	if (i1+i1_offset == s_annot1_arr_p[i1_annot]->pos || i1+i1_offset == i1_left_end) {
+	  i1_annot = next_annot_match(&itmp, ppst->pam2[0][ap0[i0]], i1_offset+seq_pos(i1,aln->llrev,0),
+				      i0_offset+seq_pos(i0,aln->qlrev,0), &sp1, NULL, sq,
+				      i1_annot, annot1_p->n_annot, s_annot1_arr_p,
+				      NULL, annot_stack, &have_push_features, &v_delta, 
+				      &d1_score, &d1_ident, &d1_alen, &left_domain_list1, &i1_left_end,0);
+	}
+
+	if (sq[aa1c] != sp1) {
+	  t_spa = align_type(itmp, sp0, sp1, 0, NULL, ppst->pam_x_id_sim);
+
+	  comment_var(i0_offset+seq_pos(i0,aln->qlrev,0), sp0,
+		      i1_offset+seq_pos(i1,aln->llrev,0), sp1,
+		      sq[aa1c], sim_sym[t_spa], NULL, annot_code_dyn,
+		      1,annot_fmt);
+	}
+
+	d1_score += ppst->gshift;
+	d1_score += itmp;
+	prev_match = 1;
+      }
+
+      sim_code = align_type(itmp, sp0, sp1, 0, aln, ppst->pam_x_id_sim);
+
+      d1_alen++;
+      if (sim_code == M_IDENT) {d1_ident++;}
+
+      /* check for an annotation */
+      if (have_ann && !(ann_arr[ap1a[i1]] == ' ' || ann_arr[ap1a[i1]]=='[' || ann_arr[ap1a[i1]]==']')) {
+#ifndef TFAST
+	sprintf(tmp_astr, "|X%c:%ld%c%c%ld%c",
+		ann_arr[ap1a[i1]],i0_offset+seq_pos(i0,aln->qlrev,0)+1,sp0,sim_sym[sim_code],i1_offset+seq_pos(i1,aln->llrev,0)+1,sp1);
+#else
+	sprintf(tmp_astr, "|%cX:%ld%c%c%ld%c",
+		ann_arr[ap1a[i1]],i0_offset+seq_pos(i1,aln->llrev,0)+1,sp0,sim_sym[sim_code],i1_offset+seq_pos(i0,aln->qlrev,0)+1,sp1);
+
+#endif
+	/* SAFE_STRNCAT(annot_code_s, tmp_astr, n_annot_code_s); */
+	dyn_strcat(annot_code_dyn, tmp_astr);
+      }
+
+      if (s_annot1_arr_p && have_push_features) {
+	display_push_features(annot_stack, annot_code_dyn,
+			      i0_offset+seq_pos(i0,aln->qlrev,0), sp0,
+			      i1_offset+seq_pos(i1,aln->llrev,0), sp1,
+			      sim_sym[sim_code],
+			      a_res->rst.score[ppst->score_ix], a_res->rst.comp, n0, n1, pstat_void, annot_fmt);
+	have_push_features = 0;
+      }
+
+      update_code(al_str, al_str_n-strlen(al_str), update_data_p, 3, sim_code,sp0,sp1);
+
+      if (sim_code == M_IDENT) {
+	aln->nident++;
+      }
+      else {
+	aln->nmismatch++;
+      }
+
+      i0 += 3;
+      i1++;
+      lenc++;
+      break;
+    case 3:	/* codon/aa match */
+      aa1c = ap1[i1];
+      itmp = ppst->pam2[0][ap0[i0]][aa1c];
+      sp0 = sq[ap0[i0]];
+      sp1 = sq[aa1c];
+
+      /* variant annot1_p annotations can cause substitution */
+      if (s_annot1_arr_p) {
+	if (i1+i1_offset == s_annot1_arr_p[i1_annot]->pos || i1+i1_offset == i1_left_end) {
+	  i1_annot = next_annot_match(&itmp, ppst->pam2[0][ap0[i0]], i1_offset+seq_pos(i1,aln->llrev,0),
+				      i0_offset+seq_pos(i0,aln->qlrev,0), &sp1, NULL, sq,
+				      i1_annot, annot1_p->n_annot, s_annot1_arr_p,
+				      NULL, annot_stack, &have_push_features, &v_delta,
+				      &d1_score, &d1_ident, &d1_alen, &left_domain_list1, &i1_left_end,0);
+	}
+
+	if (sq[aa1c] != sp1) {
+	  t_spa = align_type(itmp, sp0, sp1, 0, NULL, ppst->pam_x_id_sim);
+
+	  comment_var(i0_offset+seq_pos(i0,aln->qlrev,0), sp0,
+		      i1_offset+seq_pos(i1,aln->llrev,0), sp1, sq[aa1c],
+		      sim_sym[t_spa], NULL, annot_code_dyn,1,annot_fmt);
+	}
+
+	d1_score += itmp;
+	prev_match = 1;
+      }
+
+      sim_code = align_type(itmp, sp0, sp1, 0, aln, ppst->pam_x_id_sim);
+      d1_alen++;
+      if (sim_code == M_IDENT) {d1_ident++;}
+
+      /* check for an annotation */
+      if (have_ann && !(ann_arr[ap1a[i1]] == ' ' || ann_arr[ap1a[i1]]=='[' || ann_arr[ap1a[i1]]==']')) {
+#ifndef TFAST
+	sprintf(tmp_astr, "|X%c:%ld%c%c%ld%c",
+		ann_arr[ap1a[i1]],i0_offset+seq_pos(i0,aln->qlrev,0)+1,sp0,sim_sym[sim_code],i1_offset+seq_pos(i1,aln->llrev,0)+1,sp1);
+#else
+	sprintf(tmp_astr, "|%cX:%ld%c%c%ld%c",
+		ann_arr[ap1a[i1]],i0_offset+seq_pos(i1,aln->llrev,0)+1,sp0,sim_sym[sim_code],i1_offset+seq_pos(i0,aln->qlrev,0)+1,sp1);
+
+#endif
+	/* SAFE_STRNCAT(annot_code_s, tmp_astr, n_annot_code_s); */
+	dyn_strcat(annot_code_dyn, tmp_astr);
+      }
+
+      if (s_annot1_arr_p && have_push_features) {
+	display_push_features(annot_stack, annot_code_dyn,
+			      i0_offset+seq_pos(i0,aln->qlrev,0), sp0,
+			      i1_offset+seq_pos(i1,aln->llrev,0), sp1,
+			      sim_sym[sim_code],
+			      a_res->rst.score[ppst->score_ix], a_res->rst.comp, n0, n1, pstat_void, annot_fmt);
+	have_push_features = 0;
+      }
+
+      update_code(al_str, al_str_n-strlen(al_str), update_data_p, 3, sim_code,sp0,sp1);
+
+      i0 += 3;
+      i1++;
+      lenc++;
+      break;
+    case 4:	/* +1 frameshift */
+      /* finish previous run */
+      update_code(al_str, al_str_n-strlen(al_str), update_data_p, 4, sim_code,'-','-');
+      /* mark frameshift */
+
+      nfs++;
+      i0 += 1;
+      not_c++;
+
+      aa1c = ap1[i1];
+      itmp = ppst->pam2[0][ap0[i0]][aa1c];
+      sp0 = sq[ap0[i0]];
+      sp1 = sq[aa1c];
+
+      /* variant annot1_p annotations can cause substitution */
+      if (s_annot1_arr_p) {
+	if (i1+i1_offset == s_annot1_arr_p[i1_annot]->pos || i1+i1_offset == i1_left_end) {
+	  i1_annot = next_annot_match(&itmp, ppst->pam2[0][ap0[i0]], i1_offset+seq_pos(i1,aln->llrev,0),
+				      i0_offset+seq_pos(i0,aln->qlrev,0), &sp1, NULL, sq,
+				      i1_annot, annot1_p->n_annot, s_annot1_arr_p,
+				      NULL, annot_stack, &have_push_features, &v_delta,
+				      &d1_score, &d1_ident, &d1_alen, &left_domain_list1, &i1_left_end,0);
+	}
+
+	if (sq[aa1c] != sp1) {
+	  t_spa = align_type(itmp, sp0, sp1, 0, NULL, ppst->pam_x_id_sim);
+
+	  comment_var(i0_offset+seq_pos(i0,aln->qlrev,0), sp0,
+		      i1_offset+seq_pos(i1,aln->llrev,0), sp1,
+		      sq[aa1c], sim_sym[t_spa], NULL,
+		      annot_code_dyn,1,annot_fmt);
+	}
+
+	d1_score += itmp;
+	prev_match = 1;
+      }
+
+      sim_code = align_type(itmp, sp0, sp1, 0, aln, ppst->pam_x_id_sim);
+      d1_alen++;
+      if (sim_code == M_IDENT) {d1_ident++;}
+
+      /* check for an annotation */
+      if (have_ann && !(ann_arr[ap1a[i1]] == ' ' || ann_arr[ap1a[i1]]=='[' || ann_arr[ap1a[i1]]==']')) {
+#ifndef TFAST
+	sprintf(tmp_astr, "|%ld:%ld:X%c:%c%c%c",
+		i0_offset+seq_pos(i0,aln->qlrev,0)+1,i1_offset+seq_pos(i1,aln->llrev,0)+1,
+		ann_arr[ap1a[i1]],sim_sym[sim_code],sp0,sp1);
+#else
+	sprintf(tmp_astr, "|%ld:%ld:%cX:%c%c%c",
+		i0_offset+seq_pos(i1,aln->llrev,0)+1,i1_offset+seq_pos(i0,aln->qlrev,0)+1,
+		ann_arr[ap1a[i1]],sim_sym[sim_code],sp0,sp1);
+
+#endif
+	/* SAFE_STRNCAT(annot_code_s, tmp_astr, n_annot_code_s); */
+	dyn_strcat(annot_code_dyn, tmp_astr);
+      }
+	  
+      if (s_annot1_arr_p && have_push_features) {
+	display_push_features(annot_stack, annot_code_dyn,
+			      i0_offset+seq_pos(i0,aln->qlrev,0), sp0, 
+			      i1_offset+seq_pos(i1,aln->llrev,0), sp1,
+			      sim_sym[sim_code], 
+			      a_res->rst.score[ppst->score_ix], a_res->rst.comp, n0, n1, pstat_void, annot_fmt);
+	have_push_features = 0;
+      }
+
+      update_code(al_str, al_str_n-strlen(al_str), update_data_p, 3, sim_code,sp0,sp1);
+
+      /* start next match/mismatch run */
+      if (sim_code == M_IDENT) {
+	aln->nident++;
+      }
+      else {
+	aln->nmismatch++;
+      }
+
+      i0 += 3;
+      i1++;
+      lenc++;
+      break;
+    case 5:	/* codon insertion */
+      sim_code = 5;
+      update_code(al_str, al_str_n-strlen(al_str), update_data_p, 5, sim_code,'-','-');
+
+      if (prev_match) d1_score += ppst->gdelval;
+      d1_score += ppst->ggapval;
+      d1_alen++;
+      prev_match = 0;
+
+      i0 += 3;
+      lenc++;
+      ngap_p++;
+      break;
+    }
+  }
+
+  /* write out whatever run is still pending */
+  close_update_data(al_str, al_str_n-strlen(al_str), update_data_p);
+
+#ifndef TFAST
+  aln->amax0 = i0;
+  aln->amax1 = i1;
+  aln->ngap_q = ngap_d;
+  aln->ngap_l = ngap_p;
+#else
+  aln->amax1 = i0;
+  aln->amax0 = i1;
+  aln->ngap_q = ngap_p;
+  aln->ngap_l = ngap_d;
+#endif
+  aln->calc_last_set = 1;
+
+  aln->nfs = nfs;
+
+  if (lenc < 0) lenc = 1;
+
+  if (have_ann) {
+    have_push_features = 0;
+    if (s_annot1_arr_p) {
+      /* also check for regions after alignment */
+      while (i1_annot < annot1_p->n_annot && s_annot1_arr_p[i1_annot]->pos < i1_offset+n1) {
+	if (s_annot1_arr_p[i1_annot]->label == '[') break;
+	if (s_annot1_arr_p[i1_annot]->label == ']') {
+	  push_stack(annot_stack, s_annot1_arr_p[i1_annot]);
+	  have_push_features = 1;
+	}
+	i1_annot++;
+      }
+    }
+
+    if (have_push_features) {
+      display_push_features(annot_stack, annot_code_dyn,
+			    i0_offset+a_res->max0-1, sp0,
+			    i1_offset+a_res->max1-1, sp1,
+			    sim_sym[sim_code], 
+			    a_res->rst.score[ppst->score_ix], a_res->rst.comp, n0, n1, pstat_void, annot_fmt);
+    }
+  }
+
+  /* release the stack exactly when one was allocated above */
+  if (annot_stack != NULL) free_stack(annot_stack);
+  *score_delta = v_delta;
+
+  return lenc;
+}
+
+/* calc_id() walks the encoded alignment in a_res->res[] and fills in the
+   alignment boundaries and the gap/frameshift counts in *aln, without
+   building an alignment string.  Each aligned pair is passed to
+   align_type() together with aln.
+
+   a_res->res[] operations handled:
+     0 - aa insertion, 2 - -1 frameshift, 3 - codon/aa match,
+     4 - +1 frameshift, 5 - codon insertion
+
+   annot1_p      -- optional annotations; when next_annot_match() replaces
+                    the residue at an annotated position, the change is
+                    appended to annot_var_dyn as "<old><pos><new>;"
+   score_delta   -- receives the v_delta accumulated by next_annot_match()
+   annot_var_dyn -- reset on entry, then receives the variant list
+   f_str         -- supplies the translated sequence (aa0y for FASTX,
+                    aa1y for TFASTX)
+
+   returns the number of alignment columns processed (lenc).
+*/
+int calc_id(const unsigned char *aa0, int n0,
+	    const unsigned char *aa1, int n1,
+	    struct a_struct *aln,
+	    struct a_res_str *a_res, 
+	    struct pstruct *ppst,
+	    const struct annot_str *annot0_p,
+	    const struct annot_str *annot1_p,
+	    int *score_delta,
+	    struct dyn_string_str *annot_var_dyn,
+	    struct f_struct *f_str)
+{
+  int i0, i1, i, j;
+  int lenc, not_c, ngap_p, ngap_d, nfs;
+  char sp0, sp1;
+  char tmp_str[MAX_SSTR];
+  unsigned char *sq;
+  const unsigned char *ap0, *ap1;
+  int *rp, *rpmax;
+
+  int aa1c;
+  /* variables for variant changes */
+  struct annot_entry **s_annot1_arr_p;
+  int  itmp, i1_annot, v_delta, v_tmp;
+  long i0_offset, i1_offset;
+
+  long i1_left_end;
+  int d1_score, d1_ident, d1_alen;
+  struct domfeat_link *left_domain_list1;
+
+  *score_delta = 0;
+  i1_left_end = -1;
+  left_domain_list1 = NULL;
+  /* these are handed to next_annot_match() by address; give them
+     defined starting values, as calc_code() does */
+  d1_score = d1_ident = d1_alen = 0;
+
+  NULL_dyn_string(annot_var_dyn);
+
+  if (ppst->ext_sq_set) {sq = ppst->sqx;}
+  else {sq = ppst->sq;}
+
+#ifndef TFAST	/* FASTX */
+  aln->amin1 = a_res->min0;	/* prot */
+  aln->amin0 = a_res->min1;	/* DNA */
+
+  i0_offset = aln->q_offset;
+  i1_offset = aln->l_offset;
+
+  ap0 = f_str->aa0y;
+  ap1 = aa1;
+#else	/* TFASTX */
+  aln->amin0 = a_res->min0;	/* DNA */
+  aln->amin1 = a_res->min1;	/* prot */
+
+  i1_offset = aln->q_offset;
+  i0_offset = aln->l_offset;
+
+  ap1 = aa0;
+  /* with fx_malign(), there is no guarantee that we have a valid f_str->aa1y, so make one */
+  pre_cons(aa1,n1,aln->frame, f_str);
+  ap0 = f_str->aa1y;
+#endif
+
+  rp = a_res->res;
+  rpmax = &a_res->res[a_res->nres];
+
+  lenc = not_c = aln->nident = aln->nmismatch = aln->nsim = aln->npos = ngap_p = ngap_d = nfs = 0;
+  i0 = a_res->min1;
+  i1 = a_res->min0;
+
+  v_delta = 0;
+  i1_annot = 0;
+
+  s_annot1_arr_p = NULL;
+  if (annot1_p && annot1_p->n_annot > 0) s_annot1_arr_p = annot1_p->s_annot_arr_p;
+
+  while (rp < rpmax) {
+    /*    fprintf(stderr,"%d %d %d (%c) %d (%c)\n"
+	  ,(int)(rp-res),*rp,i0,sq[ap0[i0]],i1,sq[ap1[i1]]);
+    */
+    switch (*rp++) {
+    case 0: 	/* aa insertion */
+      i1++;
+      lenc++;
+      ngap_d++;
+      break;
+    case 2:	/* -1 frameshift */
+      nfs++;
+      i0 -= 1;
+      not_c++;
+
+      /* then check for v_delta after the frameshift */
+
+      itmp = ppst->pam2[0][ap0[i0]][ap1[i1]];
+
+      sp0 = sq[ap0[i0]];
+      sp1 = sq[ap1[i1]];
+
+      if (s_annot1_arr_p && (i1+i1_offset == s_annot1_arr_p[i1_annot]->pos || i1+i1_offset == i1_left_end)) {
+	i1_annot = next_annot_match(&itmp, ppst->pam2[0][ap0[i0]], i1_offset+seq_pos(i1,aln->llrev,0),
+				    i0_offset+seq_pos(i0,aln->qlrev,0), &sp1, NULL, sq,
+				    i1_annot, annot1_p->n_annot, s_annot1_arr_p,
+				    NULL, NULL, NULL, &v_delta,
+				    &d1_score, &d1_ident, &d1_alen, &left_domain_list1, &i1_left_end,0);
+
+	/* the annotation replaced the residue: record the variant */
+	if (sq[ap1[i1]] != sp1) {
+	  sprintf(tmp_str,"%c%d%c;",sq[ap1[i1]],i1+1,sp1);
+	  /* SAFE_STRNCAT(annot_var_s,tmp_str,n_annot_var_s); */
+	  dyn_strcat(annot_var_dyn, tmp_str);
+	}
+      }
+
+      align_type(itmp, sp0, sp1, 0, aln, ppst->pam_x_id_sim);
+
+      i0 += 3;
+      i1++;
+
+      lenc++;
+      break;
+    case 3:	/* codon/aa match */
+
+      aa1c = ap1[i1];
+      itmp = ppst->pam2[0][ap0[i0]][aa1c];
+      sp0 = sq[ap0[i0]];
+      sp1 = sq[ap1[i1]];
+
+      if (s_annot1_arr_p && (i1+i1_offset == s_annot1_arr_p[i1_annot]->pos || i1+i1_offset == i1_left_end)) {
+	i1_annot = next_annot_match(&itmp, ppst->pam2[0][ap0[i0]], i1_offset+seq_pos(i1,aln->llrev,0),
+				    i0_offset+seq_pos(i0,aln->qlrev,0), &sp1, NULL, sq,
+				    i1_annot, annot1_p->n_annot, s_annot1_arr_p,
+				    NULL, NULL, NULL, &v_delta,
+				    &d1_score, &d1_ident, &d1_alen, &left_domain_list1, &i1_left_end,0);
+
+	/* the annotation replaced the residue: record the variant */
+	if (sq[ap1[i1]] != sp1) {
+	  sprintf(tmp_str,"%c%d%c;",sq[ap1[i1]],i1+1,sp1);
+	  /* SAFE_STRNCAT(annot_var_s,tmp_str,n_annot_var_s); */
+	  dyn_strcat(annot_var_dyn, tmp_str);
+	}
+      }
+
+      align_type(itmp, sp0, sp1, 0, aln, ppst->pam_x_id_sim);
+
+      i0 += 3;
+      i1++;
+
+      lenc++;
+      break;
+    case 4:	/* +1 frameshift */
+      nfs++;
+      i0 += 1;
+      not_c++;
+
+
+      itmp = ppst->pam2[0][ap0[i0]][ap1[i1]];
+
+      sp0 = sq[ap0[i0]];
+      sp1 = sq[ap1[i1]];
+
+      if (s_annot1_arr_p && (i1+i1_offset == s_annot1_arr_p[i1_annot]->pos || i1+i1_offset == i1_left_end)) {
+	i1_annot = next_annot_match(&itmp, ppst->pam2[0][ap0[i0]], i1_offset+seq_pos(i1,aln->llrev,0),
+				    i0_offset+seq_pos(i0,aln->qlrev,0), &sp1, NULL, sq,
+				    i1_annot, annot1_p->n_annot, s_annot1_arr_p,
+				    NULL, NULL, NULL, &v_delta,
+				    &d1_score, &d1_ident, &d1_alen, &left_domain_list1, &i1_left_end,0);
+
+	/* the annotation replaced the residue: record the variant */
+	if (sq[ap1[i1]] != sp1) {
+	  sprintf(tmp_str,"%c%d%c;",sq[ap1[i1]],i1+1,sp1);
+	  /* SAFE_STRNCAT(annot_var_s,tmp_str,n_annot_var_s); */
+	  dyn_strcat(annot_var_dyn, tmp_str);
+	}
+      }
+
+      align_type(itmp, sp0, sp1, 0, aln, ppst->pam_x_id_sim);
+
+      i0 += 3;
+      i1++;
+      lenc++;
+
+      break;
+    case 5:	/* codon insertion */
+      i0 += 3;
+      lenc++;
+      ngap_p++;
+      break;
+    }
+  }
+
+#ifndef TFAST
+  aln->amax0 = i0;
+  aln->amax1 = i1;
+  aln->ngap_q = ngap_d;
+  aln->ngap_l = ngap_p;
+#else
+  aln->amax1 = i0;
+  aln->amax0 = i1;
+  aln->ngap_q = ngap_p;
+  aln->ngap_l = ngap_d;
+#endif
+  aln->calc_last_set = 1;
+
+  aln->nfs = nfs;
+
+  *score_delta = v_delta;
+
+  if (lenc < 0) lenc = 1;
+/*	now we have the middle, get the right end */
+  return lenc;
+}
diff --git a/src/dropfx2.c b/src/dropfx2.c
new file mode 100644
index 0000000..f7e4566
--- /dev/null
+++ b/src/dropfx2.c
@@ -0,0 +1,3892 @@
+/* $Id: dropfx.c 1280 2014-08-21 00:47:55Z wrp $ */
+/* $Revision: 1280 $  */
+
+/* copyright (c) 1998, 1999, 2014 by William R. Pearson and The Rector &
+   Visitors of the University of Virginia */
+
+/* Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under this License is distributed on an "AS
+   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+   express or implied.  See the License for the specific language
+   governing permissions and limitations under the License. 
+*/
+
+/* implements the fastx algorithm, see:
+
+   W. R. Pearson, T. Wood, Z. Zhang, A W. Miller (1997) "Comparison of
+   DNA sequences with protein sequences" Genomics 46:24-36
+
+   see dropnfa.c for better variable descriptions and comments
+*/
+   
+/* 17-Sept-2008 - modified for multiple non-overlapping alignments */
+
+/* 18-Sept-2006 - remove global variables used for alignment */
+
+/* 22-June-2006 - correct incorrect alignment coordinates generated
+   after pro_dna() on projected DNA region.  
+*/
+
+/* 9-May-2003 -> 3.46 changed lx_band to use projected protein
+   boundary end.  this fixes some addressing issues on MacOSX, and
+   speeds up alignment on very long proteins
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <math.h>
+
+#include "defs.h"
+#include "param.h"
+#define XTERNAL
+#include "upam.h"
+
+/* this must be consistent with upam.h */
+#define MAXHASH 32
+#define NMAP MAXHASH+1
+
+/* globals for fasta */
+#define MAXWINDOW 64
+
+#ifndef MAXSAV
+#define MAXSAV 10
+#endif
+
+#ifndef ALLOCN0
+static char *verstr="3.8 June 2014";
+#else
+static char *verstr="3.8an0 June 2014";
+#endif
+
+struct dstruct		/* diagonal structure for saving current run */
+{			
+   int     score;	/* hash score of current match */
+   int     start;	/* start of current match */
+   int     stop;	/* end of current match */
+   struct savestr *dmax;   /* location in vmax[] where best score data saved */
+};
+
+struct savestr
+{
+   int     score;		/* pam score with segment optimization */
+   int     score0;		/* pam score of best single segment */
+   int     gscore;		/* score from global match */
+   int     dp;			/* diagonal of match */
+   int     start;		/* start of match in lib seq */
+   int     stop;		/* end of match in lib seq */
+};
+
+struct swstr { int H, E;};
+/* struct bdstr { int CC, DD, CP, DP;}; */
+
+#define SGW1 100
+#define SGW2 300
+struct smgl_str {
+  int C[SGW1+1][SGW2+1];
+  int st[SGW1+1][SGW2+1];
+  int D[SGW2+7], I[SGW2+1];
+};
+
+/* state carried between update_code() calls while an alignment encoding
+   is being built.
+   NOTE(review): the field notes below are inferred from the field names
+   and the declarations in this file; the functions that use them are
+   defined further down -- confirm against them.
+*/
+struct update_code_str {
+  int p_op_idx;		/* presumably the operation of the pending run */
+  int p_op_cnt;		/* presumably the length of the pending run */
+  int btop_enc;
+  int show_code;
+  int cigar_order;
+  int show_ext;
+  char *op_map;		/* presumably ori_code or cigar_code (below) */
+};
+
+/* one character per operation code; each string has 7 entries */
+#ifdef TFAST
+static char *ori_code = "-x/=\\+*";	/* TFASTX */
+static char *cigar_code = "DXFMRI*";
+#else
+static char *ori_code = "+x/=\\-*";	/* FASTX */
+static char *cigar_code = "IXFMRD*";
+#endif
+
+unsigned long adler32(unsigned long, const unsigned char *, unsigned int);
+
+void kpsort (struct savestr **v, int n);
+extern void *init_stack(int, int);
+extern void push_stack(void *, void *);
+extern void *pop_stack(void *);
+extern void *free_stack(void *);
+extern struct domfeat_data * init_domfeat_data(const struct annot_str *annot_p);
+
+struct sx_s {int C1, C2, C3, I1, I2, I3, flag; };
+
+/* per-query work area; allocated (calloc) and filled in by init_work() */
+struct f_struct {
+  struct dstruct *diag;		/* diagonal array: MAXDIAG entries, or n0 when ALLOCN0 is defined */
+  int ndo;			/* set to 0 in init_work() */
+  int noff;
+  int hmask;			/* hash constants */
+  int *pamh1;			/* pam based array */
+  int *pamh2;			/* pam based kfact array */
+  int *link, *harr;		/* hash arrays */
+  int kshft;			/* shift width */
+  int nsav;		/* number of saved runs, worst saved run */
+#ifndef TFAST
+  unsigned char *aa0x;		/* contains translated codons 111222333*/
+  unsigned char *aa0y;		/* contains translated codons 123123123*/
+#else
+  unsigned char *aa1x;		/* contains translated codons 111222333 */
+  unsigned char *aa1y;		/* contains translated codons 123123123 */
+  int have_yaa;			/* flag if translation is done */
+#endif
+  struct sx_s *cur;
+  int cur_sp_size;
+  int *waa0;			/* pam2[][] scores tabulated against aa0[] in init_work() */
+  int *waa1;
+  struct smgl_str smgl_s;
+  int *res;
+  int max_res;
+};
+
+#define DROP_INTERN
+#include "drop_func.h"
+
+int shscore(unsigned char *aa0, int n0, int **pam2);
+int saatran(const unsigned char *ntseq, unsigned char *aaseq, int maxs, int frame);
+extern int ELK_to_s(double E_join, int n0, int n1, double Lambda, double K, double H);
+
+int savemax (struct dstruct *dptr, int dpos, 
+	     struct savestr *vmax, struct savestr **lowmax);
+
+int spam (const unsigned char *aa0, const unsigned char *aa1, 
+	  struct savestr *dmax, int **pam2,
+	  struct f_struct *f_str);
+int sconn (struct savestr **v, int n,int cgap, int pgap, struct f_struct *f_str);
+int lx_band(const unsigned char *prot_seq, int len_prot,
+	    const unsigned char *dna_prot_seq, int len_dna_prot,
+	    int **pam_matrix, int gopen, int gext,
+	    int gshift, int start_diag, int width, struct f_struct *f_str);
+
+void fx_walign (const unsigned char *aa0, int n0,
+		const unsigned char *xaa, int n1, unsigned char *yaa,
+		int frame, int max_res,
+		struct pstruct *ppst, 
+		struct f_struct *f_str, 
+		struct a_res_str *a_res,
+		int score_thresh);
+
+struct a_res_str *
+merge_ares_chains(struct a_res_str *cur_ares, 
+		  struct a_res_str *tmpl_ares,
+		  int score_ix, const char *msg);
+
+extern void w_abort (char *p, char *p1);
+
+static struct update_code_str *
+init_update_data(int show_code);
+
+static void
+sprintf_code(char *tmp_str, struct update_code_str *, int op_idx, int op_cnt);
+
+static void 
+update_code(struct dyn_string_str *align_code_dyn,
+	    struct update_code_str *update_data, int op, 
+	    int sim_code, unsigned char sp0, unsigned char sp1);
+
+static void
+close_update_data(struct dyn_string_str *align_code_dyn,
+		  struct update_code_str *update_data);
+
+/* initialize for fasta */
+
+void
+init_work (unsigned char *aa0, int n0, 
+	   struct pstruct *ppst,
+	   struct f_struct **f_arg)
+{
+   int mhv, phv;
+   int hmax;
+   int i0, hv;
+   int pamfact;
+   int btemp;
+   struct f_struct *f_str;
+   int ktup;		/* word size examined */
+   int fact;		/* factor used to scale ktup match value */
+   int kt1;		/* ktup-1 */
+   int lkt;		/* last ktup - initiall kt1, but can be increased
+			   for hsq >= NMAP */
+
+   int maxn0;
+   int *pwaa;
+   int i, j, q;
+   struct swstr *ss, *r_ss;
+   int *waa;
+   int *res;
+   int nsq, ip, *hsq;
+#ifndef TFAST
+   int last_n0, itemp;
+   unsigned char *fd, *fs, *aa0x, *aa0y, *aa0s;
+   int n0x, n0x3;
+#endif
+
+  if (ppst->ext_sq_set) {
+    nsq = ppst->nsqx; ip = 1;
+    hsq = ppst->hsqx;
+  }
+  else {
+    nsq = ppst->nsqx; ip = 0;
+    hsq = ppst->hsq;
+  }
+
+   f_str = (struct f_struct *)calloc(1,sizeof(struct f_struct));
+
+   if (!ppst->param_u.fa.use_E_thresholds) {
+     btemp = 2 * ppst->param_u.fa.bestoff / 3 +
+       n0 / ppst->param_u.fa.bestscale +
+       ppst->param_u.fa.bkfact *
+       (ppst->param_u.fa.bktup - ppst->param_u.fa.ktup);
+     btemp = min (btemp, ppst->param_u.fa.bestmax);
+     if (btemp > 3 * n0) btemp = 3 * shscore(aa0,n0,ppst->pam2[0]) / 5;
+
+     ppst->param_u.fa.cgap = btemp + ppst->param_u.fa.bestoff / 3;
+     if (ppst->param_u.fa.optcut_set != 1) {
+#ifndef TFAST
+       ppst->param_u.fa.optcut = (btemp*5)/4;
+#else
+       ppst->param_u.fa.optcut = (btemp*4)/3;
+#endif
+     }
+   }
+
+#ifdef OLD_FASTA_GAP
+   ppst->param_u.fa.pgap = ppst->gdelval + ppst->ggapval;
+#else
+   ppst->param_u.fa.pgap = ppst->gdelval + 2*ppst->ggapval;
+#endif
+
+   ppst->param_u.fa.cgap = max(ppst->param_u.fa.cgap, -ppst->param_u.fa.pgap);
+
+   pamfact = ppst->param_u.fa.pamfact;
+   ktup = ppst->param_u.fa.ktup;
+   fact = ppst->param_u.fa.scfact * ktup;
+
+   if (pamfact == -1)
+      pamfact = 0;
+   else if (pamfact == -2)
+      pamfact = 1;
+
+   for (i0 = 1, mhv = -1; i0 <=ppst->nsq; i0++)
+      if (hsq[i0] < NMAP && hsq[i0] > mhv) mhv = hsq[i0];
+
+   if (mhv <= 0) {
+      fprintf (stderr, " maximum hsq <=0 %d\n", mhv);
+      exit (1);
+   }
+
+   for (f_str->kshft = 0; mhv > 0; mhv /= 2)
+      f_str->kshft++;
+
+/*      kshft = 2;	*/
+   kt1 = ktup - 1;
+   hv = 1;
+   for (i0 = 0; i0 < ktup; i0++) {
+     hv = hv << f_str->kshft;
+   }
+   hmax = hv;
+   f_str->hmask = (hmax >> f_str->kshft) - 1;
+
+   if ((f_str->harr = (int *) calloc (hmax, sizeof (int))) == NULL) {
+     fprintf (stderr, " cannot allocate hash array\n");
+     exit (1);
+   }
+   if ((f_str->pamh1 = (int *) calloc (ppst->nsq+1, sizeof (int))) == NULL) {
+     fprintf (stderr, " cannot allocate pamh1 array\n");
+     exit (1);
+   }
+   if ((f_str->pamh2 = (int *) calloc (hmax, sizeof (int))) == NULL) {
+     fprintf (stderr, " cannot allocate pamh2 array\n");
+     exit (1);
+   }
+   if ((f_str->link = (int *) calloc (n0, sizeof (int))) == NULL) {
+     fprintf (stderr, " cannot allocate hash link array");
+     exit (1);
+   }
+
+#ifdef TFAST
+   if ((f_str->aa1x =(unsigned char *)calloc((size_t)ppst->maxlen+2,
+					     sizeof(unsigned char)))
+       == NULL) {
+     fprintf (stderr, "cannot allocate aa1x array %d\n", ppst->maxlen+2);
+     exit (1);
+   }
+   f_str->aa1x++;
+
+   if ((f_str->aa1y =(unsigned char *)calloc((size_t)ppst->maxlen+2,
+					     sizeof(unsigned char)))
+       == NULL) {
+     fprintf (stderr, "cannot allocate aa1y array %d\n", ppst->maxlen+2);
+     exit (1);
+   }
+   f_str->aa1y++;
+#else	/* FASTX */
+   maxn0 = n0 + 2;
+   if ((aa0x =(unsigned char *)calloc((size_t)maxn0,sizeof(unsigned char)))
+       == NULL) {
+     fprintf (stderr, "cannot allocate aa0x array %d\n", maxn0);
+     exit (1);
+   }
+   aa0x++;
+   f_str->aa0x = aa0x;
+
+   if ((aa0y =(unsigned char *)calloc((size_t)maxn0,sizeof(unsigned char)))
+       == NULL) {
+     fprintf (stderr, "cannot allocate aa0y array %d\n", maxn0);
+     exit (1);
+   }
+   aa0y++;
+   f_str->aa0y = aa0y;
+
+   last_n0 = 0;
+   for (itemp=0; itemp<3; itemp++) {
+     n0x = saatran(aa0,&aa0x[last_n0],n0,itemp);
+
+     /*
+       for (i=0; i<n0x; i++) {
+       fprintf(stderr,"%c",aa[aa0x[last_n0+i]]);
+       if ((i%60)==59) fprintf(stderr,"\n");
+       }
+       fprintf(stderr,"\n");
+     */
+     last_n0 += n0x+1;
+   }
+   /*
+        fprintf(stderr,"\n");
+   */
+   for (itemp=0, fs=aa0x; itemp <3; itemp++,fs++) {
+     for (fd = &aa0y[itemp]; *fs!=EOSEQ; fd += 3, fs++) *fd = *fs;
+     *fd=EOSEQ;
+   }
+
+   /* now switch aa0 and aa0x for hashing functions */
+   /* this seems dangerous in threaded code, but only the pointer is changed,
+      not the data itself */
+
+   fs = aa0;
+   aa0 = aa0x;
+   aa0x = fs;
+					 
+#endif
+
+   for (i0 = 0; i0 < hmax; i0++)
+      f_str->harr[i0] = -1;
+   for (i0 = 0; i0 < n0; i0++)
+      f_str->link[i0] = -1;
+
+   /* encode the aa0 array */
+
+   phv = hv = 0;
+   lkt = kt1;
+   for (i0 = 0; i0 < min(lkt,n0); i0++) {
+     if (hsq[aa0[i0]] >= NMAP) {hv=phv=0; lkt=i0+ktup; continue;}
+     hv = (hv << f_str->kshft) + hsq[aa0[i0]];
+     phv += ppst->pam2[ip][aa0[i0]][aa0[i0]] * ktup;
+   }
+
+   for (; i0 < n0; i0++) {
+     if (hsq[aa0[i0]] >= NMAP) {
+       hv=phv=0; 
+       lkt = i0+ktup;
+       /* restart hv, phv calculation */
+       for (; (i0 < lkt || hsq[aa0[i0]]>=NMAP) && i0<n0; i0++) {
+	 if (hsq[aa0[i0]] >= NMAP) {hv=phv=0; lkt = i0+ktup; continue;}
+	 hv = (hv << f_str->kshft) + hsq[aa0[i0]];
+	 phv += ppst->pam2[ip][aa0[i0]][aa0[i0]] * ktup;
+       }
+     }
+     if (i0 >= n0) break;
+     hv = ((hv & f_str->hmask) << f_str->kshft) + hsq[aa0[i0]];
+     f_str->link[i0] = f_str->harr[hv];
+     f_str->harr[hv] = i0;
+     if (pamfact) {
+       f_str->pamh2[hv] = (phv += ppst->pam2[ip][aa0[i0]][aa0[i0]] * ktup);
+       /* this check should always be true, but just in case */
+       if (hsq[aa0[i0-kt1]]<NMAP)
+	 phv -= ppst->pam2[ip][aa0[i0 - kt1]][aa0[i0 - kt1]] * ktup;
+     }
+     else f_str->pamh2[hv] = fact * ktup;
+   }
+
+#ifndef TFAST
+   /* done hashing, now switch aa0, aa0x back */
+   fs = aa0;
+   aa0 = aa0x;
+   aa0x = fs;
+#endif
+
+   if (pamfact)
+      for (i0 = 1; i0 < ppst->nsq; i0++)
+	 f_str->pamh1[i0] = ppst->pam2[ip][i0][i0] * ktup;
+   else
+      for (i0 = 1; i0 < ppst->nsq; i0++)
+	 f_str->pamh1[i0] = fact;
+
+   f_str->ndo = 0;	/* used to save time on diagonals with long queries */
+
+#ifndef ALLOCN0
+   if ((f_str->diag = (struct dstruct *) calloc ((size_t)MAXDIAG,
+						 sizeof (struct dstruct)))==NULL) {
+      fprintf (stderr," cannot allocate diagonal arrays: %ld\n",
+	      (long) MAXDIAG *sizeof (struct dstruct));
+      exit (1);
+     };
+#else
+   if ((f_str->diag = (struct dstruct *) calloc ((size_t)n0,
+					      sizeof (struct dstruct)))==NULL) {
+      fprintf (stderr," cannot allocate diagonal arrays: %ld\n",
+	      (long)n0*sizeof (struct dstruct));
+      exit (1);
+     };
+#endif
+
+
+   if ((waa= (int *)malloc (sizeof(int)*(nsq+1)*n0)) == NULL) {
+     fprintf(stderr,"cannot allocate waa struct %3d\n",nsq*n0);
+     exit(1);
+   }
+
+   pwaa = waa;
+   for (i=0; i<nsq; i++) {
+     for (j=0;j<n0; j++) {
+       *pwaa = ppst->pam2[ip][i][aa0[j]];
+       pwaa++;
+     }
+   }
+   f_str->waa0 = waa;
+
+   if ((waa= (int *)malloc (sizeof(int)*(nsq+1)*n0)) == NULL) {
+     fprintf(stderr,"cannot allocate waa struct %3d\n",nsq*n0);
+     exit(1);
+   }
+
+   pwaa = waa;
+   for (i=0; i<nsq; i++) {
+     for (j=0;j<n0; j++) {
+       *pwaa = ppst->pam2[0][i][aa0[j]];
+       pwaa++;
+     }
+   }
+   f_str->waa1 = waa;
+
+#ifndef TFAST
+   maxn0 = max(2*n0,MIN_RES);
+#else
+   /* maxn0 needs to be large enough to accomodate introns
+      for TFASTX.  For all other functions, it will be
+      more reasonable. */
+   maxn0 = max(4*n0,MIN_RES);
+#endif
+   if ((res = (int *)calloc((size_t)maxn0,sizeof(int)))==NULL) {
+     fprintf(stderr,"cannot allocate alignment results array %d\n",maxn0);
+     exit(1);
+   }
+   f_str->res = res;
+   f_str->max_res = maxn0;
+
+   *f_arg = f_str;
+}
+
+
+/* get_param() formats the search parameters for reporting.
+
+   pstring1[0] receives the program name/version line, pstring1[1]
+   the matrix/gap/ktup summary (pstring1 is a message to the manager,
+   currently 512).  pstring2, when it is not NULL, receives the same
+   information in a markx==10 ("; pg_...") format.  s_info supplies the
+   counters used to report the fraction of scores that passed the
+   join and opt thresholds. */
+void
+get_param (const struct pstruct *ppst,
+	   char **pstring1, char *pstring2,
+	   struct score_count_s *s_info)
+{
+  char options_str1[128];
+  char options_str2[128];
+  double join_frac, opt_frac;
+#ifndef TFAST
+  char *pg_str="FASTX";
+#else
+  char *pg_str="TFASTX";
+#endif
+
+  /* fraction of the counted scores that reached each threshold */
+  join_frac = (double)s_info->s_cnt[0]/(double)s_info->tot_scores;
+  opt_frac = (double)s_info->s_cnt[2]/(double)s_info->tot_scores;
+
+  /* the option strings live in fixed-size stack buffers, so bound
+     every write to them */
+  if (!ppst->param_u.fa.use_E_thresholds) {
+    snprintf(options_str1, sizeof(options_str1),
+	     "join: %d (%.3g), opt: %d (%.3g)",
+	     ppst->param_u.fa.cgap, join_frac,
+	     ppst->param_u.fa.optcut, opt_frac);
+    snprintf(options_str2, sizeof(options_str2),
+	     "pg_join: %d (%.3g)\n; pg_optcut: %d (%.3g)",
+	     ppst->param_u.fa.cgap, join_frac,
+	     ppst->param_u.fa.optcut, opt_frac);
+  }
+  else {
+    snprintf(options_str1, sizeof(options_str1),
+	     "E-join: %.2g (%.3g), E-opt: %.2g (%.3g)",
+	     ppst->param_u.fa.E_join, join_frac,
+	     ppst->param_u.fa.E_band_opt, opt_frac);
+    snprintf(options_str2, sizeof(options_str2),
+	     "pg_join_E(): %.2g (%.3g)\n; pg_optcut_E(): %.2g (%.3g)",
+	     ppst->param_u.fa.E_join, join_frac,
+	     ppst->param_u.fa.E_band_opt, opt_frac);
+  }
+
+  if (!ppst->param_u.fa.optflag) {
+    sprintf (pstring1[0], "%s (%s)",pg_str,verstr);
+  }
+  else {
+    sprintf (pstring1[0], "%s (%s) [optimized]",pg_str,verstr);
+  }
+
+  /* only the format string differs between the two gap conventions;
+     the argument list after the #endif is shared */
+#ifdef OLD_FASTA_GAP
+  sprintf (pstring1[1], "%s matrix (%d:%d)%s, gap-pen: %d/%d, shift: %d\n ktup: %d, %s, width: %3d",
+#else
+  sprintf (pstring1[1], "%s matrix (%d:%d)%s, open/ext: %d/%d, shift: %d\n ktup: %d, %s, width: %3d",
+#endif
+	   ppst->pam_name, ppst->pam_h,ppst->pam_l,
+	   (ppst->ext_sq_set) ? "xS":"\0",
+	   ppst->gdelval, ppst->ggapval, ppst->gshift,
+	   ppst->param_u.fa.ktup, options_str1, ppst->param_u.fa.optwid);
+
+  if (ppst->param_u.fa.iniflag) strcat(pstring1[0]," init1");
+
+  if (pstring2 != NULL) {
+#ifdef OLD_FASTA_GAP
+    sprintf (pstring2, "; pg_name_alg: %s\n; pg_ver_rel: %s\n; pg_matrix: %s (%d:%d)%s\n\
+; pg_gap-pen: %d %d\n; pg_ktup: %d\n; %s\n",
+#else
+     sprintf (pstring2, "; pg_name_alg: %s\n; pg_ver_rel: %s\n; pg_matrix: %s (%d:%d)%s\n\
+; pg_open_ext: %d %d\n; pg_ktup: %d\n; %s\n",
+#endif
+	      pg_str,verstr,ppst->pam_name, ppst->pam_h,ppst->pam_l, 
+	      (ppst->ext_sq_set) ? "xS":"\0", ppst->gdelval,
+              ppst->ggapval,ppst->param_u.fa.ktup,options_str2);
+   }
+}
+
+/* close_work() releases every buffer hanging off *f_arg, frees the
+   structure itself and resets *f_arg to NULL.  A NULL *f_arg is a
+   no-op.  aa0, n0 and ppst are not used here. */
+void
+close_work (const unsigned char *aa0, int n0,
+	    struct pstruct *ppst,
+	    struct f_struct **f_arg)
+{
+  struct f_struct *work = *f_arg;
+
+  if (work == NULL) return;
+
+  if (work->cur != NULL) free(work->cur);
+
+  /* the translation buffers are stored one element past the start of
+     their allocation, so step each pointer back before freeing it */
+#ifndef TFAST
+  free(--work->aa0y);
+  free(--work->aa0x);
+#else
+  free(--work->aa1y);
+  free(--work->aa1x);
+#endif
+
+  free(work->res);
+  free(work->waa1);
+  free(work->waa0);
+  free(work->diag);
+  free(work->link);
+  free(work->pamh2);
+  free(work->pamh1);
+  free(work->harr);
+  free(work);
+
+  *f_arg = NULL;
+}
+
+/* do_fastx() always compares a (possibly translated) protein query
+   sequence to another protein sequence. 
+
+   #ifndef TFAST (e.g. FASTX),
+   then the hash table was built from the translated (amino-acid)
+   version of the query.
+
+   #ifdef TFAST, then aa0 is already a protein sequence
+
+   The function scans aa1 against the hash table in f_str, accumulates
+   scores per diagonal, keeps up to MAXSAV runs (savemax()), rescores
+   them (spam()), joins them (sconn()) and, when optflag is set and the
+   initial score exceeds the opt threshold, runs lx_band().
+
+   Args:
+   aa0, n0 query sequence
+   aa1, n1 library sequence
+   yaa translated DNA sequence (from either aa0 or aa1)
+   *ppst -> param struct
+   *f_str -> function structure set in init_work()
+   *rst -> scores (results struct): score[0] initn, score[1] init1,
+           score[2] opt (or initn when no optimization was done)
+   *hoff -> offset of query in library sequence
+   shuff_flg -> only referenced in commented-out code here
+   *s_info -> counters: tot_scores, s_cnt[0] (score >= join threshold)
+              and s_cnt[2] (optimization performed) are incremented
+ */
+void do_fastx (const unsigned char *aa0, int n0,
+	       const unsigned char *aa1, int n1,
+	       unsigned char *yaa, 	/* translated 123123... */
+	       const struct pstruct *ppst, struct f_struct *f_str,
+	       struct rstruct *rst, int *hoff, int shuff_flg,
+	       struct score_count_s *s_info)
+{
+   int     nd;		/* diagonal array size */
+   int     lhval;
+   int     kfact;
+   int i;
+   int my_hoff;
+   int c_gap, opt_cut;
+   const unsigned char *aa_prot, *aa_trans_prot;
+   int n_aap, n_taap;
+   register struct dstruct *dptr;
+   struct savestr vmax[MAXSAV];	/* best matches saved for one sequence */
+   struct savestr *vptr[MAXSAV];
+   struct savestr *lowmax;
+   int lowscor;
+   register int tscor;
+
+#ifndef ALLOCN0
+   register struct dstruct *diagp;
+#else
+   register int dpos;
+   int     lposn0;
+#endif
+   struct dstruct *dpmax;
+   register int lpos;
+   int     tpos;
+   struct savestr *vmptr;
+   int     scor, tmp;
+   int     im, ib, nsave;
+   int ktup, kt1, ip, lkt, ktup_sq;
+   const int *hsq;
+   int n0_eff;
+#ifndef TFAST
+   /* boundaries used below to fold the three concatenated query
+      translation phases back together */
+   int n0x31, n0x32;
+   n0x31 = (n0-2)/3;
+   n0x32 = n0x31+1+(n0-n0x31-1)/2;
+#else
+   const unsigned char *fs;
+   unsigned char *fd;
+   /* same boundaries, but for the translated library sequence */
+   int n1x31, n1x32, itemp;
+   n1x31 = (n1-2)/3;
+   n1x32 = n1x31+1+(n1-n1x31-1)/2;
+#endif
+
+   /* select the scoring matrix index and hash map for the alphabet */
+   if (ppst->ext_sq_set) {
+     ip = 1;
+     hsq = ppst->hsqx;
+   }
+   else {
+     ip = 0;
+     hsq = ppst->hsq;
+   }
+
+   ktup = ppst->param_u.fa.ktup;
+   kt1 = ktup-1;
+   /* ktup_sq scales the E() thresholds below */
+   if (ktup <= 3) {
+     ktup_sq = ktup*ktup;
+   }
+   else {
+     ktup_sq = ktup;
+   }
+   if (ktup == 1) ktup_sq *= 2;
+
+   /* library sequence too short to contain a single ktup */
+   if (n1 < ktup) {
+     rst->score[0] = rst->score[1] = rst->score[2] = 0;
+     return;
+   }
+
+   if (n0+n1+1 >= MAXDIAG) {
+     fprintf(stderr,"n0,n1 too large: %d, %d\n",n0,n1);
+     rst->score[0] = rst->score[1] = rst->score[2] = -1;
+     return;
+   }
+
+   /* join/opt thresholds: either derived from E() values for this
+      pair of lengths, or taken directly from the parameters */
+   if (ppst->param_u.fa.use_E_thresholds) {
+     rst->valid_stat = 0;
+     n0_eff = n0;
+     if (n0 > 120) n0_eff = (n0+2)/3;
+     c_gap = ELK_to_s(ppst->param_u.fa.E_join*ktup_sq, n0_eff, n1, ppst->pLambda, ppst->pK, ppst->pH);
+     opt_cut = ELK_to_s(ppst->param_u.fa.E_band_opt*ktup_sq, n0_eff, n1, ppst->pLambda, ppst->pK, ppst->pH);
+   }
+   else {
+     c_gap = ppst->param_u.fa.cgap;
+     opt_cut = ppst->param_u.fa.optcut;
+     rst->valid_stat = 1;
+   }
+   /* if (shuff_flg) rst->valid_stat = 1; */
+
+   f_str->noff = n0 - 1;
+
+#ifdef ALLOCN0
+   nd = n0;
+#endif
+
+#ifndef ALLOCN0
+   nd = n0 + n1;
+#endif
+
+   /* clear the diagonals from f_str->ndo up to nd */
+   dpmax = &f_str->diag[nd];
+   for (dptr = &f_str->diag[f_str->ndo]; dptr < dpmax;)
+   {
+      dptr->stop = -1;
+      dptr->dmax = NULL;
+      dptr++->score = 0;
+   }
+
+   for (vmptr = vmax; vmptr < &vmax[MAXSAV]; vmptr++)
+      vmptr->score = 0;
+   lowmax = vmax;
+   lowscor = 0;
+
+   /* start hashing */
+   /* prime lhval with the first ktup-1 hashable residues of aa1 */
+   lhval = 0;
+   lkt = kt1;
+   for (lpos = 0; (lpos < lkt || hsq[aa1[lpos]]>=NMAP) && lpos<n1; lpos++) {
+     if (hsq[aa1[lpos]]>=NMAP) {
+       /* NOTE(review): the ALLOCN0 reset below follows 'continue'
+	  and therefore never executes */
+       lhval = 0; lkt=lpos+ktup; continue;
+#ifdef ALLOCN0		/* reinitialize dptr */
+       dptr = &f_str->diag[lpos % nd];
+       dptr->stop = -1;
+       dptr->dmax = NULL;
+       dptr->score = 0;
+#endif
+     }
+     lhval = ((lhval & f_str->hmask) << f_str->kshft) + hsq[aa1[lpos]];
+   }
+
+   /* main scan: for every library position, follow the hash chain of
+      query positions sharing the current ktup and update the score of
+      the diagonal each pair falls on */
+#ifndef ALLOCN0
+   diagp = &f_str->diag[f_str->noff + lkt];
+   for (; lpos < n1; lpos++, diagp++) {
+     /*     if (hsq[aa1[lpos]]>=NMAP) {lhval = 0; continue;} */
+     if (hsq[aa1[lpos]]>=NMAP) {
+       lpos++ ; diagp++;
+       while (lpos < n1 && hsq[aa1[lpos]]>=NMAP) {lpos++; diagp++;}
+       if (lpos >= n1) break;
+       lhval = 0;
+     }
+     lhval = ((lhval & f_str->hmask) << f_str->kshft) + hsq[aa1[lpos]];
+     for (tpos = f_str->harr[lhval]; tpos >= 0; tpos = f_str->link[tpos]) {
+       if ((tscor = (dptr = &diagp[-tpos])->stop) >= 0) {
+#else
+   lposn0 = f_str->noff + lpos;
+   for (; lpos < n1; lpos++, lposn0++) {
+     if (hsq[aa1[lpos]]>=NMAP) {lhval = 0; goto loopl;}
+     lhval = ((lhval & f_str->hmask) << f_str->kshft) + hsq[aa1[lpos]];
+     for (tpos = f_str->harr[lhval]; tpos >= 0; tpos = f_str->link[tpos]) {
+       dpos = lposn0 - tpos;
+       if ((tscor = (dptr = &f_str->diag[dpos % nd])->stop) >= 0) {
+#endif
+	 /* the diagonal already holds a run; tscor becomes the overlap
+	    (<= 0 means the new ktup does not overlap the old run) */
+	 tscor += ktup;
+	 if ((tscor -= lpos) <= 0) {	/* better to start over */
+	   scor = dptr->score;
+	   if ((tscor += (kfact = f_str->pamh2[lhval])) < 0 && lowscor < scor) {
+#ifdef ALLOCN0
+	     lowscor = savemax (dptr, dpos, vmax, &lowmax);
+#else
+	     lowscor = savemax (dptr, dptr - f_str->diag, vmax, &lowmax);
+#endif
+	   }
+	   if ((tscor += scor) >= kfact) {
+	     dptr->score = tscor;
+	     dptr->stop = lpos;
+	   }
+	   else {
+	     dptr->score = kfact;
+	     dptr->start = (dptr->stop = lpos) - kt1;
+	   }
+	 }				/* continue current run in diagonal */
+	 else {
+	   dptr->score += f_str->pamh1[aa0[tpos]];
+	   dptr->stop = lpos;
+	 }
+       }
+       else {
+	 /* first hit on this diagonal: start a new run */
+	 dptr->score = f_str->pamh2[lhval];
+	 dptr->start = (dptr->stop = lpos) - kt1;
+       }
+     }				/* end tpos */
+
+#ifdef ALLOCN0
+      /* reinitialize diag structure */
+   loopl:
+     if ((dptr = &f_str->diag[lpos % nd])->score > lowscor) {
+       lowscor = savemax (dptr, lpos, vmax, &lowmax);
+     }
+     dptr->stop = -1;
+     dptr->dmax = NULL;
+     dptr->score = 0;
+#endif
+   }				/* end lpos */
+
+   /* flush any diagonal whose current run beats the lowest saved one */
+#ifdef ALLOCN0
+   for (tpos = 0, dpos = f_str->noff + n1 - 1; tpos < n0; tpos++, dpos--) {
+     if ((dptr = &f_str->diag[dpos % nd])->score > lowscor) {
+       lowscor = savemax (dptr, dpos, vmax, &lowmax);
+     }
+   }
+#else
+   for (dptr = f_str->diag; dptr < dpmax;) {
+     if (dptr->score > lowscor) {
+       lowscor = savemax (dptr, dptr - f_str->diag, vmax, &lowmax);
+     }
+     dptr->stop = -1;
+     dptr->dmax = NULL;
+     dptr++->score = 0;
+   }
+   f_str->ndo = nd;
+#endif
+
+/*
+        at this point all of the elements of aa1[lpos]
+        have been searched for elements of aa0[tpos]
+        with the results in diag[dpos]
+*/
+
+   /* rescore each saved run with the scoring matrix and collect the
+      occupied slots in vptr[] */
+   for (nsave = 0, vmptr = vmax; vmptr < &vmax[MAXSAV]; vmptr++)
+   {
+     /*
+       fprintf(stderr,"0: %4d-%4d  1: %4d-%4d  dp: %d score: %d\n",
+	       f_str->noff+vmptr->start-vmptr->dp,
+	       f_str->noff+vmptr->stop-vmptr->dp,
+	       vmptr->start,vmptr->stop,
+	       vmptr->dp,vmptr->score);
+     */
+      if (vmptr->score > 0) {
+	 vmptr->score = spam (aa0, aa1, vmptr, ppst->pam2[ip], f_str);
+	 vptr[nsave++] = vmptr;
+      }
+   }
+
+   if (nsave <= 0) {
+     rst->score[0] = rst->score[1] = rst->score[2] = 0;
+     return;
+   }
+       
+#ifndef TFAST
+   /* FASTX code here to modify the start, stop points for 
+      the three phases of the translated protein sequence
+      */
+   /*
+     fprintf(stderr,"n0x: %d; n0x31:%d; n0x32: %d\n",n0,n0x31,n0x32);
+     for (ib=0; ib<nsave; ib++) {
+       fprintf(stderr,"0: %4d-%4d  1: %4d-%4d  dp: %d score: %d\n",
+	       f_str->noff+vptr[ib]->start-vptr[ib]->dp,
+	       f_str->noff+vptr[ib]->stop-vptr[ib]->dp,
+	       vptr[ib]->start,vptr[ib]->stop,
+	       vptr[ib]->dp,vptr[ib]->score);
+     }
+
+     fprintf(stderr,"---\n");
+   */
+   for (ib=0; ib<nsave; ib++) {
+     if (f_str->noff-vptr[ib]->dp+vptr[ib]->start >= n0x32)
+       vptr[ib]->dp += n0x32;
+     if (f_str->noff-vptr[ib]->dp +vptr[ib]->start >= n0x31)
+       vptr[ib]->dp += n0x31;
+   }
+#else
+   /* TFASTX code here to modify the start, stop points for 
+      the three phases of the translated protein sequence
+      TFASTX modifies library start points, rather than 
+      query start points
+   */
+
+   for (ib=0; ib<nsave; ib++) {
+     if (vptr[ib]->start >= n1x32) {
+       vptr[ib]->start -= n1x32;
+       vptr[ib]->stop -= n1x32;
+       vptr[ib]->dp -= n1x32;
+     }
+     if (vptr[ib]->start >= n1x31) {
+       vptr[ib]->start -= n1x31;
+       vptr[ib]->stop -= n1x31;
+       vptr[ib]->dp -= n1x31;
+     }
+   }
+	    
+#endif /* TFASTX */
+
+   /* join compatible runs; scor is the best joined score */
+   scor = sconn (vptr, nsave, c_gap, 
+		 ppst->param_u.fa.pgap, f_str);
+
+   /* vmptr ends up pointing at the single best run */
+   for (vmptr=vptr[0],ib=1; ib<nsave; ib++)
+     if (vptr[ib]->score > vmptr->score) vmptr=vptr[ib];
+
+/*  kssort (vptr, nsave); */
+
+   rst->score[1] = vmptr->score;	/* best single score - init1*/
+   rst->score[0] = max (scor, vmptr->score);  /* initn */
+   rst->score[2] = rst->score[0];		/* initn */
+
+   /* the band for lx_band() is centered on the best run's diagonal */
+#ifndef TFAST /* FASTX */
+   *hoff = my_hoff=f_str->noff - vmptr->dp;
+#else
+   *hoff = my_hoff = vmptr->dp-f_str->noff;
+#endif
+
+   /*
+   if (n1 > 5000) {
+     fprintf(stderr," Long n1: %d\n",n1);
+   }
+   */
+
+   s_info->tot_scores++;
+   if (rst->score[0] >= c_gap) {s_info->s_cnt[0]++;}
+   if (ppst->param_u.fa.optflag) {
+#ifdef TFAST
+     if ( /* shuff_flg || */ rst->score[0] > opt_cut) {
+/* generate f_str->aa1y only if it is not there */
+       if ( !f_str->have_yaa ) {
+	 /* interleave the three translated phases into yaa (123123...) */
+	 for (fs=aa1,itemp=0; itemp <3; itemp++,fs++) {
+	   for (fd= yaa+itemp; *fs!=EOSEQ; fd += 3, fs++) {*fd = *fs;}
+	   *fd=EOSEQ;
+	 }
+       }
+     }
+     aa_prot = aa0;
+     n_aap = n0;
+     aa_trans_prot= yaa;
+     n_taap = n1;
+#else 
+     aa_prot = aa1;
+     n_aap = n1;
+     aa_trans_prot= yaa;
+     n_taap = n0;
+#endif
+     if ( /* shuff_flg || */ rst->score[0] > opt_cut) {
+       s_info->s_cnt[2]++;
+       rst->valid_stat = 1;
+       rst->score[2] = lx_band(aa_prot,n_aap,aa_trans_prot,n_taap,
+			       ppst->pam2[ip],
+			       -ppst->gdelval,
+			       -ppst->ggapval,-ppst->gshift,
+			       my_hoff-ppst->param_u.fa.optwid/2,ppst->param_u.fa.optwid,
+			       f_str);
+     }
+   }
+}
+
+/* do_work() scores one query/library pair.
+
+   On return:
+	   rst.score[0] - initn
+	   rst.score[1] - init1
+	   rst.score[2] - opt
+
+   #ifdef TFAST the raw DNA library sequence in aa1 is first translated
+   into f_str->aa1x in the three phases selected by frame.
+*/
+
+void do_work (const unsigned char *aa0, int n0,
+	      const unsigned char *aa1, int n1,
+	      int frame,
+	      const struct pstruct *ppst, struct f_struct *f_str,
+	      int qr_flg, int shuff_flg, struct rstruct *rst,
+	      struct score_count_s *s_info)
+{
+  int hoff;
+#ifdef TFAST
+  unsigned char *trans_buf;
+  int trans_end, phase, phase_stop, phase_len;
+
+  /* aa0 has a protein sequence */
+  /* aa1 has a raw DNA sequence */
+
+  trans_buf = f_str->aa1x;
+  trans_end = 0;
+  phase_stop = frame*3 + 3;
+  for (phase = frame*3; phase < phase_stop; phase++) {
+    phase_len = saatran(aa1, &trans_buf[trans_end], n1, phase);
+    /* each phase is followed by one separator position */
+    trans_end += phase_len + 1;
+  }
+  /* total translated length, without the final separator */
+  trans_end -= 1;
+  f_str->have_yaa = 0;
+#endif
+
+  rst->score[2] = 0;
+  rst->score[1] = 0;
+  rst->score[0] = 0;
+  rst->escore = 1.0;
+  rst->seglen = 1;
+  rst->segnum = 1;
+
+#ifndef TFAST
+  do_fastx (f_str->aa0x, n0, aa1, n1, f_str->aa0y, ppst, f_str, rst, &hoff, shuff_flg, s_info);
+#else /* tfastx */
+  do_fastx (aa0, n0, f_str->aa1x, trans_end, f_str->aa1y, ppst, f_str, rst, &hoff, shuff_flg, s_info);
+#endif
+
+  rst->H = -1.0;
+  rst->comp = -1.0;
+}
+
+/* do_opt() rescores one query/library pair with the banded
+   optimization forced on: ppst->param_u.fa.optflag is set to 1 for the
+   duration of the do_fastx() call and restored afterwards.
+
+   #ifdef TFAST the raw DNA library sequence in aa1 is first translated
+   into f_str->aa1x in the three phases selected by frame.
+
+   Scores are returned in *rst; the score counters collected during the
+   call are local and discarded. */
+void do_opt (const unsigned char *aa0, int n0,
+	     const unsigned char *aa1, int n1,
+	     int frame,
+	     struct pstruct *ppst,
+	     struct f_struct *f_str,
+	     struct rstruct *rst)
+{
+  int optflag, hoff;
+  struct score_count_s s_info;
+
+#ifdef TFAST
+  int last_n1, itx, itt, n10;
+  unsigned char *xaa;
+#endif
+
+  /* do_fastx() increments the counters in s_info, so they must not be
+     left indeterminate */
+  memset(&s_info, 0, sizeof(s_info));
+
+#ifdef TFAST
+  /* aa0 has a protein sequence */
+  /* aa1 has a raw DNA sequence */
+
+  itt = frame;
+  last_n1 = 0;
+  xaa = f_str->aa1x;
+  for (itx= itt*3; itx< itt*3+3; itx++) {
+    n10  = saatran(aa1,&xaa[last_n1],n1,itx);
+    last_n1 += n10+1;
+  }
+  n10 = last_n1-1;
+  f_str->have_yaa = 0;
+#endif
+
+  /* force the optimized score for this call only */
+  optflag = ppst->param_u.fa.optflag;
+  ppst->param_u.fa.optflag = 1;
+
+#ifndef TFAST
+  do_fastx (f_str->aa0x, n0, aa1, n1, f_str->aa0y, ppst, f_str, rst, &hoff, 0, &s_info);
+#else	/* TFASTX */
+  do_fastx (aa0, n0, xaa, n10, f_str->aa1y, ppst, f_str, rst, &hoff, 0, &s_info);
+#endif
+
+  ppst->param_u.fa.optflag = optflag;
+}
+
+/* savemax() records the run currently held in *dptr (on diagonal
+   dpos) in the table of saved runs vmax[0..MAXSAV-1].
+
+   If dptr->dmax already points at a saved entry for the same diagonal
+   and start, that entry is extended; otherwise the entry at *lowmax is
+   overwritten.  When needed the table is rescanned so that *lowmax
+   again points at the lowest-scoring entry.
+
+   Returns the score of the entry *lowmax points to on exit. */
+int
+savemax (struct dstruct *dptr, int dpos,
+	 struct savestr *vmax, struct savestr **lowmax)
+{
+   struct savestr *entry, *scan;
+   int low;
+
+   entry = dptr->dmax;
+   if (entry != NULL && entry->dp == dpos && entry->start == dptr->start) {
+      /* continuation of a run that is already saved */
+      entry->stop = dptr->stop;
+      low = dptr->score;
+      if (low <= entry->score) {
+	 return (*lowmax)->score;
+      }
+      entry->score = low;
+      /* the lowest entry is unaffected unless it was the one updated */
+      if (entry != *lowmax) {
+	 return (*lowmax)->score;
+      }
+   }
+   else {
+      /* new run: replace the current lowest entry */
+      entry = *lowmax;
+      entry->score = dptr->score;
+      entry->dp = dpos;
+      entry->start = dptr->start;
+      entry->stop = dptr->stop;
+      dptr->dmax = entry;
+      low = entry->score;
+   }
+
+   /* find the new lowest-scoring entry */
+   scan = vmax;
+   while (scan < vmax+MAXSAV) {
+      if (scan->score < low) {
+	 low = scan->score;
+	 *lowmax = scan;
+      }
+      scan++;
+   }
+   return low;
+}
+
+/* spam() rescans the run described by *dmax along its diagonal using
+   pam2 and trims it to its best-scoring sub-region.
+
+   aa1 is indexed by the run position, aa0 by position - dmax->dp +
+   f_str->noff.  A running total is kept; whenever it drops below zero
+   the current candidate region is closed and a new one starts at the
+   next position.
+
+   On return dmax->start/dmax->stop delimit the best region and its
+   score is the return value.  If no position produces a positive
+   total, 0 is returned and dmax->start/dmax->stop are left unchanged. */
+int spam (const unsigned char *aa0, const unsigned char *aa1,
+	  struct savestr *dmax, int **pam2,
+	  struct f_struct *f_str)
+{
+   int     lpos;
+   int     tot;
+   struct {
+     int     start, stop, score;
+   } curv, maxv;
+   const unsigned char *aa0p, *aa1p;
+
+   aa1p = &aa1[lpos = dmax->start];
+   aa0p = &aa0[lpos - dmax->dp + f_str->noff];
+   curv.start = curv.stop = lpos;
+
+   /* seed the best region with the incoming boundaries so they are
+      never copied back from indeterminate values */
+   maxv.start = dmax->start;
+   maxv.stop = dmax->stop;
+
+   tot = curv.score = maxv.score = 0;
+   for (; lpos <= dmax->stop; lpos++) {
+     tot += pam2[*aa0p++][*aa1p++];
+     if (tot > curv.score) {
+       /* new high-water mark for the current candidate */
+       curv.stop = lpos;
+       curv.score = tot;
+     }
+     else if (tot < 0) {
+       /* candidate is finished; keep it if it is the best so far */
+       if (curv.score > maxv.score) {
+	 maxv.start = curv.start;
+	 maxv.stop = curv.stop;
+	 maxv.score = curv.score;
+       }
+       tot = curv.score = 0;
+       curv.start = lpos+1;
+     }
+   }
+
+   /* the last candidate is still open when the loop ends */
+   if (curv.score > maxv.score) {
+     maxv.start = curv.start;
+     maxv.stop = curv.stop;
+     maxv.score = curv.score;
+   }
+
+/*	if (maxv.start != dmax->start || maxv.stop != dmax->stop)
+		printf(" new region: %3d %3d %3d %3d\n",maxv.start,
+			dmax->start,maxv.stop,dmax->stop);
+*/
+   dmax->start = maxv.start;
+   dmax->stop = maxv.stop;
+
+   return maxv.score;
+}
+
+/* XFACT: overlap (in positions) tolerated between two runs that are
+   joined in sconn() */
+#define XFACT 10
+
+/* sconn() joins the n saved runs in v[] into chains and returns the
+   score of the best chain (0 if no run reaches cgap).
+
+   v[] is first sorted by start position (kpsort()).  Each run scoring
+   at least cgap is then, in that order, attached to the first chain in
+   the score-ordered list that ends before it (within XFACT) in both
+   coordinates, its score becoming chain + run + pgap, and inserted
+   into the list ahead of the first chain it outscores.
+
+   NOTE(review): a run whose score is not greater than any chain
+   already in the list is never linked in, so later runs cannot attach
+   to it -- confirm this is intended. */
+int sconn (struct savestr **v, int n, 
+       int cgap, int pgap, struct f_struct *f_str)
+{
+  int     i, si;
+  struct slink {
+    int     score;
+    struct savestr *vp;
+    struct slink *next;
+  }      *start, *sl, *sj, *so, sarr[MAXSAV];
+  int     lstart, tstart, plstop, ptstop;
+
+/*	sort the score left to right in lib pos */
+
+  kpsort (v, n);
+
+  start = NULL;
+
+/*	for the remaining runs, see if they fit */
+
+   for (i = 0, si = 0; i < n; i++)
+   {
+
+/*	if the score is less than the gap penalty, it never helps */
+      if (v[i]->score < cgap)
+	 continue;
+      /* start of this run in each of the two coordinates */
+      lstart = v[i]->start;
+      tstart = lstart - v[i]->dp + f_str->noff;
+
+/*	put the run in the group */
+      sarr[si].vp = v[i];
+      sarr[si].score = v[i]->score;
+      sarr[si].next = NULL;
+
+/* 	if it fits, then increase the score */
+      for (sl = start; sl != NULL; sl = sl->next)
+      {
+	 plstop = sl->vp->stop;
+	 ptstop = plstop - sl->vp->dp + f_str->noff;
+	 if (plstop < lstart+XFACT && ptstop < tstart+XFACT) {
+	   sarr[si].score = sl->score + v[i]->score + pgap;
+	   break;
+	 }
+      }
+
+/*	now recalculate where the score fits */
+      if (start == NULL)
+	 start = &sarr[si];
+      else
+	 for (sj = start, so = NULL; sj != NULL; sj = sj->next)
+	 {
+	    if (sarr[si].score > sj->score)
+	    {
+	       /* insert ahead of sj, keeping the list in descending
+		  score order */
+	       sarr[si].next = sj;
+	       if (so != NULL)
+		  so->next = &sarr[si];
+	       else
+		  start = &sarr[si];
+	       break;
+	    }
+	    so = sj;
+	 }
+      si++;
+   }
+
+   /* the head of the list is the best chain */
+   if (start != NULL)
+      return (start->score);
+   else
+      return (0);
+}
+
+/* kssort() shell-sorts the n pointers in v[] into descending order of
+   ->score, halving the gap on every pass. */
+void
+kssort (v, n)
+struct savestr *v[];
+int     n;
+{
+   int     step, hi, lo;
+   struct savestr *held;
+
+   for (step = n / 2; step > 0; step /= 2) {
+      for (hi = step; hi < n; hi++) {
+	 /* bubble the element at hi towards the front while it
+	    outscores the element one step before it */
+	 lo = hi - step;
+	 while (lo >= 0 && v[lo]->score < v[lo + step]->score) {
+	    held = v[lo];
+	    v[lo] = v[lo + step];
+	    v[lo + step] = held;
+	    lo -= step;
+	 }
+      }
+   }
+}
+
+/* kpsort() shell-sorts the n pointers in v[] into ascending order of
+   ->start using the fixed gap sequence 21, 7, 3, 1. */
+void
+kpsort (struct savestr **v, int n) {
+  int incs[4] = { 21, 7, 3, 1 };
+  int pass, step, pos, hole;
+  struct savestr *moving;
+  int key;
+
+  for (pass = 0; pass < 4; pass++) {
+    step = incs[pass];
+    for (pos = step; pos < n; pos++) {
+      moving = v[pos];
+      key = moving->start;
+      /* shift later-starting entries up until the hole is in place */
+      for (hole = pos; hole >= step && v[hole - step]->start > key; hole -= step) {
+	v[hole] = v[hole - step];
+      }
+      v[hole] = moving;
+    }
+  }
+}
+
+/* init_row() zeroes the C1-C3 and I1-I3 scores and the flag of the
+   first sp cells of row. */
+static void
+init_row(struct sx_s *row, int sp) {
+  struct sx_s *cell;
+  int left;
+
+  for (cell = row, left = sp; left > 0; left--, cell++) {
+    cell->I1 = 0; cell->C1 = 0;
+    cell->I2 = 0; cell->C2 = 0;
+    cell->I3 = 0; cell->C3 = 0;
+    cell->flag = 0;
+  }
+}
+
+/* lx_band() computes a banded local alignment score between a protein
+   sequence and a translated DNA sequence laid out with the three
+   phases interleaved (three residues per band cell: C1/C2/C3 hold the
+   match scores, I1/I2/I3 the gap scores of each phase).
+
+   The band row is kept in f_str->cur, (re)allocated here whenever the
+   required size (width+7 cells) changes, and zeroed on every call.
+
+   Returns best+gopen+gext, where best is the largest value seen of a
+   cell score minus (gopen+gext). */
+int
+lx_band(const unsigned char *prot_seq,  /* array with protein sequence numbers*/
+	int len_prot,    /* length of prot. seq */
+	const unsigned char *dna_prot_seq, /* translated DNA sequence numbers*/
+	int len_dna_prot,   /* length trans. seq. */
+	int **pam_matrix,   /* scoring matrix */
+	int gopen, int gext, /* gap open, gap extend penalties */
+	int gshift,         /* frame-shift penalty */
+	int start_diag,     /* start diagonal of band */
+	int width,         /* width for band alignment */
+	struct f_struct *f_str)
+{
+  void *ckalloc();
+  int i, j, bd, bd1, x1, sp, p1=0, p2=0, end_prot;
+  int sc, del, best = 0, cd,ci, e1, e2, e3, cd1, cd2, cd3, f, gg;
+  register int *wt;
+  const unsigned char *dp;
+  register struct sx_s *ap, *aq;
+
+  sp = width+7;	
+  gg = gopen+gext;	/* cost of opening a gap of length one */
+  /*  sp = sp/3; */
+  if (f_str->cur == NULL) {
+    f_str->cur = (struct sx_s *) ckalloc(sizeof(struct sx_s)*sp);
+    f_str->cur_sp_size = sp;
+  }
+  else if (f_str->cur_sp_size != sp) {
+    free(f_str->cur);
+    f_str->cur = (struct sx_s *) ckalloc(sizeof(struct sx_s)*sp);
+    f_str->cur_sp_size = sp;
+  }
+
+  init_row(f_str->cur, sp);
+
+  /*
+  if (start_diag %3 !=0) start_diag = start_diag/3-1;
+  else start_diag = start_diag/3;
+  */
+
+  /*
+  if (width % 3 != 0) width = width/3+1;
+  else width = width /3;
+  */
+
+  /* currently, this code assumes that the DNA sequence is longer than the
+     protein sequence. This is not always true.  len_prot in the loop below
+     should be decreased to the projection of the DNA on the protein */
+
+  x1 = start_diag; 		/* x1 = lower bound of DNA */
+
+  
+  end_prot = max(0,-width-start_diag) + (len_dna_prot+5)/3 + width;
+  end_prot = min(end_prot,len_prot);
+
+  /* i counts through protein sequence, x1 through DNAp */
+
+  for (i = max(0, -width-start_diag), x1+=i; i < end_prot; i++, x1++) {
+      bd = min(x1+width, len_dna_prot/3);	/* upper bound of band */
+      bd1 = max(0,x1);	                /* lower bound of band */
+      wt = pam_matrix[prot_seq[i]];
+      del = 1-x1;   /*adjustment*/
+      /* shift the band bounds so that they index f_str->cur[] */
+      bd += del; 
+      bd1 +=del;
+
+      ap = &f_str->cur[bd1];
+      aq = ap+1;
+      e1 = f_str->cur[bd1-1].C3;
+      e2 = ap->C1;
+      cd1 = cd2= cd3= 0;
+
+      /* one iteration per band cell; dp consumes three translated
+	 residues (one per phase) each time */
+      for (dp = &dna_prot_seq[(bd1-del)*3]; ap < &f_str->cur[bd]; ap++) {
+	  /* phase 1 */
+	  sc = max(max(e1, (e3=ap->C2))-gshift, e2)+wt[*dp++];
+	  if (cd1 > sc) sc = cd1;
+	  cd1 -= gext;
+	  if ((ci = aq->I1) > 0) {
+	      if (sc < ci) { ap->C1 = ci; ap->I1 = ci-gext;}
+	      else {
+		  ap->C1 = sc;
+		  sc -= gg;
+		  if (sc > 0) {
+		      if (sc > best) best =sc;
+		      if (cd1 < sc) cd1 = sc;
+		      ap->I1 = max(ci-gext, sc);
+		  } else ap->I1 = ci-gext;
+	      }
+	  } else {
+	      if (sc <= 0) {
+		  ap->I1 = ap->C1 = 0;
+	      } else {
+		  ap->C1 = sc; sc-=gg;
+		  if (sc >0) {
+		      if (sc > best) best =sc;
+		      if (cd1 < sc) cd1 = sc;
+		      ap->I1 = sc;
+		  } else ap->I1 = 0;
+	      }
+	  }
+	  /* phase 2 */
+	  sc = max(max(e2, (e1=ap->C3))-gshift, e3)+wt[*dp++];
+	  if (cd2 > sc) sc = cd2;
+	  cd2 -= gext;
+	  if ((ci = aq->I2) > 0) {
+	      if (sc < ci) { ap->C2 = ci; ap->I2 = ci-gext;}
+	      else {
+		  ap->C2 = sc;
+		  sc -= gg;
+		  /* NOTE(review): unlike phase 1 there is no
+		     'else ap->I2 = ci-gext' here, so I2 keeps its old
+		     value when sc <= 0 -- confirm this is intended */
+		  if (sc > 0) {
+		      if (sc > best) best =sc;
+		      if (cd2 < sc) cd2 = sc;
+		      ap->I2 = max(ci-gext, sc);
+		  }
+	      }
+	  } else {
+	      if (sc <= 0) {
+		  ap->I2 = ap->C2 = 0;
+	      } else {
+		  ap->C2 = sc; sc-=gg;
+		  if (sc >0) {
+		      if (sc > best) best =sc;
+		      if (cd2 < sc) cd2 = sc;
+		      ap->I2 = sc;
+		  } else ap->I2 = 0;
+	      }
+	  }
+	  /* phase 3 */
+	  sc = max(max(e3, (e2=aq->C1))-gshift, e1)+wt[*dp++];
+	  if (cd3 > sc) sc = cd3;
+	  cd3 -= gext;
+	  if ((ci = aq++->I3) > 0) {
+	      if (sc < ci) { ap->C3 = ci; ap->I3 = ci-gext;}
+	      else {
+		  ap->C3 = sc;
+		  sc -= gg;
+		  /* NOTE(review): same missing 'else' as in phase 2 */
+		  if (sc > 0) {
+		      if (sc > best) best =sc;
+		      if (cd3 < sc) cd3 = sc;
+		      ap->I3 = max(ci-gext, sc);
+		  }
+	      }
+	  } else {
+	      if (sc <= 0) {
+		  ap->I3 = ap->C3 = 0;
+	      } else {
+		  ap->C3 = sc; sc-=gg;
+		  if (sc >0) {
+		      if (sc > best) best =sc;
+		      if (cd3 < sc) cd3 = sc;
+		      ap->I3 = sc;
+		  } else ap->I3 = 0;
+	      }
+	  }
+      }
+  }
+  /*  printf("The best score is %d\n", best); */
+  /* best was tracked after subtracting gg, so add it back */
+  return best+gopen+gext;
+}
+
+/* ckalloc - malloc() wrapper: calls w_abort() with an out-of-memory
+   message when the allocation fails, otherwise returns the block */
+void *ckalloc(size_t amount)
+{
+  void *mem = malloc(amount);
+
+  if (mem == NULL) {
+    w_abort("Ran out of memory.","");
+  }
+  return mem;
+}
+
+/* calculate the 100% identical score: the sum, over the first n0
+   residues of aa0, of each residue's score against itself in pam2 */
+int
+shscore(unsigned char *aa0, int n0, int **pam2)
+{
+  int pos = 0;
+  int total = 0;
+
+  while (pos < n0) {
+    const int *row = pam2[aa0[pos]];
+    total += row[aa0[pos]];
+    pos++;
+  }
+  return total;
+}
+
+#define WIDTH 60
+
+/* code above is to convert sequence into numbers */
+
+typedef struct mat *match_ptr;
+
+/* node of a singly linked list (chained through next); pro_dna()
+   copies the l field of each node into the alignment result */
+typedef struct mat {
+	int i, j, l;
+	match_ptr next;
+} match_node;
+
+/* a pair of coordinates; local_align() uses arrays of these to track
+   where the alignment ending at each cell started */
+typedef struct {
+	int i,j;
+} state;
+
+typedef state *state_ptr;
+
+/* one dynamic-programming cell; local_align() reads and writes the
+   C, I and D fields as scores.
+   NOTE(review): presumably C = best, I = insertion, D = deletion --
+   confirm against the row description in local_align() */
+typedef struct st_s { int C, I, D;} *st_ptr;
+
+/* static st_ptr up=NULL, down, tp; */
+/* static int *st_up; */
+/* static int gop, gext, shift; */
+
+/* forward declarations for the alignment helpers used below */
+void *ckalloc(size_t);
+static match_ptr small_global(), global();
+static int local_align(), find_best();
+static void init_row2(),  init_ROW();
+
+/* pro_dna() aligns a protein sequence against a translated DNA
+   sequence and stores the alignment in *a_res.
+
+   local_align() finds the boundaries of the best local alignment and
+   its score; global() then produces the alignment between those
+   boundaries as a linked list, whose l fields are copied into
+   a_res->res.
+
+   At most max_res entries are stored.  If the alignment is longer, a
+   warning is written to stderr and the excess is dropped; a_res->nres
+   is the number of entries actually stored.
+
+   Returns the score reported by local_align(). */
+int
+pro_dna(const unsigned char *prot_seq,	/* array with prot. seq. numbers*/
+	int len_prot,	 		/* length of prot. seq */
+	const unsigned char *dna_prot_seq, /* trans. DNA seq. numbers*/
+	int len_dna_prot,		/* length trans. seq. */
+	int **pam_matrix,		/* scoring matrix */
+	int gopen, int gex,		/* gap open, gap extend penalties */
+	int gshift,        		/* frame-shift penalty */
+	struct smgl_str *smgl_sp,
+	int max_res,
+	struct a_res_str *a_res)	/* alignment info */
+{
+  match_ptr align, ap, aq;
+  int x, y, ex, ey, i, score;
+  int *alignment;
+  st_ptr up, down, tp;
+
+  /* these globals removed */
+  /*   gext = gex; gop = gopen; shift = gshift; */
+
+  /* for fastx (but not tfastx), these could be moved into init_work(),
+     and done only once */
+
+  up = (st_ptr) ckalloc(sizeof(struct st_s)*(len_dna_prot+10));
+  down = (st_ptr) ckalloc(sizeof(struct st_s)*(len_dna_prot+10));
+  tp = (st_ptr) ckalloc(sizeof(struct st_s)*(len_dna_prot+10));
+
+  /*local alignment find the best local alignment x (prot) and y (DNA)
+    is the starting position of the best local alignment
+    and ex (prot) ey (DNA) is the ending position */
+  score= local_align(&x, &y, &ex, &ey, pam_matrix,
+		     gopen, gex, gshift,
+		     dna_prot_seq, len_dna_prot,
+		     prot_seq, len_prot, up, down);
+
+  /* this is very strange, since local_align initialized up, down */
+  up += 3; down += 3; tp += 3;
+
+  /* x, y - start in prot, dna_prot */
+  a_res->min0 = x;	/* prot */
+  a_res->max0 = ex;	/* prot */
+
+  a_res->min1 = y;	/* DNA-prot */
+  a_res->max1 = ey;	/* DNA-prot */
+
+  align = global(x, y, ex, ey, pam_matrix, gopen, gex, gshift, 
+		 dna_prot_seq, prot_seq, 0, 0, &up, &down, &tp,
+		 smgl_sp);
+
+  alignment = a_res->res;
+
+  /* from earlier version */
+  /* alignment[0] = x; */ /* start of alignment in prot */
+  /* alignment[1] = y; */ /* start of alignment in DNA */
+
+  /* copy what fits and free every node, counting the full length in i */
+  for (ap = align, i= 0; ap; i++) {
+    if (i < max_res) {alignment[i] = ap->l;}
+    aq = ap->next; free(ap); ap = aq;
+  }
+
+  /* an alignment of exactly max_res entries fits; only a longer one
+     has been cut short */
+  if (i > max_res) {
+    fprintf(stderr," alignment truncated: %d/%d\n", max_res,i);
+  }
+
+  /* undo the +3 offsets so the original allocations are freed */
+  up = &up[-3]; down = &down[-3]; tp = &tp[-3];
+  free(up); free(tp); free(down);
+  /* free(st_up); */ /*  moved into local align */
+
+  /* never report more entries than were written to a_res->res */
+  a_res->nres = (i > max_res) ? max_res : i;
+  return score;
+}
+
+/* swap() exchanges the two pointers stored at *a and *b */
+static void
+swap(void **a, void **b) {
+  void *first = *a;
+  void *second = *b;
+
+  *a = second;
+  *b = first;
+}
+
+/*
+   local_align() finds the best-scoring local alignment between the
+   protein pro[0..lp-1] and the sequence dnap[0..ld-1], keeping only
+   two score rows (up/down) plus start-point bookkeeping in memory.
+
+   wgts      scoring matrix, indexed wgts[pro element][dnap element]
+   gop/gext  gap-open / gap-extend penalties (subtracted from scores)
+   shift     frameshift penalty (subtracted from scores)
+   up, down  caller-supplied score rows; both are re-zeroed here
+
+   On return *x,*y hold the start recorded for the best alignment and
+   *ex,*ey the cell (i, j) at which the best score was seen; j is the
+   column index of the inner loop, which is offset by 2 relative to
+   dnap (dp = dnap-2).  The return value is the best score.  When no
+   cell scores above zero the function returns 0 and *x, *y, *ex and
+   *ey are all set to 0 (they were previously left uninitialized).
+*/
+static int
+local_align(int *x, int *y, int *ex, int *ey,
+	    int **wgts, int gop, int gext, int shift,
+	    const unsigned char *dnap, int ld,
+	    const unsigned char *pro,  int lp,
+	    st_ptr up, st_ptr down) {
+
+  /* x1..x4 start at 0 so that the outputs are defined even if the
+     "sc > best" branch below is never taken */
+  int i, j,  score, x1 = 0, x2 = 0, x3 = 0, x4 = 0, e1, e2 = 0, e3,
+    sc, del,  e, best = 0, *wt, cd, ci;
+  state_ptr cur_st, last_st, cur_i_st;
+  st_ptr cur, last;
+  const unsigned char *dp;
+  int *st_up, *cur_d_st;
+
+/*      
+   Array rowiC store the best scores of alignment ending at a position
+   Arrays rowiD, and rowiI store the best scores of alignment ending
+                 at a position with a deletion or insertion
+   Arrays sti stores the starting position of the best alignment whose
+              score stored in the corresponding row array.
+   The program stores two rows to complete the computation, same is
+        for the global alignment routine.
+*/
+
+  /* for fastx (but not tfastx), this could be moved into init_work(),
+     and done only once */
+  st_up = (int *) ckalloc(sizeof(int)*(ld+10));
+  init_row2(st_up, ld+5);
+
+  ld += 2;			/* columns 0 and 1 are lead-in columns */
+  init_ROW(up, ld+1);	/* set to zero */
+  init_ROW(down, ld+1);	/* set to zero */
+
+
+  cur = up+1;
+  last = down+1; 
+
+  /* for fastx (but not tfastx), these could be moved into init_work(),
+     and done only once */
+  cur_st = (state_ptr) ckalloc(sizeof(state)*(ld+1));
+  last_st = (state_ptr) ckalloc(sizeof(state)*(ld+1));
+  cur_i_st = (state_ptr) ckalloc(sizeof(state)*(ld+1));
+
+  /* cur_d_st[j] = how many columns back the deletion run that reaches
+     column j was opened (3 per deleted codon) */
+  cur_d_st = st_up; 
+
+  dp = dnap-2;			/* dp[j] is dnap[j-2] */
+  for (i = 0; i < lp; i++) {
+    wt = &wgts[pro[i]][0];
+    for (j = 0; j < 2; j++) {	/* lead-in columns: start points only */
+      cur_st[j].i = i+1;
+      cur_st[j].j = j+1;
+    }
+    for (j = 2; j < ld; j++) {
+      score = wt[dp[j]];
+      del = -1;			/* -1: start a new alignment in this cell */
+      if (j >= 3) {
+	sc = -score;		/* floor, so that sc+score below is >= 0 */
+	e3 = e2-shift; e2 = last[j-3].C;	/* e3 reuses the previous column's e2 */
+	e1 = last[j-2].C-shift; 
+	if (e1 > sc) {sc = e1; del = 2;}	/* 2-column step, shift penalty */
+	if (e2 > sc) {sc = e2; del = 3;}	/* 3-column step, no penalty */
+	if (e3 > sc) {sc = e3; del = 4;} 	/* 4-column step, shift penalty */
+      } else {
+	sc = e2  = 0;
+	if (sc < -score) sc=-score;
+	else del = 3;
+      }
+      sc += score;
+      if (sc < (ci=last[j].I)) {	/* better to arrive from the I state */
+	sc = ci; del = 0;
+      }
+      if (sc < (cd=cur[j].D)) {	/* better to arrive from the D state */
+	sc = cd; del = 5;
+      }
+      cur[j].C = sc;
+      e = sc  - gop;
+      /* D score three columns ahead: open a new gap here or extend */
+      if (e > cd) {
+	cur[j+3].D = e-gext;
+	cur_d_st[j+3] = 3;
+      } else {
+	cur[j+3].D = cd-gext;
+	cur_d_st[j+3] = cur_d_st[j]+3;
+      }
+      /* record the start point that belongs to the chosen predecessor */
+      switch(del) {
+      case 5:
+	e1 = cur_d_st[j];
+	cur_st[j].i = cur_st[j-e1].i;
+	cur_st[j].j = cur_st[j-e1].j;
+	break;
+      case 0:
+	cur_st[j].i = cur_i_st[j].i;
+	cur_st[j].j = cur_i_st[j].j;
+	break;
+      case 2:
+      case 3:
+      case 4:
+	if (i) {
+	  if (j-del >= 0) {
+	    cur_st[j].i = last_st[j-del].i;
+	    cur_st[j].j = last_st[j-del].j;
+	  } else {
+	    cur_st[j].i = i;
+	    cur_st[j].j = 0;
+	  }
+	} else {
+	  cur_st[j].i = 0;
+	  cur_st[j].j = max(0, j-del+1);
+	}
+	break;
+      case -1:
+	cur_st[j].i = i+1;
+	cur_st[j].j = j+1;
+	break;
+      }
+      /* I score handed to the next row: open a new gap here or extend */
+      if (e > ci) {
+	cur[j].I  = e -gext;
+	cur_i_st[j].i = cur_st[j].i;
+	cur_i_st[j].j = cur_st[j].j;
+      } else {
+	cur[j].I  = ci- gext;
+      }
+      if (sc > best) {		/* new overall best: remember start and end */
+	x1 = cur_st[j].i;
+	x2 = cur_st[j].j;
+	best =sc;
+	x3 = i;
+	x4 = j;
+      }
+    }
+    swap((void **)&last, (void **)&cur);
+    swap((void **)&cur_st, (void **)&last_st);
+  }
+  /*	printf("The best score is %d\n", best); */
+  *x = x1; *y = x2; *ex = x3; *ey = x4;
+  free(cur_st); free(last_st); free(cur_i_st); 
+  free(st_up);
+  return best;
+}
+
+/* 
+   Both global_up and global_down do linear space score only global 
+   alignments on subsequence pro[x]...pro[ex], and dna[y]...dna[ey].
+   global_up does the algorithm upwards, from row x towards row ex.
+   global_down does the algorithm downwards, from row ex towards row x.
+
+   global_up() works on the two row buffers *row1 and *row2, using
+   them alternately as "current" and "previous" row.  -10000 is used
+   as the score of an impossible cell.  N selects the boundary value of
+   last[0].I (-gext when N is non-zero, -gop-gext otherwise).  On
+   return *row1 points at the buffer that holds the last computed row.
+*/
+
+static void
+global_up(st_ptr *row1, st_ptr *row2,
+	  int x, int y, int ex, int ey, 
+	  int **wgts, int gop, int gext, int shift,
+	  unsigned char *dnap,
+	  unsigned char *pro,
+	  int N) {
+  int i, j, k, sc, e, e1, e2, e3, t, ci, cd, score, *wt;
+  st_ptr cur, last;
+
+  cur = *row1; last = *row2;
+
+  sc = -gop-gext;
+
+  /* boundary row: only every third column gets a real (gap) score */
+  for (j = 1; j <= ey-y+1; j++) {
+    if (j % 3 == 0) {last[j].C = sc; sc -= gext; last[j].I = sc-gop;}
+    else { last[j].I = last[j].C = -10000;}
+    cur[j].I = -10000;
+  }  
+
+  last[0].C = 0; cur[0].D = cur[1].D = cur[2].D = -10000;
+  last[0].D = last[1].D = last[2].D = -10000;
+
+  if (N) last[0].I = -gext;
+  else last[0].I = -gop-gext;
+
+  for (i = 1; i <= ex-x+1; i++) {
+    wt = &wgts[pro[i+x-1]][0]; e2 = last[0].C; e1 = -10000;
+    for (j = 0; j <= ey-y+1; j++) {
+      t = j+y;
+      sc = -10000; 
+      if (t < 3) score = -10000;	/* no dnap[t-3] element to score yet */
+      else score = wt[dnap[t-3]]; 
+      if (j < 4) {
+	if (j == 3) sc = e2;		/* 3-column step from last[0] */
+	else if (j == 2) sc = e2-shift;	/* 2-column step from last[0] */
+      } 
+      else {
+	e3 = e2; e2 = e1;		/* roll the previous-row C values along */
+	e1 = last[j-2].C;
+	sc = max(max(e1, e3)-shift, e2);	/* e1/e3 steps pay shift, e2 does not */
+      }
+      sc += score;
+      sc = max(sc, max(ci=last[j].I, cd = cur[j].D));
+      cur[j].C = sc;
+      cur[j+3].D = max(cd, sc-gop)-gext;	/* D score three columns ahead */
+      cur[j].I = max(ci, sc-gop)-gext;	/* I score read by the next row */
+    }
+    swap((void **)&last, (void **)&cur);
+  }
+  for (i = 0; i <= ey-y+1; i++) last[i].I = cur[i].I;	/* take the I values from the other buffer */
+  if (*row1 != last) swap((void **)row1, (void **)row2);	/* make *row1 the final row */
+}
+/*
+   global_down() is the mirror image of global_up(): a score-only
+   pass over pro[x]...pro[ex] that starts at row ex and column ey-y+1
+   and works towards row x and column 0, using *row1 and *row2
+   alternately as current and previous row.  -10000 marks an
+   impossible cell.  N selects the boundary value of last[ey-y+1].I
+   (-gext when N is non-zero, -gop-gext otherwise).  On return *row1
+   points at the buffer that holds the last computed row.
+
+   NOTE(review): the code writes cur[j-3].D for j down to 0 and reads
+   dnap[t-1] for t = j+y, so it relies on the row buffers and dnap
+   being addressable a few elements before index 0 -- confirm against
+   the allocation in the caller.
+*/
+static void
+global_down(st_ptr *row1, st_ptr *row2,
+	    int x, int y, int ex, int ey,
+	    int **wgts, int gop, int gext, int shift,
+	    unsigned char *dnap, unsigned char *pro,
+	    int N) {
+  int i, j, k, sc, del, *tmp, e,  t, e1,e2,e3, ci,cd, s1, s2, s3, *wt;
+  st_ptr cur, last;
+
+  cur = (*row1); last = *row2;
+
+  sc = -gop-gext;
+
+  /* boundary row, filled from the right-hand end */
+  for (j = ey-y; j >= 0; j--) {
+    if ((ey-y+1-j) % 3) {last[j].C = sc; sc-=gext; last[j].I = sc-gop;}
+    else  last[j].I =  last[j].C = -10000;
+  } 
+
+  last[ey-y+1].C = 0;
+  cur[ey-y+1].D = cur[ey-y].D = cur[ey-y-1].D = -10000;
+  last[ey-y+1].D = last[ey-y].D = last[ey-y-1].D = -10000;
+
+  if (N) last[ey-y+1].I = -gext;
+  else last[ey-y+1].I = -gop-gext;
+
+  for (i = ex-x; i >= 0; i--) {
+    wt = &wgts[pro[i+x]][0]; e2 = last[ey-y+1].C; 
+    e1 = s2 = s3 = -10000; 
+    for (j = ey-y+1; j >= 0; j--) {
+      t = j+y;
+      s1 = wt[dnap[t-1]];	/* s1, s2, s3: match scores for this and the two previous columns visited */
+      sc = -10000;
+      if (t+3 > ey) {		/* close to the right-hand edge */
+	if (t+2==ey) sc = e2+s2;
+	else if (t+1==ey) sc = e2-shift+s1;
+      } else {
+	e3 = e2; e2 = e1;	/* roll the previous-row C values along */
+	e1 = last[j+2].C;
+	sc = max(max(e1+s1, e3+s3)-shift, e2+s2);	/* e1/e3 steps pay shift, e2 does not */
+      }
+      if (sc < (cd= cur[j].D)) {
+	sc = cd; 
+	cur[j-3].D = cd-gext;
+      } else cur[j-3].D =max(cd, sc-gop)-gext;
+      if (sc < (ci= last[j].I)) {
+	sc = ci; del = 0;	/* del is assigned but not read in this function */
+	cur[j].I = ci - gext;
+      } else cur[j].I = max(sc-gop,ci)-gext;
+      cur[j].C = sc;
+      s3 = s2; s2 = s1;
+    }
+    swap((void **)&last, (void **)&cur);
+  }
+  for (i = 0; i <= ey-y+1; i++) last[i].I = cur[i].I;	/* take the I values from the other buffer */
+  if (*row1 != last) swap((void **)row1, (void **)row2);	/* make *row1 the final row */
+}
+
+/* set row[0] .. row[ld-1] to zero; does nothing when ld <= 0 */
+static void
+init_row2(int *row, int ld) {
+  int k = 0;
+
+  while (k < ld) {
+    row[k] = 0;
+    k++;
+  }
+}
+
+/* reset the C, D and I scores of row[0] .. row[ld-1] to zero */
+static void
+init_ROW(st_ptr row, int ld) {
+  int k;
+
+  for (k = 0; k < ld; k++) {
+    row[k].C = 0;
+    row[k].D = 0;
+    row[k].I = 0;
+  }
+}
+
+/*
+   combine() appends the edge list x2 to the end of the edge list x1
+   and returns the head of the joined list.  When x1 is NULL, x2 is
+   returned unchanged (st is ignored in that case).
+
+   When st is non-zero, the j field of every node of x2 up to and
+   including the first node whose l is 3 or 4 is incremented, and
+   that node's l is decremented.  If x2 contains no such node (or is
+   NULL) only the j fields are adjusted; the original code
+   dereferenced NULL in that situation.
+*/
+static match_ptr
+combine(match_ptr x1, match_ptr x2, int st) {
+  match_ptr x;
+
+  if (x1 == NULL) return x2;
+  for (x = x1; x->next; x = x->next);	/* walk to the last node of x1 */
+  x->next = x2;
+  if (st) {
+    for (x = x2; x; x = x->next) {
+      x->j++;
+      if (x->l == 3 || x->l == 4) break;
+    }
+    if (x) x->l--;	/* x is NULL when no 3/4 edge was found */
+  }
+  return x1;
+}
+
+/*
+   global use the two upwards and downwards score only linear
+   space global alignment subroutine to recursively build the
+   alignment.
+
+   The region aligned is rows x..ex against columns y..ey.  The result
+   is a linked list of match_node edges (allocated with ckalloc).
+   N1 and N2 are passed on to global_up()/small_global() and
+   global_down()/small_global() respectively as their boundary flags.
+   up_stp, dn_stp and tp_stp are the row buffers shared by all levels
+   of the recursion; smgl_sp is the work space for small_global().
+
+   Nodes built directly in this function for the two degenerate cases
+   (ex <= x, ey <= y) only have l and next filled in; i and j are not
+   set there.
+*/
+
+match_ptr
+global(int x, int y, int ex, int ey, 
+       int **wgts, int gop, int gext, int shift,
+       unsigned char *dnap, 
+       unsigned char *pro,
+       int N1, int N2,
+       st_ptr *up_stp, st_ptr *dn_stp, st_ptr *tp_stp,
+       struct smgl_str *smgl_sp
+       )
+{
+  int m;
+  int m1, m2;
+  match_ptr x1, x2, mm1, mm2;
+  /*printf("%d %d %d %d\n", x,y, ex, ey);*/
+  /*
+    if the space required is limited, we can do a quadratic space
+    algorithm to find the alignment.
+  */
+  if (ex <= x) {		/* at most one row left */
+    mm1  = NULL; mm2= NULL;
+    for (m = y+3; m <= ey; m+=3) {	/* one type-5 edge per 3 columns */
+      x1 = (match_ptr) ckalloc(sizeof(match_node));
+      x1->l = 5; x1->next = mm1; 
+      if (mm1== NULL) mm2 = x1;	/* mm2 remembers the tail of the list */
+      mm1 = x1;
+    }
+    if (ex == x) {
+      if ((ey-y) % 3 != 0) {
+	x1 = (match_ptr) ckalloc(sizeof(match_node));
+	x1->l = ((ey-y) % 3) +1; x1->next = NULL;	/* edge for the leftover columns, appended at the tail */
+	if (mm2) mm2->next = x1;
+	else mm1 = x1;
+      } else {
+	if (mm2) mm2->l = 4;
+      }
+    }
+    return mm1;
+  }
+  if (ey <= y) {		/* no columns left: one type-0 edge per row */
+    mm1  = NULL;
+    for (m = x; m <= ex; m++) {
+      x1 = (match_ptr) ckalloc(sizeof(match_node));
+      x1->l = 0; x1->next = mm1; mm1 = x1;
+    }
+    return mm1;
+  }
+  if (ex -x < SGW1-1 && ey-y < SGW2-1) 	/* small enough for the full-matrix routine */
+    return small_global(x,y,ex,ey,
+			wgts, gop, gext, shift,
+			dnap, pro, N1, N2, smgl_sp);
+  m = (x+ex)/2;
+  /*     
+	 Do the score only global alignment from row x to row m, m is
+	 the middle row of x and ex. Store the information of row m in
+	 upC, upD, and upI.
+  */
+  global_up(up_stp, tp_stp, x, y, m, ey, 
+	    wgts, gop, gext, shift,
+	    dnap, pro, N1);
+
+  /* 
+     Do the score only global alignment downwards from row ex
+     to row m+1, store information of row m+1 in downC downI and downD
+  */
+  global_down(dn_stp, tp_stp,  m+1, y, ex, ey, 
+	      wgts, gop, gext, shift,
+	      dnap, pro, N2);
+
+  /*
+    Use these information of row m and m+1, to find the crossing
+    point of the best alignment with the middle row. The crossing
+    point is given by m1 and m2. Then we recursively call global
+    itself to compute alignments in two smaller regions found by
+    the crossing point and combine the two alignments to form a
+    whole alignment. Return that alignment.
+  */
+  if (find_best(*up_stp, *dn_stp, &m1, &m2, ey-y+1, y, gop)) {
+    x1 = global(x, y, m, m1, wgts, gop, gext, shift, dnap, pro, N1, 0,
+		up_stp, dn_stp, tp_stp, smgl_sp);
+    x2 = global(m+1, m2, ex, ey, wgts, gop, gext, shift, dnap, pro, 0, N2,
+		up_stp, dn_stp, tp_stp, smgl_sp);
+    if (m1 == m2) x1 = combine(x1,x2,1);
+    else x1 = combine(x1, x2,0);
+  } else {
+    /* find_best() returned 0: rows m and m+1 are emitted here as two
+       type-0 edges and the halves exclude those two rows */
+    x1 = global(x, y, m-1, m1, wgts, gop, gext, shift,  dnap, pro, N1, 1,
+		up_stp, dn_stp, tp_stp, smgl_sp);
+    x2 = global(m+2, m2, ex, ey, wgts, gop, gext, shift, dnap, pro, 1, N2,
+		up_stp, dn_stp, tp_stp, smgl_sp);
+    mm1 = (match_ptr) ckalloc(sizeof(match_node));
+    mm1->i = m; mm1->l = 0; mm1->j = m1;
+    mm2 = (match_ptr) ckalloc(sizeof(match_node));
+    mm2->i = m+1; mm2->l = 0; mm2->j = m1;
+    mm1->next = mm2; mm2->next = x2;
+    x1 = combine(x1, mm1, 0);
+  }
+  return x1;
+}
+
+/*
+   find_best() chooses the column at which the upward row (up) and the
+   downward row (down) are joined.  For every i in 1..ld-1 it forms
+     s2 = C score of up at column i  +  C score of down at column i
+     s4 = I score of up at column i  +  I score of down at column i + gop
+   and keeps the largest value seen (the first one wins on ties, and
+   s2 is tried before s4).
+
+   *m1 and *m2 receive j-1+y and j+y, where j is the chosen column
+   (j is 0 when no candidate exceeds the initial bound of -100000).
+   The return value is 1 when the best value was an s2 (C/C) sum and 0
+   when it was an s4 (I/I) sum.  It defaults to 1 when no candidate was
+   accepted; the original code returned an uninitialized value then.
+   y offsets the reported columns; gop is added back to the I/I sum.
+*/
+static int
+find_best(st_ptr up, st_ptr down,
+	  int *m1, int *m2,
+	  int ld, int y, int gop) {
+  int i, best = -100000, j = 0, s2, s4, st = 1;
+  up++;				/* so that up[i-1] below is column i of the caller's row */
+  for (i = 1; i < ld; i++) {
+    s2 = up[i-1].C + down[i].C;
+    s4 = up[i-1].I + down[i].I + gop;
+    if (best < s2) {
+      best = s2; j = i; st = 1;
+    }
+    if (best < s4) {
+      best = s4; j = i; st = 0;
+    }
+  }
+  *m1 = j-1+y;
+  *m2 = j+y;
+  /*printf("find best score =%d\n", best);*/
+  return st;
+} 
+
+/*
+   An alignment is represented as a linked list whose element
+   is of type match_node. Each element represent an edge in the
+   path of the alignment graph. The fields of match_node are
+   l ---  gives the type of the edge.
+   i, j --- give the end position.
+
+   small_global() aligns rows x..ex against columns y..ey using the
+   full score matrix smgl_sp->C and traceback matrix smgl_sp->st
+   (with smgl_sp->I and smgl_sp->D as working rows), then walks the
+   traceback from the last cell and returns the edges as a list,
+   first edge at the head.  -10000 marks an impossible cell.  N1
+   selects the starting value of cI[0]; N2 allows the final cell to be
+   entered from the I state.
+
+   Each traceback entry holds the move type in its last decimal digit
+   (0, 2, 3, 4 or 5), plus 10 when the D score was extended rather
+   than reopened at that cell and plus 20 when the I score was.
+*/
+
+static match_ptr
+small_global(int x, int y, int ex, int ey,
+	     int **wgts, int gop, int gext, int shift,
+	     unsigned char *dnap, unsigned char *pro,
+	     int N1, int N2, struct smgl_str *smgl_sp) {
+
+  /* int C[SGW1+1][SGW2+1], st[SGW1+1][SGW2+1], D[SGW2+7], I[SGW2+1]; */
+
+  int i, j, e, sc, score, del, k, t, *wt, ci, cd;
+  int *cI, *cD, *cC, *lC, *cst, e2, e3, e4;
+  match_ptr mp, first;
+
+  /*printf("small_global %d %d %d %d\n", x, y, ex, ey);*/
+  sc = -gop-gext; smgl_sp->C[0][0] = 0;
+
+  cI = smgl_sp->I;
+  if (N1) cI[0] = -gext; else cI[0] = sc;
+  for (j = 1; j <= ey-y+1; j++) {	/* boundary row 0 */
+    if (j % 3== 0) {
+      smgl_sp->C[0][j] = sc;
+      sc -= gext;
+      cI[j] = sc-gop;
+    } 
+    else {cI[j] = smgl_sp->C[0][j] = -10000;}
+    smgl_sp->st[0][j] = 5;	/* row 0 is always traced back as move 5 */
+  }
+
+  lC = &smgl_sp->C[0][0];	/* lC: previous row of C */
+  cD = smgl_sp->D; cD[0] = cD[1] = cD[2] = -10000;
+
+  for (i = 1; i <= ex-x+1; i++) {
+    cC = &smgl_sp->C[i][0];	
+    wt = &wgts[pro[i+x-1]][0]; cst = &smgl_sp->st[i][0];
+    for (j = 0; j <=ey-y+1; j++) {
+      sc = -10000; del = 0;
+      ci = cI[j];
+      cd= cD[j];
+      t = j+y;
+      if (t < 3) score = -10000;	/* no dnap[t-3] element to score yet */
+      else score = wt[dnap[t-3]];
+      if (j >= 4) {
+	e2 = lC[j-2]-shift; sc = lC[j-3]; e4 = lC[j-4]-shift;
+	del = 3;
+	if (e2 > sc) { sc = e2; del = 2;}
+	if (e4 >= sc) { sc = e4; del = 4;}
+      } else {
+	if (j ==3) {sc= lC[0]; del = 3;}
+	else if (j == 2) {sc = lC[0]-shift; del = 2;}
+      }
+      sc = sc+score;
+      if (sc < ci) {
+	sc = ci; del = 0; 
+      }
+      if (sc <= cd) {
+	sc = cd;
+	del = 5;
+      }
+      cC[j] = sc;
+      sc -= gop;
+      if (sc < cd) {		/* extending D beats reopening it here */
+	del += 10;
+	cD[j+3] = cd - gext;
+      } else cD[j+3] = sc -gext;
+      if (sc < ci) {		/* extending I beats reopening it here */
+	del += 20;
+	cI[j] = ci-gext;
+      } else cI[j] = sc-gext;
+      *(cst++) = del;		/* move type plus the +10/+20 flags */
+    }
+    lC = cC;
+  }
+  if (N2 && ci +gop > cC[ey-y+1]) {	/* ci: I value read for the last cell filled */
+    smgl_sp->st[ex-x+1][ey-y+1] = 0;
+    /*printf("small score = %d\n", ci+gop);*/
+  } /*else printf("small score =%d\n", cC[ey-y+1]);*/
+  first = NULL; e = 1;		/* e: kind of the move taken last (5, 0, or 1 for the others) */
+  for (i = ex+1, j = ey+1; i > x || j > y; i--) {
+    mp = (match_ptr) ckalloc(sizeof(match_node));
+    mp->i = i-1;
+    k  = (t=smgl_sp->st[i-x][j-y])%10;
+    mp->j = j-1;
+    if (e == 5 && (t/10)%2 == 1) k = 5;	/* stay in the D run */
+    if (e == 0 && (t/20)== 1) k = 0;	/* stay in the I run */
+    if (k == 5) { j -= 3; i++; e=5;}	/* i++ cancels the loop's i--: same row */
+    else {j -= k;if (k==0) e= 0; else e = 1;}
+    mp->l = k;
+    mp->next = first;		/* prepend: list ends up in forward order */
+    first = mp;
+  }
+
+  /*	for (i = 0; i <= ex-x; i++) {
+	for (j = 0; j <= ey-y; j++) 
+	printf("%d ", C[i][j]);
+	printf("\n");
+	}
+  */
+  return first;	
+}
+
+#define XTERNAL
+#include "upam.h"
+
+/*
+   display_alig() prints a picture of an alignment on stdout, WIDTH
+   columns at a time: a ruler line, the DNA text, a line of marker
+   characters and the protein residues.
+
+   a       alignment array: a[0] is the protein start index, a[1]-1
+           the DNA start index, a[2..length-1] the operation codes
+   dna     DNA sequence, ld elements, printed through NCBIstdaa[]
+   pro     protein sequence, printed through NCBIstdaa[]
+   length  number of entries in a[]
+   ld      number of elements in dna[]
+
+   Changes from the earlier version: the translated DNA copy is freed
+   before returning, a[j+1] is only read while j+1 < length, operation
+   code 1 no longer writes in front of tmp[], and the line buffers are
+   shifted with memmove() because source and destination overlap.
+
+   NOTE(review): the 100-byte line buffers assume WIDTH plus one
+   operation (at most 4 characters) stays below 100 -- confirm WIDTH.
+*/
+extern void
+display_alig(int *a, unsigned char *dna, unsigned char * pro, int length, int ld)
+{
+  int len = 0, i, j, x, y, lines, k;
+  char line1[100], line2[100], line3[100],
+    tmp[10] = "         ";
+  unsigned char *dna1, c1, c2, c3, *st;
+
+  dna1 = ckalloc((size_t)ld);
+  for (st = dna, i = 0; i < ld; i++, st++) dna1[i] = NCBIstdaa[*st];
+  line1[0] = line2[0] = line3[0] = '\0'; x= a[0]; y = a[1]-1;
+ 
+  for (len = 0, j = 2, lines = 0; j < length; j++) {
+    i = a[j];
+    /*printf("%d %d %d\n", i, len, b->j);*/
+    /* codes 2, 3, 4: put the residue in tmp[0], tmp[1] or tmp[2] */
+    if (i >= 2 && i < 5) tmp[i-2] = NCBIstdaa[pro[x++]];
+    if (i == 5) {
+      i = 3; tmp[0] = tmp[1] = tmp[2] = '-';
+      if (j+1 < length && a[j+1] == 2) tmp[2] = ' ';
+    }
+    if (i > 0) {
+      strncpy(&line1[len], (const char *)&dna1[y], i); y+=i;
+    } else {line1[len] = '-'; i = 1; tmp[0] = NCBIstdaa[pro[x++]];}
+    strncpy(&line2[len], tmp, i);
+    /* turn tmp into the marker line: '/', '|' or '\' under a residue */
+    for (k = 0; k < i; k++) {
+      if (tmp[k] != ' ' && tmp[k] != '-') {
+	if (k == 2) tmp[k] = '\\';
+	else if (k == 1) tmp[k] = '|';
+	else tmp[k] = '/';
+      } else tmp[k] = ' ';
+    }
+    if (i == 1) tmp[0] = ' ';
+    strncpy(&line3[len], tmp, i); 
+    tmp[0] = tmp[1] =  tmp[2] = ' ';
+    len += i;
+    line1[len] = line2[len] =line3[len]  = '\0'; 
+    if (len >= WIDTH) {
+      printf("\n%5d", WIDTH*lines++);
+      for (k = 10; k <= WIDTH; k+=10) 
+	printf("    .    :");
+      if (k-5 < WIDTH) printf("    .");
+      /* print the first WIDTH columns, then restore the cut characters */
+      c1 = line1[WIDTH]; c2 = line2[WIDTH]; c3 = line3[WIDTH];
+      line1[WIDTH] = line2[WIDTH] = line3[WIDTH] = '\0';
+      printf("\n     %s\n     %s\n     %s\n", line1, line3, line2);
+      line1[WIDTH] = c1; line2[WIDTH] = c2; line3[WIDTH] = c3;
+      /* move the remainder (including its '\0') to the front */
+      memmove(line1, &line1[WIDTH], (size_t)(len - WIDTH) + 1);
+      memmove(line2, &line2[WIDTH], (size_t)(len - WIDTH) + 1);
+      memmove(line3, &line3[WIDTH], (size_t)(len - WIDTH) + 1);
+      len = len - WIDTH;
+    }
+  }
+  printf("\n%5d", WIDTH*lines);
+  for (k = 10; k < len; k+=10) 
+    printf("    .    :");
+  if (k-5 < len) printf("    .");
+  printf("\n     %s\n     %s\n     %s\n", line1, line3, line2);
+  free(dna1);
+}
+
+
+/* The alignment array stores the operations that align the protein and
+   DNA sequences.  The operation codes in the array are:
+   0:     deletion of an amino acid.
+   2:     frame shift, 2 nucleotides match with an amino acid
+   3:     match an amino acid with a codon
+   4:     the other type of frame shift (4 nucleotides match with an
+          amino acid)
+   5:     deletion of a codon
+   
+
+   Also, the first two elements of the array store the starting points
+   in the protein and DNA sequences of the local alignment.
+
+   Display looks like where WIDTH is assumed to be divisible by 10.
+
+    0    .    :    .    :    .    :    .    :    .    :    .    :
+     CCTATGATACTGGGATACTGGAACGTCCGCGGACTGACACACCCGATCCGCATGCTCCTG
+      P  M  I  L  G  Y  W  N  V  R  G  L  T  H  P  I  R  M  L  L 
+
+   60    .    :    .    :    .    :    .    :    .    :    .    :
+     GAATACACAGACTCAAGCTATGATGAGAAGAGATACACCATGGGTGACGCTCCCGACTTT
+      E  Y  T  D  S  S  Y  D  E  K  R  Y  T  M  G  D  A  P  D  F 
+*/
+
+
+/* fatal - print msg followed by a newline on stderr, then exit(1).
+   Does not return.  Defined with a prototype (rather than the old
+   K&R parameter list) like the other functions in this file. */
+void fatal(char *msg)
+{
+	fprintf(stderr, "%s\n", msg);
+	exit(1);
+}
+/*
+   fx_walign() produces one frameshift-aware alignment.
+
+   It first calls do_fastx() to get the scores (a_res->rst) and the
+   offset hoff.  If the score selected by ppst->score_ix does not
+   exceed score_thresh, it sets a_res->sw_score to 0 and a_res->n1 to
+   n1 and returns without aligning.  Otherwise it restricts one of the
+   two sequences to a window [l_min, l_max) around hoff (the xaa
+   sequence for FASTX, the yaa sequence for TFASTX), copying the
+   window into a zero-filled buffer when it is not the whole sequence,
+   and calls pro_dna() to do the alignment.  pro_dna()'s return value
+   is stored in a_res->sw_score, and the coordinates of the windowed
+   sequence in a_res are shifted back by l_min.
+
+   frame is not used in this function.
+*/
+void
+fx_walign (const unsigned char *aa0, int n0,
+	   const unsigned char *xaa, int n1, unsigned char *yaa,
+	   int frame, int max_res,
+	   struct pstruct *ppst, 
+	   struct f_struct *f_str, 
+	   struct a_res_str *a_res,
+	   int score_thresh
+	   )
+{
+  unsigned char *local_xaa, *local_yaa;
+  int score;
+  int i, last_n1, itemp, n10;
+  int hoff, l_min, l_max, n_nt, n_aa, w_fact;
+  int score_ix, window;
+  int aa1_min_s, aa1_max_s;
+  unsigned char *fs, *fd;
+  struct score_count_s s_info;
+  int itx;
+  
+  memset(&s_info,0,sizeof(s_info));
+
+  score_ix = ppst->score_ix;
+
+  /*  check for large differences in sequence length - if there is a
+      large difference, use do_fastx() to get the offset. */
+
+#ifndef TFAST	/* FASTX */
+  n_nt = n0;
+  n_aa = n1;
+#else		/* TFASTX */
+  n_nt = n1;
+  n_aa = n0;
+#endif
+
+  do_fastx(aa0, n0, xaa, n1, yaa, ppst, f_str, &a_res->rst, &hoff,1, &s_info);
+
+  if (a_res->rst.score[score_ix] <= score_thresh) {	/* not worth aligning */
+    a_res->sw_score = 0;
+    a_res->n1 = n1;
+    return;
+  }
+
+  /* now we will do an alignment, but we need to be certain to do the
+     alignment in the region mapped by hoff to include the
+     high-scoring region */
+
+  /* if initn > 2 * init1, use wider window */
+  if (a_res->rst.score[0] > 2 * a_res->rst.score[1]) {w_fact = 4;} 
+  else w_fact = 2;	/* NOTE(review): w_fact is not read again in this function */
+
+  /* Here we need to use different strategies depending on whether we
+     have DNA or protein.  For a DNA query (protein library, FASTX), the
+     strategy is simple -- NULL bound the library protein sequence and
+     do the alignment.  For a protein query (TFASTX), things are more complex.
+     Moreover, the mapping must be calculated differently in each case.
+  */
+
+
+#ifndef TFAST /* map onto the protein (aa1) sequence */  
+  window = min(n1, ppst->param_u.fa.optwid);
+  l_min = max(0, -window - hoff);
+  l_max = min(n1, n0-hoff+window);
+
+  local_yaa = yaa;
+  local_xaa = (unsigned char *)xaa;
+  if (l_min > 0 || l_max < n1 -1 ) {	/* window is a proper sub-range: copy it */
+    local_xaa = (unsigned char *)calloc(l_max - l_min+2,sizeof(char));	/* NOTE(review): result is not checked for NULL */
+    local_xaa++;	/* leave one zero byte in front of the copy */
+    memcpy(local_xaa, xaa+l_min, l_max - l_min);
+  }
+/*
+  if (l_min > 0) {
+    aa1_min_s = xaa[l_min-1];
+    local_xaa[l_min-1] = '\0';
+  }
+  if (l_max < n1 - 1) {
+    aa1_max_s = xaa[l_max];
+    xaa[l_max] = '\0';
+  }
+*/
+#else
+  window = min(n0, ppst->param_u.fa.optwid);
+  l_min = max(0,(hoff-window)*3);
+  l_max = min((hoff+window+n0)*3,n_nt);
+  local_xaa = (unsigned char *)xaa;
+  local_yaa = yaa;
+  if (l_min > 0 || l_max <n_nt -1) {	/* window is a proper sub-range: copy it */
+    local_yaa = (unsigned char*)calloc(l_max - l_min + 2,sizeof(unsigned char));	/* NOTE(review): result is not checked for NULL */
+    local_yaa++;	/* leave one zero byte in front of the copy */
+    memcpy(local_yaa, yaa+l_min, l_max - l_min);
+  }
+
+  /*
+  if (l_min > 0)
+    aa1_min_s = yaa[l_min-1];
+    yaa[l_min-1] = '\0';
+  }
+  if (l_max < n_nt-1) {
+
+    aa1_max_s = yaa[l_max];
+    yaa[l_max] = '\0';
+  }
+  */
+#endif
+
+  /* NOTE(review): same test as after do_fastx() above; nothing visible
+     here changes the score in between, so this looks unreachable (and
+     would skip freeing the window copy if it ever fired) -- confirm */
+  if (a_res->rst.score[ppst->score_ix] <= score_thresh) {
+    a_res->sw_score = 0;
+    a_res->n1 = n1;
+    return;
+  }
+
+  /* pro_dna always compares protein to DNA, and returns protein
+     coordinates in a_res->min0,max0 */
+
+  a_res->sw_score = 
+    pro_dna(
+#ifndef TFAST	/* FASTX */
+	    local_xaa, l_max - l_min, 	 /* true protein is in aa1/xaa */
+	    yaa, n_nt,
+#else 		/* TFASTX */
+	    aa0, n0, 		/* true protein is in aa0 */
+	    local_yaa, l_max - l_min,
+#endif
+	    ppst->pam2[0],
+#ifdef OLD_FASTA_GAP
+	    -(ppst->gdelval - ppst->ggapval),
+#else
+	    -ppst->gdelval,
+#endif
+	    -ppst->ggapval,
+	    -ppst->gshift,
+	    &f_str->smgl_s,
+	    max_res, a_res);
+
+  /*
+  if (a_res->rst.score[0] < a_res->sw_score) {
+    a_res->rst.score[0] = a_res->sw_score;
+    a_res->rst.score[ppst->score_ix] = a_res->sw_score;
+  }
+  */
+
+#ifndef TFAST
+  if (l_min > 0 || l_max < n1-1) free(--local_xaa);	/* same condition as the allocation; undo the ++ */
+/*
+  if (l_min > 0) {
+    xaa[l_min-1] = aa1_min_s;
+  }
+  if (l_max < n1 - 1) {
+    xaa[l_max] = aa1_max_s;
+  }
+*/
+  a_res->min0 += l_min;	/* back to coordinates of the full xaa */
+  a_res->max0 += l_min;
+#else
+  if (l_min > 0 || l_max < n1-1) free(--local_yaa);	/* n_nt == n1 here, so this matches the allocation test */
+  /*
+  if (l_min > 0) {
+    yaa[l_min-1] = aa1_min_s;
+  }
+  if (l_max < n1 - 1) {
+    yaa[l_max] = aa1_max_s;
+  }
+  */
+  a_res->n1 = n1;
+  a_res->min1 += l_min;	/* back to coordinates of the full yaa */
+  a_res->max1 += l_min;
+#endif
+
+}
+
+/*
+  fx_malign is a recursive interface to fx_walign() that is called
+  from do_walign(). fx_malign() first does an alignment, then checks
+  to see if the score is greater than the threshold. If so, it tries
+  doing a left and right alignment.
+
+  In this implementation, the translation required for f_str->aa1x and
+  f_str->aa1y is done at each recursive level. A better implementation
+  would do the translation once, and then be more sophisticated about
+  the boundaries on f_str->aa1x,y.  This is challenging, however,
+  because there is no easy way to subset aa1x [111112222233333],
+  though it is possible to subset aa1y cleanly.  The current solution
+  is to re-generate xaa from yaa.
+
+  21-Nov-2010 -- like do_walign(), fx_malign() uses a const xaa, to
+  ensure that threads do not interfere with each other.  If a
+  sub-range is needed, a new sequence is produced.
+
+  Summary of the visible behavior: cur_ares->res is allocated here
+  (max_res ints; the program exits if that fails) and cur_ares is
+  filled in by fx_walign().  With first_align set, the threshold
+  passed to fx_walign() is 1 instead of score_thresh.  Unless
+  ppst->do_rep is set and the score exceeds score_thresh, cur_ares is
+  returned as is.  Otherwise the part of the sequence before the
+  alignment and the part after it are each aligned recursively (when
+  at least min_alen long), sub-results scoring above score_thresh are
+  kept, and the chains are joined with merge_ares_chains().  The
+  return value is the head of the resulting chain.
+
+ */
+struct a_res_str *
+fx_malign (const unsigned char *aa0, int n0,
+	   const unsigned char *xaa, int n1, unsigned char *yaa,
+	   int frame, 
+	   int score_thresh, int max_res,
+	   struct pstruct *ppst,
+	   struct f_struct *f_str,
+	   struct a_res_str *cur_ares,
+	   int first_align)
+{
+  struct a_res_str *tmpl_ares, *tmpr_ares, *this_ares;
+  struct a_res_str *mtmpl_ares, *mtmpr_ares, *mt_next;
+  unsigned char *my_xaa;
+  unsigned char *local_xaa, *local_yaa;
+  int nxyaa;
+  int hoff, score_ix;
+  int min_alen;
+  struct rstruct rst;
+  /*   char save_res; */
+  int iphase, i;
+  unsigned char *fd;
+  int max_sub_score = -1;
+
+  score_ix = ppst->score_ix;
+
+  /* now we need alignment storage - get it */
+  if ((cur_ares->res = (int *)calloc((size_t)max_res,sizeof(int)))==NULL) {
+    fprintf(stderr," *** cannot allocate alignment results array %d\n",max_res);
+    exit(1);
+  }
+
+  cur_ares->next = NULL;
+    
+#ifdef TFAST  
+  min_alen = min(n0,MIN_LOCAL_LEN)*3;	/* n0 in aa, min_alen in nt */
+#else 
+  min_alen = min(n0/3,MIN_LOCAL_LEN);	/* no in nt, min_alen in aa */
+#endif
+
+#ifdef TFAST
+  /* convert yaa to xaa -- cannot use *fs to stop because subset
+     does not have '\0' in all three frames */
+  my_xaa = (unsigned char *)calloc(n1+2,sizeof(unsigned char));	/* NOTE(review): result is not checked for NULL */
+  my_xaa++;	/* leave one zero byte in front */
+  for (fd=my_xaa, iphase = 0; iphase < 3; iphase++) {
+    for (i=iphase; i<n1; i+=3,fd++) *fd = yaa[i];	/* every third element, phase by phase */
+  }
+  *fd=EOSEQ;
+#else
+  my_xaa = (unsigned char *)xaa;
+#endif
+
+  fx_walign(aa0, n0, my_xaa, n1, yaa, frame, max_res, 
+	    ppst, f_str, cur_ares,(first_align ? 1 : score_thresh));
+
+  /* in cur_ares, min0,max0 are always protein, min1,max1 are always
+     DNA, but n0 could be protein or DNA, depending on
+     FASTX/TFASTX */
+
+  if (!ppst->do_rep || cur_ares->rst.score[ppst->score_ix] <= score_thresh) {	/* no sub-alignments wanted or warranted */
+#ifdef TFAST
+    free(--my_xaa);
+#endif
+    return cur_ares;
+  }
+
+  /* we are going to do a recursive edit, so we need a local copy of
+     xaa (fastx) or yaa (tfastx) */
+
+#ifdef TFAST	/* TFASTX, n1 is nt */
+    nxyaa = cur_ares->min1;
+#else		/* FASTX n1 is aa */
+    nxyaa = cur_ares->min0;
+#endif
+
+  if (nxyaa >= min_alen) { /* try the left  */
+    /* allocate a_res */	
+    tmpl_ares = (struct a_res_str *)calloc(1, sizeof(struct a_res_str));	/* NOTE(review): result is not checked for NULL */
+
+#ifdef TFAST	/* TFASTX, no xaa */
+    local_xaa = my_xaa;	/* my_xaa is calloc'ed for TFAST */
+    local_yaa = (unsigned char *)calloc(nxyaa+2, sizeof(unsigned char));
+    local_yaa++;  /* skip the initial zero */
+    memcpy(local_yaa, yaa, nxyaa);
+/*
+    save_res = yaa[cur_ares->min1]; 
+    yaa[cur_ares->min1] = '\0';
+*/
+#else
+    local_yaa = yaa;
+    local_xaa = (unsigned char *)calloc(nxyaa+2, sizeof(unsigned char));
+    local_xaa++;  /* skip the initial zero */
+    memcpy(local_xaa, xaa, nxyaa);
+/*
+    save_res = xaa[cur_ares->min0];
+    xaa[cur_ares->min0] = '\0';
+*/
+#endif
+    tmpl_ares = fx_malign(aa0, n0, local_xaa, nxyaa,
+			  local_yaa,
+			  frame, score_thresh, max_res, 
+			  ppst, f_str, tmpl_ares, 0);
+
+#ifdef TFAST
+    free(--local_yaa);	/* local_yaa, allocated above */
+#else
+    free(--local_xaa);	/* FASTX - local_xaa allocated above */
+#endif
+
+    if (tmpl_ares->rst.score[ppst->score_ix] > score_thresh) {
+      max_sub_score = tmpl_ares->rst.score[ppst->score_ix];
+    }
+    else {	/* left result is below threshold: discard it */
+      if (tmpl_ares->res) free(tmpl_ares->res);
+      free(tmpl_ares);
+      tmpl_ares = NULL;
+    }
+  }
+  else {tmpl_ares = NULL;}
+
+  /* do the right */
+#ifdef TFAST	/* TFASTX - n0 is aa, n1 nt */
+  nxyaa = n1 - cur_ares->max1 - 1;
+#else		/* FASTX - n1 is aa, n0 nt */
+  /* this is counter-intuitive, because n1 is the length of the DNA
+     sequence in both cases */
+  nxyaa = n1 - cur_ares->max0 - 1;
+#endif
+
+  if (nxyaa >= min_alen) {    /* try the right  */      
+    /* allocate a_res */	
+    tmpr_ares = (struct a_res_str *)calloc(1, sizeof(struct a_res_str));	/* NOTE(review): result is not checked for NULL */
+
+    /* find boundaries */
+#ifdef TFAST	/* TFASTX, no xaa */
+    local_xaa = my_xaa;
+    local_yaa = (unsigned char *)calloc(nxyaa+2, sizeof(unsigned char));
+    local_yaa++;  /* skip the initial zero */
+    memcpy(local_yaa, yaa+cur_ares->max1+1,nxyaa);
+/*
+    save_res = yaa[cur_ares->max1];
+    yaa[cur_ares->max1] = '\0';
+*/
+#else
+    local_yaa = yaa;
+    local_xaa = (unsigned char *)calloc(nxyaa+2, sizeof(unsigned char));
+    local_xaa++;  /* skip the initial zero */
+    memcpy(local_xaa, xaa+cur_ares->max0+1,nxyaa);
+/*
+    save_res = xaa[cur_ares->max0];
+    xaa[cur_ares->max0] = '\0';
+*/
+#endif
+    tmpr_ares = fx_malign(aa0, n0, 
+			  local_xaa, nxyaa, local_yaa,
+			  frame,
+			  score_thresh, max_res,
+			  ppst, f_str, tmpr_ares,0);
+#ifdef TFAST	/* TFASTX, no xaa */
+    free(--local_yaa);
+#else
+    free(--local_xaa);
+#endif
+/*    yaa[cur_ares->max1] = save_res;*/
+
+    if (tmpr_ares->rst.score[ppst->score_ix] > score_thresh) {
+      /* adjust the left boundary */
+      for (this_ares = tmpr_ares; this_ares; this_ares = this_ares->next) {	/* every result in the right-hand chain */
+#ifdef TFAST
+	this_ares->min1 += cur_ares->max1+1;
+	this_ares->max1 += cur_ares->max1+1;
+#else
+	this_ares->min0 += cur_ares->max0+1;
+	this_ares->max0 += cur_ares->max0+1;
+#endif
+      }
+
+      if (tmpr_ares->rst.score[ppst->score_ix] > max_sub_score) {
+	max_sub_score = tmpr_ares->rst.score[ppst->score_ix];
+      }
+    }
+    else {	/* right result is below threshold: discard it */
+      if (tmpr_ares->res) free(tmpr_ares->res);
+      free(tmpr_ares);
+      tmpr_ares = NULL;
+    }
+  }
+  else {tmpr_ares = NULL;}
+
+#ifdef TFAST
+    free(--my_xaa);
+#endif
+
+  /* NOTE(review): below-threshold sub-results were already freed and
+     set to NULL above, so both pointers look NULL whenever this test
+     is true -- confirm */
+  if (max_sub_score <= score_thresh) {
+    if (tmpl_ares) {
+      if (tmpl_ares->res) free(tmpl_ares->res);
+      free(tmpl_ares);
+    }
+    if (tmpr_ares) {
+      if (tmpr_ares->res) free(tmpr_ares->res);
+      free(tmpr_ares);
+    }
+    return cur_ares;
+  }
+
+  cur_ares = merge_ares_chains(cur_ares, tmpl_ares, score_ix, "left");
+  cur_ares = merge_ares_chains(cur_ares, tmpr_ares, score_ix, "right");
+
+  return cur_ares;
+}
+
+/* do_walign() can be called with aa0,n0 as nt (FASTX) or 
+   aa0,n0 as aa (TFASTX).  if aa0 is nt, then f_str->aa0x,y have the
+   translations already.  if aa0 is aa, then f_str->aa1x,y must be
+   generated.
+
+   This is the last time that aa0 can be nt or aa; in all lower
+   functions (fx_malign, do_fastx, fx_walign), both aa0, n0 and aa1,
+   n1 are amino acids; though one or the other may be translated.
+
+   In the lower functions, yaa can be aa0y (FASTX) or aa1y (TFASTX).
+   If it is aa1y, there may be no translation available.
+
+   21-Nov-2010 With fasta-36.3.1, do_walign() uses const aa0, aa1.  If aa1 needs
+   modification for recursive alignment, a copy is made.
+
+   Returns the head of the chain of alignment results produced by
+   fx_malign(), with the index field of each entry numbered from 0, or
+   NULL when the first a_res_str cannot be allocated.  *have_ares is
+   set to 0x3 in both cases.  Four search parameters in
+   ppst->param_u.fa are changed for the duration of the call and
+   restored before returning.
+*/
+
+struct a_res_str *
+do_walign (const unsigned char *aa0, int n0,
+	   const unsigned char *aa1, int n1,
+	   int frame, int repeat_thresh,
+	   struct pstruct *ppst, 
+	   struct f_struct *f_str, 
+	   int *have_ares)
+{
+  int hoff, use_E_thresholds_s, optflag_s, optcut_s, optwid_s, score;
+  struct a_res_str *a_res, *tmp_a_res;
+  int a_res_index;
+  int last_n1, itx, itt, n10, iphase;
+  unsigned char *xaa, *fs, *fd;
+  struct rstruct rst;
+#ifdef DEBUG
+  unsigned long adler32_crc;
+#endif
+
+  *have_ares = 0x3;	/* set 0x2 bit to indicate local copy */
+
+  if ((a_res = (struct a_res_str *)calloc(1, sizeof(struct a_res_str)))==NULL) {
+    fprintf(stderr," [do_walign] Cannot allocate a_res");
+    return NULL;
+  }
+
+#ifdef DEBUG
+  adler32_crc = adler32(1L,aa1,n1);	/* checksum, re-checked after the alignment */
+#endif
+
+  /* save the search parameters that are overridden below */
+  use_E_thresholds_s = ppst->param_u.fa.use_E_thresholds;
+  optflag_s = ppst->param_u.fa.optflag;
+  optcut_s = ppst->param_u.fa.optcut;
+  optwid_s = ppst->param_u.fa.optwid;
+  ppst->param_u.fa.use_E_thresholds = 0;
+  ppst->param_u.fa.optflag = 1;
+  ppst->param_u.fa.optcut = 0;
+  if (!ppst->param_u.fa.optwid_set) {	/* double optwid unless optwid_set is set */
+    ppst->param_u.fa.optwid *= 2;
+  }
+
+#ifndef TFAST	/* FASTX */
+  a_res = fx_malign(f_str->aa0x, n0, aa1, n1, f_str->aa0y, frame,
+		    repeat_thresh, f_str->max_res,
+		    ppst, f_str, a_res, 1);
+#else		/* TFASTX */
+  /* aa0 has a protein sequence */
+  /* aa1 has a raw DNA sequence */
+
+  itt = frame;
+  last_n1 = 0;
+  xaa = f_str->aa1x;
+  for (itx= itt*3; itx< itt*3+3; itx++) {	/* three saatran() calls, stored one after another */
+    n10  = saatran(aa1,&xaa[last_n1],n1,itx);
+    last_n1 += n10+1;	/* n10 elements plus one separator slot */
+  }
+  n10 = last_n1-1;
+  /* create aa1y from xaa */
+  for (fs=xaa,iphase=0; iphase <3; iphase++,fs++) {
+    for (fd= &f_str->aa1y[iphase]; *fs!=EOSEQ; fd += 3, fs++) *fd = *fs;	/* interleave: phase k fills every third slot from k */
+    *fd=EOSEQ;
+  }
+  f_str->have_yaa = 1;
+
+  a_res = fx_malign(aa0, n0, xaa, n10, f_str->aa1y, frame,
+		    repeat_thresh, f_str->max_res,
+		    ppst, f_str, a_res, 1);
+#endif
+  /*
+  if (a_res->res[0] != 3) {
+    fprintf(stderr, "*** alignment does not start with match: %d\n",a_res->res[0]);
+  }
+  */
+
+#ifdef DEBUG
+  if (adler32(1L,aa1,n1) != adler32_crc) {
+    fprintf(stderr,"[dropfx.c/do_walign] adler32_crc mismatch n1: %d\n",n1);
+  }
+#endif
+
+  a_res_index = 0;
+  for (tmp_a_res=a_res; tmp_a_res; tmp_a_res = tmp_a_res->next) {	/* number the results 0, 1, 2, ... */
+    tmp_a_res->index = a_res_index++;
+  }
+
+  /* restore the saved search parameters */
+  ppst->param_u.fa.use_E_thresholds = use_E_thresholds_s;
+  ppst->param_u.fa.optflag = optflag_s;
+  ppst->param_u.fa.optcut = optcut_s;
+  ppst->param_u.fa.optwid = optwid_s;
+  return a_res;
+}
+
+/* aln_func_vals - fill in aln->qlfact, qlrev, llfact, llmult, frame
+   and llrev for this program (FASTX or TFASTX build) */
+/* called from calcons, calc_id, calc_code */
+void 
+aln_func_vals(int frame, struct a_struct *aln) {
+
+  /* common to both builds */
+  aln->frame = frame;
+  aln->llmult = 1;
+
+#ifdef TFAST	/* TFASTX */
+  /* factor 3 and the frame-dependent reverse flag go on the ll side */
+  aln->qlfact = 1;
+  aln->qlrev = 0;
+  aln->llfact = 3;
+  aln->llrev = (frame > 0) ? 1 : 0;
+#else		/* FASTX */
+  /* factor 3 and the frame-dependent reverse flag go on the ql side */
+  aln->qlfact = 3;
+  aln->qlrev = (frame > 0) ? 1 : 0;
+  aln->llfact = 1;
+  aln->llrev = 0;
+#endif
+}
+
+/* pre_cons() is needed by programs like tfastx/y/s that translate the
+   DNA library sequence: it fills f_str->aa1x with the three
+   saatran() results for this frame, stored one after another, builds
+   the interleaved copy f_str->aa1y from it and sets f_str->have_yaa.
+   It does nothing unless TFAST is defined.
+*/
+
+void
+pre_cons(const unsigned char *aa1, int n1, int frame, struct f_struct *f_str) {
+#ifdef TFAST
+  int tx_frame, x_off, n_tx, phase;
+  unsigned char *src, *dst;
+
+  /* translate with frames 3*frame .. 3*frame+2, leaving one slot
+     between consecutive translations */
+  x_off = 0;
+  for (tx_frame = 3*frame; tx_frame < 3*frame + 3; tx_frame++) {
+    n_tx = saatran(aa1, &f_str->aa1x[x_off], n1, tx_frame);
+    x_off += n_tx + 1;
+  }
+
+  /* create aa1y from aa1x: translation k goes to slots k, k+3, ... */
+  src = f_str->aa1x;
+  for (phase = 0; phase < 3; phase++) {
+    dst = &f_str->aa1y[phase];
+    while (*src != EOSEQ) {
+      *dst = *src++;
+      dst += 3;
+    }
+    *dst = EOSEQ;
+    src++;			/* step over the EOSEQ between translations */
+  }
+  f_str->have_yaa = 1;
+#endif
+}
+
+/*
+   Alignment: stores the operations that align the protein and DNA sequences.
+   The meaning of each number in the array is as follows:
+   0:     deletion of an amino acid.
+   2:     frame shift, 2 nucleotides match with an amino acid
+   3:     match of an amino acid with a codon
+   4:     the other type of frame shift
+   5:     deletion of a codon
+
+   The first two elements of the array store the starting points
+   in the protein and DNA sequences in the local alignment.
+*/
+
+#include "a_mark.h"
+
+extern int align_type(int score, char sp0, char sp1, int nt_align, struct a_struct *aln, int pam_x_id_sim);
+
+extern void
+process_annot_match(int *itmp, int *pam2aa0v, 
+		    long ip, long ia, char *sp1, char *sp1a, const unsigned char *sq,
+		    struct annot_entry *annot_arr_p, int n_annots, char **ann_comment,
+		    void *annot_stack, int *have_push_features, int *v_delta,
+		    int *d_score_p, int *d_ident_p, int *d_alen_p, int *d_gaplen_p,
+		    struct domfeat_data **left_domain_head_p,
+		    struct domfeat_data *left_domain_p,
+		    long *left_end_p, int init_score);
+
+extern int
+next_annot_match(int *itmp, int *pam2aa0v, 
+		 long ip, long ia, char *sp1, char *sp1a, const unsigned char *sq,
+		 int i_annot, int n_annot, struct annot_entry **annot_arr, char **ann_comment,
+		 void *annot_stack, int *have_push_features, int *v_delta,
+		 int *d_score_p, int *d_ident_p, int *d_alen_p, int *d_gaplen_p,
+		 struct domfeat_data **left_domain_head_p,
+		 struct domfeat_data *left_domain_p,
+		 long *left_domain_end, int init_score);
+
+extern void
+close_annot_match (int ia, void *annot_stack, int *have_push_features,
+		   int *d_score_p, int *d_ident_p, int *d_alen_p, int *d_gaplen_p,
+		   struct domfeat_data **left_domain_p,
+		   long *left_end_p, int init_score);
+
+extern void
+comment_var(long i0, char sp0, long i1, char sp1, char o_sp1, char sim_char,
+	    const char *ann_comment, struct dyn_string_str *annot_var_dyn,
+	    int target, int d_type);
+
+void
+display_push_features(void *annot_stack, struct dyn_string_str *annot_var_dyn,
+		      long i0_pos, char sp0, long i1_pos, char sp1, char sym, 
+		      int score, double comp, int sw_score, int n0, int n1,
+		      void *pstat_void, int d_type);
+
+#define DP_FULL_FMT 1	/* Region: score: bits: id: ... */
+#define Q_TARGET 0
+#define L_TARGET 1
+
+int seq_pos(int pos, int rev, int off);
+
+/* values of calc_func_mode */
+#define CALC_CONS 1
+#define CALC_CODE 2
+#define CALC_ID   3
+#define CALC_ID_DOM   4
+
+/* add_annot_code: appends one "|X<ann>:<qpos><sp0><sim><lpos><sp1>" site */
+/* code to ann_code_dyn; both positions are written as offset+1 */
+void
+add_annot_code(int have_ann, char sp0, char sp1, 
+	       char ann_aa1_i1,
+	       long q_off_pos, long l_off_pos, char sim_sym_code,
+	       struct dyn_string_str *ann_code_dyn)
+{
+  const char site_tag = 'X';	/* fixed first character of every code */
+  char code_buf[MAX_STR];
+
+  /* no annotations at all, or a blank annotation symbol here */
+  if (!have_ann || ann_aa1_i1 == ' ') {
+    return;
+  }
+  /* '[' and ']' symbols never produce a code */
+  if (ann_aa1_i1 == '[' || ann_aa1_i1 == ']') {
+    return;
+  }
+
+  sprintf(code_buf, "|%c%c:%ld%c%c%ld%c",
+	    site_tag, ann_aa1_i1, q_off_pos+1, sp0,
+	    sim_sym_code, l_off_pos+1, sp1);
+  dyn_strcat(ann_code_dyn, code_buf);
+}
+
+/* universal alignment code builder for calc_cons_a(), calc_code(), calc_id() and calc_idd() */
+/* see cal_cons2.c/calc_cons_u() for strategy; returns lenc+not_c and stores lenc in *nc */
+/* by calc_func_mode: CALC_CONS fills seqc0/seqc1/seqca, CALC_CODE feeds update_code(), CALC_ID* use scratch chars */
+int
+calc_cons_u( /* inputs */
+	    const unsigned char *aa0, int n0,
+	    const unsigned char *aa1, int n1,
+	    struct a_res_str *a_res,	/* alignment encoding */
+	    struct pstruct *ppst,
+	    struct f_struct *f_str,
+	    void *pstat_void,
+	    /* annotation stuff */
+	    const unsigned char *ann_arr,
+	    const unsigned char *aa0a, const struct annot_str *annot0_p,
+	    const unsigned char *aa1a, const struct annot_str *annot1_p,
+	    int calc_func_mode, 	/* CALC_CONS, CALC_CODE, CALC_ID */
+	    int display_code, 	/* used only by CALC_CODE */
+	    /* outputs */
+	    int *nc,
+	    char *seqc0, char *seqc1, char *seqca, int *cumm_seq_score,
+	    char *seqc0a, char *seqc1a,
+	    struct a_struct *aln,
+	    int *score_delta, 
+	    struct dyn_string_str *annot_var_dyn,
+	    struct dyn_string_str *align_code_dyn)
+{
+  int i0, i1, i, j;
+  int lenc, not_c, itmp, ngap_p, ngap_d, nfs;
+  int *i_spa;
+  char *sp0_p, *sp0a_p, *sp1_p, *sp1a_p, *spa_p, t_spa;
+  char sp0_c, sp1_c, spa_c;	/* used for CALC_ID, CALC_CODE */
+  char sp0a_c, sp1a_c;		/* used for CALC_CODE */
+
+  struct update_code_str *update_data_p;
+
+  const unsigned char *sq;
+  const unsigned char *ap0, *ap1;
+  const unsigned char *ap1a;	/* ap1 always points to protein, and
+				   only protein has annotations */
+  const struct annot_str *annotp_p;	/* protein annotations from annot_str */
+  int comment_target;
+
+  int *rp, *rpmax;
+  int have_ann;
+
+  /* variables for variant changes/region scores */
+  char tmp_str[MAX_LSTR];
+  void *annot_stack;
+  int have_push_features, prev_match, *have_push_features_p;
+
+  char *sim_sym = aln_map_sym[MX_ACC];
+  struct annot_entry **s_annotp_arr_p;
+  int  i1_annot, v_delta, v_tmp;
+  long i0_offset, i1_offset;
+
+  long i1_left_end;
+  int show_code, annot_fmt, start_flag;
+
+  int d1_score, d1_ident, d1_alen, d1_gaplen;
+  struct domfeat_data *left_domain_list1, *left_domain_head1;
+
+  char *ann_comment;
+
+  *score_delta = 0;
+  d1_score = d1_ident = d1_alen = d1_gaplen = 0;
+  i1_left_end = -1;
+  left_domain_head1 = left_domain_list1 = NULL;
+
+  NULL_dyn_string(annot_var_dyn);
+
+  if (ppst->ext_sq_set) {sq = ppst->sqx;}
+  else {sq = ppst->sq;}
+
+#ifndef TFAST	/* FASTX */
+  comment_target = 1;
+  aln->amin1 = aln->smin1 = a_res->min0;	/* prot */
+  aln->amin0 = aln->smin0 = a_res->min1;	/* DNA */
+
+  i0_offset = aln->q_offset;
+  i1_offset = aln->l_offset;
+
+  ap0 = f_str->aa0y;	/* translated DNA */
+  ap1 = aa1;		/* protein */
+
+  ap1a = aa1a;
+  annotp_p = annot1_p;
+
+  if (calc_func_mode == CALC_CONS) {
+    have_ann = (seqc0a !=NULL && aa1a != NULL);
+    sp0_p = seqc0;	/* translated DNA */
+    sp1_p = seqc1;	/* protein */
+    spa_p = seqca;
+    sp1a_p = seqc1a;	/* protein library can have annotation */
+    sp0a_p = seqc0a;	/* sp0a is always ' ' - no translated annotation */
+    annot_fmt = DP_FULL_FMT;
+  }
+  else if (calc_func_mode == CALC_ID || calc_func_mode == CALC_ID_DOM) {
+    /* does not require aa0a/aa1a, only for variants */
+    have_ann = ((annotp_p && annotp_p->n_annot > 0) || (annot0_p && annot0_p->n_annot > 0));
+    sp0_p = &sp0_c;
+    sp1_p = &sp1_c;
+    spa_p = &spa_c;
+    sp0a_p = &sp0a_c;
+    sp1a_p = &sp1a_c;
+    annot_fmt = 3;
+  }
+  else if (calc_func_mode == CALC_CODE) {
+    spa_p = &spa_c;
+    sp0_p = &sp0_c;
+    sp1_p = &sp1_c;
+    sp0a_p = &sp0a_c;
+    sp1a_p = &sp1a_c;
+
+    show_code = (display_code & (SHOW_CODE_MASK+SHOW_CODE_EXT));	/* see defs.h; SHOW_CODE_ALIGN=2,_CIGAR=3,_CIGAR_EXT=4 */
+    annot_fmt = 2;
+    if (display_code & SHOW_ANNOT_FULL) {
+      annot_fmt = 1;
+    }
+    /* have_ann encodes number of sequences annotated */
+    have_ann = 0;
+    if ((annotp_p && annotp_p->n_annot > 0) || (aa1a != NULL)) { have_ann |= 2;}
+    update_data_p = init_update_data(show_code);	/* NULL if calloc() fails */
+  }
+  else {
+    fprintf(stderr,"*** error [%s:%d] --- cal_cons_u() invalid calc_func_mode: %d\n",
+	    __FILE__, __LINE__, calc_func_mode);
+    exit(1);
+  }
+
+#else		/* TFASTX */
+  comment_target = 0;
+  aln->amin0 = aln->smin0 = a_res->min0;	/* DNA */
+  aln->amin1 = aln->smin1 = a_res->min1;	/* prot */
+
+  i1_offset = aln->q_offset;
+  i0_offset = aln->l_offset;
+
+  ap1 = aa0;		/* aa0 is protein */
+  /* with fx_malign(), there is no guarantee that we have a valid f_str->aa1y, so make one */
+  pre_cons(aa1,n1,aln->frame, f_str);
+  ap0 = f_str->aa1y;	/* aa1 is DNA */
+  ap1a = aa0a;
+  annotp_p = annot0_p;
+
+  have_ann = (seqc0a !=NULL && aa0a != NULL);
+  if (calc_func_mode == CALC_CONS) {
+    sp1_p = seqc0;		/* sp1 points to protein query */
+    sp0_p = seqc1;		/* sp0 points to DNA */
+    spa_p = seqca;
+    sp1a_p = seqc0a;	/* protein query can have annotation */
+    sp0a_p = seqc1a;	/* sp0a is always ' ' - no translated annotation */
+    annot_fmt = DP_FULL_FMT;
+  }
+  else if (calc_func_mode == CALC_ID || calc_func_mode == CALC_ID_DOM) {
+    have_ann = (annotp_p && annotp_p->n_annot > 0);
+    spa_p = &spa_c;
+    sp0_p = &sp0_c;
+    sp1_p = &sp1_c;
+
+    sp0a_p = &sp0a_c;
+    sp1a_p = &sp1a_c;
+    annot_fmt = 3;
+
+    /* does not require aa0a/aa1a, only for variants */
+  }
+  else if (calc_func_mode == CALC_CODE) {
+    spa_p = &spa_c;
+    sp0_p = &sp0_c;
+    sp1_p = &sp1_c;
+
+    sp0a_p = &sp0a_c;
+    sp1a_p = &sp1a_c;
+
+    show_code = (display_code & (SHOW_CODE_MASK+SHOW_CODE_EXT));	/* see defs.h; SHOW_CODE_ALIGN=2,_CIGAR=3,_CIGAR_EXT=4 */
+    annot_fmt = 2;
+    if (display_code & SHOW_ANNOT_FULL) {
+      annot_fmt = 1;
+    }
+
+    /* have_ann encodes number of sequences annotated */
+    if ((annotp_p && annotp_p->n_annot > 0) || (ap1a != NULL)) { have_ann |= 1;}
+
+    update_data_p = init_update_data(show_code);	/* NULL if calloc() fails */
+  }
+  else {
+    fprintf(stderr,"*** error [%s:%d] --- cal_cons_u() invalid calc_func_mode: %d\n",
+	    __FILE__, __LINE__, calc_func_mode);
+    exit(1);
+  }
+#endif
+  if (cumm_seq_score) i_spa = cumm_seq_score;	/* i_spa walks the per-column score output */
+
+  rp = a_res->res;
+  rpmax = &a_res->res[a_res->nres];
+
+  lenc = not_c = aln->nident = aln->nmismatch = aln->nsim = aln->npos = ngap_p = ngap_d = nfs= 0;
+  i0 = a_res->min1;
+  i1 = a_res->min0;
+
+  v_delta = 0;
+  i1_annot = 0;
+  annot_stack = NULL;
+  s_annotp_arr_p = NULL;
+  have_push_features = prev_match = 0;
+  /* before the loop: handle '-' labelled annotations that already span the alignment start */
+  if (have_ann) {
+    have_push_features_p = &have_push_features;
+
+    if (annotp_p && annotp_p->n_annot > 0) {
+      annot_stack = init_stack(64,64);
+      left_domain_list1=init_domfeat_data(annotp_p);
+      s_annotp_arr_p = annotp_p->s_annot_arr_p;
+
+      while (i1_annot < annotp_p->n_annot) {
+	if (s_annotp_arr_p[i1_annot]->pos >= i1+i1_offset) {break;}
+	if (s_annotp_arr_p[i1_annot]->end <= i1+i1_offset) {i1_annot++; continue;}
+
+	if (s_annotp_arr_p[i1_annot]->label == '-') {
+	  process_annot_match(&itmp, NULL, 
+#ifndef TFAST
+			      i1_offset+seq_pos(i1,aln->llrev,0),
+			      i0_offset+seq_pos(i0,aln->qlrev,0),
+#else
+			      i1_offset+seq_pos(i1,aln->qlrev,0),
+			      i0_offset+seq_pos(i0,aln->llrev,0),
+#endif
+			      sp1_p, sp1a_p, sq, s_annotp_arr_p[i1_annot], annotp_p->n_annot,
+			      &ann_comment, annot_stack, have_push_features_p, &v_delta,
+			      &d1_score, &d1_ident, &d1_alen, &d1_gaplen,
+			      &left_domain_head1, &left_domain_list1[i1_annot], &i1_left_end, 0);
+	}
+	i1_annot++;
+      }
+    }
+  }
+  /* main loop: one res[] entry (0, 2, 3, 4 or 5) per pass */
+  while (rp < rpmax) {
+    /*    fprintf(stderr,"%d %d %d (%c) %d (%c)\n"
+	  ,(int)(rp-res),*rp,i0,sq[ap0[i0]],i1,sq[ap1[i1]]);
+    */
+    switch (*rp++) {
+    case 0: 	/* aa insertion */
+      *sp0_p = '-';
+      *sp1_p = sq[ap1[i1]];
+      *spa_p = M_DEL;
+
+      if (calc_func_mode == CALC_CODE) {
+	*spa_p = 5; /* indel code */
+	update_code(align_code_dyn, update_data_p, 0, *spa_p,*sp0_p,*sp1_p);
+      }
+
+      if (calc_func_mode == CALC_CONS) {sp0_p++; sp1_p++; spa_p++;}
+
+      if (cumm_seq_score) {
+	/* gdelval only right after a match, ggapval always (as for d1_score); assign, never accumulate */
+	*i_spa++ = (prev_match ? ppst->gdelval : 0) + ppst->ggapval;
+      }
+
+      if (have_ann) {
+	if (calc_func_mode != CALC_ID && calc_func_mode != CALC_ID_DOM) {
+	  *sp0a_p = ' ';
+	  *sp1a_p = ann_arr[ap1a[i1]];
+	}
+	if (s_annotp_arr_p) {
+	  if (i1+i1_offset == s_annotp_arr_p[i1_annot]->pos || i1+i1_offset == i1_left_end) {
+	    /* NOTE(review): the test above reads s_annotp_arr_p[i1_annot] without comparing i1_annot to n_annot - confirm */
+	    i1_annot = next_annot_match(&itmp, ppst->pam2[0][ap0[i0]], 
+#ifndef TFAST
+					i1_offset+seq_pos(i1,aln->llrev,0),	/* annotated target (prot) coordinate */
+					i0_offset+seq_pos(i0,aln->qlrev,0),
+#else
+					i1_offset+seq_pos(i1,aln->qlrev,0),
+					i0_offset+seq_pos(i0,aln->llrev,0),
+#endif
+					sp1_p, sp1a_p, sq, 
+					i1_annot, annotp_p->n_annot, s_annotp_arr_p,
+					&ann_comment, annot_stack, have_push_features_p, &v_delta,
+					&d1_score, &d1_ident, &d1_alen, &d1_gaplen,
+					&left_domain_head1, left_domain_list1, &i1_left_end,
+					ppst->ggapval+ppst->gdelval);
+	  }
+
+	  if (prev_match) d1_score += ppst->gdelval;
+	  d1_score += ppst->ggapval;
+	  d1_alen++;
+	  d1_gaplen++;
+	  prev_match = 0;
+	}
+	if (calc_func_mode == CALC_CONS) {sp0a_p++; sp1a_p++;}
+      }
+
+      if (have_ann && have_push_features  &&  calc_func_mode != CALC_ID) {
+	display_push_features(annot_stack, annot_var_dyn,
+#ifndef TFAST
+			      i0_offset+seq_pos(i0,aln->qlrev,0), *sp0_p,
+			      i1_offset+seq_pos(i1,aln->llrev,0), *sp1_p,
+#else
+			      i1_offset+seq_pos(i1,aln->qlrev,0), *sp0_p,
+			      i0_offset+seq_pos(i0,aln->llrev,0), *sp1_p,
+#endif
+			      sim_sym[*spa_p], 
+			      a_res->rst.score[ppst->score_ix], a_res->rst.comp, a_res->sw_score,
+			      n0, n1, pstat_void, annot_fmt);
+	have_push_features = 0;
+      }
+
+      i1++;
+      lenc++;
+      ngap_d++;
+      break;
+    case 2:	/* -1 frameshift, which is treated as an insertion/match for annotations */
+      nfs++;
+      /* frameshifts produce a two-character alignment string */
+      /* first annotate the frameshift  (first character) */
+      *sp0_p = '/';
+      i0 -= 1;
+      *sp1_p = '-';
+      *spa_p = M_DEL;
+
+      if (calc_func_mode == CALC_CODE) {
+	update_code(align_code_dyn, update_data_p, 2, *spa_p,*sp0_p,*sp1_p);
+      }
+
+      if (calc_func_mode == CALC_CONS) {
+	sp0_p++; sp1_p++; spa_p++;
+	if (have_ann) {*sp0a_p++ = *sp1a_p++ = ' ';}
+      }
+
+      not_c++;
+
+      /* then annotate the match after the frameshift */
+
+      itmp=ppst->pam2[0][ap0[i0]][ap1[i1]];
+      *sp0_p = sq[ap0[i0]];
+      *sp1_p = sq[ap1[i1]];
+
+      if (cumm_seq_score) *i_spa++ = ppst->gshift;
+
+      if (have_ann) {
+	have_push_features = 0;
+	/* this simple strategy works because the coordinate system
+	   for the alignment is reversed appropriately */
+	if (calc_func_mode != CALC_ID && calc_func_mode != CALC_ID_DOM) {
+	  *sp1a_p = ann_arr[ap1a[i1]];
+	  *sp0a_p = ' ';
+	}
+	if (s_annotp_arr_p) {
+	  /* coordinates are much more complex for next_annot_match,
+	     and comment_var, because they may need to be reversed */
+
+	  if (i1+i1_offset == s_annotp_arr_p[i1_annot]->pos || i1+i1_offset == i1_left_end) {
+	    i1_annot = next_annot_match(&itmp, ppst->pam2[0][ap0[i0]],
+#ifndef TFAST
+					i1_offset+seq_pos(i1,aln->llrev,0),
+					i0_offset+seq_pos(i0,aln->qlrev,0),
+#else
+					i1_offset+seq_pos(i1,aln->qlrev,0),
+					i0_offset+seq_pos(i0,aln->llrev,0),
+#endif
+					sp1_p, sp1a_p, sq, 
+					i1_annot, annotp_p->n_annot, s_annotp_arr_p,
+					&ann_comment, annot_stack, have_push_features_p, &v_delta,
+					&d1_score, &d1_ident, &d1_alen, &d1_gaplen,
+					&left_domain_head1, left_domain_list1, &i1_left_end,0);
+
+	    if (sq[ap1[i1]] != *sp1_p) {
+	      t_spa = align_type(itmp, *sp0_p, *sp1_p, 0, NULL, ppst->pam_x_id_sim);
+
+	      if (calc_func_mode != CALC_ID && calc_func_mode != CALC_ID_DOM) {
+		comment_var(
+#ifndef TFAST
+			    i0_offset+seq_pos(i0,aln->qlrev,0), *sp0_p,
+			    i1_offset+seq_pos(i1,aln->llrev,0), *sp1_p,
+#else
+			    i1_offset+seq_pos(i1,aln->qlrev,0), *sp0_p,
+			    i0_offset+seq_pos(i0,aln->llrev,0), *sp1_p,
+#endif
+			    sq[ap1[i1]], sim_sym[t_spa], ann_comment,
+			    annot_var_dyn,comment_target,annot_fmt);
+	      }
+	      else {
+		sprintf(tmp_str,"%c%d%c;",sq[ap1[i1]],i1+1,*sp1_p);
+		/*   SAFE_STRNCAT(annot_var_s,tmp_str,n_annot_var_s); */
+		dyn_strcat(annot_var_dyn, tmp_str);
+	      }
+	    }
+	  }
+	  d1_score += ppst->gshift;
+	  d1_score += itmp;
+	  prev_match = 1;
+	}
+	if (calc_func_mode == CALC_CONS) {sp0a_p++; sp1a_p++;}
+      }
+
+      *spa_p = align_type(itmp, *sp0_p, *sp1_p, 0, aln, ppst->pam_x_id_sim);
+
+      if (calc_func_mode == CALC_CODE) {
+	update_code(align_code_dyn, update_data_p, 3, *spa_p,*sp0_p,*sp1_p);
+      }
+
+      d1_alen++;
+      if (*spa_p == M_IDENT) {d1_ident++;}
+
+      if (have_ann && calc_func_mode == CALC_CODE) {
+    	  add_annot_code(have_ann, *sp0_p, *sp1_p, *sp1a_p,
+#ifndef TFAST
+			 i0_offset+seq_pos(i0,aln->qlrev,0),
+			 i1_offset+seq_pos(i1,aln->llrev,0),
+#else
+			 i1_offset+seq_pos(i1,aln->qlrev,0),
+			 i0_offset+seq_pos(i0,aln->llrev,0),
+#endif
+			 sim_sym[*spa_p], annot_var_dyn);
+	}
+
+      if (have_ann && have_push_features  &&  calc_func_mode != CALC_ID) {
+	display_push_features(annot_stack, annot_var_dyn,
+#ifndef TFAST
+			      i0_offset+seq_pos(i0,aln->qlrev,0), *sp0_p,
+			      i1_offset+seq_pos(i1,aln->llrev,0), *sp1_p,
+#else
+			      i1_offset+seq_pos(i1,aln->qlrev,0), *sp0_p,
+			      i0_offset+seq_pos(i0,aln->llrev,0), *sp1_p,
+#endif
+			      sim_sym[*spa_p],
+			      a_res->rst.score[ppst->score_ix], a_res->rst.comp, a_res->sw_score,
+			      n0, n1, pstat_void, annot_fmt);
+	have_push_features = 0;
+      }
+
+      if (cumm_seq_score) *i_spa++ = itmp;
+      i0 += 3;
+      i1++;
+
+      if (calc_func_mode == CALC_CONS) {sp0_p++; sp1_p++; spa_p++;}
+      lenc++;
+      break;
+    case 3:	/* codon/aa match */
+      itmp=ppst->pam2[0][ap0[i0]][ap1[i1]];
+      *sp0_p = sq[ap0[i0]];
+      *sp1_p = sq[ap1[i1]];
+
+      if (have_ann) {
+	if (calc_func_mode != CALC_ID &&  calc_func_mode != CALC_ID_DOM) {
+	  *sp1a_p = ann_arr[ap1a[i1]];
+	  *sp0a_p = ' ';
+	}
+	if (s_annotp_arr_p) {
+	  if (i1+i1_offset == s_annotp_arr_p[i1_annot]->pos || i1+i1_offset == i1_left_end) {
+	    i1_annot = next_annot_match(&itmp, ppst->pam2[0][ap0[i0]],
+#ifndef TFAST
+					i1_offset+seq_pos(i1,aln->llrev,0),
+					i0_offset+seq_pos(i0,aln->qlrev,0),
+#else
+					i1_offset+seq_pos(i1,aln->qlrev,0),
+					i0_offset+seq_pos(i0,aln->llrev,0),
+#endif
+					sp1_p, sp1a_p, sq, 
+					i1_annot, annotp_p->n_annot, s_annotp_arr_p,
+					&ann_comment, annot_stack, have_push_features_p, &v_delta,
+					&d1_score, &d1_ident, &d1_alen, &d1_gaplen,
+					&left_domain_head1, left_domain_list1, &i1_left_end,0);
+
+	    if (sq[ap1[i1]] != *sp1_p) {
+	      t_spa = align_type(itmp, *sp0_p, *sp1_p, 0, NULL, ppst->pam_x_id_sim);
+
+	      if (calc_func_mode != CALC_ID && calc_func_mode != CALC_ID_DOM) {
+
+	      comment_var(
+#ifndef TFAST
+			    i0_offset+seq_pos(i0,aln->qlrev,0), *sp0_p,
+			    i1_offset+seq_pos(i1,aln->llrev,0), *sp1_p,
+#else
+			    i1_offset+seq_pos(i1,aln->qlrev,0), *sp0_p,
+			    i0_offset+seq_pos(i0,aln->llrev,0), *sp1_p,
+#endif
+			  sq[ap1[i1]], sim_sym[t_spa], ann_comment,
+			  annot_var_dyn, comment_target, annot_fmt);
+	      }
+	      else {
+		sprintf(tmp_str,"%c%d%c;",sq[ap1[i1]],i1+1,*sp1_p);
+		/*  SAFE_STRNCAT(annot_var_s,tmp_str,n_annot_var_s); */
+		dyn_strcat(annot_var_dyn, tmp_str);
+	      }
+	    }
+	  }
+	  prev_match = 1;
+	  d1_score += itmp;
+	}
+	if (calc_func_mode == CALC_CONS) {sp0a_p++; sp1a_p++;}
+      }
+
+      *spa_p = align_type(itmp, *sp0_p, *sp1_p, 0, aln, ppst->pam_x_id_sim);
+      d1_alen++;
+      if (*spa_p == M_IDENT) {d1_ident++;}
+  
+      if (cumm_seq_score) *i_spa++ = itmp;
+
+      if (calc_func_mode == CALC_CODE) {
+	update_code(align_code_dyn, update_data_p, 3, *spa_p, *sp0_p, *sp1_p);
+      
+	if (have_push_features) {
+	  add_annot_code(have_ann, *sp0_p, *sp1_p, *sp1a_p,
+#ifndef TFAST
+			 i0_offset+seq_pos(i0,aln->qlrev,0),
+			 i1_offset+seq_pos(i1,aln->llrev,0),
+#else
+			 i1_offset+seq_pos(i1,aln->qlrev,0),
+			 i0_offset+seq_pos(i0,aln->llrev,0),
+#endif
+			 sim_sym[*spa_p], annot_var_dyn);
+	}
+      }
+
+      if (have_ann && have_push_features  &&  calc_func_mode != CALC_ID) {
+	display_push_features(annot_stack, annot_var_dyn,
+#ifndef TFAST
+			      i0_offset+seq_pos(i0,aln->qlrev,0), *sp0_p,
+			      i1_offset+seq_pos(i1,aln->llrev,0), *sp1_p,
+#else
+			      i1_offset+seq_pos(i1,aln->qlrev,0), *sp0_p,
+			      i0_offset+seq_pos(i0,aln->llrev,0), *sp1_p,
+#endif
+			      sim_sym[*spa_p],
+			      a_res->rst.score[ppst->score_ix], a_res->rst.comp, a_res->sw_score,
+			      n0, n1, pstat_void, annot_fmt);
+	have_push_features = 0;
+      }
+
+      i0 += 3;
+      i1++;
+
+      if (calc_func_mode == CALC_CONS) {sp0_p++; sp1_p++; spa_p++;}
+      lenc++;
+      break;
+    case 4:	/* +1 frameshift */
+      nfs++;
+      /* frameshift produces two alignment characters */
+      /* first frameshift */
+      *sp0_p = '\\';
+      i0 += 1;
+      *sp1_p = '-';
+      *spa_p = M_DEL;
+
+      if (calc_func_mode == CALC_CODE) {
+        update_code(align_code_dyn, update_data_p, 4, *spa_p, *sp0_p, *sp1_p);
+      }
+
+      if (calc_func_mode == CALC_CONS) {sp0_p++; sp1_p++; spa_p++;}
+
+      if (cumm_seq_score) *i_spa++ = ppst->gshift;
+
+      if (have_ann && calc_func_mode == CALC_CONS) {*sp1a_p++ = *sp0a_p++ = ' ';}
+      not_c++;
+
+      /* then alignment */
+      itmp=ppst->pam2[0][ap0[i0]][ap1[i1]];
+      *sp0_p = sq[ap0[i0]];
+      *sp1_p = sq[ap1[i1]];
+
+      if (have_ann) {
+	if (calc_func_mode != CALC_ID  && calc_func_mode != CALC_ID_DOM) {
+	  *sp1a_p = ann_arr[ap1a[i1]];
+	  *sp0a_p = ' ';
+	}
+	if (s_annotp_arr_p) {
+	  if (i1+i1_offset == s_annotp_arr_p[i1_annot]->pos || i1+i1_offset == i1_left_end) {
+	    i1_annot = next_annot_match(&itmp, ppst->pam2[0][ap0[i0]], 
+#ifndef TFAST
+					i1_offset+seq_pos(i1,aln->llrev,0),
+					i0_offset+seq_pos(i0,aln->qlrev,0),
+#else
+					i1_offset+seq_pos(i1,aln->qlrev,0),
+					i0_offset+seq_pos(i0,aln->llrev,0),
+#endif
+					sp1_p, sp1a_p, sq, 
+					i1_annot, annotp_p->n_annot, s_annotp_arr_p,
+					&ann_comment, annot_stack, have_push_features_p, &v_delta,
+					&d1_score, &d1_ident, &d1_alen, &d1_gaplen,
+					&left_domain_head1, left_domain_list1, &i1_left_end,0);
+
+	    if (sq[ap1[i1]] != *sp1_p) {
+	      t_spa = align_type(itmp, *sp0_p, *sp1_p, 0, NULL, ppst->pam_x_id_sim);
+	      if (calc_func_mode != CALC_ID  && calc_func_mode != CALC_ID_DOM) {
+		comment_var(
+#ifndef TFAST
+			    i0_offset+seq_pos(i0,aln->qlrev,0), *sp0_p,
+			    i1_offset+seq_pos(i1,aln->llrev,0), *sp1_p,
+#else
+			    i1_offset+seq_pos(i1,aln->qlrev,0), *sp0_p,
+			    i0_offset+seq_pos(i0,aln->llrev,0), *sp1_p,
+#endif
+			    sq[ap1[i1]], sim_sym[t_spa], ann_comment,
+			    annot_var_dyn, comment_target, annot_fmt);
+	      }
+	      else {
+	      	sprintf(tmp_str,"%c%d%c;",sq[ap1[i1]],i1+1,*sp1_p);
+		/*   SAFE_STRNCAT(annot_var_s,tmp_str,n_annot_var_s); */
+		dyn_strcat(annot_var_dyn, tmp_str);
+	      }
+	    }
+	  }
+	  d1_score += ppst->gshift;
+	  d1_score += itmp;
+	  prev_match = 1;
+	}
+	if (calc_func_mode == CALC_CONS) {sp0a_p++; sp1a_p++;}
+      }
+
+      *spa_p = align_type(itmp, *sp0_p, *sp1_p, 0, aln, ppst->pam_x_id_sim);
+      d1_alen++;
+      if (*spa_p == M_IDENT) {d1_ident++;}
+
+      if (calc_func_mode == CALC_CODE) {
+	update_code(align_code_dyn, update_data_p, 3, *spa_p,*sp0_p,*sp1_p);
+      }
+
+      if (cumm_seq_score) *i_spa++ = itmp;
+
+      /* now we have done all the ?modified identity checks, display
+	 potential site annotations */
+      if (have_ann && have_push_features  &&  calc_func_mode != CALC_ID) {
+	display_push_features(annot_stack, annot_var_dyn,
+#ifndef TFAST
+			      i0_offset+seq_pos(i0,aln->qlrev,0), *sp0_p,
+			      i1_offset+seq_pos(i1,aln->llrev,0), *sp1_p,
+#else
+			      i1_offset+seq_pos(i1,aln->qlrev,0), *sp0_p,
+			      i0_offset+seq_pos(i0,aln->llrev,0), *sp1_p,
+#endif
+			      sim_sym[*spa_p], 
+			      a_res->rst.score[ppst->score_ix], a_res->rst.comp, a_res->sw_score,
+			      n0, n1, pstat_void, annot_fmt);
+	have_push_features = 0;
+      }
+
+      i0 += 3;
+      i1++;
+
+      if (calc_func_mode == CALC_CONS) {sp0_p++; sp1_p++; spa_p++;}
+      lenc++;
+      break;
+    case 5:	/* codon insertion */
+      if (have_ann && calc_func_mode == CALC_CONS) {
+	*sp1a_p++ = *sp0a_p++ = ' ';
+      }
+
+      if (cumm_seq_score) {
+	/* gdelval only right after a match, ggapval always (as for d1_score below) */
+	*i_spa++ = (prev_match ? ppst->gdelval : 0) + ppst->ggapval;
+      }
+
+      if (prev_match) d1_score += ppst->gdelval;
+      d1_score += ppst->ggapval;
+
+      prev_match = 0;
+
+      *sp0_p = sq[ap0[i0]];
+      *sp1_p = '-';
+      *spa_p = M_DEL;
+
+      if (calc_func_mode == CALC_CODE) {
+	*spa_p = 5;
+	update_code(align_code_dyn, update_data_p, 5, *spa_p,*sp0_p,*sp1_p);
+      }
+
+      if (calc_func_mode == CALC_CONS) {sp0_p++; sp1_p++; spa_p++;}
+      i0 += 3;
+
+      lenc++;
+      ngap_p++;
+      break;
+    }
+  }
+
+  /* done with alignment loop */
+
+  if (calc_func_mode == CALC_CODE) {
+    close_update_data(align_code_dyn, update_data_p);
+  }
+
+  if (have_ann) {
+    if (calc_func_mode != CALC_ID  && calc_func_mode != CALC_ID_DOM) {*sp0a_p = *sp1a_p = '\0';}
+    if (s_annotp_arr_p) {
+      have_push_features = 0;
+
+      if (s_annotp_arr_p && i1_left_end > 0) {
+	close_annot_match(-1, annot_stack, have_push_features_p,
+			  &d1_score, &d1_ident, &d1_alen, &d1_gaplen, 
+			  &left_domain_head1, &i1_left_end,
+			  0);
+      }
+
+      if (have_push_features &&  calc_func_mode != CALC_ID) {
+	display_push_features(annot_stack, annot_var_dyn,
+#ifndef TFAST
+			      i0_offset+seq_pos(i0-1,aln->qlrev,0), *sp0_p,
+			      i1_offset+seq_pos(i1-1,aln->llrev,0), *sp1_p,
+#else
+			      i1_offset+seq_pos(i1-1,aln->qlrev,0), *sp0_p,
+			      i0_offset+seq_pos(i0-1,aln->llrev,0), *sp1_p,
+#endif
+			      sim_sym[*spa_p], 
+			      a_res->rst.score[ppst->score_ix], a_res->rst.comp, a_res->sw_score,
+			      n0, n1, pstat_void, annot_fmt);
+	have_push_features = 0;
+      }
+    }
+    if (left_domain_list1) free(left_domain_list1);
+    free_stack(annot_stack);
+  }
+  *spa_p = '\0';
+
+#ifndef TFAST
+  aln->amax0 = i0;
+  aln->amax1 = i1;
+  aln->ngap_q = ngap_d;
+  aln->ngap_l = ngap_p;
+#else
+  aln->amax1 = i0;
+  aln->amax0 = i1;
+  aln->ngap_q = ngap_p;
+  aln->ngap_l = ngap_d;
+#endif
+  aln->calc_last_set = 1;
+
+  aln->nfs = nfs;
+
+  *score_delta = v_delta;
+
+  if (lenc < 0) lenc = 1;
+  *nc = lenc;
+/*	now we have the middle, get the right end */
+  return lenc+not_c;
+}
+
+int
+calc_cons_a(const unsigned char *aa0, int n0,
+	    const unsigned char *aa1, int n1,
+	    int *nc,
+	    struct a_struct *aln,
+	    struct a_res_str *a_res, 
+	    struct pstruct *ppst,
+	    char *seqc0, char *seqc1, char *seqca, int *cumm_seq_score,
+	    const unsigned char *ann_arr,
+	    const unsigned char *aa0a, const struct annot_str *annot0_p, char *seqc0a,
+	    const unsigned char *aa1a, const struct annot_str *annot1_p, char *seqc1a,
+	    int *score_delta,
+	    struct dyn_string_str *annot_var_dyn,
+	    struct f_struct *f_str,
+	    void *pstat_void)
+{
+  int aln_len;	/* value returned by calc_cons_u() */
+  /* CALC_CONS mode: display_code is 0 and no align_code_dyn is passed */
+  aln_len = calc_cons_u(aa0, n0, aa1, n1, a_res, ppst, f_str, pstat_void,
+			ann_arr, aa0a, annot0_p, aa1a, annot1_p, CALC_CONS, 0,
+			nc, seqc0, seqc1, seqca, cumm_seq_score,
+			seqc0a, seqc1a, aln, score_delta, annot_var_dyn, NULL);
+  return aln_len;
+}
+
+void
+calc_astruct(struct a_struct *aln_p, struct a_res_str *a_res_p, struct f_struct *f_str) {
+  /* copy the alignment limits from a_res_p and clear calc_last_set */
+#ifdef TFAST	/* TFASTX: a_res_p min0/max0 -> amin0/amax0, min1/max1 -> amin1/amax1 */
+  aln_p->amin0 = a_res_p->min0;
+  aln_p->amax0 = a_res_p->max0;
+  aln_p->amin1 = a_res_p->min1;
+  aln_p->amax1 = a_res_p->max1;
+#else		/* FASTX: swapped, a_res_p min0/max0 -> amin1/amax1, min1/max1 -> amin0/amax0 */
+  aln_p->amin1 = a_res_p->min0;
+  aln_p->amax1 = a_res_p->max0;
+  aln_p->amin0 = a_res_p->min1;
+  aln_p->amax0 = a_res_p->max1;
+#endif
+
+  aln_p->calc_last_set = 0;
+}
+
+/* build an array of match/ins/del - length strings */
+
+/* modified 10-June-2014 to distinguish matches from mismatches, op=1
+   (previously unused) indicates an aligned non-identity */
+
+/* op_codes are:  0 - aa insertion
+   		  1 - (now) aligned non-identity
+   		  2 - -1 frameshift
+   		  3 - aligned identity
+   		  4 - +1 frameshift
+   		  5 - codon insertion
+*/
+
+static struct update_code_str *
+init_update_data(int show_code) {
+  /* allocate and set up the run state that update_code() works on */
+  struct update_code_str *update_data_p;
+  /* returns NULL (after a message on stderr) if calloc() fails */
+  if ((update_data_p = (struct update_code_str *)calloc(1,sizeof(struct update_code_str)))==NULL) {
+    fprintf(stderr,"*** error [%s:%d] - init_update_data(): cannot allocate update_code_str\n",
+	      __FILE__, __LINE__);
+    return NULL;
+  }
+
+  update_data_p->p_op_idx = -1;	/* no previous op yet */
+  update_data_p->p_op_cnt = 0;
+  update_data_p->show_code = show_code;
+  update_data_p->btop_enc = 0;
+
+  if ((show_code & SHOW_CODE_CIGAR) == SHOW_CODE_CIGAR) {
+    update_data_p->op_map = cigar_code;
+    update_data_p->cigar_order = 1;
+  }
+  else if ((show_code & SHOW_CODE_BTOP) == SHOW_CODE_BTOP) {
+    update_data_p->op_map = ori_code;
+    update_data_p->cigar_order = 0;
+    update_data_p->btop_enc = 1;
+  }    
+  else {
+    update_data_p->op_map = ori_code;
+    update_data_p->cigar_order = 0;
+  }
+
+  if ((show_code & SHOW_CODE_EXT) == SHOW_CODE_EXT) {
+    update_data_p->show_ext = 1;
+  }
+  else {
+    update_data_p->show_ext = 0;
+  }
+
+  return update_data_p;
+}
+
+static void
+close_update_data(struct dyn_string_str *align_code_dyn,
+		  struct update_code_str *up_dp) {
+  char last_run[MAX_SSTR];
+
+  if (up_dp == NULL) {return;}
+
+  /* flush the run that is still pending, then release the state */
+  if (up_dp->p_op_cnt != 0) {
+    if (!up_dp->btop_enc) {
+      sprintf_code(last_run, up_dp, up_dp->p_op_idx, up_dp->p_op_cnt);
+    }
+    else {	/* btop: only the pending count is written */
+      sprintf(last_run,"%d",up_dp->p_op_cnt);
+      up_dp->p_op_cnt = 0;
+    }
+    dyn_strcat(align_code_dyn, last_run);
+  }
+  free(up_dp);
+}
+
+/* sprintf_code() generates one short alignment code (callers in this
+   file pass a char[MAX_SSTR] buffer) which is later added on to the
+   dynamic alignment code string
+
+   tmp_str -- alignment encoding output; not written when op_cnt == 0
+   up_dp -- supplies cigar_order (count first or code first) and op_map
+   op_idx -- code type, used as an index into up_dp->op_map
+   op_cnt -- run length
+
+*/
+
+static void
+sprintf_code(char *tmp_str, struct update_code_str *up_dp, int op_idx, int op_cnt) {
+  /* an empty run produces nothing */
+  if (op_cnt != 0) {
+    if (!up_dp->cigar_order) {	/* code first, then count */
+      sprintf(tmp_str,"%c%d",up_dp->op_map[op_idx],op_cnt);
+    }
+    else {			/* cigar order: count first */
+      sprintf(tmp_str,"%d%c",op_cnt,up_dp->op_map[op_idx]);
+    }
+  }
+}
+
+/* only called for btop alignment encoding: an aligned identity
+   (op==3 with sim_code M_IDENT) just increments up_dp->p_op_cnt and
+   leaves tmp_str untouched; anything else writes the pending count
+   (if > 0) followed by the two characters sp0, sp1 and resets the
+   count.  assumes that up_dp->p_op_cnt only tracks identity
+*/
+
+static void
+sprintf_btop(char *tmp_str, 
+	     struct update_code_str *up_dp, 
+	     int op, int sim_code,
+	     unsigned char sp0, unsigned char sp1)
+{
+  char ident_run[MAX_SSTR];
+
+  /* only aligned identities update counts */
+  if (op==3 && sim_code == M_IDENT) {
+    up_dp->p_op_cnt++;
+    return;
+  }
+
+  /* a difference: emit the identity run collected so far, if any */
+  ident_run[0]='\0';
+  if (up_dp->p_op_cnt > 0) {
+    sprintf(ident_run,"%d",up_dp->p_op_cnt);
+  }
+  up_dp->p_op_cnt = 0;
+  sprintf(tmp_str,"%s%c%c",ident_run,sp0,sp1);
+}
+
+/* update_code() has been modified to work more correctly with
+   ggsearch/glsearch, which, because alignments can start with either
+   insertions or deletions, can produce an initial code of "0=".  When
+   that happens, it is ignored and no code is added.
+
+   *align_code_dyn - alignment string (dynamic)
+   up_dp -- run state from init_update_data(); NULL makes this a no-op
+   op -- 0: aa insertion, 1: non-identity, 2/4: frameshifts, 3: identity, 5: codon insertion
+   sim_code, sp0, sp1 -- similarity code and the two aligned characters
+*/
+
+static void
+update_code(struct dyn_string_str *align_code_dyn,
+	    struct update_code_str *up_dp, int op, 
+	    int sim_code,  unsigned char sp0, unsigned char sp1)
+{
+  char tmp_cnt[MAX_SSTR];
+  tmp_cnt[0]='\0';
+
+  /* init_update_data() returns NULL when calloc() fails; like */
+  /* close_update_data(), do nothing rather than dereference NULL */
+  if (!up_dp) return;
+
+  if (up_dp->btop_enc) {
+    sprintf_btop(tmp_cnt, up_dp, op, sim_code, sp0, sp1);
+    dyn_strcat(align_code_dyn,tmp_cnt);
+    return;
+  }
+
+  /* there are two kinds of "op's", one time and accumulating */
+  /* op == 2, 4 -- frameshifts -- are one-time: */
+
+  switch (op) {
+  case 2:	/* frameshifts */
+  case 4:
+    sprintf_code(tmp_cnt,up_dp, up_dp->p_op_idx,up_dp->p_op_cnt);
+    dyn_strcat(align_code_dyn,tmp_cnt);
+
+    up_dp->p_op_idx = op;
+    up_dp->p_op_cnt = 1;
+    break;
+  case 0:	/* aa insertion */
+  case 5:	/* codon insertion (aa deletion) */
+    if (op == up_dp->p_op_idx) {
+      up_dp->p_op_cnt++;
+    }
+    else {
+      sprintf_code(tmp_cnt,up_dp, up_dp->p_op_idx,up_dp->p_op_cnt);
+      dyn_strcat(align_code_dyn,tmp_cnt);
+      up_dp->p_op_idx = op;
+      up_dp->p_op_cnt = 1;
+    }
+    break;
+  case 1:	/* mismatch (non-id match) */
+  case 3:	/* identical match */
+    if (sp0 != '*' && sp1 != '*') {	/* default case, not termination */
+      if (up_dp->show_ext && sim_code != M_IDENT) { op = 1;}
+    }
+    else {	/* have a termination codon, output for !SHOW_CODE_CIGAR */
+      if (!up_dp->cigar_order) { op = 6;}	/* this branch already implies a '*' */
+      else if (up_dp->show_ext && (sp0 != sp1)) { op = 1;}
+    }
+
+    if (up_dp->p_op_cnt == 0) {
+      up_dp->p_op_idx = op;
+      up_dp->p_op_cnt = 1;
+    }
+    else if (op != up_dp->p_op_idx) {
+      sprintf_code(tmp_cnt,up_dp, up_dp->p_op_idx,up_dp->p_op_cnt);
+      dyn_strcat(align_code_dyn,tmp_cnt);
+      up_dp->p_op_idx = op;
+      up_dp->p_op_cnt = 1;
+    }
+    else {
+      up_dp->p_op_cnt++;
+    }
+    break;
+  }
+  return;
+}
+
+int calc_code(const unsigned char *aa0, int n0,
+	      const unsigned char *aa1, int n1,
+	      struct a_struct *aln,
+	      struct a_res_str *a_res,
+	      struct pstruct *ppst,
+	      struct dyn_string_str *align_code_dyn,
+	      const unsigned char *ann_arr, 
+	      const unsigned char *aa0a,
+	      const struct annot_str *annot0_p,
+	      const unsigned char *aa1a,
+	      const struct annot_str *annot1_p,
+	      struct dyn_string_str *annot_code_dyn,
+	      int *score_delta,
+	      struct f_struct *f_str,
+	      void *pstat_void,
+	      int display_code)
+{
+  int unused_nc;	/* calc_cons_u() needs somewhere to store *nc */
+  int code_len;
+
+  /* CALC_CODE mode: no seqc0/seqc1/seqca/cumm_seq_score buffers; */
+  /* annot_code_dyn is handed over as calc_cons_u()'s annot_var_dyn */
+  code_len = calc_cons_u(aa0, n0, aa1, n1, a_res, ppst, f_str, pstat_void,
+			 ann_arr, aa0a, annot0_p, aa1a, annot1_p,
+			 CALC_CODE, display_code,
+			 &unused_nc, NULL, NULL, NULL, NULL,
+			 NULL, NULL, aln, score_delta, annot_code_dyn, align_code_dyn);
+  return code_len;
+}
+
+/* calc_id never looks at domains or features, only variation */
+/* (forwards to calc_cons_u() in CALC_ID mode and returns its result) */
+int calc_id(const unsigned char *aa0, int n0,
+	    const unsigned char *aa1, int n1,
+	    struct a_struct *aln, 
+	    struct a_res_str *a_res,
+	    struct pstruct *ppst,
+	    const struct annot_str *annot0_p,
+	    const struct annot_str *annot1_p,
+	    int *score_delta,
+	    struct dyn_string_str *annot_var_dyn,
+	    struct f_struct *f_str)
+{
+  int unused_nc;	/* calc_cons_u() needs somewhere to store *nc */
+  int id_len;
+
+  /* CALC_ID mode: no output strings, annotation arrays, pstat or align code */
+  id_len = calc_cons_u(aa0, n0, aa1, n1, a_res, ppst, f_str, NULL,
+		       NULL, NULL, annot0_p, NULL, annot1_p, CALC_ID, 0,
+		       &unused_nc, NULL, NULL, NULL, NULL,
+		       NULL, NULL, aln, score_delta, annot_var_dyn,
+		       NULL);
+  return id_len;
+}
+
+/* calc_idd also looks at domains */
+/* (forwards to calc_cons_u() in CALC_ID_DOM mode and returns its result) */
+int calc_idd(const unsigned char *aa0, int n0,
+	    const unsigned char *aa1, int n1,
+	    struct a_struct *aln, 
+	    struct a_res_str *a_res,
+	    struct pstruct *ppst,
+	    const struct annot_str *annot0_p,
+	    const struct annot_str *annot1_p,
+	    int *score_delta,
+	    struct dyn_string_str *annot_var_dyn,
+	    struct f_struct *f_str)
+{
+  int unused_nc;	/* calc_cons_u() needs somewhere to store *nc */
+  int idd_len;
+
+  /* CALC_ID_DOM mode: no output strings, annotation arrays, pstat or align code */
+  idd_len = calc_cons_u(aa0, n0, aa1, n1, a_res, ppst, f_str, NULL,
+			NULL, NULL, annot0_p, NULL, annot1_p, CALC_ID_DOM, 0,
+			&unused_nc, NULL, NULL, NULL, NULL,
+			NULL, NULL, aln, score_delta, annot_var_dyn,
+			NULL);
+  return idd_len;
+}
+
diff --git a/src/dropfz2.c b/src/dropfz2.c
new file mode 100644
index 0000000..0838ae2
--- /dev/null
+++ b/src/dropfz2.c
@@ -0,0 +1,3969 @@
+/* $Id: dropfz2.c 1280 2014-08-21 00:47:55Z wrp $ */
+/* $Revision: 1280 $  */
+
+/* copyright (c) 1998, 1999, 2014 by William R. Pearson and The Rector &
+   Visitors of the University of Virginia */
+
+/* Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under this License is distributed on an "AS
+   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+   express or implied.  See the License for the specific language
+   governing permissions and limitations under the License. 
+*/
+
+/* 18-Sept-2006 - removed static global variables for alignment */
+
+/* 2002/06/23 finally correctly implement fix to translate 'N' to 'X' */
+
+/* 1999/11/29 modification by Z. Zhang to translate DNA 'N' as 'X' */
+
+/* implements an improved version of the fasty algorithm, see:
+
+   W. R. Pearson, T. Wood, Z. Zhang, A W. Miller (1997) "Comparison of
+   DNA sequences with protein sequences" Genomics 46:24-36
+
+*/
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <ctype.h>
+
+#include "defs.h"
+#include "param.h"
+#define XTERNAL
+#include "upam.h"
+#include "uascii.h"
+
+#define NT_N 16
+
+/* globals for fasta */
+#define MAXWINDOW 64
+
+#ifndef MAXSAV
+#define MAXSAV 10
+#endif
+
+#ifndef ALLOCN0
+static char *verstr="3.8 June 2014";
+#else
+static char *verstr="3.8an0 Jul 2014";
+#endif
+
+struct dstruct		/* diagonal structure for saving current run;
+			   do_fastz() keeps one entry per diagonal in
+			   f_str->diag[] and resets score/stop/dmax to
+			   0/-1/NULL around each library sequence */
+{			
+   int     score;	/* hash score of current match */
+   int     start;	/* start of current match */
+   int     stop;	/* end of current match; do_fastz() treats a negative
+			   value as "no run on this diagonal yet" */
+   struct savestr *dmax;   /* location in vmax[] where best score data saved;
+			      savemax() uses it to recognise a continued run */
+};
+
+struct savestr		/* one saved high-scoring run; do_fastz() keeps
+			   MAXSAV of these in vmax[].  savemax() fills
+			   score, dp, start and stop; do_fastz() then
+			   overwrites score with the value from spam() */
+{
+   int     score;		/* pam score with segment optimization */
+   int     score0;		/* pam score of best single segment */
+   int     gscore;		/* score from global match */
+   int     dp;			/* diagonal of match */
+   int     start;		/* start of match in lib seq */
+   int     stop;		/* end of match in lib seq */
+};
+
+struct update_code_str {	/* state handed to update_code(),
+				   sprintf_code() and close_update_data();
+				   created by init_update_data(show_code).
+				   NOTE(review): the per-field notes below
+				   are inferred from the names only, the
+				   code that uses them is not in view -
+				   confirm before relying on them */
+  int p_op_idx;		/* presumably the previous operation index */
+  int p_op_cnt;		/* presumably the run length of that operation */
+  int btop_enc;		/* presumably selects a BTOP-style encoding */
+  int show_code;	/* presumably the show_code value given to init_update_data() */
+  int cigar_order;	/* presumably selects CIGAR (count-before-op) ordering */
+  int show_ext;
+  char *op_map;		/* presumably points at ori_code or cigar_code */
+};
+
+#ifndef TFAST
+static char *ori_code = "-x/=\\+*";	/* FASTX */
+static char *cigar_code = "DXFMRI*";
+#else
+static char *ori_code = "+x/=\\-*";	/* TFASTX */
+static char *cigar_code = "IXFMRD*";
+#endif
+
+static struct update_code_str *
+init_update_data(int show_code);
+
+static void
+sprintf_code(char *tmp_str, struct update_code_str *, int op_idx, int op_cnt);
+
+static void 
+update_code(char *al_str, int al_str_max, 
+	    struct update_code_str *update_data, int op, 
+	    int sim_code, unsigned char sp0, unsigned char sp1);
+
+static void
+close_update_data(char *al_str, int al_str_max,
+		  struct update_code_str *update_data);
+
+void kpsort (struct savestr **v, int n);
+extern void *init_stack(int, int);
+extern void push_stack(void *, void *);
+extern void *pop_stack(void *);
+extern void *free_stack(void *);
+
+#define SGW1 100	/* first dimension (minus 1) of the smgl_str matrices */
+#define SGW2 300	/* second dimension (minus 1) of the smgl_str matrices */
+struct smgl_str {	/* fixed-size scratch matrices embedded in f_struct
+			   as smgl_s; the code that fills them is not in
+			   this part of the file.  NOTE(review): C/I/D look
+			   like match/insert/delete score arrays and st like
+			   a state (traceback) matrix - confirm */
+  int C[SGW1+1][SGW2+1];
+  int st[SGW1+1][SGW2+1];
+  int D[SGW2+7], I[SGW2+1];	/* note: D is declared 6 entries longer than I */
+};
+
+struct sx_s {int C1, C2, C3, I1, I2, I3, flag; };	/* element type of f_struct.cur; usage not in view */
+
+struct wgt { int  iii, ii, iv;};	/* filled by init_weights() for each (residue, 8-bit nucleotide window):
+					   iii = score of the codon in the low 6 bits,
+					   ii  = best score completing the low 4 bits (2 nt) to a codon, minus gshift,
+					   iv  = best score after dropping one of the 4 nt, minus gshift */
+struct wgtc {char c2, c3, c4, c5;};	/* characters taken from the aagetmap() table by init_weights():
+					   c3 for the codon itself, c5 for its best-scoring substituted codon,
+					   c2/c4 for the codons chosen for wgt.ii / wgt.iv */
+
+typedef struct st_s { int C, I, D;} *st_ptr;	/* type of f_struct.up/down/tp; usage not in view */
+
+struct f_struct {	/* per-query work area: allocated and filled by
+			   init_work(), released by close_work(), and
+			   passed to do_fastz()/do_work()/do_opt() */
+  struct dstruct *diag;	/* per-diagonal run state (MAXDIAG entries, or n0 with ALLOCN0) */
+  int frame;		/* not referenced in the visible part of the file */
+  int ndo;		/* index from which do_fastz() starts resetting diag[]; set to nd after a scan */
+  int noff;		/* set to n0-1 by do_fastz(); offset between positions and diagonal numbers */
+  int hmask;			/* hash constants */
+  int *pamh1;			/* pam based array */
+  int *pamh2;			/* pam based kfact array */
+  int *link, *harr;		/* hash arrays */
+  int kshft;			/* shift width */
+  int nsav, lowscor;		/* number of saved runs, worst saved run */
+#ifndef TFAST
+  unsigned char *aa0x, *aa0v;	/* aa0x - 111122223333 */
+#else
+  unsigned char *aa1x, *aa1v;	/* aa1x - 111122223333 */
+#endif				/* aa1v - computed codons */
+  struct sx_s *cur;	/* freed by close_work(); not allocated in init_work() -
+			   presumably allocated on demand by the alignment code, confirm */
+  int cur_sp_size;	/* presumably the allocated size of cur - confirm */
+  struct wgt **weight0;	/* init_weights() table built from ppst->pam2[ip] */
+  struct wgt **weight1;	/* init_weights() table built from ppst->pam2[0] */
+  struct wgtc **weight_c;	/* character table built together with weight1 */
+  int *waa;		/* init_work(): pam2[ip][i][aa0[j]] stored row by row, n0 entries per residue i */
+  int *res;		/* alignment results array allocated by init_work() */
+  int max_res;		/* number of ints allocated for res */
+  st_ptr up, down, tp;	/* not set in init_work(); usage not in view */
+  struct smgl_str smgl_s;
+};
+
+#define DROP_INTERN
+#include "drop_func.h"
+
+static int dmatchz(const unsigned char *aa0, int n0,
+		   const unsigned char *aa1, int n1,
+		   const unsigned char *aa1v,
+		   int hoff, int window, 
+		   int **pam2, int gdelval, int ggapval, int gshift,
+		   struct f_struct *f_str);
+
+int shscore(unsigned char *aa0, int n0, int **pam2);
+int saatran(const unsigned char *ntseq, unsigned char *aaseq, int maxs, int frame);
+extern int ELK_to_s(double E_join, int n0, int n1, double Lambda, double K, double H);
+
+int savemax (struct dstruct *, int, 
+	     struct savestr *vmax, struct savestr **lowmax);
+
+int spam (const unsigned char *aa0, const unsigned char *aa1, 
+	  struct savestr *dmax, int **pam2,
+	  struct f_struct *f_str);
+int sconn (struct savestr **v, int n,int cgap, int pgap, struct f_struct *f_str);
+int lx_band(const unsigned char *prot_seq, int len_prot,
+	    const unsigned char *dna_prot_seq, int len_dna_prot,
+	    int **pam_matrix, int gopen, int gext,
+	    int gshift, int start_diag, int width, struct f_struct *f_str);
+
+struct a_res_str *
+merge_ares_chains(struct a_res_str *cur_ares, 
+		  struct a_res_str *tmpl_ares,
+		  int score_ix, const char *msg);
+
+extern void w_abort (char *p, char *p1);
+extern void aagetmap(char *to, int n);
+
+/* initialize for fasta */
+/* modified 30-August-1999 by Zheng Zhang to work with an extended alphabet */
+/* Assume naa=47, and wgts[47][23] matches both upper and lower case
+amino acids with another amino acid.  Also assume that the DNA letters
+do not have an upper/lower case difference.  If you also allow DNA
+sequences to contain upper/lower case letters, more needs to be changed:
+not only here, but also in the alignment code, where the way a codon is
+packed into a number between 0-63 would need to be changed. */
+
+/* modified so that if **weightci==NULL, do not fiddle with characters */
+
+/* modified 3-Aug-2010 for NCBIstdaa alphabet, which requires MAXUC
+   28, MAXLC 56, so we must have 58, not 47, entries */
+
+/* init_weights() - build the residue-vs-nucleotide-window score tables.
+
+   *weighti  receives a newly allocated table of naa+1 rows with 256
+             struct wgt entries each; entry [aa][w] describes how well
+             residue aa matches the 8-bit (4 nucleotide, 2 bits each)
+             window w.
+   weightci  if not NULL, *weightci receives a parallel table of
+             struct wgtc holding the characters (from the aagetmap()
+             table) that belong to the chosen codons; if NULL, no
+             character table is built.
+   wgts      scoring matrix, indexed wgts[aa][residue].
+   gshift    subtracted from the .ii and .iv (frameshift) scores.
+   gsubs     subtracted once per nucleotide position that differs when
+             a codon is replaced by a better-scoring one.
+   naa       number of residue rows to fill; must not exceed MAXLC
+             because the local work arrays are sized from MAXLC.
+
+   The program exits on allocation failure or if naa > MAXLC.  The
+   tables are released with free_weights().
+*/
+void
+init_weights(struct wgt ***weighti, struct wgtc ***weightci,
+	     int **wgts, int gshift, int gsubs, int naa)
+{
+  int i, j, do_wgtc=0;
+  int aa, b, a, x, y, z;
+  int *wwt, e;
+  struct wgt **weight;
+  struct wgtc **weightc = NULL;	/* only dereferenced when do_wgtc is set */
+  char aacmap[64];		/* codon index -> character, from aagetmap() */
+  int temp[MAXLC+1][64];	/* best score for [aa][codon] */
+  char le[MAXLC+1][64];		/* codon index that produced temp[aa][codon] */
+
+  /* temp[] and le[] are indexed by aa < naa; refuse to run past them
+     instead of only reporting the problem */
+  if (naa > MAXLC) {
+    fprintf(stderr,"*** dropfz2.c compilation problem naa(%d) > MAXLC(%d) ***\n",
+	   naa, MAXLC);
+    exit(1);
+  }
+
+  if ((*weighti=(struct wgt **)calloc((size_t)(naa+1),sizeof(struct wgt *)))
+      ==NULL) {
+    fprintf(stderr," cannot allocate weights array: %d\n",naa);
+    exit(1);
+  }
+
+  weight = *weighti;
+  /* allocate weight[aa 0..naa] */
+  for (aa=0; aa <= naa; aa++) {
+    if ((weight[aa]=(struct wgt *)calloc((size_t)256,sizeof(struct wgt)))
+	==NULL) {
+      fprintf(stderr," cannot allocate weight[]: %d/%d\n",aa,naa);
+      exit(1);
+    }
+  }
+
+  /* allocate weightc[aa 0..naa] only when the caller wants characters */
+  if (weightci !=NULL) {
+    if ((*weightci=(struct wgtc **)calloc((size_t)(naa+1),
+					  sizeof(struct wgtc *)))==NULL) {
+      fprintf(stderr," cannot allocate weight_c array: %d\n",naa);
+      exit(1);
+    }
+    weightc = *weightci;
+
+    for (aa=0; aa <= naa; aa++) {
+      if ((weightc[aa]=(struct wgtc *)calloc((size_t)256,sizeof(struct wgtc)))
+	  ==NULL) {
+	fprintf(stderr," cannot allocate weightc[]: %d/%d\n",aa,naa);
+	exit(1);
+      }
+    }
+    do_wgtc = 1;
+  }
+
+  aagetmap(aacmap,64);
+
+  /* pass 1: for every residue aa and codon i, find the codon j whose
+     translation scores best against aa after charging gsubs for each
+     of the three nucleotide positions where i and j differ */
+  for (aa = 0; aa < naa; aa++) {
+    wwt = wgts[aa];			/* pam matrix row for aa */
+    for (i = 0; i < 64; i++) {	/* i iterates through the codons */
+      x = -10000;		/* large negative */
+      y = i;
+      for (j = 0; j < 64; j++) {	/* j iterates through the codons */
+	z = ((~i & j) | (i & ~j));	/* i XOR j: differing bits */
+	b = 0;		/* score = 0 */
+	if (z % 4) b-= gsubs;		/* third position differs */
+	if (z /16) b-= gsubs;		/* first position differs */
+	if ((z /4) % 4) b -= gsubs;	/* second position differs */
+	/* cast: a plain char must not be used as a (possibly negative) index */
+	b += wwt[aascii[(unsigned char)aacmap[j]]];  /* add the match score for char j*/
+	if (b > x) {
+	  x = b;		/* x has the score */
+	  y = j;		/* y has the character  (codon index)*/
+	}
+      }
+#ifdef DEBUG
+      if (y < 0 || y > 63) printf("%d %d %d %d ",aa, i, x, y);
+#endif
+      temp[aa][i] = x;
+      le[aa][i] = y;
+    }
+  }
+
+  /* pass 2: expand to all 256 four-nucleotide windows */
+  for (aa= 0; aa < naa; aa++) {
+      wwt = temp[aa];
+      for (i = 0; i < 256; i++) {
+	  /* .iv: drop one of the four nucleotides (position b) and
+	     keep the best resulting codon */
+          for (x=-100,b = 0; b < 4; b++) {
+              z = (i/ (1 << ((b+1)*2)))*(1<<(b*2))+(i%(1<<(b*2)));
+	      if (x < (e=wwt[z])) {
+		  x = e;
+		  if (do_wgtc) weightc[aa][i].c4 = aacmap[le[aa][z]];
+	      }
+          }
+          weight[aa][i].iv=x-gshift;
+	  /* .iii: the codon in the low 6 bits, no frameshift */
+          weight[aa][i].iii = wwt[i%64];
+
+	  if (do_wgtc) {
+	    weightc[aa][i].c5 = aacmap[le[aa][i%64]];
+	    weightc[aa][i].c3 = aacmap[i%64];
+	  }
+	  /* .ii: take the low two nucleotides, open a slot at position
+	     b, try all four nucleotides a there, keep the best codon */
+          x = i %16;
+          for (y = -100, b = 0; b < 3; b++) {
+              z = ((x >> (b*2)) << (b*2+2)) + (x % (1 << (b*2)));
+              for (a = 0; a < 4; a++) {
+		  if ((e =wwt[z+(a<<(b*2))]) > y) {
+		      y = e;
+		      if (do_wgtc)
+			weightc[aa][i].c2 = aacmap[le[aa][z+(a<<(b*2))]];
+		  }
+              }
+          }
+          weight[aa][i].ii = y-gshift;
+      }
+  }
+
+  /* window 106 (=CGGG) is the marker pre_com()/pre_com_r() store for a
+     codon that contains 'N'; give it the score in column 23 and the
+     character 'X' */
+  for (aa = 0; aa < naa; aa++) {
+    weight[aa][106].iii = wgts[aa][23]; /* is 23 the code for 'X'?*/
+    weight[aa][106].iv = weight[aa][106].ii = weight[aa][106].iii-gshift;
+    if (do_wgtc) {
+      weightc[aa][106].c5 = weightc[aa][106].c4 = weightc[aa][106].c3
+	= weightc[aa][106].c2 = 'X';
+    }
+  }
+}
+
+/* free_weights() - release the tables built by init_weights().
+
+   weighti0, weighti1  addresses of the two struct wgt tables
+   weightci            address of the struct wgtc table
+   naa                 the row count used when the tables were built
+                       (or a smaller value)
+
+   init_weights() allocates rows 0..naa inclusive, so row naa is freed
+   here as well.  A NULL argument or a NULL table is skipped, and each
+   table pointer is reset to NULL after it has been released.
+*/
+void
+free_weights(struct wgt ***weighti0, struct wgt ***weighti1,
+	     struct wgtc ***weightci, int naa)
+{
+  int aa;
+  struct wgt **weight0;
+  struct wgt **weight1;
+  struct wgtc **weightc;
+
+  weight0 = (weighti0 != NULL) ? *weighti0 : NULL;
+  weight1 = (weighti1 != NULL) ? *weighti1 : NULL;
+  weightc = (weightci != NULL) ? *weightci : NULL;
+
+  /* rows were allocated for aa = 0..naa, not 0..naa-1 */
+  if (weight0 != NULL) {
+    for (aa=0; aa <= naa; aa++) {free(weight0[aa]);}
+    free(weight0);
+    *weighti0 = NULL;
+  }
+  if (weight1 != NULL) {
+    for (aa=0; aa <= naa; aa++) {free(weight1[aa]);}
+    free(weight1);
+    *weighti1 = NULL;
+  }
+  if (weightc != NULL) {
+    for (aa=0; aa <= naa; aa++) {free(weightc[aa]);}
+    free(weightc);
+    *weightci = NULL;
+  }
+}
+
+/* pre_com() - precompute the forward-strand codon window series.
+
+   aa0   encoded nucleotide sequence of length n0
+   aa0v  output; aa0v[k] (k = 0..n0-3) receives the 8-bit window made
+         of the hnt[] values of up to four consecutive nucleotides
+         ending at aa0[k+2], so its low 6 bits are the codon
+         aa0[k..k+2]
+
+   A codon containing NT_N is stored as the marker 106; a genuine
+   window value of 106 (CGGG) is stored as 42 (AGGG) so that it cannot
+   be mistaken for the marker.  Sequences shorter than one codon
+   produce no output and are not read at all.
+*/
+static void
+pre_com(const unsigned char *aa0, int n0, unsigned char *aa0v) {
+  int dnav, i;
+
+  if (n0 < 3) return;	/* no complete codon: avoid reading aa0[1] */
+
+  dnav = (hnt[aa0[0]]<<2) + hnt[aa0[1]];
+  for (i=2; i<n0; i++) {
+    dnav = ((dnav<<2)+hnt[aa0[i]])&255;	/* slide the 4-nt window */
+    if (aa0[i] == NT_N  || aa0[i-1]==NT_N || aa0[i-2] == NT_N)  {
+      aa0v[i-2] = 106;
+    }
+    else {
+      if (dnav == 106/*CGGG*/) {dnav = 42/*AGGG*/;}
+      aa0v[i-2]=dnav;
+    }
+  }
+}
+
+/* pre_com_r() - reverse-strand counterpart of pre_com().
+
+   Walks aa0 from its last nucleotide towards the first, using
+   3-hnt[] in place of hnt[], and writes the 8-bit windows to
+   aa0v[0..n0-3] in that order.  As in pre_com(), a codon containing
+   NT_N is stored as 106 and a genuine 106 is stored as 42.
+   Sequences shorter than one codon produce no output and are not
+   read at all (the original indexed aa0[n0-2] unconditionally).
+*/
+static void
+pre_com_r(const unsigned char *aa0, int n0, unsigned char *aa0v) {
+  int dnav, i, ir;
+
+  if (n0 < 3) return;	/* no complete codon: avoid negative indices */
+
+  dnav = ((3-hnt[aa0[n0-1]])<<2) + 3-hnt[aa0[n0-2]];
+  for (i=2, ir=n0-3; i<n0; i++,ir--) {
+    dnav = ((dnav<<2)+3-hnt[aa0[ir]])&255;	/* slide the 4-nt window */
+    if (aa0[ir] == NT_N || aa0[ir+1]==NT_N || aa0[ir+2] == NT_N)  {
+      aa0v[i-2] = 106;
+    }
+    else {
+      if (dnav == 106) dnav = 42;
+      aa0v[i-2]=dnav;
+    }
+  }
+}
+
+/* init_work() - allocate and fill the per-query work area.
+
+   aa0    encoded query of length n0.  Without TFAST it is translated
+          with saatran() in three frames into f_str->aa0x, and the
+          hash tables are built on that translation; with TFAST the
+          query is hashed directly.
+   ppst   search parameters.  param_u.fa.pgap is always updated;
+          param_u.fa.cgap and (unless optcut_set is 1)
+          param_u.fa.optcut are updated when E() thresholds are not
+          in use.
+   f_arg  receives the new struct f_struct, to be released with
+          close_work().
+
+   Steps: derive the score thresholds, translate the query (FASTX
+   build), build the codon weight tables, build the ktup hash
+   (harr/link/pamh1/pamh2), allocate the diagonal array, build the
+   waa score profile and allocate the alignment result buffer.
+   Any allocation failure terminates the program with exit(1).
+*/
+void
+init_work (unsigned char *aa0, int n0,
+	   struct pstruct *ppst,
+	   struct f_struct **f_arg)
+{
+   int mhv, phv;
+   int hmax;
+   int i0, hv;
+   int pamfact;
+   int btemp;
+   struct f_struct *f_str;
+   /* these used to be globals, but do not need to be */
+   int ktup, fact, kt1, lkt;
+
+   int maxn0;
+   int *pwaa;
+   int i, j;
+   int *waa;
+   int *res;
+   int nsq, ip, *hsq, naat;
+#ifndef TFAST
+   int last_n0, itemp;
+   unsigned char *fs, *aa0x, *aa0v;
+   int n0x;
+#endif
+
+   /* pre_com()/pre_com_r() rely on NT_N being the code for 'N' */
+   if (nt[NT_N] != 'N') {
+     fprintf(stderr," nt[NT_N] (%d) != 'N' (%c) - recompile\n",NT_N,nt[NT_N]);
+     exit(1);
+   }
+
+   if (ppst->ext_sq_set) {
+     nsq = ppst->nsqx; ip = 1;
+     hsq = ppst->hsqx;
+   }
+   else {
+     nsq = ppst->nsqx; ip = 0;
+     hsq = ppst->hsq;
+   }
+
+   /* calloc() so that every pointer member starts out NULL */
+   if ((f_str = (struct f_struct *)calloc(1,sizeof(struct f_struct)))
+       == NULL) {
+     fprintf (stderr, " cannot allocate f_struct\n");
+     exit (1);
+   }
+
+   if (!ppst->param_u.fa.use_E_thresholds) {
+     btemp = 2 * ppst->param_u.fa.bestoff / 3 +
+       n0 / ppst->param_u.fa.bestscale +
+       ppst->param_u.fa.bkfact *
+       (ppst->param_u.fa.bktup - ppst->param_u.fa.ktup);
+     btemp = min (btemp, ppst->param_u.fa.bestmax);
+     if (btemp > 3 * n0) btemp = 3 * shscore(aa0,n0,ppst->pam2[0]) / 5;
+
+     ppst->param_u.fa.cgap = btemp + ppst->param_u.fa.bestoff / 3;
+     if (ppst->param_u.fa.optcut_set != 1) {
+#ifndef TFAST
+       ppst->param_u.fa.optcut = (btemp*5)/4;
+#else
+       ppst->param_u.fa.optcut = (btemp*4)/3;
+#endif
+     }
+   }
+
+#ifdef OLD_FASTA_GAP
+   ppst->param_u.fa.pgap = ppst->gdelval + ppst->ggapval;
+#else
+   ppst->param_u.fa.pgap = ppst->gdelval + 2*ppst->ggapval;
+#endif
+   pamfact = ppst->param_u.fa.pamfact;
+   ktup = ppst->param_u.fa.ktup;
+   fact = ppst->param_u.fa.scfact * ktup;
+
+#ifndef TFAST
+   /* before hashing, we must set up some space and translate the sequence */
+
+   maxn0 = n0 + 2;
+   if ((aa0x =(unsigned char *)calloc((size_t)maxn0,
+					     sizeof(unsigned char)))
+       == NULL) {
+     fprintf (stderr, "cannot allocate aa0x array %d\n", maxn0);
+     exit (1);
+   }
+   aa0x++;		/* leave one zero byte in front; undone in close_work() */
+   f_str->aa0x = aa0x;
+
+
+   if ((aa0v =(unsigned char *)calloc((size_t)maxn0,
+					     sizeof(unsigned char)))
+       == NULL) {
+     fprintf (stderr, "cannot allocate aa0v array %d\n", maxn0);
+     exit (1);
+   }
+   aa0v++;		/* same one-byte offset as aa0x */
+   f_str->aa0v = aa0v;
+
+   /* make a precomputed codon number series */
+   pre_com(aa0, n0, aa0v);
+
+   /* translate the three forward frames one after another into aa0x,
+      skipping one position after each frame */
+   last_n0 = 0;
+   for (itemp=0; itemp<3; itemp++) {
+     n0x=saatran(aa0,&aa0x[last_n0],n0,itemp);
+     /*         for (i=0; i<n0x; i++) {
+	   fprintf(stderr,"%c",aa[aa0x[last_n0+i]]);
+	   if ((i%60)==59) fprintf(stderr,"\n");
+	   }
+	   fprintf(stderr,"\n");
+	   */
+     last_n0 += n0x+1;
+   }
+
+   /* now switch aa0 and aa0x for hashing functions */
+   fs = aa0;
+   aa0 = aa0x;
+   aa0x = fs;
+#endif
+
+   /* naat must always be MAXLC because library can have LC aa residues */
+   /*
+   if (ppst->ext_sq_set) naat = MAXLC;
+   else naat = MAXUC;
+   */
+   naat = MAXLC;
+
+   init_weights(&f_str->weight0, NULL,
+		ppst->pam2[ip],-ppst->gshift,-ppst->gsubs,naat);
+   init_weights(&f_str->weight1, &f_str->weight_c,
+		ppst->pam2[0],-ppst->gshift,-ppst->gsubs,naat);
+
+   if (pamfact == -1)
+      pamfact = 0;
+   else if (pamfact == -2)
+      pamfact = 1;
+
+   /* largest hash value below NMAP decides the bits per residue */
+   for (i0 = 1, mhv = -1; i0 < ppst->nsq; i0++)
+      if (hsq[i0] < NMAP && hsq[i0] > mhv)
+	 mhv = ppst->hsq[i0];
+
+   if (mhv <= 0)
+   {
+      fprintf (stderr, " maximum hsq <=0 %d\n", mhv);
+      exit (1);
+   }
+
+   for (f_str->kshft = 0; mhv > 0; mhv /= 2) f_str->kshft++;
+
+/*      kshft = 2;	*/
+   kt1 = ktup - 1;
+   hv = 1;
+   for (i0 = 0; i0 < ktup; i0++)
+      hv = hv << f_str->kshft;
+   hmax = hv;
+   f_str->hmask = (hmax >> f_str->kshft) - 1;
+
+   if ((f_str->harr = (int *) calloc (hmax, sizeof (int))) == NULL) {
+     fprintf (stderr, " cannot allocate hash array\n");
+     exit (1);
+   }
+   if ((f_str->pamh1 = (int *) calloc (ppst->nsq+1, sizeof (int))) == NULL) {
+     fprintf (stderr, " cannot allocate pamh1 array\n");
+     exit (1);
+   }
+   if ((f_str->pamh2 = (int *) calloc (hmax, sizeof (int))) == NULL) {
+     fprintf (stderr, " cannot allocate pamh2 array\n");
+     exit (1);
+   }
+   if ((f_str->link = (int *) calloc (n0, sizeof (int))) == NULL) {
+     fprintf (stderr, " cannot allocate hash link array");
+     exit (1);
+   }
+
+   /* -1 marks an empty hash bucket / end of a link chain */
+   for (i0 = 0; i0 < hmax; i0++)
+      f_str->harr[i0] = -1;
+   for (i0 = 0; i0 < n0; i0++)
+      f_str->link[i0] = -1;
+
+   /* encode the aa0 array */
+   phv = hv = 0;
+   lkt = kt1;
+   /* prime hv/phv with the first ktup-1 hashable residues */
+   for (i0 = 0; i0 < min(n0,lkt); i0++) {
+     if (hsq[aa0[i0]] >= NMAP) {
+       hv=phv=0; lkt = i0+ktup; continue;
+     }
+     hv = (hv << f_str->kshft) + ppst->hsq[aa0[i0]];
+     phv += ppst->pam2[ip][aa0[i0]][aa0[i0]] * ktup;
+   }
+
+   for (; i0 < n0; i0++) {
+     if (hsq[aa0[i0]] >= NMAP) {
+       hv=phv=0;
+       /* restart hv, phv calculation */
+       for (lkt = i0+kt1; (i0 < lkt || hsq[aa0[i0]]>=NMAP) && i0<n0; i0++) {
+	 if (hsq[aa0[i0]] >= NMAP) {
+	   hv=phv=0;
+	   lkt = i0+ktup;
+	   continue;
+	 }
+	 hv = (hv << f_str->kshft) + hsq[aa0[i0]];
+	 phv += ppst->pam2[ip][aa0[i0]][aa0[i0]]*ktup;
+       }
+     }
+     if (i0 >= n0) break;
+     hv = ((hv & f_str->hmask) << f_str->kshft) + ppst->hsq[aa0[i0]];
+     /* push position i0 on the chain for hash value hv */
+     f_str->link[i0] = f_str->harr[hv];
+     f_str->harr[hv] = i0;
+     if (pamfact) {
+       f_str->pamh2[hv] = (phv += ppst->pam2[ip][aa0[i0]][aa0[i0]] * ktup);
+       if (hsq[aa0[i0-kt1]] < NMAP)
+	 phv -= ppst->pam2[ip][aa0[i0 - kt1]][aa0[i0 - kt1]] * ktup;
+     }
+     else f_str->pamh2[hv] = fact * ktup;
+   }
+
+/* this has been modified from 0..<ppst->nsq to 1..<=ppst->nsq because the
+   pam2[0][0] is now undefined for consistency with blast
+*/
+
+   if (pamfact)
+      for (i0 = 1; i0 < ppst->nsq; i0++)
+	 f_str->pamh1[i0] = ppst->pam2[ip][i0][i0] * ktup;
+   else
+      for (i0 = 1; i0 < ppst->nsq; i0++)
+	 f_str->pamh1[i0] = fact;
+
+   f_str->ndo = 0;	/* used to save time on diagonals with long queries */
+
+
+#ifndef ALLOCN0
+   if ((f_str->diag = (struct dstruct *) calloc ((size_t)MAXDIAG,
+						 sizeof (struct dstruct)))==NULL) {
+      /* size_t has no portable C89 conversion: cast to match %lu */
+      fprintf (stderr," cannot allocate diagonal arrays: %lu\n",
+	       (unsigned long)(MAXDIAG *sizeof (struct dstruct)));
+      exit (1);
+     };
+#else
+   if ((f_str->diag = (struct dstruct *) calloc ((size_t)n0,
+					      sizeof (struct dstruct)))==NULL) {
+      fprintf (stderr," cannot allocate diagonal arrays: %ld\n",
+	      (long)((long)n0*sizeof (struct dstruct)));
+      exit (1);
+     };
+#endif
+
+#ifndef TFAST
+   /* done hashing, now switch aa0, aa0x back */
+   fs = aa0;
+   aa0 = aa0x;
+   aa0x = fs;
+#else
+   if ((f_str->aa1x =(unsigned char *)calloc((size_t)ppst->maxlen+4,
+					     sizeof(unsigned char)))
+       == NULL) {
+     fprintf (stderr, "cannot allocate aa1x array %d\n", ppst->maxlen+4);
+     exit (1);
+   }
+   f_str->aa1x++;	/* one-byte offset, undone in close_work() */
+
+   if ((f_str->aa1v =(unsigned char *)calloc((size_t)ppst->maxlen+4,
+					     sizeof(unsigned char))) == NULL) {
+     fprintf (stderr, "cannot allocate aa1v array %d\n", ppst->maxlen+4);
+     exit (1);
+   }
+   f_str->aa1v++;	/* one-byte offset, undone in close_work() */
+
+#endif
+
+   if ((waa= (int *)malloc (sizeof(int)*(nsq+1)*n0)) == NULL) {
+     fprintf(stderr,"cannot allocate waa struct %3d\n",nsq*n0);
+     exit(1);
+   }
+
+   /* score profile: row i holds pam2[ip][i][aa0[j]] for j = 0..n0-1 */
+   pwaa = waa;
+   for (i=0; i<nsq; i++) {
+     for (j=0;j<n0; j++) {
+       *pwaa = ppst->pam2[ip][i][aa0[j]];
+       pwaa++;
+     }
+   }
+   f_str->waa = waa;
+
+#ifndef TFAST
+   maxn0 = max(2*n0,MIN_RES);
+#else
+   maxn0 = max(4*n0,MIN_RES);
+#endif
+   if ((res = (int *)calloc((size_t)maxn0,sizeof(int)))==NULL) {
+     fprintf(stderr,"cannot allocate alignment results array %d\n",maxn0);
+     exit(1);
+   }
+   f_str->res = res;
+   f_str->max_res = maxn0;
+
+   *f_arg = f_str;
+}
+
+/* pstring1 is a message to the manager, currently 512 */
+/* pstring2 is the same information, but in a markx==10 format */
+/* get_param() - format the algorithm/parameter description strings.
+
+   ppst      search parameters to report
+   pstring1  pstring1[0] receives the program name and version (plus
+             " [optimized]" / " init1" where applicable), pstring1[1]
+             the matrix, gap, shift, subs, ktup, threshold and width
+             summary
+   pstring2  if not NULL, receives the same information as
+             "; pg_..." lines
+   s_info    score counters; s_cnt[0] and s_cnt[2] are reported as
+             fractions of tot_scores
+*/
+void
+get_param (const struct pstruct *ppst,
+	   char **pstring1, char *pstring2,
+	   struct score_count_s *s_info)
+{
+  char options_str1[128];
+  char options_str2[128];
+  double join_frac, opt_frac;
+  const char *ext_tag;
+  const char *line2_fmt;
+  const char *pg_fmt;
+#ifdef TFAST
+  char *pg_str="TFASTY";
+#else
+  char *pg_str="FASTY";
+#endif
+
+  /* fractions of scored sequences that passed the join / opt cutoffs */
+  join_frac = (double)s_info->s_cnt[0]/(double)s_info->tot_scores;
+  opt_frac = (double)s_info->s_cnt[2]/(double)s_info->tot_scores;
+
+  ext_tag = (ppst->ext_sq_set) ? "xS":"\0";
+
+  if (ppst->param_u.fa.use_E_thresholds) {
+    sprintf(options_str1,"E-join: %.2g (%.3g), E-opt: %.2g (%.3g)",
+	    ppst->param_u.fa.E_join, join_frac,
+	    ppst->param_u.fa.E_band_opt, opt_frac);
+    sprintf(options_str2,"pg_join_E(): %.2g (%.3g)\n; pg_optcut_E(): %.2g (%.3g)",
+	    ppst->param_u.fa.E_join, join_frac,
+	    ppst->param_u.fa.E_band_opt, opt_frac);
+  }
+  else {
+    sprintf(options_str1,"join: %d (%.3g), opt: %d (%.3g)",
+	    ppst->param_u.fa.cgap, join_frac,
+	    ppst->param_u.fa.optcut, opt_frac);
+    sprintf(options_str2,"pg_join: %d (%.3g)\n; pg_optcut: %d (%.3g)",
+	    ppst->param_u.fa.cgap, join_frac,
+	    ppst->param_u.fa.optcut, opt_frac);
+  }
+
+  if (ppst->param_u.fa.optflag)
+    sprintf (pstring1[0], "%s (%s) [optimized]",pg_str, verstr);
+  else
+    sprintf (pstring1[0], "%s (%s)",pg_str, verstr);
+
+#ifdef OLD_FASTA_GAP
+  line2_fmt =
+    "%s matrix (%d:%d)%s, gap-pen: %3d/%3d, shift: %3d, subs: %3d\n ktup: %d, %s, width: %3d";
+  pg_fmt =
+    "; pg_name_alg: %s\n; pg_ver_rel: %s\n; pg_matrix: %s (%d:%d)%s\n"
+    "; pg_gap-pen: %d %d\n; pg_ktup: %d\n; %s\n";
+#else
+  line2_fmt =
+    "%s matrix (%d:%d)%s, open/ext: %3d/%3d, shift: %3d, subs: %3d\n ktup: %d, %s, width: %3d";
+  pg_fmt =
+    "; pg_name_alg: %s\n; pg_ver_rel: %s\n; pg_matrix: %s (%d:%d)%s\n"
+    "; pg_open-ext: %d %d\n; pg_ktup: %d\n; %s\n";
+#endif
+
+  sprintf (pstring1[1], line2_fmt,
+	   ppst->pam_name, ppst->pam_h,ppst->pam_l,
+	   ext_tag,
+	   ppst->gdelval, ppst->ggapval,
+	   ppst->gshift,ppst->gsubs,
+	   ppst->param_u.fa.ktup, options_str1, ppst->param_u.fa.optwid);
+
+  if (ppst->param_u.fa.iniflag) strcat(pstring1[0]," init1");
+
+  if (pstring2 != NULL) {
+    sprintf (pstring2, pg_fmt,
+	     pg_str,verstr,ppst->pam_name, ppst->pam_h,ppst->pam_l,
+	     ext_tag,  ppst->gdelval,
+	     ppst->ggapval,ppst->param_u.fa.ktup,options_str2);
+  }
+}
+
+/* close_work() - release everything init_work() attached to *f_arg.
+
+   aa0, n0  unused here
+   ppst     unused here apart from being part of the common interface
+   f_arg    address of the work area; set to NULL on return.  A NULL
+            *f_arg is accepted and ignored.
+
+   aa0x/aa0v (aa1x/aa1v with TFAST) were advanced by one element
+   after allocation, so they are stepped back before free().
+*/
+void
+close_work (const unsigned char *aa0, int n0,
+	    struct pstruct *ppst,
+	    struct f_struct **f_arg)
+{
+  struct f_struct *f_str;
+  int naat;
+
+  f_str = *f_arg;
+
+  if (f_str != NULL) {
+    /* init_work() always builds the weight tables with naat = MAXLC,
+       whatever ext_sq_set is, so the same count must be used to free
+       them; using MAXUC here left the upper rows allocated */
+    naat = MAXLC;
+    free_weights(&f_str->weight0,&f_str->weight1,&f_str->weight_c,naat);
+    free(f_str->cur);
+#ifndef TFAST
+    f_str->aa0v--;
+    free(f_str->aa0v);
+    f_str->aa0x--;
+    free(f_str->aa0x);
+#else	/* TFAST */
+    f_str->aa1x--;
+    free(f_str->aa1x);
+    f_str->aa1v--;
+    free(f_str->aa1v);
+#endif
+    free(f_str->res);
+    free(f_str->waa);
+    free(f_str->diag);
+    free(f_str->link);
+    free(f_str->pamh2);
+    free(f_str->pamh1);
+    free(f_str->harr);
+    free(f_str);
+    *f_arg = NULL;
+  }
+}
+
+/* do_fastz() - score one library sequence against the hashed query.
+
+   aa0, n0    sequence whose hash tables are in f_str (see init_work())
+   aa1, n1    sequence that is scanned against the hash tables
+   aa1v       precomputed codon windows, passed on to dmatchz()
+   ppst       search parameters
+   f_str      work area from init_work()
+   rst        receives score[0] (best joined score), score[1] (best
+              single region) and score[2] (score[0], or the dmatchz()
+              score when optimization runs), plus valid_stat
+   hoff       receives the diagonal offset of the best region
+   shuff_flg  unused
+   s_info     tot_scores, s_cnt[0] and s_cnt[2] are incremented
+
+   Steps: reset the diagonal array, scan aa1 with the ktup hash while
+   saving the MAXSAV best runs (savemax()), rescore the runs with
+   spam(), map them onto the three translation frames, join them with
+   sconn(), and optionally run the banded optimization dmatchz().
+   All three scores are set to 0 if n1 < ktup or nothing was saved,
+   and to -1 if n0+n1+1 >= MAXDIAG.
+*/
+void do_fastz (const unsigned char *aa0, int n0,
+	       const unsigned char *aa1, int n1,
+	       const unsigned char *aa1v,
+	       const struct pstruct *ppst, struct f_struct *f_str,
+	       struct rstruct *rst, int *hoff, int shuff_flg,
+	       struct score_count_s *s_info)
+{
+   int     nd;		/* diagonal array size */
+   int     lhval;
+   int     kfact;
+   register struct dstruct *dptr;
+   struct savestr vmax[MAXSAV];	/* best matches saved for one sequence */
+   struct savestr *vptr[MAXSAV];
+   struct savestr *lowmax;
+   int lowscor;
+   register int tscor;
+
+#ifndef ALLOCN0
+   register struct dstruct *diagp;
+#else
+   register int dpos;
+   int     lposn0;
+#endif
+   struct dstruct *dpmax;
+   register int lpos;
+   int     tpos;
+   struct savestr *vmptr;
+   int     scor;
+   int     ib, nsave;
+   int ktup, kt1, ip, lkt, ktup_sq;
+   const int *hsq;
+   int c_gap, opt_cut;
+#ifndef TFAST
+   /* offsets of the 2nd and 3rd frame inside the translated query */
+   int n0x31, n0x32;
+   n0x31 = (n0-2)/3;
+   n0x32 = n0x31+1+(n0-n0x31-1)/2;
+#else
+   /* offsets of the 2nd and 3rd frame inside the translated library */
+   int n1x31, n1x32;
+   n1x31 = (n1-2)/3;
+   n1x32 = n1x31+1+(n1-n1x31-1)/2;
+#endif
+
+  if (ppst->ext_sq_set) {
+    ip = 1;
+    hsq = ppst->hsqx;
+  }
+  else {
+    ip = 0;
+    hsq = ppst->hsq;
+  }
+
+  ktup = ppst->param_u.fa.ktup;
+  ktup_sq = ktup*ktup;
+  if (ktup == 1) ktup_sq *= 2;
+
+  kt1 = ktup-1;
+
+  if (n1 < ktup) {
+    rst->score[0] = rst->score[1] = rst->score[2] = 0;
+    return;
+  }
+
+  if (n0+n1+1 >= MAXDIAG) {
+    fprintf(stderr,"n0,n1 too large: %d, %d\n",n0,n1);
+    rst->score[0] = rst->score[1] = rst->score[2] = -1;
+    return;
+  }
+
+  /* thresholds: either derived from E() values for this n0/n1, or
+     the fixed values computed in init_work() */
+  if (ppst->param_u.fa.use_E_thresholds) {
+    c_gap = ELK_to_s(ppst->param_u.fa.E_join*ktup_sq*2.5, n0, n1, ppst->pLambda, ppst->pK, ppst->pH);
+    opt_cut = ELK_to_s(ppst->param_u.fa.E_band_opt*ktup_sq*2.0, n0, n1, ppst->pLambda, ppst->pK, ppst->pH);
+    rst->valid_stat = 0;
+  }
+  else {
+    c_gap = ppst->param_u.fa.cgap;
+    opt_cut = ppst->param_u.fa.optcut;
+    rst->valid_stat = 1;
+  }
+
+  f_str->noff = n0 - 1;
+
+#ifdef ALLOCN0
+   nd = n0;
+#endif
+
+#ifndef ALLOCN0
+   nd = n0 + n1;
+#endif
+
+   /* entries below ndo were already reset at the end of the last scan */
+   dpmax = &f_str->diag[nd];
+   for (dptr = &f_str->diag[f_str->ndo]; dptr < dpmax;)
+   {
+      dptr->stop = -1;
+      dptr->dmax = NULL;
+      dptr++->score = 0;
+   }
+
+   for (vmptr = vmax; vmptr < &vmax[MAXSAV]; vmptr++)
+      vmptr->score = 0;
+   lowmax = vmax;
+   lowscor = 0;
+
+   /* start hashing */
+   lhval = 0;
+   lkt = kt1;
+   /* lpos < n1 is tested first so that aa1[n1] is never read */
+   for (lpos = 0; lpos<n1 && (lpos < lkt || hsq[aa1[lpos]]>=NMAP); lpos++) {
+     /* restart lhval calculation */
+     if (hsq[aa1[lpos]]>=NMAP) {
+       lhval = 0; lkt=lpos+ktup;
+       continue;
+     }
+     lhval = ((lhval & f_str->hmask) << f_str->kshft) + ppst->hsq[aa1[lpos]];
+   }
+
+#ifndef ALLOCN0
+   diagp = &f_str->diag[f_str->noff + lkt];
+   for (; lpos < n1; lpos++, diagp++) {
+     if (hsq[aa1[lpos]]>=NMAP) {
+       lpos++ ; diagp++;
+       while (lpos < n1 && hsq[aa1[lpos]]>=NMAP) {lpos++; diagp++;}
+       if (lpos >= n1) break;
+       lhval = 0;
+     }
+     lhval = ((lhval & f_str->hmask) << f_str->kshft) + ppst->hsq[aa1[lpos]];
+     for (tpos = f_str->harr[lhval]; tpos >= 0; tpos = f_str->link[tpos]) {
+       if ((tscor = (dptr = &diagp[-tpos])->stop) >= 0) {
+#else
+   lposn0 = f_str->noff + lpos;
+   for (; lpos < n1; lpos++, lposn0++) {
+     if (hsq[aa1[lpos]]>=NMAP) {lhval = 0; goto loopl;}
+     lhval = ((lhval & f_str->hmask) << f_str->kshft) + ppst->hsq[aa1[lpos]];
+     for (tpos = f_str->harr[lhval]; tpos >= 0; tpos = f_str->link[tpos]) {
+       dpos = lposn0 - tpos;
+       if ((tscor = (dptr = &f_str->diag[dpos % nd])->stop) >= 0) {
+#endif
+	 /* the diagonal already has a run: extend it or start a new one */
+	 tscor += ktup;
+	 if ((tscor -= lpos) <= 0) {
+	   scor = dptr->score;
+	   if ((tscor += (kfact = f_str->pamh2[lhval])) < 0 && lowscor < scor) {
+#ifdef ALLOCN0
+	     lowscor = savemax (dptr, dpos, vmax, &lowmax);
+#else
+	     lowscor = savemax (dptr, dptr- f_str->diag, vmax, &lowmax);
+#endif
+	   }
+	   if ((tscor += scor) >= kfact) {
+	     dptr->score = tscor;
+	     dptr->stop = lpos;
+	   }
+	   else {
+	     dptr->score = kfact;
+	     dptr->start = (dptr->stop = lpos) - kt1;
+	   }
+	 }
+	 else {
+	   dptr->score += f_str->pamh1[aa0[tpos]];
+	   dptr->stop = lpos;
+	 }
+       }
+       else {
+	 /* first hit on this diagonal */
+	 dptr->score = f_str->pamh2[lhval];
+	 dptr->start = (dptr->stop = lpos) - kt1;
+       }
+     }				/* end tpos */
+
+#ifdef ALLOCN0
+      /* reinitialize diag structure */
+   loopl:
+     if ((dptr = &f_str->diag[lpos % nd])->score > lowscor)
+       lowscor = savemax (dptr, lpos, vmax, &lowmax);
+     dptr->stop = -1;
+     dptr->dmax = NULL;
+     dptr->score = 0;
+#endif
+   }	/* end lpos */
+
+   /* save whatever is still open on the diagonals */
+#ifdef ALLOCN0
+   for (tpos = 0, dpos = f_str->noff + n1 - 1; tpos < n0; tpos++, dpos--) {
+     if ((dptr = &f_str->diag[dpos % nd])->score > lowscor)
+       lowscor = savemax (dptr, dpos, vmax, &lowmax);	/* savemax() takes 4 arguments */
+   }
+#else
+   for (dptr = f_str->diag; dptr < dpmax;) {
+     if (dptr->score > lowscor) savemax (dptr, dptr - f_str->diag, vmax, &lowmax);
+     dptr->stop = -1;
+     dptr->dmax = NULL;
+     dptr++->score = 0;
+   }
+   f_str->ndo = nd;
+#endif
+
+   /* rescore the saved runs with the full matrix */
+   for (nsave = 0, vmptr = vmax; vmptr < &vmax[MAXSAV]; vmptr++) {
+     if (vmptr->score > 0) {
+       vmptr->score = spam (aa0, aa1, vmptr, ppst->pam2[ip], f_str);
+       vptr[nsave++] = vmptr;
+     }
+   }
+
+   if (nsave <= 0) {
+     rst->score[0] = rst->score[1] = rst->score[2] = 0;
+     return;
+   }
+
+#ifndef TFAST
+   /* FASTX code here to modify the start, stop points for
+      the three phases of the translated protein sequence
+   */
+
+   for (ib=0; ib<nsave; ib++) {
+     if (f_str->noff-vptr[ib]->dp+vptr[ib]->start >= n0x32)
+       vptr[ib]->dp += n0x32;
+     if (f_str->noff-vptr[ib]->dp +vptr[ib]->start >= n0x31)
+       vptr[ib]->dp += n0x31;
+   }
+#else
+   /* TFAST code here to modify the start, stop points for
+      the three phases of the translated protein sequence
+      TFAST modifies library start points, rather than
+      query start points
+   */
+
+   for (ib=0; ib<nsave; ib++) {
+     if (vptr[ib]->start >= n1x32) {
+       vptr[ib]->start -= n1x32;
+       vptr[ib]->stop -= n1x32;
+       vptr[ib]->dp -= n1x32;
+     }
+     if (vptr[ib]->start >= n1x31) {
+       vptr[ib]->start -= n1x31;
+       vptr[ib]->stop -= n1x31;
+       vptr[ib]->dp -= n1x31;
+     }
+   }
+#endif /* TFAST */
+
+   scor = sconn (vptr, nsave, c_gap,
+		 ppst->param_u.fa.pgap, f_str);
+
+   /* vmptr = best single region */
+   for (vmptr=vptr[0],ib=1; ib<nsave; ib++)
+     if (vptr[ib]->score > vmptr->score) vmptr=vptr[ib];
+
+/*  kssort (vptr, nsave); */
+
+   rst->score[1] = vmptr->score;
+   rst->score[0] = max (scor, vmptr->score);
+   rst->score[2] = rst->score[0];		/* initn */
+
+   s_info->tot_scores++;
+   if (rst->score[0] > c_gap) { s_info->s_cnt[0]++;}
+#ifndef TFAST
+   *hoff=f_str->noff - vmptr->dp;
+#else /* TFAST */
+   *hoff=vmptr->dp-f_str->noff;
+#endif /* TFAST */
+   if (ppst->param_u.fa.optflag) {
+     if (/* shuff_flg || */ rst->score[0] > opt_cut) {
+       s_info->s_cnt[2]++;
+       rst->valid_stat = 1;
+       rst->score[2] = dmatchz(aa0, n0,aa1,n1, aa1v,
+			       *hoff,ppst->param_u.fa.optwid,
+			       ppst->pam2[ip],
+			       ppst->gdelval,ppst->ggapval,ppst->gshift,
+			       f_str);
+     }
+   }
+}
+
+/* do_work() - score one library sequence; the common search entry point.
+
+   aa0, n0    query
+   aa1, n1    library sequence
+   frame      TFAST only: 0 selects pre_com() and saatran() frames
+              0..2, any other value selects pre_com_r() and frames
+              frame*3 .. frame*3+2
+   ppst       search parameters
+   f_str      work area from init_work()
+   qr_flg     unused
+   shuff_flg  passed through to do_fastz()
+   rst        result; escore, segnum, seglen and valid_stat are
+              initialised here, the scores come from do_fastz(), and
+              comp and H are set to -1.0 afterwards
+   s_info     score counters, updated by do_fastz()
+
+   If n1 < ktup all three scores are set to 0 and nothing else is done.
+*/
+void do_work (const unsigned char *aa0, int n0,
+	      const unsigned char *aa1, int n1,
+	      int frame,
+	      const struct pstruct *ppst,
+	      struct f_struct *f_str,
+	      int qr_flg, int shuff_flg, struct rstruct *rst,
+	      struct score_count_s *s_info)
+{
+  int hoff;
+#ifdef TFAST
+  int xlat_end, fr_k, xlat_n;
+  unsigned char *xlat;
+#endif
+
+  rst->valid_stat = 0;
+  rst->seglen = 1;
+  rst->segnum = 1;
+  rst->escore = 1.0;
+
+  if (n1 < ppst->param_u.fa.ktup) {
+    rst->score[2] = 0;
+    rst->score[1] = 0;
+    rst->score[0] = 0;
+    return;
+  }
+
+#ifdef TFAST
+  /* make a precomputed codon number series */
+  if (frame != 0) {
+    pre_com_r(aa1, n1, f_str->aa1v);
+  }
+  else {
+    pre_com(aa1, n1, f_str->aa1v);
+  }
+
+#ifdef DEBUG
+  if (frame > 1) {
+    fprintf(stderr, "*** fz_walign - frame: %d - out of range [0,1]\n",frame);
+  }
+#endif
+
+  /* make translated sequence: three frames back to back in aa1x,
+     with one position skipped after each frame */
+  xlat = f_str->aa1x;
+  xlat_end = 0;
+  for (fr_k = 0; fr_k < 3; fr_k++) {
+    xlat_n = saatran(aa1, &xlat[xlat_end], n1, frame*3 + fr_k);
+    xlat_end += xlat_n + 1;
+  }
+  xlat_n = xlat_end - 1;
+
+  do_fastz (aa0, n0, f_str->aa1x, xlat_n, f_str->aa1v, ppst, f_str, rst, &hoff, shuff_flg, s_info);
+#else
+  /* FASTX: the translated query and its codon windows were prepared
+     by init_work() */
+  do_fastz (f_str->aa0x, n0, aa1, n1, f_str->aa0v, ppst, f_str, rst, &hoff, shuff_flg, s_info);
+#endif
+
+  rst->H = -1.0;
+  rst->comp = rst->H;
+}
+
+/* do_opt() - rescore one library sequence with optimization forced on.
+
+   Same arguments as do_work() without the flags and counters.
+   ppst->param_u.fa.optflag is set to 1 for the duration of the
+   do_fastz() call and restored afterwards; the score counters go to
+   a local, discarded score_count_s.
+*/
+void do_opt (const unsigned char *aa0, int n0,
+	     const unsigned char *aa1, int n1,
+	     int frame,
+	     struct pstruct *ppst,
+	     struct f_struct *f_str,
+	     struct rstruct *rst)
+{
+  int saved_optflag, hoff;
+  struct score_count_s s_info = {0, 0, 0, 0};
+#ifdef TFAST
+  int xlat_end, fr_k, xlat_n;
+  unsigned char *xlat;
+#endif
+
+  saved_optflag = ppst->param_u.fa.optflag;
+  ppst->param_u.fa.optflag = 1;
+
+#ifdef TFAST
+  /* make a precomputed codon number series */
+  if (frame != 0) {
+    pre_com_r(aa1, n1, f_str->aa1v);
+  }
+  else {
+    pre_com(aa1, n1, f_str->aa1v);
+  }
+
+  /* make translated sequence: three frames back to back in aa1x */
+  xlat = f_str->aa1x;
+  xlat_end = 0;
+  for (fr_k = 0; fr_k < 3; fr_k++) {
+    xlat_n = saatran(aa1, &xlat[xlat_end], n1, frame*3 + fr_k);
+    xlat_end += xlat_n + 1;
+  }
+  xlat_n = xlat_end - 1;
+
+  do_fastz (aa0, n0, f_str->aa1x, xlat_n, f_str->aa1v, ppst, f_str, rst, &hoff, 0, &s_info );
+#else
+  do_fastz (f_str->aa0x, n0, aa1, n1, f_str->aa0v, ppst, f_str, rst, &hoff, 0, &s_info);
+#endif
+
+  ppst->param_u.fa.optflag = saved_optflag;
+}
+
+/* savemax - save the run described by dptr (on diagonal dpos) in vmax[];
+   returns (*lowmax)->score as it stands after the update */
+int
+savemax (struct dstruct *dptr, int dpos, 
+	 struct savestr *vmax, struct savestr **lowmax)
+{
+   struct savestr *slot = dptr->dmax;
+   struct savestr *scan;
+   int lowest;
+
+   if (slot == NULL || slot->dp != dpos || slot->start != dptr->start) {
+      /* not a continuation: overwrite the lowest-scoring entry */
+      slot = *lowmax;
+      lowest = slot->score = dptr->score;
+      slot->dp = dpos;
+      slot->start = dptr->start;
+      slot->stop = dptr->stop;
+      dptr->dmax = slot;
+   } else {
+      /* continuation of a run that is already saved */
+      slot->stop = dptr->stop;
+      lowest = dptr->score;
+      if (lowest <= slot->score) return (*lowmax)->score;
+      slot->score = lowest;
+      if (slot != *lowmax) return (*lowmax)->score;
+   }
+   /* the lowest entry changed: rescan for the new minimum */
+   for (scan = vmax; scan < vmax+MAXSAV; scan++) {
+     if (scan->score < lowest) { lowest = scan->score; *lowmax = scan; }
+   }
+   return lowest;
+}
+
+/* spam - rescore the saved run *dmax with pam2[][] and trim it to its
+   best-scoring sub-segment: dmax->start/stop are replaced by the limits of
+   that sub-segment and its score is returned.  If no position gives a
+   positive running score, 0 is returned and the limits are left as-is. */
+int spam (const unsigned char *aa0,
+	  const unsigned char *aa1,
+	  struct savestr *dmax, int **pam2,
+	  struct f_struct *f_str)
+{
+   int     lpos;
+   int     tot;
+   struct {
+     int     start, stop, score;
+   } curv, maxv;
+   const unsigned char *aa0p, *aa1p;
+
+   aa1p = &aa1[lpos = dmax->start];
+   aa0p = &aa0[lpos - dmax->dp + f_str->noff];
+
+   /* begin with the original limits: maxv used to be copied to *dmax
+      uninitialized when the run held no positive-scoring segment */
+   curv.start = maxv.start = lpos;
+   curv.stop = maxv.stop = dmax->stop;
+
+   tot = curv.score = maxv.score = 0;
+   for (; lpos <= dmax->stop; lpos++) {
+     tot += pam2[*aa0p++][*aa1p++];
+     if (tot > curv.score) {
+       curv.stop = lpos;
+       curv.score = tot;
+     }
+     else if (tot < 0) {
+       if (curv.score > maxv.score) maxv = curv;
+       tot = curv.score = 0;
+       curv.start = lpos+1;
+     }
+   }
+
+   if (curv.score > maxv.score) maxv = curv;
+
+/*	if (maxv.start != dmax->start || maxv.stop != dmax->stop)
+		printf(" new region: %3d %3d %3d %3d\n",maxv.start,
+			dmax->start,maxv.stop,dmax->stop);
+*/
+   dmax->start = maxv.start;
+   dmax->stop = maxv.stop;
+
+   return maxv.score;
+}
+
+#define XFACT 10	/* two runs may be joined while they overlap by fewer than XFACT positions */
+/* sconn - chain the saved runs in v[0..n-1]; returns the best chained score, or 0 if no run scores at least cgap */
+int sconn (struct savestr **v, int n, 
+       int cgap, int pgap, struct f_struct *f_str)
+{
+  int     i, si;
+  struct slink {
+    int     score;	/* score of the chain that ends with run vp */
+    struct savestr *vp;
+    struct slink *next;
+  }      *start, *sl, *sj, *so, sarr[MAXSAV];	/* start: head of the list; assumes n <= MAXSAV - TODO confirm */
+  int     lstart, tstart, plstop, ptstop;
+
+/*	sort the score left to right in lib pos */
+
+   kpsort (v, n);
+
+   start = NULL;
+
+/*	for the remaining runs, see if they fit */
+
+   for (i = 0, si = 0; i < n; i++)
+   {
+
+/*	if the score is less than the gap penalty, it never helps */
+      if (v[i]->score < cgap)
+	 continue;
+      lstart = v[i]->start;
+      tstart = lstart - v[i]->dp + f_str->noff;	/* same position expressed in the other sequence */
+
+/*	put the run in the group */
+      sarr[si].vp = v[i];
+      sarr[si].score = v[i]->score;
+      sarr[si].next = NULL;
+
+/* 	if it fits, then increase the score */
+      for (sl = start; sl != NULL; sl = sl->next)
+      {
+	 plstop = sl->vp->stop;
+	 ptstop = plstop - sl->vp->dp + f_str->noff;
+	 if (plstop < lstart+XFACT && ptstop < tstart+XFACT) {
+	   sarr[si].score = sl->score + v[i]->score + pgap;	/* pgap is added as given */
+	   break;	/* the first listed chain that fits is used */
+	 }
+      }
+
+/*	now recalculate where the score fits */
+      if (start == NULL)
+	 start = &sarr[si];
+      else
+	 for (sj = start, so = NULL; sj != NULL; sj = sj->next)
+	 {
+	    if (sarr[si].score > sj->score)	/* insert in front of the first lower-scoring entry */
+	    {
+	       sarr[si].next = sj;
+	       if (so != NULL)
+		  so->next = &sarr[si];
+	       else
+		  start = &sarr[si];
+	       break;
+	    }
+	    so = sj;
+	 }	/* NOTE(review): an entry that beats no listed score is never linked in, so later runs cannot chain to it - confirm intended */
+      si++;
+   }
+
+   if (start != NULL)
+      return (start->score);	/* head of the list = highest chained score */
+   else
+      return (0);
+}
+
+/* kssort - Shell sort of the n pointers in v[] into descending ->score
+   order (defined with a prototype, like kpsort() below) */
+void
+kssort (struct savestr **v, int n)
+{
+   int     gap, i, j;
+   struct savestr *tmp;
+
+   for (gap = n / 2; gap > 0; gap /= 2)
+      for (i = gap; i < n; i++)
+	 for (j = i - gap; j >= 0; j -= gap)
+	 {
+	    if (v[j]->score >= v[j + gap]->score)
+	       break;
+	    tmp = v[j];
+	    v[j] = v[j + gap];
+	    v[j + gap] = tmp;
+	 }
+}
+
+/* kpsort - Shell sort of v[0..n-1] into increasing ->start order,
+   using the fixed gap sequence 21, 7, 3, 1 */
+void
+kpsort (struct savestr **v, int n) {
+  static const int gaps[4] = { 21, 7, 3, 1 };
+  struct savestr *moving;
+  int pass, step, pos, hole;
+
+  for (pass = 0; pass < 4; pass++) {
+    step = gaps[pass];
+    for (pos = step; pos < n; pos++) {
+      moving = v[pos];
+      /* slide larger entries up by one step until moving fits */
+      for (hole = pos; hole >= step; hole -= step) {
+	if (v[hole - step]->start <= moving->start) break;
+	v[hole] = v[hole - step];
+      }
+      v[hole] = moving;
+    }
+  }
+}
+
+/* dmatchz - banded optimization: calls lx_band() on a band of width
+   'window' whose first diagonal is hoff - window/2, with the gap and
+   frameshift values negated.  With OLD_FASTA_GAP the open argument is
+   -(gdelval - ggapval) instead of -gdelval. */
+static int
+dmatchz(const unsigned char *aa0, int n0,
+	const unsigned char *aa1, int n1,
+	const unsigned char *aa1v,
+	int hoff, int window, 
+	int **pam2, int gdelval, int ggapval, int gshift,
+	struct f_struct *f_str)
+{
+   const int band_start = hoff - window/2;
+   const int ext_arg = -ggapval;
+   const int shift_arg = -gshift;
+#ifdef OLD_FASTA_GAP
+   const int open_arg = -(gdelval - ggapval);
+#else
+   const int open_arg = -gdelval;
+#endif
+
+#ifndef TFAST
+   /* aa1 is passed as lx_band()'s protein, f_str->aa0v as its DNA series */
+   return lx_band(aa1, n1, f_str->aa0v, n0-2,
+		  pam2, open_arg, ext_arg, shift_arg,
+		  band_start, window, f_str);
+#else
+   /* aa0 is passed as lx_band()'s protein, aa1v as its DNA series */
+   return lx_band(aa0, n0, aa1v, n1-2,
+		  pam2, open_arg, ext_arg, shift_arg,
+		  band_start, window, f_str);
+#endif
+}
+
+/* init_row - zero C1..C3, I1..I3 and flag in each of row[0..sp-1] */
+static void
+init_row(struct sx_s *row, int sp) {
+  struct sx_s *cell = row;
+  for (; sp > 0; sp--, cell++) {
+    cell->C1 = cell->C2 = cell->C3 = 0;
+    cell->I1 = cell->I2 = cell->I3 = 0;
+    cell->flag = 0;
+  }
+}
+
+int lx_band(const unsigned char *prot_seq,  /* array with protein sequence numbers*/
+	    int len_prot,    /* length of prot. seq */
+	    const unsigned char *dna_prot_seq, /* translated DNA sequence numbers*/
+	    int len_dna_prot,   /* length trans. seq. */
+	    int **pam_matrix,   /* scoring matrix (not referenced here: weights come from f_str->weight0) */
+	    int gopen, int gext, /* gap open, gap extend penalties */
+	    int gshift,         /* frame-shift penalty (not referenced in this function) */
+	    int start_diag,     /* start diagonal of band */
+	    int width,         /* width for band alignment */
+	    struct f_struct *f_str)	/* supplies weight0[] and the cached row buffer cur */
+{	/* score-only banded alignment of prot_seq against dna_prot_seq; no cell score falls below 0 */
+  void *ckalloc();
+  int i, j, bd, bd1, x1, x2, sp, p1=0, p2=0, end_prot;
+  struct sx_s *last, *tmp;
+  int sc, del, best = 0, cd,ci, e1, e2, e3, cd1, cd2, cd3, f, gg;
+  const unsigned char *dp;
+  register struct sx_s *ap, *aq;
+  struct wgt *wt, *ww;
+  int aa, b, a,x,y,z;
+  /* j, p1, p2, last, tmp, cd, f, aa, b, a, x, y and z are declared but never used below */
+  sp = width+7;	/* number of cells in the row buffer */
+  gg = gopen+gext;	/* subtracted from a cell score before it can start a gap */
+  /* sp = sp/3+1; */
+  /* the row buffer is kept in f_str and reallocated only when its size changes */
+  if (f_str->cur == NULL ) {
+    f_str->cur_sp_size = sp;
+    f_str->cur = (struct sx_s *) ckalloc(sizeof(struct sx_s)*sp);
+  }
+  else if (f_str->cur_sp_size != sp) {
+    free(f_str->cur);
+    f_str->cur = (struct sx_s *) ckalloc(sizeof(struct sx_s)*sp);
+    f_str->cur_sp_size = sp;
+  }
+
+  init_row(f_str->cur, sp);
+
+  /*
+  if (start_diag %3 !=0) start_diag = start_diag/3-1;
+  else start_diag = start_diag/3;
+  if (width % 3 != 0) width = width/3+1;
+  else width = width /3;
+  */
+
+  x1 = start_diag; 		/* x1 = lower bound of DNA */
+  x2 = 1;               /* the amount of position shift from last row*/
+
+  end_prot = max(0,-width-start_diag) + (len_dna_prot+5)/3 + width;
+  end_prot = min(end_prot,len_prot);
+  /* NOTE(review): end_prot is computed but the loop below runs to len_prot - confirm */
+  /* i counts through protein sequence, x1 through DNAp */
+
+  for (i = max(0, -width-start_diag), x1+=i; i < len_prot; i++, x1++) {
+      bd = min(x1+width, (len_dna_prot+2)/3);	/* upper bound of band */
+      bd1 = max(0,x1);	                /* lower bound of band */
+      wt = f_str->weight0[prot_seq[i]];	/* weights for this protein residue */
+      del = 1-x1;   /*adjustment*/
+      bd += del; 
+      bd1 +=del;	/* after the shift bd1 >= 1, so cur[bd1-1] below is a valid index */
+
+      ap = &f_str->cur[bd1]; aq = ap+1;
+      e1 = f_str->cur[bd1-1].C3; e2 = ap->C1; cd1 = cd2= cd3= 0;
+      for (dp = &dna_prot_seq[(bd1-del)*3]; ap < &f_str->cur[bd]; ap++) {	/* three DNA positions (C1/I1, C2/I2, C3/I3) per cell */
+	  ww = &wt[(unsigned char) *dp++];
+	  sc = max(max(e1+ww->iv, (e3=ap->C2)+ww->ii), e2+ww->iii);
+	  if (cd1 > sc) sc = cd1;
+	  cd1 -= gext;
+	  if ((ci = aq->I1) > 0) {
+	      if (sc < ci) { ap->C1 = ci; ap->I1 = ci-gext;}
+	      else {
+		  ap->C1 = sc;
+		  sc -= gg;
+		  if (sc > 0) {
+		      if (sc > best) best =sc;	/* best tracks (cell score - gg) */
+		      if (cd1 < sc) cd1 = sc;
+		      ap->I1 = max(ci-gext, sc);
+		  } else ap->I1 = ci-gext;
+	      }
+	  } else {
+	      if (sc <= 0) {
+		  ap->I1 = ap->C1 = 0;
+	      } else {
+		  ap->C1 = sc; sc-=gg;
+		  if (sc >0) {
+		      if (sc > best) best =sc;
+		      if (cd1 < sc) cd1 = sc;
+		      ap->I1 = sc;
+		  } else ap->I1 = 0;
+	      }
+	  }
+	  ww = &wt[(unsigned char) *dp++];
+	  sc = max(max(e2+ww->iv, (e1=ap->C3)+ww->ii), e3+ww->iii);
+	  if (cd2 > sc) sc = cd2;
+	  cd2 -= gext;
+	  if ((ci = aq->I2) > 0) {
+	      if (sc < ci) { ap->C2 = ci; ap->I2 = ci-gext;}
+	      else {
+		  ap->C2 = sc;
+		  sc -= gg;
+		  if (sc > 0) {
+		      if (sc > best) best =sc;
+		      if (cd2 < sc) cd2 = sc;
+		      ap->I2 = max(ci-gext, sc);
+		  }	/* NOTE(review): unlike the I1 case, I2 is left unchanged when sc <= 0 here - confirm */
+	      }
+	  } else {
+	      if (sc <= 0) {
+		  ap->I2 = ap->C2 = 0;
+	      } else {
+		  ap->C2 = sc; sc-=gg;
+		  if (sc >0) {
+		      if (sc > best) best =sc;
+		      if (cd2 < sc) cd2 = sc;
+		      ap->I2 = sc;
+		  } else ap->I2 = 0;
+	      }
+	  }
+	  ww = &wt[(unsigned char)*dp++];
+	  sc = max(max(e3+ww->iv, (e2=aq->C1)+ww->ii), e1+ww->iii);
+	  if (cd3 > sc) sc = cd3;
+	  cd3 -= gext;
+	  if ((ci = aq++->I3) > 0) {
+	      if (sc < ci) { ap->C3 = ci; ap->I3 = ci-gext;}
+	      else {
+		  ap->C3 = sc;
+		  sc -= gg;
+		  if (sc > 0) {
+		      if (sc > best) best =sc;
+		      if (cd3 < sc) cd3 = sc;
+		      ap->I3 = max(ci-gext, sc);
+		  }	/* NOTE(review): unlike the I1 case, I3 is left unchanged when sc <= 0 here - confirm */
+	      }
+	  } else {
+	      if (sc <= 0) {
+		  ap->I3 = ap->C3 = 0;
+	      } else {
+		  ap->C3 = sc; sc-=gg;
+		  if (sc >0) {
+		      if (sc > best) best =sc;
+		      if (cd3 < sc) cd3 = sc;
+		      ap->I3 = sc;
+		  } else ap->I3 = 0;
+	      }
+	  }
+      }
+  }
+  /*  printf("The best score is %d\n", best); */
+  return best+gg;	/* gg is added back; when no cell exceeded gg the result is gg itself */
+}
+
+/* ckalloc - malloc() that reports failure through w_abort() */
+void *ckalloc(size_t amount)
+{
+	void *mem = malloc(amount);
+	if (mem == NULL) {
+		w_abort("Ran out of memory.","");
+	}
+	return mem;
+}
+
+/* shscore - score of aa0[0..n0-1] aligned with itself (100% identity) */
+int
+shscore(unsigned char *aa0, int n0, int **pam2)
+{
+  const unsigned char *res = aa0;
+  int total = 0;
+  for (; n0 > 0; n0--, res++) total += pam2[*res][*res];
+  return total;
+}
+
+#define WIDTH 60	/* alignment columns per output row in display_alig() */
+/* one edge of an alignment path; edges are kept in a singly linked list */
+typedef struct mat *match_ptr;
+
+typedef struct mat {
+	int i, j, l;	/* i, j: end position of the edge; l: type of the edge */
+	match_ptr next;
+} match_node;
+
+typedef struct { int i,j;} state;	/* a position (i, j); local_align() records alignment start points in these */
+typedef state *state_ptr;
+
+/* forward declarations, old style (no parameter lists) */
+void *ckalloc();
+static match_ptr small_global(), global();
+static int local_align(), find_best();
+static void init_row2(), init_ROW();
+
+/* pro_dna - align a protein with a translated DNA sequence.  local_align()
+   finds the end points (stored in a_res->min0/min1/max0/max1), global()
+   builds the operations, of which at most max_res are copied to a_res->res[].
+   Returns the local_align() score. */
+int
+pro_dna(const unsigned char *prot_seq,  /* array with prot. seq. numbers*/
+	int len_prot,    /* length of prot. seq */
+	const unsigned char *dna_prot_seq, /* trans. DNA seq. numbers*/
+	int len_dna_prot,   /* length trans. seq. */
+	int **pam_matrix,   /* scoring matrix */
+	int gopen, int gext, /* gap open, gap extend penalties */
+	int gshift,         /* frame-shift penalty */
+	struct f_struct *f_str,
+	int max_res,
+	struct a_res_str *a_res) /* alignment info */
+{
+  match_ptr align, ap, aq;
+  int x, y, ex, ey, i, score;
+  int *alignment;
+
+  f_str->up = (st_ptr) ckalloc(sizeof(struct st_s)*(len_dna_prot+10));
+  f_str->down = (st_ptr) ckalloc(sizeof(struct st_s)*(len_dna_prot+10));
+  f_str->tp = (st_ptr) ckalloc(sizeof(struct st_s)*(len_dna_prot+10));
+
+  /* x, y: start of the best local alignment; ex, ey: its end */
+  score= local_align(&x, &y, &ex, &ey, 
+		     pam_matrix, gopen, gext,
+		     dna_prot_seq, len_dna_prot,
+		     prot_seq, len_prot, f_str);
+
+  /* global_down() indexes up to 3 entries below element 0 of these rows */
+  f_str->up += 3; f_str->down += 3; f_str->tp += 3;
+  /* x, y - start in prot, dna_prot */
+  a_res->min0 = x;	/* prot */
+  a_res->min1 = y;	/* DNA */
+  a_res->max0 = ex;	/* prot */
+  a_res->max1 = ey;	/* DNA */
+
+  align = global(x, y, ex, ey, 
+		 pam_matrix, gopen, gext,
+		 dna_prot_seq, prot_seq,
+		 0, 0, f_str);
+
+  alignment = a_res->res;
+  /* copy the edge types and free the list; i counts every edge */
+  for (ap = align, i= 0; ap; i++) {
+    if (i < max_res) alignment[i] = ap->l;
+    aq = ap->next; free(ap); ap = aq;
+  }
+  if (i > max_res) {	/* exactly max_res edges still fit */
+    fprintf(stderr,"***alignment truncated: %d/%d***\n", max_res,i);
+    i = max_res;	/* only max_res entries were stored in res[] */
+  }
+  free(&f_str->up[-3]); free(&f_str->tp[-3]); free(&f_str->down[-3]);
+  a_res->nres = i;
+  return score;
+}
+
+static void
+swap(void **a, void **b)
+{	/* exchange the two pointers stored at *a and *b */
+    void *held = *b;
+    *b = *a;   *a = held;
+}
+
+/*
+   local_align - find the best local alignment.  *x, *y receive its
+   starting position (protein, DNA), *ex, *ey its ending position, and
+   the best score is returned.  All four are 0 if no cell scores above 0.
+*/
+static int
+local_align(int *x, int *y, int *ex, int *ey, 
+	    int **wgts, int gop, int gext,
+	    const unsigned char *dnap, int ld,
+	    const unsigned char *pro, int lp,
+	    struct f_struct *f_str)
+{
+  int i, j,  score, x1 = 0, x2 = 0, x3 = 0, x4 = 0, e1 = 0, e2 = 0, e3,
+    sc, del,  e, best = 0,  cd, ci, c;
+  struct wgt *wt, *ww;
+  state_ptr cur_st, last_st, cur_i_st;
+  st_ptr cur, last;
+  const unsigned char *dp;
+  int *cur_d_st, *st_up;
+
+  /*      
+	  Array rowiC stores the best scores of alignment ending at a position
+	  Arrays rowiD and rowiI store the best scores of alignment ending
+	  at a position with a deletion or insertion
+	  Arrays sti stores the starting position of the best alignment whose
+	  score stored in the corresponding row array.
+	  The program stores two rows to complete the computation, same is
+	  for the global alignment routine.
+  */
+
+
+  st_up = (int *) ckalloc(sizeof(int)*(ld+10));
+  init_row2(st_up, ld+5);
+
+  ld += 2;
+
+  init_ROW(f_str->up, ld+1);
+  init_ROW(f_str->down, ld+1);
+  cur = f_str->up+1;
+  last = f_str->down+1; 
+
+  cur_st = (state_ptr) ckalloc(sizeof(state)*(ld+1));
+  last_st = (state_ptr) ckalloc(sizeof(state)*(ld+1));
+  cur_i_st = (state_ptr) ckalloc(sizeof(state)*(ld+1));
+  cur_d_st = st_up; 
+  dp = dnap-2;
+  for (i = 0; i < lp; i++) {
+    wt = f_str->weight1[pro[i]];  e2 =0; e1 = last[0].C;
+    for (j = 0; j < 2; j++) {
+      cur_st[j].i = i+1;
+      cur_st[j].j = j+1;
+    }
+    for (j = 2; j < ld; j++) {
+      ww = &wt[(unsigned char) dp[j]];
+      del = -1;
+      if (j >= 3) {
+	sc = 0;
+	e3 = e2; e2 = e1;
+	e1 = last[j-2].C; 
+	if ((e=e2+ww->iii) > sc) {sc = e; del = 3;}
+	if ((e=e1+ww->ii) > sc) {sc = e; del = 2;}
+	if ((e = e3+ww->iv) > sc) {sc = e; del = 4;} 
+      } else {
+	sc = e2  = 0;
+	if (ww->iii > 0) {sc = ww->iii; del = 3;}
+      }
+      if (sc < (ci=last[j].I)) {
+	sc = ci; del = 0;
+      }
+      if (sc < (cd=cur[j].D)) {
+	sc = cd; del = 5;
+      }
+      cur[j].C = sc;
+      e = sc  - gop;
+      if (e > cd) {
+	cur[j+3].D = e-gext;
+	cur_d_st[j+3] = 3;
+      } else {
+	cur[j+3].D = cd-gext;
+	cur_d_st[j+3] = cur_d_st[j]+3;
+      }
+      switch(del) {
+      case 5:
+	c = cur_d_st[j];
+	cur_st[j].i = cur_st[j-c].i;
+	cur_st[j].j = cur_st[j-c].j;
+	break;
+      case 0:
+	cur_st[j].i = cur_i_st[j].i;
+	cur_st[j].j = cur_i_st[j].j;
+	break;
+      case 2:
+      case 3:
+      case 4:
+	if (i) {
+	  if (j-del >= 0) {
+	    cur_st[j].i = last_st[j-del].i;
+	    cur_st[j].j = last_st[j-del].j;
+	  } else {
+	    cur_st[j].i = i;
+	    cur_st[j].j = 0;
+	  }
+	} else {
+	  cur_st[j].i = 0;
+	  cur_st[j].j = max(0, j-del+1);
+	}
+	break;
+      case -1:
+	cur_st[j].i = i+1;
+	cur_st[j].j = j+1;
+	break;
+      }
+      if (e > ci) {
+	cur[j].I  = e -gext;
+	cur_i_st[j].i = cur_st[j].i;
+	cur_i_st[j].j = cur_st[j].j;
+      } else {
+	cur[j].I  = ci- gext;
+      }
+      if (sc > best) {
+	x1 = cur_st[j].i;
+	x2 = cur_st[j].j;
+	best =sc;
+	x3 = i;
+	x4 = j;
+      }
+    }
+    swap((void *)&last, (void *)&cur);
+    swap((void *)&cur_st, (void *)&last_st);
+  }
+  /*	printf("The best score is %d\n", best);*/
+  *x = x1; *y = x2; *ex = x3; *ey = x4;	/* x1..x4 stay 0 when best never rose above 0 */
+  free(cur_st); free(last_st); free(cur_i_st); 
+  free(st_up);
+  return best;
+}
+
+/* 
+   Both global_up and global_down compute linear-space, score-only global
+   alignments on the subsequences pro[x]...pro[ex] and dna[y]...dna[ey].
+   global_up runs the algorithm upwards, from row x towards row ex.
+   global_down runs the algorithm downwards, from row ex towards row x.
+*/
+
+static void
+global_up(st_ptr *row1, st_ptr *row2, 
+	  int x, int y, int ex, int ey, 
+	  int **wgts, int gop, int gext,
+	  unsigned char *dnap, unsigned char *pro, 
+	  int N, struct f_struct *f_str)
+{	/* on return *row1 points at the last row computed; k, e, score and wgts are not used */
+  int i, j, k, sc, e, e1, e2, e3, t, ci, cd, score;
+  struct wgt *wt, *ww;
+  st_ptr cur, last;
+  /* boundary row: C is set at every third column, other columns get the very low score -10000 */
+  cur = *row1; last = *row2;
+  sc = -gop;
+  for (j = 0; j <= ey-y+1; j++) {
+    if (j % 3 == 0) {last[j].C = sc; sc -= gext; last[j].I = sc-gop;}
+    else { last[j].I = last[j].C = -10000;}
+  }  
+  last[0].C = 0; cur[0].D = cur[1].D = cur[2].D = -10000;
+  last[0].D = last[1].D = last[2].D = -10000;
+  if (N) last[0].I = -gext;	/* N set: column 0 starts with I = -gext instead of the value set above */
+  for (i = 1; i <= ex-x+1; i++) {	/* one pass per protein residue pro[x] .. pro[ex] */
+    wt = f_str->weight1[pro[i+x-1]]; e1 = -10000; e2 = last[0].C;
+    for (j = 0; j <= ey-y+1; j++) {
+      t = j+y;
+      sc = -10000; 
+      ww = &wt[(unsigned char) dnap[t-3]]; 	/* NOTE(review): t-3 is negative while j+y < 3; presumably dnap has leading padding - confirm */
+      if (j < 4) {
+	if (j == 3) {
+	  sc = e2+ww->iii;
+	} else if (j == 2) {
+	  sc = e2 + ww->ii;
+	}
+      } else {
+	e3 = e2; e2 = e1;	/* e1, e2, e3 hold last[j-2].C, last[j-3].C, last[j-4].C */
+	e1 = last[j-2].C;
+	sc = max(e2+ww->iii, max(e1+ww->ii, e3+ww->iv));
+      }
+      sc = max(sc, max(ci=last[j].I, cd = cur[j].D));
+      cur[j].C = sc;
+      cur[j+3].D = max(cd, sc-gop)-gext;	/* D is written three columns ahead */
+      cur[j].I = max(ci, sc-gop)-gext;
+    }
+    swap((void *)&last, (void *)&cur);	/* last now holds the row just computed */
+  }
+  /*printf("global up score =%d\n", last[ey-y+1].C);*/
+  for (i = 0; i <= ey-y+1; i++) last[i].I = cur[i].I;	
+  if (*row1 != last) swap((void *)row1, (void *)row2);
+}
+
+static void
+global_down(st_ptr *row1, st_ptr *row2,
+	    int x, int y, int ex, int ey, 
+	    int **wgts, int gop, int gext,
+	    unsigned char *dnap, unsigned char *pro,
+	    int N, struct f_struct *f_str)
+{	/* on return *row1 points at the last row computed; k, del, tmp, e, score and wgts are not used */
+  int i, j, k, sc, del, *tmp, e,  t, e1,e2,e3, ci,cd, score;
+  struct wgt *wt, *w1, *w2, *w3;
+  st_ptr cur, last;
+  /* boundary row, filled from the last column towards column 0 */
+  cur = (*row1); last = *row2;
+  sc = -gop;
+  for (j = ey-y+1; j >= 0; j--) {
+    if ((ey-y+1-j) % 3) {last[j].C = sc; sc-=gext; last[j].I = sc-gop;}	/* NOTE(review): true when the distance is NOT a multiple of 3, the opposite sense of global_up's j % 3 == 0 - confirm */
+    else  last[j].I =  last[j].C = -10000;
+    cur[j].I = -10000;
+  } 
+  last[ey-y+1].C = 0;
+  if (N) last[ey-y+1].I = -gext;
+  cur[ey-y+1].D = cur[ey-y].D = cur[ey-y-1].D = -10000;
+  last[ey-y+1].D = last[ey-y].D = last[ey-y-1].D = -10000;
+  for (i = ex-x; i >= 0; i--) {	/* one pass per protein residue pro[ex] .. pro[x] */
+    wt = f_str->weight1[pro[i+x]]; e2 = last[ey-y+1].C; 
+    e1 = -10000;
+    w3 = &wt[(unsigned char) dnap[ey]]; 
+    w2 = &wt[(unsigned char) dnap[ey-1]];
+    for (j = ey-y+1; j >= 0; j--) {
+      t = j+y;
+      w1 = &wt[(unsigned char) dnap[t-1]];
+      sc = -10000;
+      if (t+3 > ey) {
+	if (t+2 == ey) {
+	  sc = e2+w2->iii; 
+	} else if (t+1 == ey) {
+	  sc = e2+w1->ii;
+	}
+      } else {
+	e3 = e2; e2 = e1;
+	e1 = last[j+2].C;
+	sc = max(e2+w2->iii, max(e1+w1->ii,e3+w3->iv)) ;
+      }
+      if (sc < (cd= cur[j].D)) {
+	sc = cd; 
+	cur[j-3].D = cd-gext;	/* j-3 is negative for j < 3: the row needs 3 valid entries below index 0 */
+      } else cur[j-3].D =max(cd, sc-gop)-gext;
+      if (sc < (ci= last[j].I)) {
+	sc = ci;
+	cur[j].I = ci - gext;
+      } else cur[j].I = max(sc-gop,ci)-gext;
+      cur[j].C = sc;
+      w3 = w2; w2 = w1;	/* shift the weight pointers along with j */
+    }
+    swap((void *)&last, (void *)&cur);	/* last now holds the row just computed */
+  }
+  for (i = 0; i <= ey-y+1; i++) last[i].I = cur[i].I;
+  if (*row1 != last) swap((void *)row1, (void *)row2);
+}
+
+static void
+init_row2(int *row, int ld) {
+  int *cell = row;	/* zero row[0..ld-1] */
+  while (ld-- > 0) *cell++ = 0;
+}
+
+static void init_ROW(st_ptr row, int ld) {
+  st_ptr cell = row;	/* clear the C, D and I score of row[0..ld-1] */
+  for (; ld > 0; ld--, cell++) cell->C = cell->D = cell->I = 0;
+}
+
+/* combine - append list x2 to x1 (returns x2 if x1 is empty); with st set, x2 is adjusted as below */
+static match_ptr
+combine(match_ptr x1, match_ptr x2, int st) {
+  match_ptr x;
+  if (x1 == NULL) return x2;
+  for (x = x1; x->next; x = x->next);
+  x->next = x2;
+  if (st) {
+    /* bump ->j up to and including the first 3/4 edge, then shorten it */
+    for (x = x2; x; x = x->next) {
+      x->j++;
+      if (x->l == 3 || x->l == 4) break;
+    }
+    if (x != NULL) x->l--;	/* x is NULL when x2 holds no 3/4 edge */
+  }
+  return x1;
+}
+
+/*
+   global uses the two score-only, linear-space global alignment
+   subroutines (global_up and global_down) to recursively build the
+   alignment.
+*/
+
+match_ptr
+global(int x, int y, int ex, int ey,
+       int **wgts, int gop, int gext,
+       unsigned char *dnap, unsigned char *pro, int N1, int N2,
+       struct f_struct *f_str)
+{	/* returns the edge list for rows x..ex and columns y..ey */
+  int m;
+  int m1, m2;
+  match_ptr x1, x2, mm1, mm2;
+  /* nodes made in the two base cases below only get ->l and ->next set */
+  /*printf("%d %d %d %d %d %d\n", x,y, ex, ey, N1, N2);*/
+  /*
+    if the space required is limited, we can do a quadratic space
+    algorithm to find the alignment.
+  */
+  /* base case 1: at most one protein row is left */
+  if (ex <= x) {
+    mm1  = NULL;
+    for (m = y+3; m <= ey; m+=3) {	/* one type-5 edge per three columns */
+      x1 = (match_ptr) ckalloc(sizeof(match_node));
+      x1->l = 5; x1->next = mm1; 
+      if (mm1== NULL) mm2 = x1;	/* mm2 remembers the first node made, i.e. the tail of the list */
+      mm1 = x1;
+    }
+    if (ex == x) {
+      if ((ey-y) % 3 != 0) {
+	x1  = (match_ptr) ckalloc(sizeof(match_node));
+	x1->l = ((ey-y) % 3) +1; x1->next = NULL;
+	if (mm1) mm2->next = x1; else mm1 = x1;
+      } else mm2->l = 4;	/* NOTE(review): mm2 is unset here if the loop above made no node (ey < y+3, e.g. ey == y) - confirm this cannot happen */
+    }
+    return mm1;
+  }
+  if (ey <= y) {	/* base case 2: one type-0 edge per row x..ex */
+    mm1  = NULL;
+    for (m = x; m <= ex; m++) {
+      x1 = (match_ptr) ckalloc(sizeof(match_node));
+      x1->l = 0; x1->next = mm1; mm1 = x1;
+    }
+    return mm1;
+  }
+  if (ex -x < SGW1 && ey-y < SGW2) 
+    return small_global(x,y,ex,ey,wgts, gop, gext,  dnap, pro, N1, N2,f_str);
+  m = (x+ex)/2;
+  /*     
+	 Do the score only global alignment from row x to row m, m is
+	 the middle row of x and ex. Store the information of row m in
+	 upC, upD, and upI.
+  */
+  global_up(&f_str->up, &f_str->tp,  x, y, m, ey,
+	    wgts, gop, gext,
+	    dnap, pro, N1, f_str);
+  /* 
+     Do the score only global alignment downwards from row ex
+     to row m+1, store information of row m+1 in downC downI and downD
+  */
+  global_down(&f_str->down, &f_str->tp, m+1, y, ex, ey,
+	      wgts, gop, gext,
+	      dnap, pro, N2, f_str);
+
+  /*
+    Use this information for row m and m+1 to find the crossing
+    point of the best alignment with the middle row. The crossing
+    point is given by m1 and m2. Then we recursively call global
+    itself to compute alignments in two smaller regions found by
+    the crossing point and combine the two alignments to form a
+    whole alignment. Return that alignment.
+  */
+  if (find_best(f_str->up, f_str->down, &m1, &m2, ey-y+1, y, gop)) {	/* non-zero: the best crossing uses the C scores */
+    x1 = global(x, y, m, m1, wgts, gop, gext, dnap, pro, N1, 0, f_str);
+    x2 = global(m+1, m2, ex, ey, wgts, gop, gext, dnap, pro, 0, N2, f_str);
+    if (m1 == m2) x1 = combine(x1,x2,1);
+    else x1 = combine(x1, x2,0);
+  } else {	/* zero: the crossing uses the I scores; rows m and m+1 become two type-0 edges */
+    x1 = global(x, y, m-1, m1, wgts, gop, gext, dnap, pro, N1, 1, f_str);
+    x2 = global(m+2, m2, ex, ey, wgts, gop, gext, dnap, pro, 1, N2, f_str);
+    mm1 = (match_ptr) ckalloc(sizeof(match_node));
+    mm1->i = m; mm1->l = 0; mm1->j = m1;
+    mm2 = (match_ptr) ckalloc(sizeof(match_node));
+    mm2->i = m+1; mm2->l = 0; mm2->j = m1;
+    mm1->next = mm2; mm2->next = x2;
+    x1 = combine(x1, mm1, 0);
+  }
+  return x1;
+}
+
+/* find_best - pick the column i in 1..ld-1 with the largest combined score,
+   up[i].C+down[i].C (returns 1) or up[i].I+down[i].I+gop (returns 0), and
+   set *m1 = i-1+y, *m2 = i+y.  If nothing beats -1000, i is 0, result 1. */
+static int
+find_best(st_ptr up, st_ptr down, int *m1, int *m2, int ld, int y, int gop) {
+  int i, best = -1000, j = 0, s2, s4, st = 1;	/* st preset: was returned uninitialized */
+  for (i = 1; i < ld; i++) {
+    s2 = up[i].C + down[i].C;
+    s4 = up[i].I + down[i].I + gop;
+    if (best < s2) {
+      best = s2; j = i; st = 1;
+    }
+    if (best < s4) {
+      best = s4; j = i; st = 0;
+    }
+  }
+  *m1 = j-1+y;
+  *m2 = j+y;
+  return st;
+}
+
+/*
+   An alignment is represented as a linked list whose elements
+   are of type match_node. Each element represents an edge in the
+   path of the alignment graph. The fields of match_node are:
+   l ---  gives the type of the edge.
+   i, j --- give the end position.
+*/
+
+static match_ptr
+small_global(int x, int y, int ex, int ey, 
+	     int **wgts, int gop, int gext,
+	     unsigned char *dnap, unsigned char *pro,
+	     int N1, int N2, struct f_struct *f_str) {
+  /* full-matrix alignment with traceback; global() calls this only when ex-x < SGW1 and ey-y < SGW2 */
+  /* int C[SGW1+1][SGW2+1], st[SGW1+1][SGW2+1], D[SGW2+7], I[SGW2+1]; */
+  int i, j, e, sc, score, del, k, t,  ci, cd;
+  int *cI, *cD, *cC, *lC, *cst, e2, e3, e4;
+  match_ptr mp, first;
+  struct wgt *wt, *ww;
+  /* the score and traceback tables live in f_str->smgl_s; score, e3 and wgts are not used */
+  /*printf("small_global %d %d %d %d\n", x, y, ex, ey);*/
+  sc = -gop-gext; f_str->smgl_s.C[0][0] = 0; 
+
+  cI = f_str->smgl_s.I;
+  if (N1) cI[0] = -gext; else cI[0] = sc;
+  /* row 0: C is set at every third column, other columns get the very low score -10000 */
+  for (j = 1; j <= ey-y+1; j++) {
+    if (j % 3== 0) {
+      f_str->smgl_s.C[0][j] = sc;
+      sc -= gext;
+      cI[j] = sc-gop;
+    }
+    else {
+      cI[j] = f_str->smgl_s.C[0][j] = -10000;
+    }
+    f_str->smgl_s.st[0][j] = 5;	/* row 0 traces back through type-5 edges */
+  }
+
+  lC = &f_str->smgl_s.C[0][0];
+  cD = f_str->smgl_s.D; cD[0] = cD[1] = cD[2] = -10000;
+  for (i = 1; i <= ex-x+1; i++) {	/* one row per protein residue pro[x] .. pro[ex] */
+    cC = &f_str->smgl_s.C[i][0];	
+    wt = f_str->weight1[pro[i+x-1]]; cst = &f_str->smgl_s.st[i][0];
+    for (j = 0; j <=ey-y+1; j++) {
+      ci = cI[j];
+      cd= cD[j];
+      t = j+y;
+      ww = &wt[(unsigned char) dnap[t-3]];	/* NOTE(review): t-3 is negative while j+y < 3; presumably dnap has leading padding - confirm */
+      if (j >= 4) {
+	sc = lC[j-3]+ww->iii; e2 = lC[j-2]+ww->ii;  
+	e4 = lC[j-4]+ww->iv; del = 3;
+	if (e2 > sc) { sc = e2; del = 2;}
+	if (e4 >= sc) { sc = e4; del = 4;}
+      } else {
+	if (j == 3) {
+	  sc = lC[0]+ww->iii; del =3;
+	} else if (j == 2) {
+	  sc = lC[0]+ww->ii; del = 2;
+	} else {sc = -10000; del = 0;}
+      }
+      if (sc < ci) {
+	sc = ci; del = 0; 
+      }
+      if (sc <= cd) {
+	sc = cd;
+	del = 5;
+      }
+      cC[j] = sc;
+      sc -= gop;
+      if (sc <= cd) {
+	del += 10;	/* +10: the D value three columns ahead continues from cd */
+	cD[j+3] = cd - gext;
+      } else cD[j+3] = sc -gext;
+      if (sc < ci) {
+	del += 20;	/* +20: the I value continues from ci */
+	cI[j] = ci-gext;
+      } else cI[j] = sc-gext;
+      *(cst++) = del;	/* traceback code: last digit is the edge type, the tens hold the +10/+20 marks */
+    }
+    lC = cC;
+  }
+  /*printf("small global score =%d\n", f_str->smgl_s.C[ex-x+1][ey-y+1]);*/
+  if (N2 && cC[ey-y+1] <  ci+gop) f_str->smgl_s.st[ex-x+1][ey-y+1] =0;	/* ci still holds the I value read for the last cell of the last row */
+  first = NULL; e = 1;
+  for (i = ex+1, j = ey+1; i > x || j > y; i--) {	/* traceback from the last cell; nodes are prepended, so the list ends up in forward order */
+    mp = (match_ptr) ckalloc(sizeof(match_node));
+    mp->i = i-1;
+    k  = (t=f_str->smgl_s.st[i-x][j-y])%10;
+    mp->j = j-1;
+    if (e == 5 && (t/10)%2 == 1) k = 5;	/* keep following D while the +10 mark is set */
+    if (e == 0 && (t/20)== 1) k = 0;	/* keep following I while the +20 mark is set */
+    if (k == 5) { j -= 3; i++; e=5;}	/* type 5: three columns back, same row (i++ cancels the loop's i--) */
+    else {j -= k;if (k==0) e= 0; else e = 1;}
+    mp->l = k;
+    mp->next = first;
+    first = mp;
+  }
+
+  /*	for (i = 0; i <= ex-x; i++) {
+	for (j = 0; j <= ey-y; j++) 
+	printf("%d ", C[i][j]);
+	printf("\n");
+	}
+  */
+  return first;	
+}
+
+#define XTERNAL
+#include "upam.h"
+
+/* display_alig - print to stdout the alignment encoded in a[]: a[0], a[1] are
+   the start positions, a[2..length-1] the operation codes (described below) */
+void
+display_alig(int *a, unsigned char *dna, unsigned char *pro,
+	     int length, int ld, struct f_struct *f_str)
+{
+  int len = 0, i, j, x, y, k, iaa;
+  static char line1[100], line2[100], line3[100];
+  char carry[sizeof(line1)], c1, c2, c3;	/* carry: scratch copy used when a row is flushed */
+
+  line1[0] = line2[0] = line3[0] = '\0'; x= a[0]; y = a[1]-3;
+  printf("\n%5d\n%5d", y+3, x);
+  for (len = 0, j = 2; j < length; j++) {
+    i = a[j];
+    line3[len] = ' ';
+    switch (i) {
+    case 3: 
+      y += 3;
+      line2[len] = NCBIstdaa[iaa=pro[x++]];
+      line1[len] = f_str->weight_c[iaa][(unsigned char) dna[y]].c5;
+      if (line1[len] != f_str->weight_c[iaa][(unsigned char) dna[y]].c3)
+	line3[len] = f_str->weight_c[iaa][(unsigned char) dna[y]].c3;
+      break;
+    case 2:
+      y += 2;
+      line1[len] = '\\';
+      line2[len++] = ' ';
+      line2[len] = NCBIstdaa[iaa=pro[x++]];
+      line1[len] = f_str->weight_c[iaa][(unsigned char) dna[y]].c2;
+      line3[len] = f_str->weight_c[iaa][(unsigned char) dna[y]].c3;
+      break;
+    case 4:
+      y += 4;
+      line1[len] = '/';
+      line2[len++] = ' ';
+      line2[len] = NCBIstdaa[iaa=pro[x++]];
+      line1[len] = f_str->weight_c[iaa][(unsigned char) dna[y]].c4;
+      line3[len] = f_str->weight_c[iaa][(unsigned char) dna[y]].c3;
+      break;
+    case 5:
+      y += 3;
+      line1[len] = f_str->weight_c[0][(unsigned char) dna[y]].c3;
+      line2[len] = '-';
+      break;
+    case 0:
+      line1[len] = '-';
+      line2[len] = NCBIstdaa[pro[x++]];
+      break;
+    }
+    len++;
+    line1[len] = line2[len]  = line3[len]  = '\0'; 
+    if (len >= WIDTH) {	/* a full row: print WIDTH columns, carry the rest over */
+      for (k = 10; k <= WIDTH; k+=10) 
+	printf("    .    :");
+      if (k-5 < WIDTH) printf("    .");
+      c1 = line1[WIDTH]; c2 = line2[WIDTH]; c3 = line3[WIDTH];
+      line1[WIDTH] = line2[WIDTH] = line3[WIDTH] = '\0';
+      printf("\n     %s\n     %s\n     %s\n", line1, line3, line2);
+      line1[WIDTH] = c1; line2[WIDTH] = c2; line3[WIDTH] = c3;	/* line3 was not restored before, losing its carried column */
+      strncpy(carry, &line1[WIDTH], sizeof(carry)-1); strncpy(line1, carry, sizeof(line1)-1);	/* via carry: source and target overlap */
+      strncpy(carry, &line2[WIDTH], sizeof(carry)-1); strncpy(line2, carry, sizeof(line2)-1);
+      strncpy(carry, &line3[WIDTH], sizeof(carry)-1); strncpy(line3, carry, sizeof(line3)-1);
+      len = len - WIDTH;
+      printf("\n%5d\n%5d", y+3, x);
+    }
+  }
+  for (k = 10; k < len; k+=10) 
+    printf("    .    :");
+  if (k-5 < len) printf("    .");
+  printf("\n     %s\n     %s\n     %s\n", line1, line3, line2);
+}
+
+
+/* The alignment array stores the operations that align the protein and
+   DNA sequences.  The codes in the array are as follows:
+   0:     deletion of an amino acid.
+   2:     frame shift, 2 nucleotides matched with an amino acid
+   3:     an amino acid matched with a codon
+   4:     the other type of frame shift (4 nucleotides)
+   5:     deletion of a codon
+   
+
+   Also, the first two elements of the array store the starting points
+   in the protein and DNA sequences of the local alignment.
+
+   The display looks like this (WIDTH is assumed to be divisible by 10):
+
+    0    .    :    .    :    .    :    .    :    .    :    .    :
+     AACE/N\PLK\G\HK\Y/LWA\S\C\E/P\PRIRZ/G\HK\Y/LWA\S\C\E/P\PRIRZ
+          I S   G S  V F   N R Q L A     G S  V F   N R Q L A    
+     AACE P P-- G HK Y TWA A C E P P---- G HK Y TWA A C E P P----
+
+   60    .    :    .    :    .    :    .    :    .    :    .    :
+     /G\HK\Y/LWA\S\C\E/P\PRIRZ/G\HK\Y/LWA\S\C\E/P\PRIRZ/G\HK\Y/LW
+      G S  V F   N R Q L A     G S  V F   N R Q L A     G S  V F 
+      G HK Y TWA A C E P P---- G HK Y TWA A C E P P---- G HK Y TW
+
+For a frame shift, the middle row shows the letter in the original sequence,
+and the letter in the top row is the amino acid that is chosen by the
+alignment (translated codon chosen from 4 nucleotides, or 2+1).
+*/
+
+/* fatal - print message and die: msg and a newline go to stderr,
+   then the process exits with status 1 */
+void
+fatal(char *msg)
+{
+  fprintf(stderr, "%s\n", msg);
+  exit(1);
+}
+
+/* 10-Feb-2010 - fz_walign modified to ensure that the final alignment
+   overlaps the initial lz_band() region.  In earlier versions, the
+   final alignment (using pam2p[0]) might have been outside the band
+   region */
+
+#ifdef TFAST
+/* fz_copy_region - make a private copy of aa1[l_min..l_max-1].  The
+   buffer is calloc()ed with one extra zero byte on each side, and the
+   returned pointer addresses the first copied residue, so the caller
+   must release it with free(ptr - 1).  Exits with status 1 if the
+   allocation fails. */
+static unsigned char *
+fz_copy_region(const unsigned char *aa1, int l_min, int l_max)
+{
+  unsigned char *buf;
+
+  buf = (unsigned char *)calloc(l_max - l_min + 2,sizeof(unsigned char));
+  if (buf == NULL) {
+    fprintf(stderr," *** cannot allocate local sequence copy %d\n",l_max - l_min + 2);
+    exit(1);
+  }
+  buf++;
+  memcpy(buf,aa1+l_min, l_max - l_min);
+  return buf;
+}
+#endif
+
+/* fz_walign - score aa0 against aa1 and, if the score reaches
+   score_thresh, produce an alignment in a_res.
+
+   do_fastz() is run first; it fills a_res->rst and the band offset
+   hoff.  If a_res->rst.score[ppst->score_ix] < score_thresh,
+   a_res->sw_score is set to 0, a_res->n1 to n1, and the function
+   returns without aligning.  Otherwise a region [l_min,l_max) of aa1
+   is derived from hoff and the window (ppst->param_u.fa.optwid, capped
+   by the sequence length), pro_dna() is run on that region, and the
+   boundaries it stored in a_res are offset so that they refer to the
+   full aa1 again.
+
+   Without TFAST the codon data comes from f_str->aa0x/aa0v.  With
+   TFAST, aa1 is translated into f_str->aa1x here and f_str->aa1v is
+   computed with pre_com() (frame 0) or pre_com_r() (other frames),
+   first for the whole of aa1 and then again for the limited region.
+
+   aa1 itself is never modified: when only part of it is needed a
+   private copy is made (earlier, now removed, code wrote '\0'
+   sentinels into aa1 instead).
+
+   max_res is not used; f_str->max_res is what is passed to pro_dna(). */
+void
+fz_walign (const unsigned char *aa0, int n0,
+	   const unsigned char *aa1, int n1,
+	   int frame, int max_res,
+	   struct pstruct *ppst, 
+	   struct f_struct *f_str, 
+	   struct a_res_str *a_res,
+	   int score_thresh)
+{
+  int hoff;
+  int l_min, l_max, window;
+  struct score_count_s s_info = {0,0,0,0};
+#ifdef TFAST
+  int last_n1, n10, itx;
+  unsigned char *aa1x;
+  unsigned char *local_aa1 = NULL;	/* private copy of the aa1 region, if one was made */
+  const unsigned char *region_aa1;	/* region handed to pre_com()/pre_com_r() */
+#endif
+
+#ifndef TFAST
+  do_fastz (f_str->aa0x, n0, aa1, n1, f_str->aa0v, ppst, f_str, &a_res->rst, &hoff, 1, &s_info);
+#else
+  /* make translated sequence: three translations, itx = frame*3 ..
+     frame*3+2, stored one after another in aa1x with one separator
+     position between them */
+  last_n1 = 0;
+  aa1x = f_str->aa1x;
+  for (itx= frame*3; itx< frame*3+3; itx++) {
+    n10  = saatran(aa1,&aa1x[last_n1],n1,itx);
+    last_n1 += n10+1;
+  }
+  n10 = last_n1-1;
+
+  /* do_fastz (lz_band) also needs a pre-computed number series */
+  if (frame == 0) {
+    pre_com(aa1, n1, f_str->aa1v);
+  }
+  else {
+    pre_com_r(aa1, n1, f_str->aa1v);
+  }
+
+  do_fastz (aa0, n0, f_str->aa1x, n10, f_str->aa1v, ppst, f_str, &a_res->rst, &hoff, 1, &s_info);
+#endif
+
+  /* score too low - report "no alignment" */
+  if (a_res->rst.score[ppst->score_ix] < score_thresh) {
+    a_res->sw_score = 0;
+    a_res->n1 = n1;
+    return;
+  }
+
+  /* pick the region of aa1 that surrounds the band found by do_fastz() */
+#ifndef TFAST
+  window = min(n1, ppst->param_u.fa.optwid);
+  l_min = max(0, -window - hoff);
+  l_max = min(n1, n0-hoff+window);
+  /* pro_dna() below reads aa1+l_min directly, so no copy is needed here */
+#else
+  window = min(n0, ppst->param_u.fa.optwid);
+  if (frame==0) {
+    l_min = max(0,(hoff-window)*3);
+    l_max = min((hoff+window+n0)*3,n1);
+  }
+  else {
+    /* things are more complicated here because the mapping of hoff is
+       with respect to the reversed aa1 */
+    l_max = n1 - max(0,(hoff-window)*3);
+    l_min = n1 - min((hoff+window+n0)*3,n1);
+  }
+
+  region_aa1 = aa1;
+  if (l_min > 0 || l_max < n1 -1 ) {
+    region_aa1 = local_aa1 = fz_copy_region(aa1, l_min, l_max);
+  }
+
+  /* re-do precomputed codon number series for limited region */
+  if (frame==0) {
+    pre_com(region_aa1, l_max - l_min, f_str->aa1v);
+  }
+  else {
+    pre_com_r(region_aa1, l_max - l_min, f_str->aa1v);
+  }
+#endif
+
+  a_res->sw_score = 
+    pro_dna(
+#ifndef TFAST
+	    aa1+l_min, l_max - l_min,
+	    f_str->aa0v, n0-2,
+#else
+	    aa0, n0,
+	    f_str->aa1v, l_max - l_min-2,
+#endif
+	    ppst->pam2[0],
+	    -ppst->gdelval, -ppst->ggapval, -ppst->gshift,
+	    f_str, f_str->max_res, a_res);
+
+  /* map the region-relative boundaries back onto the full aa1 */
+#ifndef TFAST
+  a_res->min0 += l_min;
+  a_res->max0 += l_min;
+#else
+  /* the returned pointer is one past the start of the allocation */
+  if (local_aa1 != NULL) { free(local_aa1 - 1); }
+
+  if (frame==1) {
+    a_res->min1 += n1 - l_max;
+    a_res->max1 += n1 - l_max;
+  }
+  else {
+    a_res->min1 += l_min;
+    a_res->max1 += l_min;
+  }    
+#endif
+
+  /* display_alig(f_str->res,f_str->aa0v+2,aa1,*nres,n0-2,f_str); */
+}
+
+/*
+  fz_malign is a recursive interface to fz_walign() that is called
+  from do_walign(). fz_malign() first does an alignment, then checks
+  to see if the score is greater than the threshold. If so, it tries
+  doing a left and right alignment.
+
+  In this implementation, the DNA sequence is preserved as DNA for
+  TFAST, so that it can be sub-setted and translated correctly.  Thus,
+  the translation required for f_str->aa1x and f_str->aa1v is done at
+  each recursive level (in fz_walign).
+ */
+
+/* fz_malign - align aa0/aa1 with fz_walign() and, when repeats are
+   enabled (ppst->do_rep) and the score reaches score_thresh, recurse
+   on the part of aa1 to the left and to the right of the alignment.
+
+   cur_ares must point to an a_res_str supplied by the caller; its
+   res[] array (max_res ints) is allocated here.  For the first call
+   (first_align != 0) fz_walign() is given a threshold of 1, otherwise
+   score_thresh.  Sub-alignments scoring above score_thresh are kept,
+   have their coordinates shifted into the coordinates of this level's
+   aa1, and are combined with cur_ares by merge_ares_chains(); the
+   resulting chain head is returned.  Sub-alignments at or below the
+   threshold are freed.
+
+   Each recursive call works on a private copy of the sub-sequence, so
+   aa1 is never modified.  Exits with status 1 if any allocation fails. */
+struct a_res_str *
+fz_malign (const unsigned char *aa0, int n0,
+	   const unsigned char *aa1, int n1,
+	   int frame, 
+	   int score_thresh, int max_res,
+	   struct pstruct *ppst,
+	   struct f_struct *f_str,
+	   struct a_res_str *cur_ares,
+	   int first_align)
+{
+  struct a_res_str *tmpl_ares, *tmpr_ares, *this_ares;
+  int sq_start, sq_end;
+  int score_ix;
+  int min_alen;
+  unsigned char *local_aa1;
+  int max_sub_score = -1;
+
+  score_ix = ppst->score_ix;
+
+#ifdef TFAST  
+  min_alen = min(n0,MIN_LOCAL_LEN)*3;	/* n0 in aa, min_alen in nt */
+#else 
+  min_alen = min(n0/3,MIN_LOCAL_LEN);	/* no in nt, min_alen in aa */
+#endif
+
+  /* now we need alignment storage - get it */
+  if ((cur_ares->res = (int *)calloc((size_t)max_res,sizeof(int)))==NULL) {
+    fprintf(stderr," *** cannot allocate alignment results array %d\n",max_res);
+    exit(1);
+  }
+
+  cur_ares->next = NULL;
+
+  fz_walign(aa0, n0, aa1, n1, frame, max_res, ppst, f_str, cur_ares, (first_align ? 1 : score_thresh));
+
+  /* in cur_ares, min0,max0 are always protein, min1,max1 are always
+     DNA, but n0 could be protein or DNA, depending on
+     FASTY/TFASTY */
+
+  if (!ppst->do_rep || cur_ares->rst.score[ppst->score_ix] < score_thresh) { 
+    return cur_ares;
+  }
+
+  /* have a score >= threshold - try left and right */
+
+  /* in code below, cur_ares->min0/max0 always refers to aa
+     cur_ares->min1/max1 always refers to nt
+
+     however, things are more complex because if frame==1, then
+     offsets are from the end (n1), not the beginning.  There is no
+     frame==1 for fasty, only for TFASTY
+  */
+  cur_ares->v_start = sq_start = 0;
+#ifdef TFAST
+  if (frame == 0) {sq_end = cur_ares->min1-1;}   /* aa1[sq_start --> sq_end] */
+  else {sq_end = n1 - cur_ares->max1;}
+#else
+  sq_end = cur_ares->min0;
+#endif
+  cur_ares->v_len = sq_end - sq_start;
+
+  if (cur_ares->v_len >= min_alen) { /* try the left  */
+    /* allocate a_res */
+    if ((tmpl_ares = (struct a_res_str *)calloc(1, sizeof(struct a_res_str)))==NULL) {
+      fprintf(stderr," *** cannot allocate left a_res_str\n");
+      exit(1);
+    }
+    /* private copy with a zero byte before and after the residues */
+    if ((local_aa1 = (unsigned char *)calloc(cur_ares->v_len+2,sizeof(unsigned char)))==NULL) {
+      fprintf(stderr," *** cannot allocate left sequence copy %d\n",cur_ares->v_len+2);
+      exit(1);
+    }
+    local_aa1++;
+    memcpy(local_aa1, aa1, cur_ares->v_len);
+
+    tmpl_ares = fz_malign(aa0, n0, local_aa1, cur_ares->v_len,
+			  frame, score_thresh, max_res, 
+			  ppst, f_str, tmpl_ares,0);
+
+    free(--local_aa1);
+
+    if (tmpl_ares->rst.score[ppst->score_ix] > score_thresh) {
+      max_sub_score = tmpl_ares->rst.score[ppst->score_ix];
+#ifdef TFAST
+      if (frame == 1) {
+	for (this_ares = tmpl_ares; this_ares; this_ares = this_ares->next) {
+	  this_ares->v_start += n1 - sq_end;
+	  this_ares->min1 += n1 - sq_end;
+	  this_ares->max1 += n1 - sq_end;
+	}
+      }
+#endif
+    }
+    else {
+      if (tmpl_ares->res) free(tmpl_ares->res);
+      free(tmpl_ares);
+      tmpl_ares=NULL;
+    }
+  }
+  else {tmpl_ares = NULL;}
+
+  /* now the right end */
+  /* for fasty -- max positions refer to the aa,codon, not the next
+     residue, so they must be incremented */
+
+  sq_end = n1;
+#ifdef TFAST
+  if (frame == 0) {sq_start = cur_ares->max1+1;}
+  else {sq_start = n1 - cur_ares->min1;}
+#else
+  sq_start = cur_ares->max0+1;
+#endif
+  cur_ares->v_len = sq_end - sq_start;
+
+  if (cur_ares->v_len >= min_alen) { /* try the right  */
+    /* allocate a_res */
+    if ((tmpr_ares = (struct a_res_str *)calloc(1, sizeof(struct a_res_str)))==NULL) {
+      fprintf(stderr," *** cannot allocate right a_res_str\n");
+      exit(1);
+    }
+
+    /* private copy of aa1[sq_start..n1-1] */
+    if ((local_aa1 = (unsigned char *)calloc(cur_ares->v_len+2,sizeof(unsigned char)))==NULL) {
+      fprintf(stderr," *** cannot allocate right sequence copy %d\n",cur_ares->v_len+2);
+      exit(1);
+    }
+    local_aa1++;
+    memcpy(local_aa1,aa1+sq_start,cur_ares->v_len);
+
+    tmpr_ares = fz_malign(aa0, n0, 
+			  local_aa1, cur_ares->v_len,
+			  frame,
+			  score_thresh, max_res,
+			  ppst, f_str, tmpr_ares,0);
+    free(--local_aa1);
+
+    if (tmpr_ares->rst.score[ppst->score_ix] > score_thresh) {
+      /* adjust the left boundary */
+      for (this_ares = tmpr_ares; this_ares; this_ares = this_ares->next) {
+#ifndef TFAST
+	this_ares->min0 += sq_start;
+	this_ares->max0 += sq_start;
+#else
+	if (frame == 0) {
+	  this_ares->v_start += sq_start;
+	  this_ares->min1 += sq_start;
+	  this_ares->max1 += sq_start;
+	}
+#endif
+      }
+      if (tmpr_ares->rst.score[ppst->score_ix] > max_sub_score) {
+	max_sub_score = tmpr_ares->rst.score[ppst->score_ix];
+      }
+    }
+    else {
+      if (tmpr_ares->res) free(tmpr_ares->res);
+      free(tmpr_ares);
+      tmpr_ares=NULL;
+    }
+  }
+  else {tmpr_ares = NULL;}
+
+  /* nothing worth merging on either side */
+  if (max_sub_score < score_thresh) return cur_ares;
+
+  cur_ares = merge_ares_chains(cur_ares, tmpl_ares, score_ix, "left");
+  cur_ares = merge_ares_chains(cur_ares, tmpr_ares, score_ix, "right");
+
+  return cur_ares;
+}
+
+/* do_walign() can be called with aa0,n0 as nt (FASTY) or 
+   aa0,n0 as aa (TFASTY).  if aa0 is nt, then f_str->aa0x,y have the
+   translations already.  if aa0 is aa, then f_str->aa1x,y must be
+   generated.
+*/
+
+/* do_walign - allocate an a_res_str and build the chain of alignments
+   for aa0/aa1 with fz_malign(), using repeat_thresh as the score
+   threshold and f_str->max_res as the alignment storage size.
+
+   While fz_malign() runs, ppst->param_u.fa.use_E_thresholds is forced
+   to 0, optflag to 1, and optwid is doubled unless optwid_set is set;
+   all three are restored before returning.  f_str->frame is set to
+   frame and left set.
+
+   *have_ares is set to 0x3 on entry.  Each entry of the returned chain
+   has ->index set to its position in the chain (0, 1, ...).
+
+   Returns the chain head, or NULL if the initial a_res_str cannot be
+   allocated. */
+struct a_res_str *
+do_walign (const unsigned char *aa0, int n0,
+	   const unsigned char *aa1, int n1,
+	   int frame, int repeat_thresh,
+	   struct pstruct *ppst, 
+	   struct f_struct *f_str, 
+	   int *have_ares)
+{
+  struct a_res_str *a_res, *tmp_a_res;
+  int a_res_index;
+  int use_E_thresholds_s, optflag_s, optwid_s;
+#ifdef DEBUG
+  unsigned long adler32_crc;
+#endif
+
+  *have_ares = 0x3;	/* set 0x2 bit to indicate local copy */
+
+  if ((a_res = (struct a_res_str *)calloc(1, sizeof(struct a_res_str)))==NULL) {
+    fprintf(stderr," [do_walign] Cannot allocate a_res\n");
+    return NULL;
+  }
+
+#ifdef DEBUG
+  /* checksum aa1 so that any modification by the alignment code is caught below */
+  adler32_crc = adler32(1L,aa1,n1);
+#endif
+
+  f_str->frame = frame;	/* need frame for later pre_cons() in calcons() */
+
+  /* save the search parameters that are overridden for the alignment.
+     NOTE(review): the original also read param_u.fa.optcut into a local
+     but never restored it; optcut is not modified in this function. */
+  use_E_thresholds_s = ppst->param_u.fa.use_E_thresholds;
+  optflag_s = ppst->param_u.fa.optflag;
+  optwid_s = ppst->param_u.fa.optwid;
+  ppst->param_u.fa.use_E_thresholds = 0;
+  ppst->param_u.fa.optflag = 1;
+  if (!ppst->param_u.fa.optwid_set) {
+    ppst->param_u.fa.optwid *= 2;
+  }
+
+  a_res = fz_malign(aa0, n0, aa1, n1, frame,
+		    repeat_thresh, f_str->max_res,
+		    ppst, f_str, a_res, 1);
+
+#ifdef DEBUG
+  if (adler32(1L,aa1,n1) != adler32_crc) {
+    fprintf(stderr,"*** error [%s:%d] adler32_crc mismatch n1: %d\n",__FILE__, __LINE__, n1);
+  }
+#endif
+
+  /* number the alignments in chain order */
+  a_res_index = 0;
+  for (tmp_a_res=a_res; tmp_a_res; tmp_a_res = tmp_a_res->next) {
+    tmp_a_res->index = a_res_index++;
+  }
+
+  ppst->param_u.fa.use_E_thresholds = use_E_thresholds_s;
+  ppst->param_u.fa.optflag = optflag_s;
+  ppst->param_u.fa.optwid = optwid_s;
+  return a_res;
+}
+
+/* pre_cons - with TFAST, (re)build the precomputed codon number series
+   for aa1[0..n1-1] in f_str->aa1v: pre_com() when frame is 0,
+   pre_com_r() for any other frame.  Without TFAST this function does
+   nothing and its arguments are ignored. */
+void
+pre_cons(const unsigned char *aa1, int n1, int frame, struct f_struct *f_str) {
+
+#ifdef TFAST
+  /* make a precomputed codon number series */
+  if (frame==0) {
+    pre_com(aa1, n1, f_str->aa1v);
+  }
+  else { /* must do things backwards */
+    pre_com_r(aa1, n1, f_str->aa1v);
+  }
+#endif
+}
+
+/* aln_func_vals - set up aln.qlfact, qlrev, llfact, llmult, frame, llrev */
+/* call from calcons, calc_id, calc_code */
+/* The sequence that is DNA gets a length factor of 3 and is marked as
+   reversed when frame > 0; the other sequence gets factor 1 and is
+   never reversed.  Without TFAST that sequence is the query (ql*),
+   with TFAST it is the library (ll*). */
+void 
+aln_func_vals(int frame, struct a_struct *aln) {
+  int dna_rev = (frame > 0) ? 1 : 0;
+
+  aln->frame = frame;
+  aln->llmult = 1;
+
+#ifdef TFAST	/* TFASTX */
+  aln->qlfact = 1;
+  aln->llfact = 3;
+  aln->qlrev = 0;
+  aln->llrev = dna_rev;
+#else
+  aln->qlfact = 3;
+  aln->llfact = 1;
+  aln->qlrev = dna_rev;
+  aln->llrev = 0;
+#endif	/* TFASTX */
+}
+
+#include "structs.h"
+#include "a_mark.h"
+
+/* Declarations of helpers that have no body in this part of the file.
+   align_type(), next_annot_match(), comment_var(),
+   display_push_features() and seq_pos() are all called from
+   calc_cons_a() below. */
+
+/* called with the pairwise score and the two displayed characters; its
+   result is stored as the alignment symbol code for the column */
+extern int align_type(int score, char sp0, char sp1, int nt_align, struct a_struct *aln, int pam_x_id_sim);
+
+/* called when the current position matches the next annotation entry;
+   its return value is used as the new index into annot_arr */
+extern int
+next_annot_match(int *itmp, int *pam2aa0v, long ip, long ia, char *sp1, char *sp1a, const unsigned char *sq,
+	   int i_annot, int n_annot, struct annot_entry **annot_arr, char **ann_comment,
+	   void *annot_stack, int *have_push_features, int *v_delta, 
+	   struct annot_entry **region_p, struct annot_entry *tmp_region_p, int init_score);
+
+/* called when next_annot_match() has changed the displayed residue */
+extern void
+comment_var(long i0, char sp0, long i1, char sp1, char o_sp1, char sim_char,
+	    const char *ann_comment, struct dyn_string_str *annot_var_dyn, int target, int d_type);
+
+/* called whenever have_push_features has been set */
+void
+display_push_features(void *annot_stack, struct dyn_string_str *annot_var_dyn,
+		      long i0, char sp0, long i1, char sp1, char sym, 
+		      struct annot_entry **region0_p,
+		      struct annot_entry **region1_p,
+		      int score, double comp, int n0, int n1,
+		      void *pstat_void, int d_type);
+
+#define DP_FULL_FMT 1	/* Region: score: bits: id: ... */
+#define Q_TARGET 0
+#define L_TARGET 1
+
+/* presumably maps pos to a display coordinate given a reverse flag and
+   an offset - defined elsewhere, confirm there */
+int seq_pos(int pos, int rev,int off);
+
+/* calc_cons_a - expand the encoded alignment in a_res->res[] into
+   aligned, displayable strings.
+
+   Each entry of a_res->res[0..a_res->nres-1] is an operation code (see
+   the switch below).  For every code this routine writes one or two
+   columns to seqc0/seqc1 (residues, '-' for gaps, '/' or a backslash
+   for frameshifts), to seqca (the align_type() result, or M_DEL) and,
+   when cumm_seq_score is not NULL, a per-column score to
+   cumm_seq_score[].
+
+   Without TFAST, seqc0 receives the translated DNA (from f_str->aa0v)
+   and seqc1 the protein aa1.  With TFAST, f_str->aa1v is first rebuilt
+   from aa1, seqc1 receives the translated DNA and seqc0 the protein
+   aa0.
+
+   When annotation is present (have_ann) seqc0a/seqc1a are filled as
+   well, and positions listed in annot1_p are handed to
+   next_annot_match(), comment_var() and display_push_features()
+   together with annot_var_dyn.  The v_delta accumulated by
+   next_annot_match() is returned in *score_delta.  annot0_p is not
+   referenced.
+
+   On return *nc is the number of columns excluding the frameshift
+   marker columns (lenc); the return value includes them (lenc+not_c).
+   aln->amin0/amin1/amax0/amax1, smin0/smin1, ngap_q, ngap_l, nfs and
+   calc_last_set are set; aln->nident/nmismatch/nsim/npos are zeroed
+   before the walk (aln is then passed to align_type() for each aligned
+   pair).
+
+   Only seqca and one of the annotation strings are '\0'-terminated
+   here; seqc0/seqc1 are not.
+*/
+int
+calc_cons_a(const unsigned char *aa0, int n0,
+	    const unsigned char *aa1, int n1,
+	    int *nc,
+	    struct a_struct *aln,
+	    struct a_res_str *a_res,
+	    struct pstruct *ppst,
+	    char *seqc0, char *seqc1, char *seqca, int *cumm_seq_score,
+	    const unsigned char *ann_arr,
+	    const unsigned char *aa0a, const struct annot_str *annot0_p, char *seqc0a,
+	    const unsigned char *aa1a, const struct annot_str *annot1_p, char *seqc1a,
+	    int *score_delta,
+	    struct dyn_string_str *annot_var_dyn,
+	    struct f_struct *f_str,
+	    void *pstat_void
+	    )
+{
+  int i0, i1;
+  int lenc, not_c, itmp, ngap_p, ngap_d, nfs;
+  char *sp0, *sp0a, *sp1, *sp1a, *spa, t_spa;
+  int *i_spa;
+  const unsigned char *sq;
+  unsigned char aap;
+
+  const unsigned char *ap0, *ap1;
+  const unsigned char *ap1a;	/* ap1 always points to protein, and
+				   only protein has annotations */
+  int *rp, *rpmax;
+  int have_ann = 0;
+
+  /* variables for variant changes */
+  char tmp_str[MAX_LSTR];
+  int *annot_stack, annot_stack_n, annot_top=0;
+  char *sim_sym = aln_map_sym[MX_ACC];
+  struct annot_entry **s_annot1_arr_p, *region1_p, pre_annot1;
+  struct annot_entry **s_annot0_arr_p, *region0_p, pre_annot0;
+  struct annot_entry *this_annot_p;
+  int  i1_annot, v_delta, v_tmp;
+  int have_push_features, prev_match;
+  long i0_offset, i1_offset;
+
+  char *ann_comment;
+
+  *score_delta = 0;
+
+  /* residue-code -> character table */
+  if (ppst->ext_sq_set) {sq = ppst->sqx;}
+  else {sq = ppst->sq;}
+
+  /* res[0] has start of protein sequence */
+  /* res[1] has start of translated DNA sequence */
+
+  /* throughout, the "0" working variables (ap0, sp0, i0) follow the
+     codon/DNA sequence and the "1" variables (ap1, sp1, i1) follow the
+     protein, regardless of which one is the query */
+#ifndef TFAST
+  aln->amin1 = aln->smin1 = a_res->min0;	/* start in protein sequence */
+  aln->amin0 = aln->smin0 = a_res->min1;	/* start in DNA/codon sequence */
+
+  i0_offset = aln->q_offset;
+  i1_offset = aln->l_offset;
+
+  ap0 = f_str->aa0v;		/* computed codons -> ap0*/
+  ap1 = aa1;			/* protein sequence -> ap1 */
+
+  sp0 = seqc0;	/* translated DNA */
+  sp1 = seqc1;	/* protein */
+
+  have_ann = (seqc0a != NULL && aa1a != NULL);
+  ap1a = aa1a;
+  sp1a = seqc1a;	/* protein library can have annotation */
+  sp0a = seqc0a;	/* sp0a is always ' ' - no translated
+			   annotation */
+#else	/* TFASTYZ */
+  /* rebuild the codon number series for the library DNA */
+  if (aln->frame == 0) {
+    pre_com(aa1, n1, f_str->aa1v);
+  }
+  else {
+    pre_com_r(aa1, n1, f_str->aa1v);
+  }
+
+  aln->amin0 = aln->smin0 = a_res->min0;	/* start in protein sequence */
+  aln->amin1 = aln->smin1 = a_res->min1;	/* start in codon sequence */
+
+  i1_offset = aln->q_offset;
+  i0_offset = aln->l_offset;
+
+  ap1 = aa0;			/* protein sequence */
+  ap0 = f_str->aa1v;		/* computed codons -> ap0*/
+
+  sp0 = seqc1;	/* receives the translated DNA (written from weight_c below) */
+  sp1 = seqc0;	/* receives the protein residues (written from ap1 below) */
+
+  have_ann = (seqc0a != NULL && aa0a != NULL);
+  ap1a = aa0a;
+  sp1a = seqc0a;	/* protein query can have annotation */
+  sp0a = seqc1a;	/* sp0a is always ' ' - no translated
+			   annotation */
+#endif
+  spa = seqca;
+  i_spa = cumm_seq_score;
+
+  rp = a_res->res;			/* start of alignment info */
+  rpmax = &a_res->res[a_res->nres];	/* end of alignment info */
+
+  lenc = not_c = aln->nident = aln->nmismatch = aln->nsim = aln->npos = ngap_d = ngap_p = nfs = 0;
+  /* i0 is advanced BEFORE each codon is read, hence the -3 */
+  i0 = a_res->min1-3;	/* start of codon sequence */
+  i1 = a_res->min0;	/* start of protein sequence */
+
+  v_delta = 0;
+  i1_annot = 0;
+  region0_p = region1_p = NULL;
+  s_annot0_arr_p = s_annot1_arr_p = NULL;
+  annot_stack = NULL;
+  have_push_features = prev_match = 0;
+  if (have_ann) {
+    if (annot1_p && annot1_p->n_annot > 0)  annot_stack = init_stack(64,64);
+    if (annot1_p && annot1_p->n_annot > 0) {
+      s_annot1_arr_p = annot1_p->s_annot_arr_p;
+      /* skip annotations that lie before the alignment start; if a '['
+	 region is still open at the start, track it through a private
+	 copy (pre_annot1) whose positions are set to the alignment start */
+      while (i1_annot < annot1_p->n_annot && s_annot1_arr_p[i1_annot]->pos < i1) {
+	if (s_annot1_arr_p[i1_annot]->label == '[') {
+	  memcpy(&pre_annot1,s_annot1_arr_p[i1_annot], sizeof(struct annot_entry));
+	  pre_annot1.pos = aln->amin1 + i1_offset;
+	  pre_annot1.a_pos = aln->amin0 + i0_offset;
+	  region1_p = &pre_annot1;
+	  region1_p->score = region1_p->n_aln = region1_p->n_ident = 0;
+	}
+	else if (s_annot1_arr_p[i1_annot]->label == ']') {
+	  region1_p = NULL;
+	}
+	i1_annot++;
+      }
+    }
+  }
+
+  /* NOTE(review): in the cases below s_annot1_arr_p[i1_annot] is read
+     without checking i1_annot < annot1_p->n_annot - confirm that the
+     array is terminated in a way that makes this safe. */
+
+  while (rp < rpmax ) {
+    switch (*rp++) {
+    case 3:		/* match */
+      i0 += 3;
+
+      *sp1 = sq[aap=ap1[i1]];
+      *sp0 = f_str->weight_c[aap][ap0[i0]].c5;
+      itmp = ppst->pam2[0][aap][pascii[*sp0]];
+
+
+      if (have_ann) {
+	*sp1a = ann_arr[ap1a[i1]];
+	*sp0a = ' ';
+	if (s_annot1_arr_p) {
+	  if (i1+i1_offset == s_annot1_arr_p[i1_annot]->pos) {
+	    i1_annot = next_annot_match(&itmp, ppst->pam2[0][pascii[*sp0]], i1_offset+seq_pos(i1,aln->llrev,0),
+					i0_offset+seq_pos(i0,aln->qlrev,0), sp1, sp1a, sq,
+					i1_annot, annot1_p->n_annot, s_annot1_arr_p,
+					&ann_comment, annot_stack, &have_push_features, &v_delta,
+					&region1_p, &pre_annot1, 0);
+
+	    /* must be out of the loop to capture the last value */
+	    /* *sp1 differs from the original residue only if
+	       next_annot_match() replaced it */
+	    if (ppst->sq[ap1[i1]] != *sp1) {
+	      t_spa = align_type(itmp, *sp0, *sp1, 0, NULL, ppst->pam_x_id_sim);
+
+	      comment_var(i0_offset+seq_pos(i0,aln->qlrev,0), *sp0,
+			  i1_offset+seq_pos(i1,aln->llrev,0), *sp1,
+			  sq[ap1[i1]], sim_sym[t_spa], ann_comment,
+			  annot_var_dyn,1,1);
+	    }
+	  }
+	  prev_match = 1;
+	  if (region1_p) {region1_p->score += itmp;}
+	}
+	sp0a++; sp1a++;
+      }
+
+      *spa = align_type(itmp, *sp0, *sp1, 0, aln, ppst->pam_x_id_sim);
+
+      if (region1_p) {
+	region1_p->n_aln++;
+	if (*spa == M_IDENT) {region1_p->n_ident++;}
+      }
+
+      if (cumm_seq_score) *i_spa++ = itmp;
+
+      if (have_ann && have_push_features) {
+	display_push_features(annot_stack, annot_var_dyn,
+			      i0_offset+seq_pos(i0,aln->qlrev,0), *sp0,
+			      i1_offset+seq_pos(i1,aln->llrev,0), *sp1,
+			      sim_sym[*spa], &region0_p, &region1_p,
+			      a_res->rst.score[ppst->score_ix], a_res->rst.comp, n0, n1, pstat_void, DP_FULL_FMT);
+	have_push_features = 0;
+      }
+
+      i1++;
+      sp0++; sp1++; spa++;
+      lenc++;
+      break;
+    case 2:		/* frame shift +2, then match */
+      nfs++;
+      i0 += 2;
+      /* first column: the frameshift marker, counted in not_c, not lenc */
+      *sp0++ = '/';
+      *sp1++ = '-';
+      *spa++ = M_DEL;
+      if (cumm_seq_score) *i_spa++ = ppst->gshift;
+
+      if (have_ann) {*sp0a++ = *sp1a++ = ' ';}
+      not_c++;
+
+      /* second column: the aligned residue pair */
+      *sp1 = sq[aap=ap1[i1]];
+      *sp0 = f_str->weight_c[aap][ap0[i0]].c2;
+      itmp = ppst->pam2[0][pascii[*sp0]][aap];
+
+      if (have_ann) {
+	*sp1a = ann_arr[ap1a[i1]];
+	*sp0a = ' ';
+	if (s_annot1_arr_p) {
+	  if (i1 + i1_offset == s_annot1_arr_p[i1_annot]->pos) {
+	    i1_annot = next_annot_match(&itmp, ppst->pam2[0][pascii[*sp0]], i1_offset+seq_pos(i1,aln->llrev,0),
+					i0_offset+seq_pos(i0,aln->qlrev,0), sp1, sp1a, sq,
+					i1_annot, annot1_p->n_annot, s_annot1_arr_p,
+					&ann_comment, annot_stack, &have_push_features, &v_delta,
+					&region1_p, &pre_annot1, 0);
+
+	    /* must be out of the loop to capture the last value */
+	    if (ppst->sq[ap1[i1]] != *sp1) {
+	      t_spa = align_type(itmp, *sp0, *sp1, 0, NULL, ppst->pam_x_id_sim);
+
+	      comment_var(i0_offset+seq_pos(i0,aln->qlrev,0), *sp0,
+			  i1_offset+seq_pos(i1,aln->llrev,0), *sp1,
+			  sq[ap1[i1]], sim_sym[t_spa], ann_comment,
+			  annot_var_dyn,1,DP_FULL_FMT);
+	    }
+	  }
+	  if (region1_p) {
+	    region1_p->score += ppst->gshift;
+	    region1_p->score += itmp;
+	  }
+	  prev_match = 1;
+	}
+	sp0a++; sp1a++;
+      }
+
+      *spa = align_type(itmp, *sp0, *sp1, 0, aln, ppst->pam_x_id_sim);
+
+      if (region1_p) {
+	region1_p->n_aln++;
+	if (*spa == M_IDENT) {region1_p->n_ident++;}
+      }
+
+      if (have_ann && have_push_features) {
+	display_push_features(annot_stack, annot_var_dyn,
+			      i0_offset+seq_pos(i0,aln->qlrev,0), *sp0,
+			      i1_offset+seq_pos(i1,aln->llrev,0), *sp1,
+			      sim_sym[*spa], &region0_p, &region1_p,
+			      a_res->rst.score[ppst->score_ix], a_res->rst.comp, n0, n1, pstat_void, DP_FULL_FMT);
+	have_push_features = 0;
+      }
+
+      if (cumm_seq_score) *i_spa++ = itmp;
+
+      i1++;
+      sp0++; sp1++; spa++;
+      lenc++;
+      break;
+    case 4:		/* frame shift, -1, then match */
+      nfs++;
+      i0 += 4;
+      if (have_ann) {
+	*sp1a++ = *sp0a++ = ' ';
+      }
+      /* first column: the frameshift marker, counted in not_c, not lenc */
+      *sp0++ = '\\';
+      *sp1++ = '-';
+      *spa++ = M_DEL;
+      if (cumm_seq_score) *i_spa++ = ppst->gshift;
+
+      not_c++;
+
+      /* second column: the aligned residue pair */
+      *sp1 = sq[aap=ap1[i1]];
+      *sp0 = f_str->weight_c[aap][ap0[i0]].c4;
+      itmp = ppst->pam2[0][pascii[*sp0]][aap];
+
+      /* NOTE(review): unlike case 2, gshift is not added to
+	 region1_p->score here - confirm whether that is intended */
+      if (have_ann) {
+	*sp1a = ann_arr[ap1a[i1]];
+	*sp0a = ' ';
+	if (s_annot1_arr_p && i1+i1_offset == s_annot1_arr_p[i1_annot]->pos) {
+	  i1_annot = next_annot_match(&itmp, ppst->pam2[0][pascii[*sp0]], i1_offset+seq_pos(i1,aln->llrev,0),
+				      i0_offset+seq_pos(i0,aln->qlrev,0), sp1, sp1a, sq,
+				      i1_annot, annot1_p->n_annot, s_annot1_arr_p, &ann_comment,
+				      annot_stack, &have_push_features, &v_delta, &region1_p, &pre_annot1, 0);
+
+	  /* must be out of the loop to capture the last value */
+	  if (ppst->sq[ap1[i1]] != *sp1) {
+	    t_spa = align_type(itmp, *sp0, *sp1, 0, NULL, ppst->pam_x_id_sim);
+	    comment_var(i0_offset+seq_pos(i0,aln->qlrev,0), *sp0,
+			i1_offset+seq_pos(i1,aln->llrev,0), *sp1,
+			sq[ap1[i1]], sim_sym[t_spa], ann_comment,
+			annot_var_dyn,1,DP_FULL_FMT);
+	  }
+	  prev_match = 1;
+	  if (region1_p) {region1_p->score += itmp;}
+	}
+	sp0a++; sp1a++;
+      }
+
+      *spa = align_type(itmp, *sp0, *sp1, 0, aln, ppst->pam_x_id_sim);
+      if (region1_p) {
+	region1_p->n_aln++;
+	if (*spa == M_IDENT) {region1_p->n_ident++;}
+      }
+
+      if (cumm_seq_score) *i_spa++ = itmp;
+
+      if (have_ann && have_push_features) {
+	display_push_features(annot_stack, annot_var_dyn,
+			      i0_offset+seq_pos(i0,aln->qlrev,0), *sp0,
+			      i1_offset+seq_pos(i1,aln->llrev,0), *sp1,
+			      sim_sym[*spa], &region0_p, &region1_p,
+			      a_res->rst.score[ppst->score_ix], a_res->rst.comp, n0, n1, pstat_void, DP_FULL_FMT);
+	have_push_features = 0;
+      }
+
+      i1++;
+      sp0++; sp1++; spa++;
+      lenc++;
+      break;
+    case 5:		/* insertion in 0 */
+      /* a codon with no protein residue opposite it: '-' in sp1 */
+      if (have_ann) {
+	*sp1a++ = *sp0a++ = ' ';
+      }
+      i0 += 3;
+      *sp0++ = f_str->weight_c[0][ap0[i0]].c3;
+      *sp1++ = '-';
+      *spa++ = M_DEL;
+      lenc++;
+      ngap_p++;
+      if (cumm_seq_score) *i_spa++ = ppst->gdelval;
+      break;
+    case 0:		/* insertion in 1 */
+      /* a protein residue with no codon opposite it: '-' in sp0 */
+      *sp0++ = '-';
+      *sp1++ = sq[ap1[i1]];
+      *spa++ = M_DEL;
+      /* NOTE(review): gdelval is used for both steps here, whereas the
+	 region score below adds gdelval then ggapval; and when
+	 prev_match is 0 the += accumulates onto whatever the slot
+	 already holds.  prev_match is only updated when annotations
+	 are present.  Confirm the intended per-column gap score. */
+      if (cumm_seq_score) {
+	if (prev_match) *i_spa = ppst->gdelval;
+	*i_spa++ += ppst->gdelval;
+      }
+
+      /* sp0/sp1/spa were already advanced above, so from here on *sp0,
+	 *sp1 and *spa refer to the NEXT column; sp1 is passed in that
+	 state to next_annot_match() and display_push_features() */
+      if (have_ann) {
+	*sp0a = ' ';
+	*sp1a = ann_arr[ap1a[i1]];
+
+	if (s_annot1_arr_p) {
+	  /* coordinates are much more complex for next_annot_match,
+	     and comment_var, because they may need to be reversed */
+
+	  if (i1 + i1_offset == s_annot1_arr_p[i1_annot]->pos) {
+	    i1_annot = next_annot_match(&itmp, ppst->pam2[0][ap0[i0]], i1_offset+seq_pos(i1,aln->llrev,0),
+					i0_offset+seq_pos(i0,aln->qlrev,0), sp1, sp1a, sq, 
+					i1_annot, annot1_p->n_annot, s_annot1_arr_p,
+					&ann_comment, annot_stack, &have_push_features, &v_delta,
+					&region1_p, &pre_annot1, 0);
+	  }
+
+	  if (region1_p) {
+	    if (prev_match) region1_p->score += ppst->gdelval;
+	    region1_p->score += ppst->ggapval;
+	    region1_p->n_aln++;
+	  }
+	  prev_match = 0;
+	}
+	sp0a++;  sp1a++;
+      }
+
+      if (have_ann && have_push_features) {
+	display_push_features(annot_stack, annot_var_dyn,
+			      i0_offset+seq_pos(i0,aln->qlrev,0), *sp0,
+			      i1_offset+seq_pos(i1,aln->llrev,0), *sp1,
+			      sim_sym[*spa], &region0_p, &region1_p,
+			      a_res->rst.score[ppst->score_ix], a_res->rst.comp, n0, n1, pstat_void, DP_FULL_FMT);
+	have_push_features = 0;
+      }
+
+      i1++;
+      lenc++;
+      ngap_d++;
+      break;
+    }
+  }
+
+  if (have_ann) {
+    /* only sp0a is terminated here; sp1a is left as is */
+    *sp0a = '\0';
+    if (s_annot1_arr_p) {
+      /* close any ']' region ends that lie beyond the alignment but
+	 before the next '[' region start */
+      have_push_features = 0;
+      while (i1_annot < annot1_p->n_annot && s_annot1_arr_p[i1_annot]->pos < n1) {
+	if (s_annot1_arr_p[i1_annot]->label == '[') break;
+	if (s_annot1_arr_p[i1_annot]->label == ']') {
+	  push_stack(annot_stack, s_annot1_arr_p[i1_annot]);
+	  have_push_features = 1;
+	}
+	i1_annot++;
+      }
+      if (have_push_features) {
+	display_push_features(annot_stack, annot_var_dyn,
+			      i0_offset+seq_pos(i0,aln->qlrev,0), *sp0,
+			      i1_offset+seq_pos(i1,aln->llrev,0), *sp1,
+			      sim_sym[*spa],&region0_p, &region1_p,
+			      a_res->rst.score[ppst->score_ix], a_res->rst.comp, n0, n1, pstat_void, DP_FULL_FMT);
+	have_push_features = 0;
+      }
+    }
+  }
+  *spa = '\0';
+
+#ifndef TFAST
+  aln->amax0 = i0+3;	/* end of codon sequence */
+  aln->amax1 = i1;	/* end of protein sequence */
+  aln->ngap_q = ngap_d;
+  aln->ngap_l = ngap_p;
+#else
+  aln->amax1 = i0+3;	/* end of codon sequence */
+  aln->amax0 = i1;	/* end of protein sequence */
+  aln->ngap_q = ngap_p;
+  aln->ngap_l = ngap_d;
+#endif
+  aln->calc_last_set = 1;
+  aln->nfs = nfs;
+  aln->amin0 = aln->smin0;
+  aln->amin1 = aln->smin1;
+
+  *score_delta = v_delta;
+
+  /* annot_stack is still NULL here when no annotations were processed */
+  free_stack(annot_stack);
+
+  if (lenc < 0) lenc = 1;
+
+  *nc = lenc;
+/*	now we have the middle, get the right end */
+
+  return lenc+not_c;
+}
+
+/* calc_astruct - copy the alignment boundaries stored in *a_res_p into
+   *aln_p and clear aln_p->calc_last_set.  In a_res_p, min0/max0 are
+   the protein coordinates and min1/max1 the DNA coordinates; in aln_p
+   the "0" fields belong to the query and the "1" fields to the
+   library, so which pair goes where depends on TFAST.  f_str is not
+   used. */
+void
+calc_astruct(struct a_struct *aln_p, struct a_res_str *a_res_p, struct f_struct *f_str) {
+
+#ifdef TFAST	/* TFASTX: protein query, DNA library */
+  aln_p->amin0 = a_res_p->min0;	/* prot */
+  aln_p->amax0 = a_res_p->max0;	/* prot */
+  aln_p->amin1 = a_res_p->min1;	/* DNA */
+  aln_p->amax1 = a_res_p->max1;	/* DNA */
+#else		/* FASTX: DNA query, protein library */
+  aln_p->amin0 = a_res_p->min1;	/* DNA */
+  aln_p->amax0 = a_res_p->max1;	/* DNA */
+  aln_p->amin1 = a_res_p->min0;	/* prot */
+  aln_p->amax1 = a_res_p->max0;	/* prot */
+#endif
+
+  aln_p->calc_last_set = 0;
+}
+
+/* build an array of match/ins/del - length strings */
+
+/* modified 10-June-2014 to distinguish matches from mismatches, op=1
+   (previously unused) indicates an aligned non-identity */
+
+/* op_codes are:  0 - aa insertion
+   		  1 - (now) aligned non-identity
+   		  2 - -1 frameshift
+   		  3 - aligned identity
+   		  4 - +1 frameshift
+   		  5 - codon insertion
+*/
+
+/* init_update_data - allocate and initialize the state used by
+   update_code()/close_update_data() while an alignment code string is
+   built.
+
+   show_code selects the output flavour:
+     (show_code & SHOW_CODE_MASK) == SHOW_CODE_CIGAR  -> op_map is
+         cigar_code and cigar_order is 1 (count before letter);
+         otherwise op_map is ori_code and cigar_order is 0.
+     (show_code & SHOW_CODE_EXT) == SHOW_CODE_EXT     -> show_ext is 1.
+
+   The structure comes from calloc(), so every field not set below
+   (p_op_idx in particular) starts as 0.  Returns NULL, after printing
+   a message on stderr, if the allocation fails; otherwise the caller
+   releases the structure through close_update_data(). */
+static struct update_code_str *
+init_update_data(int show_code) {
+
+  struct update_code_str *update_data_p;
+  int use_cigar;
+
+  if ((update_data_p = (struct update_code_str *)calloc(1,sizeof(struct update_code_str)))==NULL) {
+    fprintf(stderr,"*** error [%s:%d] - init_update_data(): cannot allocate update_code_str\n",
+	      __FILE__, __LINE__);
+    return NULL;
+  }
+
+  update_data_p->p_op_cnt = 0;
+  update_data_p->show_code = show_code;
+
+  use_cigar = ((show_code & SHOW_CODE_MASK) == SHOW_CODE_CIGAR);
+  update_data_p->op_map = use_cigar ? cigar_code : ori_code;
+  update_data_p->cigar_order = use_cigar ? 1 : 0;
+
+  update_data_p->show_ext = ((show_code & SHOW_CODE_EXT) == SHOW_CODE_EXT) ? 1 : 0;
+
+  return update_data_p;
+}
+
+/* close_update_data - flush the run that is still pending in *up_dp
+   (p_op_idx repeated p_op_cnt times) to the end of al_str and free
+   up_dp.  A NULL up_dp is ignored.
+
+   al_str_max is handed to strncat() unchanged, i.e. it limits the
+   number of characters appended.
+   NOTE(review): this is only safe if callers pass the space remaining
+   in al_str rather than its total size - verify against the callers. */
+static void
+close_update_data(char *al_str, int al_str_max, 
+		  struct update_code_str *up_dp) {
+  char tmp_cnt[MAX_SSTR];
+
+  if (!up_dp) return;
+
+  /* sprintf_code() writes nothing when the pending count is 0, so
+     start from an empty string rather than uninitialized stack bytes */
+  tmp_cnt[0] = '\0';
+  sprintf_code(tmp_cnt,up_dp, up_dp->p_op_idx, up_dp->p_op_cnt);
+  strncat(al_str,tmp_cnt,al_str_max);
+
+  free(up_dp);
+}
+
+/* update_indel_code() has been modified to work more correctly with
+   ggsearch/glsearch, which, because alignments can start with either
+   insertions or deletions, can produce an initial code of "0=".  When
+   that happens, it is ignored and no code is added.
+
+   *al_str - alignment string [al_str_max] - not dynamic
+   op -- encoded operation, currently 0=match, 1-delete, 2-insert, 3-term-match, 4-mismatch
+   op_cnt -- length of run
+   show_code -- SHOW_CODE_CIGAR uses cigar_code, otherwise legacy
+*/
+
+/* update_indel_code() is called for insertions and deletions
+   update_match_code() is called for every match
+*/
+
+/* sprintf_code - format one run of an alignment code into tmp_str.
+
+   The run is op_cnt repetitions of the operation whose letter is
+   up_dp->op_map[op_idx].  With up_dp->cigar_order set the count comes
+   first ("%d%c"), otherwise the letter comes first ("%c%d").
+
+   When op_cnt is 0 there is nothing to report and tmp_str is set to
+   the empty string, so that callers can append it unconditionally. */
+static void
+sprintf_code(char *tmp_str, struct update_code_str *up_dp, int op_idx, int op_cnt) {
+
+  if (op_cnt == 0) {
+    tmp_str[0] = '\0';
+    return;
+  }
+
+  if (up_dp->cigar_order) {
+    sprintf(tmp_str,"%d%c",op_cnt,up_dp->op_map[op_idx]);
+  }
+  else {
+    sprintf(tmp_str,"%c%d",up_dp->op_map[op_idx],op_cnt);
+  }
+}
+
+/* update_code - account for one alignment operation, appending
+   completed runs to al_str.
+
+   up_dp holds the pending run (p_op_idx, p_op_cnt).  op is handled in
+   three groups:
+     2, 4  one-time operations: the pending run is written, then the
+           operation itself with a count of 1, and the pending count is
+           reset to 0.
+     0, 5  accumulating: extends the pending run if it has the same
+           code, otherwise the pending run is written and a new run of
+           length 1 is started.
+     1, 3  aligned pair: first remapped - to 1 when show_ext is set and
+           the pair is not an identity (sim_code != M_IDENT, or
+           sp0 != sp1 when a '*' is involved in cigar order), or to 6
+           when a '*' is involved and the output is not in cigar order
+           - and then accumulated like 0/5, except that a pending count
+           of 0 starts a new run without writing anything.
+   Any other op value is ignored.
+
+   al_str_max is handed to strncat() unchanged, i.e. it limits the
+   number of characters appended by each call.
+   NOTE(review): this is only safe if callers pass the space remaining
+   in al_str rather than its total size - verify against the callers. */
+static void
+update_code(char *al_str, int al_str_max, 
+	    struct update_code_str *up_dp, int op, 
+	    int sim_code,  unsigned char sp0, unsigned char sp1)
+{
+  char tmp_cnt[MAX_SSTR];
+
+  /* sprintf_code() writes nothing for a count of 0; make sure an
+     empty string, not uninitialized stack bytes, is appended then */
+  tmp_cnt[0] = '\0';
+
+  /* there are two kinds of "op's", one time and accumulating */
+  /* op == 2, 4 are one-time: */
+
+  switch (op) {
+  case 2:
+  case 4:
+    sprintf_code(tmp_cnt,up_dp, up_dp->p_op_idx,up_dp->p_op_cnt);
+    strncat(al_str,tmp_cnt,al_str_max);
+    sprintf_code(tmp_cnt,up_dp, op, 1);
+    strncat(al_str,tmp_cnt,al_str_max);
+    up_dp->p_op_cnt = 0;
+    break;
+  case 0:
+  case 5:
+    if (op == up_dp->p_op_idx) {
+      up_dp->p_op_cnt++;
+    }
+    else {
+      sprintf_code(tmp_cnt,up_dp, up_dp->p_op_idx,up_dp->p_op_cnt);
+      strncat(al_str,tmp_cnt,al_str_max);
+      up_dp->p_op_idx = op;
+      up_dp->p_op_cnt = 1;
+    }
+    break;
+  case 1:
+  case 3:
+    if (sp0 != '*' && sp1 != '*') {	/* default case, not termination */
+      if (up_dp->show_ext && sim_code != M_IDENT) { op = 1;}
+    }
+    else if (!up_dp->cigar_order) {
+      /* have a termination codon, output for !SHOW_CODE_CIGAR */
+      op = 6;
+    }
+    else if (up_dp->show_ext && (sp0 != sp1)) { op = 1;}
+
+    if (up_dp->p_op_cnt == 0) {
+      up_dp->p_op_idx = op;
+      up_dp->p_op_cnt = 1;
+    }
+    else if (op != up_dp->p_op_idx) {
+      sprintf_code(tmp_cnt,up_dp, up_dp->p_op_idx,up_dp->p_op_cnt);
+      strncat(al_str,tmp_cnt,al_str_max);
+      up_dp->p_op_idx = op;
+      up_dp->p_op_cnt = 1;
+    }
+    else {
+      up_dp->p_op_cnt++;
+    }
+    break;
+  }
+}
+
+/* calc_code() - walk the alignment operations stored in a_res->res[]
+   and (1) append a run-length encoded description of the alignment to
+   al_str via update_code()/close_update_data(), (2) append per-residue
+   annotation/variant strings to annot_code_dyn, and (3) fill in the
+   alignment boundaries and gap/frameshift counts in *aln.
+
+   Operation codes read from a_res->res[]:
+     0 - insert in 0        2 - -1 frame shift     3 - match
+     4 - +1 frame shift     5 - insert in 1
+
+   al_str/al_str_n   - output buffer and its total size
+   display_code      - SHOW_CODE_* / SHOW_ANNOT_FULL flags
+   score_delta       - receives the score change accumulated by
+                       next_annot_match() (v_delta)
+
+   returns the number of alignment columns processed (lenc).
+*/
+int calc_code(const unsigned char *aa0, int n0,
+	      const unsigned char *aa1, int n1,
+	      struct a_struct *aln,
+	      struct a_res_str *a_res,
+	      struct pstruct *ppst,
+	      char *al_str, int al_str_n,
+	      const unsigned char *ann_arr,
+	      const unsigned char *aa0a,
+	      const struct annot_str *annot0_p,
+	      const unsigned char *aa1a, 
+	      const struct annot_str *annot1_p,
+	      struct dyn_string_str *annot_code_dyn,
+	      int *score_delta,
+	      struct f_struct *f_str,
+	      void *pstat_void,
+	      int display_code)
+{
+  int i0, i1;
+  int lenc, not_c, ngap_d, ngap_p, nfs;
+  char sp0, sp1;
+  struct update_code_str *update_data_p;
+  unsigned char aap;
+  const unsigned char *ap0, *ap1, *ap1a;
+  int *rp, *rpmax;
+  const unsigned char *sq;
+  int have_ann = 0;
+  char tmp_astr[MAX_STR];
+  int sim_code, t_spa;
+  int show_code, annot_fmt;
+  char *sim_sym= aln_map_sym[MX_ACC];
+  /* variables for variant changes */
+  void *annot_stack;
+  struct annot_entry **s_annot1_arr_p, *region1_p, pre_annot1;
+  struct annot_entry *region0_p;
+  int  itmp, i1_annot, v_delta;
+  int have_push_features, prev_match;
+  long i0_offset, i1_offset;
+  
+  *score_delta = 0;
+
+  /* sim_code, sp0 and sp1 are read by the gap/frameshift cases and by
+     the trailing display_push_features() call before any match has
+     necessarily assigned them; give them defined values so that no
+     indeterminate value is ever read.  'X' is used as a neutral
+     placeholder residue (NOTE(review): assumes 'X' is a valid pascii[]
+     entry, as it is used as a residue elsewhere in this file). */
+  sim_code = 0;
+  sp0 = sp1 = 'X';
+
+  show_code = (display_code & SHOW_CODE_MASK + SHOW_CODE_EXT);
+  annot_fmt = 2;
+  if (display_code & SHOW_ANNOT_FULL) {
+    annot_fmt = 1;
+  }
+
+  if (ppst->ext_sq_set) {sq = ppst->sqx;}
+  else {sq = ppst->sq;}
+
+  /* don't fill in the ends */
+#ifndef TFAST
+  ap0 = f_str->aa0v;		/* computed codons -> ap0*/
+  ap1 = aa1;			/* protein sequence -> ap1 */
+  aln->smin1 = a_res->min0;	/* start in protein sequence */
+  aln->smin0 = a_res->min1;		/* start in DNA/codon sequence */
+
+  i0_offset = aln->q_offset;
+  i1_offset = aln->l_offset;
+
+  have_ann = (ann_arr[0] != '\0' && aa1a != NULL);
+  ap1a = aa1a;
+#else	/* TFASTYZ */
+  if (aln->frame == 0) { pre_com(aa1, n1, f_str->aa1v);}
+  else { pre_com_r(aa1, n1, f_str->aa1v);}
+
+  ap0 = f_str->aa1v;		/* computed codons -> ap0*/
+  ap1 = aa0;			/* protein sequence */
+  aln->smin0 = a_res->min0;	/* start in protein sequence */
+  aln->smin1 = a_res->min1;		/* start in codon sequence */
+
+  i1_offset = aln->q_offset;
+  i0_offset = aln->l_offset;
+
+  have_ann = (ann_arr[0] != '\0' && aa0a != NULL);
+  ap1a = aa0a;
+#endif
+
+  rp = a_res->res;			/* start of alignment info */
+  rpmax = &a_res->res[a_res->nres];		/* end of alignment info */
+
+/* now get the middle */
+
+  lenc = not_c = aln->nident = aln->nmismatch = aln->nsim = aln->npos = ngap_d = ngap_p = nfs = 0;
+  update_data_p = init_update_data(show_code);
+
+  i0 = a_res->min1-3;	/* start of codon sequence */
+  i1 = a_res->min0;	/* start of protein sequence */
+
+  v_delta = 0;
+  i1_annot = 0;
+  have_push_features = prev_match = 0;
+  region0_p = region1_p = NULL;
+  s_annot1_arr_p = NULL;
+  annot_stack = NULL;
+  if (have_ann) {
+    if (annot1_p && annot1_p->n_annot > 0) annot_stack = init_stack(64,64);
+    if (annot1_p && annot1_p->n_annot > 0) {
+      s_annot1_arr_p = annot1_p->s_annot_arr_p;
+      /* skip annotations that precede the alignment, remembering an
+	 enclosing '[' region if one is still open */
+      while (i1_annot < annot1_p->n_annot && s_annot1_arr_p[i1_annot]->pos < i1) {
+	if (s_annot1_arr_p[i1_annot]->label == '[') {
+	  memcpy(&pre_annot1,s_annot1_arr_p[i1_annot], sizeof(struct annot_entry));
+	  pre_annot1.pos = aln->amin1 + i1_offset;
+	  pre_annot1.a_pos = aln->amin0 + i0_offset;
+	  region1_p = &pre_annot1;
+	  region1_p->score = region1_p->n_aln = region1_p->n_ident = 0;
+	}
+	else if (s_annot1_arr_p[i1_annot]->label == ']') {
+	  region1_p = NULL;
+	}
+	i1_annot++;
+      }
+    }
+  }
+
+  while (rp < rpmax ) {
+    switch (*rp++) {
+    case 0:		/* insert in 0 */
+      sim_code = 5; /* indel code */
+      update_code(al_str, al_str_n-strlen(al_str), update_data_p, 0, sim_code,'-','-');
+
+      if (s_annot1_arr_p) {
+	/* bound i1_annot before dereferencing the annotation array */
+	if (i1_annot < annot1_p->n_annot
+	    && i1+i1_offset == s_annot1_arr_p[i1_annot]->pos) {
+	  i1_annot = next_annot_match(&itmp, ppst->pam2[0][pascii[sp0]], i1_offset+seq_pos(i1,aln->llrev,0),
+				      i0_offset+seq_pos(i0,aln->qlrev,0), &sp1, NULL, sq,
+				      i1_annot, annot1_p->n_annot, s_annot1_arr_p, NULL,
+				      annot_stack, &have_push_features, &v_delta,
+				      &region1_p, &pre_annot1, 0);
+	}
+
+	if (region1_p) {
+	  if (prev_match) region1_p->score += ppst->gdelval;
+	  region1_p->score += ppst->ggapval;
+	  region1_p->n_aln++;
+	}
+	prev_match = 0;
+
+	if (have_push_features) {
+	  display_push_features(annot_stack, annot_code_dyn,
+				i0_offset+seq_pos(i0,aln->qlrev,0), sp0,
+				i1_offset+seq_pos(i1,aln->llrev,0), sp1,
+				sim_sym[sim_code], &region0_p, &region1_p,
+				a_res->rst.score[ppst->score_ix], a_res->rst.comp, n0, n1, pstat_void, annot_fmt);
+	  have_push_features = 0;
+	}
+      }
+
+      i1++;
+      lenc++;
+      ngap_d++;
+      break;
+
+    case 2:			/* -1 frame shift */
+      update_code(al_str, al_str_n-strlen(al_str), update_data_p, 2, sim_code,'-','-');
+
+      nfs++;
+      i0 += 2;
+      not_c++;
+
+      sp1 = ppst->sq[aap=ap1[i1]];
+      sp0 = f_str->weight_c[aap][ap0[i0]].c2;
+      itmp = ppst->pam2[0][pascii[sp0]][aap];
+
+      if (s_annot1_arr_p) {
+	if (i1_annot < annot1_p->n_annot
+	    && i1+i1_offset == s_annot1_arr_p[i1_annot]->pos) {
+	  i1_annot = next_annot_match(&itmp, ppst->pam2[0][pascii[sp0]], i1_offset+seq_pos(i1,aln->llrev,0),
+				      i0_offset+seq_pos(i0,aln->qlrev,0), &sp1, NULL, sq,
+				      i1_annot, annot1_p->n_annot, s_annot1_arr_p, NULL,
+				      annot_stack, &have_push_features, &v_delta,
+				      &region1_p, &pre_annot1, 0);
+	}
+
+	if (sq[aap] != sp1) {
+	  t_spa = align_type(itmp, sp0, sp1, 0, NULL, ppst->pam_x_id_sim);
+
+	  comment_var(i0_offset+seq_pos(i0,aln->qlrev,0), sp0,
+		      i1_offset+seq_pos(i1,aln->llrev,0), sp1,
+		      sq[aap], sim_sym[t_spa], NULL, 
+		      annot_code_dyn,1,annot_fmt);
+	}
+
+	if (region1_p) {
+	  region1_p->score += ppst->gshift;
+	  region1_p->score += itmp;
+	}
+	prev_match = 1;
+      }
+
+      sim_code = align_type(itmp, sp0, sp1, 0, aln, ppst->pam_x_id_sim);
+      if (region1_p) {
+	region1_p->n_aln++;
+	if (sim_code == M_IDENT) {region1_p->n_ident++;}
+      }
+
+      /* check for an annotation; use ap1a (the annotation array that
+	 have_ann was validated against) as cases 3 and 4 do, not aa1a,
+	 which may be NULL in the TFAST build */
+      if (have_ann && !(ann_arr[ap1a[i1]] == ' ' || ann_arr[ap1a[i1]] == '[' || ann_arr[ap1a[i1]] == ']')) {
+#ifndef TFAST
+	sprintf(tmp_astr, "|X%c:%ld%c%c%ld%c",
+		ann_arr[ap1a[i1]],i0_offset+seq_pos(i0,aln->qlrev,0)+1,sp0,sim_sym[sim_code],i1_offset+seq_pos(i1,aln->llrev,0)+1,sp1);
+#else
+	sprintf(tmp_astr, "|%cX:%ld%c%c%ld%c",
+		ann_arr[ap1a[i1]],i0_offset+seq_pos(i1,aln->llrev,0)+1,sp0,sim_sym[sim_code],i1_offset+seq_pos(i0,aln->qlrev,0)+1,sp1);
+
+#endif
+	/* SAFE_STRNCAT(annot_code_s, tmp_astr, n_annot_code_s); */
+	dyn_strcat(annot_code_dyn, tmp_astr);
+      }
+      
+      if (s_annot1_arr_p && have_push_features) {
+	display_push_features(annot_stack, annot_code_dyn,
+			      i0_offset+seq_pos(i0,aln->qlrev,0), sp0, 
+			      i1_offset+seq_pos(i1,aln->llrev,0), sp1,
+			      sim_sym[sim_code], &region0_p, &region1_p,
+			      a_res->rst.score[ppst->score_ix], a_res->rst.comp, n0, n1, pstat_void, annot_fmt);
+	have_push_features = 0;
+      }
+
+      i1++;
+      lenc++;
+      break;
+
+    case 3:			/* match */
+      i0 += 3;
+
+      sp1 = ppst->sq[aap=ap1[i1]];
+      sp0 = f_str->weight_c[aap][ap0[i0]].c5;
+      itmp = ppst->pam2[0][aap][pascii[sp0]];
+
+      if (s_annot1_arr_p) {
+	if (i1_annot < annot1_p->n_annot
+	    && i1+i1_offset == s_annot1_arr_p[i1_annot]->pos) {
+	  i1_annot = next_annot_match(&itmp, ppst->pam2[0][pascii[sp0]], i1_offset+seq_pos(i1,aln->llrev,0),
+				      i0_offset+seq_pos(i0,aln->qlrev,0), &sp1, NULL, sq,
+				      i1_annot, annot1_p->n_annot, s_annot1_arr_p, NULL,
+				      annot_stack, &have_push_features, &v_delta,
+				      &region1_p, &pre_annot1, 0);
+	}
+
+	if (sq[aap] != sp1) {
+	  t_spa = align_type(itmp, sp0, sp1, 0, NULL, ppst->pam_x_id_sim);
+
+	  comment_var(i0_offset+seq_pos(i0,aln->qlrev,0), sp0,
+		      i1_offset+seq_pos(i1,aln->llrev,0), sp1,
+		      sq[aap], sim_sym[t_spa], NULL, annot_code_dyn,
+		      1,annot_fmt);
+	}
+
+	if (region1_p) {region1_p->score += itmp;}
+	prev_match = 1;
+      }
+
+      sim_code = align_type(itmp, sp0, sp1, 0, aln, ppst->pam_x_id_sim);
+      if (region1_p) {
+	region1_p->n_aln++;
+	if (sim_code == M_IDENT) {region1_p->n_ident++;}
+      }
+
+      /* check for an annotation */
+      if (have_ann && !(ann_arr[ap1a[i1]] == ' ' || ann_arr[ap1a[i1]]=='[' || ann_arr[ap1a[i1]]==']')) {
+#ifndef TFAST
+	sprintf(tmp_astr, "|X%c:%ld%c%c%ld%c",
+		ann_arr[ap1a[i1]],i0_offset+seq_pos(i0,aln->qlrev,0)+1,sp0,sim_sym[sim_code],i1_offset+seq_pos(i1,aln->llrev,0)+1,sp1);
+#else
+	sprintf(tmp_astr, "|%cX:%ld%c%c%ld%c",
+		ann_arr[ap1a[i1]],i0_offset+seq_pos(i1,aln->llrev,0)+1,sp0,sim_sym[sim_code],i1_offset+seq_pos(i0,aln->qlrev,0)+1,sp1);
+
+#endif
+	/* SAFE_STRNCAT(annot_code_s, tmp_astr, n_annot_code_s); */
+	dyn_strcat(annot_code_dyn, tmp_astr);
+      }
+
+      if (s_annot1_arr_p && have_push_features) {
+	display_push_features(annot_stack, annot_code_dyn,
+			      i0_offset+seq_pos(i0,aln->qlrev,0), sp0,
+			      i1_offset+seq_pos(i1,aln->llrev,0), sp1,
+			      sim_sym[sim_code], &region0_p, &region1_p,
+			      a_res->rst.score[ppst->score_ix], a_res->rst.comp, n0, n1, pstat_void, annot_fmt);
+	have_push_features = 0;
+      }
+      
+      update_code(al_str, al_str_n-strlen(al_str), update_data_p, 3, sim_code,sp0,sp1);
+
+      i1++;
+      lenc++;
+      break;
+
+    case 4:		/* +1 frame shift */
+      /* finish previous run */
+      update_code(al_str, al_str_n-strlen(al_str), update_data_p, 4, sim_code,'-','-');
+      /* mark frameshift */
+
+      nfs++;
+      i0 += 4;
+      not_c++;
+
+      sp1 = ppst->sq[aap=ap1[i1]];
+      /* a +1 frameshift consumes four nucleotides, so the residue comes
+	 from .c4 (as in calc_id()); .c2 belongs to the -1 frameshift */
+      sp0 = f_str->weight_c[aap][ap0[i0]].c4;
+      itmp = ppst->pam2[0][aap][pascii[sp0]];
+
+      if (s_annot1_arr_p) {
+	if (i1_annot < annot1_p->n_annot
+	    && i1+i1_offset == s_annot1_arr_p[i1_annot]->pos) {
+	  i1_annot = next_annot_match(&itmp, ppst->pam2[0][pascii[sp0]], i1_offset+seq_pos(i1,aln->llrev,0),
+				      i0_offset+seq_pos(i0,aln->qlrev,0), &sp1, NULL, sq,
+				      i1_annot, annot1_p->n_annot, s_annot1_arr_p, NULL,
+				      annot_stack, &have_push_features, &v_delta, &region1_p, &pre_annot1, 0);
+	}
+
+	if (sq[aap] != sp1) {
+	  t_spa = align_type(itmp, sp0, sp1, 0, NULL, ppst->pam_x_id_sim);
+
+	  comment_var(i0_offset+seq_pos(i0,aln->qlrev,0), sp0,
+		      i1_offset+seq_pos(i1,aln->llrev,0), sp1,
+		      sq[aap], sim_sym[t_spa], NULL, annot_code_dyn,
+		      1,annot_fmt);
+	}
+
+	if (region1_p) {
+	  region1_p->score += ppst->gshift;
+	  region1_p->score += itmp;
+	}
+	prev_match = 1;
+      }
+
+      sim_code = align_type(itmp, sp0, sp1, 0, aln, ppst->pam_x_id_sim);
+      if (region1_p) {
+	region1_p->n_aln++;
+	if (sim_code == M_IDENT) {region1_p->n_ident++;}
+      }
+
+      if (have_ann && !(ann_arr[ap1a[i1]] == ' ' || ann_arr[ap1a[i1]]=='[' || ann_arr[ap1a[i1]]==']')) {
+#ifndef TFAST
+	sprintf(tmp_astr, "|X%c:%ld%c%c%ld%c",
+		ann_arr[ap1a[i1]],i0_offset+seq_pos(i0,aln->qlrev,0)+1,sp0,sim_sym[sim_code],i1_offset+seq_pos(i1,aln->llrev,0)+1,sp1);
+#else
+	sprintf(tmp_astr, "|%cX:%ld%c%c%ld%c",
+		ann_arr[ap1a[i1]],i0_offset+seq_pos(i1,aln->llrev,0)+1,sp0,sim_sym[sim_code],i1_offset+seq_pos(i0,aln->qlrev,0)+1,sp1);
+
+#endif
+	/* SAFE_STRNCAT(annot_code_s, tmp_astr, n_annot_code_s); */
+	dyn_strcat(annot_code_dyn, tmp_astr);
+      }
+
+      if (s_annot1_arr_p && have_push_features) {
+	display_push_features(annot_stack, annot_code_dyn,
+			      i0_offset+seq_pos(i0,aln->qlrev,0), sp0,
+			      i1_offset+seq_pos(i1,aln->llrev,0), sp1,
+			      sim_sym[sim_code], &region0_p, &region1_p,
+			      a_res->rst.score[ppst->score_ix], a_res->rst.comp, n0, n1, pstat_void, annot_fmt);
+	have_push_features = 0;
+      }
+
+      i1++;
+      lenc++;
+      break;
+
+    case 5:		/* insert in 1 */
+      sim_code = 5;
+      update_code(al_str, al_str_n-strlen(al_str), update_data_p, 5, sim_code,'-','-');
+
+      i0 += 3;
+      lenc++;
+      ngap_p++;
+      break;
+    }
+  }
+
+  /* flush the last pending run into al_str */
+  close_update_data(al_str, al_str_n-strlen(al_str), update_data_p);
+
+#ifndef TFAST
+  aln->amax0 = i0+3;	/* end of codon sequence */
+  aln->amax1 = i1;	/* end of protein sequence */
+  aln->ngap_q = ngap_d;
+  aln->ngap_l = ngap_p;
+#else
+  aln->amax1 = i0+3;	/* end of codon sequence */
+  aln->amax0 = i1;	/* end of protein sequence */
+  aln->ngap_q = ngap_p;
+  aln->ngap_l = ngap_d;
+#endif
+  aln->calc_last_set = 1;
+  aln->nfs = nfs;
+  aln->amin0 = aln->smin0;
+  aln->amin1 = aln->smin1;
+
+  *score_delta = v_delta;
+
+  if (have_ann) {
+    have_push_features = 0;
+    if (s_annot1_arr_p) {
+      /* also check for regions after alignment */
+      while (i1_annot < annot1_p->n_annot && s_annot1_arr_p[i1_annot]->pos < i1_offset+n1) {
+	if (s_annot1_arr_p[i1_annot]->label == '[') break;
+	if (s_annot1_arr_p[i1_annot]->label == ']') {
+	  push_stack(annot_stack, s_annot1_arr_p[i1_annot]);
+	  have_push_features = 1;
+	}
+	i1_annot++;
+      }
+    }
+
+    if (have_push_features) {
+      display_push_features(annot_stack, annot_code_dyn,
+			    i0_offset+a_res->max0-1, sp0,
+			    i1_offset+a_res->max1-1, sp1,
+			    sim_sym[sim_code], &region0_p, &region1_p,
+			    a_res->rst.score[ppst->score_ix], a_res->rst.comp, n0, n1, pstat_void, annot_fmt);
+    }
+
+    /* release the stack only when one was actually allocated (the
+       previous test was inverted and leaked it) */
+    if (annot_stack) free_stack(annot_stack);
+  }
+
+
+  if (lenc < 0) lenc = 1;
+
+/*	now we have the middle, get the right end */
+
+  return lenc;
+}
+
+/* calc_id() - walk the alignment operations in a_res->res[] and count
+   identities/similarities (via align_type(), which updates *aln),
+   gaps and frameshifts, without producing an alignment string.
+
+   Operation codes: 0 - insert in 0, 2 - -1 frame shift, 3 - match,
+   4 - +1 frame shift, 5 - insert in 1.
+
+   annot_var_dyn - reset, then receives "<res><pos><variant>;" strings
+                   for positions where next_annot_match() substituted a
+                   variant residue
+   score_delta   - receives the score change accumulated by
+                   next_annot_match() (v_delta), as in calc_code()
+
+   returns the number of alignment columns processed (lenc).
+*/
+int calc_id(const unsigned char *aa0, int n0,
+	    const unsigned char *aa1, int n1,
+	    struct a_struct *aln, 
+	    struct a_res_str *a_res, 
+	    struct pstruct *ppst,
+	    const struct annot_str *annot0_p,
+	    const struct annot_str *annot1_p,
+	    int *score_delta,
+	    struct dyn_string_str *annot_var_dyn,
+	    struct f_struct *f_str)
+{
+  int i0, i1;
+  int lenc, not_c, ngap_d, ngap_p, nfs;
+  char sp0, sp1;
+  unsigned char aap;
+  const unsigned char *ap0, *ap1;
+  int *rp, *rpmax;
+
+  /* variables for variant changes */
+  struct annot_entry **s_annot1_arr_p;
+  int  itmp, i1_annot, v_delta;
+  long i0_offset, i1_offset;
+
+  char tmp_str[MAX_SSTR];
+  const unsigned char *sq;
+  
+  *score_delta = 0;
+  NULL_dyn_string(annot_var_dyn);
+
+  if (ppst->ext_sq_set) {sq = ppst->sqx;}
+  else {sq = ppst->sq;}
+
+  /* don't fill in the ends */
+#ifndef TFAST	/* FASTYZ */
+  ap0 = f_str->aa0v;		/* computed codons -> ap0*/
+  ap1 = aa1;			/* protein sequence -> ap1 */
+  aln->smin1 = a_res->min0;	/* start in protein sequence */
+  aln->smin0 = a_res->min1;		/* start in DNA/codon sequence */
+
+  i0_offset = aln->q_offset;
+  i1_offset = aln->l_offset;
+#else	/* TFASTYZ */
+
+  if (aln->frame == 0) {
+    pre_com(aa1, n1, f_str->aa1v);
+  }
+  else {
+    pre_com_r(aa1, n1, f_str->aa1v);
+  }
+
+  ap0 = f_str->aa1v;		/* computed codons -> ap0*/
+  ap1 = aa0;			/* protein sequence */
+  aln->smin0 = a_res->min0;	/* start in protein sequence */
+  aln->smin1 = a_res->min1;	/* start in codon sequence */
+
+  i1_offset = aln->q_offset;
+  i0_offset = aln->l_offset;
+#endif
+
+  rp = a_res->res;			/* start of alignment info */
+  rpmax = &a_res->res[a_res->nres];		/* end of alignment info */
+
+/* now get the middle */
+
+  lenc = not_c = aln->nident = aln->nmismatch = aln->nsim = aln->npos = ngap_d = ngap_p = nfs = 0;
+  i0 = a_res->min1-3;	/* start of codon sequence */
+  i1 = a_res->min0;	/* start of protein sequence */
+
+  v_delta = 0;
+  i1_annot = 0;
+  s_annot1_arr_p = NULL;
+  if (annot1_p && annot1_p->n_annot > 0) s_annot1_arr_p = annot1_p->s_annot_arr_p;
+
+  while (rp < rpmax ) {
+    switch (*rp++) {
+    case 3:	/* match */
+      i0 += 3;
+      sp1 = ppst->sq[aap=ap1[i1]];
+      sp0 = f_str->weight_c[aap][ap0[i0]].c5;
+
+      itmp = ppst->pam2[0][pascii[sp0]][aap];
+
+      /* bound i1_annot before dereferencing the annotation array */
+      if (s_annot1_arr_p && i1_annot < annot1_p->n_annot
+	  && i1+i1_offset == s_annot1_arr_p[i1_annot]->pos) {
+	i1_annot = next_annot_match(&itmp, ppst->pam2[0][pascii[sp0]], i1_offset+seq_pos(i1,aln->llrev,0), i0_offset+seq_pos(i0,aln->qlrev,0), &sp1, NULL, sq,
+			      i1_annot, annot1_p->n_annot, s_annot1_arr_p, NULL,
+			      NULL, NULL, &v_delta,NULL, NULL, 0);
+
+	if (ppst->sq[aap] != sp1) {
+	  sprintf(tmp_str,"%c%d%c;",ppst->sq[aap],i1+1,sp1);
+	  /* SAFE_STRNCAT(annot_var_s,tmp_str,n_annot_var_s); */
+	  dyn_strcat(annot_var_dyn, tmp_str);
+	}
+      }
+
+      align_type(itmp, sp0, sp1, 0, aln, ppst->pam_x_id_sim);
+
+      i1++;
+      lenc++;
+      break;
+    case 2:	/* -1 frame shift */
+      nfs++;
+      i0 += 2;
+      not_c++;
+      sp1 = ppst->sq[aap=ap1[i1]];
+      sp0 = f_str->weight_c[aap][ap0[i0]].c2;
+
+      itmp = ppst->pam2[0][aap][pascii[sp0]];
+
+      if (s_annot1_arr_p && i1_annot < annot1_p->n_annot
+	  && i1+i1_offset == s_annot1_arr_p[i1_annot]->pos) {
+	i1_annot = next_annot_match(&itmp, ppst->pam2[0][pascii[sp0]], i1_offset+seq_pos(i1,aln->llrev,0), i0_offset+seq_pos(i0,aln->qlrev,0), &sp1, NULL, sq,
+			      i1_annot, annot1_p->n_annot, s_annot1_arr_p, NULL,
+			      NULL, NULL, &v_delta,NULL, NULL, 0);
+
+	if (ppst->sq[aap] != sp1) {
+	  sprintf(tmp_str,"%c%d%c;",ppst->sq[aap],i1+1,sp1);
+	  /* SAFE_STRNCAT(annot_var_s,tmp_str,n_annot_var_s); */
+	  dyn_strcat(annot_var_dyn, tmp_str);
+	}
+      }
+
+      align_type(itmp, sp0, sp1, 0, aln, ppst->pam_x_id_sim);
+
+      i1++;
+      lenc++;
+      break;
+    case 4:	/* +1 frame shift */
+      nfs++;
+      i0 += 4;
+      not_c++;
+      sp1 = ppst->sq[aap=ap1[i1]];
+      sp0 = f_str->weight_c[aap][ap0[i0]].c4;
+      itmp = ppst->pam2[0][pascii[sp0]][aap];
+
+      if (s_annot1_arr_p && i1_annot < annot1_p->n_annot
+	  && i1+i1_offset == s_annot1_arr_p[i1_annot]->pos) {
+	i1_annot = next_annot_match(&itmp, ppst->pam2[0][pascii[sp0]], i1_offset+seq_pos(i1,aln->llrev,0), i0_offset+seq_pos(i0,aln->qlrev,0), &sp1, NULL, sq,
+			      i1_annot, annot1_p->n_annot, s_annot1_arr_p, NULL,
+			      NULL, NULL, &v_delta,NULL, NULL, 0);
+
+	if (ppst->sq[aap] != sp1) {
+	  sprintf(tmp_str,"%c%d%c;",ppst->sq[aap],i1+1,sp1);
+	  /* SAFE_STRNCAT(annot_var_s,tmp_str,n_annot_var_s); */
+	  dyn_strcat(annot_var_dyn, tmp_str);
+	}
+      }
+
+      align_type(itmp, sp0, sp1, 0, aln, ppst->pam_x_id_sim);
+
+      i1++;
+      lenc++;
+      break;
+    case 5:	/* insert in 1 */
+      i0 += 3;
+      lenc++;
+      ngap_p++;
+      break;
+    case 0:	/* insert in 0 */
+      i1++;
+      lenc++;
+      ngap_d++;
+      break;
+    }
+  }
+
+#ifndef TFAST
+  aln->amax0 = i0+3;	/* end of codon sequence */
+  aln->amax1 = i1;	/* end of protein sequence */
+  aln->ngap_q = ngap_d;
+  aln->ngap_l = ngap_p;
+#else
+  aln->amax1 = i0+3;	/* end of codon sequence */
+  aln->amax0 = i1;	/* end of protein sequence */
+  aln->ngap_q = ngap_p;
+  aln->ngap_l = ngap_d;
+#endif
+  aln->calc_last_set = 1;
+  aln->nfs = nfs;
+  aln->amin0 = aln->smin0;
+  aln->amin1 = aln->smin1;
+
+  /* report the variant score adjustment; previously v_delta was
+     accumulated and then discarded, leaving *score_delta at 0 */
+  *score_delta = v_delta;
+
+  if (lenc < 0) lenc = 1;
+
+/*	now we have the middle, get the right end */
+
+  return lenc;
+}
diff --git a/src/dropfz3.c b/src/dropfz3.c
new file mode 100644
index 0000000..551ce0a
--- /dev/null
+++ b/src/dropfz3.c
@@ -0,0 +1,3864 @@
+/* $Id: dropfz2.c 1280 2014-08-21 00:47:55Z wrp $ */
+
+/* copyright (c) 1998, 1999, 2014 by William R. Pearson and The Rector &
+   Visitors of the University of Virginia */
+
+/* Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under this License is distributed on an "AS
+   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+   express or implied.  See the License for the specific language
+   governing permissions and limitations under the License. 
+*/
+
+/* 18-Sept-2006 - removed static global variables for alignment */
+
+/* 2002/06/23 finally correctly implement fix to translate 'N' to 'X' */
+
+/* 1999/11/29 modification by Z. Zhang to translate DNA 'N' as 'X' */
+
+/* implements an improved version of the fasty algorithm, see:
+
+   W. R. Pearson, T. Wood, Z. Zhang, A W. Miller (1997) "Comparison of
+   DNA sequences with protein sequences" Genomics 46:24-36
+
+*/
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <ctype.h>
+
+#include "defs.h"
+#include "param.h"
+#define XTERNAL
+#include "upam.h"
+#include "uascii.h"
+
+#define NT_N 16
+
+/* globals for fasta */
+#define MAXWINDOW 64
+
+#ifndef MAXSAV
+#define MAXSAV 10
+#endif
+
+#ifndef ALLOCN0
+static char *verstr="3.8 June 2014";
+#else
+static char *verstr="3.8an0 Jul 2014";
+#endif
+
+/* per-diagonal bookkeeping for the run currently being extended */
+struct dstruct		/* diagonal structure for saving current run */
+{			
+   int     score;	/* hash score of current match */
+   int     start;	/* start of current match */
+   int     stop;	/* end of current match */
+   struct savestr *dmax;   /* location in vmax[] where best score data saved */
+};
+
+/* one saved high-scoring run; struct dstruct.dmax points at entries of
+   this type (presumably the vmax[] array handed to savemax()) */
+struct savestr
+{
+   int     score;		/* pam score with segment optimization */
+   int     score0;		/* pam score of best single segment */
+   int     gscore;		/* score from global match */
+   int     dp;			/* diagonal of match */
+   int     start;		/* start of match in lib seq */
+   int     stop;		/* end of match in lib seq */
+};
+
+/* state carried between update_code() calls while an alignment
+   encoding is being built (allocated by init_update_data(), finished
+   by close_update_data()) */
+struct update_code_str {
+  int p_op_idx;		/* NOTE(review): presumably the pending (previous) operation index */
+  int p_op_cnt;		/* NOTE(review): presumably the run length of the pending operation */
+  int btop_enc;
+  int show_code;
+  int cigar_order;
+  int show_ext;
+  char *op_map;		/* NOTE(review): presumably points at ori_code or cigar_code below */
+};
+
+/* one character per operation index; the insert/delete symbols at
+   positions 0 and 5 are exchanged between the two builds */
+#ifdef TFAST
+static char *ori_code = "-x/=\\+*";	/* TFASTX */
+static char *cigar_code = "DXFMRI*";
+#else
+static char *ori_code = "+x/=\\-*";	/* FASTX */
+static char *cigar_code = "IXFMRD*";
+#endif
+
+unsigned long adler32(unsigned long, const unsigned char *, unsigned int);
+void kpsort (struct savestr **v, int n);
+extern void *init_stack(int, int);
+extern void push_stack(void *, void *);
+extern void *pop_stack(void *);
+extern void *free_stack(void *);
+extern struct domfeat_data * init_domfeat_data(const struct annot_str *annot_p);
+
+/* dimensions of the fixed-size work arrays in struct smgl_str */
+#define SGW1 100
+#define SGW2 300
+/* fixed-size work arrays embedded in struct f_struct as smgl_s */
+struct smgl_str {
+  int C[SGW1+1][SGW2+1];
+  int st[SGW1+1][SGW2+1];
+  int D[SGW2+7], I[SGW2+1];
+};
+
+/* element type of f_struct.cur */
+struct sx_s {int C1, C2, C3, I1, I2, I3, flag; };
+
+/* scores for one amino acid against one 4-nucleotide (0..255) code, as
+   filled in by init_weights():
+     iii - best score for the 3-nucleotide codon (code%64), allowing
+           nucleotide substitutions
+     ii  - best score over codons built from the low 2 nucleotides plus
+           one inserted nucleotide, minus the gshift argument
+     iv  - best score over codons obtained by dropping one of the 4
+           nucleotides, minus the gshift argument */
+struct wgt { int  iii, ii, iv;};
+/* residue characters matching the scores in struct wgt: c2 goes with
+   .ii, c4 with .iv, c5 with .iii; c3 is the direct translation of the
+   3-nucleotide codon (code%64) */
+struct wgtc {char c2, c3, c4, c5;};
+
+/* element type of f_struct.up/down/tp */
+typedef struct st_s { int C, I, D;} *st_ptr;
+
+/* per-query working state; allocated with calloc() in init_work() and
+   passed to the other functions in this file as f_str */
+struct f_struct {
+  struct dstruct *diag;
+  int frame;
+  int ndo;
+  int noff;
+  int hmask;			/* hash constants */
+  int *pamh1;			/* pam based array */
+  int *pamh2;			/* pam based kfact array */
+  int *link, *harr;		/* hash arrays */
+  int kshft;			/* shift width */
+  int nsav, lowscor;		/* number of saved runs, worst saved run */
+#ifndef TFAST
+  unsigned char *aa0x, *aa0v;	/* aa0x - 111122223333 */
+#else
+  unsigned char *aa1x, *aa1v;	/* aa1x - 111122223333 */
+#endif				/* aa1v - computed codons */
+  struct sx_s *cur;
+  int cur_sp_size;
+  struct wgt **weight0;		/* built by init_weights() in init_work() */
+  struct wgt **weight1;		/* built by init_weights() in init_work() */
+  struct wgtc **weight_c;	/* residue characters built together with weight1 */
+  int *waa;
+  int *res;
+  int max_res;
+  st_ptr up, down, tp;
+  struct smgl_str smgl_s;
+};
+
+#define DROP_INTERN
+#include "drop_func.h"
+
+static int dmatchz(const unsigned char *aa0, int n0,
+		   const unsigned char *aa1, int n1,
+		   const unsigned char *aa1v,
+		   int hoff, int window, 
+		   int **pam2, int gdelval, int ggapval, int gshift,
+		   struct f_struct *f_str);
+
+int shscore(unsigned char *aa0, int n0, int **pam2);
+int saatran(const unsigned char *ntseq, unsigned char *aaseq, int maxs, int frame);
+extern int ELK_to_s(double E_join, int n0, int n1, double Lambda, double K, double H);
+
+int savemax (struct dstruct *, int, 
+	     struct savestr *vmax, struct savestr **lowmax);
+
+int spam (const unsigned char *aa0, const unsigned char *aa1, 
+	  struct savestr *dmax, int **pam2,
+	  struct f_struct *f_str);
+int sconn (struct savestr **v, int n,int cgap, int pgap, struct f_struct *f_str);
+int lx_band(const unsigned char *prot_seq, int len_prot,
+	    const unsigned char *dna_prot_seq, int len_dna_prot,
+	    int **pam_matrix, int gopen, int gext,
+	    int gshift, int start_diag, int width, struct f_struct *f_str);
+
+struct a_res_str *
+merge_ares_chains(struct a_res_str *cur_ares, 
+		  struct a_res_str *tmpl_ares,
+		  int score_ix, const char *msg);
+
+static struct update_code_str *
+init_update_data(int show_code);
+
+static void
+sprintf_code(char *tmp_str, struct update_code_str *, int op_idx, int op_cnt);
+
+static void 
+update_code(struct dyn_string_str *align_code_dyn,
+	    struct update_code_str *update_data, int op, 
+	    int sim_code, unsigned char sp0, unsigned char sp1);
+
+static void
+close_update_data(struct dyn_string_str *align_code_dyn,
+		  struct update_code_str *update_data);
+
+extern void w_abort (char *p, char *p1);
+extern void aagetmap(char *to, int n);
+
+/* initialize for fasta */
+/* modified 30-August-1999 by Zheng Zhang to work with an extended alphabet */
+/* Assume naa=47, and wgts[47][23] matches both upper and lower case
+amino acids with another amino acid.  Also assume the DNA letters
+do not distinguish upper/lower case.  If DNA sequences are also allowed
+to contain upper/lower case letters, more needs to be changed: not
+only here, but also in the alignment code, where the way a codon is
+packed into a number between 0-63 would need to change. */
+
+/* modified so that if **weightci==NULL, do not fiddle with characters */
+
+/* modified 3-Aug-2010 for NCBIstdaa alphabet, which requires MAXUC
+   28, MAXLC 56, so we must have 58, not 47, entries */
+
+/* init_weights() - build the amino-acid vs. nucleotide-code score
+   tables used for codon, -1 frameshift and +1 frameshift matches.
+
+   weighti  - receives a calloc()'d array of naa+1 rows, each with 256
+              struct wgt entries (one per 4-nucleotide code)
+   weightci - if not NULL, receives a matching array of struct wgtc
+              holding the residue character behind each score
+   wgts     - scoring (pam) matrix, indexed [aa][residue]
+   gshift   - value subtracted from the frameshift scores (.ii, .iv)
+   gsubs    - value subtracted per substituted nucleotide
+   naa      - number of amino-acid codes; must not exceed MAXLC
+
+   Exits the program on allocation failure or if naa > MAXLC.
+*/
+void
+init_weights(struct wgt ***weighti, struct wgtc ***weightci,
+	     int **wgts, int gshift, int gsubs, int naa)
+{
+  int i, j, do_wgtc=0;
+  int aa, b, a, x, y, z;
+  int *wwt, e;
+  struct wgt **weight;
+  struct wgtc **weightc = NULL;
+  char aacmap[64];
+  int temp[MAXLC+1][64]; /*change*/
+  char le[MAXLC+1][64];
+
+  /* temp[] and le[] only have MAXLC+1 rows; continuing with a larger
+     naa would write past the end of these stack arrays */
+  if (naa > MAXLC) {
+    fprintf(stderr,"*** dropfz3.c compilation problem naa(%d) > MAXLC(%d) ***\n",
+	   naa, MAXLC);
+    exit(1);
+  }
+
+  if ((*weighti=(struct wgt **)calloc((size_t)(naa+1),sizeof(struct wgt *)))
+      ==NULL) {
+    fprintf(stderr," cannot allocate weights array: %d\n",naa);
+    exit(1);
+  }
+
+  weight = *weighti;
+  /* allocate weight[aa 0..MAXLC] */
+  for (aa=0; aa <= naa; aa++) {
+    if ((weight[aa]=(struct wgt *)calloc((size_t)256,sizeof(struct wgt)))
+	==NULL) {
+      fprintf(stderr," cannot allocate weight[]: %d/%d\n",aa,naa);
+      exit(1);
+    }
+  }
+
+  /* allocate weightci[aa 0..MAXLC] */
+  if (weightci !=NULL) {
+    if ((*weightci=(struct wgtc **)calloc((size_t)(naa+1),
+					  sizeof(struct wgtc *)))==NULL) {
+      fprintf(stderr," cannot allocate weight_c array: %d\n",naa);
+      exit(1);
+    }
+    weightc = *weightci;
+
+    for (aa=0; aa <= naa; aa++) {
+      if ((weightc[aa]=(struct wgtc *)calloc((size_t)256,sizeof(struct wgtc)))
+	  ==NULL) {
+	fprintf(stderr," cannot allocate weightc[]: %d/%d\n",aa,naa);
+	exit(1);
+      }
+    }
+    do_wgtc = 1;
+  }
+  else do_wgtc = 0;
+
+  aagetmap(aacmap,64);
+
+  /* pass 1: for every residue aa and codon i, find the codon j that
+     gives the best score once each differing nucleotide position is
+     charged gsubs; temp[aa][i] is that score, le[aa][i] that codon */
+  for (aa = 0; aa < naa; aa++) {	/* change*/
+    wwt = wgts[aa];			/* pam matrix */
+    for (i = 0; i < 64; i++) {	/* i iterates through the codons */
+      x = -10000;		/* large negative */
+      y = i;
+      for (j = 0; j < 64; j++) {	/* j iterates through the codons */
+	z = ((~i & j) | (i & ~j));	/* bits that differ between i and j */
+	b = 0;		/* score = 0 */
+	if (z % 4) b-= gsubs;
+	if (z /16) b-= gsubs;
+	if ((z /4) % 4) b -= gsubs;   
+	b += wwt[aascii[aacmap[j]]];  /* add the match score for char j*/
+	if (b > x) {
+	  x = b;		/* x has the score */
+	  y = j;		/* y has the character  (codon index)*/
+	}
+      }
+#ifdef DEBUG
+      if (y < 0 || y > 63) printf("%d %d %d %d ",aa, i, x, y);
+#endif
+      temp[aa][i] = x;
+      le[aa][i] = y;
+    }
+    /*            printf("\n"); */
+  }
+
+  /* pass 2: expand to all 256 4-nucleotide codes */
+  for (aa= 0; aa < naa; aa++) { 
+      wwt = temp[aa];
+      for (i = 0; i < 256; i++) {
+	  /* +1 frameshift: best codon left after deleting one of the
+	     four nucleotides */
+          for (x=-100,b = 0; b < 4; b++) {
+              z = (i/ (1 << ((b+1)*2)))*(1<<(b*2))+(i%(1<<(b*2)));
+	      if (x < (e=wwt[z])) {
+		  x = e;
+		  if (do_wgtc) weightc[aa][i].c4 = aacmap[le[aa][z]];
+	      }
+          }
+          weight[aa][i].iv=x-gshift;
+          weight[aa][i].iii = wwt[i%64];
+
+	  if (do_wgtc) {
+	    weightc[aa][i].c5 = aacmap[le[aa][i%64]];
+	    weightc[aa][i].c3 = aacmap[i%64];
+	  }
+	  /* -1 frameshift: best codon made from the low two
+	     nucleotides with one nucleotide inserted at any position */
+          x = i %16;
+          for (y = -100, b = 0; b < 3; b++) {
+              z = ((x >> (b*2)) << (b*2+2)) + (x % (1 << (b*2))); 
+              for (a = 0; a < 4; a++) {
+		  if ((e =wwt[z+(a<<(b*2))]) > y) {
+		      y = e;
+		      if (do_wgtc) 
+			weightc[aa][i].c2 = aacmap[le[aa][z+(a<<(b*2))]];
+		  }
+              }
+          }
+          weight[aa][i].ii = y-gshift;
+      }
+  }
+  /*106=CGGG*/
+  /* code 106 is reserved for codons containing an ambiguous
+     nucleotide (pre_com()/pre_com_r() store 106 for them); score it
+     as residue 'X' */
+  for (aa = 0; aa < naa; aa++) {
+    weight[aa][106].iii = wgts[aa][23]; /* is 23 the code for 'X'?*/
+    weight[aa][106].iv = weight[aa][106].ii = weight[aa][106].iii-gshift;
+    if (do_wgtc) {
+      weightc[aa][106].c5 = weightc[aa][106].c4 = weightc[aa][106].c3
+	= weightc[aa][106].c2 = 'X';
+    }
+  }
+}
+
+/* free_weights() - release the tables built by init_weights().
+
+   weighti0, weighti1, weightci - addresses of the table pointers; a
+             NULL address or a NULL table is skipped, and each table
+             pointer is reset to NULL after it is freed
+   naa     - the naa value that was passed to init_weights()
+
+   init_weights() allocates naa+1 rows (indices 0..naa) per table, so
+   all naa+1 rows are freed here; stopping at naa-1 leaked the last row
+   of every table.
+*/
+void
+free_weights(struct wgt ***weighti0, struct wgt ***weighti1, 
+	     struct wgtc ***weightci, int naa)
+{
+  int aa;
+
+  if (weighti0 != NULL && *weighti0 != NULL) {
+    for (aa=0; aa <= naa; aa++) {free((*weighti0)[aa]);}
+    free(*weighti0);
+    *weighti0 = NULL;
+  }
+
+  if (weighti1 != NULL && *weighti1 != NULL) {
+    for (aa=0; aa <= naa; aa++) {free((*weighti1)[aa]);}
+    free(*weighti1);
+    *weighti1 = NULL;
+  }
+
+  if (weightci != NULL && *weightci != NULL) {
+    for (aa=0; aa <= naa; aa++) {free((*weightci)[aa]);}
+    free(*weightci);
+    *weightci = NULL;
+  }
+}
+
+/* pre_com() - forward-strand precomputation: for every position k,
+   store in aa0v[k] the rolling 8-bit code formed from the hnt[] values
+   of the nucleotides up to aa0[k+2].  Windows whose last three
+   nucleotides include NT_N get the reserved code 106; a genuine code
+   of 106 is remapped to 42 so the two cannot be confused. */
+static void
+pre_com(const unsigned char *aa0, int n0, unsigned char *aa0v) {
+  int code, k;
+
+  /* prime the rolling code with the first two nucleotides */
+  code = (hnt[aa0[0]]<<2) + hnt[aa0[1]];
+
+  for (k = 0; k + 2 < n0; k++) {
+    code = ((code<<2) + hnt[aa0[k+2]]) & 255;
+
+    if (aa0[k+2] == NT_N || aa0[k+1] == NT_N || aa0[k] == NT_N) {
+      aa0v[k] = 106;
+      continue;
+    }
+
+    /* 106 (CGGG) is reserved, substitute 42 (AGGG) */
+    if (code == 106) {
+      code = 42;
+    }
+    aa0v[k] = code;
+  }
+}
+
+/* pre_com_r() - reverse-complement counterpart of pre_com(): reads aa0
+   from its end towards its start, using 3-hnt[] as the complemented
+   nucleotide value, and stores the rolling 8-bit code in aa0v[k].
+   Windows containing NT_N get the reserved code 106; a genuine code of
+   106 is remapped to 42. */
+static void
+pre_com_r(const unsigned char *aa0, int n0, unsigned char *aa0v) {
+  int code, k, src;
+
+  /* prime the rolling code with the last two nucleotides */
+  code = ((3-hnt[aa0[n0-1]])<<2) + 3-hnt[aa0[n0-2]];
+
+  src = n0 - 3;
+  for (k = 0; k + 2 < n0; k++) {
+    code = ((code<<2) + 3 - hnt[aa0[src]]) & 255;
+
+    if (aa0[src] == NT_N || aa0[src+1] == NT_N || aa0[src+2] == NT_N) {
+      aa0v[k] = 106;
+    }
+    else {
+      /* 106 is reserved, substitute 42 */
+      if (code == 106) {
+	code = 42;
+      }
+      aa0v[k] = code;
+    }
+    src--;
+  }
+}
+
+void
+init_work (unsigned char *aa0, int n0, 
+	   struct pstruct *ppst,
+	   struct f_struct **f_arg)
+{
+   int mhv, phv;
+   int hmax;
+   int i0, hv;
+   int pamfact;
+   int btemp;
+   struct f_struct *f_str;
+   struct bdstr *bss;
+   /* these used to be globals, but do not need to be */
+   int ktup, fact, kt1, lkt;
+
+   int maxn0;
+   int *pwaa;
+   int i, j, q;
+   struct swstr *ss, *r_ss;
+   int *waa;
+   int *res;
+   int nsq, ip, *hsq, naat;
+#ifndef TFAST
+   int last_n0, itemp, dnav;
+   unsigned char *fd, *fs, *aa0x, *aa0v;
+   int n0x, n0x3;
+#endif
+
+   if (nt[NT_N] != 'N') {
+     fprintf(stderr," nt[NT_N] (%d) != 'X' (%c) - recompile\n",NT_N,nt[NT_N]);
+     exit(1);
+   }     
+
+   if (ppst->ext_sq_set) {
+     nsq = ppst->nsqx; ip = 1;
+     hsq = ppst->hsqx;
+   }
+   else {
+     nsq = ppst->nsqx; ip = 0;
+     hsq = ppst->hsq;
+   }
+
+   f_str = (struct f_struct *)calloc(1,sizeof(struct f_struct));
+
+   if (!ppst->param_u.fa.use_E_thresholds) {
+     btemp = 2 * ppst->param_u.fa.bestoff / 3 +
+       n0 / ppst->param_u.fa.bestscale +
+       ppst->param_u.fa.bkfact *
+       (ppst->param_u.fa.bktup - ppst->param_u.fa.ktup);
+     btemp = min (btemp, ppst->param_u.fa.bestmax);
+     if (btemp > 3 * n0) btemp = 3 * shscore(aa0,n0,ppst->pam2[0]) / 5;
+
+     ppst->param_u.fa.cgap = btemp + ppst->param_u.fa.bestoff / 3;
+     if (ppst->param_u.fa.optcut_set != 1) {
+#ifndef TFAST
+       ppst->param_u.fa.optcut = (btemp*5)/4;
+#else
+       ppst->param_u.fa.optcut = (btemp*4)/3;
+#endif
+     }
+   }
+
+#ifdef OLD_FASTA_GAP
+   ppst->param_u.fa.pgap = ppst->gdelval + ppst->ggapval;
+#else
+   ppst->param_u.fa.pgap = ppst->gdelval + 2*ppst->ggapval;
+#endif
+   pamfact = ppst->param_u.fa.pamfact;
+   ktup = ppst->param_u.fa.ktup;
+   fact = ppst->param_u.fa.scfact * ktup;
+
+#ifndef TFAST
+   /* before hashing, we must set up some space and translate the sequence */
+
+   maxn0 = n0 + 2;
+   if ((aa0x =(unsigned char *)calloc((size_t)maxn0,
+					     sizeof(unsigned char)))
+       == NULL) {
+     fprintf (stderr, "cannot allocate aa0x array %d\n", maxn0);
+     exit (1);
+   }
+   aa0x++;
+   f_str->aa0x = aa0x;
+
+
+   if ((aa0v =(unsigned char *)calloc((size_t)maxn0,
+					     sizeof(unsigned char)))
+       == NULL) {
+     fprintf (stderr, "cannot allocate aa0v array %d\n", maxn0);
+     exit (1);
+   }
+   aa0v++;
+   f_str->aa0v = aa0v;
+
+   /* make a precomputed codon number series */
+   pre_com(aa0, n0, aa0v);
+
+   last_n0 = 0;
+   for (itemp=0; itemp<3; itemp++) {
+     n0x=saatran(aa0,&aa0x[last_n0],n0,itemp);
+     /*         for (i=0; i<n0x; i++) {
+	   fprintf(stderr,"%c",aa[aa0x[last_n0+i]]);
+	   if ((i%60)==59) fprintf(stderr,"\n");
+	   }
+	   fprintf(stderr,"\n");
+	   */
+     last_n0 += n0x+1;
+   }
+
+   /*     fprintf(stderr,"\n"); */
+   n0x = n0;
+   n0x3 = n0x/3;
+
+   /* now switch aa0 and aa0x for hashing functions */
+   fs = aa0;
+   aa0 = aa0x;
+   aa0x = fs;
+#endif
+
+   /* naat must always be MAXLC because library can have LC aa residues */
+   /*
+   if (ppst->ext_sq_set) naat = MAXLC;
+   else naat = MAXUC;
+   */
+   naat = MAXLC;
+
+   init_weights(&f_str->weight0, NULL,
+		ppst->pam2[ip],-ppst->gshift,-ppst->gsubs,naat);
+   init_weights(&f_str->weight1, &f_str->weight_c,
+		ppst->pam2[0],-ppst->gshift,-ppst->gsubs,naat);
+
+   if (pamfact == -1)
+      pamfact = 0;
+   else if (pamfact == -2)
+      pamfact = 1;
+
+   for (i0 = 1, mhv = -1; i0 < ppst->nsq; i0++)
+      if (hsq[i0] < NMAP && hsq[i0] > mhv)
+	 mhv = ppst->hsq[i0];
+
+   if (mhv <= 0)
+   {
+      fprintf (stderr, " maximum hsq <=0 %d\n", mhv);
+      exit (1);
+   }
+
+   for (f_str->kshft = 0; mhv > 0; mhv /= 2) f_str->kshft++;
+
+/*      kshft = 2;	*/
+   kt1 = ktup - 1;
+   hv = 1;
+   for (i0 = 0; i0 < ktup; i0++)
+      hv = hv << f_str->kshft;
+   hmax = hv;
+   f_str->hmask = (hmax >> f_str->kshft) - 1;
+
+   if ((f_str->harr = (int *) calloc (hmax, sizeof (int))) == NULL) {
+     fprintf (stderr, " cannot allocate hash array\n");
+     exit (1);
+   }
+   if ((f_str->pamh1 = (int *) calloc (ppst->nsq+1, sizeof (int))) == NULL) {
+     fprintf (stderr, " cannot allocate pamh1 array\n");
+     exit (1);
+   }
+   if ((f_str->pamh2 = (int *) calloc (hmax, sizeof (int))) == NULL) {
+     fprintf (stderr, " cannot allocate pamh2 array\n");
+     exit (1);
+   }
+   if ((f_str->link = (int *) calloc (n0, sizeof (int))) == NULL) {
+     fprintf (stderr, " cannot allocate hash link array");
+     exit (1);
+   }
+
+   for (i0 = 0; i0 < hmax; i0++)
+      f_str->harr[i0] = -1;
+   for (i0 = 0; i0 < n0; i0++)
+      f_str->link[i0] = -1;
+
+   /* encode the aa0 array */
+   phv = hv = 0;
+   lkt = kt1;
+   for (i0 = 0; i0 < min(n0,lkt); i0++) {
+     if (hsq[aa0[i0]] >= NMAP) {
+       hv=phv=0; lkt = i0+ktup; continue;
+     }
+     hv = (hv << f_str->kshft) + ppst->hsq[aa0[i0]];
+     phv += ppst->pam2[ip][aa0[i0]][aa0[i0]] * ktup;
+   }
+
+   for (; i0 < n0; i0++) {
+     if (hsq[aa0[i0]] >= NMAP) {
+       hv=phv=0;
+       /* restart hv, phv calculation */
+       for (lkt = i0+kt1; (i0 < lkt || hsq[aa0[i0]]>=NMAP) && i0<n0; i0++) {
+	 if (hsq[aa0[i0]] >= NMAP) {
+	   hv=phv=0;
+	   lkt = i0+ktup;
+	   continue;
+	 }
+	 hv = (hv << f_str->kshft) + hsq[aa0[i0]];
+	 phv += ppst->pam2[ip][aa0[i0]][aa0[i0]]*ktup;
+       }
+     }
+     if (i0 >= n0) break;
+     hv = ((hv & f_str->hmask) << f_str->kshft) + ppst->hsq[aa0[i0]];
+     f_str->link[i0] = f_str->harr[hv];
+     f_str->harr[hv] = i0;
+     if (pamfact) {
+       f_str->pamh2[hv] = (phv += ppst->pam2[ip][aa0[i0]][aa0[i0]] * ktup);
+       if (hsq[aa0[i0-kt1]] < NMAP) 
+	 phv -= ppst->pam2[ip][aa0[i0 - kt1]][aa0[i0 - kt1]] * ktup;
+     }
+     else f_str->pamh2[hv] = fact * ktup;
+   }
+
+/* this has been modified from 0..<ppst->nsq to 1..<=ppst->nsq because the
+   pam2[0][0] is now undefined for consistency with blast
+*/
+
+   if (pamfact)
+      for (i0 = 1; i0 < ppst->nsq; i0++)
+	 f_str->pamh1[i0] = ppst->pam2[ip][i0][i0] * ktup;
+   else
+      for (i0 = 1; i0 < ppst->nsq; i0++)
+	 f_str->pamh1[i0] = fact;
+
+   f_str->ndo = 0;	/* used to save time on diagonals with long queries */
+
+
+#ifndef ALLOCN0
+   if ((f_str->diag = (struct dstruct *) calloc ((size_t)MAXDIAG,
+						 sizeof (struct dstruct)))==NULL) {
+      fprintf (stderr," cannot allocate diagonal arrays: %lu\n",
+	       MAXDIAG *sizeof (struct dstruct));
+      exit (1);
+     };
+#else
+   if ((f_str->diag = (struct dstruct *) calloc ((size_t)n0,
+					      sizeof (struct dstruct)))==NULL) {
+      fprintf (stderr," cannot allocate diagonal arrays: %ld\n",
+	      (long)n0*sizeof (struct dstruct));
+      exit (1);
+     };
+#endif
+
+#ifndef TFAST
+   /* done hashing, now switch aa0, aa0x back */
+   fs = aa0;
+   aa0 = aa0x;
+   aa0x = fs;
+#else
+   if ((f_str->aa1x =(unsigned char *)calloc((size_t)ppst->maxlen+4,
+					     sizeof(unsigned char)))
+       == NULL) {
+     fprintf (stderr, "cannot allocate aa1x array %d\n", ppst->maxlen+4);
+     exit (1);
+   }
+   f_str->aa1x++;
+
+   if ((f_str->aa1v =(unsigned char *)calloc((size_t)ppst->maxlen+4,
+					     sizeof(unsigned char))) == NULL) {
+     fprintf (stderr, "cannot allocate aa1v array %d\n", ppst->maxlen+4);
+     exit (1);
+   }
+   f_str->aa1v++;
+
+#endif
+
+   if ((waa= (int *)malloc (sizeof(int)*(nsq+1)*n0)) == NULL) {
+     fprintf(stderr,"cannot allocate waa struct %3d\n",nsq*n0);
+     exit(1);
+   }
+
+   pwaa = waa;
+   for (i=0; i<nsq; i++) {
+     for (j=0;j<n0; j++) {
+       *pwaa = ppst->pam2[ip][i][aa0[j]];
+       pwaa++;
+     }
+   }
+   f_str->waa = waa;
+
+#ifndef TFAST
+   maxn0 = max(2*n0,MIN_RES);
+#else
+   maxn0 = max(4*n0,MIN_RES);
+#endif
+   if ((res = (int *)calloc((size_t)maxn0,sizeof(int)))==NULL) {
+     fprintf(stderr,"cannot allocate alignment results array %d\n",maxn0);
+     exit(1);
+   }
+   f_str->res = res;
+   f_str->max_res = maxn0;
+
+   *f_arg = f_str;
+}
+
+/* get_param() - describe the search parameters as text.
+
+   pstring1 is a message to the manager (noted as "currently 512"):
+     pstring1[0] - program name and version, with " [optimized]" appended
+                   when optflag is set and " init1" when iniflag is set
+     pstring1[1] - matrix name, gap/shift/subs penalties, ktup, the
+                   join/opt thresholds and the band width
+   pstring2, when not NULL, gets the same information as "; pg_..." lines
+   (markx==10 format).
+
+   The fractions printed next to each threshold are s_cnt[0] and s_cnt[2]
+   divided by tot_scores from s_info.
+*/
+void
+get_param (const struct pstruct *ppst,
+	   char **pstring1, char *pstring2,
+	   struct score_count_s *s_info)
+{
+  char options_str1[128];
+  char options_str2[128];
+  double join_frac, opt_frac;
+#ifndef TFAST
+  char *pg_str="FASTY";
+#else
+  char *pg_str="TFASTY";
+#endif
+
+  /* fraction of scored sequences that passed each threshold */
+  join_frac = (double)s_info->s_cnt[0]/(double)s_info->tot_scores;
+  opt_frac = (double)s_info->s_cnt[2]/(double)s_info->tot_scores;
+
+  if (ppst->param_u.fa.use_E_thresholds) {
+    sprintf(options_str1,"E-join: %.2g (%.3g), E-opt: %.2g (%.3g)",
+	    ppst->param_u.fa.E_join, join_frac,
+	    ppst->param_u.fa.E_band_opt, opt_frac);
+    sprintf(options_str2,"pg_join_E(): %.2g (%.3g)\n; pg_optcut_E(): %.2g (%.3g)",
+	    ppst->param_u.fa.E_join, join_frac,
+	    ppst->param_u.fa.E_band_opt, opt_frac);
+  }
+  else {
+    sprintf(options_str1,"join: %d (%.3g), opt: %d (%.3g)",
+	    ppst->param_u.fa.cgap, join_frac,
+	    ppst->param_u.fa.optcut, opt_frac);
+    sprintf(options_str2,"pg_join: %d (%.3g)\n; pg_optcut: %d (%.3g)",
+	    ppst->param_u.fa.cgap, join_frac,
+	    ppst->param_u.fa.optcut, opt_frac);
+  }
+
+  if (ppst->param_u.fa.optflag)
+    sprintf (pstring1[0], "%s (%s) [optimized]",pg_str, verstr);
+  else
+    sprintf (pstring1[0], "%s (%s)",pg_str, verstr);
+
+  sprintf (pstring1[1], 
+#ifdef OLD_FASTA_GAP
+	   "%s matrix (%d:%d)%s, gap-pen: %3d/%3d, shift: %3d, subs: %3d\n ktup: %d, %s, width: %3d",
+#else
+	   "%s matrix (%d:%d)%s, open/ext: %3d/%3d, shift: %3d, subs: %3d\n ktup: %d, %s, width: %3d",
+#endif
+	   ppst->pam_name, ppst->pam_h,ppst->pam_l,
+	   (ppst->ext_sq_set) ? "xS":"\0",
+	   ppst->gdelval, ppst->ggapval,
+	   ppst->gshift,ppst->gsubs,
+	   ppst->param_u.fa.ktup, options_str1, ppst->param_u.fa.optwid);
+
+  if (ppst->param_u.fa.iniflag) strcat(pstring1[0]," init1");
+
+  if (pstring2 == NULL) return;
+
+  /* the continuation line must start in column 0 so that no extra
+     white space ends up inside the format string */
+  sprintf (pstring2,
+#ifdef OLD_FASTA_GAP
+	   "; pg_name_alg: %s\n; pg_ver_rel: %s\n; pg_matrix: %s (%d:%d)%s\n\
+; pg_gap-pen: %d %d\n; pg_ktup: %d\n; %s\n",
+#else
+	   "; pg_name_alg: %s\n; pg_ver_rel: %s\n; pg_matrix: %s (%d:%d)%s\n\
+; pg_open-ext: %d %d\n; pg_ktup: %d\n; %s\n",
+#endif
+	   pg_str,verstr,ppst->pam_name, ppst->pam_h,ppst->pam_l,
+	   (ppst->ext_sq_set) ? "xS":"\0",  ppst->gdelval,
+	   ppst->ggapval,ppst->param_u.fa.ktup,options_str2);
+}
+
+/* close_work() - release everything hanging off *f_arg and set *f_arg
+   to NULL.  Does nothing when *f_arg is already NULL.
+
+   aa0, n0 and ppst are accepted for interface compatibility and are not
+   used here.
+*/
+void
+close_work (const unsigned char *aa0, int n0,
+	    struct pstruct *ppst,
+	    struct f_struct **f_arg)
+{
+  struct f_struct *f_str;
+
+  f_str = *f_arg;
+  if (f_str == NULL) return;
+
+  /* init_work() builds the weight tables with naat = MAXLC regardless of
+     ppst->ext_sq_set, so they must be released with the same count;
+     using MAXUC when ext_sq_set is off would not match the allocation */
+  free_weights(&f_str->weight0,&f_str->weight1,&f_str->weight_c,MAXLC);
+
+  free(f_str->cur);
+
+  /* these buffers were advanced by one element after allocation, so step
+     the pointers back before handing them to free() */
+#ifndef TFAST
+  f_str->aa0v--;
+  free(f_str->aa0v);
+  f_str->aa0x--;
+  free(f_str->aa0x);
+#else	/* TFAST */
+  f_str->aa1x--;
+  free(f_str->aa1x);
+  f_str->aa1v--;
+  free(f_str->aa1v);
+#endif
+
+  free(f_str->res);
+  free(f_str->waa);
+  free(f_str->diag);
+  free(f_str->link);
+  free(f_str->pamh2); 
+  free(f_str->pamh1);
+  free(f_str->harr);
+  free(f_str);
+  *f_arg = NULL;
+}
+
+/* do_fastz() - ktup lookup scan of aa1 against the hashed query, rescoring
+   and joining of the best diagonal runs, and an optional banded
+   optimization.
+
+   Results are returned through rst->score[]:
+     score[1] - best single run after rescoring with spam()
+     score[0] - larger of the sconn() joined score and score[1]
+     score[2] - copy of score[0]; replaced by the dmatchz() band score when
+                ppst->param_u.fa.optflag is set and score[0] > opt_cut
+   *hoff receives the offset of the best run's diagonal.  s_info->tot_scores
+   is incremented for every sequence that reaches the joining step, and
+   s_cnt[0] / s_cnt[2] count the sequences above the join / opt thresholds.
+
+   A sequence shorter than ktup scores 0 (and leaves *hoff untouched);
+   when n0+n1+1 >= MAXDIAG a message is written to stderr and all three
+   scores are set to -1.
+*/
+void do_fastz (const unsigned char *aa0, int n0,
+	       const unsigned char *aa1, int n1,
+	       const unsigned char *aa1v,
+	       const struct pstruct *ppst, struct f_struct *f_str,
+	       struct rstruct *rst, int *hoff, int shuff_flg,
+	       struct score_count_s *s_info)
+{
+   int     nd;		/* diagonal array size */
+   int     lhval;
+   int     kfact;
+   int i;
+   register struct dstruct *dptr;
+   struct savestr vmax[MAXSAV];	/* best matches saved for one sequence */
+   struct savestr *vptr[MAXSAV];
+   struct savestr *lowmax;
+   int lowscor;
+   register int tscor;
+   int xdebug = 0;
+
+#ifndef ALLOCN0
+   register struct dstruct *diagp;
+#else
+   register int dpos;
+   int     lposn0;
+#endif
+   struct dstruct *dpmax;
+   register int lpos;
+   int     tpos;
+   struct savestr *vmptr;
+   int     scor, tmp;
+   int     im, ib, nsave;
+   int ktup, kt1, ip, lkt, ktup_sq;
+   const int *hsq;
+   int c_gap, opt_cut;
+#ifndef TFAST
+   int n0x31, n0x32;
+   n0x31 = (n0-2)/3;
+   n0x32 = n0x31+1+(n0-n0x31-1)/2;
+#else
+   unsigned char *fs, *fd;
+   int n1x31, n1x32, last_n1, itemp;
+   n1x31 = (n1-2)/3;
+   n1x32 = n1x31+1+(n1-n1x31-1)/2;
+#endif
+
+  if (ppst->ext_sq_set) {
+    ip = 1;
+    hsq = ppst->hsqx;
+  }
+  else {
+    ip = 0;
+    hsq = ppst->hsq;
+  }
+
+  ktup = ppst->param_u.fa.ktup;
+  ktup_sq = ktup*ktup;
+  if (ktup == 1) ktup_sq *= 2;
+
+  kt1 = ktup-1;
+
+  if (n1 < ktup) {
+    rst->score[0] = rst->score[1] = rst->score[2] = 0;
+    return;
+  }
+
+  if (n0+n1+1 >= MAXDIAG) {
+    fprintf(stderr,"n0,n1 too large: %d, %d\n",n0,n1);
+    rst->score[0] = rst->score[1] = rst->score[2] = -1;
+    return;
+  }
+
+  /* join/opt thresholds: either derived from E()-values for this pair of
+     lengths, or the fixed values computed in init_work() */
+  if (ppst->param_u.fa.use_E_thresholds) {
+    c_gap = ELK_to_s(ppst->param_u.fa.E_join*ktup_sq*2.5, n0, n1, ppst->pLambda, ppst->pK, ppst->pH);
+    opt_cut = ELK_to_s(ppst->param_u.fa.E_band_opt*ktup_sq*2.0, n0, n1, ppst->pLambda, ppst->pK, ppst->pH);
+    rst->valid_stat = 0;
+  }
+  else {
+    c_gap = ppst->param_u.fa.cgap;
+    opt_cut = ppst->param_u.fa.optcut;
+    rst->valid_stat = 1;
+  }
+
+  f_str->noff = n0 - 1;
+
+#ifdef ALLOCN0
+   nd = n0;
+#endif
+
+#ifndef ALLOCN0
+   nd = n0 + n1;
+#endif
+
+   /* only diagonals beyond those already cleared (ndo) need resetting */
+   dpmax = &f_str->diag[nd];
+   for (dptr = &f_str->diag[f_str->ndo]; dptr < dpmax;)
+   {
+      dptr->stop = -1;
+      dptr->dmax = NULL;
+      dptr++->score = 0;
+   }
+
+   for (vmptr = vmax; vmptr < &vmax[MAXSAV]; vmptr++)
+      vmptr->score = 0;
+   lowmax = vmax;
+   lowscor = 0;
+
+  /* debugging hook for one particular sequence pattern; the length test
+     must cover the highest index examined (1401) so that nothing beyond
+     aa1[n1-1] is read */
+  if (n1 > 1401 && aa1[0]==23 && aa1[100]==23 &&
+      aa1[1400]==23 && aa1[1401]!=23) {
+    xdebug = 1;
+  }
+  else xdebug = 0;
+
+   /* start hashing */
+   lhval = 0;
+   lkt = kt1;
+   for (lpos = 0; (lpos < lkt || hsq[aa1[lpos]]>=NMAP) && lpos<n1; lpos++) {
+     /* restart lhval calculation */
+     if (hsq[aa1[lpos]]>=NMAP) {
+       lhval = 0; lkt=lpos+ktup;
+       continue;
+     }
+     lhval = ((lhval & f_str->hmask) << f_str->kshft) + ppst->hsq[aa1[lpos]];
+   }
+
+#ifndef ALLOCN0
+   diagp = &f_str->diag[f_str->noff + lkt];
+   for (; lpos < n1; lpos++, diagp++) {
+     if (hsq[aa1[lpos]]>=NMAP) {
+       lpos++ ; diagp++;
+       while (lpos < n1 && hsq[aa1[lpos]]>=NMAP) {lpos++; diagp++;}
+       if (lpos >= n1) break;
+       lhval = 0;
+     }
+     lhval = ((lhval & f_str->hmask) << f_str->kshft) + ppst->hsq[aa1[lpos]];
+     for (tpos = f_str->harr[lhval]; tpos >= 0; tpos = f_str->link[tpos]) {
+       if ((tscor = (dptr = &diagp[-tpos])->stop) >= 0) {
+#else
+   lposn0 = f_str->noff + lpos;
+   for (; lpos < n1; lpos++, lposn0++) {
+     if (hsq[aa1[lpos]]>=NMAP) {lhval = 0; goto loopl;}
+     lhval = ((lhval & f_str->hmask) << f_str->kshft) + ppst->hsq[aa1[lpos]];
+     for (tpos = f_str->harr[lhval]; tpos >= 0; tpos = f_str->link[tpos]) {
+       dpos = lposn0 - tpos;
+       if ((tscor = (dptr = &f_str->diag[dpos % nd])->stop) >= 0) {
+#endif
+	 /* this diagonal already has a run; extend it, or save it and
+	    start a new one when the gap since its end costs too much */
+	 tscor += ktup;
+	 if ((tscor -= lpos) <= 0) {
+	   scor = dptr->score;
+	   if ((tscor += (kfact = f_str->pamh2[lhval])) < 0 && lowscor < scor) {
+#ifdef ALLOCN0
+	     lowscor = savemax (dptr, dpos, vmax, &lowmax);
+#else
+	     lowscor = savemax (dptr, dptr- f_str->diag, vmax, &lowmax);
+#endif
+	   }
+	   if ((tscor += scor) >= kfact) {
+	     dptr->score = tscor;
+	     dptr->stop = lpos;
+	   }
+	   else {
+	     dptr->score = kfact;
+	     dptr->start = (dptr->stop = lpos) - kt1;
+	   }
+	 }
+	 else {
+	   dptr->score += f_str->pamh1[aa0[tpos]];
+	   dptr->stop = lpos;
+	 }
+       }
+       else {
+	 dptr->score = f_str->pamh2[lhval];
+	 dptr->start = (dptr->stop = lpos) - kt1;
+       }
+     }				/* end tpos */
+
+#ifdef ALLOCN0
+      /* reinitialize diag structure */
+   loopl:
+     if ((dptr = &f_str->diag[lpos % nd])->score > lowscor)
+       lowscor = savemax (dptr, lpos, vmax, &lowmax);
+     dptr->stop = -1;
+     dptr->dmax = NULL;
+     dptr->score = 0;
+#endif
+   }	/* end lpos */
+
+#ifdef ALLOCN0
+   for (tpos = 0, dpos = f_str->noff + n1 - 1; tpos < n0; tpos++, dpos--) {
+     /* savemax() takes four arguments; f_str is not one of them */
+     if ((dptr = &f_str->diag[dpos % nd])->score > lowscor)
+       lowscor = savemax (dptr, dpos, vmax, &lowmax);
+   }
+#else
+   for (dptr = f_str->diag; dptr < dpmax;) {
+     if (dptr->score > lowscor) savemax (dptr, dptr - f_str->diag, vmax, &lowmax);
+     dptr->stop = -1;
+     dptr->dmax = NULL;
+     dptr++->score = 0;
+   }
+   f_str->ndo = nd;
+#endif
+
+   /* rescore every saved run with the full scoring matrix */
+   for (nsave = 0, vmptr = vmax; vmptr < &vmax[MAXSAV]; vmptr++) {
+     if (vmptr->score > 0) {
+       vmptr->score = spam (aa0, aa1, vmptr, ppst->pam2[ip], f_str);
+       vptr[nsave++] = vmptr;
+     }
+   }
+
+   if (nsave <= 0) {
+     rst->score[0] = rst->score[1] = rst->score[2] = 0;
+     return;
+   }
+       
+#ifndef TFAST
+   /* FASTX code here to modify the start, stop points for 
+      the three phases of the translated protein sequence
+   */
+
+   for (ib=0; ib<nsave; ib++) {
+     if (f_str->noff-vptr[ib]->dp+vptr[ib]->start >= n0x32)
+       vptr[ib]->dp += n0x32;
+     if (f_str->noff-vptr[ib]->dp +vptr[ib]->start >= n0x31)
+       vptr[ib]->dp += n0x31;
+   }
+#else
+   /* TFAST code here to modify the start, stop points for 
+      the three phases of the translated protein sequence
+      TFAST modifies library start points, rather than 
+      query start points
+   */
+
+   for (ib=0; ib<nsave; ib++) {
+     if (vptr[ib]->start >= n1x32) {
+       vptr[ib]->start -= n1x32;
+       vptr[ib]->stop -= n1x32;
+       vptr[ib]->dp -= n1x32;
+     }
+     if (vptr[ib]->start >= n1x31) {
+       vptr[ib]->start -= n1x31;
+       vptr[ib]->stop -= n1x31;
+       vptr[ib]->dp -= n1x31;
+     }
+   }
+#endif /* TFAST */
+
+   scor = sconn (vptr, nsave, c_gap, 
+		 ppst->param_u.fa.pgap, f_str);
+
+   /* find the best single run */
+   for (vmptr=vptr[0],ib=1; ib<nsave; ib++)
+     if (vptr[ib]->score > vmptr->score) vmptr=vptr[ib];
+
+/*  kssort (vptr, nsave); */
+
+   rst->score[1] = vmptr->score;
+   rst->score[0] = max (scor, vmptr->score);
+   rst->score[2] = rst->score[0];		/* initn */
+
+   s_info->tot_scores++;
+   if (rst->score[0] > c_gap) { s_info->s_cnt[0]++;}
+#ifndef TFAST
+   *hoff=f_str->noff - vmptr->dp;
+#else /* TFAST */
+   *hoff=vmptr->dp-f_str->noff;
+#endif /* TFAST */
+   if (ppst->param_u.fa.optflag) {
+     if (/* shuff_flg || */ rst->score[0] > opt_cut) {
+       s_info->s_cnt[2]++;
+       rst->valid_stat = 1;
+       rst->score[2] = dmatchz(aa0, n0,aa1,n1, aa1v,
+			       *hoff,ppst->param_u.fa.optwid,
+			       ppst->pam2[ip],
+			       ppst->gdelval,ppst->ggapval,ppst->gshift,
+			       f_str);
+     }
+   }
+}
+
+/* do_work() - score one library sequence against the query.
+
+   rst is first reset (escore 1.0, segnum/seglen 1, valid_stat 0).  A
+   library sequence shorter than ktup gets scores of 0 and nothing else is
+   done.  Otherwise do_fastz() is run: directly on aa1 in the FASTX/FASTY
+   build, or - in the TFAST build - on the translation of aa1 in the three
+   reading frames selected by frame, which is written to f_str->aa1x.
+   rst->comp and rst->H are set to -1.0 afterwards.
+
+   qr_flg is not used here.
+*/
+void do_work (const unsigned char *aa0, int n0,
+	      const unsigned char *aa1, int n1,
+	      int frame,
+	      const struct pstruct *ppst,
+	      struct f_struct *f_str,
+	      int qr_flg, int shuff_flg, struct rstruct *rst,
+	      struct score_count_s *s_info)
+{
+  int hoff;
+#ifdef TFAST
+  int xlat_len, seg_len, tframe;
+  unsigned char *xlat;
+#endif
+
+  rst->escore = 1.0;
+  rst->segnum = rst->seglen = 1;
+  rst->valid_stat = 0;
+
+  if (n1 < ppst->param_u.fa.ktup) {
+    rst->score[0] = rst->score[1] = rst->score[2] = 0;
+    return;
+  }
+
+#ifndef TFAST
+  do_fastz (f_str->aa0x, n0, aa1, n1, f_str->aa0v, ppst, f_str, rst, &hoff, shuff_flg, s_info);
+#else
+  /* make a precomputed codon number series */
+  if (frame != 0) {
+    pre_com_r(aa1, n1, f_str->aa1v);
+  }
+  else {
+    pre_com(aa1, n1, f_str->aa1v);
+  }
+
+  /* make translated sequence */
+  xlat = f_str->aa1x;
+  xlat_len = 0;
+#ifdef DEBUG
+  if (frame > 1) {
+    fprintf(stderr, "*** fz_walign - frame: %d - out of range [0,1]\n",frame);
+  }
+#endif
+
+  /* the three translations are stored one after another, each followed
+     by one extra position */
+  tframe = frame*3;
+  while (tframe < frame*3+3) {
+    seg_len = saatran(aa1,&xlat[xlat_len],n1,tframe);
+    xlat_len += seg_len+1;
+    tframe++;
+  }
+
+  /* drop the extra position after the last translation */
+  do_fastz (aa0, n0, f_str->aa1x, xlat_len-1, f_str->aa1v, ppst, f_str, rst, &hoff, shuff_flg, s_info);
+#endif
+
+  rst->comp = rst->H = -1.0;
+}
+
+/* do_opt() - rescore one library sequence with the banded optimization
+   forced on.
+
+   ppst->param_u.fa.optflag is saved, set to 1 for the duration of the
+   do_fastz() call, and restored before returning.  Score counts go to a
+   local, zero-initialized score_count_s that is discarded.
+*/
+void do_opt (const unsigned char *aa0, int n0,
+	     const unsigned char *aa1, int n1,
+	     int frame,
+	     struct pstruct *ppst,
+	     struct f_struct *f_str,
+	     struct rstruct *rst)
+{
+  int saved_optflag, hoff;
+  struct score_count_s s_info = {0, 0, 0, 0};
+#ifdef TFAST
+  int xlat_len, seg_len, tframe;
+  unsigned char *xlat;
+#endif
+
+  saved_optflag = ppst->param_u.fa.optflag;
+  ppst->param_u.fa.optflag = 1;
+
+#ifndef TFAST
+  do_fastz (f_str->aa0x, n0, aa1, n1, f_str->aa0v, ppst, f_str, rst, &hoff, 0, &s_info);
+#else
+  /* make a precomputed codon number series */
+  if (frame != 0) {
+    pre_com_r(aa1, n1, f_str->aa1v);
+  }
+  else {
+    pre_com(aa1, n1, f_str->aa1v);
+  }
+
+  /* make translated sequence: three frames stored back to back, each
+     followed by one extra position */
+  xlat = f_str->aa1x;
+  xlat_len = 0;
+  tframe = frame*3;
+  while (tframe < frame*3+3) {
+    seg_len = saatran(aa1,&xlat[xlat_len],n1,tframe);
+    xlat_len += seg_len+1;
+    tframe++;
+  }
+
+  do_fastz (aa0, n0, f_str->aa1x, xlat_len-1, f_str->aa1v, ppst, f_str, rst, &hoff, 0, &s_info );
+#endif
+
+  ppst->param_u.fa.optflag = saved_optflag;
+}
+
+/* savemax() - record the run on diagonal dptr in the table of saved runs.
+
+   If dptr->dmax already points at a saved entry for the same diagonal
+   (dpos) and the same start, that entry's stop is updated, and its score
+   too when the run now scores higher.  Otherwise the run overwrites the
+   entry *lowmax points at.
+
+   *lowmax is rescanned only when the entry it points at was changed.
+   Returns the score of the entry *lowmax points at on return.
+*/
+int
+savemax (struct dstruct *dptr, int dpos, 
+	 struct savestr *vmax, struct savestr **lowmax)
+{
+   struct savestr *slot, *scan;
+   int low;
+
+   slot = dptr->dmax;
+
+   if (slot == NULL || slot->dp != dpos || slot->start != dptr->start) {
+      /* a new run: it replaces the current lowest entry */
+      slot = *lowmax;
+      slot->score = dptr->score;
+      slot->dp = dpos;
+      slot->start = dptr->start;
+      slot->stop = dptr->stop;
+      dptr->dmax = slot;
+   }
+   else {
+      /* continuation of a run that is already saved */
+      slot->stop = dptr->stop;
+      if (dptr->score <= slot->score)
+	 return (*lowmax)->score;
+      slot->score = dptr->score;
+      if (slot != *lowmax)
+	 return (*lowmax)->score;
+   }
+
+   /* the lowest entry was modified: find the new lowest one */
+   low = dptr->score;
+   scan = vmax;
+   while (scan < vmax+MAXSAV) {
+     if (scan->score < low) {
+       low = scan->score;
+       *lowmax = scan;
+     }
+     scan++;
+   }
+   return low;
+}
+
+/* spam() - rescan the run described by dmax with the scoring matrix pam2
+   and trim it to its best-scoring subregion.
+
+   aa1 is indexed by the run's start..stop positions; the matching aa0
+   position is (position - dmax->dp + f_str->noff).  A running total is
+   kept; whenever it drops below zero the current subregion is closed and
+   a new one starts at the next position.
+
+   On return dmax->start / dmax->stop hold the boundaries of the best
+   subregion and its score is returned.  If no subregion scores above
+   zero, dmax->start / dmax->stop keep their incoming values and 0 is
+   returned (previously they were overwritten with uninitialized values).
+*/
+int spam (const unsigned char *aa0,
+	  const unsigned char *aa1,
+	  struct savestr *dmax, int **pam2,
+	  struct f_struct *f_str)
+{
+   int     lpos;
+   int     tot;
+   struct {
+     int     start, stop, score;
+   } curv, maxv;
+   const unsigned char *aa0p, *aa1p;
+
+   aa1p = &aa1[lpos = dmax->start];
+   aa0p = &aa0[lpos - dmax->dp + f_str->noff];
+   curv.start = lpos;
+   curv.stop = lpos;
+
+   /* default to the incoming region so that start/stop are well defined
+      even when nothing in the run scores above zero */
+   maxv.start = dmax->start;
+   maxv.stop = dmax->stop;
+
+   tot = curv.score = maxv.score = 0;
+   for (; lpos <= dmax->stop; lpos++) {
+     tot += pam2[*aa0p++][*aa1p++];
+     if (tot > curv.score) {
+       curv.stop = lpos;
+       curv.score = tot;
+     }
+     else if (tot < 0) {
+       /* close the current subregion and start again after lpos */
+       if (curv.score > maxv.score) {
+	 maxv.start = curv.start;
+	 maxv.stop = curv.stop;
+	 maxv.score = curv.score;
+       }
+       tot = curv.score = 0;
+       curv.start = lpos+1;
+     }
+   }
+
+   /* the last subregion is still open when the loop ends */
+   if (curv.score > maxv.score) {
+     maxv.start = curv.start;
+     maxv.stop = curv.stop;
+     maxv.score = curv.score;
+   }
+
+/*	if (maxv.start != dmax->start || maxv.stop != dmax->stop)
+		printf(" new region: %3d %3d %3d %3d\n",maxv.start,
+			dmax->start,maxv.stop,dmax->stop);
+*/
+   dmax->start = maxv.start;
+   dmax->stop = maxv.stop;
+
+   return maxv.score;
+}
+
+#define XFACT 10
+
+/* sconn() - join compatible runs and return the best joined score.
+
+   v[0..n-1] is first sorted by start position (kpsort).  Runs scoring
+   less than cgap are ignored.  Every other run gets a node in a list kept
+   in decreasing score order; its score is its own score, or - when some
+   node already in the list ends less than XFACT past this run's start in
+   both sequences - the score of the first such node plus this run's score
+   plus pgap.
+
+   A node that does not outscore any node already in the list is not
+   linked in.  Returns the score at the head of the list, or 0 when no run
+   reached cgap.
+*/
+int sconn (struct savestr **v, int n, 
+       int cgap, int pgap, struct f_struct *f_str)
+{
+  struct slink {
+    int     score;
+    struct savestr *vp;
+    struct slink *next;
+  }      *head, *node, *prev, *walk, sarr[MAXSAV];
+  int     iv, nused;
+  int     lib_start, qry_start, lib_stop, qry_stop;
+
+/*	sort the score left to right in lib pos */
+
+   kpsort (v, n);
+
+   head = NULL;
+   nused = 0;
+
+   for (iv = 0; iv < n; iv++) {
+
+/*	if the score is less than the gap penalty, it never helps */
+      if (v[iv]->score < cgap) continue;
+
+      lib_start = v[iv]->start;
+      qry_start = lib_start - v[iv]->dp + f_str->noff;
+
+/*	put the run in the group */
+      node = &sarr[nused++];
+      node->vp = v[iv];
+      node->score = v[iv]->score;
+      node->next = NULL;
+
+/* 	if it fits behind a run already in the list, increase the score */
+      walk = head;
+      while (walk != NULL) {
+	 lib_stop = walk->vp->stop;
+	 qry_stop = lib_stop - walk->vp->dp + f_str->noff;
+	 if (lib_stop < lib_start+XFACT && qry_stop < qry_start+XFACT) {
+	    node->score = walk->score + v[iv]->score + pgap;
+	    break;
+	 }
+	 walk = walk->next;
+      }
+
+/*	now recalculate where the score fits */
+      if (head == NULL) {
+	 head = node;
+	 continue;
+      }
+
+      prev = NULL;
+      for (walk = head; walk != NULL; prev = walk, walk = walk->next) {
+	 if (node->score > walk->score) {
+	    /* insert in front of the first lower-scoring node */
+	    node->next = walk;
+	    if (prev == NULL) head = node;
+	    else prev->next = node;
+	    break;
+	 }
+      }
+   }
+
+   return (head != NULL) ? head->score : 0;
+}
+
+/* kssort() - sort the n pointers in v[] into decreasing score order
+   (shell sort, gap halved on every pass) */
+void
+kssort (v, n)
+struct savestr *v[];
+int     n;
+{
+   int     step, hi, lo;
+   struct savestr *held;
+
+   step = n / 2;
+   while (step > 0) {
+      for (hi = step; hi < n; hi++) {
+	 /* bubble the entry downwards while it outscores its partner */
+	 lo = hi - step;
+	 while (lo >= 0 && v[lo]->score < v[lo + step]->score) {
+	    held = v[lo];
+	    v[lo] = v[lo + step];
+	    v[lo + step] = held;
+	    lo -= step;
+	 }
+      }
+      step /= 2;
+   }
+}
+
+/* kpsort() - sort the n pointers in v[] into increasing ->start order
+   (shell sort using the fixed gap sequence 21, 7, 3, 1) */
+void
+kpsort (struct savestr **v, int n) {
+  static const int gaps[4] = { 21, 7, 3, 1 };
+  int pass, step, hi, lo;
+  struct savestr *held;
+
+  for (pass = 0; pass < 4; pass++) {
+    step = gaps[pass];
+    for (hi = step; hi < n; hi++) {
+      /* gapped insertion: shift larger entries up, then drop held in */
+      held = v[hi];
+      for (lo = hi; lo >= step && v[lo - step]->start > held->start; lo -= step) {
+	v[lo] = v[lo - step];
+      }
+      v[lo] = held;
+    }
+  }
+}
+
+/* dmatchz() - run the banded alignment lx_band() in a band of the given
+   window width centered on diagonal offset hoff.
+
+   The penalties are passed to lx_band() negated.  With OLD_FASTA_GAP the
+   gap-open value is -(gdelval - ggapval), otherwise -gdelval.  In the
+   FASTX/FASTY build the library sequence aa1 is the protein and
+   f_str->aa0v (length n0-2) the codon series; in the TFAST build the
+   query aa0 is the protein and aa1v (length n1-2) the codon series.
+*/
+static int
+dmatchz(const unsigned char *aa0, int n0,
+	const unsigned char *aa1, int n1,
+	const unsigned char *aa1v,
+	int hoff, int window, 
+	int **pam2, int gdelval, int ggapval, int gshift,
+	struct f_struct *f_str)
+{
+   int band_gopen, band_start;
+
+#ifdef OLD_FASTA_GAP
+   band_gopen = -(gdelval - ggapval);
+#else
+   band_gopen = -gdelval;
+#endif
+
+   /* the band starts half a window before the diagonal */
+   band_start = hoff - window/2;
+
+#ifndef TFAST
+   return lx_band(aa1,n1,f_str->aa0v,n0-2, 
+		  pam2, band_gopen, -ggapval,-gshift,
+		  band_start,window,f_str);
+#else
+   return lx_band(aa0,n0,aa1v,n1-2, 
+		  pam2, band_gopen, -ggapval,-gshift,
+		  band_start,window,f_str);
+#endif
+}
+
+/* init_row() - zero the C1..C3 and I1..I3 scores and the flag of the
+   first sp cells of row */
+static void
+init_row(struct sx_s *row, int sp) {
+  struct sx_s *cell;
+
+  for (cell = row; sp > 0; sp--, cell++) {
+      cell->I1 = 0; cell->C1 = 0;
+      cell->I2 = 0; cell->C2 = 0;
+      cell->I3 = 0; cell->C3 = 0;
+      cell->flag = 0;
+  }
+}
+
+/* lx_band() - banded local alignment score of a protein sequence against
+   the three-frame codon series of a DNA sequence; only the score is
+   computed, no alignment is kept.
+
+   One row of struct sx_s cells (f_str->cur, width+7 cells, allocated or
+   resized on demand) is reused for every protein residue.  Each cell
+   holds three scores C1..C3 and three scores I1..I3, one per position
+   within a codon-sized step.  Substitution weights come from
+   f_str->weight0[residue], indexed by the codon series value, with
+   members iv, iii and ii selecting the three step sizes that are
+   compared for each position.
+
+   pam_matrix and gshift are accepted but not referenced in this body.
+
+   Returns best+gg, where best is the largest value seen after the
+   gap-open+extend cost gg was subtracted from a cell score.
+*/
+int lx_band(const unsigned char *prot_seq,  /* array with protein sequence numbers*/
+	    int len_prot,    /* length of prot. seq */
+	    const unsigned char *dna_prot_seq, /* translated DNA sequence numbers*/
+	    int len_dna_prot,   /* length trans. seq. */
+	    int **pam_matrix,   /* scoring matrix */
+	    int gopen, int gext, /* gap open, gap extend penalties */
+	    int gshift,         /* frame-shift penalty */
+	    int start_diag,     /* start diagonal of band */
+	    int width,         /* width for band alignment */
+	    struct f_struct *f_str)
+{
+  void *ckalloc();
+  int i, j, bd, bd1, x1, x2, sp, p1=0, p2=0, end_prot;
+  struct sx_s *last, *tmp;
+  int sc, del, best = 0, cd,ci, e1, e2, e3, cd1, cd2, cd3, f, gg;
+  const unsigned char *dp;
+  register struct sx_s *ap, *aq;
+  struct wgt *wt, *ww;
+  int aa, b, a,x,y,z;
+
+  sp = width+7;
+  gg = gopen+gext;
+  /* sp = sp/3+1; */
+
+  /* (re)allocate the working row only when its size changes */
+  if (f_str->cur == NULL ) {
+    f_str->cur_sp_size = sp;
+    f_str->cur = (struct sx_s *) ckalloc(sizeof(struct sx_s)*sp);
+  }
+  else if (f_str->cur_sp_size != sp) {
+    free(f_str->cur);
+    f_str->cur = (struct sx_s *) ckalloc(sizeof(struct sx_s)*sp);
+    f_str->cur_sp_size = sp;
+  }
+
+  init_row(f_str->cur, sp);
+
+  /*
+  if (start_diag %3 !=0) start_diag = start_diag/3-1;
+  else start_diag = start_diag/3;
+  if (width % 3 != 0) width = width/3+1;
+  else width = width /3;
+  */
+
+  x1 = start_diag; 		/* x1 = lower bound of DNA */
+  x2 = 1;               /* the amount of position shift from last row*/
+
+  /* NOTE(review): end_prot is computed here but the loop below runs to
+     len_prot, not end_prot - confirm which bound is intended */
+  end_prot = max(0,-width-start_diag) + (len_dna_prot+5)/3 + width;
+  end_prot = min(end_prot,len_prot);
+
+  /* i counts through protein sequence, x1 through DNAp */
+
+  for (i = max(0, -width-start_diag), x1+=i; i < len_prot; i++, x1++) {
+      bd = min(x1+width, (len_dna_prot+2)/3);	/* upper bound of band */
+      bd1 = max(0,x1);	                /* lower bound of band */
+      wt = f_str->weight0[prot_seq[i]];
+      /* del converts a DNA-side position into an index into f_str->cur;
+         bd1 is at least 1 after the shift, so cur[bd1-1] below is valid */
+      del = 1-x1;   /*adjustment*/
+      bd += del; 
+      bd1 +=del;
+
+      ap = &f_str->cur[bd1]; aq = ap+1;
+      e1 = f_str->cur[bd1-1].C3; e2 = ap->C1; cd1 = cd2= cd3= 0;
+      for (dp = &dna_prot_seq[(bd1-del)*3]; ap < &f_str->cur[bd]; ap++) {
+	  /* first position of this cell: update C1 / I1 */
+	  ww = &wt[(unsigned char) *dp++];
+	  sc = max(max(e1+ww->iv, (e3=ap->C2)+ww->ii), e2+ww->iii);
+	  if (cd1 > sc) sc = cd1;
+	  cd1 -= gext;
+	  if ((ci = aq->I1) > 0) {
+	      if (sc < ci) { ap->C1 = ci; ap->I1 = ci-gext;}
+	      else {
+		  ap->C1 = sc;
+		  sc -= gg;
+		  if (sc > 0) {
+		      if (sc > best) best =sc;
+		      if (cd1 < sc) cd1 = sc;
+		      ap->I1 = max(ci-gext, sc);
+		  } else ap->I1 = ci-gext;
+	      }
+	  } else {
+	      if (sc <= 0) {
+		  ap->I1 = ap->C1 = 0;
+	      } else {
+		  ap->C1 = sc; sc-=gg;
+		  if (sc >0) {
+		      if (sc > best) best =sc;
+		      if (cd1 < sc) cd1 = sc;
+		      ap->I1 = sc;
+		  } else ap->I1 = 0;
+	      }
+	  }
+	  /* second position of this cell: update C2 / I2 */
+	  ww = &wt[(unsigned char) *dp++];
+	  sc = max(max(e2+ww->iv, (e1=ap->C3)+ww->ii), e3+ww->iii);
+	  if (cd2 > sc) sc = cd2;
+	  cd2 -= gext;
+	  if ((ci = aq->I2) > 0) {
+	      if (sc < ci) { ap->C2 = ci; ap->I2 = ci-gext;}
+	      else {
+		  ap->C2 = sc;
+		  sc -= gg;
+		  /* NOTE(review): unlike the C1/I1 case above, I2 is left
+		     unchanged when sc <= 0 here - confirm this is intended */
+		  if (sc > 0) {
+		      if (sc > best) best =sc;
+		      if (cd2 < sc) cd2 = sc;
+		      ap->I2 = max(ci-gext, sc);
+		  }
+	      }
+	  } else {
+	      if (sc <= 0) {
+		  ap->I2 = ap->C2 = 0;
+	      } else {
+		  ap->C2 = sc; sc-=gg;
+		  if (sc >0) {
+		      if (sc > best) best =sc;
+		      if (cd2 < sc) cd2 = sc;
+		      ap->I2 = sc;
+		  } else ap->I2 = 0;
+	      }
+	  }
+	  /* third position of this cell: update C3 / I3; aq advances here */
+	  ww = &wt[(unsigned char)*dp++];
+	  sc = max(max(e3+ww->iv, (e2=aq->C1)+ww->ii), e1+ww->iii);
+	  if (cd3 > sc) sc = cd3;
+	  cd3 -= gext;
+	  if ((ci = aq++->I3) > 0) {
+	      if (sc < ci) { ap->C3 = ci; ap->I3 = ci-gext;}
+	      else {
+		  ap->C3 = sc;
+		  sc -= gg;
+		  /* NOTE(review): as for I2, I3 is left unchanged when
+		     sc <= 0 here - confirm this is intended */
+		  if (sc > 0) {
+		      if (sc > best) best =sc;
+		      if (cd3 < sc) cd3 = sc;
+		      ap->I3 = max(ci-gext, sc);
+		  }
+	      }
+	  } else {
+	      if (sc <= 0) {
+		  ap->I3 = ap->C3 = 0;
+	      } else {
+		  ap->C3 = sc; sc-=gg;
+		  if (sc >0) {
+		      if (sc > best) best =sc;
+		      if (cd3 < sc) cd3 = sc;
+		      ap->I3 = sc;
+		  } else ap->I3 = 0;
+	      }
+	  }
+      }
+  }
+  /*  printf("The best score is %d\n", best); */
+  /* best was recorded after gg had been subtracted; add it back */
+  return best+gg;
+}
+
+/* ckalloc - malloc() wrapper: calls w_abort() when the request for
+   amount bytes cannot be satisfied, otherwise returns the new block */
+void *ckalloc(size_t amount)
+{
+	void *mem;
+
+	mem = malloc(amount);
+	if (mem == NULL) {
+		w_abort("Ran out of memory.","");
+	}
+	return mem;
+}
+
+/* shscore() - score of aa0[0..n0-1] aligned against itself (the 100%
+   identical score): the sum of the pam2 diagonal entries of its residues */
+int
+shscore(unsigned char *aa0, int n0, int **pam2)
+{
+  int self_score = 0;
+  const unsigned char *res, *res_end;
+
+  if (n0 <= 0) return 0;
+
+  res_end = aa0 + n0;
+  for (res = aa0; res < res_end; res++) {
+    self_score += pam2[*res][*res];
+  }
+  return self_score;
+}
+
+/* NOTE(review): WIDTH is presumably an output line width - confirm where
+   it is used */
+#define WIDTH 60
+
+typedef struct mat *match_ptr;
+
+/* node of the singly linked alignment list built by global(); pro_dna()
+   copies the l member of every node into the alignment result array and
+   then frees the node */
+typedef struct mat {
+	int i, j, l;
+	match_ptr next;
+} match_node;
+
+/* pair of coordinates; local_align() keeps arrays of these to remember
+   where the alignment ending at each cell started */
+typedef struct { int i,j;} state;
+typedef state *state_ptr;
+
+
+/* old-style (no parameter list) forward declarations */
+void *ckalloc();
+static match_ptr small_global(), global();
+static int local_align(), find_best();
+static void init_row2(), init_ROW();
+
+/* pro_dna() - align a protein sequence with a translated DNA sequence and
+   store the alignment encoding in a_res.
+
+   local_align() finds the best local alignment and its end points, which
+   are stored in a_res->min0/max0 (protein) and a_res->min1/max1 (DNA);
+   global() then produces the alignment between those end points as a
+   linked list whose l members are copied into a_res->res.
+
+   At most max_res entries are stored.  When the list is longer, a warning
+   is written to stderr and the remainder is dropped; a_res->nres is the
+   number of entries actually stored, so it never exceeds max_res.
+
+   Returns the score from local_align().
+*/
+int
+pro_dna(const unsigned char *prot_seq,  /* array with prot. seq. numbers*/
+	int len_prot,    /* length of prot. seq */
+	const unsigned char *dna_prot_seq, /* trans. DNA seq. numbers*/
+	int len_dna_prot,   /* length trans. seq. */
+	int **pam_matrix,   /* scoring matrix */
+	int gopen, int gext, /* gap open, gap extend penalties */
+	int gshift,         /* frame-shift penalty */
+	struct f_struct *f_str,
+	int max_res,
+	struct a_res_str *a_res) /* alignment info */
+{
+  match_ptr align, ap, aq;
+  int x, y, ex, ey, i, score;
+  int n_stored;
+  int *alignment;
+
+  f_str->up = (st_ptr) ckalloc(sizeof(struct st_s)*(len_dna_prot+10));
+  f_str->down = (st_ptr) ckalloc(sizeof(struct st_s)*(len_dna_prot+10));
+  f_str->tp = (st_ptr) ckalloc(sizeof(struct st_s)*(len_dna_prot+10));
+
+  /*local alignment find the best local alignment x and y
+    is the starting position of the best local alignment
+    and ex ey is the ending position */
+
+  score= local_align(&x, &y, &ex, &ey, 
+		     pam_matrix, gopen, gext,
+		     dna_prot_seq, len_dna_prot,
+		     prot_seq, len_prot, f_str);
+
+  /* global() works with the row pointers advanced by 3; the offset is
+     undone again when the rows are freed below */
+  f_str->up += 3; f_str->down += 3; f_str->tp += 3;
+
+  /* x, y - start in prot, dna_prot */
+  a_res->min0 = x;	/* prot */
+  a_res->min1 = y;	/* DNA */
+  a_res->max0 = ex;	/* prot */
+  a_res->max1 = ey;	/* DNA */
+
+  align = global(x, y, ex, ey, 
+		 pam_matrix, gopen, gext,
+		 dna_prot_seq, prot_seq,
+		 0, 0, f_str);
+
+  alignment = a_res->res;
+
+  /* copy what fits, but always walk (and free) the whole list */
+  for (ap = align, i= 0; ap; i++) {
+    if (i < max_res) alignment[i] = ap->l;
+    aq = ap->next; free(ap); ap = aq;
+  }
+
+  /* only report truncation when entries were really dropped */
+  if (i > max_res)
+    fprintf(stderr,"***alignment truncated: %d/%d***\n", max_res,i);
+
+  /*   up = &up[-3]; down = &down[-3]; tp = &tp[-3]; */
+  free(&f_str->up[-3]); free(&f_str->tp[-3]); free(&f_str->down[-3]);
+
+  /* never report more entries than were written to a_res->res */
+  n_stored = i;
+  if (n_stored > max_res) n_stored = (max_res > 0) ? max_res : 0;
+  a_res->nres = n_stored;
+
+  return score;
+}
+
+/* swap() - exchange the two pointers stored at a and b */
+static void
+swap(void **a, void **b)
+{
+    void *held;
+
+    held = *b;
+    *b = *a;
+    *a = held;
+}
+
+/*
+   local_align - find the best local alignment between the protein
+   pro[0..lp-1] and the DNA-side sequence dnap[0..ld-1].
+
+   On return *x, *y hold the starting position recorded for the best
+   alignment, and *ex, *ey the row (protein index i) and the column j
+   at which the best score was seen.  Columns run from 2 because dnap
+   is addressed through dp = dnap-2.  The best score is returned.
+
+   If no cell scores above 0 the function returns 0 and all four
+   positions are set to 0 (they used to be left uninitialized).
+
+   wgts is not referenced here; substitution scores are taken from
+   f_str->weight1.  gop and gext are the gap open and gap extension
+   penalties (both are subtracted).
+*/
+static int
+local_align(int *x, int *y, int *ex, int *ey, 
+	    int **wgts, int gop, int gext,
+	    const unsigned char *dnap, int ld,
+	    const unsigned char *pro, int lp,
+	    struct f_struct *f_str)
+{
+  /* x1..x4 start at 0 so that the outputs are defined even when no
+     cell ever beats best == 0 */
+  int i, j, x1 = 0, x2 = 0, x3 = 0, x4 = 0, e1 = 0, e2 = 0, e3,
+    sc, del,  e, best = 0,  cd, ci, c;
+  struct wgt *wt, *ww;
+  state_ptr cur_st, last_st, cur_i_st;
+  st_ptr cur, last;
+  const unsigned char *dp;
+  int *cur_d_st, *st_up;
+
+  /*      
+	  Array rowiC stores the best scores of alignment ending at a position
+	  Arrays rowiD and rowiI store the best scores of alignment ending
+	  at a position with a deletion or insertion
+	  Arrays sti stores the starting position of the best alignment whose
+	  score stored in the corresponding row array.
+	  The program stores two rows to complete the computation, same is
+	  for the global alignment routine.
+  */
+
+
+  st_up = (int *) ckalloc(sizeof(int)*(ld+10));
+  init_row2(st_up, ld+5);
+
+  ld += 2;	/* columns 0 and 1 are padding, see dp below */
+
+  init_ROW(f_str->up, ld+1);
+  init_ROW(f_str->down, ld+1);
+  cur = f_str->up+1;
+  last = f_str->down+1; 
+
+  cur_st = (state_ptr) ckalloc(sizeof(state)*(ld+1));
+  last_st = (state_ptr) ckalloc(sizeof(state)*(ld+1));
+  cur_i_st = (state_ptr) ckalloc(sizeof(state)*(ld+1));
+  cur_d_st = st_up; 
+  dp = dnap-2;	/* dp[j] is dnap[j-2] */
+  for (i = 0; i < lp; i++) {
+    wt = f_str->weight1[pro[i]];  e2 =0; e1 = last[0].C;
+    for (j = 0; j < 2; j++) {
+      cur_st[j].i = i+1;
+      cur_st[j].j = j+1;
+    }
+    for (j = 2; j < ld; j++) {
+      ww = &wt[(unsigned char) dp[j]];
+      /* del records which move produced sc: 2/3/4 = number of
+	 nucleotides used, 0 = I state, 5 = D state, -1 = fresh start */
+      del = -1;
+      if (j >= 3) {
+	sc = 0;
+	e3 = e2; e2 = e1;
+	e1 = last[j-2].C; 
+	if ((e=e2+ww->iii) > sc) {sc = e; del = 3;}
+	if ((e=e1+ww->ii) > sc) {sc = e; del = 2;}
+	if ((e = e3+ww->iv) > sc) {sc = e; del = 4;} 
+      } else {
+	sc = e2  = 0;
+	if (ww->iii > 0) {sc = ww->iii; del = 3;}
+      }
+      if (sc < (ci=last[j].I)) {
+	sc = ci; del = 0;
+      }
+      if (sc < (cd=cur[j].D)) {
+	sc = cd; del = 5;
+      }
+      cur[j].C = sc;
+      e = sc  - gop;
+      /* D score three columns ahead; cur_d_st tracks how many
+	 columns back the current D run started */
+      if (e > cd) {
+	cur[j+3].D = e-gext;
+	cur_d_st[j+3] = 3;
+      } else {
+	cur[j+3].D = cd-gext;
+	cur_d_st[j+3] = cur_d_st[j]+3;
+      }
+      /* propagate the starting position that belongs to the chosen move */
+      switch(del) {
+      case 5:
+	c = cur_d_st[j];
+	cur_st[j].i = cur_st[j-c].i;
+	cur_st[j].j = cur_st[j-c].j;
+	break;
+      case 0:
+	cur_st[j].i = cur_i_st[j].i;
+	cur_st[j].j = cur_i_st[j].j;
+	break;
+      case 2:
+      case 3:
+      case 4:
+	if (i) {
+	  if (j-del >= 0) {
+	    cur_st[j].i = last_st[j-del].i;
+	    cur_st[j].j = last_st[j-del].j;
+	  } else {
+	    cur_st[j].i = i;
+	    cur_st[j].j = 0;
+	  }
+	} else {
+	  cur_st[j].i = 0;
+	  cur_st[j].j = max(0, j-del+1);
+	}
+	break;
+      case -1:
+	cur_st[j].i = i+1;
+	cur_st[j].j = j+1;
+	break;
+      }
+      if (e > ci) {
+	cur[j].I  = e -gext;
+	cur_i_st[j].i = cur_st[j].i;
+	cur_i_st[j].j = cur_st[j].j;
+      } else {
+	cur[j].I  = ci- gext;
+      }
+      if (sc > best) {
+	x1 = cur_st[j].i;
+	x2 = cur_st[j].j;
+	best =sc;
+	x3 = i;
+	x4 = j;
+      }
+    }
+    swap((void *)&last, (void *)&cur);
+    swap((void *)&cur_st, (void *)&last_st);
+  }
+  /*	printf("The best score is %d\n", best);*/
+  *x = x1; *y = x2; *ex = x3; *ey = x4;
+  free(cur_st); free(last_st); free(cur_i_st); 
+  free(st_up);
+  return best;
+}
+
+/* 
+   Both global_up and global_down do linear space score only global 
+   alignments on subsequence pro[x]...pro[ex], and dna[y]...dna[ey].
+   global_up runs the recurrence forwards: its row loop visits the
+   protein positions from x up to ex and its column loop the offsets
+   from 0 up to ey-y+1.
+   global_down runs it backwards: its row loop goes from ex down to x
+   and its column loop from ey-y+1 down to 0.
+
+   global_up arguments:
+     row1, row2  two score rows (C, I and D per cell) that are used
+                 alternately as current and previous row; on return
+                 *row1 is the row computed last (the two pointers are
+                 swapped if necessary)
+     x, ex       first and last protein position
+     y, ey       first and last DNA position
+     wgts        not referenced in this function
+     gop, gext   gap open and gap extension penalties (subtracted)
+     dnap, pro   DNA-side and protein sequences; the scores come from
+                 f_str->weight1[pro[..]][dnap[..]]
+     N           if non-zero, last[0].I of the boundary row is
+                 overridden with -gext
+*/
+
+static void
+global_up(st_ptr *row1, st_ptr *row2, 
+	  int x, int y, int ex, int ey, 
+	  int **wgts, int gop, int gext,
+	  unsigned char *dnap, unsigned char *pro, 
+	  int N, struct f_struct *f_str)
+{
+  int i, j, k, sc, e, e1, e2, e3, t, ci, cd, score;
+  struct wgt *wt, *ww;
+  st_ptr cur, last;
+
+  cur = *row1; last = *row2;
+  sc = -gop;
+  for (j = 0; j <= ey-y+1; j++) {	/* boundary row: only columns with j % 3 == 0 get a finite score */
+    if (j % 3 == 0) {last[j].C = sc; sc -= gext; last[j].I = sc-gop;}
+    else { last[j].I = last[j].C = -10000;}
+  }  
+  last[0].C = 0; cur[0].D = cur[1].D = cur[2].D = -10000;
+  last[0].D = last[1].D = last[2].D = -10000;
+  if (N) last[0].I = -gext;
+  for (i = 1; i <= ex-x+1; i++) {
+    wt = f_str->weight1[pro[i+x-1]]; e1 = -10000; e2 = last[0].C;
+    for (j = 0; j <= ey-y+1; j++) {
+      t = j+y;
+      sc = -10000; 
+      ww = &wt[(unsigned char) dnap[t-3]]; 
+      if (j < 4) {	/* too close to the left edge for all three moves */
+	if (j == 3) {
+	  sc = e2+ww->iii;
+	} else if (j == 2) {
+	  sc = e2 + ww->ii;
+	}
+      } else {
+	e3 = e2; e2 = e1;	/* e1, e2, e3 hold last[j-2].C, last[j-3].C, last[j-4].C */
+	e1 = last[j-2].C;
+	sc = max(e2+ww->iii, max(e1+ww->ii, e3+ww->iv));
+      }
+      sc = max(sc, max(ci=last[j].I, cd = cur[j].D));
+      cur[j].C = sc;
+      cur[j+3].D = max(cd, sc-gop)-gext;	/* D is written three columns ahead */
+      cur[j].I = max(ci, sc-gop)-gext;
+    }
+    swap((void *)&last, (void *)&cur);
+  }
+  /*printf("global up score =%d\n", last[ey-y+1].C);*/
+  for (i = 0; i <= ey-y+1; i++) last[i].I = cur[i].I;	/* take the I values from the other row */
+  if (*row1 != last) swap((void *)row1, (void *)row2);	/* make *row1 the row computed last */
+}
+
+static void	/* global_down - backward counterpart of global_up.
+		   Fills two alternating score rows (C, I and D per cell),
+		   looping over the protein positions from ex down to x and
+		   over the columns from ey-y+1 down to 0; on return *row1
+		   is the row computed last.  If N is non-zero the I value
+		   of the last column of the boundary row is overridden
+		   with -gext.  wgts is not referenced here.
+		   The D update writes cur[j-3], i.e. up to three cells
+		   before index 0; the rows seem to be allocated with that
+		   slack (elsewhere in this file they are freed as
+		   &f_str->up[-3] etc.) - TODO confirm.
+		   NOTE(review): the boundary loop tests (ey-y+1-j) % 3,
+		   so the columns that are NOT a whole number of codons
+		   from the end get the gap score, whereas global_up tests
+		   j % 3 == 0 - confirm that this asymmetry is intended. */
+global_down(st_ptr *row1, st_ptr *row2,
+	    int x, int y, int ex, int ey, 
+	    int **wgts, int gop, int gext,
+	    unsigned char *dnap, unsigned char *pro,
+	    int N, struct f_struct *f_str)
+{
+  int i, j, k, sc, del, *tmp, e,  t, e1,e2,e3, ci,cd, score;
+  struct wgt *wt, *w1, *w2, *w3;
+  st_ptr cur, last;
+
+  cur = (*row1); last = *row2;
+  sc = -gop;
+  for (j = ey-y+1; j >= 0; j--) {	/* boundary row, filled from the right end */
+    if ((ey-y+1-j) % 3) {last[j].C = sc; sc-=gext; last[j].I = sc-gop;}
+    else  last[j].I =  last[j].C = -10000;
+    cur[j].I = -10000;
+  } 
+  last[ey-y+1].C = 0;
+  if (N) last[ey-y+1].I = -gext;
+  cur[ey-y+1].D = cur[ey-y].D = cur[ey-y-1].D = -10000;
+  last[ey-y+1].D = last[ey-y].D = last[ey-y-1].D = -10000;
+  for (i = ex-x; i >= 0; i--) {
+    wt = f_str->weight1[pro[i+x]]; e2 = last[ey-y+1].C; 
+    e1 = -10000;
+    w3 = &wt[(unsigned char) dnap[ey]]; 
+    w2 = &wt[(unsigned char) dnap[ey-1]];
+    for (j = ey-y+1; j >= 0; j--) {
+      t = j+y;
+      w1 = &wt[(unsigned char) dnap[t-1]];	/* w1, w2, w3: weights for columns j, j+1, j+2 (shifted at the loop end) */
+      sc = -10000;
+      if (t+3 > ey) {	/* too close to the right edge for all three moves */
+	if (t+2 == ey) {
+	  sc = e2+w2->iii; 
+	} else if (t+1 == ey) {
+	  sc = e2+w1->ii;
+	}
+      } else {
+	e3 = e2; e2 = e1;
+	e1 = last[j+2].C;
+	sc = max(e2+w2->iii, max(e1+w1->ii,e3+w3->iv)) ;
+      }
+      if (sc < (cd= cur[j].D)) {
+	sc = cd; 
+	cur[j-3].D = cd-gext;	/* D is written three columns to the left */
+      } else cur[j-3].D =max(cd, sc-gop)-gext;
+      if (sc < (ci= last[j].I)) {
+	sc = ci;
+	cur[j].I = ci - gext;
+      } else cur[j].I = max(sc-gop,ci)-gext;
+      cur[j].C = sc;
+      w3 = w2; w2 = w1;
+    }
+    swap((void *)&last, (void *)&cur);
+  }
+  for (i = 0; i <= ey-y+1; i++) last[i].I = cur[i].I;	/* take the I values from the other row */
+  if (*row1 != last) swap((void *)row1, (void *)row2);	/* make *row1 the row computed last */
+}
+
+/* init_row2 - set the first ld entries of the int array row to 0
+   (does nothing when ld <= 0) */
+static void
+init_row2(int *row, int ld) {
+  int remaining;
+
+  for (remaining = ld; remaining > 0; remaining--) {
+    *row++ = 0;
+  }
+}
+
+/* init_ROW - zero the C, D and I scores of the first ld cells of row
+   (does nothing when ld <= 0) */
+static void init_ROW(st_ptr row, int ld) {
+  int remaining;
+
+  for (remaining = ld; remaining > 0; remaining--, row++) {
+    row->C = 0;
+    row->D = 0;
+    row->I = 0;
+  }
+}
+
+/*
+   combine - append the edge list x2 to the end of the edge list x1
+   and return the head of the joined list.  If x1 is NULL, x2 is
+   returned unchanged.
+
+   If st is non-zero, the j field of every node of x2 up to and
+   including the first node of type 3 or 4 is incremented, and that
+   node's type is reduced by one.  When x2 contains no node of type
+   3 or 4 (or is NULL) there is nothing to shorten and no type is
+   changed; the old code dereferenced a NULL pointer in that case.
+*/
+static match_ptr
+combine(match_ptr x1, match_ptr x2, int st) {
+  match_ptr x;
+
+  if (x1 == NULL) return x2;
+  for (x = x1; x->next; x = x->next);	/* find the tail of x1 */
+  x->next = x2;
+  if (st) {
+    for (x = x2; x; x = x->next) {
+      x->j++;
+      if (x->l == 3 || x->l == 4) break;
+    }
+    /* x is NULL here if the loop ran off the end of the list */
+    if (x) x->l--;
+  }
+  return x1;
+}
+
+/*
+   global uses the two score-only, linear space global alignment
+   subroutines (global_up and global_down) to recursively build the
+   alignment of pro[x]...pro[ex] with dna[y]...dna[ey].
+
+   The alignment is returned as a linked list of match_node edges
+   (NULL for an empty list).  N1 is passed on as the N flag of
+   global_up, N2 as the N flag of global_down; small_global gets both.
+*/
+
+match_ptr
+global(int x, int y, int ex, int ey,
+       int **wgts, int gop, int gext,
+       unsigned char *dnap, unsigned char *pro, int N1, int N2,
+       struct f_struct *f_str)
+{
+  int m;
+  int m1, m2;
+  /* mm2 starts as NULL so that the "no node was created" case below
+     can be detected instead of reading an uninitialized pointer */
+  match_ptr x1, x2, mm1, mm2 = NULL;
+
+  /*printf("%d %d %d %d %d %d\n", x,y, ex, ey, N1, N2);*/
+
+  if (ex <= x) {
+    /* no more than one protein row left: one codon deletion edge
+       (type 5) is created for every 3 DNA positions */
+    mm1  = NULL;
+    for (m = y+3; m <= ey; m+=3) {
+      x1 = (match_ptr) ckalloc(sizeof(match_node));
+      x1->l = 5; x1->next = mm1; 
+      if (mm1== NULL) mm2 = x1;	/* mm2 remembers the tail of the list */
+      mm1 = x1;
+    }
+    if (ex == x) {
+      if ((ey-y) % 3 != 0) {
+	x1  = (match_ptr) ckalloc(sizeof(match_node));
+	x1->l = ((ey-y) % 3) +1; x1->next = NULL;
+	if (mm1) mm2->next = x1; else mm1 = x1;
+      } else if (mm2) mm2->l = 4;
+      /* NOTE(review): when no type-5 node was created and
+	 (ey-y) % 3 == 0, the list stays empty; the old code wrote
+	 through the uninitialized mm2 here - confirm that an empty
+	 list is the wanted result for this case */
+    }
+    return mm1;
+  }
+  if (ey <= y) {
+    /* no DNA left: one edge of type 0 per remaining protein row */
+    mm1  = NULL;
+    for (m = x; m <= ex; m++) {
+      x1 = (match_ptr) ckalloc(sizeof(match_node));
+      x1->l = 0; x1->next = mm1; mm1 = x1;
+    }
+    return mm1;
+  }
+  /*
+    if the space required is limited, we can do a quadratic space
+    algorithm to find the alignment.
+  */
+  if (ex -x < SGW1 && ey-y < SGW2) 
+    return small_global(x,y,ex,ey,wgts, gop, gext,  dnap, pro, N1, N2,f_str);
+  m = (x+ex)/2;
+  /*     
+	 Do the score only global alignment from row x to row m, m is
+	 the middle row of x and ex. Store the information of row m in
+	 upC, upD, and upI.
+  */
+  global_up(&f_str->up, &f_str->tp,  x, y, m, ey,
+	    wgts, gop, gext,
+	    dnap, pro, N1, f_str);
+  /* 
+     Do the score only global alignment downwards from row ex
+     to row m+1, store information of row m+1 in downC downI and downD
+  */
+  global_down(&f_str->down, &f_str->tp, m+1, y, ex, ey,
+	      wgts, gop, gext,
+	      dnap, pro, N2, f_str);
+
+  /*
+    Use this information for row m and m+1 to find the crossing
+    point of the best alignment with the middle row. The crossing
+    point is given by m1 and m2. Then we recursively call global
+    itself to compute alignments in two smaller regions found by
+    the crossing point and combine the two alignments to form a
+    whole alignment. Return that alignment.
+  */
+  if (find_best(f_str->up, f_str->down, &m1, &m2, ey-y+1, y, gop)) {
+    x1 = global(x, y, m, m1, wgts, gop, gext, dnap, pro, N1, 0, f_str);
+    x2 = global(m+1, m2, ex, ey, wgts, gop, gext, dnap, pro, 0, N2, f_str);
+    if (m1 == m2) x1 = combine(x1,x2,1);
+    else x1 = combine(x1, x2,0);
+  } else {
+    /* the best path crosses rows m and m+1 in the I state: align the
+       regions on either side and join them with two type-0 edges */
+    x1 = global(x, y, m-1, m1, wgts, gop, gext, dnap, pro, N1, 1, f_str);
+    x2 = global(m+2, m2, ex, ey, wgts, gop, gext, dnap, pro, 1, N2, f_str);
+    mm1 = (match_ptr) ckalloc(sizeof(match_node));
+    mm1->i = m; mm1->l = 0; mm1->j = m1;
+    mm2 = (match_ptr) ckalloc(sizeof(match_node));
+    mm2->i = m+1; mm2->l = 0; mm2->j = m1;
+    mm1->next = mm2; mm2->next = x2;
+    x1 = combine(x1, mm1, 0);
+  }
+  return x1;
+}
+
+/*
+   find_best - find the column at which the forward row (up) and the
+   backward row (down) combine to the best total score.
+
+   Columns 1 .. ld-1 are examined.  For each column the sum of the C
+   scores and the sum of the I scores plus gop are candidates.
+   *m1 and *m2 receive j-1+y and j+y for the winning column j.
+
+   Returns 1 if the best candidate was a C+C sum and 0 if it was an
+   I+I sum.  If no candidate exceeds the initial value of best
+   (-1000), j stays 0 and 1 is returned; the return value used to be
+   uninitialized in that case.
+*/
+static int
+find_best(st_ptr up, st_ptr down, int *m1, int *m2, int ld, int y, int gop) {
+
+  int i, best = -1000, j = 0, s2, s4, st = 1;
+
+  for (i = 1; i < ld; i++) {
+    s2 = up[i].C + down[i].C;
+    s4 = up[i].I + down[i].I + gop;
+    if (best < s2) {
+      best = s2; j = i; st = 1;
+    }
+    if (best < s4) {
+      best = s4; j = i; st = 0;
+    }
+  }
+  *m1 = j-1+y;
+  *m2 = j+y;
+  /*printf("score=%d\n", best);*/
+  return st;
+} 
+
+/*
+   An alignment is represented as a linked list whose elements
+   are of type match_node. Each element represents an edge in the
+   path of the alignment graph. The fields of match_node are
+   l ---  gives the type of the edge.
+   i, j --- give the end position.
+
+   small_global aligns pro[x]...pro[ex] with dna[y]...dna[ey] using
+   the full score and traceback tables in f_str->smgl_s (C and st,
+   plus the work rows I and D) and returns the alignment as such a
+   list.
+   Each st[][] entry holds the edge type chosen for the cell (0, 2, 3,
+   4 or 5), plus 10 if the D score leaving the cell extends an
+   existing gap and plus 20 if the I score does.  The traceback at
+   the end uses those flags to stay in a gap once it has entered one.
+   N1 non-zero starts I[0] at -gext; N2 non-zero may force the
+   traceback entry of the final cell to 0 (see the test after the
+   fill loops).  wgts is not referenced in this function.
+*/
+
+static match_ptr
+small_global(int x, int y, int ex, int ey, 
+	     int **wgts, int gop, int gext,
+	     unsigned char *dnap, unsigned char *pro,
+	     int N1, int N2, struct f_struct *f_str) {
+
+  /* int C[SGW1+1][SGW2+1], st[SGW1+1][SGW2+1], D[SGW2+7], I[SGW2+1]; */
+  int i, j, e, sc, score, del, k, t,  ci, cd;
+  int *cI, *cD, *cC, *lC, *cst, e2, e3, e4;
+  match_ptr mp, first;
+  struct wgt *wt, *ww;
+
+  /*printf("small_global %d %d %d %d\n", x, y, ex, ey);*/
+  sc = -gop-gext; f_str->smgl_s.C[0][0] = 0; 
+
+  cI = f_str->smgl_s.I;
+  if (N1) cI[0] = -gext; else cI[0] = sc;
+
+  for (j = 1; j <= ey-y+1; j++) {	/* boundary row: only columns with j % 3 == 0 get a finite score */
+    if (j % 3== 0) {
+      f_str->smgl_s.C[0][j] = sc;
+      sc -= gext;
+      cI[j] = sc-gop;
+    }
+    else {
+      cI[j] = f_str->smgl_s.C[0][j] = -10000;
+    }
+    f_str->smgl_s.st[0][j] = 5;
+  }
+
+  lC = &f_str->smgl_s.C[0][0];
+  cD = f_str->smgl_s.D; cD[0] = cD[1] = cD[2] = -10000;
+  for (i = 1; i <= ex-x+1; i++) {
+    cC = &f_str->smgl_s.C[i][0];	
+    wt = f_str->weight1[pro[i+x-1]]; cst = &f_str->smgl_s.st[i][0];
+    for (j = 0; j <=ey-y+1; j++) {
+      ci = cI[j];
+      cd= cD[j];
+      t = j+y;
+      ww = &wt[(unsigned char) dnap[t-3]];
+      if (j >= 4) {
+	sc = lC[j-3]+ww->iii; e2 = lC[j-2]+ww->ii;  
+	e4 = lC[j-4]+ww->iv; del = 3;
+	if (e2 > sc) { sc = e2; del = 2;}
+	if (e4 >= sc) { sc = e4; del = 4;}
+      } else {	/* too close to the left edge for all three moves */
+	if (j == 3) {
+	  sc = lC[0]+ww->iii; del =3;
+	} else if (j == 2) {
+	  sc = lC[0]+ww->ii; del = 2;
+	} else {sc = -10000; del = 0;}
+      }
+      if (sc < ci) {
+	sc = ci; del = 0; 
+      }
+      if (sc <= cd) {
+	sc = cd;
+	del = 5;
+      }
+      cC[j] = sc;
+      sc -= gop;
+      if (sc <= cd) {
+	del += 10;	/* flag: the D score extends an existing gap */
+	cD[j+3] = cd - gext;
+      } else cD[j+3] = sc -gext;
+      if (sc < ci) {
+	del += 20;	/* flag: the I score extends an existing gap */
+	cI[j] = ci-gext;
+      } else cI[j] = sc-gext;
+      *(cst++) = del;
+    }
+    lC = cC;
+  }
+  /*printf("small global score =%d\n", f_str->smgl_s.C[ex-x+1][ey-y+1]);*/
+  if (N2 && cC[ey-y+1] <  ci+gop) f_str->smgl_s.st[ex-x+1][ey-y+1] =0;	/* ci still holds the I value read for the last cell */
+  first = NULL; e = 1;
+  for (i = ex+1, j = ey+1; i > x || j > y; i--) {	/* traceback from the last cell; nodes are prepended to the list */
+    mp = (match_ptr) ckalloc(sizeof(match_node));
+    mp->i = i-1;
+    k  = (t=f_str->smgl_s.st[i-x][j-y])%10;
+    mp->j = j-1;
+    if (e == 5 && (t/10)%2 == 1) k = 5;
+    if (e == 0 && (t/20)== 1) k = 0;
+    if (k == 5) { j -= 3; i++; e=5;}	/* i++ cancels the loop's i--: a type-5 edge uses no protein row */
+    else {j -= k;if (k==0) e= 0; else e = 1;}
+    mp->l = k;
+    mp->next = first;
+    first = mp;
+  }
+
+  /*	for (i = 0; i <= ex-x; i++) {
+	for (j = 0; j <= ey-y; j++) 
+	printf("%d ", C[i][j]);
+	printf("\n");
+	}
+  */
+  return first;	
+}
+
+#define XTERNAL
+#include "upam.h"
+
+/*
+   display_alig - print an alignment to stdout in blocks of WIDTH
+   columns, three text rows per block (line1, line3, line2 in that
+   order).
+
+   a[0] and a[1] are the starting positions in the protein and the
+   DNA sequence; a[2] .. a[length-1] are the edge codes (0, 2, 3, 4
+   or 5).  Frame shift edges (2 and 4) take two output columns, all
+   other edges take one, so a block can overflow WIDTH by one column;
+   that column is carried over to the next block.
+
+   ld is not referenced.  The line buffers are static, so the
+   function is not reentrant.
+*/
+void
+display_alig(int *a, unsigned char *dna, unsigned char *pro,
+	     int length, int ld, struct f_struct *f_str)
+{
+  int len, i, j, x, y, k, iaa;
+  static char line1[100], line2[100], line3[100];
+  char c1, c2, c3;
+
+  line1[0] = line2[0] = line3[0] = '\0'; x= a[0]; y = a[1]-3;
+
+  printf("\n%5d\n%5d", y+3, x);
+  for (len = 0, j = 2; j < length; j++) {
+    i = a[j];
+    line3[len] = ' ';
+    switch (i) {
+    case 3: 
+      y += 3;
+      line2[len] = NCBIstdaa[iaa=pro[x++]];
+      line1[len] = f_str->weight_c[iaa][(unsigned char) dna[y]].c5;
+      if (line1[len] != f_str->weight_c[iaa][(unsigned char) dna[y]].c3)
+	line3[len] = f_str->weight_c[iaa][(unsigned char) dna[y]].c3;
+      break;
+    case 2:
+      y += 2;
+      line1[len] = '\\';
+      line2[len++] = ' ';
+      line2[len] = NCBIstdaa[iaa=pro[x++]];
+      line1[len] = f_str->weight_c[iaa][(unsigned char) dna[y]].c2;
+      line3[len] = f_str->weight_c[iaa][(unsigned char) dna[y]].c3;
+      break;
+    case 4:
+      y += 4;
+      line1[len] = '/';
+      line2[len++] = ' ';
+      line2[len] = NCBIstdaa[iaa=pro[x++]];
+      line1[len] = f_str->weight_c[iaa][(unsigned char) dna[y]].c4;
+      line3[len] = f_str->weight_c[iaa][(unsigned char) dna[y]].c3;
+      break;
+    case 5:
+      y += 3;
+      line1[len] = f_str->weight_c[0][(unsigned char) dna[y]].c3;
+      line2[len] = '-';
+      break;
+    case 0:
+      line1[len] = '-';
+      line2[len] = NCBIstdaa[pro[x++]];
+      break;
+    }
+    len++;
+    line1[len] = line2[len]  = line3[len]  = '\0'; 
+    if (len >= WIDTH) {
+      for (k = 10; k <= WIDTH; k+=10) 
+	printf("    .    :");
+      if (k-5 < WIDTH) printf("    .");
+      /* cut the three rows at WIDTH for printing ... */
+      c1 = line1[WIDTH]; c2 = line2[WIDTH]; c3 = line3[WIDTH];
+      line1[WIDTH] = line2[WIDTH] = line3[WIDTH] = '\0';
+      printf("\n     %s\n     %s\n     %s\n", line1, line3, line2);
+      /* ... then put the hidden characters back.  line3 has to be
+	 restored as well, otherwise its carried-over column is lost
+	 and the middle row of the next block is cut short */
+      line1[WIDTH] = c1; line2[WIDTH] = c2; line3[WIDTH] = c3;
+      /* move the overflow (0 or 1 characters plus the terminator) to
+	 the front; source and destination are in the same array, so
+	 memmove is used rather than strncpy */
+      memmove(line1, &line1[WIDTH], strlen(&line1[WIDTH])+1);
+      memmove(line2, &line2[WIDTH], strlen(&line2[WIDTH])+1);
+      memmove(line3, &line3[WIDTH], strlen(&line3[WIDTH])+1);
+      len = len - WIDTH;
+      printf("\n%5d\n%5d", y+3, x);
+    }
+  }
+  for (k = 10; k < len; k+=10) 
+    printf("    .    :");
+  if (k-5 < len) printf("    .");
+  printf("\n     %s\n     %s\n     %s\n", line1, line3, line2);
+}
+
+
+/* The alignment array stores the operations that align the protein and
+   DNA sequences.  The codes in the array are as follows:
+   0:     deletion of an amino acid.
+   2:     frame shift, 2 nucleotides matched with an amino acid
+   3:     match of an amino acid with a codon
+   4:     the other type of frame shift, 4 nucleotides matched with an
+          amino acid
+   5:     deletion of a codon
+   
+
+   Also, the first two elements of the array store the starting points
+   in the protein and DNA sequences of the local alignment.
+
+   The display looks like this (WIDTH is assumed to be divisible by 10):
+
+    0    .    :    .    :    .    :    .    :    .    :    .    :
+     AACE/N\PLK\G\HK\Y/LWA\S\C\E/P\PRIRZ/G\HK\Y/LWA\S\C\E/P\PRIRZ
+          I S   G S  V F   N R Q L A     G S  V F   N R Q L A    
+     AACE P P-- G HK Y TWA A C E P P---- G HK Y TWA A C E P P----
+
+   60    .    :    .    :    .    :    .    :    .    :    .    :
+     /G\HK\Y/LWA\S\C\E/P\PRIRZ/G\HK\Y/LWA\S\C\E/P\PRIRZ/G\HK\Y/LW
+      G S  V F   N R Q L A     G S  V F   N R Q L A     G S  V F 
+      G HK Y TWA A C E P P---- G HK Y TWA A C E P P---- G HK Y TW
+
+For a frame shift, the middle row shows the letter in the original sequence,
+and the letter in the top row is the amino acid that is chosen by the
+alignment (translated codon chosen from 4 nucleotides, or 2+1).
+*/
+
+/* fatal - print msg followed by a newline on stderr and terminate the
+   program with exit status 1.  Does not return. */
+void
+fatal(char *msg)
+{
+  fprintf(stderr, "%s\n", msg);
+  exit(1);
+}
+
+/* fz_copy_region - copy aa1[l_min] ... aa1[l_max-1] into a fresh,
+   zero-filled buffer that has one spare byte before and one after the
+   copy.  Returns a pointer to the first copied byte, so the buffer
+   must be released with free(ptr-1).  Exits if the allocation fails. */
+static unsigned char *
+fz_copy_region(const unsigned char *aa1, int l_min, int l_max)
+{
+  unsigned char *buf;
+
+  if ((buf = (unsigned char *)calloc(l_max - l_min + 2,sizeof(unsigned char)))==NULL) {
+    fprintf(stderr," *** cannot allocate local sequence copy %d\n",l_max - l_min + 2);
+    exit(1);
+  }
+  buf++;
+  memcpy(buf,aa1+l_min, l_max - l_min);
+  return buf;
+}
+
+/* 10-Feb-2010 - fz_walign modified to ensure that the final alignment
+   overlaps the initial lz_band() region.  In earlier versions, the
+   final alignment (using pam2p[0]) might have been outside the band
+   region
+
+   fz_walign first calls do_fastz() to get the scores and the band
+   offset hoff.  If the score at ppst->score_ix is not above
+   score_thresh, a_res->sw_score is set to 0, a_res->n1 to n1 and the
+   function returns.  Otherwise the region l_min .. l_max of aa1
+   around the band is aligned with pro_dna(), and the coordinates in
+   a_res are shifted back to positions in the full aa1.
+
+   The max_res argument is not referenced; f_str->max_res is passed
+   to pro_dna().
+*/
+
+void
+fz_walign (const unsigned char *aa0, int n0,
+	   const unsigned char *aa1, int n1,
+	   int frame, int max_res,
+	   struct pstruct *ppst, 
+	   struct f_struct *f_str, 
+	   struct a_res_str *a_res,
+	   int score_thresh)
+{
+  int last_n1, n10;
+  int hoff;
+  int l_min, l_max, window;
+  unsigned char *local_aa1;
+  int itx;
+  unsigned char *aa1x;
+  struct score_count_s s_info = {0,0,0,0};
+  
+#ifndef TFAST
+  do_fastz (f_str->aa0x, n0, aa1, n1, f_str->aa0v, ppst, f_str, &a_res->rst, &hoff, 1, &s_info);
+#else
+  /* make translated sequence */
+  last_n1 = 0;
+  aa1x = f_str->aa1x;
+  for (itx= frame*3; itx< frame*3+3; itx++) {
+    n10  = saatran(aa1,&aa1x[last_n1],n1,itx);
+    last_n1 += n10+1;
+  }
+  n10 = last_n1-1;
+
+  /* do_fastz (lz_band) also needs a pre-computed number series */
+  if (frame == 0) {
+    pre_com(aa1, n1, f_str->aa1v);
+  }
+  else {
+    pre_com_r(aa1, n1, f_str->aa1v);
+  }
+
+  do_fastz (aa0, n0, f_str->aa1x, n10, f_str->aa1v, ppst, f_str, &a_res->rst, &hoff, 1, &s_info);
+#endif
+
+  if (a_res->rst.score[ppst->score_ix] <= score_thresh) {
+    a_res->sw_score = 0;
+    a_res->n1 = n1;
+    return;
+  }
+
+  /* The region of aa1 to be aligned is copied into a private buffer
+     whenever it is smaller than the whole sequence; aa1 is const, so
+     it is not overwritten with temporary terminators. */
+#ifndef TFAST
+  window = min(n1, ppst->param_u.fa.optwid);
+  l_min = max(0, -window - hoff);
+  l_max = min(n1, n0-hoff+window);
+  local_aa1 = (unsigned char *)aa1;
+  if (l_min > 0 || l_max < n1 -1 ) {
+    local_aa1 = fz_copy_region(aa1, l_min, l_max);
+  }
+#else
+  window = min(n0, ppst->param_u.fa.optwid);
+  if (frame==0) {
+    l_min = max(0,(hoff-window)*3);
+    l_max = min((hoff+window+n0)*3,n1);
+
+    local_aa1 = (unsigned char *)aa1;
+    if (l_min > 0 || l_max < n1 -1 ) {
+      local_aa1 = fz_copy_region(aa1, l_min, l_max);
+    }
+
+    /* re-do precomputed codon number series for limited region */
+    pre_com(local_aa1, l_max - l_min, f_str->aa1v);
+  }
+  else {
+    /* things are more complicated here because the mapping of hoff is
+       with respect to the reversed aa1 */
+
+    l_max = n1 - max(0,(hoff-window)*3);
+    l_min = n1 - min((hoff+window+n0)*3,n1);
+
+    local_aa1 = (unsigned char *)aa1;
+    if (l_min > 0 || l_max < n1 -1 ) {
+      local_aa1 = fz_copy_region(aa1, l_min, l_max);
+    }
+
+    pre_com_r(local_aa1, l_max - l_min, f_str->aa1v);
+  }
+#endif
+
+  /* NOTE(review): in the non-TFAST build pro_dna() is given
+     aa1+l_min directly, so the local_aa1 copy made above is never
+     read - confirm whether local_aa1 was meant to be passed here */
+  a_res->sw_score = 
+    pro_dna(
+#ifndef TFAST
+	    aa1+l_min, l_max - l_min,
+	    f_str->aa0v, n0-2,
+#else
+	    aa0, n0,
+	    f_str->aa1v, l_max - l_min-2,
+#endif
+	    ppst->pam2[0],
+	    -ppst->gdelval, -ppst->ggapval, -ppst->gshift,
+	    f_str, f_str->max_res, a_res);
+
+  /* same condition as the one under which the copy was made */
+  if (l_min > 0 || l_max < n1 - 1) { free(--local_aa1); }
+
+  /* shift the coordinates from the aligned region back to aa1 */
+#ifndef TFAST
+  a_res->min0 += l_min;
+  a_res->max0 += l_min;
+#else
+  if (frame==1) {
+    a_res->min1 += n1 - l_max;
+    a_res->max1 += n1 - l_max;
+  }
+  else {
+    a_res->min1 += l_min;
+    a_res->max1 += l_min;
+  }    
+#endif
+
+  /* display_alig(f_str->res,f_str->aa0v+2,aa1,*nres,n0-2,f_str); */
+}
+
+/*
+  fz_malign is a recursive interface to fz_walign() that is called
+  from do_walign(). fz_malign() first does an alignment, then checks
+  to see if the score is greater than the threshold. If so, it tries
+  doing a left and right alignment.
+
+  In this implementation, the DNA sequence is preserved as DNA for
+  TFAST, so that it can be sub-setted and translated correctly.  Thus,
+  the translation required for f_str->aa1x and f_str->aa1v is done at
+  each recursive level (in fz_walign).
+
+  cur_ares must point to an allocated a_res_str; its res array is
+  allocated here.  The return value is the head of the chain of
+  alignments: cur_ares alone, or cur_ares merged with the chains
+  found to its left and right.  The program exits if an allocation
+  fails.
+ */
+
+struct a_res_str *
+fz_malign (const unsigned char *aa0, int n0,
+	   const unsigned char *aa1, int n1,
+	   int frame, 
+	   int score_thresh, int max_res,
+	   struct pstruct *ppst,
+	   struct f_struct *f_str,
+	   struct a_res_str *cur_ares,
+	   int first_align)
+{
+  struct a_res_str *tmpl_ares, *tmpr_ares, *this_ares;
+  int sq_start, sq_end;
+  int score_ix;
+  int min_alen;
+  unsigned char *local_aa1;
+  int max_sub_score = -1;
+
+  score_ix = ppst->score_ix;
+
+#ifdef TFAST  
+  min_alen = min(n0,MIN_LOCAL_LEN)*3;	/* n0 in aa, min_alen in nt */
+#else 
+  min_alen = min(n0/3,MIN_LOCAL_LEN);	/* no in nt, min_alen in aa */
+#endif
+
+  /* now we need alignment storage - get it */
+  if ((cur_ares->res = (int *)calloc((size_t)max_res,sizeof(int)))==NULL) {
+    fprintf(stderr," *** cannot allocate alignment results array %d\n",max_res);
+    exit(1);
+  }
+
+  cur_ares->next = NULL;
+
+  /* the first alignment is done with a threshold of 1 */
+  fz_walign(aa0, n0, aa1, n1, frame, max_res, ppst, f_str, cur_ares, (first_align ? 1 : score_thresh));
+
+  /* in cur_ares, min0,max0 are always protein, min1,max1 are always
+     DNA, but n0 could be protein or DNA, depending on
+     FASTY/TFASTY */
+
+  if (!ppst->do_rep || cur_ares->rst.score[ppst->score_ix] <= score_thresh) { 
+    return cur_ares;
+  }
+
+  /* have a score >= threshold - try left and right */
+
+  /* in code below, cur_ares->min0/max0 always refers to aa
+     cur_ares->min1/max1 always refers to nt
+
+     however, things are more complex because if frame==1, then
+     offsets are from the end (n1), not the beginning.  There is no
+     frame==1 for fasty, only for TFASTY
+  */
+  cur_ares->v_start = sq_start = 0;
+#ifdef TFAST
+  if (frame == 0) {sq_end = cur_ares->min1-1;}   /* aa1[sq_start --> sq_end] */
+  else {sq_end = n1 - cur_ares->max1;}
+#else
+  sq_end = cur_ares->min0;
+#endif
+  cur_ares->v_len = sq_end - sq_start;
+
+  if (cur_ares->v_len >= min_alen) { /* try the left  */
+    /* allocate a_res */	
+    if ((tmpl_ares = (struct a_res_str *)calloc(1, sizeof(struct a_res_str)))==NULL) {
+      fprintf(stderr," *** cannot allocate left a_res\n");
+      exit(1);
+    }
+    /* private zero-filled copy of the left part, with one spare
+       byte before and one after it */
+    if ((local_aa1 = (unsigned char *)calloc(cur_ares->v_len+2,sizeof(unsigned char)))==NULL) {
+      fprintf(stderr," *** cannot allocate left sequence copy %d\n",cur_ares->v_len+2);
+      exit(1);
+    }
+    local_aa1++;
+    memcpy(local_aa1, aa1, cur_ares->v_len);
+
+    tmpl_ares = fz_malign(aa0, n0, local_aa1, cur_ares->v_len,
+			  frame, score_thresh, max_res, 
+			  ppst, f_str, tmpl_ares,0);
+
+    free(--local_aa1);
+
+    if (tmpl_ares->rst.score[ppst->score_ix] > score_thresh) {
+      max_sub_score = tmpl_ares->rst.score[ppst->score_ix];
+#ifdef TFAST
+      if (frame == 1) {
+	for (this_ares = tmpl_ares; this_ares; this_ares = this_ares->next) {
+	  this_ares->v_start += n1 - sq_end;
+	  this_ares->min1 += n1 - sq_end;
+	  this_ares->max1 += n1 - sq_end;
+	}
+      }
+#endif
+    }
+    else {
+      if (tmpl_ares->res) free(tmpl_ares->res);
+      free(tmpl_ares);
+      tmpl_ares=NULL;
+    }
+  }
+  else {tmpl_ares = NULL;}
+
+  /* now the right end */
+  /* for fasty -- max positions refer to the aa,codon, not the next
+     residue, so they must be incremented */
+
+  sq_end = n1;
+  /* "#ifdef" here, like every other TFAST test in this file; the
+     former "#if TFAST" does not compile when TFAST is defined without
+     a value */
+#ifdef TFAST
+  if (frame == 0) {sq_start = cur_ares->max1+1;}
+  else {sq_start = n1 - cur_ares->min1;}
+#else
+  sq_start = cur_ares->max0+1;
+#endif
+  cur_ares->v_len = sq_end - sq_start;
+
+  if (cur_ares->v_len >= min_alen) { /* try the right  */
+    /* allocate a_res */	
+    if ((tmpr_ares = (struct a_res_str *)calloc(1, sizeof(struct a_res_str)))==NULL) {
+      fprintf(stderr," *** cannot allocate right a_res\n");
+      exit(1);
+    }
+
+    /* private zero-filled copy of the right part */
+    if ((local_aa1 = (unsigned char *)calloc(cur_ares->v_len+2,sizeof(unsigned char)))==NULL) {
+      fprintf(stderr," *** cannot allocate right sequence copy %d\n",cur_ares->v_len+2);
+      exit(1);
+    }
+    local_aa1++;
+    memcpy(local_aa1,aa1+sq_start,cur_ares->v_len);
+
+    tmpr_ares = fz_malign(aa0, n0, 
+			  local_aa1, cur_ares->v_len,
+			  frame,
+			  score_thresh, max_res,
+			  ppst, f_str, tmpr_ares,0);
+    free(--local_aa1);
+
+    if (tmpr_ares->rst.score[ppst->score_ix] > score_thresh) {
+      /* adjust the left boundary */
+      for (this_ares = tmpr_ares; this_ares; this_ares = this_ares->next) {
+#ifndef TFAST
+	this_ares->min0 += sq_start;
+	this_ares->max0 += sq_start;
+#else
+	if (frame == 0) {
+	  this_ares->v_start += sq_start;
+	  this_ares->min1 += sq_start;
+	  this_ares->max1 += sq_start;
+	}
+#endif
+      }
+      if (tmpr_ares->rst.score[ppst->score_ix] > max_sub_score) {
+	max_sub_score = tmpr_ares->rst.score[ppst->score_ix];
+      }
+    }
+    else {
+      if (tmpr_ares->res) free(tmpr_ares->res);
+      free(tmpr_ares);
+      tmpr_ares=NULL;
+    }
+  }
+  else {tmpr_ares = NULL;}
+
+  /* neither side produced an alignment above the threshold (both
+     pointers are NULL in that case) */
+  if (max_sub_score <= score_thresh) return cur_ares;
+
+  cur_ares = merge_ares_chains(cur_ares, tmpl_ares, score_ix, "left");
+  cur_ares = merge_ares_chains(cur_ares, tmpr_ares, score_ix, "right");
+
+  return cur_ares;
+}
+
+/* do_walign() can be called with aa0,n0 as nt (FASTY) or 
+   aa0,n0 as aa (TFASTY).  if aa0 is nt, then f_str->aa0x,y have the
+   translations already.  if aa0 is aa, then f_str->aa1x,y must be
+   generated.
+
+   The function allocates the first a_res_str, temporarily changes
+   the fa search parameters (use_E_thresholds, optflag, optwid), runs
+   fz_malign() and numbers the resulting chain of alignments from 0.
+   *have_ares is set to 0x3.  Returns the head of the chain, or NULL
+   if the first a_res_str cannot be allocated.
+*/
+
+struct a_res_str *
+do_walign (const unsigned char *aa0, int n0,
+	   const unsigned char *aa1, int n1,
+	   int frame, int repeat_thresh,
+	   struct pstruct *ppst, 
+	   struct f_struct *f_str, 
+	   int *have_ares)
+{
+  struct a_res_str *head, *node;
+  int next_index;
+  int saved_use_E_thresholds, saved_optflag, saved_optwid;
+#ifdef DEBUG
+  unsigned long crc_before;
+#endif
+
+  *have_ares = 0x3;	/* set 0x2 bit to indicate local copy */
+
+  head = (struct a_res_str *)calloc(1, sizeof(struct a_res_str));
+  if (head == NULL) {
+    fprintf(stderr," [do_walign] Cannot allocate a_res");
+    return NULL;
+  }
+
+#ifdef DEBUG
+  crc_before = adler32(1L,aa1,n1);
+#endif
+
+  f_str->frame = frame;	/* need frame for later pre_cons() in calcons() */
+
+  /* remember the parameters that are changed for the alignment */
+  saved_use_E_thresholds = ppst->param_u.fa.use_E_thresholds;
+  saved_optflag = ppst->param_u.fa.optflag;
+  saved_optwid = ppst->param_u.fa.optwid;
+
+  ppst->param_u.fa.use_E_thresholds = 0;
+  ppst->param_u.fa.optflag = 1;
+  if (!ppst->param_u.fa.optwid_set) {
+    ppst->param_u.fa.optwid *= 2;
+  }
+
+  head = fz_malign(aa0, n0, aa1, n1, frame,
+		   repeat_thresh, f_str->max_res,
+		   ppst, f_str, head, 1);
+
+#ifdef DEBUG
+  if (adler32(1L,aa1,n1) != crc_before) {
+    fprintf(stderr,"*** error [%s:%d] adler32_crc mismatch n1: %d\n",__FILE__, __LINE__, n1);
+  }
+#endif
+
+  /* number the alignments in chain order */
+  next_index = 0;
+  node = head;
+  while (node) {
+    node->index = next_index++;
+    node = node->next;
+  }
+
+  /* put the saved parameters back */
+  ppst->param_u.fa.use_E_thresholds = saved_use_E_thresholds;
+  ppst->param_u.fa.optflag = saved_optflag;
+  ppst->param_u.fa.optwid = saved_optwid;
+  return head;
+}
+
+/* pre_cons - in TFAST builds, make the precomputed codon number
+   series for aa1 in f_str->aa1v: pre_com() is used for frame 0 and
+   pre_com_r() for any other frame.  Does nothing in other builds. */
+void
+pre_cons(const unsigned char *aa1, int n1, int frame, struct f_struct *f_str) {
+
+#ifdef TFAST
+  if (frame != 0) {
+    /* must do things backwards */
+    pre_com_r(aa1, n1, f_str->aa1v);
+    return;
+  }
+  pre_com(aa1, n1, f_str->aa1v);
+#endif
+}
+
+/* aln_func_vals - set up aln.qlfact, qlrev, llfact, llmult, frame, llrev */
+/* call from calcons, calc_id, calc_code */
+/* The factor of 3 and the reverse flag (set when frame > 0) go to the
+   query side (ql*) in non-TFAST builds and to the library side (ll*)
+   in TFAST builds. */
+void 
+aln_func_vals(int frame, struct a_struct *aln) {
+
+  /* values that are the same in both builds */
+  aln->frame = frame;
+  aln->llmult = 1;
+
+#ifndef TFAST
+  aln->qlfact = 3;
+  aln->llfact = 1;
+  aln->qlrev = (frame > 0) ? 1 : 0;
+  aln->llrev = 0;
+#else	/* TFASTX */
+  aln->qlfact = 1;
+  aln->llfact = 3;
+  aln->qlrev = 0;
+  aln->llrev = (frame > 0) ? 1 : 0;
+#endif	/* TFASTX */
+}
+
+#include "structs.h"
+#include "a_mark.h"
+
+extern int align_type(int score, char sp0, char sp1, int nt_align, struct a_struct *aln, int pam_x_id_sim);
+
+extern void
+process_annot_match(int *itmp, int *pam2aa0v, 
+		    long ip, long ia, char *sp1, char *sp1a, const unsigned char *sq,
+		    struct annot_entry *annot_arr_p, int n_annots, char **ann_comment,
+		    void *annot_stack, int *have_push_features, int *v_delta,
+		    int *d_score_p, int *d_ident_p, int *d_alen_p, int *d_gaplen_p,
+		    struct domfeat_data **left_domain_head_p,
+		    struct domfeat_data *left_domain_p,
+		    long *left_end_p, int init_score);
+
+extern int
+next_annot_match(int *itmp, int *pam2aa0v, 
+		 long ip, long ia, char *sp1, char *sp1a, const unsigned char *sq,
+		 int i_annot, int n_annot, struct annot_entry **annot_arr, char **ann_comment,
+		 void *annot_stack, int *have_push_features, int *v_delta,
+		 int *d_score_p, int *d_ident_p, int *d_alen_p, int *d_gaplen_p,
+		 struct domfeat_data **left_domain_head_p,
+		 struct domfeat_data *left_domain_p,
+		 long *left_domain_end, int init_score);
+
+extern void
+close_annot_match (int ia, void *annot_stack, int *have_push_features,
+		   int *d_score_p, int *d_ident_p, int *d_alen_p, int *d_gaplen_p,
+		   struct domfeat_data **left_domain_p,
+		   long *left_end_p, int init_score);
+
+extern void
+comment_var(long i0, char sp0, long i1, char sp1, char o_sp1, char sim_char,
+	    const char *ann_comment, struct dyn_string_str *annot_var_dyn,
+	    int target, int d_type);
+
+void
+display_push_features(void *annot_stack, struct dyn_string_str *annot_var_dyn,
+		      long i0_pos, char sp0, long i1_pos, char sp1, char sym, 
+		      int score, double comp, int sw_score, int n0, int n1,
+		      void *pstat_void, int d_type);
+
+#define DP_FULL_FMT 1	/* Region: score: bits: id: ... */
+#define Q_TARGET 0
+#define L_TARGET 1
+
+int seq_pos(int pos, int rev,int off);
+
+/* values of calc_func_mode */
+#define CALC_CONS 1
+#define CALC_CODE 2
+#define CALC_ID   3
+#define CALC_ID_DOM   4
+
+/* add_annot_code: adds annotation codes to struct dyn_string_str ann_code_dyn */
+/* Nothing is added unless have_ann is set and ann_aa1_i1 is an
+   annotation character other than ' ', '[' or ']'.  The positions are
+   written 1-based (q_off_pos+1, l_off_pos+1). */
+void
+add_annot_code(int have_ann, char sp0, char sp1, 
+	       char ann_aa1_i1,
+	       long q_off_pos, long l_off_pos, char sim_sym_code,
+	       struct dyn_string_str *ann_code_dyn)
+{
+  const char site_tag = 'X';
+  char code_buf[MAX_STR];
+
+  /* no annotation at this position */
+  if (!have_ann || ann_aa1_i1 == ' ') return;
+
+  /* '[' and ']' do not produce a code */
+  if (ann_aa1_i1 == '[' || ann_aa1_i1 == ']') return;
+
+  sprintf(code_buf, "|%c%c:%ld%c%c%ld%c",
+	  site_tag, ann_aa1_i1, q_off_pos+1, sp0,
+	  sim_sym_code, l_off_pos+1, sp1);
+  dyn_strcat(ann_code_dyn, code_buf);
+}
+
+/* calc_cons_u() - universal alignment builder for calc_cons_a(),
+   calc_code(), calc_id() and calc_idd()
+   (see cal_cons2.c/calc_cons_u() for strategy)
+
+   Walks the edit script a_res->res[0 .. a_res->nres-1] of a translated
+   DNA:protein alignment.  Script values handled here:
+     3 - advance 3 nt, align the codon with the next protein residue
+     2 - frameshift column, advance 2 nt, then align with a residue
+     4 - frameshift column, advance 4 nt, then align with a residue
+     5 - advance 3 nt with no protein residue (gap in the protein)
+     0 - protein residue with no codon (gap in the DNA)
+
+   calc_func_mode selects the output:
+     CALC_CONS            - aligned strings in seqc0/seqc1/seqca and, when
+                            annotations are available, seqc0a/seqc1a
+     CALC_CODE            - encoded alignment appended to align_code_dyn
+                            (format chosen by display_code)
+     CALC_ID, CALC_ID_DOM - no strings; single-character scratch cells
+                            are used in place of the output buffers
+   Any other mode prints an error and calls exit(1).
+
+   Without TFAST (FASTX) aa0 is the DNA and aa1/aa1a/annot1_p the
+   protein; with TFAST aa0/aa0a/annot0_p is the protein and aa1 the DNA.
+   In both builds a_res->min0 indexes the protein and a_res->min1 the DNA.
+
+   In every mode:
+     - annot_var_dyn is reset, then receives variant/feature text
+     - aln limits, gap counts (ngap_q/ngap_l) and nfs are filled in; the
+       identity/similarity counters in *aln are zeroed here and aln is
+       handed to align_type()
+     - *score_delta receives the v_delta accumulated by the annotation
+       helpers
+     - cumm_seq_score, when not NULL, receives one score per alignment
+       column (frameshift columns get ppst->gshift)
+     - *nc receives the number of columns excluding frameshift columns
+
+   returns: number of alignment columns including frameshift columns
+*/
+
+int
+calc_cons_u( /* inputs */
+	    const unsigned char *aa0, int n0,
+	    const unsigned char *aa1, int n1,
+	    struct a_res_str *a_res,	/* alignment encoding */
+	    struct pstruct *ppst,
+	    struct f_struct *f_str,
+	    void *pstat_void,
+	    /* annotation stuff */
+	    const unsigned char *ann_arr,
+	    const unsigned char *aa0a, const struct annot_str *annot0_p,
+	    const unsigned char *aa1a, const struct annot_str *annot1_p,
+	    int calc_func_mode, 	/* CALC_CONS, CALC_CODE, CALC_ID */
+	    int display_code, 	/* used only by CALC_CODE */
+	    /* outputs */
+	    int *nc,
+	    char *seqc0, char *seqc1, char *seqca, int *cumm_seq_score,
+	    char *seqc0a, char *seqc1a,
+	    struct a_struct *aln,
+	    int *score_delta, 
+	    struct dyn_string_str *annot_var_dyn,
+	    struct dyn_string_str *align_code_dyn)
+{
+  int i0, i1;			/* i0: DNA index into ap0, i1: protein index into ap1 */
+  int lenc, not_c, itmp, ngap_p, ngap_d, nfs;
+  int *i_spa;			/* write cursor into cumm_seq_score */
+  char *sp0_p, *sp0a_p, *sp1_p, *sp1a_p, *spa_p, t_spa;
+  char sp0_c, sp1_c, spa_c;	/* used for CALC_ID, CALC_CODE */
+  char sp0a_c, sp1a_c;		/* used for CALC_CODE */
+
+  struct update_code_str *update_data_p;
+
+  const unsigned char *sq;
+  unsigned char aap;
+
+  const unsigned char *ap0, *ap1;
+  const unsigned char *ap1a;	/* ap1 always points to protein, and
+				   only protein has annotations */
+  const struct annot_str *annotp_p;	/* protein annotations from annot_str */
+  int comment_target;
+
+  int *rp, *rpmax;
+  int have_ann;
+
+  /* variables for variant changes/region scores */
+  char tmp_str[MAX_LSTR];
+  void *annot_stack;
+  int have_push_features, prev_match, *have_push_features_p;
+
+  char *sim_sym = aln_map_sym[MX_ACC];
+  struct annot_entry **s_annotp_arr_p;
+  int  i1_annot, v_delta;
+  long i0_offset, i1_offset;
+
+  long i1_left_end;
+  int show_code, annot_fmt;
+
+  int d1_score, d1_ident, d1_alen, d1_gaplen;
+  struct domfeat_data *left_domain_list1, *left_domain_head1;
+
+  char *ann_comment;
+
+  *score_delta = 0;
+  d1_score = d1_ident = d1_alen = d1_gaplen = 0;
+  i1_left_end = -1;
+  left_domain_head1 = left_domain_list1 = NULL;
+
+  NULL_dyn_string(annot_var_dyn);
+
+  if (ppst->ext_sq_set) {sq = ppst->sqx;}
+  else {sq = ppst->sq;}
+
+  /* per-build / per-mode setup: choose which inputs are DNA and which
+     protein, and point the sp*_p output cursors either at the caller's
+     buffers (CALC_CONS) or at local scratch characters */
+#ifndef TFAST	/* FASTX */
+  comment_target = 1;
+  aln->amin1 = aln->smin1 = a_res->min0;	/* prot */
+  aln->amin0 = aln->smin0 = a_res->min1;	/* DNA */
+
+  i0_offset = aln->q_offset;
+  i1_offset = aln->l_offset;
+
+  ap0 = f_str->aa0v;	/* translated DNA */
+  ap1 = aa1;		/* protein */
+
+  ap1a = aa1a;
+  annotp_p = annot1_p;
+
+  if (calc_func_mode == CALC_CONS) {
+    have_ann = (seqc0a !=NULL && aa1a != NULL);
+    sp0_p = seqc0;	/* translated DNA */
+    sp1_p = seqc1;	/* protein */
+    spa_p = seqca;
+    sp1a_p = seqc1a;	/* protein library can have annotation */
+    sp0a_p = seqc0a;	/* sp0a is always ' ' - no translated annotation */
+    annot_fmt = DP_FULL_FMT;
+  }
+  else if (calc_func_mode == CALC_ID || calc_func_mode == CALC_ID_DOM) {
+    /* does not require aa0a/aa1a, only for variants */
+    have_ann = ((annotp_p && annotp_p->n_annot > 0) || (annot0_p && annot0_p->n_annot > 0));
+    sp0_p = &sp0_c;
+    sp1_p = &sp1_c;
+    spa_p = &spa_c;
+    sp0a_p = &sp0a_c;
+    sp1a_p = &sp1a_c;
+    annot_fmt = 3;
+  }
+  else if (calc_func_mode == CALC_CODE) {
+    spa_p = &spa_c;
+    sp0_p = &sp0_c;
+    sp1_p = &sp1_c;
+    sp0a_p = &sp0a_c;
+    sp1a_p = &sp1a_c;
+
+    show_code = (display_code & (SHOW_CODE_MASK+SHOW_CODE_EXT));	/* see defs.h; SHOW_CODE_ALIGN=2,_CIGAR=3,_CIGAR_EXT=4 */
+    annot_fmt = 2;
+    if (display_code & SHOW_ANNOT_FULL) {
+      annot_fmt = 1;
+    }
+    /* have_ann encodes number of sequences annotated */
+    have_ann = 0;
+    if ((annotp_p && annotp_p->n_annot > 0) || (aa1a != NULL)) { have_ann |= 2;}
+    update_data_p = init_update_data(show_code);
+  }
+  else {
+    fprintf(stderr,"*** error [%s:%d] --- cal_cons_u() invalid calc_func_mode: %d\n",
+	    __FILE__, __LINE__, calc_func_mode);
+    exit(1);
+  }
+
+#else		/* TFASTX */
+  comment_target = 0;
+  aln->amin0 = aln->smin0 = a_res->min0;	/* prot (query); min0 initializes i1 below */
+  aln->amin1 = aln->smin1 = a_res->min1;	/* DNA (library); min1 initializes i0 below */
+
+  i1_offset = aln->q_offset;
+  i0_offset = aln->l_offset;
+
+  ap1 = aa0;		/* aa0 is protein */
+  /* with fx_malign(), there is no guarantee that we have a valid f_str->aa1y, so make one */
+  pre_cons(aa1,n1,aln->frame, f_str);
+  ap0 = f_str->aa1v;	/* aa1 is DNA */
+  ap1a = aa0a;
+  annotp_p = annot0_p;
+
+  have_ann = (seqc0a !=NULL && aa0a != NULL);
+  if (calc_func_mode == CALC_CONS) {
+    sp1_p = seqc0;		/* sp1 points to protein query */
+    sp0_p = seqc1;		/* sp0 points to DNA */
+    spa_p = seqca;
+    sp1a_p = seqc0a;	/* protein query can have annotation */
+    sp0a_p = seqc1a;	/* sp0a is always ' ' - no translated annotation */
+    annot_fmt = DP_FULL_FMT;
+  }
+  else if (calc_func_mode == CALC_ID || calc_func_mode == CALC_ID_DOM)  {
+    have_ann = (annotp_p && annotp_p->n_annot > 0);
+    spa_p = &spa_c;
+    sp0_p = &sp0_c;
+    sp1_p = &sp1_c;
+
+    sp0a_p = &sp0a_c;
+    sp1a_p = &sp1a_c;
+    annot_fmt = 3;
+
+    /* does not require aa0a/aa1a, only for variants */
+  }
+  else if (calc_func_mode == CALC_CODE) {
+    spa_p = &spa_c;
+    sp0_p = &sp0_c;
+    sp1_p = &sp1_c;
+
+    sp0a_p = &sp0a_c;
+    sp1a_p = &sp1a_c;
+
+    show_code = (display_code & (SHOW_CODE_MASK+SHOW_CODE_EXT));	/* see defs.h; SHOW_CODE_ALIGN=2,_CIGAR=3,_CIGAR_EXT=4 */
+    annot_fmt = 2;
+    if (display_code & SHOW_ANNOT_FULL) {
+      annot_fmt = 1;
+    }
+
+    /* have_ann encodes number of sequences annotated */
+    if ((annotp_p && annotp_p->n_annot > 0) || (ap1a != NULL)) { have_ann |= 1;}
+
+    update_data_p = init_update_data(show_code);
+  }
+  else {
+    fprintf(stderr,"*** error [%s:%d] --- cal_cons_u() invalid calc_func_mode: %d\n",
+	    __FILE__, __LINE__, calc_func_mode);
+    exit(1);
+  }
+#endif
+  if (cumm_seq_score) i_spa = cumm_seq_score;
+
+  rp = a_res->res;
+  rpmax = &a_res->res[a_res->nres];
+
+  lenc = not_c = aln->nident = aln->nmismatch = aln->nsim = aln->npos = ngap_p = ngap_d = nfs= 0;
+  i0 = a_res->min1 - 3;	/* every case advances i0 before using it */
+  i1 = a_res->min0;
+
+  v_delta = 0;
+  i1_annot = 0;
+  annot_stack = NULL;
+  s_annotp_arr_p = NULL;
+  have_push_features = prev_match = 0;
+  if (have_ann) {
+    have_push_features_p = &have_push_features;
+
+    if (annotp_p && annotp_p->n_annot > 0) {
+      annot_stack = init_stack(64,64);
+      left_domain_list1=init_domfeat_data(annotp_p);
+
+      s_annotp_arr_p = annotp_p->s_annot_arr_p;
+
+      /* position i1_annot at the first annotation at/after the
+	 alignment start; '-' entries that begin before the start but
+	 end after it are processed immediately */
+      while (i1_annot < annotp_p->n_annot) {
+	if (s_annotp_arr_p[i1_annot]->pos >= i1+i1_offset) {break;}
+	if (s_annotp_arr_p[i1_annot]->end <= i1+i1_offset) {i1_annot++; continue;}
+
+	if (s_annotp_arr_p[i1_annot]->label == '-') {
+	  process_annot_match(&itmp, NULL, i1_offset+seq_pos(i1,aln->llrev,0), i0_offset + seq_pos(i0,aln->qlrev,0),
+			      sp1_p, sp1a_p, sq, s_annotp_arr_p[i1_annot], annotp_p->n_annot,  &ann_comment, 
+			      annot_stack, have_push_features_p, &v_delta,
+			      &d1_score, &d1_ident, &d1_alen, &d1_gaplen,
+			      &left_domain_head1, &left_domain_list1[i1_annot], &i1_left_end, 0);
+	}
+	i1_annot++;
+      }
+    }
+  }
+
+  while (rp < rpmax) {
+    /*    fprintf(stderr,"%d %d %d (%c) %d (%c)\n"
+	  ,(int)(rp-res),*rp,i0,sq[ap0[i0]],i1,sq[ap1[i1]]);
+    */
+
+    switch (*rp++) {
+    case 3:		/* match */
+      i0 += 3;
+
+      *sp1_p = sq[aap=ap1[i1]];
+      *sp0_p = f_str->weight_c[aap][ap0[i0]].c5;
+      itmp = ppst->pam2[0][aap][pascii[*sp0_p]];
+
+
+      if (have_ann) {
+	if (calc_func_mode != CALC_ID  && calc_func_mode != CALC_ID_DOM) {
+	  *sp1a_p = ann_arr[ap1a[i1]];
+	  *sp0a_p = ' ';
+	}
+	if (s_annotp_arr_p) {
+	  if (i1+i1_offset == s_annotp_arr_p[i1_annot]->pos || i1+i1_offset == i1_left_end) {
+	    i1_annot = next_annot_match(&itmp, ppst->pam2[0][pascii[*sp0_p]],
+#ifndef TFAST
+					i1_offset+seq_pos(i1,aln->llrev,0),	/* annotated target (prot) coordinate */
+					i0_offset+seq_pos(i0,aln->qlrev,0),
+#else
+					i1_offset+seq_pos(i1,aln->qlrev,0),
+					i0_offset+seq_pos(i0,aln->llrev,0),
+#endif
+					sp1_p, sp1a_p, sq,
+					i1_annot, annotp_p->n_annot, s_annotp_arr_p,
+					&ann_comment, annot_stack, have_push_features_p, &v_delta,
+					&d1_score, &d1_ident, &d1_alen, &d1_gaplen,
+					&left_domain_head1, left_domain_list1, &i1_left_end,
+					0);
+
+	    /* must be out of the loop to capture the last value */
+	    /* the displayed residue differs from the sequence residue:
+	       report the variant */
+	    if (ppst->sq[ap1[i1]] != *sp1_p) {
+	      t_spa = align_type(itmp, *sp0_p, *sp1_p, 0, NULL, ppst->pam_x_id_sim);
+
+	      if (calc_func_mode != CALC_ID  && calc_func_mode != CALC_ID_DOM) {
+		comment_var(
+#ifndef TFAST
+			    i0_offset+seq_pos(i0,aln->qlrev,0), *sp0_p,
+			    i1_offset+seq_pos(i1,aln->llrev,0), *sp1_p,
+#else
+			    i1_offset+seq_pos(i1,aln->qlrev,0), *sp0_p,
+			    i0_offset+seq_pos(i0,aln->llrev,0), *sp1_p,
+#endif
+			    sq[ap1[i1]], sim_sym[t_spa], ann_comment,
+			    annot_var_dyn,comment_target,annot_fmt);
+	      }
+	      else {
+		sprintf(tmp_str,"%c%d%c;",ppst->sq[ap1[i1]],i1+1,*sp1_p);
+		/*  SAFE_STRNCAT(annot_var_s,tmp_str,n_annot_var_s); */
+		dyn_strcat(annot_var_dyn, tmp_str);
+	      }
+	    }
+	  }
+	  prev_match = 1;
+	  d1_score += itmp;
+	}
+	if (calc_func_mode == CALC_CONS) {sp0a_p++; sp1a_p++;}
+      }
+
+      *spa_p = align_type(itmp, *sp0_p, *sp1_p, 0, aln, ppst->pam_x_id_sim);
+      d1_alen++;
+      if (*spa_p == M_IDENT) {d1_ident++;}
+
+      if (cumm_seq_score) *i_spa++ = itmp;
+
+      if (calc_func_mode == CALC_CODE) {
+	update_code(align_code_dyn, update_data_p, 3, *spa_p, *sp0_p, *sp1_p);
+
+	if (have_ann && have_push_features) {
+	  add_annot_code(have_ann, *sp0_p, *sp1_p, *sp1a_p,
+#ifndef TFAST
+			 i0_offset+seq_pos(i0,aln->qlrev,0),
+			 i1_offset+seq_pos(i1,aln->llrev,0),
+#else
+			 i1_offset+seq_pos(i1,aln->qlrev,0),
+			 i0_offset+seq_pos(i0,aln->llrev,0),
+#endif
+			 sim_sym[*spa_p], annot_var_dyn);
+	}
+      }
+
+      if (have_ann && have_push_features &&  calc_func_mode != CALC_ID) {
+	display_push_features(annot_stack, annot_var_dyn,
+#ifndef TFAST
+			      i0_offset+seq_pos(i0,aln->qlrev,0), *sp0_p,
+			      i1_offset+seq_pos(i1,aln->llrev,0), *sp1_p,
+#else
+			      i1_offset+seq_pos(i1,aln->qlrev,0), *sp0_p,
+			      i0_offset+seq_pos(i0,aln->llrev,0), *sp1_p,
+#endif
+			      sim_sym[*spa_p],
+			      a_res->rst.score[ppst->score_ix], a_res->rst.comp, a_res->sw_score,
+			      n0, n1, pstat_void, annot_fmt);
+	have_push_features = 0;
+      }
+
+      i1++;
+      if (calc_func_mode == CALC_CONS) {sp0_p++; sp1_p++; spa_p++;}
+      lenc++;
+      break;
+    case 2:		/* frame shift +2, then match */
+      nfs++;
+      i0 += 2;
+      /* first column: the frameshift marker */
+      *sp0_p = '/';
+      *sp1_p = '-';
+      *spa_p = M_DEL;
+
+      if (calc_func_mode == CALC_CODE) {
+	update_code(align_code_dyn, update_data_p, 2, *spa_p,*sp0_p,*sp1_p);
+      }
+
+      if (cumm_seq_score) *i_spa++ = ppst->gshift;
+
+      if (calc_func_mode == CALC_CONS) {
+	sp0_p++; sp1_p++; spa_p++;
+	if (have_ann) {*sp0a_p++ = *sp1a_p++ = ' ';}
+      }
+
+      not_c++;
+
+      /* second column: the aligned residue */
+      *sp1_p = sq[aap=ap1[i1]];
+      *sp0_p = f_str->weight_c[aap][ap0[i0]].c2;
+      itmp = ppst->pam2[0][pascii[*sp0_p]][aap];
+
+      /* cumm_seq_score gets exactly one entry per column: gshift was
+	 stored above for the frameshift column and itmp is stored below
+	 for this one (as in case 4).  A second gshift store here would
+	 write three entries for two columns and shift every later score. */
+
+      if (have_ann) {
+	have_push_features = 0;
+	if (calc_func_mode != CALC_ID && calc_func_mode != CALC_ID_DOM) {
+	  *sp1a_p = ann_arr[ap1a[i1]];
+	  *sp0a_p = ' ';
+	}
+	if (s_annotp_arr_p) {
+	  if (i1 + i1_offset == s_annotp_arr_p[i1_annot]->pos || i1+i1_offset == i1_left_end) {
+	    i1_annot = next_annot_match(&itmp, ppst->pam2[0][pascii[*sp0_p]],
+#ifndef TFAST
+					i1_offset+seq_pos(i1,aln->llrev,0),	/* annotated target (prot) coordinate */
+					i0_offset+seq_pos(i0,aln->qlrev,0),
+#else
+					i1_offset+seq_pos(i1,aln->qlrev,0),
+					i0_offset+seq_pos(i0,aln->llrev,0),
+#endif
+					sp1_p, sp1a_p, sq,
+					i1_annot, annotp_p->n_annot, s_annotp_arr_p,
+					&ann_comment, annot_stack, have_push_features_p, &v_delta,
+					&d1_score, &d1_ident, &d1_alen, &d1_gaplen,
+					&left_domain_head1, left_domain_list1, &i1_left_end,0);
+
+	    /* must be out of the loop to capture the last value */
+	    if (ppst->sq[ap1[i1]] != *sp1_p) {
+	      t_spa = align_type(itmp, *sp0_p, *sp1_p, 0, NULL, ppst->pam_x_id_sim);
+
+	      if (calc_func_mode != CALC_ID && calc_func_mode != CALC_ID_DOM) {
+		comment_var(
+#ifndef TFAST
+			    i0_offset+seq_pos(i0,aln->qlrev,0), *sp0_p,
+			    i1_offset+seq_pos(i1,aln->llrev,0), *sp1_p,
+#else
+			    i1_offset+seq_pos(i1,aln->qlrev,0), *sp0_p,
+			    i0_offset+seq_pos(i0,aln->llrev,0), *sp1_p,
+#endif
+			    sq[ap1[i1]], sim_sym[t_spa], ann_comment,
+			    annot_var_dyn,comment_target,annot_fmt);
+	      }
+	      else {
+		sprintf(tmp_str,"%c%d%c;",sq[ap1[i1]],i1+1,*sp1_p);
+		/*   SAFE_STRNCAT(annot_var_s,tmp_str,n_annot_var_s); */
+		dyn_strcat(annot_var_dyn, tmp_str);
+	      }
+	    }
+	  }
+	  d1_score += ppst->gshift;
+	  d1_score += itmp;
+	  prev_match = 1;
+	}
+	if (calc_func_mode == CALC_CONS) {sp0a_p++; sp1a_p++;}
+      }
+
+      *spa_p = align_type(itmp, *sp0_p, *sp1_p, 0, aln, ppst->pam_x_id_sim);
+
+      if (calc_func_mode == CALC_CODE) {
+	update_code(align_code_dyn, update_data_p, 3, *spa_p,*sp0_p,*sp1_p);
+      }
+
+      d1_alen++;
+      if (*spa_p == M_IDENT) {d1_ident++;}
+
+      if (have_ann && have_push_features  &&  calc_func_mode != CALC_ID) {
+	if (calc_func_mode == CALC_CODE) {
+	  add_annot_code(have_ann, *sp0_p, *sp1_p, *sp1a_p,
+#ifndef TFAST
+			 i0_offset+seq_pos(i0,aln->qlrev,0),
+			 i1_offset+seq_pos(i1,aln->llrev,0),
+#else
+			 i1_offset+seq_pos(i1,aln->qlrev,0),
+			 i0_offset+seq_pos(i0,aln->llrev,0),
+#endif
+			 sim_sym[*spa_p], annot_var_dyn);
+	}
+	display_push_features(annot_stack, annot_var_dyn,
+#ifndef TFAST
+			      i0_offset+seq_pos(i0,aln->qlrev,0), *sp0_p,
+			      i1_offset+seq_pos(i1,aln->llrev,0), *sp1_p,
+#else
+			      i1_offset+seq_pos(i1,aln->qlrev,0), *sp0_p,
+			      i0_offset+seq_pos(i0,aln->llrev,0), *sp1_p,
+#endif
+			      sim_sym[*spa_p],
+			      a_res->rst.score[ppst->score_ix], a_res->rst.comp, a_res->sw_score,
+			      n0, n1, pstat_void, annot_fmt);
+	have_push_features = 0;
+      }
+
+      if (cumm_seq_score) *i_spa++ = itmp;
+
+      i1++;
+      if (calc_func_mode == CALC_CONS) {sp0_p++; sp1_p++; spa_p++;}
+      lenc++;
+      break;
+    case 4:		/* frame shift, -1, then match */
+      nfs++;
+      i0 += 4;
+
+      /* first column: the frameshift marker */
+      *sp0_p = '\\';
+      *sp1_p = '-';
+      *spa_p = M_DEL;
+
+      if (calc_func_mode == CALC_CODE) {
+        update_code(align_code_dyn, update_data_p, 4, *spa_p,*sp0_p,*sp1_p);
+      }
+
+      if (calc_func_mode == CALC_CONS) {sp0_p++; sp1_p++; spa_p++;}
+
+      if (cumm_seq_score) *i_spa++ = ppst->gshift;
+
+      if (have_ann && calc_func_mode == CALC_CONS) {*sp1a_p++ = *sp0a_p++ = ' ';}
+      not_c++;
+
+      /* second column: the aligned residue */
+      *sp1_p = sq[aap=ap1[i1]];
+      *sp0_p = f_str->weight_c[aap][ap0[i0]].c4;
+      itmp = ppst->pam2[0][pascii[*sp0_p]][aap];
+
+      if (have_ann) {
+	if (calc_func_mode != CALC_ID  && calc_func_mode != CALC_ID_DOM) {
+	  *sp1a_p = ann_arr[ap1a[i1]];
+	  *sp0a_p = ' ';
+	}
+	if (s_annotp_arr_p && (i1+i1_offset == s_annotp_arr_p[i1_annot]->pos || i1+i1_offset == i1_left_end)) {
+	  i1_annot = next_annot_match(&itmp, ppst->pam2[0][pascii[*sp0_p]],
+#ifndef TFAST
+				      i1_offset+seq_pos(i1,aln->llrev,0),	/* annotated target (prot) coordinate */
+				      i0_offset+seq_pos(i0,aln->qlrev,0),
+#else
+				      i1_offset+seq_pos(i1,aln->qlrev,0),
+				      i0_offset+seq_pos(i0,aln->llrev,0),
+#endif
+				      sp1_p, sp1a_p, sq,
+				      i1_annot, annotp_p->n_annot, s_annotp_arr_p, &ann_comment,
+				      annot_stack, have_push_features_p, &v_delta,
+				      &d1_score, &d1_ident, &d1_alen, &d1_gaplen,
+				      &left_domain_head1, left_domain_list1, &i1_left_end,0);
+
+	  /* must be out of the loop to capture the last value */
+	  if (ppst->sq[ap1[i1]] != *sp1_p) {
+	    t_spa = align_type(itmp, *sp0_p, *sp1_p, 0, NULL, ppst->pam_x_id_sim);
+	    if (calc_func_mode != CALC_ID  && calc_func_mode != CALC_ID_DOM) {
+	      comment_var(
+#ifndef TFAST
+			  i0_offset+seq_pos(i0,aln->qlrev,0), *sp0_p,
+			  i1_offset+seq_pos(i1,aln->llrev,0), *sp1_p,
+#else
+			  i1_offset+seq_pos(i1,aln->qlrev,0), *sp0_p,
+			  i0_offset+seq_pos(i0,aln->llrev,0), *sp1_p,
+#endif
+			  sq[ap1[i1]], sim_sym[t_spa], ann_comment,
+			  annot_var_dyn, comment_target, annot_fmt);
+	    }
+	    else {
+	      sprintf(tmp_str,"%c%d%c;",ppst->sq[ap1[i1]],i1+1,*sp1_p);
+	      /*   SAFE_STRNCAT(annot_var_s,tmp_str,n_annot_var_s); */
+	      dyn_strcat(annot_var_dyn, tmp_str);
+	    }
+	  }
+	  d1_score += ppst->gshift;
+	  d1_score += itmp;
+	  prev_match = 1;
+	}
+	if (calc_func_mode == CALC_CONS) {sp0a_p++; sp1a_p++;}
+      }
+
+      *spa_p = align_type(itmp, *sp0_p, *sp1_p, 0, aln, ppst->pam_x_id_sim);
+
+      if (calc_func_mode == CALC_CODE) {
+	update_code(align_code_dyn, update_data_p, 3, *spa_p,*sp0_p,*sp1_p);
+      }
+
+      d1_alen++;
+      if (*spa_p == M_IDENT) {d1_ident++;}
+      if (cumm_seq_score) *i_spa++ = itmp;
+
+      if (have_ann && have_push_features &&  calc_func_mode != CALC_ID) {
+	display_push_features(annot_stack, annot_var_dyn,
+#ifndef TFAST
+			      i0_offset+seq_pos(i0,aln->qlrev,0), *sp0_p,
+			      i1_offset+seq_pos(i1,aln->llrev,0), *sp1_p,
+#else
+			      i1_offset+seq_pos(i1,aln->qlrev,0), *sp0_p,
+			      i0_offset+seq_pos(i0,aln->llrev,0), *sp1_p,
+#endif
+			      sim_sym[*spa_p], 
+			      a_res->rst.score[ppst->score_ix], a_res->rst.comp, a_res->sw_score,
+			      n0, n1, pstat_void, annot_fmt);
+	have_push_features = 0;
+      }
+
+      i1++;
+      if (calc_func_mode == CALC_CONS) {sp0_p++; sp1_p++; spa_p++;}
+      lenc++;
+      break;
+    case 5:		/* insertion in 0 */
+      if (have_ann && calc_func_mode == CALC_CONS) {
+	*sp1a_p++ = *sp0a_p++ = ' ';
+      }
+
+      /* column score = gap-open (only directly after a match) plus
+	 gap-extend, mirroring the d1_score update just below.  Written
+	 as one store so the open penalty is not overwritten and the
+	 slot's previous contents are never read. */
+      if (cumm_seq_score) {
+	*i_spa++ = (prev_match ? ppst->gdelval : 0) + ppst->ggapval;
+      }
+
+      if (prev_match) d1_score += ppst->gdelval;
+      d1_score += ppst->ggapval;
+
+      prev_match = 0;
+
+      i0 += 3;
+      *sp0_p = f_str->weight_c[0][ap0[i0]].c3;
+      *sp1_p = '-';
+      *spa_p = M_DEL;
+
+      if (calc_func_mode == CALC_CODE) {
+	*spa_p = 5;
+	update_code(align_code_dyn, update_data_p, 5, *spa_p,*sp0_p,*sp1_p);
+      }
+
+      lenc++;
+      ngap_p++;
+
+      if (calc_func_mode == CALC_CONS) {sp0_p++; sp1_p++; spa_p++;}
+      break;
+    case 0:		/* insertion in 1 */
+
+      *sp0_p = '-';
+      *sp1_p = sq[ap1[i1]];
+      *spa_p = M_DEL;
+
+      if (calc_func_mode == CALC_CODE) {
+	*spa_p = 5; /* indel code */
+	update_code(align_code_dyn, update_data_p, 0, *spa_p,*sp0_p,*sp1_p);
+      }
+
+      /* column score = gap-open (only directly after a match) plus
+	 gap-extend, the same quantities added to d1_score below; the
+	 extension term is ggapval, not a second gdelval */
+      if (cumm_seq_score) {
+	*i_spa++ = (prev_match ? ppst->gdelval : 0) + ppst->ggapval;
+      }
+
+      if (calc_func_mode == CALC_CONS) {sp0_p++; sp1_p++; spa_p++;}
+
+      if (have_ann) {
+	if (calc_func_mode != CALC_ID  && calc_func_mode != CALC_ID_DOM) {
+	  *sp0a_p = ' ';
+	  *sp1a_p = ann_arr[ap1a[i1]];
+	}
+
+	if (s_annotp_arr_p) {
+	  /* coordinates are much more complex for next_annot_match,
+	     and comment_var, because they may need to be reversed */
+
+	  if (i1 + i1_offset == s_annotp_arr_p[i1_annot]->pos) {
+	    i1_annot = next_annot_match(&itmp, ppst->pam2[0][ap0[i0]],
+#ifndef TFAST
+					i1_offset+seq_pos(i1,aln->llrev,0),	/* annotated target (prot) coordinate */
+					i0_offset+seq_pos(i0,aln->qlrev,0),
+#else
+					i1_offset+seq_pos(i1,aln->qlrev,0),
+					i0_offset+seq_pos(i0,aln->llrev,0),
+#endif
+					sp1_p, sp1a_p, sq, 
+					i1_annot, annotp_p->n_annot, s_annotp_arr_p,
+					&ann_comment, annot_stack, have_push_features_p, &v_delta,
+					&d1_score, &d1_ident, &d1_alen,  &d1_gaplen,
+					&left_domain_head1, left_domain_list1, &i1_left_end,0);
+
+	  }
+
+	  if (prev_match) d1_score += ppst->gdelval;
+	  d1_score += ppst->ggapval;
+	  d1_alen++;
+	  d1_gaplen++;
+	  prev_match = 0;
+	}
+	if (calc_func_mode == CALC_CONS) {sp0a_p++; sp1a_p++;}
+      }
+
+      if (have_ann && have_push_features  &&  calc_func_mode != CALC_ID) {
+	display_push_features(annot_stack, annot_var_dyn,
+#ifndef TFAST
+			      i0_offset+seq_pos(i0,aln->qlrev,0), *sp0_p,
+			      i1_offset+seq_pos(i1,aln->llrev,0), *sp1_p,
+#else
+			      i1_offset+seq_pos(i1,aln->qlrev,0), *sp0_p,
+			      i0_offset+seq_pos(i0,aln->llrev,0), *sp1_p,
+#endif
+			      sim_sym[*spa_p], 
+			      a_res->rst.score[ppst->score_ix], a_res->rst.comp, a_res->sw_score,
+			      n0, n1, pstat_void, annot_fmt);
+	have_push_features = 0;
+      }
+
+      i1++;
+      lenc++;
+      ngap_d++;
+      break;
+    }
+  }
+
+  /* done with alignment loop */
+
+  if (calc_func_mode == CALC_CODE) {
+    /* flushes the pending run and frees update_data_p */
+    close_update_data(align_code_dyn, update_data_p);
+  }
+
+  if (have_ann) {
+    if (calc_func_mode != CALC_ID  && calc_func_mode != CALC_ID_DOM) {*sp0a_p = *sp1a_p = '\0';}
+    if (s_annotp_arr_p) {
+      have_push_features = 0;
+
+      /* a domain is still open at the end of the alignment: close it */
+      if (s_annotp_arr_p && i1_left_end > 0) {
+	close_annot_match(-1, annot_stack, have_push_features_p,
+			  &d1_score, &d1_ident, &d1_alen,  &d1_gaplen,
+			  &left_domain_head1, &i1_left_end,
+			  0);
+      }
+
+      if (have_push_features  &&  calc_func_mode != CALC_ID) {
+	display_push_features(annot_stack, annot_var_dyn,
+#ifndef TFAST
+			      i0_offset+seq_pos(i0,aln->qlrev,0), *sp0_p,
+			      i1_offset+seq_pos(i1,aln->llrev,0), *sp1_p,
+#else
+			      i1_offset+seq_pos(i1,aln->qlrev,0), *sp0_p,
+			      i0_offset+seq_pos(i0,aln->llrev,0), *sp1_p,
+#endif
+			      sim_sym[*spa_p], 
+			      a_res->rst.score[ppst->score_ix], a_res->rst.comp, a_res->sw_score,
+			      n0, n1, pstat_void, annot_fmt);
+	have_push_features = 0;
+      }
+    }
+    if (left_domain_list1) free(left_domain_list1);
+    free_stack(annot_stack);
+  }
+  *spa_p = '\0';
+
+  /* i0 is the DNA end, i1 the protein end; ngap_p counts case-5 columns
+     (gap in protein), ngap_d counts case-0 columns (gap in DNA) */
+#ifndef TFAST
+  aln->amax0 = i0;
+  aln->amax1 = i1;
+  aln->ngap_q = ngap_d;
+  aln->ngap_l = ngap_p;
+#else
+  aln->amax1 = i0;
+  aln->amax0 = i1;
+  aln->ngap_q = ngap_p;
+  aln->ngap_l = ngap_d;
+#endif
+  aln->calc_last_set = 1;
+
+  aln->nfs = nfs;
+
+  *score_delta = v_delta;
+
+  if (lenc < 0) lenc = 1;
+  *nc = lenc;
+/*	now we have the middle, get the right end */
+  return lenc+not_c;
+}
+
+/* calc_cons_a() - build the displayed alignment strings
+
+   Thin wrapper: runs calc_cons_u() in CALC_CONS mode with display_code 0
+   and no alignment-code string, and returns its result unchanged.
+*/
+int
+calc_cons_a(const unsigned char *aa0, int n0,
+	    const unsigned char *aa1, int n1,
+	    int *nc,
+	    struct a_struct *aln,
+	    struct a_res_str *a_res, 
+	    struct pstruct *ppst,
+	    char *seqc0, char *seqc1, char *seqca, int *cumm_seq_score,
+	    const unsigned char *ann_arr,
+	    const unsigned char *aa0a, const struct annot_str *annot0_p, char *seqc0a,
+	    const unsigned char *aa1a, const struct annot_str *annot1_p, char *seqc1a,
+	    int *score_delta,
+	    struct dyn_string_str *annot_var_dyn,
+	    struct f_struct *f_str,
+	    void *pstat_void)
+{
+  int n_cols;
+
+  n_cols = calc_cons_u(/* inputs */
+		       aa0, n0, aa1, n1,
+		       a_res, ppst, f_str, pstat_void,
+		       /* annotations */
+		       ann_arr, aa0a, annot0_p, aa1a, annot1_p,
+		       /* mode, display_code */
+		       CALC_CONS, 0,
+		       /* outputs */
+		       nc, seqc0, seqc1, seqca, cumm_seq_score,
+		       seqc0a, seqc1a, aln, score_delta, annot_var_dyn,
+		       NULL	/* no align_code_dyn in CALC_CONS mode */
+		       );
+  return n_cols;
+}
+
+/* calc_astruct() - copy the alignment limits from a_res_p into aln_p
+
+   Clears aln_p->calc_last_set and sets amin0/amax0/amin1/amax1.
+   Without TFAST the "0" and "1" limits are swapped between the two
+   structures; with TFAST they are copied straight across.
+   f_str is not used.
+*/
+void
+calc_astruct(struct a_struct *aln_p, struct a_res_str *a_res_p, struct f_struct *f_str) {
+
+#ifndef TFAST	/* FASTX: aln "0" <- a_res "1", aln "1" <- a_res "0" */
+  aln_p->amin0 = a_res_p->min1;
+  aln_p->amax0 = a_res_p->max1;
+  aln_p->amin1 = a_res_p->min0;
+  aln_p->amax1 = a_res_p->max0;
+#else		/* TFASTX: same numbering in both structures */
+  aln_p->amin0 = a_res_p->min0;
+  aln_p->amax0 = a_res_p->max0;
+  aln_p->amin1 = a_res_p->min1;
+  aln_p->amax1 = a_res_p->max1;
+#endif
+
+  aln_p->calc_last_set = 0;
+}
+
+/* build an array of match/ins/del - length strings */
+
+/* modified 10-June-2014 to distinguish matches from mismatches, op=1
+   (previously unused) indicates an aligned non-identity */
+
+/* op_codes are:  0 - aa insertion
+   		  1 - (now) aligned non-identity
+   		  2 - -1 frameshift
+   		  3 - aligned identity
+   		  4 - +1 frameshift
+   		  5 - codon insertion
+*/
+
+/* init_update_data() - allocate the run-length encoder state used by
+   update_code() and released by close_update_data()
+
+   show_code: SHOW_CODE_* bits selecting the encoding:
+     SHOW_CODE_CIGAR - op_map = cigar_code, count written before the symbol
+     SHOW_CODE_BTOP  - op_map = ori_code, btop encoding enabled
+     otherwise       - op_map = ori_code, symbol written before the count
+     SHOW_CODE_EXT   - additionally sets show_ext
+
+   returns: the new state, or NULL (after printing an error) when the
+   allocation fails
+*/
+static struct update_code_str *
+init_update_data(int show_code) {	/* explicit type: implicit int is not valid C99 */
+
+  struct update_code_str *update_data_p;
+
+  if ((update_data_p = (struct update_code_str *)calloc(1,sizeof(struct update_code_str)))==NULL) {
+    fprintf(stderr,"*** error [%s:%d] - init_update_data(): cannot allocate update_code_str\n",
+	      __FILE__, __LINE__);
+    return NULL;
+  }
+
+  /* no run in progress yet */
+  update_data_p->p_op_idx = -1;
+  update_data_p->p_op_cnt = 0;
+  update_data_p->show_code = show_code;
+  update_data_p->btop_enc = 0;
+
+  if ((show_code & SHOW_CODE_CIGAR) == SHOW_CODE_CIGAR) {
+    update_data_p->op_map = cigar_code;
+    update_data_p->cigar_order = 1;
+  }
+  else if ((show_code & SHOW_CODE_BTOP) == SHOW_CODE_BTOP) {
+    update_data_p->op_map = ori_code;
+    update_data_p->cigar_order = 0;
+    update_data_p->btop_enc = 1;
+  }    
+  else {
+    update_data_p->op_map = ori_code;
+    update_data_p->cigar_order = 0;
+  }
+
+  if ((show_code & SHOW_CODE_EXT) == SHOW_CODE_EXT) {
+    update_data_p->show_ext = 1;
+  }
+  else {
+    update_data_p->show_ext = 0;
+  }
+
+  return update_data_p;
+}
+
+/* close_update_data() - flush the pending run to align_code_dyn and
+   free the encoder state
+
+   up_dp may be NULL (nothing is done).  For btop encoding the pending
+   identity count is appended as a decimal number; otherwise the
+   pending (op, count) run is formatted by sprintf_code().
+*/
+static void
+close_update_data(struct dyn_string_str *align_code_dyn,
+		  struct update_code_str *up_dp) {
+  char tmp_cnt[MAX_SSTR];
+
+  if (!up_dp) return;
+
+  /* sprintf_code() writes nothing when the count is 0 (e.g. an empty
+     alignment), so start from an empty string rather than appending
+     indeterminate stack contents */
+  tmp_cnt[0] = '\0';
+
+  if (up_dp->btop_enc) {
+    sprintf(tmp_cnt,"%d",up_dp->p_op_cnt);
+    up_dp->p_op_cnt = 0;
+  }
+  else {
+    sprintf_code(tmp_cnt,up_dp, up_dp->p_op_idx, up_dp->p_op_cnt);
+  }
+  dyn_strcat(align_code_dyn, tmp_cnt);
+
+  free(up_dp);
+}
+
+/* update_indel_code() has been modified to work more correctly with
+   ggsearch/glsearch, which, because alignments can start with either
+   insertions or deletions, can produce an initial code of "0=".  When
+   that happens, it is ignored and no code is added.
+
+   *align_code_dyn - alignment string (dynamic)
+   op -- encoded operation, currently 0=match, 1-delete, 2-insert, 3-term-match, 4-mismatch
+   op_cnt -- length of run
+   show_code -- SHOW_CODE_CIGAR uses cigar_code, otherwise legacy
+*/
+
+/* update_indel_code() is called for insertions and deletions
+   update_match_code() is called for every match
+*/
+
+/* sprintf_code() - format one (operation, run length) pair into tmp_str
+
+   Writes "<count><symbol>" when up_dp->cigar_order is set and
+   "<symbol><count>" otherwise, taking the symbol from
+   up_dp->op_map[op_idx].  When op_cnt is 0, tmp_str is left untouched
+   and op_map is not consulted.
+*/
+static void
+sprintf_code(char *tmp_str, struct update_code_str *up_dp, int op_idx, int op_cnt) {
+
+  if (op_cnt != 0) {
+    if (!up_dp->cigar_order) {
+      /* legacy order: symbol first */
+      sprintf(tmp_str,"%c%d",up_dp->op_map[op_idx],op_cnt);
+    }
+    else {
+      /* CIGAR order: count first */
+      sprintf(tmp_str,"%d%c",op_cnt,up_dp->op_map[op_idx]);
+    }
+  }
+}
+
+/* sprintf_btop() - btop alignment encoding for one alignment column
+
+   up_dp->p_op_cnt counts consecutive aligned identities only.
+   - aligned identity (op == 3 and sim_code == M_IDENT): the count is
+     incremented and tmp_str is returned empty
+   - anything else: tmp_str receives the pending count (omitted when it
+     is 0) followed by the two characters sp0 and sp1, and the count is
+     reset to 0
+*/
+
+static void
+sprintf_btop(char *tmp_str, 
+	     struct update_code_str *up_dp, 
+	     int op, int sim_code,
+	     unsigned char sp0, unsigned char sp1)
+{
+  int n_ident;
+
+  tmp_str[0] = '\0';
+
+  /* only aligned identities update counts */
+  if (op==3 && sim_code == M_IDENT) {
+    up_dp->p_op_cnt++;
+    return;
+  }
+
+  /* a difference: emit the run of identities seen so far, then the pair */
+  n_ident = up_dp->p_op_cnt;
+  up_dp->p_op_cnt = 0;
+
+  if (n_ident > 0) {
+    sprintf(tmp_str,"%d%c%c",n_ident,sp0,sp1);
+  }
+  else {
+    sprintf(tmp_str,"%c%c",sp0,sp1);
+  }
+}
+
+/* update_code() - add one alignment operation to the encoded alignment
+
+   align_code_dyn: output string (appended with dyn_strcat())
+   up_dp:          encoder state from init_update_data(); NULL is ignored
+   op:             operation for this column (0, 1, 2, 3, 4 or 5; other
+                   values are ignored)
+   sim_code:       similarity code of the column (compared with M_IDENT)
+   sp0, sp1:       the two aligned characters
+
+   With btop encoding every column is passed to sprintf_btop().
+   Otherwise runs of the same operation are counted in up_dp and written
+   out with sprintf_code() when the operation changes.
+*/
+static void
+update_code(struct dyn_string_str *align_code_dyn,
+	    struct update_code_str *up_dp, int op, 
+	    int sim_code,  unsigned char sp0, unsigned char sp1)
+{
+  char tmp_cnt[MAX_SSTR];
+  tmp_cnt[0]='\0';
+
+  /* init_update_data() returns NULL when allocation fails; match
+     close_update_data() and do nothing instead of dereferencing it */
+  if (!up_dp) return;
+
+  if (up_dp->btop_enc) {
+    sprintf_btop(tmp_cnt, up_dp, op, sim_code, sp0, sp1);
+    dyn_strcat(align_code_dyn, tmp_cnt);
+    return;
+  }
+
+  /* there are two kinds of "op's", one time and accumulating */
+  /* op == 2, 4 are one-time: */
+
+  switch (op) {
+  case 2:
+  case 4:
+    /* flush the pending run (no output if its count is 0), then emit
+       the one-time op with a count of 1 */
+    sprintf_code(tmp_cnt,up_dp, up_dp->p_op_idx,up_dp->p_op_cnt);
+    dyn_strcat(align_code_dyn, tmp_cnt);
+    sprintf_code(tmp_cnt,up_dp, op, 1);
+    dyn_strcat(align_code_dyn, tmp_cnt);
+    up_dp->p_op_cnt = 0;
+    break;
+  case 0:
+  case 5:
+    if (op == up_dp->p_op_idx) {
+      up_dp->p_op_cnt++;
+    }
+    else {
+      sprintf_code(tmp_cnt,up_dp, up_dp->p_op_idx,up_dp->p_op_cnt);
+      dyn_strcat(align_code_dyn, tmp_cnt);
+      up_dp->p_op_idx = op;
+      up_dp->p_op_cnt = 1;
+    }
+    break;
+  case 1:
+  case 3:
+    if (sp0 != '*' && sp1 != '*') {	/* default case, not termination */
+      /* extended codes distinguish non-identities (op 1) from identities */
+      if (up_dp->show_ext && sim_code != M_IDENT) { op = 1;}
+    }
+    else {	/* have a termination codon, output for !SHOW_CODE_CIGAR */
+      /* at least one of sp0/sp1 is '*' in this branch */
+      if (!up_dp->cigar_order) { op = 6;}
+      else if (up_dp->show_ext && (sp0 != sp1)) { op = 1;}
+    }
+
+    if (up_dp->p_op_cnt == 0) {
+      up_dp->p_op_idx = op;
+      up_dp->p_op_cnt = 1;
+    }
+    else if (op != up_dp->p_op_idx) {
+      sprintf_code(tmp_cnt,up_dp, up_dp->p_op_idx,up_dp->p_op_cnt);
+      dyn_strcat(align_code_dyn, tmp_cnt);
+      up_dp->p_op_idx = op;
+      up_dp->p_op_cnt = 1;
+    }
+    else {
+      up_dp->p_op_cnt++;
+    }
+    break;
+  }
+  return;
+}
+
+/* calc_code() - build the encoded alignment string
+
+   Thin wrapper: runs calc_cons_u() in CALC_CODE mode.  No sequence
+   buffers are supplied; the encoding is appended to align_code_dyn and
+   annotation text goes to annot_code_dyn.  The column count reported
+   through calc_cons_u()'s nc argument is discarded.
+*/
+int calc_code(const unsigned char *aa0, int n0,
+	      const unsigned char *aa1, int n1,
+	      struct a_struct *aln,
+	      struct a_res_str *a_res,
+	      struct pstruct *ppst,
+	      struct dyn_string_str *align_code_dyn,
+	      const unsigned char *ann_arr, 
+	      const unsigned char *aa0a,
+	      const struct annot_str *annot0_p,
+	      const unsigned char *aa1a,
+	      const struct annot_str *annot1_p,
+	      struct dyn_string_str *annot_code_dyn,
+	      int *score_delta,
+	      struct f_struct *f_str,
+	      void *pstat_void,
+	      int display_code)
+{
+  int nc_unused;	/* required by calc_cons_u(), not reported */
+  int n_cols;
+
+  n_cols = calc_cons_u(/* inputs */
+		       aa0, n0, aa1, n1,
+		       a_res, ppst, f_str, pstat_void,
+		       /* annotations */
+		       ann_arr, aa0a, annot0_p, aa1a, annot1_p,
+		       /* mode, display_code */
+		       CALC_CODE, display_code,
+		       /* outputs: no seqc0/seqc1/seqca/cumm_seq_score */
+		       &nc_unused, NULL, NULL, NULL, NULL,
+		       /* no seqc0a/seqc1a */
+		       NULL, NULL, aln, score_delta, annot_code_dyn,
+		       align_code_dyn
+		       );
+  return n_cols;
+}
+
+/* calc_id never looks at domains or features, only variation */
+
+/* Thin wrapper: runs calc_cons_u() in CALC_ID mode with no sequence
+   buffers, no annotation strings, no pstat_void and no alignment-code
+   string; the nc value is discarded. */
+int calc_id(const unsigned char *aa0, int n0,
+	    const unsigned char *aa1, int n1,
+	    struct a_struct *aln, 
+	    struct a_res_str *a_res,
+	    struct pstruct *ppst,
+	    const struct annot_str *annot0_p,
+	    const struct annot_str *annot1_p,
+	    int *score_delta,
+	    struct dyn_string_str *annot_var_dyn,
+	    struct f_struct *f_str)
+{
+  int nc_unused;	/* required by calc_cons_u(), not reported */
+  int n_cols;
+
+  n_cols = calc_cons_u(/* inputs; pstat_void is NULL */
+		       aa0, n0, aa1, n1,
+		       a_res, ppst, f_str, NULL,
+		       /* ann_arr, aa0a and aa1a are NULL */
+		       NULL, NULL, annot0_p, NULL, annot1_p,
+		       /* mode, display_code */
+		       CALC_ID, 0,
+		       /* outputs */
+		       &nc_unused, NULL, NULL, NULL, NULL,
+		       NULL, NULL, aln, score_delta, annot_var_dyn,
+		       NULL
+		       );
+  return n_cols;
+}
+
+/* calc_idd looks at domains and variation */
+
+/* Thin wrapper: identical to calc_id() except that calc_cons_u() is run
+   in CALC_ID_DOM mode. */
+int calc_idd(const unsigned char *aa0, int n0,
+	    const unsigned char *aa1, int n1,
+	    struct a_struct *aln, 
+	    struct a_res_str *a_res,
+	    struct pstruct *ppst,
+	    const struct annot_str *annot0_p,
+	    const struct annot_str *annot1_p,
+	    int *score_delta,
+	    struct dyn_string_str *annot_var_dyn,
+	    struct f_struct *f_str)
+{
+  int nc_unused;	/* required by calc_cons_u(), not reported */
+  int n_cols;
+
+  n_cols = calc_cons_u(/* inputs; pstat_void is NULL */
+		       aa0, n0, aa1, n1,
+		       a_res, ppst, f_str, NULL,
+		       /* ann_arr, aa0a and aa1a are NULL */
+		       NULL, NULL, annot0_p, NULL, annot1_p,
+		       /* mode, display_code */
+		       CALC_ID_DOM, 0,
+		       /* outputs */
+		       &nc_unused, NULL, NULL, NULL, NULL,
+		       NULL, NULL, aln, score_delta, annot_var_dyn,
+		       NULL
+		       );
+  return n_cols;
+}
diff --git a/src/dropgsw2.c b/src/dropgsw2.c
new file mode 100644
index 0000000..3184858
--- /dev/null
+++ b/src/dropgsw2.c
@@ -0,0 +1,1128 @@
+/* $Id: dropgsw2.c $ */
+
+/* copyright (c) 1996, 2014 by William R. Pearson and The Rector &
+   Visitors of the University of Virginia */
+
+/* Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under this License is distributed on an "AS
+   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+   express or implied.  See the License for the specific language
+   governing permissions and limitations under the License. 
+*/
+
+/* 17-Aug-2006 - removed globals *sapp/last - alignment should be thread safe */
+
+/* 12-Oct-2005 - converted to use a_res and aln for alignment coordinates */
+
+/* 4-Nov-2004 - Diagonal Altivec Smith-Waterman included */
+
+/* 14-May-2003 - modified to return alignment start at 0, rather than
+   1, for begin:end alignments
+
+   25-Feb-2003 - modified to support Altivec parallel Smith-Waterman
+
+   22-Sep-2003 - removed Altivec support at request of Sencel lawyers
+*/
+
+/* this code uses an implementation of the Smith-Waterman algorithm
+   designed by Phil Green, U. of Washington, that is 1.5 - 2X faster
+   than my Miller and Myers implementation. */
+
+/* the shortcuts used in this program prevent it from calculating scores
+   that are less than the gap penalty for the first residue in a gap. As
+   a result this code cannot be used with very large gap penalties, or
+   with very short sequences, and probably should not be used with prss3.
+*/
+
+/* version 3.2 fixes a subtle bug that was encountered while running
+   do_walign() interspersed with do_work().  This happens only with -m
+   9 and pvcomplib.  The fix was to more explicitly zero-out ss[] at
+   the beginning of do_work.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <math.h>
+
+#include "defs.h"
+#include "param.h"
+
+static char *verstr="7.2 Nov 2010";
+
+#include "dropgsw2.h"
+
+#define DROP_INTERN
+#include "drop_func.h"
+
+#ifdef SW_ALTIVEC
+#include "smith_waterman_altivec.h"
+#endif
+#ifdef SW_SSE2
+#include "smith_waterman_sse2.h"
+#endif
+
+struct swstr {int H, E;};	/* one scoring-row cell per query position: H = diag (match) score, E = up-gap score */
+
+extern void init_karlin(const unsigned char *aa0, int n0, struct pstruct *ppst,
+			double *aa0_f, double **kp);
+extern int do_karlin(const unsigned char *aa1, int n1,
+		     int **pam2, const struct pstruct *ppst,
+		     double *aa0_f, double *kar_p, double *lambda, double *H);
+
+extern int sw_walign (int **pam2p, int n0,
+		      const unsigned char *aa1, int n1,
+		      int q, int r,
+		      struct swstr *ss,
+		      struct a_res_str *a_res
+		      );
+
+extern struct a_res_str *
+nsw_malign (int ***pam2p, int pam_ix, int n0,
+	    const unsigned char *aa1, int n1,
+	    int score_thresh, int max_res,
+	    int gdelval, int ggapval, 
+	    struct swstr *ss, 
+	    struct a_res_str *cur_ares,
+	    int (*fn_walign)
+	    (
+	     int **pam2p, int n0,
+	     const unsigned char *aa1, int n1,
+	     int q, int r,
+	     struct swstr *ss,
+	     struct a_res_str *a_res
+	     ),
+	    int do_rep
+	    );
+
+void
+SIM(const unsigned char *A, /* seq1 indexed A[1..M] */
+    const unsigned char *B, /* seq2 indexed B[1..N] */
+    int M, int N,		/* len seq1, seq2 */
+    struct pstruct *ppst,	/* parameters */
+    int nseq,			/* nseq - number of different sequences */
+    int mini_score,		/* cut-off score */
+    int max_count,		/* number of alignments */
+    struct a_res_str *a_res);	/* alignment result structure */
+
+static int
+FLOCAL_ALIGN(const unsigned char *aa0, const unsigned char *aa1,
+	     int n0, int n1, int low, int up,
+	     int **W, int GG,int HH, int MW,
+	     struct f_struct *f_str);
+
+extern void aancpy(char *to, char *from, int count, struct pstruct *ppst);
+
+int same_seq(const unsigned char *aa0, int n0,
+	     const unsigned char *aa1, int n1);
+
+static int
+prof_score(const unsigned char *aa1, int n0, int *pwaa_s);
+
+/* initialize for Smith-Waterman optimal score: allocate *f_arg and build
+   the query scoring profiles; exits (1) if an allocation fails */
+void
+init_work (unsigned char *aa0, int n0,
+	   struct pstruct *ppst,
+	   struct f_struct **f_arg)
+{
+  int ip;
+  int *pwaa_s, *pwaa_a;
+  int e, f, i, j;
+  struct f_struct *f_str;
+  int **pam2p;
+  struct swstr *ss;
+  int nsq;
+
+#if defined(SW_ALTIVEC) || defined(SW_SSE2)
+  int l, data, bias;
+  unsigned char *  pc;
+  unsigned short * ps;
+  int  overflow;
+
+  int n_count;
+  int col_len;
+#endif
+
+  /* nsqx is used in both cases: set for lower-case for memory mapped
+     DBs with lower case encoding; ext_sq_set only selects pam index ip */
+  nsq = ppst->nsqx; ip = (ppst->ext_sq_set) ? 1 : 0;
+
+   /* allocate space for function globals */
+   if ((f_str = (struct f_struct *)calloc(1,sizeof(struct f_struct)))
+       == NULL) {
+     fprintf (stderr, "cannot allocate f_str\n");
+     exit (1);
+   }
+
+   if((ppst->zsflag%10) == 6) {
+     f_str->kar_p = NULL;
+     init_karlin(aa0, n0, ppst, &f_str->aa0_f[0], &f_str->kar_p);
+   }
+  
+   /* allocate space for the scoring arrays */
+   if ((ss = (struct swstr *) calloc (n0+2, sizeof (struct swstr)))
+       == NULL) {
+     fprintf (stderr, "cannot allocate ss array %3d\n", n0);
+     exit (1);
+   }
+   ss++;	/* close_work() undoes this offset before free() */
+
+   ss[n0].H = -1;	/* this is used as a sentinel - normally H >= 0 */
+   ss[n0].E = 1;
+   f_str->ss = ss;
+
+   /* initialize variable (-S) pam matrix */
+   if ((f_str->waa_s= (int *)calloc((nsq+1)*(n0+1),sizeof(int))) == NULL) {
+     fprintf(stderr,"cannot allocate waa_s array %3d\n",nsq*n0);
+     exit(1);
+   }
+
+   /* initialize pam2p[1] pointers */
+   if ((f_str->pam2p[1]= (int **)calloc((n0+1),sizeof(int *))) == NULL) {
+     fprintf(stderr,"cannot allocate pam2p[1] array %3d\n",n0);
+     exit(1);
+   }
+
+   pam2p = f_str->pam2p[1];
+   if ((pam2p[0]=(int *)calloc((nsq+1)*(n0+1),sizeof(int))) == NULL) {
+     fprintf(stderr,"cannot allocate pam2p[1][] array %3d\n",nsq*n0);
+     exit(1);
+   }
+
+   for (i=1; i<n0; i++) {
+     pam2p[i]= pam2p[0] + (i*(nsq+1));
+   }
+
+   /* initialize universal (alignment) matrix */
+   if ((f_str->waa_a= (int *)calloc((nsq+1)*(n0+1),sizeof(int))) == NULL) {
+     fprintf(stderr,"cannot allocate waa_a struct %3d\n",nsq*n0);
+     exit(1);
+   }
+   
+   /* initialize pam2p[0] pointers */
+   if ((f_str->pam2p[0]= (int **)calloc((n0+1),sizeof(int *))) == NULL) {
+     fprintf(stderr,"cannot allocate pam2p[0] array %3d\n",n0);
+     exit(1);
+   }
+
+   pam2p = f_str->pam2p[0];
+   if ((pam2p[0]=(int *)calloc((nsq+1)*(n0+1),sizeof(int))) == NULL) {
+     fprintf(stderr,"cannot allocate pam2p[0][] array %3d\n",nsq*n0);
+     exit(1);
+   }
+
+   for (i=1; i<n0; i++) {
+     pam2p[i]= pam2p[0] + (i*(nsq+1));
+   }
+
+   /* 
+      pwaa effectively has a sequence profile --
+       pwaa[0..n0-1] has pam score for residue 0 (-BIGNUM)
+       pwaa[n0..2n0-1] has pam scores for amino acid 1 (A)
+       pwaa[2n0..3n0-1] has pam scores for amino acid 2 (R), ...
+
+       thus: pwaa = f_str->waa_s + (*aa1p++)*n0; sets up pwaa so that
+       *pwaa++ rapidly moves through the scores of the aa1p[] position
+       without further indexing
+
+       For a real sequence profile, pwaa[0..n0-1] vs ['A'] could have
+       a different score in each position.
+   */
+
+   pwaa_s = f_str->waa_s;
+   pwaa_a = f_str->waa_a;
+   if (ppst->pam_pssm) {
+     for (e = 0; e <=nsq; e++)	{	/* for each residue in the alphabet */
+       for (f = 0; f < n0; f++) {	/* for each position in aa0 */
+	 *pwaa_s++ = f_str->pam2p[ip][f][e] = ppst->pam2p[ip][f][e];
+	 *pwaa_a++ = f_str->pam2p[0][f][e]  = ppst->pam2p[0][f][e];
+       }
+     }
+   }
+   else {	/* initialize scanning matrix */
+     for (e = 0; e <=nsq; e++)	/* for each residue in the alphabet */
+       for (f = 0; f < n0; f++)	{	/* for each position in aa0 */
+	 *pwaa_s++ = f_str->pam2p[ip][f][e]= ppst->pam2[ip][aa0[f]][e];
+	 *pwaa_a++ = f_str->pam2p[0][f][e] = ppst->pam2[0][aa0[f]][e];
+       }
+   }
+
+#if defined(SW_ALTIVEC)
+
+   /* First we allocate memory for the workspace - i.e. the single row
+    * of storage for H/F. Since this might be run on Linux or AIX too,
+    * we don't assume anything about the memory allocation but align
+    * it ourselves.  We need two vectors (16 bytes each) per element,
+    * and some padding space to make it cache-line aligned.
+
+    * MAXTST+MAXLIB is longest allowed database sequence length...
+    * this should be m_msg.max_tot, but m_msg is not available, but
+    * ppst->maxlen has maxn, which is appropriate.
+    */
+
+     f_str->workspace_memory  = (void *)malloc(2*16*(ppst->maxlen+SEQ_PAD)+256);
+     f_str->workspace  = (void *) ((((size_t) f_str->workspace_memory) + 255) & (~0xff));
+   /* We always use a scoring profile in altivec, but the layout is a bit strange 
+    * in order to optimize memory access order and thus cache efficiency.
+    * Normally we first try 8-bit scoring in altivec, and if this leads to overflow
+    * we recompute the score with 16-bit accuracy. Because of this we need to construct
+    * two score profiles.
+    * Since altivec always loads 16 bytes from aligned memory, corresponding to 8 or 16 
+    * elements (for 16 and 8 bit scoring, respectively), we organize the scoring 
+    * profile like this for 8-bit accuracy:
+    *
+    * 1. The profile starts on 256-byte aligned memory (cache line on G5 is 128 bytes).
+    * 2. First we have the score for the full alphabet for the first 16 residues of
+    *    the query, i.e. positions 0-15 are the scores for the first 16 query letters
+    *    vs. the first in the alphabet, positions 16-31 the scores for the same 16
+    *    query positions against alphabet letter two, etc.
+    * 3. After alphabet_size*16bytes we start with the scores for residues 16-31 in
+    *    the query, organized in the same way.
+    * 4. At the end of the query sequence, we pad the scoring to the next 16-tuple
+    *    with neutral scores.
+    * 5. The total size of the profile is thus alphabet_size*N, where N is the 
+    *    size of the query rounded up to the next 16-tuple.
+    *
+    * The word (16-bit) profile is identical, but scores are stored as 8-tuples.
+    */
+
+   f_str->word_score_memory = (void *)malloc(10*2*(nsq+2)*(n0+1+16)+256);
+   f_str->byte_score_memory = (void *)malloc(10*(nsq+2)*(n0+1+16)+256);
+
+   f_str->word_score = (unsigned short *) ((((size_t) f_str->word_score_memory) + 255) & (~0xff));
+   f_str->byte_score = (unsigned char *) ((((size_t) f_str->byte_score_memory) + 255) & (~0xff));
+   if (!f_str->workspace_memory || !f_str->word_score_memory || !f_str->byte_score_memory)
+     { fprintf(stderr,"cannot allocate Altivec workspace/profiles\n"); exit(1); }
+   overflow = 0;
+   if (ppst->pam_pssm) {
+     /* Use a position-specific scoring profile. 
+      * This is essentially what we are going to construct anyway, but we'll
+      * reorder it to suit altivec.
+      */       
+     bias = 127;
+     for(i = 1; i < nsq ; i++) {
+	 for(j = 0; j < n0 ; j++) {
+	     data = ppst->pam2p[ip][j][i];
+	     if(data<bias) bias = data;
+           }
+       }
+
+     /* Fill our specially organized byte- and word-size scoring arrays. */
+     ps = f_str->word_score;
+     for(f = 0; f<n0 ; f+=8) {
+       /* e=0 */
+       for(i=0 ; i<8 ; i++) {
+	 *ps++ = (unsigned short) 0;
+       }
+       /* for each chunk of 8 residues in our query */
+       for(e = 1; e<=nsq; e++) {
+	 for(i=0 ; i<8 ; i++) {
+	   l = f + i;
+	   if(l<n0) {
+	     data = ppst->pam2p[ip][l][e] - bias;
+	   }
+	   else {
+	     data = 0;
+	   }
+	   *ps++ = (unsigned short)data;
+	 }
+       }
+     }
+     pc = f_str->byte_score;
+     for(f = 0; f<n0 ; f+=16) {
+       /* e=0 */
+       for(i=0 ; i<16 ; i++) {
+	 *pc++ = (unsigned char)0;
+       }       
+           
+       for(e = 1; e<=nsq; e++) {
+	 for(i=0 ; i<16 ; i++) {
+	   l = f + i;
+	   if(l<n0) {
+	     data = ppst->pam2p[ip][l][e] - bias;
+	   }
+	   else {
+	     data = 0;
+	   }
+	   if(data>255) {
+	     /*
+	     printf("Fatal error. data: %d bias: %d, position: %d/%d, Score out of range for 8-bit Altivec/VMX datatype.\n",data,bias,l,e);
+	     exit(1);
+	     */
+	     overflow = 1;
+	   }
+	   *pc++ = (unsigned char)data;
+	 }
+       }
+     }
+   }
+   else {
+     /* Classical simple substitution matrix */
+     /* Find the bias to use in the substitution matrix */
+     bias = 127;
+     for(i = 1; i < nsq ; i++) {
+       for(j = 1; j < nsq ; j++) {
+	 data = ppst->pam2[ip][i][j];
+#ifdef DEBUG
+	 if (data < -1000) {
+	   fprintf(stderr, "*** low data: %d [%d][%d][%d]\n",data,ip,i,j);
+	 }
+#endif
+	 if(data<bias) bias = data;
+       }
+     }
+     /* Fill our specially organized byte- and word-size scoring arrays. */
+     ps = f_str->word_score;
+     for(f = 0; f<n0 ; f+=8) {
+       /* e=0 */
+       for(i=0 ; i<8 ; i++) {
+	 *ps++ = (unsigned short) 0;
+       }       
+       /* for each chunk of 8 residues in our query */
+       for(e = 1; e<=nsq; e++) {
+	 for(i=0 ; i<8 ; i++) {
+	   l = f + i;
+	   if(l<n0) {
+	     data = ppst->pam2[ip][aa0[l]][e] - bias;
+	   }
+	   else {
+	     data = 0;
+	   }
+	   *ps++ = (unsigned short)data;
+	 }
+       }
+     }
+     pc = f_str->byte_score;
+     for(f = 0; f<n0 ; f+=16) {
+       /* e=0 */
+       for(i=0 ; i<16 ; i++) {
+	 *pc++ = (unsigned char)0;
+       }
+           
+       for(e = 1; e<=nsq; e++) {
+	 for(i=0 ; i<16 ; i++) {
+	   l = f + i;
+	   if (l<n0) {
+	     data = ppst->pam2[ip][aa0[l]][e] - bias;
+	   }
+	   else {
+	     data = 0;
+	   }
+	   if(data>255) {
+	     /*
+	     printf("Fatal error. Score out of range for 8-bit Altivec/VMX datatype.\n");
+	     exit(1);
+	     */
+	     overflow = 1;
+	   }
+	   *pc++ = (unsigned char)data;
+	 }
+       }
+     }
+   }
+       
+   f_str->bias = (unsigned char) (-bias);
+   f_str->alphabet_size = nsq+1;
+
+   /* Some variable to keep track of how many 8-bit runs we need to rerun
+    * in 16-bit accuracy. If there are too many reruns it can be faster
+    * to use 16-bit alignments directly. 
+    */
+   
+   /* We can only do 8-bit alignments if the scores were small enough. */
+   if(overflow==0) f_str->try_8bit   = 1;
+   else f_str->try_8bit   = 0;
+
+   f_str->done_8bit  = 0;
+   f_str->done_16bit = 0;
+       
+#endif /* SW_ALTIVEC */
+
+#if defined(SW_SSE2)
+   /* First we allocate memory for the workspace - i.e. two rows for H and
+    * one row for F.  We also need enough space to hold a temporary
+    * scoring profile which will be query_length * 16 (sse2 word length).
+    * Since this might be run on Linux or AIX too, we don't assume 
+    * anything about the memory allocation but align it ourselves.
+    */
+    f_str->workspace_memory  = (void *)malloc(3*16*(MAXTST+MAXLIB+32)+256);
+    f_str->workspace  = (void *) ((((size_t) f_str->workspace_memory) + 255) & (~0xff));
+
+   /* We always use a scoring profile for the SSE2 implementation, but the layout
+    * is a bit strange.  The scoring profile is parallel to the query, but is
+    * accessed in a striped pattern.  The query is divided into equal length
+    * segments.  The number of segments is equal to the number of elements
+    * processed in the SSE2 register.  For 8-bit calculations, the query will
+    * be divided into 16 equal length parts.  If the query is not long enough
+    * to fill the last segment, it will be filled with neutral weights.  The
+    * first element in the SSE register will hold a value from the first segment,
+    * the second element of the SSE register will hold a value from the
+    * second segment and so on.  So if the query length is 288, then each
+    * segment will have a length of 18.  So the first 16 bytes will  have
+    * the following weights: Q1, Q19, Q37, ... Q271; the next 16 bytes will
+    * have the following weights: Q2, Q20, Q38, ... Q272; and so on until
+    * all parts of all segments have been written.  The last segment will
+    * have the following weights: Q18, Q36, Q54, ... Q288.  This will be
+    * done for the entire alphabet.
+    */
+
+    f_str->word_score_memory = (void *)malloc((n0 + 32) * sizeof (short) * (nsq + 1) + 256);
+    f_str->byte_score_memory = (void *)malloc((n0 + 32) * sizeof (char) * (nsq + 1) + 256);
+
+    f_str->word_score = (unsigned short *) ((((size_t) f_str->word_score_memory) + 255) & (~0xff));
+    f_str->byte_score = (unsigned char *) ((((size_t) f_str->byte_score_memory) + 255) & (~0xff));
+    if (!f_str->workspace_memory || !f_str->word_score_memory || !f_str->byte_score_memory)
+      { fprintf(stderr,"cannot allocate SSE2 workspace/profiles\n"); exit(1); }
+    overflow = 0;
+    if (ppst->pam_pssm) {
+        /* Use a position-specific scoring profile. 
+        * This is essentially what we are going to construct anyway, but we'll
+        * reorder it to suit sse2.
+        */       
+        bias = 127;
+        for (i = 1; i < nsq ; i++) {
+            for (j = 0; j < n0 ; j++) {
+                data = ppst->pam2p[ip][j][i];
+                if (data < bias) {
+                    bias = data;
+                }
+            }
+        }
+
+        /* Fill our specially organized byte- and word-size scoring arrays. */
+        ps = f_str->word_score;
+        col_len = (n0 + 7) / 8;
+        n_count = (n0 + 7) & 0xfffffff8;
+        for (f = 0; f < n_count; ++f) {
+            *ps++ = 0;
+        }
+        for (f = 1; f < nsq ; f++) {
+            for (e = 0; e < col_len; e++) {
+                for (i = e; i < n_count; i += col_len) {
+		  if ( i < n0) { data = ppst->pam2p[ip][i][f];}
+		  else {data = 0;}
+		  *ps++ = (unsigned short)data;
+                }
+            }
+        }
+        pc = f_str->byte_score;
+        col_len = (n0 + 15) / 16;
+        n_count = (n0 + 15) & 0xfffffff0;
+        for (f = 0; f < n_count; ++f) {
+            *pc++ = 0;
+        }
+        for (f = 1; f < nsq ; f++) {
+            for (e = 0; e < col_len; e++) {
+                for (i = e; i < n_count; i += col_len) {
+		  if ( i < n0 ) { data = ppst->pam2p[ip][i][f] - bias;}
+		  else {data = 0 - bias;}
+		  if (data > 255) {
+		    printf("Fatal error. data: %d bias: %d, position: %d/%d, "
+			   "Score out of range for 8-bit SSE2 datatype.\n",
+			   data, bias, f, e);
+		    exit(1);
+		  }
+		  *pc++ = (unsigned char)data;
+		}
+	    }
+        }
+    }
+    else 
+    {
+        /* Classical simple substitution matrix */
+        /* Find the bias to use in the substitution matrix */
+        bias = 127;
+        for (i = 1; i < nsq ; i++) {
+            for (j = 1; j < nsq ; j++) {
+                data = ppst->pam2[ip][i][j];
+		if (data < -128) {
+		  fprintf(stderr,"*** ERROR *** data out of range: %d[%d][%d,%d]\n",
+			  data, ip, i, j);
+		}
+                if (data < bias) {
+                    bias = data;
+                }
+            }
+        }
+
+        /* Fill our specially organized byte- and word-size scoring arrays. */
+        ps = f_str->word_score;
+        col_len = (n0 + 7) / 8;
+        n_count = (n0 + 7) & 0xfffffff8;
+        for (f = 0; f < n_count; ++f) {
+            *ps++ = 0;
+        }
+        for (f = 1; f < nsq ; f++) {
+            for (e = 0; e < col_len; e++) {
+                for (i = e; i < n_count; i += col_len) {
+                    if (i >= n0) {
+                        data = 0;
+                    } else {
+                        data = ppst->pam2[ip][aa0[i]][f];
+                    }
+                    *ps++ = (unsigned short)data;
+                }
+            }
+        }
+
+        pc = f_str->byte_score;
+        col_len = (n0 + 15) / 16;
+        n_count = (n0 + 15) & 0xfffffff0;
+        for (f = 0; f < n_count; ++f) {
+            *pc++ = 0;
+        }
+        for (f = 1; f < nsq ; f++) {
+            for (e = 0; e < col_len; e++) {
+                for (i = e; i < n_count; i += col_len) {
+                    if (i >= n0) {
+                        data = -bias;
+                    } else {
+                        data = ppst->pam2[ip][aa0[i]][f] - bias;
+                    }
+                    if (data > 255) {
+                        printf("Fatal error. data: %d bias: %d, position: %d/%d, "
+                               "Score out of range for 8-bit SSE2 datatype.\n",
+                               data, bias, f, e);
+                        exit(1);
+                    }
+                    *pc++ = (unsigned char)data;
+                }
+            }
+        }
+    }
+       
+    f_str->bias = (unsigned char) (-bias);
+    f_str->alphabet_size = nsq+1;
+
+    /* Some variable to keep track of how many 8-bit runs we need to rerun
+     * in 16-bit accuracy. If there are too many reruns it can be faster
+     * to use 16-bit alignments directly. 
+     */
+   
+    /* We can only do 8-bit alignments if the scores were small enough. */
+    f_str->try_8bit = (overflow == 0) ? 1 : 0;
+
+    f_str->done_8bit  = 0;
+    f_str->done_16bit = 0;
+#endif /* SW_SSE2 */
+
+    /* minimum allocation for alignment */
+    f_str->max_res =  max(3*n0/2,MIN_RES);
+
+    *f_arg = f_str;
+}
+
+void close_work (const unsigned char *aa0, int n0,
+		 struct pstruct *ppst,
+		 struct f_struct **f_arg)
+{
+  /* release the buffers init_work() attached to *f_arg; no-op when NULL */
+  struct f_struct *fs = *f_arg;
+
+  if (fs == NULL) return;
+
+  if (fs->kar_p != NULL) free(fs->kar_p);
+
+  free(fs->ss - 1);		/* init_work() stored the calloc() pointer + 1 */
+  free(fs->waa_a);
+  free(fs->pam2p[0][0]);
+  free(fs->pam2p[0]);
+  free(fs->waa_s);
+  free(fs->pam2p[1][0]);
+  free(fs->pam2p[1]);
+
+#if defined(SW_ALTIVEC) || defined(SW_SSE2)
+  free(fs->workspace_memory);
+  free(fs->word_score_memory);
+  free(fs->byte_score_memory);
+#endif
+
+  free(fs);
+  *f_arg = NULL;
+}
+
+
+/* pstring1 is a message to the manager, currently 512 */
+/*void get_param(struct pstruct *ppst,char *pstring1)*/
+void    get_param (const struct pstruct *ppst,
+		   char **pstring1, char *pstring2,
+		   struct score_count_s *s_cnt_info)
+{
+  /* label for the Smith-Waterman variant selected at compile time;
+     SW_SSE2 is tested first because its label wins when both are defined */
+#if defined(SW_SSE2)
+  const char *alg_name = "Smith-Waterman (SSE2, Michael Farrar 2006)";
+#elif defined(SW_ALTIVEC)
+  const char *alg_name = "Smith-Waterman (Altivec/VMX, Erik Lindahl 2004)";
+#else
+  const char *alg_name = "Smith-Waterman (PGopt)";
+#endif
+
+  /* suffixes appended when pam_pssm / ext_sq_set are set */
+  const char *psi_tag = (ppst->pam_pssm) ? "-PSI" : "";
+  const char *ext_tag = (ppst->ext_sq_set) ? "xS" : "\0";
+
+  /* pstring1[0]: algorithm and version; pstring1[1]: matrix and gaps */
+  sprintf (pstring1[0], "%s (%s)", alg_name, verstr);
+#ifdef OLD_FASTA_GAP
+  sprintf (pstring1[1], "%s matrix%s (%d:%d)%s, gap-penalty: %d/%d",
+#else
+  sprintf (pstring1[1], "%s matrix%s (%d:%d)%s, open/ext: %d/%d",
+#endif
+	   ppst->pam_name, psi_tag, ppst->pam_h, ppst->pam_l,
+	   ext_tag, ppst->gdelval, ppst->ggapval);
+
+  /* pstring2 is optional: when supplied it receives the same values
+     as "; key: value" lines */
+  if (pstring2 == NULL) return;
+
+#ifdef OLD_FASTA_GAP
+  sprintf(pstring2,"; pg_name_alg: %s\n; pg_ver_rel: %s\n; pg_matrix: %s%s (%d:%d)%s\n; pg_gap-pen: %d %d\n",
+#else
+  sprintf(pstring2,"; pg_name_alg: %s\n; pg_ver_rel: %s\n; pg_matrix: %s%s (%d:%d)%s\n; pg_open-ext: %d %d\n",
+#endif
+	  alg_name, verstr, ppst->pam_name, psi_tag, ppst->pam_h, ppst->pam_l,
+	  ext_tag, ppst->gdelval, ppst->ggapval);
+}
+
+void do_work (const unsigned char *aa0, int n0,
+	      const unsigned char *aa1, int n1,
+	      int frame,
+	      const struct pstruct *ppst, struct f_struct *f_str,
+	      int qr_flg, int shuff_flg, struct rstruct *rst,
+	      struct score_count_s *sc_info)
+{
+  int     score;
+  double lambda, H;
+  int i;
+  /* score aa0 vs aa1 with the backend chosen at compile time; result goes to rst->score[0], score[1] and score[2] are set to 0 */
+#ifdef LALIGN
+  if (same_seq(aa0,n0,aa1,n1)) {
+    rst->score[0] = prof_score(aa1, n0, f_str->waa_s);
+    return;
+  }
+#endif
+  /* bookkeeping: mark the result valid and count this score */
+  rst->alg_info = 0;
+  rst->valid_stat = 1;
+  sc_info->s_cnt[0]++;
+  sc_info->tot_scores++;
+
+#ifdef SW_ALTIVEC
+  if(f_str->try_8bit)
+  {
+      score = smith_waterman_altivec_byte(aa0,
+                                          f_str->byte_score,
+                                          n0,
+                                          aa1,
+                                          n1,
+                                          f_str->bias,
+#ifndef OLD_FASTA_GAP
+                                          -(ppst->gdelval + ppst->ggapval),
+#else
+                                          -ppst->gdelval,
+#endif
+                                          -ppst->ggapval,
+                                          f_str);
+      
+      f_str->done_8bit++;
+      
+      if(score>=255)
+      {
+          /* Overflow, so we have to redo it in 16 bits. */
+          score = smith_waterman_altivec_word(aa0,
+                                              f_str->word_score,
+                                              n0,
+                                              aa1,
+                                              n1,
+                                              f_str->bias,
+#ifndef OLD_FASTA_GAP
+                                              -(ppst->gdelval + ppst->ggapval),
+#else
+                                              -ppst->gdelval,
+#endif
+                                              -ppst->ggapval,
+                                              f_str);
+          
+          /* The 8 bit version is roughly 50% faster than the 16 bit version,
+           * so we are fine if less than about 1/3 of the runs have to
+           * be rerun with 16 bits. If it is more, and we have tried at least
+           * 500 sequences, we switch off the 8-bit mode.
+           */
+          f_str->done_16bit++;
+          if(f_str->done_8bit>500 && (3*f_str->done_16bit)>(f_str->done_8bit))
+              f_str->try_8bit = 0;
+      }
+  }
+  else
+  { 
+      /* Just use the 16-bit altivec version directly */
+      score = smith_waterman_altivec_word(aa0,
+                                          f_str->word_score,
+                                          n0,
+                                          aa1,
+                                          n1,
+                                          f_str->bias,
+#ifndef OLD_FASTA_GAP
+                                          -(ppst->gdelval + ppst->ggapval),
+#else
+                                          -ppst->gdelval,
+#endif
+                                          -ppst->ggapval,
+                                          f_str);
+  }      
+
+#endif /* SW_ALTIVEC */
+
+#if defined(SW_SSE2)
+
+  if(f_str->try_8bit)
+  {
+      score = smith_waterman_sse2_byte(aa0,
+                                       f_str->byte_score,
+                                       n0,
+                                       aa1,
+                                       n1,
+                                       f_str->bias,
+#ifndef OLD_FASTA_GAP
+                                       -(ppst->gdelval + ppst->ggapval),
+#else
+                                       -ppst->gdelval,
+#endif
+                                       -ppst->ggapval,
+                                       f_str);
+      
+      f_str->done_8bit++;
+      
+      if(score>=255)
+      {
+          /* Overflow, so we have to redo it in 16 bits (the word call takes no bias argument, unlike the byte call). */
+          score = smith_waterman_sse2_word(aa0,
+                                           f_str->word_score,
+                                           n0,
+                                           aa1,
+                                           n1,
+#ifndef OLD_FASTA_GAP
+                                           -(ppst->gdelval + ppst->ggapval),
+#else
+                                           -ppst->gdelval,
+#endif
+                                           -ppst->ggapval,
+                                           f_str);
+          
+          /* The 8 bit version is roughly 50% faster than the 16 bit version,
+           * so we are fine if less than about 1/3 of the runs have to
+           * be rerun with 16 bits. If it is more, and we have tried at least
+           * 500 sequences, we switch off the 8-bit mode.
+           */
+          f_str->done_16bit++;
+          if(f_str->done_8bit>500 && (3*f_str->done_16bit)>(f_str->done_8bit))
+              f_str->try_8bit = 0;
+      }
+  }
+  else
+  { 
+      /* Just use the 16-bit SSE2 version directly */
+      score = smith_waterman_sse2_word(aa0,
+                                       f_str->word_score,
+                                       n0,
+                                       aa1,
+                                       n1,
+#ifndef OLD_FASTA_GAP
+                                       -(ppst->gdelval + ppst->ggapval),
+#else
+                                       -ppst->gdelval,
+#endif
+                                       -ppst->ggapval,
+                                       f_str);
+  }      
+#endif /* SW_SSE2 */
+
+#if !defined(SW_ALTIVEC) && !defined(SW_SSE2)
+  /* scalar path: FLOCAL_ALIGN() receives gap-open (GG) and gap-extend (HH) as positive values */
+  score = FLOCAL_ALIGN(aa0,aa1,n0,n1,0,0,
+                       NULL,
+#ifdef OLD_FASTA_GAP
+                       -(ppst->gdelval - ppst->ggapval),
+#else
+                       -ppst->gdelval,
+#endif
+                       -ppst->ggapval,0,f_str);
+#endif /* neither SW_ALTIVEC nor SW_SSE2 */
+
+  rst->score[0] = score;
+  rst->score[1] = rst->score[2] = 0;
+  /* Karlin statistics are filled in only for zsflag%10 == 6 and a positive do_karlin() result; otherwise comp = H = -1.0 */
+  if(((ppst->zsflag % 10) == 6) &&
+     (do_karlin(aa1, n1, ppst->pam2[0], ppst,f_str->aa0_f, 
+		f_str->kar_p, &lambda, &H)>0)) {
+    rst->comp = 1.0/lambda;
+    rst->H = H;
+  }
+  else {rst->comp = rst->H = -1.0;}
+
+}
+
+static int
+FLOCAL_ALIGN(const unsigned char *aa0, const unsigned char *aa1,
+	     int n0, int n1, int low, int up,
+	     int **W, int GG,int HH, int MW,
+	     struct f_struct *f_str) {
+  /* score-only scan of aa1 against the f_str->waa_s profile, one row (f_str->ss) reused per aa1 residue; returns the largest h that exceeded GG+HH, or 0 if none did.  aa0, n1, low, up, W and MW are not referenced in this body. */
+  register int *pwaa;
+  register struct swstr *ssj;
+  struct swstr *ss;
+  register int h, e, f, p;
+  int temp, score;
+  int gap_ext, n_gap_init;
+
+  const unsigned char *aa1p;
+  ss = f_str->ss;
+  ss[n0].H = -1;	/* re-arm the end-of-row sentinel tested below as p == -1 with E > 0 */
+  ss[n0].E = 1;
+
+  n_gap_init = GG + HH;
+  gap_ext = -HH;	/* GG, HH are both positive,
+			   gap_ext penalty should be negative */
+
+  score = 0;
+  for (h=0; h<n0; h++) {	  /* initialize 0th row */
+    ss[h].H = ss[h].E = 0;
+  }
+  
+  aa1p=aa1;
+  while (*aa1p) {		/* relies on aa1[n1]==0 for EOS flag */
+    /* waa_s has the offsets for each residue in aa0 into pam2 */
+    /* waa_s has complexity (-S) dependent scores */
+    pwaa = f_str->waa_s + (*aa1p++)*n0;
+    ssj = ss;
+
+    e = f = h = p = 0;
+  zero_f:	/* in this section left-gap f==0, and is never examined */
+
+    while (1) {	/* build until h > n_gap_init (f < 0 until h > n_gap_init) */
+      		/* bump through the pam[][]'s for each of the aa1[] matches to
+	  	   aa0[], because of the way *pwaa is set up */
+
+      h = p + *pwaa++;		/* increment diag value */
+      p = ssj->H;		/* get next diag value */
+      if ((e = ssj->E) > 0 ) {	/* >0 from up-gap */
+	if (p == -1) goto next_row;	/* done, -1=ss[n0].H sentinel */
+	if (h < e) h = e;	/* up-gap better than diag */
+	else 
+	  if (h > n_gap_init) {	/* we won't be starting a new up-gap */
+	    e += gap_ext;	/* but we might be extending one */
+	    goto transition;	/* good h > n_gap_init; scan f */
+	  }
+	e += gap_ext;		/* up-gap decreased */
+	ssj->E =  (e > 0) ?  e : 0;	/* set to 0 if < 0 */
+	ssj++->H = h;		/* diag match updated */
+      }
+      else {			/* up-gap (->E) is 0 */
+	if ( h > 0) {		/* diag > 0 */
+	  if (h > n_gap_init) {	/* we won't be starting a new up-gap */
+	    e = 0;		/* and we won't be extending one */
+	    goto transition;	/* good h > n_gap_init; scan f */
+	  }
+	  ssj++->H = h;		/* update diag */
+	}
+	else ssj++->H = 0;	/* update diag to 0 */
+      }
+    }
+
+    /* here h > n_gap_init and h > e, => the next f will be > 0 */
+  transition:
+#ifdef DEBUG
+    if ( h > 10000) 
+      fprintf(stderr,"h: %d ssj: %d\n",h, (int)(ssj-ss));
+#endif
+    if ( score < h ) score = h;	/* save best score, only when h > n_gap_init */
+
+    temp = h - n_gap_init;	/* best score for starting a new gap */
+    if ( f < temp ) f = temp;	/* start a left-gap? */
+    if ( e < temp ) e = temp;	/* start an up-gap? */
+    ssj->E = ( e > 0 ) ? e : 0;	/* update up-gap */
+    ssj++->H = h;		/* update diag */
+    e = 0;
+
+    do {			/* stay here until f <= 0 */
+      h = p + *pwaa++;		/* diag + match/mismatch */
+      p = ssj->H;		/* save next (right) diag */
+
+      if ( h < f ) h = f;	/* update diag using left gap */
+      f += gap_ext;		/* update next left-gap */
+
+      if ((e = ssj->E) > 0) {	/* good up gap */
+	if (p == -1) goto next_row;	/* at the end of the row */
+	if ( h < e ) h = e;	/* update diag using up-gap */
+	else
+	  if ( h > n_gap_init ) {
+	    e += gap_ext;	/* update up gap */
+	    goto transition;	/* good diag > n_gap_init, restart */
+	  }
+	e += gap_ext;		/* update up-gap */
+	ssj->E = (e > 0) ? e : 0;	/* e must be >= 0 */
+	ssj++->H = h;		/* update diag */
+      }
+      else {			/* up-gap <= 0 */
+	if ( h > n_gap_init ) {
+	  e = 0;
+	  goto transition;	/* good diag > n_gap_init; restart */
+	}
+	ssj++->H = h;		/* update diag */
+      }
+    } while ( f > 0 );		/* while left gap f > 0  */
+    goto zero_f;		/* otherwise, go to f==0 section */
+  next_row:
+    ;
+  }		/* end while (*aa1p) {} */
+
+  return score;
+
+}		/* here we should be all done */
+
+void do_opt (const unsigned char *aa0, int n0,
+	     const unsigned char *aa1, int n1,
+	     int frame,
+	     struct pstruct *ppst, struct f_struct *f_str,
+	     struct rstruct *rst)
+{	/* do_opt - no-op in this file: the body is empty, so none of the
+	   arguments is read and *rst is left exactly as the caller
+	   passed it */
+}
+
+/* do_walign - allocate an a_res_str and have it filled with the
+   alignment(s) against aa1[0..n1-1].
+
+   Without LALIGN the work is handed to nsw_malign(), which is given
+   f_str->pam2p, f_str->ss, the negated gap penalties and &sw_walign;
+   its return value replaces a_res.  With LALIGN, ppst->nseq is set to 1
+   (same_seq() is true and show_ident is off) or 2, and SIM() fills the
+   a_res that was allocated here.
+
+   On return every element reachable through ->next has ->index set to
+   its position (0, 1, 2, ...) in the chain.
+
+   *have_ares is set to 0x3 before anything else, so it has that value
+   even when the allocation fails.
+
+   returns: head of the a_res chain, or NULL if calloc() fails */
+struct a_res_str *
+do_walign (const unsigned char *aa0, int n0,
+	   const unsigned char *aa1, int n1,
+	   int frame, int repeat_thresh,
+	   struct pstruct *ppst, 
+	   struct f_struct *f_str, 
+	   int *have_ares)
+{
+  struct a_res_str *a_res, *cur;
+  int idx;
+
+  *have_ares = 0x3;	/* set 0x2 bit to indicate local copy */
+
+  a_res = (struct a_res_str *)calloc(1, sizeof(struct a_res_str));
+  if (a_res == NULL) {
+    fprintf(stderr," [do_walign] Cannot allocate a_res");
+    return NULL;
+  }
+
+#ifdef LALIGN
+  /* identical sequences are treated as one sequence unless the
+     identity alignment was asked for */
+  if (ppst->show_ident || !same_seq(aa0, n0, aa1, n1)) {
+    ppst->nseq = 2;
+  }
+  else {
+    ppst->nseq = 1;
+  }
+
+  SIM(aa0-1, aa1-1, n0, n1, ppst, ppst->nseq, repeat_thresh, ppst->max_repeat, a_res);
+#else	/* not LALIGN */
+  a_res = nsw_malign(f_str->pam2p, (ppst->ext_sq_set ? 1 : 0), n0, aa1, n1,
+		     repeat_thresh, f_str->max_res,
+		     -ppst->gdelval, -ppst->ggapval,
+		     f_str->ss, a_res,
+		     &sw_walign, ppst->do_rep);
+#endif
+
+  /* number the alignments in chain order */
+  idx = 0;
+  cur = a_res;
+  while (cur != NULL) {
+    cur->index = idx;
+    idx++;
+    cur = cur->next;
+  }
+
+  return a_res;
+}
+
+/*
+#define XTERNAL
+#include "upam.h"
+
+void
+print_seq_prof(unsigned char *A, int M,
+	       unsigned char *B, int N,
+	       int **w, int iw, int dir) {
+  char c_max;
+  int i_max, j_max, i,j;
+
+  char *c_dir="LRlr";
+
+  for (i=1; i<=min(60,M); i++) {
+    fprintf(stderr,"%c",aa[A[i]]);
+  }
+  fprintf(stderr, - %d\n,M);
+
+  for (i=0; i<min(60,M); i++) {
+    i_max = -1;
+    for (j=1; j<21; j++) {
+      if (w[iw+i][j]> i_max) {
+	i_max = w[iw+i][j]; 
+	j_max = j;
+      }
+    }
+    fprintf(stderr,"%c",aa[j_max]);
+  }
+  fputc(':',stderr);
+
+  for (i=1; i<=min(60,N); i++) {
+    fprintf(stderr,"%c",aa[B[i]]);
+  }
+
+  fprintf(stderr," -%c: %d,%d\n",c_dir[dir],M,N);
+}
+*/
+
+/* pre_cons - when TFAST is defined, pass aa1 (length n1) and frame to
+   aatran() with f_str->aa1x as the output buffer and keep the return
+   value in f_str->n10; without TFAST the function does nothing */
+void
+pre_cons(const unsigned char *aa1,
+	 int n1,
+	 int frame,
+	 struct f_struct *f_str)
+{
+#ifdef TFAST
+  f_str->n10 = aatran(aa1, f_str->aa1x, n1, frame);
+#endif
+}
+
+/* aln_func_vals - set the scaling/orientation fields of *aln:
+   qlfact, llfact and llmult become 1; llrev and frame become 0;
+   qlrev becomes 1 when frame > 0 and 0 otherwise */
+/* call from calcons, calc_id, calc_code */
+void 
+aln_func_vals(int frame, struct a_struct *aln)
+{
+  aln->qlfact = 1;
+  aln->llmult = 1;
+  aln->llfact = 1;
+
+  aln->llrev = 0;
+  aln->qlrev = (frame > 0) ? 1 : 0;
+
+  aln->frame = 0;
+}
+
+/* calculate the 100% identical score */
+/* prof_score - walk the 0-terminated sequence aa1p and, for position
+   pos holding residue code r, add pwaa_s[r*n0 + pos] to the total
+   (the profile is laid out as one row of n0 entries per residue code)
+
+   returns: the sum; 0 for an empty sequence */
+int
+prof_score(const unsigned char *aa1p, int n0, int *pwaa_s)
+{
+  int total = 0;
+  int pos;
+
+  for (pos = 0; aa1p[pos] != 0; pos++) {
+    total += pwaa_s[aa1p[pos]*n0 + pos];
+  }
+
+  return total;
+}
+
+/* same_seq - returns 1 when the two sequences have equal length
+   (n0 == n1) and aa0 matches aa1 residue-for-residue for exactly n0
+   positions before aa0's 0 terminator or the first mismatch;
+   returns 0 otherwise */
+int same_seq(const unsigned char *aa0, int n0,
+	     const unsigned char *aa1, int n1)
+{
+  int matched;
+
+  if (n0 != n1) return 0;
+
+  /* count the leading run of identical residues, stopping at the end
+     of aa0 */
+  matched = 0;
+  while (aa0[matched] != 0 && aa0[matched] == aa1[matched]) {
+    matched++;
+  }
+
+  return (matched == n0) ? 1 : 0;
+}
diff --git a/src/dropgsw2.h b/src/dropgsw2.h
new file mode 100644
index 0000000..5878321
--- /dev/null
+++ b/src/dropgsw2.h
@@ -0,0 +1,46 @@
+/* $Id: dropgsw2.h 625 2011-03-23 17:21:38Z wrp $ */
+/* $Revision: 625 $  */
+
+/* global definitions shared by dropgsw.c and altivec.c */
+
+/* definitions for SW */
+
+struct f_struct {	/* work structure handed around as f_str; per the file
+			   comment it is shared by dropgsw.c and altivec.c */
+  struct swstr *ss;	/* passed to nsw_malign() by do_walign() */
+  int *waa_s, *waa_a;	/* waa_s: score profile, addressed as
+			   waa_s + residue*n0 by the scoring loop;
+			   waa_a: presumably the alignment counterpart
+			   - TODO confirm */
+  int **pam2p[2];	/* passed to nsw_malign() by do_walign() */
+  int max_res;		/* passed to nsw_malign() by do_walign() */
+  double aa0_f[MAXSQ];
+  double *kar_p;
+  double e_cut;
+  int show_ident;
+  int max_repeat;
+#if defined(SW_ALTIVEC) || defined(SW_SSE2)
+  unsigned char      bias;	/* this member and the rest of the #if group
+				   exist only when SW_ALTIVEC or SW_SSE2 is
+				   defined */
+  unsigned char      ceiling;
+  unsigned short *   word_score;
+  unsigned char *    byte_score;
+  void *             workspace;
+  int                alphabet_size;
+  void *             word_score_memory;
+  void *             byte_score_memory;
+  void *             workspace_memory;
+  int                try_8bit;
+  int                done_8bit;
+  int                done_16bit;
+#endif
+};
+
+#ifdef LALIGN
+void SIM(const unsigned char *A, /* seq1 indexed A[1..M] */
+	 const unsigned char *B, /* seq2 indexed B[1..N] */
+	 int M, int N,		 /* len seq1, seq2 */
+	 struct pstruct *ppst,	/* parameters */
+	 int nseq,		 /* nseq - number of different sequences */
+	 int mini_score,	 /* cut-off score */
+	 int max_count,		 /* number of alignments */
+	 struct a_res_str *a_res);	/* alignment result structure */
+
+int same_seq(const unsigned char *aa0, int n0,
+	     const unsigned char *aa1, int n1);
+#endif
diff --git a/src/dropnfa.c b/src/dropnfa.c
new file mode 100644
index 0000000..f19889b
--- /dev/null
+++ b/src/dropnfa.c
@@ -0,0 +1,2250 @@
+/* $Id: dropnfa.c $ */
+
+/* copyright (c) 1998, 1999, 2014 by William R. Pearson and The Rector &
+   Visitors of the University of Virginia */
+
+/* Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under this License is distributed on an "AS
+   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+   express or implied.  See the License for the specific language
+   governing permissions and limitations under the License. 
+*/
+
+/* 17-Sept-2008 - modified for multiple non-overlapping alignments */
+
+/* 18-Sep-2006 - removed global variables for alignment from nw_align
+   and bg_align */
+
+/* 18-Oct-2005 - converted to use a_res and aln for alignment coordinates */
+
+/* 14-May-2003 - modified to return alignment start at 0, rather than
+   1, for begin:end alignments
+*/
+
+/*
+  implements the fasta algorithm, see:
+
+  W. R. Pearson, D. J. Lipman (1988) "Improved tools for biological
+  sequence comparison" Proc. Natl. Acad. Sci. USA 85:2444-2448
+
+  This version uses Smith-Waterman for final protein alignments
+
+  W. R. Pearson (1996) "Effective protein sequence comparison"
+  Methods Enzymol. 266:227-258
+
+
+  26-April-2001 - -DGAP_OPEN redefines -f, as gap open penalty
+
+  4-Nov-2001 - modify spam() while(1).
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <math.h>
+
+#include "defs.h"
+#include "param.h"
+
+/* this must be consistent with upam.h */
+#define MAXHASH 32
+#define NMAP MAXHASH+1
+
+/* globals for fasta */
+#define MAXWINDOW 64
+
+#ifndef MAXSAV
+#define MAXSAV 10
+#endif
+
+static char *verstr="3.8 Nov 2011";
+
+extern void w_abort(char *, char *);
+int shscore(const unsigned char *aa0, int n0, int **pam2);
+extern void init_karlin(const unsigned char *aa0, int n0, struct pstruct *ppst,
+			double *aa0_f, double **kp);
+extern void init_karlin_a(struct pstruct *, double *, double **);
+extern int do_karlin(const unsigned char *, int n1, int **,
+		     const struct pstruct *, double *, double *,
+		     double *, double *);
+extern int ELK_to_s(double E_join, int n0, int n1, double Lambda, double K, double H);
+
+extern void aancpy(char *to, char *from, int count, struct pstruct *ppst);
+char *ckalloc(size_t);
+unsigned long adler32(unsigned long, const unsigned char *, unsigned int);
+
+#ifdef TFASTA
+extern int aatran(const unsigned char *ntseq, unsigned char *aaseq, int maxs, int frame);
+#endif
+
+#include "dropnfa.h"
+
+#define DROP_INTERN
+#include "drop_func.h"
+
+struct swstr { int H, E;};	/* one element of the ss[] scoring array that
+				   init_work() allocates; init_work() stores
+				   H = -1, E = 1 in ss[n0] as an end sentinel */
+
+int
+dmatch (const unsigned char *aa0, int n0,
+	const unsigned char *aa1, int n1,
+	int hoff, int window, 
+	int **pam2, int gdelval, int ggapval,
+	struct f_struct *f_str);
+
+
+extern int sw_walign (int **pam2p, int n0,
+		      const unsigned char *aa1, int n1,
+		      int q, int r,
+		      struct swstr *ss,
+		      struct a_res_str *a_res
+		      );
+
+int bd_walign (const unsigned char *aa0, int n0,
+	       const unsigned char *aa1, int n1,
+	       struct pstruct *ppst, 
+	       struct f_struct *f_str, int hoff,
+	       struct a_res_str *a_res);
+
+struct a_res_str *
+merge_ares_chains(struct a_res_str *cur_ares, 
+		  struct a_res_str *tmpl_ares,
+		  int score_ix, const char *msg);
+
+/* initialize for fasta */
+
+/* init_work - build the per-query work structure and return it in *f_arg.
+
+   aa0, n0 - encoded query sequence and its length
+   ppst    - search parameters; when use_E_thresholds is off,
+	     param_u.fa.cgap and (unless optcut_set == 1)
+	     param_u.fa.optcut are computed and stored here, and
+	     param_u.fa.pgap is always set
+   f_arg   - receives the newly allocated f_struct
+
+   The function:
+     - optionally sets up Karlin-Altschul data (zsflag%10 == 6, not TFASTA)
+     - builds the ktup hash table of the query (harr[], link[]) and the
+       word/residue score tables (pamh2[], pamh1[])
+     - allocates the diagonal array, the band (bss) and row (ss) work
+       arrays; ss[n0] is given H = -1, E = 1 as an end sentinel
+     - fills the waa_s/waa_a profiles and the pam2p[0]/pam2p[1] tables
+       from ppst->pam2p (pam_pssm set) or ppst->pam2 (otherwise)
+
+   Any allocation failure prints a message to stderr and calls exit(1). */
+void
+init_work (unsigned char *aa0, int n0, 
+	   struct pstruct *ppst,
+	   struct f_struct **f_arg
+	   )
+{
+  int mhv, phv;
+  int hmax;
+  int i0, hv;
+  int pamfact;
+  int btemp;
+  struct f_struct *f_str;
+  /* these used to be globals, but do not need to be */
+  int ktup;		/* word size examined */
+  int fact;		/* factor used to scale ktup match value */
+  int kt1;		/* ktup-1 */
+  int lkt;		/* last ktup - initially kt1, but can be increased
+			   for param_u.fa.hsq >= NMAP */
+
+  int maxn0;		/* used in band alignment */
+  int *pwaa_a, *pwaa_s;		/* pam[aa0[]] profile */
+  int i, e, f;
+  struct swstr *ss;
+  int **pam2p;
+  int nsq, ip, *hsq;
+
+  /* NOTE(review): both branches take nsq from ppst->nsqx, while the
+     maximum-hsq scan below runs to ppst->nsq - confirm this is intended */
+  if (ppst->ext_sq_set) {
+    nsq = ppst->nsqx; ip = 1;
+    hsq = ppst->hsqx;
+  }
+  else {
+    nsq = ppst->nsqx; ip = 0;
+    hsq = ppst->hsq;
+  }
+
+  /* every later statement dereferences f_str, so fail here like the
+     other allocations do */
+  f_str = (struct f_struct *)calloc(1,sizeof(struct f_struct));
+  if (f_str == NULL) {
+    fprintf (stderr, " *** cannot allocate f_str\n");
+    exit (1);
+  }
+
+#ifndef TFASTA
+  if((ppst->zsflag%10) == 6) {
+    f_str->kar_p = NULL;
+    init_karlin(aa0, n0, ppst, &f_str->aa0_f[0], &f_str->kar_p);
+  }
+#endif
+
+  if (!ppst->param_u.fa.use_E_thresholds) {	/* old fashioned thresholds */
+    btemp = 2 * ppst->param_u.fa.bestoff / 3 +
+      n0 / ppst->param_u.fa.bestscale +
+      ppst->param_u.fa.bkfact * 
+      (ppst->param_u.fa.bktup - ppst->param_u.fa.ktup);
+
+    if (ppst->nt_align) {
+      btemp = (btemp*ppst->pam_h)/5;  /* normalize to standard +5/-4 */
+    }
+    else {
+      /* correct problem produced by allowing bktup=3, which increases
+	 btemp by bkfact for proteins 2-May-2011 */
+      if (ppst->param_u.fa.ktup < 3) btemp -= ppst->param_u.fa.bkfact;
+    }
+
+    btemp = min (btemp, ppst->param_u.fa.bestmax);
+    if (btemp > 3 * n0) btemp = 3 * shscore(aa0,n0,ppst->pam2[0]) / 5;
+
+    ppst->param_u.fa.cgap = btemp + ppst->param_u.fa.bestoff / 3;
+
+    if (ppst->param_u.fa.optcut_set != 1) {
+#ifndef TFASTA
+      ppst->param_u.fa.optcut = btemp;
+#else
+      ppst->param_u.fa.optcut = (btemp*3)/2;
+#endif
+    }
+  }
+
+#ifndef OLD_FASTA_GAP
+  ppst->param_u.fa.pgap = ppst->gdelval + 2*ppst->ggapval;
+#else
+  ppst->param_u.fa.pgap = ppst->gdelval + ppst->ggapval;
+#endif
+  pamfact = ppst->param_u.fa.pamfact;
+  ktup = ppst->param_u.fa.ktup;
+  fact = ppst->param_u.fa.scfact * ktup;
+
+  if (pamfact == -1) pamfact = 0;
+  else if (pamfact == -2) pamfact = 1;
+
+  /* largest hash code below NMAP fixes the number of bits per residue */
+  for (i0 = 1, mhv = -1; i0 < ppst->nsq; i0++)
+    if (hsq[i0] < NMAP && hsq[i0] > mhv) mhv = hsq[i0];
+
+  if (mhv <= 0) {
+    fprintf (stderr, " maximum hsq <=0 %d\n", mhv);
+    exit (1);
+  }
+
+  for (f_str->kshft = 0; mhv > 0; mhv /= 2) f_str->kshft++;
+
+  /*      kshft = 2;	*/
+  kt1 = ktup - 1;
+  hv = 1;
+  for (i0 = 0; i0 < ktup; i0++) hv = hv << f_str->kshft;
+  hmax = hv;
+  f_str->hmask = (hmax >> f_str->kshft) - 1;
+
+  if ((f_str->harr = (int *) calloc (hmax, sizeof (int))) == NULL) {
+    fprintf (stderr, " *** cannot allocate hash array: hmax: %d hmask: %d\n",
+	     hmax, f_str->hmask);
+    exit (1);
+  }
+
+  if ((f_str->pamh1 = (int *) calloc (nsq+1, sizeof (int))) == NULL) {
+    fprintf (stderr, " *** cannot allocate pamh1 array nsq=%d\n",nsq);
+    exit (1);
+  }
+
+  if ((f_str->pamh2 = (int *) calloc (hmax, sizeof (int))) == NULL) {
+    fprintf (stderr, " *** cannot allocate pamh2 array hmax=%d\n",hmax);
+    exit (1);
+  }
+
+  if ((f_str->link = (int *) calloc (n0, sizeof (int))) == NULL) {
+    fprintf (stderr, " *** cannot allocate hash link array n0=%d\n",n0);
+    exit (1);
+  }
+
+  /* -1 marks "no entry" in both the hash heads and the link chain */
+  for (i0 = 0; i0 < hmax; i0++) f_str->harr[i0] = -1;
+  for (i0 = 0; i0 < n0; i0++) f_str->link[i0] = -1;
+
+  /* encode the aa0 array */
+  phv = hv = 0;
+  lkt = kt1;
+  /* restart hv, phv calculation */
+  for (i0 = 0; i0 < min(lkt,n0); i0++) {
+    if (hsq[aa0[i0]] >= NMAP) {hv=phv=0; lkt = i0+ ktup; continue;}
+    hv = (hv << f_str->kshft) + hsq[aa0[i0]];
+    phv += ppst->pam2[ip][aa0[i0]][aa0[i0]]*ktup;
+  }
+
+  for (; i0 < n0; i0++) {
+    if (hsq[aa0[i0]] >= NMAP) {
+      hv=phv=0;
+      /* restart hv, phv calculation */
+      for (lkt = i0+kt1; (i0 < lkt || hsq[aa0[i0]]>=NMAP) && i0<n0; i0++) {
+	if (hsq[aa0[i0]] >= NMAP) {
+	  hv=phv=0; 
+	  lkt = i0+ktup;
+	  continue;
+	}
+	hv = (hv << f_str->kshft) + hsq[aa0[i0]];
+	phv += ppst->pam2[ip][aa0[i0]][aa0[i0]]*ktup;
+      }
+    }
+    if (i0 >= n0) break;
+    hv = ((hv & f_str->hmask) << f_str->kshft) + hsq[aa0[i0]];
+    /* push position i0 on the front of the chain for word hv */
+    f_str->link[i0] = f_str->harr[hv];
+    f_str->harr[hv] = i0;
+    if (pamfact) {
+      f_str->pamh2[hv] = (phv += ppst->pam2[ip][aa0[i0]][aa0[i0]] * ktup);
+      /* this check should always be true, but just in case */
+      if (hsq[aa0[i0-kt1]]<NMAP)
+	phv -= ppst->pam2[ip][aa0[i0 - kt1]][aa0[i0 - kt1]] * ktup;
+    }
+    else f_str->pamh2[hv] = fact * ktup;
+  }
+
+  /* this has been modified from 0..<ppst->nsq to 1..< ppst->nsq because the
+     pam2[0][0] is now undefined for consistency with blast
+  */
+
+  if (pamfact) {
+    for (i0 = 1; i0 < nsq; i0++) {
+      f_str->pamh1[i0] = ppst->pam2[ip][i0][i0] * ktup;
+    }
+  }
+  else {
+    for (i0 = 1; i0 < nsq; i0++) {
+      f_str->pamh1[i0] = fact;
+    }
+  }
+  f_str->ndo = 0;
+  if ((f_str->diag = (struct dstruct *) calloc ((size_t)MAXDIAG,
+						sizeof (struct dstruct)))==NULL) {
+    /* the product is a size_t; cast so it matches %lu on every platform */
+    fprintf (stderr," *** cannot allocate diagonal arrays: %lu\n",
+	     (unsigned long)(MAXDIAG *sizeof (struct dstruct)));
+    exit (1);
+  }
+
+
+#ifdef TFASTA
+  if ((f_str->aa1x =(unsigned char *)calloc((size_t)ppst->maxlen+2,
+					    sizeof(unsigned char)))
+      == NULL) {
+    fprintf (stderr, " *** cannot allocate aa1x array %d\n", ppst->maxlen+2);
+    exit (1);
+  }
+  f_str->aa1x++;
+#endif
+
+  f_str->bss_size = ppst->param_u.fa.optwid*2+4;
+  f_str->bss = (struct bdstr *) calloc((size_t)ppst->param_u.fa.optwid*2+4,
+				       sizeof(struct bdstr));
+  /* check before the pointer is advanced; close_work() steps it back
+     and frees it */
+  if (f_str->bss == NULL) {
+    fprintf (stderr, " *** cannot allocate bss array %d\n",
+	     ppst->param_u.fa.optwid*2+4);
+    exit (1);
+  }
+  f_str->bss++;
+
+  /* allocate space for the scoring arrays */
+  maxn0 = n0 + 4;
+  if ((ss = (struct swstr *) calloc (maxn0, sizeof (struct swstr)))
+      == NULL) {
+    fprintf (stderr, " *** cannot allocate ss array %3d\n", n0);
+    exit (1);
+  }
+  ss++;
+
+  ss[n0].H = -1;	/* this is used as a sentinel - normally H >= 0 */
+  ss[n0].E = 1;
+  f_str->ss = ss;
+
+  /* initialize variable (-S) pam matrix */
+  if ((f_str->waa_s= (int *)calloc((nsq+1)*(n0+1),sizeof(int))) == NULL) {
+    fprintf(stderr,"*** error [%s:%d] cannot allocate waa_s array %3d\n",
+	    __FILE__, __LINE__, nsq*n0);
+    exit(1);
+  }
+
+  /* initialize pam2p[1] pointers */
+  if ((f_str->pam2p[1]= (int **)calloc((n0+1),sizeof(int *))) == NULL) {
+    fprintf(stderr,"*** error [%s:%d] cannot allocate pam2p[1] array %3d\n",
+	    __FILE__, __LINE__, n0);
+    exit(1);
+  }
+
+  pam2p = f_str->pam2p[1];
+  if ((pam2p[0]=(int *)calloc((nsq+1)*(n0+1),sizeof(int))) == NULL) {
+    fprintf(stderr,"*** error [%s:%d] cannot allocate pam2p[1][] array %3d\n",
+	    __FILE__, __LINE__, nsq*n0);
+    exit(1);
+  }
+
+  /* row i starts (nsq+1) ints after row i-1 inside the single block */
+  for (i=1; i<n0; i++) {
+    pam2p[i]= pam2p[0] + (i*(nsq+1));
+  }
+
+  /* initialize universal (alignment) matrix */
+  if ((f_str->waa_a= (int *)calloc((nsq+1)*(n0+1),sizeof(int))) == NULL) {
+    fprintf(stderr,"*** error [%s:%d] cannot allocate waa_a struct %3d\n",
+	    __FILE__, __LINE__, nsq*n0);
+    exit(1);
+  }
+   
+  /* initialize pam2p[0] pointers */
+  if ((f_str->pam2p[0]= (int **)calloc((n0+1),sizeof(int *))) == NULL) {
+    fprintf(stderr,"*** error [%s:%d] cannot allocate pam2p[0] array %3d\n",
+	    __FILE__, __LINE__, n0);
+    exit(1);
+  }
+
+  pam2p = f_str->pam2p[0];
+  if ((pam2p[0]=(int *)calloc((nsq+1)*(n0+1),sizeof(int))) == NULL) {
+    fprintf(stderr,"*** error [%s:%d] cannot allocate pam2p[0][] array %3d\n",
+	    __FILE__, __LINE__, nsq*n0);
+    exit(1);
+  }
+
+  for (i=1; i<n0; i++) {
+    pam2p[i]= pam2p[0] + (i*(nsq+1));
+  }
+
+  /* 
+     pwaa effectively has a sequence profile --
+     pwaa[0..n0-1] has pam score for residue 0 (-BIGNUM)
+     pwaa[n0..2n0-1] has pam scores for residue 1 (A)
+     pwaa[2n0..3n-1] has pam scores for residue 2 (R), ...
+
+     thus: pwaa = f_str->waa_s + (*aa1p++)*n0; sets up pwaa so that
+     *pwaa++ rapidly moves though the scores of the aa1p[] position
+     without further indexing
+
+     For a real sequence profile, pwaa[0..n0-1] vs ['A'] could have
+     a different score in each position.
+  */
+
+  if (ppst->pam_pssm) {
+    pwaa_s = f_str->waa_s;
+    pwaa_a = f_str->waa_a;
+    for (e = 0; e < nsq; e++)	{	/* for each residue in the alphabet */
+      for (f = 0; f < n0; f++) {	/* for each position in aa0 */
+	*pwaa_s++ = f_str->pam2p[ip][f][e] = ppst->pam2p[ip][f][e];
+	*pwaa_a++ = f_str->pam2p[0][f][e]  = ppst->pam2p[0][f][e];
+      }
+    }
+  }
+  else {	/* initialize scanning matrix */
+    pwaa_s = f_str->waa_s;
+    pwaa_a = f_str->waa_a;
+    for (e = 0; e <nsq; e++)	/* for each residue in the alphabet */
+      for (f = 0; f < n0; f++)	{	/* for each position in aa0 */
+	*pwaa_s++ = f_str->pam2p[ip][f][e]= ppst->pam2[ip][aa0[f]][e];
+	*pwaa_a++ = f_str->pam2p[0][f][e] = ppst->pam2[0][aa0[f]][e];
+      }
+  }
+
+  f_str->max_res = max(3*n0/2,MIN_RES);
+
+  *f_arg = f_str;
+}
+
+/* get_param - format the program/parameter description strings.
+
+   pstring1 is a message to the manager, currently 512:
+     pstring1[0] gets "<program> (<version>)" followed by " init1"
+		 (optflag off, iniflag on) or " [optimized]" (optflag on)
+     pstring1[1] gets the matrix, gap penalties, ktup, the join/opt
+		 thresholds and the band width
+   pstring2 is the same information, but in a markx==10 format; it is
+   skipped when pstring2 is NULL
+
+   The join/opt fractions are s_info->s_cnt[0] and s_info->s_cnt[2]
+   divided by s_info->tot_scores. */
+void
+get_param (const struct pstruct *ppstr, 
+	   char **pstring1, char *pstring2, struct score_count_s *s_info)
+{
+  char options_str1[128];
+  char options_str2[128];
+  double join_frac, opt_frac;
+#ifdef TFASTA
+  char *pg_str="TFASTA";
+#else
+  char *pg_str="FASTA";
+#endif
+
+  /* fraction of scored sequences that passed each threshold */
+  join_frac = (double)s_info->s_cnt[0]/(double)s_info->tot_scores;
+  opt_frac = (double)s_info->s_cnt[2]/(double)s_info->tot_scores;
+
+  if (ppstr->param_u.fa.use_E_thresholds) {
+    sprintf(options_str1,"E-join: %.2g (%.3g), E-opt: %.2g (%.3g)",
+	    ppstr->param_u.fa.E_join, join_frac,
+	    ppstr->param_u.fa.E_band_opt, opt_frac);
+    sprintf(options_str2,"pg_join_E(): %.2g (%.3g)\n; pg_optcut_E(): %.2g (%.3g)",
+	    ppstr->param_u.fa.E_join, join_frac,
+	    ppstr->param_u.fa.E_band_opt, opt_frac);
+  }
+  else {
+    sprintf(options_str1,"join: %d (%0.3g), opt: %d (%0.3g)",
+	    ppstr->param_u.fa.cgap, join_frac,
+	    ppstr->param_u.fa.optcut, opt_frac);
+    sprintf(options_str2,"pg_join: %d (%.3g)\n; pg_optcut: %d (%.3g)",
+	    ppstr->param_u.fa.cgap, join_frac,
+	    ppstr->param_u.fa.optcut, opt_frac);
+  }
+
+  if (ppstr->param_u.fa.optflag) {
+    sprintf (pstring1[0], "%s (%s) [optimized]", pg_str, verstr);
+  }
+  else {
+    sprintf (pstring1[0], "%s (%s)", pg_str, verstr);
+    if (ppstr->param_u.fa.iniflag) {
+      strcat(pstring1[0]," init1");
+    }
+  }
+
+  sprintf (pstring1[1], 
+#ifndef OLD_FASTA_GAP
+	   "%s matrix (%d:%d)%s, open/ext: %d/%d\n ktup: %d, %s, width: %3d",
+#else
+	   "%s matrix (%d:%d)%s, gap-pen: %d/%d\n ktup: %d, %s, width: %3d",
+#endif
+	   ppstr->pam_name, ppstr->pam_h,ppstr->pam_l,
+	   (ppstr->ext_sq_set) ? "xS":"\0",
+	   ppstr->gdelval, ppstr->ggapval,
+	   ppstr->param_u.fa.ktup, options_str1,
+	   ppstr->param_u.fa.optwid);
+
+  if (pstring2 == NULL) return;
+
+  sprintf (pstring2, 
+	   "; pg_name_alg: %s\n; pg_ver_rel: %s\n; pg_matrix: %s (%d:%d)\n"
+#ifndef OLD_FASTA_GAP
+	   "; pg_open-ext: %d %d\n; pg_ktup: %d\n; %s\n",
+#else
+	   "; pg_gap-pen: %d %d\n; pg_ktup: %d\n; %s\n",
+#endif
+	   pg_str,verstr,ppstr->pam_name, ppstr->pam_h,ppstr->pam_l, ppstr->gdelval,
+	   ppstr->ggapval,ppstr->param_u.fa.ktup,options_str2);
+}
+
+void
+close_work (const unsigned char *aa0, int n0,
+	    struct pstruct *ppst,
+	    struct f_struct **f_arg)
+{
+  struct f_struct *f_str;
+
+  f_str = *f_arg;
+
+  if (f_str != NULL) {
+    if (f_str->kar_p!=NULL) free(f_str->kar_p);
+    f_str->ss--;
+    free(f_str->ss);
+
+    f_str->bss--;
+    free(f_str->bss);
+    f_str->bss_size = 0;
+
+    /*    free(f_str->res);  */
+    free(f_str->waa_a);
+    free(f_str->waa_s);
+
+    free(f_str->pam2p[1][0]);
+    free(f_str->pam2p[1]);
+
+    free(f_str->pam2p[0][0]);
+    free(f_str->pam2p[0]);
+
+
+    free(f_str->diag);
+    free(f_str->link);
+    free(f_str->pamh2); 
+    free(f_str->pamh1);
+    free(f_str->harr);
+
+    free(f_str);
+    *f_arg = NULL;
+  }
+  else {
+    fprintf(stderr, "*** error [%s:%d] close_work() with NULL f_str ***\n",
+	    __FILE__, __LINE__);
+  }
+}
+
+int savemax (struct dstruct *, int, 
+	     struct savestr *vmax, 
+	     struct savestr **lowmax);
+
+
+int spam (const unsigned char *, const unsigned char *, struct savestr *,
+	 int **, int, int, int);
+int sconn(struct savestr **, int nsave, int cgap, int pgap, int noff);
+void kpsort(struct savestr **, int);
+
+extern int
+NW_ALIGN(const unsigned char *, const unsigned char *, int, int, 
+      int **pam2p, int, int q, int r, int *res, int *nc);
+
+static int
+LOCAL_ALIGN(const unsigned char *, const unsigned char *,
+	    int, int, int, int,
+	    int **, int, int, int *, int *, int *, int *, int,
+	    struct f_struct *);
+
+static int
+B_ALIGN(const unsigned char *A, const unsigned char *B, int M,
+	int N, int low, int up, int **W, int G, int H, int *S,
+	int *nS, int MW, int MX, struct bdstr *bss, struct mtp_str *mtp);
+
+static void
+do_fasta (const unsigned char *aa0, int n0,
+	  const unsigned char *aa1, int n1,
+	  const struct pstruct *ppst, struct f_struct *f_str,
+	  struct rstruct *rst, int *hoff, int shuff_flg,
+	  struct score_count_s *s_info) {	/* do_fasta - score library
+	     sequence aa1[0..n1-1] against the query aa0[0..n0-1].
+
+	     Steps: hash every ktup word of aa1 and look it up in
+	     f_str->harr[]/link[]; accumulate runs per diagonal in
+	     f_str->diag[]; keep up to MAXSAV runs in vmax[] through
+	     savemax(); rescore each saved run with spam(); join runs
+	     with sconn() when the best rescored run reaches c_gap; and,
+	     if optflag is set and score[0] > opt_cut, call dmatch().
+
+	     Results: rst->score[1] = best spam() score, rst->score[0] =
+	     max(joined score, score[1]), rst->score[2] = score[0] or the
+	     dmatch() score.  rst->alg_info gets bit 1 (sconn run) and
+	     bit 2 (dmatch run); rst->valid_stat, s_info->tot_scores and
+	     s_info->s_cnt[0]/[2] are updated.
+
+	     *hoff is written only when at least one run survives spam();
+	     the early returns leave it untouched.  shuff_flg is not read
+	     (its only use is commented out). */
+   int     nd;		/* diagonal array size */
+   int     lhval;
+   int     kfact;
+   struct savestr vmax[MAXSAV];	/* best matches saved for one sequence */
+   struct savestr *vptr[MAXSAV];
+   struct savestr *lowmax;
+   int lowscor;
+   register struct dstruct *dptr;
+   register int tscor;
+
+   register struct dstruct *diagp;
+   int noff;
+   struct dstruct *dpmax, *dpmin;
+   register int lpos;
+   int     tpos;
+   struct savestr *vmptr;
+   int     scor, ib, nsave;
+   int xdrop, do_extend;
+   int ktup, kt1, lkt, ip, ktup_sq;
+   const int *hsq;
+   int opt_cut, c_gap;
+
+
+  if (ppst->ext_sq_set) {
+    ip = 1;
+    hsq = ppst->hsqx;
+  }
+  else {
+    ip = 0;
+    hsq = ppst->hsq;
+  }
+
+  xdrop = -ppst->pam_l;
+  /* do extended alignment in spam iff protein or short sequences */
+  do_extend = !ppst->nt_align || (n0 < 50) || (n1 < 50);
+
+  ktup = ppst->param_u.fa.ktup;
+  kt1 = ktup-1;
+  if (ktup <= 3) {
+    ktup_sq = ktup*ktup;
+  }
+  else {
+    ktup_sq = ktup;
+  }
+  if (ktup == 1) ktup_sq *= 2;
+
+  if (n1 < ktup) {	/* too short to hold one word: all scores 0 */
+     rst->score[0] = rst->score[1] = rst->score[2] = 0;
+     return;
+   }
+
+   if (n0+n1+1 >= MAXDIAG) {	/* diag[] cannot hold n0+n1 diagonals:
+				   report and return scores of -1 */
+     fprintf(stderr,"*** error [%s:%d] n0,n1 too large: %d + %d (%d) > %d \n",
+	     __FILE__, __LINE__, n0,n1,n0+n1+1,MAXDIAG);
+     rst->score[0] = rst->score[1] = rst->score[2] = -1;
+     return;
+   }
+
+   /* dynamically set optcut and cgap */
+   if (ppst->param_u.fa.use_E_thresholds) {
+     c_gap = ELK_to_s(ppst->param_u.fa.E_join*ktup_sq, n0, n1, ppst->pLambda, ppst->pK, ppst->pH);
+     opt_cut = ELK_to_s(ppst->param_u.fa.E_band_opt*ktup_sq, n0, n1, ppst->pLambda, ppst->pK, ppst->pH);
+     rst->valid_stat = 0;
+   }
+   else {
+     c_gap = ppst->param_u.fa.cgap;
+     opt_cut = ppst->param_u.fa.optcut;
+     rst->valid_stat = 1;
+   }
+
+   s_info->tot_scores++;
+
+   nd = n0 + n1;
+
+   /* here we are cleaning up after the previous run, when the
+      structure is initialized (init_work), ndo is set to 0, so the
+      initialization keeps going out to nd only when nd increases
+
+      dpmax now remembers the largest diagp with hits, and only cleans
+      up to there
+   */
+
+   dpmin = dpmax = &f_str->diag[nd];
+   for (dptr = &f_str->diag[f_str->ndo]; dptr < dpmax;){
+      dptr->stop = -1;
+      dptr->dmax = NULL;
+      dptr++->score = 0;
+   }
+   dpmax = f_str->diag;
+
+   for (vmptr = vmax; vmptr < &vmax[MAXSAV]; vmptr++) {
+     vmptr->score = 0;
+   }
+
+   lowmax = vmax;
+   lowscor = 0;
+
+   /* start hashing */
+   lhval = 0;
+   lkt = kt1;
+   for (lpos = 0; (lpos < lkt || hsq[aa1[lpos]]>=NMAP) && lpos <n1; lpos++) {	/* NOTE(review):
+	aa1[lpos] is indexed before lpos < n1 is tested, so aa1[n1]
+	must be readable - confirm against the caller */
+     /* restart lhval calculation */
+     if (hsq[aa1[lpos]]>=NMAP) {
+       lhval = 0; lkt = lpos + ktup;
+       continue;
+     }
+     lhval = ((lhval & f_str->hmask) << f_str->kshft) + hsq[aa1[lpos]];
+   }
+
+   noff = n0 - 1;
+   diagp = &f_str->diag[noff + lkt];
+   for (; lpos < n1; lpos++, diagp++) {
+     /* skip over low complexity */
+     if (hsq[aa1[lpos]]>=NMAP) {
+       lpos++ ; diagp++;
+       while (hsq[aa1[lpos]]>=NMAP && lpos < n1 ) {lpos++; diagp++;}
+       if (lpos >= n1) break;
+       lhval = 0;
+     }
+
+     /* lhval is the hash value of the library sequence */
+     lhval = ((lhval & f_str->hmask) << f_str->kshft) + hsq[aa1[lpos]];
+     /* tpos gives the locations where the library word matches */
+
+     /* get the diagonal of the initial hit */
+     tpos = f_str->harr[lhval];
+     if (tpos >= 0) {       /* we have a hit */
+       /* tpos = link[tpos] means that tpos always gets smaller, so
+	  diagp-tpos gets bigger */
+       if (diagp-tpos < dpmin) dpmin = diagp - tpos;
+       for (; tpos >= 0; tpos = f_str->link[tpos]) {
+	 /* here tscor is actually the end of the run */
+	 if ((tscor = (dptr = &diagp[-tpos])->stop) >= 0) {
+	   /* increased by ktup word length */
+	   tscor += ktup;
+	   /* now tscor becomes the penalty for the unmatched
+	      (non-identical) residues in the diagonal between the
+	      previous match and the current match (lpos) */
+	   if ((tscor -= lpos) <= 0) {
+	     scor = dptr->score;
+	     /* the score is getting worse; if it is better than lowscor, save it */
+	     if ((tscor += (kfact = f_str->pamh2[lhval])) < 0 && lowscor < scor) {
+	       lowscor = savemax(dptr, dptr - f_str->diag, vmax, &lowmax);
+	     }
+	     /* tscor is now a candidate score of the current run;
+		kfact is the score for starting over */
+	     if ((tscor += scor) >= kfact) {
+	       /* continuing better */
+	       dptr->score = tscor;
+	       dptr->stop = lpos;
+	     }
+	     else {
+	       /* starting over better */
+	       dptr->score = kfact;
+	       dptr->start = (dptr->stop = lpos) - kt1;
+	     }
+	   }
+	   else {	 /* continuing a match */
+	     dptr->score += f_str->pamh1[aa0[tpos]];
+	     dptr->stop = lpos;
+	   }
+	 }
+	 else {       /* no run in this diagonal yet */
+	   dptr->score = f_str->pamh2[lhval];
+	   dptr->start = (dptr->stop = lpos) - kt1;
+	 }
+	 /* dptr is biggest at the end of tpos = link[tpos] */
+	 if (dptr > dpmax) dpmax = dptr;
+       }
+     }				/* end tpos */
+   }				/* end lpos */
+
+   for (dptr = dpmin; dptr <= dpmax;) {	/* save the still-open runs and
+					   reset the touched diagonals */
+     if (dptr->score > lowscor) {
+      lowscor = savemax (dptr, dptr-f_str->diag, vmax, &lowmax);
+     }
+     dptr->stop = -1;
+     dptr->dmax = NULL;
+     dptr++->score = 0;
+   }
+   f_str->ndo = nd;
+
+/*
+        at this point all of the elements of aa1[lpos]
+        have been searched for elements of aa0[tpos]
+        with the results in diag[dpos]
+*/
+   for (nsave = 0, vmptr = vmax; vmptr < &vmax[MAXSAV]; vmptr++)    {
+     /*
+     fprintf(stderr,"0: %4d-%4d  1: %4d-%4d  dp: %d score: %d\n",
+	     noff+vmptr->start-vmptr->dp,
+	     noff+vmptr->stop-vmptr->dp,
+	     vmptr->start,vmptr->stop,
+	     vmptr->dp,vmptr->score);
+
+     */
+      if (vmptr->score > 0) {
+	vmptr->score = spam (aa0, aa1, vmptr, ppst->pam2[ip], xdrop,
+			     noff,do_extend);
+	if (vmptr->score > 0 ) vptr[nsave++] = vmptr;
+      }
+   }
+
+   if (nsave <= 0) {	/* nothing survived spam(): all scores 0 */
+     rst->score[0] = rst->score[1] = rst->score[2] = 0;
+     return;
+   }
+       
+   /*
+   fprintf(stderr,"n0: %d; n1: %d; noff: %d\n",n0,n1,noff);
+   for (ib=0; ib<nsave; ib++) {
+     fprintf(stderr,"0: %4d-%4d  1: %4d-%4d  dp: %d score: %d\n",
+	     noff+vptr[ib]->start-vptr[ib]->dp,
+	     noff+vptr[ib]->stop-vptr[ib]->dp,
+	     vptr[ib]->start,vptr[ib]->stop,
+	     vptr[ib]->dp,vptr[ib]->score);
+   }
+   fprintf(stderr,"---\n");
+   */
+
+   /* find the best init1 score */
+   for (vmptr=vptr[0],ib=1; ib<nsave; ib++) {
+     if (vptr[ib]->score > vmptr->score) vmptr=vptr[ib];
+   }
+
+   /* sconn does not modify vmptr->score, so only do it if it will help */
+   if (vmptr->score >= c_gap ) {
+     s_info->s_cnt[0]++;
+     scor = sconn (vptr, nsave, c_gap, ppst->param_u.fa.pgap, noff);
+     rst->alg_info |= 1;
+   }
+   else { scor = vmptr->score;}
+
+/*  kssort (vptr, nsave); */
+
+   rst->score[1] = vmptr->score;
+   rst->score[0] = max (scor, vmptr->score);
+   rst->score[2] = rst->score[0];		/* initn */
+
+   *hoff= noff - vmptr->dp;	/* always need *hoff */
+   if (ppst->param_u.fa.optflag) {
+     if ( /* shuff_flg || */ rst->score[0] > opt_cut ) {
+       rst->score[2] = dmatch (aa0, n0, aa1, n1, *hoff,
+			     ppst->param_u.fa.optwid, ppst->pam2[ip],
+			     ppst->gdelval,ppst->ggapval,f_str);
+
+       s_info->s_cnt[2]++;
+       rst->alg_info |= 2;
+       rst->valid_stat = 1;
+     }
+     /*
+     else if (rst->score[0] > c_gap) {
+       rst->score[2] = dmatch (aa0, n0, aa1, n1, *hoff, 4, ppst->pam2[ip],
+			     ppst->gdelval,ppst->ggapval,f_str);
+     }
+     */
+   }
+   else {	/* we never do dmatch, so initn is valid */
+     rst->valid_stat = 1;
+   }
+}
+
+/* do_work - score one library sequence against the query.
+
+   *rst is first reset (scores 0, escore 1.0, segnum = seglen = 1,
+   valid_stat 0, alg_info 0).  If n1 is shorter than ktup nothing else
+   is done.  Otherwise do_fasta() fills in the scores; under TFASTA the
+   library sequence is first passed through aatran() into f_str->aa1x.
+
+   rst->comp and rst->H are set to 1/lambda and H from do_karlin() when
+   this is not TFASTA, zsflag%10 == 6 and do_karlin() returns > 0;
+   in every other case both are set to -1.0. */
+void do_work (const unsigned char *aa0, int n0,
+	      const unsigned char *aa1, int n1,
+	      int frame,
+	      const struct pstruct *ppst, struct f_struct *f_str,
+	      int qr_flg, int shuff_flg, struct rstruct *rst, 
+	      struct score_count_s *s_info)
+{
+  int hoff, n10;
+  int k;
+
+  double lambda, H;
+
+  for (k = 0; k < 3; k++) {
+    rst->score[k] = 0;
+  }
+  rst->escore = 1.0;
+  rst->segnum = rst->seglen = 1;
+  rst->valid_stat = 0;
+  rst->alg_info = 0;
+
+  if (n1 < ppst->param_u.fa.ktup) {
+    return;
+  }
+
+#ifdef TFASTA
+  n10 = aatran(aa1, f_str->aa1x, n1, frame);
+  do_fasta (aa0, n0, f_str->aa1x, n10, ppst, f_str, rst, &hoff, shuff_flg, s_info);
+
+  rst->comp = rst->H = -1.0;
+#else	/* FASTA */
+  do_fasta (aa0, n0, aa1, n1, ppst, f_str, rst, &hoff, shuff_flg, s_info);
+
+  if ((ppst->zsflag%10) == 6 &&
+      do_karlin(aa1, n1, ppst->pam2[0], ppst, f_str->aa0_f,
+		f_str->kar_p, &lambda, &H) > 0) {
+    rst->comp = 1.0/lambda;
+    rst->H = H;
+  }
+  else {
+    rst->comp = rst->H = -1.0;
+  }
+#endif
+}
+
+/* do_opt - rerun do_fasta() on one library sequence with
+   ppst->param_u.fa.optflag forced to 1, then restore the caller's
+   optflag value.  Scores are returned through *rst.
+
+   Under TFASTA the library sequence is first passed through aatran()
+   into f_str->aa1x.
+
+   do_fasta() increments counters in the score_count_s it is given; the
+   throw-away local used here is zeroed first so those increments do not
+   read uninitialised memory. */
+void do_opt (const unsigned char *aa0, int n0,
+	     const unsigned char *aa1, int n1,
+	     int frame,
+	     struct pstruct *ppst,
+	     struct f_struct *f_str,
+	     struct rstruct *rst)
+{
+  int optflag, hoff;
+#ifdef TFASTA
+  int n10;
+#endif
+  struct score_count_s s_info;
+
+  memset(&s_info, 0, sizeof(s_info));
+
+  optflag = ppst->param_u.fa.optflag;
+  ppst->param_u.fa.optflag = 1;
+
+#ifdef TFASTA  
+  n10=aatran(aa1,f_str->aa1x,n1,frame);
+  do_fasta (aa0, n0, f_str->aa1x, n10, ppst, f_str, rst, &hoff, 0, &s_info);
+#else	/* FASTA */
+  do_fasta(aa0,n0,aa1,n1,ppst,f_str,rst, &hoff, 0, &s_info);
+#endif
+  ppst->param_u.fa.optflag = optflag;
+}
+
+/* savemax - record the run currently held in *dptr (on diagonal dpos)
+   in the table of saved runs vmax[0..MAXSAV-1].
+
+   If dptr->dmax already points at a saved entry for the same diagonal
+   and start, that entry is extended in place; otherwise the entry
+   *lowmax is overwritten with the new run.  Whenever the entry that was
+   *lowmax may no longer be the lowest, the table is rescanned and
+   *lowmax is updated.
+
+   Returns the score of the (possibly new) lowest saved entry.
+*/
+int
+savemax (struct dstruct *dptr, int dpos,
+	 struct savestr *vmax, struct savestr **lowmax)
+{
+   struct savestr *saved = dptr->dmax;
+   struct savestr *slot;
+   int floor_score;
+   int k;
+
+   if (saved == NULL || saved->dp != dpos || saved->start != dptr->start) {
+     /* a new run: it replaces the lowest-scoring saved entry */
+     slot = *lowmax;
+     slot->score = dptr->score;
+     floor_score = slot->score;
+     slot->dp = dpos;
+     slot->start = dptr->start;
+     slot->stop = dptr->stop;
+     dptr->dmax = slot;
+   }
+   else {
+     /* continuation of a run that is already saved */
+     saved->stop = dptr->stop;
+     floor_score = dptr->score;
+     if (floor_score <= saved->score) { return (*lowmax)->score; }
+     saved->score = floor_score;
+     /* the lowest entry was not touched, so it is still the lowest */
+     if (saved != *lowmax) { return (*lowmax)->score; }
+   }
+
+   /* the old lowest entry changed: find the new lowest */
+   for (k = 0; k < MAXSAV; k++) {
+     if (vmax[k].score < floor_score) {
+       floor_score = vmax[k].score;
+       *lowmax = &vmax[k];
+     }
+   }
+   return floor_score;
+}
+
+/* spam - rescan the saved run *dmax with the scoring matrix pam2 and
+   trim it to its best-scoring sub-segment.
+
+   aa0/aa1 : query / library sequence
+   dmax    : run to rescore; dmax->start and dmax->stop (library
+	     coordinates) are overwritten with the best sub-segment
+   pam2    : scoring matrix, indexed pam2[query residue][library residue]
+   xdrop   : extension is abandoned once the running score falls xdrop
+	     below the best score seen
+   noff    : offset added when converting a library position to a
+	     query position (query = lib - dmax->dp + noff)
+   do_extend : when non-zero (and NOSPAM_EXT is not defined), try to
+	     extend the best segment beyond the ends of the run
+
+   Returns the score of the best sub-segment.  If no sub-segment scores
+   above zero, 0 is returned and dmax->start/stop are left unchanged.
+
+   NOTE(review): the extension loops have no explicit bounds check;
+   presumably they rely on end-of-sequence sentinels scoring negatively
+   in pam2 - confirm against the callers.
+*/
+int spam (const unsigned char *aa0, const unsigned char *aa1,
+	  struct savestr *dmax, int **pam2, int xdrop,
+	  int noff, int do_extend)
+{
+   register int lpos, tot;
+   register const unsigned char *aa0p, *aa1p;
+
+   int drop_thresh;
+
+   struct {
+     int     start, stop, score;
+   } curv, maxv;
+
+   aa1p = &aa1[lpos= dmax->start];	/* get the start of lib seq */
+   aa0p = &aa0[lpos - dmax->dp + noff];	/* start of query */
+#ifdef DEBUG
+   /* also add check in calling routine */
+   if (aa0p < aa0) { return -99; }
+#endif
+   curv.start = lpos;			/* start index in lib seq */
+
+   /* default to the incoming region so that dmax is never loaded with
+      indeterminate values when nothing scores above zero */
+   maxv.start = dmax->start;
+   maxv.stop = dmax->stop;
+
+   tot = curv.score = maxv.score = 0;
+
+   for (; lpos <= dmax->stop; lpos++) {
+     tot += pam2[*aa0p++][*aa1p++];
+     if (tot > curv.score) {		/* update current score */
+       curv.stop = lpos;
+       curv.score = tot;
+     }
+     else if (tot < 0) {
+       if (curv.score > maxv.score) {	/* save score, start, stop */
+	 maxv.start = curv.start;
+	 maxv.stop = curv.stop;
+	 maxv.score = curv.score;
+       }
+       tot = curv.score = 0;		/* reset running score */
+       curv.start = lpos+1;		/* reset start */
+       if(lpos >= dmax->stop) break;	/* if the zero is beyond stop, quit */
+     }
+   }
+
+   if (curv.score > maxv.score) {
+     maxv.start = curv.start;
+     maxv.stop = curv.stop;
+     maxv.score = curv.score;
+   }
+
+#ifndef NOSPAM_EXT
+
+   /* now check to see if the score gets better by extending */
+   if (do_extend && maxv.score > xdrop) {
+
+     /* scan forwards, past the end of the run */
+     if (maxv.stop == dmax->stop) {
+       tot = maxv.score;
+       drop_thresh = maxv.score - xdrop;
+       aa1p = &aa1[lpos= dmax->stop];
+       aa0p = &aa0[lpos - dmax->dp + noff];
+       while (tot > drop_thresh ) {
+	 ++lpos;
+	 tot += pam2[*(++aa0p)][*(++aa1p)];
+	 if (tot > maxv.score) {
+	   maxv.stop = lpos;	/* extending to the right moves the END */
+	   maxv.score = tot;
+	   drop_thresh = tot - xdrop;
+	 }
+       }
+     }
+
+     /* scan backwards now */
+
+     if (maxv.start == dmax->start) {
+       tot = maxv.score;
+       drop_thresh = maxv.score - xdrop;
+       aa1p = &aa1[lpos= dmax->start];
+       aa0p = &aa0[lpos - dmax->dp + noff];
+       while (tot > drop_thresh) {
+	 --lpos;
+	 tot += pam2[*(--aa0p)][*(--aa1p)];
+	 if (tot > maxv.score) {
+	   maxv.start = lpos;
+	   maxv.score = tot;
+	   drop_thresh = tot - xdrop;
+	 }
+       }
+     }
+   }
+#endif
+
+   dmax->start = maxv.start;
+   dmax->stop = maxv.stop;
+
+   return maxv.score;
+}
+
+/* sconn - estimate the best score obtainable by joining saved runs.
+
+   v[0..n-1] : runs to consider; the array is re-ordered by kpsort()
+	       (ascending library start position)
+   cgap      : runs scoring below cgap are ignored
+   pgap      : value added to the combined score each time two runs
+	       are joined
+   noff      : offset used to convert library to query coordinates
+
+   Each usable run is given the score of the first chain already in the
+   list that ends before it in both sequences, plus its own score and
+   pgap; otherwise just its own score.  The list is kept in descending
+   score order.
+
+   Returns the highest chain score, or 0 if no run reached cgap.
+
+   NOTE(review): a run whose chain score is not greater than any entry
+   already in the list is not linked in at all (there is no append at
+   the tail), so later runs can never chain from it.  This looks
+   deliberate or at least long-standing - confirm before changing.
+*/
+int sconn (struct savestr **v, int n, int cgap, int pgap, int noff)
+{
+   struct slink
+   {
+      int     score;
+      struct savestr *vp;
+      struct slink *next;
+   };
+   struct slink nodes[MAXSAV];
+   struct slink *head, *node, *scan, *prev;
+   int     idx, used;
+   int     lib_beg, qry_beg, lib_end, qry_end;
+
+   /* order the runs left to right by library position */
+   kpsort (v, n);
+
+   head = NULL;
+   used = 0;
+
+   for (idx = 0; idx < n; idx++) {
+
+     /* a run scoring less than the gap penalty never helps */
+     if (v[idx]->score < cgap) { continue; }
+
+     lib_beg = v[idx]->start;
+     qry_beg = lib_beg - v[idx]->dp + noff;
+
+     /* take the next free node for this run */
+     node = &nodes[used];
+     used++;
+     node->vp = v[idx];
+     node->score = v[idx]->score;
+     node->next = NULL;
+
+     /* chain onto the first listed run that ends before this one
+	begins, in both the library and the query */
+     scan = head;
+     while (scan != NULL) {
+       lib_end = scan->vp->stop;
+       qry_end = lib_end - scan->vp->dp + noff;
+       if (lib_end < lib_beg && qry_end < qry_beg) {
+	 node->score = scan->score + v[idx]->score + pgap;
+	 break;
+       }
+       scan = scan->next;
+     }
+
+     /* first usable run becomes the list */
+     if (head == NULL) {
+       head = node;
+       continue;
+     }
+
+     /* insert in front of the first entry it out-scores */
+     prev = NULL;
+     for (scan = head; scan != NULL; prev = scan, scan = scan->next) {
+       if (node->score > scan->score) {
+	 node->next = scan;
+	 if (prev == NULL) { head = node; }
+	 else { prev->next = node; }
+	 break;
+       }
+     }
+   }
+
+   return (head != NULL) ? head->score : 0;
+}
+
+/* kssort - sort the n run pointers in v[] by descending score, using a
+   Shell sort with gaps n/2, n/4, ..., 1.  The array is sorted in place;
+   only the pointers are moved.
+
+   Defined with a prototype (rather than the old K&R parameter list) so
+   that it matches kpsort() and stays valid for compilers that no
+   longer accept old-style definitions.
+*/
+void
+kssort (struct savestr **v, int n)
+{
+   int     gap, i, j;
+   struct savestr *tmp;
+
+   for (gap = n / 2; gap > 0; gap /= 2)
+      for (i = gap; i < n; i++)
+	 for (j = i - gap; j >= 0; j -= gap)
+	 {
+	    /* already in descending order: this gap-chain is done */
+	    if (v[j]->score >= v[j + gap]->score)
+	       break;
+	    tmp = v[j];
+	    v[j] = v[j + gap];
+	    v[j + gap] = tmp;
+	 }
+}
+
+/* kpsort - sort the n run pointers in v[] by ascending ->start, in
+   place, using a Shell sort (gapped insertion sort) with the fixed
+   increments 21, 7, 3, 1.
+*/
+void
+kpsort (struct savestr **v, int n) {
+  int strides[4] = { 21, 7, 3, 1 };
+  struct savestr *held;
+  int pass, stride, hi, lo;
+  int key;
+
+  for (pass = 0; pass < 4; pass++) {
+    stride = strides[pass];
+    for (hi = stride; hi < n; hi++) {
+      /* lift out v[hi] and slide larger entries up by one stride */
+      held = v[hi];
+      key = held->start;
+      for (lo = hi; lo >= stride && v[lo - stride]->start > key; lo -= stride) {
+	v[lo] = v[lo - stride];
+      }
+      v[lo] = held;
+    }
+  }
+}
+
+/* dmatch - banded local alignment score around the diagonal offset
+   hoff.
+
+   The band is `window` wide (never wider than n1) and is placed so
+   that it runs from -window/2-hoff to that value plus window.  The
+   work is done by FLOCAL_ALIGN(), which is handed aa0-1 / aa1-1 and
+   the negated gap values.  With OLD_FASTA_GAP defined the first gap
+   argument is -(gdelval-ggapval) instead of -gdelval.
+
+   Returns the value returned by FLOCAL_ALIGN().
+*/
+int dmatch (const unsigned char *aa0, int n0,
+		   const unsigned char *aa1, int n1,
+		   int hoff, int window, 
+		   int **pam2, int gdelval, int ggapval,
+		   struct f_struct *f_str)
+{
+   int band_lo, band_hi;
+   int open_pen;
+
+   /* the band cannot be wider than the library sequence */
+   window = min (n1, window);
+
+#ifdef DEBUG
+   if (window > f_str->bss_size) {
+     fprintf(stderr,"*** error [%s:%d] dropnfa.c:dmatch window [%d] out of range [%d]\n",
+	     __FILE__, __LINE__, window, f_str->bss_size);
+     window = f_str->bss_size - 4;
+   }
+#endif
+
+   /* centre the band on the offset found by the initial scan */
+   band_lo = -window/2-hoff;
+   band_hi = band_lo+window;
+
+#ifdef OLD_FASTA_GAP
+   open_pen = -(gdelval-ggapval);
+#else
+   open_pen = -gdelval;
+#endif
+
+   return FLOCAL_ALIGN(aa0-1, aa1-1, n0, n1, band_lo, band_hi,
+		       pam2, open_pen, -ggapval, window, f_str);
+ }
+
+
+/* A PACKAGE FOR LOCALLY ALIGNING TWO SEQUENCES WITHIN A BAND:
+
+   To invoke, call FLOCAL_ALIGN(A,B,M,N,L,U,W,G,H,MW,f_str).
+   The parameters are explained as follows:
+	A, B : two sequences to be aligned
+	M : the length of sequence A
+	N : the length of sequence B
+	L : lower bound of the band
+	U : upper bound of the band
+	W : scoring table for matches and mismatches
+	G : gap-opening penalty
+	H : gap-extension penalty
+	MW  : maximum window size
+	f_str : work structure; f_str->bss holds the band score arrays
+*/
+
+#include <stdio.h>
+
+#define MININT -9999999
+
+/* FLOCAL_ALIGN - score-only banded local alignment of A[1..M] against
+   B[1..N] (both are indexed from 1 here).
+
+   low/up are the band limits; they are clipped to [-M, N] before use.
+   W is the scoring table (W[a][b]), G the gap-open and H the
+   gap-extend penalty; m = G+H is the cost of the first gap position.
+   MW is accepted but not used in this function.  The band work arrays
+   come from f_str->bss and are indexed from leftd-1 to rightd+1, i.e.
+   band+2 entries are touched.
+
+   Returns the best local score found (never negative); 0 is also
+   returned for empty sequences or an empty band (after a message on
+   stderr).
+*/
+int
+FLOCAL_ALIGN(const unsigned char *A, const unsigned char *B,
+	     int M, int N, int low, int up,
+	     int **W, int G,int H, int MW,
+	     struct f_struct *f_str)
+{
+  int band;
+  register struct bdstr *bssp;
+  int i, j, si, ei;
+  int c, d, e, m;
+  int leftd, rightd;
+  int best_score;
+  int *wa, curd;
+  int ib;
+  
+  bssp = f_str->bss;
+
+  m = G+H;			/* cost of opening a gap of length 1 */
+  low = max(-M, low);		/* clip the band to the matrix */
+  up = min(N, up);
+  
+  if (N <= 0) return 0;
+
+  if (M <= 0) return 0;
+
+  band = up-low+1;
+  if (band < 1) {
+    fprintf(stderr,"***  error [%s:%d] low > up is unacceptable!: M: %d N: %d l/u: %d/%d\n",
+	    __FILE__, __LINE__, M, N, low, up);
+    return 0;
+  }
+
+  /* leftd/rightd: first and last band cell in use for the current row */
+  if (low > 0) leftd = 1;
+  else if (up < 0) leftd = band;
+  else leftd = 1-low;
+  rightd = band;
+  si = max(0,-up);	/* start index -1 */
+  ei = min(M,N-low);	/* end index */
+  /* initialize the row above the first: CC = 0, DD = -G */
+  bssp[leftd].CC = 0;
+  for (j = leftd+1; j <= rightd; j++) {
+    bssp[j].CC = 0;
+    bssp[j].DD = -G;
+  }
+
+  /* sentinels just outside the band so they never win a comparison */
+  bssp[rightd+1].CC = MININT;
+  bssp[rightd+1].DD = MININT;
+
+  best_score = 0;
+  bssp[leftd-1].CC = MININT;
+  bssp[leftd].DD = -G;
+
+  for (i = si+1; i <= ei; i++) {
+    /* the band shrinks on the right / grows on the left as rows advance */
+    if (i > N-up) rightd--;
+    if (leftd > 1) leftd--;
+    wa = W[A[i]];		/* scoring row for residue A[i] */
+    /* leftmost cell: d = best gap score arriving from cell leftd+1 */
+    if ((c = bssp[leftd+1].CC-m) > (d = bssp[leftd+1].DD-H)) d = c;
+    /* diagonal step only if the B index is inside the sequence */
+    if ((ib = leftd+low-1+i ) > 0) c = bssp[leftd].CC+wa[B[ib]];
+
+    if (d > c) c = d;
+    if (c < 0) c = 0;		/* local alignment: never below zero */
+    e = c-G;
+    bssp[leftd].DD = d;
+    bssp[leftd].CC = c;
+    if (c > best_score) best_score = c;
+
+    for (curd=leftd+1; curd <= rightd; curd++) {
+      /* e: best gap score carried along the row from the previous cell */
+      if ((c = c-m) > (e = e-H)) e = c;
+      /* d: best gap score arriving from cell curd+1 of the previous row */
+      if ((c = bssp[curd+1].CC-m) > (d = bssp[curd+1].DD-H)) d = c;
+      c = bssp[curd].CC + wa[B[curd+low-1+i]];
+      if (e > c) c = e;
+      if (d > c) c = d;
+      if (c < 0) c = 0;
+      bssp[curd].CC = c;
+      bssp[curd].DD = d;
+      if (c > best_score) best_score = c;
+    }
+  }
+
+  return best_score;
+}
+
+/* ckalloc - allocate space; check for success.
+
+   Requests `amount` bytes from malloc().  If the allocation fails,
+   w_abort() is called with an out-of-memory message.  Returns the new
+   block as a char pointer.
+
+   The size is passed to malloc() as a size_t; it is not narrowed to
+   unsigned, which would silently truncate large requests on platforms
+   where size_t is wider than unsigned int.
+*/
+char *ckalloc(size_t amount)
+{
+  char *p;
+
+  if ((p = malloc(amount)) == NULL)
+    w_abort("Ran out of memory.","");
+  return(p);
+}
+
+/* shscore - self score of a sequence: the sum of pam2[r][r] over the
+   n0 residues r of aa0, i.e. the score of a 100% identical match.
+   Returns 0 when n0 <= 0.
+*/
+int
+shscore(const unsigned char *aa0, int n0, int **pam2)
+{
+  int total = 0;
+  int pos = 0;
+
+  while (pos < n0) {
+    const unsigned char res = aa0[pos];
+
+    total += pam2[res][res];
+    pos++;
+  }
+  return total;
+}
+
+/* BCHECK_SCORE - recompute the score of an alignment from its edit
+   script.
+
+   A, B are indexed from 1 (A[1..M], B[1..N]).  The script S is read
+   until both sequences are consumed; each entry is
+	 0 : align the next residue of A with the next residue of B
+	>0 : gap of that many residues, advancing B
+	<0 : gap of that many residues (absolute value), advancing A
+   A gap of k residues costs g + k*h; aligned pairs score w[a][b].
+
+   *nres receives the number of alignment columns (pairs plus gap
+   positions).  Returns the total score.
+*/
+static int
+BCHECK_SCORE(const unsigned char *A, const unsigned char *B,
+		       int M, int N, int *S, int **w, int g, int h,
+		       int *nres)
+{ 
+  int ai = 0, bj = 0;		/* residues of A / B consumed so far */
+  int columns = 0;
+  int total = 0;
+  int step;
+
+  while (ai < M || bj < N) {
+    step = *S++;
+    if (step > 0) {		/* gap that advances B */
+      total = total - (g+step*h);
+      bj += step;
+      columns += step;
+    }
+    else if (step < 0) {	/* gap that advances A */
+      total = total - (g-step*h);
+      ai -= step;
+      columns -= step;
+    }
+    else {			/* aligned pair */
+      ai++;
+      bj++;
+      total = w[A[ai]][B[bj]] + total;
+      columns++;
+    }
+  }
+  *nres = columns;
+  return total;
+}
+
+/* bd_malign is a recursive interface to bd_walign() that is called
+   from do_walign(). bd_malign() first does an alignment, then checks
+   to see if the score is greater than the threshold. If so, it tries
+   doing a left and right alignment.
+
+   aa0/n0, aa1/n1 : query and library sequence
+   score_thresh   : sub-alignments must score above this to be kept
+   max_res        : number of ints allocated for each alignment script
+   cur_ares       : result record to fill in; cur_ares->res is
+		    allocated here
+   first_align    : when non-zero, the alignment is produced even if
+		    the score does not exceed score_thresh
+
+   Returns the head of the result chain: cur_ares itself, or whatever
+   merge_ares_chains() returns after the left/right results are merged
+   in.  Allocation failures print a message and exit(1).
+ */
+struct a_res_str *
+bd_malign (const unsigned char *aa0, int n0,
+	   const unsigned char *aa1, int n1,
+	   int score_thresh, int max_res,
+	   struct pstruct *ppst,
+	   struct f_struct *f_str,
+	   struct a_res_str *cur_ares, int first_align)
+{
+  struct a_res_str *tmpl_ares, *tmpr_ares, *this_ares;
+  int hoff, l_min, l_max, window;
+  unsigned char *local_aa1;
+  int local_n1;
+  int score_ix;
+  int max_sub_score = -1;
+  int min_alen;
+  int have_local_aa1;
+  struct score_count_s s_info = {0, 0, 0};
+
+  min_alen = min(MIN_LOCAL_LEN,n0);
+
+  score_ix = ppst->score_ix;
+
+  /* now we need alignment storage - get it */
+  if ((cur_ares->res = (int *)calloc((size_t)max_res,sizeof(int)))==NULL) {
+    fprintf(stderr,"*** error [%s:%d] cannot allocate alignment results array %d\n",
+	    __FILE__, __LINE__, max_res);
+    exit(1);
+  }
+
+  cur_ares->next = NULL;
+  cur_ares->n1 = n1;
+    
+  /* lots of changes to optcut, optwid, and optflag were made in do_walign */
+
+  do_fasta(aa0, n0, aa1, n1, ppst, f_str, &cur_ares->rst, &hoff,0, &s_info);
+
+  if (first_align || cur_ares->rst.score[score_ix]>score_thresh) {
+    if (ppst->sw_flag) {
+      /* hoff gives us a projection of the query on the library
+	 sequence, which can be used to limit the portions of the
+	 library sequence that will be aligned by Smith-Waterman
+
+	 this ensures that the optimal alignment (with pam2p[0], not
+	 pam2p[1]), aligns the correct region when the only difference between
+	 the regions is the lseg encoding
+      */
+
+      window = min(n1, ppst->param_u.fa.optwid);
+      /*  this windowing seems inappropriate when Smith-Waterman is used (sw_flag),
+	  but it is done to ensure that seg'ed regions are ignored 
+      */
+      l_min = 0;
+      l_max = n1;
+      if (ppst->pam_x_set) {
+	l_min = max(0, -window-hoff);
+	l_max = min(n1, n0-hoff+window);
+      }
+
+      have_local_aa1 = 0;
+      local_aa1 = (unsigned char *)aa1;
+      if (l_min > 0 || l_max < n1 - 1) {
+	if (l_max - l_min < 0) {
+	  fprintf(stderr,"*** error [%s:%d] l_min: %d > l_max %d\n",__FILE__, __LINE__, l_min,l_max);
+	  exit(1);
+	}
+	/* a byte buffer: one leading and one trailing zero byte around
+	   the copied region (element size is a residue, not a pointer) */
+	if ((local_aa1 = (unsigned char *)calloc(l_max - l_min +2,sizeof(unsigned char)))==NULL) {
+	  fprintf(stderr,"*** error [%s:%d] Cannot allocate local_aa1\n",__FILE__, __LINE__);
+	  exit(1);
+	}
+	  
+	local_aa1++;
+	memcpy(local_aa1,aa1+l_min, l_max - l_min);
+	have_local_aa1 = 1;
+      }
+
+      cur_ares->sw_score = sw_walign(f_str->pam2p[0], n0, local_aa1, l_max - l_min,
+				     -ppst->gdelval, -ppst->ggapval,
+				     f_str->ss, cur_ares);
+
+      /* map the coordinates of the copied region back to aa1 */
+      cur_ares->min1 += l_min;
+      cur_ares->max1 += l_min;
+      cur_ares->n1 += l_min;
+      if (have_local_aa1) {free(--local_aa1);}
+    }
+    else {
+      cur_ares->sw_score = bd_walign(aa0, n0, aa1, n1, ppst, f_str, hoff, cur_ares);
+    }
+  }
+  else {
+    cur_ares->nres = 0;
+    cur_ares->sw_score=0;
+    cur_ares->rst.score[0] = cur_ares->rst.score[1] = cur_ares->rst.score[2] = 0;
+  }
+
+  /* check to see if a variant is better */
+
+  if (!ppst->do_rep || cur_ares->rst.score[score_ix] <= score_thresh) {return cur_ares;}
+
+  if (cur_ares->min1 >= min_alen) { /* try the left  */
+    /* allocate a_res */	
+    tmpl_ares = (struct a_res_str *)calloc(1, sizeof(struct a_res_str));
+    if (tmpl_ares == NULL) {
+      fprintf(stderr,"*** error [%s:%d] cannot allocate a_res_str\n",__FILE__, __LINE__);
+      exit(1);
+    }
+
+    /* private, zero-padded copy of the library residues left of the hit */
+    local_aa1 = (unsigned char *)calloc(cur_ares->min1+2,sizeof(unsigned char));
+    if (local_aa1 == NULL) {
+      fprintf(stderr,"*** error [%s:%d] Cannot allocate local_aa1\n",__FILE__, __LINE__);
+      exit(1);
+    }
+    local_aa1++;
+    memcpy(local_aa1,aa1,cur_ares->min1);
+
+    tmpl_ares = bd_malign(aa0, n0, local_aa1, cur_ares->min1,
+			  score_thresh, max_res, 
+			  ppst, f_str, tmpl_ares,0);
+
+    free(--local_aa1);
+
+    if (tmpl_ares->rst.score[score_ix] > score_thresh) {
+      max_sub_score = tmpl_ares->rst.score[score_ix];
+    }
+    else {
+      if (tmpl_ares->res) free(tmpl_ares->res);
+      free(tmpl_ares);
+      tmpl_ares=NULL;
+    }
+  }
+  else {tmpl_ares = NULL;}
+
+  local_n1 = n1 - cur_ares->max1;
+  if (local_n1 >= min_alen) { /* try the right  */      
+    /* allocate a_res */	
+    tmpr_ares = (struct a_res_str *)calloc(1, sizeof(struct a_res_str));
+    if (tmpr_ares == NULL) {
+      fprintf(stderr,"*** error [%s:%d] cannot allocate a_res_str\n",__FILE__, __LINE__);
+      exit(1);
+    }
+
+    /* private, zero-padded copy of the library residues right of the hit */
+    local_aa1 = (unsigned char *)calloc(local_n1+2, sizeof(unsigned char));
+    if (local_aa1 == NULL) {
+      fprintf(stderr,"*** error [%s:%d] Cannot allocate local_aa1\n",__FILE__, __LINE__);
+      exit(1);
+    }
+    local_aa1++;
+    memcpy(local_aa1,aa1+cur_ares->max1, local_n1);
+
+    tmpr_ares = bd_malign(aa0, n0, local_aa1, local_n1,
+			  score_thresh, max_res,
+			  ppst, f_str, tmpr_ares,0);
+    free(--local_aa1);
+
+    if (tmpr_ares->rst.score[score_ix] > score_thresh) {
+      /* adjust the left boundary: shift every result in the chain back
+	 into the coordinates of the full library sequence */
+      for (this_ares = tmpr_ares; this_ares; this_ares = this_ares->next) {
+	this_ares->min1 += cur_ares->max1;
+	this_ares->max1 += cur_ares->max1;
+      }
+      if (tmpr_ares->rst.score[score_ix] > max_sub_score) {
+	max_sub_score = tmpr_ares->rst.score[score_ix];
+      }
+    }
+    else {
+      if (tmpr_ares->res) free(tmpr_ares->res);
+      free(tmpr_ares);
+      tmpr_ares = NULL;
+    }
+  }
+  else {tmpr_ares = NULL;}
+
+  if (max_sub_score <= score_thresh) return cur_ares;
+
+  cur_ares = merge_ares_chains(cur_ares, tmpl_ares, score_ix, "left");
+  cur_ares = merge_ares_chains(cur_ares, tmpr_ares, score_ix, "right");
+
+  return cur_ares;
+}
+
+/* A PACKAGE FOR LOCALLY ALIGNING TWO SEQUENCES WITHIN A BAND:
+
+   To invoke, call LOCAL_ALIGN(A,B,M,N,L,U,W,G,H,&SI,&SJ,&EI,&EJ,MW,f_str).
+   The parameters are explained as follows:
+	A, B : two sequences to be aligned
+	M : the length of sequence A
+	N : the length of sequence B
+	L : lower bound of the band
+	U : upper bound of the band
+	W : scoring table for matches and mismatches
+	G : gap-opening penalty
+	H : gap-extension penalty
+	*SI : starting position of sequence A in the optimal local alignment
+	*SJ : starting position of sequence B in the optimal local alignment
+	*EI : ending position of sequence A in the optimal local alignment
+	*EJ : ending position of sequence B in the optimal local alignment
+	MW  : maximum window size
+	f_str : work structure; f_str->bss holds the band score arrays
+*/
+
+/* bd_walign - banded alignment of aa0 against aa1 around the offset
+   hoff, producing both a score and an alignment script.
+
+   LOCAL_ALIGN() locates the best local alignment inside the band and
+   its end points; B_ALIGN() then builds the script for that region
+   into a_res->res / a_res->nres.  a_res->min0/min1 receive the start
+   points less one, and a_res->max0/max1 the end points, as reported
+   by LOCAL_ALIGN().  a_res->n1 is always set to n1.
+
+   Returns the LOCAL_ALIGN() score, or 0 (after a message on stderr,
+   with no script produced) when that score is not positive.
+*/
+int bd_walign (const unsigned char *aa0, int n0,
+	       const unsigned char *aa1, int n1,
+	       struct pstruct *ppst, 
+	       struct f_struct *f_str, int hoff,
+	       struct a_res_str *a_res)
+{
+   int band_lo, band_hi, band_w;
+   int beg0, beg1, end0, end1;	/* alignment end points from LOCAL_ALIGN */
+   int diag_shift;
+   int sw_score;
+
+   /* band width: optwid, limited by the library length and by the
+      size of the band work arrays */
+   band_w = min (n1, ppst->param_u.fa.optwid);
+   if (band_w > f_str->bss_size) {
+     fprintf(stderr,"*** error [%s:%d] walign window [%d] out of range [%d]\n",
+	     __FILE__, __LINE__, band_w, f_str->bss_size);
+     band_w = f_str->bss_size - 4;
+   }
+
+   /* centre the band on the offset found by the initial scan */
+   band_lo = -band_w/2-hoff;
+   band_hi = band_lo+band_w;
+
+   sw_score = LOCAL_ALIGN(aa0-1, aa1-1, n0, n1, band_lo, band_hi,
+			  ppst->pam2[0], -ppst->gdelval, -ppst->ggapval,
+			  &beg0, &beg1, &end0, &end1,
+			  ppst->param_u.fa.optwid, f_str);
+   a_res->n1 = n1;
+
+   if (sw_score <=0) {
+     fprintf(stderr,"*** [%s:%d] n0/n1: %d/%d hoff: %d window: %d\n",
+	     __FILE__, __LINE__, n0, n1, hoff, band_w);
+     return 0;
+   }
+
+   a_res->min0 = beg0-1;
+   a_res->min1 = beg1-1;
+   a_res->max0 = end0;
+   a_res->max1 = end1;
+
+   /* the sub-problem starts at (beg0,beg1), so the band limits move
+      by the difference of the two start points */
+   diag_shift = beg1-beg0;
+
+   B_ALIGN(aa0-1+beg0-1, aa1-1+beg1-1, end0-beg0+1, end1-beg1+1,
+	   band_lo-diag_shift, band_hi-diag_shift,
+	   ppst->pam2[0], -ppst->gdelval, -ppst->ggapval,
+	   a_res->res, &a_res->nres, ppst->param_u.fa.optwid, n0,
+	   f_str->bss, &f_str->mtp);
+
+   return sw_score;
+}
+
+/* LOCAL_ALIGN - banded local alignment of A[1..M] against B[1..N]
+   (both indexed from 1) that also reports where the best alignment
+   starts and ends.
+
+   low/up : band limits (clipped to [-M, N] before use)
+   W      : scoring table; G, H : gap-open / gap-extend penalties
+   psi,psj: receive the start positions in A and B
+   pei,pej: receive the end positions in A and B
+   MW     : accepted but not used in this function
+   f_str  : f_str->bss supplies the band work arrays
+
+   A forward pass finds the best score and its end point; a backward
+   pass from that end point stops at the first cell whose score equals
+   the best score, which gives the start point.
+
+   Returns the best score.  When either sequence is empty, or when no
+   cell scores above zero, 0 is returned and all four positions are set
+   to 0.  An empty band returns -1 (after a message on stderr), also
+   with the positions set to 0.  If the backward pass fails to find a
+   valid start, the coordinates are printed and the program exits.
+*/
+static int
+LOCAL_ALIGN(const unsigned char *A, const unsigned char *B,
+	    int M, int N,
+	    int low, int up, int **W, int G,int H,
+	    int *psi, int *psj, int *pei, int *pej, int MW,
+	    struct f_struct *f_str)
+{ 
+  int band;
+  register struct bdstr *bssp;
+  int i, j, si, ei;
+  int c, d, e, t, m;
+  int leftd, rightd;
+  int best_score, starti, startj, endi, endj;
+  int *wa, curd;
+  int ib;
+  char flag;
+  
+  bssp = f_str->bss;
+
+  m = G+H;
+  low = max(-M, low);
+  up = min(N, up);
+  
+  /* nothing to align: report defined (zero) coordinates rather than
+     copying the caller's uninitialized *pej around */
+  if (N <= 0) { 
+    *psi = *psj = *pei = *pej = 0;
+    return 0;
+  }
+  if (M <= 0) {
+    *psi = *psj = *pei = *pej = 0;
+    return 0;
+  }
+  band = up-low+1;
+  if (band < 1) {
+    fprintf(stderr,"*** error [%s:%d] low > up is unacceptable!: M: %d N: %d l/u: %d/%d\n",
+	    __FILE__, __LINE__,  M, N, low, up);
+    *psi = *psj = *pei = *pej = 0;
+    return -1;
+  }
+
+  /* the band arrays are already allocated by init_work() */
+
+  /* ---- forward pass: best score and its end point ---- */
+  if (low > 0) leftd = 1;
+  else if (up < 0) leftd = band;
+  else leftd = 1-low;
+  rightd = band;
+  si = max(0,-up);
+  ei = min(M,N-low);
+  bssp[leftd].CC = 0;
+  for (j = leftd+1; j <= rightd; j++) {
+    bssp[j].CC = 0;
+    bssp[j].DD = -G;
+  }
+  /* sentinels just outside the band */
+  bssp[rightd+1].CC = MININT;
+  bssp[rightd+1].DD = MININT;
+  best_score = 0;
+  endi = si;
+  endj = si+low;
+  bssp[leftd-1].CC = MININT;
+  bssp[leftd].DD = -G;
+  for (i = si+1; i <= ei; i++) {
+    if (i > N-up) rightd--;
+    if (leftd > 1) leftd--;
+    wa = W[A[i]];
+    if ((c = bssp[leftd+1].CC-m) > (d = bssp[leftd+1].DD-H)) d = c;
+    if ((ib = leftd+low-1+i ) > 0) c = bssp[leftd].CC+wa[B[ib]];
+    if (d > c) c = d;
+    if (c < 0) c = 0;
+    e = c-G;
+    bssp[leftd].DD = d;
+    bssp[leftd].CC = c;
+    if (c > best_score) {
+      best_score = c;
+      endi = i;
+      endj = ib;
+    }
+    for (curd=leftd+1; curd <= rightd; curd++) {
+      if ((c = c-m) > (e = e-H)) e = c;
+      if ((c = bssp[curd+1].CC-m) > (d = bssp[curd+1].DD-H)) d = c;
+      c = bssp[curd].CC + wa[B[curd+low-1+i]];
+      if (e > c) c = e;
+      if (d > c) c = d;
+      if (c < 0) c = 0;
+      bssp[curd].CC = c;
+      bssp[curd].DD = d;
+      if (c > best_score) {
+	best_score = c;
+	endi = i;
+	endj = curd+low-1+i;
+      }
+    }
+  }
+
+  /* no positive-scoring cell: there is nothing to trace back, and the
+     backward pass below would leave starti/startj unset */
+  if (best_score <= 0) {
+    *psi = *psj = *pei = *pej = 0;
+    return 0;
+  }
+  
+  /* ---- backward pass from (endi,endj): find the start point ---- */
+  starti = startj = -1;	/* stays invalid unless the pass finds the start */
+  leftd = max(1,-endi-low+1);
+  rightd = band-(up-(endj-endi));
+  bssp[rightd].CC = 0;
+  t = -G;
+  for (j = rightd-1; j >= leftd; j--) {
+    bssp[j].CC = t = t-H;
+    bssp[j].DD = t-G;
+  }
+  for (j = rightd+1; j <= band; ++j) bssp[j].CC = MININT;
+  bssp[leftd-1].CC = bssp[leftd-1].DD = MININT;
+  bssp[rightd].DD = -G;
+  flag = 0;
+  for (i = endi; i >= 1; i--) {
+    if (i+low <= 0) leftd++;
+    if (rightd < band) rightd++;
+    wa = W[A[i]];
+    if ((c = bssp[rightd-1].CC-m) > (d = bssp[rightd-1].DD-H)) d = c;
+    if ((ib = rightd+low-1+i) <= N) c = bssp[rightd].CC+wa[B[ib]];
+
+    if (d > c) c = d;
+    e = c-G;
+    bssp[rightd].DD = d;
+    bssp[rightd].CC = c;
+    /* the reverse score reaches best_score exactly at the start cell */
+    if (c == best_score) {
+      starti = i;
+      startj = ib;
+      flag = 1;
+      break;
+    }
+    for (curd=rightd-1; curd >= leftd; curd--) {
+      if ((c = c-m) > (e = e-H)) e = c;
+      if ((c = bssp[curd-1].CC-m) > (d = bssp[curd-1].DD-H)) d = c;
+
+      c = bssp[curd].CC + wa[B[curd+low-1+i]];
+      if (e > c) c = e;
+      if (d > c) c = d;
+      bssp[curd].CC = c;
+      bssp[curd].DD = d;
+      if (c == best_score) {
+	starti = i;
+	startj = curd+low-1+i;
+	flag = 1;
+	break;
+      }
+    }
+    if (flag == 1) break;
+  }
+  
+  /* also catches a backward pass that never matched (start still -1) */
+  if (starti < 0 || starti > M || startj < 0 || startj > N) {
+    printf("starti=%d, startj=%d\n",starti,startj);
+    exit(1);
+  }
+  *psi = starti;
+  *psj = startj;
+  *pei = endi;
+  *pej = endj;
+  return best_score;
+}
+
+/* A PACKAGE FOR GLOBALLY ALIGNING TWO SEQUENCES WITHIN A BAND:
+
+   To invoke, call B_ALIGN(A,B,M,N,L,U,W,G,H,S,MW,MX).
+   The parameters are explained as follows:
+	A, B : two sequences to be aligned
+	M : the length of sequence A
+	N : the length of sequence B
+	L : lower bound of the band
+	U : upper bound of the band
+	W : scoring table for matches and mismatches
+	G : gap-opening penalty
+	H : gap-extension penalty
+	S : script for DISPLAY routine
+	MW : maximum window size
+	MX : maximum length sequence M to be aligned
+*/
+
+/* static int IP; */
+/* static int *MP[3]; */		/* save crossing points */
+/* static int *FP; */			/* forward dividing points */
+/* static char *MT[3]; */		/* 0: rep, 1: del, 2: ins */
+/* static char *FT; */
+
+/* The macros below expect these names in the enclosing scope:
+     g, h  : gap-open and gap-extend penalties (used by gap())
+     sapp  : int ** - *sapp points at the next free script entry
+     last  : int *  - *last holds the most recently written script value
+   Script entries: 0 = replace, k > 0 = insert k, k < 0 = delete -k.
+*/
+#define gap(k)  ((k) <= 0 ? 0 : g+h*(k))	/* k-symbol indel cost */
+
+/* Append "Delete k" op: if the previous op was also a delete
+   (*last < 0) the previous entry is lengthened instead of adding a
+   new one */
+#define DEL(k)				\
+{ if (*last < 0)			\
+    *last = (*sapp)[-1] -= (k);		\
+  else {				\
+    *last = (*sapp)[0] = -(k);		\
+    (*sapp)++;				\
+  }					\
+}
+
+/* Append "Insert k" op: if the previous op was also an insert
+   (*last > 0) the previous entry is lengthened instead of adding a
+   new one */
+#define INS(k)				\
+{ if (*last > 0)			\
+    *last = (*sapp)[-1] += (k);		\
+  else {				\
+    *last = (*sapp)[0] = (k);		\
+    (*sapp)++;				\
+  }					\
+}
+
+#define REP { *last = (*sapp)[0] = 0; (*sapp)++;} /* Append "Replace" op */
+
+/* bg_align(A,B,M,N,low,up,tb,te,...) returns the cost of an optimum conversion between
+  A[1..M] and B[1..N] and appends such a conversion to the current script.
+  tb(te)= 1  no gap-open penalty if the conversion begins(ends) with a delete.
+  tb(te)= 2  no gap-open penalty if the conversion begins(ends) with an insert.
+
+  low, up : band limits for this (sub)problem
+  w, g, h : scoring table, gap-open and gap-extend penalties
+  bss     : band work arrays (CC, DD scores; CP, DP crossing points)
+  mtp     : crossing-point work space (MP, MT, FP, FT, IP)
+  sapp, last : script cursor and last op, as used by DEL/INS/REP
+
+  The function works by divide and conquer: a forward pass records
+  where the best path crosses the middle diagonal of the band, and the
+  pieces between crossings are then aligned by recursive calls.
+
+  The three boundary cases (N <= 0, M <= 0, band width <= 1) append
+  their ops and return 0.
+*/
+static int
+bg_align(const unsigned char *A, const unsigned char *B, 
+	 int M, int N,
+	 int low, int up, int tb, int te,
+	 int **w, int g, int h,
+	 struct bdstr *bss, struct mtp_str *mtp,
+	 int **sapp, int *last)
+{
+  int rmid, k, l, r, v, kt;
+  int t1, t2, t3;
+
+  /* the forward pass has its own scope; only rmid, k, l and v are
+     carried out of it into the recursive part below */
+  {
+  int band, midd;
+  int leftd, rightd;	/* for CC, DD, CP and DP */
+  register int curd;	/* current index for CC, DD CP and DP */
+  register int i, j;
+  register int c, d, e;
+  int t, fr, *wa, ib, m;
+
+  /* Boundary cases: M <= 0 , N <= 0, or up-low <= 0 */
+  if (N <= 0) { 
+    if (M > 0) { DEL(M) }
+    return 0;
+  }
+  if (M <= 0) {
+    INS(N)
+    return 0;
+  }
+  if ((band = up-low+1) <= 1) {
+    for (i = 1; i <= M; i++) { REP }
+    return 0;
+  }
+
+  /* Divide: Find all crossing points */
+
+  /* Initialization */
+  m = g + h;
+
+  midd = band/2 + 1;		/* band index of the middle diagonal */
+  rmid = low + midd - 1;	/* the same diagonal as an offset */
+  leftd = 1-low;
+  rightd = up-low+1;
+  /* seed the crossing-point arrays according to which side of the
+     middle diagonal the start cell (leftd) lies on */
+  if (leftd < midd) {
+    fr = -1;
+    for (j = 0; j < midd; j++) 
+      bss[j].CP = bss[j].DP = -1;
+    for (j = midd; j <= rightd; j++) {
+      bss[j].CP = bss[j].DP = 0;
+    }
+    mtp->MP[0][0] = -1;
+    mtp->MP[1][0] = -1;
+    mtp->MP[2][0] = -1;
+    mtp->MT[0][0] = mtp->MT[1][0] = mtp->MT[2][0] = 0;
+  } else if (leftd > midd) {
+    fr = leftd-midd;
+    for (j = 0; j <= midd; j++) {
+      bss[j].CP = bss[j].DP = fr;
+    }
+    for (j = midd+1; j <= rightd; j++) 
+      bss[j].CP = bss[j].DP = -1;
+    mtp->MP[0][fr] = -1;
+    mtp->MP[1][fr] = -1;
+    mtp->MP[2][fr] = -1;
+    mtp->MT[0][fr] = mtp->MT[1][fr] = mtp->MT[2][fr] = 0;
+  } else {
+    fr = 0;
+    for (j = 0; j < midd; j++) {
+      bss[j].CP = bss[j].DP = 0;
+    }
+    for (j = midd; j <= rightd; j++) {
+      bss[j].CP = bss[j].DP = 0;
+    }
+    mtp->MP[0][0] = -1;
+    mtp->MP[1][0] = -1;
+    mtp->MP[2][0] = -1;
+    mtp->MT[0][0] = mtp->MT[1][0] = mtp->MT[2][0] = 0;
+  }
+
+  /* first row of scores; tb waives the gap-open cost at the start */
+  bss[leftd].CC = 0;
+  if (tb == 2) t = 0;
+  else t = -g;
+  for (j = leftd+1; j <= rightd; j++) {
+    bss[j].CC = t = t-h;
+    bss[j].DD = t-g;
+  }
+  /* sentinels just outside the band */
+  bss[rightd+1].CC = MININT;
+  bss[rightd+1].DD = MININT;
+  if (tb == 1) bss[leftd].DD = 0;
+  else bss[leftd].DD = -g;
+  bss[leftd-1].CC = MININT;
+  for (i = 1; i <= M; i++) {
+    if (i > N-up) rightd--;
+    if (leftd > 1) leftd--;
+    wa = w[A[i]];
+    /* leftmost cell of the row: DP follows whichever source won d */
+    if ((c = bss[leftd+1].CC-m) > (d = bss[leftd+1].DD-h)) {
+      d = c;
+      bss[leftd].DP = bss[leftd+1].CP;
+    } else bss[leftd].DP = bss[leftd+1].DP;
+    if ((ib = leftd+low-1+i) > 0) c = bss[leftd].CC+wa[B[ib]];
+    if (d > c || ib <= 0) {
+      c = d;
+      bss[leftd].CP = bss[leftd].DP;
+    }
+    e = c-g;
+    bss[leftd].DD = d;
+    bss[leftd].CC = c;
+    mtp->IP = bss[leftd].CP;
+    if (leftd == midd) bss[leftd].CP = bss[leftd].DP = mtp->IP = i;
+    for (curd=leftd+1; curd <= rightd; curd++) {
+      if (curd != midd) {
+	/* ordinary cell: propagate scores and crossing points */
+	if ((c = c-m) > (e = e-h)) {
+	  e = c;
+	  mtp->IP = bss[curd-1].CP;
+	}  /* otherwise, mtp->IP is unchanged */
+	if ((c = bss[curd+1].CC-m) > (d = bss[curd+1].DD-h)) {
+	  d = c;
+	  bss[curd].DP = bss[curd+1].CP;
+	} else {
+	  bss[curd].DP = bss[curd+1].DP;
+	}
+	c = bss[curd].CC + wa[B[curd+low-1+i]];
+	if (c < d || c < e) {
+	  if (e > d) {
+	    c = e;
+	    bss[curd].CP = mtp->IP;
+	  } else {
+	    c = d;
+	    bss[curd].CP = bss[curd].DP;
+	  }
+	} /* otherwise, CP is unchanged */
+	bss[curd].CC = c;
+	bss[curd].DD = d;
+      } else {
+	/* cell on the middle diagonal: record in MP/MT where the path
+	   into row i came from and by which op (0 rep, 1 del, 2 ins) */
+	if ((c = c-m) > (e = e-h)) {
+	  e = c;
+	  mtp->MP[1][i] = bss[curd-1].CP;
+	  mtp->MT[1][i] = 2;
+	} else {
+	  mtp->MP[1][i] = mtp->IP;
+	  mtp->MT[1][i] = 2;
+	}
+	if ((c = bss[curd+1].CC-m) > (d = bss[curd+1].DD-h)) {
+	  d = c;
+	  mtp->MP[2][i] = bss[curd+1].CP;
+	  mtp->MT[2][i] = 1;
+	} else {
+	  mtp->MP[2][i] = bss[curd+1].DP;
+	  mtp->MT[2][i] = 1;
+	}
+	c = bss[curd].CC + wa[B[curd+low-1+i]];
+	if (c < d || c < e) {
+	  if (e > d) {
+	    c = e;
+	    mtp->MP[0][i] = mtp->MP[1][i];
+	    mtp->MT[0][i] = 2;
+	  } else {
+	    c = d;
+	    mtp->MP[0][i] = mtp->MP[2][i];
+	    mtp->MT[0][i] = 1;
+	  }
+	} else {
+	  mtp->MP[0][i] = i-1;
+	  mtp->MT[0][i] = 0;
+	}
+	if (c-g > e) {
+	  mtp->MP[1][i] = mtp->MP[0][i];
+	  mtp->MT[1][i] = mtp->MT[0][i];
+	}
+	if (c-g > d) {
+	  mtp->MP[2][i] = mtp->MP[0][i];
+	  mtp->MT[2][i] = mtp->MT[0][i];
+	}
+	/* from here on, paths through this cell cross at row i */
+	bss[curd].CP = bss[curd].DP = mtp->IP = i;
+	bss[curd].CC = c;
+	bss[curd].DD = d;
+      }
+    }
+  }
+
+  /* decide which path to be traced back */
+  if (te == 1 && d+g > c) {
+    k = bss[rightd].DP;
+    l = 2;
+  } else if (te == 2 && e+g > c) {
+    k = mtp->IP;
+    l = 1;
+  } else {
+    k = bss[rightd].CP;
+    l = 0;
+  }
+  /* the end cell's side of the middle diagonal overrides l */
+  if (rmid > N-M) l = 2;
+  else if (rmid < N-M) l = 1;
+  v = c;	/* value returned to the caller */
+  }
+  /* Conquer: Solve subproblems recursively */
+
+  /* trace back: reverse the MP/MT chain into the forward lists FP/FT */
+  r = -1;	
+  for (; k > -1; r=k, k=mtp->MP[l][r], l=mtp->MT[l][r]){
+    mtp->FP[k] = r;
+    mtp->FT[k] = l;	/* l=0,1,2 */
+  }
+  /* forward dividing */
+  if (r == -1) { /* optimal alignment did not cross the middle diagonal */
+    /* retry on the half of the band that contains the path */
+    if (rmid < 0) {
+      bg_align(A,B,M,N,rmid+1,up,tb,te,w,g,h,bss, mtp, sapp, last);
+    }
+    else {
+      bg_align(A,B,M,N,low,rmid-1,tb,te,w,g,h,bss, mtp, sapp, last);
+    }
+  } else {
+    k = r;
+    l = mtp->FP[k];
+    kt = mtp->FT[k];
+
+    /* first block */
+    if (rmid < 0) {
+      bg_align(A,B,r-1,r+rmid,rmid+1,min(up,r+rmid),tb,1,w,g,h,bss, mtp, sapp,last);
+      DEL(1)
+    } else if (rmid > 0) {
+      bg_align(A,B,r,r+rmid-1,max(-r,low),rmid-1,tb,2,w,g,h,bss, mtp, sapp,last);
+      INS(1)
+    }
+
+    /* intermediate blocks */
+    t2 = up-rmid-1;
+    t3 = low-rmid+1;
+    for (; l > -1; k = l, l = mtp->FP[k], kt = mtp->FT[k]) {
+      if (kt == 0) { REP }
+      else if (kt == 1) { /* right-hand side triangle */
+	INS(1)
+	t1 = l-k-1;
+	bg_align(A+k,B+k+rmid+1,t1,t1,0,min(t1,t2),2,1,w,g,h,bss, mtp, sapp,last);
+	DEL(1)
+      }
+      else { /* kt == 2, left-hand side triangle */
+	DEL(1)
+	t1 = l-k-1;
+	bg_align(A+k+1,B+k+rmid,t1,t1,max(-t1,t3),0,1,2,w,g,h,bss, mtp, sapp,last);
+	INS(1)
+      }
+    }
+
+    /* last block */
+    if (N-M > rmid) {
+      INS(1)
+      t1 = k+rmid+1;
+      bg_align(A+k,B+t1,M-k,N-t1,0,min(N-t1,t2),2,te,w,g,h,bss, mtp, sapp,last);
+    } else if (N-M < rmid) {
+      DEL(1)
+      t1 = M-(k+1);
+      bg_align(A+k+1,B+k+rmid,t1,N-(k+rmid),max(-t1,t3),0,1,te,w,g,h,
+	       bss,mtp,sapp,last);
+    }
+  }
+  return(v);
+}
+
+/* B_ALIGN - global alignment of A[1..M] against B[1..N] within a band,
+   producing an edit script.
+
+   low, up : band limits; they are widened here so that the band always
+	     contains both the start and the end diagonal
+   W, G, H : scoring table, gap-open and gap-extend penalties
+   S       : receives the script (0 = replace, k>0 = insert k,
+	     k<0 = delete -k)
+   nS      : receives the number of alignment columns of the script
+   MW      : accepted but not used in this function
+   MX      : the crossing-point work arrays get MX+1 entries each
+   bss,mtp : work space handed on to bg_align(); the mtp arrays are
+	     allocated here when mtp->MT[0] is NULL and freed again
+	     before returning
+
+   Returns the alignment score.  In the normal case the script is
+   re-scored with BCHECK_SCORE() and a mismatch is reported on stdout.
+
+   The degenerate cases (empty sequence, band of width 1) return early;
+   they set *nS as well, to the same column count BCHECK_SCORE() would
+   compute for the script they append, so the caller never sees a
+   stale length.
+*/
+int B_ALIGN(const unsigned char *A, const unsigned char *B,
+	    int M, int N,
+	    int low, int up, int **W, int G, int H, int *S, int *nS,
+	    int MW, int MX, struct bdstr *bss, struct mtp_str *mtp)
+{ 
+  int c, i;
+  int g, h;	/* used by the gap() macro */
+  size_t mj;
+  int check_score;
+  int **sapp, *sapp_v, *last, last_v;
+
+  g = G;
+  h = H;
+  /* script cursor and "last op" cell used by DEL/INS/REP */
+  sapp_v = S;
+  sapp = &sapp_v;
+
+  last_v = 0;
+  last = &last_v;
+
+  low = min(max(-M, low),min(N-M,0));
+  up = max(min(N, up),max(N-M,0));
+
+  if (N <= 0) { 
+    /* only deletions (or nothing at all) */
+    if (M > 0) { DEL(M); }
+    *nS = (M > 0) ? M : 0;
+    return -gap(M);
+  }
+  if (M <= 0) {
+    /* only insertions */
+    INS(N);
+    *nS = N;
+    return -gap(N);
+  }
+  if (up-low+1 <= 1) {
+    /* band of width 1: a straight run of M replacements */
+    c = 0;
+    for (i = 1; i <= M; i++) {
+      REP;
+      c += W[A[i]][B[i]];
+    }
+    *nS = M;
+    return c;
+  }
+
+  if (mtp->MT[0]==NULL) {
+    mj = (MX+1) * sizeof(int);
+    mtp->MT[0] = (int *) ckalloc(mj);
+    mtp->MT[1] = (int *) ckalloc(mj);
+    mtp->MT[2] = (int *) ckalloc(mj);
+    mtp->FT = (int *) ckalloc(mj);
+
+    mtp->MP[0] = (int *) ckalloc(mj);
+    mtp->MP[1] = (int *) ckalloc(mj);
+    mtp->MP[2] = (int *) ckalloc(mj);
+    mtp->FP = (int *) ckalloc(mj);
+  }
+
+  c = bg_align(A,B,M,N,low,up,0,0,W,G,H,bss, mtp, sapp, last);
+
+  /* re-score the script; this also stores the column count in *nS */
+  check_score = BCHECK_SCORE(A,B,M,N,S,W,G,H,nS);
+
+  free(mtp->FP); free(mtp->MP[2]); free(mtp->MP[1]); free(mtp->MP[0]);
+  free(mtp->FT); free(mtp->MT[2]); free(mtp->MT[1]); free(mtp->MT[0]);
+  mtp->MT[0]=NULL;	/* forces re-allocation on the next call */
+
+  if (check_score != c)
+    printf("\nBCheck_score=%d != %d\n", check_score,c);
+  return c;
+}
+
+/* do_walign - build the alignment list for aa0 (length n0) against aa1.
+
+   Allocates a zeroed a_res_str, temporarily overrides several
+   ppst->param_u.fa settings (use_E_thresholds=0, optflag=1, optcut=0,
+   and optwid doubled unless optwid_set is true), calls bd_malign(), and
+   then restores the saved settings.  With TFASTA defined, aa1 is first
+   translated by aatran() into f_str->aa1x and the value aatran() returns
+   is kept in f_str->n10; otherwise aa1/n1 are used unchanged.
+
+   On return every node of the result list has its index field numbered
+   0,1,2,... in list order.  *have_ares is set to 0x3 on entry.
+
+   Returns NULL if the initial allocation fails, otherwise the list
+   returned by bd_malign().
+
+   NOTE(review): hoff, score and rst are declared but not used here. */
+struct a_res_str *
+do_walign (const unsigned char *aa0, int n0,
+	   const unsigned char *aa1, int n1,
+	   int frame, int repeat_thresh,
+	   struct pstruct *ppst, 
+	   struct f_struct *f_str, 
+	   int *have_ares)
+{
+  int hoff, use_E_thresholds_s, optflag_s, optcut_s, optwid_s, n10, score;
+  const unsigned char *aa1p;
+  struct rstruct rst;
+  struct a_res_str *a_res, *tmp_a_res;
+  int a_res_index;
+#ifdef DEBUG
+  unsigned long adler32_crc;
+#endif
+
+#ifdef TFASTA  
+  /* align against the translation of aa1 in the requested frame */
+  f_str->n10 = n10=aatran(aa1,f_str->aa1x,n1,frame);
+  aa1p = f_str->aa1x;
+#else
+  n10 = n1;
+  aa1p = aa1;
+#endif
+
+#ifdef DEBUG
+  /* checksum aa1 so that an unexpected modification can be detected below */
+  adler32_crc = adler32(1L,aa1,n1);
+#endif
+
+  *have_ares = 0x3;	/* set 0x2 bit to indicate local copy */
+
+  if ((a_res = (struct a_res_str *)calloc(1, sizeof(struct a_res_str)))==NULL) {
+    fprintf(stderr,"*** error [%s:%d] Cannot allocate a_res", __FILE__, __LINE__);
+    return NULL;
+  }
+
+  /* save the search parameters that are overridden for the alignment */
+  use_E_thresholds_s = ppst->param_u.fa.use_E_thresholds;
+  optflag_s = ppst->param_u.fa.optflag;
+  optcut_s = ppst->param_u.fa.optcut;
+  optwid_s = ppst->param_u.fa.optwid;
+  ppst->param_u.fa.use_E_thresholds = 0;
+  ppst->param_u.fa.optflag = 1;
+  ppst->param_u.fa.optcut = 0;
+  if (!ppst->param_u.fa.optwid_set) {
+    ppst->param_u.fa.optwid *= 2;
+  }
+
+  a_res = bd_malign(aa0, n0, aa1p, n10,
+		    repeat_thresh, f_str->max_res,
+		    ppst, f_str, a_res,1);
+
+  /* put the caller's search parameters back */
+  ppst->param_u.fa.use_E_thresholds = use_E_thresholds_s;
+  ppst->param_u.fa.optflag = optflag_s;
+  ppst->param_u.fa.optcut = optcut_s;
+  ppst->param_u.fa.optwid = optwid_s;
+
+#ifdef DEBUG
+  if (adler32(1L,aa1,n1) != adler32_crc) {
+    fprintf(stderr,"*** error [%s:%d] adler32_crc mismatch n1: %d\n",__FILE__, __LINE__, n1);
+  }
+#endif
+
+  /* number the alignments in list order */
+  a_res_index = 0;
+  for (tmp_a_res=a_res; tmp_a_res; tmp_a_res = tmp_a_res->next) {
+    tmp_a_res->index = a_res_index++;
+  }
+
+  return a_res;
+}
+
+/* pre_cons - with TFASTA defined, translate aa1 (n1 residues, given frame)
+   into f_str->aa1x and record aatran()'s return value in f_str->n10.
+   Without TFASTA this function does nothing. */
+void
+pre_cons(const unsigned char *aa1, int n1, int frame, struct f_struct *f_str) {
+#ifdef TFASTA
+  int n_translated;
+
+  n_translated = aatran(aa1,f_str->aa1x,n1,frame);
+  f_str->n10 = n_translated;
+#endif
+}
+
+/* aln_func_vals - fill in the coordinate-mapping fields of *aln
+   (qlfact, qlrev, llfact, llmult, frame, llrev) from the frame argument.
+   Called from calcons, calc_id and calc_code. */
+void 
+aln_func_vals(int frame, struct a_struct *aln) {
+
+#ifdef TFASTA
+  /* query is used as-is; library coordinates are scaled by 3 */
+  aln->qlrev = 0;
+  aln->qlfact = 1;
+  aln->llfact = 3;
+  aln->llmult = 3;
+  if (frame > 2) {
+    aln->llrev = 1;
+    aln->frame = 3 - frame;
+  }
+  else {
+    aln->llrev = 0;
+    aln->frame = frame;
+  }
+#else	/* FASTA */
+  /* all scale factors are 1; a positive frame marks a reversed query */
+  aln->qlrev = (frame > 0) ? 1 : 0;
+  aln->llrev = 0;
+  aln->frame = 0;
+  aln->qlfact = 1;
+  aln->llfact = 1;
+  aln->llmult = 1;
+#endif
+}
diff --git a/src/dropnfa.h b/src/dropnfa.h
new file mode 100644
index 0000000..09fe455
--- /dev/null
+++ b/src/dropnfa.h
@@ -0,0 +1,84 @@
+
+/* $Id: dropnfa.h 795 2011-07-04 15:36:12Z wrp $ */
+/* $Revision: 795 $  */
+
+/* global definitions shared by dropnfa.c and altivec.c */
+
+#ifndef MAXSAV
+#define MAXSAV 10
+#endif
+
+
+
+/* One entry per diagonal: the state of the run currently being extended
+   on that diagonal, plus a link to the saved best-score record. */
+struct dstruct		/* diagonal structure for saving current run */
+{			
+   int     score;	/* hash score of current match */
+   int     start;	/* start of current match */
+   int     stop;	/* end of current match */
+   struct savestr *dmax;   /* location in vmax[] where best score data saved */
+};
+
+/* A saved high-scoring region: its scores, its diagonal and its extent
+   in the library sequence.  struct dstruct.dmax points at one of these. */
+struct savestr
+{
+   int     score;		/* pam score with segment optimization */
+   int     score0;		/* pam score of best single segment */
+   int     gscore;		/* score from global match */
+   int     dp;			/* diagonal of match */
+   int     start;		/* start of match in lib seq */
+   int     stop;		/* end of match in lib seq */
+};
+
+/* Work cell handed to bg_align()/B_ALIGN() as "bss".
+   NOTE(review): the meaning of the four fields is not visible in this
+   header -- presumably CC/DD are scores and CP/DP the matching
+   crossing points; confirm against bg_align() in dropnfa.c. */
+struct bdstr {
+  int CC, DD, CP, DP;
+};
+
+/* Traceback work arrays for bg_align().  B_ALIGN() allocates every array
+   with MX+1 ints when MT[0] is NULL, frees them all after the alignment
+   and resets MT[0] to NULL. */
+struct mtp_str {		/* used to hold previous static values */
+  int IP;
+  int *MP[3];			/* save crossing points */
+  int *FP;			/* forward dividing points */
+  int *MT[3];			/* 0: rep, 1: del, 2: ins  -- was char, now int */
+  int *FT;			/* was char, now int */
+};
+
+/* Per-query function state shared by dropnfa.c and altivec.c (see the
+   file header comment above).  Comments marked "presumably" are inferred
+   from field names only and should be confirmed against dropnfa.c. */
+struct f_struct {
+  struct dstruct *diag;		/* per-diagonal run state */
+  int ndo;
+  int hmask;			/* hash constants */
+  int *pamh1;			/* pam based array */
+  int *pamh2;			/* pam based kfact array */
+  int *link, *harr;		/* hash arrays */
+  int kshft;			/* shift width */
+  int c_gap, opt_cut;
+#ifdef TFASTA
+  unsigned char *aa1x;		/* output buffer given to aatran() */
+  int n10;			/* value returned by aatran() for aa1x */
+#endif
+  struct bdstr *bss;		/* presumably the work cells passed to bg_align() */
+  struct mtp_str mtp;		/* traceback arrays, see struct mtp_str */
+  int bss_size;
+  struct swstr *ss;
+  struct swstr *f_ss, *r_ss;
+  int *waa_s, *waa_a;
+  int **pam2p[2];
+  int max_res;			/* passed to bd_malign() by do_walign() */
+  double aa0_f[MAXSQ];
+  double *kar_p;
+
+#ifdef FA_ALTIVEC
+  int vec_len;
+  vecInt **vec_matrix;
+  vector signed ALTIVEC_SIZE *vec_HH;
+  vector signed ALTIVEC_SIZE *vec_EE;
+
+  int vec_len2;
+  vecInt2 **vec_matrix2;
+  vector signed ALTIVEC_SIZE2 *vec_HH2;
+  vector signed ALTIVEC_SIZE2 *vec_EE2;
+#endif
+};
+
+static int
+FLOCAL_ALIGN(const unsigned char *A, const unsigned char *B,
+	     int M, int N, int low, int up,
+	     int **W, int G,int H, int MW,
+	     struct f_struct *f_str);
diff --git a/src/dropnnw2.c b/src/dropnnw2.c
new file mode 100644
index 0000000..a1112e6
--- /dev/null
+++ b/src/dropnnw2.c
@@ -0,0 +1,900 @@
+/* $Id: dropnnw2.c $ */
+/* $Revision: 1140 $  */
+
+/* copyright (c) 1996, 2007, 2014 by William R. Pearson and The Rector &
+   Visitors of the University of Virginia */
+
+/* Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under this License is distributed on an "AS
+   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+   express or implied.  See the License for the specific language
+   governing permissions and limitations under the License. 
+*/
+
+/* 4-April-2007 - convert to global alignment */
+
+/* 17-Aug-2006 - removed globals *sapp/last - alignment should be thread safe */
+
+/* 12-Oct-2005 - converted to use a_res and aln for alignment coordinates */
+
+/* 4-Nov-2004 - Diagonal Altivec Smith-Waterman included */
+
+/* 14-May-2003 - modified to return alignment start at 0, rather than
+   1, for begin:end alignments
+
+   25-Feb-2003 - modified to support Altivec parallel Smith-Waterman
+
+   22-Sep-2003 - removed Altivec support at request of Sencel lawyers
+*/
+
+/* this code uses an implementation of the Smith-Waterman algorithm
+   designed by Phil Green, U. of Washington, that is 1.5 - 2X faster
+   than my Miller and Myers implementation. */
+
+/* the shortcuts used in this program prevent it from calculating scores
+   that are less than the gap penalty for the first residue in a gap. As
+   a result this code cannot be used with very large gap penalties, or
+   with very short sequences, and probably should not be used with prss3.
+*/
+
+/* version 3.2 fixes a subtle bug that was encountered while running
+   do_walign() interspersed with do_work().  This happens only with -m
+   9 and pvcomplib.  The fix was to more explicitly zero-out ss[] at
+   the beginning of do_work.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <math.h>
+
+#include "defs.h"
+#include "param.h"
+
+static char *verstr="6.0 April 2007";
+
+#include "dropgsw2.h"
+
+#define DROP_INTERN
+#include "drop_func.h"
+
+#ifdef SW_SSE2
+#ifdef GLOBAL_GLOBAL
+#include "global_sse2.h"
+#define GLOBAL_BYTE global_sse2_byte
+#define GLOBAL_WORD global_sse2_word
+#else
+#include "glocal_sse2.h"
+#define GLOBAL_BYTE glocal_sse2_byte
+#define GLOBAL_WORD glocal_sse2_word
+#endif
+#endif
+
+/* one dynamic-programming cell per query position: H is the best score
+   saved for the position, E the saved gap score (see nw_walign) */
+struct swstr {int H, E;};
+
+extern void init_karlin(const unsigned char *aa0, int n0, struct pstruct *ppst,
+			double *aa0_f, double **kp);
+extern int do_karlin(const unsigned char *aa1, int n1,
+		     int **pam2, const struct pstruct *ppst,
+		     double *aa0_f, double *kar_p, double *lambda, double *H);
+
+extern int
+NW_ALIGN(int IW, const unsigned char *B,
+	 int M, int N,
+	 int **W, int G, int H, int *res, int *nres
+	 );
+
+static int
+FGLOBAL_ALIGN(int *pwaa, const unsigned char *aa1,
+	      int n0, int n1,
+	      int GG,int HH,
+	      struct swstr *ss);
+
+extern struct a_res_str *
+nsw_malign (int ***pam2p, int pam_ix, int n0,
+	    const unsigned char *aa1, int n1,
+	    int score_thresh, int max_res,
+	    int gdelval, int ggapval, 
+	    struct swstr *ss, 
+	    struct a_res_str *cur_ares,
+	    int (*fn_walign)
+	    (
+	     int **pam2p, int n0,
+	     const unsigned char *aa1, int n1,
+	     int q, int r,
+	     struct swstr *ss,
+	     struct a_res_str *a_res
+	     ),
+	    int do_rep
+	    );
+
+static 
+void DISPLAY(const unsigned char *A, const unsigned char *B, 
+	     int M, int N,
+	     int *S, int AP, int BP, char *sq);
+
+extern void aancpy(char *to, char *from, int count, struct pstruct *ppst);
+
+/* init_work - allocate and fill the per-query state used by do_work()
+   and do_walign().
+
+   aa0/n0   query sequence and its length
+   ppst     search parameters; n1_low (and, with GLOBAL_GLOBAL, n1_high)
+            are given defaults derived from n0 when still unset
+   f_arg    receives the newly allocated struct f_struct
+
+   The function builds:
+     - ss[]       the n0+2 entry score row (f_str->ss points one past the
+                  start of the allocation; close_work() undoes this)
+     - pam2p[0/1] position-indexed score tables, pam2p[k][pos][residue]
+     - waa_s/a    the same scores laid out residue-major for fast scanning
+     - with SW_SSE2, the striped byte/word profiles used by the SSE2 code
+
+   Any allocation failure prints a message on stderr and exits. */
+
+void
+init_work (unsigned char *aa0, int n0,
+	   struct pstruct *ppst,
+	   struct f_struct **f_arg)
+{
+  int ip;
+  int *pwaa_s, *pwaa_a;
+  int e, f, i, j;
+  struct f_struct *f_str;
+  int **pam2p;
+  struct swstr *ss;
+  int nsq;
+
+#if defined(SW_ALTIVEC) || defined(SW_SSE2)
+  int data, bias, ceiling, gap;
+  unsigned char *  pc;
+  unsigned short * ps;
+  int  overflow;
+
+  int n_count;
+  int col_len;
+#endif
+
+  if (ppst->ext_sq_set) {
+    nsq = ppst->nsqx; ip = 1;
+  }
+  else {
+    /* with memory mapped databases with lc chars, always nsqx */
+    nsq = ppst->nsqx; ip = 0;
+  }
+
+  /* initialize range of length appropriate */
+
+  if (ppst->n1_low == 0 ) {
+    ppst->n1_low = (int)(0.75 * (float)n0 + 0.5);
+  }
+
+#if defined(GLOBAL_GLOBAL)
+  if (ppst->n1_high == BIGNUM) {
+    ppst->n1_high = (int)(1.33 * (float)n0 - 0.5);
+  }
+#endif
+
+  /* allocate space for function globals */
+  if ((f_str = (struct f_struct *)calloc(1,sizeof(struct f_struct))) == NULL) {
+    /* f_str is dereferenced immediately below, so fail here instead */
+    fprintf(stderr,"cannot allocate f_str struct\n");
+    exit(1);
+  }
+
+  if((ppst->zsflag%10) == 6) {
+    f_str->kar_p = NULL;
+    init_karlin(aa0, n0, ppst, &f_str->aa0_f[0], &f_str->kar_p);
+  }
+  
+  /* allocate space for the scoring arrays */
+  if ((ss = (struct swstr *) calloc (n0+2, sizeof (struct swstr)))
+      == NULL) {
+    fprintf (stderr, "cannot allocate ss array %3d\n", n0);
+    exit (1);
+  }
+  ss++;		/* close_work() steps back before calling free() */
+
+  f_str->ss = ss;
+
+  /* initialize variable (-S) pam matrix */
+  if ((f_str->waa_s= (int *)calloc((nsq+1)*(n0+1),sizeof(int))) == NULL) {
+    fprintf(stderr,"cannot allocate waa_s array %3d\n",nsq*n0);
+    exit(1);
+  }
+
+  /* initialize pam2p[1] pointers */
+  if ((f_str->pam2p[1]= (int **)calloc((n0+1),sizeof(int *))) == NULL) {
+    fprintf(stderr,"cannot allocate pam2p[1] array %3d\n",n0);
+    exit(1);
+  }
+
+  pam2p = f_str->pam2p[1];
+  if ((pam2p[0]=(int *)calloc((nsq+1)*(n0+1),sizeof(int))) == NULL) {
+    fprintf(stderr,"cannot allocate pam2p[1][] array %3d\n",nsq*n0);
+    exit(1);
+  }
+
+  /* row i of the table starts i*(nsq+1) ints into the single block */
+  for (i=1; i<n0; i++) {
+    pam2p[i]= pam2p[0] + (i*(nsq+1));
+  }
+
+  /* initialize universal (alignment) matrix */
+  if ((f_str->waa_a= (int *)calloc((nsq+1)*(n0+1),sizeof(int))) == NULL) {
+    fprintf(stderr,"cannot allocate waa_a struct %3d\n",nsq*n0);
+    exit(1);
+  }
+   
+  /* initialize pam2p[0] pointers */
+  if ((f_str->pam2p[0]= (int **)calloc((n0+1),sizeof(int *))) == NULL) {
+    fprintf(stderr,"cannot allocate pam2p[0] array %3d\n",n0);
+    exit(1);
+  }
+
+  pam2p = f_str->pam2p[0];
+  if ((pam2p[0]=(int *)calloc((nsq+1)*(n0+1),sizeof(int))) == NULL) {
+    fprintf(stderr,"cannot allocate pam2p[0][] array %3d\n",nsq*n0);
+    exit(1);
+  }
+
+  for (i=1; i<n0; i++) {
+    pam2p[i]= pam2p[0] + (i*(nsq+1));
+  }
+
+  /* 
+     pwaa effectively has a sequence profile --
+     pwaa[0..n0-1] has pam score for residue 0 (-BIGNUM)
+     pwaa[n0..2n0-1] has pam scores for residue 1 (A)
+     pwaa[2n0..3n-1] has pam scores for residue 2 (R), ...
+
+     thus: pwaa = f_str->waa_s + (*aa1p++)*n0; sets up pwaa so that
+     *pwaa++ rapidly moves though the scores of the aa1p[] position
+     without further indexing
+
+     For a real sequence profile, pwaa[0..n0-1] vs ['A'] could have
+     a different score in each position.
+  */
+
+  pwaa_s = f_str->waa_s;
+  pwaa_a = f_str->waa_a;
+  if (ppst->pam_pssm) {
+    for (e = 0; e <nsq; e++)	{	/* for each residue in the alphabet */
+      for (f = 0; f < n0; f++) {	/* for each position in aa0 */
+	*pwaa_s++ = f_str->pam2p[ip][f][e] = ppst->pam2p[ip][f][e];
+	*pwaa_a++ = f_str->pam2p[0][f][e]  = ppst->pam2p[0][f][e];
+      }
+    }
+  }
+  else {	/* initialize scanning matrix */
+    for (e = 0; e <nsq; e++)	/* for each residue in the alphabet */
+      for (f = 0; f < n0; f++)	{	/* for each position in aa0 */
+	*pwaa_s++ = f_str->pam2p[ip][f][e]= ppst->pam2[ip][aa0[f]][e];
+	*pwaa_a++ = f_str->pam2p[0][f][e] = ppst->pam2[0][aa0[f]][e];
+      }
+  }
+
+  /* these structures are used for producing alignments */
+
+#if defined(SW_SSE2)
+  /* First we allocate memory for the workspace - i.e. two rows for H and
+   * one row for F.  We also need enough space to hold a temporary
+   * scoring profile which will be query_length * 16 (sse2 word length).
+   * Since this might be run on Linux or AIX too, we don't assume 
+   * anything about the memory allocation but align it ourselves.
+   */
+  f_str->workspace_memory  = (void *)malloc(3*16*(MAXTST+MAXLIB+32)+256);
+  if (f_str->workspace_memory == NULL) {
+    fprintf(stderr,"cannot allocate SSE2 workspace\n");
+    exit(1);
+  }
+  /* round the pointer up to the next 256-byte boundary */
+  f_str->workspace  = (void *)((((size_t)f_str->workspace_memory) + 255) & (~0xff));
+
+  /* We always use a scoring profile for the SSE2 implementation, but the layout
+   * is a bit strange.  The scoring profile is parallel to the query, but is
+   * accessed in a stripped pattern.  The query is divided into equal length
+   * segments.  The number of segments is equal to the number of elements
+   * processed in the SSE2 register.  For 8-bit calculations, the query will
+   * be divided into 16 equal length parts.  If the query is not long enough
+   * to fill the last segment, it will be filled with neutral weights.  The
+   * first element in the SSE register will hold a value from the first segment,
+   * the second element of the SSE register will hold a value from the
+   * second segment and so on.  So if the query length is 288, then each
+   * segment will have a length of 18.  So the first 16 bytes will  have
+   * the following weights: Q1, Q19, Q37, ... Q271; the next 16 bytes will
+   * have the following weights: Q2, Q20, Q38, ... Q272; and so on until
+   * all parts of all segments have been written.  The last seqment will
+   * have the following weights: Q18, Q36, Q54, ... Q288.  This will be
+   * done for the entire alphabet.
+   */
+
+  f_str->word_score_memory = (void *)malloc((n0 + 32) * sizeof(short) * (nsq + 1) + 256);
+  f_str->byte_score_memory = (void *)malloc((n0 + 32) * sizeof(char) * (nsq + 1) + 256);
+  if (f_str->word_score_memory == NULL || f_str->byte_score_memory == NULL) {
+    fprintf(stderr,"cannot allocate SSE2 score profiles %3d\n",nsq*n0);
+    exit(1);
+  }
+
+  f_str->word_score = (unsigned short *)((((size_t)f_str->word_score_memory) + 255) & (~0xff));
+  f_str->byte_score = (unsigned char *)((((size_t)f_str->byte_score_memory) + 255) & (~0xff));
+
+  overflow = 0;
+  gap = -2 * ppst->ggapval;
+
+  if (ppst->pam_pssm) {
+    /* Use a position-specific scoring profile. 
+     * This is essentially what we are going to construct anyway, but we'll
+     * reorder it to suit sse2.
+     */       
+    bias = 127;
+    ceiling = 0;
+    for (i = 1; i < nsq ; i++) {
+      for (j = 0; j < n0 ; j++) {
+        data = ppst->pam2p[ip][j][i];
+        if (data < bias) {
+          bias = data;
+        }
+        if (data > ceiling) {
+          ceiling = data;
+        }
+      }
+    }
+    bias += gap;
+    if (bias > 0) {
+      bias = 0;
+    }
+
+
+    /* Fill our specially organized byte- and word-size scoring arrays. */
+    ps = f_str->word_score;
+    col_len = (n0 + 7) / 8;
+    n_count = (n0 + 7) & 0xfffffff8;
+    for (f = 0; f < n_count; ++f) {
+      *ps++ = 0;
+    }
+    for (f = 1; f < nsq ; f++) {
+      for (e = 0; e < col_len; e++) {
+        for (i = e; i < n_count; i += col_len) {
+          if ( i < n0) { data = ppst->pam2p[ip][i][f] + gap;}
+          else {data = 0;}
+          *ps++ = (unsigned short)(data);
+        }
+      }
+    }
+    pc = f_str->byte_score;
+    col_len = (n0 + 15) / 16;
+    n_count = (n0 + 15) & 0xfffffff0;
+    for (f = 0; f < n_count; ++f) {
+      *pc++ = 0;
+    }
+    for (f = 1; f < nsq ; f++) {
+      for (e = 0; e < col_len; e++) {
+        for (i = e; i < n_count; i += col_len) {
+          if ( i < n0 ) { data = ppst->pam2p[ip][i][f] + gap;}
+          else {data = 0;}
+          if (data > 255) {
+            printf("Fatal error. data: %d bias: %d, position: %d/%d, "
+                   "Score out of range for 8-bit SSE2 datatype.\n",
+                   data, bias, f, e);
+            exit(1);
+          }
+          *pc++ = (unsigned char)(data-bias);
+        }
+      }
+    }
+  } else {
+    /* Classical simple substitution matrix */
+    /* Find the bias to use in the substitution matrix */
+    bias = 127;
+    ceiling = 0;
+    for (i = 1; i < nsq ; i++) {
+      for (j = 1; j < nsq ; j++) {
+        data = ppst->pam2[ip][i][j];
+        if (data < bias) {
+          bias = data;
+        }
+        if (data > ceiling) {
+          ceiling = data;
+        }
+      }
+    }
+    bias += gap;
+    if (bias > 0) {
+      bias = 0;
+    }
+
+    /* Fill our specially organized byte- and word-size scoring arrays. */
+    ps = f_str->word_score;
+    col_len = (n0 + 7) / 8;
+    n_count = (n0 + 7) & 0xfffffff8;
+    for (f = 0; f < n_count; ++f) {
+      *ps++ = 0;
+    }
+    for (f = 1; f < nsq ; f++) {
+      for (e = 0; e < col_len; e++) {
+        for (i = e; i < n_count; i += col_len) {
+          if (i >= n0) {
+            data = 0;
+          } else {
+            data = ppst->pam2[ip][aa0[i]][f] + gap;
+          }
+          *ps++ = (unsigned short)(data);
+        }
+      }
+    }
+
+    pc = f_str->byte_score;
+    col_len = (n0 + 15) / 16;
+    n_count = (n0 + 15) & 0xfffffff0;
+    for (f = 0; f < n_count; ++f) {
+      *pc++ = 0;
+    }
+    for (f = 1; f < nsq ; f++) {
+      for (e = 0; e < col_len; e++) {
+        for (i = e; i < n_count; i += col_len) {
+          if (i >= n0) {
+            data = 0;
+          } else {
+            data = ppst->pam2[ip][aa0[i]][f] + gap;
+          }
+          if (data > 255) {
+            printf("Fatal error. data: %d bias: %d, position: %d/%d, "
+                   "Score out of range for 8-bit SSE2 datatype.\n",
+                   data, bias, f, e);
+            exit(1);
+          }
+          *pc++ = (unsigned char)(data-bias);
+        }
+      }
+    }
+  }
+       
+  f_str->ceiling = (unsigned char) (ceiling + gap - bias);
+  f_str->bias = (unsigned char) (-bias);
+  f_str->alphabet_size = nsq;
+
+  /* Some variable to keep track of how many 8-bit runs we need to rerun
+   * in 16-bit accuracy. If there are too many reruns it can be faster
+   * to use 16-bit alignments directly. 
+   */
+   
+  /* We can only do 8-bit alignments if the scores were small enough. */
+  /* NOTE(review): overflow is never set to non-zero above, so try_8bit
+     always starts at 1. */
+  f_str->try_8bit = (overflow == 0) ? 1 : 0;
+
+  f_str->done_8bit  = 0;
+  f_str->done_16bit = 0;
+#endif /* SW_SSE2 */
+
+  /* minimum allocation for alignment */
+  f_str->max_res = max(3*n0/2,MIN_RES);
+
+  *f_arg = f_str;
+}
+
+/* close_work - release everything init_work() attached to *f_arg and set
+   *f_arg to NULL.  A NULL *f_arg is accepted and ignored.  aa0, n0 and
+   ppst are not used. */
+void close_work (const unsigned char *aa0, int n0,
+		 struct pstruct *ppst,
+		 struct f_struct **f_arg)
+{
+  struct f_struct *fs = *f_arg;
+
+  if (fs == NULL) {
+    return;
+  }
+
+  if (fs->kar_p != NULL) {
+    free(fs->kar_p);
+  }
+
+  /* ss was advanced by one element after allocation; free the true start */
+  free(fs->ss - 1);
+
+  /* alignment tables: flat array, row block, then the row-pointer array */
+  free(fs->waa_a);
+  free(fs->pam2p[0][0]);
+  free(fs->pam2p[0]);
+
+  /* scanning tables, same layout */
+  free(fs->waa_s);
+  free(fs->pam2p[1][0]);
+  free(fs->pam2p[1]);
+
+#if defined(SW_ALTIVEC) || defined(SW_SSE2)
+  free(fs->workspace_memory);
+  free(fs->word_score_memory);
+  free(fs->byte_score_memory);
+#endif
+
+  free(fs);
+  *f_arg = NULL;
+}
+
+
+/* get_param - describe the algorithm and scoring parameters as text.
+
+   pstring1[0]  receives "<algorithm description> (<version>)"
+   pstring1[1]  receives the matrix name, its (high:low) range and the gap
+                penalties in human-readable form
+   pstring2     if not NULL, receives the same information as
+                "; pg_xxx: value" lines
+   s_info       not used
+
+   pstring1 is a message to the manager, currently 512.  The buffers are
+   written with sprintf(), so the caller must provide enough room.
+
+   The pg_matrix line reports ppst->pam_name, matching pstring1[1];
+   previously the "-PSI" suffix string was passed in its place and the
+   matrix name was missing from the output. */
+/*void get_param(struct pstruct *pstr,char *pstring1)*/
+void
+get_param (const struct pstruct *ppst,
+	   char **pstring1, char *pstring2,
+	   struct score_count_s *s_info)
+{
+
+  char pg_str[120];
+  char psi_str[120];
+
+#ifdef SW_SSE2
+#if defined(GLOBAL_GLOBAL) 
+  char *pg_desc = "Global/Global affine Needleman-Wunsch (SSE2, Michael Farrar 2010)";
+#else
+  char *pg_desc = "Global/Local affine Needleman-Wunsch (SSE2, Michael Farrar 2010)";
+#endif
+#else
+#if defined(GLOBAL_GLOBAL) 
+  char *pg_desc = "Global/Global affine Needleman-Wunsch (2007)";
+#else
+  char *pg_desc = "Global/Local affine Needleman-Wunsch (2007)";
+#endif
+#endif
+
+  /* strncpy() does not terminate when the source fills the buffer */
+  strncpy(pg_str, pg_desc,  sizeof(pg_str));
+  pg_str[sizeof(pg_str)-1] = '\0';
+
+  if (ppst->pam_pssm) {
+    strncpy(psi_str,"-PSI",sizeof(psi_str));
+    psi_str[sizeof(psi_str)-1] = '\0';
+  }
+  else { psi_str[0]='\0';}
+
+  sprintf (pstring1[0], "%s (%s)", pg_str, verstr);
+  sprintf (pstring1[1], 
+#ifdef OLD_FASTA_GAP
+	   "%s matrix%s (%d:%d)%s, gap-penalty: %d/%d",
+#else
+	   "%s matrix%s (%d:%d)%s, open/ext: %d/%d",
+#endif
+	   ppst->pam_name, psi_str, ppst->pam_h,ppst->pam_l, 
+	   (ppst->ext_sq_set)?"xS":"\0", ppst->gdelval, ppst->ggapval);
+
+   if (pstring2 != NULL) {
+#ifdef OLD_FASTA_GAP
+     sprintf(pstring2,"; pg_name_alg: %s\n; pg_ver_rel: %s\n; pg_matrix: %s (%d:%d)%s\n; pg_gap-pen: %d %d\n",
+#else
+     sprintf(pstring2,"; pg_name_alg: %s\n; pg_ver_rel: %s\n; pg_matrix: %s (%d:%d)%s\n; pg_open-ext: %d %d\n",
+#endif
+	     pg_str,verstr,ppst->pam_name,ppst->pam_h,ppst->pam_l, 
+	     (ppst->ext_sq_set)?"xS":"\0",ppst->gdelval,ppst->ggapval);
+   }
+}
+
+/* do_work - score one library sequence aa1 (length n1) against the query.
+
+   The score is stored in rst->score[0].  With SW_SSE2 the 8-bit kernel
+   (GLOBAL_BYTE) is tried first while f_str->try_8bit is set; when the
+   score is still OVERFLOW_SCORE afterwards the 16-bit kernel
+   (GLOBAL_WORD) is run.  Without SW_SSE2, FGLOBAL_ALIGN() is used.
+
+   When (ppst->zsflag % 10) == 6 and do_karlin() returns a positive value,
+   rst->comp and rst->H are set from lambda and H; otherwise both are -1.
+
+   Also sets rst->valid_stat to 1 and increments s_info->s_cnt[0] and
+   s_info->tot_scores.  aa0, frame, qr_flg and shuff_flg are not used. */
+void do_work (const unsigned char *aa0, int n0,
+	      const unsigned char *aa1, int n1,
+	      int frame,
+	      const struct pstruct *ppst, struct f_struct *f_str,
+	      int qr_flg, int shuff_flg, struct rstruct *rst,
+	      struct score_count_s *s_info)
+{
+  int     score;
+  double lambda, H;
+  
+  rst->valid_stat = 1;
+  s_info->s_cnt[0]++;
+  s_info->tot_scores++;
+
+#if defined(SW_SSE2)
+
+  /* start in the "overflowed" state so that the 16-bit kernel runs when
+     the 8-bit kernel is disabled */
+  score = OVERFLOW_SCORE;
+
+  if (f_str->try_8bit) {
+    score = GLOBAL_BYTE(n0,
+                        f_str->byte_score,
+                        aa1,
+                        n1,
+#ifndef OLD_FASTA_GAP
+                        //-(ppst->gdelval + ppst->ggapval),
+                        -ppst->gdelval,
+#else
+                        //-ppst->gdelval,
+                        -(ppst->gdelval - ppst->ggapval),
+#endif
+                        -ppst->ggapval,
+                        f_str->ceiling,
+                        f_str->bias,
+                        f_str);
+      
+    f_str->done_8bit++;
+    
+    /* The 8 bit version is roughly 50% faster than the 16 bit version,
+     * so we are fine if less than about 1/3 of the runs have to
+     * be rerun with 16 bits. If it is more, and we have tried at least
+     * 500 sequences, we switch off the 8-bit mode.
+     */
+    /* test the same sentinel that triggers the 16-bit rerun below; the
+       <math.h> OVERFLOW constant is unrelated to alignment scores */
+    if (score == OVERFLOW_SCORE) {
+      f_str->done_16bit++;
+      if(f_str->done_8bit>500 && (3*f_str->done_16bit)>(f_str->done_8bit))
+        f_str->try_8bit = 0;
+    }
+  }
+      
+  if (score == OVERFLOW_SCORE) {
+    /* Overflow, so we have to redo it in 16 bits. */
+    score = GLOBAL_WORD(n0,
+                        f_str->word_score,
+                        aa1,
+                        n1,
+#ifndef OLD_FASTA_GAP
+                        //-(ppst->gdelval + ppst->ggapval),
+                        -ppst->gdelval,
+#else
+                        //-ppst->gdelval,
+                        -(ppst->gdelval - ppst->ggapval),
+#endif
+                        -ppst->ggapval,
+                        f_str->ceiling,
+                        f_str);
+  }
+#else
+
+  score = FGLOBAL_ALIGN(f_str->waa_s,aa1,n0,n1,
+#ifdef OLD_FASTA_GAP
+                       -(ppst->gdelval - ppst->ggapval),
+#else
+                       -ppst->gdelval,
+#endif
+                       -ppst->ggapval,f_str->ss);
+#endif
+
+  rst->score[0] = score;
+
+  if(((ppst->zsflag % 10) == 6) &&
+     (do_karlin(aa1, n1, ppst->pam2[0], ppst,f_str->aa0_f, 
+		f_str->kar_p, &lambda, &H)>0)) {
+    rst->comp = 1.0/lambda;
+    rst->H = H;
+  }
+  else {rst->comp = rst->H = -1.0;}
+
+}
+
+/* nw_walign is the equivalent of sw_walign from dropnnw.c -- it is to
+   be called from nw_malign (the equivalent of sw_malign).
+
+   pam2p   position-indexed scores, read as pam2p[query_pos][lib_residue]
+   n0      query length
+   aa1,n1  library sequence and its length; the forward pass walks aa1
+           until it reads a 0 residue rather than counting to n1
+   q, r    gap penalties; qr = q + r is charged for the first residue of
+           a gap and r for each further residue
+   ss      score row with entries 0..n0, overwritten here
+   a_res   receives n1, min0/max0, min1/max1 and (through NW_ALIGN) the
+           edit script res[] and its length nres
+
+   Returns the score of the alignment.
+
+   Without GLOBAL_GLOBAL the forward pass keeps the best end-of-query
+   score and its row I; a reverse pass from row I then finds the start
+   row K.  With GLOBAL_GLOBAL the score is the final cell and K = 0,
+   I = n1 - 1.
+
+   NOTE(review): if aa1 begins with a 0 residue the loops never run, so
+   h (GLOBAL_GLOBAL) or I (otherwise) is read uninitialized -- confirm
+   callers never pass an empty sequence. */
+
+int
+nw_walign (int **pam2p, int n0,
+	   const unsigned char *aa1, int n1,
+	   int q, int r,
+	   struct swstr *ss,
+	   struct a_res_str *a_res
+	   )
+{
+  const unsigned char *aa1p;
+  register int i, j;
+  register struct swstr *ssj;
+  int e, f, h, p;
+  int qr, t;
+  int score;
+  int cost, I, J, K, L;
+
+  qr = q + r;
+
+  score = -BIGNUM;
+  J = n0-1; L = 0;	/* alignments are global in aa0[n0] */
+
+  /* initialize 0th row */
+  ss[0].H = 0;
+  ss[0].E = t = -q;	/* must be re-initialized because it was
+			   filled in the reverse direction in previous
+			   invocations */
+  /* must count from ss+1, ss[0].H = 0 */
+  for (ssj=ss+1; ssj <= ss+n0 ; ssj++) {
+    ssj->H = t = t - r;
+    ssj->E = t - q;
+  }
+
+  /* forward pass: one iteration per library residue (row) */
+  aa1p = aa1;
+  i = 0;
+  t = -q;
+  while (*aa1p) {
+    p = ss[0].H;	/* diagonal predecessor for the first column */
+#ifndef GLOBAL_GLOBAL	/* GLOBAL_LOCAL */
+    ss[0].H = h = t = 0;	/* leading library residues are free */
+#else	
+    ss[0].H = h = t = t - r;	/* leading library residues cost a gap */
+#endif
+    f = t - q;
+    /* pwaa = waa + (*aa1p++ * n0); */
+    /* ssj must start at ss+1, ss[0].H = 0,
+       but j must go 0 .. n0-1 for pam2p[j]  */
+    for (ssj = ss+1, j=0; j < n0; ssj++,j++) {
+      if ((h =   h     - qr) > /* gap open from left best */
+	  /* gap extend from left gapped */
+	  (f =   f     - r)) f = h;	/* if better, use new gap opened */
+      if ((h = ssj->H - qr) >	/* gap open from up best */
+	  /* gap extend from up gap */
+	  (e = ssj->E - r)) e = h;	/* if better, use new gap opened */
+      h = p + pam2p[j][*aa1p];
+      /* h = p + *pwaa++; */		/* diagonal match */
+      if (h < f ) h = f;	/* left gap better, reset */
+      if (h < e ) h = e;	/* up gap better, reset */
+      p = ssj->H;		/* save previous best score */
+      ssj->H = h;		/* save (new) up diag-matched */
+      ssj->E = e;		/* save upper gap opened */
+    }
+#ifndef GLOBAL_GLOBAL
+    if (h > score) {		/* ? new best score at the end of each row */
+      score = h;		/* save best */
+      I = i;			/* row */
+    }
+#endif
+    /*
+    fprintf(stderr," r %d - score: %d ssj[]: %d\n", i,score,
+	    ss[(i <= n0) ? i-1 : n0-1].H);
+    */
+    aa1p++;	/* aa1p goes down the path graph, row by row */
+    i++;	/* increment the row */
+  }		/* done with forward pass */
+
+#ifdef GLOBAL_GLOBAL
+  /* the global score is the last cell computed; the alignment spans all of aa1 */
+  cost = score = h;
+  K = 0;
+  I = n1 - 1;
+#else
+  /*   fprintf(stderr, " r: %d - score: %d\n", I, score); */
+
+  /* to get the start point, go backwards */
+  
+  cost = -BIGNUM;
+  K = 0;
+  /* re-initialize the row for the reverse direction, anchored at ss[n0] */
+  ss[n0].H = 0;
+  t = -q;
+  for (ssj=ss+n0-1; ssj>=ss; ssj--) {
+    ssj->H = t = t - r;
+    ssj->E= t - q;
+  }
+
+  t = 0;
+  for (i=I; i>=0; i--) {
+    p = ss[n0].H;
+    ss[n0].H = h = t = t-r;
+    f = t-q;
+    for (ssj=ss+J, j= J-1; j>=0; ssj--, j--) {
+      if ((h =   h     - qr) > /* gap open from left best */
+	  /* gap extend from left gapped */
+	  (f =   f     - r)) f = h;	/* if better, use new gap opened */
+      if ((h = ssj->H - qr) >	/* gap open from up best */
+	  /* gap extend from up gap */
+	  (e = ssj->E - r)) e = h;	/* if better, use new gap opened */
+      h = p + pam2p[j][aa1[i]];		/* diagonal match */
+      if (h < f ) h = f;	/* left gap better, reset */
+      if (h < e ) h = e;	/* up gap better, reset */
+      p = ssj->H;		/* save previous best score */
+      ssj->H = h;		/* save (new) up diag-matched */
+      ssj->E = e;		/* save upper gap opened */
+    }
+    if (h > cost) {
+      cost = h;
+      K = i;
+      /* stop as soon as the reverse score reaches the forward score */
+      if (cost >= score) goto found;
+    }
+  }
+  /* at this point, ss[0].E has a very high value for good alignments */
+ found:
+#endif /* not GLOBAL_GLOBAL */
+
+/*   fprintf(stderr," *** %d=%d: L: %3d-%3d/%3d; K: %3d-%3d/%3d\n",score,cost,L,J+1,n0,K,I+1,n1); */
+
+/* in the f_str version, the *res array is already allocated at 4*n0/3 */
+
+  a_res->n1 = n1;
+  a_res->max0 = J+1; a_res->min0 = L; a_res->max1 = I+1; a_res->min1 = K;
+  
+/* this code no longer refers to aa0[], it uses pam2p[0][L] instead */
+/* &aa1[K-1] is passed, so NW_ALIGN presumably indexes B from 1 -- TODO confirm */
+  NW_ALIGN(L,&aa1[K-1],J-L+1,I-K+1,pam2p,q,r,a_res->res,&a_res->nres);
+
+/*  DISPLAY(&aa0[L-1],&aa1[K-1],J-L+1,I-K+1,res,L,K,ppst->sq); */
+
+/* return *res and nres */
+
+  return score;
+}
+
+#define gap(k)  ((k) <= 0 ? 0 : g+h*(k))	/* k-symbol indel cost */
+
+/* FGLOBAL_ALIGN - score-only alignment of the query profile against aa1.
+
+   waa   residue-major profile: the n0 scores for library residue c start
+         at waa + c*n0
+   aa1   library sequence, read until a 0 residue (n1 is not used)
+   q, r  gap penalties, both positive; the first residue of a gap costs
+         q + r, each further residue costs r
+   ss    score row with entries 0..n0, overwritten here
+
+   With GLOBAL_GLOBAL the result is the last cell computed; otherwise it
+   is the largest end-of-query score over all rows (-BIGNUM if aa1 has no
+   residues). */
+static int
+FGLOBAL_ALIGN(int *waa, const unsigned char *aa1,
+	      int n0, int n1,
+	      int q, int r,
+	      struct swstr *ss)
+{
+  const unsigned char *lib_p;	/* current library residue */
+  const int *prof;		/* walks the profile row of that residue */
+  int col;			/* query position, 1..n0 */
+  int edge;			/* running value along the row/column edge */
+  int diag;			/* H of the cell above and to the left */
+  int e, f, h;
+  int open_ext;
+  int best;
+
+  open_ext = q + r;
+  best = -BIGNUM;
+
+  /* row 0: each leading query residue extends a gap */
+  ss[0].H = 0;
+  edge = -q;
+  ss[0].E = edge;
+  for (col = 1; col <= n0; col++) {
+    edge -= r;
+    ss[col].H = edge;
+    ss[col].E = edge - q;
+  }
+
+  edge = -q;
+  for (lib_p = aa1; *lib_p != 0; lib_p++) {
+    diag = ss[0].H;
+#if defined(GLOBAL_GLOBAL)
+    edge = edge - r;
+#else		/* GLOBAL_LOCAL */
+    edge = 0;
+#endif
+    h = edge;
+    ss[0].H = h;
+    f = edge - q;
+    prof = waa + (*lib_p * n0);
+
+    for (col = 1; col <= n0; col++) {	/* go across query */
+      /* gap arriving from the left: open a new one or extend */
+      h -= open_ext;
+      f -= r;
+      if (h > f) f = h;
+
+      /* gap arriving from above: open a new one or extend */
+      h = ss[col].H - open_ext;
+      e = ss[col].E - r;
+      if (h > e) e = h;
+
+      /* diagonal step, then keep the best of the three */
+      h = diag + *prof++;
+      if (h < f) h = f;
+      if (h < e) h = e;
+
+      diag = ss[col].H;
+      ss[col].H = h;
+      ss[col].E = e;
+    }
+#if !defined(GLOBAL_GLOBAL)	/* GLOBAL_LOCAL */
+    if (h > best) {
+      best = h;		/* at end of query, update score */
+    }
+#endif
+  }				/* done with forward pass */
+#ifdef GLOBAL_GLOBAL
+  best = h;
+#endif
+  return best;
+}
+
+/* do_opt - intentionally empty in this file: no arguments are read and
+   rst is left untouched. */
+void do_opt (const unsigned char *aa0, int n0,
+	     const unsigned char *aa1, int n1,
+	     int frame,
+	     struct pstruct *ppst, struct f_struct *f_str,
+	     struct rstruct *rst)
+{
+}
+
+/* do_walign - build the list of alignments for aa1 by handing nw_walign
+   to nsw_malign; modeled after the do_walign code in dropgsw2.c.
+
+   It makes no sense to use this strategy for GLOBAL_GLOBAL
+   alignments, but hopefully they will take care of themselves.
+
+   *have_ares is set to 0x3.  Returns NULL if the first a_res_str cannot
+   be allocated; otherwise returns the list produced by nsw_malign with
+   the index field of its nodes numbered 0,1,2,... in list order. */
+
+struct a_res_str *
+do_walign (const unsigned char *aa0, int n0,
+	   const unsigned char *aa1, int n1,
+	   int frame, int repeat_thresh,
+	   struct pstruct *ppst, 
+	   struct f_struct *f_str, 
+	   int *have_ares)
+{
+  struct a_res_str *head, *node;
+  int pam_ix;
+  int next_index;
+
+  *have_ares = 0x3;	/* set 0x2 bit to indicate local copy */
+
+  head = (struct a_res_str *)calloc(1, sizeof(struct a_res_str));
+  if (head == NULL) {
+    fprintf(stderr," [do_walign] Cannot allocate a_res");
+    return NULL;
+  }
+
+  /* choose which of the two pam2p tables to use */
+  pam_ix = (ppst->ext_sq_set ? 1 : 0);
+
+  head = nsw_malign(f_str->pam2p, pam_ix, n0, aa1, n1,
+		    repeat_thresh, f_str->max_res,
+		    -ppst->gdelval, -ppst->ggapval,
+		    f_str->ss, head,
+		    &nw_walign, ppst->do_rep
+		    );
+
+  /* number the alignments in list order */
+  next_index = 0;
+  node = head;
+  while (node != NULL) {
+    node->index = next_index;
+    next_index++;
+    node = node->next;
+  }
+
+  return head;
+}
+
+/* pre_cons - when TFAST is defined, translate aa1 with aatran() into
+   f_str->aa1x and store the returned value in f_str->n10; otherwise this
+   function does nothing.
+   NOTE(review): the guard here is TFAST, while the pre_cons in dropnfa.c
+   tests TFASTA -- confirm which macro the build defines. */
+void
+pre_cons(const unsigned char *aa1, int n1, int frame, struct f_struct *f_str) {
+
+#ifdef TFAST
+  f_str->n10 = aatran(aa1,f_str->aa1x,n1,frame);
+#endif
+
+}
+
+/* aln_func_vals - fill in the coordinate-mapping fields of *aln
+   (qlfact, qlrev, llfact, llmult, frame, llrev).  All scale factors are
+   1, llrev and frame are 0, and qlrev is 1 only when frame > 0. */
+/* call from calcons, calc_id, calc_code */
+void 
+aln_func_vals(int frame, struct a_struct *aln) {
+
+  aln->qlrev = (frame > 0) ? 1 : 0;
+  aln->llrev = 0;
+  aln->frame = 0;
+
+  aln->qlfact = 1;
+  aln->llfact = 1;
+  aln->llmult = 1;
+}
diff --git a/src/dropnsw.c b/src/dropnsw.c
new file mode 100644
index 0000000..3aba3ff
--- /dev/null
+++ b/src/dropnsw.c
@@ -0,0 +1,424 @@
+/* $Id: dropnsw.c $ */
+
+/* copyright (c) 1994, 1995, 1996, 2014 by William R. Pearson and the
+   Rector & Visitors of the University of Virginia */
+
+/* Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under this License is distributed on an "AS
+   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+   express or implied.  See the License for the specific language
+   governing permissions and limitations under the License. 
+*/
+
+/*
+  this is a slower version of dropgsw.c that implements the Smith-Waterman
+  algorithm.  It lacks the shortcuts in dropgsw.c that prevent scores less
+  than the penalty for the first residue in a gap from being generated.
+
+  Thus, dropnsw.c should be used for tests with very large gap penalties,
+  and is more appropriate for programs like prss3, which are interested
+  in accurate low scores.
+*/
+
+/* the do_walign() code in this file is not thread_safe */
+/* init_work(), do_work(), are thread safe */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <math.h>
+
+#include "defs.h"
+#include "param.h"
+
+/* version string printed by get_param() */
+static char *verstr="3.5 Aug 2009";
+
+/* one scoring cell per query position: the H and E values that
+   do_work() carries from one library residue to the next */
+struct swstr { int H, E;};
+
+/* per-query work area: filled in by init_work(), released by close_work() */
+struct f_struct {
+  struct swstr *ss;	/* scoring row used by do_work(), also handed to nsw_malign();
+			   points one element past the start of its allocation */
+  struct swstr *f_ss;	/* allocated like ss in init_work(); not otherwise used in this file */
+  struct swstr *r_ss;	/* allocated like ss in init_work(); not otherwise used in this file */
+  int *waa_s, *waa_a;	/* flat score tables filled residue-major (index e*n0 + f)
+			   by init_work(); do_work() scans waa_s */
+  int **pam2p[2];	/* [0] and [1]: per-position score rows, pam2p[ix][f][e] */
+  int max_res;		/* set to max(3*n0/2, MIN_RES) in init_work() */
+  double aa0_f[MAXSQ];	/* passed to do_karlin() from do_work() */
+  double *kar_p;	/* passed to do_karlin(); freed in close_work() when non-NULL */
+};
+
+#define DROP_INTERN
+#include "drop_func.h"
+
+extern int do_karlin(const unsigned char *aa1, int n1,
+		     int **pam2, const struct pstruct *ppst,
+		     double *aa0_f, double *kar_p, double *lambda, double *H);
+extern int sw_walign (int **pam2p, int n0,
+		      const unsigned char *aa1, int n1,
+		      int q, int r,
+		      struct swstr *ss,
+		      struct a_res_str *a_res
+		      );
+
+extern struct a_res_str *
+nsw_malign (int ***pam2p, int pam_ix, int n0,
+	    const unsigned char *aa1, int n1,
+	    int score_thresh, int max_res,
+	    int gdelval, int ggapval, 
+	    struct swstr *ss, 
+	    struct a_res_str *cur_ares,
+	    int score_ix,
+	    int (*fn_walign)
+	    (
+	     int **pam2p, int n0,
+	     const unsigned char *aa1, int n1,
+	     int q, int r,
+	     struct swstr *ss,
+	     struct a_res_str *a_res
+	     ),
+	    int do_rep
+	    );
+
+void
+SIM(const unsigned char *A, /* seq1 indexed A[1..M] */
+    const unsigned char *B, /* seq2 indexed B[1..N] */
+    int M, int N,		/* len seq1, seq2 */
+    struct pstruct *ppst,	/* parameters */
+    int nseq,			/* nseq - number of different sequences */
+    int mini_score,		/* cut-off score */
+    int max_count,		/* number of alignments */
+    struct a_res_str *a_res);	/* alignment result structure */
+
+/* initialize for Smith-Waterman optimal score */
+
+/* init_work - allocate and fill the per-query work area.
+
+   aa0, n0 : query sequence and its length
+   ppst    : scoring parameters; ppst->stats_mod is set to 1 here
+   f_arg   : receives the newly allocated f_struct
+
+   Any allocation failure prints a message to stderr and exits with
+   status 1, so *f_arg is always valid on return. */
+void init_work (unsigned char *aa0, int n0,
+		struct pstruct *ppst,
+		struct f_struct **f_arg)
+{
+   int maxn0;
+   int *pwaa_s, *pwaa_a;
+   int e, f, i;
+   struct f_struct *f_str;
+   int **pam2p;
+   struct swstr *ss, *f_ss, *r_ss;
+   int nsq, ip;
+
+   ppst->stats_mod = 1;
+   /* ip selects which pam2p[] / pam2[] matrix feeds the scanning table */
+   if (ppst->ext_sq_set) {
+     nsq = ppst->nsqx; ip = 1;
+   }
+   else {
+     nsq = ppst->nsqx; ip = 0;
+   }
+
+   /* calloc() leaves every pointer member (including kar_p) NULL */
+   if ((f_str = (struct f_struct *)calloc(1,sizeof(struct f_struct)))
+       == NULL) {
+     fprintf (stderr, "cannot allocate f_str\n");
+     exit (1);
+   }
+
+   /* allocate space for the scoring arrays */
+   /* each array gets n0+2 cells and the stored pointer is advanced by
+      one, so index -1 is addressable; close_work() steps back again */
+   maxn0 = n0 + 2;
+   if ((ss = (struct swstr *) calloc (maxn0, sizeof (struct swstr)))
+	 == NULL) {
+     fprintf (stderr, "cannot allocate ss array %3d\n", n0);
+     exit (1);
+   }
+   ss++;
+   f_str->ss = ss;
+
+   if ((f_ss = (struct swstr *) calloc (maxn0, sizeof (struct swstr)))
+       == NULL) {
+     fprintf (stderr, "cannot allocate f_ss array %3d\n", n0);
+     exit (1);
+   }
+   f_ss++;
+   f_str->f_ss = f_ss;
+
+   if ((r_ss = (struct swstr *) calloc (n0+2, sizeof (struct swstr)))
+       == NULL) {
+     fprintf (stderr, "cannot allocate r_ss array %3d\n", n0);
+     exit (1);
+   }
+   r_ss++;
+   f_str->r_ss = r_ss;
+
+   /* initialize variable (-S) pam matrix */
+   if ((f_str->waa_s= (int *)calloc((nsq+1)*(n0+1),sizeof(int))) == NULL) {
+     fprintf(stderr,"cannot allocate waa_s array %3d\n",nsq*n0);
+     exit(1);
+   }
+
+   if ((f_str->pam2p[1]= (int **)calloc((n0+1),sizeof(int *))) == NULL) {
+     fprintf(stderr,"cannot allocate pam2p[1] array %3d\n",n0);
+     exit(1);
+   }
+
+   pam2p = f_str->pam2p[1];
+   if ((pam2p[0]=(int *)calloc((nsq+1)*(n0+1),sizeof(int))) == NULL) {
+     fprintf(stderr,"cannot allocate pam2p[1][] array %3d\n",nsq*n0);
+     exit(1);
+   }
+
+   /* rows 1..n0-1 point into the single slab owned by row 0 */
+   for (i=1; i<n0; i++) {
+     pam2p[i]= pam2p[0] + (i*(nsq+1));
+   }
+
+   /* initialize universal (alignment) matrix */
+   if ((f_str->waa_a= (int *)calloc((nsq+1)*(n0+1),sizeof(int))) == NULL) {
+     fprintf(stderr,"cannot allocate waa_a struct %3d\n",nsq*n0);
+     exit(1);
+   }
+
+   if ((f_str->pam2p[0]= (int **)calloc((n0+1),sizeof(int *))) == NULL) {
+     fprintf(stderr,"cannot allocate pam2p[0] array %3d\n",n0);
+     exit(1);
+   }
+
+   pam2p = f_str->pam2p[0];
+   if ((pam2p[0]=(int *)calloc((nsq+1)*(n0+1),sizeof(int))) == NULL) {
+     fprintf(stderr,"cannot allocate pam2p[0][] array %3d\n",nsq*n0);
+     exit(1);
+   }
+
+   for (i=1; i<n0; i++) {
+     pam2p[i]= pam2p[0] + (i*(nsq+1));
+   }
+
+   /* 
+      pwaa effectively has a sequence profile --
+       pwaa[0..n0-1] has pam score for residue 0 (-BIGNUM)
+       pwaa[n0..2n0-1] has pam scores for residue 1 (A)
+       pwaa[2n0..3n-1] has pam scores for residue 2 (R), ...
+
+       thus: pwaa = f_str->waa_s + (*aa1p++)*n0; sets up pwaa so that
+       *pwaa++ rapidly moves though the scores of the aa1p[] position
+       without further indexing
+
+       For a real sequence profile, pwaa[0..n0-1] vs ['A'] could have
+       a different score in each position.
+   */
+
+   if (ppst->pam_pssm) {
+     /* position-specific scores are copied from ppst->pam2p[] */
+     pwaa_s = f_str->waa_s;
+     pwaa_a = f_str->waa_a;
+     for (e = 0; e <nsq; e++)	{	/* for each residue in the alphabet */
+       for (f = 0; f < n0; f++) {	/* for each position in aa0 */
+	 *pwaa_s++ = f_str->pam2p[ip][f][e] = ppst->pam2p[ip][f][e];
+	 *pwaa_a++ = f_str->pam2p[0][f][e]  = ppst->pam2p[0][f][e];
+       }
+     }
+   }
+   else {	/* initialize scanning matrix */
+     pwaa_s = f_str->waa_s;
+     pwaa_a = f_str->waa_a;
+     for (e = 0; e <nsq; e++)	/* for each residue in the alphabet */
+       for (f = 0; f < n0; f++)	{	/* for each position in aa0 */
+	 *pwaa_s++ = f_str->pam2p[ip][f][e]= ppst->pam2[ip][e][aa0[f]];
+	 *pwaa_a++ = f_str->pam2p[0][f][e] = ppst->pam2[0][e][aa0[f]];
+       }
+   }
+
+   /* minimum allocation for alignment */
+   f_str->max_res =  max(3*n0/2,MIN_RES);
+
+   *f_arg = f_str;
+}
+
+/* close_work - release everything init_work() allocated for *f_arg and
+   set *f_arg to NULL.  A NULL *f_arg is accepted and ignored.
+   aa0, n0 and ppst are not referenced. */
+void close_work (const unsigned char *aa0, int n0,
+		 struct pstruct *ppst, struct f_struct **f_arg)
+{
+  struct f_struct *f_str;
+
+  f_str = *f_arg;
+
+  if (f_str != NULL) {
+    if (f_str->kar_p !=NULL) free(f_str->kar_p);
+
+    /* ss, f_ss and r_ss were each advanced one element past the start
+       of their allocation in init_work(); step back before freeing.
+       f_ss and r_ss were previously never released. */
+    if (f_str->ss != NULL) free(f_str->ss - 1);
+    if (f_str->f_ss != NULL) free(f_str->f_ss - 1);
+    if (f_str->r_ss != NULL) free(f_str->r_ss - 1);
+
+    /* pam2p[x][0] owns the slab that all rows of pam2p[x] point into */
+    free(f_str->waa_a);
+    free(f_str->pam2p[0][0]);
+    free(f_str->pam2p[0]);
+    free(f_str->waa_s);
+    free(f_str->pam2p[1][0]);
+    free(f_str->pam2p[1]);
+
+    free(f_str);
+    *f_arg = NULL;
+  }
+}
+
+/* pstring1 is a message to the manager, currently 512 */
+/*void get_param(struct pstruct *pstr,char **pstring1)*/
+void
+get_param (const struct pstruct *ppst, 
+	   char **pstring1, char *pstring2)
+{
+  /* the two builds differ only in how the gap penalties are labelled */
+#ifdef OLD_FASTA_GAP
+  const char *title_fmt = "%s matrix%s (%d:%d)%s, gap-penalty: %d/%d";
+  const char *tagged_fmt = "; pg_name_alg: %s\n; pg_ver_rel: %s\n; pg_matrix: %s (%d:%d)%s\n; pg_gap-pen: %d %d\n";
+#else
+  const char *title_fmt = "%s matrix%s (%d:%d)%s, open/ext: %d/%d";
+  const char *tagged_fmt = "; pg_name_alg: %s\n; pg_ver_rel: %s\n; pg_matrix: %s (%d:%d)%s\n; pg_open-ext: %d %d\n";
+#endif
+  char *alg_name = "Smith-Waterman";
+  const char *ext_tag;
+  char psi_str[120];
+
+  /* suffix shown when the extended sequence set is in use */
+  ext_tag = (ppst->ext_sq_set) ? "xS" : "\0";
+
+  /* "-PSI" marks a position-specific matrix; otherwise empty */
+  psi_str[0] = '\0';
+  if (ppst->pam_pssm) { strncpy(psi_str,"-PSI",sizeof(psi_str)); }
+
+  /* pstring1[0]: algorithm and version; pstring1[1]: matrix and penalties */
+  sprintf (pstring1[0], " %s (%s)", alg_name, verstr);
+  sprintf (pstring1[1], title_fmt,
+	   ppst->pam_name, psi_str, ppst->pam_h, ppst->pam_l,
+	   ext_tag, ppst->gdelval, ppst->ggapval);
+
+  /* the tagged form is optional */
+  if (pstring2 == NULL) return;
+
+  /* NOTE(review): the pg_matrix field receives psi_str (empty or "-PSI"),
+     not ppst->pam_name as in pstring1[1] -- confirm whether this is
+     intended before changing the output format. */
+  sprintf(pstring2, tagged_fmt,
+	  alg_name, verstr, psi_str, ppst->pam_h, ppst->pam_l,
+	  ext_tag, ppst->gdelval, ppst->ggapval);
+}
+
+
+/* do_work - compute the best local score of aa1 against the query
+   profile in f_str->waa_s and store it in rst->score[0].
+
+   aa0, n0   : query sequence and length (n0 sizes the scoring row)
+   aa1, n1   : library sequence; the scan stops at the first zero
+               residue code in aa1, n1 is only passed to do_karlin()
+   ppst      : gap penalties (gdelval, ggapval) and zsflag
+   f_str     : work area from init_work()
+   rst       : receives score[0], escore, segnum, seglen, valid_stat,
+               comp and H
+   frame, qr_flg, shuff_flg are not referenced. */
+void do_work (const unsigned char *aa0, int n0,
+	      const unsigned char *aa1, int n1,
+	      int frame,
+	      const struct pstruct *ppst, struct f_struct *f_str,
+	      int qr_flg, int shuff_flg,
+	      struct rstruct *rst)
+{
+   const unsigned char *aa0p, *aa1p;
+   register struct swstr *ssj;
+   struct swstr *ss, *f_ss, *r_ss;
+   register int *pwaa;
+   int *waa;
+   register int i, j;
+   int     e, f, h, p;
+   int     q, r, m;
+   int     score;
+
+   double lambda, H, K;
+
+   rst->escore = 1.0;
+   rst->segnum = rst->seglen = 1;
+
+   waa = f_str->waa_s;
+   ss = f_str->ss;
+   f_ss = f_str->f_ss;	/* fetched but not used below */
+   r_ss = f_str->r_ss;	/* fetched but not used below */
+
+   /* q, r are the negated penalties; m = q + r is what is subtracted
+      from H when a gap is started, r when one is continued */
+#ifdef OLD_FASTA_GAP
+   q = -(ppst->gdelval - ppst->ggapval);
+#else
+   q = -ppst->gdelval;
+#endif
+   r = -ppst->ggapval;
+   m = q + r;
+
+   /* initialize 0th row */
+   for (ssj=ss; ssj<&ss[n0]; ssj++) {
+     ssj->H = 0;
+     ssj->E = -q;
+   }
+
+   rst->valid_stat = 1;
+   score = 0;
+   aa1p = aa1;
+   while (*aa1p) {		/* one pass over the row per library residue */
+     h = p = 0;
+     f = -q;
+     /* start of this residue's n0 scores in the residue-major table */
+     pwaa = waa + (*aa1p++ * n0);
+     for (ssj = ss, aa0p = aa0; ssj < ss+n0; ssj++) {
+       /* f: best of opening from the previous h or extending the previous f */
+       if ((h =   h     - m) > (f =   f     - r)) f = h;
+       /* e: best of opening from the stored H or extending the stored E */
+       if ((h = ssj->H - m) > (e = ssj->E - r)) e = h;
+       /* p holds the H saved from the previous cell before it was overwritten */
+       h = p + *pwaa++;
+       if (h < 0 ) h = 0;	/* scores are floored at zero */
+       if (h < f ) h = f;
+       if (h < e ) h = e;
+       p = ssj->H;
+       ssj->H = h;
+       ssj->E = e;
+       if (h > score) score = h;	/* track the maximum over all cells */
+     }
+   }				/* done with forward pass */
+
+   rst->score[0] = score;
+
+  /* only when zsflag % 10 == 6 and do_karlin() returns > 0 are comp and
+     H taken from its results; otherwise both are set to -1.0 */
+  if(((ppst->zsflag % 10) == 6) &&
+     (do_karlin(aa1, n1, ppst->pam2[0], ppst,f_str->aa0_f, 
+		f_str->kar_p, &lambda, &H)>0)) {
+     rst->comp = 1.0/lambda;
+     rst->H = H;
+   }
+  else {rst->comp = rst->H = -1.0;}
+}				/* here we should be all done */
+
+/* do_opt - no-op in this file: the body is empty, so none of the
+   arguments are read and *rstr is left unchanged. */
+void    do_opt (const unsigned char *aa0, int n0,
+		const unsigned char *aa1, int n1,
+		int frame,
+		struct pstruct *pst, struct f_struct *f_str,
+		struct rstruct *rstr)
+{
+}
+
+/* do_walign - produce the list of alignment results for aa0 vs aa1.
+
+   Without LALIGN the list comes from nsw_malign() driven by
+   sw_walign(); with LALIGN the freshly allocated a_res is filled in
+   by SIM(), after ppst->nseq is set to 1 (identical sequences and
+   !show_ident) or 2.
+
+   have_ares is always set to 0x3 on entry; frame is not referenced.
+
+   Returns the head of the result list, with each node's index field
+   numbered 0,1,2,... in list order, or NULL if the first a_res_str
+   cannot be allocated. */
+struct a_res_str *
+do_walign (const unsigned char *aa0, int n0,
+	   const unsigned char *aa1, int n1,
+	   int frame, int repeat_thresh,
+	   struct pstruct *ppst, 
+	   struct f_struct *f_str, 
+	   int *have_ares)
+{
+  struct a_res_str *a_res, *tmp_a_res;
+  int a_res_index;
+
+  *have_ares = 0x3;	/* set 0x2 bit to indicate local copy */
+  
+  if ((a_res = (struct a_res_str *)calloc(1, sizeof(struct a_res_str)))==NULL) {
+    /* terminate the diagnostic so it does not run into later stderr output */
+    fprintf(stderr," [do_walign] Cannot allocate a_res\n");
+    return NULL;
+  }
+
+#ifndef LALIGN
+  /* gap penalties are negated before being handed to nsw_malign() */
+  a_res = nsw_malign(f_str->pam2p, (ppst->ext_sq_set?1:0), n0, aa1, n1,
+		     repeat_thresh, f_str->max_res,
+		     -ppst->gdelval, -ppst->ggapval,
+		     f_str->ss, a_res,ppst->score_ix,
+		     &sw_walign, ppst->do_rep
+		     );
+#else	/* LALIGN */
+  if (!ppst->show_ident && same_seq(aa0, n0, aa1, n1)) ppst->nseq = 1;
+  else ppst->nseq = 2;
+
+  /* SIM() indexes its sequences from 1, hence the -1 offsets */
+  SIM(aa0-1, aa1-1, n0, n1, ppst, ppst->nseq, repeat_thresh, ppst->max_repeat, a_res);
+#endif
+
+  /* number the results in list order */
+  a_res_index = 0;
+  for (tmp_a_res=a_res; tmp_a_res; tmp_a_res = tmp_a_res->next) {
+    tmp_a_res->index = a_res_index++;
+  }
+
+  return a_res;
+}
+
+/* pre_cons - no-op in this file: the body is empty and no argument is read */
+void
+pre_cons(const unsigned char *aa1, int n1, int frame, struct f_struct *f_str) {}
+
+/* aln_func_vals - set up aln.qlfact, qlrev, llfact, llmult, frame, llrev */
+/* call from calcons, calc_id, calc_code */
+void 
+aln_func_vals(int frame, struct a_struct *aln) {
+
+  /* query is flagged as reversed only for a positive frame;
+     the library side is never flagged */
+  aln->qlrev = (frame > 0) ? 1 : 0;
+  aln->llrev = 0;
+
+  /* all coordinate factors are 1 */
+  aln->qlfact = 1;
+  aln->llmult = 1;
+  aln->llfact = 1;
+
+  /* the frame recorded in aln is always 0, whatever was passed in */
+  aln->frame = 0;
+}
diff --git a/src/dyn_string.h b/src/dyn_string.h
new file mode 100644
index 0000000..1eeb520
--- /dev/null
+++ b/src/dyn_string.h
@@ -0,0 +1,30 @@
+/* $Id: dyn_string.h 1197 2013-07-19 20:25:19Z wrp $ */
+/* $Revision: 1197 $  */
+
+/* structure, functions for dynamic strings */
+
+/* growable string buffer handled by the *_dyn_string() functions below */
+struct dyn_string_str {
+  char *string;	/* the character buffer; NULL_dyn_string() writes '\0' at [0] */
+  int c_size;	/* reset to 0 by NULL_dyn_string() -- presumably the current
+		   length; confirm against the implementation */
+  int mx_size;	/* presumably the allocated size -- confirm */
+  int inc;	/* presumably the growth increment given to init_dyn_string() -- confirm */
+};
+
+/* initial allocation */
+struct dyn_string_str *init_dyn_string(int size, int inc);
+
+/* strcpy */
+void dyn_strcpy(struct dyn_string_str *dyn_string, char *value);
+
+/* strcat */
+void dyn_strcat(struct dyn_string_str *dyn_string, char *value);
+
+/* free */
+void free_dyn_string(struct dyn_string_str *dyn_string);
+
+/* reset */
+void reset_dyn_string(struct dyn_string_str *dyn_string);
+
+/* initialize to '\0': empty the buffer and zero c_size.
+   The argument is parenthesized so that expressions such as *pp or
+   arr+1 expand correctly.  The macro still expands to two statements
+   with a trailing ';' (kept for existing callers), so do not use it
+   as the body of an unbraced if/else/loop. */
+#define NULL_dyn_string(str) (str)->string[0]='\0'; (str)->c_size = 0;
+
diff --git a/src/faatran.c b/src/faatran.c
new file mode 100644
index 0000000..1477456
--- /dev/null
+++ b/src/faatran.c
@@ -0,0 +1,445 @@
+/* $Id: faatran.c $ */
+
+/* copyright (c) 1996, 1997, 1998, 1999, 2014 by William R. Pearson and
+   The Rector & Visitors of the University of Virginia */
+
+/* Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under this License is distributed on an "AS
+   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+   express or implied.  See the License for the specific language
+   governing permissions and limitations under the License. 
+*/
+
+/*	aatran.c	translates from nt to aa, 1 char codes */
+/*	modified July 2, 1987 for all 6 frames */
+/*	23 Jan 1991	fixed bug for short sequences */
+
+/* 	this mapping is not alphabet independent */
+
+#define XTERNAL
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "upam.h"
+#include "uascii.h"
+
+/*
+1. The Standard Code (transl_table=1)
+
+By default all transl_table in GenBank flatfiles are equal to id 1, and this
+is not shown. When transl_table is not equal to id 1, it is shown as a
+qualifier on the CDS feature.
+
+*/
+static
+char *AA1="FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
+/*
+  Starts = ---M---------------M---------------M----------------------------
+  Base1  = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+  Base2  = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+  Base3  = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+
+2. The Vertebrate Mitochondrial Code (transl_table=2)
+*/
+static
+char *AA2 ="FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG";
+/*
+  Starts = --------------------------------MMMM---------------M------------
+  Base1  = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+  Base2  = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+  Base3  = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+
+3. The Yeast Mitochondrial Code (transl_table=3)
+*/
+static
+char *AA3 ="FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
+/*
+  Starts = -----------------------------------M----------------------------
+  Base1  = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+  Base2  = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+  Base3  = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+
+4. The Mold, Protozoan, and Coelenterate Mitochondrial Code and the
+Mycoplasma/Spiroplasma Code (transl_table=4)
+*/
+static
+char *AA4 ="FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
+/*
+  Starts = --MM---------------M------------MMMM---------------M------------
+  Base1  = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+  Base2  = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+  Base3  = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+
+5. The Invertebrate Mitochondrial Code (transl_table=5)
+*/
+static
+char *AA5 ="FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG";
+/*
+  Starts = ---M----------------------------MMMM---------------M------------
+  Base1  = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+  Base2  = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+  Base3  = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+
+6. The Ciliate, Dasycladacean and Hexamita Nuclear Code (transl_table=6)
+*/
+static
+char *AA6 ="FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
+/*
+  Starts = -----------------------------------M----------------------------
+  Base1  = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+  Base2  = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+  Base3  = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+
+9. The Echinoderm Mitochondrial Code (transl_table=9); stored as AA7 and selected in aainit() by tr_type 7
+*/
+static
+char *AA7 ="FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG";
+/*
+  Starts = -----------------------------------M----------------------------
+  Base1  = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+  Base2  = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+  Base3  = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+
+10. The Euplotid Nuclear Code (transl_table=10)
+*/
+static
+char *AA10="FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
+/*
+  Starts = -----------------------------------M----------------------------
+  Base1  = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+  Base2  = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+  Base3  = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+
+11. The Bacterial "Code" (transl_table=11)
+*/
+static
+char *AA11="FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
+/*
+  Starts = ---M---------------M------------MMMM---------------M------------
+  Base1  = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+  Base2  = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+  Base3  = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+
+12. The Alternative Yeast Nuclear Code (transl_table=12)
+*/
+static
+char *AA12 ="FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
+/*
+  Starts = -------------------M---------------M----------------------------
+  Base1  = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+  Base2  = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+  Base3  = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+
+13. The Ascidian Mitochondrial Code (transl_table=13)
+*/
+static
+char *AA13="FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG";
+/*
+  Starts = -----------------------------------M----------------------------
+  Base1  = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+  Base2  = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+  Base3  = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+
+14. The Flatworm Mitochondrial Code (transl_table=14)
+*/
+static
+char *AA14 ="FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG";
+/*
+  Starts = -----------------------------------M----------------------------
+  Base1  = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+  Base2  = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+  Base3  = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+
+15. Blepharisma Nuclear Code (transl_table=15)
+*/
+static
+char *AA15="FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
+/*
+  Starts = -----------------------------------M----------------------------
+  Base1  = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+  Base2  = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+  Base3  = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+*/
+
+static
+char *AA16 ="FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
+/* 
+  id 16 ,
+  name "Chlorophycean Mitochondrial" ,
+  sncbieaa "-----------------------------------M----------------------------"
+  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+*/
+
+static
+char *AA21 ="FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG";
+/*
+  name "Trematode Mitochondrial" ,
+  id 21 ,
+  sncbieaa "-----------------------------------M---------------M------------"
+  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+*/
+
+static
+char *AA22 ="FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
+/*
+  name "Scenedesmus obliquus Mitochondrial" ,
+  id 22 ,
+  sncbieaa "-----------------------------------M----------------------------"
+  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+*/
+
+static
+char *AA23 ="FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
+/*
+  name "Thraustochytrium Mitochondrial" ,
+  id 23 ,
+  sncbieaa "--------------------------------M--M---------------M------------"
+  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+*/
+
+
+/* current codon -> amino-acid letter table, indexed by the 6-bit value
+   (n1<<4)+(n2<<2)+n3 built from three 2-bit nucleotide codes.
+   aainit() overwrites it when a translation table is selected. */
+static char aacmap[64]={
+  'K','N','K','N','T','T','T','T','R','S','R','S','I','I','M','I',
+  'Q','H','Q','H','P','P','P','P','R','R','R','R','L','L','L','L',
+  'E','D','E','D','A','A','A','A','G','G','G','G','V','V','V','V',
+  '*','Y','*','Y','S','S','S','S','*','C','W','C','L','F','L','F'
+};
+
+/* both filled by aainit(): aamap[i] = aascii[aacmap[i]] and
+   aamapr[i] = aascii[aacmap[(~i)&63]] (all six index bits inverted) */
+static int aamap[64];	/* integer aa values */
+static int aamapr[64]; /* reverse sequence map */
+
+/* tnt is used only by aatran.c. It must be consistent with lascii and
+the nt alphabet. It uses 3,3 because T and U are considered separately
+*/
+/* maps a nucleotide residue code to its 2-bit value (0..3) */
+static int tnt[]={0,0,1,2,3,3,0,1,0,0,1,2,0,0,0,1,0,0,
+		    0,1,2,3,3,0,1,0,0,1,2,0,0,0,1,0,0};
+
+/* copy of the debug argument last given to aainit(); enables the
+   DEBUG-only range check in aatran() */
+static int debug_set;
+
+/* aatran - translate nucleotide codes in ntseq into amino-acid codes.
+
+   ntseq : encoded nucleotide sequence of maxs residues
+   aaseq : output buffer; receives the translation followed by EOSEQ
+   maxs  : number of nucleotides
+   frame : 0..2 translate forward starting at ntseq[frame] (aamap);
+           3.. translate backward starting just below
+           ntseq[maxs-(frame-3)] using aamapr
+
+   Returns the number of codons translated, or 0 (with aaseq[0] set
+   to EOSEQ) when three or fewer codons are available. */
+int
+aatran(const unsigned char *ntseq, unsigned char *aaseq, int maxs, int frame)
+{
+  int iaa, im, nna, i;
+  register int *nnp;
+  const unsigned char *nts0;
+  register int *aamp;
+  register unsigned char *aap;
+
+  iaa=nna=(maxs-(frame<3?frame:frame-3))/3;
+  if (nna <= 3 ) {
+    aaseq[0]=EOSEQ;
+    return 0;
+  }
+
+  nnp = tnt;
+
+  if (frame < 3) {
+    aamp = aamap;
+    nts0 = &ntseq[frame];
+    aap = aaseq;
+    while (nna--) {
+      /* pack three 2-bit nucleotide values into a 6-bit codon index */
+      im = nnp[*nts0++]<<4;
+      im += nnp[*nts0++]<<2;
+      im += nnp[*nts0++];
+      *aap++ = aamp[im];
+
+      /* this check is included because of a bug in tfasty 
+         which occurs only during the alignment process */
+
+#ifdef DEBUG
+      if (debug_set && aamp[im] > MAXUC) {
+	fprintf(stderr,"faatran: %d %d %d %d %d?%d\n",
+		*(nts0-3),*(nts0-2),*(nts0-1), im, aamp[im],aamap[im]);
+
+	/* this allows recovery, but should not be done frequently */
+	for (i=0; i<64; i++) {
+	  aamap[i]=aascii[aacmap[i]];
+	  aamapr[i]=aascii[aacmap[(~i)&63]];
+	}
+	*(aap-1) = aamp[im];
+      }
+#endif
+    }
+  }
+  else {
+    aamp = aamapr;
+    nts0 = &ntseq[maxs-(frame-3)];
+    aap = aaseq;
+    while (nna--) {
+      /* read the codon backwards; aamapr supplies the matching map */
+      im = nnp[*--nts0]<<4;
+      im += nnp[*--nts0]<<2;
+      im += nnp[*--nts0];
+      *aap++ = aamp[im];
+      /* this check is included because of a bug in tfasty 
+         which occurs only during the alignment process */
+
+#ifdef DEBUG
+      if (debug_set && aamp[im] > MAXUC) {
+	/* nts0 now points at the last nucleotide read, so the codon just
+	   translated is nts0[2], nts0[1], nts0[0] in read order */
+	fprintf(stderr,"faatran: %d %d %d %d %d?%d\n",
+		*(nts0+2),*(nts0+1),*nts0, im, aamp[im],aamap[im]);
+
+	/* this allows recovery, but should not be done frequently */
+	for (i=0; i<64; i++) {
+	  aamap[i]=aascii[aacmap[i]];
+	  aamapr[i]=aascii[aacmap[(~i)&63]];
+	}
+	*(aap-1) = aamp[im];
+      }
+#endif
+    }
+  }
+  aaseq[iaa]=EOSEQ;
+  return iaa;
+}
+
+/* slower version that masks out NNN,XXX */
+
+/*                - A C G T U R Y M W S K D H V B N X */
+static int snt[]={0,0,1,2,3,3,0,1,0,0,4,4,4,4,4,4,4,4};
+
+/* saatran - like aatran(), but any codon containing a nucleotide whose
+   snt[] value is 4 or more is translated as aascii['X'].
+
+   Returns the number of codons translated, or 0 (with aaseq[0] set to
+   EOSEQ) when three or fewer codons are available.
+   NOTE(review): snt[] has 18 entries; confirm that no nucleotide code
+   in ntseq can exceed 17. */
+int
+saatran(const unsigned char *ntseq,
+	unsigned char *aaseq, int maxs, int frame)
+{
+  int ncodon, left, im, it, masked;
+  const unsigned char *ntp;
+  unsigned char *outp;
+  int *codemap;
+
+  ncodon = (maxs-(frame<3?frame:frame-3))/3;
+  if (ncodon <= 3 ) {
+    aaseq[0]=EOSEQ;
+    return 0;
+  }
+
+  outp = aaseq;
+
+  if (frame < 3) {
+    /* forward frames: walk upward from ntseq[frame] */
+    codemap = aamap;
+    ntp = &ntseq[frame];
+    for (left = ncodon; left > 0; left--) {
+      masked = 0;
+      im = 0;
+
+      it = snt[*ntp++];
+      if (it < 4) im = it<<4;
+      else masked = 1;
+
+      it = snt[*ntp++];
+      if (it < 4) im += it<<2;
+      else masked = 1;
+
+      it = snt[*ntp++];
+      if (it < 4) im += it;
+      else masked = 1;
+
+      if (masked) *outp++ = aascii['X'];
+      else *outp++ = codemap[im];
+    }
+  }
+  else {
+    /* reverse frames: walk downward from ntseq[maxs-(frame-3)] */
+    codemap = aamapr;
+    ntp = &ntseq[maxs-(frame-3)];
+    for (left = ncodon; left > 0; left--) {
+      masked = 0;
+      im = 0;
+
+      it = snt[*--ntp];
+      if (it < 4) im = it<<4;
+      else masked = 1;
+
+      it = snt[*--ntp];
+      if (it < 4) im += it<<2;
+      else masked = 1;
+
+      it = snt[*--ntp];
+      if (it < 4) im += it;
+      else masked = 1;
+
+      if (masked) *outp++ = aascii['X'];
+      else *outp++ = codemap[im];
+    }
+  }
+
+  aaseq[ncodon]=EOSEQ;
+  return ncodon;
+}
+
+void
+aainit(int tr_type, int debug)
+{
+  int i,j;
+  char *aasmap;
+  int ascii_star;
+  int imap[4]={3,1,0,2}, i0, i1, i2, ii;
+
+  debug_set = debug;
+
+  aasmap = AA1;
+
+  ascii_star = aascii['*'];
+  aascii['*'] = TERM;
+
+  if (tr_type > 0) {
+    /* need to put in a new translation table */
+    switch (tr_type) {
+    case 1: aasmap = AA1; break;
+    case 2: aasmap = AA2; break;
+    case 3: aasmap = AA3; break;
+    case 4: aasmap = AA4; break;
+    case 5: aasmap = AA5; break;
+    case 6: aasmap = AA6; break;
+    case 7: aasmap = AA7; break;
+    case 10: aasmap = AA10; break;
+    case 11: aasmap = AA11; break;
+    case 12: aasmap = AA12; break;
+    case 13: aasmap = AA13; break;
+    case 14: aasmap = AA14; break;
+    case 15: aasmap = AA15; break;
+    case 16: aasmap = AA16; break;
+    case 21: aasmap = AA21; break;
+    case 22: aasmap = AA22; break;
+    case 23: aasmap = AA23; break;
+
+    default: aasmap = AA1; break;
+    }
+
+    if (debug) fprintf(stderr," codon table: %d\n     new old\n",tr_type);
+    for (i0 = 0; i0 < 4; i0++)
+      for (i1 = 0; i1 < 4; i1++)
+	for (i2 = 0; i2 < 4; i2++) {
+	  ii = (imap[i0]<<4) + (imap[i1]<<2) + imap[i2];
+	  if (debug &&  aacmap[ii] != *aasmap) {
+	    fprintf(stderr," %c%c%c: %c - %c\n",
+		    nt[imap[i0]+1],nt[imap[i1]+1],nt[imap[i2]+1],
+		    *aasmap,aacmap[ii]);
+	  }
+	  aacmap[ii]= *aasmap++;
+	}
+
+
+    for (i=0; i<64; i++) {
+      fprintf(stderr,"'%c',",aacmap[i]);
+      if ((i%16)==15) fputc('\n',stderr);
+    }
+    fputc('\n',stderr);
+
+  }
+  for (i=0; i<64; i++) {
+    aamap[i]=aascii[aacmap[i]];
+    if (aamap[i] > TERM) {
+      fprintf(stderr," *** error - codon out of range: %d %d (%c)\n",i,aamap[i], NCBIstdaa_l[aamap[i]] );
+    }
+    aamapr[i]=aascii[aacmap[(~i)&63]];
+    if (aamapr[i] > TERM) {
+      fprintf(stderr," *** error - codon_r out of range: %d %d (%c)\n",i,aamapr[i], NCBIstdaa_l[aamapr[i]]);
+    }
+  }
+  aascii['*'] = ascii_star;
+}
+
+/* aagetmap - copy the first n letters of the current codon table
+   (aacmap[]) into to[].  At most sizeof(aacmap) entries are copied, so
+   a larger n no longer reads past the end of the table; n <= 0 copies
+   nothing. */
+void
+aagetmap(char *to, int n) 
+{
+  int i;
+  int limit = (int)sizeof(aacmap);
+
+  if (n < limit) limit = n;
+  for (i=0; i<limit; i++) to[i] = aacmap[i];
+}
diff --git a/src/getenv.c b/src/getenv.c
new file mode 100644
index 0000000..bc3cd9b
--- /dev/null
+++ b/src/getenv.c
@@ -0,0 +1,56 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+#define MAXENV 1024
+char *envstr;
+
+/* mgetenv - look up str in the file "environment" in the current
+   directory.
+
+   On the first call the file is read into envstr as a sequence of
+   NUL-terminated lines (newline stripped) ended by an empty string;
+   at most MAXENV-10 bytes are used.  Later calls reuse that buffer.
+
+   Returns a pointer just past the '=' of the first line that begins
+   with str and contains an '=', or NULL when there is no such line,
+   the file cannot be opened, or memory cannot be allocated. */
+char *mgetenv(str)
+char *str;
+{
+	static int EnvInit=0;
+
+	char *eptr, *esptr, *bp, *eq;
+	int i,esize;
+	size_t slen;
+	FILE *fenv;
+ 	
+	if (EnvInit==0) {
+		EnvInit=1;
+		envstr=NULL;
+		if ((fenv=fopen("environment","r"))!=NULL) {
+			if ((envstr=malloc((size_t)(esize=MAXENV)))!=NULL) {
+				esptr=envstr; esize -= 10;
+				/* stop while there is still room for at least one character */
+				while (esize > 1 && fgets(esptr,esize,fenv)!=NULL) {
+					if ((bp=strchr(esptr,'\n'))!=NULL) *bp='\0';
+					/* an empty entry would end the list early: skip blank lines */
+					if (*esptr=='\0') continue;
+					esize -= (i=strlen(esptr)+1);
+					esptr += i;
+				}
+				/* empty string terminates the list scanned below */
+				*esptr='\0';
+			}
+			fclose(fenv);
+		}
+	}
+	
+	if (envstr==NULL) return NULL;
+
+	slen = strlen(str);
+	for (eptr=envstr; *eptr; eptr += strlen(eptr)+1) {
+		if (strncmp(str,eptr,slen)==0) {
+			/* ignore matching lines that have no '=' */
+			if ((eq=strchr(eptr,'='))!=NULL) return eq+1;
+		}
+	}
+	return NULL;
+	}
+
+/* strnpcpy - copy a length-prefixed string into a C string.
+   from[0] is read as the byte count; up to max of the bytes that
+   follow are copied to `to`.  A terminating '\0' is written only when
+   fewer than max bytes were copied. */
+strnpcpy(to,from,max)
+	char *to; Str255 from; size_t max;
+{
+	size_t len, k;
+
+	/* number of bytes to copy: the stored count, capped at max */
+	len = max;
+	if (*from < max) len = *from;
+
+	/* the data starts one byte after the count */
+	for (k = 0; k < len; k++) to[k] = from[k+1];
+
+	if (len < max) to[len] = '\0';
+	}
diff --git a/src/getopt.c b/src/getopt.c
new file mode 100644
index 0000000..0d76146
--- /dev/null
+++ b/src/getopt.c
@@ -0,0 +1,64 @@
+/*LINTLIBRARY*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* ERR(s, c) - when opterr is non-zero, write argv[0], the message s
+   and the option character c followed by a newline to stderr.
+   Expands to an if statement and relies on `argv` and `opterr` being
+   in scope where it is used. */
+#define ERR(s, c)	if(opterr){\
+	char errbuf[3];\
+	errbuf[0] = c; errbuf[1] = '\n'; errbuf[2]='\0';\
+	(void) fputs(argv[0],stderr);\
+	(void) fputs(s,stderr);\
+	(void) fputs(errbuf,stderr);}
+
+
+int	opterr = 1;
+int	optind = 1;
+int	optopt;
+char	*optarg;
+
+/* getopt - return the next option character from argv.
+
+   opts lists the accepted option letters; a letter followed by ':'
+   takes an argument, which is returned through optarg (either the
+   rest of the current word or the next word).
+
+   Returns the option character, '?' for an unknown option or a
+   missing argument (after reporting it through ERR), or EOF when the
+   next word is not an option or is "--".  optind is advanced past
+   each consumed word; the static sp remembers the position inside a
+   word holding several grouped options. */
+int
+getopt(argc, argv, opts)
+int	argc;
+char	**argv, *opts;
+{
+	static int sp = 1;
+	register int c;
+	register char *cp;
+
+	/* sp == 1: starting a new word.  The else below pairs with the
+	   inner if (end-of-options test), not with if(sp == 1). */
+	if(sp == 1)
+		if(optind >= argc ||
+		   argv[optind][0] != '-' || argv[optind][1] == '\0')
+			return(EOF);
+		else if(strcmp(argv[optind], "--") == 0) {
+			optind++;
+			return(EOF);
+		}
+	optopt = c = argv[optind][sp];
+	/* ':' is never a valid option letter */
+	if(c == ':' || (cp=strchr(opts, c)) == NULL) {
+		ERR(": illegal option -- ", c);
+		/* step to the next character, or the next word if this one is done */
+		if(argv[optind][++sp] == '\0') {
+			optind++;
+			sp = 1;
+		}
+		return('?');
+	}
+	if(*++cp == ':') {
+		/* option takes an argument: rest of this word, else the next word */
+		if(argv[optind][sp+1] != '\0')
+			optarg = &argv[optind++][sp+1];
+		else if(++optind >= argc) {
+			ERR(": option requires an argument -- ", c);
+			sp = 1;
+			return('?');
+		} else
+			optarg = argv[optind++];
+		sp = 1;
+	} else {
+		/* plain flag: move on within the word, or to the next word */
+		if(argv[optind][++sp] == '\0') {
+			sp = 1;
+			optind++;
+		}
+		optarg = NULL;
+	}
+	return(c);
+}
diff --git a/src/getseq.c b/src/getseq.c
new file mode 100644
index 0000000..ef912d3
--- /dev/null
+++ b/src/getseq.c
@@ -0,0 +1,313 @@
+/* getseq.c */
+
+/* copyright (c) 1987,1988,1989,1992,1995,2000, 2014 by William R. Pearson
+   and The Rectors and Visitors of the University of Virginia */
+
+/* Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under this License is distributed on an "AS
+   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+   express or implied.  See the License for the specific language
+   governing permissions and limitations under the License. 
+*/
+
+/*	May, June 1987	- modified for rapid read of database 
+	This is one of three alternative files that can be used to
+	read a database.  The three files are nxgetaa.c, nmgetaa.c, and
+	mmgetaa.c.
+
+	nxgetaa.c contains the original code for reading databases, and
+	is still used for Mac and PC versions of fasta33 (which do not
+	use mmap).
+
+	nmgetaa.c and mmgetaa.c are used together.  nmgetaa.c provides
+	the same functions as nxgetaa.c if memory mapping is not used,
+	mmgetaa.c provides the database reading functions if memory
+	mapping is used. The decision to use memory mapping is made on
+	a file-by-file basis.
+
+	June 2, 1987 - added TFASTA
+	March 30, 1988 - combined ffgetaa, fgetgb;
+	April 8, 1988 - added PIRLIB format for unix
+	Feb 4, 1989 - added universal subroutines for libraries
+	December, 1995 - added range option file.name:1-1000
+	Feb 22, 2002 - fix to allow "plain" text file queries
+
+	getnt.c	associated subroutines for matching sequences */
+
+/*  $Id: getseq.c 625 2011-03-23 17:21:38Z wrp $ */
+/* $Revision: 625 $  */
+
+/*
+	8-April-88
+	The compile time #define PIRLIB allows this routine to be used
+	to read protein and DNA sequence libraries in the NBRF/PIR
+	VAX/VMS library format.  That is:
+
+	>P1;LCBO
+	This is a line of description
+	GTYH ... the sequence starts on this line
+
+	This may ease conversion from UWGCG format libraries. It
+	has not been extensively tested.
+
+	In addition, sequence libraries with a '>' in the 4th position
+	are recognized as NBRF format libraries for consistency with
+	UWGCG
+*/
+
+/* 	Nov 12, 1987	- this version checks to see if the sequence
+	is DNA or protein by asking whether > 85% is A, C, G, T
+
+	May 5, 1988 - modify the DNA/PROTEIN checker by re-reading
+	DNA sequences in order to check for 'U'.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "defs.h"
+#include "structs.h"
+
+#ifndef SFCHAR
+#define SFCHAR ':'
+#endif
+
+#define XTERNAL
+#include "uascii.h"
+#include "upam.h"
+#undef XTERNAL
+
+#define YES 1
+#define NO 0
+#define MAXLINE 512
+
+#ifndef min
+#define min(x,y) ((x) > (y) ? (y) : (x))
+#endif
+
+#define NO_FORMAT 0
+#define FASTA_FORMAT 1
+#define GCG_FORMAT 2
+
+static int seq_format=NO_FORMAT;
+static char seq_title[200];
+
+int scanseq(unsigned char *, int, char *);
+void sf_sort(int *, int);
+extern void init_ascii(int is_ext, int *sascii, int is_dna);
+
+/* getseq	- read a query sequence from a file (or stdin), encode it with
+		  qascii[] and store it in seq[]
+   returns	- length of query sequence, or 0 on error
+
+   char *filen	- file to open; "-" or "@" means stdin.  A ":start-stop" or
+		  ":-stop" suffix selects a subrange; it is cut off while the
+		  file is read and restored on every return path
+   int *qascii	- maps input characters to the codes stored in seq
+   int maxs	- maximum length of query
+   char *libstr - short description (locus or acc), up to 12 chars + NUL
+   long *sq0off	- offset into query specified by query_file:1001-2000
+*/
+
+int
+getseq(char *filen, int *qascii, unsigned char *seq, int maxs, char *libstr, long *sq0off)
+{
+  FILE *fptr;
+  char line[512], *bp;
+  int i, rn, n;
+  int ic;
+  int sstart, sstop, sset=0;
+  int llen, l_offset;
+
+  seq_title[0]='\0';
+  libstr[0]='\0';
+  line[0]='\0';	/* keeps line defined if the first fgets() hits EOF */
+
+  sstart = sstop = -1;
+#ifndef DOS
+  if ((bp=strchr(filen,':'))!=NULL && *(bp+1)!='\0') {
+#else
+  if ((bp=strchr(filen+3,':'))!=NULL && *(bp+1)!='\0') {
+#endif
+    *bp='\0';	/* cut the range suffix off; the ':' is put back before returning */
+    if (*(bp+1)=='-') {
+      sstart = 0;
+      sscanf(bp+2,"%d",&sstop);
+    }
+    else {
+      sscanf(bp+1,"%d-%d",&sstart,&sstop);
+      sstart--;
+      if (sstop <= 0 ) sstop = BIGNUM;
+    }
+    sset=1;
+  }
+  else {
+    sstart = 0;
+    sstop = BIGNUM;
+  }
+
+  /* check for input from stdin */
+  if (strcmp(filen,"-") && strcmp(filen,"@")) {
+    if ((fptr=fopen(filen,"r"))==NULL) {
+      fprintf(stderr," could not open %s\n",filen);
+      if (sset==1) filen[strlen(filen)]=':';
+      return 0;
+    }
+  }
+  else {
+    fptr = stdin;
+  }
+  rn = n=0;
+
+  while(fgets(line,sizeof(line),fptr)!=NULL) {
+    l_offset = 0;
+    if (line[0]=='>') {
+      seq_format = FASTA_FORMAT;
+      if ((bp=(char *)strchr(line,'\n'))!=NULL) *bp='\0';
+      strncpy(seq_title,line+1,sizeof(seq_title));
+      seq_title[sizeof(seq_title)-1]='\0';
+      if ((bp=(char *)strchr(line,' '))!=NULL) *bp='\0';
+      strncpy(libstr,line+1,12);
+      libstr[12]='\0';
+    }
+    else if (seq_format==NO_FORMAT && strcmp(line,"..")==0) {
+      seq_format = GCG_FORMAT;
+      l_offset = 10;
+      llen = strlen(line);
+      /* skip ahead to a line ending in ".."; never index before line[0] */
+      while (llen<3 || strncmp(&line[llen-3],"..\n",(size_t)3) != 0) {
+	if (fgets(line,sizeof(line),fptr)==NULL) goto fail;
+	llen = strlen(line);
+      }
+      bp = strtok(line," \t");
+      if (bp!=NULL) strncpy(libstr,bp,12);
+      else strncpy(libstr,filen,12);
+      libstr[12]='\0';
+      if (fgets(line,sizeof(line),fptr)==NULL) goto fail;
+    }
+    else {
+      if (libstr[0]=='\0') strncpy(libstr,filen,12);
+      libstr[12]='\0';
+    }
+
+    if (seq_format==GCG_FORMAT && strlen(line)<l_offset) continue;
+
+    if (line[0]!='>'&& line[0]!=';') {
+      for (i=l_offset; (n<maxs && rn < sstop)&&
+	     ((ic=qascii[line[i]&AAMASK])<EL); i++)
+	if (ic<NA && ++rn > sstart) seq[n++]= ic;
+      if (ic == ES || rn > sstop) break;
+    }
+  }
+
+  if (n==maxs) {
+    fprintf(stderr," sequence may be truncated %d %d\n",n,maxs);
+    fflush(stderr);
+  }
+  if ((bp=strchr(libstr,'\n'))!=NULL) *bp = '\0';
+  if ((bp=strchr(libstr,'\r'))!=NULL) *bp = '\0';
+  seq[n]= EOSEQ;
+
+  if (seq_format !=GCG_FORMAT) 
+    while(fgets(line,sizeof(line),fptr)!=NULL) {
+	if (line[0]!='>'&& line[0]!=';') {
+	  for (i=0; (n<maxs && rn < sstop)&&
+		 ((ic=qascii[line[i]&AAMASK])<EL); i++)
+	    if (ic<NA && ++rn > sstart ) seq[n++]= ic;
+	  if (ic == ES || rn > sstop) break;
+	}
+    }
+  else {
+    llen = strlen(line);
+    while (llen<3 || strncmp(&line[llen-3],"..\n",(size_t)3) != 0) {
+      if (fgets(line,sizeof(line),fptr)==NULL) goto fail;
+      llen = strlen(line);
+    }
+    while (fgets(line,sizeof(line),fptr)!=NULL) {
+      if (strlen(line)<l_offset) continue;
+      for (i=l_offset; (n<maxs && rn < sstop) &&
+	     ((ic=qascii[line[i]&AAMASK])<EL); i++)
+	if (ic<NA && ++rn > sstart ) seq[n++]= ic;
+      if (ic == ES || rn > sstop ) break;
+    }
+  }
+
+  if (n==maxs) {
+    fprintf(stderr," sequence may be truncated %d %d\n",n,maxs);
+    fflush(stderr);
+  }
+  seq[n]= EOSEQ;
+
+  if (fptr!=stdin) fclose(fptr);
+
+  if (sset==1) {
+    sstart++;
+    filen[strlen(filen)]=':';
+    if (*sq0off==1 || sstart>=1) *sq0off = sstart;
+  }
+
+  return n;
+
+ fail:	/* input ended early: close the file and restore filen before giving up */
+  if (fptr!=stdin) fclose(fptr);
+  if (sset==1) filen[strlen(filen)]=':';
+  return 0;
+}
+
+/* gettitle - copy the first line of filen that starts with '>' or ';' into
+   title (at most len-1 chars).  For names beginning with '-' or '@' the
+   title saved by getseq() is used.  Returns strlen(title), or 0 if the file
+   cannot be opened or has no such line.  A ":range" suffix is left intact. */
+int
+gettitle(char *filen, char *title, int len) {
+  FILE *fptr;
+  char line[512];
+  char *bp;
+  int sset;
+
+  sset = 0;
+
+  if (strncmp(filen,"-",1)==0 || strncmp(filen,"@",1)==0) {
+    strncpy(title,seq_title,len);
+    title[len-1]='\0';
+    return (int)strlen(title);
+  }
+
+  if ((bp=strchr(filen,':'))!=NULL) { *bp='\0'; sset=1;}
+
+  if ((fptr=fopen(filen,"r"))==NULL) {
+    fprintf(stderr," file %s was not found\n",filen);
+    fflush(stderr);
+    if (sset==1) *bp=':';	/* do not leave the caller's name truncated */
+    return 0;
+  }
+
+  if (sset==1) filen[strlen(filen)]=':';
+
+  while(fgets(line,sizeof(line),fptr)!=NULL) {
+    if (line[0]=='>'|| line[0]==';') goto found;
+  }
+  fclose(fptr);
+  title[0]='\0';
+  return 0;
+
+ found:
+#ifdef WIN32
+  bp = strpbrk(line,"\n\r");
+#else
+  bp = strchr(line,'\n');
+#endif
+  if (bp!=NULL) *bp = 0;
+  strncpy(title,line,len);
+  title[len-1]='\0';
+  fclose(fptr);
+  return strlen(title);
+}
+
diff --git a/src/global_sse2.c b/src/global_sse2.c
new file mode 100644
index 0000000..87999ea
--- /dev/null
+++ b/src/global_sse2.c
@@ -0,0 +1,547 @@
+/******************************************************************
+  Copyright 2010 by Michael Farrar.  All rights reserved.
+  This program may not be sold or incorporated into a commercial product,
+  in whole or in part, without written consent of Michael Farrar.  For 
+  further information regarding permission for use or reproduction, please 
+  contact: Michael Farrar at farrar.michael at gmail.com.
+*******************************************************************/
+
+/*
+  Written by Michael Farrar, 2010.
+  Please send bug reports and/or suggestions to farrar.michael at gmail.com.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "defs.h"
+#include "param.h"
+#include "dropgsw2.h"
+#include "global_sse2.h"
+
+#ifdef __SUNPRO_C
+#include <sunmedia_intrin.h>
+#else
+#include <emmintrin.h>
+#endif
+
+#ifdef SW_SSE2
+
+/* unsigned 16-bit lane-wise maximum, built as b + saturating(a - b) */
+static inline __m128i
+max_epu16(__m128i a, __m128i b)
+{
+  __m128i excess = _mm_subs_epu16 (a, b);	/* a-b where a>b, else 0 */
+  return _mm_adds_epu16 (b, excess);
+}
+
+/* global_sse2_word - global alignment score computed in 16-bit SSE2 lanes; returns OVERFLOW_SCORE when 16 bits are not enough even after rescaling */
+int
+global_sse2_word(int                  queryLength,
+                 unsigned short      *profile,	/* read as iter vectors of scores per dbSeq symbol */
+                 const unsigned char *dbSeq,
+                 int                  dbLength,
+                 unsigned short       gapOpen,
+                 unsigned short       gapExtend,
+                 unsigned short       ceiling,
+                 struct f_struct     *f_str)	/* f_str->workspace holds the H and E vectors */
+{
+  int     i, j;
+
+  int     score;
+  int     scale;	/* total amount removed by rescaling; added back to the result */
+  int     temp;
+  int     distance;	/* (queryLength + dbLength) * gapExtend, subtracted from the result */
+
+  int     offset;
+  int     position;
+
+  int     cmp;
+  int     iter;
+    
+  __m128i *pvH;	/* iter vectors of H values at the start of the workspace */
+  __m128i *pvE;	/* iter vectors of E values, directly after pvH */
+
+  __m128i vE, vF, vH;
+  __m128i vHNext;
+  __m128i vFPrev;
+
+  __m128i vGapOpen;
+  __m128i vGapExtend;
+  __m128i vCeiling;
+
+  __m128i vScale;
+  __m128i vScaleAmt;
+  __m128i vScaleTmp;
+
+  __m128i vTemp;
+  __m128i vNull;
+
+  __m128i *pvScore;
+
+  scale = 0;
+  iter = (queryLength + 7) / 8;	/* number of 8-lane vectors covering the query */
+  offset = (queryLength - 1) % iter;	/* NOTE(review): iter is 0 when queryLength is 0, so this would divide by zero */
+  position = 7 - (queryLength - 1) / iter;	/* lanes to shift so the result lane ends up in lane 7 */
+
+  pvH = (__m128i *)f_str->workspace;
+  pvE = pvH + iter;
+
+  /* Load gap opening penalty to all elements of a constant */
+  vGapOpen = _mm_setzero_si128();	/* transferred from Apple Devel smith_waterman_sse2.c fix */
+  vGapOpen = _mm_insert_epi16 (vGapOpen, gapOpen, 0);
+  vGapOpen = _mm_shufflelo_epi16 (vGapOpen, 0);
+  vGapOpen = _mm_shuffle_epi32 (vGapOpen, 0);
+
+  /* Load gap extension penalty to all elements of a constant (vGapExtend is not referenced again in this function) */
+  vGapExtend = _mm_setzero_si128();	/* transferred from Apple Devel smith_waterman_sse2.c fix */
+  vGapExtend = _mm_insert_epi16 (vGapExtend, gapExtend, 0);
+  vGapExtend = _mm_shufflelo_epi16 (vGapExtend, 0);
+  vGapExtend = _mm_shuffle_epi32 (vGapExtend, 0);
+
+  /* Generate the ceiling before scaling: 0x7fff - ceiling - gapOpen in every lane (saturating) */
+  vTemp = _mm_setzero_si128();	/* transferred from Apple Devel smith_waterman_sse2.c fix */
+  vTemp = _mm_insert_epi16 (vTemp, ceiling, 0);
+  vTemp = _mm_shufflelo_epi16 (vTemp, 0);
+  vTemp = _mm_shuffle_epi32 (vTemp, 0);
+  vCeiling = _mm_cmpeq_epi16 (vTemp, vTemp);
+  vCeiling = _mm_srli_epi16 (vCeiling, 1);	/* 0x7fff in every lane */
+  vCeiling = _mm_subs_epi16 (vCeiling, vTemp);
+  vCeiling = _mm_subs_epi16 (vCeiling, vGapOpen);
+
+  vNull = _mm_cmpeq_epi16 (vTemp, vTemp);
+  vNull = _mm_slli_epi16 (vNull, 15);	/* 0x8000 in every lane: the most negative signed 16-bit value */
+  vScaleAmt = _mm_xor_si128 (vNull, vNull);	/* x ^ x: all zero */
+
+  /* Initialize the storage vectors: H = vNull + gapOpen (saturating), E = vNull */
+  vTemp = _mm_adds_epi16 (vNull, vGapOpen);
+  for (i = 0; i < iter; i++) {
+    _mm_store_si128 (pvH + i, vTemp);
+    _mm_store_si128 (pvE + i, vNull);
+  }
+
+  /* initialize F */
+  vF = vNull;
+  vFPrev = vNull;
+
+  /* load and scale H for the next round */
+  vTemp = _mm_srli_si128 (vGapOpen, 14);	/* gapOpen in lane 0 only */
+  vH = _mm_load_si128 (pvH + iter - 1);
+  vH = _mm_adds_epi16 (vH, vTemp);
+
+  for (i = 0; i < dbLength; ++i) {
+    /* fetch first data asap. */
+    pvScore = (__m128i *) profile + dbSeq[i] * iter;
+
+    vF = vNull;
+
+    vH = _mm_max_epi16 (vH, vFPrev);
+    for (j = 0; j < iter; j++) {
+      /* correct H from the previous columns F */
+      vHNext = _mm_load_si128 (pvH + j);
+      vHNext = _mm_max_epi16 (vHNext, vFPrev);
+
+      /* load and correct E value */
+      vE = _mm_load_si128 (pvE + j);
+      vTemp = _mm_subs_epi16 (vHNext, vGapOpen);
+      vE = _mm_max_epi16 (vE, vTemp);
+      _mm_store_si128 (pvE + j, vE);
+
+      /* add score to vH */
+      vH = _mm_adds_epi16 (vH, *pvScore++);
+
+      /* get max from vH, vE and vF */
+      vH = _mm_max_epi16 (vH, vE);
+      vH = _mm_max_epi16 (vH, vF);
+      _mm_store_si128 (pvH + j, vH);
+
+      /* update vF value */
+      vH = _mm_subs_epi16 (vH, vGapOpen);
+      vF = _mm_max_epi16 (vF, vH);
+
+      /* load the next h values */
+      vH = vHNext;
+    }
+
+    /* check if we need to scale before the next round */
+    vTemp = _mm_cmpgt_epi16 (vF, vCeiling);
+    cmp  = _mm_movemask_epi8 (vTemp);	/* non-zero if any lane of vF exceeds vCeiling */
+
+    /* broadcast F values */
+    vF = _mm_xor_si128 (vF, vNull);	/* flip the sign bit; the steps below use unsigned saturating ops */
+
+    vTemp  = _mm_slli_si128 (vF, 2);
+    vTemp = _mm_subs_epu16 (vTemp, vScaleAmt);
+    vF = max_epu16 (vF, vTemp);
+
+    vTemp  = _mm_slli_si128 (vF, 4);
+    vScaleTmp = _mm_slli_si128 (vScaleAmt, 2);
+    vScaleTmp = _mm_adds_epu16 (vScaleTmp, vScaleAmt);
+    vTemp = _mm_subs_epu16 (vTemp, vScaleTmp);
+    vF = max_epu16 (vF, vTemp);
+
+    vTemp = _mm_slli_si128 (vScaleTmp, 4);
+    vScaleTmp = _mm_adds_epu16 (vScaleTmp, vTemp);
+    vTemp  = _mm_slli_si128 (vF, 8);
+    vTemp = _mm_subs_epu16 (vTemp, vScaleTmp);
+    vF = max_epu16 (vF, vTemp);
+
+    /* scale if necessary */
+    if (cmp != 0x0000) {
+      __m128i vScale1;
+      __m128i vScale2;
+
+      vScale = _mm_slli_si128 (vF, 2);
+      vScale = _mm_subs_epu16 (vScale, vGapOpen);
+      vScale = _mm_subs_epu16 (vScale, vScaleAmt);
+
+      vTemp = _mm_slli_si128 (vScale, 2);
+      vTemp = _mm_subs_epu16 (vScale, vTemp);
+      vScaleAmt = _mm_adds_epu16 (vScaleAmt, vTemp);
+      vTemp = _mm_slli_si128 (vScale, 2);
+      vTemp = _mm_subs_epu16 (vTemp, vScale);
+      vScaleAmt = _mm_subs_epu16 (vScaleAmt, vTemp);
+
+      /* rescale the previous F */
+      vF = _mm_subs_epu16 (vF, vScale);
+
+      /* check if we can continue in signed 16-bits */
+      vTemp = _mm_xor_si128 (vF, vNull);
+      vTemp = _mm_cmpgt_epi16 (vTemp, vCeiling);
+      cmp  = _mm_movemask_epi8 (vTemp);
+      if (cmp != 0x0000) {
+        return OVERFLOW_SCORE;	/* still above the ceiling after rescaling */
+      }
+
+      vTemp   = _mm_adds_epi16 (vCeiling, vCeiling);
+      vScale1 = _mm_subs_epu16 (vScale, vTemp);
+      vScale2 = _mm_subs_epu16 (vScale, vScale1);	/* vScale1 + vScale2 == vScale */
+
+      /* scale all the vectors */
+      for (j = 0; j < iter; j++) {
+        /* load H and E */
+        vH = _mm_load_si128 (pvH + j);
+        vE = _mm_load_si128 (pvE + j);
+
+        /* subtract the scale from H and E in two saturating steps */
+        vH = _mm_subs_epi16 (vH, vScale1);
+        vH = _mm_subs_epi16 (vH, vScale2);
+        vE = _mm_subs_epi16 (vE, vScale1);
+        vE = _mm_subs_epi16 (vE, vScale2);
+
+        /* save the H and E */
+        _mm_store_si128 (pvH + j, vH);
+        _mm_store_si128 (pvE + j, vE);
+      }
+
+      vScale = vScaleAmt;
+      for (j = 0; j < position; ++j) {
+        vScale = _mm_slli_si128 (vScale, 2);
+      }
+
+      /* calculate the final scaling amount */
+      vTemp   = _mm_xor_si128 (vTemp, vTemp);
+      vScale1 = _mm_unpacklo_epi16 (vScale, vTemp);
+      vScale2 = _mm_unpackhi_epi16 (vScale, vTemp);
+      vScale  = _mm_add_epi32 (vScale1, vScale2);
+      vTemp = _mm_srli_si128 (vScale, 8);
+      vScale = _mm_add_epi32 (vScale, vTemp);
+      vTemp = _mm_srli_si128 (vScale, 4);
+      vScale = _mm_add_epi32 (vScale, vTemp);
+      scale = (int) (unsigned short) _mm_extract_epi16 (vScale, 0);	/* low half of the 32-bit sum */
+      temp  = (int) (unsigned short) _mm_extract_epi16 (vScale, 1);	/* high half of the 32-bit sum */
+      scale = scale + (temp << 16);
+    }
+
+    /* scale the F value for the next round */
+    vFPrev = _mm_slli_si128 (vF, 2);
+    vFPrev = _mm_subs_epu16 (vFPrev, vScaleAmt);
+    vFPrev = _mm_xor_si128 (vFPrev, vNull);
+
+    /* load and scale H for the next round */
+    vH = _mm_load_si128 (pvH + iter - 1);
+    vH = _mm_xor_si128 (vH, vNull);
+    vH = _mm_slli_si128 (vH, 2);
+    vH = _mm_subs_epu16 (vH, vScaleAmt);
+    vH = _mm_insert_epi16 (vH, gapOpen, 0);
+    vH = _mm_xor_si128 (vH, vNull);
+  }
+
+  vH = _mm_load_si128 (pvH + offset);
+  vH = _mm_max_epi16 (vH, vFPrev);
+  for (j = 0; j < position; ++j) {
+    vH = _mm_slli_si128 (vH, 2);
+  }
+  score = (int) (signed short) _mm_extract_epi16 (vH, 7);
+  score = score + SHORT_BIAS;	/* SHORT_BIAS is 32768, matching the 0x8000 base value in vNull */
+
+  /* return largest score */
+  distance = (queryLength + dbLength) * gapExtend;
+  score = score - (gapOpen * 2) - distance + scale;
+
+  return score;
+}
+/* global_sse2_byte - global alignment score computed in 8-bit unsigned SSE2 lanes; returns OVERFLOW_SCORE when 8 bits are not enough even after rescaling */
+int
+global_sse2_byte(int                  queryLength,
+                 unsigned char       *profile,	/* read as iter vectors of scores per dbSeq symbol */
+                 const unsigned char *dbSeq,
+                 int                  dbLength,
+                 unsigned short       gapOpen,
+                 unsigned short       gapExtend,
+                 unsigned short       ceiling,
+                 unsigned short       bias,	/* subtracted after each profile score is added */
+                 struct f_struct     *f_str)	/* f_str->workspace holds the H and E vectors */
+{
+  int     i, j;
+
+  int     score;
+  int     scale;	/* total amount removed by rescaling; added back to the result */
+  int     distance;	/* (queryLength + dbLength) * gapExtend, subtracted from the result */
+
+  int     offset;
+  int     position;
+
+  int     dup;	/* a value copied into both bytes of a 16-bit word (assumes it fits in 8 bits) */
+  int     cmp;
+  int     iter;
+    
+  __m128i *pvH;	/* iter vectors of H values at the start of the workspace */
+  __m128i *pvE;	/* iter vectors of E values, directly after pvH */
+
+  __m128i vE, vF, vH;
+  __m128i vHInit;
+  __m128i vHNext;
+  __m128i vFPrev;
+
+  __m128i vBias;
+  __m128i vGapOpen;
+  __m128i vGapExtend;
+  __m128i vCeiling;
+
+  __m128i vScale;
+  __m128i vScaleAmt;
+  __m128i vScaleTmp;
+
+  __m128i vTemp;
+  __m128i vNull;
+
+  __m128i *pvScore;
+
+  scale = 0;
+  iter = (queryLength + 15) / 16;	/* number of 16-lane vectors covering the query */
+  offset = (queryLength - 1) % iter;	/* NOTE(review): iter is 0 when queryLength is 0, so this would divide by zero */
+  position = 15 - (queryLength - 1) / iter;	/* lanes to shift so the result lane ends up in lane 15 */
+
+  pvH = (__m128i *)f_str->workspace;
+  pvE = pvH + iter;
+
+  /* Load the bias to all elements of a constant */
+  dup    = (bias << 8) | (bias & 0x00ff);
+  vBias = _mm_setzero_si128();	/* initialize cf Apple Devel smith_waterman_sse2.c */
+  vBias = _mm_insert_epi16 (vBias, dup, 0);
+  vBias = _mm_shufflelo_epi16 (vBias, 0);
+  vBias = _mm_shuffle_epi32 (vBias, 0);
+
+  /* Load gap opening penalty to all elements of a constant */
+  dup      = (gapOpen << 8) | (gapOpen & 0x00ff);
+  vGapOpen = _mm_setzero_si128();	/* initialize cf Apple Devel smith_waterman_sse2.c */
+  vGapOpen = _mm_insert_epi16 (vGapOpen, dup, 0);
+  vGapOpen = _mm_shufflelo_epi16 (vGapOpen, 0);
+  vGapOpen = _mm_shuffle_epi32 (vGapOpen, 0);
+
+  /* Load gap extension penalty to all elements of a constant (vGapExtend is not referenced again in this function) */
+  dup    = (gapExtend << 8) | (gapExtend & 0x00ff);
+  vGapExtend = _mm_setzero_si128();	/* initialize cf Apple Devel smith_waterman_sse2.c */
+  vGapExtend = _mm_insert_epi16 (vGapExtend, dup, 0);
+  vGapExtend = _mm_shufflelo_epi16 (vGapExtend, 0);
+  vGapExtend = _mm_shuffle_epi32 (vGapExtend, 0);
+
+  /* Generate the ceiling before scaling: 0xff - ceiling - gapOpen in every byte (saturating at 0) */
+  dup    = (ceiling << 8) | (ceiling & 0x00ff);
+  vTemp = _mm_setzero_si128();	/* initialize cf Apple Devel smith_waterman_sse2.c */
+  vTemp = _mm_insert_epi16 (vTemp, dup, 0);
+  vTemp = _mm_shufflelo_epi16 (vTemp, 0);
+  vTemp = _mm_shuffle_epi32 (vTemp, 0);
+  vCeiling = _mm_cmpeq_epi8 (vTemp, vTemp);
+  vCeiling = _mm_subs_epu8 (vCeiling, vTemp);
+  vCeiling = _mm_subs_epu8 (vCeiling, vGapOpen);
+
+  /* since we want to use the full range, zero is redefined as */
+  /* 2 * gapOpen.  the lowest scaled score will be an insert followed */
+  /* by a delete. */
+  vHInit = _mm_srli_si128 (vGapOpen, 15);	/* gapOpen in byte 0 only */
+
+  /* vNull = _mm_xor_si128 (vNull, vNull); */
+  vNull = _mm_setzero_si128();	/* initialize cf Apple Devel smith_waterman_sse2.c */
+  vScaleAmt = vNull;
+
+  /* Initialize the storage vectors: H = gapOpen in every byte, E = 0 */
+  for (i = 0; i < iter; i++) {
+    _mm_store_si128 (pvH + i, vGapOpen);
+    _mm_store_si128 (pvE + i, vNull);
+  }
+
+  /* initialize F */
+  vF = vNull;
+  vFPrev = vNull;
+
+  /* load and scale H for the next round */
+  vH = _mm_load_si128 (pvH + iter - 1);
+  vH = _mm_slli_si128 (vH, 1);
+  vH = _mm_adds_epu8 (vH, vHInit);
+  vH = _mm_adds_epu8 (vH, vHInit);	/* added twice: byte 0 becomes 2 * gapOpen */
+
+  for (i = 0; i < dbLength; ++i) {
+    /* fetch first data asap. */
+    pvScore = (__m128i *) profile + dbSeq[i] * iter;
+
+    vF = _mm_xor_si128 (vF, vF);
+
+    vH = _mm_max_epu8 (vH, vFPrev);
+    for (j = 0; j < iter; j++) {
+      /* correct H from the previous columns F */
+      vHNext = _mm_load_si128 (pvH + j);
+      vHNext = _mm_max_epu8 (vHNext, vFPrev);
+
+      /* load and correct E value */
+      vE = _mm_load_si128 (pvE + j);
+      vTemp = _mm_subs_epu8 (vHNext, vGapOpen);
+      vE = _mm_max_epu8 (vE, vTemp);
+      _mm_store_si128 (pvE + j, vE);
+
+      /* add score to vH */
+      vH = _mm_adds_epu8 (vH, *pvScore++);
+      vH = _mm_subs_epu8 (vH, vBias);
+
+      /* get max from vH, vE and vF */
+      vH = _mm_max_epu8 (vH, vE);
+      vH = _mm_max_epu8 (vH, vF);
+      _mm_store_si128 (pvH + j, vH);
+
+      /* update vF value */
+      vH = _mm_subs_epu8 (vH, vGapOpen);
+      vF = _mm_max_epu8 (vF, vH);
+
+      /* load the next h values */
+      vH = vHNext;
+    }
+
+    /* check if we need to scale before the next round */
+    vTemp = _mm_subs_epu8 (vCeiling, vF);
+    vTemp = _mm_cmpeq_epi8 (vTemp, vNull);
+    cmp  = _mm_movemask_epi8 (vTemp);	/* non-zero if any byte of vF is >= vCeiling */
+
+    /* broadcast F values */
+    vTemp  = _mm_slli_si128 (vF, 1);
+    vTemp = _mm_subs_epu8 (vTemp, vScaleAmt);
+    vF = _mm_max_epu8 (vF, vTemp);
+
+    vScaleTmp = _mm_slli_si128 (vScaleAmt, 1);
+    vScaleTmp = _mm_adds_epu8 (vScaleTmp, vScaleAmt);
+    vTemp  = _mm_slli_si128 (vF, 2);
+    vTemp = _mm_subs_epu8 (vTemp, vScaleTmp);
+    vF = _mm_max_epu8 (vF, vTemp);
+
+    vTemp = _mm_slli_si128 (vScaleTmp, 2);
+    vScaleTmp = _mm_adds_epu8 (vScaleTmp, vTemp);
+    vTemp  = _mm_slli_si128 (vF, 4);
+    vTemp = _mm_subs_epu8 (vTemp, vScaleTmp);
+    vF = _mm_max_epu8 (vF, vTemp);
+
+    vTemp = _mm_slli_si128 (vScaleTmp, 4);
+    vScaleTmp = _mm_adds_epu8 (vScaleTmp, vTemp);
+    vTemp  = _mm_slli_si128 (vF, 8);
+    vTemp = _mm_subs_epu8 (vTemp, vScaleTmp);
+    vF = _mm_max_epu8 (vF, vTemp);
+
+    /* scale if necessary */
+    if (cmp != 0x0000) {
+      vScale = _mm_slli_si128 (vF, 1);
+      vScale = _mm_subs_epu8 (vScale, vGapOpen);
+      vScale = _mm_subs_epu8 (vScale, vScaleAmt);
+
+      vTemp = _mm_slli_si128 (vScale, 1);
+      vTemp = _mm_subs_epu8 (vScale, vTemp);
+      vScaleAmt = _mm_adds_epu8 (vScaleAmt, vTemp);
+      vTemp = _mm_slli_si128 (vScale, 1);
+      vTemp = _mm_subs_epu8 (vTemp, vScale);
+      vScaleAmt = _mm_subs_epu8 (vScaleAmt, vTemp);
+
+      /* rescale the previous F */
+      vF = _mm_subs_epu8 (vF, vScale);
+
+      /* check if we can continue in 8-bits */
+      vTemp = _mm_subs_epu8 (vCeiling, vF);
+      vTemp = _mm_cmpeq_epi8 (vTemp, vNull);
+      cmp  = _mm_movemask_epi8 (vTemp);
+      if (cmp != 0x0000) {
+        return OVERFLOW_SCORE;	/* still at or above the ceiling after rescaling */
+      }
+
+      /* scale all the vectors */
+      for (j = 0; j < iter; j++) {
+        /* load H and E */
+        vH = _mm_load_si128 (pvH + j);
+        vE = _mm_load_si128 (pvE + j);
+
+        /* subtract the scale from H and E (saturating at 0) */
+        vH = _mm_subs_epu8 (vH, vScale);
+        vE = _mm_subs_epu8 (vE, vScale);
+
+        /* save the H and E */
+        _mm_store_si128 (pvH + j, vH);
+        _mm_store_si128 (pvE + j, vE);
+      }
+
+      /* calculate the final scaling amount */
+      vScale = vScaleAmt;
+      for (j = 0; j < position; ++j) {
+        vScale = _mm_slli_si128 (vScale, 1);
+      }
+      vTemp = _mm_unpacklo_epi8 (vScale, vNull);
+      vScale = _mm_unpackhi_epi8 (vScale, vNull);
+      vScale = _mm_adds_epi16 (vScale, vTemp);
+      vTemp = _mm_srli_si128 (vScale, 8);
+      vScale = _mm_adds_epi16 (vScale, vTemp);
+      vTemp = _mm_srli_si128 (vScale, 4);
+      vScale = _mm_adds_epi16 (vScale, vTemp);
+      vTemp = _mm_srli_si128 (vScale, 2);
+      vScale = _mm_adds_epi16 (vScale, vTemp);
+      scale = (int) _mm_extract_epi16 (vScale, 0);	/* sum of all 16 byte values, in lane 0 */
+    }
+
+    /* scale the F value for the next round */
+    vFPrev = _mm_slli_si128 (vF, 1);
+    vFPrev = _mm_subs_epu8 (vFPrev, vScaleAmt);
+
+    /* load and scale H for the next round */
+    vH = _mm_load_si128 (pvH + iter - 1);
+    vH = _mm_slli_si128 (vH, 1);
+    vH = _mm_subs_epu8 (vH, vScaleAmt);
+    vH = _mm_or_si128 (vH, vHInit);
+  }
+
+  /* calculate the max global score */
+  vH = _mm_load_si128 (pvH + offset);
+  vH = _mm_max_epu8 (vH, vF);	/* NOTE(review): global_sse2_word uses vFPrev at this step; confirm vF is intended */
+  for (j = 0; j < position; ++j) {
+    vH = _mm_slli_si128 (vH, 1);
+  }
+  score = (int) (unsigned short) _mm_extract_epi16 (vH, 7);
+  score >>= 8;	/* keep the high byte of lane 7, i.e. byte 15 of the vector */
+
+  /* return largest score */
+  distance = (queryLength + dbLength) * gapExtend;
+  score = score - (gapOpen * 2) - distance + scale;
+
+  return score;
+}
+#else
+
+/* No SSE2 support. Avoid compiler complaints about empty object */
+
+int nw_dummy;
+
+#endif
+
diff --git a/src/global_sse2.h b/src/global_sse2.h
new file mode 100644
index 0000000..d45d14d
--- /dev/null
+++ b/src/global_sse2.h
@@ -0,0 +1,41 @@
+/******************************************************************
+  Copyright 2010 by Michael Farrar.  All rights reserved.
+  This program may not be sold or incorporated into a commercial product,
+  in whole or in part, without written consent of Michael Farrar.  For 
+  further information regarding permission for use or reproduction, please 
+  contact: Michael Farrar at farrar.michael at gmail.com.
+*******************************************************************/
+
+/*
+  Written by Michael Farrar, 2010.
+  Please send bug reports and/or suggestions to farrar.michael at gmail.com.
+*/
+
+#ifndef INCLUDE_GLOBAL_SSE2_H
+#define INCLUDE_GLOBAL_SSE2_H
+
+#define SHORT_BIAS     32768	/* added to the signed 16-bit lane value read back in global_sse2_word */
+#define OVERFLOW_SCORE 0x7f000000	/* returned by both routines when scores do not fit even after rescaling */
+/* 16-bit (word) version: eight query positions per SSE2 vector */
+int
+global_sse2_word(int                  queryLength,
+                 unsigned short      *profile,
+                 const unsigned char *dbSeq,
+                 int                  dbLength,
+                 unsigned short       gapOpen,
+                 unsigned short       gapExtend,
+                 unsigned short       ceiling,
+                 struct f_struct     *f_str);
+/* 8-bit (byte) version: sixteen query positions per vector; bias is subtracted after each profile score is added */
+int
+global_sse2_byte(int                  queryLength,
+                 unsigned char       *profile,
+                 const unsigned char *dbSeq,
+                 int                  dbLength,
+                 unsigned short       gapOpen,
+                 unsigned short       gapExtend,
+                 unsigned short       ceiling,
+                 unsigned short       bias,
+                 struct f_struct     *f_str);
+
+#endif /* INCLUDE_GLOBAL_SSE2_H */
diff --git a/src/glocal_sse2.c b/src/glocal_sse2.c
new file mode 100644
index 0000000..e0f5ffd
--- /dev/null
+++ b/src/glocal_sse2.c
@@ -0,0 +1,596 @@
+/******************************************************************
+  Copyright 2010 by Michael Farrar.  All rights reserved.
+  This program may not be sold or incorporated into a commercial product,
+  in whole or in part, without written consent of Michael Farrar.  For 
+  further information regarding permission for use or reproduction, please 
+  contact: Michael Farrar at farrar.michael at gmail.com.
+*******************************************************************/
+
+/*
+  Written by Michael Farrar, 2010.
+  Please send bug reports and/or suggestions to farrar.michael at gmail.com.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "defs.h"
+#include "param.h"
+#include "dropgsw2.h"
+#include "global_sse2.h"
+
+#ifdef __SUNPRO_C
+#include <sunmedia_intrin.h>
+#else
+#include <emmintrin.h>
+#endif
+
+#ifdef SW_SSE2
+
+/* Lane-wise unsigned 16-bit maximum, computed as b + saturating(a - b). */
+static inline __m128i
+max_epu16(__m128i a, __m128i b)
+{
+  const __m128i excess = _mm_subs_epu16 (a, b);	/* a - b where a > b, otherwise 0 */
+  return _mm_adds_epu16 (b, excess);
+}
+
+int	/* returns the largest per-column score seen, or OVERFLOW_SCORE when rescaling cannot keep F under the ceiling */
+glocal_sse2_word(int                  queryLength,	/* query length; processed as iter = (queryLength+7)/8 vectors of 8 lanes */
+                 unsigned short      *profile,	/* score vectors, iter per residue code; read as aligned __m128i */
+                 const unsigned char *dbSeq,	/* library residue codes; each selects a profile row */
+                 int                  dbLength,	/* number of entries in dbSeq (columns processed) */
+                 unsigned short       gapOpen,	/* gap opening penalty, broadcast to all lanes */
+                 unsigned short       gapExtend,	/* gap extension penalty; applied through hinit and the final "distance" term */
+                 unsigned short       ceiling,	/* headroom: rescale once F exceeds 0x7fff - ceiling - gapOpen */
+                 struct f_struct     *f_str)	/* f_str->workspace holds 2*iter vectors: H first, then E */
+{	/* NOTE(review): this file includes global_sse2.h, not glocal_sse2.h, so this definition is not checked against its prototype */
+  int     i, j;
+
+  int     max;
+  int     score;
+  int     scale;
+  int     temp;
+  int     distance;
+  int     initScale;
+  int     hinit;
+  int     zero;
+
+  int     offset;
+  int     position;
+
+  int     cmp;
+  int     iter;
+    
+  __m128i *pvH;
+  __m128i *pvE;
+
+  __m128i vE, vF, vH;
+  __m128i vHNext;
+  __m128i vFPrev;
+
+  __m128i vGapOpen;
+  __m128i vGapExtend;
+  __m128i vCeiling;
+
+  __m128i vScale;
+  __m128i vScaleAmt;
+  __m128i vScaleTmp;
+
+  __m128i vTemp;
+  __m128i vNull;
+
+  __m128i *pvScore;
+
+  scale = 0;
+  initScale = 0;
+
+  max = 0x80000000;	/* starting "minimum"; relies on the conversion of 0x80000000 to int */
+  iter = (queryLength + 7) / 8;	/* vectors per column (8 lanes of 16 bits) */
+  offset = (queryLength - 1) % iter;	/* vector index holding the last query position */
+  position = 7 - (queryLength - 1) / iter;	/* left shifts needed to bring that position's lane to lane 7 */
+
+  pvH = (__m128i *)f_str->workspace;
+  pvE = pvH + iter;
+
+  /* Load gap opening penalty to all elements of a constant */
+  vGapOpen = _mm_setzero_si128();	/* transferred from Apple Devel smith_waterman_sse2.c fix */
+  vGapOpen = _mm_insert_epi16 (vGapOpen, gapOpen, 0);
+  vGapOpen = _mm_shufflelo_epi16 (vGapOpen, 0);
+  vGapOpen = _mm_shuffle_epi32 (vGapOpen, 0);
+
+  /* Load gap extension penalty to all elements of a constant */
+  vGapExtend = _mm_setzero_si128();	/* transferred from Apple Devel smith_waterman_sse2.c fix */
+  vGapExtend = _mm_insert_epi16 (vGapExtend, gapExtend, 0);
+  vGapExtend = _mm_shufflelo_epi16 (vGapExtend, 0);
+  vGapExtend = _mm_shuffle_epi32 (vGapExtend, 0);
+
+  /* Generate the ceiling before scaling: 0x7fff - ceiling - gapOpen in every lane */
+  vTemp = _mm_setzero_si128();	/* transferred from Apple Devel smith_waterman_sse2.c fix */
+  vTemp = _mm_insert_epi16 (vTemp, ceiling, 0);
+  vTemp = _mm_shufflelo_epi16 (vTemp, 0);
+  vTemp = _mm_shuffle_epi32 (vTemp, 0);
+  vCeiling = _mm_cmpeq_epi16 (vTemp, vTemp);
+  vCeiling = _mm_srli_epi16 (vCeiling, 1);
+  vCeiling = _mm_subs_epi16 (vCeiling, vTemp);
+  vCeiling = _mm_subs_epi16 (vCeiling, vGapOpen);
+
+  vGapExtend = _mm_srli_si128 (vGapExtend, 14);	/* keep gapExtend in lane 0 only */
+  vNull = _mm_cmpeq_epi16 (vTemp, vTemp);
+  vNull = _mm_slli_epi16 (vNull, 15);	/* 0x8000 in every lane: the most negative signed 16-bit value */
+  vScaleAmt = _mm_xor_si128 (vNull, vNull);	/* all zero */
+
+  hinit = gapOpen * 2 - SHORT_BIAS;
+  zero = hinit;
+
+  /* Initialize the storage vectors: H = vNull + gapOpen, E = vNull */
+  vTemp = _mm_adds_epi16 (vNull, vGapOpen);
+  for (i = 0; i < iter; i++) {
+    _mm_store_si128 (pvH + i, vTemp);
+    _mm_store_si128 (pvE + i, vNull);
+  }
+
+  /* initialize F */
+  vF = vNull;
+  vFPrev = vNull;
+
+  /* load and scale H for the next round */
+  vH = _mm_load_si128 (pvH + iter - 1);
+  vH = _mm_slli_si128 (vH, 2);
+  vH = _mm_insert_epi16 (vH, zero, 0);
+
+  for (i = 0; i < dbLength; ++i) {
+    /* fetch first data asap. */
+    pvScore = (__m128i *) profile + dbSeq[i] * iter;
+
+    vF = _mm_insert_epi16 (vNull, hinit, 0);
+    vF = _mm_adds_epi16 (vF, vGapExtend);
+    vF = _mm_subs_epi16 (vF, vGapOpen);
+
+    vH = _mm_max_epi16 (vH, vFPrev);
+    for (j = 0; j < iter; j++) {
+      /* correct H from the previous columns F */
+      vHNext = _mm_load_si128 (pvH + j);
+      vHNext = _mm_max_epi16 (vHNext, vFPrev);
+
+      /* load and correct E value */
+      vE = _mm_load_si128 (pvE + j);
+      vTemp = _mm_subs_epi16 (vHNext, vGapOpen);
+      vE = _mm_max_epi16 (vE, vTemp);
+      _mm_store_si128 (pvE + j, vE);
+
+      /* add score to vH */
+      vH = _mm_adds_epi16 (vH, *pvScore++);
+
+      /* get max from vH, vE and vF */
+      vH = _mm_max_epi16 (vH, vE);
+      vH = _mm_max_epi16 (vH, vF);
+      _mm_store_si128 (pvH + j, vH);
+
+      /* update vF value */
+      vH = _mm_subs_epi16 (vH, vGapOpen);
+      vF = _mm_max_epi16 (vF, vH);
+
+      /* load the next h values */
+      vH = vHNext;
+    }
+
+    /* check if we need to scale before the next round */
+    vTemp = _mm_cmpgt_epi16 (vF, vCeiling);
+    cmp  = _mm_movemask_epi8 (vTemp);
+
+    /* broadcast F values (XOR with vNull maps signed lanes to unsigned order first) */
+    vF = _mm_xor_si128 (vF, vNull);
+
+    vTemp  = _mm_slli_si128 (vF, 2);
+    vTemp = _mm_subs_epu16 (vTemp, vScaleAmt);
+    vF = max_epu16 (vF, vTemp);
+
+    vTemp  = _mm_slli_si128 (vF, 4);
+    vScaleTmp = _mm_slli_si128 (vScaleAmt, 2);
+    vScaleTmp = _mm_adds_epu16 (vScaleTmp, vScaleAmt);
+    vTemp = _mm_subs_epu16 (vTemp, vScaleTmp);
+    vF = max_epu16 (vF, vTemp);
+
+    vTemp = _mm_slli_si128 (vScaleTmp, 4);
+    vScaleTmp = _mm_adds_epu16 (vScaleTmp, vTemp);
+    vTemp  = _mm_slli_si128 (vF, 8);
+    vTemp = _mm_subs_epu16 (vTemp, vScaleTmp);
+    vF = max_epu16 (vF, vTemp);
+
+    /* scale if necessary */
+    if (cmp != 0x0000)  {
+      __m128i vScale1;
+      __m128i vScale2;
+
+      scale = hinit - gapOpen * 2 + SHORT_BIAS;
+      initScale = initScale + scale;
+
+      vScale = _mm_slli_si128 (vF, 2);
+      vScale = _mm_subs_epu16 (vScale, vGapOpen);
+      vScale = _mm_subs_epu16 (vScale, vScaleAmt);
+      vScale = _mm_insert_epi16 (vScale, scale, 0);
+
+      vTemp = _mm_slli_si128 (vScale, 2);
+      vTemp = _mm_subs_epu16 (vScale, vTemp);
+      vScaleAmt = _mm_adds_epu16 (vScaleAmt, vTemp);
+      vTemp = _mm_slli_si128 (vScale, 2);
+      vTemp = _mm_subs_epu16 (vTemp, vScale);
+      vScaleAmt = _mm_subs_epu16 (vScaleAmt, vTemp);
+      vTemp = _mm_subs_epu8 (vTemp, vTemp);
+      vTemp = _mm_insert_epi16 (vTemp, scale, 0);
+      vScaleAmt = _mm_subs_epu16 (vScaleAmt, vTemp);
+
+      /* rescale the previous F */
+      vF = _mm_subs_epu16 (vF, vScale);
+
+      /* rescale the initial H value */
+      hinit = zero;
+
+      /* check if we can continue in signed 16-bits */
+      vTemp = _mm_xor_si128 (vF, vNull);
+      vTemp = _mm_cmpgt_epi16 (vTemp, vCeiling);
+      cmp  = _mm_movemask_epi8 (vTemp);
+      if (cmp != 0x0000) {
+        return OVERFLOW_SCORE;
+      }
+
+      vTemp   = _mm_adds_epi16 (vCeiling, vCeiling);
+      vScale1 = _mm_subs_epu16 (vScale, vTemp);
+      vScale2 = _mm_subs_epu16 (vScale, vScale1);
+
+      /* scale all the vectors */
+      for (j = 0; j < iter; j++) {
+        /* load H and E */
+        vH = _mm_load_si128 (pvH + j);
+        vE = _mm_load_si128 (pvE + j);
+
+        /* subtract the scale from H and E in two saturating steps (vScale1 + vScale2 == vScale) */
+        vH = _mm_subs_epi16 (vH, vScale1);
+        vH = _mm_subs_epi16 (vH, vScale2);
+        vE = _mm_subs_epi16 (vE, vScale1);
+        vE = _mm_subs_epi16 (vE, vScale2);
+
+        /* save the H and E */
+        _mm_store_si128 (pvH + j, vH);
+        _mm_store_si128 (pvE + j, vE);
+      }
+
+      vScale = vScaleAmt;
+      for (j = 0; j < position; ++j) {
+        vScale = _mm_slli_si128 (vScale, 2);
+      }
+
+      /* calculate the final scaling amount */
+      /* vTemp   = _mm_xor_si128 (vTemp, vTemp); */
+      vTemp = _mm_setzero_si128();	/* transferred from Apple Devel fix for smith_waterman_sse2.c */
+      vScale1 = _mm_unpacklo_epi16 (vScale, vTemp);
+      vScale2 = _mm_unpackhi_epi16 (vScale, vTemp);
+      vScale  = _mm_add_epi32 (vScale1, vScale2);
+      vTemp  = _mm_srli_si128 (vScale, 8);
+      vScale = _mm_add_epi32 (vScale, vTemp);
+      vTemp  = _mm_srli_si128 (vScale, 4);
+      vScale = _mm_add_epi32 (vScale, vTemp);
+      scale  = (int) (unsigned short) _mm_extract_epi16 (vScale, 0);
+      temp   = (int) (unsigned short) _mm_extract_epi16 (vScale, 1);
+      scale  = scale + (temp << 16) + initScale;
+    }
+
+    /* scale the F value for the next round */
+    vFPrev = _mm_slli_si128 (vF, 2);
+    vFPrev = _mm_subs_epu16 (vFPrev, vScaleAmt);
+    vFPrev = _mm_xor_si128 (vFPrev, vNull);
+
+    vF = _mm_xor_si128 (vF, vNull);
+    /* NOTE(review): the vF written just above is never read before the next column overwrites it, and glocal_sse2_byte maxes H against vF (not vFPrev) at this point - confirm which is intended */
+    vH = _mm_load_si128 (pvH + offset);
+    vH = _mm_max_epi16 (vH, vFPrev);
+    for (j = 0; j < position; ++j) {
+      vH = _mm_slli_si128 (vH, 2);
+    }
+    score = (int) (signed short) _mm_extract_epi16 (vH, 7);
+    score = score + SHORT_BIAS;
+
+    /* return largest score */
+    distance = (queryLength + i + 1) * gapExtend;
+    score = score - (gapOpen * 2) - distance + scale;
+    max = (max > score) ? max : score;
+
+    /* load and scale H for the next round */
+    hinit += gapExtend;
+    vH = _mm_load_si128 (pvH + iter - 1);
+    vH = _mm_slli_si128 (vH, 2);
+    vH = _mm_xor_si128 (vH, vNull);
+    vH = _mm_subs_epu16 (vH, vScaleAmt);
+    vH = _mm_xor_si128 (vH, vNull);
+    vH = _mm_insert_epi16 (vH, hinit, 0);
+  }
+
+  return max;
+}
+
+int	/* returns the largest per-column score seen, or OVERFLOW_SCORE when rescaling cannot keep F under the ceiling */
+glocal_sse2_byte(int                  queryLength,	/* query length; processed as iter = (queryLength+15)/16 vectors of 16 lanes */
+                 unsigned char       *profile,	/* score vectors, iter per residue code; read as aligned __m128i */
+                 const unsigned char *dbSeq,	/* library residue codes; each selects a profile row */
+                 int                  dbLength,	/* number of entries in dbSeq (columns processed) */
+                 unsigned short       gapOpen,	/* gap opening penalty; low byte replicated to all lanes */
+                 unsigned short       gapExtend,	/* gap extension penalty; applied through vHInit and the final "distance" term */
+                 unsigned short       ceiling,	/* headroom: rescale once F reaches 0xff - ceiling - gapOpen */
+                 unsigned short       bias,	/* subtracted (saturating) after each profile score is added */
+                 struct f_struct     *f_str)	/* f_str->workspace holds 2*iter vectors: H first, then E */
+{
+  int     i, j;
+
+  int     max;
+  int     score;
+  int     scale;
+  int     distance;
+  int     initScale;
+
+  int     offset;
+  int     position;
+
+  int     dup;
+  int     cmp;
+  int     iter;
+    
+  __m128i *pvH;
+  __m128i *pvE;
+
+  __m128i vE, vF, vH;
+  __m128i vHInit;
+  __m128i vHNext;
+  __m128i vFPrev;
+
+  __m128i vBias;
+  __m128i vGapOpen;
+  __m128i vGapExtend;
+  __m128i vCeiling;
+
+  __m128i vScale;
+  __m128i vScaleAmt;
+  __m128i vScaleTmp;
+
+  __m128i vTemp;
+  __m128i vZero;
+  __m128i vNull;
+
+  __m128i *pvScore;
+
+  scale = 0;
+  initScale = 0;
+
+  max = 0x80000000;	/* starting "minimum"; relies on the conversion of 0x80000000 to int */
+  iter = (queryLength + 15) / 16;	/* vectors per column (16 lanes of 8 bits) */
+  offset = (queryLength - 1) % iter;	/* vector index holding the last query position */
+  position = 15 - (queryLength - 1) / iter;	/* left shifts needed to bring that position's lane to lane 15 */
+
+  pvH = (__m128i *)f_str->workspace;
+  pvE = pvH + iter;
+
+  /* Load the bias to all elements of a constant (assumes the value fits in 8 bits) */
+  dup    = (bias << 8) | (bias & 0x00ff);
+  vBias = _mm_setzero_si128();	/* transferred from Apple Devel smith_waterman_sse2.c fix */
+  vBias = _mm_insert_epi16 (vBias, dup, 0);
+  vBias = _mm_shufflelo_epi16 (vBias, 0);
+  vBias = _mm_shuffle_epi32 (vBias, 0);
+
+  /* Load gap opening penalty to all elements of a constant */
+  dup      = (gapOpen << 8) | (gapOpen & 0x00ff);
+  vGapOpen = _mm_setzero_si128();	/* transferred from Apple Devel smith_waterman_sse2.c fix */
+  vGapOpen = _mm_insert_epi16 (vGapOpen, dup, 0);
+  vGapOpen = _mm_shufflelo_epi16 (vGapOpen, 0);
+  vGapOpen = _mm_shuffle_epi32 (vGapOpen, 0);
+
+  /* Load gap extension penalty to all elements of a constant */
+  dup    = (gapExtend << 8) | (gapExtend & 0x00ff);
+  vGapExtend = _mm_setzero_si128();	/* transferred from Apple Devel smith_waterman_sse2.c fix */
+  vGapExtend = _mm_insert_epi16 (vGapExtend, dup, 0);
+  vGapExtend = _mm_shufflelo_epi16 (vGapExtend, 0);
+  vGapExtend = _mm_shuffle_epi32 (vGapExtend, 0);
+
+  /* Generate the ceiling before scaling: 0xff - ceiling - gapOpen in every lane */
+  dup    = (ceiling << 8) | (ceiling & 0x00ff);
+  vTemp = _mm_setzero_si128();	/* transferred from Apple Devel smith_waterman_sse2.c fix */
+  vTemp = _mm_insert_epi16 (vTemp, dup, 0);
+  vTemp = _mm_shufflelo_epi16 (vTemp, 0);
+  vTemp = _mm_shuffle_epi32 (vTemp, 0);
+  vCeiling = _mm_cmpeq_epi8 (vTemp, vTemp);
+  vCeiling = _mm_subs_epu8 (vCeiling, vTemp);
+  vCeiling = _mm_subs_epu8 (vCeiling, vGapOpen);
+
+  /* since we want to use the full range, zero is redefined as */
+  /* 2 * gapOpen.  the lowest scaled score will be an insert followed */
+  /* by a delete. */
+  vHInit = _mm_adds_epu8 (vGapOpen, vGapOpen);
+  vHInit = _mm_srli_si128 (vHInit, 15);	/* keep 2 * gapOpen in lane 0 only */
+  vZero  = vHInit;
+
+  vGapExtend = _mm_srli_si128 (vGapExtend, 15);	/* keep gapExtend in lane 0 only */
+  /*   vNull = _mm_xor_si128 (vNull, vNull); */
+  vNull = _mm_setzero_si128();	/* transferred from Apple Devel smith_waterman_sse2.c fix */
+  vScaleAmt = vNull;
+
+  /* Initialize the storage vectors: H = gapOpen, E = 0 */
+  for (i = 0; i < iter; i++) {
+    _mm_store_si128 (pvH + i, vGapOpen);
+    _mm_store_si128 (pvE + i, vNull);
+  }
+
+  /* initialize F */
+  vF = vNull;
+  vFPrev = vNull;
+
+  /* load and scale H for the next round */
+  vH = _mm_load_si128 (pvH + iter - 1);
+  vH = _mm_slli_si128 (vH, 1);
+  vH = _mm_or_si128 (vH, vZero);
+
+  for (i = 0; i < dbLength; ++i) {
+    /* fetch first data asap. */
+    pvScore = (__m128i *) profile + dbSeq[i] * iter;
+
+    vF = _mm_adds_epu8 (vHInit, vGapExtend);
+    vF = _mm_subs_epu8 (vF, vGapOpen);
+
+    vH = _mm_max_epu8 (vH, vFPrev);
+    for (j = 0; j < iter; j++) {
+      /* correct H from the previous columns F */
+      vHNext = _mm_load_si128 (pvH + j);
+      vHNext = _mm_max_epu8 (vHNext, vFPrev);
+
+      /* load and correct E value */
+      vE = _mm_load_si128 (pvE + j);
+      vTemp = _mm_subs_epu8 (vHNext, vGapOpen);
+      vE = _mm_max_epu8 (vE, vTemp);
+      _mm_store_si128 (pvE + j, vE);
+
+      /* add score to vH, then remove the bias */
+      vH = _mm_adds_epu8 (vH, *pvScore++);
+      vH = _mm_subs_epu8 (vH, vBias);
+
+      /* get max from vH, vE and vF */
+      vH = _mm_max_epu8 (vH, vE);
+      vH = _mm_max_epu8 (vH, vF);
+      _mm_store_si128 (pvH + j, vH);
+
+      /* update vF value */
+      vH = _mm_subs_epu8 (vH, vGapOpen);
+      vF = _mm_max_epu8 (vF, vH);
+
+      /* load the next h values */
+      vH = vHNext;
+    }
+
+    /* check if we need to scale before the next round (any lane with F >= ceiling) */
+    vTemp = _mm_subs_epu8 (vCeiling, vF);
+    vTemp = _mm_cmpeq_epi8 (vTemp, vNull);
+    cmp  = _mm_movemask_epi8 (vTemp);
+
+    /* broadcast F values */
+    vTemp  = _mm_slli_si128 (vF, 1);
+    vTemp = _mm_subs_epu8 (vTemp, vScaleAmt);
+    vF = _mm_max_epu8 (vF, vTemp);
+
+    vScaleTmp = _mm_slli_si128 (vScaleAmt, 1);
+    vScaleTmp = _mm_adds_epu8 (vScaleTmp, vScaleAmt);
+    vTemp  = _mm_slli_si128 (vF, 2);
+    vTemp = _mm_subs_epu8 (vTemp, vScaleTmp);
+    vF = _mm_max_epu8 (vF, vTemp);
+
+    vTemp = _mm_slli_si128 (vScaleTmp, 2);
+    vScaleTmp = _mm_adds_epu8 (vScaleTmp, vTemp);
+    vTemp  = _mm_slli_si128 (vF, 4);
+    vTemp = _mm_subs_epu8 (vTemp, vScaleTmp);
+    vF = _mm_max_epu8 (vF, vTemp);
+
+    vTemp = _mm_slli_si128 (vScaleTmp, 4);
+    vScaleTmp = _mm_adds_epu8 (vScaleTmp, vTemp);
+    vTemp  = _mm_slli_si128 (vF, 8);
+    vTemp = _mm_subs_epu8 (vTemp, vScaleTmp);
+    vF = _mm_max_epu8 (vF, vTemp);
+
+    /* scale if necessary */
+    if (cmp != 0x0000) {
+      vHInit = _mm_subs_epu8 (vHInit, vGapOpen);
+      vHInit = _mm_subs_epu8 (vHInit, vGapOpen);
+      scale = _mm_extract_epi16 (vHInit, 0);
+      initScale = initScale + scale;
+
+      vScale = _mm_slli_si128 (vF, 1);
+      vScale = _mm_subs_epu8 (vScale, vGapOpen);
+      vScale = _mm_subs_epu8 (vScale, vScaleAmt);
+      vScale = _mm_or_si128 (vScale, vHInit);
+
+      vTemp = _mm_slli_si128 (vScale, 1);
+      vTemp = _mm_subs_epu8 (vScale, vTemp);
+      vScaleAmt = _mm_adds_epu8 (vScaleAmt, vTemp);
+      vTemp = _mm_slli_si128 (vScale, 1);
+      vTemp = _mm_subs_epu8 (vTemp, vScale);
+      vScaleAmt = _mm_subs_epu8 (vScaleAmt, vTemp);
+      vScaleAmt = _mm_subs_epu8 (vScaleAmt, vHInit);
+
+      /* rescale the previous F */
+      vF = _mm_subs_epu8 (vF, vScale);
+
+      /* rescale the initial H value */
+      vHInit = vZero;
+
+      /* check if we can continue in 8-bits */
+      vTemp = _mm_subs_epu8 (vCeiling, vF);
+      vTemp = _mm_cmpeq_epi8 (vTemp, vNull);
+      cmp  = _mm_movemask_epi8 (vTemp);
+      if (cmp != 0x0000) {
+        return OVERFLOW_SCORE;
+      }
+
+      /* scale all the vectors */
+      for (j = 0; j < iter; j++) {
+        /* load H and E */
+        vH = _mm_load_si128 (pvH + j);
+        vE = _mm_load_si128 (pvE + j);
+
+        /* subtract the scale amount from H and E (unsigned, saturating at 0) */
+        vH = _mm_subs_epu8 (vH, vScale);
+        vE = _mm_subs_epu8 (vE, vScale);
+
+        /* save the H and E */
+        _mm_store_si128 (pvH + j, vH);
+        _mm_store_si128 (pvE + j, vE);
+      }
+
+      /* calculate the final scaling amount */
+      vScale = vScaleAmt;
+      for (j = 0; j < position; ++j) {
+        vScale = _mm_slli_si128 (vScale, 1);
+      }
+      vTemp = _mm_unpacklo_epi8 (vScale, vNull);
+      vScale = _mm_unpackhi_epi8 (vScale, vNull);
+      vScale = _mm_adds_epi16 (vScale, vTemp);
+      vTemp = _mm_srli_si128 (vScale, 8);
+      vScale = _mm_adds_epi16 (vScale, vTemp);
+      vTemp = _mm_srli_si128 (vScale, 4);
+      vScale = _mm_adds_epi16 (vScale, vTemp);
+      vTemp = _mm_srli_si128 (vScale, 2);
+      vScale = _mm_adds_epi16 (vScale, vTemp);
+      scale = (int) _mm_extract_epi16 (vScale, 0);
+      scale = scale + initScale;
+    }
+
+    /* scale the F value for the next round */
+    vFPrev = _mm_slli_si128 (vF, 1);
+    vFPrev = _mm_subs_epu8 (vFPrev, vScaleAmt);
+
+    /* calculate the max glocal score for this column; NOTE(review): uses vF here while glocal_sse2_word uses vFPrev - confirm */
+    vH = _mm_load_si128 (pvH + offset);
+    vH = _mm_max_epu8 (vH, vF);
+    for (j = 0; j < position; ++j) {
+      vH = _mm_slli_si128 (vH, 1);
+    }
+    score = (int) (unsigned short) _mm_extract_epi16 (vH, 7);
+    score >>= 8;	/* keep the high byte of the top word, i.e. lane 15 */
+
+    /* return largest score */
+    distance = (queryLength + i + 1) * gapExtend;
+    score = score - (gapOpen * 2) - distance + scale;
+    max = (max > score) ? max : score;
+
+    /* load and scale H for the next round */
+    vHInit = _mm_adds_epu8 (vHInit, vGapExtend);
+    vH = _mm_load_si128 (pvH + iter - 1);
+    vH = _mm_slli_si128 (vH, 1);
+    vH = _mm_subs_epu8 (vH, vScaleAmt);
+    vH = _mm_or_si128 (vH, vHInit);
+  }
+
+  return max;
+}
+#else
+
+/* No SSE2 support. Avoid compiler complaints about empty object */
+
+int nw_dummy;
+
+#endif
+
diff --git a/src/glocal_sse2.h b/src/glocal_sse2.h
new file mode 100644
index 0000000..2f77642
--- /dev/null
+++ b/src/glocal_sse2.h
@@ -0,0 +1,41 @@
+/******************************************************************
+  Copyright 2010 by Michael Farrar.  All rights reserved.
+  This program may not be sold or incorporated into a commercial product,
+  in whole or in part, without written consent of Michael Farrar.  For 
+  further information regarding permission for use or reproduction, please 
+  contact: Michael Farrar at farrar.michael at gmail.com.
+*******************************************************************/
+
+/*
+  Written by Michael Farrar, 2010.
+  Please send bug reports and/or suggestions to farrar.michael at gmail.com.
+*/
+
+#ifndef INCLUDE_GLOCAL_SSE2_H
+#define INCLUDE_GLOCAL_SSE2_H
+struct f_struct;	/* forward declaration: keeps the prototypes below valid even if this header is included first */
+#define SHORT_BIAS     32768		/* 2^15: offset between the signed 16-bit lanes and the reported score in glocal_sse2_word */
+#define OVERFLOW_SCORE 0x7f000000	/* returned by both functions when rescaling cannot keep the lanes under the ceiling */
+
+int	/* 16-bit lane variant: largest per-column score, or OVERFLOW_SCORE */
+glocal_sse2_word(int                  queryLength,
+                 unsigned short      *profile,
+                 const unsigned char *dbSeq,
+                 int                  dbLength,
+                 unsigned short       gapOpen,
+                 unsigned short       gapExtend,
+                 unsigned short       ceiling,
+                 struct f_struct     *f_str);	/* f_str->workspace is used as scratch for H and E vectors */
+
+int	/* 8-bit lane variant: same contract, plus a bias removed from every profile score */
+glocal_sse2_byte(int                  queryLength,
+                 unsigned char       *profile,
+                 const unsigned char *dbSeq,
+                 int                  dbLength,
+                 unsigned short       gapOpen,
+                 unsigned short       gapExtend,
+                 unsigned short       ceiling,
+                 unsigned short       bias,
+                 struct f_struct     *f_str);	/* f_str->workspace is used as scratch for H and E vectors */
+
+#endif /* INCLUDE_GLOCAL_SSE2_H */
diff --git a/src/h_altlib.h b/src/h_altlib.h
new file mode 100644
index 0000000..3f95c0e
--- /dev/null
+++ b/src/h_altlib.h
@@ -0,0 +1,28 @@
+
+/* $Id: h_altlib.h 625 2011-03-23 17:21:38Z wrp $ */
+/* $Revision: 625 $  */
+
+#define LASTENTRY 10
+#define LASTLIB 10
+#define BINARYGB 9
+#define DEFAULT 0
+#define FULLGB 1
+#define UNIXPIR 2
+#define EMBLSWISS 3
+#define INTELLIG 4
+#define VMSPIR 5
+
+int agetlib_h();	/* Pearson fasta format */
+int agetntlib_h();	/* Pearson fasta format nucleotides */
+int vgetlib_h();	/* PIR VMS format */
+
+int (*h_getliba[LASTLIB])()={	/* one reader per library-format code 0..LASTLIB-1; only slot 5 (VMSPIR) uses vgetlib_h */
+	agetlib_h,agetlib_h,agetlib_h,agetlib_h,
+	agetlib_h,vgetlib_h,agetlib_h,agetlib_h,
+	agetlib_h,agetlib_h};
+
+int (*h_getntliba[LASTLIB])()={	/* nucleotide readers: every slot is agetntlib_h. NOTE(review): both tables are defined, not just declared, in this header */
+	agetntlib_h,agetntlib_h,agetntlib_h,agetntlib_h,
+	agetntlib_h,agetntlib_h,agetntlib_h,agetntlib_h,
+	agetntlib_h,agetntlib_h};
+
diff --git a/src/htime.c b/src/htime.c
new file mode 100644
index 0000000..c873493
--- /dev/null
+++ b/src/htime.c
@@ -0,0 +1,43 @@
+/* Concurrent read version */
+
+/* $Id: htime.c 867 2011-10-30 14:44:04Z wrp $ */
+/* $Revision: 867 $  */
+
+#include <stdio.h>
+#include <time.h>
+
+#ifdef UNIX
+#include <sys/types.h>
+#include <sys/time.h>
+#ifdef TIMES
+#include <sys/times.h>
+#else
+#undef TIMES
+#endif
+#endif
+
+#ifndef HZ
+#define HZ 100
+#endif
+
+long s_time ()			/* returns milliseconds: wall-clock time(), or user CPU time when TIMES is defined */
+{
+#ifndef TIMES
+  time_t tt;			/* time() is already prototyped by <time.h>; a local "time_t time()" redeclaration conflicts under C23 */
+  return time(&tt)*1000;	/* whole seconds scaled to ms, so the resolution is one second */
+#else
+  struct tms tt;
+  times(&tt);
+#ifdef CLK_TCK
+  return tt.tms_utime*1000/CLK_TCK;	/* user CPU clock ticks -> ms */
+#else
+  return tt.tms_utime*1000/HZ;		/* no CLK_TCK: use HZ, which this file defaults to 100 */
+#endif
+#endif
+}
+
+void ptime (FILE *fp, long time)	/* writes a millisecond count to fp as seconds */
+{
+  const double seconds = (double) time / 1000.0;
+  fprintf (fp, "%6.3f", seconds);
+}
diff --git a/src/initfa.c b/src/initfa.c
new file mode 100644
index 0000000..1f11bf6
--- /dev/null
+++ b/src/initfa.c
@@ -0,0 +1,3183 @@
+/*	initfa.c	*/
+/*  $Id: initfa.c 1274 2014-08-07 18:30:56Z wrp $ */
+
+/* copyright (c) 1996, 1997, 1998, 2014 by William R. Pearson and The
+   Rector & Visitors of the University of Virginia */
+
+/* Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under this License is distributed on an "AS
+   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+   express or implied.  See the License for the specific language
+   governing permissions and limitations under the License. 
+*/
+
+/* init??.c files provide function specific initializations */
+
+/* h_init()	- called from comp_lib.c, comp_thr.c to initialize pstruct ppst
+   		  which includes the alphabet, and pam matrix
+
+   alloc_pam()	- allocate pam matrix space
+   init_pam2()	- convert from 1D to 2D pam
+
+   init_pamx()	- extend pam2 for 'X', 'N', or lower case characters
+
+   f_initenv()	- set up mngmsg and pstruct defaults
+   f_getopt()	- read fasta specific command line options
+   f_getarg()	- read ktup
+
+   resetp()	- reset the parameters, scoring matrix for DNA-DNA/DNA-prot
+
+   query_parm()	- ask for ktup
+   last_init()	- some things must be done last
+
+   f_initpam()	- set some parameters based on the pam matrix
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <string.h>
+#include <math.h>
+
+#ifdef UNIX
+#include <sys/types.h>
+#include <sys/stat.h>
+#endif
+
+#include "defs.h"
+#include "structs.h"
+#include "param.h"
+#include "best_stats.h"
+
+#define XTERNAL
+#include "upam.h"
+#include "uascii.h"
+#undef XTERNAL
+
+#define MAXWINDOW 32
+
+int initpam(char *, struct pstruct *);
+void init_pam2 (struct pstruct *ppst);
+void init_altpam(struct pstruct *ppst);
+void init_pamx (struct pstruct *ppst);
+void extend_pssm(unsigned char *aa0, int n0, struct pstruct *ppst);
+void build_xascii(int *qascii, char *save_str);
+void add_ascii_ann(int *qascii, unsigned char *ann_arr);
+void re_ascii(int *qascii, int *pascii, int max_ann_arr);
+extern int my_nrand(int, void *);
+
+/*  at some point, all the defaults should be driven from this table */
+/*
+#pgm	q_seq	l_seq	p_seq	matrix	g_open	g_ext	fr_shft	e_cut	ktup  E_band_opt
+#	-n/-p		-s	-e	-f	-h/-j	-E	argv[3]
+fasta	prot(0)	prot(0)	prot(0)	bl50	-10	-2	-	10.0	2	0.02
+fasta	dna(1)	dna(1)	dna(1)	+5/-4	-12	-4	-	2.0	6	0.01
+ssearch	prot(0)	prot(0)	prot(0)	bl50	-10	-2	-	10.0	-	-
+ssearch	dna(1)	dna(1)	dna(1)	+5/-4	-16	-4	-	2.0	-	-
+fastx	dna(1)	prot(0)	prot(0)	BL50	-12	-2	-20	5.0	2	0.02
+fasty	dna(1)	prot(0)	prot(0)	BL50	-12	-2	-20/-24	5.0	2	0.02
+tfastx	dna(1)	prot(0)	prot(0)	BL50	-14	-2	-20	5.0	2	0.01
+tfasty	dna(1)	prot(0)	prot(0)	BL50	-14	-2	-20/-24	5.0	2	0.01
+fasts	prot(0)	prot(0)	prot(0)	MD20-MS	-	-	-	5.0	-	-
+fasts	dna(1)	dna(1)	dna(1)	+2/-4	-	-	-	5.0	1	-
+tfasts	prot(0)	dna(1)	prot(0)	MD10-MS	-	-	-	2.0	1	-
+fastf	prot(0)	prot(0)	prot(0)	MD20	-	-	-	2.0	1	-
+tfastf	prot(0)	dna(1)	prot(0)	MD10	-	-	-	1.0	1	-
+fastm	prot(0)	prot(0)	prot(0)	MD20	-	-	-	5.0	1	-
+fastm	dna(1)	dna(1)	dna(1)	+2/-4	-	-	-	2.0	1	-
+tfastm	prot(0)	dna(1)	prot(0)	MD10	-	-	-	2.0	1	-
+lalign	prot(0) prot(0) prot(0) BL50    -12     -2	-       1.0	-	-
+lalign	dna(1)  dna(1)  dna(1)  +5/-4   -12     -4	-	0.1	-	-
+*/
+
+void show_help(char *, int );
+
+char *ref_str_a[]={
+/* 0 */ "W.R. Pearson & D.J. Lipman PNAS (1988) 85:2444-2448\n",
+/* 1 */ "T. F. Smith and M. S. Waterman, (1981) J. Mol. Biol. 147:195-197; \n W.R. Pearson (1991) Genomics 11:635-650\n",
+/* 2 */ "Pearson et al, Genomics (1997) 46:24-36\n",
+/* 3 */ "Mackey et al. Mol. Cell. Proteomics  (2002) 1:139-147\n",
+/* 4 */ "W.R. Pearson (1996) Meth. Enzymol. 266:227-258\n",
+/* 5 */ "X. Huang and W. Miller (1991) Adv. Appl. Math. 12:373-381\n",
+  ""
+};
+
+#define FA_PID	1	/* program ids; get_pgm_id() uses them to index pgm_def_arr[] and msg_def_arr[] */
+#define SS_PID	2
+#define FX_PID	3
+#define FY_PID	4
+#define FS_PID	5
+#define FF_PID	6
+#define FM_PID	7
+#define RSS_PID	8
+#define RFX_PID	9
+#define SSS_PID 10	/* old (slow) non-PG Smith-Waterman */
+#define TFA_PID	(FA_PID+10)	/* T* ids are the base id + 10; parenthesized so they expand safely inside larger expressions */
+#define TFX_PID	(FX_PID+10)
+#define TFY_PID	(FY_PID+10)
+#define TFS_PID	(FS_PID+10)
+#define TFF_PID	(FF_PID+10)
+#define TFM_PID (FM_PID+10)
+#define LAL_PID 18
+#define LNW_PID 19
+#define GNW_PID 20
+
+struct pgm_def_str {	/* per-program defaults; one pgm_def_arr[] entry per program id */
+  int pgm_id;		/* one of the *_PID values (0 in the placeholder rows) */
+  char *prog_func;	/* upper-case program name, e.g. "FASTA" */
+  char *info_pgm_abbr;	/* short abbreviation, e.g. "fa" */
+  char *iprompt0;	/* one-line description of what the program does */
+  char *ref_str;	/* literature citation; NULL in the table, pointed at ref_str_a[] by get_pgm_id() */
+  int PgmDID;		/* numeric id, 400-406 in the table; meaning not visible here */
+  char *smstr;		/* presumably the default scoring-matrix name, e.g. "BL50" - confirm */
+  int g_open_mod;	/* presumably an adjustment to the default gap-open penalty - confirm */
+  int g_ext_mod;	/* presumably an adjustment to the default gap-extend penalty - confirm */
+  int gshift;		/* presumably the frameshift penalty (fr_shft column of the table above) - confirm */
+  int hshift;		/* presumably the second frameshift penalty (the "-20/-24" rows) - confirm */
+  double e_cut;		/* presumably the default E() cutoff (e_cut column) - confirm */
+  int ktup;		/* default ktup (ktup column) */
+  double E_band_opt;	/* E_band_opt column of the table above */
+  int can_pre_align;	/* 0/1 flag; left out (so 0) in the two placeholder rows */
+};
+
+static struct pgm_def_str
+pgm_def_arr[21] = {	/* indexed by program id: entry N describes the program whose *_PID is N */
+  {0, "", "", "", NULL, 400, "", 0, 0, 0, 0, 1.0, 0, 0 },  /* 0 - unused placeholder */
+  {FA_PID, "FASTA", "fa",
+   "FASTA searches a protein or DNA sequence data bank",
+   NULL, 401, "BL50", 0, 0, 0, 0, 10.0, 2, 0.2, 1}, /* 1 - FASTA */
+  {SS_PID, "SSEARCH","gsw","SSEARCH performs a Smith-Waterman search",
+   NULL, 404, "BL50", 0, 0, 0, 0, 10.0, 0, 0.0, 1}, /* 2 - SSEARCH */
+  {FX_PID, "FASTX","fx",
+   "FASTX compares a DNA sequence to a protein sequence data bank",
+   NULL, 405, "BL50", -2, 0, -20, 0, 5.0, 2, 0.10, 1}, /* 3 - FASTX */
+  {FY_PID, "FASTY", "fy",
+   "FASTY compares a DNA sequence to a protein sequence data bank",
+   NULL, 405, "BL50", -2, 0, -20, -24, 5.0, 2, 0.10, 1}, /* 4 - FASTY */
+  {FS_PID, "FASTS", "fs",
+   "FASTS compares linked peptides to a protein data bank",
+   NULL, 400, "MD20-MS", 0, 0, 0, 0, 5.0, 1, 0.0, 0}, /* 5 - FASTS */
+  {FF_PID, "FASTF", "ff",
+   "FASTF compares mixed peptides to a protein databank",
+   NULL, 400, "MD20", 0, 0, 0, 0, 2.0, 1, 0.0, 0 }, /* 6 - FASTF */
+  {FM_PID, "FASTM", "fm",
+   "FASTM compares ordered peptides to a protein data bank",
+   NULL, 400, "MD20", 0, 0, 0, 0, 5.0, 1, 0.0, 0 }, /* 7 - FASTM */
+  {RSS_PID, "PRSS", "rss",
+   "PRSS evaluates statistical signficance using Smith-Waterman",
+   NULL, 401, "BL50", 0, 0, 0, 0, 1000.0, 0, 0.0, 1 }, /* 8 - PRSS */
+  {RFX_PID,"PRFX", "rfx",
+   "PRFX evaluates statistical signficance using FASTX",
+   NULL, 401, "BL50", -2, 0, -20, -24, 1000.0, 2, 0.2, 1 }, /* 9 - PRFX */
+  {SSS_PID, "OSEARCH","ssw","OSEARCH searches a sequence data bank",
+   NULL, 404, "BL50", 0, 0, 0, 0, 10.0, 0, 0.0, 1}, /* 10 - OSEARCH */
+  {TFA_PID, "TFASTA", "tfa",
+   "TFASTA compares a protein  to a translated DNA data bank",
+   NULL, 402, "BL50", -2, 0, 0, 0, 5.0, 2, 0.1, 1},	/* 11 - TFASTA */
+  {0, "", "", "", NULL, 400, "", 0, 0, 0, 0, 1.0, 0, 0.0 },  /* 12 - unused placeholder */
+  {TFX_PID, "TFASTX", "tfx",
+   "TFASTX compares a protein to a translated DNA data bank",
+   NULL, 406, "BL50", -2, 0, -20, 0, 2.0, 2, 0.10, 1},	/* 13 - TFASTX */
+  {TFY_PID, "TFASTY", "tfy",
+   "TFASTY compares a protein to a translated DNA data bank",
+   NULL, 406, "BL50", -2, 0, -20, -24, 2.0, 2, 0.10, 1},	/* 14 - TFASTY */
+  {TFS_PID, "TFASTS", "tfs",
+   "TFASTS compares linked peptides to a translated DNA data bank",
+   NULL, 400, "MD10-MS", 0, 0, 0, 0, 2.0, 2, 0.0, 0 },	/* 15 - TFASTS */
+  {TFF_PID, "TFASTF", "tff",
+   "TFASTF compares mixed peptides to a protein databank",
+   NULL, 400, "MD10", 0, 0, 0, 0, 1.0, 1, 0.0, 0 },	/* 16 - TFASTF */
+  {TFM_PID, "TFASTM", "tfm",
+   "TFASTM compares ordered peptides to a translated DNA databank",
+   NULL, 400, "MD10", 0, 0, 0, 0, 1.0, 1, 0.0, 0 },	/* 17 - TFASTM */
+  {LAL_PID, "LALIGN", "lal",
+   "LALIGN finds non-overlapping local alignments",
+   NULL, 404, "BL50", -2, 0, 0, 0, 1.0, 0, 0.0, 1}, 	/* 18 - LALIGN */
+  {LNW_PID, "GLSEARCH", "lnw",
+   "GLSEARCH performs a global-query/local-library search",
+   NULL, 404, "BL50", -2, 0, 0, 0, 10.0, 0, 0.0, 1}, 	/* 19 - GLSEARCH */
+  {GNW_PID, "GGSEARCH", "gnw",
+   "GGSEARCH performs a global/global database searches",
+   NULL, 404, "BL50", 0, 0, 0, 0, 10.0, 0, 0.0, 1}, 	/* 20 - GGSEARCH */
+};
+
+struct msg_def_str {	/* per-program message/sequence-type defaults; one msg_def_arr[] entry per program id */
+  int pgm_id;		/* one of the *_PID values (0 in the placeholder rows) */
+  int q_seqt;		/* SEQT_* value for the query - presumably its sequence type; confirm */
+  int l_seqt;		/* SEQT_* value for the library - presumably its sequence type; confirm */
+  int p_seqt;		/* SEQT_* value; SEQT_PROT in every populated row */
+  int sw_flag;		/* 0/1 flag; meaning not visible here */
+  int stages;		/* 1 in every populated row */
+  int qframe;		/* 1 or 2 in the table - presumably the number of query frames; confirm */
+  int nframe;		/* -1, 2 or 6 in the table - presumably the number of library frames; confirm */
+  int nrelv, srelv, arelv;	/* small counts (1-3) in the table; meaning not visible here */
+  char *f_id0, *f_id1, *label, *alabel;	/* short id/label strings; alabel is omitted from the table and set by get_pgm_id() */
+};
+
+/* each align_label string must be shorter than MAX_SSTR (32); get_pgm_id() stores one in msg_def_arr[].alabel */
+char *align_label[]={
+  "Smith-Waterman",		/* 0 */
+  "banded Smith-Waterman",	/* 1 */
+  "Waterman-Eggert",		/* 2 */
+  "trans. Smith-Waterman",	/* 3 */
+  "global/local",		/* 4 */
+  "trans. global/local",	/* 5 */
+  "global/global (N-W)"	/* 6 */
+};
+
+/* pgm_id q_seqt l_seqt p_seqt sw_flag stages qframe nframe nrelv srelv arelv f_id0 f_id1 label; alabel is omitted here and set by get_pgm_id() */
+static struct msg_def_str
+msg_def_arr[21] = {	/* indexed by program id, parallel to pgm_def_arr[] */
+  {0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0, "", "", ""},	/* ID=0 */
+  {FA_PID, SEQT_UNK, SEQT_PROT, SEQT_PROT, 1, 1, 1, -1, 3, 1, 3,
+   "fa","sw", "opt"},	/* ID=1 */
+  {SS_PID, SEQT_UNK, SEQT_PROT, SEQT_PROT, 1, 1, 1, -1, 1, 1, 1,
+   "sw","sw", "s-w"},	/* ID=2 */
+  {FX_PID, SEQT_DNA, SEQT_PROT, SEQT_PROT, 1, 1, 2, -1, 3, 1, 3,
+   "fx","sx", "opt"},	/* ID=3 */
+  {FY_PID, SEQT_DNA, SEQT_PROT, SEQT_PROT, 1, 1, 2, -1, 3, 1, 3,
+   "fy","sy", "opt"},	/* ID=4 */
+  {FS_PID, SEQT_UNK, SEQT_PROT, SEQT_PROT, 1, 1, 1, -1, 3, 2, 3,
+   "fs","fs", "initn init1"},	/* ID=5 */
+  {FF_PID, SEQT_PROT,SEQT_PROT, SEQT_PROT, 1, 1, 1, -1, 3, 2, 3,
+   "ff","ff", "initn init1"},	/* ID=6 */
+  {FM_PID, SEQT_UNK,SEQT_PROT, SEQT_PROT, 1, 1, 1, -1, 3, 2, 3,
+   "fm","fm","initn init1"},	/* ID=7 */
+  {RSS_PID, SEQT_UNK,SEQT_PROT, SEQT_PROT, 0, 1, 1, -1, 1, 1, 1,
+   "rss","sw","s-w"},	/* ID=8 */
+  {RFX_PID, SEQT_DNA,SEQT_PROT, SEQT_PROT, 0, 1, 2, -1, 3, 1, 3,
+   "rfx","sx","opt"},	/* ID=9 */
+  {SSS_PID, SEQT_UNK,SEQT_PROT, SEQT_PROT, 1, 1, 1, -1, 1, 1, 1,
+   "sw","sw", "s-w"},	/* ID=10 */
+  {TFA_PID, SEQT_PROT,SEQT_DNA, SEQT_PROT, 0, 1, 1, 6, 3, 1, 3,
+   "tfa","fa","initn init1"},	/* ID=11 */
+  {0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0, "", "", ""},	/* ID=12 */
+  {TFX_PID, SEQT_PROT,SEQT_DNA, SEQT_PROT, 1, 1, 1, 2, 3, 2, 3,
+   "tfx","sx","initn opt"},	/* ID=13 */
+  {TFY_PID, SEQT_PROT,SEQT_DNA, SEQT_PROT, 1, 1, 1, 2, 3, 2, 3,
+   "tfy","sy","initn opt"},	/* ID=14 */
+  {TFS_PID, SEQT_PROT,SEQT_DNA, SEQT_PROT, 1, 1, 1, 6, 3, 2, 3,
+   "tfs","fs","initn init1"},	/* ID=15 */
+  {TFF_PID, SEQT_PROT,SEQT_DNA, SEQT_PROT, 1, 1, 1, 6, 3, 2, 3,
+   "tff","ff","initn init1"},	/* ID=16 */
+  {TFM_PID, SEQT_PROT,SEQT_DNA, SEQT_PROT, 1, 1, 1, 6, 3, 2, 3,
+   "tfm","fm","initn init1"},	/* ID=17 */
+  {LAL_PID, SEQT_UNK, SEQT_PROT, SEQT_PROT, 1, 1, 1, -1, 1, 1, 1,
+   "lsw","lsw", "ls-w"},	/* ID=18 */
+  {LNW_PID, SEQT_UNK, SEQT_PROT, SEQT_PROT, 1, 1, 1, -1, 1, 1, 1,
+   "gnw","gnw", "n-w"},	/* ID=19; NOTE(review): same id strings as the GNW row below - confirm intended */
+  {GNW_PID, SEQT_UNK, SEQT_PROT, SEQT_PROT, 1, 1, 1, -1, 1, 1, 1,
+   "gnw","gnw", "n-w"},	/* ID=20 */
+};
+
+/*
+ * get_pgm_id(): identify the program this file is being compiled as.
+ *
+ * The program is chosen entirely by preprocessor symbols (FASTA, FASTX,
+ * FASTY, FASTS, FASTF, FASTM, SSEARCH, OSEARCH, LALIGN, GLSEARCH,
+ * GGSEARCH, optionally combined with TFAST or PRSS).  For the selected
+ * program the function
+ *   - stores the matching ref_str_a[] entry in pgm_def_arr[id].ref_str,
+ *   - stores the matching align_label[] entry in msg_def_arr[id].alabel,
+ *   - returns id (one of the *_PID constants).
+ * If several program symbols are defined the last block that compiles
+ * determines the return value; if none is defined the result is 0.
+ *
+ * The "#define CAN_PSSM" lines inside the body are file-scope
+ * preprocessor definitions: they enable the -P entry in f_options[] and
+ * the pam2p reset in h_init() further down this file.
+ */
+int
+get_pgm_id() {
+
+  int rval=0;
+
+#ifdef FASTA
+#ifndef TFAST
+  pgm_def_arr[FA_PID].ref_str = ref_str_a[0];
+  msg_def_arr[FA_PID].alabel = align_label[0];
+  rval=FA_PID;
+#else
+  pgm_def_arr[TFA_PID].ref_str = ref_str_a[0];
+  msg_def_arr[TFA_PID].alabel = align_label[2];
+  rval=TFA_PID;
+#endif
+#endif
+
+#ifdef FASTX
+#ifndef TFAST
+#ifndef PRSS
+  pgm_def_arr[FX_PID].ref_str = ref_str_a[2];
+  msg_def_arr[FX_PID].alabel = align_label[3];
+  rval=FX_PID;
+#else
+  pgm_def_arr[RFX_PID].ref_str = ref_str_a[2];
+  /* the label must go in the RFX_PID slot, the one actually returned;
+     writing it to FX_PID left msg_def_arr[RFX_PID].alabel NULL, which
+     f_initenv() then passed to SAFE_STRNCPY() */
+  msg_def_arr[RFX_PID].alabel = align_label[3];
+  rval=RFX_PID;
+#endif
+#else
+  pgm_def_arr[TFX_PID].ref_str = ref_str_a[2];
+  msg_def_arr[TFX_PID].alabel = align_label[3];
+  rval=TFX_PID;
+#endif
+#endif
+
+#ifdef FASTY
+#ifndef TFAST
+  pgm_def_arr[FY_PID].ref_str = ref_str_a[2];
+  msg_def_arr[FY_PID].alabel = align_label[3];
+  rval=FY_PID;
+#else
+  pgm_def_arr[TFY_PID].ref_str = ref_str_a[2];
+  msg_def_arr[TFY_PID].alabel = align_label[3];
+  rval=TFY_PID;
+#endif
+#endif
+
+#ifdef FASTS
+#ifndef TFAST
+  pgm_def_arr[FS_PID].ref_str = ref_str_a[3];
+  msg_def_arr[FS_PID].alabel = align_label[4];
+  rval=FS_PID;
+#else
+  pgm_def_arr[TFS_PID].ref_str = ref_str_a[3];
+  msg_def_arr[TFS_PID].alabel = align_label[5];
+  rval=TFS_PID;
+#endif
+#endif
+
+#ifdef FASTF
+#ifndef TFAST
+  pgm_def_arr[FF_PID].ref_str = ref_str_a[3];
+  msg_def_arr[FF_PID].alabel = align_label[4];
+  rval=FF_PID;
+#else
+  pgm_def_arr[TFF_PID].ref_str = ref_str_a[3];
+  msg_def_arr[TFF_PID].alabel = align_label[5];
+  rval=TFF_PID;
+#endif
+#endif
+
+#ifdef FASTM
+#ifndef TFAST
+  pgm_def_arr[FM_PID].ref_str = ref_str_a[3];
+  msg_def_arr[FM_PID].alabel = align_label[4];
+  rval=FM_PID;
+#else
+  pgm_def_arr[TFM_PID].ref_str = ref_str_a[3];
+  msg_def_arr[TFM_PID].alabel = align_label[5];
+  rval=TFM_PID;
+#endif
+#endif
+
+#ifdef SSEARCH
+#define CAN_PSSM
+  pgm_def_arr[SS_PID].ref_str = ref_str_a[1];
+  msg_def_arr[SS_PID].alabel = align_label[0];
+  rval=SS_PID;
+#endif
+
+#ifdef OSEARCH
+  pgm_def_arr[SSS_PID].ref_str = ref_str_a[1];
+  msg_def_arr[SSS_PID].alabel = align_label[0];
+  rval=SSS_PID;
+#endif
+
+#ifdef LALIGN
+#define CAN_PSSM
+  pgm_def_arr[LAL_PID].ref_str = ref_str_a[5];
+  msg_def_arr[LAL_PID].alabel = align_label[2];
+  rval=LAL_PID;
+#endif
+
+#ifdef GLSEARCH
+#define CAN_PSSM
+  pgm_def_arr[LNW_PID].ref_str = ref_str_a[6];
+  msg_def_arr[LNW_PID].alabel = align_label[4];
+  rval=LNW_PID;
+#endif
+
+#ifdef GGSEARCH
+#define CAN_PSSM
+  pgm_def_arr[GNW_PID].ref_str = ref_str_a[6];
+  msg_def_arr[GNW_PID].alabel = align_label[6];
+  rval=GNW_PID;
+#endif
+
+  return rval;
+}
+
+extern struct opt_def_str g_options[];
+extern void set_opt_disp_defs(char opt_char, struct opt_def_str *options, int type,
+			      int i_param1, int i_param2,
+			      double d_param1, double d_param2, char *s_param);
+
+/* long help text for the -z option; referenced from the 'z' entry of f_options[] */
+static char z_opt_descr[] = "Statistics estimation method:\n      1 - regression; -1 - no stats.; 0 - no scaling; 2 - Maximum Likelihood Est.;\n      3 - Altschul/Gish; 4 - iter. regress.; 5 - regress w/variance;\n      6 - MLE with comp. adj.;\n     11 - 16 - estimates from shuffled library sequences;\n     21 - 26 - E2()-stats from shuffled high-scoring sequences;";
+
+/* long help text for the -s option; referenced from the 's' entry of f_options[] */
+static char s_opt_descr[] = "Scoring matrix: (protein)\n      BL50, BP62 (sets -f -11 -g -1); P250, OPT5, VT200,\n      VT160, P120, VT120, BL80, VT80, MD40, VT40, MD20, VT20, MD10, VT10;\n      scoring matrix file name; -s ?BL50 adjusts matrix for short queries;";
+
+
+/*
+ * Table of the function-specific command-line options handled by
+ * f_getopt().  Which rows exist depends on the program being built
+ * (FASTA, FASTX, LALIGN, ...) and on SHOW_HELP.
+ *
+ * Each row starts with the option character, followed by a 0/1 flag
+ * (presumably "takes an argument" -- confirm against the definition of
+ * struct opt_def_str), a long option name, a short description and an
+ * optional longer description.  The trailing numeric/pointer slots are
+ * all zero here; f_init_opts() passes default values for some options
+ * to set_opt_disp_defs().  The table ends with a '\0' sentinel row.
+ */
+struct opt_def_str f_options[] = {
+  {'3', 0, "norevcomp", "compare forward strand only", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
+#if defined(FASTA) || defined(SSEARCH)
+  {'a', 0, "show_all", "show complete Query/Sbjct sequences in alignment", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
+  {'W', 1, "context", "alignment context length (surrounding unaligned sequence)", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
+#endif
+#if defined(FASTA)
+  {'A', 0, "sw_align", "Smith-Waterman for final DNA alignment, band alignment for protein\n      default is band-alignment for DNA, Smith-Waterman for protein", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
+#endif
+  {'b', 1, "num_descriptions", "high scores reported (limited by -E by default)", 
+   "high scores reported (limited by -E by default);\n      =<int> forces <int> results;", 0, 0, 0, 0, 0.0, 0.0, NULL},
+  {'d', 1, "num_alignments", "number of alignments shown (limited by -E by default)", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
+#if defined(FASTA) || defined(FASTX) || defined(FASTY)
+  {'c', 1, "opt_join", "expected fraction for band-optimization, joining", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
+#endif
+  {'E', 1, "evalue", "E()-value threshold", "E()-value,E()-repeat threshold", 0, 0, 0, 0, 0.0, 0.0, NULL},
+  {'f', 1, "gapopen", "gap-open penalty", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
+  {'g', 1, "gapext", "gap-extension penalty", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
+  /* with SHOW_HELP, -h is help and -j carries the frame-shift penalties;
+     without it, -h is the frameshift penalty and -j the codon
+     substitution penalty (matching the two variants in f_getopt()) */
+#ifdef SHOW_HELP
+  {'h', 0, "help", "help - show options, arguments", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
+#ifdef FASTY
+  {'j', 1, "frame_subs", "frame-shift, codon substitution penalty", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
+#else
+#ifdef FASTX
+  {'j', 1, "frame_shift", "frame-shift penalty", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
+#endif
+#endif
+#else
+#ifndef LALIGN
+  {'h', 1, "frame", "frameshift penalty", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
+  {'j', 1, "codon_subs", "codon substitution", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
+#endif
+#endif
+#if defined(LALIGN)
+  {'J', 0, "show_ident", "show identity alignment", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
+  {'K', 1, "max_repeat", "maximum number of non-intersecting alignments", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
+#endif
+  {'k', 1, "nshuffle", "number of shuffles", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
+  {'M', 1, "range", "filter on library sequence length", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
+  {'n', 0, "dna", "DNA/RNA query", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
+  {'p', 0, "prot", "protein query", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
+  /* CAN_PSSM is #defined inside get_pgm_id() for SSEARCH, LALIGN,
+     GLSEARCH and GGSEARCH builds */
+#if defined(CAN_PSSM)
+  {'P', 1, "pssm", "PSSM file", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
+#endif
+  {'r', 1, "dna_ratio", " +match/-mismatch for DNA/RNA", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
+  {'s', 1, "matrix", "scoring matrix", &s_opt_descr[0], 0, 0, 0, 0, 0.0, 0.0, NULL},
+#if !defined(LALIGN) && !defined(FASTS) && !defined(FASTM)
+  {'S', 0, "seg", "filter lowercase (seg) residues", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
+#endif
+#if defined(TFAST) || defined(FASTX) || defined(FASTY)
+  {'t', 1, "gencode", "translation genetic code", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
+#endif
+  {'U', 0, "rna", "RNA query", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
+  {'X', 1, "ext_opts", "Extended options", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
+  {'z', 1, "stats", "Statistics estimation method", &z_opt_descr[0], 0, 0, 0, 0, 0.0, 0.0, NULL},
+  /* end-of-table sentinel */
+  {'\0', 0, "", "", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL}
+};
+
+/*
+ * f_init_opts(): hand the current default value of each option to
+ * set_opt_disp_defs() for the matching row of f_options[].
+ *
+ * pgm_id is not used in the body; the program variant is selected by
+ * the preprocessor instead.  m_msp supplies shuff_max and aln.llcntx,
+ * ppst supplies the scoring/statistics defaults.
+ *
+ * The third argument to set_opt_disp_defs() appears, from these call
+ * sites, to say which value slots are meaningful: 1 = one int,
+ * 2 = two ints, 3 = one double, 4 = two doubles, 5 = string --
+ * TODO confirm against set_opt_disp_defs().
+ */
+void f_init_opts(int pgm_id, struct mngmsg *m_msp, struct pstruct *ppst) {
+#if defined(FASTA) || defined(FASTX) || defined(FASTY)
+  /* second value shown for -c is 5x the band-optimization threshold, but at least 1.0 */
+  set_opt_disp_defs('c', f_options, 4, 0, 0, ppst->param_u.fa.E_band_opt, max(ppst->param_u.fa.E_band_opt*5.0,1.0), NULL);
+#endif
+#ifndef LALIGN
+  set_opt_disp_defs('E', f_options, 4, 0, 0, ppst->e_cut, ppst->e_cut_r, NULL);
+#else
+  set_opt_disp_defs('E', f_options, 3, 0, 0, ppst->e_cut, 0.0, NULL);
+#endif
+  set_opt_disp_defs('s', f_options, 5, 0, 0, 0.0, 0.0, ppst->pamfile);
+  set_opt_disp_defs('f', f_options, 1, ppst->gdelval, 0, 0.0, 0.0, NULL);
+  set_opt_disp_defs('g', f_options, 1, ppst->ggapval, 0, 0.0, 0.0, NULL);
+#ifdef FASTY
+  set_opt_disp_defs('j', f_options, 2, ppst->gshift, ppst->gsubs, 0.0, 0.0, NULL);
+#endif
+#ifdef FASTX
+  set_opt_disp_defs('j', f_options, 1, ppst->gshift, 0, 0.0, 0.0, NULL);
+#endif
+#ifdef LALIGN
+  set_opt_disp_defs('K', f_options, 1, ppst->max_repeat, 0, 0.0, 0.0, NULL);
+#endif
+  set_opt_disp_defs('k', f_options, 1, m_msp->shuff_max, 0, 0.0, 0.0, NULL);
+  set_opt_disp_defs('r', f_options, 2, ppst->p_d_mat, ppst->p_d_mis, 0.0, 0.0, NULL);
+  /* called unconditionally although the 't' row of f_options[] exists
+     only for TFAST/FASTX/FASTY builds */
+  set_opt_disp_defs('t', f_options, 1, ppst->tr_type, 0, 0.0, 0.0, NULL);
+  set_opt_disp_defs('z', f_options, 1, ppst->zsflag, 0, 0.0, 0.0, NULL);
+#if defined(FASTA) || defined(SSEARCH)
+  set_opt_disp_defs('W', f_options, 1, m_msp->aln.llcntx, 0, 0.0, 0.0, NULL);
+#endif
+}
+
+/* prompt strings for the query and library file names */
+char *iprompt1=" test sequence file name: ";
+char *iprompt2=" database file name: ";
+
+/* program version string; PCOMPLIB builds are tagged "MPI" */
+#ifdef PCOMPLIB
+char *verstr="36.3.8f May, 2017 MPI";
+#else
+char *verstr="36.3.8f May, 2017";
+#endif
+
+/* file-local option state shared by the initialisation and option-parsing functions */
+static int mktup=3;		/* copied into param_u.fa.bktup by f_initenv() */
+static int ktup_set = 0;	/* not referenced in the part of the file reviewed here */
+static int gap_set=0;		/* set to 1 by f_getopt() when -g is given */
+static int del_set=0;		/* set to 1 by f_getopt() when -f is given */
+static int mshuff_set = 0;	/* set to 1 by f_getopt() when -k is given */
+static int prot2dna = 0;	/* set to 1 by f_getopt() when -n, -U or a DNA -s matrix switches the query alphabet */
+void
+parse_ext_opts(char *opt_arg, int pgm_id, struct mngmsg *m_msp, struct pstruct *ppst);
+
+extern int fa_max_workers;
+
+extern void s_abort(char *, char *);
+extern void init_ascii0(int *sascii, char *sq_map, int sq_map_n, struct pstruct *ppst);
+extern void init_ascii(int ext_sq, int *sascii, int nsq, int dnaseq);
+extern void validate_novel_aa(int *sascii, int p_nsq, int dnaseq);
+extern int standard_pam(char *smstr, struct pstruct *ppst,
+			int del_set, int gap_set);
+extern int min_pam_bits(int n0_eff, double bit_thresh, struct pstruct *ppst,
+			int del_set, int gap_set);
+extern void mk_n_pam(int *arr,int siz, int mat, int mis);
+extern int karlin(int , int, double *, double *, double *);
+extern void init_karlin_a(struct pstruct *, double *, double **);
+extern int do_karlin_a(int **, const struct pstruct *, double *,
+		       double *, double *, double *, double *);
+
+#if defined(TFAST) || defined(FASTX) || defined(FASTY)
+extern void aainit(int tr_type, int debug);
+#endif
+
+/* filled in by h_init() from the selected pgm_def_arr[] entry
+   (its iprompt0, prog_func and ref_str members) */
+char *iprompt0, *prog_func, *refstr;
+
+/*
+ * h_init(): first-stage initialisation; sets defaults assuming a
+ * protein sequence.
+ *
+ *   ppst          - parameter structure; receives pgm_id, the default
+ *                   scoring matrix, DNA match/mismatch defaults, the
+ *                   residue alphabets (sq/hsq/sqx/hsqx) and c_nt[]
+ *   m_msp         - message structure; receives max_tot and qdnaseq
+ *   info_pgm_abbr - output buffer (copied with a MAX_SSTR limit) for
+ *                   the program abbreviation
+ *
+ * Also sets the file-level globals iprompt0, refstr and prog_func, and
+ * the global pascii/qascii mapping tables.  Calls exit(1) if the
+ * pgm_def_arr[] or msg_def_arr[] entry for the compiled-in program does
+ * not carry the expected pgm_id.
+ */
+void h_init (struct pstruct *ppst, struct mngmsg *m_msp, char *info_pgm_abbr)
+{
+  struct pgm_def_str pgm_def;
+  int i, pgm_id;
+
+  ppst->pgm_id  = pgm_id =  get_pgm_id();
+  pgm_def = pgm_def_arr[pgm_id];
+
+  /* check that pgm_def_arr[] is valid */
+  if (pgm_def.pgm_id != pgm_id) {
+    fprintf(stderr,
+	    "**pgm_def integrity failure: def.pgm_id %d != pgm_id %d**\n",
+	    pgm_def.pgm_id, pgm_id);
+    exit(1);
+  }
+
+  /* check that msg_def_arr[] is valid */
+  if (msg_def_arr[pgm_id].pgm_id != pgm_id) {
+    fprintf(stderr,
+	    "**msg_def integrity failure: def.pgm_id %d != pgm_id %d**\n",
+	    msg_def_arr[pgm_id].pgm_id, pgm_id);
+    exit(1);
+  }
+
+  SAFE_STRNCPY(info_pgm_abbr,pgm_def.info_pgm_abbr,MAX_SSTR);
+  iprompt0 = pgm_def.iprompt0;
+  refstr = pgm_def.ref_str;
+  prog_func = pgm_def.prog_func;
+
+  /* used to be MAXTOT = MAXTST+MAXLIB, but now fixed at MAXLIB for
+     pre-loaded libraries */
+  m_msp->max_tot = MAXLIB;
+
+  /* build the protein ascii->code map and make it the active library map */
+  init_ascii0(aascii, NCBIstdaa, NCBIstdaa_n, ppst);
+  pascii = aascii;
+
+  /* set up DNA query sequence if required*/
+  if (msg_def_arr[pgm_id].q_seqt == SEQT_DNA) {
+    memcpy(qascii,nascii,sizeof(qascii));
+    m_msp->qdnaseq = SEQT_DNA;
+  }
+  else { 	/* when SEQT_UNK, start with protein */
+    memcpy(qascii,aascii,sizeof(qascii));
+    m_msp->qdnaseq = msg_def_arr[pgm_id].q_seqt;
+  }
+
+#if defined(FASTF) || defined(FASTS) || defined(FASTM)
+  /* ',' in the query maps to ESS and '*' to NA for these programs */
+  qascii[','] = ESS;
+  /* also initialize aascii, nascii for databases */
+  qascii['*'] = NA;
+  ppst->pam_ms = 1;
+  ppst->do_rep=0;		/* disable multiple alignments */
+  ppst->pseudocts = 200;
+#else
+  ppst->pam_ms = 0;
+  ppst->do_rep=1;		/* enable multiple alignments */
+#endif
+
+  /* initialize a pam matrix */
+  SAFE_STRNCPY(ppst->pamfile,pgm_def.smstr,MAX_FN);
+  standard_pam(ppst->pamfile,ppst,del_set,gap_set);
+  ppst->have_pam2 = 0;
+
+  /* specify pre-alignment */
+  ppst->can_pre_align = pgm_def.can_pre_align;
+
+  /* default DNA match/mismatch scores */
+  ppst->p_d_mat = 5;
+  ppst->p_d_mis = -4;
+
+  /* this is always protein by default */
+  ppst->nsq = NCBIstdaa_n;
+  ppst->nsqx = NCBIstdaa_ext_n;
+  /* we need to populate ppst->sq to nsqx for direct lc mapping */
+  for (i=0; i<ppst->nsqx; i++) {
+    ppst->sq[i] = NCBIstdaa_l[i];
+    ppst->hsq[i] = h_NCBIstdaa[i];
+  }
+  for (i=0; i<ppst->nsqx; i++) {
+    ppst->sqx[i]=NCBIstdaa_ext[i];
+    ppst->hsqx[i]=h_NCBIstdaa_ext[i];
+  }
+  /* sq[] was filled up to nsqx above but is terminated at nsq here;
+     sqx[] is terminated at nsqx */
+  ppst->sq[ppst->nsq] = ppst->sqx[ppst->nsqx] = '\0';
+
+  /* set up the c_nt[] mapping */
+
+#if defined(FASTS) || defined(FASTF) || defined(FASTM)
+  ppst->c_nt[ESS] = ESS;
+#endif
+  /* entries [1,nnt) come from gc_nt[]; the second copy at [i+nnt] is
+     the same value shifted by nnt */
+  ppst->c_nt[0]=0;
+  for (i=1; i<nnt; i++) {
+    ppst->c_nt[i]=gc_nt[i];
+    ppst->c_nt[i+nnt]=gc_nt[i]+nnt;
+  }
+
+#ifdef CAN_PSSM
+  /* no PSSM loaded yet */
+  ppst->pam2p[0] = NULL;
+  ppst->pam2p[1] = NULL;
+#endif
+}
+
+/*
+ * alloc_pam(): allocates the two d1 x d2 integer scoring matrices
+ * ppst->pam2[0] and ppst->pam2[1].
+ *
+ * Each matrix is an array of d1 row pointers into one contiguous
+ * block of d1*d2 ints.  Both data blocks are zero-filled, so cells
+ * that the later init_pam2()/init_pamx() passes do not write have a
+ * defined value.  Sizes are computed in size_t so d1*d2 is not
+ * multiplied in int before being widened.
+ *
+ * On allocation failure s_abort() is called with a message.
+ * On success ppst->have_pam2 is set to 1.
+ */
+void
+alloc_pam (int d1, int d2, struct pstruct *ppst)
+{
+  int     i, *d2p;
+  char err_str[128];
+  size_t n_rows = (size_t)d1;
+  size_t n_cells = (size_t)d1 * (size_t)d2;
+
+  /* row-pointer arrays for both matrices */
+  if ((ppst->pam2[0] = (int **) malloc (n_rows * sizeof (int *))) == NULL) {
+     sprintf(err_str,"Cannot allocate 2D pam matrix: %d",d1);
+     s_abort (err_str,"");
+  }
+
+  if ((ppst->pam2[1] = (int **) malloc (n_rows * sizeof (int *))) == NULL) {
+     sprintf(err_str,"Cannot allocate 2D pam matrix: %d",d1);
+     s_abort (err_str,"");
+  }
+
+  /* data block for pam2[0] */
+  if ((d2p = (int *) calloc (n_cells, sizeof (int))) == NULL) {
+     sprintf(err_str,"Cannot allocate 2D pam matrix: %d",d1);
+     s_abort (err_str,"");
+   }
+
+   for (i = 0; i < d1; i++, d2p += d2)
+      ppst->pam2[0][i] = d2p;
+
+   /* data block for pam2[1]; zero-filled like pam2[0] (was malloc) */
+   if ((d2p= (int *) calloc (n_cells, sizeof (int))) == NULL) {
+     sprintf(err_str,"Cannot allocate 2d pam matrix: %d",d2);
+     s_abort (err_str,"");
+   }
+
+   for (i = 0;  i < d1; i++, d2p += d2)
+      ppst->pam2[1][i] = d2p;
+
+   ppst->have_pam2 = 1;
+}
+
+/*
+ *  init_pam2(): unpack the 1-D lower-triangular pam[] array into the
+ *  2-D matrix ppst->pam2[0], and record the lowest and highest score
+ *  in ppst->pam_l / ppst->pam_h.
+ *
+ *  The residue ordering of pam[] is given by apam_sq (protein) or
+ *  npam_sq (DNA); the globals pam_sq / pam_sq_n are pointed at the one
+ *  in use.  Row and column 0 are set to -BIGNUM.  For protein,
+ *  init_altpam() then adds the residues missing from the 1-D matrix.
+ */
+void
+init_pam2 (struct pstruct *ppst) {
+  int row, col, pam_ix, last_ix;
+  int r_code, c_code, score;
+  int **pam2_0;
+
+  pam2_0 = ppst->pam2[0];
+
+  pam2_0[0][0] = -BIGNUM;
+  ppst->pam_h = -1;
+  ppst->pam_l = 1;
+
+  if (ppst->dnaseq != 0) {	/* have DNA, no '*' */
+    last_ix = nascii['X'];
+    pam_sq = npam_sq;
+    pam_sq_n = npam_sq_n;
+  }
+  else {			/* not DNA */
+    /* '*' is the last character for which pam[] is available */
+    last_ix = aascii['*'];
+    pam_sq = apam_sq;
+    pam_sq_n = apam_sq_n;
+  }
+
+  /* walk the triangular 1-D matrix; last_ix bounds the rows because it
+     is the last position in the 1-D matrix */
+  pam_ix = 0;
+  for (row = 1; row < last_ix; row++) {
+    r_code = pascii[pam_sq[row]];
+    pam2_0[r_code][0] = -BIGNUM;
+    pam2_0[0][r_code] = -BIGNUM;
+
+    for (col = 1; col <= row; col++) {
+      c_code = pascii[pam_sq[col]];
+
+      /* here is where the pam value is actually set, symmetrically */
+      score = pam[pam_ix++] - ppst->pamoff;
+      pam2_0[r_code][c_code] = score;
+      pam2_0[c_code][r_code] = score;
+
+      if (score < ppst->pam_l) ppst->pam_l = score;
+      if (score > ppst->pam_h) ppst->pam_h = score;
+    }
+  }
+
+  /* characters beyond last_ix only get their row/column 0 entries */
+  for (row = last_ix + 1; row < pam_sq_n; row++) {
+    r_code = pascii[pam_sq[row]];
+    pam2_0[r_code][0] = -BIGNUM;
+    pam2_0[0][r_code] = -BIGNUM;
+  }
+
+  if (ppst->dnaseq == 0) {
+    init_altpam(ppst);
+  }
+}
+
+/*
+ * alias_pam_residue(): give residue new_uc the scores of residue old_uc
+ * in ppst->pam2[0].
+ *
+ * If new_uc's code is inside the alphabet (< ppst->nsq), its row,
+ * column and identity score are copied from old_uc (row and column
+ * copied separately; the matrix is not assumed symmetric).  Otherwise
+ * the pascii[] entries for new_uc/new_lc are redirected to the codes
+ * of old_uc/old_lc.
+ */
+static void
+alias_pam_residue(struct pstruct *ppst, int new_uc, int new_lc,
+		  int old_uc, int old_lc) {
+  int **mat = ppst->pam2[0];
+  int from_ix = pascii[old_uc];
+  int to_ix = pascii[new_uc];
+  int n, code;
+
+  if (to_ix >= ppst->nsq) {	/* not in the alphabet: remap the character */
+    pascii[new_uc] = pascii[old_uc];
+    pascii[new_lc] = pascii[old_lc];
+    return;
+  }
+
+  mat[to_ix][0] = mat[0][to_ix] = -BIGNUM;
+  /* identity score */
+  mat[to_ix][to_ix] = mat[from_ix][from_ix];
+  for (n = 1; n < pam_sq_n; n++) {
+    code = pascii[pam_sq[n]];
+    mat[to_ix][code] = mat[from_ix][code];
+    mat[code][to_ix] = mat[code][from_ix];
+  }
+}
+
+/*
+ * init_altpam(): add scores to ppst->pam2[0] for residues that are not
+ * present in the 1-D matrices: 'J' (the better of I and L), 'O' (as K)
+ * and 'U' (as C).
+ */
+void
+init_altpam(struct pstruct *ppst) {
+  int **mat = ppst->pam2[0];
+  int i_ix, l_ix, j_ix, code, n;
+
+  /* 'J' (I/L): only when the matrix alphabet has no 'J' of its own */
+  i_ix = pascii['I'];
+  l_ix = pascii['L'];
+  j_ix = pascii['J'];
+  if (strchr(pam_sq,'J')==NULL) {
+    mat[j_ix][0] = mat[0][j_ix] = -BIGNUM;
+    /* identity: the larger of the I and L identities */
+    mat[j_ix][j_ix] = max(mat[i_ix][i_ix],mat[l_ix][l_ix]);
+    for (n = 1; n < pam_sq_n; n++) {
+      code = pascii[pam_sq[n]];
+      /* row and column handled separately: do not assume symmetry */
+      mat[j_ix][code] = max(mat[i_ix][code],mat[l_ix][code]);
+      mat[code][j_ix] = max(mat[code][i_ix],mat[code][l_ix]);
+    }
+  }
+
+  /* 'O' scores as 'K' */
+  alias_pam_residue(ppst, 'O', 'o', 'K', 'k');
+
+  /* 'U' scores as 'C' */
+  alias_pam_residue(ppst, 'U', 'u', 'C', 'c');
+}
+
+/* extend the standard pam matrix for special residues
+   (a) 'X' for protein and 'N' for DNA, 'G' and 'U' for RNA, 
+   (b) lower-case characters for ext_sq_set
+
+   must be called after init_pam2()
+
+   Reads ppst->nsq, dnaseq, pam_set, pam_x_set, pam_xx, pam_xm and the
+   base scores in pam2[0][0..nsq-1][0..nsq-1]; writes ppst->nt_align,
+   possibly pam_xx/pam_xm, and fills pam2[0] and pam2[1] for indices up
+   to 2*nsq-1.
+*/
+void
+init_pamx (struct pstruct *ppst) {
+  int     i, j, k, nsq;
+  int sa_x, sa_t, tmp;
+
+  nsq = ppst->nsq;
+
+  ppst->nt_align = (ppst->dnaseq== SEQT_DNA || ppst->dnaseq == SEQT_RNA);
+
+  /* sa_x is the 'unknown' character, 'X' for proteins, 'N' for DNA/RNA */
+  /* sa_t is the termination character -- not used for DNA */
+
+  if (ppst->nt_align) {
+    sa_x = pascii['N'];
+    sa_t = sa_x;
+  }
+  else {
+    sa_x = pascii['X'];
+    sa_t = pascii['*'];
+  }
+
+  /* build an asymmetric matrix for RNA */
+  /* only when no matrix was given with -s (pam_set == 0): A:G, C:T and
+     C:U score 3 less than the G:G identity; the transposed cells are
+     not changed */
+  if (ppst->dnaseq == SEQT_RNA && !ppst->pam_set) {
+    tmp = ppst->pam2[0][nascii['G']][nascii['G']] - 3;
+    ppst->pam2[0][nascii['A']][nascii['G']] = 
+      ppst->pam2[0][nascii['C']][nascii['T']] = 
+      ppst->pam2[0][nascii['C']][nascii['U']] = tmp;
+  }
+
+  if (ppst->pam_x_set) {
+    /* user-specified X scores: force them into the sa_x and sa_t rows
+       and columns (only when those codes are inside the alphabet) */
+    for (i=1; i<nsq; i++) {
+      if (sa_x < nsq) ppst->pam2[0][sa_x][i] = ppst->pam2[0][i][sa_x]=ppst->pam_xm;
+      if (sa_t < nsq) ppst->pam2[0][sa_t][i] = ppst->pam2[0][i][sa_t]=ppst->pam_xm;
+    }
+    if (sa_x < nsq) ppst->pam2[0][sa_x][sa_x]=ppst->pam_xx;
+    if (sa_t < nsq) ppst->pam2[0][sa_t][sa_t]=ppst->pam_xx;
+  }
+  else {
+    /* otherwise take pam_xx/pam_xm from the matrix itself */
+    ppst->pam_xx = ppst->pam2[0][sa_x][sa_x];
+    ppst->pam_xm = ppst->pam2[0][1][sa_x];
+  }
+
+    /* fill in pam2[1] matrix */
+    ppst->pam2[1][0][0] = -BIGNUM;
+    /* fill in additional parts of the matrix */
+    for (i = 0; i < nsq; i++) {
+      /* -BIGNUM to all matches vs 0 */
+      ppst->pam2[0][0][i+nsq] = ppst->pam2[0][i+nsq][0] = 
+	ppst->pam2[1][0][i+nsq] = ppst->pam2[1][i+nsq][0] = 
+	ppst->pam2[0][0][i] = ppst->pam2[0][i][0] = 
+	ppst->pam2[1][0][i] = ppst->pam2[1][i][0] = -BIGNUM;
+
+      for (j = 0; j < nsq; j++) {
+	/* replicate pam2[0] to i+nsq, j+nsq, also initialize lowest of pam2[1]*/
+	ppst->pam2[0][i+nsq][j] = ppst->pam2[0][i][j+nsq] =  ppst->pam2[0][i+nsq][j+nsq] = 
+	  ppst->pam2[1][i][j] = ppst->pam2[0][i][j];
+
+	/* set the high portion of pam2[1] to the corresponding value
+	   of pam2[0][sa_x][j] (the score of the unknown residue) */
+
+	ppst->pam2[1][i+nsq][j] = ppst->pam2[1][i][j+nsq]=
+	  ppst->pam2[1][i+nsq][j+nsq]=ppst->pam2[0][sa_x][j];
+      }
+    }
+
+    /* set matches to the internal '-' (index nsq) to pam_xm, including
+       the [nsq][nsq] cell */
+    /* this needs to be adjusted for multiple internal '-----' */
+    for (i=1; i< nsq; i++) {
+      ppst->pam2[0][nsq][i] = ppst->pam2[0][i][nsq] = 
+	ppst->pam2[0][nsq][i+nsq] = ppst->pam2[0][i+nsq][nsq] = 
+	ppst->pam2[1][nsq][i] = ppst->pam2[1][i][nsq] = 
+	ppst->pam2[1][nsq][i+nsq] = ppst->pam2[1][i+nsq][nsq] = ppst->pam_xm;
+    }
+    ppst->pam2[0][nsq][nsq] = ppst->pam2[1][nsq][nsq] = ppst->pam_xm;
+}
+
+/*
+ * f_initenv(): function-specific initialisation.
+ *
+ * Copies the per-program defaults from msg_def_arr[] and pgm_def_arr[]
+ * (selected by ppst->pgm_id) into m_msp and ppst: output labels,
+ * statistics flags, gap/frameshift penalties, E()-value cutoffs,
+ * frame and score counts, shuffle limits and the pam_* state.  For
+ * programs whose pgm_def_arr[] entry has ktup > 0 the param_u.fa
+ * search parameters are set as well.  Finishes by calling
+ * f_init_opts().
+ *
+ * aa0 is not used in the body.
+ */
+void
+f_initenv (struct mngmsg *m_msp, struct pstruct *ppst, unsigned char **aa0) {
+  struct msg_def_str m_msg_def;
+  int pgm_id;
+
+  pgm_id = ppst->pgm_id;
+  m_msg_def = msg_def_arr[pgm_id];
+
+  m_msp->last_calc_flg=0;
+
+  SAFE_STRNCPY(m_msp->f_id0,m_msg_def.f_id0,sizeof(m_msp->f_id0));
+  SAFE_STRNCPY(m_msp->f_id1,m_msg_def.f_id1,sizeof(m_msp->f_id1));
+  SAFE_STRNCPY (m_msp->label, m_msg_def.label, sizeof(m_msp->label));
+  /* alabel is the pointer stored by get_pgm_id() */
+  SAFE_STRNCPY(m_msp->alabel, m_msg_def.alabel, sizeof(m_msp->alabel));
+
+  /* score labels: three for the fasta-type programs, one otherwise */
+#if !defined(SSEARCH) && !defined(GGSEARCH) && !defined(GLSEARCH) && !defined(LALIGN)
+  SAFE_STRNCPY (m_msp->alab[0],"initn",20);
+  SAFE_STRNCPY (m_msp->alab[1],"init1",20);
+  SAFE_STRNCPY (m_msp->alab[2],"opt",20);
+#else
+#if defined(SSEARCH) || defined(LALIGN)
+  SAFE_STRNCPY (m_msp->alab[0],"s-w opt",20);
+#else
+  SAFE_STRNCPY (m_msp->alab[0],"n-w opt",20);
+#endif
+#endif
+
+  /* default statistics method (the -z option): 0 for GGSEARCH/GLSEARCH, 1 otherwise */
+#if defined(GGSEARCH) || defined(GLSEARCH)
+  m_msp->zsflag = ppst->zsflag = ppst->zsflag_f = 0;
+#else
+  m_msp->zsflag = ppst->zsflag = ppst->zsflag_f = 1;
+  m_msp->zsflag2 = ppst->zsflag2 = 1;
+#endif
+
+  /* per-program adjustments are ADDED to the gap penalties already in ppst */
+  ppst->gdelval += pgm_def_arr[pgm_id].g_open_mod;
+  ppst->ggapval += pgm_def_arr[pgm_id].g_ext_mod;
+#if defined(FASTX) || defined(FASTY)
+  ppst->gshift = pgm_def_arr[pgm_id].gshift;
+  ppst->gsubs = pgm_def_arr[pgm_id].hshift;
+#endif
+  ppst->sw_flag = m_msg_def.sw_flag;
+  ppst->e_cut = m_msp->e_cut=pgm_def_arr[pgm_id].e_cut;
+#ifndef LALIGN
+  ppst->e_cut_r = ppst->e_cut/10.0;	/* more significant */
+#else
+  ppst->e_cut_r = ppst->e_cut;		/* everything if local */
+#endif
+
+  ppst->score_ix = 0;
+  ppst->histint = 2;
+  m_msp->qframe = m_msg_def.qframe;
+  m_msp->nframe = m_msg_def.nframe;
+  m_msp->nrelv = m_msg_def.nrelv;
+  m_msp->srelv = m_msg_def.srelv;
+  m_msp->arelv = m_msg_def.arelv;
+  m_msp->stages = m_msg_def.stages;
+  m_msp->shuff_wid = 0;
+#if defined(GGSEARCH)
+  m_msp->shuff_max = 100;
+#else
+  m_msp->shuff_max = MAX_RSTATS;
+#endif
+  m_msp->shuff_max_save = m_msp->shuff_max;
+
+  /* see param.h for the definition of all these */
+
+  m_msp->qshuffle = 0;
+  m_msp->nm0 = 1;
+  m_msp->escore_flg = 0;
+
+  /* pam information */
+  ppst->pam_pssm = 0;
+#if defined(FASTS) || defined(FASTF) || defined(FASTM)
+   ppst->pam_xx = ppst->pam_xm = 0;
+#else
+  /* placeholders: with pam_x_set == 0 (below), init_pamx() overwrites
+     both with values read from the scoring matrix */
+  ppst->pam_xx = 1;  /* set >0 to use pam['X']['X'] value */
+  ppst->pam_xm = -1;  /* set >0 to use pam['X']['A-Z'] value */
+#endif
+  ppst->pam_x_set = 0;
+  ppst->pam_x_id_sim = 0;
+  ppst->pam_set = ppst->pam_variable = 0;
+  ppst->pam_pssm = 0;	/* (already cleared above) */
+  ppst->p_d_set = 0;
+  ppst->pamoff = 0;
+  ppst->ext_sq_set = 0;
+  ppst->nsq_e = ppst->nsq;
+
+  /* initial settings for protein */
+  if (pgm_def_arr[ppst->pgm_id].ktup > 0) {
+    mktup = 3;
+    ppst->param_u.fa.bestscale = 300;
+    ppst->param_u.fa.bestoff = 36;
+    ppst->param_u.fa.bkfact = 6;
+    ppst->param_u.fa.scfact = 3;
+    ppst->param_u.fa.bktup = mktup;
+    ppst->param_u.fa.ktup = 0;
+    ppst->param_u.fa.bestmax = 50;
+    ppst->param_u.fa.pamfact = 1;
+    ppst->param_u.fa.altflag = 0;
+    ppst->param_u.fa.optflag = 1;
+    ppst->param_u.fa.iniflag = 0;
+    ppst->param_u.fa.optcut = 0;
+    ppst->param_u.fa.optcut_set = 0;
+    ppst->param_u.fa.cgap = 0;
+    ppst->param_u.fa.optwid = 16;
+    ppst->param_u.fa.optwid_set = 0;
+    ppst->param_u.fa.E_band_opt = pgm_def_arr[ppst->pgm_id].E_band_opt;
+    /* E()-thresholds are ON by default (flag = 1); the -c option in
+       f_getopt() clears the flag for "O..." or a value > 1.0 */
+    ppst->param_u.fa.use_E_thresholds = 1;
+  }
+
+  f_init_opts(pgm_id, m_msp, ppst);
+}
+
+/*  switches for fasta only */
+/*  each flag is set to 1 by f_getopt() when the matching option is seen */
+
+static int shift_set=0;		/* frameshift penalty given (-h or -j, depending on SHOW_HELP) */
+static int subs_set=0;		/* codon substitution penalty given (-j) */
+static int sw_flag_set=0;	/* -A given */
+static int nframe_set=0;	/* -3 given */
+static int E_thresh_set = 0;	/* -c gave a band-optimization fraction in (0,1] */
+static int E_cgap_set = 0;	/* -c also gave a positive joining fraction */
+
+/*
+ * f_getopt(): process one function-specific command-line option.
+ *
+ *   copt   - option character (see f_options[])
+ *   optarg - the option's argument text (only read for options that
+ *            take one)
+ *   m_msg  - message/display settings to update
+ *   ppst   - scoring/search parameters to update
+ *
+ * Unknown option characters are ignored.  Several cases also record
+ * that the option was given in the file-level *_set flags (del_set,
+ * gap_set, shift_set, ...).  Numeric arguments are read with sscanf();
+ * when nothing can be converted the target keeps its previous value.
+ */
+void
+f_getopt (char copt, char *optarg,
+	  struct mngmsg *m_msg, struct pstruct *ppst)
+{
+  int pgm_id;
+  double tmp_f, tmp_f1;
+  double tmp_e_cut, tmp_e_rep;
+  int dnaseq_save;
+  char *bp;
+
+  pgm_id = ppst->pgm_id;
+
+  switch (copt) {
+  case '3':	/* forward strand / forward frames only */
+    nframe_set = 1;
+    if (pgm_id == TFA_PID) {
+      m_msg->nframe = 3; break;
+    }
+    else {
+      m_msg->nframe = 1;	/* for TFASTXY */
+      m_msg->qframe = 1;  /* for FASTA, FASTX */
+    }
+    break;
+  case 'a': m_msg->aln.showall = 1; break;
+  case 'A':	/* toggle the program's default sw_flag */
+    if (ppst->sw_flag) ppst->sw_flag=0;
+    else ppst->sw_flag= 1;
+    sw_flag_set = 1;
+    break;
+  case 'b':	/* number of scores shown: "$", "=<n>", "><n>" or "<n>" */
+    if (optarg[0] == '$') {
+      m_msg->mshow = -1;
+      m_msg->e_cut = 10000000.0;
+      break;
+    }
+    else if (optarg[0] == '=') {
+      m_msg->e_cut = 10000000.0;
+      m_msg->e_cut_set = 1;
+      m_msg->mshow_min = 1;
+      sscanf (optarg+1, "%d", &m_msg->mshow);
+    }
+    else if (optarg[0] == '>') {
+      m_msg->mshow_min = 2;
+      sscanf (optarg+1, "%d", &m_msg->mshow);
+    }
+    else {
+      sscanf (optarg, "%d", &m_msg->mshow);
+      m_msg->mshow_min = 0;
+    }
+    m_msg->mshow_set = 1;
+    break;
+  case 'c':	/* band-optimization / joining thresholds */
+    tmp_f = tmp_f1 = 0.0;
+    if (*optarg == 'O') {
+      ppst->param_u.fa.use_E_thresholds = 0;
+      optarg++;
+    }
+    if (*optarg != '\0' && pgm_def_arr[pgm_id].ktup > 0) {
+      sscanf (optarg, "%lf %lf", &tmp_f, &tmp_f1);
+      if (tmp_f > 1.0) {	/* a score cutoff, not a fraction */
+	ppst->param_u.fa.optcut = (int)(tmp_f+0.1);
+	ppst->param_u.fa.use_E_thresholds = 0;
+	ppst->param_u.fa.optcut_set = 1;
+      }
+      else if (tmp_f <= 0.0) {
+	ppst->param_u.fa.use_E_thresholds = 1;
+      }
+      else {	/* 0.0 < tmp_f <= 1.0 */
+	ppst->param_u.fa.use_E_thresholds = 1;
+	ppst->param_u.fa.E_band_opt = min(tmp_f,1.0);
+	E_thresh_set = 1;
+	if (tmp_f1 > 0.0) {
+	  tmp_f1 = min(tmp_f1,1.0);	/* may want to do max(tmp_f1,tmp_f) */
+	  ppst->param_u.fa.E_join = tmp_f1;
+	  E_cgap_set=1;
+	}
+      }
+    }
+    break;
+  case 'd': sscanf(optarg,"%d",&m_msg->ashow);
+    if ((m_msg->mshow > 0) && (m_msg->ashow > m_msg->mshow)) m_msg->mshow=m_msg->ashow;
+    m_msg->ashow_set = 1;
+    break;
+  case 'E':
+    /* start from defined values so that an argument sscanf() cannot
+       convert never leaves these locals indeterminate */
+    tmp_e_cut = tmp_e_rep = 0.0;
+    /* two values only when both actually convert; "10 " (trailing
+       blank) is handled as the single-value form */
+    if (strchr(optarg,' ') &&
+	sscanf(optarg,"%lf %lf",&tmp_e_cut, &tmp_e_rep) == 2) {
+      if (tmp_e_rep <= 0.0) {	/* two values, 2nd <= 0.0, no do_rep */
+	ppst->do_rep = 0;
+	ppst->e_cut_r = 1E-100;
+	tmp_e_rep = -2.0;
+      }
+      else {ppst->do_rep = 1;}
+    }
+    else {	/* one value, do_rep; tmp_e_rep=10.0 */
+      sscanf(optarg,"%lf",&tmp_e_cut);
+#ifndef LALIGN
+      tmp_e_rep = 10.0;
+#else
+      tmp_e_rep = 1.0;
+#endif
+      ppst->do_rep = 1;
+    }
+    /* a cutoff already forced (e.g. by -b =<n>) is not overridden */
+    if (!m_msg->e_cut_set && tmp_e_cut > 0.0 ) {
+      ppst->e_cut = m_msg->e_cut = tmp_e_cut;
+    }
+    m_msg->e_cut_set = 1;
+
+    /* tmp_e_rep >= 1.0 is a divisor of e_cut; a value in (0,1) is the
+       repeat threshold itself */
+    if (tmp_e_rep > 0.0) {
+      if (tmp_e_rep >= 1.0) { ppst->e_cut_r = ppst->e_cut/tmp_e_rep;}
+      else { ppst->e_cut_r = tmp_e_rep;}
+    }
+    break;
+  case 'f':	/* gap-open penalty, stored as a value <= 0 */
+    sscanf (optarg, "%d", &ppst->gdelval);
+    if (ppst->gdelval > 0) ppst->gdelval = -ppst->gdelval;
+    del_set = 1;
+    break;
+  case 'g':	/* gap-extension penalty, stored as a value <= 0 */
+    sscanf (optarg, "%d", &ppst->ggapval);
+    if (ppst->ggapval > 0) ppst->ggapval = -ppst->ggapval;
+    gap_set = 1;
+    break;
+#ifndef SHOW_HELP
+  case 'h':
+    sscanf (optarg, "%d", &ppst->gshift);
+    if (ppst->gshift > 0) ppst->gshift = -ppst->gshift;
+    shift_set = 1;
+    break;
+  case 'j':
+    sscanf (optarg, "%d", &ppst->gsubs);
+    if (ppst->gsubs > 0) ppst->gsubs = -ppst->gsubs;
+    subs_set = 1;
+    break;
+#else
+  case 'h':
+    show_help(m_msg->pgm_name, pgm_id);
+    break;
+  case 'j':
+#ifdef FASTY
+    /* "<shift> <subs>", "<shift>,<subs>" or "<shift>" */
+    if (strchr(optarg,' ')) {
+      sscanf (optarg, "%d %d", &ppst->gshift, &ppst->gsubs);
+      subs_set = 1;
+      if (ppst->gsubs > 0) ppst->gsubs = -ppst->gsubs;
+    }
+    else if (strchr(optarg,',')) {
+      sscanf (optarg, "%d,%d", &ppst->gshift, &ppst->gsubs);
+      subs_set = 1;
+      if (ppst->gsubs > 0) ppst->gsubs = -ppst->gsubs;
+    }
+    else {
+      sscanf (optarg, "%d", &ppst->gshift);
+    }
+#else
+#ifdef FASTX
+    sscanf (optarg, "%d", &ppst->gshift);
+#endif
+#endif
+    if (ppst->gshift > 0) ppst->gshift = -ppst->gshift;
+    shift_set = 1;
+    break;
+#endif
+  case 'J':
+#ifdef LALIGN
+    ppst->show_ident=1;
+#else
+    ppst->show_ident=0;
+#endif
+    break;
+
+
+#ifdef LALIGN
+  case 'K':
+    sscanf(optarg,"%d", &ppst->max_repeat);
+    break;
+#endif
+  case 'k':
+    sscanf (optarg, "%d", &m_msg->shuff_max);
+    m_msg->shuff_max_save = m_msg->shuff_max;
+    mshuff_set = 1;
+    break;
+  case 'M':	/* library length filter "low-high"; "-high" means 0-high */
+    sscanf(optarg,"%d-%d",&m_msg->n1_low,&m_msg->n1_high);
+    if (m_msg->n1_low < 0) {
+      m_msg->n1_high = -m_msg->n1_low;
+      m_msg->n1_low = 0;
+    }
+    if (m_msg->n1_high == 0) m_msg->n1_high = BIGNUM;
+    if (m_msg->n1_low > m_msg->n1_high) {
+      fprintf(stderr," low cutoff %d greater than high %d\n",
+	      m_msg->n1_low, m_msg->n1_high);
+      m_msg->n1_low = 0;
+      m_msg->n1_high = BIGNUM;
+    }
+    ppst->n1_low = m_msg->n1_low;
+    ppst->n1_high = m_msg->n1_high;
+    break;
+  case 'n':	/* DNA query */
+    m_msg->qdnaseq = SEQT_DNA;
+    re_ascii(qascii,nascii,strlen((char *)m_msg->ann_arr+1));
+    SAFE_STRNCPY(m_msg->sqnam,"nt",4);
+    prot2dna = 1;
+    break;
+  case 'o':	/* falls through: handled the same as 'p' */
+  case 'p':	/* protein query */
+    m_msg->qdnaseq = SEQT_PROT;
+    ppst->dnaseq = SEQT_PROT;
+    SAFE_STRNCPY(m_msg->sqnam,"aa",4);
+    break;
+  case 'P':	/* PSSM file name, optionally followed by " <type>" */
+    SAFE_STRNCPY(ppst->pgpfile,optarg,MAX_FN);
+    if ((bp=strchr(ppst->pgpfile,' '))!=NULL) {
+      *bp='\0';
+      ppst->pgpfile_type = atoi(bp+1);
+    }
+    else ppst->pgpfile_type = 0;
+    ppst->pam_pssm = 1;
+    break;
+  case 'r':	/* DNA "+match/-mismatch" scores */
+    sscanf(optarg,"%d/%d",&ppst->p_d_mat,&ppst->p_d_mis);
+    ppst->pam_set = 0;
+    ppst->p_d_set = 1;
+
+    SAFE_STRNCPY(ppst->pam_name, "DNA", 4);
+    if (ppst->dnaseq != SEQT_RNA) ppst->dnaseq = SEQT_DNA;
+    if (ppst->p_d_mat > 0 && ppst->p_d_mis < 0) {
+      ppst->p_d_set = 1;
+      SAFE_STRNCPY(ppst->pamfile,optarg,40);
+    }
+    break;
+    /* modified Sept, 2011, to recognize that a scoring matrix
+       specifies a sequence alphabet */
+  case 's':
+    if (*optarg == '?') {	/* "?name": variable matrix */
+      ppst->pam_variable = 1;
+      optarg++;
+    }
+    if (*optarg == '\0') break;
+    SAFE_STRNCPY (ppst->pamfile, optarg, MAX_FN);
+    dnaseq_save = ppst->dnaseq;
+    /* check for default abbreviation */
+    if (!standard_pam(ppst->pamfile,ppst,del_set, gap_set)) {
+      /* check/load matrix file */
+      if (!initpam (ppst->pamfile, ppst)) {
+	/* matrix file failed, use default matrix */
+	SAFE_STRNCPY(ppst->pamfile,pgm_def_arr[pgm_id].smstr,MAX_FN);
+      }
+    }
+    ppst->pam_set=1;
+    /* check for changing alphabet here */
+    if (ppst->dnaseq != dnaseq_save && ppst->dnaseq >= SEQT_DNA) {
+      m_msg->qdnaseq = SEQT_DNA;
+      re_ascii(qascii,nascii,strlen((char *)m_msg->ann_arr+1));
+      SAFE_STRNCPY(m_msg->sqnam,"nt",4);
+      prot2dna = 1;
+    }
+    break;
+  case 'S':	/* turn on extended alphabet for seg */
+    ppst->ext_sq_set = 1;
+    ppst->nsq_e = ppst->nsqx;
+    break;
+  case 't':	/* optional leading 't'/'T', then the translation table number */
+    if (tolower(optarg[0])=='t') {
+      m_msg->ldb_info.term_code = aascii['*'];
+      optarg++;
+    }
+    if (*optarg) {sscanf (optarg, "%d", &ppst->tr_type);}
+    break;
+  case 'U':	/* RNA query: nucleotide alphabet, 'T' displayed as 'U' */
+    m_msg->qdnaseq = SEQT_RNA;
+    memcpy(qascii,nascii,sizeof(qascii));
+    SAFE_STRNCPY(m_msg->sqnam,"nt",4);
+    nt[nascii['T']]='U';
+    prot2dna=1;
+    break;
+  case 'W':
+    sscanf (optarg,"%d",&m_msg->aln.llcntx);
+    m_msg->aln.llcntx_set = 1;
+    break;
+  case 'X':
+    parse_ext_opts(optarg, pgm_id, m_msg, ppst);
+    break;
+  case 'z':	/* "<z> <z2>", "<z>,<z2>" or "<z>" */
+    if (strchr(optarg,' ')!=NULL) {
+      sscanf(optarg,"%d %d",&ppst->zsflag,&ppst->zsflag2);
+      if (ppst->zsflag2 < 1 || ppst->zsflag2 > 6) ppst->zsflag2 = 2;
+    }
+    else if (strchr(optarg,',')!=NULL) {
+      sscanf(optarg,"%d,%d",&ppst->zsflag,&ppst->zsflag2);
+      if (ppst->zsflag2 < 1 || ppst->zsflag2 > 6) ppst->zsflag2 = 2;
+    }
+    else {
+      sscanf(optarg,"%d",&ppst->zsflag);
+      ppst->zsflag2 = (ppst->zsflag % 10);
+    }
+    break;
+  }
+}
+
+/* -X sub-options; a ':' after a letter means that sub-option takes a value */
+static char my_opts[] = "1BIM:ox:y:N:";
+
+/* parse_ext_opts() applies a single -X sub-option: opt_arg[0] selects it and
+   opt_arg+1 is its value.  Empty, ':' and unlisted selectors are ignored. */
+void
+parse_ext_opts(char *opt_arg, int pgm_id, struct mngmsg *m_msp, struct pstruct *ppst) {
+  long l_arg = 0;
+  char c_arg, c_opt, *the_arg;
+
+  c_opt = *opt_arg;
+  /* strchr() also matches the terminating NUL of my_opts, so reject it first */
+  if (c_opt == '\0' || c_opt == ':' || strchr(my_opts, c_opt)==NULL) {
+    return;
+  }
+
+  the_arg = opt_arg+1;	/* in bounds: opt_arg[0] is not NUL */
+
+  switch (c_opt) {
+  case '1': 
+    if (pgm_def_arr[pgm_id].ktup > 0) {
+      ppst->param_u.fa.iniflag=1;
+    }
+    break;
+  case 'B': m_msp->z_bits = 0; break;
+  case 'I': 
+    m_msp->tot_ident = 1;	/* any value following -XI is not parsed */
+    break;
+  case 'M':
+    c_arg = '\0';
+    /* no leading number: l_arg would be indeterminate, so ignore the option */
+    if (sscanf(the_arg,"%ld%c",&l_arg,&c_arg) < 1) break;
+    if (l_arg < 0) m_msp->max_memK = BIGNUM;
+    else {
+      l_arg *= 1024;	/* scale by 1024; a 'G' suffix scales by 1024 again */
+      if (c_arg == 'G') l_arg *= 1024;
+      m_msp->max_memK = l_arg;
+    }
+    break;
+  case 'N':
+  case 'X':	/* NOTE(review): 'X' is not listed in my_opts, so never reached */
+    ppst->pam_x_id_sim = 0;
+    if (*the_arg == 'S' || *the_arg == '+') {
+      ppst->pam_x_id_sim = 1;
+    }
+    else if (*the_arg == 'D' || *the_arg == '-') {
+      ppst->pam_x_id_sim = -1;
+    }
+    break;
+  case 'o':
+    if (pgm_def_arr[pgm_id].ktup > 0) {
+      ppst->param_u.fa.optflag = 0;
+      msg_def_arr[pgm_id].nrelv = m_msp->nrelv = 2;
+    }
+    break;
+  case 'x':
+    if (strchr(the_arg,' ')!=NULL) {
+      sscanf (the_arg,"%d %d",&ppst->pam_xx, &ppst->pam_xm);
+    }
+    else if (strchr(the_arg,',')!=NULL) {
+      sscanf (the_arg,"%d,%d",&ppst->pam_xx, &ppst->pam_xm);
+    }
+    else {
+      sscanf (the_arg,"%d",&ppst->pam_xx);
+      ppst->pam_xm = ppst->pam_xx;
+    }
+    ppst->pam_x_set=1;
+    break;
+  case 'y':
+    if (pgm_def_arr[pgm_id].ktup > 0) {
+      sscanf (the_arg, "%d", &ppst->param_u.fa.optwid);
+      ppst->param_u.fa.optwid_set = 1;
+    }
+    break;
+  }
+}
+
+void
+f_lastenv (struct mngmsg *m_msg, struct pstruct *ppst)
+{
+  char save_str[MAX_SSTR];
+  /* f_lastenv() sets up the query map qascii according to m_msg->qdnaseq */
+#if !defined(FASTM) && !defined(FASTS) && !defined(FASTF)
+  SAFE_STRNCPY(save_str,"*",sizeof(save_str));
+#else
+  SAFE_STRNCPY(save_str,",",sizeof(save_str));
+#endif
+  /* save_str ("*", or "," for FASTM/FASTS/FASTF) is handed to build_xascii() */
+  if (m_msg->qdnaseq == SEQT_UNK) {
+    build_xascii(qascii,save_str);
+    if (m_msg->ann_flg) add_ascii_ann(qascii,m_msg->ann_arr);
+  }  
+/* query type already known; init_ascii() is compiled out for FASTS/M/F/X/Y
+   (original note: this check allows lc DNA sequence queries with FASTX) */
+  else {
+#if !defined(FASTS) && !defined(FASTM) && !defined(FASTF) && !defined(FASTX) && !defined(FASTY)
+    init_ascii(ppst->ext_sq_set,qascii, ppst->nsq, m_msg->qdnaseq);
+#endif
+    validate_novel_aa(qascii, ppst->nsq, m_msg->qdnaseq);
+  }
+}
+
+void
+f_getarg (int argc, char **argv, int optind,
+	  struct mngmsg *m_msg, struct pstruct *ppst)
+{
+  /* f_getarg() reads the optional numeric 4th/5th words that follow the options */
+  const int n_words = argc - optind;
+  const int def_ktup = pgm_def_arr[ppst->pgm_id].ktup;
+
+  if (def_ktup > 0 && n_words > 3) {	/* ktup program with an explicit 4th word */
+    sscanf (argv[optind + 3], "%d", &ppst->param_u.fa.ktup);
+    ktup_set = 1;
+  }
+  else if (def_ktup > 0) {	/* no 4th word: store the program default, negated */
+    ppst->param_u.fa.ktup = -def_ktup;
+  }
+
+  if (ppst->pgm_id == RSS_PID && n_words >= 4) {	/* shuffle count is the 4th word */
+    sscanf (argv[optind + 3], "%d", &m_msg->shuff_max);
+  }
+  if (ppst->pgm_id == RFX_PID && n_words >= 5) {	/* shuffle count is the 5th word */
+    sscanf (argv[optind + 4], "%d", &m_msg->shuff_max);
+  }
+  m_msg->shuff_max_save = m_msg->shuff_max;
+}
+
+/* fills in the query ascii mapping from the parameter
+   ascii mapping.
+*/
+
+void
+re_ascii(int *qascii, int *pascii, int max_ann_arr) {
+  int c;
+  const int ann_top = NANN + max_ann_arr;	/* highest code that is kept */
+  /* codes within [ESS, ann_top] stay; every other slot is copied from pascii */
+  for (c = 0; c < 128; c++) {
+    if (qascii[c] >= ESS && qascii[c] <= ann_top) continue;
+    qascii[c] = pascii[c];
+  }
+}
+
+
+/* recode() maps the n residues of seq[] to internal codes in place through
+   qascii[] (only characters above '@' and '*' are looked up), replaces any
+   code that is not valid with qascii['X'], stores EOSEQ at seq[n], returns n. */
+int
+recode(unsigned char *seq, int n, int *qascii, int nsqx) {
+  int i, in_table;
+  unsigned char save_c;
+#if defined(FASTS) || defined(FASTM)
+  qascii[',']=ESS;
+#endif
+
+  for (i=0; i < n; i++) {
+    save_c = seq[i];
+    in_table = (save_c < 128);	/* re_ascii() treats the maps as 128 entries */
+    if (in_table && (save_c > '@' || save_c=='*')) seq[i] = qascii[save_c];
+    if (!in_table || (seq[i] > nsqx && seq[i]!=ESS)) {
+      fprintf(stderr, "*** Warning - unrecognized residue at %d:%c - %2d\n",
+	      i,save_c,save_c);
+      seq[i] = qascii['X'];
+    }
+  }
+  seq[i]=EOSEQ;
+  return i;
+}
+
+/* here we have the query sequence, all the command line options,
+   but we need to set various parameter options based on the type
+   of the query sequence (m_msg->qdnaseq = 0:protein/1:DNA) and
+   the function (FASTA/FASTX/TFASTA)
+
+   29-Jun-2008 add code to ensure that weird ('O', 'U') amino-acids
+   are read properly.
+
+   15-Nov-2010 -- modify scoring matrix for very short query sequences
+   (e.g. short read metagenomics)
+*/
+
+/* resetp() for a conventional FASTA/TFASTXYZ search: sets query-type dependent defaults */
+void
+resetp (struct mngmsg *m_msg, struct pstruct *ppst) {
+  int i, pgm_id;
+  int n0_eff;
+
+  pgm_id = ppst->pgm_id;
+
+  /* check for alphabet conflict */
+
+  ppst->shuffle_dna3 = 0;
+#if defined(TFAST)
+  if (m_msg->qdnaseq == SEQT_DNA || m_msg->qdnaseq == SEQT_RNA) {
+    fprintf(stderr," %s compares a protein to a translated\n\
+DNA sequence library.  Do not use a DNA query/scoring matrix.\n",prog_func);
+    exit(1);
+  }
+  ppst->shuffle_dna3 = 1;
+#else
+#if (defined(FASTX) || defined(FASTY))
+  if (!(m_msg->qdnaseq == SEQT_DNA || m_msg->qdnaseq == SEQT_RNA)) {
+    fprintf(stderr," FASTX/Y compares a DNA sequence to a protein database\n");
+    fprintf(stderr," Use a DNA query\n");
+    exit(1);
+  }
+#endif
+#endif
+
+  /* **************************************************************** */
+  /* adjust alphabets for prot:prot or DNA:DNA alignments */
+
+  /* this code changes parameters for programs (FA_PID, SS_PID, FS_PID,
+     RSS_PID) that can examine either protein (initial state) or DNA 
+     Modified May, 2006 to reset e_cut for DNA comparisons.
+  */
+  /* **************************************************************** */
+
+  if (msg_def_arr[pgm_id].q_seqt == SEQT_UNK) {
+    if (m_msg->qdnaseq == SEQT_DNA || m_msg->qdnaseq == SEQT_RNA) {
+      msg_def_arr[pgm_id].q_seqt = m_msg->qdnaseq;
+      msg_def_arr[pgm_id].p_seqt = SEQT_DNA;
+      msg_def_arr[pgm_id].l_seqt = SEQT_DNA;
+      if (m_msg->qdnaseq == SEQT_DNA) msg_def_arr[pgm_id].qframe = 2;
+      if (!m_msg->e_cut_set) {
+	pgm_def_arr[pgm_id].e_cut /= 5.0;
+	ppst->e_cut_r = 0.001;
+      }
+    }
+    else {
+      msg_def_arr[pgm_id].q_seqt = SEQT_PROT;
+    }
+  }
+
+  /* set the comparison type (PROT/DNA) in ppst */
+  ppst->dnaseq = msg_def_arr[pgm_id].p_seqt;
+
+  if (!sw_flag_set) ppst->sw_flag = msg_def_arr[pgm_id].sw_flag;
+  if (!m_msg->e_cut_set) {
+    ppst->e_cut = m_msg->e_cut=pgm_def_arr[pgm_id].e_cut;
+#ifdef LALIGN
+    ppst->e_cut_r = ppst->e_cut;
+#endif
+  }
+
+  if (ppst->dnaseq == SEQT_DNA && m_msg->qdnaseq==SEQT_RNA) {
+    ppst->dnaseq = SEQT_RNA;
+    ppst->nt_align = 1;
+  }
+  if (ppst->dnaseq==SEQT_DNA) pascii = &nascii[0];
+  else if (ppst->dnaseq==SEQT_RNA) {
+    pascii = &nascii[0];
+    ppst->sq[nascii['T']] = 'U';
+  }
+  else pascii = &aascii[0];
+  m_msg->ldb_info.ldnaseq = msg_def_arr[pgm_id].l_seqt;
+
+  if (m_msg->ldb_info.ldnaseq & SEQT_DNA) {
+    memcpy(lascii,nascii,sizeof(lascii));
+#ifndef TFAST
+#ifdef DNALIB_LC
+    init_ascii(ppst->ext_sq_set,lascii, ppst->nsq, m_msg->ldb_info.ldnaseq);
+#endif
+#else
+  /* no init_ascii() because we translate lower case library sequences */
+#endif
+    validate_novel_aa(lascii, ppst->nsq, m_msg->ldb_info.ldnaseq);
+  }
+  else {
+    memcpy(lascii,aascii,sizeof(lascii));	/* initialize lib mapping */
+    if (m_msg->ann_flg && strchr((char *)m_msg->ann_arr,'*')) {lascii['*'] = NA;}
+
+#if defined(FASTF) || defined(FASTS) || defined(FASTM)
+    lascii['*'] = NA;
+#endif
+    init_ascii(ppst->ext_sq_set,lascii, ppst->nsq, m_msg->ldb_info.ldnaseq);
+    validate_novel_aa(lascii, ppst->nsq, m_msg->ldb_info.ldnaseq);
+  }
+
+  /* have lascii - initialize l_ann_ascii[] if necessary */
+  if (m_msg->ann_flg) {
+    memcpy(l_ann_ascii,lascii,sizeof(l_ann_ascii));
+    /* make certain that '*' is treated correctly */
+    if (strchr((char *)m_msg->ann_arr,'*')) {l_ann_ascii['*'] = NA;}
+    add_ascii_ann(l_ann_ascii, m_msg->ann_arr);
+  }
+
+  /* **************************************************************** */
+  /* adjust qframe/nframe if DNA/translated DNA search  */
+  /* **************************************************************** */
+
+  if (!nframe_set) {
+    m_msg->qframe = msg_def_arr[pgm_id].qframe;
+    m_msg->nframe = msg_def_arr[pgm_id].nframe;
+  }
+
+  /* the possibilities:
+  	     -i  -3	qframe	revcomp
+   FA_D/FX   -    -        2       0	
+   FA_D/FX   +    -        2       1	
+   FA_D/FX   -    +        1       0	
+   FA_D/FX   +    +        2       1	
+  */
+
+  if (m_msg->qdnaseq == SEQT_DNA) {
+    m_msg->nframe = 1;
+    if (m_msg->qframe == 1 && m_msg->revcomp==1) {
+      m_msg->qframe = m_msg->revcomp+1;
+    }
+  }
+  else if (m_msg->qdnaseq == SEQT_RNA) {
+    m_msg->qframe = m_msg->revcomp+1;
+    m_msg->nframe = 1;
+  }
+
+  /* **************************************************************** */
+  /* adjust FASTA heuristics for  DNA/translated DNA search  */
+  /* **************************************************************** */
+
+  if (ppst->dnaseq == SEQT_DNA || ppst->dnaseq == SEQT_RNA) {
+    ppst->histint = 4;
+
+    if (!del_set) {
+#ifdef OLD_FASTA_GAP
+      ppst->gdelval = -16;	/* def. del penalty */
+#else
+      ppst->gdelval = -12;	/* def. open penalty */
+#endif
+    }
+    if (!gap_set) ppst->ggapval = -4;	/* def. gap penalty */
+
+    ppst->nsq = nnt;
+    ppst->nsqx = nntx;
+    ppst->sq[ppst->nsqx+1] = ppst->sqx[ppst->nsqx+1] = '\0';
+
+    /* reset parameters for DNA */
+    if (pgm_def_arr[pgm_id].ktup > 0) {
+      /* these parameters are used to scale optcut, and are being replaced
+	 by statistically based parameters */
+      /* largest ktup */
+      pgm_def_arr[pgm_id].ktup = mktup = 6;
+      if (!ppst->param_u.fa.optwid_set) ppst->param_u.fa.optwid = 16;
+      ppst->param_u.fa.bestscale = 80;
+      ppst->param_u.fa.bkfact = 5;
+      ppst->param_u.fa.scfact = 1;
+      ppst->param_u.fa.bktup = mktup;
+      ppst->param_u.fa.bestmax = 80;
+      ppst->param_u.fa.bestoff = 45;
+      if (!E_thresh_set) ppst->param_u.fa.E_band_opt = 0.05;
+
+      if (!sw_flag_set) {
+	ppst->sw_flag = 0;
+	SAFE_STRNCPY(m_msg->f_id1,"bs",sizeof(m_msg->f_id1));
+	SAFE_STRNCPY(m_msg->alabel, align_label[1], sizeof(m_msg->alabel));
+      }
+
+      /* largest ktup */
+      mktup = 6;
+
+      if (ppst->param_u.fa.pamfact >= 0) ppst->param_u.fa.pamfact = 0;
+      if (ppst->param_u.fa.ktup < 0)
+	ppst->param_u.fa.ktup = -ppst->param_u.fa.bktup;
+    }
+
+    for (i=0; i<=ppst->nsqx; i++) {
+      ppst->hsq[i] = hnt[i];
+      ppst->sq[i] = nt[i];
+      ppst->hsqx[i] = hntx[i];
+      ppst->sqx[i] = ntx[i];
+    }
+
+    /* **************************************************************** */
+    /* adjust scoring matrix for DNA:DNA search  */
+    /* **************************************************************** */
+
+    if (!ppst->pam_set) {
+      if (ppst->p_d_set)
+	mk_n_pam(npam,nnt,ppst->p_d_mat,ppst->p_d_mis);
+#if !defined(FASTS) && !defined(FASTM)
+      else if (ppst->pamfile[0]=='\0' || strncmp(ppst->pamfile,"BL50",4)==0) {
+	SAFE_STRNCPY (ppst->pamfile, "+5/-4", sizeof(ppst->pamfile));
+	SAFE_STRNCPY(ppst->pamfile_save, ppst->pamfile, sizeof(ppst->pamfile_save));
+	SAFE_STRNCPY (ppst->pam_name, "+5/-4", sizeof(ppst->pam_name));	/* bound by the destination */
+      }
+#else
+      else if (strncmp(ppst->pamfile,"MD20",4)==0) {
+	SAFE_STRNCPY (ppst->pamfile, "+2/-2", sizeof(ppst->pamfile));
+	SAFE_STRNCPY (ppst->pam_name, "+2/-2", sizeof(ppst->pam_name));
+	SAFE_STRNCPY(ppst->pamfile_save, ppst->pamfile, sizeof(ppst->pamfile_save));
+	ppst->p_d_mat = +2;
+	ppst->p_d_mis = -2;
+      	mk_n_pam(npam,nnt,ppst->p_d_mat,ppst->p_d_mis);
+      }
+#endif
+      pam = npam;
+    }
+
+    SAFE_STRNCPY (m_msg->sqnam, "nt",sizeof(m_msg->sqnam));
+    SAFE_STRNCPY (m_msg->sqtype, "DNA",sizeof(m_msg->sqtype));
+  }	/* end DNA reset */
+
+  else {  /* other parameters for protein comparison */
+    if (pgm_def_arr[pgm_id].ktup > 0) {
+      if (!ppst->param_u.fa.optwid_set) {
+	if (ppst->param_u.fa.ktup==1) ppst->param_u.fa.optwid = 32;
+	else ppst->param_u.fa.optwid = 16;
+      }
+    }
+    if (!shift_set) {ppst->gshift = pgm_def_arr[pgm_id].gshift;}
+    if (!subs_set) {ppst->gsubs = pgm_def_arr[pgm_id].hshift;}
+  }
+  /* remember the matrix name chosen above; bounded by the destination buffer */
+  SAFE_STRNCPY(ppst->pamfile_save, ppst->pamfile, sizeof(ppst->pamfile_save));
+}
+
+/* query_parm() prompts on stdin for parameters that are still unset (ktup;
+   under PRSS also the shuffle count and type); exits if stdin cannot be read. */
+void
+query_parm (struct mngmsg *m_msp, struct pstruct *ppst)
+{
+   char    answer[40];
+   int     *ktup_p = &ppst->param_u.fa.ktup;
+
+   if (pgm_def_arr[ppst->pgm_id].ktup > 0) {
+     /* a negative ktup is replaced by its magnitude */
+     if (*ktup_p < 0) *ktup_p = -(*ktup_p);
+     if (*ktup_p == 0) {
+       printf (" ktup? (1 to %d) [%d] ", mktup, pgm_def_arr[ppst->pgm_id].ktup);
+       if (fgets (answer, sizeof(answer), stdin) == NULL) exit (0);
+       sscanf(answer,"%d",ktup_p);
+     }
+     /* still zero after the prompt: use the program default */
+     if (*ktup_p != 0) ktup_set = 1;
+     else *ktup_p = pgm_def_arr[ppst->pgm_id].ktup;
+   }
+
+#if defined(PRSS)
+   if (m_msp->shuff_max < 10) m_msp->shuff_max = MAX_RSTATS;
+
+   if (!mshuff_set) {
+     printf(" number of shuffles [%d]? ",m_msp->shuff_max);
+     fflush(stdout);
+     if (fgets (answer, sizeof(answer), stdin) == NULL) exit (0);
+     sscanf(answer,"%d",&m_msp->shuff_max);
+   }
+
+   if (ppst->zs_win == 0) {
+     printf (" local (window) (w) or uniform (u) shuffle [u]? ");
+     if (fgets (answer, sizeof(answer), stdin) == NULL) exit (0);
+     if (answer[0]=='w' || answer[0]=='W') {
+       m_msp->shuff_wid = 20;
+       printf(" local shuffle window size [%d]? ",m_msp->shuff_wid);
+       if (fgets (answer, sizeof(answer), stdin) == NULL) exit (0);
+       sscanf(answer,"%d",&m_msp->shuff_wid);
+     }
+   }
+#endif
+}
+
+/* last_init() cannot look at aa0, n0, because it is only run once,
+   it is not run before each new aa0 search */
+void
+last_init (struct mngmsg *m_msg, struct pstruct *ppst)
+{
+  int ix_l, ix_i, i, pgm_id;
+  double *kar_p;
+  double aa0_f[MAXSQ];
+  /* last_init() derives the final m_msg/ppst settings and the Lambda/K/H values */
+  m_msg->zsflag = ppst->zsflag;
+  m_msg->zsflag2 = ppst->zsflag2;
+
+  if (ppst->zsflag < 0) {
+    ppst->do_rep = 0;
+  }
+
+  pgm_id = ppst->pgm_id;
+
+#ifdef LALIGN
+  m_msg->do_showbest = 1;
+  m_msg->quiet = 1;
+#endif
+
+#if defined(FASTF) || defined(FASTS) || defined(FASTM)
+  m_msg->nohist = 1;
+  m_msg->shuff_max = 2000;
+  ppst->shuff_node = m_msg->shuff_max/fa_max_workers;
+#else
+  m_msg->shuff_max = m_msg->shuff_max_save;
+#endif
+  /* default alignment line length when none (or a non-positive one) was set */
+  if (m_msg->aln.llen < 1) {
+    m_msg->aln.llen = 60;
+  }
+
+  if (m_msg->ldb_info.ldnaseq== SEQT_PROT) {
+    m_msg->max_tot = MAXLIB_P;
+  }
+
+#if defined(FASTX) || defined(FASTY) || defined(TFAST)
+  /* set up translation tables: faatran.c */
+  aainit(ppst->tr_type,ppst->debug_lib);
+#endif
+
+/* a sanity check */
+#if !defined(TFAST)
+   if (m_msg->revcomp && m_msg->qdnaseq!=SEQT_DNA && m_msg->qdnaseq!=SEQT_RNA) {
+     fprintf(stderr," cannot reverse complement protein\n");
+     m_msg->revcomp = 0;
+   }
+#endif
+
+   if (pgm_def_arr[pgm_id].ktup > 0) {
+     /* a negative ktup is replaced by its magnitude before the range check */
+     if (ppst->param_u.fa.ktup < 0)
+       ppst->param_u.fa.ktup = -ppst->param_u.fa.ktup;
+
+     if (ppst->param_u.fa.ktup < 1 || ppst->param_u.fa.ktup > mktup) {
+       fprintf(stderr," warning ktup = %d out of range [1..%d], reset to %d\n",
+	       ppst->param_u.fa.ktup, mktup, ppst->param_u.fa.bktup);
+       ppst->param_u.fa.ktup = ppst->param_u.fa.bktup;
+     }
+     /* f_id1/alabel: "sw" + align_label[0] when sw_flag is set, else "bs" + align_label[1] */
+     if (ppst->sw_flag) {
+       SAFE_STRNCPY(m_msg->f_id1,"sw",sizeof(m_msg->f_id1));
+       SAFE_STRNCPY(m_msg->alabel, align_label[0], sizeof(m_msg->alabel));
+     }
+     else {
+       SAFE_STRNCPY(m_msg->f_id1,"bs",sizeof(m_msg->f_id1));
+       SAFE_STRNCPY(m_msg->alabel, align_label[1], sizeof(m_msg->alabel));
+     }
+   }
+
+   if (pgm_id == TFA_PID) {
+     m_msg->revcomp *= 3;
+     if (m_msg->nframe == 3) m_msg->nframe += m_msg->revcomp;
+   }
+   else if (pgm_id == TFX_PID || pgm_id == TFY_PID) {
+     if (m_msg->nframe == 1) m_msg->nframe += m_msg->revcomp;
+   }
+
+#if !defined(TFAST)
+  /* for fasta/fastx searches, itt iterates the query strand */
+  m_msg->nitt1 = m_msg->qframe-1;
+#else
+  /* for tfasta/tfastxy searches, itt iterates the library frames */
+  m_msg->nitt1 = m_msg->nframe-1;
+#endif
+
+  if (pgm_def_arr[pgm_id].ktup > 0) {	/* its FASTA, not SSEARCH */
+    if (ppst->param_u.fa.ktup>=2 && !ppst->param_u.fa.optwid_set) {
+      ppst->param_u.fa.optwid=16;
+      switch (pgm_id) {
+      case FA_PID:
+      case FX_PID:
+      case FY_PID:
+	m_msg->thr_fact = 8;
+	m_msg->thr_fact = 8;	/* NOTE(review): repeats the previous statement */
+	break;
+      case TFA_PID:
+      case TFX_PID:
+      case TFY_PID:
+	m_msg->thr_fact = 4;
+	break;
+      default:
+	m_msg->thr_fact = 4;
+      }
+    }
+    else { m_msg->thr_fact = 4;}
+  }
+  else {
+#if !defined(SW_ALTIVEC) && !defined(SW_SSE2)
+    m_msg->thr_fact = 1;	/* unvectorized SSEARCH  */
+#else
+    m_msg->thr_fact = 8;	/* vectorized SSEARCH */
+#endif
+  }
+
+#ifdef PCOMPLIB
+  m_msg->thr_fact = 1;	/* use much larger buffers */
+#endif
+
+#if defined(PRSS)
+   if (m_msg->shuff_max < 10) m_msg->shuff_max = MAX_RSTATS;
+   if (ppst->zsflag < 10) ppst->zsflag += 10;
+   if (ppst->zs_win > 0) {
+     m_msg->shuff_wid = ppst->zs_win;
+   }
+#endif
+   /* choose score_ix from iniflag/optflag; iniflag takes precedence */
+   if (pgm_def_arr[ppst->pgm_id].ktup > 0) {
+     if (ppst->param_u.fa.iniflag) {
+       ppst->score_ix = 1;
+       SAFE_STRNCPY (m_msg->label, "initn init1", sizeof(m_msg->label));
+     }
+     else if (ppst->param_u.fa.optflag) {
+       ppst->score_ix = 2;
+       m_msg->stages = 1;
+     }
+   }
+   /* allocate and fill pam2 only if it does not exist yet; init_pamx() always runs */
+   if (!ppst->have_pam2) {
+     alloc_pam (MAXSQ, MAXSQ, ppst);
+     init_pam2(ppst);
+   }
+   init_pamx(ppst);
+
+   if (ppst->pam_ms) {
+     if (m_msg->qdnaseq == SEQT_PROT) {
+       /* code to make 'L'/'I' identical scores */
+       ix_l = pascii['L'];
+       ix_i = pascii['I'];
+       ppst->pam2[0][ix_l][ix_i] = ppst->pam2[0][ix_i][ix_l] =
+	 ppst->pam2[0][ix_l][ix_l] = ppst->pam2[0][ix_i][ix_i] =
+	 max(ppst->pam2[0][ix_l][ix_l],ppst->pam2[0][ix_i][ix_i]);
+       for (i=1; i<=ppst->nsq; i++) {
+	 ppst->pam2[0][i][ix_i] = ppst->pam2[0][i][ix_l] =
+	   max(ppst->pam2[0][i][ix_l],ppst->pam2[0][i][ix_i]);
+	 ppst->pam2[0][ix_i][i] = ppst->pam2[0][ix_l][i] =
+	   max(ppst->pam2[0][ix_i][i],ppst->pam2[0][ix_l][i]);
+       }
+
+       /* code to make 'Q'/'K' identical scores (rounded mean of the two) */
+       if (!shift_set) {
+	 ix_l = pascii['Q'];
+	 ix_i = pascii['K'];
+	 ppst->pam2[0][ix_l][ix_i] = ppst->pam2[0][ix_i][ix_l] =
+	   ppst->pam2[0][ix_l][ix_l] = ppst->pam2[0][ix_i][ix_i] =
+	   (ppst->pam2[0][ix_l][ix_l]+ppst->pam2[0][ix_i][ix_i]+1)/2;
+	 for (i=1; i<=ppst->nsq; i++) {
+	   ppst->pam2[0][i][ix_i] = ppst->pam2[0][i][ix_l] =
+	     (ppst->pam2[0][i][ix_l]+ppst->pam2[0][i][ix_i]+1)/2;
+	   ppst->pam2[0][ix_i][i] = ppst->pam2[0][ix_l][i] =
+	     (ppst->pam2[0][ix_i][i]+ppst->pam2[0][ix_l][i]+1)/2;
+	 }
+       }
+     }
+   }
+
+   /*
+   print_pam(ppst);
+   */
+
+   /* once we have a complete pam matrix, we can calculate Lambda and K 
+      for "average" sequences */
+   kar_p = NULL;
+   init_karlin_a(ppst, aa0_f, &kar_p);
+   do_karlin_a(ppst->pam2[0], ppst, aa0_f,
+	       kar_p, &m_msg->Lambda, &m_msg->K, &m_msg->H);
+   ppst->pLambda = m_msg->Lambda;
+   ppst->pK = m_msg->K;
+   ppst->pH = m_msg->H;
+   ppst->LK_set = 1;
+   free(kar_p);
+
+#if defined(FASTF) || defined(FASTS) || defined(FASTM)
+   if (ppst->ext_sq_set) {
+     fprintf(stderr," -S not available on [t]fast[fs]\n");
+     ppst->ext_sq_set = 0;
+     ppst->nsq_e = ppst->nsq;
+
+     /* reset lascii to ignore -S, map lc */
+     init_ascii(0,lascii, ppst->nsq, 0);
+     validate_novel_aa(lascii, ppst->nsq, 0);
+   }
+#endif
+}
+
+/* alloc_pam2p() creates (pam2p==NULL) or resizes and clears a len x (nsq+1)
+   profile: len row pointers into one zeroed slab anchored at pam2p[0].
+   Returns the table, or NULL on failure or when len < 1.  After a failed
+   resize the old pam2p and pam2p[0] can still be given to free_pam2p(). */
+int **
+alloc_pam2p(int **pam2p, int len, int nsq) {
+  int i, pam2p_len;
+  int *pam2pp, **new_rows;
+
+  if (len < 1) return NULL;	/* pam2p[0] must exist */
+
+  if (pam2p == NULL) {
+    if ((pam2p = (int **)calloc(len,sizeof(int *)))==NULL) {
+      fprintf(stderr," Cannot allocate pam2p: %d\n",len);
+      return NULL;
+    }
+    if((pam2p[0] = (int *)calloc((nsq+1)*len,sizeof(int)))==NULL) {
+      fprintf(stderr, "Cannot allocate pam2p[0]: %d\n", (nsq+1)*len);
+      free(pam2p);
+      return NULL;
+    }
+  }
+  else {
+    pam2p_len = (nsq+1)*len*sizeof(int);
+    if ((pam2pp = (int *)realloc(pam2p[0],pam2p_len))==NULL) {
+      fprintf(stderr, "Cannot reallocate pam2p[0]: %d\n", pam2p_len);
+      return NULL;
+    }
+    pam2p[0] = pam2pp;	/* the slab may have moved: never leave a stale anchor */
+    memset(pam2pp,0,pam2p_len);
+    if ((new_rows = (int **)realloc(pam2p,len*sizeof(int *)))==NULL) {
+      fprintf(stderr," Cannot reallocate pam2p: %d\n",len);
+      return NULL;
+    }
+    pam2p = new_rows;
+  }
+  for (i=1; i<len; i++) {
+    pam2p[i] = pam2p[0] + (i*(nsq+1));
+  }
+  return pam2p;
+}
+
+void free_pam2p(int **pam2p) {
+  if (pam2p == NULL) return;	/* nothing to release */
+  /* release the data held in pam2p[0] first, then the pointer table itself */
+  free(pam2p[0]);
+  free(pam2p);
+}
+
+/* sortbest has now become comparison function specific so that we can use
+   a different comparison for fasts/f 
+*/
+#if !defined(FASTS) && !defined (FASTF) && !defined(FASTM)
+void
+qshuffle() {}	/* empty stub, compiled when none of FASTS, FASTF, FASTM is defined */
+
+#ifndef LALIGN	 /* LALIGN has last_calc() in last_thresh.c */
+int
+last_calc(
+	  unsigned char *aa0, unsigned char *aa1, int maxn,
+	  struct beststr **bestp_arr, int nbest,
+	  struct mngmsg m_msg, struct pstruct *ppst
+	  , void **f_str
+	  , void *pstat_str)
+{	/* placeholder last_calc(), compiled only when LALIGN is not defined */
+  return nbest;	/* no argument is examined; nbest is handed back unchanged */
+}
+#endif
+
+/* sortbest() - rarely called shell sort of bptr[0..nbest-1], descending by rst.score[irelv] */
+void sortbest (bptr, nbest, irelv)
+struct beststr **bptr;
+int nbest, irelv;
+{
+    struct beststr *held;
+    int stride, hi, lo;
+    for (stride = nbest/2; stride > 0; stride /= 2)
+	for (hi = stride; hi < nbest; hi++) {
+	    lo = hi - stride;
+	    /* exchange downwards while the lower slot does not hold the larger score */
+	    while (lo >= 0 && !(bptr[lo]->rst.score[irelv] >= bptr[lo + stride]->rst.score[irelv])) {
+	      held = bptr[lo]; bptr[lo] = bptr[lo + stride]; bptr[lo + stride] = held;
+	      lo -= stride;
+	    }
+	}
+}
+
+void show_aux(FILE *fp, struct beststr *bptr) {}	/* stub: writes nothing in this build */
+void header_aux(FILE *fp) {}	/* stub: writes nothing in this build */
+
+#else
+/* sortbest() - rarely called shell sort of bptr[0..nbest-1], ascending by rst.escore */
+void sortbest (bptr, nbest, irelv)
+struct beststr **bptr;
+int nbest, irelv;
+{
+    struct beststr *held;
+    int stride, hi, lo;
+    for (stride = nbest/2; stride > 0; stride /= 2)
+	for (hi = stride; hi < nbest; hi++) {
+	    lo = hi - stride;
+	    /* exchange downwards until the lower slot holds the strictly smaller escore */
+	    while (lo >= 0 && !(bptr[lo]->rst.escore < bptr[lo + stride]->rst.escore)) {
+	      held = bptr[lo]; bptr[lo] = bptr[lo + stride]; bptr[lo + stride] = held;
+	      lo -= stride;
+	    }
+	}
+}
+
+#if defined(FASTS) || defined(FASTM)
+
+/* this shuffle is for FASTS/FASTM: it permutes each substring of aa0 in place */
+/* bytes equal to EOSEQ or ESS are set to '\0', each substring is shuffled, ESS is restored */
+void
+qshuffle(unsigned char *aa0, int n0, int nm0, void *rand_state) {
+
+  unsigned char **aa0start, *aap, tmp;
+  int i,j,k, ns;
+  /* aa0start: nm0+1 zeroed slots that will hold the start of every substring */
+  if ((aa0start=(unsigned char **)calloc(nm0+1,
+					 sizeof(unsigned char *)))==NULL) {
+    fprintf(stderr,"cannot calloc for qshuffle %d\n",nm0);
+    exit(1);
+  }
+  /* NOTE(review): k is not bounded by nm0 below - assumes at most nm0 separators in aa0[0..n0-1] */
+  aa0start[0]=aa0;
+  for (k=1,i=0; i<n0; i++) {
+    if (aa0[i]==EOSEQ || aa0[i]==ESS) {
+      aa0[i]='\0';
+      aa0start[k++] = &aa0[i+1];
+    }
+  }  
+
+  /* aa0start has the beginning of each substring */
+  for (k=0; k<nm0; k++) {
+    aap=aa0start[k];
+    ns = strlen((const char *)aap);
+    for (i=ns; i>1; i--) {	/* swap aap[i-1] with aap[my_nrand(i, rand_state)] */
+      j = my_nrand(i, rand_state);
+      tmp = aap[j];
+      aap[j] = aap[i-1];
+      aap[i-1] = tmp;
+    }
+    aap[ns] = 0;
+  }
+  /* put ESS back in the byte just before substrings 1..nm0-1 */
+  for (k=1; k<nm0; k++) {
+/*  aap = aa0start[k];
+    while (*aap) fputc(pst.sq[*aap++],stderr);
+    fputc('\n',stderr);
+*/
+    aa0start[k][-1]=ESS;
+  }
+
+  free(aa0start);
+}
+#endif
+
+#ifdef FASTF
+void qshuffle(unsigned char *aa0, int n0, int nm0, void *rand_state) {
+  /* FASTF variant: no random numbers are drawn (rand_state is not used);
+     the column order is reversed instead.  aa0 is addressed as nm0 rows
+     that start nmoff+1 bytes apart, with nmoff columns in each row. */
+  unsigned char held;
+  int left, right, row, base;
+  int nmoff;
+
+  nmoff = (n0 - nm0 - 1)/nm0 + 1;
+
+  /* walk inwards from both ends; stop when the two columns meet or cross */
+  for (left = 0, right = nmoff - 1; right > left; left++, right--) {
+
+    /* exchange column `right` with column `left` in every row */
+    for (row = 0, base = 0; row < nm0; row++, base += nmoff+1) {
+      held = aa0[base + right];
+      aa0[base + right] = aa0[base + left];
+      aa0[base + left] = held;
+    }
+
+  }
+}
+#endif
+
+
+/* show_aux() writes the additional best_str values rst.segnum and rst.seglen to fp */
+void show_aux(FILE *fp, struct beststr *bptr) {
+  fprintf(fp," %2d %3d",bptr->rst.segnum,bptr->rst.seglen);
+}
+
+void header_aux(FILE *fp) {
+  fputs(" sn  sl", fp);	/* fixed column titles, written without a newline */
+}
+#endif
+
+void
+fill_pam(int **pam2p, int n0, int nsq, double **freq2d, double scale, int **no_remap) {
+  /* fill_pam() writes scale * freq2d[pos][aa-1], rounded half away from
+     zero, into pam2p[pos][col] for the 20 residues aa = 1..20 and the
+     positions pos = 0..n0-1.  nsq is not used.
+     col is aa itself when no_remap[pos][aa-1] is set, otherwise it is
+     qascii[pssm_aa[aa]]. */
+  int pos, aa, col, mapped_col;
+  double scaled;
+
+  for (aa = 1; aa <= 20; aa++) {
+    mapped_col = qascii[pssm_aa[aa]];
+
+    for (pos = 0; pos < n0; pos++) {
+      scaled = scale * freq2d[pos][aa-1];
+      /* push by one half in the direction of the sign, then truncate */
+      scaled += (scaled < 0.0) ? -0.5 : 0.5;
+
+      col = no_remap[pos][aa-1] ? aa : mapped_col;
+      pam2p[pos][col] = (int)scaled;
+    }
+
+  }
+}
+
+double
+get_lambda(int **pam2p, int n0, int nsq, unsigned char *query) {
+  double lambda = 0.0, H = 0.0;	/* defined result even when karlin() fails */
+  double *pr, tot, sum;
+  int i, ioff, j, min, max, q_i;
+  /* get_lambda() builds the score distribution of pam2p and returns karlin()'s lambda */
+  /* get min and max scores */
+  min = BIGNUM;
+  max = -BIGNUM;
+  if(pam2p[0][1] == -BIGNUM) {	/* row 0 holds the -BIGNUM marker: start at row 1 */
+    ioff = 1;
+    n0++;
+  } else {
+    ioff = 0;
+  }
+  /* NOTE(review): columns run 1..nsq-1, so column nsq is never scanned - confirm */
+  for (i = ioff ; i < n0 ; i++) {
+    for (j = 1; j < nsq ; j++) {
+      if (min > pam2p[i][j])
+	min = pam2p[i][j];
+      if (max < pam2p[i][j])
+	max = pam2p[i][j];
+    }
+  }
+
+  /*  fprintf(stderr, "min: %d\tmax:%d\n", min, max); */
+  /* pr[s - min] accumulates the weight of every score s in [min, max] */
+  if ((pr = (double *) calloc(max - min + 1, sizeof(double))) == NULL) {
+    fprintf(stderr, "Couldn't allocate memory for score probabilities: %d\n", max - min + 1);
+    exit(1);
+  }
+
+  tot = (double) rrtotal * (double) rrtotal * (double) n0;
+  for (i = ioff ; i < n0 ; i++) {
+    /* bytes below 'A' are used as residue codes directly, others go through aascii */
+    if (query[i] < 'A') {q_i = query[i];}
+    else {q_i= aascii[query[i]];}
+
+    for (j = 1; j < nsq ; j++) {
+      pr[pam2p[i][j] - min] +=
+	(double) ((double) rrcounts[q_i] * (double) rrcounts[j]) / tot;
+    }
+  }
+
+  sum = 0.0;
+  for(i = 0 ; i <= max-min ; i++) { 
+    sum += pr[i];
+    /*     fprintf(stderr, "%3d: %g %g\n", i+min, pr[i], sum); */
+  }
+  /*   fprintf(stderr, "sum: %g\n", sum); */
+  /* normalize so that the weights add up to one */
+  for(i = 0 ; i <= max-min ; i++) { pr[i] /= sum; }
+
+  if (!karlin(min, max, pr, &lambda, &H)) {
+    fprintf(stderr, "Karlin lambda estimation failed\n");
+  }
+
+  /*   fprintf(stderr, "lambda: %g\n", lambda); */
+  free(pr);
+
+  return lambda;
+}
+
+/*
+   *aa0 - query sequence
+   n0   - length
+   pamscale - scaling for pam matrix - provided by apam.c, either
+              0.346574 = ln(2)/2 (P120, BL62) or
+	      0.231049 = ln(2)/3 (P250, BL50) 
+*/
+
+void	/* forward declaration: read_pssm() calls scale_pssm() before its definition */
+scale_pssm(int **pssm2p, double **freq2d,
+	   unsigned char *query, int n0,
+	   int **pam2, double pamscale);
+
+static unsigned char ustandard_aa[] ="\0ARNDCQEGHILKMFPSTWYV";	/* NUL at index 0, then 20 residue letters; scale_pssm() passes it to get_lambda() as the query */
+
+void
+read_pssm(unsigned char *aa0, int n0, int nsq,
+	  double pamscale, 
+	  FILE *fp, int pgpf_type, struct pstruct *ppst) {
+  /* read_pssm() loads a checkpoint profile from fp (pgpf_type 0: binary,
+     1: text), checks that its length equals n0 and passes the n0 x 20
+     values to scale_pssm(), which fills ppst->pam2p[0].  Every error ends
+     in exit(1).  aa0 and nsq are not referenced here. */
+  int i, k = 0;
+  int len = 0;	/* initialized: it is printed even when the read fails */
+  int qi, rj;	/* qi - index query; rj - index residues (1-20) */
+  unsigned char *query, ctmp;
+  char dline[512];
+  double freq, **freq2d;
+
+  if (pgpf_type == 0) {
+    if (1 != fread(&len, sizeof(int), 1, fp)) {
+      fprintf(stderr, "error reading from checkpoint file: %d\n", len);
+      exit(1);
+    }
+
+    if (len != n0) {
+      fprintf(stderr, "profile length (%d) and query length (%d) don't match!\n",
+	      len,n0);
+      exit(1);
+    }
+
+    /* read over query sequence stored in BLAST profile */
+    if(NULL == (query = (unsigned char *) calloc(len+2, sizeof(char)))) {
+      fprintf(stderr, "Couldn't allocate memory for query!\n");
+      exit(1);
+    }
+
+    if (fread(query, sizeof(char), len, fp) != (size_t)len) {
+      fprintf(stderr, "Couldn't read query sequence from profile: %s\n", query);
+      exit(1);
+    }
+  }
+  else if (pgpf_type == 1) {
+
+    if ((fgets(dline,sizeof(dline),fp) == NULL)  ||
+	(1 != sscanf(dline, "%d",&len))) {
+      fprintf(stderr, "error reading from checkpoint file: %d\n", len);
+      exit(1);
+    }
+
+    if(len != n0) {
+      fprintf(stderr, "profile length (%d) and query length (%d) don't match!\n",
+	      len,n0);
+      exit(1);
+    }
+
+    /* read over query sequence stored in BLAST profile */
+    if(NULL == (query = (unsigned char *) calloc(len+2, sizeof(char)))) {
+      fprintf(stderr, "Couldn't allocate memory for query!\n");
+      exit(1);
+    }
+
+    if (fgets((char *)query,len+2,fp)==NULL) {
+      fprintf(stderr, "Couldn't read query sequence from profile: %s\n", query);
+      exit(1);
+    }
+  }  
+  else {
+    fprintf(stderr," Unrecognized PSSM file type: %d\n",pgpf_type);
+    exit(1);
+  }
+
+  /* currently we don't do anything with query; ideally, we should
+     check to see that it actually matches aa0 ... */
+
+  /* quick 2d array alloc: */
+  if((freq2d = (double **) calloc(n0, sizeof(double *))) == NULL) {
+    fprintf(stderr, "Couldn't allocate memory for frequencies!\n");
+    exit(1);
+  }
+
+  if((freq2d[0] = (double *) calloc(n0 * 20, sizeof(double))) == NULL) {
+    fprintf(stderr, "Couldn't allocate memory for frequencies!\n");
+    exit(1);
+  }
+
+  /* a little pointer arithmetic to fill out 2d array: */
+  for (i = 1 ; i < n0 ; i++) {
+    freq2d[i] = freq2d[i-1] + 20;
+  }
+
+  if (pgpf_type == 0) {
+    for (qi = 0 ; qi < n0 ; qi++) {
+      for (rj = 0 ; rj < 20 ; rj++) {
+	if(1 != fread(&freq, sizeof(double), 1, fp)) {
+	  fprintf(stderr, "Error while reading frequencies!\n");
+	  exit(1);
+	}
+	freq2d[qi][rj] = freq;
+      }
+    }
+  }
+  else {
+    for (qi = 0 ; qi < n0 ; qi++) {
+      k = 0;	/* fields parsed on this line; stays 0 when fgets() fails */
+      if ((fgets(dline,sizeof(dline),fp) ==NULL) ||
+      (k = sscanf(dline,"%c %lg %lg %lg %lg %lg %lg %lg %lg %lg %lg %lg %lg %lg %lg %lg %lg %lg %lg %lg %lg\n",
+		 &ctmp, &freq2d[qi][0], &freq2d[qi][1], &freq2d[qi][2], &freq2d[qi][3], &freq2d[qi][4], 
+		 &freq2d[qi][5], &freq2d[qi][6], &freq2d[qi][7], &freq2d[qi][8], &freq2d[qi][9],
+		 &freq2d[qi][10], &freq2d[qi][11], &freq2d[qi][12], &freq2d[qi][13], &freq2d[qi][14],
+		      &freq2d[qi][15], &freq2d[qi][16], &freq2d[qi][17], &freq2d[qi][18], &freq2d[qi][19]))<1) {
+	fprintf(stderr, "Error while reading frequencies: %d read!\n",k);
+	exit(1);
+      }	/* NOTE(review): a line with fewer than 21 fields is accepted; missing values stay 0 */
+      for (rj=0; rj<20; rj++) { freq2d[qi][rj] /= 10.0; }	/* reverse scaling */
+    }
+  }
+
+  scale_pssm(ppst->pam2p[0], freq2d, query, n0, ppst->pam2[0],pamscale);
+
+  free(freq2d[0]);
+  free(freq2d);
+
+  free(query);
+}
+
+/* before fasta-36.3.6 (with reordered amino-acid mapping), scale_pssm()
+   simply produced a log(q_ij/p_j) and put it into pam2p.
+
+   But PSSMs use pssm_aa encoding, while fasta-36.3.6 uses NCBIstdaa
+   encoding, so the pam2p must be re-mapped.
+
+   scale_pssm() turns the n0 x 20 table freq2d[][] into integer scores
+   in pssm2p[][] (written by fill_pam()):
+
+   (1) every freq2d[qi][rj] > 1e-20 is replaced, in place, by
+       log(freq/(rrcounts[rj+1]/rrtotal))/pamscale; every other cell
+       takes the pam2[][] value for the query residue and is flagged
+       in no_remap[qi][rj]
+   (2) a target lambda is taken from get_lambda(pam2, ...); the scale
+       passed to fill_pam() is then bracketed (first step 0.05 away
+       from 1.0, the distance doubling each further step) and refined
+       with 10 bisection steps, comparing get_lambda(pssm2p, ...) with
+       the target each time
+   (3) pssm2p is filled one last time with the final scale
+
+   freq2d[][] is overwritten.  exit(1) is called if the work array
+   cannot be allocated, or if get_lambda() reports a value <= 0 for the
+   profile during bracketing.
+*/
+
+void
+scale_pssm(int **pssm2p, double **freq2d, unsigned char *query, int n0, int **pam2, double pamscale) {
+  int i, qi, rj;
+  double freq, new_lambda, lambda;
+  int first, too_high;
+  double scale, scale_high, scale_low;
+  int **no_remap;
+
+
+  /* quick 2d array alloc: */
+  if((no_remap = (int **) calloc(n0, sizeof(int *))) == NULL) {
+    fprintf(stderr, "***error [%s:%d] Couldn't allocate memory for remap[%d]\n",__FILE__, __LINE__, n0);
+    exit(1);
+  }
+
+  if((no_remap[0] = (int *) calloc(n0 * 20, sizeof(int))) == NULL) {
+    fprintf(stderr, "***error [%s:%d] Couldn't allocate memory for remap[%d]\n",__FILE__, __LINE__, n0);
+    exit(1);
+  }
+
+  /* rows of no_remap[] all point into the single n0*20 block above */
+  for (qi=1; qi < n0; qi++) {
+    no_remap[qi] = no_remap[qi-1]+20;
+  }
+
+  /* convert freq2d from frequencies to log_scores; 
+     fill zeros with BLOSUM62 values */
+
+  for (rj = 0 ; rj < 20 ; rj++) {
+    for (qi = 0 ; qi < n0 ; qi++) {
+      if (freq2d[qi][rj] > 1e-20) {
+	freq = log(freq2d[qi][rj] /((double) (rrcounts[rj+1])/(double) rrtotal));
+	freq /= pamscale; /* this gets us close to original pam scores */
+	freq2d[qi][rj] = freq;
+      }
+      else {		
+	/* when blastpgp decides to leave something out, it puts 0's in all the frequencies
+	   in the binary checkpoint file.  In the ascii version, however, it uses BLOSUM62
+	   values.  I will put in scoring matrix values as well */
+	/* 11-Oct-2015 -- this does not work properly, because the
+	   correct amino-acid ordering is not used -- pam2 uses
+	   NCBIStdaa ordering, but the rest of the matrix uses pssm_aa
+	   ordering, which is changed in fill_pam */
+
+	/* flag the cell; the flag array is handed to fill_pam() below */
+	no_remap[qi][rj] = 1;
+	/* query values below 'A' index pam2[] directly; anything else is
+	   first mapped through aascii[] */
+	if (query[qi] < 'A') {
+	  freq2d[qi][rj] = pam2[query[qi]][rj+1];
+	}
+	else {
+	  freq2d[qi][rj] = pam2[aascii[query[qi]]][rj+1];
+	}
+      }
+    }
+  }
+
+  /* now figure out the right scale */
+  scale = 1.0;
+  lambda = get_lambda(pam2, 20, 20, ustandard_aa);
+
+#ifdef DEBUG
+  /*
+  fill_pam(pssm2p, n0, 20, freq2d, scale, no_remap);
+  fprintf(stderr,"        ");
+  for (rj = 1; rj <= 20; rj++) {
+    fprintf(stderr,"   %c", NCBIstdaa[rj]);
+  }
+  fprintf(stderr,"\n");
+  for (qi = 0 ; qi < n0 ; qi++) {
+    fprintf(stderr, "%4d %c: ", qi+1, NCBIstdaa[query[qi]]);
+    for (rj = 1 ; rj <= 20 ; rj++) {
+      fprintf(stderr, "%4d", pssm2p[qi][rj]);
+    }
+    fprintf(stderr, "\n");
+  }
+  */
+#endif
+
+  /* should be near 1.0 because of our initial scaling by ppst->pamscale */
+  /* fprintf(stderr, "real_lambda: %g\n", lambda); */
+
+  /* get initial high/low scale values: */
+  /* the first pass (scale == 1.0) picks the search direction and sets
+     too_high; later passes keep moving the same bound away from 1.0
+     until the comparison with lambda flips, which ends the loop */
+  first = 1;
+  while (1) {
+    fill_pam(pssm2p, n0, 20, freq2d, scale, no_remap);
+    new_lambda = get_lambda(pssm2p, n0, 20, query); 
+
+    if (new_lambda > lambda) {
+      if (first) {
+	first = 0;
+	scale = scale_high = 1.0 + 0.05;
+	scale_low = 1.0;
+	too_high = 1;
+      } else {
+	if (!too_high) break;
+	scale = (scale_high += scale_high - 1.0);
+      }
+    } else if (new_lambda > 0) {
+      if (first) {
+	first = 0;
+	scale_high = 1.0;
+	scale = scale_low = 1.0 - 0.05;
+	too_high = 0;
+      } else {
+	if (too_high) break;
+	scale = (scale_low += scale_low - 1.0);
+      }
+    } else {
+      fprintf(stderr, "new_lambda (%g) <= 0; matrix has positive average score", new_lambda);
+      exit(1);
+    }
+  }
+
+  /* now do binary search between low and high */
+  for (i = 0 ; i < 10 ; i++) {
+    scale = 0.5 * (scale_high + scale_low);
+    fill_pam(pssm2p, n0, 20, freq2d, scale, no_remap);
+    new_lambda = get_lambda(pssm2p, n0, 20, query);
+    
+    if (new_lambda > lambda) scale_low = scale;
+    else scale_high = scale;
+  }
+
+  /* final fill with the midpoint of the last interval */
+  scale = 0.5 * (scale_high + scale_low);
+  fill_pam(pssm2p, n0, 20, freq2d, scale, no_remap);
+
+  free(no_remap[0]);
+  free(no_remap);
+
+#ifdef DEBUG
+  /*
+    fprintf(stderr, "final scale: %g\n", scale);
+
+    fprintf(stderr,"        ");
+    for (rj = 1; rj <= 20; rj++) {
+      fprintf(stderr,"   %c", NCBIstdaa[rj]);
+    }
+    fprintf(stderr,"\n");
+    for (qi = 0 ; qi < n0 ; qi++) {
+      fprintf(stderr, "%4d %c: ", qi+1, NCBIstdaa[query[qi]]);
+      for (rj = 1 ; rj <= 20 ; rj++) {
+	fprintf(stderr, "%4d", pssm2p[qi][rj]);
+      }
+      fprintf(stderr, "\n");
+    }
+  */
+#endif
+
+}
+
+#if defined(CAN_PSSM)
+int
+parse_pssm_asn_fa(FILE *afd, int *n_rows, int *n_cols,
+		  unsigned char **query,
+		  double ***wfreqs, double ***freqs, int ***iscores,
+		  char *matrix, int *gap_open, int *gap_extend,
+		  double *lambda);
+
+/* the ASN.1 pssm includes information about the scoring matrix used
+   (though not the gap penalty in the current version PSSM:2) The PSSM
+   scoring matrix and gap penalties should become the default if they
+   have not been set explicitly.
+*/
+
+/* read the PSSM from an open FILE *fp - but nothing has been read
+   from *fp */
+
+/* read_asn_pssm() - load an ASN.1 PSSM from the open stream fp into
+   ppst->pam2p[0].
+
+   aa0, n0   query sequence and its length; aa0 is used as the query
+             when the PSSM file does not supply one
+   nsq       not used here
+   pamscale  passed to scale_pssm() when scores must be derived from
+             frequencies
+   fp        open PSSM file; it is read but not closed here
+   ppst      parameters; gdelval/ggapval/pamfile/pam2p may be updated
+
+   Side effects: if gap_set/del_set are not yet set, the gap penalties
+   are taken from the PSSM (or default to -11/-1 for BLOSUM62) and the
+   flags are set; if the PSSM matrix is BLOSUM62 and no matrix was
+   chosen, BL62 is loaded.
+
+   Returns 1 on success, -1 if the file cannot be parsed or contains
+   neither integer scores nor frequencies.  Calls exit(1) if the PSSM
+   has fewer columns than the query has residues.
+*/
+int
+read_asn_pssm(unsigned char *aa0, int n0, int nsq,
+	      double pamscale, FILE *fp, struct pstruct *ppst) {
+
+  int itmp;
+  int qi, rj;	/* qi - index query; rj - index residues (1-24) */
+  int **pam2p;
+  unsigned char *query;
+  char matrix[MAX_SSTR];
+  double psi2_lambda;
+  double **wfreq2d=NULL, **freq2d=NULL;
+  int **iscores2d=NULL;
+  int gap_open, gap_extend;
+  int n_rows, n_cols;
+
+  pam2p = ppst->pam2p[0];
+
+  /* get the information from the ASN.1 (binary) file */
+  if (parse_pssm_asn_fa(fp, &n_rows, &n_cols, &query, &wfreq2d, &freq2d, &iscores2d,
+			matrix, &gap_open, &gap_extend, &psi2_lambda)<=0) {
+    return -1;
+  }
+
+  /* not using wfreq2d[][] right now, free it */
+  if (wfreq2d != NULL) {
+    if (wfreq2d[0] != NULL) {free(wfreq2d[0]);}
+    free(wfreq2d);
+  }
+
+  /* without scores or frequencies there is nothing to build a profile
+     from; fail before any global state is touched instead of handing
+     a NULL table to scale_pssm() */
+  if (iscores2d == NULL && freq2d == NULL) {
+    if (query != NULL) {free(query);}
+    return -1;
+  }
+
+  /* do we have a query sequence */
+  if (query == NULL) { query = aa0;}
+
+  if (!gap_set) {
+    if (gap_open) {
+      if (gap_open > 0) {gap_open = -gap_open;}
+      ppst->gdelval = gap_open;
+    }
+    else if (strncmp(matrix,"BLOSUM62",8)==0) {
+      ppst->gdelval = -11;
+    }
+    gap_set = 1;
+  }
+  if (!del_set) {
+    if (gap_extend) {
+      if (gap_extend > 0) {gap_extend = -gap_extend;}
+      ppst->ggapval = gap_extend;
+    }
+    else if (strncmp(matrix,"BLOSUM62",8)==0) {
+      ppst->ggapval = -1;
+    }
+    del_set = 1;
+  }
+
+  if (strncmp(matrix, "BLOSUM62", 8)== 0 && !ppst->pam_set) {
+    SAFE_STRNCPY(ppst->pamfile, "BL62", 120);
+    SAFE_STRNCPY(ppst->pamfile_save, ppst->pamfile, 120);
+    standard_pam(ppst->pamfile,ppst,del_set, gap_set);
+    if (!ppst->have_pam2) {
+     alloc_pam (MAXSQ, MAXSQ, ppst);
+    }
+    init_pam2(ppst);
+    ppst->pam_set = 1;
+  }
+
+  /* the PSSM must have at least one column per query residue */
+  if (n_cols < n0) { 
+    fprintf(stderr, " query length: %d != n_cols: %d\n",n0, n_cols);
+    exit(1);
+  }
+
+  /* try to just use the the iscore2d file */
+  if (iscores2d != NULL) {
+    for (qi = 0 ; qi < n0 ; qi++) {
+      for (rj = 1 ; rj <= 24 ; rj++) {
+	itmp = iscores2d[qi][rj];
+	/* scores below -256 are stored as 0 */
+	if (itmp < -256) itmp=0;
+	pam2p[qi][rj] = itmp;
+      }
+    }
+    /* all done, free it */
+    free(iscores2d[0]);
+    free(iscores2d);
+  }
+  else {
+    scale_pssm(ppst->pam2p[0], freq2d, query, n0, ppst->pam2[0], pamscale);
+  }
+
+  /* #ifdef, as in the other DEBUG sections of this file */
+#ifdef DEBUG
+  if (ppst->debug_lib) {
+    /*    fprintf(stderr, "final scale: %g\n", scale); */
+
+    fprintf(stderr,"        ");
+    for (rj = 1; rj <= 24; rj++) {
+      fprintf(stderr,"  %c", NCBIstdaa[rj]);
+    }
+    fprintf(stderr,"\n");
+    for (qi = 0 ; qi < n0 ; qi++) {
+      fprintf(stderr, "%3d %c: ", qi+1, NCBIstdaa[aa0[qi]]);
+      for (rj = 1 ; rj <= 24 ; rj++) {
+	fprintf(stderr, "%3d", pam2p[qi][rj]);
+      }
+      fprintf(stderr, "\n");
+    }
+  }
+#endif
+  
+  if (freq2d != NULL) {
+    free(freq2d[0]);
+    free(freq2d);
+  }
+
+  /* only free the query if it came from the PSSM file */
+  if (query != aa0) free(query);
+  return 1;
+}
+#endif
+
+/* last_params() sets up values in pstruct *ppst now that all
+   parameters and data are available.
+
+   It:
+   (1) moves m_msg->n0 to ppst->n0 for statistics calculations
+   (2) sets ppst->nsq_e
+   (3) reads the PSSM file if one is being used, and closes it again
+   (4) calculates m_msg->nm0 for FASTF/S/M
+   (5) determines statistical strategy for FASTF/S, sets last_calc_flg
+       and qshuffle
+   (6) lowers ktup for short sequences
+
+   Before (2) it also restores ppst->pamfile from ppst->pamfile_save if
+   they differ, and, when ppst->pam_variable is set, re-derives
+   Lambda/K/H after min_pam_bits() has picked a matrix.
+
+   Returns immediately if n0 < 0.  If the PSSM file cannot be opened or
+   parsed, a message is written to stderr, ppst->pam_pssm is cleared,
+   and the function returns without doing steps (4)-(6).
+*/
+
+void
+last_params(unsigned char *aa0, int n0, 
+	    struct mngmsg *m_msp,
+	    struct pstruct *ppst
+	    ) {
+  int i, nsq;
+  FILE *fp;
+  int pssm_ok;
+  int is_fastxy=0;
+  int n0_eff;
+  /* do_karlin_a() must be re-run everytime the scoring matrix changes */
+  double *kar_p;
+  double aa0_f[MAXSQ];
+
+  if (n0 < 0) { return;}
+
+  
+  n0_eff = m_msp->n0;
+  ppst->n0 = m_msp->n0;
+#if !defined(TFAST) && (defined(FASTX) || defined(FASTY))
+  /* translated DNA query: effective length is in codons */
+  n0_eff /= 3;
+  is_fastxy = 1;
+#endif
+
+  /* reset the PAMFILE to the original value */
+  if (strncmp(ppst->pamfile, ppst->pamfile_save,120)!=0) {
+    SAFE_STRNCPY(ppst->pamfile, ppst->pamfile_save, 120);
+    standard_pam(ppst->pamfile,ppst,del_set, gap_set);
+    init_pam2(ppst);
+    init_pamx(ppst);
+  }
+
+  /* **************************************************************** */
+  /* adjust scoring matrix for short protein/translated protein queries */
+  /* **************************************************************** */
+
+  if (ppst->pam_variable) {
+    if (min_pam_bits(n0_eff, DEF_MIN_BITS, ppst, del_set, gap_set)) {
+      init_pam2(ppst);
+      init_pamx(ppst);
+      kar_p = NULL;
+      init_karlin_a(ppst, aa0_f, &kar_p);
+      do_karlin_a(ppst->pam2[0], ppst, aa0_f,
+		  kar_p, &m_msp->Lambda, &m_msp->K, &m_msp->H);
+      ppst->pLambda = m_msp->Lambda;
+      ppst->pK = m_msp->K;
+      ppst->pH = m_msp->H;
+      ppst->LK_set = 1;
+      free(kar_p);
+    }
+    else {
+      fprintf(stderr,"+++ warning [%s:%d] - query too short [%d] for %d bit signal -- fasts36 may be more useful +++\n",
+	      __FILE__, __LINE__, n0, DEF_MIN_BITS);
+    }
+  }
+
+  if (ppst->ext_sq_set) { ppst->nsq_e = nsq = 2*ppst->nsq; }
+  else {ppst->nsq_e = nsq = ppst->nsq;}
+
+#if defined(CAN_PSSM)
+  ppst->pam2p[0] = alloc_pam2p(ppst->pam2p[0],n0,MAXSQ);
+  ppst->pam2p[1] = alloc_pam2p(ppst->pam2p[1],n0,MAXSQ);
+
+  if (ppst->pam_pssm) {
+    /* each branch opens the file itself, so each branch closes it
+       once the reader has returned */
+    if ((ppst->pgpfile_type == 0) && (fp=fopen(ppst->pgpfile,"rb"))) {
+      read_pssm(aa0, n0, ppst->nsq, ppst->pamscale, fp, 0, ppst);
+      fclose(fp);
+      extend_pssm(aa0, n0, ppst);
+    }
+    else if ((ppst->pgpfile_type == 1) && (fp=fopen(ppst->pgpfile,"r"))) {
+      read_pssm(aa0, n0, ppst->nsq, ppst->pamscale, fp, 1, ppst);
+      fclose(fp);
+      extend_pssm(aa0, n0, ppst);
+    }
+    else if ((ppst->pgpfile_type == 2) && (fp=fopen(ppst->pgpfile,"rb"))) {
+      /* NOTE(review): assumes parse_pssm_asn_fa() leaves fp open --
+	 confirm against pssm_asn_subs.c */
+      pssm_ok = (read_asn_pssm(aa0, n0, ppst->nsq, ppst->pamscale, fp, ppst)>0);
+      fclose(fp);
+      if (pssm_ok) {
+	extend_pssm(aa0, n0, ppst);
+      }
+      else {
+	fprintf(stderr," Could not parse PSSM file: %s\n",ppst->pgpfile);
+	ppst->pam_pssm = 0;
+	return;
+      }
+    }
+    else {
+      fprintf(stderr," Could not open PSSM file: %s\n",ppst->pgpfile);
+      ppst->pam_pssm = 0;
+      return;
+    }
+  }
+#endif
+
+#if defined(FASTF) || defined(FASTS) || defined(FASTM)
+  /* one fragment, plus one for every separator found in the query */
+  m_msp->nm0 = 1;
+  for (i=0; i<n0; i++)
+    if (aa0[i]==EOSEQ || aa0[i]==ESS) m_msp->nm0++;
+
+/*
+  for FASTS, we can do statistics in one of two different ways
+  if there are <= 10 query fragments, then we calculate probabilistic
+  scores for every library sequence.  If there are > 10 fragments, this
+  takes much too long and too much memory, so we use the old fashioned
+  raw score only z-score normalized method initially, and then calculate
+  the probabilistic scores for the best hits.  To scale those scores, we
+  also need a set of random probabilistic scores.  So we do the qshuffle
+  to get them.
+
+  For FASTF, precalculating probabilities is prohibitively expensive,
+  so we never do it; FASTF always acts like FASTS with nfrags>10.
+
+*/
+
+#if defined(FASTS) || defined(FASTM)
+  if (m_msp->nm0 > 10) m_msp->escore_flg = 0;
+  else m_msp->escore_flg = 1;
+#endif
+
+  if (m_msp->escore_flg && (ppst->zsflag&1)) {
+    m_msp->last_calc_flg = 0;
+    m_msp->qshuffle = 0;
+  }
+  else {	/* need random query, second set of 2000 scores */
+    m_msp->last_calc_flg = 1;
+    m_msp->qshuffle = 1;
+  }
+#else
+#ifndef LALIGN
+  m_msp->last_calc_flg = 0;
+#else
+  m_msp->last_calc_flg = 1;	/* LALIGN needs last_calc for threshold */
+#endif
+  m_msp->qshuffle = 0;
+  m_msp->escore_flg = 0;
+  m_msp->nm0 = 1;
+#endif
+
+/* adjust the ktup if appropriate */  
+
+  if (pgm_def_arr[ppst->pgm_id].ktup > 0) {
+    if (!ktup_set ) {
+      ppst->param_u.fa.ktup = pgm_def_arr[ppst->pgm_id].ktup;
+      if (m_msp->qdnaseq == SEQT_PROT || is_fastxy) {
+#if defined(FASTS) || defined(FASTM)
+	if (n0_eff > 100 && ppst->param_u.fa.ktup > 2) ppst->param_u.fa.ktup = 2;
+#endif
+	if (n0_eff <= 40 && ppst->param_u.fa.ktup > 1) ppst->param_u.fa.ktup = 1;
+      }
+      else if (m_msp->qdnaseq == SEQT_DNA || m_msp->qdnaseq == SEQT_RNA) {
+	if (n0_eff <= 20 && ppst->param_u.fa.ktup > 1) ppst->param_u.fa.ktup = 1;
+#if defined(FASTS) || defined(FASTM)
+	/* with the current (April 12 2005) dropfs2.c - ktup cannot be > 2 */
+	else ppst->param_u.fa.ktup = 2;
+#else
+	else if (n0 < 50 && ppst->param_u.fa.ktup > 2) ppst->param_u.fa.ktup = 2;
+	else if (n0 < 100 && ppst->param_u.fa.ktup > 3)  ppst->param_u.fa.ktup = 3;
+#endif
+      }
+    }
+    /* regardless of ktup state */
+    /* E()-value thresholds can only stay on if Lambda/K were set above */
+    if (ppst->param_u.fa.use_E_thresholds) {
+      ppst->param_u.fa.use_E_thresholds = ppst->LK_set;
+    }
+    if (!E_cgap_set) {
+      ppst->param_u.fa.E_join = ppst->param_u.fa.E_band_opt * 5;
+    }
+    else {
+      /* an E_join above 1.0 is treated as a multiple of E_band_opt */
+      if (ppst->param_u.fa.E_join > 1.0) {
+	ppst->param_u.fa.E_join = ppst->param_u.fa.E_band_opt * ppst->param_u.fa.E_join;
+      }
+    }
+  }
+}
+
+/* validate_params() is a sanity check for two recurring problems:
+
+   (1) boundary cells pam2[0][x][0] / pam2[0][0][x] (and the pam2[1]
+       equivalents when ext_sq_set) that are not -BIGNUM
+   (2) query residues or lascii[] codes that lie beyond ppst->nsq_e
+
+   Every problem found is reported on stderr.  Returns 1 if nothing
+   was reported, 0 otherwise.  m_msg and pascii are accepted but not
+   examined.
+ */
+
+int
+validate_params(const unsigned char *aa0, int n0, 
+		const struct mngmsg *m_msg,
+		const struct pstruct *ppst,
+		const int *lascii, const int *pascii) {
+  int params_ok = 1;
+  int k;
+
+  /* boundary row/column of the standard matrix: anything above -1000
+     is taken to mean the -BIGNUM sentinel is missing */
+  k = 0;
+  while (k < ppst->nsq) {
+    if (ppst->pam2[0][0][k] > -1000) {
+      fprintf(stderr," *** ERROR ***  pam2[0][0][%d/%c] == %d\n",
+              k,NCBIstdaa[k],ppst->pam2[0][0][k]);
+      params_ok = 0;
+    }
+    if (ppst->pam2[0][k][0] > -1000) {
+      fprintf(stderr," *** ERROR ***  pam2[0][%d/%c][0] == %d\n",
+              k,NCBIstdaa[k],ppst->pam2[0][k][0]);
+      params_ok = 0;
+    }
+    k++;
+  }
+
+  /* same test for the extended matrix, over nsqx entries */
+  if (ppst->ext_sq_set) {
+    k = 0;
+    while (k < ppst->nsqx) {
+      if (ppst->pam2[1][0][k] > -1000) {
+        fprintf(stderr," *** ERROR ***  pam2[1][0][%d] == %d\n",
+                k,ppst->pam2[1][0][k]);
+        params_ok = 0;
+      }
+      if (ppst->pam2[1][k][0] > -1000) {
+        fprintf(stderr," *** ERROR ***  pam2[1][%d][0] == %d\n",
+                k,ppst->pam2[1][k][0]);
+        params_ok = 0;
+      }
+      k++;
+    }
+  }
+
+  /* query residues: ESS is always accepted, everything else must not
+     exceed nsq_e */
+  for (k = 0; k < n0; k++) {
+    if (aa0[k] == ESS || aa0[k] <= ppst->nsq_e) continue;
+    fprintf(stderr," *** ERROR *** aa0[%d] = %c[%d > %d] out of range\n",
+            k, aa0[k], aa0[k], ppst->nsq_e);
+    params_ok = 0;
+  }
+
+  /* library mapping: codes at or above NA are skipped; the rest must
+     not exceed nsq_e.  pascii[] is deliberately not checked: currently
+     it is not reset for upper-case only */
+  for (k = 0; k < 128; k++) {
+    if (lascii[k] >= NA || lascii[k] <= ppst->nsq_e) continue;
+    fprintf(stderr," *** ERROR *** lascii [%c|%d] = %d > %d out of range\n",
+            k, k, lascii[k], ppst->nsq_e);
+    params_ok = 0;
+  }
+
+  return params_ok;
+}
+
+/* extend_pssm() - complete the profile in ppst->pam2p[0] and derive
+   the extended profile ppst->pam2p[1] from it.
+
+   For each of the n0 rows of pam2p[0]:
+     - column 0 becomes -BIGNUM
+     - B, Z and J columns become (int)((a + b + 0.5)/2.0) of the
+       N/D, Q/E and I/L columns respectively
+     - X and '*' columns become ppst->pam_xm ('*' shares the X column
+       if its code is >= ppst->nsq)
+   pam2p[1] then receives column 0 = -BIGNUM and a copy of columns
+   1..nsq.  With ppst->ext_sq_set, columns nsq+1..2*nsq of pam2p[0]
+   repeat columns 1..nsq, and those of pam2p[1] are set to pam_xm.
+
+   aa0 is not examined.
+*/
+void
+extend_pssm(unsigned char *aa0, int n0, struct pstruct *ppst) {
+
+  int row, col, n_res;
+  int ix_x, ix_term, ix_b, ix_z, ix_j;
+  int ix_n, ix_d, ix_q, ix_e, ix_i, ix_l;
+  int *prof0, *prof1;
+
+  n_res = ppst->nsq;
+
+  /* look up every column index once */
+  ix_x = pascii['X'];
+  ix_term = pascii['*'];
+  if (ix_term >= n_res) {ix_term = ix_x;}
+  ix_b = pascii['B'];
+  ix_z = pascii['Z'];
+  ix_j = pascii['J'];
+  ix_n = pascii['N'];
+  ix_d = pascii['D'];
+  ix_q = pascii['Q'];
+  ix_e = pascii['E'];
+  ix_i = pascii['I'];
+  ix_l = pascii['L'];
+
+  /* boundary and ambiguity columns; the write order (B, Z, J, X, *)
+     is kept in case two of the codes coincide */
+  for (row = 0; row < n0; row++) {
+    prof0 = ppst->pam2p[0][row];
+    prof0[0] = -BIGNUM;
+    prof0[ix_b] = (int)(((float)prof0[ix_n]+(float)prof0[ix_d]+0.5)/2.0);
+    prof0[ix_z] = (int)(((float)prof0[ix_q]+(float)prof0[ix_e]+0.5)/2.0);
+    prof0[ix_j] = (int)(((float)prof0[ix_i]+(float)prof0[ix_l]+0.5)/2.0);
+    prof0[ix_x] = ppst->pam_xm;
+    prof0[ix_term] = ppst->pam_xm;
+  }
+
+  /* duplicate the finished rows into the second profile */
+  for (row = 0; row < n0; row++) {
+    prof0 = ppst->pam2p[0][row];
+    prof1 = ppst->pam2p[1][row];
+    prof1[0] = -BIGNUM;
+    for (col = 1; col <= n_res; col++) {
+      prof1[col] = prof0[col];
+    }
+  }
+
+  /* nothing more to do unless the extended alphabet is in use */
+  if (!ppst->ext_sq_set) {return;}
+
+  for (row = 0; row < n0; row++) {
+    prof0 = ppst->pam2p[0][row];
+    prof1 = ppst->pam2p[1][row];
+    for (col = 1; col <= n_res; col++) {
+      prof0[n_res+col] = prof0[col];
+      prof1[n_res+col] = ppst->pam_xm;
+    }
+  }
+}
+
+/* format_params() - write a bracketed rendering of an option's
+   default value(s) into string, e.g. " [%d]" or " [%.4g,%.4g]".
+
+   opt_ptr  option whose i_param1/i_param2, d_param1/d_param2 or
+            s_param is shown, as selected by fmt_type (1-5)
+   string   output buffer; it receives an empty string for fmt_type 0
+            or any unrecognised value
+
+   Option 'r' ignores fmt_type and is always shown as " [+%d/%d]".
+
+   NOTE(review): output is written with sprintf() and no length limit;
+   the callers in this file pass a MAX_STR buffer.
+*/
+void format_params(struct opt_def_str *opt_ptr, char *string) {
+
+  /* 'r' has a format of its own */
+  if (opt_ptr->opt_char == 'r') {
+    sprintf(string, " [+%d/%d]", opt_ptr->i_param1, opt_ptr->i_param2);
+    return;
+  }
+
+  if (opt_ptr->fmt_type == 1) {
+    sprintf(string, " [%d]", opt_ptr->i_param1);
+  }
+  else if (opt_ptr->fmt_type == 2) {
+    sprintf(string, " [%d,%d]", opt_ptr->i_param1, opt_ptr->i_param2);
+  }
+  else if (opt_ptr->fmt_type == 3) {
+    sprintf(string, " [%.4g]", opt_ptr->d_param1);
+  }
+  else if (opt_ptr->fmt_type == 4) {
+    sprintf(string, " [%.4g,%.4g]", opt_ptr->d_param1, opt_ptr->d_param2);
+  }
+  else if (opt_ptr->fmt_type == 5) {
+    sprintf(string, " [%s]", opt_ptr->s_param);
+  }
+  else {
+    /* fmt_type 0 and anything unknown: nothing to show */
+    string[0] = '\0';
+  }
+}
+
+
+/* option letters that show_help() lists under "COMMON OPTIONS", in
+   display order; which string is compiled in depends on the program
+   being built (FASTX/FASTY, LALIGN, or anything else) */
+#if defined(FASTX) || defined(FASTY)
+static char *common_opts = "sfgjSEbdI";
+#else
+#if defined(LALIGN)
+static char *common_opts = "sfgEZI";
+#else
+static char *common_opts = "sfgSbdI";
+#endif
+#endif
+
+/* show_help() - print the short usage message and exit(0).
+
+   pgm_name  program name shown in the usage lines
+   pgm_id    index into pgm_def_arr[]; selects the description and
+             whether "[ktup]" is shown
+
+   Each letter of common_opts is looked up in g_options first; the
+   first match there is printed and ends the search for that letter.
+   Otherwise every matching entry of f_options is printed.
+*/
+void
+show_help(char *pgm_name, int pgm_id) {
+  const char *opt_c;
+  struct opt_def_str *opt;
+  char param_str[MAX_STR];
+  int in_g_options;
+
+  printf("USAGE\n");
+#ifndef LALIGN
+  printf(" %s [-options] query_file library_file",pgm_name);
+  if (pgm_def_arr[pgm_id].ktup > 0) {
+    printf(" [ktup]\n");
+  }
+  else {printf("\n");}
+#else
+  printf(" %s [-options] seq_file1 seq_file2\n",pgm_name);
+#endif
+  printf(" %s -help for a complete option list\n",pgm_name);
+  printf("\nDESCRIPTION\n");
+  printf(" %s\n version: %s\n",pgm_def_arr[pgm_id].iprompt0, verstr);
+  printf("\n");
+  printf("COMMON OPTIONS (options must preceed query_file library_file)\n");
+
+  for (opt_c = common_opts; *opt_c != '\0'; opt_c++) {
+
+    /* general options take precedence: first hit wins */
+    in_g_options = 0;
+    for (opt = g_options; opt->opt_char != '\0'; opt++) {
+      if (opt->opt_char != *opt_c) continue;
+      format_params(opt, param_str);
+      printf(" -%c%c %s %s;",opt->opt_char, (opt->has_arg? ':' : ' '), 
+             param_str, opt->opt_descr_s);
+      printf("\n");
+      in_g_options = 1;
+      break;
+    }
+    if (in_g_options) continue;
+
+    /* otherwise show every program-specific entry with this letter */
+    for (opt = f_options; opt->opt_char != '\0'; opt++) {
+      if (opt->opt_char != *opt_c) continue;
+      format_params(opt, param_str);
+      printf(" -%c%c %s %s;",opt->opt_char, (opt->has_arg? ':' : ' '), 
+             param_str, opt->opt_descr_s);
+      printf("\n");
+    }
+  }
+  exit(0);
+}
+
+/* sort_opt_list() - sort the n characters of v[] in place so that
+   upper- and lower-case forms of a letter are adjacent, with the
+   lower-case form first (e.g. "bAaB" becomes "aAbB").
+
+   v  array of option characters (need not be NUL-terminated)
+   n  number of characters to sort; n <= 1 leaves v[] unchanged
+
+   The case-insensitive pass is a shell sort; the second pass makes a
+   single sweep over adjacent pairs, which is sufficient when each
+   letter occurs at most once per case.
+*/
+void sort_opt_list(char *v, int n) {
+  int gap, i, j, k;
+  int incs[7] = { 336, 112, 48, 21, 7, 3, 1 };
+  char tmp_c;
+  int tmp_u;
+
+  /* first shell sort the list using toupper(); the argument is cast
+     to unsigned char because toupper() is only defined for values
+     representable as unsigned char (or EOF) */
+  for ( k = 0; k < 7; k++) {
+    gap = incs[k];
+    for (i = gap; i < n; i++) {
+      tmp_c = v[i];
+      tmp_u = toupper((unsigned char)tmp_c);
+      j = i;
+      while (j >= gap && toupper((unsigned char)v[j - gap]) > tmp_u) {
+        v[j] = v[j - gap];
+        j -= gap;
+      }
+      v[j] = tmp_c;
+    }
+  }
+
+  /* then sort the toupper(==) pairs lower-case, upper-case */
+  for (i=1; i<n; i++) {
+    if (toupper((unsigned char)v[i])==toupper((unsigned char)v[i-1])) {
+      if (v[i] > v[i-1]) { tmp_c = v[i]; v[i] = v[i-1]; v[i-1]=tmp_c;}
+    }
+  }
+}
+
+/* sort_options() - collect the option letters of both tables into one
+   newly allocated, NUL-terminated string and sort it with
+   sort_opt_list().
+
+   g_options, f_options  option tables, each ended by an entry whose
+                         opt_char is '\0'
+
+   Returns the sorted string; the caller must free() it.  Calls
+   exit(1) if the string cannot be allocated.
+*/
+char *
+sort_options (struct opt_def_str *g_options, struct opt_def_str *f_options) {
+  struct opt_def_str *opt;
+  char *sorted, *out;
+  int n_opts;
+
+  /* pass 1: how many letters are there in total? */
+  n_opts = 0;
+  for (opt = g_options; opt->opt_char != '\0'; opt++) { n_opts++; }
+  for (opt = f_options; opt->opt_char != '\0'; opt++) { n_opts++; }
+
+  /* calloc() leaves the extra byte as the terminating NUL */
+  sorted = (char *)calloc(n_opts+1, sizeof(char));
+  if (sorted == NULL) {
+    fprintf(stderr," cannot allocate sorted_list[%d]\n",n_opts+1);
+    exit(1);
+  }
+
+  /* pass 2: copy the letters, g_options first */
+  out = sorted;
+  for (opt = g_options; opt->opt_char != '\0'; opt++) {
+    *out++ = opt->opt_char;
+  }
+  for (opt = f_options; opt->opt_char != '\0'; opt++) {
+    *out++ = opt->opt_char;
+  }
+
+  sort_opt_list(sorted, n_opts);
+
+  return sorted;
+}
+
+/* show_all_help() - print the complete option list and exit(0).
+
+   pgm_name  program name shown in the usage line
+   pgm_id    index into pgm_def_arr[]; selects the description and
+             whether "[ktup]" is shown
+
+   Options are listed in the order produced by sort_options().  For
+   each letter the first match in g_options is printed; if there is
+   none, every match in f_options is printed.  The long description is
+   used when an option has one, the short one otherwise.
+*/
+void
+show_all_help(char *pgm_name, int pgm_id) {
+  struct opt_def_str *opt;
+  char param_str[MAX_STR];
+  char *descr;
+  char *all_opts, *opt_c;
+  int in_g_options;
+
+  all_opts = sort_options(g_options, f_options);
+
+  printf("USAGE\n");
+  printf(" %s [-options] query_file library_file",pgm_name);
+  if (pgm_def_arr[pgm_id].ktup > 0) {
+    printf(" [ktup]\n");
+  }
+  else {printf("\n");}
+
+  printf(" \"@\" query_file uses stdin; query_file:begin-end sets subset range\n");
+  printf(" library file formats: 0:FASTA; 1:GenBankFF; 3:EMBL_FF; 7:FASTQ; 10:subset; 12:NCBI blastdbcmd;\n");
+  printf("   alternate library formats: \"library_file 7\" for 7:FASTQ\n");
+
+  printf("\nDESCRIPTION\n");
+  printf(" %s\n version: %s\n",pgm_def_arr[pgm_id].iprompt0, verstr);
+  printf("\n");
+  printf("OPTIONS (options must preceed query_file library_file)\n");
+
+  for (opt_c = all_opts; *opt_c != '\0'; opt_c++) {
+
+    /* general options first: one hit is enough */
+    in_g_options = 0;
+    for (opt = g_options; opt->opt_char != '\0'; opt++) {
+      if (opt->opt_char != *opt_c) continue;
+      descr = (opt->opt_descr_l) ? opt->opt_descr_l : opt->opt_descr_s;
+      format_params(opt, param_str);
+      printf(" -%c%c %s %s\n",opt->opt_char, (opt->has_arg? ':' : ' '),param_str, descr);
+      in_g_options = 1;
+      break;
+    }
+    if (in_g_options) continue;
+
+    /* then every program-specific option with this letter */
+    for (opt = f_options; opt->opt_char != '\0'; opt++) {
+      if (opt->opt_char != *opt_c) continue;
+      descr = (opt->opt_descr_l) ? opt->opt_descr_l : opt->opt_descr_s;
+      format_params(opt, param_str);
+      printf(" -%c%c %s %s\n",opt->opt_char, (opt->has_arg? ':' : ' '), param_str, descr);
+    }
+  }
+
+  free(all_opts);
+
+  exit(0);
+}
diff --git a/src/karlin.c b/src/karlin.c
new file mode 100644
index 0000000..093aec8
--- /dev/null
+++ b/src/karlin.c
@@ -0,0 +1,519 @@
+/**************** Statistical Significance Parameter Subroutine ****************
+
+     $Id: karlin.c 625 2011-03-23 17:21:38Z wrp $
+     $Revision: 625 $
+
+    Version 1.0	February 2, 1990
+    Version 2.0	March 18,   1993
+
+    Program by:	Stephen Altschul
+
+    Address:	National Center for Biotechnology Information
+    National Library of Medicine
+    National Institutes of Health
+    Bethesda, MD  20894
+
+    Internet:	altschul at ncbi.nlm.nih.gov
+
+    See:	Karlin, S. & Altschul, S.F. "Methods for Assessing the Statistical
+    Significance of Molecular Sequence Features by Using General Scoring
+    Schemes,"  Proc. Natl. Acad. Sci. USA 87 (1990), 2264-2268.
+
+    Computes the parameters lambda and K for use in calculating the
+    statistical significance of high-scoring segments or subalignments.
+
+    The scoring scheme must be integer valued.  A positive score must be
+    possible, but the expected (mean) score must be negative.
+
+    A program that calls this routine must provide the value of the lowest
+    possible score, the value of the greatest possible score, and a pointer
+    to an array of probabilities for the occurrence of all scores between
+    these two extreme scores.  For example, if score -2 occurs with
+    probability 0.7, score 0 occurs with probability 0.1, and score 3
+    occurs with probability 0.2, then the subroutine must be called with
+    low = -2, high = 3, and pr pointing to the array of values
+    { 0.7, 0.0, 0.1, 0.0, 0.0, 0.2 }.  The calling program must also provide
+    pointers to lambda and K; the subroutine will then calculate the values
+    of these two parameters.  In this example, lambda=0.330 and K=0.154.
+
+    The parameters lambda and K can be used as follows.  Suppose we are
+    given a length N random sequence of independent letters.  Associated
+    with each letter is a score, and the probabilities of the letters
+    determine the probability for each score.  Let S be the aggregate score
+    of the highest scoring contiguous segment of this sequence.  Then if N
+    is sufficiently large (greater than 100), the following bound on the
+    probability that S is greater than or equal to x applies:
+	
+    P( S >= x )   <=   1 - exp [ - KN exp ( - lambda * x ) ].
+	
+    In other words, the p-value for this segment can be written as
+    1-exp[-KN*exp(-lambda*S)].
+
+    This formula can be applied to pairwise sequence comparison by assigning
+    scores to pairs of letters (e.g. amino acids), and by replacing N in the
+    formula with N*M, where N and M are the lengths of the two sequences
+    being compared.
+
+    In addition, letting y = KN*exp(-lambda*S), the p-value for finding m
+    distinct segments all with score >= S is given by:
+
+                   2             m-1           -y
+    1 - [ 1 + y + y /2! + ... + y   /(m-1)! ] e
+
+    Notice that for m=1 this formula reduces to 1-exp(-y), which is the same
+    as the previous formula.
+
+*******************************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+#define MAXIT 25  /* Maximum number of iterations used in calculating lambda */
+#define NMAP_X 23
+#define NMAP 33
+
+#define TINY 1e-6
+
+/* first build a residue map to automatically put residues in score bins */
+
+#include "defs.h"
+#include "param.h"
+
+/* initialize the Karlin frequency, probability arrays using
+   a specific query sequence */
+
+int karlin(int , int, double *, double *, double *);
+static int karlin_k(int , int , double *, double *, double *, double *);
+
+/* init_karlin() - set up the Karlin-Altschul work arrays for a
+   specific query sequence.
+
+   aa0    query residues, terminated by a 0 byte (the scan stops at
+          the first 0, not at n0)
+   n0     query length, used only for the frequency weight
+   ppst   supplies nsq, pam_l, pam_h and the hsqx[] residue map
+   aa0_f  output: aa0_f[1..nsq] receive 100*count/(n0+nsq), where
+          every residue starts with a count of 1
+   kp     if *kp is NULL, a zeroed array of (pam_h-pam_l+2) doubles is
+          allocated and stored there; exit(1) on allocation failure
+
+   NOTE(review): r_cnt[] has NMAP+1 bins; this assumes ppst->nsq and
+   every hsqx[] value are <= NMAP -- confirm against param.h.
+*/
+void init_karlin(const unsigned char *aa0, int n0, struct pstruct *ppst,
+		 double *aa0_f, double **kp)
+{
+  int kar_nsq, kar_range;
+
+  const unsigned char *aa0p;
+  int i;
+  int r_cnt[NMAP+1];
+  double fn0, *kar_p;
+  
+  kar_range = ppst->pam_h - ppst->pam_l + 1;
+  if (*kp == NULL) {
+    if ((kar_p=(double *)calloc(kar_range+1,sizeof(double)))==NULL) {
+      fprintf(stderr," cannot allocate kar_p array: %d\n",kar_range+1);
+      exit(1);
+    }
+    *kp = kar_p;
+  }
+  kar_nsq = ppst->nsq;	/* alphabet size */
+
+  /* clear every bin first: bins outside 1..kar_nsq (including NMAP_X
+     when kar_nsq < NMAP_X) are incremented or read below and would
+     otherwise be uninitialized */
+  for (i=0; i<=NMAP; i++) r_cnt[i]=0;
+
+  /* must have at least 1 residue of each type */
+  for (i=1; i<=kar_nsq; i++) r_cnt[i]=1; 
+ 
+  fn0 = 100.0/(double)(n0+kar_nsq);	/* weight of each residue */
+
+  aa0p = aa0;
+  /* increment residue count for each residue in query sequence */
+  while (*aa0p) r_cnt[ppst->hsqx[*aa0p++]]++;
+
+  /* map all unmapped residues to 'X' */
+  r_cnt[NMAP_X] += r_cnt[NMAP];
+ 
+  for (i=1; i<=kar_nsq; i++) aa0_f[i] = fn0*(double)r_cnt[i];
+}
+
+/* uniform nucleotide frequencies for indices 1..4; entry 0 is a
+   placeholder, since the loops in init_karlin_a()/do_karlin_a() that
+   read this table start at index 1 */
+double nt_f[] = {0.0, 0.25, 0.25, 0.25, 0.25 };
+
+/* Robinson and Robinson frequencies */
+/* amino-acid frequencies for indices 1..20, in the order given by the
+   per-line labels; entry 0 (NULL) is a placeholder that the 1-based
+   loops in init_karlin_a()/do_karlin_a() do not read */
+double aa_f[] = {
+/* NULL */ 0.00,
+/* A */   0.0780474700897585,
+/* R */   0.0512953149316987,
+/* N */   0.0448725775979007,
+/* D */   0.0536397361638076,
+/* C */   0.0192460110427568,
+/* Q */   0.0426436013507063,
+/* E */   0.0629485981204668,
+/* G */   0.0737715654561964,
+/* H */   0.0219922696262025,
+/* I */   0.0514196403000682,
+/* L */   0.090191394464413,
+/* K */   0.0574383201866657,
+/* M */   0.0224251883196316,
+/* F */   0.0385564048655621,
+/* P */   0.0520279465667327,
+/* S */   0.0711984743501224,
+/* T */   0.0584129422708473,
+/* W */   0.013298374223799,
+/* Y */   0.0321647488738564,
+/* V */   0.0644094211988074};
+
+/* init_karlin_a() - set up the Karlin-Altschul work arrays from an
+   "average" composition instead of a particular query.
+
+   ppst   supplies pam_l/pam_h (array size) and nt_align/dnaseq/nsq
+          (choice of composition)
+   aa0_f  output frequencies, filled from index 1:
+            nt_align set              -> nt_f[1..4]
+            SEQT_PROT or SEQT_UNK     -> aa_f[1..20]
+            anything else             -> 1/(nsq-1) for 1..nsq-1, and
+                                         0.0 for index nsq
+   kp     if *kp is NULL, a zeroed array of (pam_h-pam_l+2) doubles is
+          allocated and stored there; exit(1) on allocation failure
+*/
+
+void
+init_karlin_a(struct pstruct *ppst, double *aa0_f, double **kp)
+{
+  const double *table;
+  int n_res, k;
+  int n_scores;
+  double uniform_f;
+  double *score_p;
+
+  /* allocate the score-probability array only if the caller has none */
+  n_scores = ppst->pam_h - ppst->pam_l + 1;
+  if (*kp == NULL) {
+    score_p = (double *)calloc(n_scores+1,sizeof(double));
+    if (score_p == NULL) {
+      fprintf(stderr," cannot allocate kar_p array: %d\n",n_scores+1);
+      exit(1);
+    }
+    *kp = score_p;
+  }
+
+  /* pick a fixed frequency table when one applies */
+  table = NULL;
+  n_res = 0;
+  if (ppst->nt_align) {
+    table = nt_f;
+    n_res = 4;
+  }
+  else if (ppst->dnaseq==SEQT_PROT || ppst->dnaseq == SEQT_UNK) {
+    table = aa_f;
+    n_res = 20;
+  }
+
+  if (table != NULL) {
+    for (k = 1; k <= n_res; k++) { aa0_f[k] = table[k]; }
+    return;
+  }
+
+  /* no table: spread the weight evenly over all but the last residue */
+  n_res = ppst->nsq;
+  uniform_f = 1.0/(double)(n_res-1);
+  for (k = 1; k < n_res; k++) { aa0_f[k] = uniform_f; }
+  aa0_f[n_res] = 0.0;
+}
+
+/* do_karlin() - build the score probability distribution for a query
+   composition and one library sequence, then call karlin() to obtain
+   lambda and H.
+
+   aa1     library residues, terminated by a 0 byte (the scan stops at
+           the first 0, not at n1)
+   n1      library sequence length, used only for the frequency weight
+   pam2    scoring matrix, indexed [1..nsq][1..nsq]
+   ppst    supplies nsq, pam_l, pam_h and the hsqx[] residue map
+   aa0_f   query residue frequencies, indices 1..nsq
+   kar_p   work array of at least (pam_h-pam_l+2) doubles; overwritten
+           with the normalised score probabilities
+   lambda, H   outputs, set by karlin()
+
+   Returns 0 if the summed probability is <= 0.00001, otherwise the
+   return value of karlin().
+
+   NOTE(review): r_cnt[] has NMAP+1 bins and aa1_f[] NMAP entries;
+   this assumes ppst->nsq < NMAP and every hsqx[] value is <= NMAP --
+   confirm against param.h.
+*/
+int
+do_karlin(const unsigned char *aa1, int n1,
+	  int **pam2, const struct pstruct *ppst,
+	  double *aa0_f, double *kar_p, double *lambda, double *H)
+{
+  register unsigned const char *aap;
+  int kar_range, kar_min, kar_max, kar_nsq;
+  int r_cnt[NMAP+1];
+  double aa1_f[NMAP];
+  double fn1, kar_tot;
+  int i, j;
+
+  kar_nsq = ppst->nsq;
+  kar_min = ppst->pam_l;
+  kar_max = ppst->pam_h;
+  kar_range = kar_max - kar_min + 1;
+
+  /* clear every bin first: bins outside 1..kar_nsq (including NMAP_X
+     when kar_nsq < NMAP_X) are incremented or read below and would
+     otherwise be uninitialized */
+  for (i=0; i<=NMAP; i++) r_cnt[i]=0;
+
+  /* every residue type starts with a count of 1 */
+  for (i=1; i<=kar_nsq; i++) r_cnt[i]=1;
+
+  /* residue counts */
+
+  aap=aa1;
+  while (*aap) r_cnt[ppst->hsqx[*aap++]]++;
+
+  /* fold the NMAP bin into the NMAP_X bin */
+  r_cnt[NMAP_X] += r_cnt[NMAP];
+  
+  /* residue frequencies */
+  fn1 = 100.0/(double)(n1+kar_nsq);
+  for (i=1; i<=kar_nsq; i++) aa1_f[i]= fn1*(double)r_cnt[i];
+
+  for (i=0; i<=kar_range; i++) kar_p[i] = 0.0;
+
+  /* accumulate the probability of each score, offset by kar_min */
+  for (i=1; i<=kar_nsq; i++) {
+    for (j=1; j<=kar_nsq; j++)
+      kar_p[pam2[i][j]-kar_min] += aa0_f[i]*aa1_f[j];
+  }
+
+  kar_tot = 0.0;
+  for (i=0; i<=kar_range; i++) kar_tot += kar_p[i];
+  if (kar_tot <= 0.00001) return 0;
+
+  /* normalise so that the probabilities sum to 1 */
+  for (i=0; i<=kar_range; i++) kar_p[i] /= kar_tot;
+
+  return karlin(kar_min, kar_max, kar_p, lambda, H);
+}
+
+/* do_karlin_a() - build the score probability distribution for the
+   given query frequencies against an "average" library composition,
+   then call karlin_k() to obtain lambda, K and H.
+
+   pam2    scoring matrix, indexed [1..kar_nsq][1..kar_nsq]
+   ppst    supplies pam_l, pam_h and nt_align
+   aa0_f   query residue frequencies, indices 1..kar_nsq
+   kar_p   work array of at least (pam_h-pam_l+2) doubles; overwritten
+           with the normalised score probabilities
+   lambda, K, H   outputs, set by karlin_k()
+
+   The library composition is nt_f[] (4 residues) when ppst->nt_align
+   is set and aa_f[] (20 residues) otherwise, renormalised to sum to 1.
+
+   Returns 0 if the summed probability is <= 0.00001, otherwise the
+   return value of karlin_k().
+
+   NOTE(review): init_karlin_a() has a third, uniform-composition case
+   for non-protein data without nt_align; the equivalent branch here
+   could never be reached ("if (x) ... else if (!x) ... else") and has
+   been removed.  Confirm whether that case should be handled here too.
+*/
+int
+do_karlin_a(int **pam2, struct pstruct *ppst,
+	  double *aa0_f, double *kar_p, double *lambda, double *K, double *H)
+{
+  double *aa1fp;
+  int kar_range, kar_min, kar_max, kar_nsq;
+  double aa1_f[NMAP];
+  double kar_tot;
+  int i, j;
+
+  kar_min = ppst->pam_l;
+  kar_max = ppst->pam_h;
+  kar_range = kar_max - kar_min + 1;
+
+  if (ppst->nt_align ) {
+    kar_nsq = 4;
+    aa1fp = nt_f;
+  }
+  else {
+    kar_nsq = 20;
+    aa1fp = aa_f;
+  }
+
+  /* renormalise the table so the frequencies used sum to 1 */
+  kar_tot = 0.0;
+  for (i=1; i<=kar_nsq; i++) {kar_tot += aa1fp[i];}
+  for (i=1; i<=kar_nsq; i++) {aa1_f[i]= aa1fp[i]/kar_tot;}
+
+  for (i=0; i<=kar_range; i++) kar_p[i] = 0.0;
+
+  /* accumulate the probability of each score, offset by kar_min.
+     j runs to kar_nsq inclusive, as in do_karlin(): stopping at
+     kar_nsq-1 left the last residue out of the library composition */
+  for (i=1; i<=kar_nsq; i++) {
+    for (j=1; j<=kar_nsq; j++)
+      kar_p[pam2[i][j]-kar_min] += aa0_f[i]*aa1_f[j];
+  }    
+
+  kar_tot = 0.0;
+  for (i=0; i<=kar_range; i++) kar_tot += kar_p[i];
+  if (kar_tot <= 0.00001) return 0;
+
+  /* normalise so that the probabilities sum to 1 */
+  for (i=0; i<=kar_range; i++) kar_p[i] /= kar_tot;
+
+  return karlin_k(kar_min, kar_max, kar_p, lambda, K, H);
+}
+
+/* karlin() - compute lambda and H from a score probability
+   distribution.
+
+   low, high  lowest and highest score; pr[k] is the probability of
+              score low+k, for k = 0..high-low
+   lambda_p   output: the positive solution of
+              sum_i pr(i)*exp(lambda*i) == 1, found by bisection
+              (at most MAXIT steps, tolerance TINY)
+   H_p        output: lambda * sum_i pr(i)*i*exp(lambda*i)
+
+   Returns 1 on success.  Returns 0, leaving the outputs untouched,
+   when the expected score is not negative, when no positive score has
+   a non-zero probability, or when lambda stays <= 1e-10 after MAXIT
+   steps.
+*/
+int
+karlin(int low,			/* Lowest score (must be negative)    */
+       int high,		/* Highest score (must be positive)   */
+       double *pr,		/* Probabilities for various scores   */
+       double *lambda_p,	/* Pointer to parameter lambda        */
+       double *H_p)		/* Pointer to parameter H              */
+{
+  int i,range, nit;
+  int have_pos;
+  double up,new,sum,av,beta,ftemp;
+  double lambda;
+  double *ptr1;
+
+  /* Calculate the parameter lambda */
+
+  range = high-low;
+
+  /* check for E() < 0.0, and that a positive score is possible */
+  sum = 0;
+  have_pos = 0;
+  ptr1 = pr;
+  for (i=low; i <= high ; i++, ptr1++) {
+    if (i > 0 && *ptr1 > 0.0) have_pos = 1;
+    sum += i * (*ptr1);
+  }
+  if (sum >= 0.0) {
+#ifdef DEBUG
+    fprintf(stderr," (karlin lambda) non-negative expected score: %.4lg\n",
+	    sum);
+#endif
+    return 0;
+  }
+
+  /* without positive scores the sum below can never reach 1.0, so the
+     search for an upper bound would run until exp() overflows */
+  if (!have_pos) return 0;
+
+  /* up is upper bound on lambda */
+  up=0.5;
+  do {
+    up *= 2.0;
+    ptr1=pr;
+
+    beta=exp(up);
+
+    /* ftemp is multiplied by beta once per score, so it starts one
+       step below the lowest score */
+    ftemp=exp(up*(low-1));
+    sum = 0.0;
+    for (i=0; i<=range; ++i) sum+= *ptr1++ * (ftemp*=beta);
+  }
+  while (sum<1.0);
+
+  /* bisect between lambda (lower bound) and up */
+  lambda = 0.0;
+  nit = 0;
+  while ( nit++ < MAXIT ) {
+    new = (lambda+up)/2.0;
+    beta = exp(new);
+    ftemp = exp(new*(low-1));
+    ptr1=pr;
+    sum = 0.0;
+    /* multiply by exp(new) for each score */
+    for (i=0;i<=range;++i) sum+= *ptr1++ * (ftemp*=beta);
+
+    if (sum > 1.0 + TINY) up=new;
+    else {
+      if ( fabs(lambda - new) < TINY ) goto done;
+      lambda = new;
+    }
+  }
+
+  /* ran out of iterations without moving away from zero */
+  if (lambda <= 1e-10) {
+    return 0;
+  }
+
+ done:
+  *lambda_p = lambda;
+
+  /* Calculate the parameter H */
+
+  /* beta still holds exp() of the last bisection probe, which is not
+     exactly lambda; recompute it so every term is exp(lambda*i) */
+  beta = exp(lambda);
+
+  ptr1=pr;
+  ftemp=exp(lambda*(low-1));
+  for (av=0.0, i=low; i<=high; ++i)
+    av+= *ptr1++ *i*(ftemp*=beta);
+  *H_p= lambda*av;
+
+  return 1;		/* Parameters calculated successfully */
+}
+
+static int a_gcd (int, int);
+
+/* take score probabilities pr[] for scores low..high and get *lambda, *K, *H; returns 1 on success, 0 on failure */
+static int
+karlin_k(int low,		/* Lowest score (must be negative)    */
+	 int high,		/* Highest score (must be positive)   */
+	 double *pr,	/* Probabilities for various scores   */
+	 double *lambda_p,	/* Pointer to parameter lambda        */
+	 double *K_p,
+	 double *H_p)		/* Pointer to parameter H              */
+{
+  int i,j,range,lo,hi,first,last, nit;
+  double up,new,sum,Sum,av,beta,oldsum,ratio,ftemp;
+  double lambda;
+  double *P,*ptrP,*ptr2;
+  double *ptr1;
+
+  /* Calculate the parameter lambda: bisection on sum(pr[s]*exp(lambda*s)) == 1 */
+
+  range = high-low;
+
+  /* check for E() < 0.0 */
+  sum = 0;
+  ptr1 = pr;
+  for (i=low; i <= high ; i++) sum += i* (*ptr1++);
+  if (sum >= 0.0) {
+    fprintf(stderr," (karlin lambda) non-negative expected score: %.4lg\n",
+	    sum);
+    /* perhaps we should return values for BLOSUM50 here to avoid fp
+       underflow later */
+    return 0;
+  }
+
+  /* up is upper bound on lambda; doubled until sum(pr*exp(up*score)) >= 1.0 */
+  up=0.5;
+  do {
+    up *= 2.0;
+    ptr1=pr;
+
+    beta=exp(up);
+
+    ftemp=exp(up*(low-1));
+    sum = 0.0;
+    for (i=0; i<=range; ++i) sum+= *ptr1++ * (ftemp*=beta);
+  }
+  while (sum<1.0);
+
+  /* avoid overflow from very large lambda*S (this step is commented out) */
+  /*
+  do {
+    up /= 2.0;
+    ptr1=pr;
+    beta=exp(up);
+
+    ftemp=exp(up*(low-1));
+    sum = 0.0;
+    for (i=0; i<=range; ++i) sum+= *ptr1++ * (ftemp*=beta);
+  } while (sum > 2.0);
+
+  up *= 2.0;
+  */
+  /* we moved past, now back up */
+
+  /*	for (lambda=j=0;j<25;++j) { */
+  lambda = 0.0;
+  nit = 0;
+  while ( nit++ < MAXIT ) {
+    new = (lambda+up)/2.0;
+    beta = exp(new);
+    ftemp = exp(new*(low-1));
+    ptr1=pr;
+    sum = 0.0;
+    /* multiply by exp(new) for each score */
+    for (i=0;i<=range;++i) sum+= *ptr1++ * (ftemp*=beta);
+
+    if (sum > 1.0 + TINY) up=new;
+    else {
+      if ( fabs(lambda - new) < TINY ) goto done;
+      lambda = new;
+    }
+  }
+  /* MAXIT iterations used up: fail only if lambda never moved away from ~0 */
+  if (lambda <= 1e-10) {
+    lambda = -1.0;
+    return 0;
+  }
+
+ done:
+  *lambda_p = lambda;
+
+  /* Calculate the parameter H */
+
+  ptr1=pr;
+  ftemp=exp(lambda*(low-1));
+  for (av=0.0, i=low; i<=high; ++i) av+= *ptr1++ *i*(ftemp*=beta);
+  *H_p= lambda*av;
+
+  /* Calculate the parameter K; report failure if the work array cannot be allocated */
+  Sum=lo=hi=0;
+  if ((P= (double *) calloc(MAXIT*range+1,sizeof(double)))==NULL) return 0;
+  for (*P=sum=oldsum=j=1;j<=MAXIT && sum>0.001;Sum+=sum/=j++) {
+    first=last=range;
+    for (ptrP=P+(hi+=high)-(lo+=low); ptrP>=P; *ptrP-- =sum) {
+      ptr1=ptrP - first;
+      ptr2=pr + first;
+      for (sum=0,i=first; i<=last; ++i) sum += *ptr1-- * *ptr2++;
+      if (first) --first;
+      if (ptrP-P<=range) --last;
+    }
+    ftemp=exp(lambda*(lo-1));
+    for (sum=0,i=lo;i;++i) sum+= *++ptrP * (ftemp*=beta);
+    for (;i<=hi;++i) sum+= *++ptrP;
+    ratio=sum/oldsum;
+    oldsum=sum;
+  }
+  for (;j<=200;Sum+=oldsum/j++) oldsum*=ratio;
+  for (i=low; !pr[i-low]; ++i);
+  for (j= -i;i<high && j>1;) if (pr[++i-low]) j=a_gcd(j,i);
+  *K_p = (j*exp(-2*Sum))/(av*(1.0-exp(- lambda*j)));
+  free(P);
+
+  return 1;		/* Parameters calculated successfully */
+}
+
+int
+a_gcd(int a, int b)
+{
+  int rem;
+  /* Euclid's algorithm on a and |b|; the larger value is divided first */
+  if (b < 0) b = -b;
+  if (a < b) { rem = a; a = b; b = rem; }
+  while (b != 0) { rem = a % b; a = b; b = rem; }
+  return a;
+}
+
diff --git a/src/last_tat.c b/src/last_tat.c
new file mode 100644
index 0000000..913bf0b
--- /dev/null
+++ b/src/last_tat.c
@@ -0,0 +1,155 @@
+/* $Id: last_tat.c 938 2012-06-04 16:15:06Z wrp $ */
+
+/* copyright (c) 1996, 1997, 1998, 1999, 2014 by William R. Pearson and
+   The Rector & Visitors of the University of Virginia */
+
+/* Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under this License is distributed on an "AS
+   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+   express or implied.  See the License for the specific language
+   governing permissions and limitations under the License. 
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "defs.h"
+
+#include "structs.h"
+#include "param.h"
+#include "mm_file.h"
+#include "best_stats.h"
+
+
+extern int (*ranlib) (char *str, int cnt,
+	       fseek_t libpos, char *libstr,
+	       struct lmf_str *lm_fd);
+
+#define RANLIB (m_fptr->ranlib)
+
+#define MAX_BLINE 200
+
+int
+re_getlib(unsigned char *, struct annot_str **,int, int, int, int, int, long *, long *, 
+	  struct lmf_str *m_fptr);
+
+void
+do_work(unsigned char *aa0, int n0, unsigned char *aa1, int n1, int frame, 
+	struct pstruct *ppst, void *f_str, int qr_flg, struct rstruct *rst);
+
+extern void
+do_opt (unsigned char *aa0, int n0, unsigned char *aa1, int n1,
+	int frame, struct pstruct *pst, void *f_str,
+	struct rstruct *rst);
+
+struct lmf_str *re_openlib(struct lmf_str *, int outtty);
+
+void sortbestz (struct beststr **bptr, int nbest);
+
+double zs_to_E(double zs,int n1, int isdna, long entries, struct db_str db);
+
+double scale_one_score(int ipos, double escore, struct db_str db, void *rs_str);
+
+void sortbests (struct beststr **bptr, int nbest)
+{
+  int step, hi, lo;
+  struct beststr *held;
+  /* Shell sort on rst.score[0], largest first; step halves every pass */
+  for (step = nbest/2; step > 0; step /= 2)
+    for (hi = step; hi < nbest; hi++)
+      for (lo = hi - step; lo >= 0; lo -= step) {
+	if (bptr[lo]->rst.score[0] >= bptr[lo + step]->rst.score[0]) break;
+	held = bptr[lo + step];
+	bptr[lo + step] = bptr[lo];
+	bptr[lo] = held;
+      }
+}
+
+int
+last_calc(
+	  unsigned char **aa0, unsigned char *aa1save, int maxn,
+	  struct beststr **bptr, int nbest, 
+	  const struct mngmsg *m_msg, struct pstruct *ppst
+	  , void **f_str
+	  , void *rstat_str)
+{
+  unsigned char *aa1;
+  int nopt, ib;
+  struct beststr *bbp;
+  long loffset, l_off;
+  int n0, n1;
+  struct rstruct rst;
+  struct lmf_str *m_fptr;
+  char bline[60];
+  int tat_samp, tat_inc, loop_cnt, i;
+  double min_escore, ess;
+  /* Re-score the sorted hits with do_opt() in batches of tat_inc entries (at most 5 batches); returns ib, one past the last entry re-scored */
+  n0 = m_msg->n0;
+
+  sortbestz(bptr,nbest);
+
+  tat_inc = 500;
+/*
+  if (zs_to_E(bptr[0]->zscore,bptr[0]->n1,0,ppst->zdb_size,m_msg->db)/ 
+      zs_to_E(bptr[nbest-1]->zscore,bptr[nbest-1]->n1,0,ppst->zdb_size,m_msg->db)
+	< 1e-20) { tat_inc /= 4 ;}
+*/
+
+/* || (zs_to_E(bptr[0]->zscore,bptr[0]->n1,0,ppst->zdb_size,m_msg->db)< 1e-5); */
+
+  ib = tat_samp = 0;
+  for (loop_cnt = 0; loop_cnt < 5; loop_cnt++) {
+    tat_samp += tat_inc;
+    nopt = min(nbest,tat_samp);
+    min_escore = 1000000.0;
+    for ( ; ib<nopt; ib++) {
+      bbp = bptr[ib];
+
+      if (bbp->rst.score[0] < 0) break;
+
+      if (bbp->seq->aa1b == NULL) {
+	/* sequence is not held in memory: re-open its library and read it again into aa1save */
+	if ((m_fptr=re_openlib(bbp->mseq->m_file_p,!m_msg->quiet))==NULL) {
+	  fprintf(stderr,"*** cannot re-open %s\n",bbp->mseq->m_file_p->lb_name);
+	  exit(1);
+	}
+
+	RANLIB(bline,sizeof(bline),bbp->mseq->lseek,bbp->mseq->libstr,m_fptr);
+
+	n1 = re_getlib(aa1save,NULL,maxn,m_msg->ldb_info.maxt3,
+		       m_msg->ldb_info.l_overlap,bbp->mseq->cont,
+		       m_msg->ldb_info.term_code,
+		       &loffset,&l_off,bbp->mseq->m_file_p);
+	aa1 = aa1save;
+      }
+      else {
+	n1 = bbp->seq->n1;
+	aa1 = bbp->seq->aa1b;
+	loffset = bbp->seq->l_offset;
+	l_off = bbp->seq->l_off;
+      }
+
+      do_opt(aa0[bbp->frame],m_msg->n0,aa1,n1,bbp->frame,ppst,
+	     f_str[bbp->frame],&rst);
+      memcpy(&(bbp->rst),&rst,sizeof(struct rstruct));
+      /* track the smallest scaled E() value seen in this batch */
+      if ((ess=scale_one_score(ib, bbp->rst.escore, m_msg->db, rstat_str)) < 
+	min_escore) { min_escore = ess;}
+      /*
+      fprintf(stderr,"%d: %4d %2d %3d %.4g %.4g\n",
+	      ib, bbp->rst.score[0], bbp->segnum,bbp->seglen,bbp->escore, ess);
+      */
+    }
+    /* no hit in this batch scaled to E() <= m_msg->e_cut: stop re-scoring */
+    if (min_escore > m_msg->e_cut) goto done;
+  }
+ done:
+  return ib;
+}
diff --git a/src/last_thresh.c b/src/last_thresh.c
new file mode 100644
index 0000000..0d63130
--- /dev/null
+++ b/src/last_thresh.c
@@ -0,0 +1,62 @@
+/* $Id: last_thresh.c 808 2011-07-19 20:05:24Z wrp $ */
+
+/* copyright (c) 2011, 2014 by William R. Pearson and The
+   Rector & Visitors of the University of Virginia */
+
+/* Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under this License is distributed on an "AS
+   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+   express or implied.  See the License for the specific language
+   governing permissions and limitations under the License. 
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "defs.h"
+
+#include "structs.h"
+#include "param.h"
+#include "mm_file.h"
+#include "best_stats.h"
+
+#ifdef PCOMPLIB
+#include "msg.h"
+void do_stage2(struct beststr **bptr, int nbest, const struct mngmsg *m_msp0,
+	       int s_func, struct qmng_str *qm_msp);
+#endif
+
+static char thresh_str[MAX_STR];
+
+int E1_to_s(double e_val, int n0, int n1, int db_size, 	void *pu);
+
+int
+last_calc(
+#ifndef PCOMPLIB
+	  unsigned char **aa0, unsigned char *aa1, int maxn,
+#endif	  
+	  struct beststr **bptr, int nbest, 
+	  const struct mngmsg *m_msg, struct pstruct *ppst
+#ifdef PCOMPLIB
+	  , struct qmng_str *qm_msp
+#else
+	  , void **f_str
+#endif
+	  , void *pstat_str)
+{
+  /* Set ppst->repeat_thresh from ppst->e_cut via E1_to_s() and record it in thresh_str; returns nbest unchanged */
+  if (ppst->zdb_size < 0 ) ppst->zdb_size = m_msg->db.entries;
+  ppst->repeat_thresh = E1_to_s(ppst->e_cut, m_msg->n0, bptr[0]->seq->n1, ppst->zdb_size, pstat_str);
+  /* NOTE(review): bptr[0] is read unconditionally -- confirm callers never pass nbest == 0 */
+  ppst->other_info = thresh_str;
+  sprintf(thresh_str,"Threshold: E() < %.2g score: %d\n",ppst->e_cut, ppst->repeat_thresh);
+
+  return nbest;
+}
diff --git a/src/lav_defs.h b/src/lav_defs.h
new file mode 100644
index 0000000..7471bab
--- /dev/null
+++ b/src/lav_defs.h
@@ -0,0 +1,44 @@
+
+#define MAX_STR	512 /* standard label/message buffer */
+
+#ifndef XTERNAL
+long pminx, pmaxx, pminy, pmaxy;
+int max_x=540, max_y=540;
+double fxscal, fyscal, fxoff, fyoff;
+/* NOTE(review): without XTERNAL this header defines (not just declares) these globals -- presumably only one source file includes it that way; confirm */
+int *linarr;
+int nlinarr=5;
+
+char lvstr[MAX_STR];
+/* NOTE(review): have_bits/have_zdb are only declared extern in this branch but defined in the XTERNAL branch below -- confirm that is intended */
+double elinval[4]={1e-4,1e-2,1.0,100.0};
+double blinval[4]={40.0,30.0,20.0,10.0};
+int ilinval[4]={200,100,50,25};
+extern int have_bits, have_zdb;
+#else
+int have_bits=0, have_zdb=0;
+long zdb_size=1;
+#endif
+
+void openplt(long, long, int, int, char *, char *);
+void closeplt();
+void drawdiag(long n0, long n1);
+void closepl();
+void move(int, int);
+void cont(int, int);
+void clsline();
+void xaxis(long, int, char *);
+void yaxis(long, int, char *);
+void legend();
+void linetype(int);
+void opnline(int s, double bits);
+void newline();
+void clsline();
+void move(int, int);
+void draw(int, int);
+void sxy_move(int, int);
+void sxy_draw(int, int);
+void draw_str(char *);
+void draw_sstr(char *);
+
+double bit_to_E(double bits);
diff --git a/src/lib_sel.c b/src/lib_sel.c
new file mode 100644
index 0000000..9b1be12
--- /dev/null
+++ b/src/lib_sel.c
@@ -0,0 +1,341 @@
+/* $Id: lib_sel.c 792 2011-06-26 18:20:30Z wrp $ */
+
+/* copyright (c) 1996, 1997, 1998, 1999, 2014 by William R. Pearson and
+   The Rector & Visitors of the University of Virginia */
+
+/* Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under this License is distributed on an "AS
+   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+   express or implied.  See the License for the specific language
+   governing permissions and limitations under the License. 
+*/
+
+/*	modified Dec 13, 1989 requires different FASTLIBS */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <ctype.h>
+#include <string.h>
+
+#include "defs.h"
+#include "structs.h"
+
+#ifdef NCBIBL13
+#define LASTLIB NCBIBL13+1
+#else
+#define LASTLIB 11
+#endif
+
+
+struct lib_struct *get_lnames(char *tname, struct lib_struct *cur_lib_p);
+struct lib_struct *add_file(char *name, char *env, struct lib_struct *cur_lib_p);
+void lib_choice(char *lname, int nl, char *flstr, int ldnaseq);
+void subs_env(char *dest, char *src, int dest_size);
+char *ulindex(char *str, char *chr);
+
+static char ldname[MAX_FN];
+static char *libenv;
+
+/* read in the library names.
+   returns the beginning of the list of names, not the end
+
+   if cur_lib_p is NULL, then allocates it and returns it.
+   if cur_lib_p is not NULL, then links to cur_lib_p->next and returns cur_lib_p
+*/
+struct lib_struct *
+get_lnames(char *iname, struct lib_struct *cur_lib_p)
+{
+  char expanded[MAX_STR], *list_fn, *cut;
+  char entry[MAX_FN];
+  struct lib_struct *tail_p;
+  FILE *list_fp;
+
+  /* substitute environment variables in the caller's name first */
+  subs_env(expanded, iname, sizeof(expanded));
+
+  if (expanded[0] != '@') {
+    /* an ordinary library file: append it, hand back the list head */
+    tail_p = add_file(expanded,"\0",cur_lib_p);
+    return (cur_lib_p != NULL) ? cur_lib_p : tail_p;
+  }
+
+  /* "@file": a file of names; the name ends at the first blank */
+  list_fn = expanded + 1;
+  cut = strchr(list_fn,' ');
+  if (cut != NULL) *cut = '\0';
+
+  list_fp = fopen(list_fn,"r");
+  if (list_fp == NULL) {
+    fprintf(stderr," could not open file of names: %s\n",list_fn);
+    return NULL;
+  }
+
+  for (tail_p = cur_lib_p; fgets(entry,sizeof(entry),list_fp) != NULL; ) {
+    if (entry[0] == ';') continue;		/* comment line */
+    cut = strchr(entry,'\n');
+    if (cut != NULL) *cut = '\0';
+    subs_env(expanded, entry, sizeof(expanded));
+    if (expanded[0] != '<') {
+      tail_p = add_file(expanded,libenv,tail_p);
+      if (cur_lib_p == NULL) cur_lib_p = tail_p;
+      continue;
+    }
+    /* "<dir": remember dir as the default directory for later names */
+    strncpy(ldname,&expanded[1],sizeof(ldname));
+    ldname[sizeof(ldname)-1]='\0';
+    libenv=ldname;
+  }
+  fclose(list_fp);
+  return cur_lib_p;
+}
+
+void
+lib_choice(char *lname, int nl, char *flstr, int ldnaseq)
+{
+  FILE *fch = NULL;	/* stays NULL unless the choice file is really opened */
+  char line[MAX_STR], *bp;
+  char *chstr[MAX_CH],*chfile[MAX_CH];
+  char *chtmp, *charr;
+  int i,j,k,chlen;
+  /* Ask (stderr/stdin) for a library name and copy the reply into lname[nl]; a non-empty flstr names a menu file that is listed first */
+  charr = NULL;
+  if (strlen(flstr)> (size_t)0) {
+    chlen = MAX_CH*MAX_FN;
+    if ((chtmp=charr=calloc((size_t)chlen,sizeof(char)))==NULL) {
+      fprintf(stderr,"cannot allocate choice file array\n");
+      goto l1;
+    }
+    chlen--;
+    if ((fch=fopen(flstr,"r"))==NULL) {
+      fprintf(stderr," cannot open choice file: %s\n",flstr);
+      goto l1;
+    }
+    fprintf(stderr,"\n Choose sequence library:\n\n");
+
+    for (i=j=0; j<MAX_CH; i++) {
+      if (fgets(line,sizeof(line),fch)==NULL) break;	/* end of menu file */
+      if (line[0]==';') continue;	/* skip comment lines */
+      if ((bp=strchr(line,'\n'))!=NULL) *bp='\0'; /* remove \n */
+      if ((bp=strchr(line,'$'))==NULL) continue;  /* if no '$', continue */
+      *bp++='\0';	      /* replace $ with \0, bp points to libtype */
+
+      /* if libtypes don't match, continue */
+      if ((*bp++ -'0')!=ldnaseq) continue;
+
+      /* if the library file name is too long, quit */
+      if ((k=strlen(line))>chlen) break;
+
+      /* save the library file name */
+      strncpy(chstr[j]=chtmp,line,chlen);
+      chtmp += k+1; chlen -= k+1;
+
+      if ((k=strlen(bp))>chlen) break;
+      strncpy(chfile[j]=chtmp,bp,chlen);
+      chtmp += k+1; chlen -= k+1;
+      fprintf(stderr,"    %c: %s\n",*chfile[j++],line);
+    }
+  l2:  fprintf(stderr,"\n Enter library filename (e.g. %s), letter (e.g. P)\n",
+	       (ldnaseq==0)? "prot.lib" : "dna.lib");
+    fprintf(stderr," or a %% followed by a list of letters (e.g. %%PN): ");
+    fflush(stderr);
+    if (fgets(line,sizeof(line),stdin)==NULL) exit(0);
+    if ((bp=strchr(line,'\n'))!=NULL) *bp='\0';
+    if (strlen(line)==0) goto l2;
+    strncpy(lname,line,nl); if (nl>0) lname[nl-1]='\0';	/* strncpy may not terminate */
+  }
+  else {
+  l1: fprintf(stderr," library file name: ");
+    fflush(stderr);
+    if (fgets(line,sizeof(line),stdin)==NULL) exit(0);
+    if ((bp=strchr(line,'\n'))!=NULL) *bp='\0';
+    if (strlen(line)==(size_t)0) goto l1;
+    strncpy(lname,line,nl); if (nl>0) lname[nl-1]='\0';	/* strncpy may not terminate */
+  }
+  /* release only what was really acquired: fopen() may have failed (goto l1)
+     after charr was allocated, and fclose() must not be given an unopened stream */
+  if (fch!=NULL) fclose(fch);
+  if (charr!=NULL) free(charr);
+}
+
+/* lib_select parses the choices in char *lname and builds the list
+   of library files.
+
+   lib_select returns the head of the list of files
+
+   lib_select recognizes:
+   %prm  -- leading '%' indicates a list of one letter abbreviations
+   +abrev1+abrev2+abrev3 -- leading '+' indicates a list of word abbreviations
+
+   @library.fil -- leading '@' indicates a list of library files
+   (possibly containing additional @files)
+
+   Support for NCBI .pal/.nal files needs to be added
+*/
+struct lib_struct *
+lib_select(char *lname, char *ltitle, const char *flstr, int ldnaseq)
+{
+  char line[MAX_FN*2], *bp, *bp1;
+  char *llnames[MAX_LF]; /* pointers into new list of names */
+  int new_abbr,ich, nch;	  /* use new multi-letter abbr */
+  int ltmp;
+  FILE *fch;
+  struct lib_struct *cur_lib_p = NULL;
+  /* ltitle is treated as a MAX_STR-byte buffer throughout; every copy below leaves room for the NUL */
+  new_abbr = 0;
+  *ltitle = '\0';
+
+  if (strlen(lname) > (size_t)1 && *lname != '%' && *lname != '+') {
+    return get_lnames(lname, cur_lib_p); /* file name */ 
+  }
+  else {
+    if (*flstr=='\0') {
+      fprintf(stderr," abbrv. list request but FASTLIBS undefined, cannot use %s\n",lname);
+      exit(1);
+    }
+
+    if (strchr(lname,'+')) {
+      /* indicates list of database abbrevs (not files) */
+      new_abbr=1;
+      nch = 0;
+      bp = lname+1; if (*bp == '+') bp++;
+      for (bp1=bp; nch < MAX_LF && bp!=NULL && bp1!=NULL; bp=bp1+1) {
+	if ((bp1=strchr(bp,'+'))!=NULL) *bp1='\0';
+	llnames[nch++] = bp;
+      }
+    }
+    else if (*lname=='%') {     /* list of single letter abbreviations */
+      lname++;	/* bump over '%' to get letters */
+    }
+
+    /* else just use a single character abbreviation */
+
+    if (strlen(flstr) > (size_t)0) {
+      if ((fch=fopen(flstr,"r"))==NULL) {
+	fprintf(stderr," cannot open choice file: %s\n",flstr);
+	return NULL;
+      }
+    }
+
+    /* read each line of FASTLIBS */
+    while (fgets(line,sizeof(line),fch)!=NULL) { 
+      if (line[0]==';') continue;	/* skip comments */
+      if ((bp=strchr(line,'\n'))!=NULL) *bp='\0';	/* remove '\n' */
+      if ((bp=strchr(line,'$'))==NULL) continue; /* no delim, continue */
+      *bp++='\0';	/* point to library type */
+      if ((*bp++ -'0')!=ldnaseq) continue; /* doesn't match, continue */
+
+      /* if !new_abbr, match on one letter with ulindex() */
+      if (!new_abbr) {
+	if (*bp=='+') continue; /* not a &lib& */
+	else if (ulindex(lname,bp)!=NULL) { 
+	  if (ltitle[0] == '\0') {
+	    strncpy(ltitle,line,MAX_STR-1); ltitle[MAX_STR-1]='\0';
+	  }
+	  else {
+	    ltmp = MAX_STR - 1 - (int)strlen(ltitle);	/* room left before the NUL */
+	    if (ltmp > 4) strncat(ltitle,",\n  ",(size_t)ltmp);
+	    if (ltmp > 4) strncat(ltitle,line,(size_t)(ltmp-4));
+	  }
+	  cur_lib_p = get_lnames(bp+1, cur_lib_p);
+	}
+      }
+      else {
+	if (*bp!='+') continue;
+	else {
+	  bp++;
+	  if ((bp1 = strchr(bp,'+'))!=NULL) {
+	    *bp1='\0';
+	    for (ich = 0; ich<nch; ich++) {
+	      if (strcmp(llnames[ich],bp)==0) {
+		if (ltitle[0] == '\0') {
+		  strncpy(ltitle,line,MAX_STR-1); ltitle[MAX_STR-1]='\0';
+		}
+		else {
+		  ltmp = MAX_STR - 1 - (int)strlen(ltitle);	/* room left before the NUL */
+		  if (ltmp > 4) strncat(ltitle,",\n  ",(size_t)ltmp);
+		  if (ltmp > 4) strncat(ltitle,line,(size_t)(ltmp-4));
+		}
+		cur_lib_p = get_lnames(bp1+1, cur_lib_p);
+		break;
+	      }
+	    }
+	    *bp1='+';
+	  }
+	  else fprintf(stderr,"%s missing final '+'\n",bp);
+	}
+      }
+    }
+    fclose(fch);
+  }
+  return cur_lib_p;
+}
+
+/* unlike lib_select() and get_lnames(), add_file() returns a new
+   pointer, to which library files can be added
+*/
+struct lib_struct *
+add_file(char *fname, char *env, struct lib_struct *cur_lib_p)
+{
+  char tname[MAX_STR];
+  char *lbptr;
+  size_t len;
+  struct lib_struct *this_lib_p;
+
+  /* Append one file name (prefixed with the default directory env, when
+     given, unless fname starts with '#') behind the tail of the list that
+     cur_lib_p belongs to.  Returns the new node; the old tail if the name
+     cannot be stored; NULL if the node itself cannot be allocated. */
+
+  tname[0]='\0';
+  if (env != NULL && *env != '\0' && *fname != '#') {
+    /* strncat always terminates and never writes past tname */
+    strncat(tname,env,sizeof(tname)-1);
+#ifdef UNIX
+    strncat(tname,"/",sizeof(tname)-strlen(tname)-1);
+#endif
+  }
+
+  /* get to the end of the current list */
+  while (cur_lib_p && cur_lib_p->next) {cur_lib_p = cur_lib_p->next;}
+
+  /* add fname to tname, allocate space, and move to space */
+  strncat(tname,fname,sizeof(tname)-strlen(tname)-1);
+  len=strlen(tname)+1;
+  if ((lbptr=calloc(len,sizeof(char)))==NULL) {
+    fprintf(stderr,"no more space for filenames: %s ignored\n",fname);
+    return cur_lib_p;
+  }
+  strncpy(lbptr,tname,len);
+  lbptr[len-1]='\0';
+
+  /* have a file name to add, I need a lib_struct */
+  if ((this_lib_p = (struct lib_struct *)calloc(1,sizeof(struct lib_struct)))==NULL) {
+    fprintf (stderr,"*** Error -- Cannot allocate lib_struct for %s\n",tname);
+    free(lbptr);			/* the copied name was leaked on this path */
+    return NULL;
+  }
+
+  /* link the new node behind the current tail, if there is one */
+  this_lib_p->file_name = lbptr;
+  if (cur_lib_p != NULL) {cur_lib_p->next = this_lib_p;}
+  return this_lib_p;
+}
+
+char *
+ulindex(char *str, char *chr)
+{
+  int c;	/* int, so it compares equal to tolower()'s return value */
+  /* case-insensitive search of str for the first character of chr;
+     <ctype.h> needs unsigned char values, plain char may be negative */
+  c = tolower((unsigned char)*chr);
+  while (*str != '\0' && tolower((unsigned char)*str) != c) str++;
+  if (*str=='\0') return NULL;
+  else return str;
+}
diff --git a/src/list_db.c b/src/list_db.c
new file mode 100644
index 0000000..40cb97d
--- /dev/null
+++ b/src/list_db.c
@@ -0,0 +1,245 @@
+/* list_db.c - report values from map_db.c  */
+
+/* $Id: list_db.c $ */
+
+/* copyright (c) 1999, 2014 by William R. Pearson and The Rector &
+   Visitors of the University of Virginia */
+
+/* Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under this License is distributed on an "AS
+   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+   express or implied.  See the License for the specific language
+   governing permissions and limitations under the License. 
+*/
+
+/* format of the index file:
+
+1)  map_db version number ["MP"+2 bytes]
+2)  number of sequences in database [4 bytes]
+3)  total length of database        [8 bytes]
+4)  longest sequence in database    [8 bytes]
+5) list of offsets to definitions  [num_seq+1] int*8
+6) list of offsets to sequences    [num_seq+1] int*8
+7) list of flag characters for sequences [num_seq+1] bytes
+    (used for GCG binary to encode 2bit or 4 bit representation)
+
+    sequence files will be as defined by their format
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "uascii.h"
+#include "mm_file.h"
+/* #include "ncbl2_head.h" */
+
+void src_int4_write(FILE *, int);
+void src_int4_read(FILE *, int *);
+void src_long4_read(FILE *, long *);
+void src_long8_write(FILE *, int64_t);
+void src_long8_read(FILE *, int64_t *);
+
+void newname(char *nname, char *oname, char *suff, int maxn);
+
+int
+main(int argc, char **argv)
+{
+  FILE *libi;
+  char lname[256];
+  char iname[256];
+  char format[4];
+  char *bp;
+  /* Reads the "<library>.xin" index named by argv[1] (or typed at the prompt) and prints its header plus the header/sequence offsets */
+  int i;
+  int d_pos;	/* start of description */
+  int s_pos;	/* start of sequence */
+  int attr;	/* sequence attribute */
+  int lib_aa;	/* 0 => DNA, 1 => protein */
+  int nlib;	/* number of entries */
+  fseek_t f_size;
+  long lf_size;	/* (long) version of f_size */
+  long max_len;	/* longest sequence */
+  MM_OFF tot_len;	/* total sequence length */
+  int n1;
+  
+  int lib_size;	/* current space available - may be realloc'ed */
+  int lib_inc;
+  int lib_type; /* 1 for protein, 0 for DNA */
+  int lib_dna;	/* dna=1; prot=0; */
+  fseek_t *d_pos_arr;	/* array of description pointers */
+  fseek_t *s_pos_arr;	/* array of sequence pointers */
+  long *tmp_pos_arr;
+  char *attr_arr;	/* array of attribute chars */
+
+  int mm64_flag;
+
+  lib_type = 0;
+  lib_dna = 0;
+
+  /* open the database */
+  if (argc > 1) { strncpy(lname, argv[1],sizeof(lname)-1); lname[sizeof(lname)-1]='\0'; }
+  else {
+    fprintf(stderr," Entry library name: ");
+    if (fgets(lname,sizeof(lname),stdin)==NULL) exit(1);	/* no name given */
+    if ((bp=strchr(lname,'\n'))!=NULL) *bp='\0';
+  }
+    
+  if ((bp=strchr(lname,' '))!=NULL) {
+    lib_type = atoi(bp+1);
+    *bp='\0';
+  }
+  else lib_type = 0;
+
+  newname(iname,lname,"xin",sizeof(iname));
+
+  if ((libi=fopen(iname,"r"))==NULL) {
+    fprintf(stderr," cannot open %s\n",iname);
+    exit(1);
+  }
+
+  if (fread(format,1,sizeof(format),libi)!=sizeof(format)) {fprintf(stderr," cannot read %s\n",iname); exit(1);}
+  printf("%c%c%d %d\n",format[0],format[1],format[2],format[3]);
+  mm64_flag = (format[2]==1);
+
+  src_int4_read(libi,&lib_aa);
+
+  if (mm64_flag) src_long8_read(libi,&f_size);
+  else {
+    src_long4_read(libi,&lf_size);
+    f_size = lf_size;
+  }
+
+  src_int4_read(libi,&nlib);
+
+  if (mm64_flag) {
+    src_long8_read(libi,&tot_len);
+  }
+  else {
+    src_long4_read(libi,&lf_size);
+    tot_len = lf_size;
+  }
+  src_long4_read(libi,&max_len);
+
+  printf(" %d entries; tot: %lld; max: %ld\n",nlib,tot_len,max_len);
+
+  /* 4-byte index only: scratch array the offsets are read into before widening */
+  if (!mm64_flag) {
+    if ((tmp_pos_arr=(long *)calloc(nlib+1,sizeof(long)))==NULL) {
+      fprintf(stderr," cannot allocate %d for tmp_pos array\n",nlib+1);
+      exit(1);	/* continuing would dereference the NULL array below */
+    }
+  }
+
+  /* allocate array of description pointers */
+  if ((d_pos_arr=(fseek_t *)calloc(nlib+1, sizeof(fseek_t)))==NULL) {
+    fprintf(stderr," cannot allocate %d for desc. array\n",nlib+1);
+    exit(1);
+  }
+  /* allocate array of sequence pointers */
+  if ((s_pos_arr=(fseek_t *)calloc(nlib+1, sizeof(fseek_t)))==NULL) {
+    fprintf(stderr," cannot allocate %d for seq. array\n",nlib+1);
+    exit(1);
+  }
+  if ((attr_arr=(char *)calloc(nlib+1, sizeof(char)))==NULL) {
+    fprintf(stderr," cannot allocate %d for attr. array\n",nlib+1);
+    exit(1);
+  }
+  
+  if (mm64_flag) {
+    for (i=0; i<=nlib; i++) src_long8_read(libi,&d_pos_arr[i]);
+    for (i=0; i<=nlib; i++) src_long8_read(libi,&s_pos_arr[i]);
+  }
+  else {
+    for (i=0; i<=nlib; i++) src_long4_read(libi,&tmp_pos_arr[i]);
+    for (i=0; i<=nlib; i++) d_pos_arr[i] = tmp_pos_arr[i];
+    for (i=0; i<=nlib; i++) src_long4_read(libi,&tmp_pos_arr[i]);
+    for (i=0; i<=nlib; i++) s_pos_arr[i] = tmp_pos_arr[i];
+  }
+
+  fread(attr_arr,nlib+1,sizeof(char),libi);
+  fclose(libi);
+
+  printf("header\tseq\n");
+
+  for (i=0; i<nlib; i++) printf("%lld\t%lld\n",d_pos_arr[i],s_pos_arr[i]);
+
+  exit(0);
+}
+
+void src_int4_read(FILE *fd,  int *val)
+{
+  /* read a 4-byte big-endian integer from fd into *val (missing bytes count as 0) */
+#ifdef IS_BIG_ENDIAN
+  int tval = 0;
+  fread(&tval,(size_t)4,(size_t)1,fd);
+  *val = tval;
+#else
+  unsigned char b[4] = {0, 0, 0, 0};
+  fread((char *)&b[0],(size_t)1,(size_t)4,fd);
+  /* combine as unsigned: shifting a set bit into an int's sign is undefined */
+  *val = (int)(((unsigned int)b[0]<<24) | ((unsigned int)b[1]<<16) |
+	       ((unsigned int)b[2]<<8) | (unsigned int)b[3]);
+#endif
+}
+
+void src_long4_read(FILE *fd,  long *val)
+{
+  /* read a 4-byte big-endian integer from fd, sign-extended into *val (missing bytes count as 0) */
+#ifdef IS_BIG_ENDIAN
+  int tval = 0;
+  fread(&tval,(size_t)4,(size_t)1,fd);
+  *val = tval;
+#else
+  unsigned char b[4] = {0, 0, 0, 0};
+  fread((char *)&b[0],(size_t)1,(size_t)4,fd);
+  /* combine as unsigned: shifting a set bit into an int's sign is undefined */
+  *val = (long)(int)(((unsigned int)b[0]<<24) | ((unsigned int)b[1]<<16) |
+		     ((unsigned int)b[2]<<8) | (unsigned int)b[3]);
+#endif
+}
+
+void src_long8_read(FILE *fd,  int64_t *val)
+{	/* read an 8-byte big-endian integer from fd into *val */
+#ifdef IS_BIG_ENDIAN
+  fread((char *)val,(size_t)8,(size_t)1,fd);
+#else
+  unsigned char b[8] = {0, 0, 0, 0, 0, 0, 0, 0};
+  unsigned long long acc = 0;	/* 64-bit accumulator: int arithmetic lost the top 4 bytes */
+  int k;
+
+  fread((char *)&b[0],(size_t)1,(size_t)8,fd);
+  for (k = 0; k < 8; k++) acc = (acc << 8) | b[k];	/* b[0] is most significant */
+  *val = (int64_t)acc;
+#endif
+}
+
+void src_int4_write(FILE *fd,  int val)
+{
+  /* write val to fd as 4 bytes */
+#ifdef IS_BIG_ENDIAN
+  fwrite(&val,(size_t)4,(size_t)1,fd);
+#else
+  unsigned char out[4];
+  int pos;
+  for (pos = 3; pos >= 0; pos--) {	/* low byte goes last: out[0] is most significant */
+    out[pos] = val & 255;
+    val = val >> 8;
+  }
+  fwrite(out,(size_t)1,(size_t)4,fd);
+#endif
+}
+
+void
+newname(char *nname, char *oname, char *suff, int maxn)
+{
+  /* build "<oname>.<suff>", truncated to fit nname[maxn]; snprintf always
+     NUL-terminates, which strncpy()/strncat(...,maxn-strlen) did not */
+  if (maxn > 0) snprintf(nname,(size_t)maxn,"%s.%s",oname,suff);
+}
diff --git a/src/llgetaa.c b/src/llgetaa.c
new file mode 100644
index 0000000..4e1910a
--- /dev/null
+++ b/src/llgetaa.c
@@ -0,0 +1,497 @@
+/* $Id: llgetaa.c 625 2011-03-23 17:21:38Z wrp $ */
+
+/* copyright (c) 1996, 1997, 1998, 1999, 2007 by William R. Pearson and
+   The Rector & Visitors of the University of Virginia */
+
+/* Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under this License is distributed on an "AS
+   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+   express or implied.  See the License for the specific language
+   governing permissions and limitations under the License. 
+*/
+
+/*
+   Feb, 1998 - version for prss 
+
+   March, 2001 - modifications to support comp_thr.c: use libpos to indicate
+   whether the score is shuffled==1 or unshuffled==0.  This simplifies
+   complib.c and makes comp_thr.c possible
+
+   modified version of nxgetaa.c that generates random sequences
+   for a library
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "defs.h"
+#include "mm_file.h"
+
+#include "uascii.h"
+#include "structs.h"
+
+#define XTERNAL
+#include "upam.h"
+#undef XTERNAL
+
+#define YES 1
+#define NO 0
+#define MAXLINE 512
+
+#ifndef min
+#define min(x,y) ((x) > (y) ? (y) : (x))
+#endif
+
+/* number of times getseq() has been asked to read from stdin
+   ("-" or "@"); 0 when reading from named files */
+static int use_stdin=0;
+/* description line of the first sequence read by getseq() */
+static char llibstr0[256];
+/* description used for the second (and later) stdin sequence; copied
+   from o_line by getseq() */
+static char llibstr1[256];
+/* header line of the *next* sequence, saved by getseq() when it hits
+   a second '>' / ';' line while reading the current sequence */
+static char o_line[256];
+
+/* sequence file formats recognized by getseq() */
+#define NO_FORMAT 0
+#define FASTA_FORMAT 1
+#define GCG_FORMAT 2
+/* format detected so far; kept between calls to getseq() */
+static int seq_format=NO_FORMAT;
+/* text following '>' on the most recent FASTA header line */
+static char seq_title[200];
+
+/* random-number and shuffling helpers defined elsewhere */
+extern int irand(int);
+extern void shuffle(unsigned char *from, unsigned char *to, int n);
+extern void wshuffle(unsigned char *from, unsigned char *to, int n, int wsiz, int *ieven);
+
+/* getseq - read one sequence from a FASTA or GCG format file.
+
+   filen    - file name; "-" or "@" means stdin.  An optional
+              ":start-stop" (or ":-stop") suffix selects a subrange;
+              the ':' is temporarily overwritten and always restored.
+   qascii   - table mapping input characters to residue codes
+   seq      - receives the encoded residues, terminated by EOSEQ
+   maxs     - maximum number of residues to store
+   libstr   - receives the sequence description
+   n_libstr - size of libstr
+   sq0off   - updated to the requested start when a subrange is given
+              and either *sq0off==1 or start>1
+
+   Returns the number of residues stored (after applying any
+   subrange), or 0 if the file cannot be opened or a GCG header is
+   incomplete.
+
+   When reading from stdin a second time, the description comes from
+   the header line saved in o_line by the previous call.
+
+   Changes from the original: all fixed-size buffers filled with
+   strncpy() are explicitly terminated; l_offset and ic are
+   initialized; lines shorter than 3 characters are handled in the GCG
+   header scan; early returns restore filen and close the file; the
+   requested subrange is clamped to the sequence actually read. */
+int
+getseq(char *filen, int *qascii,
+       unsigned char *seq, int maxs, char *libstr,
+       int n_libstr, long *sq0off)
+{
+  FILE *fptr;
+  char line[512],*bp;
+  int i, j, n;
+  int ic = EL;		/* "end of line" until a sequence line is scanned */
+  int sstart, sstop, sset=0;
+  int have_desc = 0;
+  int desc_complete = 0;
+  int llen, l_offset = 0;
+
+  seq_title[0]='\0';
+
+  sstart = sstop = -1;
+#ifndef DOS
+  if ((bp=strchr(filen,':'))!=NULL) {
+#else
+  if ((bp=strchr(filen+3,':'))!=NULL) {
+#endif
+    *bp='\0';
+    if (*(bp+1)=='-') sscanf(bp+2,"%d",&sstop);
+    else sscanf(bp+1,"%d-%d",&sstart,&sstop);
+    sset=1;
+  }
+
+  if (strcmp(filen,"-") && strcmp(filen,"@")) {
+    if ((fptr=fopen(filen,"r"))==NULL) {
+      fprintf(stderr," could not open %s\n",filen);
+      /* put the ':' back so the caller's string is unchanged */
+      if (sset==1) filen[strlen(filen)]=':';
+      return 0;
+    }
+  }
+  else {
+    fptr = stdin;
+    use_stdin++;
+  }
+
+  if (use_stdin > 1) {
+    /* second sequence from stdin: its header was saved in o_line */
+    have_desc = 1;
+    if ((bp=strchr(o_line,'\001'))!=NULL) *bp='\0';
+    strncpy(llibstr1,o_line,sizeof(llibstr1));
+    llibstr1[sizeof(llibstr1)-1]='\0';
+    strncpy(libstr,o_line,n_libstr);
+    libstr[n_libstr-1]='\0';
+    l_offset = 0;
+  }
+
+  if (sset==1) {
+    filen[strlen(filen)]=':';
+    if (*sq0off==1 || sstart>1) *sq0off = sstart;
+  }
+
+  desc_complete = 0;
+  n=0;
+  while(fgets(line,sizeof(line),fptr)!=NULL) {
+    if (line[0]=='>') {
+      if (have_desc) {
+	/* start of the next sequence: remember its header and stop */
+	strncpy(o_line,line,sizeof(o_line));
+	o_line[sizeof(o_line)-1]='\0';
+	goto last;
+      }
+      l_offset = 0;
+      seq_format = FASTA_FORMAT;
+
+      if ((bp=(char *)strchr(line,'\n'))!=NULL) {
+	*bp='\0';				/* have newline */
+	desc_complete = 1;
+      }
+
+      if ((bp=strchr(line+1,'\001'))!=NULL) *bp='\0';
+      strncpy(seq_title,line+1,sizeof(seq_title));
+      seq_title[sizeof(seq_title)-1]='\0';
+      strncpy(llibstr0,line+1,sizeof(llibstr0));
+      llibstr0[sizeof(llibstr0)-1]='\0';
+      if (n_libstr <= 20) {
+	if ((bp=(char *)strchr(line,' '))!=NULL) *bp='\0';
+      }
+      strncpy(libstr,line+1,n_libstr);
+      libstr[n_libstr-1]='\0';
+
+      if (!desc_complete) {
+	/* header longer than line[]: skip the rest of it */
+	while (fgets(line, sizeof(line), fptr) != NULL) {
+	  if (strchr(line,'\n') != NULL) {
+	    line[0]='>';
+	    break;
+	  }
+	}
+	desc_complete = 1;
+      }
+    }
+    else if (seq_format==NO_FORMAT) {
+      /* no '>' header seen: treat as GCG, skip to the ".." line */
+      seq_format = GCG_FORMAT;
+      qascii['*'] = qascii['X'];
+      l_offset = 10;
+      llen = strlen(line);
+      while (llen < 3 || strncmp(&line[llen-3],"..\n",(size_t)3) != 0) {
+	if (fgets(line,sizeof(line),fptr)==NULL) {
+	  if (fptr!=stdin) fclose(fptr);
+	  return 0;
+	}
+	llen = strlen(line);
+      }
+      if (n_libstr <= 20) {
+	if ((bp=(char *)strchr(line,' '))!=NULL) *bp='\0';
+	else if ((bp=(char *)strchr(line,'\n'))!=NULL) *bp='\0';
+      }
+      strncpy(libstr,line,n_libstr);
+      libstr[n_libstr-1]='\0';
+      if (fgets(line,sizeof(line),fptr)==NULL) {
+	if (fptr!=stdin) fclose(fptr);
+	return 0;
+      }
+    }
+
+    if (seq_format==GCG_FORMAT && strlen(line)<l_offset) continue;
+
+    if (line[0]!='>'&& line[0]!=';') {
+      /* sequence line: store residue codes until end-of-line code */
+      for (i=l_offset; (n<maxs)&&
+	     ((ic=qascii[line[i]&AAMASK])<EL); i++)
+	if (ic<NA) seq[n++]= ic;
+      if (ic == ES) break;
+    }
+    else {
+      if (have_desc) {
+	strncpy(o_line,line,sizeof(o_line));
+	o_line[sizeof(o_line)-1]='\0';
+	goto last;
+      }
+      else {
+	have_desc = 1;
+      }
+    }
+  }
+
+ last:
+  if (n==maxs) {
+    fprintf(stderr," sequence may be truncated %d %d\n",n,maxs);
+    fflush(stderr);
+  }
+  if ((bp=strchr(libstr,'\n'))!=NULL) *bp = '\0';
+  if ((bp=strchr(libstr,'\r'))!=NULL) *bp = '\0';
+  seq[n]= EOSEQ;
+
+  if (fptr!=stdin) fclose(fptr);
+
+  if (sset) {
+    /* apply the 1-based, inclusive subrange; never read past seq[n-1] */
+    if (sstart <= 0) sstart = 1;
+    if (sstop <= 0 || sstop > n) sstop = n;
+    sstart--;
+    sstop--;
+    if (sstart > sstop) {
+      n = 0;
+    }
+    else {
+      for (i=0, j=sstart; j<=sstop; i++,j++)
+	seq[i] = seq[j];
+      n = sstop - sstart +1;
+    }
+    seq[n]=EOSEQ;
+  }
+
+  return n;
+}
+
+/* gettitle - fetch the description line of the query sequence.
+
+   filen - file name, optionally followed by ":range"; the ':' is
+           temporarily overwritten and always restored
+   title - receives the description (always NUL-terminated)
+   len   - size of title
+
+   When the query came from stdin, the description saved by getseq()
+   (llibstr0 for the first sequence, llibstr1 otherwise) is returned.
+   Otherwise the file is scanned for the first line starting with '>'
+   or ';'; that line, including its first character, is copied.
+
+   Returns strlen(title), or 0 if the file cannot be opened or has no
+   header line.
+
+   Changes from the original: title is terminated on the stdin path,
+   and filen is restored when fopen() fails. */
+int
+gettitle(char *filen, char *title, int len)
+{
+  FILE *fptr;
+  char line[512];
+  char *bp;
+  int sset;
+
+  sset = 0;
+
+  if (use_stdin) {
+    if (use_stdin == 1) {
+      /*      use_stdin++; */
+      strncpy(title,llibstr0,len);
+    }
+    else {
+      strncpy(title,llibstr1,len);
+    }
+    title[len-1]='\0';
+    if ((bp=strchr(title,'\001'))!=NULL) *bp='\0';
+    return strlen(title);
+  }
+
+  if ((bp=strchr(filen,':'))!=NULL) { *bp='\0'; sset=1;}
+	  
+  if ((fptr=fopen(filen,"r"))==NULL) {
+    fprintf(stderr," file %s was not found\n",filen);
+    fflush(stderr);
+    if (sset==1) filen[strlen(filen)]=':';
+    return 0;
+  }
+
+  if (sset==1) filen[strlen(filen)]=':';
+
+  while(fgets(line,sizeof(line),fptr)!=0) {
+    if (line[0]=='>'|| line[0]==';') goto found;
+  }
+  fclose(fptr);
+  title[0]='\0';
+  return 0;
+
+ found:
+  if ((bp=strchr(line,'\001'))!=NULL) *bp = 0;
+#ifdef WIN32
+  bp = strpbrk(line,"\n\r");
+#else
+  bp = strchr(line,'\n');
+#endif
+  if (bp!=NULL) *bp = 0;
+  strncpy(title,line,len);
+  title[len-1]='\0';
+  fclose(fptr);
+  return strlen(title);
+}	
+
+/* library stream; closed (if non-NULL) by closelib() */
+FILE *libf=NULL;
+
+/* NOTE(review): lpos, lline and lfflag are not referenced in the
+   visible part of this file - presumably kept for compatibility with
+   nxgetaa.c; confirm before removing */
+long lpos;
+char lline[MAXLINE];
+int lfflag=0;	/* flag for CRLF in EMBL CDROM files */
+#define LFCHAR '\015'  /* for MWC 5.5 */
+
+int agetlib(); void aranlib();	/* pearson fasta format */
+
+/*	the following is from fgetgb.c */
+
+/* a file name for open_lib may now include a library type suffix */
+/* only opens fasta format files */
+
+/* library (sequence file) name recorded by open_lib(), read by agetlib() */
+static char libn_save[MAX_FN];
+static int ldna_save=0;
+/* 0 until agetlib() has read the real sequence; 1 afterwards, when
+   agetlib() returns shuffled copies */
+static int do_shuffle;
+/* number of shuffled sequences still to be returned by agetlib() */
+static int shuff_cnt=10;
+/* >0: window size passed to wshuffle(); otherwise shuffle() is used */
+static int w_flag = 0;
+#ifdef DEBUG
+/* optional file receiving every shuffled sequence */
+static FILE *dfile=NULL;
+#endif
+/* copy of the sequence read by agetlib(), and its length */
+static unsigned char *aa_save;
+static int n1_save;
+static int i_even;
+
+/* lmf_str * is used here for compatibility with the "normal" open_lib,
+   but is largely unnecessary */
+
+/* set_shuffle - take the shuffle settings from m_msg.
+
+   m_msg.shuff_wid > 0 selects window shuffling with that width;
+   m_msg.shuff_max raises the number of shuffles when it is larger
+   than the current count.  With DEBUG defined and m_msg.dfile set,
+   "<dfile>_rlib" is opened to receive the shuffled sequences.
+
+   The debug file name is now built without overrunning dfname (the
+   original passed sizeof(dfname) as the strncat() limit). */
+void 
+set_shuffle(struct mngmsg m_msg) {
+#ifdef DEBUG
+  char dfname[MAX_FN];
+#endif
+
+  if (m_msg.shuff_wid > 0) w_flag = m_msg.shuff_wid;
+  if (m_msg.shuff_max > shuff_cnt) shuff_cnt = m_msg.shuff_max;
+
+#ifdef DEBUG
+  if (m_msg.dfile[0]!='\0') {
+    strncpy(dfname,m_msg.dfile,sizeof(dfname)-1);
+    dfname[sizeof(dfname)-1]='\0';
+    strncat(dfname,"_rlib",sizeof(dfname)-strlen(dfname)-1);
+    dfile = fopen(dfname,"w");
+  }
+#endif
+}
+
+/* open_lib - prepare the "library" used for shuffled comparisons.
+
+   No file is opened here: the name is saved in libn_save for
+   agetlib(), an lmf_str is allocated and wired to agetlib()/aranlib(),
+   shuffling is reset and the random number generator is initialized.
+
+   lname   - name of the sequence file to be shuffled
+   ldnaseq - not used
+   sascii  - character-to-residue mapping stored in the lmf_str
+   quiet   - not used
+   m_fd    - not used
+
+   Returns the new lmf_str, or NULL if it cannot be allocated.
+
+   Changes from the original: libn_save is always NUL-terminated, the
+   size in the error message is printed with a matching type, and
+   unused locals are removed. */
+struct lmf_str *
+open_lib(char *lname, int ldnaseq, int *sascii, int quiet, struct lmf_str *m_fd)
+{
+  struct lmf_str *m_fptr;
+
+  strncpy(libn_save,lname,sizeof(libn_save));
+  libn_save[sizeof(libn_save)-1]='\0';
+
+  /* now allocate a buffer for the opened text file */
+  if ((m_fptr = calloc(1,sizeof(struct lmf_str)))==NULL) {
+    fprintf(stderr," cannot allocate lmf_str (%ld) for %s\n",
+	    (long)sizeof(struct lmf_str),lname);
+    return NULL;
+  }
+
+  strncpy(m_fptr->lb_name,lname,MAX_FN);
+  m_fptr->lb_name[MAX_FN-1]='\0';
+
+  m_fptr->sascii = sascii;
+  m_fptr->getlib = agetlib;
+  m_fptr->ranlib = aranlib;
+  m_fptr->mm_flg = 0;
+
+  do_shuffle = 0;
+  irand(0);		/* initialize the random number generator */
+
+  return m_fptr;
+}
+
+/* closelib - close the library stream and, with DEBUG, the shuffle
+   dump file.  Both pointers are reset so that a second call is
+   harmless (the original left dfile dangling after fclose()). */
+void
+closelib()
+{
+  if (libf!=NULL) {
+    fclose(libf);
+    libf = NULL;
+  }
+#ifdef DEBUG
+  if (dfile!=NULL) {
+    fclose(dfile);
+    dfile = NULL;
+  }
+#endif
+}
+
+/* state handed to wshuffle() between calls */
+static int ieven=0;
+/* copy of the description of the sequence read by agetlib() */
+static char *desc_save;
+
+/* agetlib - return the next "library" sequence.
+
+   The first call after open_lib() reads the real sequence from
+   libn_save with getseq(), saves a copy of it and of its description,
+   sets *libpos = 0 and returns its length.  Every later call returns
+   a shuffled copy of the saved sequence with *libpos = 1, until
+   shuff_cnt runs out, when -1 is returned.
+
+   seq      - receives the sequence, terminated by EOSEQ when shuffled
+   maxs     - capacity of seq
+   libstr   - receives the description
+   n_libstr - size of libstr
+   libpos   - 0 for the unshuffled sequence, 1 for a shuffled one
+   lcont    - not used
+   lf_fd    - supplies the sascii mapping
+   l_off    - always set to 1
+
+   Returns the sequence length, getseq()'s result if that is < 1, or
+   -1 when there is nothing (more) to return.
+
+   Changes from the original: sq1_off is initialized before getseq()
+   reads it; a failed allocation of the saved sequence returns -1
+   instead of copying into NULL; the terminator of desc_save is
+   assigned (the original used "==", a no-op); a missing desc_save
+   yields an empty description; size_t values are printed with a
+   matching format. */
+int
+agetlib(unsigned char *seq, 
+	int maxs,
+	char *libstr,
+	int n_libstr,
+	fseek_t *libpos,
+	int *lcont, 
+	struct lmf_str *lf_fd,
+	long *l_off)
+{
+  long sq1_off = 0;	/* read by getseq(); value not used afterwards */
+  char lib_desc[120];
+#ifdef DEBUG
+  int i;
+#endif
+
+  *l_off = 1;
+
+  if (!do_shuffle) {
+    do_shuffle = 1;
+    
+    if ((n1_save = getseq(libn_save,lf_fd->sascii,
+			  seq,maxs,lib_desc,sizeof(lib_desc),&sq1_off)) < 1)
+      return n1_save;
+
+    strncpy(libstr,lib_desc,n_libstr);
+    libstr[n_libstr-1]='\0';
+
+    if ((aa_save = (unsigned char *)calloc(n1_save+1,sizeof(unsigned char)))==
+	NULL) {
+      fprintf(stderr," cannot allocate %d for saved sequence\n",
+	      n1_save);
+      return -1;
+    }
+    memcpy((void *)aa_save,(void *)seq,n1_save);
+
+    if ((desc_save =
+	 (char *)calloc(strlen(lib_desc)+1,sizeof(char)))== NULL) {
+      fprintf(stderr," cannot allocate saved desciption [%lu]\n",
+	      (unsigned long)(strlen(lib_desc)+1));
+    }
+    else {
+      strncpy (desc_save,lib_desc,strlen(lib_desc));
+      desc_save[strlen(lib_desc)]='\0';
+    }
+
+    *libpos = 0;
+    return n1_save;
+  }
+  else {	/* return a shuffled sequence - here we need a window size; */
+    if (desc_save != NULL) strncpy(libstr,desc_save,n_libstr);
+    else libstr[0]='\0';
+    libstr[n_libstr-1]='\0';
+
+    if (shuff_cnt-- <= 0 ) return -1;
+    if (aa_save == NULL) return -1;	/* nothing was saved to shuffle */
+    if (w_flag > 0) wshuffle(aa_save,seq,n1_save,w_flag,&ieven);
+    else shuffle(aa_save,seq,n1_save);
+    seq[n1_save] = EOSEQ;
+#ifdef DEBUG
+    if (dfile!=NULL) {
+      fprintf(dfile,">%d\n",shuff_cnt);
+      for (i=0; i<n1_save; i++) {
+	if (aa[seq[i]]>0) fputc(aa[seq[i]],dfile);
+	else {fprintf(stderr,"error aa0[%d]: %d %d\n",
+		      i,seq[i],aa[seq[i]]);}
+	if (i%60 == 59) fputc('\n',dfile);
+      }
+      fputc('\n',dfile);
+    }
+#endif
+    *libpos = 1;
+    return n1_save;
+  }
+}
+
+/* aranlib - copy the description of the current sequence into str.
+
+   str    - receives the description, truncated to cnt-1 characters
+            and cut at the first '\001' or, failing that, newline
+   cnt    - size of str
+   seek   - not used
+   libstr - not used
+   lm_fd  - not used
+
+   When two sequences were read from stdin (use_stdin == 2) the saved
+   stdin header llibstr1 is used, without its leading '>' or ';';
+   otherwise the description saved by agetlib() is used.
+
+   Change from the original: a description that was never saved
+   (desc_save == NULL) yields an empty string instead of a crash. */
+void
+aranlib(char *str,
+	int cnt,
+	fseek_t seek,
+	char *libstr,
+	struct lmf_str *lm_fd)
+{
+  char *bp;
+  const char *src;
+
+  if (use_stdin == 2) {
+    src = llibstr1;
+    if (src[0]=='>' || src[0]==';') src++;
+  }
+  else {
+    src = desc_save;
+  }
+  if (src == NULL) src = "";
+
+  strncpy(str,src,cnt);
+  str[cnt-1]='\0';
+  if ((bp = strchr(str,'\001'))!=NULL) *bp='\0';
+  else if ((bp = strchr(str,'\n'))!=NULL) *bp='\0';
+}
+
+/*
+void
+revcomp(unsigned char *seq, int n, int *c_nt)
+{
+  unsigned char tmp;
+  int i, ni;
+
+
+  for (i=0, ni = n-1; i< n/2; i++,ni--) {
+    tmp = c_nt[seq[i]];
+    seq[i] = c_nt[seq[ni]];
+    seq[ni] = tmp;
+  }
+  if ((n%2)==1) {
+    i = n/2;
+    seq[i] = c_nt[seq[i]];
+  }
+}
+*/
+
+/* re_openlib - nothing needs reopening for the shuffled library, so
+   the descriptor passed in is handed straight back. */
+struct lmf_str *
+re_openlib(struct lmf_str *om_fptr, int outtty)
+{
+  struct lmf_str *same_fptr = om_fptr;
+
+  (void)outtty;		/* not used */
+  return same_fptr;
+}
+
+/* re_getlib - the sequence is already in aa1, so nothing is re-read:
+   *l_off is set to 1, *loffset to 0, and the length n1 is returned
+   unchanged.  All other arguments are ignored. */
+int re_getlib(unsigned char *aa1, int n1, int maxt3, int loff, int cont,
+	      int term_code, long *loffset, long *l_off,
+	      struct lmf_str *m_file_p)
+{
+  const int seq_len = n1;
+
+  (void)aa1; (void)maxt3; (void)loff; (void)cont;
+  (void)term_code; (void)m_file_p;
+
+  *l_off = 1;
+  *loffset = 0;
+  return seq_len;
+}
+
diff --git a/src/lsim4.c b/src/lsim4.c
new file mode 100644
index 0000000..cb0a016
--- /dev/null
+++ b/src/lsim4.c
@@ -0,0 +1,998 @@
+/*
+  lsim4.c - calculate non-overlapping local alignments
+  
+  derived from lsim2.c from Webb Miller
+*/
+
+/* $Id: lsim4.c 1070 2012-10-12 16:44:16Z wrp $ */
+/* $Revision: 1070 $  */
+
+/*  March 2007 - modified to avoid global references  */
+
+/* October, 2008 - modified following changes from Xiaoqui Huang to
+   prevent alignments from crossing the identity diagonal during
+   self-comparison */
+
+/* October 27, 2008 - modified to free pair_ptr memory more reliably
+   (it is still imperfect) */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "defs.h"
+#include "param.h"
+
+#include "lsim4.h"
+
+/* SIM(A,B,M,N,mini_score,Q,R) reports best non-intersecting alignments with
+   score >= mini_score of the segments of A and B in order of similarity
+   scores, where pam2[a][b] is the score of aligning a and b, and
+   -(Q+R*i) is the score of an i-symbol indel.  */
+
+/* SIM uses A[1..M], B[1..N] FORTRAN indexing */
+
+/* Outline: big_pass() scores the whole matrix and collects candidate
+   end-points in vLIST.  Then, up to max_count times, the best
+   candidate is removed with findmax(), aligned with diff(), stored in
+   a_res (the first result) or in a newly allocated a_res_str chained
+   through ->next, and the affected part of the matrix is recomputed
+   with locate()/small_pass().  All working storage is freed before
+   returning; the a_res chain and its ->res arrays belong to the
+   caller.
+
+   Changes from the original: the per-row pair lists are now freed
+   completely (a list holding a single node used to be leaked), and a
+   failed allocation of a new a_res_str ends the loop instead of being
+   dereferenced.
+
+   NOTE(review): the calloc() for ->res is still unchecked. */
+
+void SIM(const unsigned char *A, /* seq1 indexed A[1..M] */
+	 const unsigned char *B, /* seq2 indexed B[1..N] */
+	 int M, int N,		 /* len seq1, seq2 */
+	 struct pstruct *ppst,	/* parameters */
+	 int nseq,		 /* nseq - number of different sequences */
+	 int mini_score,	 /* cut-off score */
+	 int max_count,		 /* number of alignments */
+	 struct a_res_str *a_res)	/* alignment result structure */
+{
+  int endi, endj, stari, starj;	 /* endpoint and startpoint */ 
+  int score;  			 /* the max score in LIST */
+  int ck;
+  int  i;		 /* row and column indices */
+
+  int flag;
+  struct a_res_str *cur_ares, *tmp_ares;
+
+  bool first_pass;			
+  int q, r, qr;
+
+  int *sapp, last;
+  struct vert_str vLIST;
+
+  struct l_struct *l_ptr;
+  pair_ptr z, z_save;
+  int count;			 /* maximum size of list */	
+  vertex_p v_cur;		 /* temporary pointer */
+
+  /* allocate space for all vectors */
+
+  l_ptr = (struct l_struct *)ckalloc(1, sizeof(struct l_struct));
+
+  l_ptr->CCC = ( space_ptr ) ckalloc(N+1,sizeof(space));
+
+  l_ptr->r_ss = (struct lrr_str *) ckalloc(N + 1,sizeof(struct lrr_str));
+
+  l_ptr->c_ss = (struct lcc_str *) ckalloc(M + 1, sizeof(struct lcc_str));
+
+  l_ptr->row = ( pair_ptr * ) ckalloc( M + 1, sizeof(pair_ptr));
+
+  /* set up list for each row */
+  if ( nseq == 2 ) {
+    for ( i = 1; i <= M; i++ ) {l_ptr->row[i] = NULL;}
+  }
+  else {
+    /* self-comparison: each row starts with a node for column i */
+    for ( i = 1; i <= M; i++ ) {
+      l_ptr->row[i] = z = (pair_ptr) ckalloc(1, sizeof(pair));
+      z->COL = i;
+      z->NEXT = NULL;
+    }
+  }
+
+#ifdef OLD_FASTA_GAP
+  q = -(ppst->gdelval - ppst->ggapval);
+#else
+  q = -ppst->gdelval;
+#endif
+
+  r = -ppst->ggapval;
+
+  qr = q + r;
+
+  vLIST.LIST = vLIST.most = NULL;
+  vLIST.numnode = 0;
+
+  /* fill in l_ptr->CCC and vLIST */
+  big_pass(A,B,M,N,mini_score,ppst->pam2[0],q,r,nseq, &vLIST, l_ptr);
+  first_pass= 1;
+
+  /* Report the K best alignments one by one. After each alignment is
+     output, recompute part of the matrix. First determine the size
+     of the area to be recomputed, then do the recomputation         */
+
+  count = 0;
+  while (count < max_count) {
+    if ( vLIST.numnode == 0 ) break;	/* no more alignments */
+
+    if (first_pass) {
+      cur_ares = a_res;
+    }
+    else {	/* need a new a_res */
+      tmp_ares = (struct a_res_str *)calloc(1, sizeof(struct a_res_str));
+      if (tmp_ares == NULL) {
+	fprintf(stderr," cannot allocate a_res_str in SIM\n");
+	break;
+      }
+      cur_ares->next = tmp_ares;
+      cur_ares = tmp_ares;
+    }
+
+    /* get the best next alignment */
+    v_cur = findmax(&vLIST);
+
+    score = v_cur->SCORE;
+    stari = ++v_cur->STARI;
+    starj = ++v_cur->STARJ;
+    endi = v_cur->ENDI;
+    endj = v_cur->ENDJ;
+
+    l_ptr->m1 = v_cur->TOP;
+    l_ptr->mm = v_cur->BOT;
+    l_ptr->n1 = v_cur->LEFT;
+    l_ptr->nn = v_cur->RIGHT;
+
+    l_ptr->rl = endi - stari + 1;
+    l_ptr->cl = endj - starj + 1;
+
+    l_ptr->I = stari - 1;
+    l_ptr->J = starj - 1;
+
+    /* minimum allocation for alignment */
+
+    sapp = cur_ares->res =(int *)calloc(2*min(l_ptr->rl,l_ptr->cl), sizeof(int));
+    last = 0;
+
+    cur_ares->n1 = N;
+    cur_ares->sw_score = cur_ares->rst.score[ppst->score_ix] = score;
+    cur_ares->min0 = stari-1;
+    cur_ares->min1 = starj-1;
+    cur_ares->max0 = stari+l_ptr->rl-1;
+    cur_ares->max1 = starj+l_ptr->cl-1;
+    cur_ares->next = NULL;
+
+    /* produce an alignment, encoded in sapp - equivalent to "align() in dropgsw.c" */
+    (void) diff(&A[stari]-1, &B[starj]-1, l_ptr->rl, l_ptr->cl,
+		q, q, (nseq == 2),ppst->pam2[0], q, r, &sapp, &last, l_ptr);
+
+    ck = CHECK_SCORE(&A[stari]-1,&B[starj]-1,l_ptr->rl,l_ptr->cl,
+		     cur_ares->res,ppst->pam2[0],q,r,&cur_ares->nres);
+
+#ifdef DEBUG
+    /* the same errors are produced by Miller and Huang's sim96 code, so I hope 
+       this reflects a mistake in CHECK_SCORE */
+
+    if (score != ck) {
+      fprintf(stderr,"*** Check_score error: orig %d != %d recons ***\n aa0[%d-%d] : aa1[%d-%d]\n",
+	      score,ck, cur_ares->min0, cur_ares->max0, cur_ares->min1, cur_ares->max1);
+    }
+#endif
+
+    free(v_cur);
+
+    flag = 0;
+    if (first_pass && maxi(l_ptr->rl, l_ptr->cl) > maxi(M,N)/4) {
+      /*printf("no locate\n");*/
+      flag = 1; l_ptr->n1 = l_ptr->m1 = 0; 
+    } 
+    else {
+      locate(A,B,mini_score,ppst->pam2[0],q,r, nseq, &flag, &vLIST, l_ptr);
+    }
+    if ( flag ) {
+      /*printf("small pass\n");*/
+      small_pass(A,B,mini_score,ppst->pam2[0],q,r, nseq, &vLIST, l_ptr);
+    }
+    first_pass= 0;
+    count++;
+  }
+  /* start cleaning up */
+
+  while (vLIST.numnode > 0) {
+    v_cur = findmax(&vLIST);
+    if (v_cur) free(v_cur);
+  }
+
+  /* free every node of every row list, including the last one */
+  for (i=M; i>0; i--) {
+    for (z = l_ptr->row[i]; z != NULL; z = z_save) {
+      z_save = z->NEXT;
+      free(z);
+    }
+  }
+
+  free(l_ptr->row);
+  free(l_ptr->c_ss);
+  free(l_ptr->r_ss);
+  free(l_ptr->CCC);
+  free(l_ptr);
+}
+
+/* big_pass - forward pass over the comparison matrix.
+
+   Fills in l_ptr->CCC[] (one entry per column) row by row and calls
+   addnode() for every cell whose score exceeds mini_score, so that
+   v_ptr->LIST collects the candidate alignments, keyed by the start
+   point (ci,cj) carried along with each score.  It does not produce
+   alignments.
+
+   A, B       - sequences, indexed A[1..M], B[1..N]
+   M, N       - sequence lengths
+   mini_score - a cell is added to the list only when c > mini_score
+   pam2       - pam2[A[i]][B[j]] is the score of aligning A[i] with B[j]
+   Q, R       - gap penalties: Q+R is subtracted when a gap is opened,
+                R for each extension
+   nseq       - 2 when A and B are different sequences; otherwise only
+                the columns j > i of row i are computed
+   v_ptr      - candidate list, updated through addnode()
+   l_ptr      - working storage; its CCC[] array is (re)written here */
+
+static void
+big_pass(const unsigned char *A,
+	 const unsigned char *B,
+	 int M, int N,
+	 int mini_score,
+	 int **pam2,
+	 int Q, int R,
+	 int nseq,
+	 struct vert_str *v_ptr,
+	 struct l_struct *l_ptr)
+{
+  int  i, j;		/* row and column indices */
+  int  c;		/* best score at current point */
+  int  f;		/* best score ending with insertion */
+  int  d;		/* best score ending with deletion */
+  int  p;		/* best score at (i-1, j-1) */
+  int  ci, cj;		/* end-point associated with c */
+  int  fi, fj;		/* end-point associated with f */
+  int  pi, pj;		/* end-point associated with p */
+  space_ptr sp;
+  pair_ptr z;
+  int qr;
+  int  *va;				/* pointer to v(A[i], B[j]) */
+
+  qr = Q+R;
+
+  /* Compute the matrix and save the best scores in LIST
+     CC : the scores of the current row
+     RR and EE : the starting point that leads to score CC
+     DD : the scores of the current row, ending with deletion
+     SS and FF : the starting point that leads to score DD
+  */
+
+  /* Initialize the 0 th row */
+  for ( sp=&l_ptr->CCC[1], j = 1; j <= N; j++, sp++ ) {
+    sp->CC = sp->RR = 0;
+    sp->EE = j;
+    sp->DD = - (qr);
+    sp->SS = 1;
+    sp->FF = j;
+  }
+
+  for ( i = 1; i <= M; i++) {
+    c = 0;				/* Initialize column 0 */
+    f = - (qr);
+    va = pam2[A[i]];
+    ci = fi = i;
+    if ( nseq == 2 ) {
+      p = 0;			/* score of current row */
+      pi = (i - 1);		/* starting point */
+      cj = fj = pj = 0;		/* pj starting point */
+    }
+    else {
+      p = l_ptr->CCC[i].CC;/* score of current row */
+      pi = l_ptr->CCC[i].RR;	/* starting point */
+      pj = l_ptr->CCC[i].EE;	/* starting point */
+      cj = fj = i;
+    }
+    /* self-comparison starts to the right of the diagonal */
+    j = (nseq == 2 ? 1: i+1);
+    for ( sp = &l_ptr->CCC[j]; sp <= &l_ptr->CCC[N]; j++, sp++) {
+		   
+      d = sp->DD;
+      c = -1;
+      /* assign p+va[B[j]] to c if i, j not part of path */
+      DIAG(i, j, c, p+va[B[j]])		/* diagonal */
+      if (c < 0) {
+	/* no usable diagonal score: fall back on f, then d, then 0 */
+	p = sp->CC; pi = sp->RR; pj = sp->EE;
+	if (f >= 0) {
+	  c = f; ci = fi; cj = fj;
+	  /* replace ci, cj with sp->SS, sp->FF if c < d */
+	  ORDER1(c, ci, cj,  d, sp->SS, sp->FF)
+	  sp->CC = c; sp->RR = ci; sp->EE = cj;
+	  sp->DD -= R; f-=R;
+	} else if (d >= 0) {
+	  sp->CC = d; sp->RR = sp->SS; sp->EE = sp->FF;  
+	  sp->DD -= R;
+	} else {
+	  sp->CC = 0; sp->RR=i; sp->EE = j;
+	}
+      } else { 
+	ci = pi; cj = pj;
+	ORDER1(c, ci, cj,  f, fi, fj)
+	ORDER1(c, ci, cj,  d, sp->SS, sp->FF)
+	p = sp->CC;
+	sp->CC = c;
+	pi = sp->RR;
+	sp->RR = ci;
+	pj = sp->EE;
+	sp->EE = cj;
+	f -= R;
+	if (c >= qr) {
+	  if ( c > mini_score) {	/* add the score into list */
+	    addnode(c, ci, cj, i, j, v_ptr);
+	  }
+	  /* a gap opened from c may beat extending f or d */
+	  d -= R; c -= qr;
+	  ORDER1(f, fi, fj, c, ci, cj)
+	  ORDER1(d, sp->SS, sp->FF, c, ci, cj)
+	  sp->DD = d;
+	} else {
+	  sp->DD -= R;
+	}
+      }
+    }
+  }
+}
+
+/* Determine the left and top boundaries of the recomputed area */
+/* this function is not recursive */
+
+/* locate - reverse pass that finds how far up and left the matrix has
+   to be recomputed after an alignment has been reported.
+
+   On entry l_ptr->m1, mm, n1, nn hold the top, bottom, left and right
+   limits of the reported alignment's area.  The area is first scored
+   backwards, then l_ptr->m1 and l_ptr->n1 are moved up and left one
+   row or column at a time while rflag/cflag stay set, and the whole
+   step is repeated until no_cross() reports that no listed node
+   shares vertices with the area, or until m1 and n1 both reach 1.
+   Finally m1 and n1 are each decremented once more.
+
+   A, B       - sequences, indexed from 1
+   mini_score - *flag_p is set to 1 when a recomputed cell scores more
+                than this
+   pam2       - scoring matrix
+   Q, R       - gap penalties (open, extend); qr = Q+R
+   nseq       - 2 for two different sequences
+   flag_p     - set to 1 when small_pass() has to be run (also set by
+                no_cross())
+   v_ptr      - list of candidate nodes, read by no_cross()
+   l_ptr      - working storage; CCC[], c_ss[], m1, n1, rl and cl are
+                modified */
+
+static void
+locate(const unsigned char *A,
+       const unsigned char *B,
+       int mini_score,
+       int **pam2, int Q, int R,
+       int nseq,
+       int *flag_p,
+       struct vert_str *v_ptr,
+       struct l_struct *l_ptr) {
+  int  i, j;		/* row and column indices */
+  int  c;		/* best score at current point */
+  int  f;		/* best score ending with insertion */
+  int  d;		/* best score ending with deletion */
+  int  p;		/* best score at (i-1, j-1) */
+  int  ci, cj;		/* end-point associated with c */ 
+  int  di, dj;
+  int  fi, fj;		/* end-point associated with f */
+  int  pi, pj;		/* end-point associated with p */
+
+  space_ptr sp;
+  pair_ptr z;
+  bool  cflag, rflag;			/* for recomputation */
+  int  *va;				/* pointer to v(A[i], B[j]) */
+  int  limit;				/* the bound on j */
+  int qr; 
+
+  qr = Q + R;
+
+  /* Reverse pass
+     rows come from CCC
+     CC : the scores on the current row
+     RR and EE : the endpoints that lead to CC
+     DD : the deletion scores 
+     SS and FF : the endpoints that lead to DD
+
+     columns come from c_ss[]
+     HH : the scores on the current columns
+     II and JJ : the endpoints that lead to HH
+     WW : the deletion scores
+     XX and YY : the endpoints that lead to WW
+  */
+
+  /* initialize the row below the area (row mm+1) */
+  for ( j = l_ptr->nn; j >= l_ptr->n1 ; j-- ) {
+    l_ptr->CCC[j].CC = 0;
+    l_ptr->CCC[j].EE = j;
+    l_ptr->CCC[j].DD = - (Q);
+    l_ptr->CCC[j].FF = j;
+    if ( nseq == 2 || j > l_ptr->mm )
+      l_ptr->CCC[j].RR = l_ptr->CCC[j].SS = l_ptr->mm + 1;
+    else
+      l_ptr->CCC[j].RR = l_ptr->CCC[j].SS = j;
+  }
+
+  /* score rows mm down to m1, right to left */
+  for ( i = l_ptr->mm; i >= l_ptr->m1; i-- )  {
+    c = p = 0;
+    f = - (Q);
+    ci = fi = i;
+    pi = i + 1;
+    cj = fj = pj = l_ptr->nn + 1;
+    va = pam2[A[i]];
+    if ( nseq == 2 || l_ptr->n1 > i ) limit = l_ptr->n1;
+    else limit = i + 1;
+
+    for ( j = l_ptr->nn, sp = &l_ptr->CCC[j]; j >= limit ; j--, sp-- ) {
+      f = f - R;
+      c = c - qr;
+      ORDER(f, fi, fj, c, ci, cj)
+      c = sp->CC - qr; 
+      d = sp->DD - R;
+      ORDER(d, sp->SS, sp->FF, c, sp->RR, sp->EE)
+      c = 0;
+      DIAG(i, j, c, p+va[B[j]])		/* diagonal */
+      if ( c <= 0 ) { c = 0; ci = i; cj = j; }
+      else  { ci = pi; cj = pj; }
+      ORDER1(c, ci, cj, d, sp->SS, sp->FF)
+      ORDER1(c, ci, cj, f, fi, fj)
+      p = sp->CC;
+      sp->CC = c;
+      pi = sp->RR;
+      pj = sp->EE;
+      sp->RR = ci;
+      sp->EE = cj;
+      sp->DD = d;
+      if ( c > mini_score ) *flag_p = 1;
+    }
+
+    /* remember the left-most column's values for the column pass */
+    if ( nseq == 2 || i < l_ptr->n1 ) {
+      l_ptr->c_ss[i].HH = l_ptr->CCC[l_ptr->n1].CC;
+      l_ptr->c_ss[i].II = l_ptr->CCC[l_ptr->n1].RR;
+      l_ptr->c_ss[i].JJ = l_ptr->CCC[l_ptr->n1].EE;
+      l_ptr->c_ss[i].WW = f;
+      l_ptr->c_ss[i].XX = fi;
+      l_ptr->c_ss[i].YY = fj;
+    }
+  }
+      
+  /* rl, cl: the top-left corner that end-points are compared against */
+  for ( l_ptr->rl = l_ptr->m1, l_ptr->cl = l_ptr->n1; ; ) {
+    for ( rflag = cflag = 1; ( rflag && l_ptr->m1 > 1 ) || ( cflag && l_ptr->n1 > 1 ) ;  ) {
+      if ( rflag && l_ptr->m1 > 1 ) {	/* Compute one row */
+	rflag = 0;
+	l_ptr->m1--;
+	c = p = 0;
+	f = - (Q);
+	ci = fi = l_ptr->m1;
+	pi = l_ptr->m1 + 1;
+	cj = fj = pj = l_ptr->nn + 1;
+	va = pam2[A[l_ptr->m1]];
+	for ( j = l_ptr->nn, sp = &l_ptr->CCC[j]; j >= l_ptr->n1 ; j--, sp-- ) {
+	  f = f - R;
+	  c = c - qr;
+	  ORDER(f, fi, fj, c, ci, cj)
+	  c = sp->CC - qr; 
+	  ci = sp->RR;
+	  cj = sp->EE;
+	  d = sp->DD - R;
+	  di = sp->SS;
+	  dj = sp->FF;
+	  ORDER(d, di, dj, c, ci, cj)
+          c = 0;
+	  DIAG(l_ptr->m1, j, c, p+va[B[j]])		/* diagonal */
+	  if ( c <= 0 ) { c = 0; ci = l_ptr->m1; cj = j; }
+	  else { ci = pi; cj = pj; }
+	  ORDER1(c, ci, cj, d, di, dj)
+	  ORDER1(c, ci, cj, f, fi, fj)
+	  sp->SS = di;
+	  sp->FF = dj;
+	  p = sp->CC;
+	  sp->CC = c;
+	  pi = sp->RR;
+	  pj = sp->EE;
+	  sp->RR = ci;
+	  sp->EE = cj;
+	  sp->DD = d;
+	  if ( c > mini_score ) *flag_p = 1;
+	  /* an end-point beyond (rl,cl) means another row is needed */
+	  if ( ! rflag && ( (ci > l_ptr->rl && cj > l_ptr->cl) || (di > l_ptr->rl && dj > l_ptr->cl) || (fi > l_ptr->rl && fj > l_ptr->cl) ) ) rflag = 1;
+	}
+
+	l_ptr->c_ss[l_ptr->m1].HH = l_ptr->CCC[l_ptr->n1].CC;
+	l_ptr->c_ss[l_ptr->m1].II = l_ptr->CCC[l_ptr->n1].RR;
+	l_ptr->c_ss[l_ptr->m1].JJ = l_ptr->CCC[l_ptr->n1].EE;
+	l_ptr->c_ss[l_ptr->m1].WW = f;
+	l_ptr->c_ss[l_ptr->m1].XX = fi;
+	l_ptr->c_ss[l_ptr->m1].YY = fj;
+
+	if ( ! cflag && ( (ci > l_ptr->rl && cj > l_ptr->cl) || (di > l_ptr->rl && dj > l_ptr->cl)  || (fi > l_ptr->rl && fj > l_ptr->cl ) )) cflag = 1;
+      }
+
+      if ( nseq == 1 && l_ptr->n1 == (l_ptr->m1 + 1) && ! rflag ) cflag = 0;
+      if ( cflag && l_ptr->n1 > 1 ) {	/* Compute one column */
+	cflag = 0;
+	l_ptr->n1--;
+	c = 0;
+	f = - (Q);
+	cj = fj = l_ptr->n1;
+	/* NOTE(review): the matrix is indexed [B][A] for the column
+	   pass - the same as [A][B] only for a symmetric matrix; confirm */
+	va = pam2[B[l_ptr->n1]];
+	if ( nseq == 2 || l_ptr->mm < l_ptr->n1 ) {
+	  p = 0;
+	  ci = fi = pi = l_ptr->mm + 1;
+	  pj = l_ptr->n1 + 1;
+	  limit = l_ptr->mm;
+	}
+	else {
+	  p = l_ptr->c_ss[l_ptr->n1].HH;
+	  pi = l_ptr->c_ss[l_ptr->n1].II;
+	  pj = l_ptr->c_ss[l_ptr->n1].JJ;
+	  ci = fi = l_ptr->n1;
+	  limit = l_ptr->n1 - 1;
+	}
+
+	for ( i = limit; i >= l_ptr->m1 ; i-- ) {
+	  f = f - R;
+	  c = c - qr;
+	  ORDER(f, fi, fj, c, ci, cj)
+	  c = l_ptr->c_ss[i].HH - qr; 
+	  ci = l_ptr->c_ss[i].II;
+	  cj = l_ptr->c_ss[i].JJ;
+	  d = l_ptr->c_ss[i].WW - R;
+	  di = l_ptr->c_ss[i].XX;
+	  dj = l_ptr->c_ss[i].YY;
+	  ORDER(d, di, dj, c, ci, cj)
+	  c = 0;
+	  DIAG(i, l_ptr->n1, c, p+va[A[i]])
+	  if ( c <= 0 ) { c = 0; ci = i; cj = l_ptr->n1; }
+	  else { ci = pi; cj = pj; }
+	  ORDER1(c, ci, cj, d, di, dj)
+	  ORDER1(c, ci, cj, f, fi, fj)
+	  p = l_ptr->c_ss[i].HH;
+	  l_ptr->c_ss[i].HH = c;
+	  pi = l_ptr->c_ss[i].II;
+	  pj = l_ptr->c_ss[i].JJ;
+	  l_ptr->c_ss[i].II = ci;
+	  l_ptr->c_ss[i].JJ = cj;
+	  l_ptr->c_ss[i].WW = d;
+	  l_ptr->c_ss[i].XX = di;
+	  l_ptr->c_ss[i].YY = dj;
+	  if ( c > mini_score ) *flag_p = 1;
+	  if ( ! cflag && ( (ci > l_ptr->rl && cj > l_ptr->cl) || (di > l_ptr->rl && dj > l_ptr->cl)
+			    || (fi > l_ptr->rl && fj > l_ptr->cl) ) )  cflag = 1;
+	}
+
+	l_ptr->CCC[l_ptr->n1].CC = l_ptr->c_ss[l_ptr->m1].HH;
+	l_ptr->CCC[l_ptr->n1].RR = l_ptr->c_ss[l_ptr->m1].II;
+	l_ptr->CCC[l_ptr->n1].EE = l_ptr->c_ss[l_ptr->m1].JJ;
+	l_ptr->CCC[l_ptr->n1].DD = f;
+	l_ptr->CCC[l_ptr->n1].SS = fi;
+	l_ptr->CCC[l_ptr->n1].FF = fj;
+	if ( ! rflag && ( (ci > l_ptr->rl && cj > l_ptr->cl) || (di > l_ptr->rl && dj > l_ptr->cl)
+			  || (fi > l_ptr->rl && fj > l_ptr->cl )) ) rflag = 1;
+      }
+    }
+    /* stop at the matrix corner, or when no listed node touches the area */
+    if (( l_ptr->m1 == 1 && l_ptr->n1 == 1) || no_cross(flag_p, v_ptr->LIST, l_ptr) ) break;
+  }
+  l_ptr->m1--;
+  l_ptr->n1--;
+}
+
+/* recompute the area on forward pass */
+
+/* small_pass - forward rescoring of the rows m1+1..mm and the columns
+   n1+1..nn held in l_ptr, the same computation as big_pass()
+   restricted to that area.  Cells that score more than mini_score are
+   (re)entered into the candidate list with addnode().
+
+   A, B       - sequences, indexed from 1
+   mini_score - a cell is added to the list only when c > mini_score
+   pam2       - scoring matrix
+   Q, R       - gap penalties (open, extend)
+   nseq       - 2 for two different sequences; otherwise row i starts
+                at column i+1 once i > n1
+   v_ptr      - candidate list, updated through addnode()
+   l_ptr      - working storage; supplies m1, mm, n1, nn and CCC[] */
+static void
+small_pass(const unsigned char *A,
+	   const unsigned char *B,
+	   int mini_score,
+	   int **pam2, int Q, int R,
+	   int nseq,
+	   struct vert_str *v_ptr,
+	   struct l_struct *l_ptr) {
+
+  int  i, j;		/* row and column indices */
+  int  c;		/* best score at current point */
+  int  f;		/* best score ending with insertion */
+  int  d;		/* best score ending with deletion */
+  int  p;		/* best score at (i-1, j-1) */
+  int  ci, cj;		/* end-point associated with c */ 
+  int  fi, fj;		/* end-point associated with f */
+  int  pi, pj;		/* end-point associated with p */
+  space_ptr sp;
+  pair_ptr z;
+  int q, r, qr;
+  int  *va;		/* pointer to pam2(A[i], B[j]) */
+  
+  int  limit;		/* lower bound on j */
+
+  q = Q; r = R; qr = q + r;
+
+  /* initialize row m1 for the columns of the area */
+  for ( sp = &l_ptr->CCC[l_ptr->n1 + 1], j = l_ptr->n1+1; sp <= &l_ptr->CCC[l_ptr->nn] ; sp++, j++ ) {
+    sp->CC = 0;
+    sp->RR = l_ptr->m1;
+    sp->EE = j;
+    sp->DD = - (qr);
+    sp->SS = l_ptr->m1+1;
+    sp->FF = j;
+  }
+
+  for ( i = l_ptr->m1 + 1; i <= l_ptr->mm; i++) {
+    c = 0;				/* Initialize column 0 */
+    f = - (qr);
+    ci = fi = i;
+    va = pam2[A[i]];
+    if ( nseq == 2 || i <= l_ptr->n1 ) {
+      p = 0;
+      pi = i - 1;
+      cj = fj = pj = l_ptr->n1;
+      limit = l_ptr->n1 + 1;
+    }
+    else {
+      p = l_ptr->CCC[i].CC;
+      pi = l_ptr->CCC[i].RR;
+      pj = l_ptr->CCC[i].EE;
+      cj = fj = i;
+      limit = i + 1;
+    }
+
+    for ( j = limit, sp = &l_ptr->CCC[j] ; j <= l_ptr->nn ; j++, sp++ )  {  
+      d = sp->DD;
+      c = -1;
+      DIAG(i, j, c, p+va[B[j]])		/* diagonal */
+      if (c < 0) {
+	/* no usable diagonal score: fall back on f, then d, then 0 */
+	p = sp->CC; pi = sp->RR; pj = sp->EE;
+	if (f >= 0) {
+	  c = f; ci = fi; cj = fj;
+	  ORDER1(c, ci, cj,  d, sp->SS, sp->FF)
+	  sp->CC = c; sp->RR = ci; sp->EE = cj;
+	  sp->DD -= r; f-=r;
+	} 
+	else if (d >= 0) {
+	  sp->CC = d; sp->RR = sp->SS; sp->EE = sp->FF;  
+	  sp->DD -= r;
+	}
+	else {
+	  sp->CC = 0;
+	  sp->RR=i;
+	  sp->EE = j;
+	}
+      }
+      else { 
+	ci = pi; cj = pj;
+	ORDER1(c, ci, cj,  f, fi, fj)
+	  ORDER1(c, ci, cj,  d, sp->SS, sp->FF)
+	  p = sp->CC;
+	sp->CC = c;
+	pi = sp->RR;
+	sp->RR = ci;
+	pj = sp->EE;
+	sp->EE = cj;
+	f-=r;
+	if (c >= qr) {
+	  if ( c > mini_score ) /* add the score into list */
+	    addnode(c, ci, cj, i, j, v_ptr);
+	  d -= r; c-=qr;
+	  ORDER1(f, fi, fj, c, ci, cj)
+          ORDER1(d, sp->SS, sp->FF, c, ci, cj)
+	  sp->DD = d;
+	}
+	else {
+	  sp->DD -= r;
+	}
+      }
+    }
+  }
+}
+
+/* addnode - record score c, which ends at (i,j) and belongs to the
+   alignment starting at (ci,cj).
+
+   If a node with that start point is already listed (the cached
+   v_ptr->most is tried before the list is searched), its score and
+   end point are raised when c is better and its TOP/BOT/LEFT/RIGHT
+   limits are widened to include (i,j).  Otherwise a new node is put
+   at the head of v_ptr->LIST.  Either way v_ptr->most is left
+   pointing at the node that was used. */
+
+static void
+addnode(int c, int ci, int cj, int i, int j, struct vert_str *v_ptr) {
+
+  vertex_p node;
+
+  /* try the most recently used node, then scan the list */
+  node = v_ptr->most;
+  if ( node == NULL || node->STARI != ci || node->STARJ != cj ) {
+    for ( node = v_ptr->LIST; node != NULL; node = node->next ) {
+      if ( node->STARI == ci && node->STARJ == cj ) break;
+    }
+  }
+
+  if ( node == NULL ) {
+    /* unknown start point: new node at the head of the list */
+    v_ptr->numnode++;
+    node = (vertex_p) ckalloc(1,sizeof(vertex));
+    node->SCORE = c;
+    node->STARI = ci;
+    node->STARJ = cj;
+    node->ENDI = i;
+    node->ENDJ = j;
+    node->TOP = node->BOT = i;
+    node->LEFT = node->RIGHT = j;
+    node->next = v_ptr->LIST;
+    v_ptr->LIST = node;
+    v_ptr->most = node;
+    return;
+  }
+
+  v_ptr->most = node;
+  if ( node->SCORE < c ) {
+    node->SCORE = c;
+    node->ENDI = i;
+    node->ENDJ = j;
+  }
+  if ( node->TOP > i ) node->TOP = i;
+  if ( node->BOT < i ) node->BOT = i;
+  if ( node->LEFT > j ) node->LEFT = j;
+  if ( node->RIGHT < j ) node->RIGHT = j;
+}
+
+/* Find and remove the largest score in list */
+
+/* findmax - unlink and return the node with the highest SCORE; when
+   several nodes tie, the one nearest the head of the list wins.
+   v_ptr->numnode is decremented and v_ptr->most is reset to the head
+   of the list.  The caller owns (and frees) the returned node.
+
+   Returns NULL when the list is empty; numnode is then forced to 0 so
+   that loops of the form "while (numnode > 0) findmax()" terminate.
+   (The original dereferenced the empty list.) */
+
+static vertex_p
+findmax(struct vert_str *v_ptr) {
+  vertex_p  ap, cur;
+  int score;
+
+  if (v_ptr->LIST == NULL) {
+    v_ptr->numnode = 0;
+    v_ptr->most = NULL;
+    return NULL;
+  }
+
+  /* cur is the node *before* the best one, NULL if the head is best */
+  for ( score = (v_ptr->LIST)->SCORE, cur = NULL, ap = (v_ptr->LIST); ap->next; ap = ap->next) {
+    if ( ap->next->SCORE > score ) {
+      cur = ap; score = ap->next->SCORE;
+    }
+  }
+  if (cur) {ap = cur->next; cur->next = ap->next; }
+  else { ap = v_ptr->LIST; v_ptr->LIST = (v_ptr->LIST)->next;}
+  v_ptr->numnode--;
+  v_ptr->most = v_ptr->LIST;
+  return ( ap );
+}
+
+/* return 1 if no node in LIST share vertices with the area */
+
+/* no_cross - look for a listed node that starts at or before
+   (mm,nn), reaches row m1-1 and column n1-1, and starts above rl or
+   left of cl.  For the first such node rl and cl are lowered to its
+   start point, *flag_p is set to 1 and 0 is returned.  When there is
+   none, 1 is returned and nothing is changed. */
+
+static bool
+no_cross(int *flag_p, vertex_p LIST, struct l_struct *l_ptr) {
+
+  vertex_p  node;
+
+  for ( node = LIST; node != NULL; node = node->next ) {
+    if ( node->STARI > l_ptr->mm || node->STARJ > l_ptr->nn ) continue;
+    if ( node->BOT < l_ptr->m1-1 || node->RIGHT < l_ptr->n1-1 ) continue;
+    if ( node->STARI >= l_ptr->rl && node->STARJ >= l_ptr->cl ) continue;
+
+    /* this node crosses the area: extend the corner to its start */
+    if ( node->STARI < l_ptr->rl ) l_ptr->rl = node->STARI;
+    if ( node->STARJ < l_ptr->cl ) l_ptr->cl = node->STARJ;
+    *flag_p = 1;
+    return 0;
+  }
+  return 1;
+}
+
+/* The following definitions are for function diff().
+   They are not self-contained: they expand to code that uses the
+   caller's locals Q, R, l_ptr, sapp and last, and DEL()/INS() are
+   written so they can be invoked without a trailing semicolon. */
+
+/* gap(k): cost Q+R*k of a k-symbol indel; 0 when k <= 0 */
+#define gap(k)  ((k) <= 0 ? 0 : Q+R*(k)) /* k-symbol indel score */
+
+/* Append "Delete k" op.
+   Advances l_ptr->I by k.  If the previous op (*last) was a delete
+   (negative), the new delete is merged into it by subtracting k from the
+   previous script entry; otherwise -k is appended as a new entry. */
+#define DEL(k)				\
+{ l_ptr->I += k;			\
+  if (*last < 0)			\
+    *last = (*sapp)[-1] -= (k);		\
+  else {				\
+    *last = (*sapp)[0] = -(k);		\
+    (*sapp)++;				\
+  }					\
+}
+
+/* Append "Insert k" op.
+   Advances l_ptr->J by k.  If the previous op was a delete (negative),
+   the insert is written into the previous slot and the delete is moved
+   to the new slot, so the insert precedes the delete in the script;
+   *last keeps the delete value.  Otherwise k is appended as a new entry. */
+#define INS(k)				\
+{ l_ptr->J += k;			\
+  if (*last < 0) {			\
+   (*sapp)[-1] = (k);			\
+   (*sapp)[0] = *last;			\
+   (*sapp)++;				\
+  }					\
+  else {				\
+    *last = (*sapp)[0] = (k);		\
+    (*sapp)++;				\
+  }					\
+}
+
+/* diff(A,B,M,N,tb,te) returns the score of an optimum conversion between
+   A[1..M] and B[1..N] that begins(ends) with a delete if tb(te) is zero
+   and appends such a conversion to the current script.
+
+   Additional arguments:
+     two_seq  - when non-zero the column range is not restricted relative
+                to the diagonal i + l_ptr->I - l_ptr->J (the restriction is
+                presumably for self-comparison; confirm against callers)
+     pam2     - substitution scores, indexed pam2[A[i]][B[j]]
+     Q, R     - gap penalties; a k-symbol gap costs Q+R*k (see gap())
+     sapp     - address of the script write cursor, advanced by DEL()/INS()
+     last     - address of the last op written to the script
+     l_ptr    - shared state: I/J offsets of A/B within the full sequences,
+                row[] lists of already used (i,j) pairs, and the r_ss[]
+                scratch vectors (CC/DD forward, RR/SS reverse)
+
+   The function recurses on the two halves around the midpoint row M/2;
+   boundary cases (N <= 0 or M <= 1) are handled directly.               */
+
+static int
+diff(const unsigned char *A,
+     const unsigned char *B,
+     int M, int N,
+     int tb, int te,
+     int two_seq,
+     int **pam2, int Q, int R,
+     int **sapp, int *last,
+     struct l_struct *l_ptr) {
+
+  int   midi, midj, type;	/* Midpoint, type, and cost */
+  int limit;
+  int midc;
+
+  register int i, j;
+  register int c, e, d, s;
+
+  pair_ptr z;
+
+  int t;
+  int *va;
+  int qr;
+
+  bool tt;
+
+  /* cost of opening a gap and extending it by one symbol */
+  qr = Q + R;
+
+  /* Boundary cases: M <= 1 or N == 0 */
+
+  if (N <= 0){
+    if (M > 0) {DEL(M)}
+    return - gap(M);
+  }
+
+  if (M <= 1) {
+    if (M <= 0) {
+      INS(N)
+      return - gap(N);
+    }
+
+    /* M == 1: either delete A[1] and insert all of B (cost midc below),
+       or match A[1] against the best still-unused B[j] */
+    if (tb > te) tb = te;
+    midc = - (tb + R + gap(N) );
+    midj = 0;
+
+    va = pam2[A[1]];
+    j = 2 + l_ptr->I - l_ptr->J;
+    if (two_seq || j < 1) j = 1;
+    for ( ; j <= N; j++) {
+      /* tt stays 1 only if (I+1, j+J) is not already in the used-pair list */
+      for ( tt = 1, z = l_ptr->row[l_ptr->I+1]; z != NULL; z = z->NEXT ) {
+	if ( z->COL == j+l_ptr->J ) { tt = 0; break; }		
+      }
+      if (tt) {
+	c = va[B[j]] - ( gap(j-1) + gap(N-j) );
+	if (c > midc) { midc = c;  midj = j; }
+      }
+    }
+
+    if (midj == 0) { INS(N) DEL(1) }
+    else  {
+      if (midj > 1) INS(midj-1)
+      *last = (*sapp)[0] = 0;
+      (*sapp)++;
+
+      /* mark (A[I],B[J]) as used: put J into list row[I] */	
+      l_ptr->I++; l_ptr->J++;
+      z = ( pair_ptr ) ckalloc(1,sizeof(pair));
+      z->COL = l_ptr->J;			
+      z->NEXT = l_ptr->row[l_ptr->I];				
+      l_ptr->row[l_ptr->I] = z;
+      if (midj < N) INS(N-midj)
+    }
+    return midc;
+  }
+
+  /* Divide: Find optimum midpoint (midi,midj) of cost midc */
+
+  midi = M/2;		/* Forward phase:                          */
+  l_ptr->r_ss[0].CC = 0;		/*   Compute C(M/2,k) & D(M/2,k) for all k */
+  t = -Q;
+  for (j = 1; j <= N; j++) {
+    l_ptr->r_ss[j].CC = t = t-R;
+    l_ptr->r_ss[j].DD = t-Q;
+  }
+  t = -tb;
+  for (i = 1; i <= midi; i++) {
+    va = pam2[A[i]];
+    t = t-R;
+    j = i + l_ptr->I - l_ptr->J;
+    if (two_seq || j <= 0) {
+      j = 0;
+      s = l_ptr->r_ss[0].CC;
+      l_ptr->r_ss[0].CC = c = t;
+    }
+    else {
+      if ( (c = (s = l_ptr->r_ss[j].CC) - qr) < (d = l_ptr->r_ss[j].DD)) c = d;
+      l_ptr->r_ss[j].CC = l_ptr->r_ss[j].DD = c;
+    }
+    e = c-Q;
+    for (j++ ; j <= N; j++) {
+      if ((c = c - qr) > (e = e - R)) e = c;
+      if ((c = l_ptr->r_ss[j].CC - qr) > (d = l_ptr->r_ss[j].DD - R)) d = c;
+      /* the diagonal (match) score replaces c only if the pair is unused */
+      DIAG(i+l_ptr->I, j+l_ptr->J, c, s+va[B[j]])
+      if (c < d) c = d;
+      if (c < e) c = e;
+      s = l_ptr->r_ss[j].CC;
+      l_ptr->r_ss[j].CC = c;
+      l_ptr->r_ss[j].DD = d;
+    }
+  }
+  l_ptr->r_ss[0].DD = l_ptr->r_ss[0].CC;
+
+  l_ptr->r_ss[N].RR = 0;			/* Reverse phase:                          */
+  t = -Q;			/*   Compute R(M/2,k) & S(M/2,k) for all k */
+  for (j = N-1; j >= 0; j--) {
+    l_ptr->r_ss[j].RR = t = t-R;
+    l_ptr->r_ss[j].SS = t-Q;
+  }
+  t = -te;
+
+  for (i = M-1; i >= midi; i--) {
+    s = l_ptr->r_ss[N].RR;
+    l_ptr->r_ss[N].RR = c = t = t-R;
+    e = t-Q;
+    va = pam2[A[i+1]];
+    limit = i + l_ptr->I - l_ptr->J + 1;
+    if (two_seq || limit < 0) limit = 0;
+    for (j = N-1; j >= limit; j--) {
+      if ((c = c - qr) > (e = e - R)) e = c;
+      if ((c = l_ptr->r_ss[j].RR - qr) > (d = l_ptr->r_ss[j].SS - R)) d = c;
+      DIAG(i+1+l_ptr->I, j+1+l_ptr->J, c, s+va[B[j+1]])
+      if (c < d) c = d;
+      if (c < e) c = e;
+      s = l_ptr->r_ss[j].RR;
+      l_ptr->r_ss[j].RR = c;
+      l_ptr->r_ss[j].SS = d;
+    }
+  }
+  l_ptr->r_ss[N].SS = l_ptr->r_ss[N].RR;
+
+  midc = l_ptr->r_ss[0].CC+l_ptr->r_ss[0].RR;		/* Find optimal midpoint */
+  midj = 0;
+  type = 1;
+  limit = midi + l_ptr->I - l_ptr->J + 1;
+  if (two_seq || limit < 0) limit = 0;
+  /* type 1 candidates: forward and reverse scores joined at column j */
+  for (j = limit; j <= N; j++) {
+    if ((c = l_ptr->r_ss[j].CC + l_ptr->r_ss[j].RR) >= midc) {
+      if (c > midc ||
+	  (l_ptr->r_ss[j].CC != l_ptr->r_ss[j].DD && l_ptr->r_ss[j].RR == l_ptr->r_ss[j].SS)) {
+	midc = c;
+	midj = j;
+      }
+    }
+  }
+  /* type 2 candidates: DD + SS joined, with one gap-open charge Q given back */
+  for (j = N; j >= limit; j--)
+    if ((c = l_ptr->r_ss[j].DD + l_ptr->r_ss[j].SS + Q) > midc)
+      { midc = c;
+      midj = j;
+      type = 2;
+      }
+
+/* Conquer: recursively around midpoint */
+
+  if (type == 1) {
+    (void)diff(A,B,midi,midj,tb,Q,two_seq,pam2,Q,R, sapp, last, l_ptr);
+    (void)diff(A+midi,B+midj,M-midi,N-midj,Q,te,two_seq,pam2,Q,R, sapp, last, l_ptr);
+  }
+  else {
+    /* type 2: rows midi and midi+1 are both deleted, hence DEL(2) between
+       the two sub-problems */
+    (void)diff(A,B,midi-1,midj,tb,0,two_seq,pam2,Q,R, sapp, last, l_ptr);
+    DEL(2);
+    (void)diff(A+midi+1,B+midj,M-midi-1,N-midj,0,te,two_seq,pam2,Q,R, sapp, last, l_ptr);
+  }
+  return midc;
+}
+
+/* CHECK_SCORE - replay the edit script S over A[1..M] and B[1..N] and
+   return the resulting alignment score.
+
+   Script entries: 0 = aligned pair (scored with w[A[i]][B[j]]),
+   k > 0 = insert k symbols of B, k < 0 = delete -k symbols of A; every
+   gap of length k costs qq + k*rr.  The number of alignment columns is
+   stored in *NC. */
+
+static int CHECK_SCORE(const unsigned char *A, const unsigned char *B,
+		       int M, int N,
+		       int *S, int **w,
+		       int qq, int rr, int *NC)
+{ 
+  int i, j;		/* residues of A / B consumed so far */
+  int op;		/* current script entry */
+  int nc;		/* alignment columns seen so far */
+  int itmp;		/* score contribution of the current entry */
+  int score;
+#ifdef SHOW_ALIGN_SCORE
+  int mx_l_score;
+#endif
+
+  /*  print_seq_prof(A,M,w,iw); */
+
+  i = 0;
+  j = 0;
+  nc = 0;
+  score = 0;
+#ifdef SHOW_ALIGN_SCORE
+  mx_l_score = 0;
+  printf("#===start\n");
+  printf("#i j pam2 score mx_l_score\n");
+#endif
+  while (i < M || j < N) {
+    op = *S++;
+    if (op > 0) {		/* insertion: advance in B only */
+      itmp = -(qq + op*rr);
+      j += op;
+      nc += op;
+    }
+    else if (op < 0) {		/* deletion: advance in A only */
+      itmp = - (qq - op*rr);
+      i -= op;
+      nc -= op;
+    }
+    else {			/* aligned pair */
+      i++;
+      j++;
+      itmp = w[A[i]][B[j]];
+      nc++;
+    }
+    score += itmp;
+#ifdef SHOW_ALIGN_SCORE
+    if (score > mx_l_score) mx_l_score = score;
+    printf("%d\t%d\t%d\t%d\t%d\n",i, j, itmp, score, mx_l_score);
+#endif
+  }
+#ifdef SHOW_ALIGN_SCORE
+  printf("%d\t%d\tend\t%d\t%d\n====\n",i, j, score, mx_l_score);
+#endif
+  *NC = nc;
+  return score;
+}
+
+/* ckalloc - allocate space for "amount" objects of "size" bytes each;
+   check for success.
+
+   Returns a pointer to uninitialized storage.  Never returns NULL: if the
+   request overflows size_t or malloc() fails, a message is written to
+   stderr and the program exits with status 1.  A running total of the
+   bytes requested is kept only for that diagnostic. */
+void *ckalloc(size_t amount, size_t size)
+{
+  void *p;
+  size_t nbytes;
+  static size_t mtotal;
+
+  /* refuse requests whose byte count does not fit in size_t; otherwise
+     the product would wrap and a too-small buffer would be returned */
+  if (size != 0 && amount > ((size_t)-1) / size) {
+    fprintf(stderr,"Ran out of near memory: %lu*%lu/%lu\n",
+	    (unsigned long)amount,(unsigned long)size,(unsigned long)mtotal);
+    exit(1);
+  }
+
+  nbytes = amount * size;
+  mtotal += nbytes;
+
+  if ((p = malloc( nbytes )) == NULL) {
+    /* size_t is unsigned and need not be as wide as long: cast explicitly */
+    fprintf(stderr,"Ran out of near memory: %lu*%lu/%lu\n",
+	    (unsigned long)amount,(unsigned long)size,(unsigned long)mtotal);
+    exit(1);
+  }
+  return(p);
+}
diff --git a/src/lsim4.h b/src/lsim4.h
new file mode 100644
index 0000000..96c8628
--- /dev/null
+++ b/src/lsim4.h
@@ -0,0 +1,145 @@
+
+/* $Id: lsim4.h 625 2011-03-23 17:21:38Z wrp $ */
+/* $Revision: 625 $  */
+
+/* global definitions, #defines, for lsim4.c */
+
+typedef int bool;
+
+void *ckalloc(size_t amount, size_t size);
+
+/* maxi(x, y): larger of x and y.  Both arguments are fully parenthesized
+   in the result; note that the selected argument is evaluated twice. */
+#define maxi(x, y)  (((x) > (y)) ? (x) : (y))
+
+typedef struct ONE {int COL ; struct ONE  *NEXT ;} pair, *pair_ptr;
+
+#define PAIRNULL (pair_ptr)NULL
+
+/* One entry of the singly linked list built by addnode(). */
+typedef struct NODE
+{ int  SCORE;			/* score stored for this entry (raised by addnode() when a larger one is seen) */
+  int  STARI, STARJ;		/* (ci,cj) passed to addnode(); used as the lookup key */
+  int  ENDI,  ENDJ;		/* (i,j) recorded together with SCORE */
+  int  TOP,   BOT;		/* smallest / largest i seen for this entry */
+  int  LEFT,  RIGHT; 		/* smallest / largest j seen for this entry */
+  struct NODE *next;		/* next entry in the list, NULL at the end */
+}  vertex, *vertex_p;
+		
+/* Per-column scratch record (l_struct.r_ss).  NOTE(review): diff() reuses
+   CC/DD as forward-phase scores and RR/SS as reverse-phase scores; the
+   "start-point" meaning below presumably applies to the scoring passes -
+   confirm against big_pass()/locate()/small_pass(). */
+struct lrr_str {
+  int CC, DD;			/* saving row matrix scores */
+  int RR, SS, EE, FF;		/* saving row start-points */
+};
+  
+/* Per-row scratch record (l_struct.c_ss); field roles as given by the
+   original comments. */
+struct lcc_str {
+  int HH, WW;			/* saving col matrix scores HH=CC, */
+  int II, JJ, XX, YY; 		/* saving col start-points , II=RR, JJ=EE */
+};
+
+/* Generic six-field scratch record (l_struct.CCC). */
+typedef struct spa {
+  int CC, RR, EE, DD, SS, FF; 
+} space;
+
+typedef struct spa *space_ptr;
+
+/* Container for the vertex list maintained by addnode()/findmax(). */
+struct vert_str {
+  int numnode;			/* number of nodes currently in LIST */
+  vertex_p LIST, most;		/* LIST: head of the list; most: cursor left by addnode()/findmax() */
+};
+
+/* Working state shared by the scoring passes and diff(). */
+struct l_struct {
+  space_ptr CCC;
+  struct lrr_str *r_ss;	/* per-column scratch; indexed 0..N in diff() */
+  struct lcc_str *c_ss;
+  pair_ptr *row;		/* for saving used aligned pairs */
+  int  m1, mm, n1, nn;	/* boundaries of recomputed area */
+  int  rl, cl;		/* left and top boundaries */
+  int I, J;		/* current positions of A ,B - used by diff() */
+};
+
+static void big_pass(const unsigned char *A,
+		     const unsigned char *B,
+		     int M, int N,
+		     int mini_score,
+		     int **pam2, int Q, int R,
+		     int nseq,
+		     struct vert_str *v_ptr,
+		     struct l_struct *l_ptr);
+
+static void locate(const unsigned char *A,
+		   const unsigned char *B,
+		   int mini_score,
+		   int **pam2, int Q, int R,
+		   int nseq,
+		   int *flag_p,
+		   struct vert_str *v_ptr,
+		   struct l_struct *l_ptr);
+
+static void small_pass(const unsigned char *A,
+		       const unsigned char *B,
+		       int mini_score,
+		       int **pam2, int Q, int R,
+		       int nseq,
+		       struct vert_str *v_ptr,
+		       struct l_struct *l_ptr);
+
+static void addnode(int c, int ci, int cj, int i, int j,
+		    struct vert_str *v_ptr);
+
+static bool no_cross(int *flag_p, vertex_p LIST, struct l_struct *l_ptr);
+
+static int diff(const unsigned char *A,
+		const unsigned char *B,
+		int M, int N, int tb, int te,
+		int two_seq,
+		int **pam2, int q, int r,
+		int **sapp, int *last,
+		struct l_struct *l_ptr);
+
+static int CHECK_SCORE(const unsigned char *A, const unsigned char *B,
+		       int M, int N,
+		       int *S, int **W, int G, int H, int *nres);
+
+static vertex_p findmax(struct vert_str *v_ptr);
+
+/* DIAG() assigns value to x if (ii,jj) has never been used before, i.e.
+   if column jj is not found in the list l_ptr->row[ii]; otherwise x is
+   left unchanged.  Relies on the caller's locals "z" (pair_ptr scratch)
+   and "l_ptr", and is invoked without a trailing semicolon. */
+#define DIAG(ii, jj, x, value)	\
+{ for ( z = l_ptr->row[(ii)]; z != 0 && z->COL != (jj); z = z->NEXT ) ; \
+  if ( !z )   x = ( value );	\
+  }
+
+/* replace (ss1, xx1, yy1) by (ss2, xx2, yy2) if the latter is larger.
+   The triples are compared lexicographically: first ss, then xx, then yy.
+   The arguments are used as assignment targets and are evaluated more
+   than once, so they must be plain lvalues without side effects. */
+#define ORDER(ss1, xx1, yy1, ss2, xx2, yy2) \
+{ if ( ss1 < ss2 ) \
+  { ss1 = ss2; xx1 = xx2; yy1 = yy2; } \
+else \
+if ( ss1 == ss2 ) \
+  { if ( xx1 < xx2 ) { xx1 = xx2; yy1 = yy2; } \
+    else \
+       if ( xx1 == xx2 && yy1 < yy2 )  yy1 = yy2; \
+  } \
+}
+
+/* ORDER1: same result as ORDER() - keep the lexicographically larger of
+   (ss1, xx1, yy1) and (ss2, xx2, yy2) in the first triple - but with a
+   single ss1 <= ss2 test in front, so nothing further is evaluated when
+   ss1 > ss2. */
+#define ORDER1(ss1, xx1, yy1, ss2, xx2, yy2)  \
+{ if (ss1 <= ss2) {  \
+    if (ss1 == ss2) {  \
+	if (xx1 < xx2) {  \
+           xx1 = xx2; yy1 = yy2; \
+    } else {                           \
+      if (xx1 == xx2 && yy1 < yy2)  \
+         yy1 = yy2; \
+    } \
+    } else {  \
+      ss1 = ss2; xx1 = xx2; yy1 = yy2;  \
+    } \
+  } \
+}
+
+/* ORDER2: two-component version of ORDER1 - replace (ss1, xx1) by
+   (ss2, xx2) if the latter is lexicographically larger. */
+#define ORDER2(ss1, xx1, ss2, xx2) \
+{  \
+   if (ss1 <= ss2) { \
+      if (ss1 == ss2) {  \
+         if (xx1 < xx2) xx1 = xx2; \
+      } else { \
+      ss1 = ss2; xx1 = xx2; \
+      } \
+ } \
+}
+
diff --git a/src/map_db.c b/src/map_db.c
new file mode 100644
index 0000000..5429508
--- /dev/null
+++ b/src/map_db.c
@@ -0,0 +1,600 @@
+/* map_db.c - read a FASTA or GCG format database and generate a list
+   of indices for rapid memory mapping */
+
+/* $Id: map_db.c 1239 2013-11-02 01:09:58Z wrp $ */
+
+/* copyright (c) 1999, 2014 by William R. Pearson and The Rector &
+   Visitors of the University of Virginia */
+
+/* Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under this License is distributed on an "AS
+   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+   express or implied.  See the License for the specific language
+   governing permissions and limitations under the License. 
+*/
+
+/* input is a libtype 1,5, or 6 sequence database */
+/* output is a BLAST2 formatdb type index file */
+
+/* format of the index file:
+
+1)  map_db version number ["MP"+2 bytes]
+2)  number of sequences in database [4 bytes]
+3)  total length of database        [8 bytes]  (MP1, 4 bytes for MP0)
+4)  longest sequence in database    [8 bytes]  (MP1, 4 bytes for MP0)
+5) list of offsets to definitions  [num_seq+1] int*8 (MP1, 4 bytes for MP0)
+6) list of offsets to sequences    [num_seq+1] int*8 (MP1, 4 bytes for MP0)
+7) list of flag characters for sequences [num_seq+1]bytes
+    (used for GCG binary to encode 2bit or 4 bit representation)
+
+    sequence files will be as defined by their format
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+
+/* File-offset abstraction: FSEEK/FTELL/fseek_t map to fseek/ftell/long,
+   or to fseeko/ftello/off_t when USE_FSEEKO is defined.
+   FSEEK_T_DEF is defined in both configurations so that a second copy of
+   this block (for example from another header) is skipped instead of
+   repeating the typedef. */
+#ifndef FSEEK_T_DEF
+#define FSEEK_T_DEF
+#ifndef USE_FSEEKO
+#define FSEEK fseek
+#define FTELL ftell
+typedef long fseek_t;
+#else
+#define FSEEK fseeko
+#define FTELL ftello
+typedef off_t fseek_t;
+#endif
+#endif
+
+#define LASTLIB 6
+
+int (*get_entry) ();
+
+int a_get_ent(unsigned char *, int, fseek_t *, fseek_t *);
+int gbf_get_ent(unsigned char *, int, fseek_t *, fseek_t *);
+
+void src_int4_write(FILE *, int);
+void src_int4_read(FILE *, int *);
+void src_long4_write(FILE *, long);
+void src_long4_read(FILE *, long *);
+void src_long8_write(FILE *, int64_t);
+void src_long8_read(FILE *, int64_t *);
+
+void newname(char *nname, char *oname, char *suff, int maxn);
+void init_ascii0(int *xascii, char *sq_map);
+
+int (*get_ent_arr[LASTLIB+1])()={a_get_ent, gbf_get_ent, NULL, NULL, NULL,
+				 NULL, NULL};
+
+fseek_t openlib(char *, int);
+
+#define NA 123
+#define TERM 24
+#define EL 125
+#define ES 126
+#define AAMASK 127
+
+static int *sascii, aascii[128];
+char *NCBIstdaa_ext = "-ABCDEFGHIKLMNPQRSTVWXYZU*OJ-abcdefghiklmnpqrstvwxyzu*oj";
+char NCBIstdaa_ext_n = 56;
+
+/* map_db main: index a sequence library for memory-mapped access.
+
+   usage: map_db [-n] [-b] "libname [libtype]"
+     -n  library is DNA (default is protein)
+     -b  also write a binary copy of the sequences (<libname>.bsq) and a
+         second index (<libname>.xin_b) that points into it
+
+   The library is scanned once with the entry reader selected by libtype;
+   the description/sequence offsets of every entry are collected and then
+   written to <libname>.xin.  Exits with status 1 on any error. */
+int
+main(int argc, char **argv) {
+  FILE *libi, *b_fd;  /* b_fd is used for both the binary data file and the index file */
+  char lname[256];
+  char bname[256];
+  char iname[256];
+  char format[4];
+  char *bp;
+  struct stat stat_buf;
+
+  int i;
+  int nlib;	/* number of entries */
+  long max_len;	/* longest sequence */
+  fseek_t tot_len;	/* total sequence length */
+
+  int n1;
+  
+  fseek_t f_size;	/* file size from fstat() */
+  int lib_size;	/* current space available - may be realloc'ed */
+  int lib_inc;
+  int lib_type; /* library format, index into get_ent_arr[] */
+  int lib_aa;	/* prot=1; dna=0 */
+  int build_binary; /* build binary sequence file */
+
+  /* file offsets */
+  fseek_t d_pos;	/* start of description */
+  fseek_t s_pos;	/* start of sequence */
+  fseek_t b_pos;	/* start of binary encoding */
+  fseek_t *d_pos_arr;	/* array of description pointers */
+  fseek_t *s_pos_arr;	/* array of ascii sequence pointers */
+  fseek_t *b_pos_arr;	/* array of binary sequence pointers */
+  unsigned char *zbuff;	/* tmp buffer for writes */
+  unsigned char *sbuff;	/* sequence buffer */
+  int sbuff_max, sbuff_dup;
+
+  lib_type = 0;
+  lib_size = 200000;
+  lib_inc  = 100000;
+  lib_aa = 1;
+  sbuff = NULL;
+  sbuff_max = 50000;
+  sbuff_dup = 0;
+  build_binary = 0;
+
+  while (argc > 1 && *argv[1]=='-') {
+    if (strcmp(argv[1],"-n")==0) lib_aa = 0;
+    else if (strcmp(argv[1], "-b")==0) build_binary = 1;
+    argv++;
+    argc--;
+  }
+
+  /* open the database */
+  if (argc > 1) {
+    strncpy(lname, argv[1],sizeof(lname));
+    /* strncpy() does not terminate the copy when the source fills the buffer */
+    lname[sizeof(lname)-1] = '\0';
+  }
+  else {
+    fprintf(stderr," Entry library name: ");
+    if (fgets(lname,sizeof(lname),stdin)==NULL) {
+      /* EOF or read error: lname would be left uninitialized */
+      fprintf(stderr," no library name given\n");
+      exit(1);
+    }
+    if ((bp=strchr(lname,'\n'))!=NULL) *bp='\0';
+  }
+    
+  if ((bp=strchr(lname,' '))!=NULL) {
+    lib_type = atoi(bp+1);
+    *bp='\0';
+  }
+  else lib_type = 0;
+
+  /* lib_type comes from user input: keep it inside get_ent_arr[] */
+  if (lib_type < 0 || lib_type > LASTLIB || get_ent_arr[lib_type] == NULL) {
+    fprintf(stderr," cannot index file %s type %d\n",lname,lib_type);
+    exit(1);
+  }
+  
+  if (lib_type == 6) lib_aa = 0;
+  if (lib_type == 1) lib_aa = 0;
+  
+  if (lib_aa == 1) {
+    init_ascii0(aascii, NCBIstdaa_ext);
+  }
+  else {
+    if (build_binary) {
+      fprintf(stderr,"*** WARNING ***  map_db -- binary files not available for DNA libraries\n");
+      build_binary = 0;
+    }
+    init_ascii0(aascii, "\0ACGTURYMWSKDHVBNacgturymwskdhvbn");
+    aascii['X'] = aascii['N'];
+    aascii['x'] = aascii['n'];
+  }
+  sascii = &aascii[0];
+
+  if ((f_size=openlib(lname,lib_type))==0) {
+    fprintf(stderr," cannot open %s (type: %d)\n",lname,lib_type);
+    exit(1);
+  }
+
+  if (build_binary) {
+    newname(bname, lname, "bsq",sizeof(bname));
+    if ((b_fd = fopen(bname, "w")) == NULL) {
+      fprintf(stderr, "cannot open %s binary file for writing\n",bname);
+      exit(1);
+    }
+
+    if ((zbuff=(unsigned char *)calloc(256,sizeof(char)))==NULL) {
+      fprintf(stderr," cannot allocate zbuff[256]\n");
+      exit(1);
+    }
+
+    if ((sbuff=(unsigned char *)calloc(sbuff_max,sizeof(char)))==NULL) {
+      fprintf(stderr," cannot allocate sbuff[%d]\n",sbuff_max);
+      exit(1);
+    }
+
+    /* write out the initial NULL */
+    fwrite(zbuff,sizeof(char),1, b_fd);
+  }
+  b_pos = 1;	/* initialize whether used or not */
+
+  /* allocate array of description pointers */
+  if ((d_pos_arr=(fseek_t *)calloc(lib_size, sizeof(fseek_t)))==NULL) {
+    fprintf(stderr," cannot allocate %d for desc. array\n",lib_size);
+    exit(1);
+  }
+  /* allocate array of sequence pointers */
+  if ((s_pos_arr=(fseek_t *)calloc(lib_size, sizeof(fseek_t)))==NULL) {
+    fprintf(stderr," cannot allocate %d for seq. array\n",lib_size);
+    exit(1);
+  }
+  /* allocate array of binary sequence pointers */
+  if ((b_pos_arr=(fseek_t *)calloc(lib_size, sizeof(fseek_t)))==NULL) {
+    fprintf(stderr," cannot allocate %d for binary array\n",lib_size);
+    exit(1);
+  }
+
+  /* scan the library; the reader returns the entry length, <= 0 at the end */
+
+  nlib = 0; tot_len=0; max_len=-1;
+  while ((n1=get_entry(sbuff, sbuff_max, &d_pos, &s_pos)) > 0) {
+    if (build_binary) fwrite(sbuff, sizeof(char), n1+1, b_fd);
+
+    d_pos_arr[nlib] = d_pos;
+    s_pos_arr[nlib] = s_pos;
+    b_pos_arr[nlib] = b_pos;
+
+    b_pos += n1+1;
+
+    nlib++;
+    tot_len += n1;
+
+    if (n1 > max_len) max_len = n1;
+    if (nlib >= lib_size) { /* too many entries */
+
+      lib_size += lib_inc;
+      if ((d_pos_arr=(fseek_t *)realloc(d_pos_arr,lib_size*sizeof(fseek_t)))==NULL) {
+	fprintf(stderr," cannot realloc allocate %d for desc.. array\n",
+		lib_size);
+	exit(1);
+      }
+      if ((s_pos_arr=(fseek_t *)realloc(s_pos_arr,lib_size*sizeof(fseek_t)))==NULL) {
+	fprintf(stderr," cannot realloc allocate %d for seq. array\n",
+		lib_size);
+	exit(1);
+      }
+      if ((b_pos_arr=(fseek_t *)realloc(b_pos_arr,lib_size*sizeof(fseek_t)))==NULL) {
+	fprintf(stderr," cannot realloc allocate %d for binary aseq. array\n",
+		lib_size);
+	exit(1);
+      }
+    }
+  }
+
+  if (build_binary) fclose(b_fd);
+
+  if (stat(lname,&stat_buf)<0) {
+    fprintf(stderr," cannot stat library: %s\n",lname);
+    exit(1);
+  }
+  else {
+    f_size = stat_buf.st_size;
+  }
+
+  d_pos_arr[nlib]= d_pos;	/* put in the end of the file */
+  s_pos_arr[nlib]=0;
+  b_pos_arr[nlib]= b_pos;
+
+  /* all the information is in, write it out */
+  
+  newname(iname,lname,"xin",sizeof(iname));
+  if ((libi=fopen(iname,"w"))==NULL) {
+    fprintf(stderr," cannot open %s for writing\n",iname);
+    exit(1);
+  }
+
+  /* write out format version, etc. to .xin file */
+  format[0]='M';
+  format[1]='P';
+#ifdef BIG_LIB64
+  format[2]= 1;		/* format 1,2 for 8-byte offsets */
+#else
+  format[2]='\0';	/* format '\0' for original 4-byte */
+#endif
+
+  format[3]=lib_type;
+  fwrite(format,4,sizeof(char),libi);
+
+  /* write out sequence type */
+  src_int4_write(libi, lib_aa);
+  /* write out file fstat as integrity check */
+#ifdef BIG_LIB64
+  src_long8_write(libi, f_size);
+#else
+  src_int4_write(libi, f_size);
+#endif
+  /* write out num_seq */
+  src_int4_write(libi, nlib);
+
+#ifdef BIG_LIB64
+  /* write out tot_len, max_len */
+  src_long8_write(libi, tot_len);
+#else
+  src_int4_write(libi, tot_len);
+#endif
+  src_int4_write(libi, max_len);
+
+#ifdef BIG_LIB64
+  for (i=0; i<=nlib; i++) src_long8_write(libi,d_pos_arr[i]);
+  for (i=0; i<=nlib; i++) src_long8_write(libi,s_pos_arr[i]);
+#else
+  for (i=0; i<=nlib; i++) src_int4_write(libi,d_pos_arr[i]);
+  for (i=0; i<=nlib; i++) src_int4_write(libi,s_pos_arr[i]);
+#endif
+  fclose(libi);
+
+  if (build_binary) { /* do the same thing for the .xin_b file */
+    if (stat(bname,&stat_buf)<0) {
+      fprintf(stderr," cannot stat library: %s\n",bname);
+      exit(1);
+    }
+    else {
+      f_size = stat_buf.st_size;
+    }
+
+    newname(iname,lname,"xin_b",sizeof(iname));
+    if ((b_fd=fopen(iname,"w"))==NULL) {
+      fprintf(stderr," cannot open %s for writing\n",iname);
+      exit(1);
+    }
+
+    /* write out format version, etc. to .xin_b file.
+       Everything below goes to b_fd: libi was closed above and must not
+       be written to again. */
+    format[0]='M';
+    format[1]='P';
+#ifdef BIG_LIB64
+    format[2]= 2;		/* format 1,2 for 8-byte offsets */
+#else
+    format[2]='\0';	/* format '\0' for original 4-byte */
+#endif
+
+    format[3]=lib_type;
+    fwrite(format,4,sizeof(char),b_fd);
+
+    /* write out sequence type */
+    src_int4_write(b_fd, lib_aa);
+    /* write out file fstat as integrity check */
+    src_long8_write(b_fd, f_size);
+    /* write out num_seq */
+    src_int4_write(b_fd, nlib);
+
+    /* write out tot_len, max_len */
+    src_long8_write(b_fd, tot_len);
+    src_int4_write(b_fd, max_len);
+    /* write out maximum length */
+    src_int4_write(b_fd, sbuff_max);
+    /* write out overlap */
+    src_int4_write(b_fd, sbuff_dup);
+
+    for (i=0; i<=nlib; i++) src_long8_write(b_fd,b_pos_arr[i]);
+    fclose(b_fd);
+  }
+
+  /* fseek_t may be long or off_t: cast to the type the format expects */
+#ifdef BIG_LIB64
+  fprintf(stderr," wrote %d sequences (tot=%lld, max=%ld) to %s\n",
+	  nlib,(long long)tot_len,max_len,iname);
+#else
+  fprintf(stderr," wrote %d sequences (tot=%ld, max=%ld) to %s\n",
+	  nlib,(long)tot_len,max_len,iname);
+#endif
+  if (build_binary) {
+    fprintf(stderr," binary file: %s.bsq written\n",lname);
+  }
+
+  exit(0);
+}
+
+
+FILE *libf=NULL;
+fseek_t lpos;
+
+#define MAXLINE 4096
+char lline[MAXLINE+1];
+
+/* openlib - open library file lname for reading, select the entry
+   reader for lib_type and pre-load the first line into lline.
+   Returns the file size reported by stat(), or 0 if the file cannot be
+   stat'ed, cannot be opened, or has no first line. */
+fseek_t
+openlib(char *lname, int lib_type)
+{
+  struct stat lib_stat;
+  int stat_rc;
+
+  stat_rc = stat(lname,&lib_stat);
+  if (stat_rc < 0) {
+    fprintf(stderr," cannot stat library: %s\n",lname);
+    return 0;
+  }
+
+  libf = fopen(lname,"r");
+  if (libf == NULL) {
+    fprintf(stderr," cannot open library: %s (type: %d)\n",
+	    lname, lib_type);
+    return 0;
+  }
+  
+  get_entry = get_ent_arr[lib_type];
+
+  /* remember where the first line starts, then read it */
+  lpos = FTELL(libf);
+  if (fgets(lline,MAXLINE,libf) != NULL) {
+    return lib_stat.st_size;
+  }
+  return 0;
+}
+
+/* a_get_ent - read the next FASTA-format entry from libf.
+
+   On entry lline holds the last line read.  Lines are skipped until one
+   starts with '>' or ';'; its file offset is stored in *d_pos and the
+   offset of the line after it in *s_pos.  The following lines, up to the
+   next line starting with '>', are taken as sequence.
+
+   If sbuff is non-NULL the encoded residues are stored there (at most
+   max_sbuff-1 of them, followed by a 0 byte); the program exits if the
+   sequence does not fit.  If sbuff is NULL the residues are only counted.
+
+   Returns the number of residues, 0 at end of file, or -1 if a ';' line
+   is longer than the line buffer. */
+int
+a_get_ent(unsigned char *sbuff, int max_sbuff, 
+	  fseek_t *d_pos, fseek_t *s_pos)
+{
+  char *cp;
+  unsigned char *sptr, *sptr_max;
+  unsigned char ch;
+  int *ap, n1, code;
+
+  sptr = sbuff;
+  /* no pointer arithmetic on a NULL buffer */
+  sptr_max = (sbuff != NULL) ? sbuff+max_sbuff : NULL;
+
+  ap = sascii;
+
+  while (lline[0]!='>' && lline[0]!=';') {
+    lpos = FTELL(libf);
+    if (fgets(lline,sizeof(lline),libf)==NULL) {
+      *d_pos = lpos;
+      return 0;
+    }
+  }
+
+  *d_pos = lpos;
+
+  /* make certain we have the end of the line */
+  while (strchr((char *)lline,'\n')==NULL) {
+    if (fgets(lline,sizeof(lline),libf)==NULL) break;
+  }
+
+  *s_pos = FTELL(libf);
+  lline[0]='\0';
+  n1 = 0;
+  while (fgets(lline,sizeof(lline),libf)!=NULL) {
+    if (lline[0]=='>') break;
+    if (lline[0]==';') {
+      if (strchr(lline,'\n')==NULL) {
+	fprintf(stderr," excessive continuation\n%s",lline);
+	return -1;
+      }
+    }
+
+    /* The mapping table has only AAMASK+1 entries and plain char may be
+       signed, so every byte is read as unsigned char and bytes outside
+       the table are treated as non-sequence (NA). */
+    if (sbuff) {
+      for (cp=lline; *cp && sptr < sptr_max; cp++) {
+	ch = (unsigned char)*cp;
+	code = (ch <= AAMASK) ? ap[ch] : NA;
+	*sptr = (unsigned char)code;
+	if (code < NA) {sptr++;}
+      }
+
+      if (sptr >= sptr_max) {
+	fprintf(stderr," sequence too long: %ld\n",(long)(sptr-sbuff));
+	exit(1);
+      }
+    }
+    else {
+      for (cp=lline; *cp; cp++) {
+	ch = (unsigned char)*cp;
+	if (ch <= AAMASK && ap[ch]<NA) n1++;
+      }
+    }
+
+    lpos = FTELL(libf);
+  }
+  if (sbuff) {
+    *sptr = '\0';
+    return sptr - sbuff;
+  }
+  else {
+    return n1;
+  }
+}
+
+/* gbf_get_ent - read the next GenBank flat-file entry from libf.
+
+   Skips to the next line starting with "LOCUS" (its offset is stored in
+   *d_pos), then to the line starting with "ORIGIN" (the offset of the
+   line after it is stored in *s_pos), and counts the residues on the
+   following lines up to a line starting with '/'.
+
+   sbuff and max_sbuff are not used: the sequence is only counted.
+   Returns the residue count, or -1 if LOCUS/ORIGIN is not found before
+   end of file. */
+int
+gbf_get_ent(unsigned char *sbuff, int max_sbuff, 
+	    fseek_t *d_pos, fseek_t *s_pos)
+{
+  int n1;
+  char *cp;
+  unsigned char ch;
+  register int *ap;
+
+#if !defined(TFAST)
+  ap = sascii;
+#else
+  ap = nascii;
+#endif
+
+  while (lline[0]!='L' || lline[1]!='O' || 
+	 strncmp(lline,"LOCUS",5)) { /* find LOCUS */
+    lpos = FTELL(libf);
+    if (fgets(lline,MAXLINE,libf)==NULL) return (-1);
+  }
+  *d_pos=lpos;
+
+  while (lline[0]!='O' || lline[1]!='R' ||
+	 strncmp(lline,"ORIGIN",6)) { /* find ORIGIN */
+    if (fgets(lline,MAXLINE,libf)==NULL) return (-1);
+  }
+  *s_pos = FTELL(libf);
+
+  lline[0]='\0';
+  n1=0;
+  while (fgets(lline,MAXLINE,libf)!=NULL) {
+    if (lline[0]=='/') break;
+    /* The mapping table has only AAMASK+1 entries and plain char may be
+       signed: read each byte as unsigned char and ignore bytes outside
+       the table. */
+    for (cp=lline; *cp; cp++) {
+      ch = (unsigned char)*cp;
+      if (ch <= AAMASK && ap[ch]<NA) n1++;
+    }
+  }
+  lpos = FTELL(libf);
+  /* pre-load the line after the terminator for the next call; at end of
+     file lline is simply left as it is */
+  fgets(lline,MAXLINE,libf);
+
+  return n1;
+}
+
+/* src_int4_read - read a 4-byte big-endian integer from fd into *val.
+   This is the inverse of src_int4_write().  The bytes are combined
+   explicitly, so the result does not depend on host byte order.
+   On a short read *val is set to 0. */
+void src_int4_read(FILE *fd,  int *val)
+{
+  unsigned char b[4];
+
+  if (fread((char *)&b[0],(size_t)1,(size_t)4,fd) != 4) {
+    *val = 0;
+    return;
+  }
+
+  /* b[0] is the most significant byte; unsigned arithmetic avoids
+     shifting into the sign bit */
+  *val = (int)(((unsigned int)b[0]<<24) | ((unsigned int)b[1]<<16) |
+	       ((unsigned int)b[2]<<8)  |  (unsigned int)b[3]);
+}
+
+/* src_int4_write - write val to fd as 4 bytes, most significant first. */
+void src_int4_write(FILE *fd,  int val)
+{
+#ifdef IS_BIG_ENDIAN
+  fwrite(&val,(size_t)4,(size_t)1,fd);
+#else
+  unsigned char out[4];
+  int k;
+
+  /* peel off the low byte four times, filling the buffer back to front */
+  for (k = 3; k >= 0; k--) {
+    out[k] = val & 255;
+    val = val>>8;
+  }
+
+  fwrite(out,(size_t)1,(size_t)4,fd);
+#endif
+}
+
+/* src_long8_write - write val to fd as 8 bytes, most significant first. */
+void src_long8_write(FILE *fd,  int64_t val)
+{
+#ifdef IS_BIG_ENDIAN
+  fwrite(&val,(size_t)8,(size_t)1,fd);
+#else
+  unsigned char out[8];
+  int k;
+
+  /* peel off the low byte eight times, filling the buffer back to front */
+  for (k = 7; k >= 0; k--) {
+    out[k] = val & 255;
+    val = val>>8;
+  }
+
+  fwrite(out,(size_t)1,(size_t)8,fd);
+#endif
+}
+
+/* newname - build "<oname>.<suff>" in nname, a buffer of maxn bytes.
+   The result is always NUL-terminated and never longer than maxn-1
+   characters; it is silently truncated if it does not fit.  nname and
+   oname must not overlap. */
+void
+newname(char *nname, char *oname, char *suff, int maxn)
+{
+  if (maxn <= 0) return;
+
+  strncpy(nname,oname,maxn-1);
+  nname[maxn-1] = '\0';	/* strncpy() does not terminate a full buffer */
+
+  /* strncat() appends up to n characters PLUS a terminating NUL, so the
+     limit is the free space minus one */
+  strncat(nname,".",maxn-strlen(nname)-1);
+  strncat(nname,suff,maxn-strlen(nname)-1);
+}
+
+/* init_ascii0 -- fill the 128-entry table xascii[] with a mapping from
+   ASCII characters to residue codes, where the code of a character is
+   its position in sq_map.  Position 0 of sq_map is a placeholder and is
+   never mapped (it may even be '\0', so the length is measured from
+   position 1).  Unmapped characters get NA; NUL gets ES, LF and CR get EL.
+*/
+void 
+init_ascii0(int *xascii, char *sq_map) {
+  int code;
+  int pos;
+  int n_sq_map;
+  int fold_case;
+
+  n_sq_map = strlen(sq_map+1) + 1;
+  fold_case = (n_sq_map <= 28);	/* only uppercase */
+
+  /* first map everything as non-sequence */
+  code = 0;
+  while (code < 128) {
+    xascii[code] = NA;
+    code++;
+  }
+
+  /* then map the actual sequence letters */
+  pos = 1;
+  while (pos < n_sq_map) {
+    xascii[sq_map[pos]] = pos;
+    if (fold_case) {
+      xascii[sq_map[pos]+32] = pos;	/* map lowercase */
+    }
+    pos++;
+  }
+
+  /* then map the other stuff, EL  etc */
+  xascii[0] = ES;
+  xascii[10] = EL;
+  xascii[13] = EL;
+}
+
diff --git a/src/mm_file.h b/src/mm_file.h
new file mode 100644
index 0000000..8011153
--- /dev/null
+++ b/src/mm_file.h
@@ -0,0 +1,153 @@
+/*  mm_file.h - defines m_file_str for mmap()ed files  */
+
+/* $Id: mm_file.h 938 2012-06-04 16:15:06Z wrp $ */
+
+/* copyright (c) 1999, 2014 by William R. Pearson and The Rector &
+   Visitors of the University of Virginia */
+
+
+/* Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under this License is distributed on an "AS
+   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+   express or implied.  See the License for the specific language
+   governing permissions and limitations under the License. 
+*/
+
+#include <sys/types.h>
+
+/* File-offset abstraction: FSEEK/FTELL/fseek_t map to fseek/ftell/long,
+   or to fseeko/ftello/off_t when USE_FSEEKO is defined.
+   FSEEK_T_DEF is defined in both configurations so that a second copy of
+   this block (for example from another source file or header) is skipped
+   instead of repeating the typedef. */
+#ifndef FSEEK_T_DEF
+#define FSEEK_T_DEF
+#ifndef USE_FSEEKO
+#define FSEEK fseek
+#define FTELL ftell
+typedef long fseek_t;
+#else
+#define FSEEK fseeko
+#define FTELL ftello
+typedef off_t fseek_t;
+#endif
+#endif
+
+/* 64-bit integer types: taken from <inttypes.h> when HAS_INTTYPES is
+   defined, otherwise declared here. */
+#ifdef HAS_INTTYPES
+#include <inttypes.h>
+#else
+#ifdef WIN32
+typedef __int64 int64_t;
+typedef unsigned __int64 uint64_t;
+#else
+/* NOTE(review): this fallback is only 64 bits wide where long is 64 bits;
+   confirm that platforms with a 32-bit long always define HAS_INTTYPES */
+typedef long int64_t;
+typedef unsigned long uint64_t;
+#endif
+#endif
+/* offset type used for the *_pos_arr index arrays in struct lmf_str */
+typedef int64_t MM_OFF;
+
+#ifdef MYSQL_DB
+#include <mysql.h>
+#endif
+#ifdef PGSQL_DB
+#include <libpq-fe.h>
+#endif
+
+#ifndef MAX_FN
+#include "defs.h"
+#endif
+
+extern unsigned long adler32();
+
+/* lmf_str - state of one open sequence library.  The same structure
+   serves every access method (flat file, BLAST2 database, GCG, memory
+   mapped file, SQL query); each method uses its own group of fields and
+   is reached through the getlib/ranlib function pointers at the end. */
+struct lmf_str {
+  FILE *libf;		/* sequence file being read */
+  char *lb_name;	/* file name */
+  char opt_text[MAX_FN];	  /* text after filename */
+  int lb_type;		/* library type */
+  int *sascii;		/* ascii -> sq mapping */
+  int *vascii; 		/* annotation to ann mapping */
+
+  char *annot_sname;	/* annotation script name */
+
+  /* used by flat files */
+  char *lline;		/* last line read */
+  int acc_off;		/* start of libstr (+1 for agetlib/fasta) */
+  unsigned char *cpsave;	/* position in line for lgetlib() */
+  fseek_t lpos;			/* position in file */
+
+  /* blast2.0 stuff */
+  FILE *hfile;		/* BLAST2.0 description file */
+  int bl_format_ver;	/* blast formatdb version */
+  int bl_lib_pos;	/* for ncbl2 */
+  int pref_db;		/* preferred database */
+  int have_oid_list;	/* we have an oid file, must read oid's */	
+  unsigned int *oid_list;	/* oid list for subsets */
+  int oid_seqs;		/* start offset for mask array */
+  unsigned int max_oid;	/* NOTE(review): presumably the largest oid in oid_list - confirm */
+
+  /* Genbank Flat files */
+  int lfflag;		/* flag for CRLF in EMBL CDROM files */
+
+  /* stuff for GCG format files (5,6) */
+  int gcg_binary;	/* flag for binary gcg format */
+  long gcg_len;		/* length of GCG sequence */
+
+  /* used when memory mapping */
+  int mm_flg;		/* mmap worked */
+  int mmap_fd;		/* mmap_fd */
+  char *mmap_base;	/* base */
+  char *mmap_addr;	/* current pos */
+  long st_size;		/* file size */
+
+  MM_OFF *d_pos_arr;	/* pointer to desc. offsets */
+  MM_OFF *s_pos_arr;	/* pointer to seq. offsets */
+  MM_OFF *b_pos_arr;	/* pointer to binary seq. offsets */
+  MM_OFF *a_pos_arr;	/* pointer to aux offsets */
+
+  /* currently available only for memory mapped files */
+  int max_cnt;		/* # database entries */
+  int64_t tot_len;	/* total residue length */
+  long max_len;		/* maximum sequence length */
+  long maxn;		/* maximum possible length */
+  long mdup;		/* duplication for overlapping sequences */
+  int lib_aa;		/* 0 = DNA, 1 = prot */
+  char *tmp_buf;	/* temporary buffer */
+  int tmp_buf_max;	/* max size */
+  int (*sel_acc_p)(char *, int gi, void *);	/* used to select subset of library */
+  void *sel_local;				/* local data structure for sel_acc_p() */
+
+  /* used for SQL database queries */
+  char *sql_db, *sql_query, *sql_getdesc, *sql_getseq, *sql_close_tables;
+  int sql_reopen;
+  char **sql_uid_arr;	/* indexed by lpos */
+  /* used to get sequence data */
+  char *sql_seqp;
+
+#ifdef MYSQL_DB
+  /* used to open the database */
+  MYSQL *mysql_conn;
+  MYSQL_RES *mysql_res;
+  MYSQL_ROW mysql_row;
+#endif
+
+#ifdef PGSQL_DB
+  /* used to open the database */
+  PGconn *pgsql_conn;
+  PGresult *pgsql_res;
+#endif
+
+  /* read the next sequence from the library */
+  int (*getlib)(unsigned char *seq, int maxs, char *ann,
+		int n_libstr,
+		fseek_t *libpos,
+		int *lcont,
+		struct lmf_str *lm_fd,
+		long *l_off);
+
+  /* random access: fetch the description of the entry at libpos */
+  void (*ranlib)(char *str, int cnt,
+		 fseek_t libpos, char *libstr,
+		 struct lmf_str *lm_fd);
+
+  /* NOTE(review): presumably fills a chain of sequence records from the
+     mapped file - confirm against the implementation */
+  int (*get_mmap_chain)(struct seqr_chain *cur_seqr_chain,
+			struct lmf_str *m_fd, struct db_str *db);
+};
diff --git a/src/mmgetaa.c b/src/mmgetaa.c
new file mode 100644
index 0000000..77d58fe
--- /dev/null
+++ b/src/mmgetaa.c
@@ -0,0 +1,1116 @@
+/* mmgetaa.c - functions for mmap()ed access to libraries */
+
+/* $Id: mmgetaa.c 1153 2013-05-20 13:29:29Z wrp $ */
+
+/* copyright (c) 1999, 2000, 2014 by William  R. Pearson and The Rector &
+   Visitors of the University of Virginia */
+
+/* Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under this License is distributed on an "AS
+   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+   express or implied.  See the License for the specific language
+   governing permissions and limitations under the License. 
+*/
+
+/*
+  This is one of two alternative files that can be used to
+  read a database.  The two files are nmgetaa.c, and mmgetaa.c
+  (nxgetaa.c has been retired).
+
+  nmgetlib.c and mmgetaa.c are used together. nmgetlib.c provides
+  the same functions as nxgetaa.c if memory mapping is not used,
+  mmgetaa.c provides the database reading functions if memory
+  mapping is used. The decision to use memory mapping is made on
+  a file-by-file basis.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <limits.h>
+#include <string.h>
+#include <errno.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+
+#define MAXLINE 512
+#define EOSEQ 0
+
+#define XTERNAL
+#include "uascii.h"
+/* #include "upam.h" */
+#undef XTERNAL
+
+#define GCGBIN 6
+
+#ifndef MAP_FILE
+#define MAP_FILE 0
+#endif
+
+#include "defs.h"
+#include "structs.h"
+#include "mm_file.h"
+
+extern int64_t bl2_long8_cvt(int64_t);
+extern int bl2_uint4_cvt(int);
+extern void newname(char *, char *, char *, int);
+unsigned long adler32(unsigned long, const unsigned char *, unsigned int);
+
+long crck(char *, int);
+extern void src_int4_read(FILE *fd,  int *val);
+extern void src_long4_read(FILE *fd,  long  *valp);
+extern void src_long8_read(FILE *fd,  int64_t *val);
+
+int
+agetlib_mb(unsigned char *seq,int maxs,char *libstr,int n_libstr,fseek_t *libpos,
+	   int *lcont, struct lmf_str *m_fd, long *l_off);
+
+extern void
+aranlib(char *str, int cnt, fseek_t seek, char *libstr, struct lmf_str *lm_fd);
+void
+aranlib_mb(char *str, int cnt, fseek_t seek, char *libstr, struct lmf_str *lm_fd);
+
+/* mmap()ed functions */
+#ifdef USE_MMAP
+/* old-style (unprototyped) declarations of the readers defined later
+   in this file, so they can be stored in the tables below */
+int agetlibm(); void aranlibm();
+int lgetlibm(); void lranlibm();
+void vranlibm();
+int gcg_getlibm();
+
+/* sequential ("get next entry") readers, indexed by lib_type.  A NULL
+   slot means there is no mmap()ed reader for that type (this is what
+   can_mmap() tests).  load_mmap() documents 0 as Fasta, 5 as vms_pir
+   and 6 as gcg_binary; slot 1 (lgetlibm) is presumably the GenBank
+   flat-file reader - TODO confirm */
+int (*getlibam[])()={
+  agetlibm,lgetlibm, NULL, NULL,NULL,agetlibm,gcg_getlibm
+};
+
+/* random-access ("get description of entry") readers, indexed by
+   lib_type in parallel with getlibam[] */
+void (*ranlibam[])()={
+  aranlibm,lranlibm,NULL,NULL,NULL,vranlibm,vranlibm
+};
+#endif
+
+int
+bmap_get_mmap_chain(struct seqr_chain *cur_seqr_chain, 
+			struct lmf_str *m_fd, struct db_str *db);
+
+/* load_mmap() loads the d_pos[] and s_pos[] arrays for rapid access */
+/* 24-July-2011 -- checks and maps bsq file */
+
+/* can_mmap() reports whether a memory-mapped reader exists for
+   lib_type.  Returns 1 if getlibam[] holds a reader for that type,
+   0 if the slot is NULL or lib_type lies outside the table. */
+int can_mmap(int lib_type) {
+  /* reject indices outside getlibam[] instead of reading past it */
+  if (lib_type < 0 ||
+      lib_type >= (int)(sizeof(getlibam)/sizeof(getlibam[0]))) {
+    return 0;
+  }
+  return (getlibam[lib_type] != NULL);
+}
+
+
+/* load_mmap() memory-maps the library file sname (or its ".bsq"
+   binary companion, when that file and a matching ".xin_b" index
+   exist) and loads the offset arrays from the already open ".xin"
+   index into *m_fd.
+
+   libi     - open ".xin" index file; reading starts at its current position
+   sname    - name of the sequence database file
+   lib_type - library format; must equal the type byte stored in the index
+   ldnaseq  - 1 for DNA, 0 for protein
+   m_fd     - caller-supplied library descriptor that is filled in
+
+   Returns m_fd on success.  Returns NULL when the index cannot be
+   used (bad or mismatched header, size mismatch, open/stat/mmap/read
+   failure); on those paths everything acquired here (descriptors,
+   mapping, offset arrays) is released first.  Failure to allocate the
+   d_pos/s_pos/b_pos arrays still calls exit(1). */
+struct lmf_str *
+load_mmap(FILE *libi,	/* fd for already open ".xin" file */
+	  char *sname,	/* name of sequence database file */
+	  int lib_type,	/* 0-Fasta, 5-vms_pir, 6-gcg_binary */
+	  int ldnaseq,	/* 1 for DNA, 0 for protein */
+	  struct lmf_str *m_fd)
+{
+  char format[4];		/* header of .xin */
+  char b_format[4];		/* header of .xin_b */
+  char bname[MAX_FN], xbname[MAX_FN];
+  int i, lib_aa;
+  fseek_t f_size;		/* library size recorded in .xin */
+  fseek_t bf_size;		/* .bsq size recorded in .xin_b */
+  long lf_size;
+  struct stat statbuf;
+  int max_cnt;
+  fseek_t *d_pos_arr = NULL, *s_pos_arr = NULL, *b_pos_arr = NULL;
+  int mm64_flag, mmb_flag;
+  int fd_open = 0;		/* m_fd->mmap_fd is an open descriptor */
+  int is_mapped = 0;		/* m_fd->mmap_base is a live mapping */
+  FILE *libi_b = NULL;		/* FILE * for .xin_b file */
+  int *tmp_pos_arr = NULL;
+
+  /* first check that the necessary indices are up-to-date */
+  /* read the offsets in ".xin" file */
+  /* all four header bytes are used below, so a short read is an error */
+  if (fread(format,1,4,libi)!=4) {
+    fprintf(stderr," cannot read .xin format\n");
+    return NULL;
+  }
+
+  mm64_flag = (format[2]>=1);	/* 4 bytes or 8 bytes for long? */
+
+#ifndef BIG_LIB64
+  if (mm64_flag) {return NULL;}
+#endif
+
+  if (format[3]!=lib_type) {
+    fprintf(stderr," cannot read format %d != lib_type %d\n",
+	    format[3],lib_type);
+    return NULL;
+  }
+
+  src_int4_read(libi,&lib_aa);
+  /* lib_aa is 1 for protein while ldnaseq is 1 for DNA, so equal
+     values mean the index and the request disagree */
+  if (lib_aa == ldnaseq) { /* database residue mismatch */
+    fprintf(stderr," residue type mismatch %s != %s (.xin) in %s\n",
+	    (lib_aa ? "DNA" : "prot."),(ldnaseq ? "prot." : "DNA"),
+	    sname);
+    return NULL;
+  }
+
+  /* everything looks good, fill in the caller's lmf_str */
+  m_fd->lib_aa = lib_aa;
+
+  /* get ascii file size from index */
+  if (mm64_flag) src_long8_read(libi,&f_size);
+  else {
+    src_long4_read(libi,&lf_size);
+    f_size = lf_size;
+  }
+
+  if (sizeof(char *) < sizeof(fseek_t) && f_size > UINT_MAX) {
+    fprintf(stderr,"\n *** Warning *** database too large (%lld) for 32-bit mmap()\n",
+	    (long long)f_size);
+    return NULL;
+  }
+
+  /* check for .bsq binary mapping */
+  newname(bname,sname,"bsq",sizeof(bname));
+  /* the assignment must be parenthesized: ">=" binds tighter than "=",
+     so without it mmap_fd would receive 0/1 and the descriptor leak */
+  mmb_flag = ((m_fd->mmap_fd=open(bname,O_RDONLY)) >= 0);
+  if (mmb_flag) {
+    fd_open = 1;
+
+    /* fstat the binary sequence file (on the descriptor just opened) */
+    if (fstat(m_fd->mmap_fd, &statbuf) < 0) {
+      fprintf(stderr," cannot stat %s for mmap()", bname);
+      perror("...");
+      mmb_flag = 0;
+      goto next_mmap;
+    }
+
+    /* now open the .xin_b file and read the offsets */
+    newname(xbname, sname, "xin_b",sizeof(xbname));
+    if ((libi_b = fopen(xbname,"r"))==NULL) {
+      fprintf(stderr,"Cannot open %s binary index file\n",xbname);
+      mmb_flag = 0;
+      goto next_mmap;
+    }
+
+    /* now read the .xin_b file */
+    if (fread(b_format,1,4,libi_b)!=4) {
+      fprintf(stderr," cannot read .xin_b format\n");
+      mmb_flag = 0;
+      goto next_mmap;
+    }
+    src_int4_read(libi_b,&lib_aa);	/* read to advance; value not used */
+    /* get .bsq file size from .xin_b; kept apart from f_size so the
+       ASCII fallback below still compares against the .xin value */
+    src_long8_read(libi_b,&bf_size);
+    if (bf_size != statbuf.st_size) {
+      fprintf(stderr," %s file size (%lld) and expected size (%lld) don't match\n",
+	      bname,(long long)statbuf.st_size,(long long)bf_size);
+      mmb_flag = 0;
+      goto next_mmap;
+    }
+
+    /* the index file and library file are open and the sizes match */
+    /* map the file */
+
+    m_fd->st_size = statbuf.st_size;
+    if((m_fd->mmap_base =
+	mmap(NULL, m_fd->st_size, PROT_READ,
+	     MAP_FILE | MAP_SHARED, m_fd->mmap_fd, 0)) == (char *) -1) {
+#ifdef DEBUG
+      fprintf(stderr," cannot mmap %s", bname);
+      perror("...");
+#endif
+      goto fail;
+    }
+    is_mapped = 1;
+
+    /* now finish reading the index file */
+    src_int4_read(libi_b,&max_cnt);
+
+    src_long8_read(libi_b,&m_fd->tot_len);
+    src_long4_read(libi_b,&lf_size);
+    m_fd->max_len = lf_size;
+    /* get seqbuf_max */
+    src_long4_read(libi_b,&lf_size);
+    /* get seqbuf_dup */
+    src_long4_read(libi_b,&lf_size);
+
+#ifdef DEBUG
+    fprintf(stderr,
+	    "\n%s\tformat: %c%c%d %d; max_cnt: %d; tot_len: %lld max_len: %ld\n",
+	    sname,b_format[0],b_format[1],b_format[2],b_format[3],
+	    max_cnt,(long long)m_fd->tot_len,m_fd->max_len);
+#endif
+
+    /* a negative count would wrap when used as an allocation size */
+    if (max_cnt < 0) {
+      fprintf(stderr," invalid entry count %d in %s\n",max_cnt,xbname);
+      goto fail;
+    }
+
+    /* allocate array of binary sequence offsets */
+
+    if ((b_pos_arr=(fseek_t *)calloc(max_cnt+1, sizeof(fseek_t)))==NULL) {
+      fprintf(stderr," cannot allocate %d for binary seq array\n",max_cnt+1);
+      exit(1);
+    }
+
+    /* now read the binary offsets (b_pos_arr) */
+    if (fread(b_pos_arr,sizeof(fseek_t),max_cnt+1,libi_b)!=
+	(size_t)(max_cnt+1)) {
+      fprintf(stderr," error reading bseq offsets: %s\n",xbname);
+      goto fail;
+    }
+
+#ifndef IS_BIG_ENDIAN
+    for (i=0; i<=max_cnt; i++) {
+      b_pos_arr[i] = bl2_long8_cvt(b_pos_arr[i]);
+    }
+#endif
+    /* now have the b_pos_arr[] allocated and read, close libi_b */
+    fclose(libi_b);
+    libi_b = NULL;
+  }
+
+ next_mmap:
+  /* here if no mmb_flag or mmb read failed */
+  if (!mmb_flag) {
+    /* release whatever a rejected .bsq attempt left open */
+    if (libi_b != NULL) { fclose(libi_b); libi_b = NULL; }
+    if (fd_open) { close(m_fd->mmap_fd); fd_open = 0; }
+
+    /* now, start to open mmap()ed file */
+    if ((m_fd->mmap_fd=open(sname,O_RDONLY)) < 0) {
+      fprintf(stderr," cannot open %s for mmap()", sname);
+      perror("...");
+      return NULL;	/* file did not open */
+    }
+    fd_open = 1;
+
+    /* fstat the library file and get size */
+    if(fstat(m_fd->mmap_fd, &statbuf) < 0) {
+      fprintf(stderr," cannot stat %s for mmap()", sname);
+      perror("...");
+      m_fd->mm_flg = 0;
+      goto fail;	/* nothing is mapped, so do not continue */
+    }
+
+    /* check for identical sizes - if different, do not mmap */
+    if (f_size != statbuf.st_size) {
+      fprintf(stderr," %s file size (%lld) and expected size (%lld) don't match\n",
+	      sname,(long long)statbuf.st_size,(long long)f_size);
+      goto fail;
+    }
+
+    /* the index file and library file are open and the sizes match */
+    /* map the file */
+
+    m_fd->st_size = statbuf.st_size;
+    if((m_fd->mmap_base =
+	mmap(NULL, m_fd->st_size, PROT_READ,
+	     MAP_FILE | MAP_SHARED, m_fd->mmap_fd, 0)) == (char *) -1) {
+#ifdef DEBUG
+      fprintf(stderr," cannot mmap %s", sname);
+      perror("...");
+#endif
+      goto fail;
+    }
+    is_mapped = 1;
+  }
+
+  /* here, we have a memory mapped file, and if mmb_flag, we have the
+     b_pos_arr[] read, but not s_pos_arr[] or d_pos_arr[] */
+
+  /* the mapping remains valid after its descriptor is closed */
+  close(m_fd->mmap_fd);
+  fd_open = 0;
+
+  /* now finish reading the index file */
+  src_int4_read(libi,&max_cnt);
+
+  if (mm64_flag) {
+    src_long8_read(libi,&m_fd->tot_len);
+  }
+  else {
+    src_long4_read(libi,&lf_size);
+    m_fd->tot_len = lf_size;
+  }
+  src_long4_read(libi,&lf_size);
+  m_fd->max_len = lf_size;
+
+#ifdef DEBUG
+  fprintf(stderr,
+	  "\n%s\tformat: %c%c%d %d; max_cnt: %d; tot_len: %lld max_len: %ld\n",
+	  sname,format[0],format[1],format[2],format[3],
+	  max_cnt,(long long)m_fd->tot_len,m_fd->max_len);
+#endif
+
+  /* a negative count would wrap when used as an allocation size */
+  if (max_cnt < 0) {
+    fprintf(stderr," invalid entry count %d in index for %s\n",max_cnt,sname);
+    goto fail;
+  }
+
+  /* 32-bit indices are read into tmp_pos_arr[] and then widened */
+  if (!mm64_flag) {
+    if ((tmp_pos_arr=(int *)calloc(max_cnt+1,sizeof(int)))==NULL) {
+      fprintf(stderr," cannot allocate %d for tmp_pos array\n",
+	      max_cnt+1);
+      goto fail;
+    }
+  }
+
+  /* allocate array of description pointers */
+  if ((d_pos_arr=(fseek_t *)calloc(max_cnt+1, sizeof(fseek_t)))==NULL) {
+    fprintf(stderr," cannot allocate %d for desc. array\n",max_cnt+1);
+    exit(1);
+  }
+
+  /* read m_fd->d_pos[max_cnt+1] */
+  if (mm64_flag) {
+    if (fread(d_pos_arr,sizeof(fseek_t),max_cnt+1,libi)!=
+	(size_t)(max_cnt+1)) {
+      fprintf(stderr," error reading desc. offsets: %s\n",sname);
+      goto fail;
+    }
+  }
+  else {
+    if (fread(tmp_pos_arr,sizeof(int),max_cnt+1,libi)!=
+	(size_t)(max_cnt+1)) {
+      fprintf(stderr," error reading desc. offsets: %s\n",sname);
+      goto fail;
+    }
+#ifdef DEBUG
+    fprintf(stderr,"d_pos_crc: %ld\n",
+	    crck((char *)tmp_pos_arr,sizeof(int)*(max_cnt+1)));
+#endif
+  }
+
+#ifndef IS_BIG_ENDIAN
+  if (mm64_flag)
+    for (i=0; i<=max_cnt; i++) {
+      d_pos_arr[i] = bl2_long8_cvt(d_pos_arr[i]);
+    }
+  else
+    for (i=0; i<=max_cnt; i++) {
+      d_pos_arr[i] = bl2_uint4_cvt(tmp_pos_arr[i]);
+    }
+#else
+  if (!mm64_flag) {
+    for (i=0; i<=max_cnt; i++) {
+      d_pos_arr[i] = tmp_pos_arr[i];
+    }
+  }
+#endif
+
+#ifdef DEBUG
+  for (i=0; i<max_cnt-1; i++) {
+    if (d_pos_arr[i+1] <= d_pos_arr[i] )
+      fprintf(stderr," ** dpos_error [%d]\t%lld\t%lld\n",
+	      i,(long long)d_pos_arr[i],(long long)d_pos_arr[i+1]);
+  }
+#endif
+
+  /* allocate array of sequence pointers */
+  if ((s_pos_arr=(fseek_t *)calloc(max_cnt+1,sizeof(fseek_t)))==NULL) {
+    fprintf(stderr," cannot allocate %d for seq. array\n",max_cnt+1);
+    exit(1);
+  }
+
+  /* read m_fd->s_pos[max_cnt+1] */
+  if (mm64_flag) {
+    if (fread(s_pos_arr,sizeof(fseek_t),max_cnt+1,libi)!=
+	(size_t)(max_cnt+1)) {
+      fprintf(stderr," error reading seq offsets: %s\n",sname);
+      goto fail;
+    }
+  }
+  else {
+    if (fread(tmp_pos_arr,sizeof(int),max_cnt+1,libi)!=
+	(size_t)(max_cnt+1)) {
+      fprintf(stderr," error reading seq offsets: %s\n",sname);
+      goto fail;
+    }
+#ifdef DEBUG
+    fprintf(stderr,"s_pos_crc: %ld\n",
+	    crck((char *)tmp_pos_arr,sizeof(int)*(max_cnt+1)));
+#endif
+  }
+
+#ifndef IS_BIG_ENDIAN
+  if (mm64_flag)
+    for (i=0; i<=max_cnt; i++)
+      s_pos_arr[i] = bl2_long8_cvt(s_pos_arr[i]);
+  else
+    for (i=0; i<=max_cnt; i++)
+      s_pos_arr[i] = (long)bl2_uint4_cvt(tmp_pos_arr[i]);
+#else
+  if (!mm64_flag)
+    for (i=0; i<=max_cnt; i++)
+      s_pos_arr[i] = (long)tmp_pos_arr[i];
+#endif
+
+#ifdef DEBUG
+  for (i=1; i<max_cnt-1; i++) {
+    if (s_pos_arr[i+1]<s_pos_arr[i])
+      fprintf(stderr," ** spos_error [%d]\t%lld\t%lld\n",
+	      i,(long long)s_pos_arr[i],(long long)s_pos_arr[i+1]);
+  }
+#endif
+
+  free(tmp_pos_arr);	/* NULL (a no-op) for 64-bit indices */
+  tmp_pos_arr = NULL;
+
+  m_fd->max_cnt = max_cnt;
+  m_fd->d_pos_arr = d_pos_arr;
+  m_fd->s_pos_arr = s_pos_arr;
+  if (mmb_flag)   m_fd->b_pos_arr = b_pos_arr;
+  m_fd->lpos = 0;
+  m_fd->lb_type = lib_type;
+  m_fd->getlib = getlibam[lib_type];
+  m_fd->ranlib = ranlibam[lib_type];
+  m_fd->get_mmap_chain = NULL;
+  m_fd->mm_flg = 1;
+  /* a mapped .bsq file replaces the text readers with the binary ones */
+  if (mmb_flag) {
+    m_fd->getlib = agetlib_mb;
+    m_fd->ranlib = aranlib_mb;
+    m_fd->get_mmap_chain = bmap_get_mmap_chain;
+  }
+
+  /*   check_mmap(m_fd,-2); */
+
+  return m_fd;
+
+ fail:
+  /* release everything acquired so far; free(NULL) is a no-op */
+  if (libi_b != NULL) fclose(libi_b);
+  free(tmp_pos_arr);
+  free(d_pos_arr);
+  free(s_pos_arr);
+  free(b_pos_arr);
+  if (is_mapped) munmap(m_fd->mmap_base,m_fd->st_size);
+  if (fd_open) close(m_fd->mmap_fd);
+  return NULL;
+}
+ 
+/* mgets() is an fgets() work-alike for the mapped file: it copies
+   characters from m_fd->mmap_addr into s until n-1 characters have
+   been stored, a newline has been stored, or a byte equal to
+   (char)EOF is reached, then NUL-terminates s and advances
+   m_fd->mmap_addr past what was consumed.  Returns s, or NULL when
+   nothing was copied and the next byte is the (char)EOF marker. */
+char *mgets (char *s, int n, struct lmf_str *m_fd)
+{
+  char *src = m_fd->mmap_addr;
+  char *dst = s;
+  char ch;
+
+  for (n--; n > 0; n--) {
+    if (*src == (char)EOF) break;
+    ch = *src++;
+    *dst++ = ch;
+    if (ch == '\n') break;
+  }
+  *dst = '\0';
+
+  m_fd->mmap_addr = src;
+  if (*src == (char)EOF && dst == s) return NULL;
+  return s;
+}
+
+/* agetlibm() - sequential reader for a memory-mapped library.
+
+   seq/maxs        - output buffer for encoded residues and its size
+   libstr/n_libstr - receives the description line (NUL-terminated)
+   libpos          - receives the entry number of the sequence returned
+   lcont           - 0 on entry starts a new library entry; incremented
+                     on return when the buffer filled before the entry
+                     ended, reset to 0 when the entry was completed
+   m_fd            - library descriptor (offset arrays, mapping, lpos)
+   l_off           - receives 1, or the value following "@C:" when that
+                     tag is found near the end of the description
+
+   Returns the number of residues stored in seq (terminated with
+   EOSEQ), or -1 at the end of the library, when the entry length
+   fails the sanity check, or when sel_acc_p() returns a negative
+   status.
+
+   seq_len, desc_len and cp_max are static so that they survive into
+   continuation calls (*lcont != 0); they are shared by every library
+   read through this function. */
+int
+agetlibm(unsigned char *seq,
+	 int maxs,
+	 char *libstr,
+	 int n_libstr,
+	 fseek_t *libpos,
+	 int *lcont,
+	 struct lmf_str *m_fd,
+	 long *l_off)
+{
+  register unsigned char *cp, *seqp;
+  register int *ap;
+  int sel_status;
+  char *desc;
+  int lpos;		/* entry number in library */
+  long l;
+  unsigned char *seqm, *seqm1;
+  char *bp;
+  static long seq_len, desc_len;
+  static unsigned char *cp_max;
+
+  *l_off = 1;
+
+  lpos = m_fd->lpos;
+
+  seqp = seq;
+  /* stop short of the end of seq[]: the unrolled loop below stores up
+     to ten residues per pass before seqp is tested again */
+  seqm = &seq[maxs-9];
+  seqm1 = seqm-1;
+
+  ap = m_fd->sascii;
+
+  if (*lcont==0) {
+  start_seq:
+    if (lpos >= m_fd->max_cnt) return (-1);
+    /* sequence text runs from s_pos[lpos] up to the next description */
+    seq_len = m_fd->d_pos_arr[lpos+1] - m_fd->s_pos_arr[lpos];
+    desc_len = m_fd->s_pos_arr[lpos] - m_fd->d_pos_arr[lpos]-m_fd->acc_off;
+    /* reject negative lengths and lengths more than 25% above max_len */
+    if (seq_len < 0 || (seq_len > m_fd->max_len && seq_len > (m_fd->max_len*5)/4)) {
+      fprintf(stderr," ** sequence over-run: %ld at %d\n",seq_len,lpos);
+      return(-1);
+    }
+    *libpos = (fseek_t)lpos;
+
+    desc = m_fd->mmap_base+m_fd->d_pos_arr[lpos]+m_fd->acc_off;
+    strncpy(libstr,desc,n_libstr-1);
+    libstr[n_libstr-1]='\0';
+
+    /* optional subset filter: 0 skips this entry, negative stops the scan */
+    if ((m_fd->sel_acc_p != NULL) &&
+	(sel_status = (m_fd->sel_acc_p)(libstr, 0, m_fd->sel_local)) <= 0) {
+      if (sel_status < 0) return (-1);
+      lpos++;
+      goto start_seq;
+    }
+
+    /* cut the description at the end of its first line */
+    if ((bp=strchr(libstr,'\r'))!=NULL) *bp='\0';
+    if ((bp=strchr(libstr,'\n'))!=NULL) *bp='\0';
+
+    /* replace ^A and tab with blanks; note that bp is advanced before
+       the test, so the first character is never examined */
+    if (n_libstr > MAX_UID) {
+      bp = libstr;
+      while (*bp++) if ( *bp=='\001' || *bp=='\t') *bp=' ';
+    }
+
+    /* find @C:offset in the last 11 characters of the description */
+    /* check that we can offset desc by 12 characters to get to  ' @C:' */
+    if ((desc_len > 12) && (bp = memchr(desc+desc_len-12,'@', 11)) && !strncmp(bp+1,"C:",2)) {
+      *l_off = atol(bp+3);	/* this addresses an apparent bug in sscanf for non-null terminated strings */
+    }
+
+    m_fd->mmap_addr = m_fd->mmap_base+m_fd->s_pos_arr[lpos];
+    cp_max = (unsigned char *)(m_fd->mmap_addr+seq_len);
+  }
+
+  /* map characters through sascii[]; the && chain stops at the first
+     code >= NA, which is then dropped by --seqp */
+  for (cp=(unsigned char *)m_fd->mmap_addr; seqp<seqm1; ) {
+    if ((*seqp++=ap[*cp++])<NA &&
+	(*seqp++=ap[*cp++])<NA &&
+	(*seqp++=ap[*cp++])<NA &&
+	(*seqp++=ap[*cp++])<NA &&
+	(*seqp++=ap[*cp++])<NA &&
+	(*seqp++=ap[*cp++])<NA &&
+	(*seqp++=ap[*cp++])<NA &&
+	(*seqp++=ap[*cp++])<NA &&
+	(*seqp++=ap[*cp++])<NA &&
+	(*seqp++=ap[*cp++])<NA) continue;
+    --seqp;
+    /* end of this entry's text: cp_max was set when the entry began */
+    if (cp >= cp_max) break;
+  }
+  m_fd->mmap_addr = (char *)cp;
+
+  /* buffer full: ask the caller to come back for the rest; otherwise
+     the entry is finished and the next call starts entry lpos+1 */
+  if (seqp>=seqm1) (*lcont)++;
+  else {
+    *lcont=0;
+    lpos++;
+    m_fd->lpos = lpos;
+  }
+  *seqp = EOSEQ;
+  /*   if ((int)(seqp-seq)==0) return 1; */
+  return (int)(seqp-seq);
+}
+
+/* agetlib_mb() - sequential reader for a mapped binary (.bsq) library.
+
+   Takes entry m_fd->lpos (and always advances m_fd->lpos by one),
+   copies its pre-encoded residues from the mapping into seq, clears
+   libstr, stores the entry number in *libpos and sets *l_off to 1 and
+   *lcont to 0.  Returns the entry length taken from b_pos_arr[], or
+   -1 at the end of the library or when that length fails the sanity
+   check.
+
+   NOTE(review): at most maxs bytes are copied, but the full entry
+   length is still returned when the entry is longer - confirm that
+   callers always pass a large enough buffer. */
+int
+agetlib_mb(unsigned char *seq,
+	   int maxs,
+	   char *libstr,
+	   int n_libstr,
+	   fseek_t *libpos,
+	   int *lcont,
+	   struct lmf_str *m_fd,
+	   long *l_off)
+{
+  int entry;		/* entry number in library */
+  int n_res;		/* residues in this entry */
+  int n_copy;		/* bytes actually copied */
+  int bad_len;
+
+  *l_off = 1;
+
+  entry = m_fd->lpos;
+  m_fd->lpos = entry + 1;
+  if (entry >= m_fd->max_cnt) return (-1);
+
+  /* consecutive offsets bracket the entry; the final byte is excluded */
+  n_res = m_fd->b_pos_arr[entry+1] - m_fd->b_pos_arr[entry]-1;
+
+  if (n_res < 0) {
+    bad_len = 1;
+  }
+  else {
+    bad_len = (n_res > m_fd->max_len && n_res > (m_fd->max_len*5)/4);
+  }
+  if (bad_len) {
+    fprintf(stderr," ** sequence over-run: %d at %d\n",n_res,entry);
+    return(-1);
+  }
+
+  *libpos = (fseek_t)entry;
+
+  /* the binary file carries no description text */
+  strncpy(libstr,"",n_libstr-1);
+
+  n_copy = n_res+1;
+  if (n_copy > maxs) n_copy = maxs;
+  memcpy(seq, m_fd->mmap_base+m_fd->b_pos_arr[entry], n_copy);
+
+  *lcont=0;
+  /*   if ((int)(seqp-seq)==0) return 1; */
+  return n_res;
+}
+
+/* aranlibm() - random access to the description of entry libpos.
+
+   Copies the description text (skipping the first character of the
+   line) into str, limited to cnt-1 characters, cuts it at the first
+   CR or LF, turns ^A and tab characters after the first position into
+   blanks, and records the entry number in m_fd->lpos.  libstr is not
+   used. */
+void
+aranlibm(char *str,
+	 int cnt,
+	 fseek_t libpos,
+	 char *libstr,
+	 struct lmf_str *m_fd)
+{
+  char *cut, *scan;
+  int entry = (int) libpos;
+  int n_desc;
+
+  /* the description runs from d_pos[entry] up to the sequence start */
+  n_desc = m_fd->s_pos_arr[entry]-m_fd->d_pos_arr[entry];
+  if (n_desc >= cnt) n_desc = cnt-1;
+
+  strncpy(str,m_fd->mmap_base+m_fd->d_pos_arr[entry]+1,n_desc);
+  str[n_desc]='\0';
+
+  cut = strchr(str,'\r');
+  if (cut != NULL) *cut='\0';
+  cut = strchr(str,'\n');
+  if (cut != NULL) *cut='\0';
+
+  /* the pointer moves forward before each test, so str[0] is left as is */
+  for (scan = str; *scan != '\0'; ) {
+    scan++;
+    if ( *scan=='\001' || *scan=='\t') *scan=' ';
+  }
+
+  m_fd->lpos = entry;
+}
+
+/* aranlib_mb is a hybrid of aranlib/aranlibm that uses
+   d_pos_arr[lpos] to get the fseek offset for the description
+*/
+
+/* aranlib_mb() - random-access description lookup for a mapped binary
+   library: looks up the description offset of entry libpos in
+   d_pos_arr[], passes it to aranlib() together with the caller's
+   buffers, and then records libpos in m_fd->lpos. */
+void
+aranlib_mb(char *str,
+	   int cnt,
+	   fseek_t libpos,
+	   char *libstr,
+	   struct lmf_str *m_fd)
+{
+  int entry = (int)libpos;
+  fseek_t desc_off = (fseek_t)m_fd->d_pos_arr[entry];
+
+  aranlib(str, cnt, desc_off, libstr, m_fd);
+  m_fd->lpos = libpos;
+}
+
+/* bmap_get_mmap_chain fills cur_seqr_chain with sequence pointers
+   from the memory mapped file at *m_fd
+
+   because the database is opened read-only, this code only works with
+   an amino acid mapping identical to that used by blastdbcmd, aa_b2toa[]
+
+   bmap_get_mmap_chain must return EOF AND a set of sequences for the
+   comp_lib9.c/next_seqr_chain() logic to work properly.
+*/
+
+/* Fill up to cur_seqr_chain->max_chain_seqs records, starting with
+   library entry m_fd->lpos.  Each seq_record points directly into the
+   mapping (aa1b) and each entry is added to the totals in *db.
+   Returns EOF when the library is exhausted (either on entry or after
+   this batch, in which case cur_seq_cnt still reports how many records
+   were filled); otherwise returns the number of records filled. */
+int bmap_get_mmap_chain(struct seqr_chain *cur_seqr_chain, 
+			struct lmf_str *m_fd, struct db_str *db) {
+  int n_filled, entry;
+  struct seq_record *seq_p;
+  struct mseq_record *mseq_p;
+
+  entry = m_fd->lpos;
+  if (entry >= m_fd->max_cnt) return EOF;
+
+  n_filled = 0;
+  while (n_filled < cur_seqr_chain->max_chain_seqs && entry < m_fd->max_cnt) {
+    seq_p = cur_seqr_chain->seqr_base + n_filled;
+    mseq_p = cur_seqr_chain->mseqr_base + n_filled;
+
+    /* value is +1 off to get the NULL */
+    seq_p->n1 = m_fd->b_pos_arr[entry+1] - m_fd->b_pos_arr[entry]-1;
+
+    db->entries++;
+    db->length += seq_p->n1;
+    /* keep the running length within a long, counting overflows in carry */
+    if (db->length > LONG_MAX) {
+      db->length -= LONG_MAX; db->carry++;
+    }
+
+    mseq_p->m_file_p = m_fd;
+    mseq_p->n1tot_p=NULL;
+    mseq_p->cont = 0;
+    seq_p->index = mseq_p->index = mseq_p->lseek = entry;
+#ifndef DEBUG
+    mseq_p->libstr[0] = '\0';
+#else
+#endif
+    seq_p->aa1b = (unsigned char *)(m_fd->mmap_base + m_fd->b_pos_arr[entry]);
+    entry++;
+    seq_p->l_offset = 0;
+    seq_p->l_off = 1;
+#if DEBUG
+    seq_p->adler32_crc = mseq_p->adler32_crc = adler32(1L,seq_p->aa1b,seq_p->n1);
+#endif
+    n_filled++;
+  }
+
+  cur_seqr_chain->cur_seq_cnt = n_filled;
+  m_fd->lpos = entry;
+
+  if (entry >= m_fd->max_cnt) return EOF;
+  return n_filled;
+}
+
+/* there is no vgetlibm() because vgetlibm() and agetlibm() are
+   identical - the difference in the two file formats relates to the
+   location of the sequence, which is already available in s_pos_arr[].
+
+   however vranlibm must accommodate both type 5 and 6 files;
+   type 6 has extra stuff after the seq_id.
+*/
+
+/* vranlibm() - random access to the description of entry libpos in a
+   type 5 or type 6 library.
+
+   str receives the sequence id (the text starting 4 characters into
+   the entry's first line, at most 20 characters, cut after the first
+   blank) followed by text from the line after it.  The result is
+   always NUL-terminated and never exceeds cnt bytes; nothing is
+   written when cnt <= 0.  m_fd->lpos is set to the entry number.
+   libstr is not used. */
+void
+vranlibm(char *str,
+	 int cnt,
+	 fseek_t libpos,
+	 char *libstr,
+	 struct lmf_str *m_fd)
+{
+  char *bp, *mp;
+  int llen;
+  int id_max;		/* id characters that fit in str */
+  int room;		/* characters still free in str */
+  int lpos;
+
+  lpos = (int)libpos;
+
+  if (cnt <= 0) {	/* no room even for the terminating NUL */
+    m_fd->lpos = lpos;
+    return;
+  }
+
+  llen = m_fd->s_pos_arr[lpos]-m_fd->d_pos_arr[lpos];
+
+  mp = m_fd->mmap_base+m_fd->d_pos_arr[lpos];
+
+  /* never copy more id characters than str can hold */
+  id_max = (cnt-1 < 20) ? cnt-1 : 20;
+  strncpy(str,mp+4,id_max);
+  str[id_max]='\0';
+  if ((bp=strchr(str,' '))!=NULL) *(bp+1) = '\0';
+  else if ((bp=strchr(str,'\n'))!=NULL) *bp = ' ';
+
+  /* the text to append starts after the end of the first line */
+  bp = strchr(mp,'\n');
+  if (bp != NULL) {
+    llen -= (bp-mp)-5;
+    /* strncat() writes llen characters plus a NUL, so the limit is one
+       less than the space left; a negative length is clamped as well */
+    room = cnt-1-(int)strlen(str);
+    if (llen < 0 || llen > room) llen = room;
+
+    strncat(str,bp+1,llen);
+    if ((bp = strchr(str,'\n'))!=NULL) *bp='\0';
+  }
+  str[cnt-1]='\0';
+  m_fd->lpos = lpos;
+}
+
+/* close_mmap() releases what load_mmap() attached to *m_fd: the
+   description and sequence offset arrays and, when mm_flg is set, the
+   file mapping.  The lmf_str itself is supplied by the caller of
+   load_mmap() and is written to below, so it is NOT freed here; the
+   released pointers are cleared so a second call is harmless. */
+void
+close_mmap(struct lmf_str *m_fd) {
+  free(m_fd->s_pos_arr);
+  m_fd->s_pos_arr = NULL;
+  free(m_fd->d_pos_arr);
+  m_fd->d_pos_arr = NULL;
+  if (m_fd->mm_flg) {
+    munmap(m_fd->mmap_base,m_fd->st_size);
+    m_fd->mmap_base = NULL;
+    m_fd->mmap_addr = NULL;
+  }
+  m_fd->mm_flg=0;
+}
+
+#ifndef min
+#define min(x,y) ((x) > (y) ? (y) : (x))
+#endif
+
+static int gcg_bton[4]={2,4,1,3};
+
+/* gcg_getlibm() - sequential reader for a mapped GCG library.
+
+   seq/maxs        - output buffer for residues and its size
+   libstr/n_libstr - receives the entry name (at most 12 characters)
+   libpos          - receives the entry number
+   lcont           - 0 on entry starts a new library entry; incremented
+                     on return when more of the entry remains, reset to
+                     0 when the entry was completed
+   m_fd            - library descriptor; gcg_len, gcg_binary, mmap_addr
+                     and lpos are updated
+   l_off           - always set to 1
+
+   A new entry's header is parsed as "name date type x length"; a type
+   beginning with '2' selects 2-bit packed data (four residues per
+   byte, decoded with gcg_bton[]), anything else is text mapped
+   through m_fd->sascii.  A header that cannot be parsed yields an
+   empty entry.  Returns the number of residues stored (terminated
+   with EOSEQ), or -1 at the end of the library. */
+int
+gcg_getlibm(unsigned char *seq,
+	    int maxs,
+	    char *libstr,
+	    int n_libstr,
+	    fseek_t *libpos,
+	    int *lcont,
+	    struct lmf_str *m_fd,
+	    long *l_off)
+{
+  char gcg_id[64];
+  char dummy[20];
+  char gcg_date[20];
+  char gcg_type[10];
+  unsigned char *cp, *seqp, *seqm, stmp;
+  int *ap, lpos;
+  long room, r_block, b_block, r_fact, k;
+
+  *l_off = 1;
+
+  seqp = seq;
+  seqm = &seq[maxs-9];
+
+  ap = m_fd->sascii;
+  lpos = m_fd->lpos; 
+
+  if (*lcont==0) {
+    if (lpos >= m_fd->max_cnt) return (-1);
+
+    /* defaults used when the header cannot be parsed */
+    gcg_id[0] = gcg_type[0] = '\0';
+    m_fd->gcg_len = 0;
+    /* every %s carries a width so no field can overrun its buffer */
+    sscanf(m_fd->mmap_base+m_fd->d_pos_arr[lpos]+4,"%63s %19s %9s %19s %ld\n",
+	   gcg_id,gcg_date,gcg_type,dummy,&(m_fd->gcg_len));
+
+    m_fd->gcg_binary = (gcg_type[0]=='2');
+
+    /* hand back at most 12 characters, and never more than libstr holds */
+    if (n_libstr > 0) {
+      strncpy(libstr,gcg_id,n_libstr-1);
+      libstr[n_libstr-1]='\0';
+      if (n_libstr > 12) libstr[12]='\0';
+    }
+    *libpos = lpos;
+    m_fd->mmap_addr = m_fd->mmap_base+m_fd->s_pos_arr[lpos];
+  }
+
+  /* residues to deliver now: what is left of the entry, limited by
+     the space in seq[]; computed in signed arithmetic so a negative
+     value cannot wrap into a huge count */
+  room = (long)(seqm-seqp);
+  if (room < 0) room = 0;
+  b_block = (m_fd->gcg_len < room) ? m_fd->gcg_len : room;
+  if (b_block < 0) b_block = 0;
+  r_block = b_block;
+
+  cp=(unsigned char *)m_fd->mmap_addr; 
+  if (!m_fd->gcg_binary) {
+    /* text: one input byte per residue */
+    r_fact = 1;
+    for (k = 0; k < r_block; k++) *seqp++ = ap[*cp++];
+  }
+  else {
+    /* packed: four 2-bit residues per input byte, high bits first */
+    r_fact = 4;
+    r_block = (r_block+3)/4;
+    for (k = 0; k < r_block; k++) {
+      stmp = *cp++;
+      *seqp++ = gcg_bton[(stmp>>6) &3];
+      *seqp++ = gcg_bton[(stmp>>4) &3];
+      *seqp++ = gcg_bton[(stmp>>2) &3];
+      *seqp++ = gcg_bton[(stmp) &3];
+    }
+  }
+  /* remember how far we read so a continuation call resumes here
+     instead of decoding the same bytes again */
+  m_fd->mmap_addr = (char *)cp;
+
+  if (r_fact * r_block >= m_fd->gcg_len) {
+    *lcont = 0;
+    m_fd->lpos++;
+  }
+  else {
+    if (m_fd->gcg_binary) b_block = 4*r_block;
+    m_fd->gcg_len -= b_block;
+    (*lcont)++;
+  }
+
+  seq[b_block] = EOSEQ;
+  /*   if (b_block==0) return 1; else */
+  return (int)b_block;
+}
+
+void lget_ann_m(struct lmf_str *lm_fd, char *libstr, int n_libstr);
+
+/* lgetlibm() - sequential reader for the mapped library type served
+   by lget_ann_m() (entries with DEFINITION/ACCESSION/VERSION/ORIGIN
+   lines).
+
+   seq/maxs        - output buffer for encoded residues and its size
+   libstr/n_libstr - receives the entry label: 12 characters taken at
+                     column 12 of the entry's first line when
+                     n_libstr <= 21, otherwise the header built by
+                     lget_ann_m()
+   libpos          - receives the entry number
+   lcont           - 0 on entry starts a new library entry; incremented
+                     on return when the buffer filled first, reset to 0
+                     when the entry was completed
+   m_fd            - library descriptor
+   l_off           - always set to 1
+
+   Returns the number of residues stored (terminated with EOSEQ), or
+   -1 at the end of the library.
+
+   NOTE(review): lget_ann_m() reads from m_fd->mmap_addr, which is
+   only pointed at this entry (its s_pos) after that call returns -
+   confirm that mmap_addr is where the annotation scan should start. */
+int
+lgetlibm(unsigned char *seq,
+	 int maxs,
+	 char *libstr,
+	 int n_libstr,
+	 fseek_t *libpos,
+	 int *lcont,
+	 struct lmf_str *m_fd,
+	 long *l_off)
+{
+  register unsigned char *cp, *seqp;
+  register int *ap, lpos;
+  unsigned char *seqm, *seqm1;
+
+  *l_off = 1;
+
+  seqp = seq;
+  /* stop short of the end of seq[]: the unrolled loop below stores up
+     to eleven residues per pass before seqp is tested again */
+  seqm = &seq[maxs-11];
+  seqm1 = seqm-1;
+
+  lpos = m_fd->lpos;
+  ap = m_fd->sascii;
+
+  if (*lcont==0) {
+    if (lpos >= m_fd->max_cnt) return (-1);
+
+    if (n_libstr <= 21) {
+      strncpy(libstr,m_fd->mmap_base+m_fd->d_pos_arr[lpos]+12,12);
+      libstr[12]='\0';
+    }
+    else {
+      lget_ann_m(m_fd,libstr,n_libstr);
+    }
+    *libpos = lpos;
+
+    m_fd->mmap_addr = m_fd->mmap_base+m_fd->s_pos_arr[lpos];
+    cp = (unsigned char *)m_fd->mmap_addr;
+  }
+  else cp = (unsigned char *)m_fd->mmap_addr;
+
+  while (seqp<seqm1) {
+    /* a '/' at the start of a line ends the entry */
+    if (*cp=='/' && *(cp-1)=='\n') break;
+    /* map characters through sascii[]; the && chain stops at the
+       first code >= NA, which is then dropped by --seqp */
+    if ((*seqp++=ap[*cp++])<NA &&
+	(*seqp++=ap[*cp++])<NA &&
+	(*seqp++=ap[*cp++])<NA &&
+	(*seqp++=ap[*cp++])<NA &&
+	(*seqp++=ap[*cp++])<NA &&
+	(*seqp++=ap[*cp++])<NA &&
+	(*seqp++=ap[*cp++])<NA &&
+	(*seqp++=ap[*cp++])<NA &&
+	(*seqp++=ap[*cp++])<NA &&
+	(*seqp++=ap[*cp++])<NA &&
+	(*seqp++=ap[*cp++])<NA) continue;
+    --seqp;
+    /* a newline followed by a blank: skip 11 characters - presumably
+       the newline plus the position column that starts each sequence
+       line (TODO confirm) */
+    if (*cp=='\n' && *(cp+1)==' ') cp += 11;
+  }
+
+  /* buffer full: save the position for the continuation call;
+     otherwise the entry is finished and lpos moves to the next one */
+  if (seqp>=seqm1) {
+    (*lcont)++;
+    m_fd->mmap_addr = (char *)cp;
+  }
+  else {
+    *lcont=0;
+    m_fd->lpos++;
+  }
+
+  *seqp = EOSEQ;
+  return (int)(seqp-seq);
+}
+
+/* lget_ann_m() builds a FASTA-style header in libstr (at most
+   n_libstr-1 characters plus a NUL) for the entry at
+   lm_fd->mmap_addr.
+
+   The locus name is taken from column 12 of the current line; the
+   following lines are then read with mgets() to find the DEFINITION,
+   ACCESSION and VERSION lines.  The header is "gi|<gi>|gb|" when the
+   VERSION line carries a ':' followed by a gi number, then the
+   version and '|' (or, without a VERSION line, the accession and a
+   blank), then the locus and the definition text.
+
+   lm_fd->mmap_addr is advanced past every line that was read.  Each
+   search stops when mgets() reports the end of the data, and a field
+   that was not found is treated as empty.  Nothing is done when
+   n_libstr <= 0. */
+void
+lget_ann_m(struct lmf_str *lm_fd, char *libstr, int n_libstr) {
+  char *bp, *bp_gid, *lp, locus[120], desc[120], acc[120], ver[120];
+
+  if (n_libstr <= 0) return;
+
+  /* copy in locus from the current line; strncpy() does not terminate
+     when the source is longer than the buffer, so do it explicitly */
+  strncpy(locus,&lm_fd->mmap_addr[12],sizeof(locus)-1);
+  locus[sizeof(locus)-1] = '\0';
+  if ((bp=strchr(locus,' '))!=NULL) *(bp+1) = '\0';
+
+  /* get description */
+  lp = mgets(desc,sizeof(desc),lm_fd);
+  while (lp != NULL && strncmp(desc,"DEFINITION",10))
+    lp = mgets(desc,sizeof(desc),lm_fd);
+  /* the text is read from fixed columns 11/12; make those columns
+     empty when the line is too short to reach them */
+  if (strlen(desc) < 12) desc[11] = desc[12] = '\0';
+  if ((bp = strchr(&desc[12],'\n'))!=NULL) *bp='\0';
+
+  /* get accession */
+  lp = mgets(acc,sizeof(acc),lm_fd);
+  while (lp != NULL && strncmp(acc,"ACCESSION",9)) {
+    lp = mgets(acc,sizeof(acc),lm_fd);
+    if (lp != NULL && strncmp(acc,"ORIGIN",6)==0)
+      break;
+  }
+  if (strlen(acc) < 12) acc[11] = acc[12] = '\0';
+  if ((bp = strchr(&acc[12],'\n'))!=NULL) *bp='\0';
+  if ((bp = strchr(&acc[12],' '))!=NULL) *bp='\0';
+
+  /* get version */
+  lp = mgets(ver,sizeof(ver),lm_fd);
+  while (lp != NULL && strncmp(ver,"VERSION",7)) {
+    lp = mgets(ver,sizeof(ver),lm_fd);
+    if (lp != NULL && strncmp(ver,"ORIGIN",6)==0)
+      break;
+  }
+  if (strlen(ver) < 12) ver[11] = ver[12] = '\0';
+  if ((bp = strchr(&ver[12],'\n'))!=NULL) *bp='\0';
+
+  /* extract gi:123456 from version line; only a real VERSION line is
+     searched, not the ORIGIN line the loop may have stopped on */
+  bp_gid = NULL;
+  if (ver[0]=='V') bp_gid = strchr(&ver[12],':');
+  if (bp_gid != NULL) {
+    if ((bp=strchr(bp_gid+1,' '))!=NULL) *bp='\0';
+    bp_gid++;
+  }
+  if ((bp = strchr(&ver[12],' '))!=NULL) *bp='\0';
+
+  /* build up FASTA header line; every append is limited to the space
+     that is left, so libstr never grows past n_libstr-1 characters */
+  libstr[0]='\0';
+  if (bp_gid != NULL) {
+    strncat(libstr,"gi|",n_libstr-1-strlen(libstr));
+    strncat(libstr,bp_gid,n_libstr-1-strlen(libstr));
+    strncat(libstr,"|gb|",n_libstr-1-strlen(libstr));
+  }
+
+  /* if we have a version number, use it, otherwise accession, 
+	 otherwise locus/description */
+
+  if (ver[0]=='V') {
+    strncat(libstr,&ver[12],n_libstr-1-strlen(libstr));
+    strncat(libstr,"|",n_libstr-1-strlen(libstr));
+  }
+  else if (acc[0]=='A') {
+    strncat(libstr,&acc[12],n_libstr-1-strlen(libstr));
+    strncat(libstr," ",n_libstr-1-strlen(libstr));
+  }
+
+  strncat(libstr,locus,n_libstr-1-strlen(libstr));
+  strncat(libstr,&desc[11],n_libstr-1-strlen(libstr));
+  libstr[n_libstr-1]='\0';
+}
+
+/* lranlibm() - random access to the annotation of entry `seek`.
+
+   Points m_fd->mmap_addr at the start of that entry, lets
+   lget_ann_m() build the header into str (at most cnt-1 characters
+   plus a NUL), and records the entry number in m_fd->lpos.  libstr is
+   not used. */
+void
+lranlibm(char *str,
+	 int cnt,
+	 fseek_t seek,
+	 char *libstr,
+	 struct lmf_str *m_fd)
+{
+  /* lget_ann_m() reads from m_fd->mmap_addr, so position it on the
+     requested entry first (as aranlibm()/vranlibm() do through
+     d_pos_arr[]); otherwise the annotation of whatever entry was read
+     last would be returned */
+  m_fd->mmap_addr = m_fd->mmap_base + m_fd->d_pos_arr[(int)seek];
+
+  lget_ann_m(m_fd,str,cnt);
+
+  if (cnt > 0) str[cnt-1]='\0';
+
+  m_fd->lpos = seek;
+}
+
+static int check_status=0;
+
+/* check_mmap() is a debugging aid: it walks the offset arrays of
+   *m_fd and reports on stderr every entry whose length (next
+   description offset minus sequence offset) is negative or more than
+   25% above max_len, followed by an OK line when none was found.
+   ntt is only echoed in the messages.  The file-level counter
+   check_status limits the output to the first five calls. */
+void
+check_mmap(struct lmf_str *m_fd,long ntt) {
+
+  int idx, span, n_bad;
+
+  check_status++;
+  if (check_status > 5) return;
+
+  fprintf(stderr," ** checking %s %ld**\n", m_fd->lb_name,ntt);
+
+  n_bad = 0;
+  idx = 0;
+  while (idx < m_fd->max_cnt) {
+    span = m_fd->d_pos_arr[idx+1] - m_fd->s_pos_arr[idx];
+    if (span < 0 || (span > m_fd->max_len && span > (m_fd->max_len*5)/4)) {
+      fprintf(stderr,"%d:\t%lld\t%lld\t%lld\n",
+	      idx,m_fd->d_pos_arr[idx],m_fd->s_pos_arr[idx],
+	      m_fd->d_pos_arr[idx+1]-m_fd->s_pos_arr[idx]);
+      n_bad++;
+    }
+    idx++;
+  }
+
+  if (n_bad == 0 && check_status) {
+    fprintf(stderr," ** check_mmap OK %s %ld**\n",
+	    m_fd->lb_name,ntt);
+  }
+}
+
+#ifdef DEBUG
+/*  C H K 3  --  Compute a type-3 Kermit block check.  */
+/*
+ Calculate the 16-bit CRC of the n bytes starting at s, using a
+ byte-oriented tableless algorithm invented by Andy Lowry (Columbia
+ University).  The magic number 010201 is derived from the CRC-CCITT
+ polynomial x^16+x^12+x^5+1.  Only the low 7 bits of each byte take
+ part in the check; because the length is passed explicitly, embedded
+ 0 bytes are processed like any other byte.
+*/
+long
+crck(char *s, int n)
+{
+    unsigned int ch, nib;
+    long crc = 0;
+    int k;
+
+    for (k = 0; k < n; k++) {
+	ch = s[k];
+	ch &= 0177;			/* drop the parity bit */
+	nib = (crc ^ ch) & 017;		/* Low-order nibble */
+	crc = (crc >> 4) ^ (nib * 010201);
+	nib = (crc ^ (ch >> 4)) & 017;	/* High order nibble */
+	crc = (crc >> 4) ^ (nib * 010201);
+    }
+    return(crc);
+}
+#endif
diff --git a/src/mrandom.c b/src/mrandom.c
new file mode 100644
index 0000000..77ab0fb
--- /dev/null
+++ b/src/mrandom.c
@@ -0,0 +1,97 @@
+/* mrandom.c 28-Jan-2010 */
+
+/*  $Id:  */
+/* $Revision: 625 $  */
+
+/* system versions of random/nrand48/nrand tend to have thread contention
+   issues.  This version uses a random number generator from Wikipedia
+   that maintains state in a separate buffer, so that there is no contention.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#ifdef UNIX
+#include <sys/time.h>
+#else
+#include <time.h>
+#endif
+
+/* minimal standard random number generator taken from:
+   S. K. Park and K. W. Miller (1988) "Random number generators: Good
+   ones are hard to find" Comm. ACM 31:1192-1201
+*/
+#define MIN_STD_RAND
+
+/* per-caller generator state, allocated by my_srand() and passed to
+   every my_nrand() call */
+struct m_rand_struct {
+#ifndef MIN_STD_RAND
+  /* two state words of the alternative generator in my_nrand() */
+  unsigned int mw;
+  unsigned int mz;
+#else
+  /* current value of the minimal standard generator */
+  int seed;
+#endif
+};
+
+#ifdef MIN_STD_RAND
+/* constants used by my_nrand(): the next seed is computed as
+   A*(seed % Q) - R*(seed / Q), with M added when that is not positive */
+#define A 16807
+#define M 2147483647
+#define Q 127773	/* M / A */
+#define R 2836		/* M % A */
+#endif
+
+/* my_srand() allocates and seeds a private generator state.
+
+   A clock-derived odd number is always computed (microseconds modulo
+   65535 under UNIX, time(NULL) otherwise).  With MIN_STD_RAND the
+   seed is `set` when set > 0 and the clock value otherwise; without
+   it both state words come from the clock value and `set` is ignored.
+   Returns the new state (to be passed to my_nrand()); exits if the
+   allocation fails. */
+void *
+my_srand(int set)	/* initialize random number generator */
+{
+#ifdef UNIX
+  struct timeval now;
+#endif
+  int clock_val;
+  struct m_rand_struct *state;
+
+  state = (struct m_rand_struct *)calloc(1, sizeof(struct m_rand_struct));
+  if (state == NULL) {
+    fprintf(stderr," *** [my_srand] cannot allocate random state ***\n");
+    exit(1);
+  }
+
+#ifdef UNIX
+  gettimeofday(&now,NULL);
+  clock_val = now.tv_usec % 65535;
+#else
+  clock_val = time(NULL);
+#endif
+  /* force an odd value */
+  if ((clock_val % 2)==0) clock_val += 1;
+
+#ifndef MIN_STD_RAND
+  state->mw = clock_val;
+  /* swap things around, since the next time will be close */
+  clock_val = ((clock_val & 0xFFF) << 12) + ((clock_val>>12) & 0xFFF);
+  if ((clock_val % 2)==0) clock_val += 1;
+  state->mz = clock_val;
+#else
+  state->seed = (set > 0) ? set : clock_val;
+#endif
+  return state;
+}
+
+/* returns a random number between 0 and n-1 (where n < 2^31); the
+   generator state in *my_rand_state is advanced by one step */
+unsigned int
+my_nrand(int n, struct m_rand_struct *my_rand_state)
+{
+#ifdef MIN_STD_RAND
+  int quot, rem, next;
+
+  /* one step of the minimal standard generator, computed from the
+     quotient and remainder of seed / Q */
+  quot = my_rand_state->seed / Q;
+  rem = my_rand_state->seed % Q;
+  next = A * rem - R * quot;
+  if (next <= 0) next += M;
+  my_rand_state->seed = next;
+
+  return ((unsigned int)next) % n;
+#else 
+  unsigned int rn;
+
+  my_rand_state->mz = 36969 * (my_rand_state->mz & 65535) + (my_rand_state->mz >> 16);
+  my_rand_state->mw = 18000 * (my_rand_state->mw & 65535) + (my_rand_state->mw >> 16);
+  rn =  (my_rand_state->mz << 16) + my_rand_state->mw;  /* 32-bit result */
+
+  return rn%n;
+#endif
+}
diff --git a/src/msg.h b/src/msg.h
new file mode 100644
index 0000000..4e6e4f9
--- /dev/null
+++ b/src/msg.h
@@ -0,0 +1,57 @@
+/* Concurrent read version */
+
+/* $Id: msg.h 625 2011-03-23 17:21:38Z wrp $ */
+/* $Revision: 625 $  */
+
+/* Cube definitions */
+
+/* NOTE(review): both branches below define the same values, so
+   PVM_SRC currently has no effect on FIRSTNODE/FIRSTWORK */
+#ifdef PVM_SRC
+#define FIRSTNODE	1
+#define FIRSTWORK	1
+#else
+#define FIRSTNODE	1
+#define FIRSTWORK	1
+#endif
+
+#define MAXNOD		128
+#define ALLTYPES        -1
+#define HOSTPID		0
+#define MANAGEPID 	0
+#define WORKPID 	0
+
+#define MANAGER		0
+#define ALLNODES        -1
+#define ALLPIDS         -1
+
+#define STARTTYPE0 	0	/* configuration buffer values */
+#define STARTTYPE1	1	/* struct mngmsg m_msp */
+#define STARTTYPE2	2	/* struct pstruct ppst */
+#define STARTTYPE3	3	/* pam2[0,1] matrix */
+#define STARTTYPE4	4	/* *pascii for fasty/tfasty */
+
+#define QSEQTYPE0	5
+#define QSEQTYPE1	6
+
+#define MSEQTYPE0	10	/* cur_buf->hdr */
+#define MSEQTYPE1	11	/* cur_buf->buf2_data * cur_buf->hdr.buf2_cnt */
+#define MSEQTYPE2	12	/* bulk - seq_b, seq_record * hdr.buf2_cnt */
+#define MSEQTYPE3	13	/* bulk - aa1b_start, aa1b_used+1 */
+#define MSEQTYPE4	14	/* individ. - seq_record */
+#define MSEQTYPE5	15	/* individ. - aa1b */
+#define MSEQTYPE6	16
+
+#define RES_TYPE0	20
+#define RES_TYPE1	21
+#define RES_TYPE2	22
+
+#define ALN_TYPE0	30
+#define ALN_TYPE1	31
+#define ALN_TYPE2	32
+#define ALN_TYPE3	33
+
+#define FINISHED 	16384	/* this must be larger than BFR */
+
+#define DO_SEARCH_FLG 0
+#define DO_OPT_FLG 1
+#define DO_ALIGN_FLG 2
+#define DO_CALC_FLG 3
diff --git a/src/mshowalign2.c b/src/mshowalign2.c
new file mode 100644
index 0000000..090343b
--- /dev/null
+++ b/src/mshowalign2.c
@@ -0,0 +1,999 @@
+/* $Id: mshowalign2.c 1269 2014-07-29 21:24:25Z wrp $ */
+
+/* copyright (c) 1996, 1997, 1998, 1999, 2014 by William R. Pearson and
+   The Rector & Visitors of the University of Virginia */
+
+/* Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under this License is distributed on an "AS
+   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+   express or implied.  See the License for the specific language
+   governing permissions and limitations under the License. 
+*/
+
+/* mshowalign.c - show sequence alignments in pvcomplib */
+
+/* 
+   In the serial and current threaded versions of the programs,
+   showalign gets a list of high scoring sequences and must
+   re_getlib() the sequence, do_walign(), and then calculate the
+   alignment.
+
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "defs.h"
+#include "msg.h"
+#include "structs.h"
+#include "param.h"
+
+#include "mm_file.h"
+
+/* best_stats.h must come after mm_file.h */
+#include "best_stats.h"
+
+/* used to position the library sequence for re_getlib - also gets
+   description */
+#define RANLIB (m_fptr->ranlib)
+
+extern struct lmf_str *
+re_openlib(struct lmf_str *, int outtty);
+
+int
+re_getlib(unsigned char *aa1, struct annot_str **annot_p,
+	  int maxn, int maxt,
+	  int loff, int cont, int term_code,
+	  long *l_offset, long *l_off, 
+	  struct lmf_str *m_fptr);
+
+#include "drop_func.h"
+/* drop_func.c includes dyn_string.h */
+
+extern void calc_astruct(struct a_struct *aln_p, struct a_res_str *a_res_p, void *f_str);
+
+extern void calc_coord(int n0, int n1, long qoffset, long loffset,
+		       struct a_struct *aln);
+
+void initseq(char **, char **, char **, int);
+void initseq_ann(char **, char **, int);
+
+void freeseq(char **, char **, char **);
+void freeseq_ann(char **, char **);
+
+void do_show(FILE *fp, int n0, int n1, int score,
+	     char *name0, char *name1, int nml, char *link_name,
+	     const struct mngmsg *m_msp, const struct pstruct *ppst,
+	     char *seqc0, char *seqc0a,  char *seqc1, char *seqc1a,
+	     char *seqca, int *cumm_seq_score, int nc,
+	     float percent, float gpercent, int lc,
+	     struct a_struct *aln, const char *annot_var_s,
+	     const struct annot_str *q_annot_p,
+	     const struct annot_str *l_annot_p);
+
+void
+do_lav(FILE *fp, struct a_struct *aln, char *seqc, float percent, int is_mirror);
+
+void
+buf_align_seq(unsigned char **aa0, int n0,
+	      struct beststr **bestp_arr, int nbest,
+	      struct pstruct *ppst, struct mngmsg *m_msp,
+	      const struct mng_thr *m_bufi_p
+#if !defined(COMP_THR) && !defined(PCOMPLIB)
+	      , void **f_str
+#endif
+	      );
+
+/* pre-alignment */
+extern void 
+pre_load_best(unsigned char *aa1, int maxn,struct beststr **bbp_arr,
+	      int nbest, struct mngmsg *m_msp, int debug);
+
+float
+calc_fpercent_id(float scale, int n_ident, int n_alen, int tot_ident, float fail);
+
+extern int E1_to_s(double e_val, int n0, int n1, int db_size, void *pu);
+
+extern void discons(FILE *fd, const struct mngmsg *m_msg,
+		    char *seqc0, char *seqc0a,
+		    char *seqc1, char *seqc1a,
+		    char *seqca, int *cumm_seq_score, int nc, 
+		    int n0, int n1, char *name0, char *name1, int nml,
+		    struct a_struct *aln);
+
+extern void disgraph(FILE *fd, int n0, int n1,
+		     float percent, int score,
+		     int min0, int min1, int max0, int max1, long sq0off,
+		     char *name0, char *name1, int nml, int llen, int markx);
+
+extern double find_z(int score, double escore, int length, double comp,void *);
+extern double zs_to_bit(double, int, int);
+extern double s_to_bit(int score, int n0, int  n1, void *pu);
+extern double bit_to_E (double bit, int n0, int n1, long db_size, void *pu);
+extern double zs_to_E(double zs, int n1, int dnaseq, long db_size, struct db_str db);
+
+extern void
+do_url1(FILE *, const struct mngmsg *, const struct pstruct *, char *, int,
+	const struct a_struct *, const char *,
+	const struct annot_str *, const struct annot_str *);
+
+#ifndef A_MARK
+#define A_MARK ">>"
+#endif
+
+/* this version does not check for m_msg->e_cut because nshow/nbest has
+   already been set to limit on e_cut */
+
+/* showalign() - write the alignments for the best-scoring library
+   sequences to fp.
+
+   fp:            output stream
+   aa0:           query sequence(s), indexed by bbp->frame
+   aa1save:       buffer filled by re_getlib() when a library sequence
+                  has to be re-read
+   maxn:          length limit passed to pre_load_best()/re_getlib()
+   bptr, nbest:   array of best hits and its length; at most
+                  min(nbest, ashow, nshow) entries are displayed
+   qlib:          not referenced in this function
+   m_msp, ppst:   run-time display and scoring parameters
+   info_gstring2: not referenced in this function
+   f_str:         per-frame function state, indexed by bbp->frame
+   m_bufi_p:      passed through to buf_align_seq()
+
+   For each hit the function makes sure that a description line, the
+   library sequence and an alignment encoding are available, then walks
+   the hit's list of alignments (bbp->a_res ... ->next) and prints each
+   one in the format selected by m_msp->markx.  Exits with status 1 if
+   the library cannot be re-opened.
+*/
+void showalign (FILE *fp, unsigned char **aa0, unsigned char *aa1save, int maxn,
+		struct beststr **bptr, int nbest, int qlib, 
+		struct mngmsg *m_msp, struct pstruct *ppst, 
+		char *info_gstring2
+		, void **f_str, struct mng_thr *m_bufi_p
+		)
+{
+  unsigned char *aa1, *aa1a;
+  char tmp_str[20];
+  char info_str[200];
+  char bline[2048], *qline_p, *bline_p, *bl_ptr, *bp, *bp1, fmt[40];
+  struct dyn_string_str *annot_var_dyn, *align_code_dyn;
+  char *annot_var_s10;
+  int tmp_len, ttmp_len, l_llen, desc_llen, ranlib_done;
+  char name0[80], name0s[80], name1[200];
+  char l_name[128], link_name[140];	/* link name */
+  int istop, i = 0, ib, nml, first_line;
+  int l_ashow;
+  int  n1tot;
+  struct beststr *bbp;
+  struct a_res_str *cur_ares_p;
+  struct rstruct *rst_p;
+  int nc, lc, maxc;
+  double lzscore, lzscore2, lbits;
+  struct a_struct l_aln, *l_aln_p;
+  float percent, gpercent;
+  /* strings, lengths for conventional alignment */
+  char *seqc0, *seqc0a, *seqc1, *seqc1a, *seqca;
+  int *cumm_seq_score;
+  /* strings, lengths, for encoded alignment for MX10 */
+  char *seq_code=NULL, *annot_code=NULL;
+  int seq_code_len=0, annot_code_len=0;
+  long loffset, l_off;
+  long qt_offset, lt_offset;
+  int lsw_score, l_score0;
+  char html_pre_E[120], html_post_E[120];
+  int disp_dna_align = ((m_msp->qdnaseq>0) && (m_msp->ldb_info.ldnaseq > 0));
+  int score_delta = 0;
+#ifdef LALIGN
+  int lalign_repeat_thresh_done = 0;
+#endif
+
+  int n1;
+  struct lmf_str *m_fptr;
+  int ngap;
+
+  align_code_dyn = init_dyn_string(4096, 4096);
+  annot_var_dyn = init_dyn_string(4096, 4096);
+
+  /* query title, skipping a leading "gi|12345|" when a db|acc follows */
+  qline_p = m_msp->qtitle;
+  if (!strncmp(m_msp->qtitle,"gi|",3)) {
+    qline_p = strchr(qline_p+4,'|');
+    /* check for additional '|'s associated with NCBI gi|12346|db|acc entry */
+    if (!qline_p || strchr(qline_p+1,'|')==NULL) {
+      qline_p = m_msp->qtitle;
+    }
+    else { qline_p += 1;}
+  }
+
+  /* work on a local copy of the alignment parameters */
+  memcpy(&l_aln, &(m_msp->aln),sizeof(struct a_struct));
+  l_aln_p = &l_aln;  /*   aln_p = &m_msp->aln; */
+
+  /* set the name0,1 label length */
+  if (m_msp->markx & (MX_M10FORM+MX_MBLAST)) nml = 12;
+  else if (m_msp->markx & MX_M11OUT) nml = MAX_UID;
+  else nml = m_msp->nmlen;
+
+  if (strlen(qline_p) > 0) {
+    if (qline_p[0]=='>') {SAFE_STRNCPY(name0s,qline_p+1,sizeof(name0s));}
+    else {SAFE_STRNCPY(name0s,qline_p,sizeof(name0s));}
+  }
+  else {
+    SAFE_STRNCPY(name0s,m_msp->tname,sizeof(name0s));
+  }
+
+  if ((bp=strchr(name0s,' '))!=NULL) *bp='\0';
+
+  /* NOTE(review): name0 has not been filled in yet and is rewritten
+     from name0s for every hit below - confirm this store is needed */
+  if (m_msp->revcomp) name0[nml-1]='-';
+
+  if (m_msp->markx & MX_HTML) {
+    SAFE_STRNCPY(html_pre_E,"<font color=\"darkred\">",sizeof(html_pre_E));
+    SAFE_STRNCPY(html_post_E,"</font>",sizeof(html_post_E));
+
+  }
+  else {
+    html_pre_E[0] = html_post_E[0] = '\0';
+  }
+
+  desc_llen = l_llen = m_msp->aln.llen;
+  if ((m_msp->markx & MX_M9SUMM) && (m_msp->show_code != SHOW_CODE_ID && m_msp->show_code != SHOW_CODE_IDD)) {
+    l_llen += 40;
+    if (l_llen > 200) l_llen=200;
+  }
+
+  /* fmt is the printf() format for the ">>description (length)" line */
+  if (m_msp->markx & MX_MBLAST) {
+    sprintf(fmt,">%%-%ds\n%%sLength=%%d\n",l_llen+15);
+    desc_llen = l_llen+25;
+  }
+  else {
+    sprintf(fmt,"%s%%-%ds (%%d %s)\n",A_MARK,l_llen-5,m_msp->sqnam);
+  }
+
+  if (m_msp->std_output && !(m_msp->markx&MX_M10FORM)) fprintf (fp,"\n");
+
+  /* number of hits to display */
+  l_ashow = m_msp->ashow;
+  if (l_ashow < 0) l_ashow = m_msp->nshow;
+  istop = min(min(nbest,l_ashow),m_msp->nshow);
+
+  /* tmp_len is the longest description that will be kept in bline */
+  tmp_len = sizeof(bline)-1;
+  if (!(m_msp->markx & MX_M10FORM) && !m_msp->long_info) {tmp_len = l_llen-5;}
+  
+  /* don't call pre_load_best if we already have sequences and alignments */
+  if (!m_msp->align_done) {
+    if (!m_msp->pre_load_done) { pre_load_best(aa1save, maxn, bptr, istop, m_msp, ppst->debug_lib); }
+
+    /* don't call buf_align_seq if the algorithm does not support
+       pre-alignment */
+    if (ppst->can_pre_align) {
+#ifdef LALIGN
+      for (ib=0; ib<istop; ib++) { 
+	bbp = bptr[ib];
+	bbp->repeat_thresh = 
+	  min(E1_to_s(ppst->e_cut_r, m_msp->n0, bbp->seq->n1,ppst->zdb_size, m_msp->pstat_void),
+	      bbp->rst.score[ppst->score_ix]);
+      }
+      lalign_repeat_thresh_done = 1;
+#endif
+
+      buf_align_seq(aa0, m_msp->n0, bptr, istop, ppst, m_msp, m_bufi_p
+#if !defined(COMP_THR) && !defined(PCOMPLIB)
+		    , f_str
+#endif
+		    );
+    }
+  }
+
+  for (ib=0; ib<istop; ib++) {
+    bbp = bptr[ib];
+
+#ifdef LALIGN
+    if (!lalign_repeat_thresh_done) {
+      bbp->repeat_thresh = 
+	min(E1_to_s(ppst->e_cut_r, m_msp->n0, bbp->seq->n1,ppst->zdb_size, m_msp->pstat_void),
+	    bbp->rst.score[ppst->score_ix]);
+    }
+#endif
+    /* preload stuff guaranteed to be in bbp->seq */
+    n1 = bbp->seq->n1;
+    aa1 = bbp->seq->aa1b;
+    if (bbp->seq->annot_p != NULL) aa1a = bbp->seq->annot_p->aa1_ann;
+    else aa1a = NULL;
+    l_off = bbp->seq->l_off;
+    loffset = bbp->seq->l_offset;
+
+    /* make sure we have a description */
+    if (bbp->mseq->bline == NULL || bbp->mseq->bline_max < tmp_len) {
+      if ((m_fptr=re_openlib(bbp->mseq->m_file_p,!m_msp->quiet))==NULL)
+	exit(1);
+      RANLIB(bline,tmp_len,bbp->mseq->lseek,bbp->mseq->libstr,bbp->mseq->m_file_p);
+      bline[tmp_len]='\0';
+      ranlib_done = 1;
+    }
+    else {
+      ranlib_done = 0;
+      SAFE_STRNCPY(bline, bbp->mseq->bline, sizeof(bline));
+    }
+
+    /* make sure we have a sequence */
+    /* NOTE(review): &(bbp->seq->annot_p) is the address of a struct
+       member and is never NULL, so the second clause is always false;
+       if "annot_p == NULL" was intended, changing it would force a
+       re-read of every library sequence without annotations - confirm
+       before changing */
+    if (bbp->seq->aa1b == NULL || (m_msp->ann_flg==1 && &(bbp->seq->annot_p)==NULL)) {
+      if (!ranlib_done) {
+	if ((m_fptr=re_openlib(bbp->mseq->m_file_p,!m_msp->quiet))==NULL)
+	  exit(1);
+	RANLIB(bline,tmp_len,bbp->mseq->lseek,bbp->mseq->libstr,bbp->mseq->m_file_p);
+	bline[tmp_len]='\0';
+	ranlib_done = 1;
+      }
+
+      n1 = re_getlib(aa1save, (m_msp->ann_flg==1) ? &(bbp->seq->annot_p) : NULL, maxn,
+		     m_msp->ldb_info.maxt3,
+		     m_msp->ldb_info.l_overlap,bbp->mseq->cont,m_msp->ldb_info.term_code,
+		     &loffset,&l_off,bbp->mseq->m_file_p);
+      aa1 = aa1save;
+      /* annot_p is tested before it is dereferenced; it is NULL here
+	 unless it was already set or re_getlib() filled it in */
+      if (m_msp->ann_flg==1 && bbp->seq->annot_p != NULL
+	  && bbp->seq->annot_p->aa1_ann) {aa1a = bbp->seq->annot_p->aa1_ann;}
+    }
+
+#ifdef DEBUG
+    if (n1 != bbp->seq->n1) {
+      fprintf(stderr," library sequence: %s lengths differ: %d != %d\n",
+	      bline,bbp->seq->n1, n1);
+      fprintf(stderr, "offset is: %lld\n",bbp->mseq->lseek);
+    }
+#endif
+
+    /* make sure we have an alignment encoding */
+    if (!(bbp->have_ares & 0x1)) {
+
+      bbp->a_res = do_walign(aa0[bbp->frame],m_msp->n0, aa1, n1,
+			     bbp->frame, bbp->repeat_thresh, ppst,
+			     f_str[bbp->frame], &bbp->have_ares);
+    }
+    else {
+      pre_cons(aa1,n1,bbp->frame,f_str[bbp->frame]);
+    }
+
+    cur_ares_p = bbp->a_res;
+    /* current do_walign()'s provide valid rst */
+    /*
+    memcpy(&cur_ares_p->rst,&bbp->rst, sizeof(struct rstruct));
+    */
+    aln_func_vals(bbp->frame, l_aln_p);
+
+    /* no description available - use the library name */
+    if (strlen(bline)==0) {
+      SAFE_STRNCPY(bline,">",sizeof(bline));
+      SAFE_STRNCAT(bline,m_msp->lname,l_llen-5);
+    }
+
+    bline_p = bline;
+    /* always remove "gi|" for alignments */
+    if (!strncmp(bline,"gi|",3)) {
+      bline_p = strchr(bline+4,'|');
+      if (!bline_p || !strchr(bline_p+1,'|')) {bline_p = bline;}
+      else bline_p += 1;
+    }
+
+    /* re-format bline */
+    while ((bp=strchr(bline_p,'\n'))!=NULL) *bp=' ';
+    if (m_msp->long_info) {
+      ttmp_len = strlen(bline_p);
+      bl_ptr = bline_p;
+      if (!(m_msp->markx & MX_M10FORM)) {
+	/* wrap the description by turning the last ' ' at or before
+	   desc_llen into '\n' */
+	while (ttmp_len > desc_llen) {
+	  for (i=desc_llen; i>10; i--)
+	    if (bl_ptr[i]==' ') {
+	      bl_ptr[i]='\n';
+	      break;
+	    }
+	  if (i <= 10) break;
+	  ttmp_len -= i;
+	  bl_ptr += i;
+	}
+      }
+      bline[tmp_len]='\0';
+    }
+
+    n1tot = (bbp->mseq->n1tot_p) ? *bbp->mseq->n1tot_p : bbp->seq->n1;
+
+    /* name1 is used to label the display */
+    /* bline_p does not have gi|12345, but could have pf26|12345 or sp|P09488 */
+    SAFE_STRNCPY(name1,bline_p,sizeof(name1));
+
+    if (!(m_msp->markx & MX_M10FORM)) name1[nml]='\0';
+    if ((bp = strchr(name1,' '))!=NULL) *bp = '\0';
+
+    /* l_name is used to build an HTML link from the bestscore line to
+       the alignment.  It can also be used to discriminate multiple hits
+       from the same long sequence.  Text must match that in mshowbest.c */
+
+    SAFE_STRNCPY(l_name,bline_p,sizeof(l_name));
+    l_name[sizeof(l_name)-1]='\0';
+    if ((bp = strchr(l_name,' '))!=NULL) *bp = '\0';
+    /* increase to [6] from [3] to allow longer db names "ref", "unk", */
+    /* only search from [6] when the name is long enough for [6] to be
+       inside the string */
+    if (strlen(l_name) > 6 && (bp=strchr(&l_name[6],'|'))!=NULL) *bp='\0';
+    if (m_msp->nframe > 2) sprintf(&l_name[strlen(l_name)],"_%d",bbp->frame+1);
+    else if (m_msp->qframe >= 0 && bbp->frame == 1) {
+      SAFE_STRNCAT(l_name,"_r",sizeof(l_name));
+    }
+    if (bbp->mseq->cont-1 > 0) {
+      sprintf(tmp_str,":%d",bbp->mseq->cont-1);
+      SAFE_STRNCAT(l_name,tmp_str,sizeof(l_name)-strlen(l_name));
+    }
+
+    if (m_msp->markx & MX_MBLAST) { SAFE_STRNCPY(name1,"Sbjct",sizeof(name1));}
+
+    if (!(m_msp->markx & MX_M10FORM)) name1[nml]='\0';
+
+    /* print out score information; */
+
+    if (m_msp->markx & MX_HTML ) {
+      SAFE_STRNCPY(link_name, l_name, sizeof(link_name));
+      fprintf (fp,"<a name=\"%s\"><pre>",link_name);
+    }
+    SAFE_STRNCPY(name0,name0s,nml+1);
+    if (m_msp->markx & MX_MBLAST) { SAFE_STRNCPY(name0,"Query",sizeof(name0));}
+    name0[nml]='\0';
+
+    if (ppst->zsflag%10 == 6) {
+      sprintf(info_str," comp: %.5f H: %.5f",bbp->rst.comp,bbp->rst.H);
+    }
+    else info_str[0]='\0';
+
+    if (m_msp->markx & MX_M11OUT) {
+      qt_offset = m_msp->q_offset + (m_msp->q_off-1)+(m_msp->sq0off);
+      lt_offset = loffset + (bbp->seq->l_off-1) + (m_msp->sq1off);
+      fprintf (fp, "s {\n   \"%s\" %ld %ld \n   \"%s\" %ld %ld\n}\n",
+	       name0, qt_offset, qt_offset + m_msp->n0 - 1,
+	       name1, lt_offset, lt_offset + bbp->seq->n1 - 1);
+      fprintf (fp, "h {\n   \"%s\"\n   \"%s\"\n}\n", qline_p, bline_p);
+    }
+
+
+    /* enables >>seq_acc seq_description length for first alignment, >- after */
+    first_line = 1;
+
+    while (cur_ares_p != NULL && cur_ares_p->nres > 0) {
+
+      /* estimate space for alignment consensus */
+      if (m_msp->aln.showall==1) {
+	maxc = cur_ares_p->nres + max(cur_ares_p->min0,cur_ares_p->min1)+
+	  max((m_msp->n0-cur_ares_p->max0),(n1-cur_ares_p->max1))+4;
+      }
+      else {
+	maxc = cur_ares_p->nres + 4*m_msp->aln.llen+4;
+      }
+
+      /* get space to put the sequence alignment consensus */
+      initseq(&seqc0, &seqc1, &seqca, maxc);
+      cumm_seq_score = NULL;
+      if (m_msp->markx & MX_RES_ALIGN_SCORE) {
+	/* on failure cumm_seq_score stays NULL and is passed on as NULL */
+	if ((cumm_seq_score = (int *)calloc(maxc,sizeof(int)))==NULL) {
+	  fprintf(stderr,"***error*** [%s:%d] cannot allocate cumm_seq_score[%d]\n",
+		  __FILE__, __LINE__, (int)(maxc*sizeof(int)));
+	}
+      }
+      if (m_msp->ann_flg && (m_msp->aa0a != NULL || aa1a!=NULL || m_msp->annot_p)) {
+	initseq_ann(&seqc0a, &seqc1a, maxc);
+      }
+      else { seqc0a = seqc1a = NULL;}
+
+      calc_astruct(l_aln_p, cur_ares_p, f_str[bbp->frame]);
+
+      calc_coord(m_msp->n0,bbp->seq->n1,
+		 m_msp->q_offset+(m_msp->q_off-1)+(m_msp->sq0off-1),
+		 loffset+(l_off-1)+(m_msp->sq1off-1),
+		 l_aln_p);
+
+#ifdef LALIGN
+      if ((m_msp->markx & MX_M11OUT) == MX_M11OUT) {	/* lav output - skip lots of stuff */
+	lsw_score = cur_ares_p->sw_score;
+	lzscore = find_z(lsw_score, 0.0, bbp->seq->n1, 0.0, m_msp->pstat_void);
+	lzscore2 = find_z(lsw_score, 0.0, bbp->seq->n1, 0.0, m_msp->pstat_void2);
+	lbits = zs_to_bit(lzscore, m_msp->n0, bbp->seq->n1);
+
+	NULL_dyn_string(annot_var_dyn);
+	NULL_dyn_string(align_code_dyn);
+
+	lc=calc_code(aa0[bbp->frame],m_msp->n0,
+		     aa1,n1, 
+		     l_aln_p, cur_ares_p,
+		     ppst, 
+		     align_code_dyn,
+		     /*	seqc0, maxc, */
+		     m_msp->ann_arr,
+		     m_msp->aa0a, m_msp->annot_p,
+		     aa1a, bbp->seq->annot_p,
+		     annot_var_dyn,
+		     &score_delta,
+		     f_str[bbp->frame],m_msp->pstat_void,m_msp->show_code + SHOW_ANNOT_FULL);
+
+	if (lc > 0) {
+	  percent = (100.0*(float)l_aln_p->nident)/(float)lc;
+	}
+	else { percent = -1.00; }
+
+	fprintf (fp, "a {\n");
+	if (annot_var_dyn->string[0]) {
+	  bp = annot_var_dyn->string;
+	  while ((bp1=strchr(bp, '\n'))) {
+	    *bp1 = '\0';
+	    fprintf (fp, "# %s\n", bp);
+	    *bp1 = '\n';
+	    bp = bp1 + 1;
+	  }
+	}
+	fprintf (fp, "  s %d %.1f\n", lsw_score, lbits);
+	do_lav(fp, l_aln_p, align_code_dyn->string, percent, 0);
+
+	if (ppst->nseq == 1) {
+	  fprintf (fp, "a {\n");
+	  fprintf (fp, "  s %d %.1f\n", lsw_score, lbits);
+	  do_lav(fp, l_aln_p, align_code_dyn->string, percent, 1);
+	}
+
+	/* the consensus buffers are not used for lav output; release
+	   them here because the rest of the loop body is skipped */
+	freeseq(&seqc0,&seqc1, &seqca);
+	freeseq_ann(&seqc0a, &seqc1a);
+	if (cumm_seq_score != NULL) { free(cumm_seq_score); cumm_seq_score = NULL;}
+
+	cur_ares_p = cur_ares_p->next;
+	continue;
+      }
+#endif		/* ifdef LALIGN */
+
+      NULL_dyn_string(annot_var_dyn);
+      NULL_dyn_string(align_code_dyn);
+
+      nc=calc_cons_a(aa0[bbp->frame],m_msp->n0, aa1, n1,
+		     &lc,l_aln_p, cur_ares_p, ppst, 
+		     seqc0, seqc1, seqca, cumm_seq_score,
+		     m_msp->ann_arr,
+		     m_msp->aa0a, m_msp->annot_p, seqc0a,
+		     aa1a,  bbp->seq->annot_p, seqc1a,
+		     &score_delta,
+		     annot_var_dyn,
+		     f_str[bbp->frame],
+		     m_msp->pstat_void
+		     );
+
+      if (cur_ares_p->score_delta > 0) score_delta -= cur_ares_p->score_delta;
+
+      percent = calc_fpercent_id(100.0, l_aln_p->nident,lc,m_msp->tot_ident, -1.0);
+
+      ngap = l_aln_p->ngap_q + l_aln_p->ngap_l;
+#ifndef SHOWSIM
+      gpercent = calc_fpercent_id(100.0,l_aln_p->nident,lc-ngap,m_msp->tot_ident, -1.0);
+#else
+      gpercent = calc_fpercent_id(100.0,l_aln_p->nsim,lc,m_msp->tot_ident, -1.0);
+#endif
+
+      lsw_score = cur_ares_p->sw_score + score_delta;
+      /* removed 'first_line &&' so that LALIGN shows subject name/description */
+      if (first_line && !(m_msp->markx&MX_M11OUT )) {
+	if ((m_msp->markx & MX_ATYPE)!=7 && !(m_msp->markx & MX_M10FORM)) {
+	  if (m_msp->markx & MX_MBLAST) {
+	    /* provides >>id  description (length) line */
+	    fprintf (fp, fmt,bline_p,annot_var_dyn->string,n1tot);
+	  }
+	  else {
+	    fprintf (fp, fmt,bline_p,n1tot);
+	  }
+	}
+	else if (m_msp->markx & MX_M10FORM) {
+	  if (annot_var_dyn->string[0]) {	/* have annotation with '\n', replace with ';' in copy */
+	    if ((annot_var_s10=(char *)calloc(strlen(annot_var_dyn->string)+1,sizeof(char)))==NULL) {
+	      fprintf(stderr," ***error*** [%s:%d] -m 10 cannot allocate annot_var_s10[%d]\n",
+		      __FILE__,__LINE__,(int)strlen(annot_var_dyn->string));
+	      fprintf (fp,">>%s\n",bline_p);
+	    }
+	    else {
+	      SAFE_STRNCPY(annot_var_s10,annot_var_dyn->string,strlen(annot_var_dyn->string));
+	      bp = annot_var_s10;
+	      while ((bp = strchr(bp+1,'\n'))) {
+		if (bp[-1] != ';') {*bp = ';';}
+		else {*bp = ' ';}
+	      }
+	      fprintf (fp,">>%s;%s\n",bline_p, annot_var_s10);
+	      free(annot_var_s10); annot_var_s10 = NULL;
+	    }
+	  }
+	  else {
+	    fprintf (fp,">>%s\n",bline_p);
+	  }
+	}
+      }
+
+      /* this is required because cur_ares_p can carry scores from an
+	 alignment without -S low-complexity re-scored using low
+	 complexity */
+#ifndef LALIGN
+      if (first_line) {
+	rst_p = &bbp->rst;
+	first_line = 0;
+      }
+      else {
+	rst_p = &cur_ares_p->rst;
+      }
+#else
+      /* ensures that LALIGN alignments do not report 100% match
+	 values */
+      rst_p = &cur_ares_p->rst;
+      first_line = 0;
+#endif
+
+      l_score0 = rst_p->score[ppst->score_ix] + score_delta;
+
+      if (max(strlen(seqc0),strlen(seqc1)) > nc) {
+	fprintf(stderr," mshowalign: nc/maxc: %d/%d seqc0/1: %lu/%lu\n",
+		nc,maxc,strlen(seqc0),strlen(seqc1));
+      }
+
+#ifdef DEBUG
+      /*
+	if (lsw_score < bbp->rst.score[ppst->score_ix]) {
+	fprintf(stderr," *** warning - SW score=%d < opt score=%d ***\n",
+	lsw_score, bbp->rst.score[ppst->score_ix]);
+	}
+      */
+#endif
+      calc_coord(m_msp->n0,bbp->seq->n1,
+		m_msp->q_offset+(m_msp->q_off-1)+(m_msp->sq0off-1),
+		bbp->seq->l_offset+(bbp->seq->l_off-1)+(m_msp->sq1off-1),
+		l_aln_p);
+
+      lzscore = find_z(l_score0, rst_p->escore, bbp->seq->n1, rst_p->comp, m_msp->pstat_void);
+      /* lzscore2 is only set, and only read, when zsflag > 20 */
+      if (ppst->zsflag > 20) {
+	lzscore2 = find_z(l_score0, rst_p->escore, bbp->seq->n1, rst_p->comp, m_msp->pstat_void2);
+      }
+      lbits = zs_to_bit(lzscore, m_msp->n0, bbp->seq->n1);
+
+      if (m_msp->markx & MX_MBLAST) {
+	fprintf(fp, "\n Score = %.1f bits (%d),  Expect = %.1g\n",
+		lbits, rst_p->score[ppst->score_ix] + score_delta,
+		zs_to_E(lzscore, bbp->seq->n1, ppst->dnaseq, ppst->zdb_size, m_msp->db));
+
+	/* NOTE(review): the percentages below divide by (lc-ngap)
+	   without checking that it is non-zero - confirm that
+	   lc > ngap always holds here */
+	fprintf(fp, " Identities = %d/%d (%d%%)", l_aln_p->nident, lc-ngap,
+		(int)((100.0*(float)l_aln_p->nident+0.5)/(float)(lc-ngap)));
+
+	if (!disp_dna_align) {
+	  fprintf(fp, ", Positives = %d/%d (%d%%)", l_aln_p->npos, lc-ngap, 
+		  (int)((100.0*(float)l_aln_p->npos+0.5)/(float)(lc-ngap)));
+	}
+
+	fprintf(fp, ", Gaps = %d/%d (%d%%)\n",ngap, lc-ngap,
+		(int)((100.0*(float)(l_aln_p->ngap_q+l_aln_p->ngap_l)+0.5)/(float)(lc-ngap)));
+
+	if (disp_dna_align) {
+	  if (m_msp->qframe > 1 && bbp->frame > 0) {
+	    fprintf (fp, " Strand=Minus/Plus\n");
+	  }
+	  else {
+	    fprintf (fp, " Strand=Plus/Plus\n");
+	  }
+	}
+	else {
+	  if (m_msp->nframe > 2) {
+	    fprintf (fp, " Frame= %d\n",bbp->frame+1);
+	  }
+	  else if (m_msp->nframe > 1) {
+	    fprintf (fp, " Frame = %s\n",(bbp->frame>0 ? "Reverse" : "Forward"));
+	  }
+	  else if (m_msp->qframe > 1) {
+	    fprintf (fp, " Frame = %s\n",(bbp->frame>0 ? "Reverse" : "Forward"));
+	  }
+	}
+      }
+      else if ((m_msp->markx & MX_ATYPE)!=7 && !(m_msp->markx & MX_M10FORM)) {
+	if (annot_var_dyn->string[0]) {
+	  if (m_msp->markx & MX_HTML) {
+	    fprintf(fp,"<!-- ANNOT_START \"%s\" -->",link_name);}
+	  /* ensure that last character is "\n" */
+	  /* NOTE(review): this replaces the last character rather than
+	     appending a newline - confirm that is intended */
+	  if (annot_var_dyn->string[strlen(annot_var_dyn->string)-1] != '\n') {
+	    annot_var_dyn->string[strlen(annot_var_dyn->string)-1] = '\n';
+	  }
+	  fputs(annot_var_dyn->string, fp);
+	  if (m_msp->markx & MX_HTML) {fputs("<!-- ANNOT_STOP -->",fp);}
+	}
+
+	/* this code makes sense for library searches, but not for
+	   multiple non-intersecting alignments */
+
+#ifndef LALIGN
+	if (m_msp->nframe > 2) 
+	  fprintf (fp, "Frame: %d",bbp->frame+1);
+	else if (m_msp->nframe > 1) 
+	  fprintf (fp, "Frame: %c",(bbp->frame? 'r': 'f'));
+	else if (m_msp->qframe >= 0 && bbp->frame > 0 ) {
+	  fputs("rev-comp",fp);
+	  name0[nml-1]='\0';
+	  if (!(m_msp->markx & MX_MBLAST)) SAFE_STRNCAT(name0,"-",sizeof(name0)-1);
+	}
+
+	if (m_msp->arelv > 0)
+	  fprintf (fp, " %s: %3d", m_msp->alab[0],rst_p->score[0] + score_delta);
+	if (m_msp->arelv > 1)
+	  fprintf (fp, " %s: %3d", m_msp->alab[1],rst_p->score[1] + score_delta);
+	if (m_msp->arelv > 2)
+	  fprintf (fp, " %s: %3d", m_msp->alab[2],rst_p->score[2] + score_delta);
+	fprintf (fp,"%s",info_str);
+	if (ppst->zsflag>=0) {
+	  fprintf (fp, "  Z-score: %4.1f  bits: %3.1f %sE(%ld): %4.2g%s", 
+		   lzscore, lbits, 
+		   html_pre_E, ppst->zdb_size,
+		   zs_to_E(lzscore, bbp->seq->n1, ppst->dnaseq, ppst->zdb_size, m_msp->db),
+		   html_post_E);
+	  if (ppst->zsflag > 20) {
+	    fprintf(fp," E2(): %4.2g",zs_to_E(lzscore2, bbp->seq->n1, ppst->dnaseq, ppst->zdb_size, m_msp->db));
+	  }
+	}
+	fprintf (fp, "\n");
+
+#else /* LALIGN */
+	if ((m_msp->markx & MX_M11OUT) == 0) {
+	  fprintf (fp, " %s score: %d; ", m_msp->alabel, lsw_score);
+	  fprintf (fp," %3.1f bits; E(%ld) <  %.2g\n", lbits, ppst->zdb_size,
+		   zs_to_E(lzscore, bbp->seq->n1, ppst->dnaseq, ppst->zdb_size, m_msp->db));
+	}
+#endif
+      }
+      else if (m_msp->markx & MX_M10FORM) {
+#ifndef LALIGN
+	if (m_msp->qframe > -1) {
+	  if (m_msp->nframe > 2) {
+	    fprintf(fp,"; %s_frame: %d\n",m_msp->f_id0,bbp->frame+1);
+	  }
+	  else {
+	    fprintf(fp,"; %s_frame: %c\n",m_msp->f_id0,(bbp->frame > 0? 'r':'f'));
+	  }
+	}
+	fprintf (fp, "; %s_%s: %3d\n", m_msp->f_id0,m_msp->alab[0],cur_ares_p->rst.score[0]+score_delta);
+	if (m_msp->arelv > 1)
+	  fprintf (fp,"; %s_%s: %3d\n", m_msp->f_id0,m_msp->alab[1],cur_ares_p->rst.score[1]+score_delta);
+	if (m_msp->arelv > 2)
+	  fprintf (fp,"; %s_%s: %3d\n", m_msp->f_id0,m_msp->alab[2],cur_ares_p->rst.score[2]+score_delta);
+	if (info_str[0]) fprintf(fp,"; %s_info: %s\n",m_msp->f_id0,info_str);
+	if (ppst->zsflag>=0) 
+	  fprintf (fp,"; %s_z-score: %4.1f\n; %s_bits: %3.1f\n; %s_expect: %6.2g\n",
+		   m_msp->f_id0,bbp->zscore,
+		   m_msp->f_id0,zs_to_bit(bbp->zscore, m_msp->n0, bbp->seq->n1),
+		   m_msp->f_id0,bbp->rst.escore);
+#else
+	if ((m_msp->markx & MX_M11OUT) == 0) {
+	  fprintf (fp,"; %s_%s: %d\n", m_msp->f_id0, m_msp->alab[0], lsw_score);
+	  fprintf (fp,"; %s_z-score: %4.1f\n; %s_bits: %3.1f\n; %s_expect: %6.2g\n",
+		   m_msp->f_id0, lzscore, m_msp->f_id0, lbits, m_msp->f_id0, 
+		   zs_to_E(lzscore, bbp->seq->n1, ppst->dnaseq, ppst->zdb_size, m_msp->db));
+	}
+#endif	
+      }
+
+      do_show(fp, m_msp->n0, bbp->seq->n1, lsw_score, name0, name1, nml,
+	      link_name, 
+	      m_msp, ppst, seqc0, seqc0a, seqc1, seqc1a, seqca, cumm_seq_score,
+	      nc, percent, gpercent, lc, l_aln_p, annot_var_dyn->string,
+	      m_msp->annot_p, bbp->seq->annot_p);
+
+      /* display the encoded alignment left over from showbest()*/
+
+      if ((m_msp->markx & MX_M10FORM) &&
+	  (m_msp->markx & MX_M9SUMM) && 
+	  ((m_msp->show_code & SHOW_CODE_ALIGN) == SHOW_CODE_ALIGN)) {
+
+	seq_code = cur_ares_p->aln_code;
+	seq_code_len = cur_ares_p->aln_code_n;
+	annot_code = cur_ares_p->annot_code;
+	annot_code_len = cur_ares_p->annot_code_n;
+
+	if (seq_code_len > 0 && seq_code != NULL) {
+	  fprintf(fp,"; al_code: %s\n",seq_code);
+	  /* 	  free(seq_code);  -- this is now freed in comp_lib2.c */
+	  if (annot_code_len > 0 && annot_code != NULL) {
+	    fprintf(fp,"; al_code_ann: %s\n",annot_code);
+	    /* 	    free(ann_code);  -- this is now freed in comp_lib2.c */
+	  }
+	}
+      }
+
+      if (m_msp->markx & MX_HTML) fprintf(fp,"</pre><hr />");
+      fflush(fp);
+
+      /* release everything allocated for this alignment */
+      freeseq(&seqc0,&seqc1, &seqca);
+      freeseq_ann(&seqc0a, &seqc1a);
+      if (cumm_seq_score != NULL) { free(cumm_seq_score); cumm_seq_score = NULL;}
+
+      cur_ares_p = cur_ares_p->next;
+
+      if (cur_ares_p != NULL) {
+	if (m_msp->markx & MX_HTML) {
+	  sprintf(link_name,"%s_%d",l_name, cur_ares_p->index);
+	  fprintf (fp,"<a name=\"%s\"><pre>",link_name);
+	}
+	else {
+	  if (!(m_msp->markx & MX_MBLAST)) fprintf(fp,">--\n");
+	}
+      }		/* done finishing up  */
+    }		/* while (cur_ares_p) */
+    /* we are done displaying the alignment - be sure to free a_res memory */
+  }
+
+  /* both dynamic strings were allocated at the top of the function */
+  free_dyn_string(align_code_dyn);
+  free_dyn_string(annot_var_dyn);
+
+  if (!(m_msp->markx & (MX_M8OUT+MX_HTML))) fprintf(fp,"\n");
+}
+
+/* do_show() - write one alignment to fp in the layout chosen by
+   m_msp->markx:
+     MX_AMAP with alignment type 7 - disgraph() only
+     MX_M10FORM                    - "; key: value" lines, then discons()
+     otherwise                     - summary line (unless MX_MBLAST),
+                                     optional HTML links, optional
+                                     disgraph(), then discons()
+*/
+void do_show(FILE *fp, int n0,int n1, int score,
+	     char *name0, char *name1, int nml, char *link_name,
+	     const struct mngmsg *m_msp, const struct pstruct *ppst,
+	     char *seqc0, char *seqc0a,  char *seqc1, char *seqc1a,
+	     char *seqca, int *cumm_seq_score, int nc,
+	     float percent, float gpercent, int lc,
+	     struct a_struct *aln, const char *annot_var_s,
+	     const struct annot_str * q_annot_p,
+	     const struct annot_str * l_annot_p)
+{
+  int is_html, is_map, is_atype7;
+  int graph_n0;
+
+  is_html = (m_msp->markx & MX_HTML) ? 1 : 0;
+  is_map = (m_msp->markx & MX_AMAP) ? 1 : 0;
+  is_atype7 = ((m_msp->markx & MX_ATYPE)==7);
+
+  /* graph-only display */
+  if (is_map && is_atype7) {
+    disgraph(fp, n0, n1, percent, score,
+	     aln->amin0, aln->amin1, aln->amax0, aln->amax1, m_msp->sq0off,
+	     name0, name1, nml, aln->llen, m_msp->markx);
+    return;
+  }
+
+  /* -m 10 style: one "; name: value" line per statistic */
+  if (m_msp->markx & MX_M10FORM) {
+    if (ppst->sw_flag && m_msp->arelv>0) {
+      fprintf(fp,"; %s_score: %d\n",m_msp->f_id1,score);
+    }
+    fprintf(fp,"; %s_ident: %5.3f\n",m_msp->f_id1,percent/100.0);
+#ifdef SHOWSIM
+    fprintf(fp,"; %s_sim: %5.3f\n",m_msp->f_id1,gpercent/100.0);
+#else
+    fprintf(fp,"; %s_gident: %5.3f\n",m_msp->f_id1,gpercent/100.0);
+#endif
+
+    fprintf(fp,"; %s_overlap: %d\n",m_msp->f_id1,lc);
+    discons(fp, m_msp,
+	    seqc0, seqc0a, seqc1, seqc1a, seqca, cumm_seq_score, nc,
+	    n0, n1, name0, name1, nml, aln);
+    return;
+  }
+
+  /* conventional display: score/identity summary line */
+  if (!(m_msp->markx & MX_MBLAST)) {
+#ifndef LALIGN
+    fprintf(fp,"%s score: %d; ",m_msp->alabel, score);
+#endif
+#ifdef SHOWSIM
+    fprintf(fp,"%4.1f%% identity (%4.1f%% similar) in %d %s overlap (%ld-%ld:%ld-%ld)\n",
+	    percent,gpercent,lc,m_msp->sqnam,aln->d_start0,aln->d_stop0,
+	    aln->d_start1,aln->d_stop1);
+#else
+    fprintf(fp,"%4.1f%% identity (%4.1f%% ungapped) in %d %s overlap (%ld-%ld:%ld-%ld)\n",
+	    percent,gpercent,lc,m_msp->sqnam,aln->d_start0,aln->d_stop0,
+	    aln->d_start1,aln->d_stop1);
+#endif
+  }
+
+  if (is_html) {
+    do_url1(fp, m_msp, ppst, link_name,n1, aln,
+	    annot_var_s, q_annot_p, l_annot_p);
+  }
+
+  if (is_map && !is_atype7) {
+    fputc('\n',fp);
+
+    /* a DNA query against a protein library is graphed at 1/3 length */
+    graph_n0 = n0;
+    if (m_msp->qdnaseq == SEQT_DNA && m_msp->ldb_info.ldnaseq== SEQT_PROT) {
+      graph_n0 = n0 / 3;
+    }
+
+    disgraph(fp, graph_n0, n1, percent, score,
+	     aln->amin0, aln->amin1,
+	     aln->amax0, aln->amax1,
+	     m_msp->sq0off,
+	     name0, name1, nml, aln->llen,m_msp->markx);
+  }
+
+  /* the aligned sequences, bracketed by HTML comments when requested */
+  if (is_html) {
+    fprintf(fp, "<!-- ALIGN_START \"%s\" -->",link_name);
+  }
+  discons(fp, m_msp,
+	  seqc0, seqc0a, seqc1, seqc1a, seqca, cumm_seq_score, nc,
+	  n0, n1, name0, name1, nml, aln);
+  if (is_html) {
+    fputs("<!-- ALIGN_STOP -->",fp);
+  }
+  fputc('\n',fp);
+}
+
+/* do_lav() - write the body of one lav "a {" stanza: the "b"/"e"
+   begin/end lines followed by one "l" line per ungapped run, then "}".
+
+   fp:        output stream
+   aln_p:     supplies the alignment boundaries (d_start0/1, d_stop0/1)
+   seqc:      encoded alignment: a sequence of <op><count> items, where
+              '=' is a run present in both sequences, '+' advances
+              sequence 1 only, and any other op advances sequence 0
+              only; NULL is treated as an empty encoding
+   percent:   percent identity, rounded and reported on every "l" line
+   is_mirror: when non-zero, the two sequences' coordinates are swapped
+              in the output
+
+   Coordinates are kept as long, matching the %ld used for
+   d_start0/d_stop0 elsewhere in this file, so large offsets are not
+   truncated.
+*/
+void 
+do_lav(FILE *fp, struct a_struct *aln_p, char *seqc,
+       float percent, int is_mirror) {
+  long cur_b0, cur_b1, cur_e0, cur_e1;
+  int ipercent;
+  long len;
+  char *seqc_p, *num_e;
+
+  ipercent = (int)(percent+0.5);
+
+  cur_b0 = aln_p->d_start0;
+  cur_b1 = aln_p->d_start1;
+  cur_e0 = aln_p->d_stop0;
+  cur_e1 = aln_p->d_stop1;
+
+  if (!is_mirror) {
+    fprintf (fp, "  b %ld %ld\n  e %ld %ld\n", 
+	     cur_b0, cur_b1, cur_e0, cur_e1);
+  }
+  else  {
+    fprintf (fp, "  b %ld %ld\n  e %ld %ld\n", 
+	     cur_b1, cur_b0, cur_e1, cur_e0);
+  }
+
+  seqc_p = seqc;
+  
+  while (seqc_p != NULL && *seqc_p) {
+    /* every item is one op character followed by a decimal count;
+       strtol() always leaves num_e at or beyond seqc_p+1, so the loop
+       advances even when the count is missing */
+    len = strtol(seqc_p+1, &num_e, 10);
+
+    if (*seqc_p == '=') {	/* extend match in both sequences */
+      cur_e0 = cur_b0 + len - 1;
+      cur_e1 = cur_b1 + len - 1;
+      if (!is_mirror) {
+	fprintf(fp, "  l %ld %ld %ld %ld %d\n",
+		cur_b0, cur_b1, cur_e0, cur_e1,
+		ipercent);
+      }
+      else  {
+	fprintf(fp, "  l %ld %ld %ld %ld %d\n",
+		cur_b1, cur_b0, cur_e1, cur_e0,
+		ipercent);
+      }
+      cur_b0 = cur_e0 + 1;
+      cur_b1 = cur_e1 + 1;
+    }
+    else if (*seqc_p == '+') {	/* extend insertion in seq0  by incrementing seq1 */
+      cur_b1 += len;
+    }
+    else {			/* extend insertion in seq1 by incrementing seq0 */
+      cur_b0 += len;
+    }
+    seqc_p = num_e;
+  }
+
+  fprintf (fp, "}\n");
+}
+
+/* initseq() - allocate the consensus strings for one alignment.
+   A single zero-filled buffer of 3*seqsiz bytes is allocated; *seqc0,
+   *seqc1 and *seqca point to consecutive seqsiz-byte sections of it,
+   so only *seqc0 may be passed to free() (see freeseq()).  Exits with
+   status 1 if the buffer cannot be allocated.
+*/
+void	/* initialize consensus arrays */
+initseq(char **seqc0, char **seqc1, char **seqca, int seqsiz)
+{
+  char *base;
+
+  base = (char *)calloc((size_t)seqsiz*3,sizeof(char));
+  *seqc0 = base;
+  if (base == NULL) {
+    fprintf(stderr,"cannot allocate consensus arrays %d\n",seqsiz);
+    exit(1);
+  }
+
+  *seqc1 = base + seqsiz;
+  *seqca = base + 2*seqsiz;
+}
+
+/* freeseq() - release the buffer allocated by initseq().  Only *seqc0
+   is passed to free(); all three pointers are then set to NULL. */
+void freeseq(char **seqc0, char **seqc1, char **seqca)
+{
+  char *base = *seqc0;
+
+  free(base);
+
+  *seqca = NULL;
+  *seqc1 = NULL;
+  *seqc0 = NULL;
+}
+
+/* initseq_ann() - allocate the consensus annotation strings.
+   A single zero-filled buffer of 5*seqsiz bytes is allocated; *seqc0a
+   points to its start and *seqc1a to the position seqsiz bytes in, so
+   only *seqc0a may be passed to free() (see freeseq_ann()).  Exits
+   with status 1 if the buffer cannot be allocated.
+*/
+void	/* initialize consensus annotation arrays */
+initseq_ann(char **seqc0a, char **seqc1a, int seqsiz)
+{
+  char *base;
+
+  base = (char *)calloc((size_t)seqsiz*5,sizeof(char));
+  *seqc0a = base;
+  if (base == NULL) {
+    fprintf(stderr,"cannot allocate consensus arrays %d\n",seqsiz);
+    exit(1);
+  }
+
+  *seqc1a = base + seqsiz;
+}
+
+/* freeseq_ann() - release the buffer allocated by initseq_ann() and
+   set both pointers to NULL.  When *seqc0a is already NULL nothing is
+   freed and neither pointer is modified. */
+void freeseq_ann(char **seqc0a, char **seqc1a)
+{
+  if (*seqc0a == NULL) {
+    return;
+  }
+
+  free(*seqc0a);
+  *seqc1a = NULL;
+  *seqc0a = NULL;
+}
+
+#include <math.h>
+/* calc_fpercent_id() - return scale * (n_ident / n_alen).
+
+   scale:     multiplier for the fraction (100.0 gives a percentage)
+   n_ident:   number of identical positions
+   n_alen:    alignment length; when <= 0, fail is returned
+   tot_ident: when non-zero, an alignment that is not completely
+              identical (n_ident != n_alen) has its fraction capped at
+              0.999, so it is never reported as scale * 1.0
+   fail:      value returned when the fraction cannot be calculated
+*/
+float calc_fpercent_id(float scale, int n_ident, int n_alen, int tot_ident, float fail) {
+  float frac_id;
+  float max_partial = 0.999;	/* largest fraction for a partial match */
+
+  if (n_alen <= 0) {
+    return fail;
+  }
+
+  frac_id = (float)n_ident/(float)n_alen;
+
+  if (tot_ident && n_ident != n_alen && frac_id > max_partial) {
+    frac_id = max_partial;
+  }
+
+  return scale*frac_id;
+}
diff --git a/src/mshowbest.c b/src/mshowbest.c
new file mode 100644
index 0000000..18cf4bf
--- /dev/null
+++ b/src/mshowbest.c
@@ -0,0 +1,665 @@
+/* $Id: mshowbest.c 1281 2014-08-21 17:32:06Z wrp $ */
+
+/* copyright (c) 1996, 1997, 1998, 1999, 2014 by William R. Pearson and
+   The Rector and Visitors of the University of Virginia */
+
+/* Licensed under the Apache License, Version 2.0 (the "License"); you
+   may not use this file except in compliance with the License.  You
+   may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under this License is distributed on an "AS
+   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+   express or implied.  See the License for the specific language
+   governing permissions and limitations under the License. 
+*/
+
+/* 2-April-2009 changes to simplify interactive display logic.  Coming
+   into showbest(), things are interactive (quiet==0) or use
+   m_msg.nshow */
+
+/*   29-Oct-2003 - changes so that bbp->mseq->cont < 0 => aa1 sequence is
+     already in aa1, no re_openlib or re_getlib required
+*/
+
+/*   14-May-2003 Changes to use a more consistent coordinate numbering
+     system for displays.  aln->d_start[01] is now consistently used
+     to report the start of the alignment in all functions, and
+     mshowbest.c has been modified to use d_start[01] instead of
+     d_start[01]-1.  aln->min[01] now starts at 0 for all functions;
+     instead of 1 for some functions (dropnfa.c, dropgsw.c, dropfs2.c
+     earlier).
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "defs.h"
+#include "structs.h"
+#include "param.h"
+#include "mm_file.h"
+#include "best_stats.h"
+
+#define MAX_BLINE 256
+
+/* function calls necessary to re_getlib() the sequence and, do
+   alignments, if necessary
+*/
+
+#define RANLIB (m_fptr->ranlib)
+
+
+int
+re_getlib(unsigned char *, struct annot_str **, 
+	  int, int, int, int, int, long *, long *, 
+	  struct lmf_str *m_fptr);
+
+#include "drop_func.h"
+
+void
+s_annot_to_aa1a(int n1, struct annot_str *annot_p, unsigned char *ann_arr);
+
+struct a_res_str *
+build_ares_code(unsigned char *aa0, int n0,
+		unsigned char *aa1, struct seq_record *seq,
+		int frame, int *have_ares, int repeat_thresh, 
+		struct mngmsg *m_msp, struct pstruct *ppst,
+		void *f_str
+		);
+
+struct lmf_str *re_openlib(struct lmf_str *, int outtty);
+
+extern void calc_coord(int n0, int n1, long qoffset, long loffset,
+		      struct a_struct *aln);
+
+extern float
+calc_fpercent_id(float scale, int n_ident, int n_alen, int tot_ident, float fail);
+
+extern int
+get_annot(char *sname, struct mngmsg *m_msp, char *bline, int n1, struct annot_str **annot_p,
+ 	  int target, int debug);
+extern double find_z(int score, double escore, int length, double comp,void *);
+extern double zs_to_E(double zs, int n1, int dnaseq, long db_size, struct db_str db);
+extern double zs_to_bit(double, int, int);
+extern int E1_to_s(double e_val, int n0, int n1, int db_size, void *pu);
+
+void header_aux(FILE *);
+void show_aux(FILE *, struct beststr *);
+void w_abort (char *p, char *p1);
+
+extern double zs_to_bit(double, int, int);
+
+/* sb_set_pad() - make pad[] (a MAX_BLINE-byte buffer) a string of n
+   blanks.  n is clamped to 0..MAX_BLINE-1 so that a small aln.llen
+   combined with a large right margin can neither produce a negative
+   memset() length nor index outside the buffer. */
+static void
+sb_set_pad(char *pad, int n)
+{
+  if (n < 0) n = 0;
+  if (n > MAX_BLINE-1) n = MAX_BLINE-1;
+  memset(pad, ' ', (size_t)n);
+  pad[n] = '\0';
+}
+
+/* showbest() shows a list of high scoring sequence descriptions, and
+   their rst.scores.  If -m 9, then an additional complete set of
+   alignment information is provided.
+
+   If PCOMPLIB or m_msp->quiet then the number of high scores to be
+   shown is pre-determined by m_msp->nshow before showbest is called.
+
+   The comp_lib2.c version re_getlib()'s the sequence for its
+   description, and then does another alignment for -m 9 (Thus, it
+   needs an f_str.  The PCOMPLIB version has everything available in
+   beststr before showbest() is called.
+
+   Arguments:
+     fp            stream the list is written to
+     aa0           query sequences, indexed by bbp->frame
+     aa1save       buffer re_getlib() reads a library sequence into
+     maxn          passed through to re_getlib()
+     bptr, nbest   array of best hits and the number of entries in it
+     qlib          not used by this function
+     m_msp         display/search state (see side effects below)
+     ppst          scoring/statistics parameters
+     db            not used by this function (m_msp->db is used instead)
+     info_gstring2 not used by this function
+     f_str         per-frame state handed to do_opt()/build_ares_code()
+
+   Side effects on *m_msp: aln.llen is capped to fit bline[], qtitle is
+   truncated at its first newline (and at its first blank for -m 8),
+   nshow is set to the number of hits displayed, and align_done is set
+   if any alignment was built here.  With quiet==0 the function prompts
+   on stdout and reads replies from stdin, calling exit(0) on EOF.  It
+   calls exit(1) if a library cannot be re-opened.
+*/
+
+void showbest (FILE *fp, unsigned char **aa0, unsigned char *aa1save, int maxn,
+	       struct beststr **bptr,int nbest,
+	       int qlib, struct mngmsg *m_msp,
+	       struct pstruct *ppst, struct db_str db,
+	       char **info_gstring2
+	       ,void **f_str
+)
+{
+  unsigned char *aa1;
+  int best_align_done = 0;
+  int ntmp = 0;
+  char bline[MAX_BLINE], fmt[40], pad[MAX_BLINE], fmt2[40], rline[40];
+  char l_name[128], link_name[140];
+  int istart = 0, istop, ib;
+  int nshow;		/* number of sequences shown before prompt,
+			   and ultimately displayed */
+  int first_line, link_shown;
+  int quiet;
+  int r_margin;
+  int bl_room, bl_trunc;	/* space left in / truncation point for bline_p */
+  struct beststr *bbp;
+  int n1tot;
+  char *bp, *bline_p;
+  char rel_label[12];
+  char score_label[120];
+  char tmp_str[20], *seq_code, *annot_str;
+  int seq_code_len, annot_str_len;
+  long loffset;		/* loffset is offset from beginning of real sequence */
+  long l_off;		/* l_off is the virtual coordinate of residue 1 */
+  int n1, ranlib_done;
+  struct rstruct rst;
+  int l_score0, ngap;
+  double lzscore, lzscore2, lbits;
+  float percent, gpercent, ng_percent;
+  struct a_struct *aln_p;
+  struct a_res_str *cur_ares_p;
+  struct rstruct *rst_p;
+  int gi_num;
+  char html_pre_E[120], html_post_E[120];
+  int have_lalign = 0;
+
+  struct lmf_str *m_fptr;
+
+  /* for lalign alignments, only show stuff when -m != 11 */
+
+  if (m_msp->markx & MX_M11OUT) return;
+  if (strcmp(m_msp->label,"ls-w")==0) {
+    have_lalign = 1;
+    if ((m_msp->markx & MX_M9SUMM) == 0) return;
+  }
+
+  rel_label[0]='\0';
+  SAFE_STRNCPY(score_label,"scores", sizeof(score_label));
+
+  quiet = m_msp->quiet;
+
+  /* bline[] holds MAX_BLINE bytes and is terminated at bline[llen]
+     below, so llen must stay strictly below MAX_BLINE */
+  if (m_msp->aln.llen > MAX_BLINE-1) m_msp->aln.llen = MAX_BLINE-1;
+
+  if (ppst->zsflag < 0) r_margin = 10;
+  else if (ppst->zsflag>=0  && m_msp->srelv > 1 ) r_margin = 19;
+  else r_margin = 10;
+
+  if (m_msp->markx & MX_M9SUMM && (m_msp->show_code == SHOW_CODE_ID || m_msp->show_code == SHOW_CODE_IDD)) {
+#ifdef SHOWSIM
+    r_margin += 15;
+#else
+    r_margin += 10;
+#endif
+  }
+  else if (m_msp->markx & MX_MBLAST2) {
+    r_margin -= 10;
+  }
+  else if (m_msp->markx & (MX_M9SUMM + MX_M8OUT)) {
+    r_margin = 0;
+  }
+
+  if (m_msp->markx & MX_HTML) {
+    strncpy(html_pre_E,"<font color=\"darkred\">",sizeof(html_pre_E));
+    strncpy(html_post_E,"</font>",sizeof(html_post_E));
+
+  }
+  else {
+    html_pre_E[0] = html_post_E[0] = '\0';
+  }
+
+  if (m_msp->nframe < 0) {
+    sprintf(fmt,"%%-%ds (%%4d)",m_msp->aln.llen-r_margin);
+  }
+  else {
+    sprintf(fmt,"%%-%ds (%%4d)",m_msp->aln.llen-(r_margin+4));
+  }
+  sprintf(fmt2,"%%-%ds",m_msp->aln.llen-r_margin+8);
+
+  /* pad[] is the run of blanks between "The best ... are:" and the
+     column labels; its length depends on the label that precedes it */
+  sb_set_pad(pad, m_msp->aln.llen-(r_margin+12));
+  if (have_lalign) {
+    if (ppst->show_ident) {
+      SAFE_STRNCPY(score_label,"alignments", sizeof(score_label));
+      sb_set_pad(pad, m_msp->aln.llen-(r_margin+16));
+    }
+    else {
+      SAFE_STRNCPY(score_label,"non-identical alignments", sizeof(score_label));
+      sb_set_pad(pad, m_msp->aln.llen-(r_margin+30));
+    }
+  }
+
+  nshow = min(m_msp->nshow,nbest);
+
+  if ((bp = strchr (m_msp->qtitle, '\n')) != NULL) *bp = '\0';
+  if (m_msp->markx & MX_M8OUT) {
+    if ((bp = strchr (m_msp->qtitle, ' ')) != NULL) *bp = '\0';
+  }
+
+/*   fprintf (fp, "%3d %s\n", qlib,m_msp->qtitle); */
+
+  if (m_msp->markx & MX_HTML) fprintf(fp,"<pre>");
+
+  /* **************************************************************** */
+  /* done with display format */
+  /* **************************************************************** */
+
+  /* **************************************************************** */
+  /* prompt for number of best scores if quiet == 0 */
+  /* **************************************************************** */
+
+  if (quiet == 0) {	/* interactive */
+    nshow = min(m_msp->nshow, nbest);
+    printf(" How many scores would you like to see? [%d] ",nshow);
+    fflush(stdout);
+    if (fgets(rline,20,stdin)==NULL) exit(0);
+    if (rline[0]!='\n' && rline[0]!=0) sscanf(rline,"%d",&nshow);
+    if (nshow > nbest) nshow=nbest;
+    if (nshow<=0) nshow = min(20,nbest);
+  }
+
+  /* display number of hits for -m 8C (Blast Tab-commented format) */
+  if (m_msp->markx & MX_M8COMMENT) {
+    /* line below copied from BLAST+ output */
+    fprintf(fp,"# Fields: query id, subject id, %% identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score");
+    if (ppst->zsflag > 20) {fprintf(fp,", eval2");}
+    if (m_msp->show_code & (SHOW_CODE_ALIGN+SHOW_CODE_CIGAR)) { fprintf(fp,", aln_code");}
+    else if ((m_msp->show_code & SHOW_CODE_BTOP)==SHOW_CODE_BTOP) { fprintf(fp,", BTOP");}
+
+    fprintf(fp,"\n");
+    fprintf(fp,"# %d hits found\n",nshow);
+  }
+
+  /* **************************************************************** */
+  /* have number of scores in interactive or quiet mode */
+  /* display "The best scores are" */
+  /* **************************************************************** */
+
+  if (m_msp->markx & MX_MBLAST2) {
+    fprintf(fp, "%81s\n"," Score     E");
+    fprintf(fp, "Sequences producing significant alignments:                          (Bits)  Value\n\n");
+  }
+  else if (!(m_msp->markx & MX_M8OUT)) {
+    if (ppst->zsflag >= 0) {
+      if (m_msp->z_bits==1) {/* show bit score */
+	fprintf(fp,"\nThe best%s %s are:%s%s bits %sE(%ld)%s",
+		rel_label,score_label,pad,m_msp->label,html_pre_E,ppst->zdb_size,html_post_E);
+	if (ppst->zsflag > 20) {
+	  fprintf(fp," E2()");
+	}
+      }
+      else {/* show z-score */
+	fprintf(fp,"\nThe best%s %s are:%s%s z-sc %sE(%ld)%s",
+		rel_label,score_label,pad,m_msp->label,html_pre_E,ppst->zdb_size,html_post_E);
+	if (ppst->zsflag > 20) {
+	  fprintf(fp," E2()");
+	}
+      }
+      header_aux(fp);
+      if (m_msp->markx & MX_M9SUMM) {
+	if (m_msp->show_code == SHOW_CODE_ID || m_msp->show_code == SHOW_CODE_IDD) {
+#ifdef SHOWSIM
+	  fprintf(fp," %%_id  %%_sim  alen");
+#else
+	  fprintf(fp," %%_id  alen");
+#endif
+	}
+	else {
+	  if (m_msp->markx & MX_HTML && m_msp->show_code != SHOW_CODE_ID && m_msp->show_code != SHOW_CODE_IDD) { fprintf(fp,"<!-- ");}
+#ifndef SHOWSIM
+	  fprintf(fp,"\t%%_id  %%_gid %4s  alen  an0  ax0  pn0  px0  an1  ax1 pn1 px1 gapq gapl  fs ",m_msp->f_id1);
+#else
+	  fprintf(fp,"\t%%_id  %%_sim %4s  alen  an0  ax0  pn0  px0  an1  ax1 pn1 px1 gapq gapl  fs ",m_msp->f_id1);
+#endif
+	}
+	if (m_msp->show_code & (SHOW_CODE_ALIGN+SHOW_CODE_CIGAR)) { fprintf(fp," aln_code"); }
+	if (m_msp->markx & MX_HTML && m_msp->show_code != SHOW_CODE_ID && m_msp->show_code != SHOW_CODE_IDD) { fprintf(fp," -->");}
+      }
+      fprintf(fp,"\n");
+    }
+    else {
+      fprintf(fp,"\nThe best%s %s are:%s%s",rel_label,score_label,pad,m_msp->label);
+      header_aux(fp);
+      if (m_msp->markx & MX_M9SUMM) {
+	if (m_msp->show_code == SHOW_CODE_ID || m_msp->show_code == SHOW_CODE_IDD) {
+#ifdef SHOWSIM
+	  fprintf(fp," %%_id  %%_sm  alen");
+#else
+	  fprintf(fp," %%_id  alen");
+#endif
+	}
+	else {
+#ifndef SHOWSIM
+	  fprintf(fp,"\t%%_id  %%_gid %4s  alen  an0  ax0  pn0  px0  an1  ax1 pn1 px1 gapq gapl  fs ",m_msp->f_id1);
+#else
+	  fprintf(fp,"\t%%_id  %%_sim %4s  alen  an0  ax0  pn0  px0  an1  ax1 pn1 px1 gapq gapl  fs ",m_msp->f_id1);
+#endif	/* SHOWSIM */
+	}
+      }
+      if (m_msp->show_code & (SHOW_CODE_ALIGN+SHOW_CODE_CIGAR)) { fprintf(fp," aln_code"); }
+      fprintf(fp,"\n");
+    }
+  }	/* !(m_msp->markx & MX_M8OUT) */
+
+  istart = 0;
+l1:
+  istop = min(nshow, nbest);
+
+  for (ib=istart; ib<istop; ib++) {
+    bbp = bptr[ib];
+    if (ppst->do_rep) {
+      bbp->repeat_thresh = 
+	min(E1_to_s(ppst->e_cut_r, m_msp->n0, bbp->seq->n1,ppst->zdb_size, m_msp->pstat_void),
+	    bbp->rst.score[ppst->score_ix]);
+    }
+
+#ifdef DEBUG
+    if (bbp->seq->n1 != bbp->n1 ) {
+      fprintf(stderr, " *** lib len error [%d!=%d] *** %s score %d\n",
+	      bbp->seq->n1,bbp->n1, bbp->mseq->libstr, bbp->rst.score[0]);
+    }
+#endif
+
+    /* this gets us a valid bline[] and the library for searching if necessary
+       do not read if we have a long enough bline or we don't need a sequence 
+    */
+    if (bbp->mseq->bline != NULL && bbp->mseq->bline_max >= m_msp->aln.llen) {
+      ranlib_done = 0;
+
+      /* copy m_msp->aln.llen, not llen-r_margin, because the r_margin
+	 will be set later, possibly after the gi|12345 is removed */
+      strncpy(bline,bbp->mseq->bline,m_msp->aln.llen);
+      bline[m_msp->aln.llen]='\0';	/* in bounds: llen <= MAX_BLINE-1 */
+    }
+    else {
+      if ((m_fptr=re_openlib(bbp->mseq->m_file_p,!m_msp->quiet))==NULL) {
+	fprintf(stderr,"*** cannot re-open %s\n",bbp->mseq->m_file_p->lb_name);
+	exit(1);
+      }
+      RANLIB(bline,m_msp->aln.llen,bbp->mseq->lseek,bbp->mseq->libstr,m_fptr);
+      ranlib_done = 1;
+    }
+
+    /* get a valid cur_ares_p chain and put it in bbp->ares */
+    if (!m_msp->align_done && (m_msp->stages>1 || (m_msp->markx & MX_M9SUMM))) {	/* we need a sequence */
+      if (bbp->seq->aa1b == NULL || (m_msp->ann_flg==1 && bbp->seq->annot_p==NULL)) {
+	if (!ranlib_done) {	/* we didn't open the library already */
+	  if ((m_fptr=re_openlib(bbp->mseq->m_file_p,!m_msp->quiet))==NULL) {
+	    fprintf(stderr,"*** cannot re-open %s\n",bbp->mseq->m_file_p->lb_name);
+	    exit(1);
+	  }
+	  RANLIB(bline,m_msp->aln.llen,bbp->mseq->lseek,bbp->mseq->libstr,m_fptr);
+	  ranlib_done = 1;
+	}
+	n1 = re_getlib(aa1save,
+		       (m_msp->ann_flg==1) ? &(bbp->seq->annot_p) : NULL, 
+		       maxn,m_msp->ldb_info.maxt3,
+		       m_msp->ldb_info.l_overlap,bbp->mseq->cont,m_msp->ldb_info.term_code,
+		       &bbp->seq->l_offset,&bbp->seq->l_off,bbp->mseq->m_file_p);
+
+	aa1 = aa1save;
+
+	if (m_msp->ann_flg==2 && bbp->seq->annot_p==NULL ) {
+	  /* get information about this sequence from bline */
+	  if (get_annot(m_msp->annot1_sname, m_msp, bline, bbp->seq->n1, &(bbp->seq->annot_p), 1, ppst->debug_lib) > 0) {
+	    /* do something with annotation */
+	    s_annot_to_aa1a(bbp->n1, bbp->seq->annot_p, m_msp->ann_arr);
+	  }
+	}
+      }
+      else {
+	n1 = bbp->seq->n1;
+	aa1 = bbp->seq->aa1b;
+      }
+
+      if (n1 != bbp->n1) {
+	fprintf(stderr," *** sequence length conflict %d != %d: %s\n", n1, bbp->n1, bline);
+	continue;
+      }
+
+      if ( m_msp->stages > 1 && bbp->rst.score[2] == -BIGNUM) { 
+	/* this is not typically done unless m_msp->stages > 1 */
+	do_opt (aa0[bbp->frame], m_msp->n0, aa1, n1, bbp->frame, ppst, f_str[bbp->frame], &rst);
+	bbp->rst.score[2]=rst.score[2];
+      }
+
+      /* build the alignment unless bit 0x1 of have_ares is already
+	 set.  The parentheses matter: '!' binds tighter than '&', so
+	 the unparenthesised form tested (!have_ares) & 1 instead. */
+      if (!(bbp->have_ares & 0x1)) {
+	bbp->a_res = build_ares_code(aa0[bbp->frame], m_msp->n0, aa1, bbp->seq,
+				     bbp->frame, &bbp->have_ares,
+				     bbp->repeat_thresh, m_msp, ppst, f_str[bbp->frame] );
+	best_align_done = 1;
+      }
+    }	/* end stages > 1 || MX_M9SUMM */
+
+    n1tot = (bbp->mseq->n1tot_p) ? *bbp->mseq->n1tot_p : bbp->seq->n1;
+
+    /* skip a leading "gi|12345|" unless -m 8 output was requested; a
+       description with no second '|' is left untouched */
+    bline_p = bline;
+    if (!(m_msp->markx & (MX_M8OUT)) && !strncmp(bline,"gi|",3)) {
+      if (bline[3] != '\0' && (bp = strchr(bline+4,'|')) != NULL) {
+	*bp = 0;
+	bline_p = bp+1;
+	gi_num = atoi(bline+3);
+      }
+    }
+
+  /* l_name is used to build an HTML link from the bestscore line to
+     the alignment.  It can also be used to discriminate multiple hits
+     from the same long sequence.  This requires that fast_pan use -m 6.
+
+     (6-April-2013) Add ability to specify additional alignments with
+     link_name;
+  */
+
+    SAFE_STRNCPY(l_name,bline_p,sizeof(l_name)); /* get rid of text after second "|" */
+    if ((bp=strchr(l_name,' '))!=NULL) *bp=0;
+    /* increase to [6] from [3] to allow longer db names "ref", "unk";
+       only look past offset 6 when the name is actually that long */
+    if (strlen(l_name) > 6 && (bp=strchr(&l_name[6],'|'))!=NULL) *bp='\0';
+    if (m_msp->nframe > 2) sprintf(&l_name[strlen(l_name)],"_%d",bbp->frame+1);
+    else if (m_msp->nframe > 0 && bbp->frame == 1)
+      SAFE_STRNCAT(l_name,"_r",sizeof(l_name));
+    if (bbp->mseq->cont-1 > 0) {
+      sprintf(tmp_str,":%d",bbp->mseq->cont-1);
+      SAFE_STRNCAT(l_name,tmp_str,sizeof(l_name));
+    }
+
+    if (m_msp->markx & MX_M8OUT) {
+      if ((bp=strchr(bline_p,' '))!=NULL) *bp = '\0';
+    }
+    else {
+      /* truncate the description to the display width, leaving four
+	 more columns for translated frame info when nframe > -1; the
+	 write is skipped if it would fall outside bline[] */
+      bl_room = MAX_BLINE - (int)(bline_p - bline);
+      bl_trunc = m_msp->aln.llen-r_margin;
+      if (m_msp->nframe > -1) bl_trunc = m_msp->aln.llen-(r_margin+4);
+      if (bl_trunc >= 0 && bl_trunc < bl_room) bline_p[bl_trunc]='\0';
+    }
+    /* now its time to report the summary numbers for all the alignments */
+
+    /* in the next loop, cur_ares_p could be NULL if we haven't done do_walign() */
+    cur_ares_p = bbp->a_res;
+
+    first_line = 1;
+    do {
+      /* if cur_res_p != NULL, then we get rst from a_res->rst
+	 Otherwise, it comes from bbp->rst
+      */
+
+      if ((!first_line || (have_lalign && !ppst->show_ident)) && cur_ares_p ) {
+	rst_p = &cur_ares_p->rst;
+      }
+      else {
+	rst_p = &bbp->rst;
+      }
+
+      n1 = bbp->seq->n1;
+      l_score0 = rst_p->score[ppst->score_ix];
+      lzscore = find_z(l_score0, rst_p->escore, n1, rst_p->comp, m_msp->pstat_void);
+      if (ppst->zsflag > 20) {
+	lzscore2 = find_z(l_score0, rst_p->escore, n1, rst_p->comp, m_msp->pstat_void2);
+      }
+      lbits = zs_to_bit(lzscore, m_msp->n0, n1);
+
+      /* *********************************** */
+      /* standard "The best scores are" here */
+      /* *********************************** */
+
+      if (!(m_msp->markx & (MX_M8OUT + MX_MBLAST2))) {
+	if (first_line) {
+	  first_line = 0;
+	  fprintf (fp, fmt,bline_p,n1tot);
+	  if (m_msp->nframe > 2) fprintf (fp, " [%d]", bbp->frame+1);
+	  else if (m_msp->nframe >= 0) fprintf(fp," [%c]",(bbp->frame > 0 ?'r':'f'));
+	}
+	else {
+	  fprintf (fp, fmt2,"\n+-");
+	}
+
+	if (m_msp->srelv == 1) fprintf (fp, " %4d", rst_p->score[ppst->score_ix]);
+	else {
+	  if (m_msp->srelv-1 > 0) fprintf (fp, " %4d", rst_p->score[0]);
+	  if (m_msp->srelv-1 > 1 || m_msp->stages>1)
+	    fprintf (fp, " %4d", rst_p->score[1]);
+	  fprintf (fp, " %4d", rst_p->score[ppst->score_ix]);
+	}
+
+	if (ppst->zsflag>=0) { 
+	  if (m_msp->z_bits==1) {
+	    fprintf (fp, " %.1f %s%7.2g%s",lbits,html_pre_E,
+		     zs_to_E(lzscore, n1, ppst->dnaseq, ppst->zdb_size, m_msp->db),
+		     html_post_E);
+	    if (ppst->zsflag > 20) {
+	      fprintf (fp, " %7.2g",zs_to_E(lzscore2, n1, ppst->dnaseq, ppst->zdb_size, m_msp->db));
+	    }
+	  }
+	  else {
+	    fprintf (fp, " %.1f %s%7.2g%s",lzscore,html_pre_E,
+		     zs_to_E(lzscore, n1, ppst->dnaseq, ppst->zdb_size, m_msp->db),
+		     html_post_E);
+	    if (ppst->zsflag > 20) {
+	      fprintf (fp, " %7.2g",zs_to_E(lzscore2, n1, ppst->dnaseq, ppst->zdb_size, m_msp->db));
+	    }
+	  }
+	}
+	show_aux(fp,bbp);
+      }
+      else if (m_msp->markx & MX_M8OUT) {	/* MX_M8OUT -- provide query, library */
+	if (first_line) {first_line = 0;}
+	fprintf (fp,"%s\t%s",m_msp->qtitle,bline_p);
+      }
+      else if (m_msp->markx & MX_MBLAST2) {	/* blast "Sequences producing" */ 
+	if (first_line) {first_line = 0;}
+	fprintf (fp,"%-67s %6.1f    %.1g", bline_p, lbits,
+		    zs_to_E(lzscore,n1,ppst->dnaseq,ppst->zdb_size,m_msp->db));
+      }
+
+      if (cur_ares_p == NULL) {
+	/* no alignment result is attached to this hit, so the -m 9 /
+	   -m 8 detail columns (which all come from cur_ares_p) cannot
+	   be produced.  An -m 8 row is otherwise terminated inside the
+	   detail branch below, so finish the line here. */
+	if (m_msp->markx & MX_M8OUT) fprintf(fp,"\n");
+      }
+      else if (m_msp->markx & MX_M9SUMM || m_msp->markx & MX_M8OUT) {
+	loffset = bbp->seq->l_offset;
+	l_off = bbp->seq->l_off;
+	aln_p = &cur_ares_p->aln;
+	seq_code = cur_ares_p->aln_code;
+	seq_code_len = cur_ares_p->aln_code_n;
+	annot_str = cur_ares_p->annot_code;
+	annot_str_len = cur_ares_p->annot_code_n;
+
+	ngap = cur_ares_p->aln.ngap_q + cur_ares_p->aln.ngap_l;
+        percent = calc_fpercent_id(100.0,aln_p->nident,aln_p->lc, m_msp->tot_ident, -100.0);
+        ng_percent = calc_fpercent_id(100.0,aln_p->nident,aln_p->lc-ngap, m_msp->tot_ident, -100.0);
+
+#ifndef SHOWSIM
+	gpercent = calc_fpercent_id(100.0, aln_p->nident, aln_p->lc-ngap, m_msp->tot_ident, -100.0);
+#else
+	gpercent = calc_fpercent_id(100.0, cur_ares_p->aln.nsim, aln_p->lc, m_msp->tot_ident, -100.0);
+#endif	/* SHOWSIM */
+
+	if (m_msp->show_code != SHOW_CODE_ID && m_msp->show_code != SHOW_CODE_IDD) {	/* show more complete info than just identity */
+
+	  /*  	calc_astruct(aln_p, cur_ares_p); -- this function
+		should not be used after calc_code or any other
+		alignment that calculates amax0/amax1 */
+
+	  /* we need the coordinates for annotated SHOW_CODE_ALIGN */
+	  calc_coord(m_msp->n0,bbp->seq->n1,
+		     m_msp->q_offset + (m_msp->q_off-1) + (m_msp->sq0off-1),
+		     loffset + (l_off-1) + (m_msp->sq1off-1),
+		     aln_p);
+
+	  /* if (m_msp->markx & MX_HTML) fprintf(fp,"<!-- "); */
+	  /*            %_id  %_sim s-w alen an0  ax0  pn0  px0  an1  ax1  pn1  px1 gapq gapl fs  */
+	  /*                    alignment    min  max            min  max */
+	  /*                    sequence coordinate    min  max            min  max */
+	  if (!(m_msp->markx & MX_M8OUT)) {
+	    fprintf(fp,"\t%5.3f %5.3f %4d %4d %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %3d %3d %3d",
+		    percent/100.0,gpercent/100.0, 
+		    cur_ares_p->sw_score,
+		    aln_p->lc,
+		    aln_p->d_start0,aln_p->d_stop0,
+		    aln_p->q_start_off, aln_p->q_end_off,
+		    aln_p->d_start1,aln_p->d_stop1,
+		    aln_p->l_start_off, aln_p->l_end_off,
+		    aln_p->ngap_q,aln_p->ngap_l,aln_p->nfs);
+	    if ((m_msp->show_code & (SHOW_CODE_ALIGN+SHOW_CODE_CIGAR+SHOW_CODE_BTOP))
+		&& seq_code_len > 0 && seq_code != NULL) {
+	      fprintf(fp,"\t%s",seq_code);
+	      if (annot_str_len > 0 && annot_str != NULL) {
+		fprintf(fp,"\t%s",annot_str);
+	      }
+	    }
+	  }
+	  else {	/* MX_M8OUT -- blast order, tab separated */
+	    fprintf(fp,"\t%.2f\t%d\t%d\t%d\t%ld\t%ld\t%ld\t%ld\t%.2g\t%.1f",
+		    ng_percent,aln_p->lc,aln_p->nmismatch,
+		    aln_p->ngap_q + aln_p->ngap_l+aln_p->nfs,
+		    aln_p->d_start0, aln_p->d_stop0,
+		    aln_p->d_start1, aln_p->d_stop1,
+		    zs_to_E(lzscore,n1,ppst->dnaseq,ppst->zdb_size,m_msp->db),
+		    lbits);
+	    if (ppst->zsflag > 20) {
+	      fprintf(fp,"\t%.2g",zs_to_E(lzscore2, n1, ppst->dnaseq, ppst->zdb_size, m_msp->db));
+	    }
+	    if ((m_msp->show_code & (SHOW_CODE_ALIGN+SHOW_CODE_CIGAR+SHOW_CODE_BTOP)) && seq_code_len > 0 && seq_code != NULL) {
+	      fprintf(fp,"\t%s",seq_code);
+	      if (annot_str_len > 0 && annot_str != NULL) {
+		fprintf(fp,"\t%s",annot_str);
+	      }
+	    }
+	    fprintf(fp,"\n");
+	  }
+	}
+	else {	/* !SHOW_CODE -> SHOW_ID or SHOW_IDD*/
+#ifdef SHOWSIM
+	  fprintf(fp," %5.3f %5.3f %4d", 
+		  percent/100.0,
+		  (float)aln_p->nsim/(float)aln_p->lc,aln_p->lc);
+#else
+	  fprintf(fp," %5.3f %4d", percent/100.0,aln_p->lc);
+#endif
+	  if (m_msp->markx & MX_HTML) {
+	    if (cur_ares_p->index > 0) {
+	      sprintf(link_name,"%s_%d",l_name, cur_ares_p->index);
+	    }
+	    else {
+	      SAFE_STRNCPY(link_name, l_name, sizeof(l_name));
+	    }
+	    fprintf(fp," <a href=\"#%s\">align</a>",link_name);
+	    link_shown = 1;
+	  }
+	  else { link_shown = 0;}
+
+	  if ((m_msp->show_code & SHOW_CODE_ID) == SHOW_CODE_ID) {
+	    annot_str = cur_ares_p->annot_var_id;
+	  }
+	  else if ((m_msp->show_code & SHOW_CODE_IDD) == SHOW_CODE_IDD) {
+	    annot_str = cur_ares_p->annot_var_idd;
+	  }
+	  else {
+	    annot_str = NULL;
+	  }
+	  if (annot_str && annot_str[0]) {
+	    fprintf(fp," %s",annot_str);
+	  }
+	}
+      }
+    } while ( cur_ares_p && (cur_ares_p = cur_ares_p->next));
+
+    /*    if ((m_msp->markx & MX_HTML) && !link_shown) fprintf(fp," <a href=\"#%s\">align</a>",l_name); */
+    if (!(m_msp->markx & MX_M8OUT)) fprintf(fp, "\n");
+    fflush(fp);
+  }	/* end of for (ib) loop */
+
+  /* interactive mode: offer to extend the list; a positive reply
+     jumps back to l1 and continues with the next hits */
+  if (quiet==0) {
+    printf(" More scores? [0] ");
+    fflush(stdout);
+    if (fgets(rline,20,stdin)==NULL) exit(0);
+    ntmp = 0;
+    if (rline[0]!='\n' && rline[0]!=0) sscanf(rline,"%d",&ntmp);
+    if (ntmp<=0) ntmp = 0;
+    if (ntmp>0) {
+      istart = istop;
+      nshow = min(nshow+ntmp, nbest);
+      goto l1;
+    }
+  }	/* end of if (quiet==0) */
+
+  if (m_msp->markx & MX_MBLAST2) {fprintf(fp, "\n\n");}
+
+  m_msp->nshow = nshow;	/* save the number of hits displayed for showalign */
+
+  if (best_align_done) { m_msp->align_done = 1;}	/* note that alignments are done */
+
+  if (m_msp->markx & MX_HTML) fprintf(fp,"</pre><hr>\n");
+}
diff --git a/src/mw.h b/src/mw.h
new file mode 100644
index 0000000..eafd43a
--- /dev/null
+++ b/src/mw.h
@@ -0,0 +1,49 @@
+/* Concurrent read version */
+
+/* $Id: mw.h 625 2011-03-23 17:21:38Z wrp $ */
+/* $Revision: 625 $  */
+
+#include "param.h"
+
+#ifndef FSEEK_T_DEF
+#ifndef USE_FSEEKO
+typedef long fseek_t;
+#else
+typedef off_t fseek_t;
+#endif
+#endif
+
+/* One entry in the list of best-scoring library sequences.  showbest()
+   in mshowbest.c receives an array of pointers to these (bptr[]) and
+   prints one summary line per entry, plus one per additional a_res
+   chain member. */
+struct beststr {
+  struct seq_record *seq;	/* sequence info */
+  struct mseq_record *mseq;	/* sequence meta-info */
+  struct beststr *bbp_link;	/* link to a previous beststr entry with the same sequence */
+  struct rstruct rst;		/* results info */
+
+  int n1;		/* duplicate of seq.n1, used for error checking/debugging */
+#ifdef DEBUG
+  long adler32_crc;	/* duplicate of seq.adler32_crc for error checking/debugging */
+#endif
+  int frame;		/* in buf2_str */
+  int repeat_thresh;	/* threshold for additional alignments */
+  double zscore;	/* the z-score mostly exists for sorting best scores */
+  double zscore2;	/* z-score - from high-scoring shuffles  */
+  double bit_score;	/* move to bit-scores for consistency */
+  double bit_score2;	/* bit-score for second shuffle */
+
+  int a_res_cnt;	/* NOTE(review): presumably the number of entries in the a_res chain -- confirm */
+  struct a_res_str *a_res;	/* need only a_res, not a_res[2], because different frames
+				   for the same sequence are stored separately */
+  int have_ares;	/* flag word: showbest() tests bit 0x1 before calling
+			   build_ares_code(), which receives &have_ares */
+  float percent, gpercent;	/* NOTE(review): presumably cached identity values; not
+				   referenced by showbest() -- confirm */
+};
+
+/* NOTE(review): no user of struct stat_str is visible in this header.
+   The field notes below are inferred from the field names only and
+   must be confirmed against the statistics code. */
+struct stat_str {
+  int score;		/* presumably a raw similarity score -- confirm */
+  int n1;		/* presumably the library sequence length -- confirm */
+  double comp;		/* TODO confirm meaning */
+  double H;		/* TODO confirm meaning */
+  double escore;	/* TODO confirm meaning */
+  int segnum;		/* TODO confirm meaning */
+  int seglen;		/* TODO confirm meaning */
+};
+
diff --git a/src/mysql_lib.c b/src/mysql_lib.c
new file mode 100644
index 0000000..41362ca
--- /dev/null
+++ b/src/mysql_lib.c
@@ -0,0 +1,636 @@
+/* $Id: mysql_lib.c 817 2011-08-02 03:54:02Z wrp $ */
+/* $Revision: 817 $  */
+
+/* mysql_lib.c copyright (c) 2000, 2014 by William R. Pearson and The
+   Rector & Visitors of the University of Virginia */
+
+/* Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under this License is distributed on an "AS
+   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+   express or implied.  See the License for the specific language
+   governing permissions and limitations under the License. 
+*/
+
+/* functions for opening, reading, seeking a mySQL database */
+
+/* (close_tables added June, 2011)
+  For the moment, this interface assumes that the file to be searched
+  will be specified in a single, long, string with 4 required and 1
+  optional parts:
+
+  (1) a database open string. This string has four fields, separated by
+      whitespace (' \t'):
+        hostname:port dbname user password
+
+   '--' dashes at the beginning of lines are ignored -
+   thus the first line could be:
+   -- hostname:port dbname user password
+
+  (2) a database query string that will return an unique ID (not
+      necessarily numeric, but it must be < 12 characters as libstr[12]
+      is used) and a sequence string
+
+  (2a) a series of mySQL commands that do not generate results
+       starting with 'DO', followed by a select() statement.
+
+  (3) a database select string that will return a description
+      given a unique ID
+
+  (4) a database select string that will return a sequence given a
+      unique ID
+
+  (5) [optional] an SQL statement to be run when closing the database
+      (e.g. a DROP TABLE statement)
+
+   Lines (3) and (4) are not required for pv34comp* libraries, but
+   line (2) must generate a complete description as well as a sequence.
+
+   18-July-2001
+   Additional syntax has been added to support multiline SQL queries.
+
+   If the host line begins with '+', then the SQL is opened on the same
+   connection as the previous SQL file.
+
+   If the host line contains '-' just before the terminal ';', then
+   the file will not produce any output.
+
+   This string can contain "\n". ";" are used to separate the four
+   functions, which must be specified in the order shown above.
+   The last (fourth) query must terminate with a ';' */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#include <mysql.h>
+#define MYSQL_LIB 16
+
+#include "defs.h"
+#include "structs.h"
+#include "mm_file.h"
+
+#define XTERNAL
+#include "uascii.h"
+#define EOSEQ 0
+/* #include "upam.h" */
+
+char *alloc_file_name(char *f_name);
+int mysql_getlib(unsigned char *, int, char *, int, fseek_t *, int *, struct lmf_str *, long *);
+void mysql_ranlib(char *, int, fseek_t, char *, struct lmf_str *m_fd);
+
+#define MYSQL_BUF 4096
+
+struct lmf_str *
+mysql_openlib(char *sname, int ldnaseq, int *sascii) {
+  FILE *sql_file;
+  char *tmp_str, *ttmp_str;
+  int tmp_str_len;
+  char *bp, *bps, *bdp, *tp, tchar;
+  int i, qs_len, qqs_len;
+  char *sql_db, *sql_host, *sql_dbname, *sql_user, *sql_pass;
+  char *sql_do;
+  int sql_do_cnt;
+  int sql_port;
+  struct lmf_str *m_fptr;
+
+  /*  if (sql_reopen) return NULL; - should not be called for re-open */
+
+  tmp_str_len = MYSQL_BUF;
+  if ((tmp_str=(char *)calloc(tmp_str_len,sizeof(char)))==NULL) {
+    fprintf(stderr,"cannot allocate %d for mySQL buffer\n",tmp_str_len);
+    return NULL;
+  }
+
+  /* immediate mysql scripts start with '%' */
+  if (sname[0] == '%') {
+    strncpy(tmp_str,sname+1,tmp_str_len);
+    tmp_str[sizeof(tmp_str)-1]='\0';
+  }
+  else {	/* read the script from a file */
+    if ((sql_file=fopen(sname,"r"))==NULL) {
+      fprintf(stderr," cannot open mySQL file: %s\n",sname);
+      return NULL;
+    }
+
+    if ((qs_len=fread(tmp_str,sizeof(char),tmp_str_len-1,sql_file))<=0) {
+      fprintf(stderr," cannot read mySQL file: %s\n",sname);
+      return NULL;
+    }
+    else  {	/* read the entire file in MYSQL_BUF (4096) byte
+		   chunks, reallocating as necessary */
+      tmp_str[qs_len]='\0';
+      qqs_len = qs_len;
+      while (qqs_len >= tmp_str_len-1) {
+	tmp_str_len += MYSQL_BUF;
+	if ((tmp_str=(char *)realloc(tmp_str,tmp_str_len))==NULL) {
+	  fprintf(stderr,
+		  " cannot reallocate %d for mySQL buffer\n",tmp_str_len);
+	  return NULL;
+	}
+	ttmp_str = &tmp_str[qqs_len];
+	if ((qs_len=fread(ttmp_str,sizeof(char),MYSQL_BUF,sql_file))<0) {
+	  fprintf(stderr," cannot read mySQL file: %s\n",sname);
+	  return NULL;
+	}
+	ttmp_str[qs_len]='\0';
+	qqs_len += qs_len;
+      }
+    }
+    fclose(sql_file);
+  }
+
+  /* tmp_str has the entire contents of the file */
+  bps = tmp_str;
+  if ((bp=strchr(bps,';'))!=NULL) {
+    /* get the connection info */
+    *bp='\0';
+    if ((sql_db=calloc(strlen(bps)+1,sizeof(char)))==NULL) {
+      fprintf(stderr, " cannot allocate space for database name [%d], %s\n",
+	      (int)strlen(bps),bps);
+      return NULL;
+    }
+    /* have database name, parse the fields */
+    else {  /* copy connection info into sql_db and parse */
+      strcpy(sql_db,bps);	/* strcpy OK because allocated strlen(bps) */
+      bps = bp+1;	/* points to next char after ';' */
+      while (isspace(*bps)) bps++;
+      *bp=';'; /* replace ; */
+      bp = sql_db;
+      while (*bp=='-') {*bp++ = ' ';}
+      sql_host = strtok(bp," \t\n");
+      sql_dbname = strtok(NULL," \t\n");
+      sql_user = strtok(NULL," \t\n");
+      sql_pass = strtok(NULL," \t\n");
+      if ((tp=strchr(sql_host,':'))!=NULL) {
+	*tp='\0';
+	sql_port=atoi(tp+1);
+      }
+      else sql_port = 0;
+    }
+  }
+  else {
+    fprintf(stderr," cannot find database fields:\n%s\n",tmp_str);
+    return NULL;
+  }
+
+  /* we have all the info we need to open a database, allocate lmf_str */
+  if ((m_fptr = (struct lmf_str *)calloc(1,sizeof(struct lmf_str)))==NULL) {
+    fprintf(stderr," cannot allocate lmf_str (%ld) for %s\n",
+	    sizeof(struct lmf_str),sname);
+    return NULL;
+  }
+
+  /* have our struct, initialize it */
+
+  m_fptr->lb_name = alloc_file_name(sname);
+
+  m_fptr->sascii = sascii;
+
+  m_fptr->sql_db = sql_db;
+  m_fptr->getlib = mysql_getlib;
+  m_fptr->ranlib = mysql_ranlib;
+  m_fptr->mm_flg = 0;
+  m_fptr->sql_reopen = 0;
+  m_fptr->lb_type = MYSQL_LIB;
+
+  /* now open the database, if necessary */
+  if ((m_fptr->mysql_conn=mysql_init(NULL))==NULL) {
+    fprintf(stderr,"*** Error - mysql_init\n");
+    goto error_r;
+  }
+
+  if (mysql_real_connect(m_fptr->mysql_conn,
+			 sql_host,sql_user,sql_pass,
+			 sql_dbname,
+			 sql_port,
+			 NULL,
+			 0)==NULL)
+    {
+      fprintf(stderr,"*** Error %u - could  not open database:\n%s\n%s",
+	      mysql_errno(m_fptr->mysql_conn),tmp_str,
+	      mysql_error(m_fptr->mysql_conn));
+      goto error_r;
+    }
+#ifdef DEBUG
+  else {
+    fprintf(stderr," Database %s opened on %s\n",sql_dbname,sql_host);
+  }
+#endif
+
+  /* check for 'DO' command - copy to 'DO' string */
+  while (*bps == '-') { *bps++=' ';}
+  if (isspace(bps[-1]) && toupper(bps[0])=='D' &&
+      toupper(bps[1])=='O' && isspace(bps[2])) {
+    /* have some 'DO' commands */
+    /* check where the end of the last DO statement is */
+
+    sql_do_cnt = 1;	/* count up the number of 'DO' statements for later */
+    bdp=bps+3;
+    while ((bp=strchr(bdp,';'))!=NULL) {
+      tp = bp+2; /* skip ;\n */
+      while (isspace(*tp) || *tp == '-') {*tp++ = ' ';}
+      if (toupper(*tp)=='D' && toupper(tp[1])=='O' && isspace(tp[2])) {
+	sql_do_cnt++;		/* count the DO statements */
+	bdp = tp+3;		/* move to the next DO statement */
+      }
+      else break;
+    }
+    if (bp != NULL) {	/* end of the last DO, begin of select */
+      tchar = *(bp+1);
+      *(bp+1)='\0';		/* terminate DO strings */
+      if ((sql_do = calloc(strlen(bps)+1, sizeof(char)))==NULL) {
+	fprintf(stderr," cannot allocate %d for sql_do\n",(int)strlen(bps));
+	goto error_r;
+      }
+      else {
+	strcpy(sql_do,bps);
+	*(bp+1)=tchar;	/* replace missing ';' */
+      }
+      bps = bp+1;
+      while (isspace(*bps)) bps++;
+    }
+    else {
+      fprintf(stderr," terminal ';' not found: %s\n",bps);
+      goto error_r;
+    }
+    /* all the DO commands are in m_fptr->sql_do in the form: 
+     DO command1; DO command2; DO command3; */
+    bdp = sql_do;
+    while (sql_do_cnt-- && (bp=strchr(bdp,';'))!=NULL) {
+      /* do the mysql statement on bdp+3 */
+      /* check for error */
+      *bp='\0';
+      if (mysql_query(m_fptr->mysql_conn,bdp+3)) {
+	fprintf(stderr,"*** Error %u - query failed:\n%s\n%s\n",
+		mysql_errno(m_fptr->mysql_conn), bdp+3, mysql_error(m_fptr->mysql_conn));
+	goto error_r;
+      }
+      *bp=';';
+      bdp = bp+1;
+      while (isspace(*bdp)) bdp++;
+    }
+  }
+
+  /* copy 1st query field */
+  if ((bp=strchr(bps,';'))!=NULL) {
+    *bp='\0';
+    if ((m_fptr->sql_query=calloc(strlen(bps)+1,sizeof(char)))==NULL) {
+      fprintf(stderr, " cannot allocate space for query string [%d], %s\n",
+	      (int)strlen(bps),bps);
+      goto error_r;
+    }
+    /* have query, copy it */
+    else {
+      strcpy(m_fptr->sql_query,bps);
+      *bp=';'; /* replace ; */
+      bps = bp+1;
+      while(isspace(*bps)) bps++;
+    }
+  }
+  else {
+    fprintf(stderr," cannot find database query field:\n%s\n",tmp_str);
+    goto error_r;
+  }
+
+  /* copy get_desc field */
+  if ((bp=strchr(bps,';'))!=NULL) {
+    *bp='\0';
+    if ((m_fptr->sql_getdesc=calloc(strlen(bps)+1,sizeof(char)))==NULL) {
+      fprintf(stderr, " cannot allocate space for database name [%d], %s\n",
+	      (int)strlen(bps),bps);
+      goto error_r;
+    }
+    /* have get_desc, copy it */
+    else {
+      strcpy(m_fptr->sql_getdesc,bps);
+      *bp=';'; /* replace ; */
+      bps = bp+1;
+      while(isspace(*bps)) bps++;
+    }
+  }
+  else {
+    fprintf(stderr," cannot find getdesc field:\n%s\n",tmp_str);
+    goto error_r;
+  }
+
+  if ((bp=strchr(bps,';'))!=NULL) { *bp='\0';}
+
+  if ((m_fptr->sql_getseq=calloc(strlen(bps)+1,sizeof(char)))==NULL) {
+    fprintf(stderr, " cannot allocate space for database name [%d], %s\n",
+	    (int)strlen(bps),bps);
+    goto error_r;
+  }
+
+  if (strlen(bps) > 0) {
+    strcpy(m_fptr->sql_getseq,bps);
+    bps = bp+1;
+  }
+  else {
+    fprintf(stderr," cannot find getseq field:\n%s\n",tmp_str);
+    return 0;
+  }
+  if (bp!=NULL) *bp=';';
+
+  /* check for close_table statement */
+  if ((bp=strchr(bps,';'))!=NULL) {
+    *bp='\0';
+    if ((m_fptr->sql_close_tables=calloc(strlen(bps)+1,sizeof(char)))==NULL) {
+      fprintf(stderr, " cannot allocate space for close_tables [%d], %s\n",
+	      (int)strlen(bps),bps);
+      goto error_r;
+    }
+    /* have get_desc, copy it */
+    else {
+      strcpy(m_fptr->sql_close_tables,bps);
+      *bp=';'; /* replace ; */
+      bps = bp+1;
+      while(isspace(*bps)) bps++;
+    }
+  }
+
+  /* now do the query */    
+
+  if (mysql_query(m_fptr->mysql_conn,m_fptr->sql_query)) {
+    fprintf(stderr,"*** Error %u - query failed:\n%s\n%s\n",
+	    mysql_errno(m_fptr->mysql_conn), m_fptr->sql_query, mysql_error(m_fptr->mysql_conn));
+    goto error_r;
+  }
+
+  if ((m_fptr->mysql_res = mysql_use_result(m_fptr->mysql_conn)) == NULL) {
+    fprintf(stderr,"*** Error = use result failed\n%s\n",
+	    mysql_error(m_fptr->mysql_conn));
+    goto error_r;
+  }
+  return m_fptr;
+
+ error_r:
+  if (m_fptr->sql_close_tables) free(m_fptr->sql_close_tables);
+  if (m_fptr->sql_getseq) free(m_fptr->sql_getseq);
+  if (m_fptr->sql_getdesc) free(m_fptr->sql_getdesc);
+  if (m_fptr->sql_query) free(m_fptr->sql_query);
+  free(m_fptr);
+  free(sql_db);
+  return NULL;
+}
+
+/* mysql_reopen() - mark an already-opened mySQL library as re-opened.
+   Nothing is reconnected here; the sql_reopen flag is set to 1 and the
+   same lmf_str is handed back to the caller. */
+struct lmf_str *
+mysql_reopen(struct lmf_str *m_fptr)
+{
+  struct lmf_str *lib_fd = m_fptr;
+
+  lib_fd->sql_reopen = 1;
+  return lib_fd;
+}
+
+/* mysql_closelib() - release the result set and the connection held
+   by a mySQL library descriptor.
+
+   m_fptr may be NULL, in which case nothing is done.  Otherwise:
+   (1) a pending result set is freed;
+   (2) if a sql_close_tables statement was given, it is run; a failure
+       is reported on stderr but does not prevent the close;
+   (3) the connection is closed.
+
+   The result and connection pointers are reset to NULL once released,
+   so a second call (or a later access) cannot free or use a stale
+   handle.  The lmf_str itself and its query strings are not freed
+   here.
+*/
+void
+mysql_closelib(struct lmf_str *m_fptr) {
+
+  if (m_fptr == NULL) return;
+
+  if (m_fptr->mysql_res != NULL) {
+    mysql_free_result(m_fptr->mysql_res);
+    m_fptr->mysql_res = NULL;
+  }
+
+  /* only talk to the server if we still have a connection */
+  if (m_fptr->mysql_conn != NULL) {
+    if (m_fptr->sql_close_tables) {
+      if (mysql_query(m_fptr->mysql_conn,m_fptr->sql_close_tables)) {
+	fprintf(stderr,"*** Error %u - close_tables failed:\n%s\n%s\n",
+		mysql_errno(m_fptr->mysql_conn), m_fptr->sql_close_tables,
+		mysql_error(m_fptr->mysql_conn));
+      }
+    }
+    mysql_close(m_fptr->mysql_conn);
+    m_fptr->mysql_conn = NULL;
+  }
+  m_fptr->sql_reopen=0;
+}
+
+/*
+static char *sql_seq = NULL, *sql_seqp;
+static int sql_seq_len;
+static MYSQL_ROW sql_row;
+*/
+
+/* mysql_getlib() - return the next (piece of a) library sequence from
+   the current mySQL result set.
+
+   seq[maxs]  buffer that receives the residues, mapped through
+              lm_fd->sascii and terminated with EOSEQ
+   libstr     receives the unique identifier (column 0), or, when
+              n_libstr > MAX_UID, the description (column 2) if it is
+              not NULL
+   libpos     set to atol() of column 0
+   lcont      0 on entry: fetch a new row; non-zero: continue copying
+              the current sequence from lm_fd->sql_seqp.  On return it
+              is incremented if the buffer filled, otherwise reset to 0
+   l_off      set to 1, or to the value following "@C:" in column 2
+
+   Returns the number of residues stored in seq, or -1 when no more
+   rows are available (the result set is then freed).
+*/
+int
+mysql_getlib( unsigned char *seq,
+	      int maxs,
+	      char *libstr,
+	      int n_libstr,
+	      fseek_t *libpos,
+	      int *lcont,
+	      struct lmf_str *lm_fd,
+	      long *l_off)
+{
+  register unsigned char *cp, *seqp;
+  register int *ap;
+  unsigned char *seqm, *seqm1;
+  char *bp;
+  /*   int l_start, l_stop, len; */
+
+  seqp = seq;
+  /* stop 10 short of the end of seq[]: the copy loop below can store
+     up to 10 residues after each bounds check */
+  seqm = &seq[maxs-9];
+  seqm1 = seqm-1;
+
+  ap = lm_fd->sascii;
+
+  if (*lcont==0) {
+    /* get a row, with UID, sequence */
+    /* NOTE(review): columns 0 and 1 are used without a NULL check
+       (atol(), and the copy loop via sql_seqp) - confirm the queries
+       can never return NULL there */
+    *l_off = 1;
+    if ((lm_fd->mysql_row =mysql_fetch_row(lm_fd->mysql_res))!=NULL) {
+      *libpos=(fseek_t)atol(lm_fd->mysql_row[0]);
+
+      /* for @P:1-n removed */
+      /*
+      if ((bp=strchr(lm_fd->mysql_row[2],'@'))!=NULL &&
+	  !strncmp(bp+1,"P:",2)) {
+	sscanf(bp+3,"%d-%d",&l_start,&l_stop)
+	l_start--;
+	if (l_start < 0) l_start=0;
+	if (l_stop > (len=strlen(lm_fd->mysql_row[1]))) l_stop= len-1;
+	lm_fd->sql_seqp = lm_fd->mysql_row[1];
+	lm_fd->sql_seqp[l_stop]='\0';
+	lm_fd->sql_seqp += l_start;
+      */
+
+      /* column 2 (description) may carry a coordinate offset as "@C:n" */
+      if (lm_fd->mysql_row[2] == NULL) {
+	fprintf(stderr," NULL comment at: [%s] %ld\n",
+		lm_fd->mysql_row[0],*libpos);
+      }
+      else if ((bp=strchr(lm_fd->mysql_row[2],'@'))!=NULL &&
+	  !strncmp(bp+1,"C:",2)) sscanf(bp+3,"%ld",l_off);
+      else *l_off = 1;
+
+      /* start copying at the beginning of the sequence column */
+      lm_fd->sql_seqp = lm_fd->mysql_row[1];
+
+      /* because of changes in mysql_ranlib(), it is essential that
+         libstr return the unique identifier; thus we must use
+         sql_row[0], not sql_row[2]. Using libstr as the UID allows
+         one to use any UID, not just numeric ones.  *libpos is not
+         used for mysql libraries.
+      */
+
+      if (n_libstr <= MAX_UID) {
+	/* the normal case returns only GID/sequence */
+	strncpy(libstr,lm_fd->mysql_row[0],MAX_UID-1);
+	libstr[MAX_UID-1]='\0';
+      }
+      else {
+	/* here we do not use the UID in libstr, because we are not
+           going back into the db */
+	/* the PVM case also returns a long description */
+	if (lm_fd->mysql_row[2]!=NULL) {
+	  strncpy(libstr,lm_fd->mysql_row[2],n_libstr-1);
+	}
+	else {
+	  strncpy(libstr,lm_fd->mysql_row[0],n_libstr-1);
+	}
+	libstr[n_libstr-1]='\0';
+      }
+    }
+    else {
+      /* no more rows: release the result set and signal end of library */
+      mysql_free_result(lm_fd->mysql_res);
+      lm_fd->mysql_res=NULL;
+      *lcont = 0;
+      *seqp = EOSEQ;
+      return -1;
+    }
+  }
+
+  /* copy residues, 10 per pass; the && chain stops at the first
+     character whose sascii code is not < NA, which is then discarded
+     by --seqp.  The loop ends when that character is the terminating
+     '\0' or when the buffer limit is reached. */
+  /* NOTE(review): if the '\0' is consumed in the same pass that moves
+     seqp past seqm1, *lcont is incremented while cp already points
+     beyond the terminator - confirm this cannot happen in practice */
+  for (cp=(unsigned char *)lm_fd->sql_seqp; seqp<seqm1 && *cp; ) {
+    if ((*seqp++=ap[*cp++])<NA &&
+	(*seqp++=ap[*cp++])<NA &&
+	(*seqp++=ap[*cp++])<NA &&
+	(*seqp++=ap[*cp++])<NA &&
+	(*seqp++=ap[*cp++])<NA &&
+	(*seqp++=ap[*cp++])<NA &&
+	(*seqp++=ap[*cp++])<NA &&
+	(*seqp++=ap[*cp++])<NA &&
+	(*seqp++=ap[*cp++])<NA &&
+	(*seqp++=ap[*cp++])<NA) continue;
+    --seqp;
+    if (*(cp-1)==0) break;
+  }
+  /* remember where to resume if the sequence did not fit */
+  lm_fd->sql_seqp = (char *)cp;
+
+  if (seqp>=seqm1) (*lcont)++;	/* buffer full - more to come */
+  else {
+    *lcont=0;
+    /* after mysql_reopen() the result set is dropped once a sequence
+       has been read completely */
+    if (lm_fd->sql_reopen) {
+      mysql_free_result(lm_fd->mysql_res);
+      lm_fd->mysql_res = NULL;
+    }
+  }
+
+  *seqp = EOSEQ;
+  /*   if ((int)(seqp-seq)==0) return 1; */
+  return (int)(seqp-seq);
+}
+
+/* mysql_subst_uid() - expand a query template into dst[dst_len] by
+   replacing the first '#' in tmpl with uid.  tmpl is not modified.
+
+   Returns  1  the expanded, '\0'-terminated query is in dst
+            0  tmpl contains no '#'
+           -1  the expanded query does not fit in dst
+   On a return of 0 or -1, dst holds an empty string.
+*/
+static int
+mysql_subst_uid(char *dst, size_t dst_len, const char *tmpl, const char *uid)
+{
+  const char *hash_p;
+  size_t n_pre, n_uid, n_post;
+
+  if (dst_len == 0) return -1;
+  dst[0] = '\0';
+
+  if ((hash_p = strchr(tmpl,'#'))==NULL) return 0;
+
+  n_pre = (size_t)(hash_p - tmpl);	/* text before '#' */
+  n_uid = strlen(uid);
+  n_post = strlen(hash_p+1);		/* text after '#' */
+
+  if (n_pre + n_uid + n_post + 1 > dst_len) return -1;
+
+  memcpy(dst, tmpl, n_pre);
+  memcpy(dst+n_pre, uid, n_uid);
+  memcpy(dst+n_pre+n_uid, hash_p+1, n_post+1);	/* copies the '\0' too */
+  return 1;
+}
+
+/* mysql_ranlib() - random access to one library entry.
+
+   str[cnt]  receives the description of the entry; on a failed lookup
+             it receives a "gi|<libpos> ***...***" message instead
+   libpos    stored in lm_fd->lpos; only used in error messages
+   libstr    unique identifier substituted for '#' in the sql_getdesc
+             and sql_getseq query templates
+
+   The description is built from the sql_getdesc query: column 1 of
+   the first row (column 0 if column 1 is NULL), followed by column 1
+   of any further rows separated by blanks, cut at the first '\r' or
+   '\n'.  Afterwards the sql_getseq query is issued and its result set
+   is left in lm_fd->mysql_res, ready for mysql_getlib().  If that
+   query cannot be set up or fails, lm_fd->mysql_res is left NULL.
+*/
+void
+mysql_ranlib(char *str,
+	     int cnt,
+	     fseek_t libpos,
+	     char *libstr,
+	     struct lmf_str *lm_fd
+	     )
+{
+  char tmp_query[1024];
+  char *bp;
+  int q_stat;
+
+  str[0]='\0';
+
+  /* drop any pending result set first, so that every path below
+     starts with a connection that is ready for a new query */
+  if (lm_fd->mysql_res !=NULL) {
+    mysql_free_result(lm_fd->mysql_res);
+    lm_fd->mysql_res = NULL;
+  }
+
+  /* put the UID into the query string - cannot use sprintf because of
+     "%' etc */
+
+  q_stat = mysql_subst_uid(tmp_query, sizeof(tmp_query),
+			   lm_fd->sql_getdesc, libstr);
+  if (q_stat == 0) {
+    fprintf(stderr, "no GID position in %s\n",lm_fd->sql_getdesc);
+    goto next1;
+  }
+  else if (q_stat < 0) {
+    fprintf(stderr, "*** Error - getdesc query too long [max %d]:\n%s\n",
+	    (int)sizeof(tmp_query)-1, lm_fd->sql_getdesc);
+    goto next1;
+  }
+  lm_fd->lpos = libpos;
+
+  /*  fprintf(stderr," requesting: %s\n",tmp_query); */
+
+  if (mysql_query(lm_fd->mysql_conn,tmp_query)) {
+    fprintf(stderr,"*** Error - query failed:\n%s\n%s\n",tmp_query,
+	    mysql_error(lm_fd->mysql_conn));
+    sprintf(str,"gi|%ld ***Error - query failed***",(long)libpos);
+    goto next1;
+  }
+
+  if ((lm_fd->mysql_res = mysql_use_result(lm_fd->mysql_conn)) == NULL) {
+/*     fprintf(stderr,"*** Error = use result failed\n%s\n", 
+	   mysql_error(lm_fd->mysql_conn)); */
+    sprintf(str,"gi|%ld ***use result failed***",(long)libpos);
+    goto next1;		/* nothing to free */
+  }
+  
+  /* have the description */
+  if ((lm_fd->mysql_row = mysql_fetch_row(lm_fd->mysql_res))==NULL) {
+    /*    fprintf(stderr," cannot fetch description: %s\n",tmp_query); */
+    sprintf(str,"gi|%ld ***cannot fetch description***",(long)libpos);
+    goto next0;
+  }
+  
+  if (lm_fd->mysql_row[1] != NULL) strncpy(str,lm_fd->mysql_row[1],cnt-1);
+  else strncpy(str,lm_fd->mysql_row[0],cnt-1);
+  str[cnt-1]='\0';
+
+  /* append the descriptions from any additional rows while there is
+     room; strlen(str) <= cnt-2 inside the loop, so the strncat()
+     limits never go negative */
+  while (strlen(str) < cnt-1 &&
+	 (lm_fd->mysql_row = mysql_fetch_row(lm_fd->mysql_res))!=NULL) {
+    strncat(str," ",cnt-2-strlen(str));
+    if (lm_fd->mysql_row[1]!=NULL) 
+      strncat(str,lm_fd->mysql_row[1],cnt-2-strlen(str));
+    else break;
+  }
+
+  str[cnt-1]='\0';
+  if ((bp = strchr(str,'\r'))!=NULL) *bp='\0';
+  if ((bp = strchr(str,'\n'))!=NULL) *bp='\0';
+
+ next0:
+  if (lm_fd->mysql_res != NULL) mysql_free_result(lm_fd->mysql_res);
+ next1: 
+  lm_fd->mysql_res = NULL;
+
+  /* get the sequence, set up for mysql_getseq() */
+  /* put the UID into the query string */
+
+  q_stat = mysql_subst_uid(tmp_query, sizeof(tmp_query),
+			   lm_fd->sql_getseq, libstr);
+  if (q_stat == 0) {
+    fprintf(stderr, "no GID position in %s\n",lm_fd->sql_getseq);
+    return;
+  }
+  else if (q_stat < 0) {
+    fprintf(stderr, "*** Error - getseq query too long [max %d]:\n%s\n",
+	    (int)sizeof(tmp_query)-1, lm_fd->sql_getseq);
+    return;
+  }
+
+  if (mysql_query(lm_fd->mysql_conn,tmp_query)) {
+    fprintf(stderr,"*** Error - query failed:\n%s\n%s\n",tmp_query,
+	    mysql_error(lm_fd->mysql_conn));
+    return;	/* no result set to use; mysql_res stays NULL */
+  }
+
+  if ((lm_fd->mysql_res = mysql_use_result(lm_fd->mysql_conn)) == NULL) {
+    fprintf(stderr,"*** Error = use result failed\n%s\n",
+	    mysql_error(lm_fd->mysql_conn));
+  }
+}
diff --git a/src/ncbl2_head.h b/src/ncbl2_head.h
new file mode 100644
index 0000000..1da9293
--- /dev/null
+++ b/src/ncbl2_head.h
@@ -0,0 +1,35 @@
+/* ncbl2_head.h	definitions for reading NCBI blast2 (formatdb) format files */
+
+/* $Id: ncbl2_head.h 625 2011-03-23 17:21:38Z wrp $ */
+/* $Revision: 625 $  */
+
+/* sequence type codes; ncbl2_openlib() compares the type word read
+   from the index file with AAFORMAT/NTFORMAT */
+#define AMINO_ACID_SEQTYPE	1
+#define AA_SEQTYPE	AMINO_ACID_SEQTYPE
+#define AAFORMAT	AA_SEQTYPE
+
+#define NUCLEIC_ACID_SEQTYPE	0
+#define NT_SEQTYPE	NUCLEIC_ACID_SEQTYPE
+#define NTFORMAT	NT_SEQTYPE
+
+/* Filename extensions used by the two types of databases (a.a. and nt.) */
+/* list (alias), header, index and sequence files, in that order */
+#define AA_LIST_EXT	"pal"
+#define AA_HEADER_EXT	"phr"
+#define AA_INDEX_EXT	"pin"
+#define AA_SEARCHSEQ_EXT	"psq"
+
+#define NT_LIST_EXT	"nal"
+#define NT_HEADER_EXT	"nhr"
+#define NT_INDEX_EXT	"nin"
+#define NT_SEARCHSEQ_EXT	"nsq"
+
+/* the two formatdb versions accepted by ncbl2_openlib() */
+#define FORMATDBV3	3	/* formatdb version */
+#define FORMATDBV4	4	/* formatdb version */
+
+#define NULLB		'\0'	/* sentinel byte */
+
+#ifndef CHAR_BIT
+#define CHAR_BIT	8	/* these values should match blast */
+#endif
+
+/* NOTE(review): NBPN is presumably "number of bits per nucleotide"
+   (the 2-bit NCBI2na packing) - confirm against its users */
+#define NBPN		2
+#define NSENTINELS	2
diff --git a/src/ncbl2_mlib.c b/src/ncbl2_mlib.c
new file mode 100644
index 0000000..16cc8da
--- /dev/null
+++ b/src/ncbl2_mlib.c
@@ -0,0 +1,2442 @@
+/*	ncbl2_mlib.c	functions to read ncbi-blast format files from
+			formatdb (blast2.0 format files)
+*/
+
+/* copyright (c) 2006, 2014 by William R. Pearson and
+   The Rector and Visitors of the University of Virginia */
+
+/* Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under this License is distributed on an "AS
+   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+   express or implied.  See the License for the specific language
+   governing permissions and limitations under the License. 
+*/
+
+/* Updated 22-Aug-2014 to include 
+
+   ambiguity-decoding code from 
+
+     Ralf Jost, Dipl.-Inform.
+     Director, Technical Bioinformatics
+     Biomax Informatics AG
+     ralf.jost at biomax.com
+
+     using code from NCBI Blast distribution
+*/
+
+/* $Name:  $ - $Id: ncbl2_mlib.c 1291 2014-08-28 18:32:58Z wrp $ */
+
+/* to turn on mmap()ing for Blast2 files: */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <limits.h>
+#include <fcntl.h>
+#ifdef UNIX
+#include <unistd.h>
+#endif
+#include <errno.h>
+
+
+/* ****************************************************************
+
+17-May-2006
+
+Modified to read NCBI .[np]al and .msk files.  The .nal or .pal file
+provides a way to read sequences from a list of files.  The .msk file
+provides a compact way of indicating the subset of sequences in a
+larger database (typically nr or nt) that comprise a smaller database
+(e.g. swissprot or pdbaa).  A .pal file (e.g. swissprot.00.pal) that
+uses a .msk file has the form:
+
+	# Alias file generated by genmask
+	# Date created: Mon Apr 10 11:24:05 2006
+	#
+	TITLE     Non-redundant SwissProt sequences
+	DBLIST    nr.00
+	OIDLIST   swissprot.00.msk
+	LENGTH    74351250
+	NSEQ      198346
+	MAXOID    2617347
+	MEMB_BIT 1
+	# end of the file
+
+To work with this file, we must first load the nr.00 file, and then
+read the swissprot.00.msk file, and then scan all the entries in the
+swissprot.00.msk file (which are packed 32 mask-bit to an int) to
+determine whether a specific libpos index entry is present in the
+subset database.
+
+**************************************************************** */
+
+
+/* ****************************************************************
+This code reads NCBI Blast2 format databases from formatdb version 3 and 4
+
+(From NCBI) This section describes the format of the databases.
+
+Formatdb creates three main files for proteins containing indices,
+sequences, and headers with the extensions, respectively, of pin, psq,
+and phr (for nucleotides these are nin, nsq, and nhr).  A number of
+other ISAM indices are created, but these are described elsewhere.
+
+FORMAT OF THE INDEX FILE
+------------------------
+
+1.) formatdb version number 	[4 bytes].
+
+2.) protein dump flag (1 for a protein database, 0 for a nucleotide
+    database) [4 bytes].
+
+3.) length of the database title in bytes	[4 bytes].
+4.) the database title		[length given in 3.)].
+5.) length of the date/time string	[4 bytes].
+6.) the date/time string	[length given in 5.)].
+7.) the number of sequences in the database	[4 bytes].
+8.) the total length of the database in residues/basepairs	[4 bytes].
+9.) the length of the longest sequence in the database 		[4 bytes].
+
+10.) a list of the offsets for definitions (one for each sequence) in
+the header file.  There are num_of_seq+1 of these, where num_of_seq is
+the number of sequences given in 7.).
+
+11.) a list of the offsets for sequences (one for each sequence) in
+the sequence file.  There are num_of_seq+1 of these, where num_of_seq
+is the number of sequences given in 7.).
+
+12.) a list of the offsets for the ambiguity characters (one for each
+sequence) in the sequence file.  This list is only present for
+nucleotide databases and, since the database is compressed 4/1 for
+nucleotides, allows the ambiguity characters to be restored when the
+sequence is generated.  There are num_of_seq+1 of these, where
+num_of_seq is the number of sequences given in 7.).
+
+
+FORMAT OF THE SEQUENCE FILE
+---------------------------
+
+There are different formats for the protein and nucleotide sequence files.
+
+The protein sequence file is quite simple.  The first byte in the
+file is a NULL byte, followed by the sequence in ncbistdaa format
+(described in the NCBI Software Development Toolkit documentation).
+Following the sequence is another NULL byte, followed by the next
+sequence.  The file ends with a NULL byte, following the last
+sequence.
+
+The nucleotide sequence file contains the nucleotide sequence, with
+four basepairs compressed into one byte.  The format used is NCBI2na,
+documented in the NCBI Software Development Toolkit manual.  Any
+ambiguity characters present in the original sequence are replaced at
+random by A, C, G or T.  The true value of ambiguity characters are
+stored at the end of each sequence to allow true reproduction of the
+original sequence.
+
+FORMAT OF THE HEADER FILE  (formatdb version 3)
+-------------------------
+
+The format of the header file depends on whether or not the identifiers in the
+original file were parsed or not.  For the case that they were not, then each
+entry has the format:
+
+gnl|BL_ORD_ID|entry_number my favorite yeast sequence...
+
+Here entry_number gives the ordinal number of the sequence in the
+database (with zero offset).  The identifier
+gnl|BL_ORD_ID|entry_number is used by the BLAST software to identify
+the entry, if the user has not provided another identifier.  If the
+identifier was parsed, then gnl|BL_ORD_ID|entry_number is replaced by
+the correct identifier, as described in
+ftp://ncbi.nlm.nih.gov/blast/db/README .
+
+There are no separators between these deflines.
+
+For formatdb version 4, the header file contains blast ASN.1 binary
+deflines, which can be parsed with parse_fastadl_asn().
+
+FORMAT OF THE .MSK FILE
+-----------------------
+
+The .msk file is simply a packed list of masks for formatdb "oids" for
+some other file (typically nr).  The first value is the last oid
+available; the remainder are packed 32 oids/mask, so that the number
+of masks is 1/32 the number of sequences in the file.
+
+**************************************************************** */
+
+#ifdef USE_MMAP
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#ifdef IBM_AIX
+#include <fcntl.h>
+#else
+#include <sys/fcntl.h>
+#endif
+#endif
+
+#ifdef USE_MMAP
+#ifndef MAP_FILE
+#define MAP_FILE 0
+#endif
+#endif
+
+#ifdef UNIX
+#define RBSTR "r"
+#else
+#define RBSTR "rb"
+#endif
+
+#ifdef WIN32
+#define SLASH_CHAR '\\'
+#define SLASH_STR "\\"
+#else
+#define SLASH_CHAR '/'
+#define SLASH_STR "/"
+#endif
+
+#define XTERNAL
+#include "uascii.h"
+
+#define XTERNAL
+#include "upam.h"
+#include "ncbl2_head.h"
+
+#include "defs.h"
+#include "structs.h"
+#include "mm_file.h"
+
+#define MAX_FADL_ACC_LEN 64
+
+unsigned long adler32(unsigned long, const unsigned char *, unsigned int);
+unsigned int bl2_uint4_cvt(unsigned int);
+unsigned int bl2_long4_cvt(long);
+uint64_t bl2_long8_cvt(uint64_t);
+void src_int4_read(FILE *fd,  int *valp);
+void src_uint4_read(FILE *fd,  unsigned int *valp);
+void src_long4_read(FILE *fd,  long *valp);
+void src_long8_read(FILE *fd,  int64_t *val);
+void ncbi_long8_read(FILE *fd,  int64_t *valp);
+void src_char_read(FILE *fd,  char *valp);
+unsigned char *parse_fastadl_asn(unsigned char *asn_buff, unsigned char *asn_max,
+				 int *gi_p, int *db, char *acc,  size_t acc_len, 
+				 char *name, size_t name_len,
+				 char *title, size_t t_len, int *taxid);
+
+/* nt_btoa maps from blast 2-bit format to ascii characters */
+static char nt_btoa[5] = {"ACGT"};
+
+/* aa_b2toa maps from blast amino-acid codes to ascii characters */
+static char *aa_b2toa= "-ABCDEFGHIKLMNPQRSTVWXYZU*OJ";	/* NCBIstdaa */
+
+/* aa_btof[] is filled in by ncbl2_openlib() for protein libraries;
+   aa_btof_null is set to 1 there when aa_btof[] turns out to be the
+   identity mapping */
+static int aa_btof[32];	/* maps to fasta alphabet */
+static int aa_btof_null = 0;
+
+/* dbformat (formatdb version) and dbtype (1 protein / 0 DNA) are read
+   from the index file by ncbl2_openlib() */
+static int dbtype, dbformat, amb_cnt;
+
+/* library type stored in lmf_str.lb_type by ncbl2_openlib() */
+#define NCBIBL20 12
+
+struct lmf_str *load_ncbl2(struct lmf_str *m_fptr, FILE *ifile, int dbformat, int dbtype);
+
+int ncbl2_get_mmap_chain_o(struct seqr_chain *cur_seqr_chain, 
+			   struct lmf_str *m_fd, struct db_str *db);
+
+int ncbl2_get_mmap_chain(struct seqr_chain *cur_seqr_chain, 
+			 struct lmf_str *m_fd, struct db_str *db);
+
+int ncbl2_getliba(unsigned char *, int, char *, int, fseek_t *, int *, struct lmf_str *, long *);
+int ncbl2_getlibn(unsigned char *, int, char *, int, fseek_t *, int *, struct lmf_str *, long *);
+
+int ncbl2_getliba_o(unsigned char *, int, char *, int, fseek_t *, int *, struct lmf_str *, long *);
+int ncbl2_getlibn_o(unsigned char *, int, char *, int, fseek_t *, int *, struct lmf_str *, long *);
+
+void newname(char *, char *, char *, int);
+void parse_pal(char *, char *, int *, int *, FILE *);
+
+int readMFILE (void *buffer, size_t size, int nitems, struct lmf_str *m_fd);
+
+void ncbl2_ranlib(char *, int, fseek_t, char *, struct lmf_str *m_fd);
+
+/* ncbl2_openlib() is used to open (and memory map) a BLAST2.0 format
+   file.  Ifdef USE_MMAP, then ncbl2_openlib returns a structure that can
+   be used to read the database.
+
+   lib_p->file_name  base name of the library
+   ldnaseq           SEQT_PROT or SEQT_DNA; selects the file
+                     extensions and must agree with the type recorded
+                     in the index file
+
+   If a .pal/.nal file with an OID list exists, the file names are
+   taken from it and the .msk file is loaded into oid_list[];
+   otherwise the names are generated from lib_p->file_name.
+
+   Returns the result of load_ncbl2() on success.  On any failure a
+   message is written to stderr, every file opened and every buffer
+   allocated here is released, and NULL is returned.
+*/
+struct lmf_str *
+ncbl2_openlib(struct lib_struct *lib_p,  int ldnaseq)
+{
+  char lname[256];	/* .pal, .nal file name */
+  char dname[256];	/* .pin, .nin file for files included from .msk files */
+  char msk_name[256];	/* .msk file name */
+  char hname[256];	/* .phr, .nhr */
+  char sname[256];	/* .psq, .nsq */
+  char tname[256];	/* .pin, .nin file */
+  char db_dir[256];	/*  directory where all the files live */
+  int pref_db= -1;	/*  right now, only swissprot.pal, pdbaa.pal
+			    are used for masked OID files */
+  char *bp;
+  int oid_seqs, max_oid, have_oid_list;
+  int oid_cnt, oid_len;
+  unsigned int *oid_list, o_max;
+  int tmp;
+  int i, n_aa;
+#ifdef USE_MMAP
+  struct stat statbuf;
+#endif
+  FILE *ifile;	/* index offsets, also DB info */
+  struct lmf_str *m_fptr;
+
+  /* this function should be reorganized
+  (1) check to see if there is a .nal/.pal file before doing things
+      with names, since the names might be wrong. 
+  (2) if there is a .nal/.pal file, check to see if it has a DBLIST
+      (later), if it does, then modify the lib_p->next chain
+      if it does not, then generate the appropriate file names, and
+      read the oid list
+  (3) otherwise (no .nal/.pal file), generate the file names
+
+
+  */
+
+  /* nothing is allocated yet; error_r relies on these */
+  m_fptr = NULL;
+  oid_list = NULL;
+
+  if (ldnaseq==SEQT_PROT) {
+    newname(lname,lib_p->file_name,AA_LIST_EXT,(int)sizeof(lname));	/* .pal */
+  }
+  else {
+    newname(lname,lib_p->file_name,NT_LIST_EXT,(int)sizeof(lname));	/* .nal */
+  }
+  
+  /* check for a .nal/.pal OID list file */
+  max_oid = oid_seqs = 0;
+  have_oid_list = 0;
+
+  /* here, we check for a .pal/.nal file by trying to open it */
+  if ((ifile = fopen(lname,"r"))!=NULL) {
+
+    if ((bp = strrchr(lib_p->file_name,SLASH_CHAR))!=NULL) {
+      *bp = '\0';
+      SAFE_STRNCPY(db_dir,lib_p->file_name,sizeof(db_dir));
+      SAFE_STRNCAT(db_dir,SLASH_STR,sizeof(db_dir));
+      *bp = SLASH_CHAR;
+    }
+    else {
+      db_dir[0]='\0';
+    }
+
+    /* we have a list file, we need to parse it */
+    parse_pal(dname, msk_name, &oid_seqs, &max_oid, ifile);
+    fclose(ifile);
+    ifile = NULL;
+
+    pref_db = -1;
+
+    if (oid_seqs > 0) {
+
+      have_oid_list = 1;
+      /* get the pref_db before adding the directory */
+      if (strncmp(msk_name,"swissprot",9)==0) {
+	pref_db = 7;
+      }
+      else if (strncmp(msk_name,"pdbaa",5)==0) {
+	pref_db = 14;
+      }
+
+      /* need to add directory to both dname and msk_name */
+      SAFE_STRNCPY(tname,db_dir,sizeof(tname));
+      SAFE_STRNCAT(tname,msk_name, sizeof(tname));
+      SAFE_STRNCPY(msk_name, tname, sizeof(msk_name));
+
+      SAFE_STRNCPY(tname,db_dir,sizeof(tname));
+      SAFE_STRNCAT(tname,dname, sizeof(tname));
+      SAFE_STRNCPY(dname,tname,sizeof(dname));
+
+      if (ldnaseq == SEQT_PROT) {
+	newname(tname,dname,AA_INDEX_EXT,(int)sizeof(tname));
+	newname(hname,dname,AA_HEADER_EXT,(int)sizeof(hname));
+	newname(sname,dname,AA_SEARCHSEQ_EXT,(int)sizeof(sname));
+      }
+      else {	/* reading DNA library */
+	newname(tname,dname,NT_INDEX_EXT,(int)sizeof(tname));
+	newname(hname,dname,NT_HEADER_EXT,(int)sizeof(hname));
+	newname(sname,dname,NT_SEARCHSEQ_EXT,(int)sizeof(sname));
+      }
+
+      /* now load the oid file */
+      if ((ifile = fopen(msk_name,RBSTR))==NULL) {
+	fprintf(stderr,"error - cannot load %s file\n",msk_name);
+	goto error_r;
+      }
+
+      src_uint4_read(ifile,&o_max);
+      if (o_max != (unsigned int)max_oid) {
+	fprintf(stderr," error - oid count mismatch %d != %u\n",max_oid, o_max);
+      }
+      oid_len = (max_oid/32+1);
+      if ((oid_list=(unsigned int *)calloc(oid_len,sizeof(int)))==NULL) {
+	fprintf(stderr," error - cannot allocate oid_list[%d]\n",oid_len);
+	goto error_r;
+      }
+      if ((oid_cnt=fread(oid_list,sizeof(int),oid_len,ifile))==0) {
+	fprintf(stderr," error - cannot read oid_list[%d]\n",oid_len);
+	goto error_r;
+      }
+      fclose(ifile);
+      ifile = NULL;
+    }
+#ifdef DEBUG
+    else {	/* we had a .msk file, but there are no oid's in
+		   it. */
+      fprintf(stderr," *** WARNING -- found .pal/.nal file %s with no OIDs\n",lname);
+      return NULL;
+    }
+#endif
+  }
+
+  if (!have_oid_list) {
+    /* no usable OID/.msk file -- generate the names.  This also covers
+       a .pal/.nal file without OIDs, which would otherwise leave
+       tname/hname/sname unset */
+
+    /* initialize file names */
+    if (ldnaseq==SEQT_PROT) {	/* read a protein database */
+      newname(tname,lib_p->file_name,AA_INDEX_EXT,(int)sizeof(tname));	/* .pin */
+      newname(hname,lib_p->file_name,AA_HEADER_EXT,(int)sizeof(hname));	/* .phr */
+      newname(sname,lib_p->file_name,AA_SEARCHSEQ_EXT,(int)sizeof(sname));	/* .psq */
+
+    }
+    else {	/* reading DNA library */
+      newname(tname,lib_p->file_name,NT_INDEX_EXT,(int)sizeof(tname));	/* .nin */
+      newname(hname,lib_p->file_name,NT_HEADER_EXT,(int)sizeof(hname));	/* .nhr */
+      newname(sname,lib_p->file_name,NT_SEARCHSEQ_EXT,(int)sizeof(sname));	/* .nsq */
+    }
+  }	
+
+
+  if (ldnaseq == SEQT_PROT) {
+    /* initialize map of BLAST2 amino acids to FASTA amino acids */
+    for (i=0; aa_b2toa[i]; i++) {
+      if ((tmp=aascii[aa_b2toa[i]])<NA) {
+	aa_btof[i]=tmp;
+	if (aa_b2toa[i] == 'O' || aa_b2toa[i] == 'o') aa_btof[i] = aascii['K'];
+	else if (aa_b2toa[i] == 'U' || aa_b2toa[i] == 'u') aa_btof[i] = aascii['C'];
+      }
+      else if (aa_b2toa[i]=='*') aa_btof[i]=aascii['X'];
+      else aa_btof[i]=0;
+/*    else aa_btof[i]=aascii['X']; */
+    }
+    n_aa = i;	/* number of letters in aa_b2toa; sizeof() would only
+		   give the size of the pointer */
+
+    /* check to see if aa_btof[] actually does anything interesting */
+    aa_btof_null = 1;
+    for (i=0; i<n_aa; i++) {
+      if (i != aa_btof[i]) {
+#ifdef DEBUG
+	fprintf(stderr," difference at: i: %d [%c] != aa_btof[i]: %d [%c]\n",
+		i,aa_b2toa[i],aa_btof[i],NCBIstdaa[aa_btof[i]]);
+#endif
+	aa_btof_null = 0;
+      }
+    }
+  }
+
+
+  /* now we have all the file names, open the files and read the data */
+  /* open the index/header file, and read the sequence type info  */
+  if ((ifile = fopen(tname,RBSTR))==NULL) {
+    fprintf(stderr," cannot open %s (%s) INDEX file",tname,lib_p->file_name);
+    perror("...");
+    goto error_r;
+  }
+  src_uint4_read(ifile,(unsigned *)&dbformat); /* get format DB version number */
+  src_uint4_read(ifile,(unsigned *)&dbtype);   /* get 1 for protein/0 DNA */
+
+  if (dbformat != FORMATDBV3 && dbformat!=FORMATDBV4) {
+    fprintf(stderr,"error - %s wrong formatdb version (%d/%d)\n",
+	    tname,dbformat,FORMATDBV3);
+    goto error_r;
+  }
+
+  if ((ldnaseq==SEQT_PROT && dbtype != AAFORMAT) || 
+      (ldnaseq==SEQT_DNA && dbtype!=NTFORMAT)) {
+    fprintf(stderr,"error - %s wrong format (%d/%d)\n",
+	    tname,dbtype,(ldnaseq ? NTFORMAT: AAFORMAT));
+    goto error_r;
+  }
+
+  /* the files are there - allocate lmf_str */
+  if ((m_fptr=(struct lmf_str *)calloc(1,sizeof(struct lmf_str)))==NULL) {
+    fprintf(stderr," cannot allocate lmf_str\n");
+    goto error_r;
+  }
+
+  m_fptr->lib_aa = (ldnaseq == 0);
+  m_fptr->tmp_buf_max = 4096;
+  if ((m_fptr->tmp_buf=
+       (char *)calloc(m_fptr->tmp_buf_max,sizeof(char)))==NULL) {
+    fprintf(stderr," cannot allocate lmf_str->tmp_buffer\n");
+    goto error_r;
+  }
+
+  /* load the oid info */
+  m_fptr->have_oid_list = have_oid_list;
+  m_fptr->max_oid = max_oid;
+  m_fptr->oid_seqs = oid_seqs;
+  m_fptr->oid_list = oid_list;
+  m_fptr->pref_db= pref_db;
+  m_fptr->get_mmap_chain = NULL;
+
+  /* open the header file */
+  if ((m_fptr->hfile = fopen(hname,RBSTR))==NULL) {
+    fprintf(stderr," cannot open %s header file\n",hname);
+    goto error_r;
+  }
+
+  /* ncbl2_ranlib is used for all BLAST2.0 access */
+  m_fptr->ranlib = ncbl2_ranlib;
+  m_fptr->bl_format_ver = dbformat;
+  m_fptr->lb_type = NCBIBL20;
+  m_fptr->libf = NULL;
+
+  if (ldnaseq==SEQT_DNA) {
+    if (oid_seqs > 0) {
+      m_fptr->getlib = ncbl2_getlibn_o;
+    }
+    else {
+      m_fptr->getlib = ncbl2_getlibn;
+    }
+    m_fptr->sascii = nascii;
+  }
+  else {
+    if (oid_seqs > 0) {
+      m_fptr->getlib = ncbl2_getliba_o;
+      m_fptr->get_mmap_chain = ncbl2_get_mmap_chain_o;
+    }
+    else {
+      m_fptr->getlib = ncbl2_getliba;
+      m_fptr->get_mmap_chain = ncbl2_get_mmap_chain;
+    }
+    m_fptr->sascii = aascii;
+  }
+  m_fptr->lb_name = lib_p->file_name;
+
+  /* open the sequence file */
+
+#if defined (USE_MMAP) 
+  m_fptr->mm_flg=((m_fptr->mmap_fd=open(sname,O_RDONLY))>=0);
+  if (!m_fptr->mm_flg) {
+    fprintf(stderr," cannot open %s",sname);
+    perror("...");
+  }
+  else {
+    if(fstat(m_fptr->mmap_fd, &statbuf) < 0) {
+      fprintf(stderr," cannot fstat %s",sname);
+      perror("...");
+      m_fptr->mm_flg = 0;
+    }
+    else {
+      m_fptr->st_size = statbuf.st_size;
+      if((m_fptr->mmap_base = 
+	  mmap(NULL, m_fptr->st_size, PROT_READ,
+	       MAP_FILE | MAP_SHARED, m_fptr->mmap_fd, 0)) == (char *) -1) {
+	fprintf(stderr," cannot mmap %s",sname);
+	perror("...");
+	m_fptr->mm_flg = 0;
+      }  
+      else {
+	m_fptr->mmap_addr = m_fptr->mmap_base;
+	m_fptr->mm_flg = 1;
+      }
+    }
+    /* regardless, close the open()ed version */
+    close(m_fptr->mmap_fd);
+  }
+#else
+  m_fptr->mm_flg = 0;
+#endif
+
+  /* no mmap()ed copy available - read the sequence file with stdio */
+  if  (!m_fptr->mm_flg) {
+    if ((m_fptr->libf = fopen(sname,RBSTR))==NULL) {
+      fprintf(stderr," cannot open %s sequence file",sname);
+      perror("...");
+      goto error_r;
+    }
+  }
+
+/* all files should be open -- the rest of the work can be done by a
+   function common to ncbl2_openlib() and ncbl2_reopen()
+*/
+
+  return load_ncbl2(m_fptr, ifile, dbformat, dbtype);
+
+ error_r:
+  /* here on any failure: release whatever has been opened/allocated
+     so far (ifile is NULL whenever no file is open) */
+  if (ifile != NULL) fclose(ifile);
+  if (oid_list != NULL) free(oid_list);
+  if (m_fptr != NULL) {
+    if (m_fptr->hfile != NULL) fclose(m_fptr->hfile);
+    if (m_fptr->tmp_buf != NULL) free(m_fptr->tmp_buf);
+    free(m_fptr);
+  }
+  return NULL;
+}
+
+
+/* **************************************************************** */
+/* ncbl2_reopen() - re-open an already opened library file using the
+   information saved in m_fptr
+
+   a valid m_fptr guarantees that the necessary files existed when the
+   library was first opened, and records whether there is an OID list
+   (.pal/.nal plus .msk).  m_fptr's are NEVER used for file lists,
+   only for files with sequence data.
+
+   The file names are regenerated from m_fptr->lb_name; the index,
+   header and sequence files are opened again (the sequence file is
+   mmap()ed when USE_MMAP is defined and the mapping succeeds,
+   otherwise it is fopen()ed), and the index is loaded by load_ncbl2().
+
+   Returns whatever load_ncbl2() returns, or NULL when a file cannot
+   be opened/read or memory cannot be allocated.  On those failures
+   every file and buffer acquired by this call is released again;
+   m_fptr itself is left to the caller.
+*/
+/* **************************************************************** */
+
+struct lmf_str *ncbl2_reopen(struct lmf_str *m_fptr) {
+  char lname[256];	/* .pal, .nal file name */
+  char dname[256];	/* .pin, .nin file for files included from .msk files */
+  char msk_name[256];	/* .msk file name */
+  char hname[256];	/* .phr, .nhr */
+  char sname[256];	/* .psq, .nsq */
+  char tname[256];	/* .pin, .nin file */
+  char db_dir[256];	/*  directory where all the files live */
+  int pref_db= -1;	/*  right now, only swissprot.pal, pdbaa.pal
+			    are used for masked OID files */
+  char *bp;
+  int oid_seqs, max_oid;
+  int oid_cnt, oid_len;
+  unsigned int *oid_list, o_max;
+  int new_tmp_buf = 0;	/* set once this call has allocated m_fptr->tmp_buf */
+  int new_hfile = 0;	/* set once this call has opened m_fptr->hfile */
+#ifdef USE_MMAP
+  struct stat statbuf;
+#endif
+  FILE *ifile = NULL;	/* index offsets, also DB info */
+
+  /* it is not open, but it is being re-used, so re-initialize things */
+  m_fptr->libf = NULL;
+  m_fptr->mmap_fd = -1;
+  m_fptr->mm_flg = 0;
+
+  /* if we have an oid list, open it, read it, and use it to get the file names */
+  /* check for a .nal/.pal OID list file */
+  max_oid = oid_seqs = 0;
+  oid_list = NULL;
+
+  if (m_fptr->have_oid_list) {
+    if (m_fptr->lib_aa==1) {
+      newname(lname,m_fptr->lb_name,AA_LIST_EXT,(int)sizeof(lname));	/* .pal */
+    }
+    else {
+      newname(lname,m_fptr->lb_name,NT_LIST_EXT,(int)sizeof(lname));	/* .nal */
+    }
+
+    /* it opened before, but the file may have disappeared since */
+    if ((ifile = fopen(lname,"r"))==NULL) {
+      fprintf(stderr," cannot open %s list file",lname);
+      perror("...");
+      goto error_r;
+    }
+
+    /* db_dir is everything up to and including the last SLASH_CHAR
+       of lb_name; lb_name is cut temporarily and then restored */
+    if ((bp = strrchr(m_fptr->lb_name,SLASH_CHAR))!=NULL) {
+      *bp = '\0';
+      SAFE_STRNCPY(db_dir,m_fptr->lb_name,sizeof(db_dir));
+      SAFE_STRNCAT(db_dir,SLASH_STR,sizeof(db_dir));
+      *bp = SLASH_CHAR;
+    }
+    else {
+      db_dir[0]='\0';
+    }
+
+    /* we have a list file, we need to parse it */
+    parse_pal(dname, msk_name, &oid_seqs, &max_oid, ifile);
+    fclose(ifile);
+    ifile = NULL;
+
+    /* we have read the .pal/.nal file, now deal with the .msk file */
+
+    pref_db = -1;
+
+    /* get the pref_db before adding the directory */
+    if (strncmp(msk_name,"swissprot",9)==0) {
+      pref_db = 7;
+    }
+    else if (strncmp(msk_name,"pdbaa",5)==0) {
+      pref_db = 14;
+    }
+
+    /* need to add directory to both dname and msk_name */
+    SAFE_STRNCPY(tname,db_dir,sizeof(tname));
+    SAFE_STRNCAT(tname,msk_name, sizeof(tname));
+    SAFE_STRNCPY(msk_name, tname, sizeof(msk_name));
+
+    SAFE_STRNCPY(tname,db_dir,sizeof(tname));
+    SAFE_STRNCAT(tname,dname, sizeof(tname));
+    SAFE_STRNCPY(dname,tname,sizeof(dname));
+
+    if (m_fptr->lib_aa) {
+      newname(tname,dname,AA_INDEX_EXT,(int)sizeof(tname));
+      newname(hname,dname,AA_HEADER_EXT,(int)sizeof(hname));
+      newname(sname,dname,AA_SEARCHSEQ_EXT,(int)sizeof(sname));
+    }
+    else {	/* reading DNA library */
+      newname(tname,dname,NT_INDEX_EXT,(int)sizeof(tname));
+      newname(hname,dname,NT_HEADER_EXT,(int)sizeof(hname));
+      newname(sname,dname,NT_SEARCHSEQ_EXT,(int)sizeof(sname));
+    }
+
+    /* read the OID bit mask: one leading word, then max_oid/32+1 words */
+    if ((ifile = fopen(msk_name,RBSTR))==NULL) {
+      fprintf(stderr," cannot open %s mask file",msk_name);
+      perror("...");
+      goto error_r;
+    }
+    src_uint4_read(ifile,&o_max);
+    oid_len = (max_oid/32+1);
+    if ((oid_list=(unsigned int *)calloc(oid_len,sizeof(int)))==NULL) {
+      fprintf(stderr," error - cannot allocate oid_list[%d]\n",oid_len);
+      goto error_r;
+    }
+    if ((oid_cnt=fread(oid_list,sizeof(int),oid_len,ifile))==0) {
+      fprintf(stderr," error - cannot read oid_list[%d]\n",oid_len);
+      goto error_r;
+    }
+    fclose(ifile);
+    ifile = NULL;
+  }
+  else { /* else no OID/.msk file -- generate the names */
+    /* initialize file names */
+    if (m_fptr->lib_aa) {	/* read a protein database */
+      newname(tname,m_fptr->lb_name,AA_INDEX_EXT,(int)sizeof(tname));	/* .pin */
+      newname(hname,m_fptr->lb_name,AA_HEADER_EXT,(int)sizeof(hname));	/* .phr */
+      newname(sname,m_fptr->lb_name,AA_SEARCHSEQ_EXT,(int)sizeof(sname));	/* .psq */
+    }
+    else {	/* reading DNA library */
+      newname(tname,m_fptr->lb_name,NT_INDEX_EXT,(int)sizeof(tname));	/* .nin */
+      newname(hname,m_fptr->lb_name,NT_HEADER_EXT,(int)sizeof(hname));	/* .nhr */
+      newname(sname,m_fptr->lb_name,NT_SEARCHSEQ_EXT,(int)sizeof(sname));	/* .nsq */
+    }
+  }
+
+  /* now we have all the file names, open the files and read the data */
+  /* open the index/header file, and read the sequence type info  */
+  if ((ifile = fopen(tname,RBSTR))==NULL) {
+    fprintf(stderr," cannot open %s (%s) INDEX file",tname,m_fptr->lb_name);
+    perror("...");
+    goto error_r;
+  }
+  src_uint4_read(ifile,(unsigned *)&dbformat); /* get format DB version number */
+  src_uint4_read(ifile,(unsigned *)&dbtype);   /* get 1 for protein/0 DNA */
+
+  m_fptr->tmp_buf_max = 4096;
+  if ((m_fptr->tmp_buf=
+       (char *)calloc(m_fptr->tmp_buf_max,sizeof(char)))==NULL) {
+    fprintf(stderr," cannot allocate lmf_str->tmp_buffer\n");
+    m_fptr->tmp_buf_max = 0;
+    goto error_r;
+  }
+  new_tmp_buf = 1;
+
+  /* load the oid info */
+  m_fptr->max_oid = max_oid;
+  m_fptr->oid_seqs = oid_seqs;
+  m_fptr->oid_list = oid_list;
+  m_fptr->pref_db= pref_db;
+  m_fptr->get_mmap_chain = NULL;
+
+  /* open the header file; treat failure as an error, as ncbl2_openlib() does */
+  if ((m_fptr->hfile = fopen(hname,RBSTR))==NULL) {
+    fprintf(stderr," cannot open %s header file\n",hname);
+    goto error_r;
+  }
+  new_hfile = 1;
+
+  /* ncbl2_ranlib is used for all BLAST2.0 access */
+  m_fptr->ranlib = ncbl2_ranlib;
+  m_fptr->bl_format_ver = dbformat;
+
+  if (!m_fptr->lib_aa) {
+    if (oid_seqs > 0) {
+      m_fptr->getlib = ncbl2_getlibn_o;
+    }
+    else {
+      m_fptr->getlib = ncbl2_getlibn;
+    }
+    m_fptr->sascii = nascii;
+  }
+  else {
+    if (oid_seqs > 0) { m_fptr->getlib = ncbl2_getliba_o; }
+    else { m_fptr->getlib = ncbl2_getliba; }
+    m_fptr->sascii = aascii;
+  }
+
+  /* open the sequence file */
+
+#if defined (USE_MMAP) 
+  m_fptr->mm_flg=((m_fptr->mmap_fd=open(sname,O_RDONLY))>=0);
+  if (!m_fptr->mm_flg) {
+    /* do not fstat()/close() a descriptor that was never opened */
+    fprintf(stderr," cannot open %s",sname);
+    perror("...");
+  }
+  else {
+    if(fstat(m_fptr->mmap_fd, &statbuf) < 0) {
+      fprintf(stderr," cannot fstat %s",sname);
+      perror("...");
+      m_fptr->mm_flg = 0;
+    }
+    else {
+      m_fptr->st_size = statbuf.st_size;
+      if((m_fptr->mmap_base = 
+          mmap(NULL, m_fptr->st_size, PROT_READ,
+               MAP_FILE | MAP_SHARED, m_fptr->mmap_fd, 0)) == (char *) -1) {
+        fprintf(stderr," cannot mmap %s",sname);
+        perror("...");
+        m_fptr->mm_flg = 0;
+      }  
+      else {
+        m_fptr->mmap_addr = m_fptr->mmap_base;
+        m_fptr->mm_flg = 1;
+      }
+    }
+    /* regardless, close the open()ed version */
+    close(m_fptr->mmap_fd);
+  }
+#else
+  m_fptr->mm_flg = 0;
+#endif
+
+  /* fall back to stdio when the file is not memory mapped */
+  if  (!m_fptr->mm_flg) {
+    m_fptr->libf = fopen(sname,RBSTR);
+    if (!m_fptr->libf) {
+      fprintf(stderr," cannot open %s\n",sname);
+      goto error_r;
+    }
+  }
+
+  /* load_ncbl2() takes over ifile and closes it */
+  return load_ncbl2(m_fptr, ifile, dbformat, dbtype);
+
+ error_r:
+  /* release only what this call acquired; m_fptr belongs to the caller */
+  if (ifile != NULL) fclose(ifile);
+  if (new_hfile) {
+    fclose(m_fptr->hfile);
+    m_fptr->hfile = NULL;
+  }
+  if (new_tmp_buf) {
+    free(m_fptr->tmp_buf);
+    m_fptr->tmp_buf = NULL;
+    m_fptr->tmp_buf_max = 0;
+  }
+  if (oid_list != NULL) {
+    if (m_fptr->oid_list == oid_list) m_fptr->oid_list = NULL;
+    free(oid_list);
+  }
+  return NULL;
+}
+
+/* load_ncbl2() - finish opening a BLAST2 database: read the rest of
+   the index file and build the offset tables in m_fptr.
+
+   m_fptr   - library descriptor; its files have been opened by the caller
+   ifile    - open index file (ncbl2_reopen() has already read the
+              format-version and type words from it); always closed here
+   dbformat - format version; FORMATDBV3 stores the total length in 4
+              bytes, anything else in 8 bytes
+   dbtype   - NTFORMAT additionally loads the ambiguity offsets
+
+   From ifile this reads the title and date strings (read, then
+   discarded), max_cnt, tot_len and max_len, followed by max_cnt+1
+   4-byte header offsets (d_pos_arr), sequence offsets (s_pos_arr) and,
+   for NTFORMAT, ambiguity offsets (a_pos_arr).  Offsets are passed
+   through bl2_uint4_cvt() unless IS_BIG_ENDIAN is defined.
+
+   Returns m_fptr on success.  On failure everything allocated here is
+   freed, ifile is closed, m_fptr itself is free()d (as before) and
+   NULL is returned.
+   NOTE(review): files/buffers the caller stored in m_fptr are not
+   released before m_fptr is freed, and a caller of ncbl2_reopen() still
+   holds the freed pointer -- confirm the callers cope with this.
+*/
+struct lmf_str 
+*load_ncbl2(struct lmf_str *m_fptr, FILE *ifile, int dbformat, int dbtype) {
+  int title_len;
+  char *title_str=NULL;
+  int date_len;
+  char *date_str=NULL;
+  long ltmp;
+  int64_t l8tmp;
+  int i, tmp;
+  size_t n_off;			/* number of entries in each offset table */
+  unsigned int *f_pos_arr=NULL;	/* raw 4-byte offsets as read from ifile */
+  /* track the tables allocated by this call so the error path can free them */
+  MM_OFF *d_pos=NULL, *s_pos=NULL, *a_pos=NULL;
+
+  src_uint4_read(ifile,(unsigned *)&title_len);
+
+  if (title_len > 0) {
+    if ((title_str = calloc((size_t)title_len+1,sizeof(char)))==NULL) {
+      fprintf(stderr," cannot allocate title string (%d)\n",title_len);
+      goto error_r;
+    }
+    fread(title_str,(size_t)1,(size_t)title_len,ifile);
+  }
+  
+  src_uint4_read(ifile,(unsigned *)&date_len);
+
+  if (date_len > 0) {
+    if ((date_str = calloc((size_t)date_len+1,sizeof(char)))==NULL) {
+      fprintf(stderr," cannot allocate date string (%d)\n",date_len);
+      goto error_r;
+    }
+    fread(date_str,(size_t)1,(size_t)date_len,ifile);
+  }
+  
+  m_fptr->lpos = 0;
+  src_uint4_read(ifile,(unsigned *)&m_fptr->max_cnt);
+  
+  if (dbformat == FORMATDBV3) {
+    src_long4_read(ifile,&ltmp);
+    m_fptr->tot_len = ltmp;
+  }
+  else {
+    ncbi_long8_read(ifile,&l8tmp);
+    m_fptr->tot_len = l8tmp;
+  }
+
+  src_long4_read(ifile,&ltmp);
+  m_fptr->max_len = ltmp;
+
+  /* currently we are not using this information, but perhaps later */
+  /* reset to NULL so the error path below cannot free them twice */
+  free(title_str);
+  title_str = NULL;
+  free(date_str);
+  date_str = NULL;
+
+#ifdef DEBUG
+    fprintf(stderr,"%s format: BL2 (%s)  max_cnt: %d, totlen: %lld, maxlen %ld\n",
+	    m_fptr->lb_name,m_fptr->mm_flg ? "mmap" : "fopen", 
+	    m_fptr->max_cnt,m_fptr->tot_len,m_fptr->max_len);
+#endif
+
+  /* every table has one more entry than there are sequences */
+  n_off = (size_t)m_fptr->max_cnt+1;
+
+  /* allocate and read hdr indexes */
+  if ((f_pos_arr=(unsigned int *)calloc(n_off,sizeof(int)))==NULL) {
+    fprintf(stderr," cannot allocate tmp header pointers\n");
+    goto error_r;
+  }
+
+  if ((d_pos=m_fptr->d_pos_arr=(MM_OFF *)calloc(n_off,sizeof(MM_OFF)))==NULL) {
+    fprintf(stderr," cannot allocate header pointers\n");
+    goto error_r;
+  }
+
+  /* allocate and read sequence offsets */
+  if ((s_pos=m_fptr->s_pos_arr=(MM_OFF *)calloc(n_off,sizeof(MM_OFF)))==NULL) {
+    fprintf(stderr," cannot allocate sequence pointers\n");
+    goto error_r;
+  }
+
+  if (fread(f_pos_arr,(size_t)4,n_off,ifile)!=n_off) {
+    fprintf(stderr," error reading hdr offsets: %s\n",m_fptr->lb_name);
+    goto error_r;
+  }
+
+  for (i=0; i<=m_fptr->max_cnt; i++) {
+#ifdef IS_BIG_ENDIAN
+    m_fptr->d_pos_arr[i] = f_pos_arr[i];
+#else
+    m_fptr->d_pos_arr[i] = bl2_uint4_cvt(f_pos_arr[i]);
+#endif
+  }
+
+  if (fread(f_pos_arr,(size_t)4,n_off,ifile)!=n_off) {
+    fprintf(stderr," error reading seq offsets: %s\n",m_fptr->lb_name);
+    goto error_r;
+  }
+  for (i=0; i<=m_fptr->max_cnt; i++) {
+#ifdef IS_BIG_ENDIAN
+    m_fptr->s_pos_arr[i] = f_pos_arr[i];
+#else
+    m_fptr->s_pos_arr[i] = bl2_uint4_cvt(f_pos_arr[i]);
+#endif
+  }
+
+  if (dbtype == NTFORMAT) {
+    /* allocate and read ambiguity offsets */
+    if ((a_pos=m_fptr->a_pos_arr=(MM_OFF *)calloc(n_off,sizeof(MM_OFF)))==NULL) {
+      fprintf(stderr," cannot allocate sequence pointers\n");
+      goto error_r;
+    }
+
+    if (fread(f_pos_arr,(size_t)4,n_off,ifile)!=n_off) {
+      fprintf(stderr," error reading seq offsets: %s\n",m_fptr->lb_name);
+      goto error_r;
+    }
+    for (i=0; i<=m_fptr->max_cnt; i++) {
+#ifdef IS_BIG_ENDIAN
+      m_fptr->a_pos_arr[i] = f_pos_arr[i];
+#else
+      m_fptr->a_pos_arr[i] = bl2_uint4_cvt(f_pos_arr[i]);
+#endif
+    }
+  }
+
+  /* all done with ifile, close it */
+  fclose(ifile);
+  free(f_pos_arr);
+
+  /* when reading with stdio, consume the first byte of the sequence
+     file, which is expected to be NULLB */
+  if (!m_fptr->mm_flg) {
+    tmp = fgetc(m_fptr->libf);
+    if (tmp!=NULLB)
+      fprintf(stderr," phase error: %d:%d found\n",0,tmp);
+  }
+
+  m_fptr->bl_lib_pos = 1;
+  amb_cnt = 0;
+
+  return m_fptr;
+
+ error_r:
+  /* here if failure after m_fptr allocated: release everything this
+     function acquired or was handed, then m_fptr itself */
+  free(a_pos);
+  free(s_pos);
+  free(d_pos);
+  free(f_pos_arr);
+  free(date_str);
+  free(title_str);
+  fclose(ifile);
+  free(m_fptr);
+  return NULL;
+}
+
+/* **************************************************************** */
+/* ncbl2_closelib() - close the library: free tmp_buf, s_pos_arr,
+   a_pos_arr, d_pos_arr and oid_list, and close the header and sequence
+   files, but keep the rest of the information in *m_fptr.
+
+   Every released pointer is reset to NULL, so a second call on the
+   same m_fptr is harmless.
+*/
+/* **************************************************************** */
+
+void ncbl2_closelib(struct lmf_str *m_fptr)
+{
+
+  if (m_fptr->tmp_buf != NULL) {
+    free(m_fptr->tmp_buf);
+    m_fptr->tmp_buf = NULL;
+    m_fptr->tmp_buf_max = 0;
+  }
+
+  if (m_fptr->s_pos_arr !=NULL) {
+    free(m_fptr->s_pos_arr);
+    m_fptr->s_pos_arr = NULL;
+  }
+  if (m_fptr->a_pos_arr!=NULL) {
+    free(m_fptr->a_pos_arr);
+    m_fptr->a_pos_arr = NULL;
+  }
+
+  /* the header offsets are released whether or not the header file
+     is open, so they cannot leak when hfile is NULL */
+  if (m_fptr->d_pos_arr != NULL) {
+    free(m_fptr->d_pos_arr); 
+    m_fptr->d_pos_arr = NULL;
+  }
+
+  if (m_fptr->hfile !=NULL ) {
+    fclose(m_fptr->hfile);
+    m_fptr->hfile=NULL;
+  }
+
+  if (m_fptr->oid_list != NULL) {
+    free(m_fptr->oid_list);
+    m_fptr->oid_list = NULL;
+    m_fptr->oid_seqs = m_fptr->max_oid = 0;
+  }
+
+  /* NOTE(review): the open functions test USE_MMAP (upper case) but
+     this test is for use_mmap; unless use_mmap is defined somewhere,
+     the munmap() branch is not compiled and the mapping stays in
+     place.  Sequence chains hold pointers into the mapping, so this
+     may be deliberate -- confirm before changing it. */
+#ifdef use_mmap
+  if (m_fptr->mm_flg) {
+    munmap(m_fptr->mmap_base,m_fptr->st_size);
+    m_fptr->mmap_fd = -1;
+  }
+  else 
+#endif
+    if (m_fptr->libf !=NULL ) {
+      fclose(m_fptr->libf);
+      m_fptr->libf=NULL;
+    }
+
+  m_fptr->mm_flg = 0;
+}
+
+/* **************************************************************** */
+/* ncbl2_getliba_o() - read a protein sequence using OID offsets
+
+   Scans the OID bit mask m_fd->oid_list from entry m_fd->lpos up to
+   m_fd->max_oid.  Entry tpos is selected when bit (31 - tpos%32) of
+   the byte-converted word tpos/32 is set.  For the first selected
+   entry the file (when not memory mapped) is positioned at its
+   sequence offset, lpos is set to it, and the read is done by
+   ncbl2_getliba(), which receives all arguments unchanged.
+
+   Returns the value of ncbl2_getliba(), or -1 when no selected entry
+   remains.
+*/
+/* **************************************************************** */
+
+int
+ncbl2_getliba_o(unsigned char *seq,
+                int maxs,
+                char *libstr,
+                int n_libstr,
+                fseek_t *libpos,
+                int *lcont,
+                struct lmf_str *m_fd,
+                long *l_off)
+{
+  int tpos;
+  unsigned int t_mask, t_shift, oid_mask;
+  
+  /* get to the next valid pointer */
+  
+  for ( tpos = m_fd->lpos ;tpos <= m_fd->max_oid; tpos++) {
+    t_mask = tpos / 32;
+    t_shift = 31 - (tpos % 32);
+    /* an all-zero mask word has no selected entries */
+    if ((oid_mask = m_fd->oid_list[t_mask])==0) {  continue; }
+
+    /* unsigned constant: t_shift can be 31, which overflows a signed int */
+    if (bl2_uint4_cvt(oid_mask) & (0x1u << t_shift)) {
+      if (!m_fd->mm_flg) fseek(m_fd->libf,m_fd->s_pos_arr[tpos],0);
+      m_fd->lpos = tpos;	/* ncbl2_getliba() advances lpos itself */
+      m_fd->bl_lib_pos = m_fd->s_pos_arr[tpos];
+      return ncbl2_getliba(seq, maxs, libstr, n_libstr,
+                           libpos, lcont, m_fd, l_off);
+    }
+  }
+  return -1;
+}
+
+/* **************************************************************** */
+/* ncbl2_getliba() - read a protein sequence
+
+   seq      - destination buffer for the residues
+   maxs     - the most that may be placed in seq by one call
+   libstr   - receives the sequence name; set to "" unless DEBUG or
+              PCOMPLIB is defined
+   n_libstr - size of libstr
+   libpos   - set to the entry number (m_fd->lpos) being read
+   lcont    - 0 on entry starts a new sequence; reset to 0 when the
+              sequence fits, incremented when more remains
+   m_fd     - library descriptor
+   l_off    - always set to 1
+
+   Returns the number of residues placed in seq (seq[count] is set to
+   EOSEQ), or -1 when there are no more sequences or the read fails.
+
+   The length still to be delivered is kept in the function-level
+   static seq_len so that a continuation call (*lcont != 0) can use it.
+   NOTE(review): because of that static the function presumably cannot
+   be used on two libraries (or from two threads) at once -- confirm.
+*/
+/* **************************************************************** */
+
+int
+ncbl2_getliba(unsigned char *seq,
+	      int maxs,
+	      char *libstr,
+	      int n_libstr,
+	      fseek_t *libpos,
+	      int *lcont,
+	      struct lmf_str *m_fd,
+	      long *l_off)
+{
+  unsigned char *sptr, *dptr;
+  int s_chunk, d_len, lib_cnt;
+  long seqcnt;
+  long tmp;
+  static long seq_len;
+#if defined(DEBUG) || defined(PCOMPLIB)
+  int gi, my_db, taxid;
+  char acc[MAX_FADL_ACC_LEN], title[MAX_UID], name[MAX_FADL_ACC_LEN];
+#endif
+  
+  *l_off = 1;
+
+  lib_cnt = m_fd->lpos;
+  *libpos = (fseek_t)m_fd->lpos;
+
+  /* start of a new sequence: find its length and, if mapped, its address */
+  if (*lcont==0) {
+    if (lib_cnt >= m_fd->max_cnt) return -1;	/* no more sequences */
+    seq_len = m_fd->s_pos_arr[lib_cnt+1] - m_fd->s_pos_arr[lib_cnt]; /* value is +1 off to get the NULL */
+    if (m_fd->mm_flg) m_fd->mmap_addr = m_fd->mmap_base+m_fd->s_pos_arr[lib_cnt];
+#if !defined(DEBUG) && !defined(PCOMPLIB)
+    libstr[0]='\0';
+#else
+    /* get the name from the header file */
+    fseek(m_fd->hfile,m_fd->d_pos_arr[lib_cnt],0);
+
+    if (m_fd->bl_format_ver == FORMATDBV3) {
+      /* FORMATDBV3: the header bytes are copied into libstr as they are */
+      d_len = min(n_libstr-1,m_fd->d_pos_arr[lib_cnt+1]-m_fd->d_pos_arr[lib_cnt]-1);
+      fread(libstr,(size_t)1,(size_t)d_len,m_fd->hfile);
+      libstr[d_len]='\0';
+    }
+    else {
+      /* other versions: the header is parsed by parse_fastadl_asn() and
+         only "gi|<number>" is kept */
+      d_len = min(m_fd->tmp_buf_max,m_fd->d_pos_arr[lib_cnt+1]-m_fd->d_pos_arr[lib_cnt]-1);
+      fread(m_fd->tmp_buf,(size_t)1,(size_t)d_len,m_fd->hfile);
+      parse_fastadl_asn((unsigned char *)m_fd->tmp_buf, (unsigned char *)m_fd->tmp_buf+d_len,
+			&gi, &my_db, acc, sizeof(acc), name, sizeof(name), title, sizeof(title), &taxid);
+      sprintf(m_fd->tmp_buf,"gi|%d",gi);
+      SAFE_STRNCPY(libstr,m_fd->tmp_buf,n_libstr);
+    }
+    libstr[n_libstr-1]='\0';
+#endif
+  }
+  if (seq_len <= maxs) { /* sequence fits */
+    seqcnt = seq_len;
+    m_fd->lpos++;
+    *lcont = 0;
+  }
+  else {		/* doesn't fit */
+    seqcnt = maxs-1;
+    (*lcont)++;
+  } 
+
+  if (m_fd->mm_flg) sptr = (unsigned char *)m_fd->mmap_addr;
+  else {
+    /* NOTE(review): seq_len bytes are requested even in the "doesn't
+       fit" case, where seq_len > maxs; confirm that seq is large enough
+       and how a continuation call is meant to work with fopen()ed files */
+    if ((tmp=fread(seq,(size_t)1,(size_t)seq_len,m_fd->libf))!=(size_t)seq_len) {
+      fprintf(stderr," could not read sequence record: %lld %ld != %ld\n",
+	      *libpos,tmp,seq_len);
+      goto error; 
+    }
+    sptr = seq;
+  }
+  /* a complete sequence: drop the extra byte counted in seq_len above */
+  if (seq_len <= maxs) {seqcnt = --seq_len;}
+
+  /* everything is ready, set up dst. pointer, seq_len */
+  /* a final residue that aa_b2toa[] maps to '*' shortens seq_len
+     (seqcnt is not changed) */
+  if (aa_b2toa[sptr[seq_len-1]]=='*') seq_len--;
+  if (aa_btof_null) {
+    /* aa_btof_null set: the bytes are copied without translation */
+    memcpy(seq,sptr,seq_len);
+  }
+  else {
+    /* translate each byte through aa_btof[], 16 per pass, then the rest */
+    dptr = seq;
+    s_chunk = seqcnt/16;
+    while (s_chunk-- > 0) {
+      *dptr++ = aa_btof[*sptr++];
+      *dptr++ = aa_btof[*sptr++];
+      *dptr++ = aa_btof[*sptr++];
+      *dptr++ = aa_btof[*sptr++];
+      *dptr++ = aa_btof[*sptr++];
+      *dptr++ = aa_btof[*sptr++];
+      *dptr++ = aa_btof[*sptr++];
+      *dptr++ = aa_btof[*sptr++];
+      *dptr++ = aa_btof[*sptr++];
+      *dptr++ = aa_btof[*sptr++];
+      *dptr++ = aa_btof[*sptr++];
+      *dptr++ = aa_btof[*sptr++];
+      *dptr++ = aa_btof[*sptr++];
+      *dptr++ = aa_btof[*sptr++];
+      *dptr++ = aa_btof[*sptr++];
+      *dptr++ = aa_btof[*sptr++];
+    }
+    while (dptr < seq+seqcnt) *dptr++ = aa_btof[*sptr++];
+  }
+
+  /* remember how far the mapped source has been consumed */
+  if (m_fd->mm_flg) m_fd->mmap_addr = (char *)sptr;
+
+  /* we didn't get it all, so reset for more */
+  if (*lcont) seq_len -= seqcnt;
+
+  seq[seqcnt]= EOSEQ;
+  return (seqcnt);
+  
+error:	fprintf(stderr," error reading %s at %lld\n",libstr,*libpos);
+  fflush(stderr);
+  return (-1);
+}
+
+/* ncbl2_get_mmap_chain_o() fills cur_seqr_chain with sequence pointers
+   from the memory mapped file at *m_fd, based on oid coordinates (not
+   contiguous)
+
+   requires NCBIstdaa core encoding
+
+   Starting at entry m_fd->lpos, every entry whose bit is set in
+   m_fd->oid_list (bit 31 - tpos%32 of the byte-converted word tpos/32)
+   fills the next seq_record/mseq_record pair; no sequence data is
+   copied, aa1b points into the mapping.  db->entries and db->length
+   (with db->carry taking the overflow above LONG_MAX) are updated for
+   each entry used.
+
+   Returns the number of records filled (also stored in
+   cur_seqr_chain->cur_seq_cnt), or EOF when no selected entry remains
+   or when that number reaches m_fd->max_cnt.
+*/
+int
+ncbl2_get_mmap_chain_o(struct seqr_chain *cur_seqr_chain, 
+                       struct lmf_str *m_fd, struct db_str *db)
+{
+  int i;
+  struct seq_record *seq_a, *seq_p;
+  struct mseq_record *mseq_a, *mseq_p;
+
+  int tpos;
+  int found;		/* a selected entry was stored in slot i */
+  unsigned int t_mask, t_shift, oid_mask;
+  
+  tpos = m_fd->lpos;
+  if (tpos > m_fd->max_oid) return EOF;
+  seq_a = cur_seqr_chain->seqr_base;
+  mseq_a = cur_seqr_chain->mseqr_base;
+
+  for (i=0; i < cur_seqr_chain->max_chain_seqs; i++) {
+    if (tpos > m_fd->max_oid) {
+      break;
+    }
+    seq_p = &seq_a[i];
+    mseq_p = &mseq_a[i];
+    found = 0;
+
+    /* now get the next valid pointer */
+    while (tpos <= m_fd->max_oid) {
+      t_mask = tpos / 32;
+      t_shift = 31 - (tpos % 32);
+      if ((oid_mask = m_fd->oid_list[t_mask])==0) {  tpos++; continue; }
+
+      /* have a valid entry */
+      /* unsigned constant: t_shift can be 31, which overflows a signed int */
+      if (bl2_uint4_cvt(oid_mask) & (0x1u << t_shift)) {
+        m_fd->bl_lib_pos = m_fd->s_pos_arr[tpos];
+        seq_p->n1 = m_fd->s_pos_arr[tpos+1] - m_fd->s_pos_arr[tpos]-1; /* value is +1 off to get the NULL */
+        seq_p->aa1b = (unsigned char *)(m_fd->mmap_base + m_fd->s_pos_arr[tpos]);
+        seq_p->l_offset = 0;
+        seq_p->l_off = 1;
+
+        db->entries++;
+        db->length += seq_p->n1;
+        if (db->length > LONG_MAX) {
+          db->length -= LONG_MAX; db->carry++;
+        }
+
+        mseq_p->m_file_p = m_fd;
+        mseq_p->n1tot_p=NULL;
+        mseq_p->cont = 0;
+        seq_p->index = mseq_p->index = mseq_p->lseek = tpos++;
+#ifndef DEBUG
+        mseq_p->libstr[0] = '\0';
+#endif
+#if DEBUG
+        seq_p->adler32_crc = mseq_p->adler32_crc = adler32(1L,seq_p->aa1b,seq_p->n1);
+#endif
+        found = 1;
+        break;
+      }
+      else {
+        tpos++;
+      }
+    }
+    /* the mask ran out before slot i was filled: do not count the slot */
+    if (!found) break;
+  }
+  if (i==0 && tpos > m_fd->max_oid) return EOF;
+  m_fd->lpos = tpos;
+  cur_seqr_chain->cur_seq_cnt = i;
+  if (i >= m_fd->max_cnt) return EOF;
+  else return i;
+}
+
+/* ncbl2_get_mmap_chain() fills cur_seqr_chain with sequence pointers
+   from the memory mapped file at *m_fd
+
+   because the database is opened read-only, this code only works with
+   an amino acid mapping identical to that used by blastdbcmd, aa_b2toa[]
+
+   Consecutive entries are taken, starting at m_fd->lpos, until either
+   the chain holds max_chain_seqs records or entry max_cnt is reached.
+   No sequence data is copied: each aa1b points into the mapping.
+   db->entries/db->length/db->carry are updated for every entry taken.
+
+   Returns the number of records filled (also stored in
+   cur_seqr_chain->cur_seq_cnt), or EOF when lpos is already at max_cnt.
+*/
+int
+ncbl2_get_mmap_chain(struct seqr_chain *cur_seqr_chain, 
+                     struct lmf_str *m_fd, struct db_str *db) {
+  int n_loaded, entry;
+  struct seq_record *seq_p;
+  struct mseq_record *mseq_p;
+
+  entry = m_fd->lpos;
+  if (entry >= m_fd->max_cnt) return EOF;
+
+  /* walk the two record arrays in step */
+  seq_p = cur_seqr_chain->seqr_base;
+  mseq_p = cur_seqr_chain->mseqr_base;
+  n_loaded = 0;
+
+  while (n_loaded < cur_seqr_chain->max_chain_seqs
+         && entry < m_fd->max_cnt) {
+
+    /* the sequence itself: start address and length inside the mapping */
+    seq_p->aa1b = (unsigned char *)(m_fd->mmap_base + m_fd->s_pos_arr[entry]);
+    seq_p->n1 = m_fd->s_pos_arr[entry+1] - m_fd->s_pos_arr[entry]-1; /* value is +1 off to get the NULL */
+    seq_p->l_offset = 0;
+    seq_p->l_off = 1;
+
+    /* bookkeeping that accompanies the sequence */
+    mseq_p->m_file_p = m_fd;
+    mseq_p->n1tot_p=NULL;
+    mseq_p->cont = 0;
+    mseq_p->lseek = entry;
+    mseq_p->index = mseq_p->lseek;
+    seq_p->index = mseq_p->index;
+#ifndef DEBUG
+    mseq_p->libstr[0] = '\0';
+#endif
+#if DEBUG
+    seq_p->adler32_crc = mseq_p->adler32_crc = adler32(1L,seq_p->aa1b,seq_p->n1);
+#endif
+
+    /* database totals; the excess above LONG_MAX moves into carry */
+    db->entries++;
+    db->length += seq_p->n1;
+    if (db->length > LONG_MAX) {
+      db->length -= LONG_MAX; db->carry++;
+    }
+
+    entry++;
+    n_loaded++;
+    seq_p++;
+    mseq_p++;
+  }
+
+  m_fd->lpos = entry;
+  cur_seqr_chain->cur_seq_cnt = n_loaded;
+  return n_loaded;
+}
+
+/* **************************************************************** */
+/* ncbl2_getlibn_o() - read a DNA sequence using OID offsets
+
+   Scans the OID bit mask m_fd->oid_list from entry m_fd->lpos up to
+   m_fd->max_oid.  Entry tpos is selected when bit (31 - tpos%32) of
+   the byte-converted word tpos/32 is set.  For the first selected
+   entry the file (when not memory mapped) is positioned at its
+   sequence offset, lpos is set to it, and the read is done by
+   ncbl2_getlibn(), which receives all arguments unchanged.
+
+   Returns the value of ncbl2_getlibn(), or -1 when no selected entry
+   remains.
+*/
+/* **************************************************************** */
+
+int 
+ncbl2_getlibn_o(unsigned char *seq,
+                int maxs,
+                char *libstr,
+                int n_libstr,
+                fseek_t *libpos,
+                int *lcont,
+                struct lmf_str *m_fd,
+                long *l_off)
+{
+  int tpos;
+  unsigned int t_mask, t_shift, oid_mask;
+  
+  /* get to the next valid pointer */
+  
+  for (tpos = m_fd->lpos; tpos <= m_fd->max_oid; tpos++) {
+    t_mask = tpos / 32;
+    t_shift = 31 - (tpos % 32);
+    /* an all-zero mask word has no selected entries */
+    if ((oid_mask = m_fd->oid_list[t_mask])==0) {  continue; }
+
+    /* unsigned constant: t_shift can be 31, which overflows a signed int */
+    if (bl2_uint4_cvt(oid_mask) & (0x1u << t_shift)) {
+      if (!m_fd->mm_flg) fseek(m_fd->libf,m_fd->s_pos_arr[tpos],0);
+      m_fd->lpos = tpos;	/* ncbl2_getlibn() advances lpos itself */
+      m_fd->bl_lib_pos = m_fd->s_pos_arr[tpos];
+      return ncbl2_getlibn(seq, maxs, libstr, n_libstr,
+                           libpos, lcont, m_fd, l_off);
+    }
+  }
+  return -1;
+}
+
+/* file-scope 4096-byte scratch buffer.
+   NOTE(review): the functions around this declaration do not refer to
+   it -- confirm that it is still used elsewhere in the file. */
+static char tmp_amb[4096];
+
+/* **************************************************************** */
+/* ncbl2_getlibn() - read a DNA sequence
+
+   seq      - destination buffer; the packed bytes are read into its
+              start (or taken from the mapping) and expanded in place
+   maxs     - the most residues that may be produced by one call
+   libstr   - receives the sequence name; set to "" unless DEBUG or
+              PCOMPLIB is defined
+   n_libstr - size of libstr
+   libpos   - set to the entry number (m_fd->lpos) being read
+   lcont    - 0 on entry starts a new sequence; reset to 0 with the
+              last chunk, incremented when more remains
+   m_fd     - library descriptor
+   l_off    - always set to 1
+
+   The sequence is stored 4 residues per byte.  Each 2-bit value is
+   expanded to one residue code in the range 1..4; the low 2 bits of
+   the last packed byte give the number of residues in that byte.
+   If the entry has an ambiguity table (a_pos_arr[] differs from the
+   next s_pos_arr[]), the residues it lists that fall inside the
+   current chunk are overwritten with nascii[] codes for the 4-bit
+   ambiguity letters.
+
+   Returns the number of residues placed in seq (followed by EOSEQ), or
+   -1 when there are no more sequences or the sequence cannot be read.
+
+   The remaining lengths are kept in function-level statics (seq_len,
+   c_len, c_pad) between the calls for one sequence.
+*/
+/* **************************************************************** */
+
+int
+ncbl2_getlibn(unsigned char *seq,
+              int maxs,
+              char *libstr,
+              int n_libstr,
+              fseek_t *libpos,
+              int *lcont,
+              struct lmf_str *m_fd,
+              long *l_off)
+{
+  unsigned char *sptr, *tptr, stmp;
+  long seqcnt;
+  int s_chunk, lib_cnt;
+  size_t tmp;
+  static long seq_len;
+  static int c_len,c_pad;
+  int c_len_set, d_len;
+#if defined(DEBUG) || defined(PCOMPLIB)
+  int gi, my_db, taxid;
+  char acc[MAX_FADL_ACC_LEN], title[MAX_UID], name[MAX_FADL_ACC_LEN];
+#endif
+
+  /* ambiguity code adapted from NCBI Blast sources by:
+
+     Ralf Jost, Dipl.-Inform.
+     Director, Technical Bioinformatics
+     Biomax Informatics AG
+     ralf.jost at biomax.com
+  */
+
+  /* position, within the whole sequence, of the first residue of this
+     chunk; must be taken before maxs and *lcont are changed below */
+  int amb_lower = *lcont * maxs;
+  /*   int amb_upper = (*lcont + 1) * maxs - 1; */ /* not used */
+
+  int index;
+
+  unsigned int amb_cnt = 0;
+  unsigned int large_amb = 0;
+
+  unsigned int end;
+  unsigned int amb_start;
+
+  const char str_4bit[] = "-ACMGRSVTWYHKDBN";
+
+  unsigned int *amb_ptr = NULL;
+  int amb_ok;			/* ambiguity table was read completely */
+  long filepos = 0;
+  char *mmap_pos = NULL;
+
+  unsigned int i;
+  unsigned char res;
+  int row_len;
+  int j, position = 0;
+
+  *l_off = 1;
+
+  lib_cnt = m_fd->lpos;
+  *libpos = (fseek_t)lib_cnt;
+  if (*lcont==0) {	/* not a continuation of previous */
+    if (lib_cnt >= m_fd->max_cnt) return (-1);
+    /* number of packed bytes: sequence start up to the ambiguity table */
+    c_len = m_fd->a_pos_arr[lib_cnt]- m_fd->s_pos_arr[lib_cnt];
+    if (!m_fd->mm_flg) {
+      /* fseek over amb_ray */
+      fseek(m_fd->libf,m_fd->s_pos_arr[lib_cnt],0);
+      m_fd->bl_lib_pos = m_fd->s_pos_arr[lib_cnt];
+    }
+    else m_fd->mmap_addr = m_fd->mmap_base + m_fd->s_pos_arr[lib_cnt];
+#if !defined(DEBUG) && !defined(PCOMPLIB)
+    libstr[0]='\0';
+#else
+    /* get the name from the header file */
+    fseek(m_fd->hfile,m_fd->d_pos_arr[lib_cnt],0);
+
+    if (m_fd->bl_format_ver == FORMATDBV3) {
+      d_len = min(n_libstr-1,m_fd->d_pos_arr[lib_cnt+1]-m_fd->d_pos_arr[lib_cnt]-1);
+      fread(libstr,(size_t)1,(size_t)d_len,m_fd->hfile);
+      /* terminate what was read, as ncbl2_getliba() does */
+      libstr[d_len]='\0';
+    }
+    else {
+      d_len = min(m_fd->tmp_buf_max,m_fd->d_pos_arr[lib_cnt+1]-m_fd->d_pos_arr[lib_cnt]-1);
+      fread(m_fd->tmp_buf,(size_t)1,(size_t)d_len,m_fd->hfile);
+      parse_fastadl_asn((unsigned char *)m_fd->tmp_buf, (unsigned char *)m_fd->tmp_buf+d_len,
+                        &gi, &my_db, acc, sizeof(acc), name, sizeof(name), title, sizeof(title), &taxid);
+      sprintf(m_fd->tmp_buf,"gi|%d",gi);
+      SAFE_STRNCPY(libstr,m_fd->tmp_buf,n_libstr);
+    }
+    libstr[n_libstr-1]='\0';
+#endif
+  }			/* end of *lcont==0 */
+
+  /* To avoid the situation where c_len <= 1; we must anticipate what
+     c_len will be after this pass.  If it will be <= 64, back off this
+     time so next time it will be > 64 */
+
+  seq_len = c_len*4;
+
+  if ((seq_len+4 > maxs) && (seq_len+4 - maxs  <= 256)) {
+    /* we won't be done but we will have less than 256 to go */
+    c_len -= 64; seq_len -= 256; c_len_set = 1; maxs -= 256;}
+  else c_len_set = 0;
+
+  /*
+  fprintf(stderr," lib_cnt: %d %d %d %d\n",lib_cnt,c_len,seq_len,maxs);
+  */
+
+  /* does the rest of the sequence fit? */
+  if (seq_len <= maxs-4 && !c_len_set) {
+    seqcnt = c_len;
+    if (!m_fd->mm_flg) {
+      if ((tmp=fread(seq,(size_t)1,(size_t)seqcnt,m_fd->libf))!=(size_t)seqcnt) {
+        fprintf(stderr,
+                " could not read sequence record: %s %lld %ld != %ld: %d\n",
+                libstr,*libpos,tmp,seqcnt,*seq);
+        goto error; 
+      }
+      m_fd->bl_lib_pos += tmp;
+      sptr = seq + seqcnt;
+    }
+    else sptr = (unsigned char *)(m_fd->mmap_addr+seqcnt);
+
+    *lcont = 0;		/* this is the last chunk */
+    /* lib_cnt++; */	/* increment to the next sequence */
+    /* the last byte is either '0' (no remainder) or the last 1-3 chars and the remainder */
+    c_pad = *(sptr-1);
+    c_pad &= 0x3;	/* get the last (low) 2 bits */
+    seq_len -= (4 - c_pad);	/* if the last 2 bits are 0, its a NULL byte */
+  }
+  else {	/* get the next chunk, but more to come */
+    seqcnt = ((maxs+3)/4)-1;
+    if (!m_fd->mm_flg) {
+      if ((tmp=fread(seq,(size_t)1,(size_t)(seqcnt),m_fd->libf))!=(size_t)(seqcnt)) {
+        fprintf(stderr," could not read sequence record: %lld %ld/%ld\n",
+                *libpos,tmp,seqcnt);
+        goto error;
+      }
+      m_fd->bl_lib_pos += tmp;
+      sptr = seq + seqcnt;
+    }
+    else {
+      sptr = (unsigned char *)(m_fd->mmap_addr+seqcnt);
+      m_fd->mmap_addr += seqcnt;
+    }
+    seq_len = 4*seqcnt;
+    c_len -= seqcnt;
+/*    if (c_len_set) {c_len += 64; maxs += 256;} */
+    (*lcont)++;
+/*  hopefully we don't need this because of c_len -= 64. */
+/*
+    if (c_len == 1) {
+#if !defined (USE_MMAP)
+      c_pad = fgetc(m_fd->libf);
+      *sptr=c_pad;
+#else
+      c_pad = *m_fd->mmap_addr++;
+      sptr = m_fd->mmap_addr;
+#endif
+      c_pad &= 0x3;
+      seq_len += c_pad;
+      seqcnt++;
+      lib_cnt++;
+      *lcont = 0;
+    }
+*/
+  }
+
+  /* point to the last packed byte and to the end of the array
+     seqcnt is the exact number of bytes read
+     tptr points to the destination, use multiple of 4 to simplify math
+     sptr points to the source, note that the last byte will be read 4 cycles
+     before it is written
+     */
+  
+  /* unpack backwards, 8 packed bytes (32 residues) per pass */
+  tptr = seq + 4*seqcnt;
+  s_chunk = seqcnt/8;
+  while (s_chunk-- > 0) {
+    stmp = *--sptr;
+    *--tptr = (stmp&3) +1;
+    *--tptr = ((stmp >>= 2)&3)+1;
+    *--tptr = ((stmp >>= 2)&3)+1;
+    *--tptr = ((stmp >>= 2)&3)+1;
+    stmp = *--sptr;
+    *--tptr = (stmp&3) +1;
+    *--tptr = ((stmp >>= 2)&3)+1;
+    *--tptr = ((stmp >>= 2)&3)+1;
+    *--tptr = ((stmp >>= 2)&3)+1;
+    stmp = *--sptr;
+    *--tptr = (stmp&3) +1;
+    *--tptr = ((stmp >>= 2)&3)+1;
+    *--tptr = ((stmp >>= 2)&3)+1;
+    *--tptr = ((stmp >>= 2)&3)+1;
+    stmp = *--sptr;
+    *--tptr = (stmp&3) +1;
+    *--tptr = ((stmp >>= 2)&3)+1;
+    *--tptr = ((stmp >>= 2)&3)+1;
+    *--tptr = ((stmp >>= 2)&3)+1;
+    stmp = *--sptr;
+    *--tptr = (stmp&3) +1;
+    *--tptr = ((stmp >>= 2)&3)+1;
+    *--tptr = ((stmp >>= 2)&3)+1;
+    *--tptr = ((stmp >>= 2)&3)+1;
+    stmp = *--sptr;
+    *--tptr = (stmp&3) +1;
+    *--tptr = ((stmp >>= 2)&3)+1;
+    *--tptr = ((stmp >>= 2)&3)+1;
+    *--tptr = ((stmp >>= 2)&3)+1;
+    stmp = *--sptr;
+    *--tptr = (stmp&3) +1;
+    *--tptr = ((stmp >>= 2)&3)+1;
+    *--tptr = ((stmp >>= 2)&3)+1;
+    *--tptr = ((stmp >>= 2)&3)+1;
+    stmp = *--sptr;
+    *--tptr = (stmp&3) +1;
+    *--tptr = ((stmp >>= 2)&3)+1;
+    *--tptr = ((stmp >>= 2)&3)+1;
+    *--tptr = ((stmp >>= 2)&3)+1;
+  }
+  /* then the bytes left over, one packed byte at a time */
+  while (tptr>seq) {
+    stmp = *--sptr;
+    *--tptr = (stmp&3) +1;
+    *--tptr = ((stmp >>= 2)&3)+1;
+    *--tptr = ((stmp >>= 2)&3)+1;
+    *--tptr = ((stmp >>= 2)&3)+1;
+  }
+
+  /*
+   * Ambiguity-Decoding code from 
+
+     Ralf Jost, Dipl.-Inform.
+     Director, Technical Bioinformatics
+     Biomax Informatics AG
+     ralf.jost at biomax.com
+
+     using code from NCBI Blast distribution
+   */
+
+  amb_start = m_fd->a_pos_arr[lib_cnt];
+  end = m_fd->s_pos_arr[lib_cnt + 1];
+
+  if (amb_start != end) {
+    /* remember the current read position, then read the table size */
+    if (!m_fd->mm_flg) {
+      filepos = ftell(m_fd->libf);
+      /* find the size of the ambiguity table */
+      if (fseek(m_fd->libf, amb_start, SEEK_SET) != 0) {
+        fprintf(stderr, "*** error [%s:%d] *** -- Seek amb start 0x%08x error %d\n", 
+                __FILE__, __LINE__, amb_start, ferror(m_fd->libf));
+      }
+      if (fread(&amb_cnt, sizeof(unsigned int), 1, m_fd->libf) != 1) {
+        fprintf(stderr, "*** error [%s:%d] *** -- Read amb count error %d\n", 
+                __FILE__, __LINE__, ferror(m_fd->libf));
+        amb_cnt = 0;
+      }
+    } else {
+      mmap_pos = m_fd->mmap_addr;
+      m_fd->mmap_addr = m_fd->mmap_base + amb_start;
+      if (readMFILE((void *)&amb_cnt, sizeof(unsigned int), 1, m_fd) != 1) {
+        /* libf is NULL when the file is memory mapped, so ferror()
+           must not be called here */
+        fprintf(stderr, "*** error [%s:%d] *** -- Read amb count error (mmap)\n", 
+                __FILE__, __LINE__);
+        amb_cnt = 0;
+      }
+    }
+
+    amb_cnt = bl2_uint4_cvt(amb_cnt);
+
+    /* if the most significant bit is set on the count, then each
+     * correction will take two entries in the table.  the layout
+     * is described below.
+     */
+    large_amb = amb_cnt >> 31;
+    amb_cnt = amb_cnt & 0x7fffffff;
+
+    if (amb_cnt > 0) {
+      /* allocate enough space for the ambiguity table */
+      amb_ptr = (unsigned int *) malloc(amb_cnt * sizeof(unsigned int));
+      if (amb_ptr == NULL) {
+        /* report it and leave the sequence without ambiguity codes */
+        fprintf(stderr, "*** error [%s:%d] malloc amb table error size %ld\n",
+                __FILE__, __LINE__, (long)(amb_cnt * sizeof(unsigned int)));
+      }
+      else {
+        /* read the table; an incomplete table is not applied */
+        amb_ok = 1;
+        if (!m_fd->mm_flg) {
+          if (fread((unsigned char *) amb_ptr, sizeof(unsigned int), amb_cnt, m_fd->libf)
+              != amb_cnt) {
+            fprintf(stderr, "*** error [%s:%d] *** -- Read amb table %d error %d\n", 
+                    __FILE__, __LINE__, amb_cnt, ferror(m_fd->libf));
+            amb_ok = 0;
+          }
+        } else {
+          if (readMFILE((void *) amb_ptr, sizeof(unsigned int), amb_cnt, m_fd)
+              != amb_cnt) {
+            fprintf(stderr, "*** error [%s:%d] *** -- Read amb table %d error (mmap)\n", 
+                    __FILE__, __LINE__, amb_cnt);
+            amb_ok = 0;
+          }
+        }
+
+        if (amb_ok) {
+          for (i = 0; i < amb_cnt; i++) {
+            amb_ptr[i] = bl2_uint4_cvt(amb_ptr[i]);
+          }
+
+          for (i = 0; i < amb_cnt; i++) {
+
+            if (large_amb) {
+              /* two words per entry: residue(4 bits)/length(12 bits),
+                 then the position; stop at an entry cut off by the
+                 end of the table */
+              if (i + 1 >= amb_cnt) break;
+              res = (unsigned char) (amb_ptr[i] >> 28);
+              row_len = (int) (amb_ptr[i] >> 16) & 0xFFF;
+              position = amb_ptr[i + 1];
+            } else {
+              /* one word: residue(4 bits), length(4 bits), position(24 bits) */
+              res = (unsigned char) (amb_ptr[i] >> 28);
+              row_len = (int) ((amb_ptr[i] >> 24) & 0xF);
+              position = amb_ptr[i] & 0xFFFFFF;
+            }
+            /* patch only the residues that lie in the current chunk */
+            for (index = position, j = 0; j <= row_len; j++) {
+              if ((index + j >= amb_lower) && (index + j < amb_lower + 4 * seqcnt))
+                seq[index + j - amb_lower] = nascii[str_4bit[res]];
+            }
+
+            if (large_amb)
+              i++;
+          }
+        }
+
+        free(amb_ptr);
+      }
+    }
+
+    /* go back to where the sequence data was being read */
+    if (!m_fd->mm_flg) {
+      fseek(m_fd->libf, filepos, SEEK_SET);
+    } else {
+      m_fd->mmap_addr = mmap_pos;
+    }
+  }
+
+  /*
+   * End of ambiguity-decoding.
+   */
+
+  /* the whole sequence has been delivered: move to the next entry */
+  if ( *lcont == 0)
+    lib_cnt++;
+
+  /*
+    for (sptr=seq; sptr < seq+seq_len; sptr++) {
+    printf("%c",nt[*sptr]);
+    if ((int)(sptr-seq) % 60 == 59) printf("\n");
+    }
+    printf("\n");
+    */
+
+  m_fd->lpos = lib_cnt;
+  if (seqcnt*4 >= seq_len) {	/* there was enough room */
+    seq[seq_len]= EOSEQ;
+    /* printf("%d\n",seq_len); */
+    return seq_len;
+  }
+  else {				/* not enough room */
+    seq[seqcnt*4]=EOSEQ;
+    seq_len -= 4*seqcnt;
+    return (4*seqcnt);
+  }
+  
+error:	fprintf(stderr," error reading %s at %lld\n",libstr,*libpos);
+  fflush(stderr);
+  return (-1);
+}
+
+/* db_type_arr[]: tag text for each database-type number, listed in
+   index order (row 1: 0-7, row 2: 8-15, row 3: 16-17).  ncbl2_ranlib()
+   indexes it with the my_db value filled in by parse_fastadl_asn(). */
+static char
+*db_type_arr[] = {"lcl","gib","gim","gii","gb","emb","pir","sp",
+		  "pat","ref","gnl","gi","dbj","prf","pdb","tpg",
+		  "tpe","tpd"};
+
+/* **************************************************************** */
+/* read a description into str[cnt], position for sequence         */
+/* **************************************************************** */
+void
+ncbl2_ranlib(char *str,		/* out: description text */
+	     int cnt,			/* size of str[] */
+	     fseek_t libpos,		/* entry number: index into d_pos_arr[] and s_pos_arr[] */
+	     char *libstr,		/* not referenced here */
+	     struct lmf_str *m_fd)	/* library state: hfile, libf, offset tables */
+{
+  int llen, lib_cnt;
+  char *bp;
+  unsigned char *my_buff=NULL;
+  char descr[2048];
+  unsigned char *abp;
+  int gi = 0, taxid = 0;
+  int my_db;
+  char acc[MAX_FADL_ACC_LEN], name[MAX_FADL_ACC_LEN];
+  char title[2048];
+  int have_my_buff=0;
+  int have_descr = 0;
+
+  lib_cnt = (int)libpos;
+  llen = m_fd->d_pos_arr[lib_cnt+1]-m_fd->d_pos_arr[lib_cnt];
+  if (llen < 0) llen = 0;	/* inconsistent offsets: read nothing */
+  fseek(m_fd->hfile,m_fd->d_pos_arr[libpos],0);
+  if (m_fd->bl_format_ver == FORMATDBV3) {
+    if (llen >= cnt) llen = cnt-1;
+    llen = (int)fread(str,(size_t)1,(size_t)(llen),m_fd->hfile);
+    str[llen] = '\0';	/* fread() adds no terminator, strchr() below needs one */
+  }
+  else {
+    if (llen >= m_fd->tmp_buf_max) {
+      if ((my_buff=(unsigned char *)calloc(llen,sizeof(char)))==NULL) {
+	fprintf(stderr," cannot allocate ASN.1 buffer: %d\n",llen);
+	my_buff = (unsigned char *)m_fd->tmp_buf;
+	llen = m_fd->tmp_buf_max;
+      }
+      else have_my_buff = 1;
+    }
+    else { 
+      my_buff = (unsigned char *)m_fd->tmp_buf;
+    }
+    abp = my_buff;
+    llen = (int)fread(my_buff,(size_t)1,(size_t)llen,m_fd->hfile);	/* parse only what was read */
+
+    do {
+      gi = 0;	/* no stale gi from the previous record */
+      abp = parse_fastadl_asn(abp, my_buff+llen,
+			      &gi, &my_db, acc, sizeof(acc), name, sizeof(name),
+			      title, sizeof(title), &taxid);
+      if (gi > 0) {
+	sprintf(descr,"gi|%d|%s|%s|%s ",gi,db_type_arr[my_db],acc,name);
+      }
+      else {
+	if (acc[0] != '\0') sprintf(descr,"%s ",acc);
+	else descr[0] = '\0';
+	if (name[0] != '\0' && strcmp(name,"BL_ORD_ID")!=0) sprintf(descr+strlen(descr),"%s ", name);
+      }
+      if (my_db == 0 || m_fd->pref_db < 0) {
+	if (!have_descr) {
+	  SAFE_STRNCPY(str,descr,cnt);
+	  have_descr = 1;
+	}
+	else {
+	  SAFE_STRNCAT(str,"\001",cnt);
+	  SAFE_STRNCAT(str,descr,cnt);
+	}
+	SAFE_STRNCAT(str,title,cnt);
+	if (strlen(str) >= cnt-1) break;
+      }
+      else if (m_fd->pref_db == my_db) {
+	have_descr = 1;
+	SAFE_STRNCPY(str,descr,cnt);
+	SAFE_STRNCAT(str,title,cnt);
+	break;
+      }
+    } while (abp);
+
+    if (!have_descr) {
+      SAFE_STRNCPY(str,descr,cnt);
+      SAFE_STRNCAT(str,title,cnt);	/* was descr a second time, which dropped the title */
+    }
+
+    if (have_my_buff) free(my_buff);
+  }
+
+  str[cnt-1]='\0';
+
+  bp = str;
+  while((bp=strchr(bp,'\001'))!=NULL) {*bp++=' ';}
+
+  if (!m_fd->mm_flg) fseek(m_fd->libf,m_fd->s_pos_arr[libpos],0);
+
+  m_fd->lpos = lib_cnt;
+  m_fd->bl_lib_pos = m_fd->s_pos_arr[lib_cnt];
+}
+
+unsigned int bl2_uint4_cvt(unsigned int val)
+{
+  /* byte-reverse a 4-byte value unless built with IS_BIG_ENDIAN */
+#ifdef IS_BIG_ENDIAN
+  return val;
+#else /* it better be LITTLE_ENDIAN */
+  unsigned int swapped_lo = ((val & 0xff) << 8) | ((val >> 8) & 0xff);
+  unsigned int swapped_hi = (((val >> 16) & 0xff) << 8) | ((val >> 24) & 0xff);
+  return (swapped_lo << 16) + swapped_hi;
+#endif
+}  
+
+unsigned int bl2_long4_cvt(long val)
+{
+  /* same conversion as bl2_uint4_cvt() for a value held in a long; the
+     byte-reversing branch looks at the low 4 bytes of val only */
+#ifdef IS_BIG_ENDIAN
+  int val4 = val;
+  return val4;
+#else /* it better be LITTLE_ENDIAN */
+  unsigned int b0 = val & 255, b1 = (val >> 8) & 255;
+  unsigned int b2 = (val >> 16) & 255, b3 = (val >> 24) & 255;
+  return (((b0 << 8) + b1) << 16) + (b2 << 8) + b3;
+#endif
+}  
+
+uint64_t bl2_long8_cvt(uint64_t val)
+{	/* 8-byte counterpart of bl2_uint4_cvt(): identity with IS_BIG_ENDIAN */
+#ifdef IS_BIG_ENDIAN
+  return val;
+#else /* it better be LITTLE_ENDIAN */
+#ifdef BIG_LIB64
+  uint64_t swapped = 0;
+  int shift;
+  /* peel bytes off the low end of val and push them onto swapped */
+  for (shift = 0; shift < 64; shift += 8)
+    swapped = (swapped << 8) + ((val >> shift) & 255);
+  return swapped;
+#else
+  fprintf(stderr,"Cannot use bl2_long8_cvt without 64-bit longs\n");
+  exit(1);
+#endif
+#endif
+}  
+
+void src_int4_read(FILE *fd,  int *val)
+{	/* read one 4-byte big-endian integer from fd into *val */
+#ifdef IS_BIG_ENDIAN
+  fread((char *)val,(size_t)4,(size_t)1,fd);
+#else
+  unsigned char b[4] = {0, 0, 0, 0};	/* zeros survive a short read */
+  /* widen each byte first: b[0]<<24 overflows a signed int when b[0] >= 0x80 */
+  fread((char *)&b[0],(size_t)1,(size_t)4,fd);
+  *val = (int)(((unsigned int)b[0]<<24) | ((unsigned int)b[1]<<16) |
+	       ((unsigned int)b[2]<<8) | (unsigned int)b[3]);
+#endif
+}
+
+void src_long4_read(FILE *fd,  long *valp)
+{	/* read a 4-byte big-endian signed integer from fd, widened into *valp */
+#ifdef IS_BIG_ENDIAN
+  int val4 = 0;
+  fread(&val4,(size_t)4,(size_t)1,fd);
+  *valp = val4;
+#else
+  unsigned char b[4] = {0, 0, 0, 0};	/* zeros survive a short read */
+  /* widen each byte first: b[0]<<24 overflows a signed int when b[0] >= 0x80 */
+  fread((char *)&b[0],(size_t)1,(size_t)4,fd);
+  *valp = (int)(((unsigned int)b[0]<<24) | ((unsigned int)b[1]<<16) |
+		((unsigned int)b[2]<<8) | (unsigned int)b[3]);
+#endif
+}
+
+void src_uint4_read(FILE *fd,  unsigned int *valp)
+{	/* read one 4-byte big-endian unsigned integer from fd into *valp */
+#ifdef IS_BIG_ENDIAN
+  fread(valp,(size_t)4,(size_t)1,fd);
+#else
+  unsigned char b[4] = {0, 0, 0, 0};	/* zeros survive a short read */
+  /* widen each byte first: b[0]<<24 overflows a signed int when b[0] >= 0x80 */
+  fread((char *)&b[0],(size_t)1,(size_t)4,fd);
+  *valp = ((unsigned int)b[0]<<24) | ((unsigned int)b[1]<<16) |
+	  ((unsigned int)b[2]<<8) | (unsigned int)b[3];
+#endif
+}
+
+void src_long8_read(FILE *fd,  int64_t *val)
+{
+  /* Read an 8-byte big-endian signed integer from fd into *val.
+     Builds with IS_BIG_ENDIAN read the 8 bytes straight into *val. */
+#ifdef IS_BIG_ENDIAN
+  fread((void *)val,(size_t)8,(size_t)1,fd);
+#else
+  uint64_t bits = 0;
+  unsigned char b[8] = {0, 0, 0, 0, 0, 0, 0, 0};	/* zeros survive a short read */
+  int i;
+
+  fread((char *)&b[0],(size_t)1,(size_t)8,fd);
+
+  /* Accumulate in an unsigned 64-bit value: shifting the bytes as plain
+     ints, as before, overflows a signed int once b[0] or b[4] >= 0x80.
+     This form is independent of the width of the native int. */
+  for (i = 0; i < 8; i++) {
+    bits = (bits << 8) | b[i];
+  }
+
+  /* two's-complement reinterpretation gives the signed result */
+  *val = (int64_t)bits;
+#endif
+}
+
+void ncbi_long8_read(FILE *fd,  int64_t *val)
+{	/* read 8 bytes, least-significant byte first, from fd into *val */
+  unsigned char b[8] = {0, 0, 0, 0, 0, 0, 0, 0};	/* zeros survive a short read */
+  uint64_t bits = 0;  int i;
+  fread((char *)&b[0],(size_t)1,(size_t)8,fd);
+  for (i = 7; i >= 0; i--) bits = (bits << 8) | b[i];	/* 64-bit shifts: int math lost b[4..7] */
+  *val = (int64_t)bits;
+}
+
+void src_char_read(FILE *fd, char *val)
+{	/* fetch the next byte of fd into *val */
+  (void)fread(val, sizeof(char), (size_t)1, fd);
+}
+
+void src_fstr_read(FILE *fd, char *val,  int slen)
+{	/* read one slen-byte record from fd into val; no '\0' is appended */
+  (void)fread(val, (size_t)slen, (size_t)1, fd);
+}
+
+void
+newname(char *nname, char *oname, char *suff, int maxn)
+{	/* build "<oname>.<suff>" in nname[]; maxn is handed to the SAFE_ macros as the size limit */
+  SAFE_STRNCPY(nname,oname,maxn);
+  SAFE_STRNCAT(nname,".",maxn);
+  SAFE_STRNCAT(nname,suff,maxn);
+}
+
+#define ASN_SEQ 0x30		/* BER tag byte: SEQUENCE (constructed) */
+#define ASN_IS_BOOL 1		/* BER tag byte: BOOLEAN */
+#define ASN_IS_INT 2		/* BER tag byte: INTEGER */
+#define ASN_IS_STR 26		/* BER tag byte: VisibleString (0x1a) */
+#define ASN_TYPE_MASK 31	/* low 5 bits of a tag byte = tag number */
+
+unsigned char *
+get_asn_int(unsigned char *abp, int *val) {
+  /* decode <INT tag> <len> <len bytes, most significant first>; returns the
+     position 2 bytes past the value.  A wrong tag yields *val = 0 with only
+     that 1 byte consumed. */
+  int v_len;
+  unsigned int v = 0;	/* unsigned: cannot overflow the way int v*256 could */
+
+  if (*abp++ != ASN_IS_INT) { /* check for int */
+    fprintf(stderr,"*** error [%s:%d] -- int missing\n",__FILE__, __LINE__);
+  }
+  else {
+    v_len = *abp++;
+    while (v_len-- > 0) v = (v << 8) + *abp++;
+    abp += 2;	/* skip over null's */
+  }
+
+  *val = (int)v;
+  return abp;
+}
+
+unsigned char *
+get_asn_text(unsigned char *abp, char *text, int t_len) {
+  /* Copy the string at abp (<STR tag> <length> <bytes>) into text[t_len],
+     cut to t_len-1 chars and '\0'-terminated; returns the position 2 bytes
+     past the string.  With any other tag only that 1 byte is consumed. */
+  int tch, at_len, n_copy;
+
+  if (t_len > 0) text[0] = '\0';
+  if (*abp++ != ASN_IS_STR) { /* check for str */
+    fprintf(stderr,"*** error [%s:%d] - str missing\n",__FILE__,__LINE__);
+    return abp;
+  }
+  if ((tch = *abp++) > 128) {	/* string length is in next bytes */
+    tch &= 0x7f;	/* get number of bytes for len */
+    at_len = 0;
+    while (tch-- > 0) { at_len = (int)(((unsigned int)at_len << 8) + *abp++);}
+  }
+  else {
+    at_len = tch;
+  }
+  /* a length wider than an int comes out negative: copy and skip nothing */
+  if (at_len < 0) at_len = 0;
+
+  n_copy = (at_len < t_len-1) ? at_len : t_len-1;
+  if (n_copy >= 0) {	/* negative only when t_len <= 0, i.e. no room */
+    memcpy(text, abp, n_copy);
+    text[n_copy] = '\0';
+  }
+  return abp + at_len + 2;
+}
+
+/* something to try to skip over stuff we don't want: walks to the first
+   0 byte, then steps 2 more bytes per SEQUENCE header it passed.
+   NOTE(review): BOOL is handed to get_asn_int(), which tests for the INT tag */
+unsigned char *
+get_asn_junk(unsigned char *abp) {
+  int open_seqs = 0, ignored_int;
+  char ignored_text[256];
+
+  for ( ; *abp; ) {
+    switch (*abp) {
+    case ASN_SEQ: abp += 2; open_seqs++; break;
+    case ASN_IS_BOOL:	/* shares the INT call, as in the if-chain this replaces */
+    case ASN_IS_INT: abp = get_asn_int(abp, &ignored_int); break;
+    case ASN_IS_STR: abp = get_asn_text(abp, ignored_text, sizeof(ignored_text)-1); break;
+    default: abp += 2;
+    }
+  }
+  return abp + 2 * open_seqs;
+}
+
+#define ASN_FADL_TITLE 0xa0	/* \240 160 */
+#define ASN_FADL_SEQID 0xa1	/* \241 161 */
+#define ASN_FADL_TAXID 0xa2	/* \242 162 */
+#define ASN_FADL_MEMBERS 0xa3	/* \243 163 */
+#define ASN_FADL_LINKS 0xa4	/* \244 164 */
+#define ASN_FADL_OTHER 0xa5	/* \245 165 */
+#define ASN_FADL_GI    171	/* \253 0xab */
+
+#define ASN_FADL_TEXTSEQ_ID 0xa4  /* \244 164 */
+#define ASN_FADL_OTHERSEQ_ID 0xa5  /* \245 165 */
+
+/* from seq.asn::Textseq-id/Textannot-id */
+#define ASN_TEXTSEQ_ID_NAME 0xa0	/* \240 160 */
+#define ASN_TEXTSEQ_ID_ACC  0xa1	/* \241 161 */
+#define ASN_TEXTSEQ_ID_REL  0xa2	/* \242 162 */
+#define ASN_TEXTSEQ_ID_VER  0xa3	/* \243 163 */
+
+unsigned char *
+get_asn_textseq_id(unsigned char *abp, 
+		   char *name, size_t name_len,  char *acc, size_t acc_len)
+{
+  /* Parse the name/accession/release/version fields found before the first
+     0 byte; a version, when present, is appended to acc[] as ".<version>". */
+  char release[20], ver_str[16];	/* 16: room for '.', any int and '\0' */
+  int version, seqcnt = 0;
+  size_t used;
+  ver_str[0]='\0';
+  if (*abp == ASN_SEQ) { abp += 2; seqcnt++;}
+  while (*abp) {
+    switch (*abp) {
+    case ASN_TEXTSEQ_ID_NAME :
+      abp = get_asn_text(abp+2, name, name_len);
+      break;
+    case ASN_TEXTSEQ_ID_ACC :
+      abp = get_asn_text(abp+2, acc, acc_len);
+      break;
+    case ASN_TEXTSEQ_ID_REL :
+      abp = get_asn_text(abp+2, release, sizeof(release));
+      break;
+    case ASN_TEXTSEQ_ID_VER :
+      abp = get_asn_int(abp+2, &version);
+      snprintf(ver_str,sizeof(ver_str),".%d",version);
+      break;
+    default: abp += 2;
+    }
+  }
+  while (seqcnt-- > 0) abp += 4;
+  used = strlen(acc);	/* strncat()'s count excludes the '\0' it appends */
+  if (acc_len > 0 && used < acc_len-1) strncat(acc,ver_str,acc_len-1-used);
+  if (acc_len > 0) acc[acc_len-1]='\0';	/* was acc[19]: wrong for any other buffer size */
+  return abp;
+}
+
+unsigned char *
+get_asn_local_id(unsigned char *abp, char *acc, size_t acc_len)
+{
+  /* read a local id string into acc[]: optional SEQUENCE header (2 bytes),
+     2 more header bytes, then the text */
+  int in_seq = (*abp == ASN_SEQ);
+
+  if (in_seq) abp += 2;
+  abp = get_asn_text(abp+2, acc, acc_len);
+  if (in_seq) abp += 4;
+  acc[acc_len-1]='\0';
+  return abp+2;	/* skip 2 NULL's */
+}
+
+unsigned char *
+get_asn_dbtag(unsigned char *abp, char *name, size_t name_len, char *str, size_t str_len, int *id_p) {
+  /* dbtag: db name (0xa0) into name[], then a tag (0xa1) stored through
+     id_p when it is an integer or in str[] when it is text */
+  abp += (*abp == ASN_SEQ) ? 2 : 0;
+
+  switch (*abp) {
+  case 0xa0: abp = get_asn_text(abp+2, name, name_len); break;  /* get db */
+  default:
+    fprintf(stderr,"*** error [%s:%d] -  missing dbtag:db %d %d\n",__FILE__, __LINE__, abp[0],abp[1]);
+    abp += 2;
+  }
+
+  switch (*abp) {
+  case 0xa1:	/* get tag */
+    abp += 4;	/* the 0xa1 header, then skip over id */
+    abp = (*abp == 2) ? get_asn_int(abp,id_p)
+		      : get_asn_text(abp, str, str_len);
+    break;
+  default:
+    fprintf(stderr,"*** error [%s:%d] - missing dbtag:tag %2x %2x\n",__FILE__, __LINE__, abp[0],abp[1]);
+    abp += 2;
+  }
+  return abp+2;	/* skip 2 NULL's */
+}
+
+#define ASN_DATE_STR 0xa0	/* date given as text: see get_asn_date() */
+#define ASN_DATE_STD 0xa1	/* date given as fields: see get_asn_date_std() */
+#define ASN_DATE_STD_YR 0xa0	/* field tags inside a date-std: year */
+#define ASN_DATE_STD_MO 0xa1	/*   month */
+#define ASN_DATE_STD_DAY 0xa2	/*   day */
+
+unsigned char *
+get_asn_date_std(unsigned char *abp, char *date) {
+  /* parse year/month/day integers up to the first 0 byte and print them
+     into date[] as "%02d-%02d-%02d"; a field that is absent prints as 00 */
+  int seq_cnt=0;
+  int year = 0, month = 0, day = 0;
+  
+  while (*abp == ASN_SEQ) { abp+=2; seq_cnt++;}
+
+  while (*abp) {
+    switch (*abp) {
+    case ASN_DATE_STD_YR :
+      abp = get_asn_int(abp+2, &year);
+      break;
+    case ASN_DATE_STD_MO :
+      abp = get_asn_int(abp+2, &month);
+      break;
+    case ASN_DATE_STD_DAY :
+      abp = get_asn_int(abp+2, &day);
+      break;
+    default: 
+      fprintf(stderr, "*** error [%s:%d] - incorrect date-std code: %2x %2x\n",
+	      __FILE__, __LINE__, abp[0], abp[1]);
+      abp += 2;	/* step past the unknown field; without this the loop never ends */
+    }
+  }
+  sprintf(date, "%02d-%02d-%02d", year, month, day);
+  while (seq_cnt-- > 0) { abp += 4;}
+
+  return abp+2;
+}
+
+unsigned char *
+get_asn_date(unsigned char *abp, char *date, size_t date_len) {
+  /* fill date[] from a text date (0xa0) or a year/month/day date (0xa1) */
+  if (*abp == ASN_DATE_STR) {
+    abp = get_asn_text(abp+2, date, date_len);	/* +2: step over the 0xa0 header; the string tag follows it */
+  }
+  else if (*abp == ASN_DATE_STD) {
+    abp = get_asn_date_std(abp+2, date);
+  }
+  else {
+    fprintf(stderr, "*** error [%s:%d] - incorrect date code: %2x %2x\n",
+	    __FILE__, __LINE__, abp[0], abp[1]);
+  }
+  return abp+2;
+}
+
+unsigned char *
+get_asn_pdb_id(unsigned char *abp, char *acc, size_t acc_len, char *chain, size_t chain_len)
+{
+  int ichain, seq_cnt=0;
+  char dummy[40];	/* the release date is parsed but not returned */
+  /* PDB id: molecule name (0xa0) into acc[], chain code (0xa1) into chain[] */
+  if (*abp == ASN_SEQ) { abp += 2; seq_cnt++;}
+
+  while (*abp) {
+    switch (*abp) {
+    case 0xa0:	/* mol */
+      abp = get_asn_text(abp+2, acc, acc_len);	/* was a fixed 20, whatever acc_len said */
+      break;
+    case 0xa1:	/* chain */
+      abp = get_asn_int(abp+2, &ichain);
+      if (chain_len < 2) break;	/* no room for one char plus '\0' */
+      chain[0] = ichain;
+      chain[1] = '\0';
+      break;
+    case 0xa2:	/* release */
+      abp = get_asn_date(abp+2, dummy, sizeof(dummy));
+      break;
+    default: abp+=2;
+    }
+  }
+  while (seq_cnt-- > 0) {abp += 4;}
+  return abp;
+}
+
+unsigned char *
+get_asn_seqid_ori(unsigned char *abp, int *gi_p, int *db, char *acc, size_t acc_len, char *name, size_t name_len)
+{
+  int db_type, itmp, seq_cnt=0;
+  /* parse one seqid: an optional gi first, then an id laid out per db type */
+  *gi_p = 0;
+
+  if (*abp != ASN_SEQ) {
+    fprintf(stderr, "*** error [%s:%d] - seqid - missing SEQ 1: %2x %2x\n",
+	    __FILE__, __LINE__, abp[0], abp[1]);
+    return abp;
+  }
+  else { abp += 2; seq_cnt++;}
+
+  db_type = (*abp & ASN_TYPE_MASK);
+
+  if (db_type == 11) { /* gi */
+    abp = get_asn_int(abp+2,gi_p);
+  }
+  
+  while (*abp == ASN_SEQ) {abp += 2; seq_cnt++;}
+
+  db_type = (*abp & ASN_TYPE_MASK);
+  if (db_type > 17) {db_type = 0;}
+  *db = db_type;
+
+  switch(db_type) {
+  case 0: 
+    abp = get_asn_local_id(abp, acc, acc_len);
+    break;
+  case 1:
+  case 2:
+    abp = get_asn_int(abp+2,&itmp);
+    abp += 2;
+    break;
+  case 11:
+    abp = get_asn_int(abp+2,&itmp);
+    break;
+  case 4:
+  case 5:
+  case 6:
+  case 7:
+  case 9:
+  case 12:
+  case 13:
+  case 15:
+  case 16:
+  case 17:
+    abp = get_asn_textseq_id(abp,name,name_len, acc, acc_len);
+    break;
+  case 10:
+    abp = get_asn_dbtag(abp+2,name,name_len, acc, acc_len, &itmp);
+    break;	/* was missing: a dbtag id fell through into the pdb parser */
+  case 14:
+    abp = get_asn_pdb_id(abp,acc,acc_len,name,name_len);
+    break;
+  default: abp += 2;
+  }
+  while (seq_cnt-- > 0) { abp += 4;}
+  return abp; /* skip over 2 NULL's */
+}
+
+unsigned char *
+get_asn_db_info(unsigned char *abp, int db_type, int *gi_p, char *name, size_t name_len, char *acc, size_t acc_len) {
+  /* Decode the id that follows a seqid tag.
+   *   abp       position after the tag header (get_asn_seqid() passes abp+2)
+   *   db_type   tag number picked by the caller (tag byte & ASN_TYPE_MASK)
+   *   gi_p      receives the integer for types 11, 1 and 2
+   *   name/acc  receive the text of the other known types
+   * Returns the position after the id.  Except for type 11, that includes
+   * 4 extra bytes for every SEQUENCE header skipped on the way in.
+   */
+  int nested = 0;	/* SEQUENCE headers skipped */
+  int unused_id;	/* dbtag integer id, not reported */
+
+  if (db_type == 11) {
+    return get_asn_int(abp, gi_p);
+  }
+
+  for ( ; *abp == ASN_SEQ; nested++) abp += 2;
+
+  if (db_type == 0) {
+    abp = get_asn_local_id(abp, acc, acc_len);
+  }
+  else if (db_type == 1 || db_type == 2) {
+    abp = get_asn_int(abp,gi_p);
+  }
+  else if (db_type == 10) {
+    abp = get_asn_dbtag(abp,name,name_len, acc, acc_len, &unused_id);
+  }
+  else if (db_type == 14) {
+    abp = get_asn_pdb_id(abp,acc,acc_len,name,name_len);
+  }
+  else if ((db_type >= 4 && db_type <= 7) || db_type == 9 ||
+	   db_type == 12 || db_type == 13 ||
+	   (db_type >= 15 && db_type <= 17)) {
+    /* the types parsed as a text seq-id */
+    abp = get_asn_textseq_id(abp,name,name_len, acc, acc_len);
+  }
+  else {
+    /* 3, 8 and anything out of range: step over 2 bytes */
+    abp += 2;
+  }
+
+  /* 4 bytes per skipped SEQUENCE header */
+  abp += 4 * nested;
+  return abp; /* skip over 2 NULL's */
+}
+
+
+unsigned char *
+get_asn_seqid(unsigned char *abp, int *gi_p, int *db, char *acc, size_t acc_len, char *name, size_t name_len)
+{
+  int db_type, itmp, seq_cnt=0;
+  /* parse the ids of one seqid, up to the first 0 byte, into gi/db/acc/name */
+  *gi_p = 0;
+  /* *db is only written by the numbered-type branch below */
+  if (*abp != ASN_SEQ) {
+    fprintf(stderr, "*** error [%s:%d] - get_asn_seqid - missing SEQ 1: %2x %2x\n",
+	    __FILE__, __LINE__, abp[0], abp[1]);
+    return abp;
+  }
+  else { abp += 2; seq_cnt++;}
+  /* NOTE(review): the two text-id branches leave *db untouched - confirm intended */
+  while (*abp) {
+    if (*abp == ASN_FADL_TEXTSEQ_ID) {
+      abp = get_asn_textseq_id(abp+2, name, name_len, acc, acc_len );
+    }
+    else if (*abp == ASN_FADL_OTHERSEQ_ID) {
+      abp = get_asn_textseq_id(abp+2, name, name_len, acc, acc_len );
+    }
+    else if ((db_type = (*abp & ASN_TYPE_MASK)) <= 17) {	/* was < 17: type 17 ("tpd") is a valid db_type_arr[] index */
+      abp = get_asn_db_info(abp+2, db_type, gi_p, name, name_len, acc, acc_len);
+      *db = db_type;
+    }
+    else {
+      fprintf(stderr,"*** error [%s:%d] -- get_asn_seqid not TEXTSEQ/not GI: %2x %2x\n",
+	      __FILE__, __LINE__,abp[0], abp[1]);
+      return abp;
+    }
+  }
+  
+  while (seq_cnt-- > 0) { abp += 4;}
+  return abp; /* skip over 2 NULL's */
+}
+
+unsigned char *
+get_asn_seqid_other(unsigned char *abp, int *gi_p, char *acc, size_t acc_len, char *name, size_t name_len)
+{
+  int db_type, itmp, seq_cnt=0;
+  /* parse a seqid that carries accession/name text fields directly */
+  *gi_p = 0;
+  name[0] = acc[0] = '\0';
+
+  if (*abp != ASN_SEQ) {
+    fprintf(stderr, "*** error [%s:%d] - get_asn_seqid - missing SEQ 1: %2x %2x\n",
+	    __FILE__, __LINE__, abp[0], abp[1]);
+    return abp;
+  }
+  else { abp += 2; seq_cnt++;}
+
+  while (*abp) {
+    if (*abp == ASN_TEXTSEQ_ID_ACC) {
+      abp = get_asn_text(abp+2, acc, acc_len );
+    }
+    else if (*abp == ASN_TEXTSEQ_ID_VER) {	/* "else" was missing: an ACC field followed by the end of the record was reported as an error */
+      abp = get_asn_text(abp+2, acc, acc_len );
+    }
+    else if (*abp == ASN_TEXTSEQ_ID_NAME) {
+      abp = get_asn_text(abp+2, name, name_len );
+    }
+    else if (*abp == ASN_IS_INT) {
+      abp = get_asn_int(abp, &itmp );
+    }
+    else {
+      fprintf(stderr,"*** error [%s:%d] -- get_asn_seqid not SEQID_ACC: %2x %2x\n",
+	      __FILE__, __LINE__,abp[0], abp[1]);
+      return abp;
+    }
+  }
+  /* NOTE(review): the loop above only exits on a 0 byte, so this test cannot succeed */
+  if (*abp == ASN_FADL_GI) {
+    abp = get_asn_int(abp+2, gi_p);
+  }
+  
+  while (seq_cnt-- > 0) { abp += 4;}
+  return abp; /* skip over 2 NULL's */
+}
+
+
+unsigned char *
+parse_fastadl_asn(unsigned char *asn_buff, unsigned char *asn_max,
+		  int *gi_p, int *db,  char *acc, size_t acc_len,
+		  char *name, size_t name_len,
+		  char *title, size_t t_len, int *taxid_p) {
+  unsigned char *abp;
+  /* Parse one defline record from [asn_buff, asn_max): title, seqid and
+     taxid fields.  Returns the start of the next record, or NULL once
+     the buffer is used up. */
+  acc[0] = name[0] = db[0] = title[0] = '\0';
+  *gi_p = 0;	/* 0 = no gi, as in get_asn_seqid(); callers test it even when no seqid was seen */
+  abp = asn_buff;
+  while ( abp < asn_max && *abp) {
+    if (*abp == ASN_SEQ) { abp += 2; }
+    else if (*abp == ASN_FADL_TITLE) {
+      abp = get_asn_text(abp+2, title, t_len);
+    }
+    else if (*abp == ASN_FADL_SEQID ) {
+      abp = get_asn_seqid(abp+2, gi_p, db, acc, acc_len, name, name_len);
+    }
+    else if (*abp == ASN_FADL_TAXID ) {
+      abp = get_asn_int(abp+2, taxid_p);
+    }
+    else if (*abp == ASN_FADL_MEMBERS) {
+      abp = get_asn_junk(abp+2);
+      break;
+    }
+    else if (*abp == ASN_FADL_LINKS ) {
+      abp = get_asn_junk(abp+2);
+      break;
+    }
+    else if (*abp == ASN_FADL_OTHER ) { /* possibly here for seqid without name */
+      abp = get_asn_seqid_other(abp+2, gi_p, acc, acc_len, name, name_len);
+    }
+    else {
+      /*       fprintf(stderr, " Error - missing ASN.1 %2x:%2x:%2x:%2x\n", 
+	       abp[-2],abp[-1],abp[0],abp[1]); */
+      abp = get_asn_junk(abp);
+      /* something is broken, give up */
+      break;
+    }
+  }
+  while (abp < asn_max && *abp == '\0'  ) abp++;
+  if (abp >= asn_max) return NULL;
+  else return abp;
+}
+
+
+void
+parse_pal(char *dname, char *msk_name,
+	  int *oid_seqs, int *max_oid,
+	  FILE *fd) {
+  /* Scan an alias file for the lines this reader understands:
+   *   DBLIST <name>   -> dname        OIDLIST <name> -> msk_name
+   *   NSEQ <n>        -> *oid_seqs    MAXOID <n>     -> *max_oid
+   * Lines starting with '#' are skipped, as is anything else.  An output
+   * whose keyword never appears is left untouched.
+   * NOTE(review): "%s" is unbounded, so dname/msk_name presumably need
+   * room for a whole line (MAX_STR) - confirm at the call sites.
+   */
+  char line[MAX_STR];
+
+  while (fgets(line,sizeof(line),fd) != NULL) {
+    if (line[0] == '#') continue;
+
+    /* keyword, one separator character, then the value */
+    if (!strncmp(line, "DBLIST", 6)) { sscanf(line+7,"%s",dname); continue; }
+    if (!strncmp(line, "OIDLIST", 7)) { sscanf(line+8,"%s",msk_name); continue; }
+    if (!strncmp(line, "NSEQ", 4)) { sscanf(line+5,"%d",oid_seqs); continue; }
+    if (!strncmp(line, "MAXOID", 6)) { sscanf(line+7,"%d",max_oid); }
+  }
+}
+
+/* part of new ambiguity code */
+int readMFILE (void *buffer, size_t size, int nitems, struct lmf_str *m_fd) {
+  /* fread()-like: copy nitems items of size bytes into buffer, from the
+     memory map (advancing mmap_addr) when mm_flg is set, else from libf;
+     returns the item count, 0 when m_fd is NULL */
+  size_t n_bytes;
+
+  if (m_fd == NULL) return 0;
+
+  if (!m_fd->mm_flg) {
+    return fread((unsigned char *)buffer, size, nitems, m_fd->libf);
+  }
+  n_bytes = size * nitems;
+  memcpy((void *) buffer, (void *) m_fd->mmap_addr, n_bytes);
+  m_fd->mmap_addr += n_bytes;
+  return nitems;
+}
diff --git a/src/ncbl_head.h b/src/ncbl_head.h
new file mode 100644
index 0000000..786d431
--- /dev/null
+++ b/src/ncbl_head.h
@@ -0,0 +1,33 @@
+/* ncbl_head.h	header files for blast1.3 format */
+
+/* $Id: ncbl_head.h 625 2011-03-23 17:21:38Z wrp $ */
+/* $Revision: 625 $  */
+
+#define AMINO_ACID_SEQTYPE	1
+#define AA_SEQTYPE	AMINO_ACID_SEQTYPE
+#define NUCLEIC_ACID_SEQTYPE	2
+#define NT_SEQTYPE	NUCLEIC_ACID_SEQTYPE
+
+/* Filename extensions used by the two types of databases (a.a. and nt.) */
+#define AA_HEADER_EXT	"ahd"	/* opened as hfile by ncbl_openlib() */
+#define AA_TABLE_EXT	"atb"	/* opened as tfile by ncbl_openlib() */
+#define AA_SEARCHSEQ_EXT	"bsq"	/* opened as sfile by ncbl_openlib() */
+#define NT_HEADER_EXT	"nhd"
+#define NT_TABLE_EXT	"ntb"
+#define NT_SEARCHSEQ_EXT	"csq"
+
+#define DB_TYPE_PRO	0x78857a4f	/* Magic # for a protein sequence database */
+#define DB_TYPE_NUC	0x788325f8	/* Magic # for a nt. sequence database */
+
+#define AAFORMAT	3	/* Latest a.a. database format ID number */
+#define NTFORMAT	6	/* Latest nt. database format ID number */
+
+#define NULLB		'\0'	/* sentinel byte */
+#define NT_MAGIC_BYTE	0xfc	/* Magic byte at end of compressed nt db */
+
+#ifndef CHAR_BIT
+#define CHAR_BIT	8	/* these values should match blast */
+#endif
+
+#define NBPN		2	/* presumably bits per nucleotide: ncbl_getlibn() uses CHAR_BIT/NBPN as bases per byte */
+#define NSENTINELS	2	/* subtracted from each entry's byte count in ncbl_getlibn() */
diff --git a/src/ncbl_lib.c b/src/ncbl_lib.c
new file mode 100644
index 0000000..4a11659
--- /dev/null
+++ b/src/ncbl_lib.c
@@ -0,0 +1,491 @@
+/*	ncbl_lib.c	functions to read ncbi-blast format files from
+	setdb (blastp 1.3.2) format files */
+
+/* $Id: ncbl_lib.c 625 2011-03-23 17:21:38Z wrp $ */
+/* $Revision: 625 $  */
+
+/* copyright (c) 1996, 1997, 1998, 1999, 2014 by William R. Pearson and
+   The Rector and Visitors of the University of Virginia */
+
+/* Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under this License is distributed on an "AS
+   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+   express or implied.  See the License for the specific language
+   governing permissions and limitations under the License. 
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifndef WIN32
+#define RBSTR "r"
+#else
+#define RBSTR "rb"
+#endif
+
+#define XTERNAL
+#include "uascii.h"
+#include "upam.h"
+#include "ncbl_head.h"
+#include "mm_file.h"
+
+int ncbl_getliba(char *, int, char *, int, fseek_t *, int *, struct lmf_str *, long *);
+int ncbl_getlibn(char *, int, char *, int, fseek_t *, int *, struct lmf_str *, long *);
+/* NOTE(review): both prototypes list 8 parameters, the definitions below take 6 - confirm which is current */
+void src_ulong_read();
+
+#ifndef NCBL13_ONLY
+static void src_char_read();
+static void newname(char *, char *, char *, int);
+#else
+void src_char_read();
+void newname(char *, char *, char *, int);
+#endif
+
+/* nt_btoa maps  from blast 2bit  format to ascii  characters */
+static char nt_btoa[5] = {"ACGT"};
+
+static char aa_btoa[27]= {"-ARNDCQEGHILKMFPSTWYVBZX*"};	/* residue code -> letter; ncbl_openlib() derives aa_btof[] from it */
+static int aa_btof[32];	/* maps to fasta alphabet */
+
+static FILE *tfile=NULL,	/* table of offsets, also DB info */
+	    *hfile=NULL,	/* description lines */
+	    *sfile=NULL;	/* binary sequence data */
+
+static unsigned long lib_cnt, max_cnt, totlen, mxlen, dbline_len;	/* lib_cnt: entry the next getlib call reads */
+static unsigned long *seq_beg, *hdr_beg;	/* max_cnt+1 offsets each, read from tfile by ncbl_openlib() */
+static unsigned char *ambiguity_ray;	/* one bit per entry, tested in ncbl_ranlib() */
+static long seq_format, dbtype, dbformat;
+static char dline[512];
+
+#define NCBIBL13 11
+
+struct lmf_str *
+ncbl_openlib(char *name, int ldnaseq)
+{	/* open the table, header and sequence files of library "name" and load the offset tables */
+  char hname[256];
+  char sname[256];
+  char tname[256];
+  long title_len;
+  char *title_str;
+  int rdtmp;
+  int i;
+  unsigned long line_len, c_len, clean_count;
+  /* NOTE(review): declared to return a pointer but returns -1 / 1 below - confirm what callers expect */
+  if (ldnaseq!=1) {
+    newname(tname,name,AA_TABLE_EXT,(int)sizeof(tname));
+    if ((tfile = fopen(tname,RBSTR))==NULL) {
+      fprintf(stderr," cannot open %s (%s.%s) table file\n",
+	      name,tname,AA_TABLE_EXT);	/* was NT_TABLE_EXT: wrong extension for the protein table */
+      return (-1);
+    }
+    seq_format = AAFORMAT;
+  }
+  else {
+    newname(tname,name,NT_TABLE_EXT,(int)sizeof(tname));
+    if ((tfile = fopen(tname,RBSTR))==NULL) {
+      fprintf(stderr," cannot open %s (%s.%s) table file\n",
+	      name,tname,NT_TABLE_EXT);
+      return (-1);
+    }
+    seq_format = NTFORMAT;
+  }
+	
+  src_ulong_read(tfile,&dbtype);
+  src_ulong_read(tfile,&dbformat);
+
+  if (seq_format == AAFORMAT && (dbformat != seq_format || dbtype !=
+				 DB_TYPE_PRO)) {
+    fprintf(stderr,"error - %s wrong type (%ld/%d) or format (%ld/%ld)\n",
+	    tname,dbtype,DB_TYPE_PRO,dbformat,seq_format);
+    return (-1);
+  }
+  else if (seq_format == NTFORMAT && (dbformat != seq_format || dbtype !=
+				 DB_TYPE_NUC)) {
+    fprintf(stderr,"error - %s wrong type (%ld/%d) or format (%ld/%ld)\n",
+	    tname,dbtype,DB_TYPE_NUC,dbformat,seq_format);
+    return (-1);
+  }
+
+  if (seq_format == AAFORMAT) {
+    newname(hname,name,AA_HEADER_EXT,(int)sizeof(hname));
+    if ((hfile = fopen(hname,RBSTR))==NULL) {
+      fprintf(stderr," cannot open %s header file\n",hname);
+      return (-1);
+    }
+    newname(sname,name,AA_SEARCHSEQ_EXT,(int)sizeof(sname));
+    if ((sfile = fopen(sname,RBSTR))==NULL) {
+      fprintf(stderr," cannot open %s sequence file\n",sname);
+      return (-1);
+    }
+  }
+  else {
+    newname(hname,name,NT_HEADER_EXT,(int)sizeof(hname));
+    if ((hfile = fopen(hname,RBSTR))==NULL) {
+      fprintf(stderr," cannot open %s header file\n",hname);
+      return (-1);
+    }
+    newname(sname,name,NT_SEARCHSEQ_EXT,(int)sizeof(sname));
+    if ((sfile = fopen(sname,RBSTR))==NULL) {
+      fprintf(stderr," cannot open %s sequence file\n",sname);
+      return (-1);
+    }
+  }
+
+/* all files should be open */
+
+  src_ulong_read(tfile,&title_len);
+  rdtmp = title_len + ((title_len%4 !=0 ) ? 4-(title_len%4) : 0);
+  if ((title_str = calloc((size_t)rdtmp,sizeof(char)))==NULL) {
+    fprintf(stderr," cannot allocate title string (%d)\n",rdtmp);
+    return(-1);
+  }
+  fread(title_str,(size_t)1,(size_t)rdtmp,tfile);
+  free(title_str);	/* the title is only read to get past it; it used to leak */
+  lib_cnt = 0;
+  if (seq_format == AAFORMAT) {
+    src_ulong_read(tfile,&max_cnt);
+    src_ulong_read(tfile,&totlen);
+    src_ulong_read(tfile,&mxlen);
+
+    /* fprintf(stderr," max_cnt: %d, totlen: %d\n",max_cnt,totlen); */
+
+    if ((seq_beg=(unsigned long *)calloc((size_t)max_cnt+1,sizeof(long)))==NULL) {
+      fprintf(stderr," cannot allocate sequence pointers\n");
+      return -1;
+    }
+    if ((hdr_beg=(unsigned long *)calloc((size_t)max_cnt+1,sizeof(long)))==NULL) {
+      fprintf(stderr," cannot allocate header pointers\n");
+      return -1;
+    }
+    for (i=0; i<max_cnt+1; i++) src_ulong_read(tfile,&seq_beg[i]);
+    for (i=0; i<max_cnt+1; i++) src_ulong_read(tfile,&hdr_beg[i]);
+
+    for (i=0; i<sizeof(aa_btoa); i++) {
+      if ((rdtmp=aascii[aa_btoa[i]])<NA) aa_btof[i]=rdtmp;
+      else aa_btof[i]=aascii['X'];
+    }
+  }
+  else if (seq_format == NTFORMAT) {
+    src_ulong_read(tfile,&dbline_len);	/* length of uncompress DB lines */
+    src_ulong_read(tfile,&max_cnt);	/* number of entries */
+    src_ulong_read(tfile,&mxlen);	/* maximum length sequence */
+    src_ulong_read(tfile,&totlen);	/* total count */
+    src_ulong_read(tfile,&c_len);	/* compressed db length */
+    src_ulong_read(tfile,&clean_count);	/* count of nt's cleaned */
+
+    fseek(tfile,(size_t)((clean_count)*4),1);
+    					 /* seek over clean_count */
+    if ((seq_beg=(unsigned long *)calloc((size_t)max_cnt+1,sizeof(long)))==NULL) {
+      fprintf(stderr," cannot allocate sequence pointers\n");
+      return -1;
+    }
+    if ((hdr_beg=(unsigned long *)calloc((size_t)max_cnt+1,sizeof(long)))==NULL) {
+      fprintf(stderr," cannot allocate header pointers\n");
+      return -1;
+    }
+    if ((ambiguity_ray=
+	 (unsigned char *)calloc((size_t)max_cnt/CHAR_BIT+1,sizeof(char)))==NULL) {
+      fprintf(stderr," cannot allocate ambiguity_ray\n");
+      return -1;
+    }
+
+    for (i=0; i<max_cnt+1; i++) src_ulong_read(tfile,&seq_beg[i]);
+    fseek(tfile,(size_t)((max_cnt+1)*4),1);
+    					 /* seek over seq_beg */
+    for (i=0; i<max_cnt+1; i++) src_ulong_read(tfile,&hdr_beg[i]);
+    for (i=0; i<max_cnt/CHAR_BIT+1; i++)
+      src_char_read(tfile,&ambiguity_ray[i]);
+  }
+  return 1;
+}
+
+void ncbl_closelib()
+{	/* close the library files and release the tables ncbl_openlib() allocated */
+  if (tfile !=NULL ) {fclose(tfile); tfile=NULL;}
+  if (hfile !=NULL ) {fclose(hfile); hfile=NULL;}
+  if (sfile !=NULL ) {fclose(sfile); sfile=NULL;}
+  free(seq_beg); seq_beg=NULL; free(hdr_beg); hdr_beg=NULL; free(ambiguity_ray); ambiguity_ray=NULL;
+}
+int
+ncbl_getliba(char *seq, int maxs,
+	     char *libstr, int n_libstr,
+	     fseek_t *libpos,
+	     int *lcont)	/* was "int lcont", yet every use below dereferences it */
+{
+  register char *sptr;
+  long seqcnt;
+  long tmp;
+  /* read the next protein entry, or the next maxs-1 residues of a long one, */
+  static long seq_len;
+  /* into seq[]; returns the residue count, -1 at end of library or on error */
+  *libpos = lib_cnt;
+  if (*lcont==0) {
+    if (lib_cnt >= max_cnt) return -1;
+    seq_len = seq_beg[lib_cnt+1] - seq_beg[lib_cnt] -1;
+    tmp=(long)fgetc(sfile);	/* skip the null byte */
+    if (tmp!=NULLB)
+      fprintf(stderr," phase error: %ld:%ld found\n",lib_cnt,tmp);
+    libstr[0]='\0';
+    }
+  
+  if (seq_len < maxs) {
+    if ((tmp=fread(seq,(size_t)1,(size_t)seq_len,sfile))!=(size_t)seq_len) {
+      fprintf(stderr," could not read sequence record: %ld %ld != %ld\n",
+	      *libpos,tmp,seq_len);
+      goto error; 
+    }
+    seqcnt = seq_len;	/* a trailing '*' is not returned; bound the aa_btoa[] index first */
+    if (seq_len > 0 && (unsigned char)seq[seq_len-1] < sizeof(aa_btoa) && aa_btoa[(unsigned char)seq[seq_len-1]]=='*') seqcnt--;
+    lib_cnt++;
+    *lcont = 0;
+  }
+  else {
+    if (fread(seq,(size_t)1,(size_t)(maxs-1),sfile)!=(size_t)(maxs-1)) {
+      fprintf(stderr," could not read sequence record: %ld %ld\n",
+	      *libpos,seq_len);
+      goto error;
+    }
+    (*lcont)++;
+    seqcnt = maxs-1;
+    seq_len -= seqcnt;
+  }
+  sptr = seq+seqcnt;
+  /* aa_btof[] has 32 entries: mask so an unexpected byte cannot index past it */
+  while (--sptr >= seq) *sptr = aa_btof[(unsigned char)*sptr & 31];
+  
+  seq[seqcnt]= EOSEQ;
+  return (seqcnt);
+  
+error:	fprintf(stderr," error reading %s at %ld\n",libstr,(long)*libpos);	/* libstr is text: %ld was wrong */
+  fflush(stderr);
+  return (-1);
+}
+
+int
+ncbl_getlibn(char *seq, int maxs,
+	     char *libstr, int n_libstr,
+	     fseek_t *libpos, int *lcont)
+{
+  register char *sptr, *tptr, stmp;
+  long seqcnt;
+  long tmp;
+  /* read the next packed nucleotide entry (or the next piece of a long one) and expand it in place */
+  static long seq_len;
+  static int c_len,c_pad;
+  
+  *libpos = lib_cnt;
+  if (*lcont==0) {
+    if (lib_cnt >= max_cnt) return -1;
+    c_len = seq_beg[lib_cnt+1]/(CHAR_BIT/NBPN)
+		- seq_beg[lib_cnt]/(CHAR_BIT/NBPN);
+    c_len -= NSENTINELS;
+
+    seq_len = c_len*(CHAR_BIT/NBPN);
+    c_pad = seq_beg[lib_cnt] & ((CHAR_BIT/NBPN)-1);
+    if (c_pad != 0) seq_len -= ((CHAR_BIT/NBPN) - c_pad);
+
+    tmp=fgetc(sfile);	/* skip the null byte */
+    if (tmp!=NT_MAGIC_BYTE) {
+      fprintf(stderr," phase error: %ld:%ld (%ld/%d) found\n",
+	      lib_cnt,seq_len,tmp,NT_MAGIC_BYTE);
+      goto error;
+    }
+    libstr[0]='\0';
+  }
+
+  if (seq_len < maxs-3) {
+    seqcnt=(seq_len+3)/4;
+    if (seqcnt==0) seqcnt++;
+    if ((tmp=fread(seq,(size_t)1,(size_t)seqcnt,sfile))
+	!=(size_t)seqcnt) {
+      fprintf(stderr,
+	      " could not read sequence record: %s %ld %ld != %ld: %d\n",
+	      libstr,*libpos,tmp,seqcnt,*seq);
+      goto error; 
+    }
+    tmp=fgetc(sfile);	/* skip the null byte */
+    if (tmp!=(unsigned char)NT_MAGIC_BYTE) {
+      fprintf(stderr," phase2 error: %ld:%ld (%ld/%d) next ",
+	      lib_cnt,seqcnt,tmp,NT_MAGIC_BYTE);
+      
+      goto error;
+    }
+    *lcont = 0;
+    lib_cnt++;
+  }
+  else {
+    seqcnt = ((maxs+3)/4)-1;
+    if (fread(seq,(size_t)1,(size_t)(seqcnt),sfile)!=(size_t)(seqcnt)) {
+      fprintf(stderr," could not read sequence record: %s %ld %ld\n",
+	      libstr,*libpos,seqcnt);
+      goto error;
+    }
+    (*lcont)++;
+  }
+  
+  /* point to the last packed byte and to the end of the array
+     seqcnt is the exact number of bytes read
+     tptr points to the destination, use multiple of 4 to simplify math
+     sptr points to the source, note that the last byte will be read 4 cycles
+     before it is written
+     */
+  
+  sptr = seq + seqcnt;
+  tptr = seq + 4*seqcnt;
+  while (sptr>seq) {
+    stmp = *--sptr;
+    *--tptr = (stmp&3) +1;
+    *--tptr = ((stmp >>= 2)&3)+1;
+    *--tptr = ((stmp >>= 2)&3)+1;
+    *--tptr = ((stmp >>= 2)&3)+1;
+  }
+  /*
+    for (sptr=seq; sptr < seq+seq_len; sptr++) {
+    printf("%c",nt[*sptr]);
+    if ((int)(sptr-seq) % 60 == 59) printf("\n");
+    }
+    printf("\n");
+    */
+  if (seqcnt*4 >= seq_len) {	/* there was enough room */
+    seq[seq_len]= EOSEQ;
+    /* printf("%d\n",seq_len); */
+    return seq_len;
+  }
+  else {				/* not enough room */
+    seq[seqcnt*4]=EOSEQ;
+    seq_len -= 4*seqcnt;
+    return (4*seqcnt);
+  }
+  
+error:	fprintf(stderr," error reading %s at %ld\n",libstr,(long)*libpos);	/* libstr is text: %ld was wrong */
+  fflush(stderr);
+  return (-1);
+}
+
+void
+ncbl_ranlib(str,cnt,libpos)
+	char *str; int cnt;
+	long libpos;
+{
+  /* read the header line of entry libpos, copy a description of at most
+     cnt-1 chars into str, and seek sfile to that entry's sequence */
+  char hline[256], *bp, *bp0;
+  int llen, amb;
+  long spos;
+
+  lib_cnt = libpos;
+  llen = hdr_beg[lib_cnt+1]-hdr_beg[lib_cnt];
+  if (llen > (int)sizeof(hline)) llen = sizeof(hline);
+  if (llen < 1) llen = 1;	/* keeps llen-1 below from going negative */
+  fseek(hfile,hdr_beg[lib_cnt]+1,0);
+  memset(hline,0,sizeof(hline));	/* a short read must not expose stack garbage */
+  fread(hline,(size_t)1,(size_t)(llen-1),hfile);
+  hline[llen-1]='\0';
+  amb = (dbformat == NTFORMAT &&
+	 (ambiguity_ray[lib_cnt/CHAR_BIT]&(1<<lib_cnt%CHAR_BIT)));
+
+  bp0 = (hline[9]=='|' || hline[10]=='|') ? strchr(hline+3,'|') : NULL;
+  if (bp0 != NULL) {	/* "...|id text": the id first, then the text */
+    if ((bp=strchr(bp0+1,' '))!=NULL) *bp='\0';
+    snprintf(str,(size_t)cnt,amb ? "*%-9s " : "%-10s ",bp0+1);
+    /* bp is NULL when nothing follows the id; strncat()'s count excludes its '\0' */
+    if (bp != NULL && strlen(str) < (size_t)cnt-1)
+      strncat(str,bp+1,(size_t)cnt-1-strlen(str));
+  }
+  else if (amb) {
+    str[0]='*'; 
+    strncpy(str+1,hline,cnt-1);
+  }
+  else strncpy(str,hline,cnt);
+  str[cnt-1]='\0';
+
+  if (dbformat == AAFORMAT)
+    fseek(sfile,seq_beg[lib_cnt]-1,0);
+  else {
+    spos = (seq_beg[lib_cnt])/(CHAR_BIT/NBPN);
+    fseek(sfile,spos-1,0);
+  }
+}
+
+void src_ulong_read(fd, val)
+     FILE *fd;
+     unsigned long *val;
+{
+  /* Read a 4-byte big-endian unsigned value from fd into *val.  The bytes
+     are combined arithmetically, so the result does not depend on host
+     byte order or on sizeof(long) (a raw 4-byte fread() into a long only
+     works when long is 4 bytes). */
+  unsigned char b[4] = {0, 0, 0, 0};	/* zeros survive a short read */
+  int i;
+
+  fread((char *)&b[0],(size_t)1,(size_t)4,fd);
+  *val = 0;
+  for (i = 0; i < 4; i++) *val = (*val << 8) | b[i];
+}
+
+void src_long_read(fd,val)
+     FILE *fd;
+     long *val;
+{
+  /* Read a 4-byte big-endian value from fd into *val.  Combining the bytes
+     arithmetically works for any host byte order and any sizeof(long); a
+     raw 4-byte fread() into a long only did when long is 4 bytes. */
+  unsigned char b[4] = {0, 0, 0, 0};	/* zeros survive a short read */
+  unsigned long bits = 0;
+  int i;
+
+  fread((char *)&b[0],(size_t)1,(size_t)4,fd);
+  for (i = 0; i < 4; i++) bits = (bits << 8) | b[i];
+  *val = (long)bits;	/* as before: not sign-extended when long is wider than 4 bytes */
+}
+
+#ifndef NCBL13_ONLY
+static void
+#else
+void
+#endif
+src_char_read(fd, val)
+     FILE *fd;
+     char *val;
+{	/* fetch the next byte of fd into *val */
+  (void)fread(val, sizeof(char), (size_t)1, fd);
+}
+
+#ifndef NCBL13_ONLY
+static void
+#else
+void
+#endif
+src_fstr_read(fd, val, slen)
+     FILE *fd;
+     char *val;
+     long slen;
+{	/* read one slen-byte record from fd into val; no '\0' is appended */
+  (void)fread(val, (size_t)slen, (size_t)1, fd);
+}
+
+#ifndef NCBL13_ONLY
+static void
+#else
+void
+#endif
+newname(char *nname, char *oname, char *suff, int maxn)
+{
+  char *tptr;	/* builds "<oname without '@' and extension>.<suff>" in nname[maxn] */
+  if (maxn < 3) { if (maxn > 0) nname[0] = '\0'; return; }	/* no room for "x." */
+  strncpy(nname,(oname[0]=='@') ? &oname[1] : oname,maxn-2);
+  nname[maxn-2] = '\0';	/* strncpy() may not terminate; also keeps room for the '.' */
+  for (tptr=nname; *tptr=='.'; tptr++);	/* leading dots are part of the path */
+  for (; *tptr!='.'&& *tptr; tptr++); /* get to '.' or EOS */
+  *tptr++='.'; *tptr='\0';
+  strncat(nname,suff,maxn-1-strlen(nname));	/* count excludes the '\0' strncat() adds */
+}
+
diff --git a/src/nmgetlib.c b/src/nmgetlib.c
new file mode 100644
index 0000000..3b96da4
--- /dev/null
+++ b/src/nmgetlib.c
@@ -0,0 +1,2254 @@
+/* $Id: nmgetlib.c 1251 2014-01-24 21:34:05Z wrp $ */
+/* $Revision: 1251 $  */
+
+/*  copyright (c) 1987, 1988, 1989, 1992, 1995, 2000, 2014 by 
+    William  R. Pearson and The Rector & Visitors of the University of
+    Virginia */
+
+/* Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under this License is distributed on an "AS
+   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+   express or implied.  See the License for the specific language
+   governing permissions and limitations under the License. 
+*/
+
+/*	May, June 1987	- modified for rapid read of database
+
+	revised (split) version of nmgetaa.c -> renamed nmgetlib.c
+
+	This version seeks to be a thread safe, no global, library
+	reading program.  While adjusting the routines in this file
+	should be relatively easy, ncbl2_mlib.c and mysql_lib.c may be
+	more difficult.
+
+	nmgetlib.c and mmgetaa.c are used together.  nmgetlib.c provides
+	the same functions as nxgetaa.c if memory mapping is not used,
+	mmgetaa.c provides the database reading functions if memory
+	mapping is used. The decision to use memory mapping is made on
+	a file-by-file basis.
+
+	June 2, 1987 - added TFASTA
+	March 30, 1988 - combined ffgetaa, fgetgb;
+	April 8, 1988 - added PIRLIB format for unix
+	Feb 4, 1989 - added universal subroutines for libraries
+	December, 1995 - added range option file.name:1-1000
+	September, 1999 - added option for mmap()ed files using ".xin" */
+
+
+/*
+	February 4, 1988 - this starts a major revision of the getaa
+	routines.  The goal is to be able to search the following format
+	libraries:
+
+	0 - normal FASTA format
+	1 - full Genbank flatfile format
+	2 - NBRF/PIR CODATA format
+	3 - EMBL/Swiss-prot format
+	4 - Intelligentics format
+	5 - NBRF/PIR VMS format
+	6 - GCG 2bit format
+
+	10 - list of gi/acc's
+	11 - NCBI setdb/blastp (1.3.2) AA/NT
+	12 - NCBI setdb/blastp (2.0) AA/NT
+	16 - mySQL queries
+
+	see file altlib.h to confirm numbers
+
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "defs.h"
+#include "structs.h"
+
+#ifndef SFCHAR
+#define SFCHAR ':'
+#endif
+
+#define EOSEQ 0
+
+#include "uascii.h"
+/* #include "upam.h" */
+
+#define LFCHAR '\015'  /* for MWC 5.5 */
+
+#include "altlib.h"
+
+#include <fcntl.h>
+#ifndef O_RAW
+#ifdef O_BINARY
+#define O_RAW O_BINARY
+#else
+#define O_RAW 0
+#endif		/* O_BINARY */
+#endif		/* O_RAW */
+
+#ifdef WIN32
+#define RBSTR "rb"	/* read file in binary mode */
+#else
+#define RBSTR "r"
+#endif
+
+/* prototypes for helpers used by open_lib()/re_openlib(); their
+   definitions are not in this part of the file */
+char *alloc_file_name(char *f_name);
+struct lib_struct *get_lnames(char *tname, struct lib_struct *cur_lib_p);
+struct lmf_str *load_mmap(FILE *, char *, int, int, struct lmf_str *);
+struct lmf_str *ncbl2_openlib(struct lib_struct *, int ldnaseq);
+struct lmf_str *ncbl2_reopen(struct lmf_str *);
+
+/* the library most recently returned by open_lib()/re_openlib();
+   reset to NULL by closelib() when that library is closed */
+static struct lmf_str *last_m_fptr=NULL;
+
+/* accession selectors: each sel_*() function has a matching
+   sel_*_init() function that is handed the ACC_LIST file */
+int sel_acc_libstr(char *libstr, int gi, void *ptr);
+void *sel_acc_libstr_init(FILE *libf, int *acc_off, char fmt_term);
+
+int sel_acc_gi(char *libstr, int gi, void *ptr);
+void *sel_acc_gi_init(FILE *libf, int *acc_off, char fmt_term);
+
+int sel_hacc_libstr(char *libstr, int gi, void *ptr);
+void *sel_hacc_libstr_init(FILE *libf, int *acc_off, char fmt_term);
+
+int sel_hacc_gi(char *libstr, int gi, void *ptr);
+void *sel_hacc_gi_init(FILE *libf, int *acc_off, char fmt_term);
+
+/* selector tables, indexed by the acc_ltype number that open_lib()
+   parses from the first line of an ACC_LIST file; entry 0 is NULL
+   in both tables */
+#define MAX_ACC_TYPE 4
+int (*sel_acc_arr[MAX_ACC_TYPE+1])(char *libstr, int gi, void *ptr) = {
+  NULL, sel_acc_libstr, sel_acc_gi, sel_hacc_libstr, sel_hacc_gi
+};
+
+void *(*sel_acc_init[MAX_ACC_TYPE+1])(FILE *libf, int *acc_off, char fmt_term) = {
+  NULL, sel_acc_libstr_init, sel_acc_gi_init, sel_hacc_libstr_init, sel_hacc_gi_init
+};
+
+unsigned int hash_func(char *key);
+unsigned int fast_hash32 (unsigned int data);
+
+/* optional SQL back ends, compiled in only when requested */
+#ifdef MYSQL_DB
+struct lmf_str *mysql_openlib(char *, int, int *);
+struct lmf_str *mysql_reopen(struct lmf_str *);
+#endif
+
+#ifdef PGSQL_DB
+struct lmf_str *pgsql_openlib(char *, int, int *);
+struct lmf_str *pgsql_reopen(struct lmf_str *);
+#endif
+
+extern int can_mmap(int lib_type);
+
+int closelib(struct lmf_str *m_fptr, int force);
+extern void newname(char *nname, char *oname, char *suff, int maxn);
+
+/* a file name for openlib may include a library type suffix */
+
+/* open_lib() - open the sequence library described by lib_p and
+   return its lmf_str reader, or NULL on failure.
+
+   lib_p->file_name may carry two suffixes, both of which are cut off
+   (the string is modified in place):
+     "name:text" - the text after ':' is saved in m_fptr->opt_text
+     "name N"    - N is a library type number, saved in lib_p->lib_type
+   A name that starts with '-' or '@' means "read from stdin", which
+   is accepted for library type 0 only.
+
+   ldnaseq - sequence type; compared with SEQT_PROT to set lib_aa
+   sascii  - character to residue-code table, saved in m_fptr->sascii
+   outtty  - non-zero: when the open fails, prompt on stderr and read
+             a replacement file name from stdin (at most 10 times)
+
+   Text libraries (lib_type <= LASTTXT) get a new, or re-used,
+   lmf_str and return with the first line of the file already read
+   into m_fptr->lline.  An ACC_LIST file names the real library on
+   its first line ("<library" optionally followed by
+   ":acc_ltype acc_off" and a terminator character); that library is
+   opened by a recursive call and the accession selector is attached
+   to the lmf_str that is returned.
+
+   Relative to earlier versions: fmt_term and lib_type always have a
+   defined value, an ACC_LIST type below 1 no longer indexes outside
+   (or calls a NULL entry of) the selector tables, the SQL back ends
+   are only dereferenced after a successful open, and the ACC_LIST
+   error paths close the list file.
+*/
+struct lmf_str *
+open_lib(struct lib_struct *lib_p, int ldnaseq, int *sascii, int outtty)
+{
+  struct lmf_str *om_fptr;
+  char iname[MAX_FN];		/* name of the ".xin" index (USE_MMAP only) */
+  char *bp;
+  char opt_text[MAX_FN];	/* save text after ':' */
+  char f_line[MAX_STR];
+  int wcnt, opnflg;
+  int lib_type;
+  int acc_ltype = 1;	/* def type is 1, not zero, so that the acc is read */
+  struct lmf_str *acc_fptr;	/* file of subset accessions */
+  FILE *libi=NULL;
+  FILE *libf=NULL;
+  int use_stdin;
+  struct lmf_str *m_fptr=NULL;
+  int acc_off=0;
+  char fmt_term=' ';	/* blank terminator is default */
+  struct lib_struct *this_lib_p, *tmp_lib_p;
+
+  om_fptr = lib_p->m_file_p;
+
+  /* a memory mapped library only needs to be rewound */
+  if (om_fptr != NULL && om_fptr->mm_flg) {
+    om_fptr->lpos = 0;
+    return om_fptr;
+  }
+
+  wcnt = 0;	/* number of times to ask for file name */
+
+  /* check to see if there is a file option ":1-100" */
+#ifndef WIN32
+  if ((bp=strchr(lib_p->file_name,':'))!=NULL && *(bp+1)!='\0') {
+#else
+  if ((bp=strchr(lib_p->file_name+3,':'))!=NULL && *(bp+1)!='\0') {
+#endif
+    strncpy(opt_text,bp+1,sizeof(opt_text));
+    opt_text[sizeof(opt_text)-1]='\0';
+    *bp = '\0';
+  }
+  else opt_text[0]='\0';
+
+  if (lib_p->file_name[0] == '-' || lib_p->file_name[0] == '@') {
+    use_stdin = 1;
+  }
+  else use_stdin=0;
+
+  /* check for library type */
+  if ((bp=strchr(lib_p->file_name,' '))!=NULL) {
+    *bp='\0';
+    /* sscanf() leaves lib_type untouched when no number follows the
+       blank, so start from the type already recorded in lib_p */
+    lib_type = lib_p->lib_type;
+    sscanf(bp+1,"%d",&lib_type);
+    if (lib_type<0 || lib_type >= LASTLIB) {
+      fprintf(stderr,"\n invalid library type: %d (>%d)- resetting\n%s\n",
+	      lib_type,LASTLIB,lib_p->file_name);
+      lib_type=0;
+    }
+    else {
+      lib_p->lib_type = lib_type;
+    }
+  }
+  else lib_type = lib_p->lib_type;
+
+  if (use_stdin && lib_type !=0 ) {
+    fprintf(stderr,"\n @/- STDIN libraries must be in FASTA format\n");
+    return NULL;
+  }
+
+  /* check to see if file can be open()ed? */
+
+ l1:
+  opnflg = 0;
+  if (lib_type<=LASTTXT) {
+    if (!use_stdin) {
+      opnflg=((libf=fopen(lib_p->file_name,RBSTR))!=NULL);
+    }
+    else {
+      libf=stdin;
+      lib_p->file_name = alloc_file_name("STDIN");
+      opnflg=1;
+    }
+  } 
+  else if (lib_type==ACC_LIST) {
+    /* if we have already processed the acc_list file, 
+       open the file, modify the acc_list stuff, and return it
+    */
+    if (lib_p->acc_file_p != NULL) {
+      if ((acc_fptr = open_lib(lib_p, ldnaseq, sascii, outtty))==NULL) {
+	fprintf(stderr, "Cannot open %s library for ACC_LIST\n",lib_p->file_name);
+	return NULL;
+      }
+      else {
+	/* note that sel_acc_arr[0] must be NULL */
+	acc_fptr->sel_acc_p = lib_p->acc_file_p->sel_acc_p;
+	acc_fptr->acc_off = lib_p->acc_file_p->acc_off;
+	return acc_fptr;
+      }
+    }
+
+    /* open the file, read the first line, do an openlib on the first line */
+    if (!use_stdin) {
+      opnflg=((libf=fopen(lib_p->file_name,RBSTR))!=NULL);
+    }
+    else {
+      libf=stdin;
+      lib_p->file_name = alloc_file_name("STDIN");
+      opnflg=1;
+    }
+
+    if (!opnflg) {
+      fprintf(stderr, "Cannot open %s library\n",lib_p->file_name);
+      return NULL;
+    }
+    else {
+      /* read in the file line */
+      if (fgets(f_line, sizeof(f_line), libf)==NULL) {
+	fprintf(stderr, "Cannot read ACC_LIST file line\n");
+	if (libf != stdin) fclose(libf);
+	return NULL;
+      }
+      /* else parse the file line */
+      if (f_line[0] != '<') {
+	fprintf(stderr, "missing < - %s\n",f_line);
+	if (libf != stdin) fclose(libf);
+	return NULL;
+      }
+      if ((bp=strchr(f_line+1,'\r'))!=NULL) {*bp = '\0';}
+      if ((bp=strchr(f_line+1,'\n'))!=NULL) {*bp = '\0';}
+
+      /* check for accession format */
+      if ((bp=strchr(f_line+1,':'))!=NULL) {
+	*bp = '\0';
+	/* access string should be %d %d%c - acc_ltype, acc_off, fmt_term */
+	sscanf(bp+1,"%d %d%c",&acc_ltype, &acc_off, &fmt_term);
+	/* blank terminator is default */
+	if (acc_off == 0) acc_off = 1;	/* always skip the '>' */
+	if (fmt_term < ' ' || fmt_term > '~') fmt_term = ' ';
+	/* keep acc_ltype a valid index into sel_acc_arr[]/sel_acc_init[] */
+	if (acc_ltype < 0) {acc_ltype = 1;}
+	if (acc_ltype > MAX_ACC_TYPE) {acc_ltype = MAX_ACC_TYPE;}
+      }
+
+      this_lib_p = get_lnames(f_line+1, NULL);
+      if (this_lib_p == NULL) {
+	fprintf(stderr, "Cannot open %s library for ACC_LIST\n",f_line+1);
+	if (libf != stdin) fclose(libf);
+	return NULL;
+      }
+
+      /* this_lib_p now has the list of files specified by the
+	 <@?acc_list_file. If there is only one file, then this
+	 information, and the associated m_file_p, should be put into
+	 lib_p.  If there is more than one file, then the first should
+	 be put in lib_p, and the subsequent lib_struct's should be
+	 linked from lib_p->next, and the end of list needs to point
+	 to lib_p->next.
+      */
+
+      /* done whether list or not */
+      lib_p->file_name = this_lib_p->file_name;
+      /* deal with other list issues after we have a acc_fptr */
+
+      /* check that we can open the library file */
+      if ((acc_fptr = open_lib(this_lib_p, ldnaseq, sascii, outtty))==NULL) {
+	fprintf(stderr, "Cannot open %s library for ACC_LIST\n",f_line+1);
+	free(this_lib_p);
+	if (libf != stdin) fclose(libf);
+	return NULL;
+      }
+      else {
+	/* set up the auxiliary information for the current open file */
+	this_lib_p->m_file_p = acc_fptr;
+	/* note that sel_acc_arr[0] must be NULL */
+	acc_fptr->sel_acc_p = sel_acc_arr[acc_ltype];
+	acc_fptr->acc_off = acc_off;
+	/* read in the data; type 0 has no init function to call */
+	if (sel_acc_init[acc_ltype] != NULL) {
+	  acc_fptr->sel_local = sel_acc_init[acc_ltype](libf, &acc_fptr->acc_off, fmt_term);
+	}
+	else {
+	  acc_fptr->sel_local = NULL;
+	}
+
+	/* now handle the rest of the this_lib_p list */
+
+	tmp_lib_p = this_lib_p->next;	/* skip over the first entry in this_lib_p */
+	/* fill in the information up to the next to last entry in the chain */
+	while (tmp_lib_p && tmp_lib_p->next) {
+	  tmp_lib_p->acc_file_p = acc_fptr;
+	  tmp_lib_p = tmp_lib_p->next;
+	}
+	if (tmp_lib_p) {
+	  tmp_lib_p->acc_file_p = acc_fptr;	/* fill in last entry */
+	  tmp_lib_p->next = lib_p->next;	/* continue the chain */
+	}
+	lib_p->next = this_lib_p->next;		/* insert the chain */
+	return acc_fptr;
+      }
+    }
+  }
+#ifdef NCBIBL13
+  else if (lib_type==NCBIBL13) opnflg=(ncbl_openlib(lib_p->file_name,ldnaseq)!= -1);
+#endif
+#ifdef NCBIBL20
+  else if (lib_type==NCBIBL20) {
+    opnflg=((m_fptr=ncbl2_openlib(lib_p,ldnaseq))!=NULL);
+  }
+#endif
+
+#ifdef MYSQL_DB
+  /* a mySQL filename contains mySQL commands, not sequences */
+  else if (lib_type==MYSQL_LIB) {
+    opnflg=((m_fptr=mysql_openlib(lib_p->file_name,ldnaseq,sascii))!=NULL);
+    if (m_fptr != NULL) m_fptr->get_mmap_chain = NULL;
+  }
+#endif
+#ifdef PGSQL_DB
+  /* a PGSQL_LIB filename is handed to pgsql_openlib(), not read as sequences */
+  else if (lib_type==PGSQL_LIB) {
+    opnflg=((m_fptr=pgsql_openlib(lib_p->file_name,ldnaseq,sascii))!=NULL);
+    if (m_fptr != NULL) m_fptr->get_mmap_chain = NULL;
+  }
+#endif
+
+  if (!opnflg) {	/* here if open failed */
+    if (outtty) {
+      fprintf(stderr,"\n cannot open %s library\n",lib_p->file_name);
+      fprintf(stderr," enter new file name or <RET> to quit ");
+      fflush(stderr);
+      if (fgets(f_line,sizeof(f_line),stdin)==NULL) return NULL;
+      if ((bp=strchr(f_line,'\n'))!=0) *bp='\0';
+      if (strlen(f_line)==0) return NULL;
+      if (++wcnt > 10) return NULL;
+      lib_p->file_name = alloc_file_name(f_line);
+      goto l1;
+    }
+    else return NULL;
+  }	/* !openflg */
+
+  if (lib_type <= LASTTXT) {
+    /* modify to re-use the om_fptr if it exists */
+    if (om_fptr != NULL) {
+      m_fptr = om_fptr;
+    }
+    else {
+      if ((m_fptr = calloc(1,sizeof(struct lmf_str)))==NULL) {
+	fprintf(stderr,"\n *** cannot allocate lmf_str (%lu) for %s\n",
+		(unsigned long)sizeof(struct lmf_str),lib_p->file_name);
+	return NULL;
+      }
+      if ((m_fptr->lline = calloc(MAX_STR,sizeof(char)))==NULL) {
+	fprintf(stderr,"\n *** cannot allocate lline (%d) for %s\n",
+		MAX_STR,lib_p->file_name);
+	free(m_fptr);	/* allocated just above, not yet visible to anyone */
+	return NULL;
+      }
+    }
+
+    m_fptr->lb_name = lib_p->file_name;
+    strncpy(m_fptr->opt_text,opt_text,MAX_FN);
+    m_fptr->opt_text[MAX_FN-1]='\0';
+    m_fptr->sascii = sascii;
+    m_fptr->get_mmap_chain = NULL;
+
+    m_fptr->libf = libf;
+    m_fptr->lb_type = lib_type;
+    m_fptr->acc_off = 1;	/* default for FASTA format */
+    m_fptr->getlib = getliba[lib_type];
+    m_fptr->ranlib = ranliba[lib_type];
+    m_fptr->sel_acc_p = NULL;
+    m_fptr->mm_flg = 0;
+    m_fptr->tot_len = 0;
+    m_fptr->max_len = 0;
+    m_fptr->lib_aa = (ldnaseq==SEQT_PROT);
+  }
+  last_m_fptr = m_fptr;
+
+#ifdef USE_MMAP
+  /* check for possible mmap()ed files */
+  if (!use_stdin && (lib_type <= LASTTXT) && can_mmap(lib_type)) {
+    /* this is a file we can mmap() */
+    /* look for .xin file */
+    newname(iname,lib_p->file_name,"xin",sizeof(iname));
+    if ((libi=fopen(iname,"r"))!=NULL) { /* have a *.xin file, use mmap */
+      if (load_mmap(libi,lib_p->file_name,lib_type,ldnaseq,m_fptr)!=NULL) {
+	fclose(libi);	/* close index file */
+	return m_fptr;
+      }
+      fclose(libi);	/* memory mapping failed, but still must close file */
+    }
+  }
+#endif
+
+  if (lib_type <= LASTTXT) {
+    /* prime lline with the first line of the library */
+    m_fptr->lpos = 0;
+    if (fgets(m_fptr->lline,MAX_STR,libf)==NULL) return NULL;
+  }
+  return m_fptr;
+}
+
+/* closelib() - release the file resources held by m_fptr.
+
+   returns 0 when nothing is closed: m_fptr is NULL, or (with
+   USE_MMAP) the library is memory mapped and force is 0, in which
+   case only lpos is rewound.  Returns 1 otherwise.
+
+   Neither the lmf_str nor its lline buffer is freed here; lline is
+   kept around for re-use. */
+int
+closelib(struct lmf_str *m_fptr,int force) {
+  FILE *fp;
+
+  if (!m_fptr) return 0;
+
+#ifdef USE_MMAP
+  /* don't close memory mapped files unless the caller insists */
+  if (m_fptr->mm_flg && !force) {
+    m_fptr->lpos = 0;
+    return 0;
+  }
+#endif
+
+  /* stdin is never fclose()d */
+  fp = m_fptr->libf;
+  if (fp != NULL && fp != stdin) {
+    fclose(fp);
+    m_fptr->libf = NULL;
+    m_fptr->mm_flg = 0;
+  }
+
+  /* format specific clean-up */
+#ifdef NCBIBL13
+  if (m_fptr->lb_type == NCBIBL13) ncbl_closelib(m_fptr);
+#endif
+#ifdef NCBIBL20
+  if (m_fptr->lb_type == NCBIBL20) ncbl2_closelib(m_fptr);
+#endif
+#ifdef MYSQL_DB
+  if (m_fptr->lb_type == MYSQL_LIB) mysql_closelib(m_fptr);
+#endif
+
+  /* this library is no longer the "currently open" one */
+  if (last_m_fptr == m_fptr) last_m_fptr = NULL;
+
+  return 1;
+}
+
+/* re_openlib() - make om_fptr the currently open library again.
+
+   om_fptr - library structure filled in by an earlier open
+   lib_p   - only consulted for a file name in the error message
+   outtty  - not used
+
+   returns om_fptr (for NCBIBL20, whatever ncbl2_reopen() returns),
+   or NULL when om_fptr is NULL or the re-open fails.
+
+   If om_fptr is already the current library it is returned as is;
+   a memory mapped library is only rewound.  Otherwise the current
+   library is force-closed and om_fptr's file is opened again.
+
+   last_m_fptr is updated only after the re-open has succeeded, and
+   the failure message no longer dereferences the NULL pointer that
+   a failed ncbl2_reopen() leaves in om_fptr. */
+struct lmf_str *
+  re_openlib(struct lmf_str *om_fptr, struct lib_struct *lib_p, int outtty)
+{
+  int opnflg;
+
+  if (om_fptr == NULL) return NULL;
+
+  /* if its already open, return it */
+  if (om_fptr == last_m_fptr) {
+    return om_fptr;
+  }
+
+  if (om_fptr->mm_flg) {
+    last_m_fptr = om_fptr;
+    om_fptr->lpos = 0;
+    return om_fptr;
+  }
+#ifdef MYSQL_DB
+  /* if this is a mysql database - use it and return */
+  if (om_fptr->lb_type == MYSQL_LIB) {
+    return om_fptr;
+  }
+#endif
+
+  /* closelib() also resets last_m_fptr to NULL */
+  closelib(last_m_fptr,1);
+
+  /* data is available, but file is closed or not memory mapped, open it */
+  /* no longer check to memory map - because we could not do it before */
+
+  opnflg = 1;
+  if (om_fptr->lb_type<=LASTTXT && om_fptr->libf==NULL)
+    opnflg=((om_fptr->libf=fopen(om_fptr->lb_name,RBSTR))!=NULL);
+#ifdef NCBIBL20
+  else if (om_fptr->lb_type==NCBIBL20) {
+    opnflg=((om_fptr=ncbl2_reopen(om_fptr))!=NULL);
+  }
+#endif
+#ifdef MYSQL_DB
+  /* a mySQL filename contains mySQL commands, not sequences */
+  else if (om_fptr->lb_type==MYSQL_LIB) 
+    opnflg=(mysql_reopen(om_fptr)!=NULL);
+#endif
+
+  if (!opnflg) {
+    /* om_fptr is NULL here when ncbl2_reopen() failed */
+    fprintf(stderr,"*** could not re_open %s\n",
+	    (om_fptr != NULL) ? om_fptr->lb_name :
+	    (lib_p != NULL) ? lib_p->file_name : "(unknown)");
+    return NULL;
+  }
+
+  last_m_fptr = om_fptr;
+
+  /* use the old buffer for the opened text file */
+  return om_fptr;
+}
+
+void sf_sort(int *, int);
+
+/* agetlib() - read the next entry from a library whose header lines
+   start with '>' or ';' (library type 0, FASTA).
+
+   seq, maxs - output buffer for residue codes, and its size
+   libstr    - receives the header text, starting acc_off characters
+               into the header line (at most n_libstr-1 characters)
+   libpos    - receives lm_fd->lpos, the offset recorded for the
+               header line
+   lcont     - 0 on entry: locate and parse a header first;
+               non-zero: continue the entry already in progress.
+               On return it is incremented when the buffer filled
+               before the entry ended, otherwise it is reset to 0.
+   lm_fd     - the open library; lm_fd->lline carries the header line
+               from one call to the next
+   l_off     - set to 1, or to <n> when the header holds "@C:<n>"
+
+   returns the number of residue codes stored in seq, or -1 at end of
+   file or when the accession selector returns a negative status.
+
+   The loop that completes a header longer than the line buffer
+   stops when fgets() fails; it used to spin forever when the file
+   ended with a header line that had no newline.
+*/
+int
+agetlib(unsigned char *seq, int maxs,
+	char *libstr, int n_libstr,
+	fseek_t *libpos,
+	int *lcont,
+	struct lmf_str *lm_fd,
+	long *l_off)
+{
+  register unsigned char *cp, *seqp, *seqb;
+  register int *ap;
+  unsigned char *seqm, *seqm1;
+  char *bp, *tp;
+  int sel_status;
+
+  seqp = seqb = seq;
+  /* the conversion loop below is unrolled 9 times and only tests the
+     limit once per pass, so stop 9 bytes before the end of seq */
+  seqm = &seq[maxs-9];
+  seqm1 = seqm-1;
+
+  ap = lm_fd->sascii;
+
+  *l_off = 1;
+  if (*lcont==0) {
+
+  start_seq:
+    /* advance to the next header line, remembering where it starts */
+    while (lm_fd->lline[0]!='>' && lm_fd->lline[0]!=';') {
+      if (lm_fd->libf != stdin) lm_fd->lpos = FTELL(lm_fd->libf);
+      if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1);
+    }
+
+    /* get l_off coordinate from @C:123 */
+    if ((bp=strchr(lm_fd->lline,'@'))!=NULL && !strncmp(bp+1,"C:",2)) {
+      sscanf(bp+3,"%ld",l_off);
+    }
+
+    strncpy(libstr,lm_fd->lline+lm_fd->acc_off,n_libstr-1);
+    libstr[n_libstr-1]='\0';
+
+    /* with an accession selector: <0 ends the search, 0 skips this
+       entry (finish its header line, read one more line, retry) */
+    if ((lm_fd->sel_acc_p != NULL) &&
+	(sel_status = (lm_fd->sel_acc_p)(libstr, 0, lm_fd->sel_local)) <= 0) {
+      if (sel_status < 0) return (-1);
+      while (strchr((char *)lm_fd->lline,'\n')==NULL) {
+	if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1);
+      }
+      if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1);
+      goto start_seq;
+    }
+
+    if ((bp=strchr(libstr,'\r'))!=NULL) *bp='\0';
+    if ((bp=strchr(libstr,'\n'))!=NULL) *bp='\0';
+
+    /* long descriptions: turn ^A and tab separators into blanks */
+    if (n_libstr > MAX_UID) {
+      tp = libstr;
+      while (*tp++) if (*tp == '\001' || *tp== '\t') *tp = ' ';
+    }
+
+    *libpos = lm_fd->lpos;
+
+    /* make certain we have the end of the line; the tail of a very
+       long header is read over the second half of lline */
+    while (strchr((char *)lm_fd->lline,'\n')==NULL) {
+      if (strlen(lm_fd->lline)<MAX_STR/2) {
+	if (fgets(&lm_fd->lline[strlen(lm_fd->lline)],MAX_STR/2,lm_fd->libf)==NULL)
+	  break;	/* EOF inside the header line */
+      }
+      else {
+	if (fgets(&lm_fd->lline[MAX_STR/2],MAX_STR/2,lm_fd->libf)==NULL)
+	  break;	/* EOF inside the header line */
+      }
+    }
+    lm_fd->lline[MAX_STR-1]='\0';
+  }
+
+  lm_fd->lline[0]='\0';
+  /* each line is read directly into seq and converted in place */
+  while (seqb<seqm1 && fgets((char *)seqb,(size_t)(seqm-seqb),lm_fd->libf)!=NULL) {
+    if (*seqb=='>') goto new;
+    if (*seqb==';') {
+      if (strchr((char *)seqb,'\n')==NULL) goto cont;
+      continue;
+    }
+
+    /* removed - used for @P:1-n 
+       if (l_limit) {
+       for (cp=seqp; seqp<seqm1 && rn < l_stop && (ic=ap[*cp++])<EL; )
+       if (ic < NA && ++rn > l_start) *seqp++ = (unsigned char)ic;
+       if (rn > l_stop) goto finish;
+       }
+       else {
+    */
+    seqp = seqb;
+    /* store codes below NA; on any other code back up over it, and
+       leave the line if that code is above NA */
+    for (cp=seqp; seqp<seqm1; ) {
+      if ((*seqp++=ap[*cp++])<NA &&
+	  (*seqp++=ap[*cp++])<NA &&
+	  (*seqp++=ap[*cp++])<NA &&
+	  (*seqp++=ap[*cp++])<NA &&
+	  (*seqp++=ap[*cp++])<NA &&
+	  (*seqp++=ap[*cp++])<NA &&
+	  (*seqp++=ap[*cp++])<NA &&
+	  (*seqp++=ap[*cp++])<NA &&
+	  (*seqp++=ap[*cp++])<NA) continue;
+      if (*(--seqp)>NA) break;
+    }
+    seqb = seqp;
+    if (*seqp==ES) goto done;
+    if (lm_fd->libf != stdin) lm_fd->lpos = FTELL(lm_fd->libf);
+  }
+  goto done;
+ new:
+  /* the line just read is the next header: keep it for the next call */
+  strncpy(lm_fd->lline,(char *)seqp,MAX_STR);
+  lm_fd->lline[MAX_STR-1]='\0';
+  /* be certain to get complete line, if possible */
+  if (strchr(lm_fd->lline,'\n')==NULL)
+    fgets(&lm_fd->lline[strlen(lm_fd->lline)],MAX_STR-strlen(lm_fd->lline),lm_fd->libf);
+  lm_fd->lline[MAX_STR-1]='\0';
+  if (strchr(lm_fd->lline,'\n')==NULL && strchr((char *)seqp,'\n')!=NULL)
+    lm_fd->lline[strlen(lm_fd->lline)-1]='\n';
+  goto done;
+
+  /* removed - used for @P:1-n
+finish: 
+   while (lm_fd->lline[0]!='>' && 
+	  fgets(lm_fd->lline,MAX_STR,lm_fd->libf)!=NULL) {
+     if (lm_fd->libf != stdin) lm_fd->lpos = FTELL(lm_fd->libf);
+   }
+   goto done;
+*/
+ cont:
+  /* a ';' line did not fit in the buffer: read on, and force the
+     "more to come" result below */
+  fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
+  seqm1 = seqp;
+ done:
+  if (seqp>=seqm1) (*lcont)++;
+  else {
+    *lcont=0;
+  }
+
+  *seqp = EOSEQ;
+  /*  if ((int)(seqp-seq)==0) return 1; */
+  return (int)(seqp-seq);
+}
+
+/* aranlib() - re-read the header line at file offset seek and copy
+   its text, starting acc_off characters into the line, to str (at
+   most cnt-1 characters; ^A and tab are replaced by blanks).
+
+   str is returned empty when the library is stdin (which cannot be
+   re-positioned), when nothing can be read at seek, or when the line
+   found there does not start with '>' or ';'.  libstr is not used.
+
+   A failed fgets() is now detected, so the text left in lline by an
+   earlier read is no longer reported as the header. */
+void
+aranlib(char *str, int cnt, fseek_t seek, char *libstr, struct lmf_str *lm_fd)
+{
+  char *bp;
+
+  str[0]='\0';
+  if (lm_fd->libf == stdin) return;
+
+  FSEEK(lm_fd->libf, seek, 0);
+  if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) {
+    lm_fd->lline[0]='\0';	/* do not leave a stale line behind */
+    return;
+  }
+
+  if (lm_fd->lline[0]!='>' && lm_fd->lline[0]!=';') return;
+
+  strncpy(str,lm_fd->lline+lm_fd->acc_off,cnt);
+  str[cnt-1]='\0';
+  if ((bp = strchr(str,'\r'))!=NULL) *bp='\0';
+  if ((bp = strchr(str,'\n'))!=NULL) *bp='\0';
+  /*
+    if ((bp = strchr(str,SFCHAR))!=NULL) *bp='\0';
+    else if ((bp = strchr(str,'\001'))!=NULL) *bp='\0';
+    else if ((bp = strchr(str,'\n'))!=NULL) *bp='\0';
+    else str[cnt-1]='\0';
+  */
+  bp = str;
+  while (*bp++) if (*bp=='\001' || *bp=='\t') *bp=' ';
+}
+
+/* qgetlib() - read the next entry from a library whose header lines
+   start with '@' and whose sequence text is ended by a line that
+   starts with '+' (this looks like FASTQ; the lines after the '+'
+   line are skipped by the next call's search for '@').
+
+   seq, maxs - output buffer for residue codes, and its size
+   libstr    - receives the header text, starting acc_off characters
+               into the header line (at most n_libstr-1 characters)
+   libpos    - receives lm_fd->lpos, the offset recorded for the
+               header line
+   lcont     - 0 on entry: locate and parse a header first;
+               non-zero: continue the entry already in progress.
+               On return it is incremented when the buffer filled
+               before the entry ended, otherwise it is reset to 0.
+   lm_fd     - the open library
+   l_off     - always set to 1
+
+   returns the number of residue codes stored in seq, or -1 at end
+   of file.
+
+   The loop that completes a header longer than the line buffer
+   stops when fgets() fails; it used to spin forever when the file
+   ended with a header line that had no newline.
+*/
+int
+qgetlib(unsigned char *seq, int maxs,
+	char *libstr, int n_libstr,
+	fseek_t *libpos,
+	int *lcont,
+	struct lmf_str *lm_fd,
+	long *l_off)
+{
+  register unsigned char *cp, *seqp, *seqb;
+  register int *ap;
+  unsigned char *seqm, *seqm1;
+  char *bp;
+
+  seqp = seqb = seq;
+  /* the conversion loop below is unrolled 9 times and only tests the
+     limit once per pass, so stop 9 bytes before the end of seq */
+  seqm = &seq[maxs-9];
+  seqm1 = seqm-1;
+
+  ap = lm_fd->sascii;
+
+  *l_off = 1;
+  if (*lcont==0) {
+
+    /* advance to the next header line, remembering where it starts */
+    while (lm_fd->lline[0]!='@') {
+      if (lm_fd->libf != stdin) lm_fd->lpos = FTELL(lm_fd->libf);
+      if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1);
+    }
+
+    strncpy(libstr,lm_fd->lline+lm_fd->acc_off,n_libstr-1);
+    libstr[n_libstr-1]='\0';
+
+    if ((bp=strchr(libstr,'\r'))!=NULL) *bp='\0';
+    if ((bp=strchr(libstr,'\n'))!=NULL) *bp='\0';
+
+    *libpos = lm_fd->lpos;
+
+    /* make certain we have the end of the line; the tail of a very
+       long header is read over the second half of lline */
+    while (strchr((char *)lm_fd->lline,'\n')==NULL) {
+      if (strlen(lm_fd->lline)<MAX_STR/2) {
+	if (fgets(&lm_fd->lline[strlen(lm_fd->lline)],MAX_STR/2,lm_fd->libf)==NULL)
+	  break;	/* EOF inside the header line */
+      }
+      else {
+	if (fgets(&lm_fd->lline[MAX_STR/2],MAX_STR/2,lm_fd->libf)==NULL)
+	  break;	/* EOF inside the header line */
+      }
+    }
+    lm_fd->lline[MAX_STR-1]='\0';
+  }
+
+  lm_fd->lline[0]='\0';
+  /* each line is read directly into seq and converted in place */
+  while (seqb<seqm1 && fgets((char *)seqb,(size_t)(seqm-seqb),lm_fd->libf)!=NULL) {
+    if (*seqb=='+') goto new;
+
+    seqp = seqb;
+    /* store codes below NA; on any other code back up over it, and
+       leave the line if that code is above NA */
+    for (cp=seqp; seqp<seqm1; ) {
+      if ((*seqp++=ap[*cp++])<NA &&
+	  (*seqp++=ap[*cp++])<NA &&
+	  (*seqp++=ap[*cp++])<NA &&
+	  (*seqp++=ap[*cp++])<NA &&
+	  (*seqp++=ap[*cp++])<NA &&
+	  (*seqp++=ap[*cp++])<NA &&
+	  (*seqp++=ap[*cp++])<NA &&
+	  (*seqp++=ap[*cp++])<NA &&
+	  (*seqp++=ap[*cp++])<NA) continue;
+      if (*(--seqp)>NA) break;
+    }
+    seqb = seqp;
+    if (*seqp==ES) goto done;
+    if (lm_fd->libf != stdin) lm_fd->lpos = FTELL(lm_fd->libf);
+  }
+  goto done;
+ new:
+  /* keep the '+' line in lline; it is not a header, so the next call
+     reads on from here until it finds an '@' line */
+  strncpy(lm_fd->lline,(char *)seqp,MAX_STR);
+  lm_fd->lline[MAX_STR-1]='\0';
+  /* be certain to get complete line, if possible */
+  if (strchr(lm_fd->lline,'\n')==NULL)
+    fgets(&lm_fd->lline[strlen(lm_fd->lline)],MAX_STR-strlen(lm_fd->lline),lm_fd->libf);
+  lm_fd->lline[MAX_STR-1]='\0';
+  if (strchr(lm_fd->lline,'\n')==NULL && strchr((char *)seqp,'\n')!=NULL)
+    lm_fd->lline[strlen(lm_fd->lline)-1]='\n';
+
+ done:
+  if (seqp>=seqm1) (*lcont)++;
+  else {
+    *lcont=0;
+  }
+
+  *seqp = EOSEQ;
+  /*  if ((int)(seqp-seq)==0) return 1; */
+  return (int)(seqp-seq);
+}
+
+/* qranlib() - re-read the header line at file offset seek and copy
+   its text, starting acc_off characters into the line, to str (at
+   most cnt-1 characters).
+
+   str is returned empty when the library is stdin (which cannot be
+   re-positioned), when nothing can be read at seek, or when the line
+   found there does not start with '@'.  libstr is not used.
+
+   A failed fgets() is now detected, so the text left in lline by an
+   earlier read is no longer reported as the header. */
+void
+qranlib(char *str, int cnt, fseek_t seek, char *libstr, struct lmf_str *lm_fd)
+{
+  char *bp;
+
+  str[0]='\0';
+  if (lm_fd->libf == stdin) return;
+
+  FSEEK(lm_fd->libf, seek, 0);
+  if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) {
+    lm_fd->lline[0]='\0';	/* do not leave a stale line behind */
+    return;
+  }
+
+  if (lm_fd->lline[0]!='@') return;
+
+  strncpy(str,lm_fd->lline+lm_fd->acc_off,cnt);
+  str[cnt-1]='\0';
+  if ((bp = strchr(str,'\r'))!=NULL) *bp='\0';
+  if ((bp = strchr(str,'\n'))!=NULL) *bp='\0';
+}
+
+void lget_ann(struct lmf_str *, char *, int);
+
+/* lgetlib() - read the next entry from a library laid out as
+   LOCUS ... ORIGIN ... "/" records (presumably the "full Genbank
+   flatfile format", library type 1, listed at the top of this file).
+
+   seq, maxs - output buffer for residue codes, and its size
+   libstr    - receives the entry name: 12 characters from column 12
+               of the LOCUS line when n_libstr <= 21, otherwise the
+               longer description built by lget_ann()
+   libpos    - receives lm_fd->lpos, the offset recorded for the
+               LOCUS line
+   lcont     - 0 on entry: locate LOCUS and ORIGIN first; non-zero:
+               continue converting the line saved in lm_fd->cpsave.
+               On return it is incremented when the buffer filled
+               before the entry ended, otherwise it is reset to 0.
+   l_off     - always set to 1
+
+   returns the number of residue codes stored in seq, or -1 at end
+   of file.
+*/
+int
+lgetlib(unsigned char *seq, int maxs,
+	char *libstr,
+	int n_libstr,
+	fseek_t *libpos,
+	int *lcont,
+	struct lmf_str *lm_fd,
+	long *l_off)
+{
+  register unsigned char *cp, *seqp;
+  register int *ap;
+  unsigned char *seqm, *seqm1;
+  char *bp, *bp_gid;
+
+  *l_off = 1;
+
+  seqp = seq;
+  /* the conversion loop below is unrolled 11 times and only tests the
+     limit once per pass, so stop 11 bytes before the end of seq */
+  seqm = &seq[maxs-11];
+  seqm1 = seqm-1;
+
+  ap = lm_fd->sascii;
+
+  if (*lcont==0) {
+    while (lm_fd->lline[0]!='L' || lm_fd->lline[1]!='O' || 
+	   strncmp(lm_fd->lline,"LOCUS",5)) { /* find LOCUS */
+      lm_fd->lpos = FTELL(lm_fd->libf);
+      if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1);
+      /* with lfflag set, one extra character follows every line */
+      if (lm_fd->lfflag) getc(lm_fd->libf);
+    }
+    *libpos= lm_fd->lpos;
+
+    if (n_libstr <= 21) {
+      strncpy(libstr,&lm_fd->lline[12],12);
+      libstr[12]='\0';
+    }
+    else {
+      lget_ann(lm_fd,libstr,n_libstr);
+      fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
+    }
+
+    while (lm_fd->lline[0]!='O' || lm_fd->lline[1]!='R' ||
+	   strncmp(lm_fd->lline,"ORIGIN",6)) { /* find ORIGIN */
+      if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1);
+      if (lm_fd->lfflag) getc(lm_fd->libf);
+    }
+  }
+  else {
+    /* finish the line that was being converted when seq filled up */
+    for (cp= lm_fd->cpsave; seqp<seqm1; ) {
+      if ((*seqp++=ap[*cp++])<NA) continue;
+      if (*(--seqp)>NA) break;
+    }
+  }
+
+  lm_fd->lline[0]='\0';
+  while (seqp<seqm1 && fgets(lm_fd->lline,MAX_STR,lm_fd->libf)!=NULL) {
+    if (lm_fd->lfflag) getc(lm_fd->libf);
+    /* a line starting with '/' ends the entry */
+    if (lm_fd->lline[0]=='/') goto new;
+    /* conversion starts at column 10 of each sequence line; codes
+       below NA are stored, any other code is backed over, and a code
+       above NA ends the line */
+    for (cp= (unsigned char *)&lm_fd->lline[10]; seqp<seqm1; ) {
+      if ((*seqp++=ap[*cp++])<NA &&
+	  (*seqp++=ap[*cp++])<NA &&
+	  (*seqp++=ap[*cp++])<NA &&
+	  (*seqp++=ap[*cp++])<NA &&
+	  (*seqp++=ap[*cp++])<NA &&
+	  (*seqp++=ap[*cp++])<NA &&
+	  (*seqp++=ap[*cp++])<NA &&
+	  (*seqp++=ap[*cp++])<NA &&
+	  (*seqp++=ap[*cp++])<NA &&
+	  (*seqp++=ap[*cp++])<NA &&
+	  (*seqp++=ap[*cp++])<NA) continue;
+      if (*(--seqp)>NA) break;
+    }
+  }
+  goto done;
+new:
+  /* read the line after the '/' terminator, noting where it starts */
+  lm_fd->lpos = FTELL(lm_fd->libf);
+  fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
+  if (lm_fd->lfflag) getc(lm_fd->libf);
+
+done:
+  if (seqp>=seqm1) {
+    /* buffer full: remember where to resume in the current line */
+    lm_fd->cpsave = cp;
+    (*lcont)++;
+  }
+  else *lcont=0;
+
+  *seqp = EOSEQ;
+  /*  if ((int)(seqp-seq)==0) return 1; */
+  return (int)(seqp-seq);
+}
+
+/* lget_ann_col() - return a pointer to column col of line, or to the
+   terminating NUL when the line has fewer than col characters */
+static char *
+lget_ann_col(char *line, size_t col)
+{
+  size_t len = strlen(line);
+
+  return line + ((col < len) ? col : len);
+}
+
+/* lget_ann_find() - read lines from lm_fd->libf into buf until one
+   starts with key.  Returns 1 when it is found.  Returns 0, with buf
+   empty, at end of file or at a line starting with "ORIGIN"; in the
+   ORIGIN case the file is moved back to the start of that line and
+   *at_origin is set, so that later searches return at once and the
+   caller can still read the ORIGIN line itself. */
+static int
+lget_ann_find(struct lmf_str *lm_fd, char *buf, int n_buf,
+	      const char *key, int *at_origin)
+{
+  size_t klen = strlen(key);
+  fseek_t pos;
+
+  while (!*at_origin) {
+    pos = FTELL(lm_fd->libf);
+    if (fgets(buf,n_buf,lm_fd->libf)==NULL) break;
+    if (strncmp(buf,key,klen)==0) return 1;
+    if (strncmp(buf,"ORIGIN",6)==0) {
+      FSEEK(lm_fd->libf,pos,0);
+      *at_origin = 1;
+    }
+  }
+  buf[0]='\0';
+  return 0;
+}
+
+/* lget_ann_cat() - append src to dst without writing more than
+   n_dst bytes (including the NUL) into dst */
+static void
+lget_ann_cat(char *dst, int n_dst, const char *src)
+{
+  size_t used = strlen(dst);
+
+  if (used+1 < (size_t)n_dst) strncat(dst,src,(size_t)n_dst-1-used);
+}
+
+/* lget_ann() - build a FASTA-style description for the entry whose
+   LOCUS line is in lm_fd->lline, reading the DEFINITION, ACCESSION
+   and VERSION lines that follow it in lm_fd->libf.
+
+   libstr, n_libstr - output buffer and its size.  The result is
+       [gi|<gi>|gb|]<version>|<locus> <definition>
+   or, without a VERSION line,
+       <accession> <locus> <definition>
+   where <gi> is the text after ':' on the VERSION line.
+
+   On return the file is positioned after the last line read, or at
+   the start of the ORIGIN line if that was reached first.
+
+   The searches stop at end of file or at ORIGIN (they used to loop
+   forever on a truncated file), columns are only read from lines
+   that are long enough, and every append is bounded by n_libstr. */
+void
+lget_ann(struct lmf_str *lm_fd, char *libstr, int n_libstr) {
+  char *bp, *bp_gid, *acc_txt, *ver_txt;
+  char locus[120], desc[120], acc[120], ver[120];
+  int have_acc, have_ver;
+  int at_origin = 0;
+
+  /* copy in locus from lm_fd->lline, keeping one trailing blank */
+  strncpy(locus,lget_ann_col(lm_fd->lline,12),sizeof(locus)-1);
+  locus[sizeof(locus)-1]='\0';
+  if ((bp=strchr(locus,' '))!=NULL) *(bp+1) = '\0';
+
+  /* get description */
+  lget_ann_find(lm_fd,desc,(int)sizeof(desc),"DEFINITION",&at_origin);
+  if ((bp = strchr(lget_ann_col(desc,12),'\n'))!=NULL) *bp='\0';
+
+  /* get accession */
+  have_acc = lget_ann_find(lm_fd,acc,(int)sizeof(acc),"ACCESSION",&at_origin);
+  acc_txt = lget_ann_col(acc,12);
+  if ((bp = strchr(acc_txt,'\n'))!=NULL) *bp='\0';
+  if ((bp = strchr(acc_txt,' '))!=NULL) *bp='\0';
+
+  /* get version */
+  have_ver = lget_ann_find(lm_fd,ver,(int)sizeof(ver),"VERSION",&at_origin);
+  ver_txt = lget_ann_col(ver,12);
+  if ((bp = strchr(ver_txt,'\n'))!=NULL) *bp='\0';
+
+  /* extract gi:123456 from version line */
+  bp_gid = strchr(ver_txt,':');
+  if (bp_gid != NULL) {
+    if ((bp=strchr(bp_gid+1,' '))!=NULL) *bp='\0';
+    bp_gid++;
+  }
+  if ((bp = strchr(ver_txt,' '))!=NULL) *bp='\0';
+
+  if (n_libstr <= 0) return;
+
+  /* build up FASTA header line */
+  libstr[0]='\0';
+  if (bp_gid != NULL) {
+    lget_ann_cat(libstr,n_libstr,"gi|");
+    lget_ann_cat(libstr,n_libstr,bp_gid);
+    lget_ann_cat(libstr,n_libstr,"|gb|");
+  }
+
+  /* if we have a version number, use it, otherwise accession, 
+     otherwise locus/description */
+
+  if (have_ver) {
+    lget_ann_cat(libstr,n_libstr,ver_txt);
+    lget_ann_cat(libstr,n_libstr,"|");
+  }
+  else if (have_acc) {
+    lget_ann_cat(libstr,n_libstr,acc_txt);
+    lget_ann_cat(libstr,n_libstr," ");
+  }
+
+  lget_ann_cat(libstr,n_libstr,locus);
+  /* column 11 is the blank in front of the definition text */
+  lget_ann_cat(libstr,n_libstr,lget_ann_col(desc,11));
+  libstr[n_libstr-1]='\0';
+}
+
+/* this code seeks to provide both the various accession numbers
+   necessary to identify the sequence, and also some description.
+
+   Unfortunately, the various contributors to Genbank use three
+   slightly different formats for including the accession number.
+
+(1)LOCUS       HSJ214M20  107422 bp    DNA             HTG       16-JUN-2000
+   DEFINITION  Homo sapiens chromosome 6 clone RP1-214M20 map p12.1-12.3, ***
+               SEQUENCING IN PROGRESS ***, in unordered pieces.
+   ACCESSION   AL121969
+
+(2)LOCUS       AL359201   117444 bp    DNA             HTG       15-JUN-2000
+   DEFINITION  Homo sapiens chromosome 1 clone RP4-671C13 map p13.2-21.1, ***
+               SEQUENCING IN PROGRESS ***, in unordered pieces.
+   ACCESSION   AL359201
+
+(3)LOCUS       BB067000      280 bp    mRNA            EST       19-JUN-2000
+   DEFINITION  BB067000 RIKEN full-length enriched, 15 days embryo male testis Mus
+               musculus cDNA clone 8030456L01 3', mRNA sequence.
+   ACCESSION   BB067000
+
+This makes it more difficult to both provide the accession number in a
+standard location and to conserve definition space
+*/
+
+/* lranlib() - build the description of the entry whose LOCUS line
+   starts at file offset seek.
+
+   str, cnt - output buffer and its size; filled by lget_ann(), or
+              left empty when nothing can be read at seek
+   libstr   - not used
+
+   The file is then moved back to seek and the LOCUS line is read
+   into lm_fd->lline again, so the entry can be read from its start.
+
+   (Two MAX_STR sized local arrays that were never used have been
+   dropped, and a failed first read no longer hands a stale lline to
+   lget_ann().) */
+void
+lranlib(char *str,
+	int cnt,
+	fseek_t seek,
+	char *libstr,
+	struct lmf_str *lm_fd)
+{
+  if (cnt > 0) str[0]='\0';
+
+  FSEEK(lm_fd->libf, seek, 0);
+  if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)!=NULL) {
+    /* with lfflag set, one extra character follows every line */
+    if (lm_fd->lfflag) getc(lm_fd->libf);
+
+    if (cnt > 0) {
+      lget_ann(lm_fd, str, cnt);
+      str[cnt-1]='\0';
+    }
+  }
+
+  FSEEK(lm_fd->libf,seek,0);
+  fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
+  if (lm_fd->lfflag) getc(lm_fd->libf);
+}
+
+/* pgetlib() - read the next entry from a library laid out as
+   ENTRY ... SEQUENCE ... "/" records (presumably the "NBRF/PIR
+   CODATA format", library type 2, listed at the top of this file).
+
+   seq, maxs - output buffer for residue codes, and its size
+   libstr    - receives 8 characters from column 16 of the ENTRY line
+               (n_libstr is not consulted)
+   libpos    - receives lm_fd->lpos, the offset recorded for the
+               ENTRY line
+   lcont     - 0 on entry: locate ENTRY and SEQUENCE first; non-zero:
+               continue converting the line saved in lm_fd->cpsave.
+               On return it is incremented when the buffer filled
+               before the entry ended, otherwise it is reset to 0.
+   l_off     - always set to 1
+
+   returns the number of residue codes stored in seq, or -1 at end
+   of file.
+*/
+int
+pgetlib(unsigned char *seq, int maxs,
+	char *libstr,
+	int n_libstr,
+	fseek_t *libpos,
+	int *lcont,
+	struct lmf_str *lm_fd,
+	long *l_off)
+{
+  int ic;
+  register unsigned char *cp, *seqp;
+  register int *ap;
+  unsigned char *seqm, *seqm1;
+
+  *l_off = 1;
+
+  seqp = seq;
+  /* stop 11 bytes before the end of seq */
+  seqm = &seq[maxs-11];
+  seqm1 = seqm-1;
+
+  ap = lm_fd->sascii;
+
+  if (*lcont==0) {
+    while (lm_fd->lline[0]!='E' || lm_fd->lline[1]!='N' || strncmp(lm_fd->lline,"ENTRY",5))
+      { /* find ENTRY */
+	lm_fd->lpos = FTELL(lm_fd->libf);
+	if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1);
+      }
+    strncpy(libstr,&lm_fd->lline[16],8);
+    libstr[8]='\0';
+    *libpos = lm_fd->lpos;
+    while (lm_fd->lline[2]!='Q' || lm_fd->lline[0]!='S' || strncmp(lm_fd->lline,"SEQUENCE",8))
+      { /* find SEQUENCE */
+	if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1);
+      }
+    fgets(lm_fd->lline,MAX_STR,lm_fd->libf); /* get the extra line */
+  }
+  else {
+    /* finish the line that was being converted when seq filled up */
+    for (cp= lm_fd->cpsave; seqp<seqm1; ) {
+      if ((*seqp++=ap[*cp++])<NA) continue;
+      if (*(--seqp)>NA) break;
+    }
+    if (*seqp==ES) goto done;
+  }
+
+  lm_fd->lline[0]='\0';
+  while (seqp<seqm1 && fgets(lm_fd->lline,MAX_STR,lm_fd->libf)!=NULL) {
+    /* a line starting with '/' ends the entry */
+    if (lm_fd->lline[0]=='/') goto new;
+    /* conversion starts at column 8 of each sequence line; codes
+       below NA are stored, any other code is backed over, and a code
+       above NA ends the line */
+    for (cp= (unsigned char *)&lm_fd->lline[8]; seqp<seqm1; ) {
+      if ((*seqp++=ap[*cp++])<NA) continue;
+      if (*(--seqp)>NA) break;
+    };
+    if (*seqp==ES) goto done;
+  }
+  goto done;
+new:
+  /* read the line after the '/' terminator, noting where it starts */
+  lm_fd->lpos = FTELL(lm_fd->libf);
+  fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
+
+done:
+  if (seqp>=seqm1) {
+    /* buffer full: remember where to resume in the current line */
+    lm_fd->cpsave = cp;
+    (*lcont)++;
+  }
+  else *lcont=0;
+
+  *seqp = EOSEQ;
+  /*  if ((int)(seqp-seq)==0) return 1; */
+  return (int)(seqp-seq);
+}
+
+/* pranlib() - build the description of the entry whose ENTRY line
+   starts at file offset seek.
+
+   str, cnt - output buffer and its size.  str[0..7] receives 8
+              characters from column 16 of the ENTRY line, and the
+              text from column 16 of the following TITLE line is
+              placed at str[8]; the result is cut at the first
+              newline and is always NUL terminated within cnt bytes.
+   libstr   - not used
+
+   The file is then moved back to seek and the ENTRY line is read
+   into lm_fd->lline again.
+
+   The terminator is written at str[cnt-1]; it used to go to
+   str[cnt-9], which cut 8 characters off every long title.  The
+   search for TITLE stops at end of file instead of looping forever,
+   and buffers shorter than 9 bytes are not overrun. */
+void
+pranlib(char *str,
+	int cnt,
+	fseek_t seek,
+	char *libstr,
+	struct lmf_str *lm_fd)
+{
+  char *bp;
+  const char *src;
+  int n_id, have_title;
+
+  if (cnt <= 0) return;
+  str[0]='\0';
+
+  FSEEK(lm_fd->libf, seek, 0);
+  if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)!=NULL) {
+
+    /* identifier: 8 characters (NUL padded) from column 16 */
+    n_id = (cnt > 8) ? 8 : cnt-1;
+    src = (strlen(lm_fd->lline) > 16) ? &lm_fd->lline[16] : "";
+    strncpy(str,src,n_id);
+    str[n_id]='\0';
+
+    /* find TITLE */
+    have_title = 0;
+    while (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)!=NULL) {
+      if (strncmp(lm_fd->lline,"TITLE",5)==0) {
+	have_title = 1;
+	break;
+      }
+    }
+
+    if (have_title && cnt > 9) {
+      src = (strlen(lm_fd->lline) > 16) ? &lm_fd->lline[16] : "";
+      strncpy(&str[8],src,cnt-9);	/* fills at most str[8..cnt-2] */
+    }
+    str[cnt-1]='\0';
+    if ((bp = strchr(str,'\n'))!=NULL) *bp='\0';
+  }
+
+  FSEEK(lm_fd->libf,seek,0);
+  fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
+}
+
+/* egetlib: read the next library entry, or the next piece of a long
+   entry, from a file whose entries start with an "ID" line, announce
+   the sequence with an "SQ" line and end with a line starting with '/'.
+
+   seq      - output buffer for the residues, translated through
+              lm_fd->sascii and terminated with EOSEQ
+   maxs     - size of seq; seq is treated as full once &seq[maxs-12] is
+              reached (the margin absorbs the unrolled copy loop)
+   libstr   - receives the identifier (first token at column 5 of the ID
+              line), left-justified in exactly 12 characters
+   n_libstr - size of libstr (not consulted here; 13 bytes are written)
+   libpos   - receives the file offset of the ID line
+   lcont    - in: 0 to start a new entry, non-zero to continue the
+              current one; out: incremented when seq filled up before
+              the entry ended, otherwise reset to 0
+   lm_fd    - library file state (file, line buffer, saved position)
+   l_off    - always set to 1
+
+   Returns the number of residues stored, or -1 at end of file or when
+   the accession filter lm_fd->sel_acc_p returns a negative value.
+
+   The identifier used to be read with an unbounded "%s" into an 11 byte
+   buffer; it is now limited to the 12 characters that are displayed. */
+int
+egetlib(unsigned char *seq, int maxs,
+	char *libstr,
+	int n_libstr,
+	fseek_t *libpos,
+	int *lcont,
+	struct lmf_str *lm_fd,
+	long *l_off)
+{
+  register unsigned char *cp, *seqp;
+  register int *ap;
+  unsigned char *seqm, *seqm1;
+  int sel_status;
+  char id[13];  /* Holds Identifier: 12 characters + NUL */
+
+  *l_off=1;
+
+  seqp = seq;
+  seqm = &seq[maxs-11];
+  seqm1 = seqm-1;
+
+  ap = lm_fd->sascii;
+
+  if (*lcont==0) {
+  start_seq:
+    while (lm_fd->lline[0]!='I' || lm_fd->lline[1]!='D') { /* find ID */
+      lm_fd->lpos = FTELL(lm_fd->libf);
+      if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1);
+      if (lm_fd->lfflag) getc(lm_fd->libf);
+    }
+    id[0]='\0';		/* stays empty when the ID line has no token */
+    sscanf(&lm_fd->lline[5],"%12s",id);
+    sprintf(libstr,"%-12.12s",id);
+    libstr[12]='\0';
+
+    /* optional accession filter: 0 skips this entry, <0 stops the scan */
+    if ((lm_fd->sel_acc_p != NULL) &&
+	(sel_status = (lm_fd->sel_acc_p)(libstr, 0, lm_fd->sel_local)) <= 0) {
+      if (sel_status < 0) return (-1);
+      if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1);
+      goto start_seq;
+    }
+
+    *libpos = lm_fd->lpos;
+    while (lm_fd->lline[0]!='S' || lm_fd->lline[1]!='Q') { /* find SQ */
+      if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1);
+      if (lm_fd->lfflag) getc(lm_fd->libf);
+    }
+    sscanf(&lm_fd->lline[14],"%ld",&lm_fd->gcg_len);
+  }
+  else {
+    /* continuation: finish translating the line saved by the last call */
+    for (cp= lm_fd->cpsave; seqp<seqm1; ) {
+      if ((*seqp++=ap[*cp++])<NA) continue;
+      if (*(--seqp)>NA) break;
+    }
+    if (*seqp==ES) goto done;
+  }
+
+  lm_fd->lline[0]='\0';
+  while (seqp<seqm1 && fgets(lm_fd->lline,MAX_STR,lm_fd->libf)!=NULL) {
+    if (lm_fd->lfflag) getc(lm_fd->libf);
+    if (lm_fd->lline[0]=='/') goto new;
+    lm_fd->lline[70]='\0';	/* ignore everything from column 70 on */
+    /* residues start at column 5; codes >= NA stop the unrolled run:
+       a code equal to NA is overwritten by the next residue, a larger
+       one ends the line */
+    for (cp= (unsigned char *)&lm_fd->lline[5]; seqp<seqm1; ) {
+      if ((*seqp++=ap[*cp++])<NA &&
+	  (*seqp++=ap[*cp++])<NA &&
+	  (*seqp++=ap[*cp++])<NA &&
+	  (*seqp++=ap[*cp++])<NA &&
+	  (*seqp++=ap[*cp++])<NA &&
+	  (*seqp++=ap[*cp++])<NA &&
+	  (*seqp++=ap[*cp++])<NA &&
+	  (*seqp++=ap[*cp++])<NA &&
+	  (*seqp++=ap[*cp++])<NA &&
+	  (*seqp++=ap[*cp++])<NA &&
+	  (*seqp++=ap[*cp++])<NA) continue;
+      if (*(--seqp)>NA) break;
+    }
+    if (*seqp==ES) goto done;
+  }
+  goto done;
+
+new:
+  /* end-of-entry line seen: remember where the next entry starts and
+     pre-load its first line */
+  lm_fd->lpos = FTELL(lm_fd->libf);
+  fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
+  if (lm_fd->lfflag) getc(lm_fd->libf);
+  goto done;
+
+done:
+  if (seqp>=seqm1) {
+    /* buffer full: save the position in the current line */
+    lm_fd->cpsave = cp;
+    (*lcont)++;
+    lm_fd->gcg_len -= (long)(seqp-seq);
+  }
+  else *lcont=0;
+
+  *seqp = EOSEQ;
+  /* if ((int)(seqp-seq)==0) return 1; */
+  /*	if (*lcont==0 && (long)(seqp-seq)!=lm_fd->gcg_len)
+	printf("%s read %d of %d\n",libstr,(int)(seqp-seq),lm_fd->gcg_len);
+	*/
+  return (int)(seqp-seq);
+}
+
+/* eranlib: build the description string for the "ID"/"DE" style entry
+   that starts at file offset `seek`.
+
+   str    - output buffer; receives the identifier (first token at
+            column 5 of the first line) padded to 12 characters, followed
+            by the text at column 5 of the next line starting with "DE";
+            cut at the first '\r' or '\n'
+   cnt    - capacity of str; the result is truncated to cnt-11 characters
+   seek   - offset of the entry in lm_fd->libf
+   libstr - not used here
+   lm_fd  - library file state; on return the file has been re-positioned
+            at `seek` and the first line there re-read into lm_fd->lline
+
+   Fixes relative to the earlier code: the identifier is read with a
+   bounded conversion, the description copy no longer writes str[cnt],
+   and the search for the DE line stops at end of file. */
+void
+eranlib(char *str,
+	int cnt,
+	fseek_t seek,
+	char *libstr,
+	struct lmf_str *lm_fd)
+{
+  char *bp;
+  char id[14];  /* Holds Identifier */
+  int have_de = 1;
+  int n_desc;
+
+  FSEEK(lm_fd->libf, seek, 0);
+  fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
+  if (lm_fd->lfflag) getc(lm_fd->libf);
+
+  id[0]='\0';
+  sscanf(&lm_fd->lline[5],"%13s",id);
+  sprintf(str,"%-12.12s ",id);
+  fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
+  if (lm_fd->lfflag) getc(lm_fd->libf);
+
+  while (lm_fd->lline[0]!='D' || lm_fd->lline[1]!='E') {
+    if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) {
+      have_de = 0;	/* no description line before end of file */
+      break;
+    }
+  }
+
+  /* only the first cnt-11 characters of str survive the truncation
+     below, so cnt-23 description characters are all that is needed */
+  n_desc = cnt-23;
+  if (have_de && n_desc > 0) strncpy(str+12,&lm_fd->lline[5],n_desc);
+  if (cnt > 11) str[cnt-11]='\0';
+  if ((bp = strchr(str,'\r'))!=NULL) *bp='\0';
+  if ((bp = strchr(str,'\n'))!=NULL) *bp='\0';
+
+  FSEEK(lm_fd->libf,seek,0);
+  fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
+  if (lm_fd->lfflag) getc(lm_fd->libf);
+}
+
+/* igetlib: read the next entry, or the next piece of a long entry, from
+   a library whose entries begin with one or more ';' lines followed by
+   a name line and then the sequence.
+
+   seq      - output buffer; lines are read straight into it and then
+              translated in place through lm_fd->sascii; terminated with
+              EOSEQ
+   maxs     - size of seq; seq is treated as full once &seq[maxs-10] is
+              reached
+   libstr   - receives up to 12 characters taken from column 1 of the
+              first line after the ';' lines
+   n_libstr - size of libstr (not consulted here)
+   libpos   - receives the file offset of the entry's first ';' line
+   lcont    - in: 0 to start a new entry; out: incremented when the
+              entry continues in the next call, otherwise reset to 0
+   lm_fd    - library file state
+   l_off    - always set to 1
+
+   Returns the number of residues stored, or -1 when the file ends
+   before a complete header has been read.
+
+   Fixes relative to the earlier code: skipping the ';' lines stops at
+   end of file instead of looping forever, and the tail of an over-long
+   '>' line is appended to the saved line (as vgetlib does) rather than
+   written over its start. */
+int
+igetlib(unsigned char *seq, int maxs,
+	char *libstr,
+	int n_libstr,
+	fseek_t *libpos,
+	int *lcont,
+	struct lmf_str *lm_fd,
+	long *l_off)
+{
+	register unsigned char *cp, *seqp;
+	register int *ap;
+	unsigned char *seqm, *seqm1;
+	char *bp;
+
+	*l_off = 1;
+
+	seqp = seq;
+	seqm = &seq[maxs-9];
+	seqm1 = seqm-1;
+
+	ap = lm_fd->sascii;
+
+	if (*lcont==0) {
+		/* advance to the first ';' line of the next entry */
+		while (lm_fd->lline[0]!=';') {
+			lm_fd->lpos = FTELL(lm_fd->libf);
+			if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1);
+			}
+		*libpos = lm_fd->lpos;
+		/* skip the remaining ';' lines */
+		while (lm_fd->lline[0]==';') {
+			if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1);
+			}
+		strncpy(libstr,lm_fd->lline+1,12);
+		libstr[12]='\0';
+		if((bp=strchr(libstr,'\n'))!=NULL) *bp='\0';
+		}
+
+	lm_fd->lline[0]='\0';
+	while (seqp<seqm1 && fgets((char *)seqp,(size_t)(seqm-seqp),lm_fd->libf)!=NULL) {
+		if (*seqp=='>') goto new;
+		if (*seqp==';') {
+			/* comment inside the sequence; if it did not fit,
+			   finish reading it at cont: */
+			if (strchr((char *)seqp,'\n')==NULL) goto cont;
+			continue;
+			}
+		/* translate in place; a code equal to NA is overwritten by
+		   the next residue, a larger one ends the line */
+		for (cp=seqp; seqp<seqm1; ) {
+			if ((*seqp++=ap[*cp++])<NA &&
+			    (*seqp++=ap[*cp++])<NA &&
+			    (*seqp++=ap[*cp++])<NA &&
+			    (*seqp++=ap[*cp++])<NA &&
+			    (*seqp++=ap[*cp++])<NA &&
+			    (*seqp++=ap[*cp++])<NA &&
+			    (*seqp++=ap[*cp++])<NA &&
+			    (*seqp++=ap[*cp++])<NA &&
+			    (*seqp++=ap[*cp++])<NA) continue;
+			if (*(--seqp)>NA) break;
+			}
+		if (*seqp==ES) goto done;
+		lm_fd->lpos = FTELL(lm_fd->libf);
+		}
+	goto done;
+
+new:	/* a '>' line was read into seq: move it to the line buffer */
+	strncpy(lm_fd->lline,(char *)seqp,MAX_STR);
+	lm_fd->lline[MAX_STR-1]='\0';
+	if (strchr((char *)seqp,'\n')==NULL)
+		fgets(&lm_fd->lline[strlen(lm_fd->lline)],MAX_STR-strlen(lm_fd->lline),lm_fd->libf);
+	goto done;
+
+cont:
+	fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
+	seqm1 = seqp;	/* forces the "continued" branch below */
+
+done:	if (seqp>=seqm1) {
+		(*lcont)++;
+		}
+	else {
+		*lcont=0;
+		}
+
+	*seqp = EOSEQ;
+	/*	if ((int)(seqp-seq)==0) return 1; */
+	return (int)(seqp-seq);
+	}
+
+/* iranlib: build the description string for the entry that starts at
+   file offset `seek` in a library whose entries begin with ';' (or
+   '>') lines.
+
+   str    - output buffer; receives the first blank-delimited word of
+            the first line that does not start with ';', two blanks, and
+            then the text of the first line at `seek` (without its
+            leading '>' or ';')
+   cnt    - capacity of str
+   seek   - offset of the entry in lm_fd->libf
+   libstr - not used here
+   lm_fd  - library file state; on return the file has been re-positioned
+            at `seek` and the first line there re-read into lm_fd->lline
+
+   Skipping the ';' lines now stops at end of file; previously the loop
+   ignored the fgets() result and never terminated in that case. */
+void
+iranlib(char *str,
+	int cnt,
+	fseek_t seek,
+	char *libstr,
+	struct lmf_str *lm_fd)
+{
+	char *bp;
+	char tline[MAX_FN];
+
+	FSEEK(lm_fd->libf, seek, 0);
+	fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
+
+	if (lm_fd->lline[0]=='>' || lm_fd->lline[0]==';') {
+		strncpy(tline,lm_fd->lline+1,sizeof(tline));
+		tline[sizeof(tline)-1]='\0';
+		if ((bp = strchr(tline,'\n'))!=NULL) *bp='\0';
+		}
+	else {
+		tline[0]='\0';
+		}
+
+	while (lm_fd->lline[0]==';') {
+		if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) {
+			lm_fd->lline[0]='\0';	/* no name line before end of file */
+			break;
+			}
+		}
+	if ((bp=strchr(lm_fd->lline,'\n'))!=NULL) *bp=0;
+	if ((bp=strchr(lm_fd->lline,' '))!=NULL) *bp=0;
+	strncpy(str,lm_fd->lline,cnt);
+	str[cnt-1]='\0';
+	strncat(str,"  ",cnt-strlen(str)-1);
+	strncat(str,tline,cnt-strlen(str)-1);
+
+	FSEEK(lm_fd->libf,seek,0);
+	fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
+	}
+
+/* vgetlib: read the next entry, or the next piece of a long entry, from
+   a library whose entries start with a line beginning with '>' or ';',
+   followed by one description line and then the sequence.
+
+   seq      - output buffer; lines are read straight into it and then
+              translated in place through lm_fd->sascii; terminated with
+              EOSEQ
+   maxs     - size of seq; seq is treated as full once &seq[maxs-10] is
+              reached
+   libstr   - receives up to 12 characters from column 4 of the header
+              line, cut at the first blank; when n_libstr > 21 a blank
+              and the description line are appended
+   n_libstr - size of libstr
+   libpos   - receives the file offset of the header line
+   lcont    - in: 0 to start a new entry; out: incremented when the
+              entry continues in the next call, otherwise reset to 0
+   lm_fd    - library file state; lfflag enables skipping one extra
+              character after each line
+   l_off    - always set to 1
+
+   Returns the number of residues stored, or -1 at end of file while
+   looking for a header.
+
+   A header line shorter than 4 characters now gives an empty name
+   instead of copying whatever follows the terminator in the line
+   buffer. */
+int
+vgetlib(unsigned char *seq, int maxs,
+	char *libstr,
+	int n_libstr,
+	fseek_t *libpos,
+	int *lcont,
+	struct lmf_str *lm_fd,
+	long *l_off)
+{
+  int ich;
+  register unsigned char *cp, *seqp;
+  register int *ap;
+  unsigned char *seqm, *seqm1;
+  char *bp;
+
+  *l_off = 1;
+
+  seqp = seq;
+  seqm = &seq[maxs-9];
+  seqm1 = seqm-1;
+
+  ap = lm_fd->sascii;
+
+  if (*lcont==0) {
+    while (lm_fd->lline[0]!='>' && lm_fd->lline[0]!=';') {
+      lm_fd->lpos = FTELL(lm_fd->libf);
+      if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1);
+      if (lm_fd->lfflag) getc(lm_fd->libf);
+    }
+
+    if ((bp=strchr(lm_fd->lline,'\n'))!=NULL) *bp='\0';
+    /* the name starts at column 4; guard against shorter headers */
+    if (strlen(lm_fd->lline) >= 4) strncpy(libstr,&lm_fd->lline[4],12);
+    else libstr[0]='\0';
+    libstr[12]='\0';
+    if ((bp=strchr(libstr,' '))!=NULL) *bp='\0';
+    if ((bp=strchr(libstr,'\n'))!=NULL) *bp='\0';
+
+    /* description line */
+    fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
+    if (lm_fd->lfflag) getc(lm_fd->libf);
+
+    if (n_libstr > 21) {
+      strcat(libstr," ");
+      strncat(libstr,lm_fd->lline,n_libstr-1-strlen(libstr));
+      if ((bp=strchr(libstr,'\n'))!=NULL) *bp='\0';
+      libstr[n_libstr-1]='\0';
+    }
+    *libpos = lm_fd->lpos;
+  }
+
+  lm_fd->lline[0]='\0';
+  while (seqp<seqm1 && fgets((char *)seqp,(size_t)(seqm-seqp),lm_fd->libf)!=NULL) {
+    if (lm_fd->lfflag && (ich=getc(lm_fd->libf))!=LFCHAR) ungetc(ich,lm_fd->libf);
+    if (*seqp=='>') goto new;
+    if (*seqp==';') {
+      /* comment inside the sequence; if it did not fit, finish
+	 reading it at cont: */
+      if (strchr((char *)seqp,'\n')==NULL) goto cont;
+      continue;
+    }
+    /* translate in place; a code equal to NA is overwritten by the
+       next residue, a larger one ends the line */
+    for (cp=seqp; seqp<seqm1; ) {
+      if ((*seqp++=ap[*cp++])<NA &&
+	  (*seqp++=ap[*cp++])<NA &&
+	  (*seqp++=ap[*cp++])<NA &&
+	  (*seqp++=ap[*cp++])<NA &&
+	  (*seqp++=ap[*cp++])<NA &&
+	  (*seqp++=ap[*cp++])<NA &&
+	  (*seqp++=ap[*cp++])<NA &&
+	  (*seqp++=ap[*cp++])<NA &&
+	  (*seqp++=ap[*cp++])<NA) continue;
+      if (*(--seqp)>NA) break;
+    }
+    if (*seqp==ES) goto done;
+    lm_fd->lpos = FTELL(lm_fd->libf);
+  }
+  goto done;
+
+new:
+  /* the header of the next entry was read into seq: move it to the
+     line buffer, appending the rest of the line if it was cut short */
+  strncpy(lm_fd->lline,(char *)seqp,MAX_STR);
+  lm_fd->lline[MAX_STR-1]='\0';
+  if (strchr((char *)seqp,'\n')==NULL) {
+    fgets(&lm_fd->lline[strlen(lm_fd->lline)],MAX_STR-strlen(lm_fd->lline),lm_fd->libf);
+    if (lm_fd->lfflag && (ich=getc(lm_fd->libf))!=LFCHAR) ungetc(ich,lm_fd->libf);
+  }
+  goto done;
+
+cont:
+  fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
+  if (lm_fd->lfflag && (ich=getc(lm_fd->libf))!=LFCHAR) ungetc(ich,lm_fd->libf);
+  seqm1 = seqp;		/* forces the "continued" branch below */
+
+done:
+  if (seqp>=seqm1) {
+    (*lcont)++;
+  }
+  else {
+    *lcont=0;
+  }
+
+  *seqp = EOSEQ;
+  /*   if ((int)(seqp-seq)==0) return 1;*/
+  return (int)(seqp-seq);
+}
+
+/* vranlib: build the description string for the entry at file offset
+   `seek` in a library whose header lines look like ">xx;name" or
+   ">xx>name" ("GCG ascii").
+
+   str    - output buffer; receives the name found at column 4 of the
+            header (cut at ':' and at the end of line, and for the '>'
+            form also at the first blank), one blank, and the following
+            line; a leading part of that line which merely repeats the
+            name (5 or more characters) is dropped.  Set to "" when the
+            header does not have the expected form.
+   cnt    - capacity of str
+   seek   - offset of the entry in lm_fd->libf
+   libstr - not used here
+   lm_fd  - library file state; on return the file has been re-positioned
+            at `seek` and the first line there re-read into lm_fd->lline
+
+   Fixes relative to the earlier code: the comparison that looks for a
+   repeated name stops at the end of the strings, and the blank and the
+   description are appended only while they fit in cnt bytes. */
+void
+vranlib(char *str,
+	int cnt,
+	fseek_t seek,
+	char *libstr,
+	struct lmf_str *lm_fd)
+{
+  char *bp, *llp;
+  size_t s_len;
+
+  FSEEK(lm_fd->libf, seek, 0);
+  fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
+  if (lm_fd->lfflag) getc(lm_fd->libf);
+
+  if (lm_fd->lline[0]=='>'&&(lm_fd->lline[3]==';'||
+			     lm_fd->lline[3]=='>')	/* GCG ascii */
+      ) {
+    strncpy(str,&lm_fd->lline[4],cnt-1);
+    str[cnt-1]='\0';
+
+    if (lm_fd->lline[3]=='>' && (bp = strchr(str,' '))!=NULL) *bp='\0';
+
+    if ((bp = strchr(str,':'))!=NULL) *bp='\0';
+    if ((bp=strchr(str,'\r'))!=NULL) *bp='\0';
+    else if ((bp = strchr(str,'\n'))!=NULL) *bp='\0';
+    else str[cnt-1]='\0';
+
+    fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
+    if (lm_fd->lfflag) getc(lm_fd->libf);
+
+    /* skip over redundant stuff */
+    for (llp=lm_fd->lline,bp=str; *llp!='\0' && *llp==*bp; llp++,bp++);
+    if ((int)(llp-lm_fd->lline)<5) llp = lm_fd->lline;
+
+    if ((bp=strchr(llp,'\r'))!=NULL) *bp=' ';
+    if ((bp=strchr(llp,'\n'))!=NULL) *bp='\0';
+
+    /* append " " + description only when at least the blank fits */
+    s_len = strlen(str);
+    if (s_len + 1 < (size_t)cnt) {
+      str[s_len++] = ' ';
+      str[s_len] = '\0';
+      strncat(str,llp,(size_t)cnt-s_len-1);
+    }
+  }
+  else {
+    str[0]='\0';
+  }
+
+  FSEEK(lm_fd->libf,seek,0);
+  fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
+  if (lm_fd->lfflag) getc(lm_fd->libf);
+}
+
+/* residue code stored in seq for each 2-bit value (0..3) of a packed
+   ("binary") GCG sequence byte */
+static int gcg_bton[4]={2,4,1,3};
+
+/* gcg_getlib: read the next entry, or the next piece of a long entry,
+   from a GCG-style library.  The header line (starting with '>' or ';')
+   carries, from column 4 on: name, date, type, one ignored token and
+   the sequence length; a type starting with '2' marks a packed
+   sequence with four residues per byte.  The header is followed by one
+   description line and then the sequence data, read with fread().
+
+   seq      - output buffer; ASCII data is translated through
+              lm_fd->sascii, packed data is expanded through gcg_bton;
+              terminated with EOSEQ
+   maxs     - size of seq; at most maxs-9 bytes are requested per call
+   libstr   - receives the name; cut to 12 characters when
+              n_libstr <= 21, otherwise followed by a blank and the
+              description line
+   n_libstr - size of libstr
+   libpos   - receives the file offset of the header line
+   lcont    - in: 0 to start a new entry; out: incremented when more of
+              the entry remains, otherwise reset to 0
+   lm_fd    - library file state (gcg_len and gcg_binary are updated)
+   l_off    - always set to 1
+
+   Returns the number of residues placed in seq, or -1 at end of file
+   while looking for a header.
+
+   Fixes relative to the earlier code: the header tokens are scanned
+   into buffers as large as the line buffer (assumes lm_fd->lline holds
+   at most MAX_STR bytes, as every fgets() call here does), the name is
+   copied into libstr with a length limit, and reading a description
+   line that lacks a newline stops at end of file. */
+int
+gcg_getlib(unsigned char *seq, int maxs,
+	   char *libstr,
+	   int n_libstr,
+	   fseek_t *libpos,
+	   int *lcont,
+	   struct lmf_str *lm_fd,
+	   long *l_off)
+{
+  char gcg_name[MAX_STR];
+  char gcg_date[MAX_STR];
+  char gcg_type[MAX_STR];
+  char dummy[MAX_STR];
+  register unsigned char *cp, *seqp, stmp;
+  register int *ap;
+  unsigned char *seqm, *seqm1;
+  long r_block, b_block;
+  char *bp;
+  size_t s_len;
+
+  *l_off = 1;
+
+  seqp = seq;
+  seqm = &seq[maxs-9];
+  seqm1 = seqm-1;
+
+  ap = lm_fd->sascii;
+
+  if (*lcont==0) {
+    while (lm_fd->lline[0]!='>' && lm_fd->lline[0]!=';') {
+      lm_fd->lpos = FTELL(lm_fd->libf);
+      if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1);
+    }
+    /* tokens stay empty when the header has fewer fields */
+    gcg_name[0] = gcg_date[0] = gcg_type[0] = dummy[0] = '\0';
+    sscanf(&lm_fd->lline[4],"%s %s %s %s %ld",
+	   gcg_name,gcg_date,gcg_type,dummy,&(lm_fd->gcg_len));
+    if (n_libstr > 0) {
+      strncpy(libstr,gcg_name,n_libstr-1);
+      libstr[n_libstr-1]='\0';
+    }
+
+    lm_fd->gcg_binary = (gcg_type[0]=='2');
+
+    /* description line; keep reading until its newline has been
+       consumed, re-using the second half of the buffer if necessary */
+    fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
+    while (strchr((char *)lm_fd->lline,'\n')==NULL) {
+      if (strlen(lm_fd->lline)<MAX_STR/2) {
+	if (fgets(&lm_fd->lline[strlen(lm_fd->lline)],MAX_STR/2,lm_fd->libf)==NULL) break;
+      }
+      else {
+	if (fgets(&lm_fd->lline[strlen(lm_fd->lline)-MAX_STR/2],MAX_STR/2,lm_fd->libf)==NULL) break;
+      }
+    }
+    lm_fd->lline[MAX_STR-1]='\0';
+    if (n_libstr <= 21) {
+      if (n_libstr > 12) libstr[12]='\0';
+    }
+    else {
+      /* append " " + description while it fits in n_libstr bytes */
+      s_len = strlen(libstr);
+      if (s_len + 1 < (size_t)n_libstr) {
+	libstr[s_len++] = ' ';
+	libstr[s_len] = '\0';
+	strncat(libstr,lm_fd->lline,(size_t)n_libstr-1-s_len);
+      }
+      if ((bp = strchr(libstr,'\n'))!=NULL) *bp='\0';
+      libstr[n_libstr-1]='\0';
+    }
+    *libpos = lm_fd->lpos;
+  }
+
+  lm_fd->lline[0]='\0';
+
+  /* b_block: residues wanted; r_block: bytes to read from the file */
+  r_block = b_block = min((size_t)(seqm-seqp),lm_fd->gcg_len);
+  if (lm_fd->gcg_binary) { r_block = (r_block+3)/4; }
+
+  fread((char *)seqp,(size_t)r_block,(size_t)1,lm_fd->libf);
+  if (!lm_fd->gcg_binary)
+    for (cp=seqp; seqp<seq+r_block; ) *seqp++ = ap[*cp++];
+  else if (lm_fd->gcg_binary) {
+    /* expand in place from the end so that unread packed bytes are
+       never overwritten; the low two bits hold the last residue */
+    seqp = seq + r_block;
+    cp = seq + 4*r_block;
+    while (seqp > seq) {
+      stmp = *--seqp;
+      *--cp = gcg_bton[stmp&3];
+      *--cp = gcg_bton[(stmp >>= 2)&3];
+      *--cp = gcg_bton[(stmp >>= 2)&3];
+      *--cp = gcg_bton[(stmp >>= 2)&3];
+    }
+  }
+  /* NOTE(review): 4*r_block is also used for ASCII entries, where
+     r_block already counts residues; an ASCII entry longer than the
+     buffer but shorter than four buffers is therefore reported as
+     complete after the first piece - confirm whether that is intended */
+  if (4 * r_block >= lm_fd->gcg_len) {
+    fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
+    *lcont = 0;
+  }
+  else {
+    if (lm_fd->gcg_binary) b_block = 4*r_block;
+    lm_fd->gcg_len -= b_block;
+    (*lcont)++;
+  }
+
+  seq[b_block] = EOSEQ;
+  /*   if (b_block==0) return 1; else */
+  return b_block;
+}
+
+/* gcg_ranlib: build the description string for the entry at file offset
+   `seek` in a GCG-style library (header ">xx;name ..." or
+   ">xx>name ...").
+
+   str    - output buffer; receives the name found at column 4 of the
+            header (cut at the first blank, or else at the end of line),
+            one blank, and the following line; a leading part of that
+            line which merely repeats the name (5 or more characters) is
+            dropped.  Set to "" when the header does not have the
+            expected form.
+   cnt    - capacity of str
+   seek   - offset of the entry in lm_fd->libf
+   libstr - not used here
+   lm_fd  - library file state; on return the file has been re-positioned
+            at `seek` and the first line there re-read into lm_fd->lline
+
+   Fixes relative to the earlier code: the comparison that looks for a
+   repeated name stops at the end of the strings, and the appended text
+   is limited so that the terminator stays inside cnt bytes (the old
+   bound of cnt-strlen(str) allowed a write to str[cnt]). */
+void
+gcg_ranlib(char *str,
+	   int cnt,
+	   fseek_t seek,
+	   char *libstr,
+	   struct lmf_str *lm_fd)
+{
+  char *bp, *bp1, *llp;
+  size_t s_len;
+
+  FSEEK(lm_fd->libf, seek, 0);
+  fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
+
+  if (lm_fd->lline[0]=='>'&&(lm_fd->lline[3]==';'||lm_fd->lline[3]=='>')) {
+    strncpy(str,&lm_fd->lline[4],cnt-1);
+    str[cnt-1]='\0';
+    if ((bp = strchr(str,' '))!=NULL) *bp='\0';
+    else if ((bp=strchr(str,'\r'))!=NULL) *bp='\0';
+    else if ((bp = strchr(str,'\n'))!=NULL) *bp='\0';
+    else str[cnt-1]='\0';
+
+    fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
+
+    /* check whether the beginning of the line duplicates the name */
+    for (llp=lm_fd->lline,bp=str; *llp!='\0' && *llp == *bp; llp++,bp++);
+    if ((int)(llp-lm_fd->lline)<5) llp = lm_fd->lline;
+
+    /* here we would like to skip over some species stuff */
+	/*
+    if ((bp1 = strchr(llp,';'))!=NULL && (int)(bp1-llp)<50) {
+      if ((bp2 = strchr(bp1+1,';'))!=NULL && (int)(bp2-bp1)<50) {
+	*(bp2+1)='\0'; bp1 = bp2+2;
+      }
+      else {bp1=llp;}
+    }
+    else if ((bp1=strchr(llp,'.'))!=NULL && *(bp1+1)==' ') {
+      *(bp1+1) = '\0'; bp1 += 2;}
+    else bp1 = llp;
+    */
+
+    bp1 = llp;
+    if ((bp=strchr(bp1,'\r'))!=NULL) *bp='\0';
+    if ((bp=strchr(bp1,'\n'))!=NULL) *bp='\0';
+
+    /* append " " + description only when at least the blank fits */
+    s_len = strlen(str);
+    if (s_len + 1 < (size_t)cnt) {
+      str[s_len++] = ' ';
+      str[s_len] = '\0';
+      strncat(str,bp1,(size_t)cnt-s_len-1);
+    }
+  }
+  else {
+    str[0]='\0';
+  }
+
+  FSEEK(lm_fd->libf,seek,0);
+  fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
+}
+
+/* **************************************************************** */
+/* the following section contains the functions used to initialize,
+   read, hash, and lookup acc_lists, either from a sorted list, or a
+   hash table.
+
+   The functions are:
+
+   void *sel_acc_libstr_init(FILE *libf, int *acc_off, char fmt_term)
+   -- allocate space, read the accessions from an already open file of
+      accessions
+
+   int sel_acc_libstr(char *libstr, int gi, void *ptr)
+   -- compare libstr to current accession, returning 1 and
+      incrementing cur_entry if match is found.
+      Requires sorted list; does not access gi
+
+   int sel_acc_gi(char *libstr, int gi, void *ptr)
+   -- compare gi to gi_list, returning 1 and incrementing if match.
+      if (gi <= 0), get gi from libstr
+
+   int sel_hacc_libstr(char *libstr, int gi, void *ptr)
+   -- check to see whether libstr is in hash table
+
+   int sel_hacc_gi(char *libstr, int gi, void *ptr)
+   -- check to see if gi in hash32 table
+*/
+/* **************************************************************** */
+
+struct sel_acc_str {	/* state for selecting library entries by accession
+			   string or by integer gi; built by the ..._init()
+			   functions below and passed back as `ptr` */
+  int curr_entry;	/* index of the next entry expected by the
+			   sorted-list matchers */
+  int max_entry;	/* number of entries loaded */
+  char fmt_term;	/* libstr is cut at the first occurrence of this
+			   character before it is compared */
+  char *acc_buff;	/* text of all accessions, NUL-separated */
+  char **acc_list;	/* one pointer into acc_buff per accession */
+  int *gi_list;		/* integer identifiers, in file order */
+  int *acc_hash;	/* hash table: (entry index + 1) of the most
+			   recently inserted entry per slot, 0 if empty */
+  int *acc_hash_link;	/* indexed by (entry index + 1): the previous
+			   entry in the same slot, 0 at the chain end */
+  int hash_mask;	/* hash table size - 1 */
+};
+
+/* sel_acc_libstr_init: read a list of accessions, one per line, from
+   the already open file libf and return a selection structure for use
+   with sel_acc_libstr().
+
+   libf     - open accession file; it is closed before returning
+   acc_off  - not used here
+   fmt_term - stored in the structure; sel_acc_libstr() cuts the library
+              identifier at this character before comparing
+
+   Returns the new struct sel_acc_str (as void *), or NULL when memory
+   cannot be allocated.  A '\r' before the newline is removed.
+
+   Fixes relative to the earlier code: the text is NUL-terminated
+   before it is searched; lines ending in "\r\n" no longer insert empty
+   strings into the list (the old strlen()+1 walk landed on the second
+   terminator); a last line without a newline is kept; and acc_buff is
+   released when the list cannot be allocated. */
+
+void *sel_acc_libstr_init(FILE *libf, int *acc_off, char fmt_term) {
+  struct sel_acc_str *sel_acc_ptr;
+  char *bp, *bp1;
+  char *acc_buff;
+  char *acc_buff_max;	/* end of the text that was read */
+  char *new_buff;	/* reallocated buffer */
+  char *acc_buff_p;
+  char **acc_list;
+  int acc_cnt, i;
+  int new_buff_siz;	/* bytes requested per fread() */
+  int abuff_siz;		/* bytes available for text (one more is
+			   allocated for the terminator) */
+  int buff_siz;		/* bytes returned by the last fread() */
+
+  if ((sel_acc_ptr = (struct sel_acc_str *)calloc(1,sizeof(struct sel_acc_str)))==NULL) {
+    fprintf(stderr, "Cannot allocate struct sel_acc_str\n");
+    return NULL;
+  }
+
+  sel_acc_ptr->fmt_term = fmt_term;
+
+  /* allocate some space for the ACC's */
+
+  abuff_siz = new_buff_siz = 640000;
+
+  if ((acc_buff = (char *)calloc(abuff_siz+1, sizeof(char)))==NULL) {
+    fprintf(stderr, "Cannot allocate acc buff %d\n",abuff_siz+1);
+    free(sel_acc_ptr);
+    return NULL;
+  }
+
+  /* read one chunk at a time, growing the buffer until a short read
+     signals the end of the file */
+
+  acc_buff_p = acc_buff;
+  while ((buff_siz = fread(acc_buff_p, sizeof(char), new_buff_siz, libf))==new_buff_siz) {
+    if ((new_buff = realloc(acc_buff, (size_t)(abuff_siz+new_buff_siz+1)))==NULL) {
+      fprintf(stderr, " cannot reallocate for acc_buf[%d]\n",abuff_siz);
+      break;
+    }
+    else {
+      acc_buff = new_buff;
+      acc_buff_p = acc_buff + abuff_siz;
+      abuff_siz += new_buff_siz;
+    }
+  }
+  fclose(libf);
+
+  acc_buff_max = acc_buff_p + buff_siz;
+  *acc_buff_max = '\0';	/* strchr() below must not leave the text */
+
+  /* first pass: count the lines */
+  acc_cnt = 0;
+  for (acc_buff_p = acc_buff; acc_buff_p < acc_buff_max; acc_buff_p = bp+1) {
+    acc_cnt++;
+    if ((bp = strchr(acc_buff_p,'\n'))==NULL) break;	/* unterminated last line */
+  }
+
+  /* allocate the acc_list */
+  if ((acc_list=(char **)calloc(acc_cnt+1, sizeof(char *)))==NULL) {
+    fprintf(stderr," cannot allocate acc_list[%d]\n",acc_cnt+1);
+    free(acc_buff);
+    free(sel_acc_ptr);
+    return NULL;
+  }
+
+  /* second pass: terminate each line (dropping '\r' as well) and
+     record where it starts */
+  for (i=0, acc_buff_p=acc_buff; i<acc_cnt; i++) {
+    acc_list[i] = acc_buff_p;
+    if ((bp = strchr(acc_buff_p,'\n'))!=NULL) *bp = '\0';
+    if ((bp1 = strchr(acc_buff_p,'\r'))!=NULL) *bp1 = '\0';
+    if (bp == NULL) break;
+    acc_buff_p = bp+1;
+  }
+
+  /* finally put everything in the structure to be returned */
+  sel_acc_ptr->acc_buff = acc_buff;
+  sel_acc_ptr->acc_list = acc_list;
+  sel_acc_ptr->curr_entry = 0;
+  sel_acc_ptr->max_entry = acc_cnt;
+  return (void *)sel_acc_ptr;
+}
+
+/* sel_acc_libstr: test a library identifier against the next entry of
+   a sorted accession list.
+
+   libstr - identifier of the library entry; it is cut in place at the
+            first occurrence of the structure's fmt_term character
+   gi     - not used here
+   ptr    - struct sel_acc_str built by sel_acc_libstr_init()
+
+   Returns 1 when libstr equals the current accession (compared over at
+   most MAX_UID characters) and advances to the next one, 0 when it
+   does not match, and -1 once every accession has been matched.
+
+   The earlier pre-check of characters 1 and 2 read beyond the
+   terminator of identifiers shorter than two characters and could
+   reject equal strings; strncmp() alone gives the same answer for all
+   longer identifiers. */
+int sel_acc_libstr(char *libstr, int gi, void *ptr) {
+  struct sel_acc_str *sel_acc_ptr;
+  char *curr_acc;
+  char *bp;
+
+  sel_acc_ptr = (struct sel_acc_str *)ptr;
+
+  if (sel_acc_ptr->curr_entry >= sel_acc_ptr->max_entry) return -1;
+
+  if ((bp = strchr(libstr,sel_acc_ptr->fmt_term))!=NULL) {
+    *bp = '\0';
+  }
+
+  curr_acc = sel_acc_ptr->acc_list[sel_acc_ptr->curr_entry];
+  if (strncmp(libstr,curr_acc,MAX_UID)==0) {
+    sel_acc_ptr->curr_entry++;
+    return 1;
+  }
+  else {
+    return 0;
+  }
+}
+
+/* sel_acc_gi_init: read integer identifiers, one per line (converted
+   with atoi()), from the already open file libf and return a selection
+   structure for use with sel_acc_gi().
+
+   libf     - open list file; it is closed before returning
+   acc_off  - not used here
+   fmt_term - stored in the structure
+
+   Returns the new struct sel_acc_str (as void *), or NULL when the
+   structure or the initial list cannot be allocated.  If the list
+   cannot be enlarged, reading stops and the entries read so far are
+   returned. */
+void *sel_acc_gi_init(FILE *libf, int *acc_off, char fmt_term) {
+  struct sel_acc_str *sel;
+  char line[MAX_STR];
+  int *ids, *grown;
+  const int chunk = 64000;	/* list grows by this many entries */
+  int capacity = chunk;
+  int n_ids = 0;
+
+  sel = (struct sel_acc_str *)calloc(1,sizeof(struct sel_acc_str));
+  if (sel == NULL) {
+    fprintf(stderr, "Cannot allocate struct sel_acc_str\n");
+    return NULL;
+  }
+  sel->fmt_term = fmt_term;
+
+  ids = (int *)calloc(capacity, sizeof(int));
+  if (ids == NULL) {
+    fprintf(stderr, "Cannot allocate acc buff %d\n",capacity);
+    free(sel);
+    return NULL;
+  }
+
+  for (;;) {
+    if (fgets(line, sizeof(line), libf)==NULL) break;
+    ids[n_ids++] = atoi(line);
+    if (n_ids < capacity) continue;
+
+    /* list is full: make room for another chunk */
+    grown = realloc(ids,(capacity + chunk)*sizeof(int));
+    if (grown == NULL) {
+      fprintf(stderr,"cannot realloc gi_list[%d]\n",capacity+chunk);
+      break;
+    }
+    capacity += chunk;
+    ids = grown;
+  }
+  fclose(libf);
+
+  sel->gi_list = ids;
+  sel->curr_entry = 0;
+  sel->max_entry = n_ids;
+  return (void *)sel;
+}
+
+/* sel_acc_gi: test a gi number against the next entry of a sorted gi
+   list.
+
+   libstr - used only when gi <= 0: the number is then taken from
+            atoi(libstr) (if libstr is not NULL)
+   gi     - identifier of the library entry
+   ptr    - struct sel_acc_str built by sel_acc_gi_init()
+
+   Returns 1 on a match (and advances to the next list entry), 0 on a
+   mismatch, -1 once every list entry has been matched. */
+int sel_acc_gi(char *libstr, int gi, void *ptr) {
+  struct sel_acc_str *sel = (struct sel_acc_str *)ptr;
+
+  if (sel->curr_entry >= sel->max_entry) return -1;
+
+  if (gi <= 0 && libstr != NULL) gi = atoi(libstr);
+
+  if (gi != sel->gi_list[sel->curr_entry]) return 0;
+
+  sel->curr_entry++;
+  return 1;
+}
+
+/* this version of the selection algorithm does not require sorted
+   lists.  It hashes the initial list, and uses the hash table to
+   lookup the library sequences.
+
+*/
+
+#define HASH_TABLE_MULT 8
+
+/* sel_hacc_libstr_init: read a list of accessions, one per line, from
+   the already open file libf, hash them with hash_func() and return a
+   selection structure for use with sel_hacc_libstr().  The list does
+   not need to be sorted.
+
+   libf     - open accession file; it is closed before returning
+   acc_off  - not used here
+   fmt_term - stored in the structure; sel_hacc_libstr() cuts the
+              library identifier at this character before the lookup
+
+   Returns the new struct sel_acc_str (as void *), or NULL when memory
+   cannot be allocated.  A '\r' before the newline is removed.
+
+   Fixes relative to the earlier code: the text is NUL-terminated
+   before it is searched; lines ending in "\r\n" no longer insert empty
+   strings into the list (the old strlen()+1 walk landed on the second
+   terminator); a last line without a newline is kept; everything
+   allocated so far is released on failure; the link table is sized in
+   ints; and the sizes printed in the error messages match their %ld
+   conversion. */
+void *sel_hacc_libstr_init(FILE *libf, int *acc_off, char fmt_term) {
+  struct sel_acc_str *sel_acc_ptr;
+  char *bp, *bp1;
+  char *acc_buff;
+  char *acc_buff_max;	/* end of the text that was read */
+  char *new_buff;	/* reallocated buffer */
+  char *acc_buff_p;
+  char **acc_list;
+  int acc_cnt, i;
+  int new_buff_siz;	/* bytes requested per fread() */
+  int abuff_siz;		/* bytes available for text (one more is
+			   allocated for the terminator) */
+  int buff_siz;		/* bytes returned by the last fread() */
+  int hash_mask, hash_max;
+  int hash_val, *acc_hash, *acc_hash_link;
+  int link_save;
+
+  if ((sel_acc_ptr = (struct sel_acc_str *)calloc(1,sizeof(struct sel_acc_str)))==NULL) {
+    fprintf(stderr, "Cannot allocate struct sel_acc_str\n");
+    return NULL;
+  }
+
+  sel_acc_ptr->fmt_term = fmt_term;
+
+  /* now allocate some space for the ACC's */
+
+  abuff_siz = new_buff_siz = 640000;
+
+  if ((acc_buff = (char *)calloc(abuff_siz+1, sizeof(char)))==NULL) {
+    fprintf(stderr, "Cannot allocate acc buff %d\n",abuff_siz);
+    free(sel_acc_ptr);
+    return NULL;
+  }
+
+  /* read one chunk at a time, growing the buffer until a short read
+     signals the end of the file */
+
+  acc_buff_p = acc_buff;
+  while ((buff_siz = fread(acc_buff_p, sizeof(char), new_buff_siz, libf))==new_buff_siz) {
+    if ((new_buff = realloc(acc_buff, (size_t)(abuff_siz+new_buff_siz+1)))==NULL) {
+      fprintf(stderr, " cannot reallocate for acc_buf[%d]\n",abuff_siz);
+      break;
+    }
+    else {
+      acc_buff = new_buff;
+      acc_buff_p = acc_buff + abuff_siz;
+      abuff_siz += new_buff_siz;
+    }
+  }
+  fclose(libf);
+
+  acc_buff_max = acc_buff_p + buff_siz;
+  *acc_buff_max = '\0';	/* strchr() below must not leave the text */
+
+  /* first pass: count the lines */
+  acc_cnt = 0;
+  for (acc_buff_p = acc_buff; acc_buff_p < acc_buff_max; acc_buff_p = bp+1) {
+    acc_cnt++;
+    if ((bp = strchr(acc_buff_p,'\n'))==NULL) break;	/* unterminated last line */
+  }
+
+  /* allocate the acc_list */
+  if ((acc_list=(char **)calloc(acc_cnt+1, sizeof(char *)))==NULL) {
+    fprintf(stderr," cannot allocate acc_list[%d]\n",acc_cnt+1);
+    free(acc_buff);
+    free(sel_acc_ptr);
+    return NULL;
+  }
+
+  /* second pass: terminate each line (dropping '\r' as well) and
+     record where it starts */
+  for (i=0, acc_buff_p=acc_buff; i<acc_cnt; i++) {
+    acc_list[i] = acc_buff_p;
+    if ((bp = strchr(acc_buff_p,'\n'))!=NULL) *bp = '\0';
+    if ((bp1 = strchr(acc_buff_p,'\r'))!=NULL) *bp1 = '\0';
+    if (bp == NULL) break;
+    acc_buff_p = bp+1;
+  }
+
+  /* allocate the hash for the acc_list - a power of 2, at least 4096
+     and larger than HASH_TABLE_MULT * acc_cnt */
+  for (hash_max = 4096; hash_max <= HASH_TABLE_MULT * acc_cnt; hash_max *= 2);
+  hash_mask = hash_max - 1;
+
+  if ((acc_hash = (int *)calloc(hash_max, sizeof(int)))==NULL) {
+    fprintf(stderr, "cannot allocate acc_hash[%ld]\n",(long)(hash_max*sizeof(int)));
+    free(acc_list);
+    free(acc_buff);
+    free(sel_acc_ptr);
+    return NULL;
+  }
+
+  /* allocate the acc_list link table (indexed by entry index + 1) */
+  if ((acc_hash_link = (int *)calloc(acc_cnt+1,sizeof(int)))==NULL) {
+    fprintf(stderr, "cannot allocate acc_hash_link[%ld]\n",(long)((acc_cnt+1)*sizeof(int)));
+    free(acc_hash);
+    free(acc_list);
+    free(acc_buff);
+    free(sel_acc_ptr);
+    return NULL;
+  }
+
+  /* each slot holds (index + 1) of the newest entry; older entries
+     with the same hash are reached through acc_hash_link */
+  for (i=0; i<acc_cnt; i++) {
+    hash_val = hash_func(acc_list[i]) & hash_mask;
+    if ((link_save = acc_hash[hash_val]) != 0) {
+      acc_hash_link[i+1]=link_save;
+    }
+    acc_hash[hash_val] = i+1;
+  }
+
+  /* finally put everything in the structure to be returned */
+  sel_acc_ptr->acc_buff = acc_buff;
+  sel_acc_ptr->acc_list = acc_list;
+  sel_acc_ptr->curr_entry = 0;
+  sel_acc_ptr->max_entry = acc_cnt;
+  /* hash stuff */
+  sel_acc_ptr->acc_hash = acc_hash;
+  sel_acc_ptr->acc_hash_link = acc_hash_link;
+  sel_acc_ptr->hash_mask = hash_mask;
+
+  return (void *)sel_acc_ptr;
+}
+
+/* sel_hacc_libstr: look a library identifier up in the accession hash
+   table.
+
+   libstr - identifier of the library entry; it is cut in place at the
+            first occurrence of the structure's fmt_term character
+   gi     - not used here
+   ptr    - struct sel_acc_str built by sel_hacc_libstr_init()
+
+   Returns 1 when libstr equals one of the accessions (compared over at
+   most MAX_UID characters), 0 when it does not, and -1 when
+   curr_entry >= max_entry (curr_entry is not advanced here).
+
+   The earlier pre-check of characters 1 and 2 read beyond the
+   terminator of identifiers shorter than two characters and could
+   reject equal strings; strncmp() alone gives the same answer for all
+   longer identifiers. */
+int sel_hacc_libstr(char *libstr, int gi, void *ptr) {
+  int i;
+  struct sel_acc_str *sel_acc_ptr;
+  char *bp,  **acc_list;
+  int hash_val;
+
+  sel_acc_ptr = (struct sel_acc_str *)ptr;
+
+  if (sel_acc_ptr->curr_entry >= sel_acc_ptr->max_entry) return -1;
+
+  if ((bp = strchr(libstr,sel_acc_ptr->fmt_term))!=NULL) {
+    *bp = '\0';
+  }
+
+  hash_val = hash_func(libstr) & sel_acc_ptr->hash_mask;
+
+  if (sel_acc_ptr->acc_hash[hash_val] == 0) return 0;
+
+  acc_list = sel_acc_ptr->acc_list;
+
+  /* table and links store (index + 1); 0 ends the chain */
+  for (i=sel_acc_ptr->acc_hash[hash_val]; i > 0;
+       i = sel_acc_ptr->acc_hash_link[i]) {
+    if (strncmp(libstr,acc_list[i-1],MAX_UID)==0) {
+      return 1;
+    }
+  }
+  return 0;
+}
+
+/* sel_hacc_gi_init: read integer identifiers, one per line (converted
+   with atoi()), from the already open file libf, hash them with
+   fast_hash32() and return a selection structure for use with
+   sel_hacc_gi().  The list does not need to be sorted.
+
+   libf     - open list file; it is closed before returning
+   acc_off  - not used here
+   fmt_term - stored in the structure
+
+   Returns the new struct sel_acc_str (as void *), or NULL when memory
+   cannot be allocated.  If the list cannot be enlarged, reading stops
+   and the entries read so far are used.
+
+   Fixes relative to the earlier code: everything allocated so far is
+   released when the hash tables cannot be allocated, the link table is
+   sized in ints, and the sizes printed in the error messages match
+   their %ld conversion. */
+void *sel_hacc_gi_init(FILE *libf, int *acc_off, char fmt_term) {
+  struct sel_acc_str *sel_acc_ptr;
+  char acc_line[MAX_STR];
+  int *gi_list;
+  int *new_buff;	/* reallocated list */
+  int acc_cnt, i;
+  int new_buff_siz;	/* growth step, in entries */
+  int abuff_siz;		/* allocated entries */
+  int hash_val, hash_max, hash_mask, link_save;
+  int *gi_hash, *gi_hash_link;
+
+  if ((sel_acc_ptr = (struct sel_acc_str *)calloc(1,sizeof(struct sel_acc_str)))==NULL) {
+    fprintf(stderr, "Cannot allocate struct sel_acc_str\n");
+    return NULL;
+  }
+
+  sel_acc_ptr->fmt_term = fmt_term;
+
+  /* now allocate some space for the ACC's */
+
+  abuff_siz = new_buff_siz = 64000;
+
+  if ((gi_list = (int *)calloc(abuff_siz, sizeof(int)))==NULL) {
+    fprintf(stderr, "Cannot allocate acc buff %d\n",abuff_siz);
+    free(sel_acc_ptr);
+    return NULL;
+  }
+
+  /* read one number per line, enlarging the list whenever it is full */
+
+  acc_cnt = 0;
+  while (fgets(acc_line, sizeof(acc_line), libf)!=NULL) {
+    gi_list[acc_cnt++] = atoi(acc_line);
+    if (acc_cnt >= abuff_siz) {
+      if ((new_buff = realloc(gi_list,(abuff_siz + new_buff_siz)*sizeof(int)))==NULL) {
+	fprintf(stderr,"cannot realloc gi_list[%d]\n",abuff_siz+new_buff_siz);
+	break;
+      }
+      else {
+	abuff_siz += new_buff_siz;
+	gi_list = new_buff;
+      }
+    }
+  }
+  fclose(libf);
+
+  /* allocate the hash for the gi_list - a power of 2, at least 4096
+     and larger than HASH_TABLE_MULT * acc_cnt */
+  for (hash_max = 4096; hash_max <= HASH_TABLE_MULT * acc_cnt; hash_max *= 2);
+  hash_mask = hash_max - 1;
+
+  if ((gi_hash = (int *)calloc(hash_max, sizeof(int)))==NULL) {
+    fprintf(stderr, "cannot allocate gi_hash[%ld]\n",(long)(hash_max*sizeof(int)));
+    free(gi_list);
+    free(sel_acc_ptr);
+    return NULL;
+  }
+
+  /* allocate the gi_list link table (indexed by entry index + 1) */
+  if ((gi_hash_link = (int *)calloc(acc_cnt+1,sizeof(int)))==NULL) {
+    fprintf(stderr, "cannot allocate gi_hash_link[%ld]\n",(long)((acc_cnt+1)*sizeof(int)));
+    free(gi_hash);
+    free(gi_list);
+    free(sel_acc_ptr);
+    return NULL;
+  }
+
+  /* each slot holds (index + 1) of the newest entry; older entries
+     with the same hash are reached through gi_hash_link */
+  for (i=0; i<acc_cnt; i++) {
+    hash_val = fast_hash32(gi_list[i]) & hash_mask;
+    if (gi_hash[hash_val] != 0) {
+      link_save = gi_hash[hash_val];
+      gi_hash_link[i+1]=link_save;
+    }
+    gi_hash[hash_val] = i+1;
+  }
+
+  /* finally put everything in the structure to be returned */
+  sel_acc_ptr->gi_list = gi_list;
+  sel_acc_ptr->curr_entry = 0;
+  sel_acc_ptr->max_entry = acc_cnt;
+
+  sel_acc_ptr->acc_hash = gi_hash;
+  sel_acc_ptr->acc_hash_link = gi_hash_link;
+  sel_acc_ptr->hash_mask = hash_mask;
+
+  return (void *)sel_acc_ptr;
+}
+
+/* sel_hacc_gi: look a gi number up in the gi hash table.
+
+   libstr - used only when gi <= 0: the number is then taken from
+            atoi(libstr) (if libstr is not NULL)
+   gi     - identifier of the library entry
+   ptr    - struct sel_acc_str built by sel_hacc_gi_init()
+
+   Returns 1 when the number is in the list, 0 when it is not, and -1
+   when curr_entry >= max_entry (curr_entry is not advanced here). */
+int sel_hacc_gi(char *libstr, int gi, void *ptr) {
+  struct sel_acc_str *sel = (struct sel_acc_str *)ptr;
+  int slot, idx;
+
+  if (sel->curr_entry >= sel->max_entry) return -1;
+
+  if (gi <= 0 && libstr != NULL) gi = atoi(libstr);
+
+  slot = fast_hash32(gi) & sel->hash_mask;
+
+  /* table and links store (index + 1); 0 marks an empty slot or the
+     end of a chain */
+  idx = sel->acc_hash[slot];
+  while (idx > 0) {
+    if (sel->gi_list[idx-1] == gi) return 1;
+    idx = sel->acc_hash_link[idx];
+  }
+  return 0;
+}
+
+
+/* hash_func: hash a NUL-terminated string to an unsigned int by
+   mixing in one character at a time and then scrambling the result.
+   adapted from	http://burtleburtle.net/bob/hash/doobs.html */
+unsigned int
+hash_func(char *key)
+{
+  unsigned int h = 0;
+  char *p;
+
+  /* per-character mixing step */
+  for (p = key; *p != '\0'; p++) {
+    h += *p;
+    h += (h << 10);
+    h ^= (h >> 6);
+  }
+
+  /* final scramble */
+  h += (h << 3);
+  h ^= (h >> 11);
+  h += (h << 15);
+
+  return h;
+}
+
+/* fast_hash32: scramble a 32-bit value into a hash value.
+
+   All arithmetic is done in unsigned int.  The earlier version used
+   signed int temporaries, so the left shifts and additions could
+   overflow (undefined behaviour) and the right shifts of negative
+   values were implementation-defined.  The values returned therefore
+   differ from those of the signed version; they only have to agree
+   between the code that fills a hash table and the code that searches
+   it. */
+unsigned int
+fast_hash32 (unsigned int data) {
+  unsigned int tmp, hash;
+
+  hash  = data >> 16;
+  tmp    = ((data & 0xFFFF) << 11) ^ hash;
+  hash   = (hash << 16) ^ tmp;
+  hash  += hash >> 11;
+
+  /* Force "avalanching" of final 127 bits */
+  hash ^= hash << 3;
+  hash += hash >> 5;
+  hash ^= hash << 4;
+  hash += hash >> 17;
+  hash ^= hash << 25;
+  hash += hash >> 6;
+
+  return hash;
+}
+
+/* alloc_file_name: return a freshly allocated, NUL-terminated copy of
+   f_name.  Prints a message and exits with status 1 when the memory
+   cannot be obtained. */
+char *alloc_file_name(char *f_name) {
+  int name_len = strlen(f_name);
+  char *copy = calloc(name_len+1,sizeof(char));
+
+  if (copy == NULL) {
+    fprintf(stderr, "Cannot allocate %d space for %s\n",
+	    name_len+1, f_name);
+    exit(1);
+  }
+
+  /* the buffer is zero-filled, so the terminator is already there */
+  memcpy(copy,f_name,name_len);
+  return copy;
+}
diff --git a/src/param.h b/src/param.h
new file mode 100644
index 0000000..e4deca5
--- /dev/null
+++ b/src/param.h
@@ -0,0 +1,251 @@
+
+/* $Id: param.h 1233 2013-10-08 18:26:31Z wrp $ */
+/* $Revision: 1233 $  */
+
+#include <sys/types.h>
+
+#ifndef P_STRUCT
+#define P_STRUCT
+
+#define MAXSQ 60
+
+/* Concurrent read version */
+
+struct fastr {	/* stored as the "fa" arm of the param_u union in struct pstruct */
+  int ktup;
+  int cgap;
+  int pgap;
+  int pamfact;
+  int scfact;
+  /* these values will soon be abandoned */
+  int bestoff;
+  int bestscale;
+  int bkfact;
+  int bktup;
+  int bestmax;
+  /* statistics based scaling values */
+  int use_E_thresholds;
+  double E_join, E_band_opt;
+  int altflag;
+  int optflag;
+  int iniflag;
+  int optcut;
+  int optcut_set;	/* presumably non-zero once optcut has been given explicitly -- TODO confirm */
+  int optwid;
+  int optwid_set;	/* presumably non-zero once optwid has been given explicitly -- TODO confirm */
+};
+
+struct prostr {	/* stored as the "pr" arm of the param_u union in struct pstruct */
+    int gopen;	/* presumably the gap-open value -- TODO confirm */
+    int gextend;	/* presumably the gap-extension value -- TODO confirm */
+    int width;
+};
+
+/* must be identical in thr_bufs.h */
+struct score_count_s {	/* next_work_result() in pcomp_subs2.c copies this as raw bytes (buf_head.s_cnt_info) */
+  long s_cnt[3];
+  long tot_scores;
+};
+
+struct pstruct		/* parameters; init_thr() in pcomp_subs2.c sends the whole struct to each worker as raw bytes */
+{
+  int n0;	/* length of query sequence, used for statistics */
+  int gdelval;	/* value gap open (-10) */
+  int ggapval;	/* value for additional residues in gap (-2) */
+  int gshift;	/* frameshift for fastx, fasty */
+  int gsubs;	/* nt substitution in fasty */
+  int p_d_mat;	/* dna match penalty */
+  int p_d_mis;	/* dna mismatch penalty */
+  int p_d_set;	/* using match/mismatch */
+  int n1_low;
+  int n1_high;	/* sequence length limits */
+  int score_ix;	/* index to sorted score */
+  int show_ident;	/* flag - show identical lalign alignment */
+  int nseq;	/* number of different sequences (for lalign) */
+  int zsflag;	/* use scalebest() */
+  int zsflag2;	/* statistics for best shuffle */
+  int zsflag_f;	/* use scalebest() */
+  int zs_win;	/* window shuffle size */
+  int shuffle_dna3;	/* shuffle dna as codons */
+  int histint;		/* histogram interval */
+  unsigned char sq[MAXSQ+1];
+  int hsq[MAXSQ+1];
+  int nsq;		/* length of normal sq */
+  /* int pamh1[MAXSQ+1]; */	/* identical match score (diagonal scores) */
+  /* int *pamh2[MAXSQ+1]; */	/* ktup match score */
+  int ext_sq_set;	/* flag for using extended alphabet */
+  unsigned char sqx[MAXSQ+1];
+  int hsqx[MAXSQ+1];
+  int c_nt[MAXSQ+1];
+  int nsqx;	/* length of extended sq */
+  int nsq_e;	/* effective nsq */
+  int dnaseq;	/* -1 = not set (protein); 0 = protein; 1 = DNA; 2 = other, 3 RNA */
+  int nt_align;	/* DNA/RNA alignment = 1 */
+  int debug_lib;
+  int tr_type;	/* codon table */
+  int sw_flag;
+  char pamfile[MAX_FN];	/* pam file name */
+  char pamfile_save[MAX_FN];  /* original pam file */
+  char pam_name[MAX_FN];
+  char pgpfile[MAX_FN];
+  int pgpfile_type;
+  float pamscale;	/* ln(2)/3 or ln(2)/2 */
+  float ulambda;	/* ungapped lambda */
+  float entropy;	/* bits/position */
+  float tfract_id;	/* target fraction id */
+  int pam_pssm;
+  int pam_set;
+  int pam_variable;
+  int have_pam2;
+  int **pam2[2];	/* set of 2D scoring matrices; [0] lower-case 'x', [1] upper/lower case; matrix contents are sent separately by init_thr() */
+  int **pam2p[2];
+  int pamoff;	/* offset for pam values */
+  int pam_l, pam_h, pam_xx, pam_xm;	/* lowest, highest pam value */
+  int pam_x_set;
+  int pam_x_id_sim;	/* =0 -> 'N,X' identical but not similar;
+			   =1 -> 'N,X' identical+similar;
+			   <0 -> 'N,X' not identical, not similar */
+  int pam_ms;		/* use a Mass Spec pam matrix */
+  void *fp_struct;	/* function specific parameters based on algorithm/scoring matrix */
+  int LK_set;
+  double pLambda, pK, pH;	/* Karlin-Altschul parameters */
+  int maxlen;
+  int max_repeat;	/* used for repeat count in ssearch34/lalign */
+  int repeat_thresh;
+  char *other_info;
+  double e_cut;		/* cutoff for scores */
+  double e_cut_r; 	/* cutoff for multiple local alignments */
+  double zs_off;	/* z-score offset from sampling */
+  int do_rep;		/* enable multiple alignments */
+  int can_pre_align;	/* flag for have_ares & 0x1 pre-alignments */
+  long zdb_size; 	/* force database size */
+  int zdb_size_set;	/* flag for user -Z */
+  int pgm_id;
+  int pseudocts;
+  int shuff_node;
+  union {	/* struct fastr and struct prostr share this storage */
+    struct fastr fa;
+    struct prostr pr;
+  } param_u;
+};
+
+#include "rstruct.h"
+
+/* the seq_record has all the invariant data about a sequence -
+   sequence length, libstr, sequence itself, etc.
+   it does not have the results information
+   we can have 1, 2, or 6 (obsolete tfasta) results records for a sequence,
+   but there will still be only one sequence record.
+*/
+
+struct annot_str {
+  /* information for conventional annotations */
+  unsigned char *aa1_ann;	/* annotation string */
+  /* information for "rich" annotations */
+  int n_annot;	/* length of annot_arr_p array */
+  int n_domains; 	/* length of domain_arr_p array (no member of that name is declared here -- TODO confirm which array is meant) */
+  struct annot_entry *annot_arr_p;	/* array[n_annot] of annot_entry's for all annotations */
+  struct annot_entry **s_annot_arr_p;	/* sorted version of annots */
+};
+
+/* annot_entry keeps information on "rich" annotations, position, type, value */
+struct annot_entry {
+  long pos;
+  long end;
+  char label;	/* currently -V *#%!@ symbols, plus 'V' for variant */
+  unsigned char value;	/* must be amino acid residue, binary encoded */
+  char *comment;
+  int target;	 /* 0 for query/ 1 for library */
+};
+
+/* domfeat_data: node of a singly linked list (->next) that ties an annot_entry to positions, a score and identity/length counts */
+struct domfeat_data {
+  struct annot_entry *annot_entry_p;
+  struct domfeat_data *next;
+  long pos;	/* annotation position */
+  long a_pos;	/* aligned annotation position */
+  long end_pos;	/* domain annotation end */
+  int score;	/* score of current region */
+  int n_ident;	/* count for percent id */
+  int n_alen; 	/* align len for percent id */
+  int n_gaplen;	/* number of gap residues in alignment */
+};
+
+/* seq_record has the data required to do a calculation */
+struct seq_record {
+  unsigned char *aa1b;		/* sequence buffer; get_wbuf() in pcomp_subs2.c re-points this after a record is received */
+  struct annot_str *annot_p;
+  int n1;	/* put_rbuf()/get_wbuf() in pcomp_subs2.c transfer n1+1 bytes of aa1b per record */
+  long l_offset;	/* q_offset/l_offset set outside getlib() based on chunks; 0-based */
+  long l_off;		/* q_off/l_off comes from @C:123, and is 1-based */
+  int index;		/* index in search */
+#ifdef DEBUG
+  long adler32_crc;	/* compared with adler32(1L,aa1b,n1) in get_wbuf() */
+#endif
+};
+
+/* mseq_record has meta data not required to calculate score or alignment */
+struct mseq_record {
+  int *n1tot_p;
+#ifdef USE_FSEEKO
+  off_t lseek;	/* off_t when USE_FSEEKO is defined, long otherwise */
+#else
+  long lseek;
+#endif
+  struct lmf_str *m_file_p;
+  int cont;
+  char libstr[MAX_UID];
+  char *bline;
+  int bline_max;	/* presumably the allocated size of bline -- TODO confirm */
+  int annot_req_flag;
+  int index;		/* index in search */
+#ifdef DEBUG
+  long adler32_crc;
+#endif
+};
+
+struct seqr_chain {	/* one link of a chain of seq_record/mseq_record blocks (see ->next) */
+  struct seq_record *seqr_base;
+  struct mseq_record *mseqr_base;
+  struct seqr_chain *next;	/* following link of the chain */
+  /*   struct lib_seq_info *ldb_info; */
+  int max_chain_seqs;
+  int cur_seq_cnt;
+  unsigned char *aa1b_base;
+  int aa1b_size;	/* presumably the size of the aa1b_base buffer -- TODO confirm */
+  int aa1b_next;	/* presumably the next free offset in aa1b_base -- TODO confirm */
+  int contiguous;
+};
+
+struct getlib_str {
+  int lcont;		/* lcont save */
+  int ocont;		/* ocont save */
+  int eof;		/* done with this file */
+#ifdef USE_FSEEKO
+  off_t lseek;	/* off_t when USE_FSEEKO is defined, long otherwise */
+#else
+  long lseek;
+#endif
+  long loffset;		/* loffset save */
+  char libstr[MAX_UID];	/* repository for libstr */
+  int n_libstr;		/* length of libstr */
+  unsigned char *aa1save;	/* overlapping sequence save */
+  struct lib_struct *lib_list_p;
+  int *n1tot_ptr, *n1tot_cur;
+  int n1tot_cnt;
+  int n1tot_v;
+  long tot_memK;	/* cumulative amount of memory allocated for aa1b;
+			   used to limit memory use */
+  long max_memK;	/* allow separate memory limits for main,link
+			   searches */
+  long lost_memK;	/* check for waste */
+  struct seqr_chain *start_seqr_chain;	/* first link of the seqr_chain list */
+  struct seqr_chain *cur_seqr_chain;	/* presumably the link currently being filled -- TODO confirm */
+  int use_memory;
+};
+
+#endif	/* P_STRUCT */
+
+#ifndef A_STRUCT
+#include "aln_structs.h"
+#endif
diff --git a/src/pcomp_bufs.h b/src/pcomp_bufs.h
new file mode 100644
index 0000000..4d147fa
--- /dev/null
+++ b/src/pcomp_bufs.h
@@ -0,0 +1,33 @@
+
+/***************************************/
+/* thread global variable declarations */
+/***************************************/
+
+/* $Id: pcomp_bufs.h 625 2011-03-23 17:21:38Z wrp $ */
+/* $Revision: 625 $  */
+
+/* this file serves the same purpose as thr_bufs2.h in the threaded version
+
+   thr_bufs2.h has two separate lists of buffers, reader_buf[] and
+   worker_buf[], so that the threads can be asynchronous.
+
+   pcomp workers are asynchronous as well but, at least initially, there
+   is a single array with an entry for each worker (worker_buf[]), and a
+   queue that captures which buffers are ready (work_q[]).
+ */
+
+#ifndef MAX_WORKERS
+#define MAX_WORKERS 2	/* default, used only when MAX_WORKERS is not already defined */
+#endif
+
+#ifndef XTERNAL	/* without XTERNAL the globals are defined here; with it they are only declared extern */
+struct buf_head **worker_buf;  /* pointers to buffers of sequences/results; pcomp_subs2.c indexes this by node-FIRSTNODE */
+int *work_q;	/* queue of worker node numbers; get_rbuf() hands out the next slot */
+int max_worker_q;	/* set to nworkers by init_thr() when it allocates work_q */
+int num_reader_bufs;
+#else
+extern struct buf_head **worker_buf;
+extern int *work_q;
+extern int max_worker_q;
+extern int num_reader_bufs;
+#endif
diff --git a/src/pcomp_subs2.c b/src/pcomp_subs2.c
new file mode 100644
index 0000000..9752d72
--- /dev/null
+++ b/src/pcomp_subs2.c
@@ -0,0 +1,686 @@
+/* $Id: pcomp_subs2.c $ */
+
+/* copyright (c) 1996, 1997, 1998, 1999, 2014 by William R. Pearson and
+   The Rector & Visitors of the University of Virginia */
+
+/* Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under this License is distributed on an "AS
+   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+   express or implied.  See the License for the specific language
+   governing permissions and limitations under the License. 
+*/
+
+/* modified to do more initialization of work_info here, rather than
+   in main() */
+
+/* this file provides the same functions for PCOMPLIB as pthr_subs2.c does for COMP_THR */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <sys/types.h>
+#ifdef UNIX
+#include <unistd.h>
+#endif
+#include <signal.h>
+
+#include "defs.h"
+#include "structs.h"		/* mngmsg, libstruct */
+#include "param.h"		/* pstruct, thr_str, buf_head, rstruct */
+#include "thr_buf_structs.h"
+
+#ifdef MPI_SRC
+#include "mpi.h"
+#endif
+
+#include "msg.h"
+#include "pcomp_bufs.h"
+#define XTERNAL
+#include "uascii.h"
+#undef XTERNAL
+#include "pthr_subs.h"
+
+#ifdef DEBUG
+unsigned long adler32(unsigned long, const unsigned char *, unsigned int);	/* checksum that get_wbuf() compares with seq_record.adler32_crc */
+#endif
+
+static int next_worker_idx, num_workers_idle;	/* slot of work_q[] that get_rbuf() uses next; number of workers without an outstanding buffer */
+extern int g_worker;	/* defined in another file */
+
+/* used for debugging */
+/*
+int check_seq_range(unsigned char *aa1b, int n1, int nsq, char *);
+*/
+
+/* start the workers, nworkers == number of workers, not nodes
+   aa0 == NULL: every worker just gets a zeroed 4-int start message and work_q is released;
+   otherwise parameters, scoring matrices and the query are sent and one reply per worker is awaited */
+void
+init_thr(int nworkers, char *info_lib_range_p, const struct mngmsg *m_msp, struct pstruct *ppst,
+	 unsigned char *aa0, struct mng_thr *m_bufi_p)
+{
+#ifdef MPI_SRC
+  MPI_Status mpi_status;
+  int int_msg_b[4];	/* general purpose buffer for integers */
+#endif
+  int node, snode;
+
+  /* start the worker processes */
+  if (work_q == NULL) {
+    if ((work_q=(int *)calloc(nworkers, sizeof(int)))==NULL) {
+      fprintf(stderr, " cannot allocate work_q[%d] structure\n",
+	      nworkers);
+      exit(1);
+    }
+    else {max_worker_q = nworkers;}
+  }
+  num_workers_idle = 0;
+
+  /* setup thread buffer info */
+  if (aa0 == NULL) {
+    int_msg_b[0] = int_msg_b[1] = int_msg_b[2] = int_msg_b[3] = 0;	/* all four ints are sent below */
+  }
+  else {
+    int_msg_b[0] = nworkers;
+    int_msg_b[1] = m_bufi_p->max_buf2_res;
+    int_msg_b[2] = m_bufi_p->max_chain_seqs;
+    int_msg_b[3] = m_bufi_p->seq_buf_size;
+  }
+
+  /* send thread info */
+  for (node=FIRSTNODE; node < nworkers+FIRSTNODE; node++) {
+    MPI_Send(int_msg_b, 4, MPI_INT, node, STARTTYPE0, MPI_COMM_WORLD);
+
+    if (aa0 == NULL) { continue;}
+
+    /* send mngmsg */
+    MPI_Send((void *)m_msp, sizeof(struct mngmsg), MPI_BYTE, node,
+	     STARTTYPE1, MPI_COMM_WORLD);
+    /* send pstruct as raw bytes; the pam2 matrices it points to follow separately */
+    MPI_Send(ppst, sizeof(struct pstruct), MPI_BYTE, node,
+	     STARTTYPE2, MPI_COMM_WORLD);
+    /* send the rest of the pieces of pam[2] */
+    MPI_Send(&ppst->pam2[0][0][0],m_msp->pamd1*m_msp->pamd2,MPI_INT,node,STARTTYPE3,
+	     MPI_COMM_WORLD);
+    MPI_Send(&ppst->pam2[1][0][0],m_msp->pamd1*m_msp->pamd2,MPI_INT,node,STARTTYPE3,
+	     MPI_COMM_WORLD);
+
+    /* send pascii (only for fasty/tfasty */
+    MPI_Send(pascii, sizeof(aascii), MPI_BYTE, node, STARTTYPE4, MPI_COMM_WORLD);
+  }
+
+  if (aa0 == NULL) {
+    /* all done; clear the pointer as well, so that the "work_q == NULL"
+       test above allocates a fresh queue if init_thr() runs again */
+    free(work_q);
+    work_q = NULL;
+    return;
+  }
+
+  /* wait for returned status results */
+  while (num_workers_idle < max_worker_q) {
+    MPI_Recv(&node, 1, MPI_INT, MPI_ANY_SOURCE,MSEQTYPE0,
+	     MPI_COMM_WORLD, &mpi_status);
+    snode= mpi_status.MPI_SOURCE;
+    if (snode == FIRSTNODE) {
+      MPI_Recv(info_lib_range_p, MAX_FN, MPI_BYTE, snode,MSEQTYPE0,
+	       MPI_COMM_WORLD, &mpi_status);
+    }
+    if (snode != node) {
+      fprintf(stderr, " initial node mismatch [%d!=%d]\n",node, snode);
+    }
+    worker_buf[snode-FIRSTNODE]->hdr.have_data = 0;
+    worker_buf[snode-FIRSTNODE]->hdr.have_results = 0;
+    worker_buf[snode-FIRSTNODE]->hdr.worker_idx = snode;
+    work_q[num_workers_idle++] = snode;
+  }
+  next_worker_idx = 0;
+
+  /* send query sequence info to workers */
+  for (node=FIRSTNODE; node < nworkers+FIRSTNODE; node++) {
+    /* send thread buffer info */
+    int_msg_b[0] = m_msp->n0;
+    int_msg_b[1] = m_msp->nm0;
+    MPI_Send(int_msg_b, 2, MPI_INT, node, QSEQTYPE0, MPI_COMM_WORLD);
+    MPI_Send(aa0, m_msp->n0+1, MPI_BYTE, node, QSEQTYPE1, MPI_COMM_WORLD);
+    if (m_msp->ann_flg && m_msp->aa0a) {
+      MPI_Send(m_msp->aa0a, m_msp->n0+2, MPI_BYTE, node, QSEQTYPE1, MPI_COMM_WORLD);
+    }
+  }
+}
+
+/* get_rbuf() provides buffers containing sequences to the main
+   program. max_work_q buffers are available, with each
+   buffer tied to a worker.
+
+   As the main program runs, it calls get_rbuf() to get a worker
+   buffer (reader buffers are not used with PCOMPLIB), fills it with
+   sequences, and sends it to a worker with put_rbuf().
+
+   At the same time, the worker programs call get_wbuf(), to get a
+   filled worker buffer sent by put_rbuf(), takes the sequences from
+   the buffer and does the comparisons, and sends the results back to
+   the manager by calling put_wbuf().
+*/
+
+/* wait for results from any worker; stores the sending node in *snode and returns that worker's buffer (worker_buf[node-FIRSTNODE]) filled with what was received */
+struct buf_head *
+next_work_result(int *snode) {
+  int this_node, buf2_cnt;
+  int int_msg_b[4];	/* general purpose int buffer */
+  int i;
+  struct buf2_hdr_s buf2_head;	/* NOTE(review): not referenced below */
+  struct buf_head *this_buf_p, tmp_buf_head;
+  struct seq_record *seq_b_save;
+  struct mseq_record *mseq_b_save;
+  unsigned char *aa1b_start_save;
+  struct a_res_str *new_ares_p, *prev_ares_p;
+#ifdef MPI_SRC
+  MPI_Status mpi_status;
+#endif
+
+  /* wait for a returned result */
+  MPI_Recv(&tmp_buf_head, sizeof(struct buf_head), MPI_BYTE, MPI_ANY_SOURCE,RES_TYPE0,
+	   MPI_COMM_WORLD, &mpi_status);
+  this_node = mpi_status.MPI_SOURCE;	/* every later receive is restricted to this sender */
+  buf2_cnt = tmp_buf_head.hdr.buf2_cnt;
+
+#ifdef DEBUG
+  /*
+  fprintf(stderr," %d: %d results\n", this_node, buf2_cnt);
+  */
+#endif
+
+  this_buf_p = worker_buf[this_node-FIRSTNODE];
+  /* move things selectively to avoid over-writing pointers to res, a_res arrays */
+  
+  aa1b_start_save = this_buf_p->hdr.aa1b_start;
+  seq_b_save = this_buf_p->hdr.seq_b;
+  mseq_b_save = this_buf_p->hdr.mseq_b;
+
+  memcpy(&this_buf_p->hdr,&tmp_buf_head.hdr,sizeof(struct buf2_hdr_s));
+
+  this_buf_p->hdr.aa1b_start = aa1b_start_save;
+  this_buf_p->hdr.seq_b = seq_b_save;
+  this_buf_p->hdr.mseq_b =mseq_b_save;
+
+  memcpy(&this_buf_p->s_cnt_info,&tmp_buf_head.s_cnt_info,sizeof(struct score_count_s));
+
+  if (this_buf_p->hdr.have_results) {
+    if (this_buf_p->hdr.buf2_type & (BUF2_DOWORK + BUF2_DOSHUF + BUF2_DOOPT)) {
+      MPI_Recv(this_buf_p->buf2_res, sizeof(struct buf2_res_s)*buf2_cnt,
+	       MPI_BYTE, this_node, RES_TYPE1, MPI_COMM_WORLD, &mpi_status);
+      /*
+      for (i=0; i < buf2_cnt; i++) {
+	if (this_buf_p->buf2_res[i].rst.score[2] > 200) {
+	  fprintf(stderr, "HS[%d:%d,%d]: %d (%d:%d)\n",i,this_node, tmp_buf_head.hdr.worker_idx, this_buf_p->buf2_res[i].rst.score[2],
+		  this_buf_p->buf2_data[i].seq->index,this_buf_p->buf2_data[i].seq->n1);
+	}
+      }
+      */
+    }
+
+    if (this_buf_p->hdr.buf2_type & BUF2_DOALIGN) {
+      /* (1) get a message that has "have_ares"
+	 (2) allocate space for each a_res and receive it individually
+	 (3) reset the ->next pointers for the a_res chain
+      */
+
+      for (i = 0; i < buf2_cnt; i++) {
+	MPI_Recv(int_msg_b, 1, MPI_INT, this_node, ALN_TYPE0, MPI_COMM_WORLD, &mpi_status);
+	this_buf_p->buf2_ares[i].have_ares = int_msg_b[0];
+	this_buf_p->buf2_ares[i].a_res = NULL;	/* pre-initialize */
+
+	if (this_buf_p->buf2_ares[i].have_ares) {
+	  /* allocate space to receive it */
+	  if ((new_ares_p = (struct a_res_str *)calloc(1,sizeof(struct a_res_str)))==NULL) {
+	    fprintf(stderr, "cannot allocate a_res from %d\n",this_node);
+	    exit(1);
+	  }
+	  /* save the head of the ares_chain */
+	  this_buf_p->buf2_ares[i].a_res = new_ares_p;
+
+	  /* get the first a_res */
+	  MPI_Recv(new_ares_p, sizeof(struct a_res_str), MPI_BYTE, this_node,
+		   ALN_TYPE1, MPI_COMM_WORLD, &mpi_status);
+	  /* get the associated res[nres] */
+	  if ((new_ares_p->res = (int *)calloc(new_ares_p->nres,sizeof(int)))==NULL) {
+	    fprintf(stderr, "cannot allocate res for a_res from %d\n",this_node);
+	    exit(1);
+	  }
+	  MPI_Recv(new_ares_p->res, new_ares_p->nres, MPI_INT, this_node,
+		   ALN_TYPE2, MPI_COMM_WORLD, &mpi_status);
+
+	  /* now get alignment encodings if available */
+	  if (new_ares_p->aln_code) {	/* the pointer value came from the worker; only its non-NULL-ness is used before it is replaced */
+	    if ((new_ares_p->aln_code = (char *)calloc(new_ares_p->aln_code_n+1,sizeof(char)))==NULL) {
+	      fprintf(stderr, "cannot allocate aln_code for a_res from %d\n",this_node);
+	      exit(1);
+	    }
+	    MPI_Recv(new_ares_p->aln_code, new_ares_p->aln_code_n+1, MPI_BYTE, this_node,
+		   ALN_TYPE3, MPI_COMM_WORLD, &mpi_status);
+	  }
+	  if (new_ares_p->ann_code) {	/* same convention as aln_code above */
+	    if ((new_ares_p->ann_code = (char *)calloc(new_ares_p->ann_code_n+1,sizeof(char)))==NULL) {
+	      fprintf(stderr, "cannot allocate ann_code for a_res from %d\n",this_node);
+	      exit(1);
+	    }
+	    MPI_Recv(new_ares_p->ann_code, new_ares_p->ann_code_n+1, MPI_BYTE, this_node,
+		   ALN_TYPE3, MPI_COMM_WORLD, &mpi_status);
+
+	  }
+
+	  while (new_ares_p->next) {	/* while the chain continues */
+	    prev_ares_p = new_ares_p;	/* save pointer to previous a_res to fix prev_ares->next */
+	    if ((new_ares_p = (struct a_res_str *)calloc(1,sizeof(struct a_res_str)))==NULL) {
+	      fprintf(stderr, "cannot allocate a_res from %d\n",this_node);
+	      exit(1);
+	    }
+	    prev_ares_p->next = new_ares_p;	/* link locally; the receive below fills *new_ares_p, including the worker's ->next tested by the loop */
+	    MPI_Recv(new_ares_p, sizeof(struct a_res_str), MPI_BYTE, this_node,
+		     ALN_TYPE1, MPI_COMM_WORLD, &mpi_status);
+	    if ((new_ares_p->res = (int *)calloc(new_ares_p->nres,sizeof(int)))==NULL) {
+	      fprintf(stderr, "cannot allocate res for a_res from %d\n",this_node);
+	      exit(1);
+	    }
+	    MPI_Recv(new_ares_p->res, new_ares_p->nres, MPI_INT, this_node,
+		     ALN_TYPE2, MPI_COMM_WORLD, &mpi_status);
+	    /* now get alignment encodings if available */
+	    if (new_ares_p->aln_code) {
+	      if ((new_ares_p->aln_code = (char *)calloc(new_ares_p->aln_code_n+1,sizeof(char)))==NULL) {
+		fprintf(stderr, "cannot allocate aln_code for a_res from %d\n",this_node);
+		exit(1);
+	      }
+	      MPI_Recv(new_ares_p->aln_code, new_ares_p->aln_code_n+1, MPI_BYTE, this_node,
+		       ALN_TYPE3, MPI_COMM_WORLD, &mpi_status);
+	    }
+	    if (new_ares_p->ann_code) {
+	      if ((new_ares_p->ann_code = (char *)calloc(new_ares_p->ann_code_n+1,sizeof(char)))==NULL) {
+		fprintf(stderr, "cannot allocate ann_code for a_res from %d\n",this_node);
+		exit(1);
+	      }
+	      MPI_Recv(new_ares_p->ann_code, new_ares_p->ann_code_n+1, MPI_BYTE, this_node,
+		       ALN_TYPE3, MPI_COMM_WORLD, &mpi_status);
+
+	    }
+	  }	/* finished with the ares_chain */
+	} /* done with have_ares */
+	else {
+#ifdef DEBUG
+	  fprintf(stderr, " getting alignment with no have_ares[%d]: %d/%d",
+		  this_buf_p->hdr.worker_idx,i,this_buf_p->buf2_ares[i].best_idx);
+#endif
+	}
+      }	/* done with buf2_ares[buf2_cnt] */
+    } /* done with BUF_DOALIGN */
+  } /* done with have_results */
+  *snode = this_node;
+  return this_buf_p;
+}
+
+/* get_rbuf() -- give the caller the buffer of the next worker in
+   work_q[]
+   when no worker is idle, block in next_work_result() until one of
+   them returns its results */
+void get_rbuf(struct buf_head **cur_buf, int max_work_buf)
+{
+  int done_node;	/* node whose results were just collected */
+
+  if (num_workers_idle != 0) {
+    /* the worker queued at this slot is idle: reuse its buffer */
+    *cur_buf = worker_buf[work_q[next_worker_idx]-FIRSTNODE];
+  }
+  else {
+    /* everybody is busy: wait for one set of results ... */
+    *cur_buf = next_work_result(&done_node);
+
+    /* ... and record the worker that sent them in the current slot */
+    work_q[next_worker_idx] = done_node;
+    num_workers_idle += 1;
+  }
+
+  /* step to the following slot of the worker queue, wrapping at
+     max_work_buf */
+  next_worker_idx += 1;
+  next_worker_idx %= max_work_buf;
+}
+
+/* put_rbuf() takes a buffer filled with sequences to be compared and
+   sends it to the worker named in hdr.worker_idx (nothing is sent for an empty buffer) */
+
+void put_rbuf(struct buf_head *cur_buf, int max_work_buf)
+{
+#ifdef MPI_SRC
+  MPI_Status mpi_status;
+#endif
+  struct seq_record *cur_seq_p, *tmp_seq_p;
+  int i, j, snode, buf2_cnt, seqr_cnt;
+  int cur_aa1b_size, max_aa1b_size;	/* max_aa1b_size is assigned below but not read afterwards */
+
+  /* do not send msg if no data */
+  if (!cur_buf->hdr.have_data || !(cur_buf->hdr.buf2_cnt > 0)) {return;}
+
+  /* here, since we have a buffer, we have a worker, just send the info */
+  snode = cur_buf->hdr.worker_idx;
+  buf2_cnt = cur_buf->hdr.buf2_cnt;
+  seqr_cnt = cur_buf->hdr.seqr_cnt;
+  max_aa1b_size = cur_buf->hdr.aa1b_size;
+
+#ifdef DEBUG
+  /*   fprintf(stderr," sending %d/%d seqs to %d\n", buf2_cnt, seqr_cnt, snode); */
+#endif
+  /* send header */
+  MPI_Send(&cur_buf->hdr, sizeof(struct buf2_hdr_s), MPI_BYTE, snode,
+	   MSEQTYPE0, MPI_COMM_WORLD);
+
+  /* send data */
+  MPI_Send(cur_buf->buf2_data, sizeof(struct buf2_data_s)*buf2_cnt,
+	   MPI_BYTE, snode, MSEQTYPE1, MPI_COMM_WORLD);
+
+  /* before sending sequence records, we need to check to see if we
+     need to transfer to a continuous location (or send lots of short
+     records) */
+
+#ifdef DEBUG
+  cur_aa1b_size = 0;
+  for (i=0; i < buf2_cnt; i++) {
+    cur_seq_p = cur_buf->buf2_data[i].seq;
+    if (!cur_buf->buf2_data[i].seq_dup) {
+      cur_aa1b_size += cur_seq_p->n1+1;
+    }
+    if (check_seq_range(cur_seq_p->aa1b, cur_seq_p->n1, 50, "put_rbuf()")) {	/* NOTE(review): the check_seq_range() prototype near the top of this file is commented out */
+      fprintf(stderr, "[put_rbuf] range error at: %d\n", i);
+    }
+  }
+
+  if (cur_aa1b_size != cur_buf->hdr.aa1b_used) {
+    fprintf(stderr,"[put_rbuf:%d] aa1b_used size mismatch: %d != %d\n",
+	    snode, cur_aa1b_size, cur_buf->hdr.aa1b_used);
+  }
+#endif
+
+  if (cur_buf->hdr.seq_record_continuous) {
+    /* send sequence records associated with data in one message */
+    MPI_Send(cur_buf->hdr.seq_b, sizeof(struct seq_record)*seqr_cnt,
+	     MPI_BYTE, snode, MSEQTYPE2, MPI_COMM_WORLD);
+    MPI_Send(cur_buf->hdr.aa1b_start, cur_buf->hdr.aa1b_used+1,
+	     MPI_BYTE, snode, MSEQTYPE3, MPI_COMM_WORLD);
+  }
+  else {
+    /* send individual sequence records */
+    cur_aa1b_size = 0;
+    for (i=0; i < buf2_cnt; i++) {
+      cur_seq_p = cur_buf->buf2_data[i].seq;
+      if (!cur_buf->buf2_data[i].seq_dup) {	/* don't send sequence if its a duplicate */
+	MPI_Send(cur_seq_p, sizeof(struct seq_record),
+		 MPI_BYTE, snode, MSEQTYPE4, MPI_COMM_WORLD);
+	MPI_Send(cur_seq_p->aa1b, cur_seq_p->n1+1,
+		 MPI_BYTE, snode, MSEQTYPE5, MPI_COMM_WORLD);
+      }
+    }
+  }
+
+  /* reduce the number of idle workers */
+  num_workers_idle--;
+}
+
+/* wait_rbuf() -- collect results until no worker still holds a
+   buffer, then put work_q[] back into node order
+   (used_reader_bufs is not examined) */
+void wait_rbuf(int used_reader_bufs) {
+  int src_node, slot;
+
+  /* every next_work_result() call retires one busy worker */
+  for ( ; num_workers_idle < max_worker_q; num_workers_idle++) {
+    next_work_result(&src_node);
+  }
+
+  /* all idle: slot k of work_q[] names node FIRSTNODE+k again */
+  for (slot = 0; slot < max_worker_q; ++slot) {
+    work_q[slot] = FIRSTNODE + slot;
+  }
+}
+
+void rbuf_done(int nthreads)
+{
+  /* tell every worker blocked in get_wbuf() to quit.
+     nthreads - number of workers; nodes FIRSTNODE .. FIRSTNODE+nthreads-1
+     each receive one header message (tag MSEQTYPE0) */
+  int i;
+
+  /* use a dummy buf_head to send buf2_cnt=0, stop_work=1 */
+  struct buf2_hdr_s tmp_buf2_hdr;
+
+  /* the whole header goes out as raw bytes, so clear it first rather
+     than transmit uninitialized fields (get_wbuf() receives the message
+     directly into the worker's own header) */
+  memset(&tmp_buf2_hdr, 0, sizeof(tmp_buf2_hdr));
+  tmp_buf2_hdr.stop_work = 1;
+  tmp_buf2_hdr.buf2_cnt = 0;
+
+  for (i=FIRSTNODE; i < nthreads+FIRSTNODE; i++) {
+    MPI_Send(&tmp_buf2_hdr, sizeof(struct buf2_hdr_s), MPI_BYTE, i,
+	     MSEQTYPE0, MPI_COMM_WORLD);
+  }
+}
+
+/* get_wbuf() -- called in workers
+   get a buffer full of sequences to be compared from the main program;
+   this function should follow put_rbuf() message for message
+
+   In the PCOMPLIB version, there is no queue of buffers to be read,
+   but we must have space to put the messages in as we receive them,
+   and we must fix the pointers in the seq_records
+
+   returns 0 when the header says stop_work or carries no entries,
+   1 when *cur_buf has been filled; max_work_buf is not used here */
+int get_wbuf(struct buf_head **cur_buf, int max_work_buf)
+{
+#ifdef MPI_SRC
+  MPI_Status mpi_status;
+#endif
+
+  /* we need to preserve some sequence pointer information so it is not
+     over-written by the messages */
+
+  struct seq_record *seq_base, *cur_seq_p, *prev_seq_p;
+  struct buf2_data_s *cur_buf2_dp;
+  unsigned char *aa1b_start_save, *cur_aa1b;
+  int buf2_cnt, i, cur_n1, seqr_cnt;
+  int max_aa1b_size, aa1b_size_save;
+  int cur_aa1b_size;
+  int snode;
+
+  snode = (*cur_buf)->hdr.worker_idx;
+  seq_base = (*cur_buf)->hdr.seq_b;
+  aa1b_start_save = (*cur_buf)->hdr.aa1b_start;
+  max_aa1b_size = aa1b_size_save = (*cur_buf)->hdr.aa1b_size;
+
+  /* put invalid bytes in aa1b to check for transmission errors */
+  memset(aa1b_start_save, 127, aa1b_size_save);
+
+  MPI_Recv(&(*cur_buf)->hdr, sizeof(struct buf2_hdr_s), MPI_BYTE, 0,
+	   MSEQTYPE0, MPI_COMM_WORLD, &mpi_status);
+
+  /* restore the seq_b, aa1b_start that were over-written; done right
+     away so that the early return below also leaves this worker's
+     own buffer pointers in the header, not the sender's */
+  (*cur_buf)->hdr.seq_b = seq_base;
+  (*cur_buf)->hdr.aa1b_start = aa1b_start_save;
+  (*cur_buf)->hdr.aa1b_size = aa1b_size_save;
+
+  buf2_cnt = (*cur_buf)->hdr.buf2_cnt;
+  seqr_cnt = (*cur_buf)->hdr.seqr_cnt;
+
+  if (buf2_cnt <= 0 || (*cur_buf)->hdr.stop_work) { return 0; }
+
+  /* get the buf2_data array, which has seq_dup and ->seq records */
+  MPI_Recv((*cur_buf)->buf2_data, sizeof(struct buf2_data_s)*buf2_cnt,
+	   MPI_BYTE, 0, MSEQTYPE1, MPI_COMM_WORLD, &mpi_status);
+#ifdef DEBUG
+  /*   fprintf(stderr,"[%d/get_wbuf] receiving %d/%d sequences\n",snode, buf2_cnt, seqr_cnt); */
+#endif
+
+  /* get seq_records (but not mseq_records, don't need them) */
+  if ((*cur_buf)->hdr.seq_record_continuous) {
+    MPI_Recv(seq_base, sizeof(struct seq_record)*seqr_cnt,
+	     MPI_BYTE, 0, MSEQTYPE2, MPI_COMM_WORLD, &mpi_status);
+
+    /* now get the sequence data */
+    MPI_Recv(aa1b_start_save, (*cur_buf)->hdr.aa1b_used+1,
+	     MPI_BYTE, 0, MSEQTYPE3, MPI_COMM_WORLD, &mpi_status);
+
+    /* map the seq records back into buf2_data */
+    /* must check for duplicate sequence records, initialize buf2_data[i]->seq
+       AND seq.aa1b in the same pass */
+
+    cur_buf2_dp = (*cur_buf)->buf2_data;
+    cur_seq_p = prev_seq_p = seq_base;
+
+    cur_aa1b = aa1b_start_save;
+    cur_aa1b_size = 0;
+
+    for (i=0; i < buf2_cnt; i++, cur_buf2_dp++) {
+      if (!cur_buf2_dp->seq_dup) {	/* not a duplicate */
+	cur_seq_p->aa1b = cur_aa1b;
+	cur_aa1b += cur_seq_p->n1 + 1;
+	cur_aa1b_size += cur_seq_p->n1 + 1;
+	prev_seq_p = cur_seq_p;	/* the record that any following duplicates share (same rule as the branch below) */
+	cur_buf2_dp->seq = cur_seq_p++;
+      }
+      else {		/* duplicate */
+	cur_buf2_dp->seq = prev_seq_p;	/* point to the previous value */
+      }
+    }
+
+    if (cur_aa1b_size != (*cur_buf)->hdr.aa1b_used) {
+      fprintf(stderr, "[%d] incorrect cur_aa1b_size: %d != %d\n",
+	      snode, cur_aa1b_size, (*cur_buf)->hdr.aa1b_used);
+    }
+  }
+  else { /* not continuous, get seq_records one at a time */
+    cur_seq_p = prev_seq_p = seq_base;	/* prev_seq_p must be valid even if entry 0 is flagged seq_dup */
+    cur_aa1b = aa1b_start_save;
+    cur_buf2_dp = (*cur_buf)->buf2_data;
+    cur_aa1b_size = 0;
+    for (i=0; i < buf2_cnt; i++) {
+      /* get a seq record */
+      if (!(*cur_buf)->buf2_data[i].seq_dup) {	/* not a duplicate, so get it */
+	MPI_Recv(cur_seq_p, sizeof(struct seq_record),
+		 MPI_BYTE, 0, MSEQTYPE4, MPI_COMM_WORLD, &mpi_status);
+	/* get the sequence itself */
+	prev_seq_p = cur_seq_p;
+	cur_n1 = cur_seq_p->n1;
+	cur_aa1b_size += cur_n1+1;
+	if (cur_aa1b_size >= max_aa1b_size) {
+	  fprintf(stderr,"[get_wbuf:%d] -- receive buffer too small %d > %d\n",
+		  (*cur_buf)->hdr.worker_idx, cur_aa1b_size, max_aa1b_size);
+	  exit(1);
+	}
+
+	MPI_Recv(cur_aa1b, cur_n1+1, MPI_BYTE, 0, MSEQTYPE5, MPI_COMM_WORLD, &mpi_status);
+	cur_seq_p->aa1b = cur_aa1b;
+#ifdef DEBUG
+	if (cur_seq_p->adler32_crc != adler32(1L,cur_aa1b,cur_n1)) {
+	  fprintf(stderr," [get_wbuf:%d] -- adler32 mismatch; n1: %d\n",
+		  (*cur_buf)->hdr.worker_idx, cur_n1);
+	}
+#endif
+
+	cur_buf2_dp->seq = cur_seq_p++;
+	cur_aa1b += cur_n1+1;
+      }
+      else {	/* its a duplicate, so point to the original version */
+	cur_buf2_dp->seq = prev_seq_p;
+      }
+      cur_buf2_dp++;
+    }
+  }
+
+  /*
+  for (i=0; i < buf2_cnt; i++) {
+    cur_seq_p = (*cur_buf)->buf2_data[i].seq;
+    if (check_seq_range(cur_seq_p->aa1b, cur_seq_p->n1, 50, "get_wbuf()")) {
+      fprintf(stderr, "[%d] (get_wbuf) range error at: %d/%d (seqr_cnt: %d)\n", 
+	      (*cur_buf)->hdr.worker_idx, i, buf2_cnt, seqr_cnt);
+    }
+  }
+  */
+
+  return 1;
+}
+
+/* put_wbuf() -- called in workers
+   sends the results in cur_buf to node 0; next_work_result() receives the same messages in the same order
+   In the PCOMPLIB version, there is no queue of buffers to be read,
+   so just send the buffer to the manager
+ */
+void put_wbuf(struct buf_head *cur_buf, int max_work_buf)
+{
+  int int_msg_b[4], i;
+  struct buf2_ares_s *buf2_ares_p;
+  struct a_res_str *cur_ares_p, *next_ares_p;
+#ifdef MPI_SRC
+  MPI_Status mpi_status;
+#endif
+
+  MPI_Send(&cur_buf->hdr, sizeof(struct buf_head), MPI_BYTE, 0,	/* NOTE(review): sizeof(struct buf_head) bytes starting at hdr; assumes hdr is the first member of struct buf_head -- confirm */
+	   RES_TYPE0, MPI_COMM_WORLD);
+
+  if (!cur_buf->hdr.have_results) { return;}
+
+  /* have buf2_res type results */
+  if (cur_buf->hdr.buf2_type & (BUF2_DOWORK + BUF2_DOSHUF+BUF2_DOOPT)) {
+    MPI_Send(cur_buf->buf2_res, sizeof(struct buf2_res_s)*cur_buf->hdr.buf2_cnt, MPI_BYTE, 0,
+	     RES_TYPE1, MPI_COMM_WORLD);
+  }
+
+  /* have buf2_ares type results */
+  if (cur_buf->hdr.buf2_type & BUF2_DOALIGN) {
+    /* buf2_ares does not have much useful information, except have_ares and a chain of *a_res pointers.
+       so we need to:
+       (1) send have_ares
+       (2) send each part of the a_res chain individually
+    */
+       
+    buf2_ares_p = cur_buf->buf2_ares;
+    for (i=0; i < cur_buf->hdr.buf2_cnt; i++) {
+      int_msg_b[0] = buf2_ares_p->have_ares;
+      MPI_Send(int_msg_b, 1, MPI_INT, 0, ALN_TYPE0, MPI_COMM_WORLD);
+      if (buf2_ares_p->have_ares) {
+	/* (a) send every a_res in the chain: the struct, its res[nres], then aln_code/ann_code when present */
+	for (cur_ares_p = buf2_ares_p->a_res; cur_ares_p; cur_ares_p = cur_ares_p->next) {
+	  MPI_Send(cur_ares_p, sizeof(struct a_res_str), MPI_BYTE, 0, ALN_TYPE1, MPI_COMM_WORLD);
+	  MPI_Send(cur_ares_p->res, cur_ares_p->nres ,MPI_INT, 0, ALN_TYPE2, MPI_COMM_WORLD);
+	  if (cur_ares_p->aln_code) {
+	    MPI_Send(cur_ares_p->aln_code, cur_ares_p->aln_code_n+1 ,MPI_BYTE, 0, ALN_TYPE3, MPI_COMM_WORLD);
+	  }
+	  if (cur_ares_p->ann_code) {
+	    MPI_Send(cur_ares_p->ann_code, cur_ares_p->ann_code_n+1 ,MPI_BYTE, 0, ALN_TYPE3, MPI_COMM_WORLD);
+	  }
+	} /* done with a_res chain */
+
+	/* free the chain */
+	cur_ares_p = buf2_ares_p->a_res;
+	while (cur_ares_p) {
+	  if (cur_ares_p->aln_code) free(cur_ares_p->aln_code);
+	  if (cur_ares_p->ann_code) free(cur_ares_p->ann_code);
+	  if ((buf2_ares_p->have_ares & 0x1) && cur_ares_p->res) free(cur_ares_p->res);	/* res is freed only when bit 0 of have_ares is set */
+	  next_ares_p = cur_ares_p->next;	/* fetch the link before the node is freed */
+	  free(cur_ares_p);
+	  cur_ares_p = next_ares_p;
+	}
+	buf2_ares_p->a_res = NULL;
+      } /* done with have_ares */
+      buf2_ares_p->have_ares = 0;	/* must be zero-ed out for later use */
+      buf2_ares_p++;
+    } /* done with buf2_ares[buf2_cnt] */
+  } /* done with BUF2_DOALIGN */
+} /* done with put_wbuf() */
diff --git a/src/pgsql_lib.c b/src/pgsql_lib.c
new file mode 100644
index 0000000..912fa06
--- /dev/null
+++ b/src/pgsql_lib.c
@@ -0,0 +1,631 @@
+/*  $Id: pgsql_lib.c 781 2011-06-20 10:31:40Z wrp $ */
+
+/* pgsql_lib.c copyright (c) 2004, 2014 by William R. Pearson and
+   The Rector & Visitors of the University of Virginia */
+/*
+     Licensed under the Apache License, Version 2.0 (the "License");
+     you may not use this file except in compliance with the License.
+     You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+     Unless required by applicable law or agreed to in writing,
+     software distributed under this License is distributed on an "AS
+     IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+     express or implied.  See the License for the specific language
+     governing permissions and limitations under the License. 
+*/
+
+/* functions for opening, reading, seeking a pgsql database */
+
+/*
+  For the moment, this interface assumes that the file to be searched will
+  be specified in a single, long, string with 4 parts:
+
+  (1) a database open string. This string has four fields, separated by
+      whitespace (' \t'):
+        hostname:port dbname user password
+
+   '--' dashes at the beginning of lines are ignored -
+   thus the first line could be:
+   -- hostname:port dbname user password
+
+  (2) a database query string that will return a unique ID (not
+      necessarily numeric, but it must be < 12 characters as libstr[12]
+      is used) and a sequence string
+
+  (2a) a series of pgsql commands that do not generate results
+       starting with 'DO', followed by a select() statement.
+
+  (3) a database select string that will return a description
+      given a unique ID
+
+  (4) a database select string that will return a sequence given a
+      unique ID
+
+   Lines (3) and (4) are not required for pv34comp* libraries, but
+   line (2) must generate a complete description as well as a sequence.
+
+
+   18-July-2001
+   Additional syntax has been added to support multiline SQL queries.
+
+   If the host line begins with '+', then the SQL is opened on the same
+   connection as the previous SQL file.
+
+   If the host line contains '-' just before the terminal ';', then
+   the file will not produce any output.
+
+   This string can contain "\n". ";" are used to separate the four
+   functions, which must be specified in the order shown above.
+   The last (fourth) query must terminate with a ';'
+
+   19-July-2004
+
+   This file is designed for PostgreSQL, which uses a different syntax
+   for getting rows of data.  Specifically, a select statement must be
+   associated with a "cursor", so that one can fetch a single row.
+
+   This can be simply done with the statement:
+
+   DECLARE next_seq CURSOR FOR "select statement ..."
+
+   The need for a CURSOR complicates the getlib()/ranlib() design, which
+   assumes that ranlib() can set something up that getlib() can read.
+   This can be avoided by setting up an otherwise unnecessary cursor for
+   the ranlib statement that gets a sequence.
+
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#include <libpq-fe.h>
+#define PGSQL_LIB 17
+
+#include "defs.h"
+#include "mm_file.h"
+
+#define XTERNAL
+#include "uascii.h"
+#define EOSEQ 0
+/* #include "upam.h" */
+
+char *alloc_file_name(char *f_name);
+int pgsql_getlib(unsigned char *, int, char *, int, fseek_t *, int *, struct lmf_str *, long *);
+void pgsql_ranlib(char *, int, fseek_t, char *, struct lmf_str *m_fd);
+
+#define PGSQL_BUF 4096
+
+struct lmf_str *
+pgsql_openlib(char *sname, int ldnaseq, int *sascii) {
+  FILE *sql_file;
+  PGconn *conn;
+  PGresult *res;
+  char *tmp_str, *ttmp_str;
+  int tmp_str_len;
+  char *bp, *bps, *bdp, *tp, tchar;
+  int qs_len, qqs_len;
+  char *sql_db, *sql_host, *sql_dbname, *sql_user, *sql_pass;
+  char *sql_port;
+  char *sql_do = NULL;	/* private copy of the leading DO statements, if any */
+  int sql_do_cnt;
+  struct lmf_str *m_fptr;
+  /* Read the ';'-separated spec (inline when sname starts with '%', otherwise from the file
+     sname), connect, run any leading DO statements and declare the next_seq cursor; NULL on failure */
+  /*  if (sql_reopen) return NULL; - should not be called for re-open */
+  tmp_str_len = PGSQL_BUF;
+  if ((tmp_str=(char *)calloc(tmp_str_len,sizeof(char)))==NULL) {
+    fprintf(stderr,"cannot allocate %d for pgSQL buffer\n",tmp_str_len);
+    return NULL;
+  }
+
+  if (sname[0] == '%') {
+    strncpy(tmp_str,sname+1,tmp_str_len);
+    tmp_str[tmp_str_len-1]='\0';	/* buffer size; sizeof(tmp_str) is only the pointer size */
+  }
+  else {
+    if ((sql_file=fopen(sname,"r"))==NULL) {
+      fprintf(stderr," cannot open pgSQL file: %s\n",sname);
+      free(tmp_str); return NULL;
+    }
+
+    if ((qs_len=fread(tmp_str,sizeof(char),tmp_str_len-1,sql_file))<=0) {
+      fprintf(stderr," cannot read pgSQL file: %s\n",sname);
+      fclose(sql_file); free(tmp_str); return NULL;
+    }
+    else  {
+      tmp_str[qs_len]='\0';
+      qqs_len = qs_len;
+      while (qqs_len >= tmp_str_len-1) {
+	tmp_str_len += PGSQL_BUF;
+	if ((tmp_str=(char *)realloc(tmp_str,tmp_str_len))==NULL) {
+	  fprintf(stderr,
+		  " cannot reallocate %d for pgSQL buffer\n",tmp_str_len);
+	  fclose(sql_file); return NULL;
+	}
+	ttmp_str = &tmp_str[qqs_len];
+	if ((qs_len=fread(ttmp_str,sizeof(char),PGSQL_BUF,sql_file))<0) {
+	  fprintf(stderr," cannot read pgSQL file: %s\n",sname);
+	  fclose(sql_file); free(tmp_str); return NULL;
+	}
+	ttmp_str[qs_len]='\0';
+	qqs_len += qs_len;
+      }
+    }
+    fclose(sql_file);
+  }
+
+  bps = tmp_str;
+  if ((bp=strchr(bps,';'))!=NULL) {
+    *bp='\0';
+    if ((sql_db=calloc(strlen(bps)+1,sizeof(char)))==NULL) {
+      fprintf(stderr, " cannot allocate space for database name [%d], %s\n",
+	      (int)strlen(bps),bps);
+      free(tmp_str); return NULL;
+    }
+    /* have database name, parse the fields */
+    else {
+      strcpy(sql_db,bps);	/* strcpy OK because allocated strlen(bps) */
+      bps = bp+1;	/* points to next char after ';' */
+      while (isspace(*bps)) bps++;
+      *bp=';'; /* replace ; */
+      bp = sql_db;
+      while (*bp=='-') {*bp++ = ' ';}
+      sql_host = strtok(bp," \t\n");	/* a missing field is handled like '@': left empty */
+      if (sql_host==NULL || sql_host[0]=='@') sql_host="";
+      sql_dbname = strtok(NULL," \t\n"); if (sql_dbname==NULL) sql_dbname="";
+      sql_user = strtok(NULL," \t\n");
+      if (sql_user==NULL || sql_user[0]=='@') sql_user="";
+      sql_pass = strtok(NULL," \t\n");
+      if (sql_pass==NULL || sql_pass[0]=='@') sql_pass="";
+      if ((tp=strchr(sql_host,':'))!=NULL) {
+	sql_port = tp+1;
+	*tp='\0';
+      }
+      else sql_port = "";
+    }
+  }
+  else {
+    fprintf(stderr," cannot find database fields:\n%s\n",tmp_str);
+    free(tmp_str); return NULL;
+  }
+
+  /* we have all the info we need to open a database, allocate lmf_str */
+  if ((m_fptr = (struct lmf_str *)calloc(1,sizeof(struct lmf_str)))==NULL) {
+    fprintf(stderr," cannot allocate lmf_str (%ld) for %s\n",
+	    (long)sizeof(struct lmf_str),sname);
+    free(sql_db); free(tmp_str); return NULL;
+  }
+
+  /* have our struct, initialize it */
+
+  m_fptr->lb_name = alloc_file_name(sname);
+  m_fptr->sascii = sascii;
+  m_fptr->sql_db = sql_db;
+  m_fptr->getlib = pgsql_getlib;
+  m_fptr->ranlib = pgsql_ranlib;
+  m_fptr->mm_flg = 0;
+  m_fptr->sql_reopen = 0;
+  m_fptr->lb_type = PGSQL_LIB;
+
+  /* now open the database, if necessary */
+  conn = PQsetdbLogin(sql_host,
+		      sql_port,
+		      NULL,
+		      NULL,
+		      sql_dbname,
+		      sql_user,
+		      sql_pass);
+
+  if (PQstatus(conn) != CONNECTION_OK)     {
+    fprintf(stderr, "Connection to database '%s' failed.\n", PQdb(conn));
+    fprintf(stderr, "%s", PQerrorMessage(conn));
+    PQfinish(conn);
+    goto error_r;
+  }
+  else {
+    m_fptr->pgsql_conn = conn;
+    fprintf(stderr," Database %s opened on %s\n",sql_dbname,sql_host);
+  }
+
+  /* check for 'DO' command - copy to 'DO' string */
+  while (*bps == '-') { *bps++=' ';}
+  if (isspace(bps[-1]) && toupper(bps[0])=='D' &&
+      toupper(bps[1])=='O' && isspace(bps[2])) {
+    /* have some 'DO' commands */
+    /* check where the end of the last DO statement is */
+
+    sql_do_cnt = 1;	/* count up the number of 'DO' statements for later */
+    bdp=bps+3;
+    while ((bp=strchr(bdp,';'))!=NULL) {
+      tp = bp+1; /* scan from just after ';' - bp+2 could step past the terminating NUL */
+      while (isspace(*tp) || *tp == '-') {*tp++ = ' ';}
+      if (toupper(*tp)=='D' && toupper(tp[1])=='O' && isspace(tp[2])) {
+	sql_do_cnt++;		/* count the DO statements */
+	bdp = tp+3;		/* move to the next DO statement */
+      }
+      else break;
+    }
+    if (bp != NULL) {	/* end of the last DO, begin of select */
+      tchar = *(bp+1);
+      *(bp+1)='\0';		/* terminate DO strings */
+      if ((sql_do = calloc(strlen(bps)+1, sizeof(char)))==NULL) {
+	fprintf(stderr," cannot allocate %d for sql_do\n",(int)strlen(bps));
+	goto error_r;
+      }
+      else {
+	strcpy(sql_do,bps);
+	*(bp+1)=tchar;	/* replace missing ';' */
+      }
+      bps = bp+1;
+      while (isspace(*bps)) bps++;
+    }
+    else {
+      fprintf(stderr," terminal ';' not found: %s\n",bps);
+      goto error_r;
+    }
+    /* all the DO commands are in the local sql_do in the form:
+     DO command1; DO command2; DO command3; */
+    bdp = sql_do;
+    while (sql_do_cnt-- && (bp=strchr(bdp,';'))!=NULL) {
+      /* do the pgsql statement on bdp+3 */
+      /* check for error */
+      *bp='\0';
+      res = PQexec(m_fptr->pgsql_conn,bdp+3);
+      if (PQresultStatus(res) != PGRES_COMMAND_OK) {
+	fprintf(stderr,"*** Error %s - query failed:\n%s\n",
+		PQerrorMessage(m_fptr->pgsql_conn), bdp+3);
+	PQclear(res);
+	goto error_r;
+      }
+      PQclear(res);
+
+      *bp=';';
+      bdp = bp+1;
+      while (isspace(*bdp)) bdp++;
+    }
+  }
+
+  /* copy 1st query field */
+  if ((bp=strchr(bps,';'))!=NULL) {
+    *bp='\0';
+    if ((m_fptr->sql_query=calloc(strlen(bps)+41,sizeof(char)))==NULL) {
+      fprintf(stderr, " cannot allocate space for query string [%d], %s\n",
+	      (int)strlen(bps),bps);
+      goto error_r;
+    }
+    /* have query, copy it */
+    else {
+      strncpy(m_fptr->sql_query,"DECLARE next_seq CURSOR FOR ",40);
+      strcat(m_fptr->sql_query,bps);
+      *bp=';'; /* replace ; */
+      bps = bp+1;
+      while(isspace(*bps)) bps++;
+    }
+  }
+  else {
+    fprintf(stderr," cannot find database query field:\n%s\n",tmp_str);
+    goto error_r;
+  }
+
+  /* copy get_desc field */
+  if ((bp=strchr(bps,';'))!=NULL) {
+    *bp='\0';
+    if ((m_fptr->sql_getdesc=calloc(strlen(bps)+1,sizeof(char)))==NULL) {
+      fprintf(stderr, " cannot allocate space for database name [%d], %s\n",
+	      (int)strlen(bps),bps);
+      goto error_r;
+    }
+    /* have get_desc, copy it */
+    else {
+      strcpy(m_fptr->sql_getdesc,bps);
+      *bp=';'; /* replace ; */
+      bps = bp+1;
+      while(isspace(*bps)) bps++;
+    }
+  }
+  else {
+    fprintf(stderr," cannot find getdesc field:\n%s\n",tmp_str);
+    goto error_r;
+  }
+
+  if ((bp=strchr(bps,';'))!=NULL) { *bp='\0';}
+
+  if ((m_fptr->sql_getseq=calloc(strlen(bps)+1,sizeof(char)))==NULL) {
+    fprintf(stderr, " cannot allocate space for database name [%d], %s\n",
+	    (int)strlen(bps),bps);
+    goto error_r;
+  }
+
+  if (strlen(bps) > 0) {
+    strcpy(m_fptr->sql_getseq,bps);
+  }
+  else {
+    fprintf(stderr," cannot find getseq field:\n%s\n",tmp_str);
+    goto error_r;	/* release the handle and the connection, not just return */
+  }
+  if (bp!=NULL) *bp=';';
+
+  /* now do the fetch */
+
+  res = PQexec(m_fptr->pgsql_conn,"BEGIN;");
+  if (PQresultStatus(res) != PGRES_COMMAND_OK) {
+    fprintf(stderr,"*** Error %s - BEGIN failed:\n",
+	    PQerrorMessage(conn));
+    PQclear(res);
+    goto error_r;
+  }
+  PQclear(res);
+
+  res = PQexec(m_fptr->pgsql_conn, m_fptr->sql_query);
+  if (PQresultStatus(res) != PGRES_COMMAND_OK) {
+    fprintf(stderr,"*** Error %d:%s - query failed:\n%s\n",
+	    PQresultStatus(res),PQerrorMessage(conn), m_fptr->sql_query);
+    PQclear(res);
+    goto error_r;
+  }
+  PQclear(res);
+  m_fptr->pgsql_res=NULL;
+  free(sql_do); free(tmp_str);	/* parsing scratch only; m_fptr holds its own copies */
+  return m_fptr;
+
+ error_r:
+  if (m_fptr->pgsql_conn != NULL) PQfinish(m_fptr->pgsql_conn);	/* NULL if the connect itself failed */
+  free(m_fptr->sql_getseq);
+  free(m_fptr->sql_getdesc);
+  free(m_fptr->sql_query);
+  free(m_fptr);
+  free(sql_db);
+  free(sql_do); free(tmp_str);
+  return NULL;
+}
+
+struct lmf_str *
+pgsql_reopen(struct lmf_str *m_fptr) {
+  if (m_fptr != NULL) m_fptr->sql_reopen = 1;	/* only flags the handle; NULL is passed through, as in pgsql_closelib() */
+  return m_fptr;
+}
+
+void
+pgsql_closelib(struct lmf_str *m_fptr) {
+  /* release the pending result and the connection; both pointers are cleared so a repeated close is harmless */
+  if (m_fptr == NULL) return;
+  if (m_fptr->pgsql_res != NULL) { PQclear(m_fptr->pgsql_res); m_fptr->pgsql_res = NULL; }
+  if (m_fptr->pgsql_conn != NULL) { PQfinish(m_fptr->pgsql_conn); m_fptr->pgsql_conn = NULL; }
+  m_fptr->sql_reopen=0;
+}
+
+/*
+static char *sql_seq = NULL, *sql_seqp;
+static int sql_seq_len;
+*/
+
+int
+pgsql_getlib( unsigned char *seq,
+	      int maxs,
+	      char *libstr,
+	      int n_libstr,
+	      fseek_t *libpos,
+	      int *lcont,
+	      struct lmf_str *lm_fd,
+	      long *l_off)
+{
+  register unsigned char *cp, *seqp;
+  register int *ap;
+  unsigned char *seqm, *seqm1;
+  PGresult *res;
+  char *bp;
+  /* Encode the current row's sequence into seq[] through lm_fd->sascii and return the count
+     written, or -1 when the FETCH fails or returns no row.  *lcont is incremented when the
+     buffer filled first (call again for the rest); otherwise it is reset and the row released. */
+  seqp = seq;
+  seqm = &seq[maxs-9];
+  seqm1 = seqm-1;
+
+  ap = lm_fd->sascii;
+
+  if (*lcont==0) {
+    /* get a row, with UID, sequence */
+    *l_off = 1;
+
+    /* check to see if we already have a valid result */
+    if (lm_fd->pgsql_res==NULL) {
+      res = PQexec(lm_fd->pgsql_conn,"FETCH next_seq");
+      if (PQresultStatus(res) != PGRES_TUPLES_OK) {
+	fprintf(stderr,"*** Error %s - getlib FETCH failed:\n%s\n",
+		PQerrorMessage(lm_fd->pgsql_conn), lm_fd->sql_query);
+	PQclear(res);
+	lm_fd->pgsql_res = NULL;
+	*lcont = 0;
+	*seqp = EOSEQ;
+	return -1;
+      }
+    }
+    else {res = lm_fd->pgsql_res;}
+
+    if (PQntuples(res)>0) {
+      lm_fd->pgsql_res = res;
+      *libpos=(fseek_t)atol(PQgetvalue(res,0,0));
+
+      *l_off = 1;
+      if (PQnfields(res) > 2 && (bp=strchr(PQgetvalue(res,0,2),'@'))!=NULL &&
+	  !strncmp(bp+1,"C:",2)) sscanf(bp+3,"%ld",l_off);
+
+      lm_fd->sql_seqp = PQgetvalue(res,0,1);
+
+      /* because of changes in pgsql_ranlib(), it is essential that
+         libstr return the unique identifier; thus we must use
+         sql_row[0], not sql_row[2]. Using libstr as the UID allows
+         one to use any UID, not just numeric ones.  *libpos is not
+         used for pgsql libraries.
+      */
+
+      if (n_libstr <= MAX_UID) {
+	/* the normal case returns only GID/sequence */
+	strncpy(libstr,PQgetvalue(res,0,0),MAX_UID-1);
+	libstr[MAX_UID-1]='\0';
+      }
+      else {
+	/* here we do not use the UID in libstr, because we are not
+           going back into the db */
+	/* the PVM case also returns a long description */
+	if (PQnfields(res)>2) {
+	  strncpy(libstr,PQgetvalue(res,0,2),n_libstr-1);
+	}
+	else {
+	  strncpy(libstr,PQgetvalue(res,0,0),n_libstr-1);
+	}
+	libstr[n_libstr-1]='\0';
+      }
+    }
+    else {
+      PQclear(res);	/* res is the fresh FETCH result or lm_fd->pgsql_res itself; either way free it */
+      lm_fd->pgsql_res=NULL;
+      *lcont = 0;
+      *seqp = EOSEQ;
+      return -1;
+    }
+  }
+
+  for (cp=(unsigned char *)lm_fd->sql_seqp; seqp<seqm1 && *cp; ) {
+    if ((*seqp++=ap[*cp++])<NA &&
+	(*seqp++=ap[*cp++])<NA &&
+	(*seqp++=ap[*cp++])<NA &&
+	(*seqp++=ap[*cp++])<NA &&
+	(*seqp++=ap[*cp++])<NA &&
+	(*seqp++=ap[*cp++])<NA &&
+	(*seqp++=ap[*cp++])<NA &&
+	(*seqp++=ap[*cp++])<NA &&
+	(*seqp++=ap[*cp++])<NA &&
+	(*seqp++=ap[*cp++])<NA) continue;
+    --seqp;
+    if (*(cp-1)==0) break;
+  }
+  lm_fd->sql_seqp = (char *)cp;
+
+  if (seqp>=seqm1) (*lcont)++;
+  else {
+    *lcont=0;
+    PQclear(lm_fd->pgsql_res);
+    lm_fd->pgsql_res = NULL;
+  }
+
+  *seqp = EOSEQ;
+  /*   if ((int)(seqp-seq)==0) return 1; */
+  return (int)(seqp-seq);
+}
+
+void
+pgsql_ranlib(char *str,
+	     int cnt,
+	     fseek_t libpos,
+	     char *libstr,
+	     struct lmf_str *lm_fd
+	     )
+{
+  char tmp_query[1024];
+  PGresult *res;
+  char *bp;
+  /* Fetch the description for the UID in libstr into str (cnt bytes), then run the
+     sequence query for that UID and park its result in lm_fd->pgsql_res for pgsql_getlib() */
+  str[0]='\0';
+  /* put the UID into the query string - cannot use sprintf because of
+     "%' etc */
+
+  /*   sprintf(tmp_query,lm_fd->sql_getdesc,libpos); */
+
+  if ((bp=strchr(lm_fd->sql_getdesc,'#'))==NULL) {
+    fprintf(stderr, "no KEY position in %s\n",lm_fd->sql_getdesc);
+    goto next1;
+  }
+  else {
+    *bp = '\0';
+    strncpy(tmp_query,lm_fd->sql_getdesc,sizeof(tmp_query));
+    tmp_query[sizeof(tmp_query)-1]='\0';
+    /* strncat's count is the room left in tmp_query, not the total buffer size */
+    strncat(tmp_query,libstr,sizeof(tmp_query)-strlen(tmp_query)-1);
+    strncat(tmp_query,bp+1,sizeof(tmp_query)-strlen(tmp_query)-1);
+    *bp='#';
+    lm_fd->lpos = libpos;
+  }
+
+  /*  fprintf(stderr," requesting: %s\n",tmp_query); */
+
+  if (lm_fd->pgsql_res !=NULL) {
+    PQclear(lm_fd->pgsql_res);
+    lm_fd->pgsql_res = NULL;
+  }
+
+  res = PQexec(lm_fd->pgsql_conn,tmp_query);
+  if (PQresultStatus(res) != PGRES_TUPLES_OK) {
+    lm_fd->pgsql_res = NULL;
+
+    sprintf(str,"gi|%ld ***Error - query failed***",(long)libpos);
+    fprintf(stderr,"*** Error %s - ranlib DESC failed:\n%s\n",
+	    PQerrorMessage(lm_fd->pgsql_conn), tmp_query);
+    PQclear(res);
+    goto next1;
+  }
+
+  if (PQntuples(res)<=0) {
+/*     fprintf(stderr,"*** Error = use result failed\n%s\n", 
+	   pgsql_error(lm_fd->pgsql_conn)); */
+    sprintf(str,"gi|%ld ***use result failed***",(long)libpos);
+    goto next0;
+  }
+
+  if (PQgetvalue(res,0,1)!= NULL) strncpy(str,PQgetvalue(res,0,1),cnt-1);
+  else strncpy(str,PQgetvalue(res,0,0),cnt-1);
+  str[cnt-1]='\0';
+  /* change this later to support multiple row returns */
+  /*
+  while (strlen(str) < cnt-1 &&
+	 (lm_fd->sql_row = pgsql_fetch_row(lm_fd->pgsql_res))!=NULL) {
+    strncat(str," ",cnt-2-strlen(str));
+    if (lm_fd->sql_row[1]!=NULL) 
+      strncat(str,lm_fd->sql_row[1],cnt-2-strlen(str));
+    else break;
+  }
+  */
+
+  str[cnt-1]='\0';
+  if ((bp = strchr(str,'\r'))!=NULL) *bp='\0';
+  if ((bp = strchr(str,'\n'))!=NULL) *bp='\0';
+
+ next0:
+  PQclear(res);
+ next1: 
+  lm_fd->pgsql_res = NULL;
+
+  /* get the sequence, set up for pgsql_getseq() */
+  /* put the UID into the query string */
+
+  if ((bp=strchr(lm_fd->sql_getseq,'#'))==NULL) {
+    fprintf(stderr, "no GID position in %s\n",lm_fd->sql_getseq);
+    return;
+  }
+  else {
+    *bp = '\0';
+    strncpy(tmp_query,lm_fd->sql_getseq,sizeof(tmp_query));
+    tmp_query[sizeof(tmp_query)-1]='\0';
+    /* as above: bound each append by the space still free in tmp_query */
+    strncat(tmp_query,libstr,sizeof(tmp_query)-strlen(tmp_query)-1);
+    strncat(tmp_query,bp+1,sizeof(tmp_query)-strlen(tmp_query)-1);
+    *bp='#';
+  }
+
+  res = PQexec(lm_fd->pgsql_conn,tmp_query);
+  if (PQresultStatus(res) != PGRES_TUPLES_OK) {
+    PQclear(res);
+    lm_fd->pgsql_res = NULL;
+    fprintf(stderr,"*** Error - ranlib SEQ failed:\n%s\n%s\n",tmp_query,
+	    PQerrorMessage(lm_fd->pgsql_conn));
+    exit(1);
+  }
+  else {
+    lm_fd->pgsql_res = res;
+  }
+}
diff --git a/src/print_pssm.c b/src/print_pssm.c
new file mode 100644
index 0000000..f2db3d5
--- /dev/null
+++ b/src/print_pssm.c
@@ -0,0 +1,793 @@
+/* print_pssm.c - 21-Jan-2005  */
+
+/* $Id: print_pssm.c 1111 2013-01-09 18:46:57Z wrp $  */
+/* $Revision: 1111 $  */
+
+/* copyright (c) 2005, 2014 - William R. Pearson and The Rector &
+   Visitors of the University of Virginia */
+
+/* Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under this License is distributed on an "AS
+   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+   express or implied.  See the License for the specific language
+   governing permissions and limitations under the License. 
+*/
+
+/*
+   read a binary PSSM checkpoint file from blastpgp, and produce an ascii
+   formatted file
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <math.h>
+#include <string.h>
+#ifdef UNIX
+#include <getopt.h>
+#else
+extern int optind;		/* used by getopt() */
+extern char *optarg;
+#endif
+
+#include "defs.h"
+#include "param.h"
+
+#include "uascii.h"
+#include "upam.h"
+
+void initenv(int, char **, struct pstruct *, char *);
+void read_pssm(unsigned char *aa0, int n0, int nsq, double pamscale, 
+	       FILE *fp, int pgpf_type, struct pstruct *ppst);
+int
+read_asn_pssm(unsigned char *aa0, int n0, int nsq,
+	      double pamscale, FILE *fp, struct pstruct *ppst);
+void alloc_pam();
+int **alloc_pam2p();
+void initpam2();
+void init_ascii0(int *xascii, char *sq_map, int n_sq_map, struct pstruct *ppst);
+void fill_pam();
+double get_lambda(int **pam2p, int n0, int nsq, char *aa0);
+
+int standard_pam(char *smstr, struct pstruct *ppst, int del_set, int gap_set);
+int getseq(char *filen, int *qascii, unsigned char *seq, int maxs, char *libstr,
+	   int n_libstr, long *sq0off);
+int initpam (char *mfname, struct pstruct *ppst);
+int karlin(int , int, double *, double *, double *);
+
+int
+main(int argc, char **argv) {	/* read the query sequence, then hand the -P PSSM file to the matching reader */
+  unsigned char *aa0;
+  char libstr[MAX_FN];
+  char qname[MAX_FN];
+  long sq0off;
+  int i, n0;
+  FILE *fp;
+  struct pstruct pst, *ppst;
+  memset(&pst, 0, sizeof(pst));	/* no field (pam_pssm, pgpfile_type, ...) may be read uninitialized */
+  /* stuff from initfa.c/h_init() */
+  qname[0] = '\0';		/* initenv() fills this only when a query file is named */
+  memcpy(qascii,aascii,sizeof(qascii));
+  /* initialize a pam matrix */
+  ppst = &pst;
+  strncpy(ppst->pamfile,"BP62",MAX_FN);
+  standard_pam(ppst->pamfile,ppst,0,0);
+
+  /* this is always protein by default */
+  ppst->nsq = naa;
+  ppst->nsqx = naax;
+  for (i=0; i<=ppst->nsqx; i++) {
+    ppst->sq[i] = NCBIstdaa[i];
+  }
+  ppst->sq[ppst->nsqx+1] = ppst->sqx[ppst->nsqx+1] = '\0';
+
+  if ((aa0 = calloc(MAXTST,sizeof(char)))==NULL) {
+    fprintf(stderr,"Cannot allocate aa0\n");
+    exit(1);
+  }
+
+  initenv(argc, argv, &pst, qname);
+
+  if (qname[0] == '\0') {	/* no query file given (argc itself is never < 1) */
+    fprintf(stderr,"usage -- print_pssm -P \"pssm.asn 2\" query_file\n");
+    exit(1);
+  }
+
+  alloc_pam(pst.nsq+1,pst.nsq+1, &pst);
+  initpam2(&pst);
+  init_ascii0(qascii, NCBIstdaa_ext, NCBIstdaa_ext_n, &pst);
+
+  n0 = getseq (qname, qascii, aa0, MAXTST, libstr, sizeof(libstr), &sq0off);
+
+  if (!pst.pam_pssm) {
+    fprintf(stderr," ** ERROR ** No -P PSSM provided\n");
+  }
+  else {
+    pst.pam2p[0] = alloc_pam2p(n0,pst.nsq);
+    pst.pam2p[1] = alloc_pam2p(n0,pst.nsq);
+
+    if ((pst.pgpfile_type == 0) && (fp=fopen(pst.pgpfile,"rb"))) {
+      read_pssm(aa0, n0, pst.nsq, pst.pamscale, fp, 0, &pst);
+    }
+    else if ((pst.pgpfile_type == 1) && (fp=fopen(pst.pgpfile,"r"))) {
+      read_pssm(aa0, n0, pst.nsq, pst.pamscale, fp, 1, &pst);
+    }
+    else if ((pst.pgpfile_type == 2) && (fp=fopen(pst.pgpfile,"rb"))) {
+      /* braces matter: without them the exit(1) ran even after a successful parse */
+      if (read_asn_pssm(aa0, n0, pst.nsq, pst.pamscale, fp, &pst)<=0) {
+	fprintf(stderr," Could not parse PSSM file: %s\n",pst.pgpfile);
+	pst.pam_pssm = 0;
+	exit(1);
+      }
+    }
+  }
+  return 0;
+}
+
+void
+initenv(int argc, char **argv, struct pstruct *ppst, char *qname) {
+  int copt;	/* int, not char: getopt() returns int, and -1 is lost in an unsigned char */
+  char *bp;
+  /* parse -P "file [type]" and -s matrix, then copy the first non-option argument into qname */
+  pascii = aascii;
+
+  while ((copt = getopt(argc, argv, "P:s:")) != -1) {
+    switch (copt) {
+      case 'P':
+	if ((bp=(strchr(optarg,' ')))!=NULL) {
+	  *bp = '\0';
+	  ppst->pgpfile_type = atoi(bp+1);
+	}
+	strncpy(ppst->pgpfile,optarg,MAX_FN);
+	ppst->pgpfile[MAX_FN-1]='\0';
+	ppst->pam_pssm = 1;
+	break;
+
+      case 's':
+	strncpy (ppst->pamfile, optarg, 120);
+	ppst->pamfile[120-1]='\0';
+	if (!standard_pam(ppst->pamfile,ppst,0, 0)) {
+	  initpam (ppst->pamfile, ppst);
+	}
+	ppst->pam_set=1;
+	break;
+    }
+  }
+  optind--;
+
+  if (argc - optind > 1) { strncpy(qname, argv[optind+1], MAX_FN); qname[MAX_FN-1]='\0'; }
+}
+
+
+/*
+   *aa0 - query sequence
+   n0   - length
+   pamscale - scaling for pam matrix - provided by apam.c, either
+              0.346574 = ln(2)/2 (P120, BL62) or
+	      0.231049 = ln(2)/3 (P250, BL50) 
+*/
+
+#define N_EFFECT 20
+
+void
+read_pssm(unsigned char *aa0, int n0, int nsq, double pamscale, FILE *fp, int pgf_type, struct pstruct *ppst) {
+  int i, len = 0;
+  int qi, rj;
+  int **pam2p;
+  int first, too_high;
+  char *query;
+  double freq, **freq2d, lambda, new_lambda;
+  double scale, scale_high, scale_low;
+  /* Read a checkpoint profile from fp (length, query residues, N_EFFECT doubles per position),
+     print it as scaled log-odds, and fill ppst->pam2p[0] at the scale whose lambda matches pam2[0] */
+  pam2p = ppst->pam2p[0];
+  if(1 != fread(&len, sizeof(int), 1, fp)) {
+    fprintf(stderr, "error reading from checkpoint file: %d\n", len);
+    exit(1);
+  }
+
+  if(len != n0) {
+    fprintf(stderr, "profile length (%d) and query length (%d) don't match!\n",
+	    len,n0);
+    exit(1);
+  }
+
+  /* read over query sequence stored in BLAST profile */
+  if(NULL == (query = (char *) calloc(len+1, sizeof(char)))) {	/* +1: query is printed with %s below */
+    fprintf(stderr, "Couldn't allocate memory for query!\n");
+    exit(1);
+  }
+
+  if(len != fread(query, sizeof(char), len, fp)) {
+    fprintf(stderr, "Couldn't read query sequence from profile: %s\n", query);
+    exit(1);
+  }
+
+  printf("%d\n%s\n",len,query);
+
+  /* currently we don't do anything with query; ideally, we should
+     check to see that it actually matches aa0 ... */
+
+  /* quick 2d array alloc: */
+  if((freq2d = (double **) calloc(n0, sizeof(double *))) == NULL) {
+    fprintf(stderr, "Couldn't allocate memory for frequencies!\n");
+    exit(1);
+  }
+
+  if((freq2d[0] = (double *) calloc(n0 * N_EFFECT, sizeof(double))) == NULL) {
+    fprintf(stderr, "Couldn't allocate memory for frequencies!\n");
+    exit(1);
+  }
+
+  /* a little pointer arithmetic to fill out 2d array: */
+  for (qi = 1 ; qi < n0 ; qi++) {
+    freq2d[qi] = freq2d[0] + (N_EFFECT * qi);
+  }
+
+  printf(" rrtotal: %ld; pamscale=%0.5f\n", rrtotal, pamscale);
+
+  printf("   ");
+  for (rj = 1; rj <= N_EFFECT; rj++ ) {
+    printf(" %c    ",NCBIstdaa[rj]);
+  }
+  printf("\n");
+
+  for (qi = 0 ; qi < n0 ; qi++) {
+    printf("%c",query[qi]);
+    for (rj = 0 ; rj < N_EFFECT ; rj++) {
+      if(1 != fread(&freq, sizeof(double), 1, fp)) {
+	fprintf(stderr, "Error while reading frequencies!\n");
+	exit(1);
+      }
+      /* printf(" %8.7g",freq*10.0); */
+
+      if (freq > 1e-12) {
+	freq = log(freq /((double) (rrcounts[rj+1])/(double) rrtotal));
+	freq /= pamscale; /* this gets us close to original pam scores */
+	freq2d[qi][rj] = freq;
+      }
+      else {freq2d[qi][rj] = freq;}
+      printf(" %5.2f",freq);
+    }
+    printf("\n");
+  }
+
+
+  /* now figure out the right scale */
+  scale = 1.0;
+  lambda = get_lambda(ppst->pam2[0], 20, 20, "\0ARNDCQEGHILKMFPSTWYV");
+
+  /* should be near 1.0 because of our initial scaling by ppst->pamscale */
+  fprintf(stderr, "real_lambda: %g\n", lambda);
+
+  /* get initial high/low scale values: */
+  first = 1;
+  while (1) {
+    fill_pam(pam2p, n0, 20, freq2d, scale);
+    new_lambda = get_lambda(pam2p, n0, 20, query); 
+    fprintf(stderr, "new_lambda: %g; scale: %g\n",new_lambda,scale);
+
+    if (new_lambda > lambda) {
+      if (first) {
+	first = 0;
+	scale = scale_high = 1.0 + 0.05;
+	scale_low = 1.0;
+	too_high = 1;
+      } else {
+	if (!too_high) break;
+	scale = (scale_high += scale_high - 1.0);
+      }
+    } else if (new_lambda > 0) {
+      if (first) {
+	first = 0;
+	scale_high = 1.0;
+	scale = scale_low = 1.0 - 0.05;
+	too_high = 0;
+      } else {
+	if (too_high) break;
+	scale = (scale_low += scale_low - 1.0);
+      }
+    } else {  /* new_lambda <= 0 */
+      fprintf(stderr, "new_lambda (%g) <= 0; matrix has positive average score", new_lambda);
+      exit(1);
+    }
+  }
+
+  /* now do binary search between low and high */
+  for (i = 0 ; i < 10 ; i++) {
+    scale = 0.5 * (scale_high + scale_low);
+    fill_pam(pam2p, n0, 20, freq2d, scale);
+    new_lambda = get_lambda(pam2p, n0, 20, query);
+    fprintf(stderr, "it: %d - new_lambda: %g; scale: %g\n",i,new_lambda,scale);
+    
+    if (new_lambda > lambda) scale_low = scale;
+    else scale_high = scale;
+  }
+
+  scale = 0.5 * (scale_high + scale_low);
+  fill_pam(pam2p, n0, 20, freq2d, scale);
+
+  fprintf(stderr, "final scale: %g\n", scale);
+
+  fprintf(stderr,"         ");
+  for (rj = 1; rj <= N_EFFECT+3; rj++ ) {
+    fprintf(stderr,"   %c",NCBIstdaa[rj]);
+  }
+  fprintf(stderr,"\n");
+
+  for (qi = 0 ; qi < n0 ; qi++) {
+    fprintf(stderr, "%4d %c:  ", qi+1, query[qi]);
+    for (rj = 1 ; rj <= N_EFFECT+3 ; rj++) {
+      fprintf(stderr, "%4d", pam2p[qi][rj]);
+    }
+    fprintf(stderr, "\n");
+  }
+
+  free(freq2d[0]);
+  free(freq2d);
+
+  free(query);
+}
+
+/*
+ * alloc_pam(): allocates memory for the 2D pam matrix as well
+ * as for the integer array used to transmit the pam matrix
+ */
+void
+alloc_pam (int d1, int d2, struct pstruct *ppst)
+{
+  int row, *cells;
+
+  /* the two row-pointer vectors (d1 entries each) are allocated first */
+  ppst->pam2[0] = (int **) malloc (d1 * sizeof (int *));
+  if (ppst->pam2[0] == NULL) {
+     fprintf(stderr,"Cannot allocate 2D pam matrix: %d",d1);
+     exit(1);
+  }
+
+  ppst->pam2[1] = (int **) malloc (d1 * sizeof (int *));
+  if (ppst->pam2[1] == NULL) {
+     fprintf(stderr,"Cannot allocate 2D pam matrix: %d",d1);
+     exit(1);
+  }
+  /* then one contiguous d1*d2 slab per matrix, cut into rows of d2 ints */
+  cells = (int *) malloc (d1 * d2 * sizeof (int));
+  if (cells == NULL) {
+     fprintf(stderr,"Cannot allocate 2D pam matrix: %d",d1);
+     exit(1);
+  }
+  for (row = 0; row < d1; row++) ppst->pam2[0][row] = cells + row * d2;
+
+  cells = (int *) malloc (d1 * d2 * sizeof (int));
+  if (cells == NULL) {
+     fprintf(stderr,"Cannot allocate 2d pam matrix: %d",d2);
+     exit(1);
+  }
+  for (row = 0; row < d1; row++) ppst->pam2[1][row] = cells + row * d2;
+}
+
+void
+fill_pam(int **pam2p, int n0, int nsq, double **freq2d, double scale) {
+  int pos, res, col;
+  double scaled;
+
+  /* For every PSSM residue column (1..nsq) and query position (0..n0-1):
+     scale the stored value, round half away from zero, and store it in
+     the pam2p column that qascii[] gives for that residue. */
+
+  for (res = 1 ; res <= nsq ; res++) {
+    col = qascii[pssm_aa[res]];
+    for (pos = 0 ; pos < n0 ; pos++) {
+      scaled = scale * freq2d[pos][res-1];
+      scaled += (scaled < 0.0) ? -0.5 : 0.5;
+      pam2p[pos][col] = (int)scaled;
+    }
+  }
+}
+
+/*
+ *  initpam2(struct pstruct pst): Converts 1-D pam matrix to 2-D
+ */
+void initpam2 (struct pstruct *ppst)
+{
+   int row, col, next, nres, score;
+   int x_idx, stop_idx;
+   int **mat = ppst->pam2[0];	/* the matrix being filled */
+
+   nres = ppst->nsq;
+   x_idx = pascii['X'];
+   stop_idx = pascii['*'];
+   mat[0][0] = -BIGNUM;
+   ppst->pam_h = -1; ppst->pam_l = 1;
+
+   /* unpack the packed lower triangle in pam[] symmetrically, tracking the score range */
+   next = 0;
+   for (row = 1; row <= nres; row++) {
+     mat[0][row] = mat[row][0] = -BIGNUM;
+     for (col = 1; col <= row; col++) {
+       score = pam[next++] - ppst->pamoff;
+       mat[col][row] = mat[row][col] = score;
+       if (ppst->pam_l > score) ppst->pam_l = score;
+       if (ppst->pam_h < score) ppst->pam_h = score;
+     }
+   }
+
+   ppst->nt_align = (ppst->dnaseq== SEQT_DNA || ppst->dnaseq == SEQT_RNA);
+
+   if (ppst->dnaseq == SEQT_RNA) {	/* these three pairs score one less than G:G */
+     score = mat[nascii['G']][nascii['G']] - 1;
+     mat[nascii['A']][nascii['G']] = mat[nascii['C']][nascii['T']] = mat[nascii['C']][nascii['U']] = score;
+   }
+
+   if (!ppst->pam_x_set) {	/* no explicit X scores: record the ones found in the matrix */
+     ppst->pam_xx = mat[x_idx][x_idx];
+     ppst->pam_xm = mat[1][x_idx];
+   }
+   else {
+     for (row=1; row<=nres; row++) {
+       mat[x_idx][row] = mat[row][x_idx]=ppst->pam_xm;
+       mat[stop_idx][row] = mat[row][stop_idx]=ppst->pam_xm;
+     }
+     mat[x_idx][x_idx]=ppst->pam_xx;
+     mat[stop_idx][stop_idx]=ppst->pam_xm;
+   }
+}
+
+double
+get_lambda(int **pam2p, int n0, int nsq, char *aa0) {
+  double lambda = 0.0, H = 0.0;	/* defined result (0) even if karlin() fails without setting them */
+  double *pr, tot, sum;
+  int aa0i;
+  int i, ioff, j, min, max;
+  /* Build the score distribution of pam2p rows against columns 1..nsq, weighted by rrcounts[],
+     and return karlin()'s lambda for it.  First: get min and max scores */
+  min = BIGNUM;
+  max = -BIGNUM;
+  if(pam2p[0][1] == -BIGNUM) {	/* row 0 holds -BIGNUM fill: use rows 1..n0 instead of 0..n0-1 */
+    ioff = 1;
+    n0++;
+  } else {
+    ioff = 0;
+  }
+
+  for (i = ioff ; i < n0 ; i++) {
+    for (j = 1; j <= nsq ; j++) {
+      if (min > pam2p[i][j])
+	min = pam2p[i][j];
+      if (max < pam2p[i][j])
+	max = pam2p[i][j];
+    }
+  }
+
+  fprintf(stderr, "min: %d\tmax:%d\n", min, max);
+  
+  if ((pr = (double *) calloc(max - min + 1, sizeof(double))) == NULL) {
+    fprintf(stderr, "Couldn't allocate memory for score probabilities: %d\n", max - min + 1);
+    exit(1);
+  }
+
+  tot = (double) rrtotal * (double) rrtotal * (double) n0;
+  for (i = ioff ; i < n0 ; i++) {
+    if (aa0[i] < 'A') {aa0i = aa0[i];}	/* values below 'A' are used directly as residue indices */
+    else {aa0i = aascii[aa0[i]];}
+    for (j = 1; j <= nsq ; j++) {
+      pr[pam2p[i][j] - min] +=
+	(double) ((double) rrcounts[aa0i] * (double) rrcounts[j]) / tot;
+    }
+  }
+
+  sum = 0.0;
+  for(i = 0 ; i <= max-min ; i++) { 
+    sum += pr[i];
+    /*    fprintf(stderr, "%3d: %g %g\n", i+min, pr[i], sum); */
+  }
+  /* fprintf(stderr, "pr[] sum: %g\n", sum); */
+
+  for(i = 0 ; i <= max-min ; i++) { pr[i] /= sum; }
+
+  if (!karlin(min, max, pr, &lambda, &H)) {
+    fprintf(stderr, "Karlin lambda estimation failed\n");
+  }
+
+  /*   fprintf(stderr, "lambda: %g\n", lambda); */
+  free(pr);
+
+  return lambda;
+}
+
+/* alloc_pam2p() - build a len x (nsq+1) zero-filled int matrix.
+
+   The cells live in one contiguous slab; a separate vector of len
+   row pointers indexes into it, so row 0 points at the slab itself.
+   Returns the row-pointer vector, or NULL (after a message on stderr)
+   when either allocation fails. */
+int **
+alloc_pam2p(int len, int nsq) {
+  int row;
+  int **rows;
+
+  rows = (int **)calloc(len,sizeof(int *));
+  if (rows == NULL) {
+    fprintf(stderr," Cannot allocate pam2p: %d\n",len);
+    return NULL;
+  }
+
+  rows[0] = (int *)calloc((nsq+1)*len,sizeof(int));
+  if (rows[0] == NULL) {
+    fprintf(stderr, "Cannot allocate pam2p[0]: %d\n", (nsq+1)*len);
+    free(rows);
+    return NULL;
+  }
+
+  /* remaining rows are fixed-width slices of the slab */
+  for (row = 1; row < len; row++) {
+    rows[row] = rows[0] + (row*(nsq+1));
+  }
+
+  return rows;
+}
+
+/* free_pam2p() - release a matrix whose cells hang off pam2p[0] and
+   whose row vector is pam2p itself; a NULL argument is ignored. */
+void free_pam2p(int **pam2p) {
+  if (pam2p == NULL) { return; }
+
+  free(pam2p[0]);	/* the cell slab */
+  free(pam2p);		/* the row-pointer vector */
+}
+
+int
+parse_pssm_asn_fa(FILE *afd, int *n_rows, int *n_cols,
+		  unsigned char **query, double ***wfreqs,double ***freqs, int ***iscores,
+		  char *matrix, int *gap_open, int *gap_extend,
+		  double *lambda);
+
+/* the ASN.1 pssm includes information about the scoring matrix used
+   (though not the gap penalty in the current version PSSM:2) The PSSM
+   scoring matrix and gap penalties should become the default if they
+   have not been set explicitly.
+*/
+
+/* read_asn_pssm() - read a PSSM from an open FILE *fp (nothing has
+   been read from *fp yet) and load it into ppst->pam2p[0].
+
+   aa0/n0    query sequence and its length
+   nsq       alphabet size (not referenced in this function)
+   pamscale  divisor applied to the log-odds values
+   fp        open ASN.1 PSSM file
+   ppst      parameter structure; gap penalties, the scoring matrix
+             and pam2p[0] may be updated
+
+   The frequencies returned by parse_pssm_asn_fa() are converted to
+   log-odds against rrcounts[]/rrtotal, then a scale factor is found
+   (bracketing followed by 10 bisection steps) so that lambda of the
+   filled profile matches lambda of ppst->pam2[0].
+
+   Returns 1 on success, -1 if the file cannot be parsed.  Exits if
+   the PSSM has fewer columns than the query or if lambda <= 0.
+
+   The query buffer is freed only when the parser allocated it; when
+   the parser returns no query, aa0 is used instead and remains owned
+   by the caller. */
+
+int
+read_asn_pssm(unsigned char *aa0, int n0, int nsq,
+	      double pamscale, FILE *fp, struct pstruct *ppst) {
+
+  int i, itmp;
+  int qi, rj;	/* qi - index query; rj - index residues (1-20) */
+  int **pam2p;
+  int first, too_high;
+  char *query;
+  int query_owned;	/* non-zero when query was allocated by the parser */
+  char matrix[MAX_SSTR];
+  double psi2_lambda;
+  double freq, **wfreq2d, **freq2d, lambda, new_lambda;
+  double scale, scale_high, scale_low;
+  int **iscores2d;
+  int gap_open, gap_extend;
+  int n_rows, n_cols;
+
+  pam2p = ppst->pam2p[0];
+
+  if (parse_pssm_asn_fa(fp, &n_rows, &n_cols, (unsigned char **)&query, &wfreq2d, &freq2d, &iscores2d,
+			matrix, &gap_open, &gap_extend, &psi2_lambda)<=0) {
+    return -1;
+  }
+
+  /* fall back to the caller's query, but remember that we do not own it */
+  query_owned = (query != NULL);
+  if (!query_owned) { query = (char *)aa0;}
+
+  /* gap penalties from the file are stored as negative values */
+  if (gap_open) {
+    if (gap_open > 0) {gap_open = -gap_open;}
+    ppst->gdelval = gap_open;
+  }
+  else if (strncmp(matrix,"BLOSUM62",8)==0) {
+    ppst->gdelval = -11;
+  }
+
+  if (gap_extend) {
+    if (gap_extend > 0) {gap_extend = -gap_extend;}
+    ppst->ggapval = gap_extend;
+  }
+  else if (strncmp(matrix,"BLOSUM62",8)==0) {
+    ppst->ggapval = -1;
+  }
+
+  /* adopt the PSSM's matrix unless one was set explicitly */
+  if (strncmp(matrix, "BLOSUM62", 8)== 0 && !ppst->pam_set) {
+    strncpy(ppst->pamfile, "BP62", 120);
+    strncpy(ppst->pamfile_save, ppst->pamfile, 120);
+    standard_pam(ppst->pamfile,ppst,0, 0);
+    if (!ppst->have_pam2) {
+     alloc_pam (MAXSQ, MAXSQ, ppst);
+    }
+    initpam2(ppst);
+    ppst->pam_set = 1;
+  }
+
+  if (n_cols < n0) { 
+    fprintf(stderr, " query length: %d != n_cols: %d\n",n0, n_cols);
+    exit(1);
+  }
+
+  printf(" rrtotal: %ld; pamscale=%0.5f\n", rrtotal, pamscale);
+
+  printf("        ");
+  for (rj = 1; rj <= N_EFFECT; rj++ ) {
+    printf(" %c    ",NCBIstdaa[rj]);
+  }
+  printf("\n");
+  /* convert frequencies to scaled log-odds, in place */
+  for (qi = 0 ; qi < n0 ; qi++) {
+    printf("%3d %c",qi+1, NCBIstdaa[aa0[qi]]);
+    for (rj = 0 ; rj < N_EFFECT ; rj++) {
+      freq = freq2d[qi][rj];
+/*    printf(" %8.7g",freq*10.0); */
+
+      if (freq > 1e-12) {
+	freq = log(freq /((double) (rrcounts[rj+1])/(double) rrtotal));
+	freq /= pamscale; /* this gets us close to original pam scores */
+	freq2d[qi][rj] = freq;
+      }
+      printf(" %5.2f",freq);
+    }
+    printf("\n");
+  }
+
+  /* now figure out the right scale */
+  scale = 1.0;
+  lambda = get_lambda(ppst->pam2[0], 20, 20, "\0ARNDCQEGHILKMFPSTWYV");
+
+  /* should be near 1.0 because of our initial scaling by ppst->pamscale */
+  fprintf(stderr, "real_lambda: %g\n", lambda);
+
+  /* get initial high/low scale values: */
+  first = 1;
+  while (1) {
+    fill_pam(pam2p, n0, 20, freq2d, scale);
+    new_lambda = get_lambda(pam2p, n0, 20, query); 
+    fprintf(stderr, " new_lambda: %g; scale: %g\n",new_lambda,scale);
+
+    if (new_lambda > lambda) {
+      if (first) {
+	first = 0;
+	scale = scale_high = 1.0 + 0.05;
+	scale_low = 1.0;
+	too_high = 1;
+      } else {
+	if (!too_high) break;
+	scale = (scale_high += scale_high - 1.0);
+      }
+    } else if (new_lambda > 0) {
+      if (first) {
+	first = 0;
+	scale_high = 1.0;
+	scale = scale_low = 1.0 - 0.05;
+	too_high = 0;
+      } else {
+	if (too_high) break;
+	scale = (scale_low += scale_low - 1.0);
+      }
+    } else {
+      fprintf(stderr, "new_lambda (%g) <= 0; matrix has positive average score", new_lambda);
+      exit(1);
+    }
+  }
+
+  /* now do binary search between low and high */
+  for (i = 0 ; i < 10 ; i++) {
+    scale = 0.5 * (scale_high + scale_low);
+    fill_pam(pam2p, n0, 20, freq2d, scale);
+    new_lambda = get_lambda(pam2p, n0, 20, query);
+    fprintf(stderr, "it: %d - new_lambda: %g; scale: %g\n",i,new_lambda,scale);
+    
+    if (new_lambda > lambda) scale_low = scale;
+    else scale_high = scale;
+  }
+
+  scale = 0.5 * (scale_high + scale_low);
+  fill_pam(pam2p, n0, 20, freq2d, scale);
+
+  fprintf(stderr, "final scale: %g\n", scale);
+
+  /* dump the final integer profile */
+  fprintf(stderr,"        ");
+  for (rj = 1; rj <= N_EFFECT+3; rj++ ) {
+    fprintf(stderr,"  %c",NCBIstdaa[rj]);
+  }
+  fprintf(stderr,"\n");
+
+  for (qi = 0 ; qi < n0 ; qi++) {
+    fprintf(stderr, "%4d %c: ", qi+1, NCBIstdaa[aa0[qi]]);
+    for (rj = 1 ; rj <= N_EFFECT+3 ; rj++) {
+      fprintf(stderr, "%3d", pam2p[qi][rj]);
+    }
+    fprintf(stderr, "\n");
+  }
+
+  /* dump the scores stored in the file, if any, for comparison */
+  if (iscores2d != NULL) {
+    fprintf(stderr,"         ");
+    for (rj = 1; rj <= N_EFFECT+3; rj++ ) {
+      fprintf(stderr," %c ",NCBIstdaa[rj]);
+    }
+    fprintf(stderr,"\n");
+    for (qi = 0 ; qi < n0 ; qi++) {
+      fprintf(stderr, "%4d %c: ", qi+1, NCBIstdaa[aa0[qi]]);
+      for (rj = 1 ; rj <= N_EFFECT+3 ; rj++) {
+	itmp = iscores2d[qi][rj];
+	if (itmp < -256) itmp=0;
+	fprintf(stderr, "%3d", itmp );
+      }
+      fprintf(stderr, "\n");
+    }
+    free(iscores2d[0]);
+    free(iscores2d);
+  }
+
+  if (wfreq2d != NULL) {
+    free(wfreq2d[0]);
+    free(wfreq2d);
+  }
+
+  if (freq2d != NULL) {
+    free(freq2d[0]);
+    free(freq2d);
+  }
+
+  /* never free aa0, which belongs to the caller */
+  if (query_owned) { free(query); }
+  return 1;
+}
+
+/* altpam_copy_residue() - private helper for init_altpam(): give the
+   residue with index ix_alt the same scores as the residue with index
+   ix_std, for both rows and columns (the matrix is not assumed to be
+   symmetric). */
+static void
+altpam_copy_residue(struct pstruct *ppst, int ix_std, int ix_alt) {
+  int k, ix_k;
+  int **pam = ppst->pam2[0];
+
+  pam[ix_alt][0] = pam[0][ix_alt] = -BIGNUM;
+  /* get the identity */
+  pam[ix_alt][ix_alt] = pam[ix_std][ix_std];
+  for (k=1; k < pam_sq_n; k++) {
+    ix_k = pascii[pam_sq[k]];
+    pam[ix_alt][ix_k] = pam[ix_std][ix_k];
+    pam[ix_k][ix_alt] = pam[ix_k][ix_std];
+  }
+}
+
+/* init_altpam() - fill in ppst->pam2[0] entries for the alternative
+   residues 'J', 'O' and 'U'.
+
+   'J' (I/L): if pam_sq has no 'J', each J score is the larger of the
+   corresponding I and L scores.
+   'O' and 'U': if the residue's index is below ppst->nsq it receives
+   a copy of the K (resp. C) scores; otherwise pascii[] is changed so
+   that O/o map to K/k (resp. U/u to C/c). */
+void
+init_altpam(struct pstruct *ppst) {
+  int iso, leu, jj, other, k;
+  int **pam = ppst->pam2[0];
+
+  /* add values for 'J' (I/L) value, which are not present in 1-D matrices */
+  iso = pascii['I'];
+  leu = pascii['L'];
+  jj = pascii['J'];
+  if (strchr(pam_sq,'J')==NULL) {
+    pam[jj][0] = pam[0][jj] = -BIGNUM;
+    /* get the identities */
+    pam[jj][jj] = max(pam[iso][iso],pam[leu][leu]);
+    for (k=1; k < pam_sq_n; k++) {
+      other = pascii[pam_sq[k]];
+      /* do not assume symmetric matrices */
+      pam[jj][other] = max(pam[iso][other],pam[leu][other]);
+      pam[other][jj] = max(pam[other][iso],pam[other][leu]);
+    }
+  }
+
+  /* add values for 'O' (K) value, which are not present in 1-D matrices */
+  if (pascii['O'] < ppst->nsq) {	/* is it in the NCBIstdaa alphabet ? */
+    altpam_copy_residue(ppst, pascii['K'], pascii['O']);
+  }
+  else {
+    pascii['O'] = pascii['K'];
+    pascii['o'] = pascii['k'];
+  }
+
+  /* add values for 'U' (C) value, which are not present in 1-D matrices */
+  if (pascii['U'] < ppst->nsq) {	/* is it in the NCBIstdaa alphabet */
+    altpam_copy_residue(ppst, pascii['C'], pascii['U']);
+  }
+  else {
+    pascii['U'] = pascii['C'];
+    pascii['u'] = pascii['c'];
+  }
+}
+
diff --git a/src/pssm_asn_subs.c b/src/pssm_asn_subs.c
new file mode 100644
index 0000000..50b7844
--- /dev/null
+++ b/src/pssm_asn_subs.c
@@ -0,0 +1,1756 @@
+/* $Id: pssm_asn_subs.c 1265 2014-06-30 16:13:49Z wrp $ */
+
+/* copyright (C) 2005, 2014 by William R. Pearson and The Rector &
+   Visitors of the University of Virginia */
+
+/* Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under this License is distributed on an "AS
+   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+   express or implied.  See the License for the specific language
+   governing permissions and limitations under the License.
+*/
+
+/* read_asn_dest modified 26-Jul-2007 to skip over text/bytes if dest is NULL */
+
+/* this code is designed to parse the ASN.1 binary encoded scoremat
+   object produced by blastpgp -C file.ckpt_asn -u 2 */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "defs.h"
+
+int parse_pssm_asn();
+int parse_pssm2_asn();
+
+int
+parse_pssm_asn_fa(FILE *afd, int *n_rows, int *n_cols,
+		  unsigned char **query, double ***wfreqs, double ***freqs, int ***iscores,
+		  char *matrix, int *gap_open, int *gap_extend,
+		  double *lambda);
+
+#define COMPO_NUM_TRUE_AA 20
+
+/**positions of true characters in protein alphabet*/
+/*
+static int trueCharPositions[COMPO_NUM_TRUE_AA] = {
+  1,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,22
+};
+*/
+
+#define COMPO_LARGEST_ALPHABET 28
+
+/*
+static char ncbieaatoa[COMPO_LARGEST_ALPHABET] = {"-ABCDEFGHIJKLMNOPQRSTUVWXYZ"};
+
+static int alphaConvert[COMPO_LARGEST_ALPHABET] = {
+  (-1), 0, (-1), 4, 3, 6, 13, 7, 8, 9, 11, 10, 12, 2, 14, 5, 1, 15,
+  16, 19,   17, (-1), 18, (-1), (-1), (-1), (-1), (-1)
+};
+*/
+
+/* one integer code per amino acid, listed in the order
+   A R N D C Q E G H I L K M F P S T W Y V (see the per-entry letters).
+   NOTE(review): the values look like NCBIstdaa residue codes - confirm
+   against the alphabet table before relying on that. */
+int pssm_aa_order[20] = { 1,  /*A*/
+			  16, /*R*/
+			  13, /*N*/
+			   4, /*D*/
+			   3, /*C*/
+			  15, /*Q*/
+			   5, /*E*/
+			   7, /*G*/
+			   8, /*H*/
+			   9, /*I*/
+			  11, /*L*/
+			  10, /*K*/
+			  12, /*M*/
+			   6, /*F*/
+			  14, /*P*/
+			  17, /*S*/
+			  18, /*T*/
+			  20, /*W*/
+			  22, /*Y*/
+			  19}; /*V*/
+
+#define ABP *asnp->abp	/* byte at the current parse position; needs a local 'asnp' */
+#define ABPP asnp->abp	/* the current parse position itself (assignable) */
+#define ABP_INC2 asnp->abp += 2	/* advance the parse position by two bytes */
+
+#define ASN_SEQ 48
+#define ASN_SET 48
+#define ASN_SEQOF 49
+#define ASN_SETOF 49
+
+#define ASN_PSSM_QUERY 166
+#define ASN_PSSM2_VERSION 160
+#define ASN_PSSM2_QUERY 161
+#define ASN_PSSM2_MATRIX 162
+
+#define ASN_PSSM_IS_PROT 160
+#define ASN_PSSM_NROWS 162
+#define ASN_PSSM_NCOLS 163
+
+#define ASN_PSSM_BYCOL 165
+#define ASN_PSSM_INTERMED_DATA 167
+#define ASN_PSSM_INTERMED_RES_FREQS 160
+#define ASN_PSSM_INTERMED_WRES_FREQS 161
+#define ASN_PSSM_INTERMED_FREQ_RATIOS 162
+#define ASN_PSSM_INTERMED_INFO_CONTENT 163
+#define ASN_PSSM_INTERMED_GAPL_COLWTS 164
+#define ASN_PSSM_INTERMED_SIGMA 165
+#define ASN_PSSM_INTERMED_INTVAL_SIZE 166
+#define ASN_PSSM_INTERMED_NUM_MATCH_SEQ 167
+
+#define ASN_PSSM_FREQS 162
+
+#define ASN_PSSM_FINAL_DATA 168	/* Sequence */
+#define ASN_PSSM_FINAL_DATA_SCORES 160  /* sequence of integer */
+#define ASN_PSSM_FINAL_DATA_LAMBDA 161  /* real */
+#define ASN_PSSM_FINAL_DATA_KAPPA  162  /* real */
+#define ASN_PSSM_FINAL_DATA_H      163  /* real */
+#define ASN_PSSM_FINAL_DATA_SCALEF 164  /* integer */
+#define ASN_PSSM_FINAL_DATA_ULAMBDA 165  /* real */
+#define ASN_PSSM_FINAL_DATA_UKAPPA  166  /* real */
+#define ASN_PSSM_FINAL_DATA_UH      167  /* real */
+
+#define ASN_PSSM2_IS_PROTEIN 160
+#define ASN_PSSM2_MATRIX_NAME 161
+#define ASN_PSSM2_MATRIX_COMMENT 162	/* not used */
+#define ASN_PSSM2_NCOLS 163
+#define ASN_PSSM2_NROWS 164
+#define ASN_PSSM2_SCORES 165
+#define ASN_PSSM2_KARLIN_K 166
+#define ASN_PSSM2_FREQS 167
+
+#define ASN_IS_STR 26
+#define ASN_IS_SSTR 65
+#define ASN_IS_INT  2
+#define ASN_IS_BOOL 1
+#define ASN_IS_OCTSTR 4
+#define ASN_IS_OCTSSTR 65
+#define ASN_IS_REAL 9
+#define ASN_IS_ENUM 10
+#define ASN_IS_ENUM0 1
+
+#define ASN_OBJ_INT 160
+#define ASN_OBJ_STR 161
+
+struct asn_bstruct {	/* state of a buffered reader over an ASN.1 byte stream */
+  FILE *fd;		/* stream the buffer is refilled from with fread() */
+  unsigned char *buf;	/* start of the read buffer */
+  unsigned char *abp;	/* current parse position inside buf */
+  unsigned char *buf_max;	/* buf + len: one past the last valid byte */
+  int len;		/* number of valid bytes currently held in buf */
+};
+
+#define ASN_BUF 4096
+
+/* new_asn_bstruct() - allocate a zeroed asn_bstruct together with a
+   zeroed read buffer of buf_siz bytes.  All other fields are left
+   zero for the caller to set up.  Prints a message and exits if
+   either allocation fails. */
+void *
+new_asn_bstruct(int buf_siz) {
+
+  struct asn_bstruct *reader;
+
+  reader = calloc(1,sizeof(struct asn_bstruct));
+  if (reader == NULL) {
+    fprintf(stderr, "cannot allocate asn_bstruct\n");
+    exit(1);
+  }
+
+  reader->buf = (unsigned char *)calloc(buf_siz, sizeof(char));
+  if (reader->buf == NULL) {
+    fprintf(stderr, " cannot allocate asn_buf (%d)\n",buf_siz);
+    exit(1);
+  }
+
+  return reader;
+}
+
+/* free_asn_bstruct() - release a reader and its buffer; NULL is a no-op */
+void
+free_asn_bstruct(struct asn_bstruct *asnp) {
+
+  if (asnp != NULL) {
+    if (asnp->buf != NULL) { free(asnp->buf); }
+    free(asnp);
+  }
+}
+
+/* chk_asn_buf() - make sure at least v bytes are available at the
+   parse position.
+
+   If they are not, the unread bytes are moved to the start of the
+   buffer and the rest of the buffer is refilled from asnp->fd (when
+   there is a stream and it is not at EOF).  Returns the (possibly
+   relocated) parse position, which is also stored in asnp->abp.
+   Exits if v exceeds ASN_BUF or v bytes cannot be obtained. */
+unsigned char *
+chk_asn_buf(struct asn_bstruct *asnp, int v) {
+  int room, n_read;
+
+  if (v > ASN_BUF) {
+    fprintf(stderr," attempt to read %d bytes ASN.1 data > buffer size (%d)\n",
+	    v, ASN_BUF);
+    exit(1);
+  }
+
+  /* v bytes are currently in the buffer - nothing to do */
+  if (asnp->abp + v <= asnp->buf_max) {
+    return asnp->abp;
+  }
+
+  /* move down the left over stuff */
+  asnp->len = asnp->buf_max - asnp->abp;
+  memmove(asnp->buf, asnp->abp, asnp->len);
+  asnp->abp = asnp->buf;
+
+  /* top the buffer up behind the left over bytes */
+  room = ASN_BUF - asnp->len;
+  if (asnp->fd && !feof(asnp->fd)) {
+    n_read = fread(asnp->buf + asnp->len, sizeof(char), room, asnp->fd);
+    if (n_read != 0) {
+      asnp->len += n_read;
+    }
+  }
+
+  asnp->buf_max = asnp->buf + asnp->len;
+
+  if (asnp->len < v) {
+    fprintf(stderr, " Unable to read %d bytes\n",v);
+    exit(1);
+  }
+
+  return asnp->abp;
+}
+
+unsigned char *
+asn_error(char *func, char *token, int tval,
+	  struct asn_bstruct *asnp, int len) {
+  int i;
+
+  fprintf(stderr," %s %s [%0x]:",func, token, tval);
+  for (i=0; i<len; i++) {
+    fprintf(stderr," %0x",asnp->abp[i]);
+  }
+  fprintf(stderr,"\n");
+  return asnp->abp;
+}
+
+/*
+   read_asn_dest reads v bytes into oct_str if v <= o_len - otherwise
+   fails - the correct size buffer must be pre-allocated.
+   read_asn_dest is required for ASN data entities that may be longer
+   than the ASN_BUF-byte read buffer.
+
+   skip over the v bytes if oct_str==NULL;
+
+   Returns the parse position just past the v bytes.  When the bytes
+   are already buffered, asnp->abp is NOT updated (the caller assigns
+   the return value); when the buffer had to be refilled, asnp->abp is
+   updated as well.
+
+   Exits if v > o_len (with a destination), or if the stream ends
+   before v bytes have been read.
+*/
+unsigned char *
+read_asn_dest(struct asn_bstruct *asnp, int v, unsigned char *oct_str, int o_len) {
+  int new_buf;
+  unsigned char *oct_ptr;
+
+
+  if (oct_str != NULL && v > o_len) {
+    fprintf(stderr, " read_asn_dest - cannot read %d bytes into %d buffer\n",
+	    v, o_len);
+    exit(1);
+  }
+
+  /* everything requested is already in the buffer */
+  if (asnp->abp + v <= asnp->buf_max) {
+    if (oct_str != NULL) memmove(oct_str, asnp->abp, v);
+    return asnp->abp+v;
+  }
+
+  /* use up the left over stuff first */
+  asnp->len = asnp->buf_max - asnp->abp;
+
+  oct_ptr = oct_str;
+  if (oct_str != NULL) {
+    memmove(oct_str, asnp->abp, asnp->len);
+    oct_ptr += asnp->len;
+  }
+  v -= asnp->len;
+
+  asnp->abp = asnp->buf;
+
+  /* v > 0 here; refill the buffer until the request is satisfied */
+  for (;;) {
+    new_buf = 0;
+    if (asnp->fd != NULL) {
+      new_buf = fread(asnp->buf, sizeof(char), ASN_BUF, asnp->fd);
+    }
+    if (new_buf <= 0) {
+      /* ran out of data - do not hand back a pointer past the buffer */
+      fprintf(stderr, " Unable to read %d bytes\n",v);
+      exit(1);
+    }
+
+    asnp->len = new_buf;
+    asnp->buf_max = asnp->buf + asnp->len;
+
+    if (v <= new_buf) {	/* we have it all this time */
+      if (oct_str != NULL)  memmove(oct_ptr, asnp->buf, v);
+      asnp->len -= v;
+      asnp->abp = asnp->buf + v;
+      break;
+    }
+
+    /* we need to read some more; keep appending to the destination */
+    if (oct_str != NULL) {
+      memmove(oct_ptr, asnp->buf, new_buf);
+      oct_ptr += new_buf;
+    }
+    v -= new_buf;
+  }
+
+  return asnp->abp;
+}
+
+/* get_astr_bool() - read a boolean: tag byte (1), length byte, value
+   byte.  *val receives the value byte, or 0 when the tag is missing.
+   A length other than 1 is reported but one value byte is still
+   consumed.  Returns the updated parse position. */
+unsigned char *
+get_astr_bool(struct asn_bstruct *asnp, int *val) {
+
+  int tag, n_bytes;
+  int flag = 0;
+
+  asnp->abp = chk_asn_buf(asnp,16);
+
+  tag = *asnp->abp++;
+  if (tag == ASN_IS_BOOL) {
+    n_bytes = *asnp->abp++;
+    if (n_bytes != 1) {
+      fprintf(stderr, "boolean length != 1 : %d\n", n_bytes);
+    }
+    flag = *asnp->abp++;
+  }
+  else {
+    fprintf(stderr," bool missing\n");
+  }
+
+  *val = flag;
+  return asnp->abp;
+}
+
+/* get_astr_int() - read an ASN.1 INTEGER: tag byte (ASN_IS_INT),
+   length byte, then that many big-endian two's-complement content
+   bytes.
+
+   *val receives the value, sign-extended from the encoded width (any
+   width, not just 1 or 2 bytes).  Content bytes beyond the width of
+   a long are shifted out.  If the tag is missing, the error is
+   reported through asn_error() and *val is left untouched.
+
+   Returns the updated parse position. */
+unsigned char *
+get_astr_int(struct asn_bstruct *asnp, long *val) {
+
+  int i_len, v_len;
+  int negative = 0;
+  unsigned long uv = 0;	/* accumulate unsigned: no signed overflow */
+
+  asnp->abp = chk_asn_buf(asnp,32);
+
+  if (*asnp->abp++ != ASN_IS_INT) { /* check for int */
+    return asn_error("get_astr_int", "ASN_IS_INT", ASN_IS_INT, asnp, 4);
+  }
+
+  i_len = v_len = *asnp->abp++;
+
+  /* only 32 bytes were requested above; longer content needs a check */
+  if (v_len > 30) { asnp->abp = chk_asn_buf(asnp, v_len); }
+
+  /* the top bit of the first content byte is the sign */
+  if (v_len > 0) { negative = ((*asnp->abp & 0x80) != 0); }
+
+  while (i_len-- > 0) {
+    uv = (uv << 8) | *asnp->abp++;
+  }
+
+  /* sign-extend when the encoding is narrower than a long */
+  if (negative && v_len < (int)sizeof(unsigned long)) {
+    uv |= ~0UL << (8 * v_len);
+  }
+
+  *val = (long)uv;
+  return asnp->abp;
+}
+
+/* get_astr_real() - read a REAL stored as text: tag byte
+   (ASN_IS_REAL), length byte, a zero byte, then the number as text,
+   which is scanned with "%lg".
+
+   If the tag is missing nothing is consumed and *val is untouched; if
+   the zero byte is missing *val is 0.0 and only tag and length have
+   been consumed.  Returns the updated parse position. */
+unsigned char *
+get_astr_real(struct asn_bstruct *asnp,
+	    double *val) {
+
+  int n_bytes;
+
+  asnp->abp = chk_asn_buf(asnp,32);
+
+  if (*asnp->abp != ASN_IS_REAL) {
+    fprintf(stderr," real missing\n");
+    return asnp->abp;
+  }
+
+  /* step over the tag and the length */
+  n_bytes = asnp->abp[1];
+  asnp->abp += 2;
+
+  *val = 0.0;
+  if (*asnp->abp != '\0') {
+    fprintf(stderr," float missing\n");
+    return asnp->abp;
+  }
+
+  sscanf((char *)asnp->abp+1,"%lg",val);
+  asnp->abp += n_bytes;
+
+  return asnp->abp;
+}
+
+/* get_astr_enum() - read an ENUMERATED value: tag byte (ASN_IS_ENUM),
+   length byte, then that many big-endian content bytes.  *val
+   receives the value, or 0 when the tag is missing (the tag byte is
+   consumed either way).  Returns the updated parse position. */
+unsigned char *
+get_astr_enum(struct asn_bstruct *asnp, int *val) {
+
+  int n_bytes;
+  int result = 0;
+
+  asnp->abp = chk_asn_buf(asnp,16);
+
+  if (*asnp->abp++ == ASN_IS_ENUM) {
+    for (n_bytes = *asnp->abp++; n_bytes > 0; n_bytes--) {
+      result = result * 256 + *asnp->abp++;
+    }
+  }
+  else {
+    fprintf(stderr," enum missing\n");
+  }
+
+  *val = result;
+  return asnp->abp;
+}
+
+/* get_astr_packedreal() - read one REAL stored as text: tag byte
+   (ASN_IS_REAL), length byte v_len, a zero byte, then v_len-1
+   characters that are scanned with "%lg" into *d_val_p.
+
+   Special cases:
+     tag missing             -> *d_val_p = 0, tag byte consumed
+     v_len == 0              -> *d_val_p = 0.0 (no content bytes)
+     v_len == 2, "\0" "0"    -> *d_val_p = 0.0
+     first content byte != 0 -> *d_val_p = -1.0, content not consumed
+   Text longer than the 63-character scratch buffer is reported and
+   truncated for scanning, but the full encoding is still skipped.
+
+   l_val_p is not used.  Returns the updated parse position. */
+unsigned char *
+get_astr_packedreal(struct asn_bstruct *asnp, long *l_val_p, double *d_val_p) {
+
+  int v_len, n_txt;
+  char tmp_str[64];
+
+  asnp->abp = chk_asn_buf(asnp,32);
+
+  if (*asnp->abp++ != ASN_IS_REAL) { /* check for packed float */
+    fprintf(stderr,"*** error [%s:%d] - float missing\n",__FILE__,__LINE__);
+    *d_val_p = 0;
+    return asnp->abp;
+  }
+
+  v_len = *asnp->abp++;
+
+  if (v_len == 0) {	/* nothing to scan; the next byte is not ours */
+    *d_val_p = 0.0;
+    return asnp->abp;
+  }
+
+  if (v_len > 63) {
+    fprintf(stderr,"*** error [%s:%d] - real string too long: %d\n",__FILE__,__LINE__,v_len);
+  }
+
+  asnp->abp = chk_asn_buf(asnp,v_len+16);
+
+  if (v_len == 2  && *asnp->abp == '\0' && *(asnp->abp+1)=='0') {
+    ABP_INC2;
+    *d_val_p = 0.0;
+    return asnp->abp;
+  }
+
+  /* copy and scan it */
+  if (*asnp->abp != '\0') {
+    fprintf(stderr, "*** error [%s:%d] -  packedreal - expected 0, got %d\n", __FILE__,__LINE__,*asnp->abp);
+    *d_val_p = -1.0;
+    return asnp->abp;
+  }
+  asnp->abp++;
+
+  /* never copy more than the scratch buffer can hold */
+  n_txt = v_len - 1;
+  if (n_txt > (int)sizeof(tmp_str) - 1) { n_txt = (int)sizeof(tmp_str) - 1; }
+  strncpy(tmp_str, (char *)asnp->abp, n_txt);
+  tmp_str[n_txt] = '\0';
+  sscanf(tmp_str,"%lg", d_val_p);
+
+  /* skip the whole text, even the part that was not scanned */
+  asnp->abp += v_len-1;
+
+  return asnp->abp;
+}
+
+/* get_astr_packedint() - read one integer with get_astr_int() into
+   *l_val_p; d_val_p is not used.  Returns the updated parse position. */
+unsigned char *
+get_astr_packedint(struct asn_bstruct *asnp, long *l_val_p, double *d_val_p) {
+
+  unsigned char *next;
+
+  asnp->abp = chk_asn_buf(asnp,32);
+  next = get_astr_int(asnp, l_val_p);
+  asnp->abp = next;
+  return asnp->abp;
+}
+
+/* get_astr_str() - read a string: tag byte (ASN_IS_STR or
+   ASN_IS_SSTR), a length (one byte, or for values above 128 the low
+   7 bits give the number of big-endian length bytes that follow),
+   then the characters.
+
+   text/t_len describe the destination buffer.  At most t_len-1
+   characters are stored and the result is always NUL-terminated
+   inside the t_len bytes; any remaining characters are skipped.
+   With text == NULL (t_len 0) the string is skipped entirely.
+
+   If the tag is missing, the error is reported through asn_error()
+   and nothing is consumed.  Returns the updated parse position. */
+unsigned char *
+get_astr_str(struct asn_bstruct *asnp, char *text, int t_len) {
+
+  int v_len, tv_len;
+  int n_keep;	/* number of characters stored in text[] */
+
+  asnp->abp = chk_asn_buf(asnp,32);
+
+  if (text != NULL) text[0] = '\0';
+
+  if (ABP != ASN_IS_STR  && ABP != ASN_IS_SSTR) { /* check for str */
+    return asn_error("get_astr_str", "ASN_IS_STR", ASN_IS_STR, asnp, 4);
+  }
+  asnp->abp++;
+
+  v_len = *asnp->abp++;
+  if (v_len > 128) { /* need to read the length from the next bytes */
+    tv_len = v_len &0x7f;
+
+    asnp->abp = chk_asn_buf(asnp,tv_len+32);
+
+    for (v_len =0; tv_len; tv_len--) { v_len = (v_len << 8) + *asnp->abp++; }
+  }
+
+  /* decide how much of the string can be kept, leaving room for the
+     terminating NUL */
+  if (v_len < t_len) { n_keep = v_len; }
+  else { n_keep = (t_len > 0) ? t_len - 1 : 0; }
+
+  /* read v_len bytes: keep the first n_keep, skip the rest */
+  if (n_keep > 0) {
+    asnp->abp = read_asn_dest(asnp, n_keep, (unsigned char *)text, t_len);
+  }
+  if (v_len > n_keep) {
+    asnp->abp = read_asn_dest(asnp, v_len - n_keep, NULL, 0);
+  }
+
+  if (text != NULL && t_len > 0) {text[n_keep]='\0';}
+  return asnp->abp;
+}
+
+/* get_astr_octstr() - read an octet string: tag byte (ASN_IS_OCTSTR
+   or ASN_IS_OCTSSTR), a length (one byte, or for values above 128 the
+   low 7 bits give the number of big-endian length bytes that follow),
+   then the octets.
+
+   oct_str/o_len describe the destination buffer.  At most o_len-1
+   octets are stored, followed by a terminating 0 that always lies
+   inside the o_len bytes; any remaining octets are skipped.  With
+   oct_str == NULL (o_len 0) the octets are skipped entirely.
+
+   If the tag is not an octet string nothing is consumed.  Returns the
+   updated parse position. */
+unsigned char *
+get_astr_octstr(struct asn_bstruct *asnp,
+	       unsigned char *oct_str,
+	       int o_len) {
+
+  int q_len, v_len;
+  int n_keep;	/* number of octets stored in oct_str[] */
+
+  asnp->abp = chk_asn_buf(asnp,32);
+
+  if (ABP == ASN_IS_OCTSTR || ABP == ASN_IS_OCTSSTR) {
+    ABPP++;
+    /* get length  of length */
+    if (ABP > 128) {
+      v_len = *asnp->abp++ & 0x7f;
+
+      asnp->abp = chk_asn_buf(asnp,v_len+32);
+
+      q_len = 0;
+      while (v_len-- > 0) {
+	q_len *= 256;
+	q_len += *asnp->abp++;
+      }
+    }
+    else {
+      q_len = *asnp->abp++ & 0x7f;
+    }
+
+    /* decide how much can be kept, leaving room for the terminator */
+    if (q_len < o_len) { n_keep = q_len; }
+    else { n_keep = (o_len > 0) ? o_len - 1 : 0; }
+
+    /* keep the first n_keep octets, skip the rest */
+    if (n_keep > 0) {
+      asnp->abp = read_asn_dest(asnp, n_keep, oct_str, o_len);
+    }
+    if (q_len > n_keep) {
+      asnp->abp = read_asn_dest(asnp, q_len - n_keep, NULL, 0);
+    }
+
+    if (oct_str != NULL && o_len > 0) oct_str[n_keep]='\0';
+
+    /*    asnp->abp += 2; */	/* skip characters and NULL's */
+  }
+  return asnp->abp;
+}
+
+/* something to try to skip over stuff we don't want
+
+   Walks the stream until a zero byte, stepping over sequence headers,
+   booleans, integers and strings, then steps over two bytes for
+   every sequence header that was seen.  A tag that is not one of
+   those is reported through asn_error() and parsing stops there
+   (nothing is consumed for it), instead of spinning forever on the
+   same byte.  Returns the updated parse position. */
+unsigned char *
+get_astr_junk(struct asn_bstruct *asnp) {
+
+  int seq_cnt = 0;
+  long tmp;
+  char string[256];
+
+  while (ABP) {
+    if ( ABP  == ASN_SEQ) { ABP_INC2; seq_cnt++;}
+    else if ( ABP == ASN_IS_BOOL ) {
+      ABP_INC2;
+      ABPP = get_astr_int(asnp, &tmp) + 2;
+    }
+    else if ( ABP == ASN_IS_INT ) {
+      ABP_INC2;
+      ABPP = get_astr_int(asnp, &tmp) + 2;
+    }
+    else if ( ABP == ASN_IS_STR ) {
+      ABP_INC2;
+      ABPP = get_astr_str(asnp, string, sizeof(string)-1) + 2;
+    }
+    else {
+      /* nothing above advances the position for this tag */
+      return asn_error("get_astr_junk", "", -1, asnp, 4);
+    }
+  }
+
+  /* step over the end-of-contents bytes of each sequence seen */
+  while (seq_cnt-- > 0) ABP_INC2;
+  return asnp->abp;
+}
+
+#define ASN_SEQINST_NCBIEAA 167
+#define ASN_SEQINST_NCBISTDAA 169
+#define ASN_SEQINST_IUPACAA 161
+
+/* get_astr_iseqd() - read the sequence data of a Seq-inst into
+   query[] (nq bytes).
+
+   The leading tag selects the encoding: ASN_SEQINST_NCBIEAA and
+   ASN_SEQINST_IUPACAA are read as strings, ASN_SEQINST_NCBISTDAA as
+   an octet string.  In each case two bytes are stepped over before
+   and after the value.  Any other tag is reported through
+   asn_error().  Returns the new parse position (asnp->abp itself is
+   left for the caller to assign). */
+unsigned char *
+get_astr_iseqd(struct asn_bstruct *asnp,
+	       unsigned char *query,
+	       int nq) {
+
+  asnp->abp = chk_asn_buf(asnp,32);
+
+  /* check for the sequence type */
+  switch (*asnp->abp) {
+  case ASN_SEQINST_NCBIEAA:
+  case ASN_SEQINST_IUPACAA:
+    asnp->abp += 2;
+    return get_astr_str(asnp, (char *)query, nq) + 2;
+
+  case ASN_SEQINST_NCBISTDAA:
+    asnp->abp += 2;
+    return get_astr_octstr(asnp, query, nq) + 2;
+
+  default:
+    return asn_error("get_astr_iseqd","",-1,asnp,4);
+  }
+}
+
+/* get_astr_objid() - read an object id that is either an integer or
+   a string.
+
+   *type is set to 1 (integer, stored in *val if val != NULL), 2
+   (string, stored in text/t_len) or 0 (neither; reported through
+   asn_error()).  text, when given, is first emptied and *val zeroed.
+   Two bytes are stepped over before and after the value.  Returns
+   the updated parse position. */
+unsigned char *
+get_astr_objid(struct asn_bstruct *asnp, int *type, int *val, char *text, int t_len) {
+
+  long id_num;
+
+  asnp->abp = chk_asn_buf(asnp,32);
+
+  if (text != NULL) text[0] = '\0';
+  if (val != NULL) *val = 0;
+  *type = 0;
+
+  /* object could be text, or could be int */
+  switch (*asnp->abp) {
+  case ASN_OBJ_INT:
+    asnp->abp += 2;
+    asnp->abp = get_astr_int(asnp, &id_num)+2;
+    if (val != NULL) *val = id_num;
+    *type = 1;
+    break;
+
+  case ASN_OBJ_STR:
+    asnp->abp += 2;
+    asnp->abp = get_astr_str(asnp, text, t_len)+2;
+    *type = 2;
+    break;
+
+  default:
+    return asn_error("get_astr_objid","ASN_OBJ_STR",ASN_OBJ_STR,asnp,4);
+  }
+
+  return asnp->abp;
+}
+
+#define ASN_BIOSEQ_SEQ 160
+#define ASN_BIOSEQ_ID_VAL 160
+#define ASN_BIOSEQ_ID_OBJ 160
+#define ASN_BIOSEQ_ID_LOCAL 161
+#define ASN_BIOSEQ_ID_GIBBSQ 162
+#define ASN_BIOSEQ_ID_GIBBMT 163
+#define ASN_BIOSEQ_ID_GB 164
+#define ASN_BIOSEQ_ID_EMBL 165
+#define ASN_BIOSEQ_ID_PIR 166
+#define ASN_BIOSEQ_ID_SP 167
+#define ASN_BIOSEQ_ID_PATENT 168
+#define ASN_BIOSEQ_ID_OTHER 169
+#define ASN_BIOSEQ_ID_GEN 170
+#define ASN_BIOSEQ_ID_GI 171
+#define ASN_BIOSEQ_ID_DDBJ 172
+#define ASN_BIOSEQ_ID_PDB 173
+#define ASN_BIOSEQ_ID_TPG 174
+#define ASN_BIOSEQ_ID_TPE 175
+#define ASN_BIOSEQ_ID_TPD 176
+
+#define ASN_BIOSEQ_TEXTID_NAME 160
+#define ASN_BIOSEQ_TEXTID_ACC 161
+#define ASN_BIOSEQ_TEXTID_REL 162
+#define ASN_BIOSEQ_TEXTID_VER 163
+
+#define ASN_BIOSEQ_ID  160
+#define ASN_BIOSEQ_DESCR 161
+#define ASN_BIOSEQ_INST  162
+#define ASN_BIOSEQ_ANNOT 163
+
+#define ASN_BIOSEQ_D_NAME  163
+#define ASN_BIOSEQ_D_TITLE  164
+#define ASN_BIOSEQ_D_PIR  169
+#define ASN_BIOSEQ_D_GB   170
+#define ASN_BIOSEQ_D_USER 173
+#define ASN_BIOSEQ_D_SP 174
+
+#define ASN_BIOSEQ_INST_REPR  160
+#define ASN_BIOSEQ_INST_MOL  161
+#define ASN_BIOSEQ_INST_LEN  162
+#define ASN_BIOSEQ_INST_SEQD  166
+#define ASN_BIOSEQ_INST_HIST  168
+
+#define ASN_USERFLD_D_STR 160
+#define ASN_USERFLD_D_INT 161
+#define ASN_USERFLD_D_REAL 162
+#define ASN_USERFLD_D_BOOL 163
+#define ASN_USERFLD_D_OS 164
+#define ASN_USERFLD_D_USER 165
+#define ASN_USERFLD_D_STRS 166
+#define ASN_USERFLD_D_INTS 167
+#define ASN_USERFLD_D_REALS 168
+#define ASN_USERFLD_D_OSS 169
+#define ASN_USERFLD_D_FIELDS 170
+#define ASN_USERFLD_D_OBJS 171
+
+/* get_astr_userfld_data() - step over the data part of a user field.
+
+   The leading tag selects string, integer, real, boolean, octet
+   string or set of octet strings; the value is parsed and discarded.
+   Two bytes are stepped over before and after the value (four for
+   ASN_USERFLD_D_OSS).  Any other tag is reported through asn_error().
+   Returns the updated parse position. */
+unsigned char *
+get_astr_userfld_data(struct asn_bstruct *asnp) {
+  double d_tmp;
+  long l_tmp;
+  int b_tmp;
+  int tag;
+
+  asnp->abp = chk_asn_buf(asnp, 32);
+  tag = *asnp->abp;
+
+  if (tag == ASN_USERFLD_D_STR) {
+    asnp->abp += 2;
+    asnp->abp = get_astr_str(asnp, NULL, 0) + 2;
+  }
+  else if (tag == ASN_USERFLD_D_INT) {
+    asnp->abp += 2;
+    asnp->abp = get_astr_int(asnp, &l_tmp) + 2;
+  }
+  else if (tag == ASN_USERFLD_D_REAL) {
+    asnp->abp += 2;
+    asnp->abp = get_astr_real(asnp, &d_tmp) + 2;
+  }
+  else if (tag == ASN_USERFLD_D_BOOL) {
+    asnp->abp += 2;
+    asnp->abp = get_astr_bool(asnp, &b_tmp) + 2;
+  }
+  else if (tag == ASN_USERFLD_D_OS) {
+    asnp->abp += 2;
+    asnp->abp = get_astr_octstr(asnp, NULL, 0)+2;
+  }
+  else if (tag == ASN_USERFLD_D_OSS) {
+    asnp->abp += 4;
+    asnp->abp = get_astr_octstr(asnp, NULL, 0)+4;
+  }
+  else {
+    return asn_error("get_astr_userfld_data","",0,asnp,4);
+  }
+
+  return asnp->abp;
+}
+
+#define ASN_USERFLD_LABEL 160
+#define ASN_USERFLD_NUM 161
+#define ASN_USERFLD_DATA 162
+
+/* get_astr_userfld() - step over one user field: an optional
+   sequence header, a label (object id), an optional count and the
+   data.  A missing label or data tag is reported through asn_error()
+   and parsing stops at that point.  Returns the updated parse
+   position. */
+unsigned char *
+get_astr_userfld(struct asn_bstruct *asnp) {
+
+  char *func = "get_astr_userfld";
+  long n_items;
+  int label_type;
+  int wrapped = 0;	/* set when the field starts with a sequence header */
+
+  asnp->abp = chk_asn_buf(asnp, 32);
+
+  if (*asnp->abp == ASN_SEQ) {
+    wrapped = 1;
+    asnp->abp += 2;
+  }
+
+  /* label - required */
+  if (*asnp->abp != ASN_USERFLD_LABEL) {
+    return asn_error(func, "ASN_USERFLD_LABEL", ASN_USERFLD_LABEL, asnp, 4);
+  }
+  asnp->abp += 2;
+  asnp->abp = get_astr_objid(asnp, &label_type, NULL, NULL, 0)+2;
+
+  /* num - optional */
+  if (*asnp->abp == ASN_USERFLD_NUM) {
+    asnp->abp += 2;
+    asnp->abp = get_astr_int(asnp, &n_items)+2;
+  }
+
+  /* data - required */
+  if (*asnp->abp != ASN_USERFLD_DATA) {
+    return asn_error(func, "ASN_USERFLD_DATA", ASN_USERFLD_DATA, asnp, 4);
+  }
+  asnp->abp += 2;
+  asnp->abp = get_astr_userfld_data(asnp)+2;
+
+  asnp->abp = chk_asn_buf(asnp,8);
+  if (wrapped) { asnp->abp += 2; }
+
+  return asnp->abp;
+}
+
+#define ASN_USER_CLASS 160
+#define ASN_USER_TYPE 161
+#define ASN_USER_DATA 162
+
+/* get_astr_user() - step over a user object: a sequence header, an
+   optional class string, a type (object id) and the data, which is
+   parsed as a single user field.  A missing type or data tag is
+   reported through asn_error() and parsing stops at that point.
+   Returns the updated parse position. */
+unsigned char *
+get_astr_user(struct asn_bstruct *asnp) {
+  int obj_type;
+
+  char *func = "get_astr_user";
+  asnp->abp = chk_asn_buf(asnp,32);
+
+  asnp->abp += 2;	/* skip SEQ */
+
+  /* class - optional */
+  if (*asnp->abp == ASN_USER_CLASS) {
+    asnp->abp += 2;
+    asnp->abp = get_astr_str(asnp, NULL, 0) + 2;
+  }
+
+  /* type - required */
+  if (*asnp->abp != ASN_USER_TYPE) {
+    return asn_error(func, "ASN_USER_TYPE", ASN_USER_TYPE, asnp, 4);
+  }
+  asnp->abp += 2;
+  asnp->abp = get_astr_objid(asnp, &obj_type, NULL, NULL, 0) + 2;
+
+  /* data - required */
+  if (*asnp->abp != ASN_USER_DATA) {
+    return asn_error(func,"ASN_USER_DATA", ASN_USER_DATA, asnp, 4);
+  }
+  asnp->abp += 4;	/* skip over, data, SEQ */
+  asnp->abp = chk_asn_buf(asnp,32);
+  asnp->abp = get_astr_userfld(asnp);
+  asnp->abp += 4;
+
+  return asnp->abp;
+}
+
+/* get_astr_seqdescr() - parse a Seq-descr.
+
+   Expects a SEQUENCE OF header, then entries up to a zero byte.  A
+   title entry is copied into descr (up to MAX_STR); a user entry is
+   stepped over with get_astr_user(); any other entry is reported and
+   parsing stops there.  Returns the updated parse position. */
+unsigned char *
+get_astr_seqdescr(struct asn_bstruct *asnp,
+		 char *descr) {
+
+  int in_seqof = 0;
+
+  /* get seqof '1' */
+  /* get 164/128 -  title */
+  /* get string */
+  /* pop nulls */
+
+  asnp->abp = chk_asn_buf(asnp,16);
+
+  if (*asnp->abp != ASN_SEQOF) {
+    fprintf(stderr, "*** error [%s:%d] - missing ASN_SEQOF '1': %0x %0x\n",__FILE__, __LINE__,ABP, asnp->abp[1]);
+  }
+  else {
+    in_seqof++;
+    asnp->abp += 2;
+  }
+
+  while (*asnp->abp != '\0') {
+    switch (*asnp->abp) {
+    case ASN_BIOSEQ_D_TITLE:
+      asnp->abp += 2;	/* skip token */
+      asnp->abp = get_astr_str(asnp, descr, MAX_STR) + 2;
+      break;
+
+    case ASN_BIOSEQ_D_USER:
+      asnp->abp += 2;
+      asnp->abp = get_astr_user(asnp);
+      break;
+
+    default:
+      fprintf(stderr, "*** error [%s:%d] - Un-parsed Seq-descr: %x %x\n",__FILE__,__LINE__,asnp->abp[0],asnp->abp[1]);
+      return asnp->abp;
+    }
+  }
+
+  asnp->abp = chk_asn_buf(asnp,8);
+
+  /* step over the two bytes that close the SEQUENCE OF */
+  if (in_seqof) { asnp->abp += 2; }
+
+  return asnp->abp;
+}
+
+/* get_astr_seqinst() - parse a Seq-inst and return the sequence.
+
+   Reads, in order: a sequence header, the repr and mol enums, the
+   length (stored in *nq), and the sequence data, which is read into
+   a freshly calloc()ed buffer of *nq+1 bytes returned through *query
+   (the caller frees it).
+
+   A missing header, repr or mol is reported and parsing continues.
+   A missing length is reported and the function returns without
+   touching *query.  Missing sequence data is reported, the buffer is
+   freed and *query is set to NULL.  If the buffer cannot be
+   allocated, *query is NULL and the sequence data is still parsed.
+   A Seq-hist entry cannot be parsed and causes an exit.
+
+   Returns the updated parse position. */
+unsigned char *
+get_astr_seqinst(struct asn_bstruct *asnp,
+		unsigned char **query,
+		int *nq) {
+
+  int end_seq=0, tmp;
+  long l_val;
+
+  /* get sequence '0' */
+  /* get 160/128/10/len/val -  repr enum raw val */
+  /* get 161/128/10/len/val -  mol enum aa val */
+  /* get 162/128/02/len/val -  length int val */
+  /* get 166/128 - topology (empty) */
+  /* get 167/128 - seq-data */
+  /* get 65/len+128/len/octet_string */
+  /* pop nulls */
+
+  asnp->abp = chk_asn_buf(asnp,32);
+
+  if (ABP == ASN_SEQ) {
+    end_seq++;
+    ABP_INC2;
+  }
+  else {
+    fprintf(stderr, "*** error [%s:%d] - missing ASN_SEQ '0': %0x %0x\n",__FILE__, __LINE__, ABP, asnp->abp[1]);
+  }
+
+  if (ABP == ASN_BIOSEQ_INST_REPR && *(asnp->abp+1) == 128) {
+    ABP_INC2;
+    asnp->abp = get_astr_enum(asnp, &tmp)+2;
+  }
+  else {
+    fprintf(stderr, "*** error [%s:%d] - missing ASN_BIOSEQ_INST_REPR 160: %0x %0x\n",__FILE__,__LINE__,ABP, asnp->abp[1]);
+  }
+
+  if (ABP == ASN_BIOSEQ_INST_MOL && *(asnp->abp+1) == 128) {
+    ABP_INC2;
+    asnp->abp = get_astr_enum(asnp, &tmp)+2;
+  }
+  else {
+    fprintf(stderr, "*** error [%s:%d] - missing ASN_BIOSEQ_INST_MOL 161: %0x %0x\n",__FILE__,__LINE__,ABP, asnp->abp[1]);
+  }
+
+  if (ABP == ASN_BIOSEQ_INST_LEN) {
+    ABP_INC2;
+    asnp->abp = get_astr_int(asnp, &l_val)+2;
+    *nq = l_val;
+  }
+  else {
+    /* ASN_BIOSEQ_INST_LEN is tag 162 (161 is ASN_BIOSEQ_INST_MOL) */
+    fprintf(stderr, "*** error [%s:%d] - missing ASN_BIOSEQ_INST_LEN 162: %0x %0x\n",__FILE__, __LINE__, ABP, asnp->abp[1]);
+    return asnp->abp;
+  }
+
+  /* one extra byte for the terminator written by the string readers */
+  if ((*query = (unsigned char *)calloc(*nq + 1, sizeof(char)))==NULL) {
+    fprintf(stderr, " cannot allocate %d char query\n", *nq+1);
+  }
+
+  if (ABP == ASN_BIOSEQ_INST_SEQD) {
+    ABP_INC2;
+    asnp->abp = get_astr_iseqd(asnp, *query, *nq+1 ) + 2;
+  }
+  else {
+    fprintf(stderr, "*** error [%s:%d] - missing ASN_BIOSEQ_INST_SEQD 166: %0x %0x\n",__FILE__, __LINE__, ABP, asnp->abp[1]);
+    free(*query);
+    *query = NULL;
+    return asnp->abp;
+  }
+
+  if (ABP == ASN_BIOSEQ_INST_HIST ) {
+    fprintf(stderr, "*** error [%s:%d] - Cannot parse bioseq inst history\n",__FILE__,__LINE__);
+    exit(1);
+  }
+
+  /* step over the two bytes that close the sequence */
+  if (end_seq) ABP_INC2;
+
+  return asnp->abp;
+}
+
+
+/* get_astr_textid() - parse a text seq-id.
+
+   After a sequence header (its absence is reported, parsing goes on)
+   each of name, accession, release and version is read if present,
+   in that order.  name and acc (up to MAX_SSTR each) are emptied
+   first and filled when present; release and version are parsed and
+   discarded.  Returns the updated parse position. */
+unsigned char *
+get_astr_textid( struct asn_bstruct *asnp,
+		char *name,
+		char *acc) {
+  int in_seq = 0;
+  long version;
+  char *func = "get_astr_textid";
+
+  asnp->abp = chk_asn_buf(asnp,32);
+
+  if (*asnp->abp == ASN_SEQ) {
+    asnp->abp += 2;
+    in_seq++;
+  }
+  else {
+    fprintf(stderr, "*** error [%s:%d] - %s - Expected ASN_SEQ: %0x %0x\n",__FILE__,__LINE__,func,ABP, asnp->abp[1]);
+  }
+
+  acc[0] = '\0';
+  name[0] = '\0';
+
+  if (*asnp->abp == ASN_BIOSEQ_TEXTID_NAME) {
+    asnp->abp += 2;
+    asnp->abp = get_astr_str(asnp, name, MAX_SSTR) + 2;
+  }
+
+  if (*asnp->abp == ASN_BIOSEQ_TEXTID_ACC) {
+    asnp->abp += 2;
+    asnp->abp = get_astr_str(asnp, acc, MAX_SSTR) + 2;
+  }
+
+  if (*asnp->abp == ASN_BIOSEQ_TEXTID_REL) {
+    asnp->abp += 2;
+    asnp->abp = get_astr_str(asnp, NULL, 0) + 2;
+  }
+
+  if (*asnp->abp == ASN_BIOSEQ_TEXTID_VER) {
+    asnp->abp += 2;
+    asnp->abp = get_astr_int(asnp, &version)+2;
+  }
+
+  /* step over the two bytes that close the sequence */
+  if (in_seq) { asnp->abp += 2; }
+  return asnp->abp;
+}
+
+/* get_astr_seqid() - parse seq-id entries up to a zero byte.
+
+   *gi is zeroed and acc emptied first.  An object or local id fills
+   name, a GI id fills *gi, and GenBank/EMBL/PIR/SwissProt/other ids
+   are parsed as text ids into name and acc.  Any other tag is
+   reported through asn_error() and parsing stops there.  Returns the
+   updated parse position. */
+unsigned char *
+get_astr_seqid (struct asn_bstruct *asnp,
+		long *gi,
+		char *name,
+		char *acc) {
+  int obj_type;
+  int obj_val;
+  int tag;
+
+  *gi = 0;
+  acc[0] = '\0';
+
+  for (;;) {
+    tag = *asnp->abp;
+    if (tag == '\0') { break; }
+
+    if (tag == ASN_BIOSEQ_ID_OBJ) {
+      asnp->abp += 2;
+      asnp->abp = get_astr_objid(asnp, &obj_type, &obj_val, name, MAX_SSTR) + 2;
+    }
+    else if (tag == ASN_BIOSEQ_ID_LOCAL) {
+      asnp->abp += 2;
+      asnp->abp = get_astr_str(asnp, name, MAX_SSTR) + 2;
+    }
+    else if (tag == ASN_BIOSEQ_ID_GI) {
+      asnp->abp += 2;
+      asnp->abp = get_astr_int(asnp, gi) + 2;
+    }
+    else if (tag == ASN_BIOSEQ_ID_GB || tag == ASN_BIOSEQ_ID_EMBL ||
+	     tag == ASN_BIOSEQ_ID_PIR || tag == ASN_BIOSEQ_ID_SP ||
+	     tag == ASN_BIOSEQ_ID_OTHER) {
+      /* all of these share the text-id layout */
+      asnp->abp += 2;
+      asnp->abp = get_astr_textid(asnp, name, acc)  + 2;
+    }
+    else {
+      return asn_error("get_atr_seqid", "", -1, asnp,4);
+    }
+  }
+
+  return asnp->abp;
+}
+
+/*
+Bioseq ::= SEQUENCE {
+    id SET OF Seq-id ,            -- equivalent identifiers
+    descr Seq-descr OPTIONAL , -- descriptors
+    inst Seq-inst,              -- the sequence data
+    annot SET OF Seq-annot OPTIONAL }
+*/
+
+/* modified 8-Nov-2009 to allow additional information after the inst */
+
+/*
+  get_astr_bioseq() - parse one Bioseq: the id set, an optional descr,
+  and the inst that carries the sequence data.
+
+  gi, name, acc - identifiers, filled by get_astr_seqid()
+  descr         - description; set to "" when no descr is present
+  query, nq     - sequence buffer and length, filled by get_astr_seqinst()
+
+  Returns the updated asnp->abp.  When the id or inst tag is missing a
+  diagnostic is printed and the current position is returned without
+  further parsing; a missing SET OF returns the result of asn_error().
+*/
+unsigned char *
+get_astr_bioseq(struct asn_bstruct *asnp,
+		long *gi,
+		char *name,
+		char *acc,
+		char *descr,
+		unsigned char **query,
+		int *nq
+		) {
+
+  int end_seq = 0;	/* non-zero once an opening ASN_SEQ was consumed */
+
+  asnp->abp = chk_asn_buf(asnp,64);
+
+  if (ABP == ASN_SEQ) {
+    end_seq++;
+    ABP_INC2;
+  }
+
+  if (ABP != ASN_BIOSEQ_ID) {
+    fprintf(stderr, "*** error [%s:%d] - Bioseq - missing ID tag: %2x %2x\n",__FILE__,__LINE__,ABP, asnp->abp[1]);
+    return asnp->abp;
+  }
+  else {
+  /* skip over bioseq-id tag */
+    ABP_INC2;
+    if (ABP == ASN_SETOF) {	/* jump over ASN_SETOF */
+      ABP_INC2;
+      asnp->abp = get_astr_seqid(asnp, gi, name, acc);
+      ABP_INC2; 		/* close ASN_SETOF */
+    }
+    else {
+      /* the tag tested above is ASN_SETOF; the error used to name ASN_SEQOF */
+      return asn_error("get_astr_bioseq","ASN_SETOF", ASN_SETOF, asnp, 4);
+    }
+    ABP_INC2;	/* jump over seq-id tag end */
+  }
+
+  if (ABP == ASN_BIOSEQ_DESCR) {
+    ABP_INC2;
+    asnp->abp = get_astr_seqdescr(asnp, descr);
+    ABP_INC2; 		/* skip nulls */
+  }
+  else { descr[0] = '\0';}
+
+  while (ABP == '\0') { ABP_INC2;}
+
+  if (ABP != ASN_BIOSEQ_INST) {
+    /* message corrected: this is the inst tag, not the id tag */
+    fprintf(stderr, "*** error [%s:%d] - Bioseq - missing INST tag: %2x %2x\n",__FILE__,__LINE__,ABP, asnp->abp[1]);
+    return asnp->abp;
+  }
+  else {
+    ABP_INC2;
+    asnp->abp = get_astr_seqinst(asnp, query, nq);
+    ABP_INC2; 		/* skip nulls */
+  }
+
+  /* close the ASN_SEQ opened at the top */
+  if (end_seq--) {
+    ABP_INC2;
+  }
+
+  return asnp->abp;
+}
+
+/*
+  get_pssm_intermed_null() reads an n_rows x n_cols array of values and
+  throws it away.  A single routine serves every element type: each
+  element is read by (*get_data_func)(), which is handed l_val_p and
+  d_val_p as scratch destinations.
+
+  Returns the updated asnp->abp.
+*/
+unsigned char *
+get_pssm_intermed_null(struct asn_bstruct *asnp,
+		       int n_rows,
+		       int n_cols,
+		       int by_row,
+		       unsigned char *(*get_data_func)(struct asn_bstruct *, long *, double *),
+		       long *l_val_p,
+		       double *d_val_p
+		       ) {
+
+  int n_outer, n_inner;
+  int i_outer, i_inner;
+  int wrapped;		/* an opening ASN_SEQ was consumed */
+
+  asnp->abp = chk_asn_buf(asnp,32);
+
+  wrapped = (ABP == ASN_SEQ);
+  if (wrapped) {
+    ABP_INC2;
+  }
+
+  /* nothing is stored, so by_row only decides which dimension drives
+     the outer loop */
+  if (by_row) {
+    n_outer = n_rows;
+    n_inner = n_cols;
+  }
+  else {
+    n_outer = n_cols;
+    n_inner = n_rows;
+  }
+
+  for (i_outer = 0; i_outer < n_outer; i_outer++) {
+    for (i_inner = 0; i_inner < n_inner; i_inner++) {
+      asnp->abp = (*get_data_func)(asnp, l_val_p, d_val_p);
+    }
+  }
+
+  asnp->abp = chk_asn_buf(asnp,32);
+  if (wrapped) {
+    asnp->abp +=2;	/* skip nulls */
+  }
+  ABP_INC2;
+  return asnp->abp;
+}
+
+/*
+  get_pssm_freqs() - read n_rows * n_cols packed reals into freqs.
+
+  freqs  - matrix indexed freqs[column][row]; get_pssm_intermed()
+           allocates it as n_cols pointers, each to n_rows doubles
+  by_row - zero: the data is read column by column;
+           non-zero: the data is read row by row
+
+  In both cases a value is stored at freqs[column][row].  The row-wise
+  branch used to store at freqs[row][column], which does not match the
+  allocation and reads a row pointer out of bounds when n_rows > n_cols.
+
+  Returns the updated asnp->abp.
+*/
+unsigned char *
+get_pssm_freqs(struct asn_bstruct *asnp,
+	       double **freqs,
+	       int n_rows,
+	       int n_cols,
+	       int by_row) {
+
+  int i_rows, i_cols;
+  int in_seq = 0;	/* non-zero once an opening ASN_SEQ was consumed */
+  long l_val;
+  double f_val;
+
+  asnp->abp = chk_asn_buf(asnp,64);
+
+  if (ABP == ASN_SEQ) {
+    ABP_INC2;
+    in_seq = 1;
+  }
+
+  if (!by_row) {
+    for (i_cols = 0; i_cols < n_cols; i_cols++) {
+      for (i_rows = 0; i_rows < n_rows; i_rows++) {
+	asnp->abp = get_astr_packedreal(asnp, &l_val, &f_val);
+	freqs[i_cols][i_rows] = f_val;
+      }
+    }
+  }
+  else {
+    for (i_rows = 0; i_rows < n_rows; i_rows++) {
+      for (i_cols = 0; i_cols < n_cols; i_cols++) {
+	asnp->abp = get_astr_packedreal(asnp, &l_val, &f_val);
+	/* only the read order differs; the storage layout does not */
+	freqs[i_cols][i_rows] = f_val;
+      }
+    }
+  }
+
+  asnp->abp = chk_asn_buf(asnp,32);
+  if (in_seq) {asnp->abp +=2;}	/* skip nulls */
+  ABP_INC2;
+  return asnp->abp;
+}
+
+/*
+  alloc_pssm_freq2d() - allocate an n_cols x n_rows matrix of doubles as
+  one zeroed block plus a vector of n_cols pointers into it, so that
+  mat[col][row] is valid.  label names the matrix in the error message.
+  Exits the program if either allocation fails.
+*/
+static double **
+alloc_pssm_freq2d(int n_rows, int n_cols, const char *label) {
+  double **mat;
+  int i;
+
+  if ((mat = (double **)calloc(n_cols, sizeof(double *)))==NULL) {
+    fprintf(stderr, "*** error [%s:%d] - cannot allocate %s cols - %d\n", __FILE__, __LINE__, label, n_cols);
+    exit(1);
+  }
+
+  /* multiply in size_t so that the product cannot overflow an int */
+  if ((mat[0] = (double *) calloc((size_t)n_cols * n_rows, sizeof(double)))==NULL) {
+    fprintf(stderr, "*** error [%s:%d] - cannot allocate %s rows * cols - %d * %d\n", __FILE__, __LINE__, label, n_rows, n_cols);
+    exit(1);
+  }
+
+  for (i=1; i < n_cols; i++) {
+    mat[i] = mat[i-1] + n_rows;
+  }
+  return mat;
+}
+
+/*
+  get_pssm_intermed() - parse the intermediate-data section of a PSSM.
+
+  Inside an ASN_SEQ, each optional field is handled only when its tag is
+  the next one seen, in the order below.  The weighted residue
+  frequencies and the frequency ratios are allocated and returned
+  through *wfreqs and *freqs; every other field is read and discarded.
+  *wfreqs / *freqs are left untouched when their field is absent.
+
+  Returns the updated asnp->abp.
+*/
+unsigned char *
+get_pssm_intermed(struct asn_bstruct *asnp,
+		  double ***wfreqs,
+		  double ***freqs,
+		  int n_rows,
+		  int n_cols,
+		  int by_row) {
+
+  long long_data;	/* scratch targets for the discarded fields */
+  double real_data;
+
+  asnp->abp = chk_asn_buf(asnp,32);
+
+  if (ABP == ASN_SEQ) {
+    ABP_INC2;
+    if (ABP == ASN_PSSM_INTERMED_RES_FREQS) {
+      ABP_INC2;
+      asnp->abp = get_pssm_intermed_null(asnp, n_rows, n_cols, by_row,
+					 &get_astr_packedint, &long_data, &real_data);
+    }
+
+    if (ABP == ASN_PSSM_INTERMED_WRES_FREQS) {
+      *wfreqs = alloc_pssm_freq2d(n_rows, n_cols, "wfreq");
+
+      ABP_INC2;
+      asnp->abp = get_pssm_freqs(asnp, *wfreqs, n_rows, n_cols, by_row);
+    }
+
+    if (ABP == ASN_PSSM_INTERMED_FREQ_RATIOS) {
+      *freqs = alloc_pssm_freq2d(n_rows, n_cols, "freq");
+
+      ABP_INC2;
+      asnp->abp = get_pssm_freqs(asnp, *freqs, n_rows, n_cols, by_row);
+    }
+
+    /* the remaining fields hold one value per column and are skipped */
+    if (ABP == ASN_PSSM_INTERMED_INFO_CONTENT) {
+      ABP_INC2;
+      asnp->abp = get_pssm_intermed_null(asnp, 1, n_cols, by_row,
+					 &get_astr_packedreal, &long_data, &real_data);
+    }
+
+    if (ABP == ASN_PSSM_INTERMED_GAPL_COLWTS) {
+      ABP_INC2;
+      asnp->abp = get_pssm_intermed_null(asnp, 1, n_cols, by_row,
+					 &get_astr_packedreal, &long_data, &real_data);
+    }
+
+    if (ABP == ASN_PSSM_INTERMED_SIGMA) {
+      ABP_INC2;
+      asnp->abp = get_pssm_intermed_null(asnp, 1, n_cols, by_row,
+					 &get_astr_packedreal, &long_data, &real_data);
+    }
+
+    if (ABP == ASN_PSSM_INTERMED_INTVAL_SIZE) {
+      ABP_INC2;
+      asnp->abp = get_pssm_intermed_null(asnp, 1, n_cols, by_row,
+					 &get_astr_packedint, &long_data, &real_data);
+    }
+
+    if (ABP == ASN_PSSM_INTERMED_NUM_MATCH_SEQ) {
+      ABP_INC2;
+      asnp->abp = get_pssm_intermed_null(asnp, 1, n_cols, by_row,
+					 &get_astr_packedint, &long_data, &real_data);
+    }
+
+    asnp->abp +=2;	/* skip nulls */
+  }
+  ABP_INC2;
+  return asnp->abp;
+}
+
+
+/* tag bytes compared against the current buffer byte while parsing the
+   PSSM parameters.  160/161/162 are 0xA0/0xA1/0xA2; the same value
+   recurs because each group of tags is only tested inside its own
+   enclosing structure (PARAMS_* in get_pssm_params(), RPSPARAMS_* in
+   get_pssm_rpsparams()). */
+#define ASN_PSSM_PARAMS 161
+#define ASN_PSSM_PARAMS_PSEUDOCNT 160
+#define ASN_PSSM_PARAMS_RPSPARAMS 161
+#define ASN_PSSM_RPSPARAMS_MATRIX 160
+#define ASN_PSSM_RPSPARAMS_GAPOPEN 161
+#define ASN_PSSM_RPSPARAMS_GAPEXT 162
+
+/*
+  get_pssm_rpsparams() - parse the matrix name and gap penalties.
+
+  Each field is read only when its tag is the next one seen; otherwise
+  the output gets a default:
+    matrix      - "BLOSUM62" (buffer is written with a MAX_SSTR limit)
+    *gap_open_p - -11
+    *gap_ext_p  - -1
+
+  Returns the updated asnp->abp.
+*/
+unsigned char *
+get_pssm_rpsparams(struct asn_bstruct *asnp,
+	       char *matrix,
+	       int *gap_open_p,
+	       int *gap_ext_p) {
+
+  int end_seq=0;	/* number of ASN_SEQ wrappers still to be closed */
+  long l_val;
+
+  asnp->abp = chk_asn_buf(asnp,32);
+
+  if (ABP == ASN_SEQ) {
+    ABP_INC2;
+    end_seq++;
+  }
+
+  asnp->abp = chk_asn_buf(asnp,32);
+  if (ABP == ASN_PSSM_RPSPARAMS_MATRIX) {
+    ABP_INC2;
+    asnp->abp = get_astr_str(asnp, matrix, MAX_SSTR) + 2;
+  }
+  else {
+    strncpy(matrix,"BLOSUM62", MAX_SSTR);
+  }
+
+  asnp->abp = chk_asn_buf(asnp,16);
+  if (ABP == ASN_PSSM_RPSPARAMS_GAPOPEN) {
+    ABP_INC2;
+    asnp->abp = get_astr_int(asnp, &l_val)+2;
+    *gap_open_p = l_val;
+  }
+  else {*gap_open_p = -11;}
+
+  asnp->abp = chk_asn_buf(asnp,16);
+  if (ABP == ASN_PSSM_RPSPARAMS_GAPEXT) {
+    ABP_INC2;
+    asnp->abp = get_astr_int(asnp, &l_val)+2;
+    *gap_ext_p = l_val;
+  }
+  else {*gap_ext_p = -1;}
+
+  /* make sure the closing bytes are buffered, then step over them */
+  if (end_seq) { chk_asn_buf(asnp,(end_seq * 2)+16); }
+  while (end_seq-- > 0) { ABP_INC2; }
+  return asnp->abp;
+}
+
+/*
+  get_pssm_final_scores() - read the final integer scores into a newly
+  allocated matrix.
+
+  *iscores - receives an n_cols x n_rows matrix, indexed
+             (*iscores)[column][row], allocated as one zeroed block
+  by_row   - zero: values are read column by column; non-zero: row by
+             row.  The storage layout is the same in both cases.
+
+  Returns the updated asnp->abp.  Exits if an allocation fails.
+*/
+unsigned char *
+get_pssm_final_scores(struct asn_bstruct *asnp, int ***iscores, int n_rows, int n_cols, int by_row) {
+
+  int i_rows, i_cols, i;
+  int in_seq = 0;	/* non-zero once an opening ASN_SEQ was consumed */
+  long l_val;
+
+  if (ABP == ASN_SEQ) { ABP_INC2; in_seq=1;}
+
+  /* the messages below used to name wfreq/freq; this is the score matrix */
+  if (((*iscores) = (int **) calloc(n_cols, sizeof(int *)))==NULL) {
+    fprintf(stderr, "*** error [%s:%d] - cannot allocate iscores cols - %d\n", __FILE__, __LINE__, n_cols);
+    exit(1);
+  }
+
+  if (((*iscores)[0] = (int *) calloc(n_cols * n_rows, sizeof(int)))==NULL) {
+    fprintf(stderr, "*** error [%s:%d] - cannot allocate iscores rows * cols - %d * %d\n", __FILE__, __LINE__, n_rows, n_cols);
+    exit(1);
+  }
+
+  /* point each column at its slice of the single block */
+  for (i=1; i < n_cols; i++) {
+    (*iscores)[i] = (*iscores)[i-1] + n_rows;
+  }
+
+  if (!by_row) {
+    for (i_cols = 0; i_cols < n_cols; i_cols++) {
+      for (i_rows = 0; i_rows < n_rows; i_rows++) {
+	asnp->abp = get_astr_int(asnp, &l_val);
+	(*iscores)[i_cols][i_rows] = l_val;
+      }
+    }
+  }
+  else {
+    for (i_rows = 0; i_rows < n_rows; i_rows++) {
+      for (i_cols = 0; i_cols < n_cols; i_cols++) {
+	asnp->abp = get_astr_int(asnp, &l_val);
+	(*iscores)[i_cols][i_rows] = l_val;
+      }
+    }
+  }
+
+  asnp->abp = chk_asn_buf(asnp,16);
+  if (in_seq) {asnp->abp +=2;}	/* skip nulls */
+  ABP_INC2;
+  return asnp->abp;
+}
+
+/*
+  get_pssm_params() - parse the PSSM parameter block.
+
+  pseudo_cnts - set only when the pseudocount field is present
+                (NOTE(review): left untouched otherwise - callers that
+                read it should initialize it first)
+  matrix, gap_open_p, gap_ext_p - filled by get_pssm_rpsparams() when
+                the rpsparams field is present; otherwise set to
+                "BLOSUM62", -11 and -1
+
+  Returns the updated asnp->abp.
+*/
+unsigned char *
+get_pssm_params(struct asn_bstruct *asnp,
+		int *pseudo_cnts,
+		char *matrix,
+		int *gap_open_p,
+		int *gap_ext_p) {
+
+  int end_seq=0;	/* number of ASN_SEQ wrappers still to be closed */
+  long l_val;
+
+  asnp->abp = chk_asn_buf(asnp,16);
+
+  if (ABP == ASN_SEQ) {
+    ABP_INC2;
+    end_seq++;
+  }
+
+  if (ABP == ASN_PSSM_PARAMS_PSEUDOCNT) {
+    ABP_INC2;
+    asnp->abp = get_astr_int(asnp, &l_val)+2;
+    *pseudo_cnts = l_val;
+  }
+
+  if (ABP == ASN_PSSM_PARAMS_RPSPARAMS) {
+    ABP_INC2;
+    asnp->abp = get_pssm_rpsparams(asnp, matrix, gap_open_p, gap_ext_p);
+    ABP_INC2;
+  }
+  else {
+    *gap_open_p = -11;
+    *gap_ext_p = -1;
+    strncpy(matrix,"BLOSUM62",MAX_SSTR);
+  }
+
+  while (end_seq-- > 0) { ABP_INC2; }
+  return asnp->abp;
+}
+
+/*
+  get_pssm2_scores() - look at the scores field of a version-2 PSSM
+  without reading any values.
+
+  have_scores - optional (may be NULL); set to 0 when the field is
+                empty and to 1 when it is not
+
+  Steps over an opening ASN_SEQ if present.  An empty field also has
+  its closing bytes consumed; a non-empty field is left unread.
+  Returns the updated asnp->abp.
+*/
+unsigned char *
+get_pssm2_scores(struct asn_bstruct *asnp,
+		 int *have_scores
+		 ) {
+
+  int opened_seq = 0;
+
+  if (have_scores != NULL) { *have_scores = 0; }
+
+  if (ABP == ASN_SEQ) {
+    opened_seq = 1;
+    ABP_INC2;
+  }
+
+  if (ABP != '\0') {
+    /* data is present: report it and leave it unread */
+    if (have_scores != NULL) { *have_scores = 1; }
+    return asnp->abp;
+  }
+
+  /* no scores: close the sequence opened above */
+  if (opened_seq) { ABP_INC2; }
+  return asnp->abp;
+}
+
+/*
+  get_pssm2_intermed() - allocate both frequency matrices for a
+  version-2 PSSM and read the frequency data.
+
+  *wfreqs, *freqs - each receives an n_cols x n_rows matrix of doubles
+                    (indexed [column][row], one zeroed block each)
+
+  Only *freqs is filled, through get_pssm_freqs() reading column by
+  column; *wfreqs is returned zeroed.
+
+  Returns the value returned by get_pssm_freqs().  Exits if an
+  allocation fails.
+*/
+unsigned char *
+get_pssm2_intermed(struct asn_bstruct *asnp,
+		   double ***wfreqs,
+		   double ***freqs,
+		   int n_rows,
+		   int n_cols) {
+
+  int i;
+  double **my_freqs, **my_wfreqs;
+
+  if ((my_freqs = (double **) calloc(n_cols, sizeof(double *)))==NULL) {
+    fprintf(stderr, "*** error [%s:%d] - cannot allocate freq cols - %d\n", __FILE__, __LINE__, n_cols);
+    exit(1);
+  }
+
+  if ((my_wfreqs = (double **) calloc(n_cols, sizeof(double *)))==NULL) {
+    fprintf(stderr, "*** error [%s:%d] - cannot allocate wfreq cols - %d\n", __FILE__, __LINE__, n_cols);
+    exit(1);
+  }
+
+  /* multiply in size_t so that the product cannot overflow an int */
+  if ((my_freqs[0] = (double *) calloc((size_t)n_cols * n_rows, sizeof(double)))==NULL) {
+    fprintf(stderr, "*** error [%s:%d] - cannot allocate freq rows * cols - %d * %d\n", __FILE__, __LINE__, n_rows, n_cols);
+    exit(1);
+  }
+
+  if ((my_wfreqs[0] = (double *) calloc((size_t)n_cols * n_rows, sizeof(double)))==NULL) {
+    fprintf(stderr, "*** error [%s:%d] - cannot allocate wfreq rows * cols - %d * %d\n", __FILE__, __LINE__, n_rows, n_cols);
+    exit(1);
+  }
+
+  /* point each column at its slice of the single block */
+  for (i=1; i < n_cols; i++) {
+    my_freqs[i] = my_freqs[i-1] + n_rows;
+    my_wfreqs[i] = my_wfreqs[i-1] + n_rows;
+  }
+
+  *wfreqs = my_wfreqs;
+  *freqs = my_freqs;
+
+  chk_asn_buf(asnp, 16);
+
+  return get_pssm_freqs(asnp, my_freqs, n_rows, n_cols, 0);
+}
+
+/*
+  parse_pssm2_asn() - parse the body of a version-2 PSSM; the caller
+  has already consumed the header and the version number.
+
+  gi, name, acc, descr, query, nq - filled from the query Bioseq
+  n_rows, n_cols - matrix dimensions, set when present in the file
+  wfreqs, freqs  - allocated by get_pssm2_intermed() when the
+                   frequency field is present
+  matrix         - matrix name, when present
+  lambda_p       - receives the packed real read from the
+                   ASN_PSSM2_KARLIN_K field, when present
+  iscores, pseudo_cnts - not written by this function
+
+  Returns 1 on success, 0 when the file carries a non-empty scores
+  field (which is not parsed), and -1 when the query or matrix header
+  is missing or the frequencies appear without valid dimensions.
+*/
+int
+parse_pssm2_asn(struct asn_bstruct *asnp,
+		long *gi,
+		char *name,
+		char *acc,
+		char *descr,
+		unsigned char **query,
+		int *nq,
+		int *n_rows,
+		int *n_cols,
+		double ***wfreqs,
+		double ***freqs,
+		int ***iscores,
+		int *pseudo_cnts,
+		char *matrix,
+		double *lambda_p) {
+
+  int is_protein;
+  int have_rows=0, have_cols=0;
+  long l_val;
+  int have_scores=0;
+
+  chk_asn_buf(asnp, 32);
+
+  /* first get the query */
+
+  if (memcmp(asnp->abp, "\241\2000\200",4) != 0) {
+    asn_error("parse_pssm2_asn","ASN_PSSM2_QUERY",ASN_PSSM2_QUERY,asnp,4);
+    return -1;
+  }
+  else {
+    asnp->abp+=4;
+    asnp->abp = get_astr_bioseq(asnp, gi, name, acc, descr, query, nq) + 4;
+  }
+
+  /* finish up the nulls */
+  /* perhaps we have parsed correctly and do not need this */
+  /*   while (ABP == '\0') { ABP_INC2;} */
+
+  if (memcmp(asnp->abp, "\242\2000\200",4) != 0) {
+    asn_error("parse_pssm2_asn","ASN_PSSM2_MATRIX",ASN_PSSM2_MATRIX,asnp,4);
+    return -1;
+  }
+  else {
+    asnp->abp+=4;
+
+    if (ABP == ASN_PSSM_IS_PROT) {
+      ABP_INC2;
+      asnp->abp = get_astr_bool(asnp, &is_protein)+2;
+    }
+
+    if (ABP == ASN_PSSM2_MATRIX_NAME) {
+      ABP_INC2;
+      asnp->abp = get_astr_str(asnp, matrix, MAX_SSTR) + 2;
+    }
+
+    if (ABP ==   ASN_PSSM2_NCOLS) {
+      ABP_INC2;
+      asnp->abp = get_astr_int(asnp, &l_val)+2;
+      *n_cols = l_val;
+      have_cols = 1;
+    }
+
+    if (ABP ==  ASN_PSSM2_NROWS) {
+      ABP_INC2;
+      asnp->abp = get_astr_int(asnp, &l_val)+2;
+      *n_rows = l_val;
+      have_rows = 1;
+    }
+
+    if (ABP == ASN_PSSM2_SCORES) {
+      /* right now, this is always empty */
+      ABP_INC2;
+      asnp->abp = get_pssm2_scores(asnp, &have_scores) + 2;
+      if (have_scores) return 0;
+    }
+
+    if (ABP == ASN_PSSM2_KARLIN_K) {
+      ABP_INC2;
+      asnp->abp = get_astr_packedreal(asnp, &l_val, lambda_p) + 2;
+    }
+
+    if (ABP == ASN_PSSM2_FREQS) {
+      /* *n_rows / *n_cols size the allocation below; refuse to use
+         them unless both were read from the file and are positive */
+      if (!have_rows || !have_cols || *n_rows <= 0 || *n_cols <= 0) {
+	fprintf(stderr, "*** error [%s:%d] - cannot allocate freq - missing rows/cols - %d/%d\n",
+		__FILE__,__LINE__, have_rows, have_cols);
+	return -1;
+      }
+
+      asnp->abp += 4;
+      asnp->abp = get_pssm2_intermed(asnp, wfreqs, freqs, *n_rows, *n_cols) + 4;
+    }
+  }
+
+  return 1;
+}
+
+/*
+  parse_pssm_asn() - read a binary ASN.1 PSSM from afd.
+
+  After the 4-byte header, an integer version marks a version-2 file,
+  which is handed to parse_pssm2_asn() (gap penalties are set to 0).
+  Otherwise the optional fields are read in order: is-protein, row and
+  column counts, the by-row flag, the query Bioseq, the intermediate
+  data (frequencies), the final data (scores and statistics, of which
+  only the scores are kept) and the parameters.
+
+  *wfreqs, *freqs, *iscores are set to NULL first and are only
+  allocated when the corresponding data is present.  When no parameter
+  block is present, matrix/gap_open/gap_ext default to
+  "BLOSUM62"/-11/-1.
+
+  Returns 1 on success; -1 for a bad header, a version other than 2,
+  or matrix data without row/column counts; for a version-2 file the
+  result of parse_pssm2_asn().  The parse buffer is released on every
+  return path.  Non-positive row/column counts terminate the program.
+*/
+int
+parse_pssm_asn(FILE *afd,
+	       long *gi,
+	       char *name,
+	       char *acc,
+	       char *descr,
+	       unsigned char **query,
+	       int *nq,
+	       int *n_rows,
+	       int *n_cols,
+	       double ***wfreqs,
+	       double ***freqs,
+	       int  ***iscores,
+	       int *pseudo_cnts,
+	       char *matrix,
+	       int *gap_open_p,
+	       int *gap_ext_p,
+	       double *lambda_p) {
+
+  int is_protein;
+  int pssm_version;
+  int ret_val;
+  long l_val;
+  long itmp;
+  int have_rows=0, have_cols=0, by_col=0;
+  double **my_freqs=NULL, **my_wfreqs=NULL, dtmp;
+  struct asn_bstruct *asnp;
+
+  *wfreqs = NULL;
+  *freqs = NULL;
+  *iscores = NULL;
+
+  if ((asnp = new_asn_bstruct(ASN_BUF)) == NULL) {
+    fprintf(stderr, "*** error [%s:%d] - cannot allocate ASN.1 buffer\n",__FILE__,__LINE__);
+    return -1;
+  }
+
+  asnp->fd = afd;
+  asnp->len = ASN_BUF;
+  /* start with the position at the end of the buffer (nothing buffered) */
+  asnp->abp = asnp->buf_max = asnp->buf + ASN_BUF;
+
+  chk_asn_buf(asnp, 32);
+
+  if (memcmp(asnp->abp, "0\200\240\200",4) != 0) {
+    fprintf(stderr, "*** error [%s:%d] - improper PSSM header\n",__FILE__,__LINE__);
+    free_asn_bstruct(asnp);
+    return -1;
+  }
+  else {asnp->abp+=4;}
+
+  if (ABP == ASN_IS_INT) {
+    asnp->abp = get_astr_int(asnp, &l_val)+2;
+    pssm_version = l_val;
+    if (pssm_version != 2) {
+      fprintf(stderr, "*** error [%s:%d] - PSSM2 version mismatch: %d\n",__FILE__,__LINE__,pssm_version);
+      free_asn_bstruct(asnp);
+      return -1;
+    }
+    *gap_open_p = *gap_ext_p = 0;
+    ret_val = parse_pssm2_asn(asnp, gi, name, acc, descr,
+			      query, nq,
+			      n_rows, n_cols,
+			      wfreqs, freqs, iscores,
+			      pseudo_cnts, matrix,
+			      lambda_p);
+    /* the buffer used to be leaked on this path */
+    free_asn_bstruct(asnp);
+    return ret_val;
+  }
+
+  if (ABP == ASN_SEQ) { asnp->abp += 2;  }
+
+  if (ABP == ASN_PSSM_IS_PROT ) {
+    ABP_INC2;
+    asnp->abp = get_astr_bool(asnp, &is_protein)+2;
+  }
+
+  if (ABP == ASN_PSSM_NROWS ) {
+    ABP_INC2;
+    asnp->abp = get_astr_int(asnp, &l_val)+2;
+    *n_rows = l_val;
+
+    if (*n_rows > 0) { have_rows = 1; }
+    else {
+      fprintf(stderr, "*** error [%s:%d] - bad n_row count\n",__FILE__,__LINE__);
+      exit(1);
+    }
+  }
+
+  if (ABP == ASN_PSSM_NCOLS ) {
+    ABP_INC2;
+    asnp->abp = get_astr_int(asnp, &l_val)+2;
+    *n_cols = l_val;
+    if (*n_cols > 0) {
+      have_cols = 1;
+    }
+    else {
+      fprintf(stderr, "*** error [%s:%d] - bad n_col count\n",__FILE__,__LINE__);
+      exit(1);
+    }
+  }
+
+  if (ABP == ASN_PSSM_BYCOL ) {
+    ABP_INC2;
+    asnp->abp = get_astr_bool(asnp, &by_col)+2;
+  }
+
+  /* we have read everything up to the query
+
+     n_cols gives us the query length, which we can allocate;
+  */
+
+  if (ABP == ASN_PSSM_QUERY ) {
+    asnp->abp+=4;	/* skip token and CHOICE */
+    asnp->abp = get_astr_bioseq(asnp, gi, name, acc, descr, query, nq) + 4;
+    *nq = *n_cols;
+  }
+
+  /* finish up the nulls */
+
+
+  while (ABP == '\0') { asnp->abp += 2;}
+
+  if (ABP == ASN_PSSM_INTERMED_DATA) {
+
+    if (!have_rows || !have_cols) {
+      fprintf(stderr, "*** error [%s:%d] - cannot allocate freq - missing rows/cols - %d/%d\n",
+	      __FILE__,__LINE__, have_rows, have_cols);
+      free_asn_bstruct(asnp);
+      return -1;
+    }
+
+    ABP_INC2;
+    asnp->abp = get_pssm_intermed(asnp, &my_wfreqs, &my_freqs, *n_rows, *n_cols, by_col);
+    *wfreqs = my_wfreqs;
+    *freqs = my_freqs;
+  }
+
+  if (ABP == ASN_PSSM_FINAL_DATA) {
+    ABP_INC2;
+    if (ABP == ASN_SEQ) { asnp->abp += 2;  }
+    if (ABP == ASN_PSSM_FINAL_DATA_SCORES) {
+
+      /* the score matrix is sized from the same counts as the freqs */
+      if (!have_rows || !have_cols) {
+	fprintf(stderr, "*** error [%s:%d] - cannot allocate scores - missing rows/cols - %d/%d\n",
+		__FILE__,__LINE__, have_rows, have_cols);
+	free_asn_bstruct(asnp);
+	return -1;
+      }
+
+      ABP_INC2;
+
+      asnp->abp = get_pssm_final_scores(asnp, iscores, *n_rows, *n_cols, by_col) + 2;
+
+      ABP_INC2;
+    }
+    /* the statistics below are read into scratch variables and dropped */
+    if (ABP == ASN_PSSM_FINAL_DATA_LAMBDA) {
+      ABP_INC2;
+      ABPP = get_astr_real(asnp, &dtmp) + 2;
+    }
+    if (ABP == ASN_PSSM_FINAL_DATA_KAPPA) {
+      ABP_INC2;
+      ABPP = get_astr_real(asnp, &dtmp) + 2;
+    }
+    if (ABP == ASN_PSSM_FINAL_DATA_H) {
+      ABP_INC2;
+      ABPP = get_astr_real(asnp, &dtmp) + 2;
+    }
+    if (ABP == ASN_PSSM_FINAL_DATA_SCALEF) {
+      ABP_INC2;
+      ABPP = get_astr_int(asnp, &itmp) + 2;
+    }
+    if (ABP == ASN_PSSM_FINAL_DATA_ULAMBDA) {
+      ABP_INC2;
+      ABPP = get_astr_real(asnp, &dtmp) + 2;
+    }
+    if (ABP == ASN_PSSM_FINAL_DATA_UKAPPA) {
+      ABP_INC2;
+      ABPP = get_astr_real(asnp, &dtmp) + 2;
+    }
+    if (ABP == ASN_PSSM_FINAL_DATA_UH) {
+      ABP_INC2;
+      ABPP = get_astr_real(asnp, &dtmp) + 2;
+    }
+    asnp->abp += 8;
+  }
+
+  if (ABP == ASN_PSSM_PARAMS ) {
+      ABP_INC2;
+      asnp->abp = get_pssm_params(asnp, pseudo_cnts, matrix, gap_open_p, gap_ext_p) + 2;
+  }
+  else {
+    *gap_open_p = -11;
+    *gap_ext_p = -1;
+    strncpy(matrix,"BLOSUM62",MAX_SSTR);
+    if (ABP == 0) {ABP_INC2;}
+  }
+
+  free_asn_bstruct(asnp);
+
+  return 1;
+}
+
+/*
+  parse_pssm_asn_fa() - read an ASN.1 PSSM with parse_pssm_asn() and
+  reorder the first COMPO_NUM_TRUE_AA entries of every freq2d column
+  through pssm_aa_order[].
+
+  Returns the parse_pssm_asn() result when it is <= 0, -1 when the row
+  count cannot be reordered safely, and 1 otherwise.  A file without
+  frequency ratios (*freq2d == NULL) is returned as-is with 1.
+*/
+int
+parse_pssm_asn_fa( FILE *fd,
+		   int *n_rows_p, int *n_cols_p,
+		   unsigned char **query,
+		   double ***wfreq2d,
+		   double ***freq2d,
+		   int ***iscores2d,
+		   char *matrix,
+		   int *gap_open_p,
+		   int *gap_extend_p,
+		   double *lambda_p
+		   ) {
+
+  int qi, rj;
+  long gi;
+  double tmp_freqs[COMPO_LARGEST_ALPHABET];
+  char name[MAX_SSTR], acc[MAX_SSTR], descr[MAX_STR];
+  int nq;
+  int pseudo_cnts;
+  int ret_val;
+
+  /* parse the file */
+
+  ret_val = parse_pssm_asn(fd, &gi, name, acc, descr, query, &nq,
+			   n_rows_p, n_cols_p, wfreq2d, freq2d, iscores2d,
+			   &pseudo_cnts, matrix, gap_open_p, gap_extend_p,
+			   lambda_p);
+
+  if (ret_val <=0) return ret_val;
+
+  /* parse_pssm_asn() leaves *freq2d NULL when the file has no
+     frequency ratios; there is nothing to reorder then */
+  if (*freq2d == NULL) return 1;
+
+  /* each column is copied into tmp_freqs[] and COMPO_NUM_TRUE_AA
+     entries are written back, so the row count must fit both */
+  if (*n_rows_p > COMPO_LARGEST_ALPHABET || *n_rows_p < COMPO_NUM_TRUE_AA) {
+    fprintf(stderr, "*** error [%s:%d] - unexpected PSSM row count: %d\n",
+	    __FILE__, __LINE__, *n_rows_p);
+    return -1;
+  }
+
+  for (qi = 0; qi < *n_cols_p; qi++) {
+    for (rj = 0; rj < *n_rows_p; rj++) { tmp_freqs[rj] = (*freq2d)[qi][rj];}
+
+    /* NOTE(review): assumes every pssm_aa_order[] value is < *n_rows_p - confirm */
+    for (rj = 0; rj < COMPO_NUM_TRUE_AA; rj++) {
+      (*freq2d)[qi][rj] = tmp_freqs[pssm_aa_order[rj]];
+    }
+  }
+
+  return 1;
+}
diff --git a/src/pthr_subs.h b/src/pthr_subs.h
new file mode 100644
index 0000000..60469c0
--- /dev/null
+++ b/src/pthr_subs.h
@@ -0,0 +1,49 @@
+/* $Id: pthr_subs.h 625 2011-03-23 17:21:38Z wrp $ */
+
+#include <pthread.h>
+
+/* error macro for thread calls: when status is non-zero, print string
+   followed by strerror(status) on stderr.  The body is wrapped in
+   do/while(0) so that "check(...);" is one statement even inside an
+   unbraced if/else, and string is printed through "%s" instead of
+   being used as the format.  Users must include <stdio.h> and
+   <string.h>. */
+
+#define check(status,string) \
+   do { if ((status) != 0) {fprintf(stderr,"%s",(string)); \
+     fprintf(stderr,"%s\n",strerror(status)); } } while (0)
+
+/*
+#define check(status,string) \
+     if (status == -1) perror(string)  */ /* error macro for thread calls */
+
+
+/* thread handles and synchronisation objects shared between the main
+   program and the worker threads.  A file that includes this header
+   without XTERNAL defined gets the definitions; a file that defines
+   XTERNAL first gets extern declarations only.
+   NOTE(review): start_thread is declared only in the XTERNAL branch;
+   its definition is not in this header - confirm where it lives. */
+#ifndef XTERNAL
+pthread_t *fa_threads=NULL;
+
+/* reader stuff */
+
+pthread_mutex_t reader_mutex;      /* empty buffer pointer structure lock */
+pthread_cond_t reader_cond_var;    /* condition variable for reader */
+
+pthread_mutex_t worker_mutex;      /* full buffer pointer structure lock */
+pthread_cond_t worker_cond_var;    /* condition variable for workers */
+
+/* condition variable stuff */
+
+pthread_mutex_t start_mutex;       /* start-up synchronisation lock */
+pthread_cond_t start_cond_var;     /* start-up synchronisation condition variable */
+
+#else
+extern pthread_t *fa_threads;
+
+/* mutex stuff */
+
+extern pthread_mutex_t reader_mutex;
+extern pthread_mutex_t worker_mutex;
+
+/* condition variable stuff */
+
+extern pthread_cond_t reader_cond_var;
+extern pthread_cond_t worker_cond_var;
+
+/* start-up synchronisation */
+extern pthread_mutex_t start_mutex;
+extern pthread_cond_t start_cond_var;
+extern int start_thread;
+
+#endif
diff --git a/src/pthr_subs2.c b/src/pthr_subs2.c
new file mode 100644
index 0000000..e64d81c
--- /dev/null
+++ b/src/pthr_subs2.c
@@ -0,0 +1,377 @@
+/* $Id: pthr_subs2.c 625 2011-03-23 17:21:38Z wrp $ */
+
+/* copyright (c) 1996, 1997, 1998, 1999, 2014 by William R. Pearson and
+   The Rector & Visitors of the University of Virginia */
+
+/* Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under this License is distributed on an "AS
+   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+   express or implied.  See the License for the specific language
+   governing permissions and limitations under the License. 
+*/
+
+/* modified to do more initialization of work_info here, rather than in main() */
+
+/* this file isolates the pthreads calls from the main program */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <sys/types.h>
+#ifdef UNIX
+#include <unistd.h>
+#endif
+#include <signal.h>
+
+#include "defs.h"
+#include "structs.h"		/* mngmsg, libstruct */
+#include "param.h"		/* pstruct, thr_str, buf_head, rstruct */
+#include "thr_buf_structs.h"
+
+#include <pthread.h>
+#define XTERNAL
+#include "thr_bufs2.h"
+#undef XTERNAL
+#include "pthr_subs.h"
+
+extern void work_thread (struct thr_str *);
+
+/* init_thr() - set up work_info[] and the synchronisation objects,
+   then create the worker threads.
+
+   nthreads  - number of worker threads (and work_info[] entries)
+   work_info - per-thread argument blocks; entry i is passed to thread i
+   m_msp, ppst, aa0, m_bufi_p - search state copied/referenced into
+               every work_info[] entry
+
+   fa_threads is allocated on the first call only.  Exits if that
+   allocation fails; pthread failures are reported through check()
+   but do not stop the program. */
+
+void init_thr(int nthreads, struct thr_str *work_info,
+	      const struct mngmsg *m_msp, struct pstruct *ppst,
+	      unsigned char *aa0, struct mng_thr *m_bufi_p)
+{
+  int status, i;
+  pthread_attr_t thread_attr;
+
+  if (fa_threads == NULL && (fa_threads=(pthread_t *)calloc(nthreads, sizeof(pthread_t)))==NULL) {
+      fprintf(stderr, "Cannot allocate %d pthread_t\n",nthreads);
+      exit(1);
+  }
+
+  /* set up work_info[] structure, set parameters */
+
+  for (i=0; i<nthreads; i++) {
+    work_info[i].m_msp = m_msp;
+    work_info[i].n0 = m_msp->n0;
+    work_info[i].nm0 = m_msp->nm0;
+    work_info[i].qframe = m_msp->qframe;
+    work_info[i].qshuffle = m_msp->qshuffle;
+    work_info[i].ppst = ppst;
+    work_info[i].aa0 = aa0;
+    work_info[i].max_work_buf=m_bufi_p->max_work_buf;
+    work_info[i].worker=i;
+    work_info[i].max_tot=m_msp->max_tot;
+  }
+
+  /* mutex and condition variable initialisation */
+
+  status = pthread_mutex_init(&reader_mutex, NULL);
+  check(status,"Reader_mutex init bad status\n");
+
+  status = pthread_mutex_init(&worker_mutex, NULL);
+  check(status,"Worker_mutex init bad status\n");
+
+  status = pthread_cond_init(&reader_cond_var, NULL);
+  check(status,"Reader_cond_var init bad status\n");
+
+  status = pthread_cond_init(&worker_cond_var, NULL);
+  check(status,"Worker_cond_var init bad status\n");
+
+  status = pthread_mutex_init(&start_mutex, NULL);
+  check(status,"Start_mutex init bad status\n");
+
+  status = pthread_cond_init(&start_cond_var, NULL);
+  check(status,"Start_cond_var init bad status\n");
+
+  /* change stacksize on threads */    /***************************/
+
+  status = pthread_attr_init( &thread_attr );
+  check(status,"attribute create bad status\n");
+
+#ifdef IRIX
+  /* pthread_attr_setscope() returns an int status; compare with 0, not NULL */
+  if (pthread_attr_setscope( &thread_attr, 2) != 0)
+    status = pthread_attr_setscope( &thread_attr,PTHREAD_SCOPE_PROCESS);
+  check(status,"set scope on IRIX bad status\n");
+#endif
+
+#ifdef FASTA_setscope
+  status = pthread_attr_setscope( &thread_attr, PTHREAD_SCOPE_SYSTEM);
+  check(status,"set scope bad status\n");
+#endif
+
+  /* start the worker threads */
+
+  for (i=0; i < nthreads; i++) {
+    /**********************/
+    status=pthread_create(&fa_threads[i],&thread_attr,
+			  (void *(*)(void *))&work_thread,&work_info[i]);
+    check(status,"Pthread_create failed\n");
+  }
+
+  /* the attribute object is only needed while creating threads;
+     it was previously never released */
+  status = pthread_attr_destroy( &thread_attr );
+  check(status,"attribute destroy bad status\n");
+}
+
+/* start_mutex/start_cond_var provides exclusive access to
+   extern int start_thread.
+
+   start_thr() clears start_thread under start_mutex and broadcasts on
+   start_cond_var, releasing every thread blocked in wait_thr(). */
+
+void start_thr()
+{
+  int status;
+
+  /* tell threads to proceed */
+
+  status = pthread_mutex_lock(&start_mutex);
+  check(status,"Start_mutex lock bad status in main\n");
+
+  start_thread = 0;  /* lower predicate */
+
+  /* the broadcast status used to be overwritten without being looked at */
+  status = pthread_cond_broadcast(&start_cond_var);
+  check(status,"Start_cond_var broadcast bad status in main\n");
+
+  status = pthread_mutex_unlock(&start_mutex);
+  check(status,"Start_mutex unlock bad status in main\n");
+}
+
+/* get_rbuf() provides buffers containing sequences to the main program 
+   initially, max_work_buf buffers are allocated and are available.
+   As the main program runs, it calls get_rbuf() to get a reader
+   buffer, fills it with sequences, and puts it on the queue with
+   put_rbuf().
+
+   At the same time, the worker programs call get_wbuf(), which gets a
+   filled buffer put on the queue by put_rbuf(), takes the sequences
+   from the buffer and does the comparisons, and puts the results back
+   in that buffer, finally calling put_wbuf().
+
+   locks on reader_mutex
+   increments reader_buf_readp
+*/
+
+/* get_rbuf() - wait until at least one reader buffer is available,
+   then hand the next one to the caller through *cur_buf.
+
+   Takes reader_mutex, clears reader_wait, and sleeps on
+   reader_cond_var while num_reader_bufs is 0.  reader_buf_readp is a
+   circular index that wraps at max_work_buf. */
+void get_rbuf(struct buf_head **cur_buf, int max_work_buf)
+{
+  int status;
+
+  status = pthread_mutex_lock(&reader_mutex);  /* lock reader_buf structure */
+
+  /* only put_wbuf() reads this flag; it is set by wait_rbuf() */
+  reader_wait = 0;
+
+  check(status,"Reader_mutex lock in master bad status\n");
+
+  /* no reader bufs:  wait for signal to proceed */
+  while (num_reader_bufs == 0) {
+    pthread_cond_wait(&reader_cond_var,&reader_mutex);
+  }
+
+  *cur_buf = reader_buf[reader_buf_readp];  /* get the buffer address */
+  reader_buf_readp = (reader_buf_readp+1)%(max_work_buf);  /* increment index */
+  num_reader_bufs--;
+
+  /* fprintf(stderr, " rb: %3d consumed by %lld\n",num_reader_bufs,pthread_self()); */
+
+  status = pthread_mutex_unlock(&reader_mutex);  /* unlock structure */
+  check(status,"Reader_mutex unlock in master bad status\n");
+}
+
+/* put_rbuf() takes a buffer filled with sequences to be compared and
+   puts it in the queue.
+
+   locks on worker_mutex
+   increments worker_buf_readp (circular, wraps at max_work_buf)
+   signals worker_cond_var to wake one waiting worker
+ */
+
+void put_rbuf(struct buf_head *cur_buf, int max_work_buf)
+{
+  int status;
+
+  /* give the buffer to a thread, and wait for more */
+  status = pthread_mutex_lock(&worker_mutex);  /* lock worker_buf_structure */
+  check(status,"Worker_mutex lock in master bad status\n");
+
+  /*  Put buffer onto available for workers list */
+  worker_buf[worker_buf_readp] = cur_buf;
+  worker_buf_readp = (worker_buf_readp+1)%(max_work_buf);
+  num_worker_bufs++;   /* increment number of buffers available to workers */
+
+  /*  Signal one worker to wake and start work;
+      the status used to be overwritten without being looked at */
+  status = pthread_cond_signal(&worker_cond_var);
+  check(status,"Worker_cond_var signal in master bad status\n");
+
+  status = pthread_mutex_unlock(&worker_mutex);
+  check(status,"Worker_mutex unlock in master bad status\n");
+}
+
+/* this function is not currently used
+
+   put_rbuf_done() queues one last buffer for the workers, sets
+   reader_done, wakes every worker and then joins all nthreads
+   threads in fa_threads[]. */
+void put_rbuf_done(int nthreads, struct buf_head *cur_buf, int max_work_buf)
+{
+  int status, i;
+  void *exit_value;
+
+  /* give the last buffer to a thread */
+  status = pthread_mutex_lock(&worker_mutex);  /* lock worker_buf_structure */
+  check(status,"Worker_mutex lock in master bad status\n");
+
+  /*  Put buffer onto available for workers list */
+  worker_buf[worker_buf_readp] = cur_buf;
+  worker_buf_readp = (worker_buf_readp+1)%(max_work_buf);
+  num_worker_bufs++;   /* increment number of buffers available to workers */
+
+  reader_done = 1;	/* this causes the next get_wbuf() in the
+			   thread to return 0 which causes the thread
+			   to exit the main while() loop and quit */
+
+  /*  Wake all workers so that each one can see reader_done;
+      the status used to be overwritten without being looked at */
+  status = pthread_cond_broadcast(&worker_cond_var);
+  check(status,"Worker_cond_var broadcast in master bad status\n");
+
+  status = pthread_mutex_unlock(&worker_mutex);
+  check(status,"Worker_mutex unlock in master bad status\n");
+
+  /* wait for every worker thread to exit */
+
+  for (i=0; i < nthreads; i++) {
+    status = pthread_join( fa_threads[i], &exit_value);
+    check(status,"Pthread_join bad status\n");
+  }
+}
+
+/* wait_rbuf() -- wait for the worker threads to finish with the
+   current sequence buffers.
+
+   Blocks on reader_cond_var until num_reader_bufs has reached
+   used_reader_bufs.  reader_wait is set so that put_wbuf() signals on
+   every returned buffer, not only on the first one.
+*/
+void wait_rbuf(int used_reader_bufs) {
+  int status;
+
+  /* lock the reader_buf structure */
+  status = pthread_mutex_lock(&reader_mutex);
+  check(status,"wait_rbuf reader_mutex lock in worker bad status\n");
+
+  reader_wait = 1;
+
+  /*  buffers still held by workers:  wait for them to be returned */
+  while (num_reader_bufs < used_reader_bufs) {
+    /* wait for a signal that another buffer has been returned */
+    pthread_cond_wait(&reader_cond_var,&reader_mutex);
+  }
+
+  status = pthread_mutex_unlock(&reader_mutex);
+  check(status,"wait_rbuf reader_mutex unlock in worker bad status\n");
+}
+
+/*
+  Called once at the end of each search.
+
+  rbuf_done() sets reader_done under worker_mutex, wakes every worker
+  so that its next get_wbuf() returns 0, then joins all nthreads
+  threads in fa_threads[].
+ */
+
+void rbuf_done(int nthreads)
+{
+  int status, i;
+  void *exit_value;
+
+  status = pthread_mutex_lock(&worker_mutex);  /* lock worker_buf_structure */
+  check(status,"Worker_mutex lock in master bad status\n");
+
+  reader_done = 1;	/* this causes the next get_wbuf() in the
+			   thread to return 0 which causes the thread
+			   to exit the main while() loop and quit */
+
+  /*  Wake all workers so that each one can see reader_done;
+      the status used to be overwritten without being looked at */
+  status = pthread_cond_broadcast(&worker_cond_var);
+  check(status,"Worker_cond_var broadcast in master bad status\n");
+
+  status = pthread_mutex_unlock(&worker_mutex);
+  check(status,"Worker_mutex unlock in master bad status\n");
+
+  /* wait for every worker thread to exit */
+
+  for (i=0; i < nthreads; i++) {
+    status = pthread_join( fa_threads[i], &exit_value);
+    check(status,"Pthread_join bad status\n");
+  }
+}
+
+/* wait for extern int start_thread == 0
+
+   wait_thr() blocks on start_cond_var, holding start_mutex, until
+   start_thr() has cleared start_thread.  The predicate is re-tested
+   after every wake-up. */
+
+void wait_thr()
+{
+  int status;
+
+  /* Wait on master to give start signal */
+  status = pthread_mutex_lock(&start_mutex);
+  check(status,"Start_mutex lock bad status in worker\n");
+
+  while (start_thread) {
+         status = pthread_cond_wait(&start_cond_var, &start_mutex);
+         check(status,"Start_cond_wait bad status in worker\n");
+  }
+
+  status = pthread_mutex_unlock(&start_mutex);
+  check(status,"Start_mutex unlock bad status in worker\n");
+}
+
+/* get_wbuf() -- used in worker threads
+   get a buffer full of sequences to be compared from the main program
+
+   locks on worker_mutex
+   increments worker_buf_workp (circular, wraps at max_work_buf)
+
+   returns 1 with *cur_buf set to the next queued buffer, or 0 (with
+   *cur_buf untouched) when the queue is empty and reader_done is set
+ */
+
+int get_wbuf(struct buf_head **cur_buf, int max_work_buf)
+{
+  int status;
+
+  /* get a buffer to work on */
+  status = pthread_mutex_lock(&worker_mutex);
+  check(status,"First worker_mutex lock in worker bad status\n");
+
+  /*  No worker_bufs available:  wait for reader to produce some */
+  while (num_worker_bufs == 0) {
+    /*  Exit if reader has finished */
+    if (reader_done) {
+      pthread_mutex_unlock(&worker_mutex);
+      return 0;
+    }
+    /* reader_done is re-tested after every wake-up */
+    pthread_cond_wait(&worker_cond_var,&worker_mutex);
+  } /* end while */
+
+  /*  Get the buffer from list */
+  *cur_buf = worker_buf[worker_buf_workp];
+  worker_buf_workp = (worker_buf_workp+1)%(max_work_buf);
+  num_worker_bufs--;
+
+  status = pthread_mutex_unlock(&worker_mutex);
+  check(status,"First worker_mutex unlock in worker bad status\n");
+  return 1;
+}
+
+/* put_wbuf() -- called in worker threads
+   return a filled buffer of scores to the main program
+
+   locks on reader_mutex
+   increments reader_buf_workp (circular, wraps at max_work_buf)
+   signals reader_cond_var when the list goes from empty to one
+   buffer, or on every call while reader_wait is set
+ */
+void put_wbuf(struct buf_head *cur_buf, int max_work_buf)
+{
+  int status;
+
+  /* put buffer back on list for reader */
+  status = pthread_mutex_lock(&reader_mutex);
+  check(status,"Reader_mutex lock in worker bad status\n");
+    
+  reader_buf[reader_buf_workp] = cur_buf;
+  reader_buf_workp = (reader_buf_workp+1)%(max_work_buf);
+  num_reader_bufs++;
+
+  /*   fprintf(stderr, " rb: %3d produced\n",num_reader_bufs); */
+
+  /* we used to do this only when num_reader_bufs==1 */
+  /* reader_wait is set by wait_rbuf(), which waits for a count above 1 */
+  if (num_reader_bufs == 1 || reader_wait == 1) {
+    pthread_cond_signal(&reader_cond_var);
+  }
+
+  status = pthread_mutex_unlock(&reader_mutex);
+  check(status,"Reader_mutex unlock in worker bad status\n");
+}
diff --git a/src/randtest.c b/src/randtest.c
new file mode 100644
index 0000000..2c7cf7e
--- /dev/null
+++ b/src/randtest.c
@@ -0,0 +1,38 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/time.h>
+
+void *my_srand(int);
+/* my_nrand() used to be called with no declaration in scope, which made
+   it an implicit "int my_nrand()".  The declaration below spells out
+   that same assumption so the call is accepted by C99 and later.
+   NOTE(review): confirm the return type against the definition. */
+int my_nrand(int, void *);
+
+/* randtest -- print a series of values from my_nrand()
+
+   usage: randtest [n]
+
+   Gets a generator state from my_srand(0) and prints n values of
+   my_nrand(n, state), one per line.  n is taken from argv[1] with
+   atoi(), and is 10 when no argument is given.
+
+   returns: 0
+ */
+int
+main(int argc, char **argv)
+{
+  int i, n, s;
+  struct timeval t;	/* only referenced by the disabled seed printout below */
+  void *my_rand_state;
+
+  if (argc < 2) n = 10;
+  else n = atoi(argv[1]);
+
+  /*
+    gettimeofday(&t,NULL);
+    printf(" seed: %d\n",t.tv_usec);
+  */  
+
+  my_rand_state = my_srand(0);
+
+  for (i=0; i<n; i++) {
+    s = my_nrand(n,my_rand_state);
+    printf("%d\n",s);
+  }
+
+  /*
+  for (i=0; i< 9999; i++) {
+
+  }
+  n = my_nrand(2147483648,my_rand_state);
+  printf("number 10000: %d\n",n);
+  n = my_nrand(2147483648,my_rand_state);
+  printf("number 10001: %d\n",n);
+  */
+
+  return 0;
+}
diff --git a/src/re_getlib.c b/src/re_getlib.c
new file mode 100644
index 0000000..a25e350
--- /dev/null
+++ b/src/re_getlib.c
@@ -0,0 +1,146 @@
+/* re_getlib.c - re-acquire a sequence given lseek, lcont */
+
+/* $Id: re_getlib.c 1227 2013-09-26 19:19:28Z wrp $  */
+
+/* copyright (C) 2005, 2008, 2014 by William R. Pearson and The Rector &
+   Visitors of the University of Virginia */
+
+/* Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under this License is distributed on an "AS
+   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+   express or implied.  See the License for the specific language
+   governing permissions and limitations under the License. 
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "defs.h"
+#include "structs.h"
+#include "mm_file.h"
+#define XTERNAL
+#include "uascii.h"
+
+#define GETLIB (m_fptr->getlib)
+
+/* modified Feb, 2008 to provide aa1a - annotation string */
+extern int ann_scan(unsigned char *aa0, int n0, unsigned char **aa0a_p, int seqtype);
+/* re_getlib() -- read a library sequence again into aa1
+
+   Calls the library's read function (GETLIB, i.e. m_fptr->getlib) once
+   per piece: lcont-1 times to step over earlier pieces when lcont > 1,
+   then once more for the piece that is returned.  When annot_p is
+   non-NULL and m_fptr->mm_flg is not set, that last read temporarily
+   swaps m_fptr->sascii for l_ann_ascii and passes the result through
+   ann_scan().  If m_fptr->opt_text is non-empty it is parsed as
+   "-stop" or "start-stop" and the piece is trimmed to that range, with
+   *l_off_p advanced by the start offset.
+
+   side effects: clears m_fptr->sel_acc_p, resets *loffset to 0 and
+   adds to it for continued pieces, may allocate *annot_p with calloc().
+
+   returns: n1, the length from the last read after the adjustments
+   above, plus loff when the piece was stored at &aa1[loff]
+ */
+int
+re_getlib(unsigned char *aa1,
+	  struct annot_str **annot_p,	/* annotation record to fill; may be NULL */
+	  int maxn,	/* longest aa1 */
+	  int maxt3,	/* alternate maxn */
+	  int loff,	/* overlap */
+	  int lcont,	/* lcont-1 earlier pieces are read before the returned one */
+	  int term_code,	/* when non-zero and m_fptr->lib_aa is set, appended to a piece not already ending in it */
+	  long *loffset,	/* offset from real start of sequence */
+	  long *l_off_p,	/* coordinate of sequence start */
+	  struct lmf_str *m_fptr) {	/* library descriptor: getlib, sascii, mm_flg, lib_aa, opt_text */
+
+  unsigned char *aa1ptr;	/* where the next read is stored: aa1 or &aa1[loff] */
+  int *sascii_save;	/* holds m_fptr->sascii while l_ann_ascii is swapped in */
+  int icont, maxt, ccont, n1;
+  char libstr[MAX_UID];
+  fseek_t lmark; 
+  int sstart, sstop, is, id;	/* subset range and copy indices for opt_text */
+  
+  aa1ptr = aa1;
+  icont=0;
+
+  /* no longer do selection */
+  m_fptr->sel_acc_p = NULL;
+
+  *loffset = 0l;
+  maxt = maxn;
+  n1 = -1;
+
+  /* to process sequences in pieces properly, if lcont > 0, then we
+     must read all but the last sequence using the scanning sascii,
+     and then read the last piece using the ann_ascii */
+
+  if (lcont > 1) {
+    for (ccont=0; ccont<lcont-1; ccont++) {
+
+      n1= GETLIB(aa1ptr,maxt,libstr,sizeof(libstr),&lmark,&icont,m_fptr,l_off_p);
+      /* NOTE(review): aa1ptr[n1-1] below is read without checking that
+         n1 > 0 -- confirm GETLIB cannot return 0 or less here */
+      if (term_code && m_fptr->lib_aa && aa1ptr[n1-1]!=term_code) {
+	aa1ptr[n1++]=term_code;
+	aa1ptr[n1]=0;
+      }
+
+      if (aa1ptr!=aa1) n1 += loff;	/* count the overlap kept at the front of aa1 */
+
+      if (icont) {	/* GETLIB set icont: carry the last loff residues to the front */
+	maxt = maxt3;
+	memcpy(aa1,&aa1[n1-loff],loff);
+	aa1ptr= &aa1[loff];
+	*loffset += n1 - loff;
+      }
+      else {	/* icont not set: next read starts at the front of aa1 */
+	maxt = maxn;
+	aa1ptr=aa1;
+      }
+    }
+  }
+
+  /* for the last one, replace m_fptr->sascii with ann_ascii[], and
+     read the sequence */
+
+  /* change sascii matrix only if there are annotations - otherwise
+     l_ann_ascii is not initialized */
+  /* cannot scan for annotations in memory mapped files */
+  if (annot_p != NULL && !m_fptr->mm_flg) {
+    if (*annot_p || (*annot_p = (struct annot_str *)calloc(1,sizeof(struct annot_str)))!=NULL) {
+      sascii_save = m_fptr->sascii;
+      m_fptr->sascii = l_ann_ascii;
+      n1= GETLIB(aa1ptr,maxt,libstr,sizeof(libstr),&lmark,&icont,m_fptr,l_off_p);
+      m_fptr->sascii = sascii_save;
+      n1 = ann_scan(aa1ptr,n1,&((*annot_p)->aa1_ann),0);
+    }
+    else {	/* allocation failed: read without annotation scanning */
+      fprintf(stderr,"re_getlib.c: cannot allocate annot_p\n");
+      n1= GETLIB(aa1ptr,maxt,libstr,sizeof(libstr),&lmark,&icont,m_fptr,l_off_p);
+    }
+  }
+  else {
+    n1= GETLIB(aa1ptr,maxt,libstr,sizeof(libstr),&lmark,&icont,m_fptr,l_off_p);
+  }    
+  /* NOTE(review): as in the loop above, aa1ptr[n1-1] is read without
+     checking that n1 > 0 */
+  if (term_code && m_fptr->lib_aa && aa1ptr[n1-1]!=term_code) {
+    aa1ptr[n1++]=term_code;
+    aa1ptr[n1]=0;
+  }
+
+    /* check for subset */
+    if (m_fptr->opt_text[0]!='\0') {
+      if (m_fptr->opt_text[0]=='-') {	/* "-stop": keep from the start up to stop */
+	sstart=0; sscanf(&m_fptr->opt_text[1],"%d",&sstop);	/* NOTE(review): sstop stays unset if nothing is parsed */
+      }
+      else {	/* "start-stop", start counted from 1 */
+	sstart = 0; sstop = -1;
+	sscanf(&m_fptr->opt_text[0],"%d-%d",&sstart,&sstop);
+	sstart--;	/* NOTE(review): becomes -1 if no start was parsed or start was 0 */
+	if (sstop <= 0 ) sstop = BIGNUM;
+      }
+      /* move residues sstart..min(n1,sstop)-1 down to the front of the piece */
+      n1 = min(n1, sstop);
+      for (id=0,is=sstart; is<n1; ) {
+	aa1ptr[id++]=aa1ptr[is++];
+      }
+      aa1ptr[id]='\0';
+      n1 -= sstart;
+      *l_off_p += sstart;
+    }
+
+  if (aa1ptr!=aa1) n1 += loff;	/* count the overlap kept at the front of aa1 */
+
+  return n1;
+}
diff --git a/src/res_stats.c b/src/res_stats.c
new file mode 100644
index 0000000..95271e5
--- /dev/null
+++ b/src/res_stats.c
@@ -0,0 +1,703 @@
+/* $Id: res_stats.c 1227 2013-09-26 19:19:28Z wrp $  */
+
+/* copyright (C) 2005, 2014 by William R. Pearson and The Rector &
+   Visitors of the University of Virginia */
+
+/* Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under this License is distributed on an "AS
+   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+   express or implied.  See the License for the specific language
+   governing permissions and limitations under the License. 
+*/
+
+/* calculate stats from results file using scalesws.c */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <limits.h>
+#include <math.h>
+
+#define MAX_LLEN 200
+
+#define LN_FACT 10.0
+
+#include "defs.h"
+#include "structs.h"
+#include "param.h"
+
+struct beststr {	/* one retained result entry; main() fills one for every ndup result lines */
+  int score;	/* smith-waterman score */
+  int sscore;	/* duplicate for compatibility with fasta */
+  double comp;	/* comp value read from the results line */
+  double H;	/* H value read from the results line */
+  double zscore;	/* find_z() result once statistics exist, otherwise the raw score */
+  double escore;	/* zs_to_E() result; main() sets it only when zsflag is non-zero */
+  int n1;	/* n1 value read from the results line (main() also sums it into db.length) */
+#ifndef USE_FTELLO
+  long lseek;	/* position in library file */
+#else
+  off_t lseek;	/* position in library file (off_t variant) */
+#endif
+  int cont;	/* offset into sequence */
+  int frame;
+  int lib;	/* second field of the results line (lsfnum in main()) */
+  char libstr[13];	/* first 12 characters of the results-line name, NUL terminated */
+} *bbp, *bestptr, **bptr, *best;	/* best: record array, bptr: pointer array; both allocated by initbest() */
+
+struct stat_str {	/* one sample in the stats[] array that main() passes to process_hist() */
+  int score;	/* max_s: highest selected-column score among the ndup lines of the entry */
+  int n1;	/* n1 value read from the results line */
+  double comp;	/* comp value read from the results line */
+  double H;	/* H value read from the results line */
+};
+
+static struct db_str qtt = {0l, 0l, 0};
+
+char info_gstring2[MAX_STR];                  /* string for label */
+char info_gstring3[MAX_STR];
+char info_hstring1[MAX_STR];
+
+FILE *outfd;
+
+int nbest;	/* number of sequences better than bestcut in best */
+int bestcut=1; 	/* cut off for getting into MAX_BEST */
+int bestfull;
+
+int dohist = 0;
+int zsflag = 1;
+int outtty=1;
+int llen=40;
+
+/* statistics functions */
+extern void
+process_hist(struct stat_str *sptr, int nstat, struct pstruct pst,
+	     struct hist_str *hist, void **);
+extern void addhistz(double, struct hist_str *); /* scaleswn.c */
+void selectbestz(struct beststr **, int, int );
+
+extern double zs_to_E(double, int, int, long, struct db_str);
+extern double zs_to_Ec(double zs, long entries);
+
+extern double find_z(int score, int length, double comp, void *);
+
+void prhist(FILE *, struct mngmsg, struct pstruct, struct hist_str, 
+	    int, struct db_str, char *);
+
+int nshow=20, mshow=50, ashow= -1;
+double e_cut=10.0;
+
+/* res_stats -- calculate statistics from a results file
+
+   usage: res_stats [-c col] [-r bin_file] [-z zsflag] [-n] [-s ndup] [-q] file
+
+     -c col       which of the three score columns to use (1-3, default 1)
+     -r bin_file  file opened for output; nothing is written to it
+                  while do_bout() is disabled
+     -z zsflag    value stored in pst.zsflag
+     -n           sets pst.dnaseq
+     -s ndup      number of consecutive result lines folded into one
+                  entry, keeping the highest score (default 1)
+     -q           do not prompt (sets outtty to 0)
+
+   Option parsing stops at the first argument that is not one of the
+   options above; that argument is the results file.
+
+   The first line of the file is scanned for the query length n0.  Of
+   the following lines, one starting with a slash and an asterisk is
+   echoed, saved in info_gstring2, and ends the input.  A line starting
+   with ';' supplies n0 from the text after '('.  Any other line is
+   parsed as
+       name lsfnum n1 frame comp H score1 score2 score3
+   The first MAX_STATS entries are saved for process_hist(); every
+   entry goes into the best[] list, which is pruned with selectz() when
+   it reaches MAX_BEST.  Finally the histogram (prhist) and the best
+   scores (showbest) are printed.
+
+   returns: 0; exits with status 1 on usage or file errors
+ */
+int
+main(int argc, char **argv)
+{
+  FILE *fin;
+  char line[512];
+  int icol, iarg, i, qsfnum, lsfnum, n0, n1, s[3], frame;
+  double comp, H;
+  int idup, ndup, max_s;
+  char libstr[512], *bp;	/* libstr is as large as line[], so "%s" cannot overflow it */
+  char bin_file[80];
+  char opt;
+  FILE *bout=NULL;
+  struct mngmsg m_msg;		/* Message from host to manager */
+  struct pstruct pst;
+  struct stat_str *stats;
+  int nstats;
+  double zscor;
+
+#if defined(UNIX)
+  outtty = isatty(1);
+#else
+  outtty = 1;
+#endif
+
+  if (argc < 2 ) {
+    fprintf(stderr," useage - res_stats -c col -r bin_file file\n");
+    exit(1);
+  }
+
+  m_msg.db.length = qtt.length = 0l;
+  m_msg.db.entries = m_msg.db.carry = qtt.entries = qtt.carry = 0;
+  m_msg.pstat_void = NULL;
+  m_msg.hist.hist_a = NULL;
+  m_msg.nohist = 0;
+  m_msg.markx = 0;
+
+  pst.n0 = 200;		/* sensible dummy value */
+  pst.zsflag = 1;
+  pst.dnaseq = 0;
+  pst.histint = 2;
+
+  bin_file[0]='\0';
+  icol = 1;
+  iarg = 1;
+  ndup = 1;
+
+  /* never look at argv[argc] (NULL) or beyond */
+  while (iarg < argc && argv[iarg][0]=='-') {
+    opt = argv[iarg][1];
+
+    /* these options consume the following argument */
+    if ((opt=='c' || opt=='r' || opt=='z' || opt=='s') && iarg+1 >= argc) {
+      fprintf(stderr," option -%c requires a value\n",opt);
+      exit(1);
+    }
+
+    if (opt=='c') {
+      sscanf(argv[iarg+1],"%d",&icol);
+      iarg += 2;
+    }
+    else if (opt=='r') {
+      /* strncpy() does not terminate when the source fills the buffer */
+      strncpy(bin_file,argv[iarg+1],sizeof(bin_file)-1);
+      bin_file[sizeof(bin_file)-1]='\0';
+      iarg += 2;
+    }
+    else if (opt=='z') {
+      sscanf(argv[iarg+1],"%d",&pst.zsflag);
+      iarg += 2;
+    }
+    else if (opt=='n') {
+      pst.dnaseq = 1;
+      iarg += 1;
+    }
+    else if (opt=='s') {
+      sscanf(argv[iarg+1],"%d",&ndup);
+      iarg += 2;
+    }
+    else if (opt=='q') {
+      outtty = 0;
+      iarg += 1;
+    }
+    else break;
+  }
+
+  /* the options used up every argument: no results file was named */
+  if (iarg >= argc) {
+    fprintf(stderr," useage - res_stats -c col -r bin_file file\n");
+    exit(1);
+  }
+
+  /* icol becomes the index into s[3] */
+  icol--;
+  if (icol < 0 || icol > 2) {
+    fprintf(stderr," -c col must be 1, 2, or 3\n");
+    exit(1);
+  }
+
+  if ((fin=fopen(argv[iarg],"r"))==NULL) {
+    fprintf(stderr," cannot open %s\n",argv[iarg]);
+    exit(1);
+  }
+
+  if (bin_file[0]!='\0' && ((bout=fopen(bin_file,"w"))==NULL)) {
+    fprintf(stderr,"cannot open %s for output\n",bin_file);
+  }
+
+  if ((stats =
+       (struct stat_str *)malloc((MAX_STATS)*sizeof(struct stat_str)))==NULL)
+    s_abort ("Cannot allocate stats struct","");
+  nstats = 0;
+
+  initbest(MAX_BEST+1);	/* +1 required for select() */
+
+  /* bptr[] and best[] are advanced by one so that index -1 is a valid
+     extra element in front of the list */
+  for (nbest=0; nbest<MAX_BEST+1; nbest++)
+    bptr[nbest] = &best[nbest];
+  bptr++; best++;
+  best[-1].score= BIGNUM;
+  
+  nbest = 0;
+
+  pst.Lambda=0.232;
+  pst.K = 0.11;
+  pst.H = 0.34;
+
+  /* read the best scores from the results file */
+
+  max_s = -1;
+  idup = 0;
+
+  /* get first line with sequence length; an empty file or a line
+     without a number leaves n0 at 0 and pst.n0 at its dummy value */
+  n0 = 0;
+  line[0] = '\0';
+  fgets(line,sizeof(line),fin);
+  sscanf(line,"%d",&n0);
+  if (n0 > 0) pst.n0 = n0;
+
+  while (fgets(line,sizeof(line),fin)!=NULL) {
+    if (line[0]=='/' && line[1]=='*') {
+      fputs(line,stdout);
+      strncpy(info_gstring2,line,sizeof(info_gstring2)-1);
+      info_gstring2[sizeof(info_gstring2)-1]='\0';
+      if ((bp=strchr(info_gstring2,'\n'))!=NULL) *bp = '\0';
+      break;
+    }
+    if (line[0]==';') {
+      if ((bp=strchr(line,'|'))!=NULL) qsfnum = atoi(bp+1);
+      else continue;
+      if ((bp=strchr(line,'('))!=NULL) {
+	n0 = atoi(bp+1);
+	pst.n0 = n0;
+      }
+      else {
+	fprintf(stderr, "cannot find n0:\n %s\n",line);
+	continue;
+      }
+    }
+    else {
+	/* clear the fields first, so that a line sscanf() cannot parse is
+	   caught by the lsfnum/n1 test below instead of reusing the
+	   values of an earlier line */
+	lsfnum = n1 = frame = 0;
+	comp = H = 0.0;
+	s[0] = s[1] = s[2] = 0;
+	sscanf(line,"%s %d %d %d %lf %lf %d %d %d",
+	       libstr,&lsfnum,&n1,&frame,&comp, &H, &s[0],&s[1],&s[2]);
+	if (lsfnum==0 && n1==0) {
+	  fputs(line,stderr);
+	  continue;
+	}
+	if (n1 < 10 || s[icol]<=0) fputs(line,stderr);
+	idup++;
+
+	/* keep the best score of each group of ndup lines */
+	if (s[icol] > max_s) max_s = s[icol];
+	if (idup < ndup) continue;
+
+	m_msg.db.entries++;
+	m_msg.db.length += n1;
+
+	/* NOTE(review): once dohist is set, this call and the one further
+	   down both add the same entry to the histogram -- confirm
+	   whether the double count is intended */
+	if (dohist) addhistz(zscor=find_z(max_s,n1,comp,m_msg.pstat_void),
+			     &m_msg.hist);
+	else zscor = (double)max_s;
+
+	if (nstats < MAX_STATS) {
+	  stats[nstats].n1 = n1;
+	  stats[nstats].comp = comp;
+	  stats[nstats].H = H;
+	  stats[nstats++].score = max_s;
+	}
+
+	else if (!dohist) {
+	  /* stats[] is full: fit the statistics and rescore what is
+	     already in the best list */
+	  /*	  do_bout(bout,stats,nstats); */
+	  process_hist(stats,nstats,pst,&m_msg.hist, &m_msg.pstat_void);
+	  for (i=0; i<nbest; i++)
+	    bptr[i]->zscore = 
+	      find_z(bptr[i]->score,bptr[i]->n1,bptr[i]->comp,
+			 m_msg.pstat_void);
+	  dohist = 1;
+	}
+
+	if (dohist) {
+	  zscor =find_z(max_s,n1,comp,m_msg.pstat_void);
+	  addhistz(zscor,&m_msg.hist);
+	}
+	else zscor = (double)max_s;
+
+	/* best list is full: keep the top part and continue from there */
+	if (nbest >= MAX_BEST) {
+	  bestfull = nbest-MAX_BEST/4;
+	  selectz(bestfull-1,nbest);
+	  bestcut = (int)(bptr[bestfull-1]->zscore+0.5);
+	  nbest = bestfull;
+	}
+	bestptr = bptr[nbest];
+	bestptr->score = max_s;
+	bestptr->sscore = max_s;
+	bestptr->n1 = n1;
+	bestptr->comp = comp;
+	bestptr->H = H;
+	bestptr->lib = lsfnum;
+	bestptr->zscore = zscor;
+	strncpy(bestptr->libstr,libstr,12);
+	bestptr->libstr[12]='\0';
+	nbest++;
+
+	max_s = -1;
+	idup = 0;
+    }
+  }	/* done with reading results */
+
+  fclose(fin);
+  if (bout != NULL) fclose(bout);
+
+  if (!dohist) {
+    if (nbest < 20) {
+      /* too few entries for statistics: rank by raw score */
+      zsflag = 0;
+    }
+    else {
+      /*      do_bout(bout,stats,nstats); */
+      process_hist(stats,nstats,pst,&m_msg.hist,&m_msg.pstat_void);
+      for (i=0; i<nbest; i++)
+	bptr[i]->zscore = 
+	  find_z(bptr[i]->score,bptr[i]->n1,bptr[i]->comp,m_msg.pstat_void);
+      dohist = 1;
+    }
+  }
+  
+  printf(" using n0: %d\n",pst.n0);
+
+  /* print histogram, statistics */
+
+  m_msg.nbr_seq = m_msg.db.entries;
+  pst.zdb_size = m_msg.db.entries;
+  /* get_param(&pst, info_gstring2,info_gstring3); */
+
+  prhist(stdout,m_msg,pst,m_msg.hist,nstats,m_msg.db,info_gstring2);
+
+  if (!zsflag) sortbest();
+  else {
+    sortbestz(bptr,nbest);
+    for (i=0; i<nbest; i++)
+      bptr[i]->escore = zs_to_E(bptr[i]->zscore,bptr[i]->n1,pst.dnaseq,
+				pst.zdb_size, m_msg.db);
+  }
+  
+  outfd = stdout;
+  showbest(m_msg.db);	/* display best matches */
+
+  return 0;
+}
+
+/* initbest() -- allocate arrays for best sort
+
+   Allocates, zero-filled, the global record array best[] and the
+   global pointer array bptr[], each with nbest elements.  The
+   parameter hides the global counter of the same name inside this
+   function.  Prints a message and exits with status 1 if either
+   allocation fails.
+
+   returns: 0
+ */
+int
+initbest(int nbest)
+{
+  best = (struct beststr *)calloc((size_t)nbest,sizeof(struct beststr));
+  if (best == NULL) {
+    fprintf(stderr,"cannot allocate best struct\n");
+    exit(1);
+  }
+
+  bptr = (struct beststr **)calloc((size_t)nbest,sizeof(struct beststr *));
+  if (bptr == NULL) {
+    fprintf(stderr,"cannot allocate bptr\n");
+    exit(1);
+  }
+
+  return 0;
+}
+
+/* prhist() -- print the score histogram and summary statistics to fd
+
+   fd            output stream
+   m_msg         nohist suppresses the histogram; markx & MX_M10FORM
+                 also writes a summary into the global info_hstring1
+   pst           only zsflag is used
+   hist          histogram bins (hist_a, maxh, min_hist, histint),
+                 entries and stat_info
+   nstats        number of samples behind the statistics
+   ntt           residue/sequence totals for the summary line
+   info_gstring2 label printed after the summary
+
+   When pst.zsflag < 0 or nstats <= 10 only the totals and the label
+   are printed.  Otherwise each bin is drawn as a row of '=' with a '*'
+   at the expected count; when the upper quarter of the histogram needs
+   a finer scale it is drawn a second time as an inset after a ':'.
+ */
+void
+prhist(FILE *fd, struct mngmsg m_msg,
+       struct pstruct pst, 
+       struct hist_str hist, 
+       int nstats,
+       struct db_str ntt,
+       char *info_gstring2)
+{
+  int i,j,hl,hll, el, ell, ev;
+  char hline[80], pch, *bp;
+  int mh1, mht;
+  int maxval, maxvalt, dotsiz, ddotsiz,doinset;
+  double cur_e, prev_e, f_int;
+  double max_dev, x_tmp;
+  double db_tt;
+  int n_chi_sq, cum_hl, max_i;
+
+  /* These are otherwise assigned only inside the histogram block, but
+     are read again when the summary is printed.  Give them defined
+     values for the case where the histogram is skipped (nohist set, or
+     too few bins) or no bin qualifies for the deviation test. */
+  n_chi_sq = 0;
+  cum_hl = 0;
+  max_i = 0;
+  cur_e = 0.0;
+
+  fprintf(fd,"\n");
+  
+  if (pst.zsflag < 0 || nstats <= 10) {
+    fprintf(fd, "%7ld residues in %5ld sequences\n", ntt.length,ntt.entries);
+    fprintf(fd,"\n%s\n",info_gstring2);
+    return;
+  }
+
+  /* pst.zsflag >= 0 from here on, so the zsflag tests below always
+     take their first branch */
+
+  max_dev = 0.0;
+  mh1 = hist.maxh-1;			/* index of the last bin */
+  mht = (3*hist.maxh-3)/4 - 1;		/* first bin of the inset */
+
+  if (!m_msg.nohist && mh1 > 0) {
+    /* largest bin overall, and largest bin within the inset */
+    for (i=0,maxval=0,maxvalt=0; i<hist.maxh; i++) {
+      if (hist.hist_a[i] > maxval) maxval = hist.hist_a[i];
+      if (i >= mht &&  hist.hist_a[i]>maxvalt) maxvalt = hist.hist_a[i];
+    }
+    n_chi_sq = 0;
+    cum_hl = -hist.hist_a[0];
+    dotsiz = (maxval-1)/60+1;		/* sequences per '=' */
+    ddotsiz = (maxvalt-1)/50+1;		/* sequences per '=' in the inset */
+    doinset = (ddotsiz < dotsiz && dotsiz > 2);
+
+    if (pst.zsflag>=0)
+      fprintf(fd,"       opt      E()\n");
+    else 
+      fprintf(fd,"     opt\n");
+
+    prev_e =  zs_to_Ec((double)(hist.min_hist-hist.histint/2),hist.entries);
+    for (i=0; i<=mh1; i++) {
+      pch = (i==mh1) ? '>' : ' ';
+      pch = (i==0) ? '<' : pch;
+      hll = hl = hist.hist_a[i];
+      if (pst.zsflag>=0) {
+	cum_hl += hl;
+	f_int = (double)(i*hist.histint+hist.min_hist)+(double)hist.histint/2.0;
+	cur_e = (double)zs_to_Ec(f_int,hist.entries);
+	ev = el = ell = (int)(cur_e - prev_e + 0.5);
+	/* track the largest difference between the cumulative observed
+	   count and cur_e */
+	if (hl > 0  && i > 5 && i < (90-hist.min_hist)/hist.histint) {
+	  x_tmp  = fabs(cum_hl - cur_e);
+	  if ( x_tmp > max_dev) {
+	    max_dev = x_tmp;
+	    max_i = i;
+	  }
+	  n_chi_sq++;
+	}
+	if ((el=(el+dotsiz-1)/dotsiz) > 60) el = 60;
+	if ((ell=(ell+ddotsiz-1)/ddotsiz) > 40) ell = 40;
+	fprintf(fd,"%c%3d %5d %5d:",
+		pch,(i<mh1)?(i)*hist.histint+hist.min_hist :
+		mh1*hist.histint+hist.min_hist,hl,ev);
+      }
+      else fprintf(fd,"%c%3d %5d :",
+		   pch,(i<mh1)?(i)*hist.histint+hist.min_hist :
+		   mh1*hist.histint+hist.min_hist,hl);
+
+      /* scale the counts to at most 60 (main) and 40 (inset) columns */
+      if ((hl=(hl+dotsiz-1)/dotsiz) > 60) hl = 60;
+      if ((hll=(hll+ddotsiz-1)/ddotsiz) > 40) hll = 40;
+      for (j=0; j<hl; j++) hline[j]='='; 
+      if (pst.zsflag>=0) {
+	if (el <= hl ) {
+	  if (el > 0) hline[el-1]='*';
+	  hline[hl]='\0';
+	}
+	else {
+	  for (j = hl; j < el; j++) hline[j]=' ';
+	  hline[el-1]='*';
+	  hline[hl=el]='\0';
+	}
+      }
+      else hline[hl] = 0;
+      /* the scale legends are written from column 10 of their rows */
+      if (i==1) {
+	for (j=hl; j<10; j++) hline[j]=' ';
+	sprintf(&hline[10]," one = represents %d library sequences",dotsiz);
+      }
+      if (doinset && i == mht-2) {
+	for (j = hl; j < 10; j++) hline[j]=' ';
+	sprintf(&hline[10]," inset = represents %d library sequences",ddotsiz);
+      }
+      if (i >= mht&& doinset ) {
+	for (j = hl; j < 10; j++) hline[j]=' ';
+	hline[10]=':';
+	for (j = 11; j<11+hll; j++) hline[j]='=';
+	hline[11+hll]='\0';
+	if (pst.zsflag>=0) {
+	  if (ell <= hll) hline[10+ell]='*';
+	  else {
+	    for (j = 11+hll; j < 10+ell; j++) hline[j]=' ';
+	    hline[10+ell] = '*';
+	    hline[11+ell] = '\0';
+	  }
+	}
+      }
+
+      fprintf(fd,"%s\n",hline);
+      prev_e = cur_e;
+    }
+  }
+
+  if (ntt.carry==0) {
+    fprintf(fd, "%7ld residues in %5ld sequences\n", ntt.length, ntt.entries);
+  }
+  else {
+    db_tt = (double)ntt.carry*(double)LONG_MAX + (double)ntt.length;
+    fprintf(fd, "%.0f residues in %5ld library sequences\n", db_tt, ntt.entries);
+  }
+
+  if (pst.zsflag>=0) {
+    if (MAX_STATS < hist.entries)
+      fprintf(fd," statistics extrapolated from %d to %ld sequences\n",
+	      MAX_STATS,hist.entries);
+    /*    summ_stats(stat_info); */
+    fprintf(fd," %s\n",hist.stat_info);
+    if (!m_msg.nohist && cum_hl > 0)
+      fprintf(fd," Kolmogorov-Smirnov  statistic: %6.4f (N=%d) at %3d\n",
+	      max_dev/(double)cum_hl, n_chi_sq,max_i*hist.histint+hist.min_hist);
+    if (m_msg.markx & MX_M10FORM) {
+      while ((bp=strchr(hist.stat_info,'\n'))!=NULL) *bp=' ';
+      if (cum_hl <= 0) cum_hl = -1;
+      /* NOTE(review): nothing here limits the output to the size of
+         info_hstring1 -- confirm stat_info is always short enough */
+      sprintf(info_hstring1,"; mp_extrap: %d %ld\n; mp_stats: %s\n; mp_KS: %6.4f (N=%d) at %3d\n",
+	      MAX_STATS,hist.entries,hist.stat_info,max_dev/(double)cum_hl, n_chi_sq,max_i*hist.histint+hist.min_hist);
+    }
+  }
+  fprintf(fd,"\n%s\n",info_gstring2);
+  fflush(fd);
+}
+
+/* showbest() -- list the best scores held in bptr[0..nbest-1]
+
+   ntt   only ntt.entries is used, in the "E(...)" column heading
+
+   Writes to the global stream outfd, and repeats the output on stdout
+   when outfd is a different stream.  When outtty is set the user is
+   asked how many scores to show (default min(20,nbest)) and, after
+   each batch, how many more.  Otherwise, with zsflag set, entries are
+   shown in batches of 10 until one has an E() value above e_cut or
+   the list is used up.
+
+   returns: 0; calls exit(0) on end-of-file on stdin while prompting
+ */
+int
+showbest(struct db_str ntt)
+{
+  int ib, istart, istop;
+  char bline[200], fmt[40], pad[200];
+  char rline[20];
+  int ntmp;
+
+  /* entry format: the name is padded to llen-10 columns */
+  sprintf(fmt,"%%-%ds (%%3d)",llen-10);
+
+  nshow = min(20,nbest);
+  mshow = min(20,nbest);
+
+  if (outtty) {
+    printf(" How many scores would you like to see? [%d] ",nshow);
+    fflush(stdout);
+    if (fgets(rline,sizeof(rline),stdin)==NULL) exit(0);
+    if (rline[0]!='\n' && rline[0]!=0) sscanf(rline,"%d",&nshow);
+    if (nshow<=0) nshow = min(20,nbest);
+  }
+  else nshow=mshow;
+
+  /* blanks that line the column headings up with the entries below */
+  memset(pad,' ',llen-10);
+  pad[llen-31]='\0';
+  if (zsflag)
+    fprintf(outfd,"The best scores are:%s s-w Z-score E(%ld)\n",pad,ntt.entries);
+  else
+    fprintf(outfd,"The best scores are:%s s-w\n",pad);
+
+  if (outfd != stdout) {
+    if (zsflag)
+      fprintf(stdout,"The best scores are:%s s-w Z-score E(%ld)\n",pad,ntt.entries);
+    else
+      fprintf(stdout,"The best scores are:%s s-w\n",pad);
+  }
+
+  istart = 0;
+ l1:
+  istop = min(nbest,nshow);
+  for (ib=istart; ib<istop; ib++) {
+    bbp = bptr[ib];
+
+    /* when not prompting, stop at the first entry above the E() cutoff */
+    if (!outtty && zsflag && bbp->escore > e_cut) {
+      nshow = ib;
+      goto done;
+    }
+
+    /* name and library number, cut to 13 characters */
+    sprintf(bline,"%-12s %d",bbp->libstr,bbp->lib);
+    bline[13]='\0';
+
+    fprintf(outfd,fmt,bline,bbp->n1);
+
+    if (zsflag)
+      fprintf(outfd,"%4d %4.1f %6.2g\n",
+	      bbp->score,bbp->zscore,
+	      bbp->escore);
+    else 
+      fprintf(outfd,"%4d\n",bbp->score);
+
+    if (outfd!=stdout) {
+      fprintf(stdout,fmt,bline,bbp->n1);
+      if (zsflag)
+	printf("%4d %4.1f %6.2g\n",
+	       bbp->score,bbp->zscore,
+	       bbp->escore);
+      else 
+	printf("%4d\n",bbp->score);
+    }
+  }
+
+  fflush(outfd); if (outfd!=stdout) fflush(stdout);
+
+  if (outtty) {
+    printf(" More scores? [0] ");
+    fflush(stdout);
+    if (fgets(rline,sizeof(rline),stdin)==NULL) exit(0);
+    ntmp = 0;
+    if (rline[0]!='\n' && rline[0]!=0) sscanf(rline,"%d",&ntmp);
+    if (ntmp<=0) ntmp = 0;
+    if (ntmp>0) {
+      istart = istop;
+      nshow += ntmp;
+      mshow += ntmp;
+      goto l1;
+    }
+  }
+  /* Go on to another batch only while entries remain.  Without the
+     istop < nbest test, a list whose entries all have E() below e_cut
+     came back here forever: the loop above had nothing left to show,
+     so bbp never changed. */
+  else if (zsflag && istop < nbest && bbp != NULL && bbp->escore < e_cut) {
+    istart=istop;
+    nshow += 10;
+    goto l1;
+  }
+
+ done:
+  if (outfd!=stdout) fprintf(outfd,"\n");
+
+  return 0;
+}
+
+/* selectz() -- partial sort of bptr[0..n-1] by descending zscore
+
+   k   rank in array (0-based)
+   n   number of entries in bptr[]
+
+   On return bptr[k] holds the entry of rank k; no entry before it has
+   a smaller zscore and no entry after it has a larger one.
+
+   Each pass partitions bptr[l..r] around the zscore of bptr[r].  The
+   downward scan is stopped at l explicitly.  The original relied on
+   the element in front of the range to stop it, but for l == 0 that
+   element is bptr[-1], whose zscore main() never sets (only its score
+   field), so the scan could continue below the start of the array.
+
+   returns: 0
+ */
+int
+selectz(int k, int n)
+{
+  int i, j, l, r;
+  double v;
+  struct beststr *tmptr;
+
+  l=0; r=n-1;
+
+  while ( r > l ) {
+    i = l-1;
+    j = r;
+    v = bptr[r]->zscore;		/* pivot */
+    do {
+      while (bptr[++i]->zscore > v ) ;	/* stops at r at the latest */
+      while (j > l && bptr[--j]->zscore < v ) ;
+      tmptr = bptr[i]; bptr[i]=bptr[j]; bptr[j]=tmptr;
+    } while (j > i);
+    /* undo the last swap, made after i and j met or crossed, and move
+       the pivot to its final place at i */
+    bptr[j]=bptr[i]; bptr[i]=bptr[r]; bptr[r]=tmptr;
+    if (i>=k) r = i-1;
+    if (i<=k) l = i+1;
+  }
+
+  return 0;
+}
+
+/* sortbest() -- sort bptr[0..nbest-1] with cmps(), i.e. by descending
+   score.
+
+   returns: 0
+ */
+int
+sortbest()
+{
+  int cmps();
+
+  ksort(bptr,nbest,cmps);
+  return 0;
+}
+
+/* sortbeste() -- sort bptr[0..nbest-1] with cmpe(), i.e. by ascending
+   escore.
+
+   returns: 0
+ */
+int
+sortbeste()
+{
+  int cmpe();
+
+  ksort(bptr,nbest,cmpe);
+  return 0;
+}
+
+/* sortbestz() -- sort bptr[0..nbest-1] with cmpz(), i.e. by descending
+   zscore.
+
+   The parameter list is deliberately left empty rather than (void):
+   main() calls this as sortbestz(bptr,nbest).  Those arguments are not
+   used; the globals bptr and nbest are.
+
+   returns: 0
+ */
+int
+sortbestz()
+{
+  int cmpz();
+
+  ksort(bptr,nbest,cmpz);
+  return 0;
+}
+
+/* cmps() -- ksort() comparison on score, larger scores first
+
+   returns: 1 if ptr1->score < ptr2->score, -1 if it is greater,
+            0 if the scores are equal
+ */
+int
+cmps(struct beststr *ptr1, struct beststr *ptr2)
+{
+  if (ptr1->score < ptr2->score) return 1;
+  if (ptr1->score > ptr2->score) return -1;
+  return 0;
+}
+
+/* cmpe() -- ksort() comparison on escore, smaller values first
+
+   returns: -1 if ptr1->escore < ptr2->escore, 1 if it is greater,
+            0 otherwise
+ */
+int
+cmpe(struct beststr *ptr1, struct beststr *ptr2)
+{
+  if (ptr1->escore < ptr2->escore) return -1;
+  if (ptr1->escore > ptr2->escore) return 1;
+  return 0;
+}
+
+/* cmpz() -- ksort() comparison on zscore, larger values first
+
+   returns: 1 if ptr1->zscore < ptr2->zscore, -1 if it is greater,
+            0 otherwise
+ */
+int
+cmpz(struct beststr *ptr1, struct beststr *ptr2)
+{
+  if (ptr1->zscore < ptr2->zscore) return 1;
+  if (ptr1->zscore > ptr2->zscore) return -1;
+  return 0;
+}
+
+/* ksort() -- sort the n pointers in v[] in place
+
+   Elements a fixed distance apart are compared and exchanged, and the
+   distance is halved after each pass, starting at n/2 and ending at 1.
+   comp(a,b) is called with two elements of v[]; a result greater than
+   0 means that a must come after b.
+ */
+ksort(v,n,comp)
+     char *v[]; int n, (*comp)();
+{
+  int step, hi, lo;
+  char *held;
+
+  step = n/2;
+  while (step > 0) {
+    for (hi=step; hi<n; hi++) {
+      /* move v[hi] down through the elements step apart below it */
+      lo = hi-step;
+      while (lo >= 0 && (*comp)(v[lo],v[lo+step]) > 0) {
+	held = v[lo];
+	v[lo] = v[lo+step];
+	v[lo+step] = held;
+	lo -= step;
+      }
+    }
+    step /= 2;
+  }
+}
+
+/*
+do_bout(FILE *bout,struct stat_str **bptr, int nbest)
+{
+  int i, min_hist, max_hist;
+  double mu, var;
+
+  if (bout==NULL) return;
+
+  inithist();
+  for (i = 0; i<nbest; i++)
+    addhist(bptr[i]->score,bptr[i]->n1);
+
+  for (i=0; i<MAX_LLEN; i++)
+    if (llen_hist[i]>0) {
+      min_hist=i;
+      break;
+    }
+
+  for (i=MAX_LLEN-1; i>=0; i--)
+    if (llen_hist[i]>0) {
+      max_hist=i;
+      break;
+    }
+
+  for (i=min_hist; i<=max_hist; i++) {
+    mu=(double)score_sums[i]/(double)llen_hist[i];
+    if (llen_hist[i]>1) {
+      var = ((double)score2_sums[i]-(double)llen_hist[i]*mu*mu)/
+	(double)(llen_hist[i]-1);
+
+      fprintf(bout,"%d\t%d\t%.1f\t%.1f\t%.1f\t%.4f\t%.4f\n",
+	      i,llen_hist[i],exp(((double)(i))/LN_FACT),
+	      score_sums[i],score2_sums[i],mu,var);
+    }
+  }
+  free_hist();
+  fclose(bout);
+}
+*/
+
+s_abort()
+{
+  exit(1);
+}
diff --git a/src/rstruct.h b/src/rstruct.h
new file mode 100644
index 0000000..94a9814
--- /dev/null
+++ b/src/rstruct.h
@@ -0,0 +1,16 @@
+/* $Id: rstruct.h 625 2011-03-23 17:21:38Z wrp $ */
+
+#ifndef RSTRUCT
+#define RSTRUCT
+struct rstruct	/* result record -- NOTE(review): the code that fills these fields is not in this header; the notes below are read from the names only, confirm against the callers */
+{
+  int score[3];	/* three integer scores */
+  int valid_stat;	/* presumably a flag for use in statistics -- TODO confirm */
+  int alg_info;
+  double comp;
+  double H;
+  double escore;	/* presumably an E() value -- TODO confirm */
+  int segnum;
+  int seglen;
+};
+#endif
diff --git a/src/sc_to_e.c b/src/sc_to_e.c
new file mode 100644
index 0000000..5df14e8
--- /dev/null
+++ b/src/sc_to_e.c
@@ -0,0 +1,71 @@
+/* $Id: sc_to_e.c 625 2011-03-23 17:21:38Z wrp $ */
+
+/* copyright (c) 1996, 1997, 1998, 1999, 2014 by William R. Pearson and the
+   The Rector & Visitors of the University of Virginia */
+
+/* Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under this License is distributed on an "AS
+   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+   express or implied.  See the License for the specific language
+   governing permissions and limitations under the License. 
+*/
+
+/* sc_to_e  uses statistical parameters from search and
+	    score, length, and database size to calculate E()
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+double mean_var, mu, rho;
+
+/* sc_to_e -- convert (score, length, db_size) to E() interactively
+
+   usage: sc_to_e [rho mu mean_var]
+
+   The three statistical parameters are taken from the command line
+   when exactly three arguments are given, otherwise they are read
+   from the first line of stdin.  Each following line holds
+   "score length [db_size]".  db_size is 50000 when it is missing or
+   less than 1.  One result line is printed per input line until an
+   empty line or end-of-file.
+
+   returns: does not return normally; leaves through exit(0) at the end
+            of input, or exit(1) if mean_var is not positive
+ */
+int
+main(int argc, char **argv)
+{
+  char line[128];
+  int score, length, db_size;
+  double z_val;
+  double s_to_zv(int, int), zv_to_E(double, int);
+
+  if (argc == 4) {
+    sscanf(argv[1],"%lf",&rho);
+    sscanf(argv[2],"%lf",&mu);
+    sscanf(argv[3],"%lf",&mean_var);
+  }
+  else {
+    fprintf(stderr," enter rho mu mean_var: ");
+    if (fgets(line,sizeof(line),stdin)==NULL) exit(0);
+    sscanf(line,"%lf %lf %lf",&rho, &mu, &mean_var);
+  }
+
+  /* s_to_zv() divides by sqrt(mean_var) */
+  if (!(mean_var > 0.0)) {
+    fprintf(stderr," mean_var must be greater than 0\n");
+    exit(1);
+  }
+
+  while (1) {
+    fprintf(stderr," enter score length db_size: ");
+    if (fgets(line,sizeof(line),stdin)==NULL) exit(0);
+    if (line[0]=='\n') exit(0);
+
+    /* db_size is optional; score and length are not */
+    db_size = 0;
+    if (sscanf(line,"%d %d %d",&score, &length, &db_size) < 2) {
+      fprintf(stderr," cannot read score and length from: %s",line);
+      continue;
+    }
+    /* s_to_zv() takes log(length) */
+    if (length < 1) {
+      fprintf(stderr," length must be at least 1\n");
+      continue;
+    }
+    if (db_size < 1) db_size = 50000;
+
+    z_val = s_to_zv(score, length);
+
+    printf(" s: %d (%d) E(%d): %4.2g\n",score,length,db_size,zv_to_E(z_val,db_size));
+  }
+
+  return 0;	/* not reached */
+}
+
+/* s_to_zv() -- scale a score using the file-level parameters rho, mu
+   and mean_var:
+
+     (score - rho*log(length) - mu) / sqrt(mean_var)
+ */
+double s_to_zv(int score, int length)
+{
+  double zv;
+
+  zv = (double)score - rho * log((double)length);
+  zv = zv - mu;
+
+  return zv/sqrt(mean_var);
+}
+
+/* zv_to_E() -- convert a scaled score to E() for db_size sequences
+
+   Computes e = exp(-1.282554983*zv - .577216).  The value returned is
+   db_size*(1 - exp(-e)) when e is above .01, and db_size*e otherwise.
+ */
+double zv_to_E(double zv, int db_size)
+{
+  double e, per_seq;
+
+  e = exp(-1.282554983 * zv - .577216);
+
+  if (e > .01) per_seq = 1.0 - exp(-e);
+  else per_seq = e;
+
+  return (double)db_size * per_seq;
+}
diff --git a/src/scaleswn.c b/src/scaleswn.c
new file mode 100644
index 0000000..cae7962
--- /dev/null
+++ b/src/scaleswn.c
@@ -0,0 +1,3136 @@
+/* scaleswn.c */
+
+/* $Id: scaleswn.c 1245 2013-12-18 18:19:38Z wrp $ */
+
+/* copyright (c) 1995, 1996, 2000, 2014 by William R. Pearson and The
+   Rector & Visitors of the University of Virginia */
+
+/* Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under this License is distributed on an "AS
+   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+   express or implied.  See the License for the specific language
+   governing permissions and limitations under the License. 
+*/
+
+/* as of 24 Sept, 2000 - scaleswn uses no global variables */
+
+/* Provide statistical estimates using an extreme value distribution 
+
+	This code provides multiple methods for scaling sequence
+	similarity scores to correct for length effects.
+
+	Currently, the following methods are available:
+
+	ppst->zsflag = 0 - no scaling  (AVE_STATS)
+	ppst->zsflag = 1 - regression-scaled scores (REG_STATS)
+	ppst->zsflag = 2 - (revised) MLE Lambda/K scaled scores (MLE_STATS)
+	ppst->zsflag = 3 - scaling using Altschul's parameters (AG_STATS)
+	ppst->zsflag = 4 - regression-scaled with iterative outlier removal (REGI_STATS)
+	ppst->zsflag = 5 = like 1, but length scaled variance (REG2_STATS)
+	ppst->zsflag = 6 = like 2, but uses lambda composition/scale (MLE2_STATS)
+	ppst->zsflag = 11 = 10 + 1 - use random shuffles, method 1
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+
+#include <limits.h>
+
+#include "defs.h"
+#include "param.h"
+#include "structs.h"
+#include "best_stats.h"
+
+#define MAXHIST 50
+#define MAX_LLEN 200
+#define LHISTC 5
+#define VHISTC 5
+#define MAX_SSCORE 300
+
+#define LENGTH_CUTOFF 10 /* minimum database sequence length allowed, for fitting */
+
+#define LN_FACT 10.0
+#ifndef M_LN2
+#define M_LN2 0.69314718055994530942
+#endif
+#define EULER_G 0.57721566490153286060	/* Euler-Mascheroni constant */
+#define PI_SQRT6 1.28254983016186409554	/* pi/sqrt(6) */
+
+#ifndef M_SQRT2
+#define M_SQRT2 1.41421356237
+#endif
+#define LN200 5.2983173666
+#define ZS_MAX 400.0	/* used to prevent underflow on some machines */
+#define TOLERANCE 1.0e-12
+#define TINY 1.0e-6
+
+/* used by AVE_STATS, REG_STATS, REGI_STATS, REG2_STATS*/
+struct rstat_str {	/* length-regression parameters */
+  double rho, rho_e, mu, mu_e, mean_var, var_e;  /* rho: slope, mu: intercept; rho_e, mu_e, var_e are printed as sqrt() after +/- */
+  double mean_var_sqrt;		/* sqrt(mean_var) - used frequently */
+/* variance regression, used by REG2_STATS */
+  double rho2, mu2, var_cutoff;
+  int n_trimmed; /* scores excluded because their z-score was outside the accepted range */
+  int n1_trimmed, nb_trimmed, nb_tot; /* excluded because of bin */
+};
+
+/* used by AG_STATS, MLE_STATS */
+struct ag_stat_str {
+  double K, Lambda, H, a_n0f, a_n0;	/* a_n0: (double)ppst->n0; a_n0f: log(K*a_n0)/H, set in proc_hist_a() */
+};
+
+/* used by MLE2_STATS */
+struct mle2_stat_str {
+  double a_n0;	/* (double)ppst->n0 */
+  double mle2_a0, mle2_a1, mle2_a2, mle2_b1;	/* filled in by mle_cen2() */
+  double ave_comp, max_comp, ave_H;	/* ave_comp: geometric mean of the positive comp values; max_comp: largest comp */
+};
+
+struct pstat_str {
+  int zsflag;	/* method actually used: the value returned by proc_hist_*() */
+  double ngLambda, ngK, ngH;	/* copied from m_msp->Lambda, K, H in process_hist() */
+  double ave_n1;
+  double sample_fract;	/* s_cnt[score_ix]/tot_scores, capped at 1.0 */
+  double zs_off;	/* z-score offset from sampling */
+  union {	/* parameters of the method in use */
+    struct rstat_str rg;
+    struct ag_stat_str ag;
+    struct mle2_stat_str m2;
+  } r_u;
+};
+
+double find_z(int score, double escore, int len, double comp, struct pstat_str *);
+
+#define AVE_STATS 0	/* no length effect, only mean/variance */
+double find_zn(int score, double escore, int len, double comp, struct pstat_str *);
+
+static void st_sort (struct stat_str *v, int n);
+
+static int
+calc_thresh(struct pstruct *ppst, struct stat_str *sptr, int nstats,
+	    struct score_count_s s_info,
+	    double Lambda, double K, double H, double *zstrim);
+
+int proc_hist_n(struct stat_str *sptr, int nstats, struct score_count_s s_info,
+		struct pstruct *ppst, struct hist_str *histp, int do_trim,
+		struct pstat_str *);
+
+#define REG_STATS 1	/* length-regression scaled */
+#define REGI_STATS 4	/* length regression, iterative */
+double find_zr(int score, double escore, int len, double comp, struct pstat_str *);
+int proc_hist_r(struct stat_str *sptr,  int nstats, struct score_count_s s_info,
+		struct pstruct *ppst, struct hist_str *histp,
+		int do_trim, struct pstat_str *pu);
+
+#define MLE_STATS 2	/* MLE for lambda, K */
+double find_ze(int score, double escore, int len, double comp, struct pstat_str *);
+int proc_hist_ml(struct stat_str *sptr,  int nstats, struct score_count_s s_info,
+		 struct pstruct *ppst, struct hist_str *histp, int do_trim,
+		 struct pstat_str *);
+
+#define AG_STATS 3	/* Altschul-Gish parameters */
+double find_za(int score, double escore, int len, double comp, struct pstat_str *);
+int proc_hist_a(struct stat_str *sptr, int nstats, struct score_count_s s_info,
+		struct pstruct *ppst, struct hist_str *histp, int do_trim,
+		struct pstat_str *);
+
+#ifdef NORMAL_DIST
+int proc_hist_anorm(struct stat_str *sptr, int nstats, struct score_count_s s_info,
+		struct pstruct *ppst, struct hist_str *histp, int do_trim,
+		struct pstat_str *);
+#endif
+
+#define REG2_STATS 5	/* length regression on mean + variance */
+double find_zr2(int score, double escore, int len, double comp, struct pstat_str *);
+int proc_hist_ri(struct stat_str *sptr, int nstats, struct score_count_s s_info,
+		 struct pstruct *ppst, struct hist_str *histp, int do_trim,
+		 struct pstat_str *);
+
+#define MLE2_STATS 6	/* MLE stats using comp(lambda) */
+double find_ze2(int score, double escore, int length, double comp, struct pstat_str *);
+int proc_hist_ml2(struct stat_str *sptr, int nstats, struct score_count_s s_info,
+		  struct pstruct *ppst, struct hist_str *histp, int do_trim,
+		  struct pstat_str *);
+
+#ifdef USE_LNSTATS
+#define LN_STATS 2
+double find_zl(int score, double escore, int len, double comp, struct pstat_str *);
+int proc_hist_ln(struct stat_str *sptr, int nstats, struct score_count_s s_info,
+		 struct pstruct *ppst, struct hist_str *histp, int do_trim,
+		 struct pstat_str *);
+#endif
+
+double (*find_z_arr[7])(int score, double escore, int len, double comp, struct pstat_str *) = {	/* indexed by zsflag, 0..6 */
+  find_zn, 	/* AVE_STATS zsflag ==0 */
+  find_zr,	/* REG_STATS zsflag ==1 */
+  find_ze,	/* MLE_STATS, zsflag==2 */
+#ifndef NORMAL_DIST
+  find_za,	/* AG_STATS, zsflag==3 */
+#else
+  find_zn, 	/* AG_STATS slot, zsflag==3: NORMAL_DIST builds use find_zn here */
+#endif  
+  find_zr,	/* REGI_STATS, zsflag==4 */
+  find_zr2,	/* REG2_STATS, zsflag==5 */
+  find_ze2,	/* MLE2_STATS, zsflag==6 */
+};
+
+/* print out all pstat_str info for independent calculation */
+void
+pstat_info(char *info_str, int info_str_n, char *comment, struct pstat_str *pu);
+
+/* scaleswn.c local variables that belong in their own structure */
+
+struct llen_str {
+  int min, max;
+  int max_score, min_score;	/* proc_hist_r() skips the fit when max_score - min_score < 10 */
+  int *hist;	/* set to NULL by the callers before inithist() */
+  double *score_sums, *score2_sums;
+  double *score_var;
+  int max_length, min_length, zero_s;	/* zero_s is reported as "0's:" in the fit summary */
+  int fit_flag;	/* 0 after a failed fit; the callers then fall back to another method */
+};
+
+/* llen bins */
+static void inithist(struct llen_str *, struct pstruct *, int);
+static void free_hist( struct llen_str *);
+static void addhist(struct llen_str *, int, int, int);
+static void prune_hist(struct llen_str *, int, int, int, long *);
+/* final display histogram */
+void inithistz(int, struct hist_str *histp, double zs_off);
+void addhistz(double zs, struct hist_str *histp);
+void addhistzp(double zs, struct hist_str *histp);
+
+/* calculate rho (slope), mu (intercept) REG_STATS,
+   mean_var, rho2, mu2 for variance (REG2_STATS),
+   minimal iterative exclusion
+*/
+static void fit_llen(struct llen_str *, struct rstat_str *);
+/* calculate rho (slope), mu (intercept) REG_STATS,
+   mean_var, rho2, mu2 for variance (REG2_STATS),
+   
+*/
+static void fit_llen2(struct llen_str *, struct rstat_str *);
+
+/* calculate rho (slope), mu (intercept) REG_STATS,
+   mean_var, rho2, mu2 for variance (REG2_STATS) */
+static void fit_llens(struct llen_str *, struct rstat_str *);
+
+extern void sortbeste(struct beststr **bptr, int nbest);
+extern void sortbestz(struct beststr **bptr, int nbest);
+
+/* void set_db_size(int, struct db_str *, struct hist_str *); */
+
+#ifdef DEBUG
+FILE *tmpf;
+#endif
+
+/* process_hist() -- select the statistics method from ppst->zsflag, estimate
+   its parameters from the nstats sampled scores in sptr[] into *ps_sp
+   (allocated here when NULL), and fill histp when do_hist is 0 and histp
+   is not NULL.  Returns the zsflag of the method used (ppst->zsflag if < 0). */
+int
+process_hist(struct stat_str *sptr, int nstats, const struct mngmsg *m_msp,
+	     struct pstruct *ppst, struct hist_str *histp, struct pstat_str **ps_sp,
+	     struct score_count_s *s_info, int do_hist)
+{
+  int zsflag, r_zsflag, do_trim, i;
+  struct pstat_str *ps_s;
+
+  if (ppst->zsflag < 0) {	/* no statistics */
+    *ps_sp = NULL;
+    return ppst->zsflag;
+  }
+
+  /* need a pstat_str if its NULL */
+  if (*ps_sp == NULL) {
+    if ((ps_s=(struct pstat_str *)calloc(1,sizeof(struct pstat_str)))==NULL) {
+      fprintf(stderr," cannot allocate pstat_union: %ld\n",(long)sizeof(struct pstat_str));	/* size_t cast to match %ld */
+      exit(1);
+    }
+    else {
+      *ps_sp = ps_s;
+    }
+  }
+  else {
+    ps_s = *ps_sp;
+    memset(ps_s,0,sizeof(struct pstat_str));
+  }
+
+  if (s_info->tot_scores > 10) {	/* z-score offset for the sampled fraction of scores */
+    ps_s->sample_fract = min(1.0, (double)s_info->s_cnt[ppst->score_ix]/(double)s_info->tot_scores);
+    if (ps_s->sample_fract > 0.0 && ps_s -> sample_fract < 1.0) {
+      ps_s->zs_off = -log(ps_s->sample_fract)/PI_SQRT6;
+    }
+    else {
+      ps_s->zs_off = 0.0;
+    }
+  }
+  else {
+    ps_s->sample_fract = 1.0;
+    ps_s->zs_off = 0.0;
+  }
+  ppst->zs_off = ps_s->zs_off;
+
+  ps_s->ngLambda = m_msp->Lambda;
+  ps_s->ngK = m_msp->K;
+  ps_s->ngH = m_msp->H;
+
+  zsflag = ppst->zsflag;
+  if (nstats < 10) zsflag = AG_STATS;	/* too few scores to estimate parameters */
+
+  if (m_msp->n0 <= LENGTH_CUTOFF && (zsflag == REG_STATS || zsflag == REGI_STATS || zsflag == REG2_STATS)) {
+    zsflag = MLE_STATS;
+  }
+
+/*
+#ifdef DEBUG
+  if (ppst->debug_lib) {
+    tmpf=fopen("tmp_stats.res","w+");
+    for (i=0; i<nstats; i++) fprintf(tmpf,"%d\t%d\n",sptr[i].score,sptr[i].n1);
+    fclose(tmpf);
+  }
+#endif
+*/
+
+#ifdef DEBUG
+  st_sort(sptr, nstats);
+#endif
+
+  if (zsflag >= 20) {	/* working with shuffled top sequences */
+    if (ppst->zsflag2 == AG_STATS) ppst->zsflag2 = MLE_STATS;
+    zsflag = ppst->zsflag2;
+    do_trim = 0;
+  }
+  else if (zsflag >= 10) {	/* 10 + method: same method, without trimming */
+    zsflag -= 10;
+    do_trim = 0;
+  }
+  else do_trim = 1;
+
+  if (!ppst->zdb_size_set) ppst->zdb_size = m_msp->db.entries;
+
+#ifdef USE_LNSTATS
+  if (zsflag==LN_STATS) {	/* -z 2 */
+    find_z_arr[LN_STATS] = &find_zl;
+    ps_s->zsflag=r_zsflag = proc_hist_ln(sptr, nstats, m_msp->s_info, ppst, histp, do_trim, ps_s);	/* ppst added to match the prototype */
+  }
+#else				/* -z 2 */
+  if (zsflag==MLE_STATS) {
+    find_z_arr[MLE_STATS] = &find_ze;
+    ps_s->zsflag=r_zsflag = proc_hist_ml(sptr, nstats, m_msp->s_info, ppst, histp, do_trim, ps_s);
+  }
+#endif
+  else if (zsflag==REG_STATS) {	/* -z 1 */
+    ps_s->zsflag=r_zsflag = proc_hist_r(sptr, nstats, m_msp->s_info, ppst, histp, do_trim,  ps_s);
+  }
+  else if (zsflag==AG_STATS) {	/* -z 3 */
+#ifndef NORMAL_DIST
+    ps_s->zsflag=r_zsflag = proc_hist_a(sptr, nstats, m_msp->s_info, ppst, histp, do_trim,  ps_s);
+#else
+    ps_s->zsflag=r_zsflag = proc_hist_anorm(sptr, nstats, m_msp->s_info, ppst, histp, do_trim,  ps_s);
+#endif
+
+  }
+  else if (zsflag==REGI_STATS) {	/* -z 4, iterated outlier removal */
+    ps_s->zsflag=r_zsflag = proc_hist_ri(sptr,nstats, m_msp->s_info, ppst, histp, do_trim,  ps_s);
+  }
+  else if (zsflag==REG2_STATS) {	/* -z 5, length-scaled variance */
+    ps_s->zsflag=r_zsflag = proc_hist_r(sptr,nstats, m_msp->s_info, ppst, histp, do_trim,  ps_s);
+  }
+#if !defined(TFAST) && !defined(FASTX)
+  else if (zsflag == MLE2_STATS) {	/* -z 6, MLE + composition */
+    ps_s->zsflag=r_zsflag = proc_hist_ml2(sptr, nstats, m_msp->s_info, ppst, histp, do_trim,  ps_s);
+  }
+#endif
+  else {	/* AVE_STATS */
+    ps_s->zsflag=r_zsflag = proc_hist_n(sptr,nstats, m_msp->s_info, ppst, histp, do_trim,  ps_s);
+  }
+
+  if (!do_hist && histp != NULL) {
+    histp->entries = nstats; /* db->entries = 0; */
+    inithistz(MAXHIST, histp, ps_s->zs_off);
+    for (i = 0; i < nstats; i++) {
+      if (sptr[i].n1 < 0) sptr[i].n1 = -sptr[i].n1;	/* undo the negation that marks pruned entries */
+      addhistz(find_z(sptr[i].score,sptr[i].escore,sptr[i].n1,sptr[i].comp,ps_s),
+	       histp);
+    }
+  }
+  return r_zsflag;
+}
+
+/* calc_thresh() sets the threshold used to exclude high scores from
+   regression calculations
+*/
+static int
+calc_thresh(struct pstruct *ppst, struct stat_str *sptr, int nstats,
+	    struct score_count_s s_info,
+	    double Lambda, double K, double H, double *zstrim)
+{
+  int score_cut;	/* high-score threshold handed back to the caller */
+  int k;
+  double len_mean, e_arg, z_cut, lam_adj;
+
+  /* average library sequence length over the sampled scores */
+  len_mean = 0.0;
+  for (k = 0; k < nstats; k++) {
+    len_mean += (double)sptr[k].n1;
+  }
+  if (nstats >= 1) len_mean /= nstats;
+
+  /* Lambda is used as given for DNA/RNA and scaled by 0.7 otherwise */
+  lam_adj = (ppst->dnaseq == SEQT_DNA || ppst->dnaseq == SEQT_RNA) ? 1.0 : 0.7;
+
+  /*
+    Score threshold: the score at which E() = 0.01 for nstats
+    comparisons of a query of length ppst->n0 with sequences of the
+    average length, using K and lam_adj*Lambda.  Scaling Lambda down
+    moves the threshold towards higher scores.
+
+    z-score trim point: with x = tot_scores/(nstats*s_cnt[score_ix]),
+    z = -(ln(x) + EULER_G)/PI_SQRT6, stored as 10*z+50.  When z is
+    below 3.0, -1.0 is stored instead; the callers in this file treat
+    a negative value as "too few scores to z-trim".
+
+    NORMAL_DIST builds use the fixed values 100 and z = 5.0.
+
+    H is not used.
+  */
+
+#ifndef NORMAL_DIST
+  if (K < 1.0e-50) K = 0.02;	/* prevent floating pt underflow with K=0 */
+  e_arg = 0.01/((double)nstats*K*(double)ppst->n0*len_mean);
+  e_arg = -log(e_arg)/(Lambda*lam_adj);
+  score_cut = (int)(e_arg+0.5);
+
+  /*   z_cut = 1.0/(double)nstats; */
+  z_cut = (double)s_info.tot_scores/((double)nstats*(double)s_info.s_cnt[ppst->score_ix]);
+  z_cut = (log(z_cut)+EULER_G)/(- PI_SQRT6);
+  *zstrim = (z_cut < 3.0) ? -1.0 : 10.0*z_cut+50.0;
+#else
+  score_cut = 100;
+  z_cut = 5.0;
+  *zstrim = 10.0*z_cut+50.0;
+#endif
+
+  return score_cut;
+}
+
+int
+proc_hist_r(struct stat_str *sptr, int nstats, struct score_count_s s_info,
+	    struct pstruct *ppst, struct hist_str *histp,
+	    int do_trim, struct pstat_str *pu)
+{	/* length-regression statistics; returns REG_STATS, or the return value of the fallback method */
+  int i, max_hscore;
+  double zs, zstrim;
+  char s_string[128];
+  char rho_str[32];	/* used for simplifying output label */
+  double rho_val;
+  struct llen_str llen;
+  char *f_string;
+  llen.fit_flag=1;
+  llen.hist=NULL;
+
+  max_hscore = calc_thresh(ppst, sptr,  nstats, s_info, pu->ngLambda,
+			   pu->ngK, pu->ngH, &zstrim);
+
+
+  if (do_trim && zstrim < 0.0) {
+    /* too few scores to do a z-trim, shift to Karlin-Altschul */
+    return proc_hist_a(sptr, nstats, s_info, ppst, histp, do_trim, pu);
+  }
+
+  inithist(&llen, ppst,max_hscore);
+  f_string = histp->stat_info;	/* the fit summary is written here */
+
+  for (i = 0; i<nstats; i++)
+    addhist(&llen,sptr[i].score,sptr[i].n1, max_hscore);
+
+  if ((llen.max_score - llen.min_score) < 10) {	/* score range too narrow: use proc_hist_n */
+    free_hist(&llen);
+    llen.fit_flag = 0;
+    return proc_hist_n(sptr, nstats, s_info, ppst, histp, do_trim, pu);
+  }
+
+  fit_llen(&llen, &(pu->r_u.rg)); /* get parameters for REG_STATS */
+
+  if (!llen.fit_flag) {	/* the fit failed, fall back to proc_hist_ml */
+    free_hist(&llen);
+    return proc_hist_ml(sptr,nstats, s_info, ppst, histp, do_trim, pu);
+  }
+
+  pu->r_u.rg.n_trimmed= pu->r_u.rg.n1_trimmed = pu->r_u.rg.nb_trimmed = 0;
+
+  if (do_trim) {
+    for (i = 0; i < nstats; i++) {
+      zs = find_zr(sptr[i].score,sptr[i].escore,sptr[i].n1,sptr[i].comp, pu);
+      if (zs < 20.0 || zs > zstrim) {	/* z-score outside the accepted range: remove from the bins */
+	pu->r_u.rg.n_trimmed++;
+	prune_hist(&llen,sptr[i].score,sptr[i].n1, max_hscore,
+		   &(histp->entries));
+      }
+    }
+
+    if ((nstats - pu->r_u.rg.n_trimmed) < 50) {	/* fewer than 50 scores left: use proc_hist_ml */
+      llen.fit_flag = 0;
+      free_hist(&llen);
+      return proc_hist_ml(sptr,nstats, s_info, ppst, histp, do_trim, pu);
+    }
+
+    /*  fprintf(stderr,"Z-trimmed %d entries with z > 5.0\n",
+	pu->r_u.rg.n_trimmed); */
+
+    /* calculate parameters for REG2_STATS; fit_llens() is always
+       called, but the parameters are only used for REG2_STATS
+       (find_zr2) */
+
+    fit_llens(&llen, &(pu->r_u.rg));
+
+    /*   fprintf(stderr,"Bin-trimmed %d entries in %d bins\n",
+	 pu->r_u.rg.n1_trimmed,pu->r_u.rg.nb_trimmed); */
+  }
+
+  free_hist(&llen);	/* llen.zero_s is still read for the summary below */
+
+  /* put all the scores in the histogram */
+
+  if (ppst->zsflag < 10) s_string[0]='\0';	/* s_string labels statistics from shuffled scores */
+  else if (ppst->zs_win > 0) {sprintf(s_string,"(shuffled [%d], win: %d)",nstats,ppst->zs_win);}
+  else if (ppst->shuffle_dna3) { sprintf(s_string,"(shuffled3 [%d])",nstats);}
+  else { sprintf(s_string,"(shuffled [%d])",nstats);}
+
+  /* #ifdef LOCAL_SCORE */
+  strncpy(rho_str,"ln(x)",sizeof(rho_str));
+  rho_val = pu->r_u.rg.rho*LN_FACT;	/* rho is displayed multiplied by LN_FACT */
+  /*
+#else
+  strncpy(rho_str,"x",sizeof(rho_str));
+  rho_val = pu->r_u.rg.rho;
+#endif
+  */
+
+  if (ppst->zsflag == REG2_STATS || ppst->zsflag == 10+REG2_STATS ||
+      ppst->zsflag == 20+REG2_STATS) {	/* REG2_STATS summary includes rho2 and mu2 */
+    sprintf(f_string,"%s Expectation_v fit: rho(%s)= %6.4f+/-%6.3g; mu= %6.4f+/-%6.3f;\n rho2=%6.2f; mu2= %6.2f, 0's: %d Z-trim: %d  B-trim: %d in %d/%d",
+	    s_string, rho_str, rho_val ,sqrt(pu->r_u.rg.rho_e),pu->r_u.rg.mu,sqrt(pu->r_u.rg.mu_e),
+	    pu->r_u.rg.rho2,pu->r_u.rg.mu2,llen.zero_s,
+	    pu->r_u.rg.n_trimmed, pu->r_u.rg.n1_trimmed, pu->r_u.rg.nb_trimmed, pu->r_u.rg.nb_tot);
+  }
+  else {
+    sprintf(f_string,"%s Expectation_n fit: rho(%s)= %6.4f+/-%6.3g; mu= %6.4f+/-%6.3f\n mean_var=%6.4f+/-%6.3f, 0's: %d Z-trim(%.1f): %d  B-trim: %d in %d/%d\n Lambda= %8.6f",
+	    s_string, rho_str, rho_val,sqrt(pu->r_u.rg.rho_e),pu->r_u.rg.mu,sqrt(pu->r_u.rg.mu_e), pu->r_u.rg.mean_var,sqrt(pu->r_u.rg.var_e),
+	    llen.zero_s, zstrim, pu->r_u.rg.n_trimmed, pu->r_u.rg.n1_trimmed, pu->r_u.rg.nb_trimmed, pu->r_u.rg.nb_tot,
+	    PI_SQRT6/sqrt(pu->r_u.rg.mean_var));
+  }
+  return REG_STATS;	/* also returned when called for REG2_STATS */
+}
+
+/* proc_hist_ri() -- REGI_STATS: regression of mean score vs log(len) after up to 5 rounds of outlier removal */
+int
+proc_hist_ri(struct stat_str *sptr, int nstats, struct score_count_s s_info,
+	     struct pstruct *ppst, struct hist_str *histp,
+	     int do_trim, struct pstat_str *pu)
+{
+  int i, nit, max_it, nprune, max_hscore;
+  double zs, zstrim;
+  char s_string[128];
+  char *f_string;
+  struct llen_str llen;
+
+  llen.fit_flag=1;
+  llen.hist=NULL;
+
+  max_hscore = calc_thresh(ppst, sptr, nstats, s_info, pu->ngLambda,
+			   pu->ngK, pu->ngH, &zstrim);
+
+  if (do_trim && zstrim < 0.0) {
+    /* too few scores to do a z-trim, shift to Karlin-Altschul */
+    return proc_hist_a(sptr, nstats, s_info, ppst, histp, do_trim, pu);
+  }
+
+  inithist(&llen, ppst,max_hscore);
+  f_string = histp->stat_info;
+
+  for (i = 0; i<nstats; i++)
+    addhist(&llen,sptr[i].score,sptr[i].n1,max_hscore);
+
+  pu->r_u.rg.n_trimmed= pu->r_u.rg.n1_trimmed = pu->r_u.rg.nb_trimmed = 0;
+  max_it = do_trim ? 5 : 0;	/* no outlier removal without do_trim */
+
+  for (nit = 0; nit < max_it; ) {	/* nit = passes made, reported as "N-it:" */
+    nit++;
+    nprune = 0;
+    fit_llen2(&llen, &(pu->r_u.rg));
+    if (llen.fit_flag == 0) break;
+
+    for (i = 0; i < nstats; i++) {
+      if (sptr[i].n1 < 0) continue;	/* pruned in an earlier pass */
+      zs = find_zr(sptr[i].score,sptr[i].escore,sptr[i].n1,sptr[i].comp,pu);
+      if (zs < 20.0 || zs > zstrim ) {
+	nprune++;
+	pu->r_u.rg.n_trimmed++;
+	prune_hist(&llen,sptr[i].score,sptr[i].n1,max_hscore,
+		   &(histp->entries));
+	sptr[i].n1 = -sptr[i].n1;	/* a negated length marks a pruned entry */
+      }
+    }
+    if (nprune < LHISTC) { break; }	/* fewer than LHISTC new outliers: stop iterating */
+  }
+
+  fit_llen(&llen, &(pu->r_u.rg));
+  free_hist(&llen);
+
+  if (!llen.fit_flag) {	/* fit failed: restore the lengths negated above, then fall back to proc_hist_ml */
+    for (i = 0; i < nstats; i++) if (sptr[i].n1 < 0) sptr[i].n1 = -sptr[i].n1;
+    return proc_hist_ml(sptr,nstats, s_info, ppst, histp, do_trim, pu);
+  }
+
+  if (ppst->zsflag < 10) s_string[0]='\0';
+  else if (ppst->zs_win > 0)
+    sprintf(s_string,"(shuffled [%d], win: %d)",nstats,ppst->zs_win);
+  else 
+    sprintf(s_string,"(shuffled [%d])",nstats);
+
+  sprintf(f_string,"%s Expectation_i fit: rho(ln(x))= %6.4f+/-%6.3g; mu= %6.4f+/-%6.3f;\n mean_var=%6.4f+/-%6.3f 0's: %d Z-trim: %d N-it: %d\n Lambda= %8.6f",
+	  s_string,
+	  pu->r_u.rg.rho*LN_FACT,sqrt(pu->r_u.rg.rho_e),pu->r_u.rg.mu,sqrt(pu->r_u.rg.mu_e),
+	  pu->r_u.rg.mean_var,sqrt(pu->r_u.rg.var_e),llen.zero_s,pu->r_u.rg.n_trimmed, nit,
+	  PI_SQRT6/sqrt(pu->r_u.rg.mean_var));
+  return REGI_STATS;
+}
+
+/* this procedure implements Altschul's pre-calculated values for lambda, K */
+
+#include "alt_parms.h"
+
+int
+look_p(struct alt_p parm[], int gap, int ext, 
+       double *K, double *Lambda, double *H);
+
+/* proc_hist_a() -- AG_STATS: take K, Lambda and H from the pre-calculated
+   Altschul-Gish tables for ppst->pam_name and the gap penalties, store
+   them in pu->r_u.ag and describe them in histp->stat_info.  When the
+   tables have no matching entry the BLOSUM62 entry for the same
+   penalties is stored instead, or, failing that, the values in
+   pu->ngLambda/ngK/ngH.  sptr, nstats, s_info and do_trim are unused. */
+int
+proc_hist_a(struct stat_str *sptr, int nstats, struct score_count_s s_info,
+	    struct pstruct *ppst, struct hist_str *histp,
+	    int do_trim, struct pstat_str *pu)
+{
+  double Lambda, K, H;
+  char *f_string;
+  int r_v;
+  int t_gdelval, t_ggapval;
+
+  t_ggapval = ppst->ggapval;
+#ifdef OLD_FASTA_GAP
+  t_gdelval = ppst->gdelval;
+#else
+  t_gdelval = ppst->gdelval+ppst->ggapval;
+#endif
+
+  f_string = histp->stat_info;
+
+  if (ppst->dnaseq==0) {
+    if (strcmp(ppst->pam_name,"BL50")==0 || strcmp(ppst->pam_name,"BLOSUM50")==0) {
+      r_v = look_p(bl50_p,t_gdelval,t_ggapval,&K,&Lambda,&H);
+    }
+    else if (strcmp(ppst->pam_name,"BL62")==0 || strcmp(ppst->pam_name,"BLOSUM62")==0) {
+      r_v = look_p(bl62_p,t_gdelval,t_ggapval,&K,&Lambda,&H);
+    }
+    else if (strcmp(ppst->pam_name,"BL80")==0 || strcmp(ppst->pam_name,"BLOSUM80")==0) {
+      r_v = look_p(bl80_p,t_gdelval,t_ggapval,&K,&Lambda,&H);
+    }
+    else if (strcmp(ppst->pam_name,"PAM250")==0) {
+      r_v = look_p(p250_p,t_gdelval,t_ggapval,&K,&Lambda,&H);
+    }
+    else if ((strcmp(ppst->pam_name,"PAM120")==0) || (strcmp(ppst->pam_name,"VT120")==0)) {
+      r_v = look_p(p120_p,t_gdelval,t_ggapval,&K,&Lambda,&H);
+    }
+    else if ((strcmp(ppst->pam_name,"MD10")==0) || (strcmp(ppst->pam_name,"VT10")==0)) {
+      r_v = look_p(md10_p,t_gdelval,t_ggapval,&K,&Lambda,&H);
+    }
+    else if ((strcmp(ppst->pam_name,"MD20")==0) || (strcmp(ppst->pam_name,"VT20")==0)) {
+      r_v = look_p(md20_p,t_gdelval,t_ggapval,&K,&Lambda,&H);
+    }
+    else if ((strcmp(ppst->pam_name,"MD40")==0) || (strcmp(ppst->pam_name,"VT40")==0)) {
+      r_v = look_p(md40_p,t_gdelval,t_ggapval,&K,&Lambda,&H);
+    }
+    else if (strcmp(ppst->pam_name,"OPTIMA5")==0) {
+      r_v = look_p(opt5_p,t_gdelval,t_ggapval,&K,&Lambda,&H);
+    }
+    else r_v = 0;
+  }
+  else {
+    r_v = look_p(nt32_p,t_gdelval,t_ggapval,&K,&Lambda,&H);	/* used when nothing below matches */
+    if (strcmp(ppst->pam_name,"DNA")==0 || (ppst->pam_h==5 && ppst->pam_l == -4)) {
+      r_v = look_p(nt54_p,t_gdelval,t_ggapval,&K,&Lambda,&H);
+    }
+    else if (strcmp(ppst->pam_name,"+3/-2")==0 || (ppst->pam_h==3 && ppst->pam_l == -2)) {
+      r_v = look_p(nt32_p,t_gdelval,t_ggapval,&K,&Lambda,&H);
+    }
+    else if (strcmp(ppst->pam_name,"+1/-3")==0 || (ppst->pam_h==1 && ppst->pam_l == -3)) {
+      r_v = look_p(nt13_p,t_gdelval,t_ggapval,&K,&Lambda,&H);
+    }
+  }
+
+  if (r_v != 1) {	/* no table entry: use the BLOSUM62 values for these penalties */
+#ifdef DEBUG
+    fprintf(stderr,"+++ Warning : [%s:%d] Parameters not available for: %s: %d/%d -- using BL62\n",
+	    __FILE__, __LINE__, ppst->pam_name,t_gdelval-t_ggapval,t_ggapval);
+#endif
+    if (look_p(bl62_p,t_gdelval,t_ggapval,&K,&Lambda,&H) != 1) {
+      K = pu->ngK; Lambda = pu->ngLambda; H = pu->ngH;	/* last resort, so that nothing is read uninitialized */
+    }
+  }
+  pu->r_u.ag.Lambda = Lambda;	/* stored for the fallback values as well */
+  pu->r_u.ag.K = K;
+  pu->r_u.ag.H = H;
+
+  pu->r_u.ag.a_n0 = (double)ppst->n0;
+  pu->r_u.ag.a_n0f = log (K * pu->r_u.ag.a_n0)/H;
+
+  sprintf(f_string,"Altschul/Gish params: n0: %d Lambda: %5.3f K: %5.3f H: %5.3f",
+	  ppst->n0,Lambda, K, H);
+  return AG_STATS;
+}
+
+int
+ag_parm(char *pam_name, int gdelval, int ggapval, struct pstat_str *pu)
+{
+  /* Look up the pre-calculated Altschul-Gish K, Lambda and H for the
+     scoring matrix pam_name and the gap penalties gdelval/ggapval, and
+     store them in pu->r_u.ag.  Returns 1 when an entry is found; returns
+     0 and leaves pu unchanged (rather than storing uninitialized
+     values) when the matrix or the penalties are not in the tables. */
+  double Lambda, K, H;
+  int r_v;
+
+  if (strcmp(pam_name,"BL50")==0)
+    r_v = look_p(bl50_p,gdelval,ggapval,&K,&Lambda,&H);
+  else if (strcmp(pam_name,"BL62")==0)
+      r_v = look_p(bl62_p,gdelval,ggapval,&K,&Lambda,&H);
+  else if (strcmp(pam_name,"P250")==0)
+      r_v = look_p(p250_p,gdelval,ggapval,&K,&Lambda,&H);
+  else if (strcmp(pam_name,"P120")==0)
+      r_v = look_p(p120_p,gdelval,ggapval,&K,&Lambda,&H);
+  else if (strcmp(pam_name,"MD10")==0)
+      r_v = look_p(md10_p,gdelval,ggapval,&K,&Lambda,&H);
+  else if (strcmp(pam_name,"MD20")==0)
+      r_v = look_p(md20_p,gdelval,ggapval,&K,&Lambda,&H);
+  else if (strcmp(pam_name,"MD40")==0)
+      r_v = look_p(md40_p,gdelval,ggapval,&K,&Lambda,&H);
+  else if (strcmp(pam_name,"DNA")==0 || strcmp(pam_name,"+5/-4")==0)
+      r_v = look_p(nt54_p,gdelval,ggapval, &K,&Lambda,&H);
+  else if (strcmp(pam_name,"+3/-2")==0)
+      r_v = look_p(nt32_p,gdelval,ggapval, &K,&Lambda,&H);
+  else if (strcmp(pam_name,"+1/-3")==0)
+      r_v = look_p(nt13_p,gdelval,ggapval, &K,&Lambda,&H);
+  else r_v = 0;
+
+  if (r_v == 1) {	/* K, Lambda and H are only set by a successful look_p() */
+    pu->r_u.ag.K = K;
+    pu->r_u.ag.Lambda = Lambda;
+    pu->r_u.ag.H = H;
+  }
+  return r_v;
+}
+
+int
+look_p(struct alt_p parm[], int gap, int ext,
+       double *K, double *Lambda, double *H)
+{
+  /* Find the entry of parm[] for the penalties gap/ext (both are
+     negated before they are compared with the table) and copy its K,
+     Lambda and H to the caller.  parm[0] is used for any negated gap
+     larger than parm[1].gap; otherwise the entries from parm[1] on are
+     scanned up to the first one whose gap is not positive.  Returns 1
+     on a match, 0 otherwise (the outputs are then left untouched). */
+  const int want_gap = -gap, want_ext = -ext;
+  int ix, hit = -1;
+
+  if (want_gap > parm[1].gap) hit = 0;
+  else {
+    for (ix = 1; hit < 0 && parm[ix].gap > 0; ix++) {
+      if (parm[ix].gap > want_gap) continue;	/* table gap still larger */
+      if (parm[ix].gap != want_gap || parm[ix].ext < want_ext) break;	/* scan stops without a match */
+      if (parm[ix].ext == want_ext) hit = ix;
+    }
+  }
+
+  if (hit < 0) return 0;
+
+  *K = parm[hit].K;
+  *Lambda = parm[hit].Lambda;
+  *H = parm[hit].H;
+  return 1;
+}
+
+#ifdef NORMAL_DIST
+int
+proc_hist_anorm(struct stat_str *sptr, int nstats, struct score_count_s s_info,
+		struct pstruct *ppst, struct hist_str *histp,
+		int do_trim, struct pstat_str *pu)
+{
+  /* NORMAL_DIST replacement for proc_hist_a(): sorts sptr[] by score and
+     stores the median as mu and (interquartile range/1.35)^2 as mean_var
+     in pu->r_u.rg, then writes a summary to histp->stat_info. */
+  char shuf_note[128];
+  char *summary = histp->stat_info;
+  int mid, lo_q, hi_q;
+  double sd_est;
+
+  if (ppst->zsflag < 10) {
+    shuf_note[0]='\0';
+  }
+  else if (ppst->zs_win > 0) {
+    sprintf(shuf_note,"(shuffled [%d], win: %d)",nstats,ppst->zs_win);
+  }
+  else {
+    sprintf(shuf_note,"(shuffled [%d])",nstats);
+  }
+
+  /* ranks of the median and of the entries mid/2 places below and above it */
+  mid = nstats/2;
+  lo_q = mid - mid/2;
+  hi_q = mid + mid/2;
+
+  st_sort(sptr,nstats);
+
+  /* mu is the median score; the score difference/1.35 is used as the s.d. */
+  sd_est = (sptr[hi_q].score - sptr[lo_q].score)/1.35;
+  pu->r_u.rg.mu = sptr[mid].score;
+  pu->r_u.rg.mean_var_sqrt = sd_est;
+  pu->r_u.rg.mean_var = sd_est * sd_est;
+  pu->r_u.rg.n_trimmed = 0;
+
+  sprintf(summary,
+	  "%s Unscaled normal (median, IQR) statistics: mu= %6.4f  var=%6.4f Ztrim: %d",
+	  shuf_note, pu->r_u.rg.mu,pu->r_u.rg.mean_var, pu->r_u.rg.n_trimmed);
+  return AG_STATS;
+}
+#endif
+
+/* uncensored and censored maximum likelihood estimates developed
+   by Aaron Mackey based on a preprint from Sean Eddy */
+
+int mle_cen  (struct stat_str *, int, int, double, double *, double *);
+
+int
+proc_hist_ml(struct stat_str *sptr, int nstats, struct score_count_s s_info,
+	     struct pstruct *ppst, struct hist_str *histp,
+	     int do_trim, struct pstat_str *pu)
+{
+  /* MLE_STATS: estimate Lambda and K with mle_cen() into pu->r_u.ag and
+     write a summary to histp->stat_info.  With do_trim set, a fraction
+     of the scores is censored.  Returns MLE_STATS, or the result of
+     proc_hist_n() when mle_cen() reports failure (-1). */
+  double f_cen;
+  char s_string[128];
+  char *f_string;
+  double opt_correct;
+
+  f_string = histp->stat_info;
+  pu->r_u.ag.a_n0 = (double)ppst->n0;
+
+  if (ppst->zsflag < 10) s_string[0]='\0';	/* otherwise shuffled, do_trim = 0 */
+  else if (ppst->zs_win > 0)
+    sprintf(s_string,"(shuffled [%d], win: %d)",nstats,ppst->zs_win);
+  else
+    sprintf(s_string,"(shuffled [%d])",nstats);
+
+  if (!do_trim) {
+    if (mle_cen(sptr, nstats, ppst->n0, 0.0, &pu->r_u.ag.Lambda, &pu->r_u.ag.K) == -1)
+      goto bad_mle;
+    sprintf(f_string,"%s MLE statistics: Lambda= %6.4f;  K=%6.4g",
+	    s_string,pu->r_u.ag.Lambda,pu->r_u.ag.K);
+  }
+  else {
+    /* this section attempts to censor the appropriate fraction of
+       high-scores.  In general, we would like 5% censored.
+       Unfortunately, for fasta36 with statistical thresholds for
+       optimization, the thresholds mean that only a small fraction of
+       the highest scoring library sequences are optimized (and used
+       for scores).  As a result, there may be no low scores, and a
+       large excess of high-scores.  Thus, when only the best scores
+       are calculated initially, more censoring must be done.
+
+       The pre-selection information is available in struct
+       score_count_s m_msp->s_info, by comparing
+       s_info.s_cnt[ppst->score_ix] to s_info.tot_scores.
+     */
+
+    if (s_info.tot_scores > 0)	/* avoid 0/0, which would make f_cen NaN */
+      opt_correct = (double)s_info.s_cnt[ppst->score_ix]/(double)s_info.tot_scores;
+    else opt_correct = 1.0;	/* no pre-selection information */
+
+    if ((double)nstats/20.0 > 1000.0) f_cen = 1000.0/((double)nstats*opt_correct);
+    else f_cen = 0.05/opt_correct;
+    if (f_cen > 0.4) f_cen = 0.2;
+
+    if (mle_cen(sptr, nstats, ppst->n0, f_cen, &pu->r_u.ag.Lambda, &pu->r_u.ag.K) == -1)
+      goto bad_mle;
+    sprintf(f_string, "MLE_cen statistics: Lambda= %6.4f;  K=%6.4g (cen=%d)",
+	    pu->r_u.ag.Lambda,pu->r_u.ag.K,(int)((double)nstats*f_cen));
+  }    
+
+  return MLE_STATS;
+ bad_mle:
+  return proc_hist_n(sptr, nstats, s_info, ppst, histp, do_trim, pu);
+}
+
+int
+mle_cen2  (struct stat_str *, int, int, double, double *, double *, double *, double *);
+
+
+int
+proc_hist_ml2(struct stat_str *sptr, int nstats, struct score_count_s s_info,
+	      struct pstruct *ppst, struct hist_str *histp,
+	      int do_trim, struct pstat_str *pu)
+{
+  /* MLE2_STATS: fit a0, a1, a2 and b1 with mle_cen2() into pu->r_u.m2 and
+     write a summary to histp->stat_info.  With do_trim set, 5% of the
+     scores (at most 500) are censored.  Returns MLE2_STATS, or the result
+     of proc_hist_n() if the fit fails or no composition value is positive. */
+  int i, ns;
+  double f_cen, ave_lambda;
+  char s_string[128];
+  char *f_string;
+
+  f_string = histp->stat_info;
+  pu->r_u.m2.a_n0 = (double)ppst->n0;
+
+  if (ppst->zsflag < 10) s_string[0]='\0';
+  else if (ppst->zs_win > 0)
+    sprintf(s_string,"(shuffled [%d], win: %d)",nstats,ppst->zs_win);
+  else 
+    sprintf(s_string,"(shuffled [%d])",nstats);
+
+  pu->r_u.m2.ave_comp = 0.0;
+  pu->r_u.m2.max_comp = -1.0;
+
+  /* ave_comp becomes the geometric mean of the positive composition values */
+  ns = 0;
+  for (i=0; i<nstats; i++) {
+    if (sptr[i].comp > pu->r_u.m2.max_comp) pu->r_u.m2.max_comp = sptr[i].comp;
+    if (sptr[i].comp > 0.0) {
+      pu->r_u.m2.ave_comp += log(sptr[i].comp);
+      ns++;
+    }
+  }
+  /* with no positive value the mean would be 0/0 and would be copied into sptr[] */
+  if (ns == 0) goto bad_mle2;
+  pu->r_u.m2.ave_comp /= (double)ns;
+  pu->r_u.m2.ave_comp = exp(pu->r_u.m2.ave_comp);
+  for (i=0; i<nstats; i++) if (sptr[i].comp < 0.0) {	/* negative values are replaced by the average */
+    sptr[i].comp = pu->r_u.m2.ave_comp;
+  }
+
+  if (!do_trim) {
+    if (mle_cen2(sptr, nstats, ppst->n0, 0.0,
+	     &pu->r_u.m2.mle2_a0, &pu->r_u.m2.mle2_a1,
+	     &pu->r_u.m2.mle2_a2, &pu->r_u.m2.mle2_b1) == -1) goto bad_mle2;
+    ave_lambda = 1.0/(pu->r_u.m2.ave_comp*pu->r_u.m2.mle2_b1);
+
+    sprintf(f_string,"%s MLE-2 statistics: a0= %6.4f;  a1=%6.4f; a2=%6.4f; b1=%6.4f\n  ave Lamdba: %6.4f",
+	    s_string, pu->r_u.m2.mle2_a0, pu->r_u.m2.mle2_a1, pu->r_u.m2.mle2_a2, pu->r_u.m2.mle2_b1,ave_lambda);
+  }
+  else {
+    if (nstats/20 > 500) f_cen = 500.0/(double)nstats;
+    else f_cen = 0.05;
+    if (mle_cen2(sptr, nstats, ppst->n0, f_cen, &pu->r_u.m2.mle2_a0, &pu->r_u.m2.mle2_a1, &pu->r_u.m2.mle2_a2, &pu->r_u.m2.mle2_b1)== -1) goto bad_mle2;
+    ave_lambda = 1.0/(pu->r_u.m2.ave_comp*pu->r_u.m2.mle2_b1);
+
+    sprintf(f_string,"%s MLE-2-cen statistics: a0= %6.4f;  a1=%6.4f; a2=%6.4f; b1=%6.4f (cen=%d)\n  ave Lambda:%6.4f",
+	    s_string, pu->r_u.m2.mle2_a0, pu->r_u.m2.mle2_a1, pu->r_u.m2.mle2_a2, pu->r_u.m2.mle2_b1, (int)((double)nstats*f_cen),ave_lambda);
+  }    
+
+  return MLE2_STATS;
+ bad_mle2:
+  return proc_hist_n(sptr, nstats, s_info, ppst, histp, do_trim, pu);
+}
+
+double first_deriv_cen(double lambda, struct stat_str *sptr, 
+		       int start, int stop,
+		       double sumlenL, double cenL,
+		       double sumlenH, double cenH);
+
+double second_deriv_cen(double lambda, struct stat_str *sptr,
+			int start, int stop,
+			double sumlenL, double cenL,
+			double sumlenH, double cenH);
+
+static void
+st_sort (struct stat_str *v, int n) {
+  /* Shell sort of v[0..n-1] into ascending order of score.
+
+     Whole records are exchanged, so that every field of an entry
+     (escore included, not just score, n1, comp and H) stays with
+     its score.
+
+     Entries with equal scores may end up in a different relative
+     order (the sort is not stable).
+  */
+  int gap, i, j;
+  struct stat_str tmp;
+
+  /* first increment of the 1, 4, 13, 40, ... series that is >= n/3 */
+  for (gap = 1; gap < n/3; gap = 3*gap +1) ;
+
+  for (; gap > 0; gap = (gap-1)/3) {
+    for (i = gap; i < n; i++) {
+
+      /* move v[i] down through the elements gap apart until in order */
+      for (j = i - gap; j >= 0; j -= gap) {
+	if (v[j].score <= v[j + gap].score) break;
+
+	tmp = v[j];
+	v[j] = v[j + gap];
+	v[j + gap] = tmp;
+      }
+    }
+  }
+
+}
+
+
+/* sptr[].score, sptr[].n1; sptr[] is sorted by score inside mle_cen()
+   int n = total number of samples
+   int M = length of query
+   double fc = fraction of scores to be censored, fc/2.0 each from top, bottom
+   double *Lambda = Lambda estimate
+   double *K = K estimate
+*/
+
+#define MAX_NIT 100
+
+/* mle_cen: censored maximum-likelihood estimate of Lambda and K.
+
+   sptr    scores; sorted in place by score (st_sort) before fitting
+   n       number of entries in sptr[]
+   M       query length (enters only the K estimate)
+   fc      fraction of scores censored, fc/2 from each end of the
+           sorted list
+   Lambda, K   results, written only when 0 is returned
+
+   Returns 0 on success; -1 when fewer than two scores remain
+   uncensored, when the censoring scores do not satisfy cenL < cenH,
+   or when the Newton-Raphson iteration has not converged after
+   MAX_NIT steps.
+*/
+int
+mle_cen(struct stat_str *sptr, int n, int M, double fc,
+	double *Lambda, double *K) {
+
+  double sumlenL, sumlenH, cenL, cenH;
+  double sum_s, sum2_s, mean_s, var_s, dtmp, dev;
+  int start, stop;
+  int i, nf;
+  int nit = 0;
+  double deriv, deriv2, lambda, old_lambda, sum = 0.0;
+
+  /*
+   sumlenL, sumlenH = summed lengths (n1) of the censored low (Left)
+                      and High sequences
+   cenL, cenH = censoring score low, high
+  */
+
+  nf = (fc/2.0) * n;
+  start = nf;
+  stop = n - nf;
+
+  st_sort(sptr,n);
+
+  /* the sample variance below, and sptr[start] when nothing is
+     censored, need at least two uncensored scores */
+  if (stop - start < 2) return -1;
+
+  sum_s = 0.0;
+  for (i=start; i<stop; i++) {
+    sum_s += (double)sptr[i].score;
+  }
+  dtmp = (double)(stop-start);
+  mean_s = sum_s/dtmp;
+
+  /* sample variance about the mean; the earlier code divided the raw
+     sum of squared scores by (n-1), which overestimates the variance
+     and so underestimates the starting lambda */
+  sum2_s = 0.0;
+  for (i=start; i<stop; i++) {
+    dev = (double)sptr[i].score - mean_s;
+    sum2_s += dev * dev;
+  }
+  var_s = sum2_s/(dtmp-1.0);
+
+  sumlenL = sumlenH = 0.0;
+  for (i=0; i<start; i++) sumlenL += (double)sptr[i].n1;
+  for (i=stop; i<n; i++) sumlenH += (double)sptr[i].n1;
+
+  if (nf > 0) {
+    cenL = (double)sptr[start].score;
+    cenH = (double)sptr[stop].score;
+  }
+  else {
+    /* nothing censored: sumlenL and sumlenH are 0, so the cutoffs
+       only have to satisfy the cenL < cenH test below */
+    cenL = (double)sptr[start].score/2.0;
+    cenH = (double)sptr[start].score*2.0;
+  }
+
+  if (cenL >= cenH) return -1;
+
+  /* initial guess for lambda from the score variance; a fixed 0.2
+     does not work for matrices with very different scales */
+  lambda = PI_SQRT6/sqrt(var_s);
+  if (lambda > 1.0) {
+    fprintf(stderr," Lambda initial estimate error: lambda: %6.4g; var_s: %6.4g\n",lambda,var_s);
+    lambda = 0.2;
+  }
+
+  /* Newton-Raphson on the first derivative of the censored
+     log-likelihood; a step that would make lambda non-positive is
+     replaced by halving lambda */
+  do {
+    deriv = first_deriv_cen(lambda, sptr, start, stop,
+			    sumlenL, cenL, sumlenH, cenH);
+    deriv2 = second_deriv_cen(lambda, sptr, start, stop,
+			     sumlenL, cenL, sumlenH, cenH);
+
+    old_lambda = lambda;
+    if (lambda - deriv/deriv2 > 0.0) lambda = lambda - deriv/deriv2;
+    else lambda = lambda/2.0;
+    nit++;
+  } while (fabs((lambda - old_lambda)/lambda) > TINY && nit < MAX_NIT);
+
+  /*  fprintf(stderr," mle_cen nit: %d\n",nit); */
+
+  if (nit >= MAX_NIT) return -1;
+
+  for(i = start; i < stop ; i++) {
+    sum += (double) sptr[i].n1 * exp(- lambda * (double)sptr[i].score);
+  }
+
+  *Lambda = lambda;
+  /* uncensored form:
+  *K = (double)(stop-start)/((double)M*sum);
+  */
+  *K = (double)n/((double)M*
+		  (sum+sumlenL*exp(-lambda*cenL)-sumlenH*exp(-lambda*cenH)));
+  return 0;
+}
+
+/*
+double
+first_deriv(double lambda, struct stat_str *sptr, int n) {
+
+  int i;
+  double sum = 0.0, sum1 = 0.0, sum2 = 0.0;
+  double s, l, es;
+
+  for(i = 0 ; i < n ; i++) {
+    s = (double)sptr[i].score;
+    l = (double)sptr[i].n1;
+    es = exp(-lambda * s );
+    sum += s;
+    sum2 += l * es;
+    sum1 += s * l * es;
+  }
+
+  return (1.0/lambda) - (sum/(double)n) + (sum1/sum2);
+}
+*/
+
+/*
+double
+second_deriv(double lambda, struct stat_str *sptr, int n) {
+  double sum1 = 0.0, sum2 = 0.0, sum3 = 0.0;
+  double s, l, es;
+  int i;
+
+  for(i = 0 ; i < n ; i++) {
+    l = (double)sptr[i].n1;
+    s = (double)sptr[i].score;
+    es = exp(-lambda * s);
+    sum2 += l * es;
+    sum1 += l * s * es;
+    sum3 += l * s * s * es;
+  }
+
+  return ((sum1*sum1)/(sum2*sum2)) - (sum3/sum2) -  (1.0/(lambda*lambda));
+}
+*/
+
+/* first_deriv_cen: first-derivative term used by the Newton-Raphson
+   step in mle_cen().  The sums run over sptr[start..stop-1]; the
+   censored tails enter through (sumlenL, cenL), which is added, and
+   (sumlenH, cenH), which is subtracted. */
+double
+first_deriv_cen(double lambda, struct stat_str *sptr, int start, int stop,
+		double sumlenL, double cenL, double sumlenH, double cenH) {
+  struct stat_str *sp;
+  double score_tot = 0.0, wt_score_tot = 0.0, wt_tot = 0.0;
+  double sc, len, decay, decayL, decayH;
+
+  for (sp = sptr + start; sp < sptr + stop; sp++) {
+    sc = (double)sp->score;
+    len = (double)sp->n1;
+    decay = exp(-lambda * sc );
+    score_tot += sc;
+    wt_tot += len * decay;
+    wt_score_tot += sc * len * decay;
+  }
+
+  /* contributions of the censored low and high tails */
+  decayL = exp(-lambda*cenL);
+  decayH = exp(-lambda*cenH);
+  wt_score_tot += sumlenL*cenL*decayL - sumlenH*cenH*decayH;
+  wt_tot += sumlenL*decayL - sumlenH*decayH;
+
+  return (1.0 / lambda) - (score_tot /(double)(stop-start)) + (wt_score_tot / wt_tot);
+}
+
+/* second_deriv_cen: second-derivative term used by the Newton-Raphson
+   step in mle_cen().  Same arguments and censoring treatment as
+   first_deriv_cen(). */
+double
+second_deriv_cen(double lambda, struct stat_str *sptr, int start, int stop,
+		 double sumlenL, double cenL, double sumlenH, double cenH) {
+
+  struct stat_str *sp;
+  double m0 = 0.0, m1 = 0.0, m2 = 0.0;	/* sums of len*e, len*s*e, len*s*s*e */
+  double sc, len, decay, decayL, decayH;
+
+  for (sp = sptr + start; sp < sptr + stop; sp++) {
+    sc = (double)sp->score;
+    len = (double)sp->n1;
+    decay = exp(-lambda * sc);
+    m0 += len * decay;
+    m1 += len * sc * decay;
+    m2 += len * sc * sc * decay;
+  }
+
+  /* contributions of the censored low and high tails */
+  decayL = exp(-lambda * cenL);
+  decayH = exp(-lambda * cenH);
+  m1 += sumlenL*cenL*decayL - sumlenH*cenH*decayH;
+  m0 += sumlenL*decayL -  sumlenH*decayH;
+  m2 += sumlenL*cenL*cenL * decayL -
+    sumlenH*cenH*cenH * decayH;
+
+  return ((m1 * m1) / (m0 * m0)) - (m2 / m0)
+    - (1.0 / (lambda * lambda));
+}
+
+double mle2_func(double *params,	/* a0, a1, a2, b1 */
+		 double *consts,	/* 9 fixed values filled in by mle_cen2() */
+		 struct stat_str *values,
+		 int n, int start, int stop);
+/* forward declarations: mle_cen2() uses both functions ahead of their definitions */
+void simplex(double *fitparams,	/* in: starting point; out: best parameters found */
+	     double *lambda,	/* offset added to one parameter per extra simplex vertex */
+	     int nparam,
+	     double (*minfunc) (double *tryparams, double *consts, 
+				struct stat_str *data, int ndata,
+				int start, int stop),
+	     double *consts,
+	     void *data,
+	     int ndata, int start, int stop
+	     );
+
+/* mle_cen2: fit the four parameters a0, a1, a2, b1 by minimising
+   mle2_func() with simplex().
+
+   sptr   scores; sorted in place by score before fitting
+   n      number of entries in sptr[]
+   M      query length, handed to mle2_func() as consts[0]
+   fc     fraction of scores censored, fc/2 from each end
+
+   The censored tails are summarised by the arithmetic means of their
+   lengths (n1) and compositions (comp).  Returns -1 when scores are
+   censored and the low cutoff is not below the high cutoff, else 0.
+*/
+int
+mle_cen2(struct stat_str *sptr, int n, int M, double fc,
+	double *a0, double *a1, double *a2, double *b1) {
+
+  double fit[4], step[4], fixed[9];
+  double lenL_ave, lenH_ave, compL_ave, compH_ave, lo_cut, hi_cut;
+  int first, last, ncen, k;
+
+  ncen = (fc/2.0) * n;
+  first = ncen;
+  last = n - ncen;
+
+  st_sort(sptr,n);
+
+  /* values used when nothing is censored */
+  lenL_ave = lenH_ave = lo_cut = hi_cut = 0.0;
+  compL_ave = compH_ave = 1.0;
+
+  if (ncen > 0) {
+    lo_cut = (double)sptr[first].score;
+    hi_cut = (double)sptr[last].score;
+    if (lo_cut >= hi_cut) return -1;
+
+    compL_ave = compH_ave = 0.0;
+
+    /* low tail: entries 0 .. first-1 */
+    for (k = 0; k < first; k++) {
+      lenL_ave += (double)sptr[k].n1;
+      compL_ave += (double)sptr[k].comp;
+    }
+    lenL_ave /= (double) first;
+    compL_ave /= (double) first;
+
+    /* high tail: entries last .. n-1 */
+    for (k = last; k < n; k++) {
+      lenH_ave += (double)sptr[k].n1;
+      compH_ave += (double)sptr[k].comp;
+    }
+    lenH_ave /= (double) (n - last);
+    compH_ave /= (double) (n - last);
+  }
+
+  /* starting point (a0, a1, a2, b1) and initial simplex offsets */
+  fit[0] = 10.0;   step[0] = 1.0;
+  fit[1] = -10.0;  step[1] = 0.5;
+  fit[2] = 1.0;    step[2] = 0.1;
+  fit[3] = 1.0;    step[3] = 0.01;
+
+  fixed[0] = M;
+  fixed[1] = (double) first;
+  fixed[2] = (double) last;
+  fixed[3] = lo_cut;
+  fixed[4] = hi_cut;
+  fixed[5] = lenL_ave;
+  fixed[6] = lenH_ave;
+  fixed[7] = compL_ave;
+  fixed[8] = compH_ave;
+
+  simplex(fit, step, 4,
+	  (double (*) (double *, double *, struct stat_str *, int, int, int) )mle2_func,
+	  fixed, sptr, n, first, last);
+
+  *a0 = fit[0];
+  *a1 = fit[1];
+  *a2 = fit[2];
+  *b1 = fit[3];
+
+  return 0;
+}
+
+/* mle2_func: objective function minimised by simplex() for mle_cen2().
+
+   params  a0, a1, a2, b1
+   consts  [0] M, [3] cenL, [4] cenH, [5] avglenL, [6] avglenH,
+           [7] avgcompL, [8] avgcompH   ([1] and [2] are not read)
+   values  score/n1/comp entries; values[start..stop-1] are summed
+           individually
+   n       total number of entries
+
+   A low-tail term is added when start > 0 and a high-tail term is
+   subtracted when stop < n.
+*/
+double mle2_func(double *params,
+		 double *consts,
+		 struct stat_str *values,
+		 int n, int start, int stop
+		 ) {
+
+  double p_a0 = params[0], p_a1 = params[1];
+  double p_a2 = params[2], p_b1 = params[3];
+  double qlen = consts[0];
+  double lo_cut = consts[3], hi_cut = consts[4];
+  double lo_len = consts[5], hi_len = consts[6];
+  double lo_comp = consts[7], hi_comp = consts[8];
+  double sc, len, cmp;
+  double total = 0, expo = 0;
+  int k;
+
+  /* censored low tail */
+  if (start > 0) {
+    expo = -(lo_cut - (p_a0 + p_a1*lo_comp +p_a2*lo_comp*log(qlen*lo_len)))/(p_b1*lo_comp);
+    total += (double) start * exp(expo);
+  }
+
+  /* uncensored scores */
+  k = start;
+  while (k < stop) {
+    sc = (double) values[k].score;
+    len = (double) values[k].n1;
+    cmp = (double) values[k].comp;
+
+    expo = - (sc - (p_a0 + p_a1*cmp + p_a2 * cmp * log(qlen*len))) / (p_b1*cmp);
+
+    total += -expo + exp(expo) + log(p_b1 * cmp);
+    k++;
+  }
+
+  /* censored high tail */
+  if (stop < n) {
+    expo = -(hi_cut -(p_a0 + p_a1*hi_comp + p_a2*hi_comp*log(qlen*hi_len)))/(p_b1*hi_comp);
+    total -= (double) (n - stop) * exp(expo);
+  }
+  return total;
+}
+
+/* Begin Nelder-Mead simplex code: */
+
+double evalfunc(double **param,	/* simplex vertices, one row of nparam values each */
+		double *vals,	/* function value at each vertex */
+		double *psums,	/* per-parameter sum over all vertices */
+		double *ptry,	/* scratch: receives the trial point */
+		int nparam,
+		double (*minfunc) (double *params, double *consts,
+				   struct stat_str *data, int ndata,
+				   int start, int stop),
+		double *consts,
+		void *data,
+		int ndata, int start, int stop,
+		int ihi,	/* index of the vertex the trial point may replace */
+		double factor);	/* simplex() passes -1.0, 2.0 or 0.5 */
+
+/* simplex: Nelder-Mead downhill simplex minimisation of minfunc().
+
+   fitparams  in: starting point (nparam values); out: best point found
+   lambda     per-parameter offset used to build the other nparam
+              vertices of the initial simplex
+   nparam     number of parameters being fitted
+   minfunc    function to minimise; called as
+              minfunc(point, consts, data, ndata, start, stop)
+   consts, data, ndata, start, stop
+              passed through to minfunc() unchanged
+
+   The search ends when the relative spread between the highest and
+   lowest vertex value drops below TOLERANCE, or after
+   SIMPLEX_MAX_ITER iterations.  A NaN returned by minfunc() compares
+   false against everything; with ">=" tests no branch would be taken
+   and the simplex would never change, so the "no improvement" tests
+   are written as !(a < b) and the iteration count is bounded.
+   Exits with a message if the work space cannot be allocated.
+*/
+#ifndef SIMPLEX_MAX_ITER
+#define SIMPLEX_MAX_ITER 20000
+#endif
+
+void simplex(double *fitparams,
+	     double *lambda,
+	     int nparam,
+	     double (*minfunc) (double *tryparams, double *consts,
+				struct stat_str *data, int ndata, 
+				int start, int stop),
+	     double *consts,
+	     void *data,
+	     int ndata,
+	     int start,
+	     int stop
+	     )
+{
+
+  int i, j, ilo, ihi, inhi;
+  int niter = 0;
+  double rtol, sum, tmp, ysave, ytry;
+  double *psum, *vals, *ptry, **param;
+
+  psum = (double *) calloc(nparam, sizeof(double));
+  ptry = (double *) calloc(nparam, sizeof(double));
+
+  vals = (double *) calloc(nparam + 1, sizeof(double));
+
+  /* param[] rows all point into one contiguous (nparam+1) x nparam block */
+  param = (double **) calloc(nparam + 1, sizeof(double *));
+  if (param != NULL) {
+    param[0] = (double *) calloc((nparam + 1) * nparam, sizeof(double));
+  }
+
+  if (psum == NULL || ptry == NULL || vals == NULL ||
+      param == NULL || param[0] == NULL) {
+    fprintf(stderr," cannot allocate simplex work space for %d parameters\n",
+	    nparam);
+    exit(1);
+  }
+
+  for( i = 1 ; i < (nparam + 1) ; i++ ) {
+    param[i] = param[0] + i * nparam;
+  }
+
+  /* Get our N+1 initial parameter values for the simplex */
+
+  for( i = 0 ; i < nparam ; i++ ) {
+    param[0][i] = fitparams[i];
+  }
+
+  /* vertex i (i >= 1) is the starting point moved by lambda[i-1]
+     along parameter i-1 */
+  for( i = 1 ; i < (nparam + 1) ; i++ ) {
+    for( j = 0 ; j < nparam ; j++ ) {
+      param[i][j] = fitparams[j] + lambda[j] * ( (i - 1) == j ? 1 : 0 );
+    }
+  }
+
+  /* calculate initial values at the simplex nodes */
+
+  for( i = 0 ; i < (nparam + 1) ; i++ ) {
+    vals[i] = minfunc(param[i], consts, data, ndata, start, stop);
+  }
+
+  /* Begin Nelder-Mead simplex algorithm from Numerical Recipes in C */
+
+  for( j = 0 ; j < nparam ; j++ ) {
+    for( sum = 0.0, i = 0 ; i < nparam + 1 ; i++ ) {
+      sum += param[i][j];
+    }
+    psum[j] = sum;
+  }
+
+
+  while( 1 ) {
+/*
+      determine which point is highest (ihi), next highest (inhi) and
+      lowest (ilo) by looping over the points in the simplex
+*/
+    ilo = 0;
+
+    if(vals[0] > vals[1]) { ihi = 0; inhi = 1; }
+    else { ihi = 1; inhi = 0; }
+
+    for( i = 0 ; i < nparam + 1 ; i++) {
+      if( vals[i] <= vals[ilo] ) ilo = i;
+      if( vals[i] > vals[ihi] ) {
+	inhi = ihi;
+	ihi = i;
+      } else if ( vals[i] > vals[inhi] && i != ihi ) inhi = i;
+    }
+
+    /* Are we finished? */
+
+    rtol = 2.0 * fabs(vals[ihi] - vals[ilo]) / 
+      (fabs(vals[ihi]) + fabs(vals[ilo]) + TINY);
+
+    if( rtol < TOLERANCE || niter >= SIMPLEX_MAX_ITER ) {
+
+/* put the best value and best parameters into the first index */
+
+      tmp = vals[0];
+      vals[0] = vals[ilo];
+      vals[ilo] = tmp;
+
+      for( i = 0 ; i < nparam ; i++ ) {
+	tmp = param[0][i];
+	param[0][i] = param[ilo][i];
+	param[ilo][i] = tmp;
+      }
+
+      /* converged, or iteration limit reached */
+      break;
+    }
+    niter++;
+
+    /* Begin a new iteration */
+
+    /* first, extrapolate by -1 through the face of the simplex across from ihi */
+
+    ytry = evalfunc(param, vals, psum, ptry, nparam, minfunc, consts,
+		    data, ndata, start, stop, ihi, -1.0);
+
+    if( ytry <= vals[ilo] ) {
+
+      /* Good result, try additional extrapolation by 2 */
+
+      ytry = evalfunc(param, vals, psum, ptry, nparam, minfunc, consts, 
+		      data, ndata, start, stop, ihi, 2.0);
+
+    } else if ( !(ytry < vals[inhi]) ) {
+
+      /* no good (or NaN), look for an intermediate lower point by
+	 contracting */
+
+      ysave = vals[ihi];
+      ytry = evalfunc(param, vals, psum, ptry, nparam, minfunc, consts,
+		      data, ndata, start, stop, ihi, 0.5);
+
+      if( !(ytry < ysave) ) {
+
+	/* Still no good.  Contract around lowest (best) point.
+	   psum[] doubles as scratch for the moved vertex here and is
+	   rebuilt right after. */
+
+	for( i = 0 ; i < nparam + 1 ; i++ ) {
+	  if( i != ilo ) {
+	    for ( j = 0 ; j < nparam ; j++ ) {
+	      param[i][j] = psum[j] = 0.5 * (param[i][j] + param[ilo][j]);
+	    }
+	    vals[i] = minfunc(psum, consts, data, ndata, start, stop);
+	  }
+	}
+
+
+	for( j = 0 ; j < nparam ; j++ ) {
+	  for( sum = 0.0, i = 0 ; i < nparam + 1 ; i++ ) {
+	    sum += param[i][j];
+	  }
+	  psum[j] = sum;
+	}
+
+      }
+    }
+  }
+
+  for( i = 0 ; i < nparam ; i++ ) {
+    fitparams[i] = param[0][i];
+  }
+
+  free(ptry);
+  free(param[0]);
+  free(param);
+  free(vals);
+  free(psum);
+}
+
+
+/* evalfunc: build a trial point from the vertex sums psum[] and the
+   vertex param[ihi] using `factor`, evaluate minfunc() there, and if
+   the value is lower than vals[ihi] replace that vertex (updating
+   vals[ihi] and psum[] to match).  Returns the trial value either way. */
+double evalfunc(double **param,
+		double *vals,
+		double *psum,
+		double *ptry,
+		int nparam,
+		double (*minfunc)(double *tryparam, double *consts,
+				  struct stat_str *data, int ndata,
+				  int start, int stop),
+		double *consts,
+		void *data,
+		int ndata, int start, int stop,
+		int ihi,
+		double factor) {
+
+  double *worst = param[ihi];
+  double w_sum, w_old, trial_val;
+  int k;
+
+  w_sum = (1.0 - factor) / nparam;
+  w_old = w_sum - factor;
+
+  for (k = 0; k < nparam; k++)
+    ptry[k] = psum[k] * w_sum - worst[k] * w_old;
+
+  trial_val = minfunc(ptry, consts, data, ndata, start, stop);
+
+  /* no improvement over the vertex being replaced: leave the simplex alone */
+  if (!(trial_val < vals[ihi])) return trial_val;
+
+  vals[ihi] = trial_val;
+  for (k = 0; k < nparam; k++) {
+    psum[k] += ptry[k] - worst[k];
+    worst[k] = ptry[k];
+  }
+
+  return trial_val;
+}
+
+/* end of Nelder-Mead simplex code */
+
+/* proc_hist_n: unscaled statistics (plain mean and variance of the
+   scores, no length regression).
+
+   sptr, nstats   scores; entries excluded from the statistics get
+                  their n1 negated
+   s_info, ppst   passed to calc_thresh(); ppst->zsflag and
+                  ppst->zs_win also select the text of the summary
+   histp          histp->stat_info receives the summary line;
+                  histp->entries is decremented per trimmed score
+   do_trim        non-zero: remove outlying scores (up to 5 passes)
+   pu             receives r_u.rg.mu, mean_var, mean_var_sqrt, n_trimmed
+
+   Returns AVE_STATS, or the result of proc_hist_a() when the initial
+   variance is below 0.01.
+*/
+int
+proc_hist_n(struct stat_str *sptr, int nstats, struct score_count_s s_info,
+	    struct pstruct *ppst, struct hist_str *histp,
+	    int do_trim, struct pstat_str *pu)
+{
+  int i, j, t_j;
+  double s_score, s2_score, ssd, zstrim, mean, var;
+  double t_score;
+  int nit, max_hscore;
+  char s_string[128];
+  char *f_string;
+  int no_trim = 0;
+
+  f_string = histp->stat_info;
+
+  max_hscore = calc_thresh(ppst, sptr, nstats, s_info, pu->ngLambda,
+			   pu->ngK, pu->ngH, &zstrim);
+
+  if (do_trim && zstrim < 0.0) {
+    /* too few scores to do a z-trim, shift to Karlin-Altschul */
+    /* cannot use proc_hist_a(), because proc_hist_a() calls proc_hist_n() */
+    /* return proc_hist_a(sptr, nstats, s_info, ppst, histp, do_trim, pu); */
+    no_trim = 1;
+  }
+
+  s_score = s2_score = 0.0;
+
+  /* calculate mean; s_score and j are the running sum and count of
+     the scores still included */
+  for (j=0, i=0; i < nstats; i++) {
+    if ( sptr[i].score <= max_hscore) {
+      s_score += (double)sptr[i].score;
+      j++;
+    }
+    else { sptr[i].n1 = -sptr[i].n1;}
+  }
+
+  if (j > 1) {
+    pu->r_u.rg.mu = mean = s_score/(double)j;
+
+    /* calculate variance */
+    for (i=0; i < nstats; i++) {
+      if (sptr[i].n1 < 0) continue;
+      ssd = (double)sptr[i].score - mean;
+      s2_score += ssd * ssd;
+    }
+    
+    var = pu->r_u.rg.mean_var = s2_score/(double)(j-1);
+    pu->r_u.rg.mean_var_sqrt = sqrt(var);
+  }
+  else {
+    pu->r_u.rg.mu = 50.0;
+    pu->r_u.rg.mean_var = 10.0;
+    pu->r_u.rg.mean_var_sqrt = sqrt(10.0);
+  }
+  
+  if (pu->r_u.rg.mean_var < 0.01) {
+    /*     pu->r_u.rg.mean_var = (pu->r_u.rg.mu > 1.0) ? pu->r_u.rg.mu: 1.0; */
+    return proc_hist_a(sptr, nstats, s_info, ppst, histp, do_trim, pu);
+  }
+
+  /* now remove some scores */
+  if (no_trim==0) {
+    nit = 5;
+    pu->r_u.rg.n_trimmed = 0;
+    while (nit-- > 0) {
+      t_score = 0.0;
+      t_j = 0;
+      for (i=0; i< nstats; i++) {
+	if (sptr[i].n1 < 0) continue;
+	ssd = find_zn(sptr[i].score,sptr[i].escore,sptr[i].n1,sptr[i].comp, pu);
+	if ((ssd > zstrim)
+#ifndef NORMAL_DIST
+	    || ssd < 20.0
+#else
+	    || ssd < 0.0
+#endif
+	    )
+	  {
+	    /*      fprintf(stderr,"removing %3d %3d %4.1f\n",
+		    sptr[i].score, sptr[i].n1,ssd); */
+
+	    t_score += (double)sptr[i].score;
+	    t_j++;
+	    pu->r_u.rg.n_trimmed++;
+	    histp->entries--;
+	    sptr[i].n1 = -sptr[i].n1;
+	  }
+      }
+
+      /* carry the trimmed scores forward so that later passes start
+	 from the sum and count that are really left */
+      s_score -= t_score;
+      j -= t_j;
+
+      if (j > 1 ) {
+	mean = pu->r_u.rg.mu = s_score/(double)j;
+
+	/* calculate variance */
+	s2_score = 0.0;
+	for (i=0; i < nstats; i++) {
+	  if ( sptr[i].n1 > 0 && sptr[i].score <= max_hscore) {
+	    ssd = (double)sptr[i].score - mean;
+	    s2_score += ssd * ssd;
+	  }
+	}
+	var = pu->r_u.rg.mean_var = s2_score/(double)(j-1);
+	pu->r_u.rg.mean_var_sqrt = sqrt(var);
+      }
+      else {
+	pu->r_u.rg.mu = 50.0;
+	pu->r_u.rg.mean_var = 10.0;
+	pu->r_u.rg.mean_var_sqrt = sqrt(10.0);
+      }
+
+      if (pu->r_u.rg.mean_var < 0.01) {
+	pu->r_u.rg.mean_var = (pu->r_u.rg.mu > 1.0) ? pu->r_u.rg.mu: 1.0;
+	/* keep the stored standard deviation in step with the variance */
+	pu->r_u.rg.mean_var_sqrt = sqrt(pu->r_u.rg.mean_var);
+      }
+
+      /* few scores trimmed so far: no further pass */
+      if (pu->r_u.rg.n_trimmed < LHISTC) {
+	break;
+      }
+    }
+  }
+
+  if (ppst->zsflag < 10) s_string[0]='\0';
+  else if (ppst->zs_win > 0) {
+    sprintf(s_string,"(shuffled [%d], win: %d)",nstats,ppst->zs_win);
+  }
+  else {
+    sprintf(s_string,"(shuffled [%d])",nstats);
+  }
+
+  /* Lambda is reported as PI_SQRT6 / standard deviation;
+     mean_var_sqrt already is the square root of the variance */
+  sprintf(f_string,
+#ifndef NORMAL_DIST
+	  "%s Unscaled statistics: mu= %6.4f  var=%6.4f; Lambda= %6.4f",
+	  s_string, pu->r_u.rg.mu,pu->r_u.rg.mean_var,PI_SQRT6/pu->r_u.rg.mean_var_sqrt
+#else
+	  "%s Unscaled normal statistics: mu= %6.4f  var=%6.4f Ztrim: %d",
+	  s_string, pu->r_u.rg.mu,pu->r_u.rg.mean_var, pu->r_u.rg.n_trimmed
+#endif
+	  );
+  return AVE_STATS;
+}
+
+/*
+This routine calculates the maximum likelihood estimates for the
+extreme value distribution exp(-exp(-(-x-a)/b)) using the formula
+
+	<lambda> = x_m - sum{ x[i] * exp (-x[i]<lambda>)}/sum{exp (-x[i]<lambda>)}
+	<a> = -<1/lambda> log ( (1/nlib) sum { exp(-x[i]/<lambda> } )
+
+	The <a> parameter can be transformed into the K
+	of the formula: 1 - exp ( - K m n exp ( - lambda S ))
+	using the transformation: 1 - exp ( -exp -(lambda S + log(K m n) ))
+			1 - exp ( -exp( - lambda ( S + log(K m n) / lambda))
+
+			a = log(K m n) / lambda
+			a lambda = log (K m n)
+			exp(a lambda)  = K m n 
+	 but from above: a lambda = log (1/nlib sum{exp( -x[i]*lambda)})
+	 so:            K m n = (1/n sum{ exp( -x[i] *lambda)})
+			K = sum{}/(nlib m n )
+
+*/
+
+/* alloc_hist: make sure the four per-length-bin arrays of llen exist
+   (llen->max + 1 entries each) and reset every entry to zero.
+   The arrays are allocated only when llen->hist is NULL, so llen->max
+   must be set before the first call.  Exits with a message if an
+   allocation fails; the arrays were previously used unchecked. */
+void
+alloc_hist(struct llen_str *llen)
+{
+  int max_llen, i;
+  max_llen = llen->max;
+
+  if (llen->hist == NULL) {
+    llen->hist = (int *)calloc((size_t)(max_llen+1),sizeof(int));
+    llen->score_sums = (double *)calloc((size_t)(max_llen + 1),sizeof(double));
+    llen->score2_sums =(double *)calloc((size_t)(max_llen + 1),sizeof(double));
+    llen->score_var = (double *)calloc((size_t)(max_llen + 1),sizeof(double));
+
+    if (llen->hist == NULL || llen->score_sums == NULL ||
+	llen->score2_sums == NULL || llen->score_var == NULL) {
+      fprintf(stderr," cannot allocate %d length histogram bins\n",
+	      max_llen+1);
+      exit(1);
+    }
+  }
+
+  /* needed when the arrays are being reused from an earlier search */
+  for (i=0; i< max_llen+1; i++) {
+      llen->hist[i] = 0;
+      llen->score_var[i] = llen->score_sums[i] = llen->score2_sums[i] = 0.0;
+  }
+}
+  
+/* free_hist: release the four per-length-bin arrays of llen.  Does
+   nothing when llen->hist is NULL; afterwards llen->hist is NULL
+   again (the other three pointers are not reset). */
+void
+free_hist(struct llen_str *llen)
+{
+  if (llen->hist == NULL) return;
+
+  free(llen->hist);
+  free(llen->score_sums);
+  free(llen->score2_sums);
+  free(llen->score_var);
+  llen->hist = NULL;
+}
+
+/* inithist: reset the score/length bookkeeping in llen and allocate or
+   clear its bin arrays via alloc_hist().  ppst and max_hscore are not
+   used. */
+void
+inithist(struct llen_str *llen, struct pstruct *ppst, int max_hscore)
+{
+  /* extremes start at values the first addhist() call will replace */
+  llen->min_score = 10000;
+  llen->max_score = -1;
+  llen->min_length = 10000;
+  llen->max_length = 0;
+  llen->zero_s = 0;
+
+  /* alloc_hist() sizes the arrays from llen->max */
+  llen->max = MAX_LLEN;
+  alloc_hist(llen);
+}
+
+/* addhist: add one (score, length) observation to the length-binned
+   sums in llen.  Sequences shorter than LENGTH_CUTOFF are only counted
+   in zero_s (and force min_score to 0).  The score extremes are
+   recorded before the score is capped at max_hscore; the bin index is
+   LN_FACT*log(length), rounded and clamped to 0 .. llen->max. */
+void
+addhist(struct llen_str *llen, int score, int length, int max_hscore)
+{
+  int bin;
+  double capped;
+
+  if ( length < LENGTH_CUTOFF) {
+    llen->zero_s++;
+    llen->min_score = 0;
+    return;
+  }
+
+  if (score > llen->max_score) llen->max_score = score;
+  if (score < llen->min_score) llen->min_score = score;
+
+  if (length < llen->min_length) llen->min_length = length;
+  if (length > llen->max_length) llen->max_length = length;
+
+  capped = (double)((score > max_hscore) ? max_hscore : score);
+
+  /* the same log-length binning is used with and without LOCAL_SCORE */
+  bin = (int)(LN_FACT*log((double)length)+0.5);
+  if (bin < 0 ) bin = 0;
+  if (bin > llen->max) bin = llen->max;
+
+  llen->hist[bin]++;
+  llen->score_sums[bin] += capped;
+  llen->score2_sums[bin] += capped * capped;
+}
+
+/* histogram will go from z-scores of 20 .. 120 (both limits shifted by
+   10 * rounded zs_off) with mean 50 and z=10 */
+
+/* inithistz: set up the z-score histogram in histp for about mh bins.
+   Computes the bin width (histint) and bin count (maxh), allocates
+   hist_a on first use (histflg records whether that worked) or zeroes
+   it on reuse, and resets z_calls and entries.
+   mh below 1 is treated as 1 and the bin width is kept at 1 or more,
+   so neither division here nor the one in addhistz() can be by zero. */
+void
+inithistz(int mh, struct hist_str *histp, double zs_off)
+{
+  int i, izs_off;
+
+  izs_off = (int)(zs_off + 0.5);
+
+  histp->z_calls = 0;
+
+  histp->min_hist = 20 + izs_off*10;
+  histp->max_hist = 120 + izs_off * 10;
+
+  if (mh < 1) mh = 1;
+
+  histp->histint = (int)
+    ((double)(histp->max_hist - histp->min_hist + 2)/(double)mh+0.5);
+  /* more requested bins than z-score units would round the width to 0 */
+  if (histp->histint < 1) histp->histint = 1;
+
+  histp->maxh = (int)
+    ((double)(histp->max_hist - histp->min_hist + 2)/(double)histp->histint+0.5);
+
+  if (histp->hist_a==NULL) {
+    if ((histp->hist_a=(int *)calloc((size_t)histp->maxh,sizeof(int)))==
+	NULL) {
+      fprintf(stderr," cannot allocate %d for histogram\n",histp->maxh);
+      histp->histflg = 0;
+    }
+    else histp->histflg = 1;
+  }
+  else {
+    for (i=0; i<histp->maxh; i++) histp->hist_a[i]=0;
+  }
+  histp->entries = 0;
+}
+
+/* 100 fixed offsets; addhistz() adds one to each z-score, cycling
+   through the table, before the z-score is truncated to a bin.
+   (A comma was missing after 0.0219427302 in the last row, which
+   merged two entries into one and left nrv[99] at 0.) */
+static double nrv[100]={
+  0.3098900570,-0.0313400923, 0.1131975903,-0.2832547606, 0.0073672659,
+  0.2914489107, 0.4209306311,-0.4630181404, 0.3326537896, 0.0050140359,
+ -0.1117435426,-0.2835630301, 0.2302997065,-0.3102716394, 0.0819894916,
+ -0.1676455701,-0.3782225018,-0.3204509938,-0.3594969187,-0.0308950398,
+  0.2922813812, 0.1337170751, 0.4666577031,-0.2917784349,-0.2438179916,
+  0.3002301394, 0.0231147123, 0.5687927366,-0.2318208709,-0.1476839273,
+ -0.0385043851,-0.1213476523, 0.1486341995, 0.1027917167, 0.1409192644,
+ -0.3280652579, 0.4232041455, 0.0775993309, 0.1159071787, 0.2769424442,
+  0.3197284751, 0.1507346903, 0.0028580909, 0.4825103412,-0.0496843610,
+ -0.2754357656, 0.6021881753,-0.0816123956,-0.0899148991, 0.4847183201,
+  0.2151621865,-0.4542246220, 0.0690709102, 0.2461894193, 0.2126042295,
+ -0.0767060668, 0.4819746149, 0.3323031326, 0.0177600676, 0.1143185210,
+  0.2653977455, 0.0921872958,-0.1330986718, 0.0412287716,-0.1691604748,
+ -0.0529679078,-0.0194157955,-0.6117493924, 0.1199067932, 0.0210243193,
+ -0.5832259838,-0.1685528664, 0.0008591271,-0.1120347822, 0.0839125069,
+ -0.2787486831,-0.1937017962,-0.1915733940,-0.7888453635,-0.3316745163,
+  0.1180885226,-0.3347001067,-0.2477492636,-0.2445697600, 0.0001342482,
+ -0.0015759812,-0.1516473992,-0.5202267615, 0.2136975210, 0.2500423188,
+ -0.2402926401,-0.1094186280,-0.0618869933,-0.0815221188, 0.2623337275,
+  0.0219427302,-0.1774469919, 0.0828245026,-0.3271952808,-0.0632898028};
+
+/* addhistz: add one z-score to the histogram in histp.
+   An offset from nrv[] (cycled by histp->z_calls) is added before the
+   z-score is truncated; histp->entries counts values in 0 .. 120.
+   The value is then clamped to min_hist .. max_hist and binned.
+   Nothing is binned when the histogram array was never allocated
+   (inithistz() leaves hist_a NULL on failure), and the bin index is
+   kept below maxh: (max_hist - min_hist)/histint can equal maxh for
+   some bin widths. */
+void
+addhistz(double zs, struct hist_str *histp)
+{
+  int ih, zi;
+  double rv;
+
+  if (histp == NULL) return;
+
+  rv = nrv[histp->z_calls++ % 100];
+  zi = (int)(zs + 0.5+rv );
+
+  if ((zi >= 0) && (zi <= 120)) histp->entries++;
+
+  if (histp->hist_a == NULL || histp->histint < 1 || histp->maxh < 1) return;
+
+  if (zi < histp->min_hist) zi = histp->min_hist;
+  if (zi > histp->max_hist) zi = histp->max_hist;
+  
+  ih = (zi - histp->min_hist)/histp->histint;
+  if (ih >= histp->maxh) ih = histp->maxh - 1;
+
+  histp->hist_a[ih]++;
+}
+
+/* addhistzp() does not increase histp->entries since addhist did it already */
+/*
+void
+addhistzp(double zs, struct hist_str *histp)
+{
+  int ih, zi;
+  double rv;
+
+  rv = nrv[histp->z_calls++ %100];
+  zi = (int)(zs + 0.5 + rv);
+
+  if (zi < histp->min_hist) zi = histp->min_hist;
+  if (zi > histp->max_hist) zi = histp->max_hist;
+  
+  ih = (zi - histp->min_hist)/histp->histint;
+
+  histp->hist_a[ih]++;
+}
+*/
+
+/* prune_hist: remove one (score, length) observation from the
+   length-binned sums in llen, using the same score cap and bin index
+   as addhist().  With LOCAL_SCORE defined, non-positive scores and
+   lengths below LENGTH_CUTOFF are ignored.  *entries is not changed. */
+void
+prune_hist(struct llen_str *llen, int score, int length, int max_hscore,
+	   long *entries)
+{
+  int bin;
+  double capped;
+
+#ifdef LOCAL_SCORE
+  if (score <= 0 || length < LENGTH_CUTOFF) return;
+#endif
+
+  capped = (double)((score > max_hscore) ? max_hscore : score);
+
+  /* the same log-length binning is used with and without LOCAL_SCORE */
+  bin = (int)(LN_FACT*log((double)length)+0.5);
+  if (bin < 0 ) bin = 0;
+  if (bin > llen->max) bin = llen->max;
+
+  llen->hist[bin]--;
+  llen->score_sums[bin] -= capped;
+  llen->score2_sums[bin] -= capped * capped;
+
+/*  (*entries)--; histp->entries is not yet initialized */
+}  
+
+/* find_zr uses rg.rho, rg.mu, and rg.mean_var_sqrt */
+/* find_zr2 uses rho,  mu, but calculates var from regression using rho2, mu2 */
+
+/* fit_llen: no trimming
+   (1) regress scores vs log(n) using weighted variance
+   (2) calculate mean variance after length regression
+   (3) regress residual variance vs log(n), calculate REG2_STATS parameters
+   (4) set variance cutoff for bin exclusion
+
+   llen  per-length-bin counts (hist[]) and score sums (score_sums[],
+         score2_sums[]); llen->min, llen->fit_flag and score_var[]
+         are written here
+   pr    receives nb_tot, rho, rho_e, mu, mu_e, mean_var, mean_var_sqrt,
+         var_e, rho2, mu2 and var_cutoff
+
+   Only bins holding more than one score take part.  When fewer than
+   5 such bins exist, or the regression determinant is <= 0.001, the
+   function returns early with fit_flag = 0, rho = 0 and mu set to the
+   weighted mean score; apart from nb_tot the other pr members are
+   then left untouched.
+*/
+
+void
+fit_llen(struct llen_str *llen, struct rstat_str *pr)
+{
+  int j;
+  int n;
+  int n_size;	/* number of bins with more than one score */
+  double x, y2, u, z;
+  double mean_x, mean_y, var_x, var_y, covar_xy;
+  double mean_y2, covar_xy2, var_y2, dllj;
+
+  double sum_x, sum_y, sum_x2, sum_xy, sum_v, det, n_w;	/* sum_v is zeroed below but never read */
+  
+  /* now fit scores to best linear function of log(n), using
+     simple linear regression */
+  
+  /* find the shortest bin with data */
+  for (llen->min=0; llen->min < llen->max; llen->min++)
+    if (llen->hist[llen->min]) break;
+
+  /* calculate the mean/variance for each length interval */
+  for (n_size=0,j = llen->min; j < llen->max; j++) {
+    if (llen->hist[j] > 1) {
+      dllj = (double)llen->hist[j];
+      llen->score_var[j] = llen->score2_sums[j]/dllj
+	- (llen->score_sums[j]/dllj)*(llen->score_sums[j]/dllj);
+      llen->score_var[j] /= (double)(llen->hist[j]-1);
+      if (llen->score_var[j] <= 0.1 ) llen->score_var[j] = 0.1;	/* floor keeps the 1/score_var weights finite */
+      n_size++;
+    }
+  }
+
+  pr->nb_tot = n_size;
+  /* accumulate the regression sums; every term is divided by score_var[j] */
+  n_w = 0.0;
+  sum_x = sum_y = sum_x2 = sum_xy = sum_v = 0;
+  for (j = llen->min; j < llen->max; j++) {
+    if (llen->hist[j] > 1) {
+      x = j + 0.5;
+      dllj = (double)llen->hist[j];
+      n_w += dllj/llen->score_var[j];
+      sum_x +=   dllj * x / llen->score_var[j] ;
+      sum_y += llen->score_sums[j] / llen->score_var[j];
+      sum_x2 +=  dllj * x * x /llen->score_var[j];
+      sum_xy +=  x * llen->score_sums[j]/llen->score_var[j];
+    }
+  }
+  /* too few usable bins: no length regression, use the weighted mean score */
+  if (n_size < 5 ) {
+    llen->fit_flag=0;
+    pr->rho = 0;
+    pr->mu = sum_y/n_w;
+    return;
+  }
+  else {
+    det = n_w * sum_x2 - sum_x * sum_x;
+    if (det > 0.001) {
+      llen->fit_flag = 1;
+      pr->rho = (n_w * sum_xy  - sum_x * sum_y)/det;
+      pr->rho_e = n_w/det;
+      pr->mu = (sum_x2 * sum_y - sum_x * sum_xy)/det;
+      pr->mu_e = sum_x2/det;
+    }
+    else {
+      llen->fit_flag = 0;
+      pr->rho = 0;
+      pr->mu = sum_y/n_w;
+      return;
+    }
+  }
+
+  /* this code duplicates the calculation above
+  det = n_w * sum_x2 - sum_x * sum_x;
+  pr->rho = (n_w * sum_xy  - sum_x * sum_y)/det;
+  pr->mu = (sum_x2 * sum_y - sum_x * sum_xy)/det;
+  */
+  /* unweighted moments of x (bin index + 0.5) and of the scores; note
+     that the loops from here on run to j <= llen->max, while the loops
+     above stop at j < llen->max */
+  n = 0;
+  mean_x = mean_y = mean_y2 = 0.0;
+  var_x = var_y = 0.0;
+  covar_xy = covar_xy2 = 0.0;
+
+  for (j = llen->min; j <= llen->max; j++) {
+   if (llen->hist[j] > 1 ) {
+      n += llen->hist[j];
+      x = (double)j + 0.5;
+      mean_x += (double)llen->hist[j] * x;
+      mean_y += llen->score_sums[j];
+      var_x += (double)llen->hist[j] * x * x;
+      var_y += llen->score2_sums[j];
+      covar_xy += x * llen->score_sums[j];
+    }
+  }
+
+  mean_x /= n; mean_y /= n;
+  var_x = var_x / n - mean_x * mean_x;
+  var_y = var_y / n - mean_y * mean_y;
+  
+  covar_xy = covar_xy / n - mean_x * mean_y;	/* only the commented-out fit below reads covar_xy */
+/*
+  pr->rho = covar_xy / var_x;
+  pr->mu = mean_y - pr->rho * mean_x;
+*/
+  /* y2 is the bin's sum of (score - u)^2, u being the score predicted
+     by the length regression for that bin */
+  mean_y2 = covar_xy2 = var_y2 = 0.0;
+  for (j = llen->min; j <= llen->max; j++) {
+    if (llen->hist[j] > 1) {
+      x = (double)j + 0.5;
+      u = pr->rho * x + pr->mu;
+      y2 = llen->score2_sums[j] - 2.0 * llen->score_sums[j] * u + llen->hist[j] * u * u;
+      mean_y2 += y2;
+      var_y2 += y2 * y2;
+      covar_xy2 += x * y2;
+    }
+  }
+  
+  pr->mean_var = mean_y2 /= (double)n;
+  pr->mean_var_sqrt = sqrt(pr->mean_var);
+  covar_xy2 = covar_xy2 / (double)n - mean_x * mean_y2;
+  /* NOTE: mean_var_sqrt set above is not recomputed when mean_var is replaced here */
+  if (pr->mean_var <= 0.01) {
+    llen->fit_flag = 0;
+    pr->mean_var = (pr->mu > 1.0) ? pr->mu: 1.0;
+  }
+
+  /*
+  fprintf(stderr," rho1/mu1: %.4f/%.4f mean_var %.4f\n",
+	  pr->rho*LN_FACT,pr->mu,pr->mean_var);
+  */
+  if (n > 1) pr->var_e = (var_y2/n - mean_y2 * mean_y2)/(n-1);
+  else pr->var_e = 0.0;
+
+  if (llen->fit_flag) {
+    pr->rho2 = covar_xy2 / var_x;
+    pr->mu2 = pr->mean_var - pr->rho2 * mean_x;
+  }
+  else {
+    pr->rho2 = 0;
+    pr->mu2 = pr->mean_var;
+  }
+  /* z: the length at which the variance line rho2*LN_FACT*log(z) + mu2
+     is evaluated to give var_cutoff; never below 2*LENGTH_CUTOFF */
+  if (pr->rho2 < 0.0 )
+    z = (pr->rho2 * LN_FACT*log((double)llen->max_length) + pr->mu2 > 0.0) ? llen->max_length : exp((-1.0 - pr->mu2 / pr->rho2)/LN_FACT);
+  else z =  pr->rho2 ? exp((1.0 - pr->mu2 / pr->rho2)/LN_FACT) : LENGTH_CUTOFF;
+  if (z < 2*LENGTH_CUTOFF) z = 2*LENGTH_CUTOFF;
+
+  pr->var_cutoff = pr->rho2 * LN_FACT*log(z) + pr->mu2;
+}
+
+/* used to get parameters for REG2_STATS
+
+   fit_llens: trim high variance bins
+   (1) regress scores vs log(n) using weighted variance
+   (2) regress residuals vs log(n)
+   (3) remove high variance bins
+   (4) calculate mean variance after length regression
+*/
+
+void
+fit_llens(struct llen_str *llen, struct rstat_str *pr)
+{
+  int j;
+  int n, n_u2, n_size;
+  double x, y, y2, u, u2, v, z;
+  double mean_x, mean_y, var_x, var_y, covar_xy;
+  double mean_y2, covar_xy2;
+  double mean_u2, mean_3u2, dllj;
+  double sum_x, sum_y, sum_x2, sum_xy, sum_v, det, n_w;
+
+  /* now fit scores to best linear function of log(n), using
+     simple linear regression */
+  
+  /* find the shortest bin with data */
+  for (llen->min=0; llen->min < llen->max; llen->min++)
+    if (llen->hist[llen->min] > 1) break;
+
+  /* calculate the mean/variance for each length interval */
+  for (j = llen->min; j < llen->max; j++) {
+    if (llen->hist[j] > 1) {
+      dllj = (double)llen->hist[j];
+      llen->score_var[j] = (double)llen->score2_sums[j]/dllj
+	- (llen->score_sums[j]/dllj)*(llen->score_sums[j]/dllj);
+      llen->score_var[j] /= (double)(llen->hist[j]-1);
+      if (llen->score_var[j] <= 1.0 ) llen->score_var[j] = 1.0;
+    }
+  }
+	  
+  n_w = 0.0;
+  sum_x = sum_y = sum_x2 = sum_xy = sum_v = 0;
+  for (n_size = 0, j = llen->min; j < llen->max; j++) {
+    if (llen->hist[j] > 1) {
+      x = j + 0.5;
+      dllj = (double)llen->hist[j];
+      n_w += dllj/llen->score_var[j];
+      sum_x +=   dllj * x / llen->score_var[j] ;
+      sum_y += llen->score_sums[j] / llen->score_var[j];
+      sum_x2 +=  dllj * x * x /llen->score_var[j];
+      sum_xy +=  x * llen->score_sums[j]/llen->score_var[j];
+      n_size++;
+    }
+  }
+
+  pr->nb_tot = n_size;
+
+  if (n_size < 5 ) {
+    llen->fit_flag=0;
+    pr->rho = 0;
+    pr->mu = sum_y/n_w;
+    return;
+  }
+  else {
+    det = n_w * sum_x2 - sum_x * sum_x;
+    if (det > 0.001) {
+      llen->fit_flag = 1;
+      pr->rho = (n_w * sum_xy  - sum_x * sum_y)/det;
+      pr->rho_e = n_w/det;
+      pr->mu = (sum_x2 * sum_y - sum_x * sum_xy)/det;
+      pr->mu_e = sum_x2/det;
+    }
+    else {
+      llen->fit_flag = 0;
+      pr->rho = 0;
+      pr->mu = sum_y/n_w;
+      return;
+    }
+  }
+
+  n = 0;
+  mean_x = mean_y = mean_y2 = 0.0;
+  var_x = var_y = 0.0;
+  covar_xy = covar_xy2 = 0.0;
+
+  for (j = llen->min; j <= llen->max; j++) 
+    if (llen->hist[j] > 1 ) {
+      n += llen->hist[j];
+      x = (double)j + 0.5;
+      dllj = (double)llen->hist[j];
+      mean_x += dllj * x;
+      mean_y += llen->score_sums[j];
+      var_x += dllj * x * x;
+      var_y += llen->score2_sums[j];
+      covar_xy += x * llen->score_sums[j];
+    }
+  mean_x /= n; mean_y /= n;
+  var_x = var_x / n - mean_x * mean_x;
+  var_y = var_y / n - mean_y * mean_y;
+  
+  covar_xy = covar_xy / n - mean_x * mean_y;
+
+  /* to this point, fit_llen(), fit_llens(), and fit_llen2() -- this
+     function -- are the same */
+
+  /* now regress on the residual variance */
+  mean_y2 = covar_xy2 = 0.0;
+  for (j = llen->min; j <= llen->max; j++) 
+    if (llen->hist[j] > 1) {
+      x = (double)j + 0.5;
+      u = pr->rho * x + pr->mu;
+      y2 = llen->score2_sums[j] - 2 * llen->score_sums[j] * u + llen->hist[j] * u * u;
+      mean_y2 += y2;
+      covar_xy2 += x * y2;
+    }
+  
+  mean_y2 /= n;
+  covar_xy2 = covar_xy2 / n - mean_x * mean_y2;
+  /* calculate parameters for variance regression */
+  pr->rho2 = covar_xy2 / var_x;
+  pr->mu2 = mean_y2 - pr->rho2 * mean_x;
+
+  if (pr->rho2 < 0.0 )
+    z = (pr->rho2 * LN_FACT*log((double)llen->max_length) + pr->mu2 > 0.0) ? llen->max_length : exp((-1.0 - pr->mu2 / pr->rho2)/LN_FACT);
+  else z =  pr->rho2 ? exp((1.0 - pr->mu2 / pr->rho2)/LN_FACT) : LENGTH_CUTOFF;
+  if (z < 2* LENGTH_CUTOFF) z = 2*LENGTH_CUTOFF;
+
+  pr->var_cutoff = pr->rho2*LN_FACT*log(z) + pr->mu2;
+
+/*  fprintf(stderr,"\nminimum allowed predicted variance (%0.2f) at n = %.0f\n",
+	 pr->var_cutoff,z);
+*/
+  mean_u2 = 0.0;
+  n_u2 = 0;
+  for ( j = llen->min; j < llen->max; j++) {
+    y = j+0.5;
+    dllj = (double)llen->hist[j];
+    x = pr->rho * y + pr->mu;
+    v = pr->rho2 * y + pr->mu2;
+    if (v < pr->var_cutoff) v = pr->var_cutoff;
+    if (llen->hist[j]> 1) {
+      u2 =  (llen->score2_sums[j] - 2 * x * llen->score_sums[j] + dllj * x * x) - v*dllj;
+      mean_u2 += llen->score_var[j] = u2*u2/(llen->hist[j]-1);
+      n_u2++;
+    }
+    else llen->score_var[j] = -1.0;
+  }
+
+  /* mean residual variance after both length and variance with length
+     considered */
+  mean_u2 = sqrt(mean_u2/(double)n_u2);
+  mean_3u2 = mean_u2*3.0;
+
+  /* trim all bins with variance > mean_3u2 */
+  for (j = llen->min; j < llen->max; j++) {
+    if (llen->hist[j] <= 1) continue;
+    if (sqrt(llen->score_var[j]) > mean_3u2) {
+      /*      fprintf(stderr," removing %d %d %.2f\n",
+	     j, (int)(exp((double)j/LN_FACT)-0.5),
+	     sqrt(llen->score_var[j]));
+	     */
+      pr->nb_trimmed++;
+      pr->n1_trimmed += llen->hist[j];
+      llen->hist[j] = 0;
+    }
+  }
+  /* fit what is left */
+  fit_llen(llen, pr);
+}
+
+/* s2str is used to regress against residual variance: fit_llen2()
+   stores one length bin's residual sum in s and that bin's score
+   count in n */
+struct s2str {double s; int n;};
+/* shell sort of sptr[0..n-1] by decreasing s (defined later in this file) */
+void s2_sort ( struct s2str *sptr, int n);
+
+/* fit_llen2  - used by REGI_STATS
+
+   (1) does the normal fit_llen() weighted regression of score on
+       length bin, filling pr->rho, pr->rho_e, pr->mu, pr->mu_e
+   (2) sorts the per-bin residual sums (largest first)
+   (3) excludes 5% lowest, highest bins by residual variance
+   (4) recalculates mean_var without excluded bins
+
+   Side effects: llen->min / llen->max are narrowed to the first / last
+   bin with data, llen->score_var[] is overwritten, and pr->nb_tot,
+   pr->nb_trimmed, pr->n1_trimmed, pr->mean_var, pr->mean_var_sqrt and
+   pr->var_e are updated.
+
+   llen->fit_flag is set to 1 when the regression succeeds and to 0
+   when fewer than 5 bins are usable, the normal equations are (nearly)
+   singular, ss2 cannot be allocated, or too few scores survive the
+   trimming.
+*/
+void
+fit_llen2(struct llen_str *llen, struct rstat_str *pr)
+{
+  int j;
+  int n_y2, llen_delta, llen_del05;
+  int n_size, mid_last;
+  double x, y2, u, dllj;
+  double mean_y2;
+  struct s2str *ss2;
+
+  double sum_x, sum_y, sum_x2, sum_xy, det, n_w;
+
+  /* now fit scores to best linear function of log(n), using
+     simple linear regression */
+
+  /* find the shortest bin with data */
+  for (llen->min=0; llen->min < llen->max; llen->min++)
+    if (llen->hist[llen->min]) break;
+
+  /* find the longest bin with data */
+  for ( ; llen->max > llen->min; llen->max--)
+    if (llen->hist[llen->max]) break;
+
+  /* for each length interval: score variance divided by (hist-1),
+     floored at 1.0 so it is always a usable weight divisor */
+  for (n_size=0,j = llen->min; j < llen->max; j++) {
+    if (llen->hist[j] > 1) {
+      dllj = (double)llen->hist[j];
+      llen->score_var[j] = llen->score2_sums[j]/dllj
+        - (llen->score_sums[j]/dllj) * (llen->score_sums[j]/dllj);
+      llen->score_var[j] /= (double)(llen->hist[j]-1);
+      if (llen->score_var[j] <= 1.0 ) llen->score_var[j] = 1.0;
+      n_size++;
+    }
+  }
+
+  pr->nb_tot = n_size;
+
+  /* accumulate the sums for the weighted normal equations.  Every
+     term -- including n_w, the sum of weights -- must carry the same
+     1/score_var[j] weight, otherwise det, rho and mu are inconsistent */
+  n_w = 0.0;
+  sum_x = sum_y = sum_x2 = sum_xy = 0;
+  for (j = llen->min; j < llen->max; j++) {
+    if (llen->hist[j] > 1) {
+      x = j + 0.5;
+      dllj = (double)llen->hist[j];
+      n_w += dllj / llen->score_var[j];
+      sum_x +=   dllj * x / llen->score_var[j] ;
+      sum_y += llen->score_sums[j] / llen->score_var[j];
+      sum_x2 +=  dllj * x * x /llen->score_var[j];
+      sum_xy +=  x * llen->score_sums[j]/llen->score_var[j];
+    }
+  }
+
+  if (n_size < 5 ) {
+    /* too few bins for a slope: report the weighted mean only */
+    llen->fit_flag=0;
+    pr->rho = 0;
+    pr->mu = sum_y/n_w;
+    return;
+  }
+  else {
+    det = n_w * sum_x2 - sum_x * sum_x;
+    if (det > 0.001) {
+      llen->fit_flag = 1;
+      pr->rho = (n_w * sum_xy  - sum_x * sum_y)/det;
+      pr->rho_e = n_w/det;
+      pr->mu = (sum_x2 * sum_y - sum_x * sum_xy)/det;
+      pr->mu_e = sum_x2/det;
+    }
+    else {
+      /* (nearly) singular system: fall back to the weighted mean */
+      llen->fit_flag = 0;
+      pr->rho = 0;
+      pr->mu = sum_y/n_w;
+      return;
+    }
+  }
+
+/*   fprintf(stderr," rho1/mu1: %.2f/%.2f\n",pr->rho*LN_FACT,pr->mu); */
+
+  /* now prepare to exclude some bins because of excess variance */
+
+  /* allocate space for s2str; valid indices are 0 .. llen->max */
+  if ((ss2=(struct s2str *)calloc(llen->max+1,sizeof(struct s2str)))==NULL) {
+    llen->fit_flag = 0;
+    fprintf(stderr," cannot allocate ss2\n");
+    return;
+  }
+
+  /* calculate residual variance after fit, bin into ss2[];
+     bins with VHISTC or fewer scores keep the calloc() zeros */
+  mean_y2 = 0.0;
+  n_y2 = 0;
+  for (j = llen->min; j <= llen->max; j++) {
+    if (llen->hist[j] > VHISTC) {
+      n_y2 += ss2[j].n = llen->hist[j];
+      x = (double)j + 0.5;
+      u = pr->rho * x + pr->mu;
+      ss2[j].s = y2 = llen->score2_sums[j] - 2*llen->score_sums[j]*u + llen->hist[j]*u*u;
+      mean_y2 += y2;
+    }
+  }
+
+  /* untrimmed estimate; replaced below when enough scores remain */
+  pr->mean_var = mean_y2/(double)n_y2;
+  pr->mean_var_sqrt = sqrt(pr->mean_var);
+
+  /* sort the ss2[.squared residuals, n] by decreasing squared residuals */
+  s2_sort(ss2+llen->min,llen->max-llen->min+1);
+
+  /*  fprintf(stderr,"llen->min: %d, max: %d\n",llen->min,llen->max); */
+  /* llen_delta has the number of bins with data */
+  llen_delta = 0;
+  for (j=llen->min; j<=llen->max; j++) {
+    if (ss2[j].n > 1) { llen_delta++;}
+  }
+
+  llen_del05 = llen_delta/20;	/* 5% of bins */
+
+  /* exclude the first 5% of the sorted array (largest residuals) */
+  for (j = llen->min; j<llen->min+llen_del05; j++) {
+    pr->n1_trimmed += ss2[j].n;
+    pr->nb_trimmed++;
+  }
+
+  /* calculate mean_y2 using middle 90%.  When every bin has data and
+     llen_del05 is 0 the nominal upper index is llen->max+1, one past
+     the end of ss2[], so clamp it to the last valid index */
+  mid_last = llen->min+llen_delta-llen_del05;
+  if (mid_last > llen->max) mid_last = llen->max;
+
+  mean_y2 = 0.0;
+  n_y2 = 0;
+  for (j = llen->min+llen_del05; j <= mid_last; j++)
+    if (ss2[j].n > 1) {
+      mean_y2 += ss2[j].s;
+      n_y2 += ss2[j].n;
+    }
+
+  /* exclude the tail of the sorted array (smallest residuals) */
+  for (j = llen->min+llen_delta-llen_del05+1; j< llen->max; j++) {
+    pr->n1_trimmed += ss2[j].n;
+    pr->nb_trimmed++;
+  }
+
+  free(ss2);
+
+  /* return mean_var from middle 90% */
+  if (n_y2 > 1) {
+    pr->mean_var = mean_y2/(double)n_y2;
+    pr->mean_var_sqrt = sqrt(pr->mean_var);
+  }
+  else {
+    llen->fit_flag = 0;
+  }
+
+  /*    fprintf(stderr," rho1/mu1: %.4f/%.4f mean_var: %.4f/%d\n",
+	  pr->rho*LN_FACT,pr->mu,pr->mean_var,n_y2); */
+  pr->var_e = 0.0;
+}
+
+/* find_z - dispatch to the Z-score routine stored in
+   find_z_arr[pu->zsflag]; returns 0.0 when pu is NULL */
+double find_z(int score, double escore, int length, double comp, struct pstat_str *pu) {
+  if (pu != NULL) {
+    return find_z_arr[pu->zsflag](score, escore, length, comp, pu);
+  }
+  return 0.0;
+}
+
+
+/* REG_STATS - Z() from rho/mu/mean_var
+
+   Returns the raw score when pu is NULL, and 0 for sequences shorter
+   than LENGTH_CUTOFF (or, with LOCAL_SCORE, for scores <= 0).
+   Otherwise the result is 50 + 10*z, where z is the length-corrected
+   residual divided by mean_var_sqrt, shifted by zs_off. */
+double find_zr(int score, double escore, int length, double comp, struct pstat_str *pu)
+{
+  double log_len, resid;
+
+  if (pu == NULL) return score;
+
+#ifdef LOCAL_SCORE
+  if (score <= 0) return 0;
+#endif
+  if ( length < LENGTH_CUTOFF) return 0;
+
+  /* both LOCAL_SCORE and non-LOCAL_SCORE builds use ln(length);
+     the non-local alternative (log_len = length) is disabled */
+  log_len = LN_FACT*log((double)(length));
+
+/*  var = pu->r_u.rg.rho2 * log_len + pu->r_u.rg.mu2;
+  if (var < pu->r_u.rg.var_cutoff) var = pu->r_u.rg.var_cutoff;
+*/
+
+  resid = (double)score - pu->r_u.rg.rho * log_len - pu->r_u.rg.mu;
+
+  return (50.0 + (pu->zs_off + resid / pu->r_u.rg.mean_var_sqrt)*10.0);
+}
+
+/* REG2_STATS Z() from rho/mu, rho2/mu2
+
+   The expected score is rho*log_len + mu and the expected variance is
+   rho2*log_len + mu2, where log_len = LN_FACT*ln(length).  var_cutoff
+   is the minimum allowed predicted variance, so a smaller prediction
+   is clamped to var_cutoff (the same floor fit_llens() applies when
+   it evaluates the variance regression).
+
+   Returns 0 for sequences shorter than LENGTH_CUTOFF, otherwise
+   50 + 10*z. */
+double find_zr2(int score, double escore, int length, double comp, struct pstat_str *pu)
+{
+  double log_len, var;
+  double z;
+  
+  if ( length < LENGTH_CUTOFF) return 0;
+
+#ifdef LOCAL_SCORE
+  log_len = LN_FACT*log((double)(length));
+#else
+  /*   log_len = length; */
+  log_len = LN_FACT*log((double)(length));
+#endif
+
+  var = pu->r_u.rg.rho2 * log_len + pu->r_u.rg.mu2;
+  /* enforce the variance floor; do not substitute an unrelated value */
+  if (var < pu->r_u.rg.var_cutoff) var = pu->r_u.rg.var_cutoff;
+
+  z = pu->zs_off + ((double)score - pu->r_u.rg.rho * log_len - pu->r_u.rg.mu) / sqrt(var);
+
+  return (50.0 + z*10.0);
+}
+
+#ifdef USE_LNSTATS
+/* LN_STATS - ln()-scaled mu, mean_var
+
+   The raw score is rescaled by LN200/ln(length) before it is
+   standardized with mu and mean_var_sqrt; returns 50 + 10*z. */
+double find_zl(int score, int length, double comp, struct pstat_str *pu)
+{
+  double scaled;
+
+  scaled = (double)score*LN200/log((double)length);
+
+  return (50.0 + ((scaled - pu->r_u.rg.mu) / pu->r_u.rg.mean_var_sqrt)*10.0);
+}
+#endif
+
+/* MLE_STATS - Z() from MLE for lambda, K
+
+   Uses Lambda*score - ln(K * n1 * n0), with both lengths floored at
+   1.0, converts it to a z value via EULER_G and PI_SQRT6, adds zs_off
+   and returns 50 + 10*z. */
+double
+find_ze(int score, double escore, int length, double comp, struct pstat_str *pu)
+{
+  double z, q_len, l_len;
+
+  l_len = (double)length;
+  if (l_len < 1.0) l_len = 1.0;
+
+  q_len = pu->r_u.ag.a_n0;
+  if (q_len < 1.0) q_len = 1.0;
+
+  z = pu->r_u.ag.Lambda * score - log(pu->r_u.ag.K * l_len * q_len);
+
+  z = -z + EULER_G;
+  z /= - PI_SQRT6;
+
+  return (50.0 + (z + pu->zs_off)*10.0);
+}
+
+/* MLE2_STATS - Z() from MLE for mle_a0..2, mle_b1, length, comp
+
+   A non-positive comp is replaced by the stored ave_comp.  Both
+   lengths are floored at 1.0.  Returns 50 + 10*z after the same
+   EULER_G / PI_SQRT6 conversion and zs_off shift used by find_ze(). */
+double
+find_ze2(int score, double escore, int length, double comp, struct pstat_str *pu)
+{
+  double z, q_len, l_len;
+
+  if (comp <= 0.0) comp = pu->r_u.m2.ave_comp;
+
+  /* avoid very biased comp estimates */
+  /* comp = exp((4.0*log(comp)+log(pu->r_u.m2.ave_comp))/5.0); */
+
+  l_len = (double)length;
+  if (l_len < 1.0) l_len = 1.0;
+
+  q_len = pu->r_u.m2.a_n0;
+  if (q_len < 1.0) q_len = 1.0;
+
+  z = (-(pu->r_u.m2.mle2_a0 + pu->r_u.m2.mle2_a1 * comp + pu->r_u.m2.mle2_a2 * comp * log(l_len * q_len)) + score) / (pu->r_u.m2.mle2_b1 * comp);
+
+  z = -z + EULER_G;
+  z /= - PI_SQRT6;
+
+  return (50.0 + (z + pu->zs_off)*10.0);
+}
+
+/* AG_STATS - Altschul-Gish Lambda, K
+
+   Both lengths are shortened by a_n0f and by ln(length)/H, then
+   floored at 1.0, before Lambda*score - ln(K * n1 * n0) is converted
+   to 50 + 10*z.  Unlike the other find_z*() routines here, zs_off is
+   not added. */
+double
+find_za(int score, double escore, int length, double comp, struct pstat_str *pu)
+{
+  double z, q_eff, l_eff, l_len, l_corr;
+
+  l_len = (double)length;
+  l_corr = log(l_len)/pu->r_u.ag.H;
+
+  q_eff = pu->r_u.ag.a_n0 - pu->r_u.ag.a_n0f - l_corr;
+  if (q_eff < 1.0) q_eff = 1.0;
+
+  l_eff = l_len - pu->r_u.ag.a_n0f - l_corr;
+  if (l_eff < 1.0) l_eff = 1.0;
+
+  z = pu->r_u.ag.Lambda * score - log(pu->r_u.ag.K * l_eff * q_eff);
+
+  z = -z + EULER_G;
+  z /= - PI_SQRT6;
+
+  return (50.0 + z*10.0);
+}
+
+/* find_zn - Z() from mu and mean_var_sqrt only (no length term);
+   returns 50 + 10*(zs_off + (score - mu)/mean_var_sqrt) */
+double find_zn(int score, double escore, int length, double comp, struct pstat_str *pu)
+{
+  double resid;
+
+  resid = (double)score - pu->r_u.rg.mu;
+
+  return (50.0 + (pu->zs_off + resid / pu->r_u.rg.mean_var_sqrt)*10.0);
+}
+
+/* computes E value for a given z value, assuming extreme value
+   distribution (or a normal distribution when NORMAL_DIST is defined).
+   The multiplier is entries, or db.entries when entries < 1.
+   Returns 0.0 when zs exceeds ZS_MAX. */
+double
+z_to_E(double zs, long entries, struct db_str db)
+{
+  double tail, n_seqs;
+
+  if (zs > ZS_MAX) return 0.0;
+
+  /*  if (db->entries < 5) return (double)db.entries; */
+  if (entries >= 1) { n_seqs = entries;}
+  else { n_seqs = db.entries;}
+
+#ifndef NORMAL_DIST
+  tail = exp(- PI_SQRT6 * zs - EULER_G);
+  /* for small tail, 1 - exp(-tail) is replaced by tail itself */
+  if (tail > .01) return n_seqs * (1.0 - exp(-tail));
+  return n_seqs * tail;
+#else
+  return n_seqs * erfc(zs/M_SQRT2)/2.0; 
+#endif
+}
+
+/* zs_to_p - probability for a scaled Z-score (zs = 50 + 10*z) under
+   the extreme value distribution, or the normal distribution when
+   NORMAL_DIST is defined; returns 0.0 when z exceeds ZS_MAX */
+double
+zs_to_p(double zs)
+{
+  double tail, z;
+
+  /*  if (db.entries < 5) return 0.0; */
+
+  z = (zs - 50.0)/10.0;
+  if (z > ZS_MAX) return 0.0;
+
+#ifndef NORMAL_DIST
+  tail = exp(- PI_SQRT6 * z - EULER_G);
+  if (tail > .01) return 1.0 - exp(-tail);
+  return tail;
+#else
+  return erfc(z/M_SQRT2)/2.0; 
+#endif
+}
+
+/* zs_to_bit - convert a scaled Z-score (zs = 50 + 10*z) and the two
+   sequence lengths n0, n1 to a bit score:
+   (PI_SQRT6*z + EULER_G + ln(n0*n1)) / ln(2) */
+double
+zs_to_bit(double zs, int n0, int n1)
+{
+  double z, q_len, l_len;
+
+  q_len = (double)n0;
+  l_len = (double)n1;
+  z = (zs - 50.0)/10.0;
+
+  return (PI_SQRT6 * z + EULER_G + log(q_len*l_len))/M_LN2 ;
+}
+
+/* computes E-value for a given scaled z-score (zs = 50 + 10*z),
+   assuming extreme value distribution (normal with NORMAL_DIST).
+
+   For SEQT_DNA / SEQT_RNA the multiplier is the database length
+   (db.length plus db.carry * LONG_MAX) divided by n1; otherwise it is
+   entries (db.entries when entries < 1).  The multiplier is never
+   below 1.0.  Returns 0.0 when z exceeds ZS_MAX. */
+double
+zs_to_E(double zs,int n1, int dnaseq, long entries, struct db_str db)
+{
+  double tail, z, n_eff;
+
+  /*  if (db->entries < 5) return 0.0; */
+
+  z = (zs - 50.0)/10.0;
+
+  if (z > ZS_MAX ) return 0.0;
+
+  if (entries < 1) entries = db.entries;
+
+  if (dnaseq != SEQT_DNA && dnaseq != SEQT_RNA) {
+    n_eff = (double)entries;
+  }
+  else {
+    n_eff = (double)db.length /(double)n1;
+    if (db.carry > 0) {
+      n_eff += ((double)db.carry * (double)LONG_MAX)/(double)n1;
+    }
+  }
+
+  if (n_eff < 1.0) n_eff = 1.0;
+
+#ifndef NORMAL_DIST
+  z *= PI_SQRT6;
+  z += EULER_G;
+  tail = exp(-z);
+  if (tail > .01) return n_eff * (1.0 - exp(-tail));
+  return n_eff * tail;
+#else
+  return n_eff * erfc(z/M_SQRT2)/2.0; 
+#endif
+}
+
+#ifdef NORMAL_DIST
+/* normal deviate for a lower-tail probability; defined later in this
+   file and called by E_to_zs() */
+double np_to_z(double, int *);
+#endif
+
+/* compute z-score for given E()-value, assuming normal or
+   extreme-value dist.  E is divided by entries to get a per-sequence
+   value; the result is returned on the 50 + 10*z scale.  With
+   NORMAL_DIST, 0.0 is returned when np_to_z() reports a fault. */
+double
+E_to_zs(double E, long entries)
+{
+  double p, z;
+  int error;
+
+  p = E/(double)entries;
+
+#ifndef NORMAL_DIST
+  z = (log(p)+EULER_G)/(- PI_SQRT6);
+  return z*10.0 + 50.0;
+#else
+  /* this formula does not work for E() >= 1 */
+  if (p >= 1.0) p = 0.99;
+  z = np_to_z(1.0-p,&error);
+  if (error) return 0.0;
+  return z*10.0 + 50.0;
+#endif
+}
+
+/* computes 1.0 - E value for a given z value, assuming extreme value
+   distribution: entries * (1 - p), where p is the tail probability
+   for the scaled z-score zs.  Returns 0.0 when entries < 5 and 1.0
+   when z exceeds ZS_MAX.
+
+   NOTE(review): the ZS_MAX early return is 1.0, not entries * 1.0,
+   and the NORMAL_DIST branch uses erf()/2 where the complement of
+   erfc()/2 would be (1 + erf())/2 -- confirm both are intended. */
+double
+zs_to_Ec(double zs, long entries)
+{
+  double tail, z;
+
+  if (entries < 5) return 0.0;
+
+  z = (zs - 50.0)/10.0;
+
+  if (z > ZS_MAX) return 1.0;
+
+#ifndef NORMAL_DIST
+  tail =  exp(- PI_SQRT6 * z - EULER_G);
+  if (tail > .01) return (double)entries * exp(-tail);
+  return (double)entries * (1.0 - tail);
+#else
+  return (double)entries*erf(z/M_SQRT2)/2.0; 
+#endif
+}
+
+/* ELK_to_s - score threshold for an expectation e_val, given the two
+   sequence lengths and Lambda, K, H.  Both lengths are shortened by
+   ln(K*n0*n1)/H and floored at 1.0; the result is
+   ln(K * n0' * n1' / e_val) / Lambda truncated to int. */
+int
+ELK_to_s(double e_val, int n0, int n1,
+	 double Lambda, double K, double H) {
+
+  double q_len, l_len;
+  double len_corr, q_eff, l_eff;
+
+  q_len = (double)n0;
+  l_len = (double)n1;
+
+  len_corr = log(K * q_len * l_len)/H;
+
+  q_eff = q_len - len_corr;
+  if (q_eff < 1.0) q_eff = 1.0;
+
+  l_eff = l_len - len_corr;
+  if (l_eff < 1.0) l_eff = 1.0;
+
+  return (int)((log(K * q_eff * l_eff/e_val))/Lambda);
+}
+
+/* calculate a threshold score, given an E() value and the fitted
+   statistics in pu (rho/mu/mean_var_sqrt or Lambda,K,H, depending on
+   pu->zsflag).
+
+   Returns BIGNUM when pu->zsflag < 0 or either length is below
+   LENGTH_CUTOFF, and 999 (after a message on stderr) for a statistics
+   method that is not handled here.  Unless NORMAL_DIST is defined,
+   negative results are raised to 0. */
+int
+E1_to_s(double e_val, int n0, int n1, int db_size,
+	struct pstat_str *pu) {
+  double q_len, l_len, len_corr, q_eff, l_eff;
+  double z_val, log_len, p_val;
+  int score;
+
+  if (pu->zsflag < 0 || n0 < LENGTH_CUTOFF || n1 < LENGTH_CUTOFF) return BIGNUM;
+
+  q_len = (double)n0;
+  l_len = (double)n1;
+
+  /* convert from "zscore" to "zvalue" */
+  z_val = E_to_zs(e_val, db_size);
+  z_val = (z_val - 50.0)/10.0;
+
+  p_val = e_val / db_size;
+
+  switch (pu->zsflag) {
+
+  case AVE_STATS:
+    score = z_val * pu->r_u.rg.mean_var_sqrt + pu->r_u.rg.mu;
+    break;
+
+  case REG_STATS:
+  case REGI_STATS:
+    /* LOCAL_SCORE and non-LOCAL_SCORE builds both use ln(n1);
+       the non-local alternative (log_len = a_n1) is disabled */
+    log_len = LN_FACT * log(l_len);
+    score = z_val * pu->r_u.rg.mean_var_sqrt + pu->r_u.rg.rho * log_len + pu->r_u.rg.mu;
+    break;
+
+  case MLE_STATS:
+    score = (int)((log( pu->r_u.ag.K * q_len * l_len) - log(p_val))/pu->r_u.ag.Lambda +0.5);
+    break;
+
+  case AG_STATS:
+    len_corr = log(pu->r_u.ag.K * q_len * l_len)/pu->r_u.ag.H;
+
+    q_eff = q_len - len_corr;
+    if (q_eff < 1.0) q_eff = 1.0;
+
+    l_eff = l_len - len_corr;
+    if (l_eff < 1.0) l_eff = 1.0;
+
+    score = (int)((log( pu->r_u.ag.K * q_eff * l_eff) - log(p_val))/pu->r_u.ag.Lambda +0.5);
+    break;
+
+  default: 
+    fprintf(stderr,"\n*** statistics method: %d not yet supported ***\n", pu->zsflag);
+    score = 999;
+  }
+
+#ifndef NORMAL_DIST
+  if (score < 0) score = 0;
+#endif
+  return score;
+}
+
+/* calculate a bit score from a raw score and the two sequence
+   lengths, using the statistics selected by pu->zsflag.
+
+   Returns -1.0 when either length is below LENGTH_CUTOFF, or (after a
+   message on stderr) when the statistics method is not handled here. */
+double
+s_to_bit(int score, int n0, int  n1, struct pstat_str *pu) { 
+
+  double q_len, l_len;
+  double z, log_len;
+
+  if (n0 < LENGTH_CUTOFF || n1 < LENGTH_CUTOFF) return -1.0;
+
+  q_len = (double)n0;
+  l_len = (double)n1;
+
+  switch (pu->zsflag) {
+
+  case AVE_STATS:
+    z = ((double)score - pu->r_u.rg.mu)/pu->r_u.rg.mean_var_sqrt;
+    return (PI_SQRT6 * z + EULER_G + log(q_len*l_len))/M_LN2 ;
+
+  case REG_STATS:
+  case REGI_STATS:
+    log_len = LN_FACT * log(l_len);
+    z = ((double)score - pu->r_u.rg.rho * log_len - pu->r_u.rg.mu);
+    z /= pu->r_u.rg.mean_var_sqrt ;
+    return (PI_SQRT6 * z + EULER_G + log(q_len*l_len))/M_LN2 ;
+
+  case MLE_STATS:
+  case AG_STATS:
+    return ((double)score * pu->r_u.ag.Lambda - log(pu->r_u.ag.K))/M_LN2;
+
+  default: 
+    fprintf(stderr,"\n*** s_to_bit -- statistics method: %d not yet supported ***\n", pu->zsflag);
+    return -1.0;
+  }
+}
+
+/* bit_to_E - convert a bit score to an expectation:
+   db_size * p, with p = n0 * n1 / 2^bit (replaced by 1 - exp(-p) when
+   p > 0.01).  For AG_STATS both lengths are first shortened by
+   ln(K*n0*n1)/H and floored at 1.0. */
+double
+bit_to_E (double bit, int n0, int n1, long db_size,
+	  struct pstat_str *pu)
+{
+  double q_len, l_len, len_corr, p_val;
+
+  q_len = (double)n0;
+  l_len = (double)n1;
+
+  if (pu->zsflag == AG_STATS) {
+    len_corr = log(pu->r_u.ag.K * q_len * l_len)/pu->r_u.ag.H;
+
+    q_len -= len_corr;
+    if (q_len < 1.0) q_len = 1.0;
+
+    l_len -= len_corr;
+    if (l_len < 1.0) l_len = 1.0;
+  }
+
+  p_val = q_len * l_len / pow(2.0, bit);
+  if (p_val > 0.01) {
+    p_val = 1.0 - exp(-p_val);
+  }
+
+  return (double)db_size * p_val;
+}
+
+
+/* s2_sort - in-place shell sort of ptr[0..n-1] into decreasing order
+   of the .s member; .n travels with its .s */
+void s2_sort (struct s2str *ptr, int n)
+{
+  int gap, i, j;
+  struct s2str tmp;
+
+  /* shell sort using sequence from Knuth */
+  /* grow the gap (1, 4, 13, ...) until it reaches n/3 */
+  gap = 1;
+  while (gap < n/3) {
+    gap = 3*gap + 1;
+  }
+
+  /* do a shell() sort, shrinking the gap */
+  while (gap > 0) {
+    for (i = gap; i < n; i++) {
+      /* bubble the element at i toward the front while it is larger
+         than the one a gap ahead of it */
+      j = i - gap;
+      while (j >= 0 && ptr[j].s < ptr[j + gap].s) {
+	tmp = ptr[j];
+	ptr[j] = ptr[j + gap];
+	ptr[j + gap] = tmp;
+	j -= gap;
+      }
+    }
+    gap = (gap-1)/3;
+  }
+}
+
+/* last_stats - no-op: the body is empty */
+void last_stats() {}
+
+/* scale_scores - fill in zscore (and, when statistics are available,
+   rst.escore) for the nbest entries of bptr[], then sort them.
+
+   When ppst->zsflag or ppst->zsflag_f is negative, zscore is just the
+   raw score selected by ppst->score_ix and the list is sorted with
+   sortbestz().  Otherwise zscore comes from find_z(), escore from
+   zs_to_E(), and the list is sorted with sortbeste(). */
+void
+scale_scores(struct beststr **bptr, int nbest, struct db_str db,
+	     struct pstruct *ppst, struct pstat_str *rs)
+{
+  int k, ix;
+  double zscore;
+  struct beststr *hit;
+
+  ix = ppst->score_ix;
+
+  if (ppst->zsflag < 0 || ppst->zsflag_f < 0) {
+    /* even though no statistics, we need to sort the scores */
+    for (k=0; k<nbest; k++) {
+      hit = bptr[k];
+      hit->zscore = hit->rst.score[ix];
+    }
+    sortbestz(bptr,nbest);
+    return;
+  }
+
+  for (k=0; k<nbest; k++) {
+    hit = bptr[k];
+    zscore = find_z(hit->rst.score[ppst->score_ix], hit->rst.escore,
+                    hit->seq->n1, hit->rst.comp, rs);
+    hit->zscore = zscore;
+    hit->rst.escore
+      = zs_to_E(zscore, hit->seq->n1, ppst->dnaseq, ppst->zdb_size, db);
+  }
+  sortbeste(bptr,nbest);
+}
+
+#ifdef NORMAL_DIST
+/*     ALGORITHM AS241  APPL. STATIST. (1988) VOL. 37, NO. 3
+
+       Produces the normal deviate Z corresponding to a given lower
+       tail area of P; Z is accurate to about 1 part in 10**16.
+
+       The hash sums below are the sums of the mantissas of the
+       coefficients.   They are included for use in checking
+       transcription.
+
+       p      lower tail area
+       fault  set to 0 on success; set to 1 (and 0.0 is returned) when
+              p is in a tail region and min(p, 1-p) <= 0
+*/
+
+double np_to_z(double p, int *fault) {
+
+  double q, r, ppnd16;
+
+  double zero = 0.0, one = 1.0, half = 0.5;
+  /* split1: |p - 0.5| limit of the central approximation;
+     split2: sqrt(-ln(tail)) limit between the two tail approximations */
+  double split1 = 0.425, split2 = 5.0;
+  double const1 = 0.180625, const2 = 1.6;
+
+/*   Coefficients for P close to 0.5 */
+
+  double a0 = 3.3871328727963666080e0;
+  double a1 = 1.3314166789178437745e+2;
+  double a2 = 1.9715909503065514427e+3;
+  double a3 = 1.3731693765509461125e+4;
+  double a4 = 4.5921953931549871457e+4;
+  double a5 = 6.7265770927008700853e+4;
+  double a6 = 3.3430575583588128105e+4;
+  double a7 = 2.5090809287301226727e+3;
+  double b1 = 4.2313330701600911252e+1;
+  double b2 = 6.8718700749205790830e+2;
+  double b3 = 5.3941960214247511077e+3;
+  double b4 = 2.1213794301586595867e+4;
+  double b5 = 3.9307895800092710610e+4;
+  double b6 = 2.8729085735721942674e+4;
+  double b7 = 5.2264952788528545610e+3;
+
+  double sum_ab= 55.8831928806149014439;
+/* 
+  Coefficients for P not close to 0, 0.5 or 1.
+*/
+
+  double c0 = 1.42343711074968357734;
+  double c1 = 4.63033784615654529590;
+  double c2 = 5.76949722146069140550;
+  double c3 = 3.64784832476320460504;
+  double c4 = 1.27045825245236838258;
+  double c5 = 2.41780725177450611770e-1;
+  double c6 = 2.27238449892691845833e-2;
+  double c7 = 7.74545014278341407640e-4;
+  double d1 = 2.05319162663775882187;
+  double d2 = 1.67638483018380384940;
+  double d3 = 6.89767334985100004550e-1;
+  double d4 = 1.48103976427480074590e-1;
+  double d5 = 1.51986665636164571966e-2;
+  double d6 = 5.47593808499534494600e-4;
+  double d7 = 1.05075007164441684324e-9;
+
+  double sum_cd=49.33206503301610289036;
+/*
+       Coefficients for P near 0 or 1.
+*/
+  double e0 = 6.65790464350110377720e0;
+  double e1 = 5.46378491116411436990e0;
+  double e2 = 1.78482653991729133580e0;
+  double e3 = 2.96560571828504891230e-1;
+  double e4 = 2.65321895265761230930e-2;
+  double e5 = 1.24266094738807843860e-3;
+  double e6 = 2.71155556874348757815e-5;
+  double e7 = 2.01033439929228813265e-7;
+  double f1 = 5.99832206555887937690e-1;
+  double f2 = 1.36929880922735805310e-1;
+  double f3 = 1.48753612908506148525e-2;
+  double f4 = 7.86869131145613259100e-4;
+  double f5 = 1.84631831751005468180e-5;
+  double f6 = 1.42151175831644588870e-7;
+  double f7 = 2.04426310338993978564e-15;
+
+  double sum_ef=47.52583317549289671629;
+
+  double sum_tmp = 0.0;
+
+  /* disabled transcription check: compares the coefficient sums with
+     the published hash sums sum_ab, sum_cd, sum_ef */
+  /*
+  sum_tmp = a0+a1+a2+a3+a4+a5+a6+a7+b1+b2+b3+b4+b5+b6+b7;
+  if (fabs(sum_tmp - sum_ab) > 1e-12) {
+    fprintf (stderr," sum_ab error: %lg %lg\n",sum_tmp,sum_ab);
+    *fault = 1;
+    return zero;
+  }
+
+  sum_tmp = c0+c1+c2+c3+c4+c5+c6+c7+d1+d2+d3+d4+d5+d6+d7;
+  if (fabs(sum_tmp - sum_cd) > 1e-12) {
+    fprintf (stderr," sum_cd error: %lg %lg\n",sum_tmp,sum_cd);
+    *fault = 1;
+    return zero;
+  }
+  sum_tmp = e0+e1+e2+e3+e4+e5+e6+e7+f1+f2+f3+f4+f5+f6+f7;
+  if (fabs(sum_tmp - sum_ef) > 1e-12) {
+    fprintf (stderr," sum_ef error: %lg %lg\n",sum_tmp,sum_ef);
+    *fault = 1;
+    return zero;
+  }
+  */
+
+  *fault = 0;
+  q = p - half;
+  /* central region: rational function (a / b coefficients) in
+     r = const1 - q*q, multiplied by q */
+  if (fabs(q) <= split1) {
+    r = const1 - q * q;
+    return q * (((((((a7 * r + a6) * r + a5) * r + a4) * r + a3)
+		    * r + a2) * r + a1) * r + a0) /
+      (((((((b7 * r + b6) * r + b5) * r + b4) * r + b3)
+	 * r + b2) * r + b1) * r + one);
+  }
+  else {
+    /* tail region: work with the smaller of p and 1-p */
+    r = (q < zero) ?  p : one - p;
+    if (r <= zero) {
+      *fault = 1;
+      return zero;
+    }
+    r = sqrt(-log(r));
+    if (r <= split2) {
+      /* intermediate tail: c / d coefficients */
+      r -= const2;
+      ppnd16 = (((((((c7 * r + c6) * r + c5) * r + c4) * r + c3)
+		  * r + c2) * r + c1) * r + c0) /
+	(((((((d7 * r + d6) * r + d5) * r + d4) * r + d3)
+	   * r + d2) * r + d1) * r + one);
+    }
+    else {
+      /* far tail: e / f coefficients */
+      r -= split2;
+      ppnd16 = (((((((e7 * r + e6) * r + e5) * r + e4) * r + e3)
+		  * r + e2) * r + e1) * r + e0) /
+	(((((((f7 * r + f6) * r + f5) * r + f4) * r + f3)
+	   * r + f2) * r + f1) * r + one);
+    }
+    /* the deviate is negative in the lower half (p < 0.5) */
+    if (q < zero) return -ppnd16;
+    else return ppnd16;
+  }
+}
+#endif
+
+/* print out all pstat_str info for independent calculation
+
+   info_str    destination buffer; overwritten, then appended to
+   info_str_n  size passed to SAFE_STRNCPY / SAFE_STRNCAT for info_str
+   comment     prefix written at the start of every line
+   pu          statistics to report; pu->zsflag selects which member
+               of r_u (m2, ag or rg) is printed
+
+   Each line is formatted into pstat_buf and must then be appended to
+   info_str before pstat_buf is reused. */
+void
+pstat_info(char *info_str, int info_str_n, char *comment, struct pstat_str *pu) {
+  char pstat_buf[MAX_STR];
+
+  sprintf(pstat_buf,"%s zsflag: %d\n",comment,pu->zsflag);
+  SAFE_STRNCPY(info_str,pstat_buf,info_str_n);
+  sprintf(pstat_buf,"%s ngLambda: %g; ngK: %g; ngH: %g\n",comment,pu->ngLambda,pu->ngK,pu->ngH);
+  SAFE_STRNCAT(info_str,pstat_buf,info_str_n);
+  sprintf(pstat_buf,"%s ave_n1: %g; sample_fract: %g; zs_off: %g\n",comment,
+          pu->ave_n1,pu->sample_fract,pu->zs_off);
+  /* this append was missing, so the line above was silently dropped */
+  SAFE_STRNCAT(info_str,pstat_buf,info_str_n);
+  if (pu->zsflag == MLE2_STATS) {	/* print r_u.m2 */
+    sprintf(pstat_buf,"%s mle2_stat_str: {\n",comment);
+    SAFE_STRNCAT(info_str,pstat_buf,info_str_n);
+    sprintf(pstat_buf,"%s a_n0 %g;\n",comment,pu->r_u.m2.a_n0);
+    SAFE_STRNCAT(info_str,pstat_buf,info_str_n);
+    sprintf(pstat_buf,"%s mle2_a0: %g; mle2_a1: %g; mle2_a2: %g; mle2_b1: %g\n",comment,
+            pu->r_u.m2.mle2_a0,pu->r_u.m2.mle2_a1,pu->r_u.m2.mle2_a2,pu->r_u.m2.mle2_b1);
+    SAFE_STRNCAT(info_str,pstat_buf,info_str_n);
+    sprintf(pstat_buf,"%s ave_comp: %g; max_comp: %g; ave_H: %g }\n",comment,
+            pu->r_u.m2.ave_comp,pu->r_u.m2.max_comp,pu->r_u.m2.ave_H);
+    SAFE_STRNCAT(info_str,pstat_buf,info_str_n);
+  }
+  else if (pu->zsflag  == AG_STATS || pu->zsflag == MLE_STATS) {	/* print r_u.ag */
+    sprintf(pstat_buf,"%s ag_stat_str: {\n",comment);
+    SAFE_STRNCAT(info_str,pstat_buf,info_str_n);
+    sprintf(pstat_buf,"%s K: %g; Lambda: %g; a_n0f: %g; a_n0: %g }\n",comment,
+            pu->r_u.ag.K,pu->r_u.ag.Lambda,pu->r_u.ag.a_n0f,pu->r_u.ag.a_n0);
+    SAFE_STRNCAT(info_str,pstat_buf,info_str_n);
+  }
+  else {				/* print r_u.rg */
+    sprintf(pstat_buf,"%s rstat_str (LN_FACT: %.1f): {\n",comment,LN_FACT);
+    SAFE_STRNCAT(info_str,pstat_buf,info_str_n);
+    sprintf(pstat_buf,"%s  rho: %g; rho_e: %g; mu: %g; mu_e: %g;\n",comment,
+            pu->r_u.rg.rho,pu->r_u.rg.rho_e,pu->r_u.rg.mu,pu->r_u.rg.mu_e);
+    SAFE_STRNCAT(info_str,pstat_buf,info_str_n);
+    sprintf(pstat_buf,"%s  mean_var: %g; var_e: %g; mean_var_sqrt: %g\n",comment,
+            pu->r_u.rg.mean_var, pu->r_u.rg.var_e,pu->r_u.rg.mean_var_sqrt);
+    SAFE_STRNCAT(info_str,pstat_buf,info_str_n);
+    sprintf(pstat_buf,"%s  rho2: %g; mu2: %g; var_cutoff: %g\n",comment,
+            pu->r_u.rg.rho2, pu->r_u.rg.mu2,pu->r_u.rg.var_cutoff);
+    SAFE_STRNCAT(info_str,pstat_buf,info_str_n);
+    sprintf(pstat_buf,"%s  n_trimmed: %d; n1_trimmed: %d; nb_trimmed: %d; nb_tot: %d }\n",comment,
+            pu->r_u.rg.n_trimmed, pu->r_u.rg.n1_trimmed,pu->r_u.rg.nb_trimmed, pu->r_u.rg.nb_tot);
+    SAFE_STRNCAT(info_str,pstat_buf,info_str_n);
+  }
+}
diff --git a/src/scaleswt.c b/src/scaleswt.c
new file mode 100644
index 0000000..2a59bd8
--- /dev/null
+++ b/src/scaleswt.c
@@ -0,0 +1,1566 @@
+/* $Id: scaleswt.c 714 2011-05-05 00:33:40Z wrp $ */
+
+/* copyright (c) 1995, 1996, 2000, 2002, 2014 by William R. Pearson and The
+   Rectors & Visitors of the University of Virginia */
+
+/* Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under this License is distributed on an "AS
+   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+   express or implied.  See the License for the specific language
+   governing permissions and limitations under the License. 
+*/
+
+/* as of 24 Sept, 2000 - scaleswn uses no global variables */
+
+/* 
+  This version is designed for fasts/f, which used Tatusov
+  probabilities for statistical estimates, but still needs a
+  quick-and-dirty linear regression fit to rank things
+
+  For comparisons that obey tatusov statistics, we try whenever
+  possible to provide accurate e_scores, rather than raw scores.  As a
+  result, no lambda/K fitting is required; and process_hist() can be
+  called at the very beginning of the search to initialize some of the
+  statistics structures and find_zp().
+
+  find_z() must still return a valid z_score surrogate, as
+  comp_lib.c/p2_complib.c continue to use z_score's to rank hits, save
+  the best, etc.
+
+  If e_score's cannot be calculated, the process_hist() provides
+  linear regression fitting for conventional z_score estimates.
+
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <limits.h>
+#include <float.h>
+#include <math.h>
+
+#include <limits.h>
+
+#include "defs.h"
+#include "param.h"
+#include "structs.h"
+#include "best_stats.h"
+
+#define MAXHIST 50	/* bin count requested from inithistz() */
+#define MAX_LLEN 200	/* llen->max: highest length-bin index (bins are LN_FACT*ln(length)) */
+#define LHISTC 5	/* proc_hist_n() stops re-trimming once a pass trims fewer than this */
+#define VHISTC 5
+#define MAX_SSCORE 300
+
+#define LENGTH_CUTOFF 10 /* minimum database sequence length allowed, for fitting */
+
+#define LN_FACT 10.0	/* multiplier applied to ln(length) when binning and regressing */
+#ifndef M_LN2
+#define M_LN2 0.69314718055994530942
+#endif
+
+#define EULER_G 0.57721566490153286060	/* Euler-Mascheroni constant */
+#define PI_SQRT6 1.28254983016186409554	/* pi/sqrt(6) */
+
+#ifndef M_SQRT2
+#define M_SQRT2 1.41421356237
+#endif
+#define LN200 5.2983173666	/* ln(200) */
+#define ZS_MAX 400.0	/* used to prevent underflow on some machines */
+#define TOLERANCE 1.0e-12
+#define TINY 1.0e-6
+
+/* fitted statistical parameters; used by AVE_STATS, REG_STATS, REGI_STATS, REG2_STATS*/
+struct pstat_str {
+  int zsflag;	/* set from ppst->zsflag, then overwritten with the proc_hist_r() result */
+  double ngLambda, ngK, ngH;	/* copied from m_msg->Lambda, ->K, ->H by process_hist() */
+  double rho, rho_e, mu, mu_e, mean_var, var_e;  /* ?_e:std. error of ? */
+/* used by REG2_STATS */
+  double rho2, mu2, var_cutoff;
+  int n_trimmed; /* excluded because of high z-score */
+  int n1_trimmed, nb_trimmed, nb_tot; /* excluded because of bin */
+  double tat_a, tat_b, tat_c, spacefactor;	/* tat_a/b/c are filled in by scale_tat() via linreg() */
+  int have_tat;
+  int tie_j;	/* scale_tat() sets this to 0.005 * db_entries */
+  int eval_is_pval;	/* process_hist(): 1 when m_msg->escore_flg is set, otherwise 0 */
+  long zdb_size;	/* copied from ppst->zdb_size; find_zt() divides escore by it */
+};
+
+#define AVE_STATS 0	/* no length effect, only mean/variance */
+double find_zt(int score, double escore, int len, double comp, struct pstat_str *);
+
+double find_zn(int score, double escore, int len, double comp, struct pstat_str *);
+
+double power(double, int);
+
+void sortbesto(double *, int );
+extern void sortbeste(struct beststr **bptr, int nbest);
+
+int proc_hist_n(struct stat_str *sptr, int n, 
+		 struct pstruct *ppst, struct hist_str *histp, int do_trim,
+		 struct pstat_str *);
+
+#define REG_STATS 1	/* length-regression scaled */
+double find_zr(int score, double escore, int len, double comp, struct pstat_str *);
+
+int proc_hist_r(struct stat_str *sptr, int n,
+		 struct pstruct *ppst, struct hist_str *histp,
+		 int do_trim, struct pstat_str *rs);
+
+static double (*find_zp)(int score, double escore, int len, double comp,
+			 struct pstat_str *) = &find_zr;
+
+double find_z(int score, double escore, int len, double comp, struct pstat_str *);
+
+/* print out all pstat_str info for independent calculation */
+void
+pstat_info(char *info_str, int info_str_n, char *comment, struct pstat_str *pu);
+/* per-bin score accumulators; the bin index is (int)(LN_FACT*ln(length)+0.5) */
+struct llen_str {
+  int min, max;	/* bin index range; inithist() sets max to MAX_LLEN */
+  int max_score, min_score;	/* extreme scores recorded by addhist() */
+  int *hist;	/* number of scores in each bin */
+  double *score_sums, *score2_sums;	/* per-bin sum of scores and of squared scores */
+  double *score_var;	/* per-bin variance work array written by fit_llen()/fit_llens() */
+  int max_length, min_length, zero_s;	/* zero_s counts entries rejected by addhist() */
+  int fit_flag;	/* cleared when the length regression cannot be used */
+};
+
+static void inithist(struct llen_str *, struct pstruct *, int);
+static void free_hist( struct llen_str *);
+static void addhist(struct llen_str *, int, int, int);
+static void prune_hist(struct llen_str *, int, int, int, long *);
+void inithistz(int, struct hist_str *histp);
+void addhistz(double zs, struct hist_str *histp);
+
+static void fit_llen(struct llen_str *, struct pstat_str *);
+static void fit_llens(struct llen_str *, struct pstat_str *);
+
+void linreg(double *lny, double *x, double *lnx, int n,
+	    double *a, double *b, double *c, int start);
+
+double calc_spacefactor(const unsigned char *, int, int, int);
+
+double det(double a11, double a12, double a13,
+	   double a21, double a22, double a23,
+	   double a31, double a32, double a33);
+
+double factorial (int a, int b);
+
+/* void set_db_size(int, struct db_str *, struct hist_str *); */
+
+#ifdef DEBUG
+FILE *tmpf;
+#endif
+
+/*
+  process_hist() - allocate (or reset) the statistics structure and
+  select the function find_z() uses to turn scores into z-scores.
+
+  sptr, nstats : sampled scores handed on to proc_hist_r()
+  m_msg        : supplies escore_flg and the Lambda, K, H values
+  ppst         : supplies zsflag and zdb_size; ppst->zs_off is reset to 0.0
+  histp        : histogram structure that is (re)initialized
+  rs_sp        : in/out; when *rs_sp is NULL a zeroed struct is
+                 allocated, otherwise the existing one is cleared
+  do_hist      : not referenced here
+
+  Returns:
+    ppst->zsflag (and *rs_sp = NULL) when ppst->zsflag is negative;
+    1 when m_msg->escore_flg is set (find_zp becomes find_zt);
+    -1 (with *rs_sp freed and set to NULL) when nstats < 20;
+    otherwise the value returned by proc_hist_r(), which is also
+    stored in (*rs_sp)->zsflag.
+  Exits the program if the structure cannot be allocated.
+*/
+int
+process_hist(struct stat_str *sptr, int nstats, 
+	     const struct mngmsg *m_msg,
+	     struct pstruct *ppst,
+	     struct hist_str *histp,
+	     struct pstat_str **rs_sp,
+	     int do_hist
+	     )
+{
+  int do_trim;
+  struct pstat_str *rs_s;
+
+  if (ppst->zsflag < 0) {
+    *rs_sp = NULL;
+    return ppst->zsflag;
+  }
+
+  ppst->zs_off = 0.0;
+
+  if (*rs_sp == NULL) {
+    if ((rs_s=(struct pstat_str *)calloc(1,sizeof(struct pstat_str)))==NULL) {
+      /* sizeof() yields a size_t; cast so the argument really is the
+	 long that %ld expects */
+      fprintf(stderr," cannot allocate rs_snion: %ld\n",
+	      (long)sizeof(struct pstat_str));
+      exit(1);
+    }
+    *rs_sp = rs_s;
+  }
+  else {
+    rs_s = *rs_sp;
+    memset(rs_s,0,sizeof(struct pstat_str));
+  }
+
+  rs_s->zsflag = ppst->zsflag;
+  rs_s->zdb_size = ppst->zdb_size;
+
+  /* e_scores are available: no fitting needed, rank by -log2(escore) */
+  if (m_msg->escore_flg) {
+    find_zp = &find_zt;
+    inithistz(MAXHIST,histp);
+    rs_s->eval_is_pval = 1;
+    return 1;
+  }
+
+  if (nstats < 20) {
+    fprintf(stderr," too few sequences for sampling: %d\n",nstats);
+    free(rs_s);
+    *rs_sp = NULL;
+    return -1;
+  }
+
+  rs_s->ngLambda = m_msg->Lambda;
+  rs_s->ngK = m_msg->K;
+  rs_s->ngH = m_msg->H;
+
+  /* zsflag values of 10 and above disable trimming; only do_trim is
+     needed from here on */
+  do_trim = (ppst->zsflag >= 10) ? 0 : 1;
+
+  rs_s->eval_is_pval = 0;
+  find_zp = &find_zr;
+
+  return rs_s->zsflag = proc_hist_r(sptr, nstats, ppst, histp, do_trim,  rs_s);
+}
+
+/*
+  calc_thresh() - choose the largest raw score that is still used for
+  fitting, and the upper z-score trimming limit.
+
+  ppst   : supplies dnaseq (sequence type) and n0
+  nstats : number of sampled scores
+  Lambda, K : ungapped statistical parameters
+  H      : not referenced
+  zstrim : out; receives 10.0*z + 50.0
+
+  Unless NORMAL_DIST is defined, the score limit is the score whose
+  expectation is 0.01 for nstats sequences of an assumed mean length
+  (5000 for DNA/RNA, 400 otherwise), computed with Lambda scaled by
+  1.0 (DNA/RNA) or 0.7 (otherwise); z is (ln(1/nstats)+EULER_G)/(-PI_SQRT6).
+  With NORMAL_DIST the limit is 100 and z is 5.0.
+
+  Returns the score limit.
+*/
+int
+calc_thresh(struct pstruct *ppst, int nstats, 
+	    double Lambda, double K, double H, double *zstrim)
+{
+  int is_nucleotide, score_limit;
+  double mean_lib_len, lambda_scale, e_ratio, z_cut;
+
+  is_nucleotide = (ppst->dnaseq == SEQT_DNA || ppst->dnaseq == SEQT_RNA);
+  mean_lib_len = is_nucleotide ? 5000.0 : 400.0;
+  lambda_scale = is_nucleotide ? 1.0 : 0.7;
+
+#ifdef NORMAL_DIST
+  score_limit = 100;
+  z_cut = 5.0;
+#else
+  e_ratio = 0.01/((double)nstats*K*(double)ppst->n0*mean_lib_len);
+  e_ratio = -log(e_ratio)/(Lambda*lambda_scale);
+  score_limit = (int)(e_ratio+0.5);
+
+  z_cut = 1.0/(double)nstats;
+  z_cut = (log(z_cut)+EULER_G)/(-PI_SQRT6);
+#endif
+
+  *zstrim = 10.0*z_cut+50.0;
+  return score_limit;
+}
+/* proc_hist_r() - length-regression (REG_STATS) fit of the sampled scores.
+
+   sptr/nstats : sampled scores (score, escore, n1, comp are read)
+   ppst        : search parameters (zsflag, zs_win are read here)
+   histp       : receives entries, the z-histogram setup from
+                 inithistz() and the summary text in stat_info
+   do_trim     : when non-zero, entries whose z-score is < 20.0 or
+                 > ztrim are pruned and the fit is redone by fit_llens()
+   rs          : receives the fitted parameters
+
+   Returns REG_STATS.  When the observed score range is < 10, or
+   fit_llen() clears fit_flag, find_zp is switched to find_zn and the
+   result of proc_hist_n() is returned instead.
+*/
+int
+proc_hist_r(struct stat_str *sptr, int nstats,
+	    struct pstruct *ppst, struct hist_str *histp,
+	    int do_trim, struct pstat_str *rs)
+{
+  int i, max_hscore;
+  double zs, ztrim;
+  char s_string[128];
+  struct llen_str llen;
+  char *f_string;
+  llen.fit_flag=1;
+  llen.hist=NULL;	/* NULL makes alloc_hist() allocate the bin arrays */
+
+  max_hscore = calc_thresh(ppst, nstats, rs->ngLambda,
+			   rs->ngK, rs->ngH, &ztrim);
+
+  inithist(&llen, ppst,max_hscore);
+  f_string = &(histp->stat_info[0]);
+
+  for (i = 0; i<nstats; i++)
+    addhist(&llen,sptr[i].score,sptr[i].n1, max_hscore);
+  histp->entries = nstats - llen.zero_s;	/* entries rejected by addhist() are not counted */
+
+  if ((llen.max_score - llen.min_score) < 10) {
+    free_hist(&llen);
+    llen.fit_flag = 0;
+    find_zp = &find_zn;
+    return proc_hist_n(sptr, nstats, ppst, histp, do_trim, rs);
+  }
+
+  fit_llen(&llen, rs); /* now we have rho, mu, rho2, mu2, mean_var
+			  to set the parameters for the histogram */
+
+  if (!llen.fit_flag) {	/* the fit failed, fall back to proc_hist_n */
+    free_hist(&llen);
+    find_zp = &find_zn;
+    return proc_hist_n(sptr,nstats, ppst, histp, do_trim, rs);
+  }
+
+  rs->n_trimmed= rs->n1_trimmed = rs->nb_trimmed = 0;
+
+  if (do_trim) {
+    if (llen.fit_flag) {
+      for (i = 0; i < nstats; i++) {
+	zs = find_zr(sptr[i].score,sptr[i].escore,sptr[i].n1,sptr[i].comp, rs);
+	if (zs < 20.0 || zs > ztrim) {
+	  rs->n_trimmed++;
+	  prune_hist(&llen,sptr[i].score,sptr[i].n1, max_hscore,
+		     &(histp->entries));
+	}
+      }
+    }
+
+  /*  fprintf(stderr,"Z-trimmed %d entries with z > 5.0\n", rs->n_trimmed); */
+
+    if (llen.fit_flag) fit_llens(&llen, rs);	/* refit after removing high-variance bins */
+
+  /*   fprintf(stderr,"Bin-trimmed %d entries in %d bins\n", rs->n1_trimmed,rs->nb_trimmed); */
+  }
+
+
+  free_hist(&llen);	/* only the arrays are freed; llen.zero_s is still read below */
+
+  /* rst all the scores in the histogram */
+
+  if (ppst->zsflag < 10) s_string[0]='\0';
+  else if (ppst->zs_win > 0)
+    sprintf(s_string,"(shuffled, win: %d)",ppst->zs_win);
+  else strncpy(s_string,"(shuffled)",sizeof(s_string));
+
+  inithistz(MAXHIST, histp);
+
+  sprintf(f_string,"%s Expectation_n fit: rho(ln(x))= %6.4f+/-%6.3g; mu= %6.4f+/-%6.3f\n mean_var=%6.4f+/-%6.3f, 0's: %d Z-trim: %d  B-trim: %d in %d/%d\n Lambda= %6.4f",
+	  s_string,
+	  rs->rho*LN_FACT,sqrt(rs->rho_e),rs->mu,sqrt(rs->mu_e), rs->mean_var,sqrt(rs->var_e),
+	  llen.zero_s, rs->n_trimmed, rs->n1_trimmed, rs->nb_trimmed, rs->nb_tot,
+	  PI_SQRT6/sqrt(rs->mean_var));
+    return REG_STATS;
+}
+
+/* proc_hist_n() - unscaled (AVE_STATS) statistics: mean and variance
+   of the sampled scores, with no length correction.
+
+   sptr/nstats : sampled scores; the n1 field of every trimmed entry
+                 is negated so later passes skip it
+   ppst        : zsflag and zs_win are read for the summary text
+   histp       : entries is recomputed; stat_info receives the summary
+   do_trim     : not referenced in this function
+   rs          : receives mu, mean_var and n_trimmed
+
+   The mean/variance are first computed from scores in
+   (0, max_hscore]; then up to 5 passes remove entries whose
+   find_zn() value is > ztrim or < 20.0, stopping early once a pass
+   trims fewer than LHISTC entries.  Fallback values (mu 50.0,
+   mean_var 10.0) are used when fewer than 2 scores remain, and a
+   mean_var below 0.01 is replaced by max(mu, 1.0).
+
+   NOTE(review): the trimming loop examines every entry with n1 >= 0,
+   including those the first loop never added (score <= 0 or
+   > max_hscore), and subtracts them from the sums and counts -
+   confirm whether that is intended.
+
+   Returns AVE_STATS.
+*/
+int
+proc_hist_n(struct stat_str *sptr, int nstats,
+	    struct pstruct *ppst, struct hist_str *histp,
+	    int do_trim, struct pstat_str *rs)
+{
+  int i, j;
+  double s_score, s2_score, ssd;
+  double ztrim;
+  int nit, max_hscore;
+  char s_string[128];
+  char *f_string;
+
+  f_string = &(histp->stat_info[0]);
+  /*   db->entries = db->length = db->carry = 0; */
+
+  max_hscore = calc_thresh(ppst, nstats, rs->ngLambda,
+			   rs->ngK, rs->ngH, &ztrim);
+
+  s_score = s2_score = 0.0;
+
+  histp->entries = 0;
+
+  for ( j = 0, i = 0; i < nstats; i++) {	/* j counts the scores included in the sums */
+    if (sptr[i].score > 0 && sptr[i].score <= max_hscore) {
+      s_score += (ssd=(double)sptr[i].score);
+      s2_score += ssd * ssd;
+      histp->entries++;
+      /* 
+      db->length += sptr[i].n1;
+      if (db->length > LONG_MAX) {
+	db->carry++;
+	db->length -= LONG_MAX;
+      }
+      */
+      j++;
+    }
+  }
+
+  if (j > 1 ) {
+    rs->mu = s_score/(double)j;
+    rs->mean_var = s2_score - (double)j * rs->mu * rs->mu;
+    rs->mean_var /= (double)(j-1);	/* sample variance */
+  }
+  else {
+    rs->mu = 50.0;
+    rs->mean_var = 10.0;
+  }
+  
+  if (rs->mean_var < 0.01) {
+    rs->mean_var = (rs->mu > 1.0) ? rs->mu: 1.0;
+  }
+
+  /* now remove some scores */
+
+  nit = 5;
+  while (nit-- > 0) {
+    rs->n_trimmed = 0;
+
+    for (i=0; i< nstats; i++) {
+      if (sptr[i].n1 < 0) continue;	/* negated n1 marks an entry trimmed earlier */
+      ssd = find_zn(sptr[i].score,sptr[i].escore,sptr[i].n1,sptr[i].comp, rs);
+      if (ssd > ztrim || ssd < 20.0) {
+	/*      fprintf(stderr,"removing %3d %3d %4.1f\n",
+		sptr[i].score, sptr[i].n1,ssd); */
+	ssd = sptr[i].score;
+	s_score -= ssd;
+	s2_score -= ssd*ssd;
+	j--;
+	rs->n_trimmed++;
+	histp->entries--;
+	sptr[i].n1 = -sptr[i].n1;
+      }
+    }
+
+    if (j > 1 ) {
+      rs->mu = s_score/(double)j;
+      rs->mean_var = s2_score - (double)j * rs->mu * rs->mu;
+      rs->mean_var /= (double)(j-1);
+    }
+    else {
+      rs->mu = 50.0;
+      rs->mean_var = 10.0;
+    }
+
+    if (rs->mean_var < 0.01) {
+      rs->mean_var = (rs->mu > 1.0) ? rs->mu: 1.0;
+    }
+
+    if (rs->n_trimmed < LHISTC) {	/* few entries removed this pass: stop iterating */
+      /*
+	fprintf(stderr,"nprune %d at %d\n",nprune,nit);
+	*/
+      break;
+    }
+  }
+
+  if (ppst->zsflag < 10) s_string[0]='\0';
+  else if (ppst->zs_win > 0)
+    sprintf(s_string,"(shuffled, win: %d)",ppst->zs_win);
+  else strncpy(s_string,"(shuffled)",sizeof(s_string));
+
+  sprintf(f_string,"%s unscaled statistics: mu= %6.4f  var=%6.4f; Lambda= %6.4f",
+	  s_string, rs->mu,rs->mean_var,PI_SQRT6/sqrt(rs->mean_var));
+  return AVE_STATS;
+}
+
+
+/*
+This routine calculates the maximum likelihood estimates for the
+extreme value distribution exp(-exp(-(-x-a)/b)) using the formula
+
+	<lambda> = x_m - sum{ x[i] * exp (-x[i]<lambda>)}/sum{exp (-x[i]<lambda>)}
+	<a> = -<1/lambda> log ( (1/nlib) sum { exp(-x[i]/<lambda> } )
+
+	The <a> parameter can be transformed into and K
+	of the formula: 1 - exp ( - K m n exp ( - lambda S ))
+	using the transformation: 1 - exp ( -exp -(lambda S + log(K m n) ))
+			1 - exp ( -exp( - lambda ( S + log(K m n) / lambda))
+
+			a = log(K m n) / lambda
+			a lambda = log (K m n)
+			exp(a lambda)  = K m n 
+	 but from above: a lambda = log (1/nlib sum{exp( -x[i]*lambda)})
+	 so:            K m n = (1/n sum{ exp( -x[i] *lambda)})
+			K = sum{}/(nlib m n )
+
+*/
+
+/*
+  alloc_hist() - make the per-length-bin arrays of *llen ready for use.
+
+  llen->max must already be set; each array holds llen->max+1
+  entries.  The arrays are allocated only when llen->hist is NULL,
+  and are always cleared, so an already-allocated structure can be
+  reused.  Exits the program if any allocation fails, instead of
+  dereferencing a NULL array in the clearing loop.
+*/
+void
+alloc_hist(struct llen_str *llen)
+{
+  int max_llen, i;
+  max_llen = llen->max;
+
+  if (llen->hist == NULL) {
+    llen->hist = (int *)calloc((size_t)(max_llen+1),sizeof(int));
+    llen->score_sums = (double *)calloc((size_t)(max_llen + 1),sizeof(double));
+    llen->score2_sums =(double *)calloc((size_t)(max_llen + 1),sizeof(double));
+    llen->score_var = (double *)calloc((size_t)(max_llen + 1),sizeof(double));
+
+    if (llen->hist == NULL || llen->score_sums == NULL ||
+	llen->score2_sums == NULL || llen->score_var == NULL) {
+      fprintf(stderr," cannot allocate length histogram: %d bins\n",
+	      max_llen+1);
+      exit(1);
+    }
+  }
+
+  /* needed when the arrays are being reused rather than freshly allocated */
+  for (i=0; i< max_llen+1; i++) {
+      llen->hist[i] = 0;
+      llen->score_var[i] = llen->score_sums[i] = llen->score2_sums[i] = 0.0;
+  }
+}
+  
+/* free_hist() - release the four per-bin arrays of *llen and mark the
+   structure as unallocated (hist = NULL).  Does nothing when
+   llen->hist is already NULL. */
+void
+free_hist(struct llen_str *llen)
+{
+  if (llen->hist == NULL) return;
+
+  free(llen->score_var);
+  free(llen->score2_sums);
+  free(llen->score_sums);
+  free(llen->hist);
+
+  llen->hist = NULL;
+}
+
+/* inithist() - prepare *llen for a new round of addhist() calls:
+   the bin arrays are allocated/cleared and the running extremes and
+   the rejected-entry counter are reset.  ppst and max_hscore are not
+   referenced. */
+void
+inithist(struct llen_str *llen, struct pstruct *ppst, int max_hscore)
+{
+  /* alloc_hist() sizes its arrays from llen->max, so set that first */
+  llen->max = MAX_LLEN;
+  alloc_hist(llen);
+
+  /* score extremes start outside any expected range */
+  llen->min_score = 10000;
+  llen->max_score = -1;
+
+  /* likewise for the sequence-length extremes */
+  llen->min_length = 10000;
+  llen->max_length = 0;
+
+  llen->zero_s = 0;
+}
+
+/* addhist() - add one (score, length) observation to the length bins.
+
+   Entries with score <= 0 or length < LENGTH_CUTOFF are only counted
+   in zero_s (and force min_score to 0).  Otherwise the score and
+   length extremes are updated with the uncapped score, the score is
+   capped at max_hscore, and the bin (int)(LN_FACT*ln(length)+0.5),
+   clamped to [0, llen->max], receives the count, the capped score
+   and its square. */
+void
+addhist(struct llen_str *llen, int score, int length, int max_hscore)
+{
+  int bin;
+  double capped;
+
+  if (!(score > 0 && length >= LENGTH_CUTOFF)) {
+    llen->zero_s++;
+    llen->min_score = 0;
+    return;
+  }
+
+  /* extremes are tracked before the score is capped */
+  if (llen->min_score > score) llen->min_score = score;
+  if (llen->max_score < score) llen->max_score = score;
+
+  if (llen->min_length > length) llen->min_length = length;
+  if (llen->max_length < length) llen->max_length = length;
+
+  capped = (double)((score > max_hscore) ? max_hscore : score);
+
+  bin = (int)(LN_FACT*log((double)length)+0.5);
+  if (bin < 0) bin = 0;
+  if (bin > llen->max) bin = llen->max;
+
+  llen->hist[bin] += 1;
+  llen->score_sums[bin] += capped;
+  llen->score2_sums[bin] += capped * capped;
+}
+
+/* histogram will go from z-scores of 20 .. 100 with mean 50 and z=10 */
+
+
+/* inithistz() - set up the z-score histogram in *histp.
+
+   The range is fixed at 20..120; histint is the bin width that gives
+   about mh bins over that range and maxh the resulting bin count.
+   hist_a is allocated (zeroed) on first use and histflg records
+   whether that allocation succeeded; an existing hist_a is cleared
+   and histflg is left alone. */
+void
+inithistz(int mh, struct hist_str *histp )
+{
+  int bin;
+
+  histp->min_hist = 20;
+  histp->max_hist = 120;
+
+  histp->histint = (int)
+    ((double)(histp->max_hist - histp->min_hist + 2)/(double)mh+0.5);
+  histp->maxh = (int)
+    ((double)(histp->max_hist - histp->min_hist + 2)/(double)histp->histint+0.5);
+
+  if (histp->hist_a != NULL) {
+    /* already allocated: just clear the counts */
+    for (bin = 0; bin < histp->maxh; bin++) histp->hist_a[bin] = 0;
+    return;
+  }
+
+  histp->hist_a = (int *)calloc((size_t)histp->maxh,sizeof(int));
+  if (histp->hist_a == NULL) {
+    fprintf(stderr," cannot allocate %d for histogram\n",histp->maxh);
+    histp->histflg = 0;
+  }
+  else {
+    histp->histflg = 1;
+  }
+}
+
+/* fasts/f will not show any histogram, so this version records nothing */
+void
+addhistz(double zs, struct hist_str *histp)
+{	/* intentionally empty: zs and histp are ignored */
+}
+
+/* prune_hist() - undo one addhist() observation.
+
+   Entries that addhist() would have rejected (score <= 0 or
+   length < LENGTH_CUTOFF) are ignored.  Otherwise the score, capped
+   at max_hscore, is subtracted from the sums of the bin
+   (int)(LN_FACT*ln(length)+0.5), clamped to [0, llen->max]; that
+   bin's count and *entries are each decremented. */
+void
+prune_hist(struct llen_str *llen, int score, int length, int max_hscore,
+	   long *entries)
+{
+  int bin;
+  double capped;
+
+  if (!(score > 0 && length >= LENGTH_CUTOFF)) return;
+
+  capped = (double)((score > max_hscore) ? max_hscore : score);
+
+  bin = (int)(LN_FACT*log((double)length)+0.5);
+  if (bin < 0) bin = 0;
+  if (bin > llen->max) bin = llen->max;
+
+  llen->hist[bin] -= 1;
+  llen->score_sums[bin] -= capped;
+  llen->score2_sums[bin] -= capped * capped;
+
+  *entries -= 1;
+}  
+
+/* fit_llen: no trimming
+   (1) regress scores vs log(n) using weighted variance
+   (2) calculate mean variance after length regression
+
+   llen : per-bin counts and sums built by addhist(); llen->min and
+          score_var[] are written here
+   pr   : receives nb_tot, rho, rho_e, mu, mu_e, mean_var, var_e,
+          rho2, mu2 and var_cutoff
+
+   Only bins holding more than one score are used.  llen->fit_flag is
+   cleared (and rho set to 0) when fewer than 5 such bins exist or
+   when the regression determinant is <= 0.001; in those cases the
+   function returns early.  fit_flag is also cleared when the mean
+   residual variance is <= 0.01, in which case mean_var becomes
+   max(mu, 1.0).
+
+   NOTE(review): the weighted-regression loops run over j < llen->max
+   while the variance loops run over j <= llen->max - confirm whether
+   the last bin is meant to be excluded from the regression.
+*/
+
+void
+fit_llen(struct llen_str *llen, struct pstat_str *pr)
+{
+  int j;
+  int n;
+  int n_size;
+  double x, y2, u, z;
+  double mean_x, mean_y, var_x, var_y, covar_xy;
+  double mean_y2, covar_xy2, var_y2, dllj;
+
+  double sum_x, sum_y, sum_x2, sum_xy, sum_v, delta, n_w;
+  
+/* now fit scores to best linear function of log(n), using
+   simple linear regression */
+  
+  for (llen->min=0; llen->min < llen->max; llen->min++)
+    if (llen->hist[llen->min]) break;	/* first non-empty bin */
+  llen->min--;
+
+  for (n_size=0,j = llen->min; j < llen->max; j++) {
+    if (llen->hist[j] > 1) {
+      dllj = (double)llen->hist[j];
+      llen->score_var[j] = llen->score2_sums[j]/dllj
+	- (llen->score_sums[j]/dllj)*(llen->score_sums[j]/dllj);
+      llen->score_var[j] /= (double)(llen->hist[j]-1);
+      if (llen->score_var[j] <= 0.1 ) llen->score_var[j] = 0.1;	/* floor keeps the 1/var weights finite */
+      n_size++;
+    }
+  }
+
+  pr->nb_tot = n_size;	/* number of bins with more than one score */
+
+  n_w = 0.0;
+  sum_x = sum_y = sum_x2 = sum_xy = sum_v = 0;
+  for (j = llen->min; j < llen->max; j++)
+    if (llen->hist[j] > 1) {
+      x = j + 0.5;
+      dllj = (double)llen->hist[j];
+      n_w += dllj/llen->score_var[j];
+      sum_x +=   dllj * x / llen->score_var[j] ;
+      sum_y += llen->score_sums[j] / llen->score_var[j];
+      sum_x2 +=  dllj * x * x /llen->score_var[j];
+      sum_xy +=  x * llen->score_sums[j]/llen->score_var[j];
+    }
+
+  if (n_size < 5 ) {
+    llen->fit_flag=0;
+    pr->rho = 0;
+    pr->mu = sum_y/n_w;
+    return;
+  }
+  else {
+    delta = n_w * sum_x2 - sum_x * sum_x;
+    if (delta > 0.001) {
+      pr->rho = (n_w * sum_xy  - sum_x * sum_y)/delta;
+      pr->rho_e = n_w/delta;
+      pr->mu = (sum_x2 * sum_y - sum_x * sum_xy)/delta;
+      pr->mu_e = sum_x2/delta;
+    }
+    else {
+      llen->fit_flag = 0;
+      pr->rho = 0;
+      pr->mu = sum_y/n_w;
+      return;
+    }
+  }
+
+  delta = n_w * sum_x2 - sum_x * sum_x;	/* same value as computed in the branch above */
+  pr->rho = (n_w * sum_xy  - sum_x * sum_y)/delta;
+  pr->mu = (sum_x2 * sum_y - sum_x * sum_xy)/delta;
+
+  n = 0;
+  mean_x = mean_y = mean_y2 = 0.0;
+  var_x = var_y = 0.0;
+  covar_xy = covar_xy2 = 0.0;
+
+  for (j = llen->min; j <= llen->max; j++) 
+   if (llen->hist[j] > 1 ) {
+      n += llen->hist[j];
+      x = (double)j + 0.5;
+      mean_x += (double)llen->hist[j] * x;
+      mean_y += llen->score_sums[j];
+      var_x += (double)llen->hist[j] * x * x;
+      var_y += llen->score2_sums[j];
+      covar_xy += x * llen->score_sums[j];
+    }
+  mean_x /= n; mean_y /= n;
+  var_x = var_x / n - mean_x * mean_x;
+  var_y = var_y / n - mean_y * mean_y;
+  
+  covar_xy = covar_xy / n - mean_x * mean_y;
+/*
+  pr->rho = covar_xy / var_x;
+  pr->mu = mean_y - pr->rho * mean_x;
+*/
+  mean_y2 = covar_xy2 = var_y2 = 0.0;
+  for (j = llen->min; j <= llen->max; j++) 
+    if (llen->hist[j] > 1) {
+      x = (double)j + 0.5;
+      u = pr->rho * x + pr->mu;	/* score predicted for this bin */
+      y2 = llen->score2_sums[j] - 2.0 * llen->score_sums[j] * u + llen->hist[j] * u * u;	/* sum of squared residuals in the bin */
+/*
+      dllj = (double)llen->hist[j];
+      fprintf(stderr,"%.2f\t%d\t%g\t%g\n",x/LN_FACT,llen->hist[j],
+	      llen->score_sums[j]/dllj,y2/dllj);
+*/
+      mean_y2 += y2;
+      var_y2 += y2 * y2;
+      covar_xy2 += x * y2;
+      /*      fprintf(stderr,"%6.1f %4d %8d %8d %7.2f %8.2f\n",
+	      x,llen->hist[j],llen->score_sums[j],llen->score2_sums[j],u,y2); */
+    }
+  
+  pr->mean_var = mean_y2 /= (double)n;
+  covar_xy2 = covar_xy2 / (double)n - mean_x * mean_y2;
+
+  if (pr->mean_var <= 0.01) {
+    llen->fit_flag = 0;
+    pr->mean_var = (pr->mu > 1.0) ? pr->mu: 1.0;
+  }
+
+  /*
+  fprintf(stderr," rho1/mu1: %.4f/%.4f mean_var %.4f\n",
+	  pr->rho*LN_FACT,pr->mu,pr->mean_var);
+  */
+  if (n > 1) pr->var_e = (var_y2/n - mean_y2 * mean_y2)/(n-1);
+  else pr->var_e = 0.0;
+
+  if (llen->fit_flag) {
+    pr->rho2 = covar_xy2 / var_x;
+    pr->mu2 = pr->mean_var - pr->rho2 * mean_x;
+  }
+  else {
+    pr->rho2 = 0;
+    pr->mu2 = pr->mean_var;
+  }
+
+  if (pr->rho2 < 0.0 )
+    z = (pr->rho2 * LN_FACT*log((double)llen->max_length) + pr->mu2 > 0.0) ? llen->max_length : exp((-1.0 - pr->mu2 / pr->rho2)/LN_FACT);
+  else z =  pr->rho2 ? exp((1.0 - pr->mu2 / pr->rho2)/LN_FACT) : LENGTH_CUTOFF;
+  if (z < 2*LENGTH_CUTOFF) z = 2*LENGTH_CUTOFF;
+
+  pr->var_cutoff = pr->rho2 * LN_FACT*log(z) + pr->mu2;	/* variance predicted at length z */
+}
+
+/* fit_llens: trim high variance bins
+   (1) regress scores vs log(n) using weighted variance
+   (2) regress residuals vs log(n)
+   (3) remove high variance bins
+   (4) calculate mean variance after length regression
+
+   llen : per-bin counts and sums; llen->min and score_var[] are
+          rewritten and the count of every removed bin is set to 0
+   pr   : rho, mu, rho2, mu2 and var_cutoff are computed here;
+          nb_trimmed and n1_trimmed are incremented for each removed
+          bin; the final call to fit_llen() then recomputes the fit
+          from the remaining bins
+
+   A bin is removed when the square root of its score_var[] entry
+   (computed in the loop over u2 below) exceeds three times the
+   root-mean of those entries.
+
+   NOTE(review): delta, n and n_u2 are used as divisors without a
+   zero check - confirm that callers guarantee enough populated bins.
+*/
+
+void
+fit_llens(struct llen_str *llen, struct pstat_str *pr)
+{
+  int j;
+  int n, n_u2;
+  double x, y, y2, u, u2, v, z;
+  double mean_x, mean_y, var_x, var_y, covar_xy;
+  double mean_y2, covar_xy2;
+  double mean_u2, mean_3u2, dllj;
+  double sum_x, sum_y, sum_x2, sum_xy, sum_v, delta, n_w;
+
+/* now fit scores to best linear function of log(n), using
+   simple linear regression */
+  
+  for (llen->min=0; llen->min < llen->max; llen->min++)
+    if (llen->hist[llen->min]) break;	/* first non-empty bin */
+  llen->min--;
+
+  for (j = llen->min; j < llen->max; j++) {
+    if (llen->hist[j] > 1) {
+      dllj = (double)llen->hist[j];
+      llen->score_var[j] = (double)llen->score2_sums[j]/dllj
+	- (llen->score_sums[j]/dllj)*(llen->score_sums[j]/dllj);
+      llen->score_var[j] /= (double)(llen->hist[j]-1);
+      if (llen->score_var[j] <= 1.0 ) llen->score_var[j] = 1.0;	/* floor is 1.0 here, 0.1 in fit_llen() */
+    }
+  }
+	  
+  n_w = 0.0;
+  sum_x = sum_y = sum_x2 = sum_xy = sum_v = 0;
+  for (j = llen->min; j < llen->max; j++)
+    if (llen->hist[j] > 1) {
+      x = j + 0.5;
+      dllj = (double)llen->hist[j];
+      n_w += dllj/llen->score_var[j];
+      sum_x +=   dllj * x / llen->score_var[j] ;
+      sum_y += llen->score_sums[j] / llen->score_var[j];
+      sum_x2 +=  dllj * x * x /llen->score_var[j];
+      sum_xy +=  x * llen->score_sums[j]/llen->score_var[j];
+    }
+
+  delta = n_w * sum_x2 - sum_x * sum_x;
+  pr->rho = (n_w * sum_xy  - sum_x * sum_y)/delta;
+  pr->mu = (sum_x2 * sum_y - sum_x * sum_xy)/delta;
+
+/* printf(" rho1/mu1: %.2f/%.2f\n",pr->rho*LN_FACT,pr->mu); */
+
+  n = 0;
+  mean_x = mean_y = mean_y2 = 0.0;
+  var_x = var_y = 0.0;
+  covar_xy = covar_xy2 = 0.0;
+
+  for (j = llen->min; j <= llen->max; j++) 
+    if (llen->hist[j] > 1 ) {
+      n += llen->hist[j];
+      x = (double)j + 0.5;
+      dllj = (double)llen->hist[j];
+      mean_x += dllj * x;
+      mean_y += llen->score_sums[j];
+      var_x += dllj * x * x;
+      var_y += llen->score2_sums[j];
+      covar_xy += x * llen->score_sums[j];
+    }
+  mean_x /= n; mean_y /= n;
+  var_x = var_x / n - mean_x * mean_x;
+  var_y = var_y / n - mean_y * mean_y;
+  
+  covar_xy = covar_xy / n - mean_x * mean_y;
+/*  pr->rho = covar_xy / var_x;
+  pr->mu = mean_y - pr->rho * mean_x;
+*/
+
+  mean_y2 = covar_xy2 = 0.0;
+  for (j = llen->min; j <= llen->max; j++) 
+    if (llen->hist[j] > 1) {
+      x = (double)j + 0.5;
+      u = pr->rho * x + pr->mu;	/* score predicted for this bin */
+      y2 = llen->score2_sums[j] - 2 * llen->score_sums[j] * u + llen->hist[j] * u * u;	/* sum of squared residuals in the bin */
+      mean_y2 += y2;
+      covar_xy2 += x * y2;
+    }
+  
+  mean_y2 /= n;
+  covar_xy2 = covar_xy2 / n - mean_x * mean_y2;
+  pr->rho2 = covar_xy2 / var_x;
+  pr->mu2 = mean_y2 - pr->rho2 * mean_x;
+
+  if (pr->rho2 < 0.0 )
+    z = (pr->rho2 * LN_FACT*log((double)llen->max_length) + pr->mu2 > 0.0) ? llen->max_length : exp((-1.0 - pr->mu2 / pr->rho2)/LN_FACT);
+  else z =  pr->rho2 ? exp((1.0 - pr->mu2 / pr->rho2)/LN_FACT) : LENGTH_CUTOFF;
+  if (z < 2* LENGTH_CUTOFF) z = 2*LENGTH_CUTOFF;
+
+  pr->var_cutoff = pr->rho2*LN_FACT*log(z) + pr->mu2;
+
+/*  fprintf(stderr,"\nminimum allowed predicted variance (%0.2f) at n = %.0f\n",
+	 pr->var_cutoff,z);
+*/
+  mean_u2 = 0.0;
+  n_u2 = 0;
+  for ( j = llen->min; j < llen->max; j++) {
+    y = j+0.5;
+    dllj = (double)llen->hist[j];
+    x = pr->rho * y + pr->mu;	/* here x is the predicted score, y the bin position */
+    v = pr->rho2 * y + pr->mu2;	/* predicted variance for this bin */
+    if (v < pr->var_cutoff) v = pr->var_cutoff;
+    if (llen->hist[j]> 1) {
+      u2 =  (llen->score2_sums[j] - 2 * x * llen->score_sums[j] + dllj * x * x) - v*dllj;
+      mean_u2 += llen->score_var[j] = u2*u2/(llen->hist[j]-1);
+      n_u2++;
+      /*      fprintf(stderr," %d (%d) u2: %.2f v*ll: %.2f %.2f\n",
+	      j,llen->hist[j],u2,v*dllj,sqrt(llen->score_var[j])); */
+    }
+    else llen->score_var[j] = -1.0;	/* bins with at most one score are marked with -1.0 */
+  }
+
+  mean_u2 = sqrt(mean_u2/(double)n_u2);
+  /* fprintf(stderr," mean s.d.: %.2f\n",mean_u2); */
+
+  mean_3u2 = mean_u2*3.0;	/* removal threshold */
+
+  for (j = llen->min; j < llen->max; j++) {
+    if (llen->hist[j] <= 1) continue;
+    if (sqrt(llen->score_var[j]) > mean_3u2) {
+      /*      fprintf(stderr," removing %d %d %.2f\n",
+	     j, (int)(exp((double)j/LN_FACT)-0.5),
+	     sqrt(llen->score_var[j]));
+	     */
+      pr->nb_trimmed++;
+      pr->n1_trimmed += llen->hist[j];
+      llen->hist[j] = 0;	/* an emptied bin is skipped by every later hist[j] > 1 test */
+    }
+  }
+  fit_llen(llen, pr);	/* final fit on the surviving bins */
+}
+
+
+/* find_z() - public entry point: forwards all arguments to whichever
+   scoring function find_zp currently points at and returns its result. */
+double find_z(int score, double escore, int length, double comp, struct pstat_str *pu) {
+  double zscore;
+
+  zscore = (*find_zp)(score, escore, length, comp, pu);
+  return zscore;
+}
+
+/* find_zr() - REG_STATS z-score from rho/mu/mean_var.
+   Returns 0.0 for score <= 0 or length < LENGTH_CUTOFF; otherwise
+   50 + 10 * (score - rho*LN_FACT*ln(length) - mu)/sqrt(mean_var).
+   escore and comp are not used. */
+double find_zr(int score, double escore, int length, double comp, 
+	      struct pstat_str *rs)
+{
+  double scaled_ln_len, resid_sd;
+
+  if (score <= 0 || length < LENGTH_CUTOFF) return 0.0;
+
+  scaled_ln_len = LN_FACT*log((double)(length));
+
+  /* residual from the length regression, in standard-deviation units */
+  resid_sd = ((double)score - rs->rho * scaled_ln_len - rs->mu) / sqrt(rs->mean_var);
+
+  return (50.0 + resid_sd*10.0);
+}
+
+/* find_zt() - ranking value derived from an e_score: -log2(escore).
+   When rs->eval_is_pval is 0 the escore is first divided by
+   rs->zdb_size.  A value that is not > 0 yields the fixed maximum
+   744.440071/M_LN2.  score, length and comp are not used. */
+double find_zt(int score, double escore, int length, double comp, 
+	      struct pstat_str *rs)
+{
+  double pval;
+
+  pval = escore;
+  if (rs->eval_is_pval == 0) pval /= rs->zdb_size;
+
+  if (!(pval > 0.0)) return 744.440071/M_LN2;
+
+  return -log(pval)/M_LN2;
+}
+
+/* find_zn() - unscaled z-score: 50 + 10 * (score - mu)/sqrt(mean_var).
+   escore, length and comp are not used. */
+double find_zn(int score, double escore, int length, double comp,
+	      struct pstat_str *rs)
+{
+  double dev_sd;
+
+  dev_sd = ((double)score - rs->mu) / sqrt(rs->mean_var);
+  return (50.0 + dev_sd*10.0);
+}
+
+/* z_to_E() - E value for a given z value, assuming an extreme value
+   distribution.  The count used is `entries`, or db.entries when
+   entries < 1.  Returns 0.0 when zs > ZS_MAX; otherwise
+   count * p with e = exp(-PI_SQRT6*zs - EULER_G) and
+   p = 1 - exp(-e) for e > .01, p = e for smaller e. */
+double
+z_to_E(double zs, long entries, struct db_str db)
+{
+  double n_seqs, tail;
+
+  if (zs > ZS_MAX) return 0.0;
+
+  if (entries >= 1) n_seqs = entries;
+  else n_seqs = db.entries;
+
+  tail = exp(-PI_SQRT6 * zs - EULER_G);
+  if (tail > .01) tail = 1.0 - exp(-tail);
+
+  return n_seqs * tail;
+}
+
+double
+zs_to_p(double zs)
+{
+  return zs;	/* identity: the argument is returned unchanged */
+}
+
+/* this version assumes the probability is in the ->zscore variable,
+   which is provided by this file after last_scale()
+*/
+
+/* zs_to_bit() - returns zs + log2(n0 * n1).
+   The product is formed in double precision: multiplying the two
+   ints first overflows once n0*n1 exceeds INT_MAX. */
+double
+zs_to_bit(double zs, int n0, int n1)
+{
+  return zs+log((double)n0*(double)n1)/M_LN2 ;
+}
+
+/* zs_to_E() - E-value for a bit-scaled score: k * exp(-zs * ln 2).
+
+   zs      : score; values above ZS_MAX give 0.0
+   n1      : library sequence length (divisor for DNA/RNA searches)
+   dnaseq  : sequence type; SEQT_DNA/SEQT_RNA select length scaling
+   entries : number of library entries; db.entries is used when < 1
+   db      : library totals (entries, length, carry)
+
+   For DNA/RNA k is the total library length divided by n1, otherwise
+   k is the entry count; k is never taken below 1.0.
+
+   The total length is length + carry * LONG_MAX, matching the
+   (commented-out) accumulation code elsewhere in this file, which
+   bumps carry each time length exceeds LONG_MAX.  The carry term is
+   therefore added; the previous code multiplied k by it.
+*/
+double
+zs_to_E(double zs,int n1, int dnaseq, long entries, struct db_str db)
+{
+  double e, k;
+
+  /*  if (db->entries < 5) return 0.0; */
+
+  if (zs > ZS_MAX ) return 0.0;
+
+  if (entries < 1) entries = db.entries;
+
+  if (dnaseq == SEQT_DNA || dnaseq == SEQT_RNA) {
+    k = (double)db.length;
+    if (db.carry > 0) { k += (double)db.carry * (double)LONG_MAX;}
+    k /= (double)n1;
+  }
+  else k = (double)entries;
+
+  if (k < 1.0) k = 1.0;
+
+  zs *= M_LN2;
+  if ( zs > 100.0) e = 0.0;	/* avoid underflow in exp() */
+  else e =  exp(-zs);
+  return k * e;
+}
+
+/* E_to_zs() - z-score (scaled as 50 + 10*z) that corresponds to the
+   expectation E in a library of `entries` sequences.
+   Without NORMAL_DIST, z = (ln(E/entries) + EULER_G)/(-PI_SQRT6).
+   With NORMAL_DIST, z comes from np_to_z(1 - E/entries) and 0.0 is
+   returned if that call reports an error. */
+double
+E_to_zs(double E, long entries)
+{
+  double p_seq, zval;
+  int error;
+
+  p_seq = E/(double)entries;
+
+#ifdef NORMAL_DIST
+  zval = np_to_z(1.0-p_seq,&error);
+
+  if (error) return 0.0;
+  return zval*10.0+50.0;
+#else
+  zval = (log(p_seq)+EULER_G)/(-PI_SQRT6);
+  return zval*10.0+50.0;
+#endif
+}
+
+/* zs_to_Ec() - complement of the E value for a given z value, assuming
+   an extreme value distribution.  Returns 0.0 when entries < 5 and
+   1.0 when (zs-50)/10 exceeds ZS_MAX; otherwise entries * q with
+   e = exp(-PI_SQRT6*z - EULER_G) and q = exp(-e) for e > .01,
+   q = 1 - e for smaller e. */
+double
+zs_to_Ec(double zs, long entries)
+{
+  double tail, z_std;
+
+  if (entries < 5) return 0.0;
+
+  z_std = (zs - 50.0)/10.0;
+  if (z_std > ZS_MAX) return 1.0;
+
+  tail = exp(-PI_SQRT6 * z_std - EULER_G);
+
+  if (tail > .01) return (double)entries * exp(-tail);
+  return (double)entries * (1.0 - tail);
+}
+
+/* E1_to_s() - integer score for an expectation value: -log10(e_val),
+   truncated toward zero.  Returns 0 when n1 < LENGTH_CUTOFF.  Unless
+   NORMAL_DIST is defined, negative results are raised to 0.
+   n0, db_size and pu are not used by this version. */
+int
+E1_to_s(double e_val, int n0, int n1, int db_size,
+	void *pu) {
+  int score;
+
+  if (n1 < LENGTH_CUTOFF) return 0;
+
+  /* explicit cast documents the intended double -> int truncation */
+  score = (int)(-log(e_val)/log(10.0));
+
+#ifndef NORMAL_DIST
+  if (score < 0) score = 0;
+#endif
+  return score;
+}
+
+/* sort_escore() - sort v[0..n-1] into ascending order in place, using
+   a shell sort whose gap starts at n/2 and is halved each pass. */
+void
+sort_escore(double *v, int n)
+{
+  int step, hi, lo;
+  double held;
+
+  step = n/2;
+  while (step > 0) {
+    for (hi = step; hi < n; hi++) {
+      /* sink the element at hi down its gap-chain until it is in order */
+      lo = hi - step;
+      while (lo >= 0 && !(v[lo] <= v[lo+step])) {
+	held = v[lo];
+	v[lo] = v[lo+step];
+	v[lo+step] = held;
+	lo -= step;
+      }
+    }
+    step /= 2;
+  }
+}
+
+/* scale_tat - compute 'a', 'b', 'c' coefficients for scaling fasts/f
+   escores 
+   5-May-2003 - also calculate index for high ties
+
+   escore     : escores, expected in ascending order (the sort call is
+                commented out); leading negative values are skipped
+   nstats     : number of entries in escore[]
+   db_entries : library size used in the rank formula and for tie_j
+   do_trim    : when non-zero the first min(5% of nstats, 500) points
+                are left out of the fit
+   rs         : receives tat_a, tat_b, tat_c (via linreg()) and tie_j
+
+   Runs of equal escores share one rank position (the middle of the
+   run).  If no non-negative escore remains, only tie_j is set and
+   the coefficients are left untouched; the previous code kept
+   reading past the end of escore[] in that case.
+   Exits the program if a work array cannot be allocated.
+*/
+void
+scale_tat(double *escore, int nstats,
+	  long db_entries, int do_trim,
+	  struct pstat_str *rs)
+{
+  int i, j, k, start;
+  double *x, *lnx, *lny;
+
+  /*   sort_escore(escore, nstats); */
+
+  /* skip leading negative values without running off the array */
+  while (nstats > 0 && *escore<0.0) {escore++; nstats--; }
+
+  /* calculate tie_j - the index below which all scores are considered
+     positional ties; it depends only on db_entries */
+  rs->tie_j = 0.005 * db_entries;
+
+  if (nstats <= 0) return;	/* nothing to fit */
+
+  x = (double *) calloc(nstats, sizeof(double));
+  if(x == NULL) {
+    fprintf(stderr, "Couldn't calloc tatE/x\n");
+    exit(1);
+  }
+
+  lnx = (double *) calloc(nstats,sizeof(double));
+  if(lnx == NULL) {
+    fprintf(stderr, "Couldn't calloc tatE/lnx\n");
+    exit(1);
+  }
+  
+  lny = (double *) calloc(nstats,sizeof(double));
+  if(lny == NULL) {
+    fprintf(stderr, "Couldn't calloc tatE/lny\n");
+    exit(1);
+  }
+  
+  for(i = 0 ; i < nstats ; ) {
+
+    lny[i] = log(escore[i]);
+
+    /* j becomes the index just past the run of values equal to escore[i] */
+    for(j = i+1 ; j < nstats ; j++) {
+	if(escore[j] != escore[i]) break;
+    }
+
+    x[i] = ((((double)i + (double)(j - i - 1)/2.0)*(double)nstats/(double)db_entries)+1.0)/(double)nstats;
+    lnx[i] = log(x[i]);
+
+    /* every member of the run gets the same point */
+    for(k = i+1 ; k < j ; k++) {
+      lny[k]=lny[i];
+      x[k] = x[i];
+      lnx[k]=lnx[i];
+    }
+    i = k;	/* k == j here: continue with the next distinct value */
+  }
+
+  if (!do_trim) {
+    start = 0;
+  } else {
+    start = 0.05 * (double) nstats;
+    start = start > 500 ? 500 : start;
+  }
+
+  linreg(lny, x, lnx, nstats, &rs->tat_a, &rs->tat_b, &rs->tat_c, start);
+
+  /* I have the coefficients I need - a, b, c; free arrays */
+
+  free(lny);
+  free(lnx);
+  free(x);
+}
+
+/* linreg() - least-squares fit of lny[i] = a*lnx[i] + b*x[i] + c over
+   i = start .. n-1, solved from the 3x3 normal equations by Cramer's
+   rule with det().  The results are stored through a, b and c. */
+void
+linreg(double *lny, double *x, double *lnx, int n,
+       double *a, double *b, double *c, int start) {
+
+  /* right-hand side: sums of y*ln(x), y*x, y */
+  double s_y_ln = 0.0, s_y_x = 0.0, s_y = 0.0;
+  /* symmetric matrix: sums of products of the three basis terms */
+  double s_ln_ln = 0.0, s_ln_x = 0.0, s_ln = 0.0;
+  double s_x_x = 0.0, s_x = 0.0;
+  double s_one = 0.0;
+  double denom;
+  int idx;
+
+  for (idx = start; idx < n; idx++) {
+    s_y_ln += lny[idx] * lnx[idx];
+    s_y_x += lny[idx] * x[idx];
+    s_y += lny[idx];
+
+    s_ln_ln += lnx[idx] * lnx[idx];
+    s_ln_x += lnx[idx] * x[idx];
+    s_ln += lnx[idx];
+
+    s_x_x += x[idx] * x[idx];
+    s_x += x[idx];
+
+    s_one += 1.0;
+  }
+
+  denom = det(s_ln_ln, s_ln_x, s_ln, s_ln_x, s_x_x, s_x, s_ln, s_x, s_one);
+
+  /* replace one column at a time by the right-hand side */
+  *a = det(s_y_ln, s_ln_x, s_ln, s_y_x, s_x_x, s_x, s_y, s_x, s_one) / denom;
+  *b = det(s_ln_ln, s_y_ln, s_ln, s_ln_x, s_y_x, s_x, s_ln, s_y, s_one) / denom;
+  *c = det(s_ln_ln, s_ln_x, s_y_ln, s_ln_x, s_x_x, s_y_x, s_ln, s_x, s_y) / denom;
+
+}
+
+/* det(): determinant of the 3x3 matrix [a11 a12 a13; a21 a22 a23; a31 a32 a33] */
+double det(double a11, double a12, double a13,
+	   double a21, double a22, double a23,
+	   double a31, double a32, double a33)
+{
+  /* 2x2 minors for the cofactor expansion along the first row */
+  const double minor1 = a22 * a33 - a32 * a23;
+  const double minor2 = a21 * a33 - a31 * a23;
+  const double minor3 = a21 * a32 - a31 * a22;
+
+  return a11 * minor1 - a12 * minor2 + a13 * minor3;
+}
+
+/* last_stats(): sort bestp_arr, set the space factor and, for zsflag 1..4 with
+   more than 100 e-scores <= 1.0, fit tat_a/b/c; *rs_sp is allocated if NULL */
+void
+last_stats(const unsigned char *aa0, int n0,
+	   struct stat_str *sptr, int nstats,
+	   struct beststr **bestp_arr, int nbest,
+	   const struct mngmsg *m_msg, struct pstruct *ppst,
+	   struct hist_str *histp, struct pstat_str **rs_sp)
+{
+  double *obs_escore;
+  int i, nobs, nobs_t, do_trim;	/* nobs_t: only used by the disabled code below */
+  long db_entries;
+  struct pstat_str *rs_s;
+
+  if (*rs_sp == NULL) {
+    if ((rs_s=(struct pstat_str *)calloc(1,sizeof(struct pstat_str)))==NULL) {
+      fprintf(stderr," cannot allocate rs_s: %lu\n",(unsigned long)sizeof(struct pstat_str));
+      exit(1);
+    }
+    else *rs_sp = rs_s;
+  }
+  else rs_s = *rs_sp;
+
+  histp->entries = 0;
+  sortbeste(bestp_arr,nbest);
+
+  rs_s->spacefactor = calc_spacefactor(aa0, n0, m_msg->nm0,ppst->nsq);
+
+  if (ppst->zsflag >= 1 && ppst->zsflag <= 4) {
+    if (m_msg->escore_flg) {
+      nobs = nbest;
+      do_trim = 1;
+    }
+    else {
+      nobs = nstats;
+      do_trim = 0;
+    }
+
+    if ((obs_escore = (double *)calloc(nobs,sizeof(double)))==NULL) {
+      fprintf(stderr," cannot allocate obs_escore[%d]\n",nobs);
+      exit(1);
+    }
+
+    if (m_msg->escore_flg) {
+      for (i=nobs=0; i<nbest; i++) {
+	if (bestp_arr[i]->rst.escore<= 1.00)
+	  obs_escore[nobs++]=bestp_arr[i]->rst.escore;
+      }
+      /*
+      nobs_t = nobs;
+      for (i=0; i<nbest; i++) {
+	if (bestp_arr[i]->rst.escore >= 0.99 &&
+	    bestp_arr[i]->rst.escore <= 1.00)
+	  obs_escore[nobs++]=bestp_arr[i]->rst.escore;
+      }
+      */
+      db_entries = m_msg->db.entries;
+    }
+    else {
+      for (i=nobs=0; i<nstats; i++) {
+	if (sptr[i].escore <= 1.00 ) obs_escore[nobs++]=sptr[i].escore;
+      }
+      /*
+      nobs_t = nobs;
+      for (i=0; i<nstats; i++) {
+	if (sptr[i].escore >= 0.99 &&
+	    sptr[i].escore <= 1.0) obs_escore[nobs++]=sptr[i].escore;
+      }
+      */
+      db_entries = nobs;
+/*    db_entries = m_msg->db.entries;*/
+    }
+
+    sortbesto(obs_escore,nobs);
+    if (nobs > 100) {
+      scale_tat(obs_escore,nobs,db_entries,do_trim,rs_s);
+      rs_s->have_tat=1;
+      sprintf(histp->stat_info,"scaled Tatusov statistics (%d): tat_a: %6.4f tat_b: %6.4f tat_c: %6.4f",
+	      nobs,rs_s->tat_a, rs_s->tat_b, rs_s->tat_c);
+    }
+    else {
+      rs_s->have_tat=0;
+      sprintf(histp->stat_info,"Space_factor %.4g scaled statistics",
+	    rs_s->spacefactor);
+    }
+    free(obs_escore);
+  }
+  else {
+    rs_s->have_tat=0;
+    histp->stat_info[0] = '\0';
+  }
+  if (rs_s->have_tat) {
+    find_zp = &find_zt;
+  }
+
+}
+
+/* scale_scores() takes the best (real) scores and re-scales them in place;
+   bptr[] is sorted here with sortbeste() before any rank is used */
+
+void
+scale_scores(struct beststr **bptr, int nbest, struct db_str db,
+	     struct pstruct *ppst, struct pstat_str *rs)
+{
+  int i, j, k;
+  double obs, r_a, r_b, r_c;
+
+  /* this scale function absolutely requires that the results be sorted
+     before it is used */
+
+  sortbeste(bptr,nbest);
+
+  if (!rs->have_tat) {
+    for (i=0; i<nbest; i++) {
+      bptr[i]->rst.escore *= rs->spacefactor;
+    }
+  }
+  else {
+
+    /* here if have_tat is set (last_stats() sets it after fitting more than 100 scores) */
+
+    r_a = rs->tat_a; r_b = rs->tat_b; r_c = rs->tat_c;
+
+  /* the problem with scaletat is that the E() value is related to
+     one's position in the list of top scores - thus, knowing the score
+     is not enough - one must know the rank */
+
+    for(i = 0 ; i < nbest ; ) {
+      /* take the bottom 0.5%, and the ties, and treat them all the same */
+      j = i + 1;
+      while (j< nbest && 
+	     (j <= (0.005 * db.entries) || bptr[j]->rst.escore == bptr[i]->rst.escore)
+	     ) {
+	j++;
+      }
+
+      /* observed frequency: 1-based mid-rank of the group i..j-1 over db.entries */
+      obs = ((double)i + ((double)(j - i - 1)/ 2.0) + 1.0)/(double)db.entries;
+
+      /* make certain ties all have the same correction */
+      for (k = i ; k < j ; k++) {
+	bptr[k]->rst.escore *= obs/exp(r_a*log(obs) + r_b*obs + r_c);
+      }
+      i = k;
+    }
+  }
+  /* values > 0.01 become 1 - exp(-e); zscore = -log2(escore); escore is then multiplied by zdb_size */
+  for (i=0; i<nbest; i++) {
+    if(bptr[i]->rst.escore > 0.01)
+      bptr[i]->rst.escore = 1.0 - exp(-bptr[i]->rst.escore);
+    if (bptr[i]->rst.escore > 0.0)
+      bptr[i]->zscore = -log(bptr[i]->rst.escore)/M_LN2;
+    else 
+      bptr[i]->zscore = 744.440071/M_LN2;	/* presumably -ln of the smallest positive double (1074*ln 2) - TODO confirm */
+    bptr[i]->rst.escore *= ppst->zdb_size;
+  }
+
+  rs->zdb_size = ppst->zdb_size;
+  rs->eval_is_pval = 0;
+}
+
+/* scale_one_score(): rescale a single E()-value given its rank ipos */
+double scale_one_score (int ipos, double escore,
+                        struct db_str db,
+                        struct pstat_str *rs) {
+  double rank_freq, fitted;
+
+  /* no Tatusov fit available: only the space factor is applied */
+  if (rs->have_tat == 0)
+    return escore * rs->spacefactor;
+
+  /* ranks below tie_j are all treated as rank tie_j/2 */
+  if (rs->tie_j > ipos) { ipos = rs->tie_j/2; }
+
+  /* observed frequency of this rank, and the fitted curve at that frequency */
+  rank_freq = ((double)ipos + 1.0)/(double)db.entries;
+  fitted = exp(rs->tat_a*log(rank_freq) + rs->tat_b*rank_freq + rs->tat_c);
+
+  return escore * (rank_freq/fitted);
+}
+
+double calc_spacefactor(const unsigned char *aa0, int n0,
+			int nm0, int nsq) {
+  /* returns 2^nm0 - 1 unless built with FASTF, where an arrangement count is derived from aa0 */
+#if !defined(FASTF)
+  return pow(2.0, (double) nm0) - 1.0;
+#else
+
+  int i, j, n, l, nr, bin, k;
+  int nmoff;
+  int **counts;
+  int **factors;
+  double tmp, result = 0.0;
+  /* nmoff: stride between rows of aa0 (indexed nmoff * i + j below); nmoff-1 columns are used per row */
+  nmoff = (n0 - nm0 + 1)/nm0+1;
+  /* counts[r][j]: how many of the nm0 rows have residue code r in column j (one contiguous allocation) */
+  counts = (int **) calloc(nsq, sizeof(int *));
+  if(counts == NULL) {
+    fprintf(stderr, "couldn't calloc counts array!\n");
+    exit(1);
+  }
+
+  counts[0] = (int *) calloc(nsq * (nmoff - 1), sizeof(int));
+  if(counts[0] == NULL) {
+    fprintf(stderr, "couldn't calloc counts array!\n");
+    exit(1);
+  }
+
+  for(i = 0 ; i < nsq ; i++) {
+    counts[i] = counts[0] + (i * (nmoff - 1));
+  }
+  /* tally the residues column by column; NOTE(review): assumes every aa0 code is < nsq */
+  for(i = 0 ; i < nm0 ; i++) {
+    for(j = 0 ; j < (nmoff - 1) ; j++) {
+      counts[ aa0[nmoff * i + j] ] [ j ] ++;
+    }
+  }
+  /* factors[l][j]: number of arrangements of length l (0..nm0) from column j's residues */
+  factors = (int **) calloc(nm0 + 1, sizeof(int *));
+  if(factors == NULL) {
+    fprintf(stderr, "Couldn't calloc factors array!\n");
+    exit(1);
+  }
+
+  factors[0] = (int *) calloc((nm0 + 1) * (nmoff - 1), sizeof(int));
+  if(factors[0] == NULL) {
+    fprintf(stderr, "Couldn't calloc factors array!\n");
+    exit(1);
+  }
+
+  for(i = 0 ; i <= nm0 ; i++) {
+    factors[i] = factors[0] + (i * (nmoff - 1));
+  }
+
+  /*
+    this algorithm was adapted from the GAP4 library's NrArrangement function:
+    The GAP Group, GAP --- Groups, Algorithms, and Programming,
+    Version 4.1; Aachen, St Andrews, 1999.
+    (http://www-gap.dcs.st-and.ac.uk/ gap)
+  */
+
+  /* calculate K factors for each column in query: */
+  for(j = 0 ; j < (nmoff - 1) ; j++) {
+
+    /* only one way to select 0 elements */
+    factors[0][j] = 1;
+
+    /* for each of the possible elements in this column */
+    for(n = 0 ; n < nsq ; n++) {
+
+      /* if there aren't any of these, skip it */
+      if(counts[n][j] == 0) { continue; }
+
+      /* loop over the possible lengths of the arrangement: K..0 (descending, so factors[l-i][j] still holds the previous element's counts) */
+      for(l = nm0 ; l >= 0 ; l--) {
+	nr = 0;
+	bin = 1;
+	/* bin follows the binomial coefficient C(l, i) as i advances */
+	/*
+	  compute the number of arrangements of length <l>
+	  using only the first <n> elements of <mset>
+	*/
+	for(i = 0, k = min(counts[n][j], l); i <= k ; i++) {
+
+	  /* 
+	     add the number of arrangements of length <l>
+	     that consist of <l>-<i> of the first <n>-1 elements
+	     and <i> copies of the <n>th element
+	  */
+	  nr += bin * factors[l-i][j];
+	  bin = (int) ((float) bin * (float) (l - i) / (float) (i + 1));
+	}
+
+	factors[l][j] = nr;
+      }
+    }
+  }
+  /* sum over lengths i = 1..nm0 of (product over columns of factors[i][j]) / factorial(i, 1) */
+  result = 0.0;
+  for(i = 1 ; i <= nm0 ; i++) {
+    tmp = 1.0;
+    for(j = 0 ; j < (nmoff - 1) ; j++) {
+      tmp *= (double) factors[i][j];
+    }
+    tmp /= factorial(i, 1);
+    result += tmp;
+  }
+  
+  free(counts[0]);
+  free(counts);
+  free(factors[0]);
+  free(factors);
+
+  return result;
+#endif
+}
+
+/* sortbesto(): in-place ascending Shell sort of obs[0..nobs-1] */
+void sortbesto (double *obs, int nobs)
+{
+  /* gap sequence, largest first; the last pass (gap 1) is an insertion sort */
+  static const int gaps[16] = { 1391376, 463792, 198768, 86961, 33936, 13776,
+				4592, 1968, 861, 336, 112, 48, 21, 7, 3, 1 };
+  int pass, pos, slot, step;
+  double held;
+
+  for (pass = 0; pass < 16; pass++) {
+    step = gaps[pass];
+    for (pos = step; pos < nobs; pos++) {
+      held = obs[pos];
+      for (slot = pos; slot >= step && obs[slot - step] > held; slot -= step)
+	obs[slot] = obs[slot - step];
+      obs[slot] = held;
+    }
+  }
+}
+
+/* pstat_info(): format every field of *pu as "<comment> name: value; ..."
+   lines into info_str, with info_str_n passed to SAFE_STRNCPY/SAFE_STRNCAT */
+void
+pstat_info(char *info_str, int info_str_n, char *comment, struct pstat_str *pu) {
+  char pstat_buf[MAX_STR];
+  /* NOTE(review): sprintf() is unbounded; each line is assumed to fit in MAX_STR */
+  sprintf(pstat_buf,"%s zsflag: %d\n",comment,pu->zsflag);
+  SAFE_STRNCPY(info_str,pstat_buf,info_str_n);
+  sprintf(pstat_buf,"%s ngLambda: %g; ngK: %g; ngH: %g\n",comment,pu->ngLambda,pu->ngK,pu->ngH);
+  SAFE_STRNCAT(info_str,pstat_buf,info_str_n);
+  sprintf(pstat_buf,"%s rho: %g; rho_e: %g; mu: %g; mu_e: %g;\n",comment,
+	  pu->rho,pu->rho_e,pu->mu,pu->mu_e);
+  SAFE_STRNCAT(info_str,pstat_buf,info_str_n);
+
+  sprintf(pstat_buf,"%s mean_var: %g; var_e: %g\n",comment,
+	  pu->mean_var, pu->var_e);
+  SAFE_STRNCAT(info_str,pstat_buf,info_str_n);
+
+  sprintf(pstat_buf,"%s rho2: %g; mu2: %g; var_cutoff: %g\n",comment,
+	  pu->rho2, pu->mu2,pu->var_cutoff);
+  SAFE_STRNCAT(info_str,pstat_buf,info_str_n);
+
+  sprintf(pstat_buf,"%s n_trimmed: %d; n1_trimmed: %d; nb_trimmed: %d; nb_tot: %d\n",comment,
+	  pu->n_trimmed, pu->n1_trimmed,pu->nb_trimmed, pu->nb_tot);
+  SAFE_STRNCAT(info_str,pstat_buf,info_str_n);
+
+  sprintf(pstat_buf,"%s tat_a: %g; tat_b: %g; tat_c: %g; spacefactor: %g\n",comment,
+	  pu->tat_a, pu->tat_b,pu->tat_c, pu->spacefactor);
+  SAFE_STRNCAT(info_str,pstat_buf,info_str_n);
+
+  sprintf(pstat_buf,"%s have_tat: %d; tie_j: %d; eval_is_pval: %d; zdb_size: %ld\n",comment,
+	  pu->have_tat,pu->tie_j,pu->eval_is_pval,pu->zdb_size);
+  SAFE_STRNCAT(info_str,pstat_buf,info_str_n);
+}
diff --git a/src/showrss.c b/src/showrss.c
new file mode 100644
index 0000000..d737c3b
--- /dev/null
+++ b/src/showrss.c
@@ -0,0 +1,82 @@
+/* $Id: showrss.c 625 2011-03-23 17:21:38Z wrp $ */
+
+/* copyright (c) 1996, 1997, 1998, 1999, 2014 by William R. Pearson and the
+   The Rector & Visitors of the University of Virginia */
+
+/* Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under this License is distributed on an "AS
+   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+   express or implied.  See the License for the specific language
+   governing permissions and limitations under the License. 
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "defs.h"
+#include "structs.h"
+#include "param.h"
+#include "best_stats.h"
+
+extern double
+zs_to_E(double zs, int n1, int isdna, long entries,struct db_str db);
+extern double zs_to_bit(double zs, int n0, int n1);
+extern double zs_to_p(double zs);
+
+extern char *prog_func;
+
+/* showbest(): print the shuffle summary and the statistics of bptr[0] to fp */
+void showbest (FILE *fp, unsigned char **aa0, unsigned char *aa1, int maxn,
+	       struct beststr **bptr, int nbest, int qlib, struct mngmsg *m_msg,
+	       struct pstruct pst, struct db_str db,
+	       char *info_gstring2, void **f_str)
+{
+  struct beststr *top;		/* the single entry that is reported */
+  int top_score;
+  char *rlabel = strrchr(m_msg->label,' ');
+
+  if (rlabel == NULL) { rlabel = m_msg->label; }	/* no blank: use the whole label */
+
+  fprintf(fp,"\n %s - %d shuffles; ",prog_func,m_msg->shuff_max);
+  if (m_msg->shuff_wid <= 0)
+    fprintf(fp," uniform shuffle\n");
+  else
+    fprintf(fp," window shuffle, window size: %d\n",m_msg->shuff_wid);
+
+  top = bptr[0];
+  top_score = top->score[0];
+  fprintf(fp," unshuffled %s score: %d;  bits(s=%d|n_l=%d): %4.1f p(%d) < %g\n",
+	  rlabel,top_score,top_score, top->n1,
+	  zs_to_bit(top->zscore,m_msg->n0,top->n1),top_score,zs_to_p(top->zscore));
+
+  fprintf(fp,"For %ld sequences, a score >= %d is expected %4.4g times\n\n",
+	  pst.zdb_size,top_score,zs_to_E(top->zscore,top->n1,0l,pst.zdb_size,db));
+}
+
+void showalign (FILE *fp, unsigned char *aa0, unsigned char *aa1, int maxn,
+		struct beststr **bptr, int nbest,int qlib,
+		const struct mngmsg *m_msg, struct pstruct *ppst,
+		void *f_str, char *info_gstring2)
+{ /* empty body: this file's showalign() produces no output */
+}
+
+/* aancpy(): copy up to count residues, mapping codes <= pst.nsq through pst.sq[] */
+void
+aancpy(char *to, char *from, int count,
+       struct pstruct pst)
+{
+  char *tp = to;	/* plain char may be signed, so codes are indexed as unsigned char */
+
+  while (count-- && *from) {
+    if ((unsigned char)*from <= pst.nsq) *tp++ = pst.sq[(unsigned char)*(from++)];
+    else *tp++ = *from++;
+  }
+  *tp='\0';
+}
diff --git a/src/smith_waterman_altivec.c b/src/smith_waterman_altivec.c
new file mode 100644
index 0000000..9f9b614
--- /dev/null
+++ b/src/smith_waterman_altivec.c
@@ -0,0 +1,3086 @@
+
+/* Implementation of the Wozniak "anti-diagonal" vectorization
+   strategy for Smith-Waterman comparison, Wozniak (1997) Comp.
+   Appl. Biosci. 13:145-150
+
+   November, 2004
+*/
+
+/*
+  Written by Erik Lindahl, Stockholm Bioinformatics Center, 2004.
+  Please send bug reports and/or suggestions to lindahl at sbc.su.se.
+*/
+
+#include <stdio.h>
+
+#include "defs.h"
+#include "param.h"
+#include "dropgsw2.h"
+
+#ifdef SW_ALTIVEC
+
+int
+smith_waterman_altivec_word(unsigned char *     query_sequence,
+                            unsigned short *    query_profile_word,
+                            int                 query_length,
+                            unsigned char *     db_sequence,
+                            int                 db_length,
+                            unsigned short      bias,
+                            unsigned short      gap_open,
+                            unsigned short      gap_extend,
+                            struct f_struct *   f_str)
+{
+    int                     i,j,k;
+    unsigned short *        p;
+    unsigned short          score;   
+    unsigned char *         p_dbseq;
+    int                     alphabet_size = f_str->alphabet_size;
+    unsigned short *        workspace     = (unsigned short *)f_str->workspace;
+
+    vector unsigned short   Fup,Hup1,Hup2,E,F,H,tmp;
+    vector unsigned char    perm;
+    vector unsigned short   v_maxscore;
+    vector unsigned short   v_bias,v_gapopen,v_gapextend;
+    vector unsigned short   v_score;
+    vector unsigned short   v_score_q1;
+    vector unsigned short   v_score_q2;
+    vector unsigned short   v_score_q3;
+    vector unsigned short   v_score_load; 
+    vector unsigned char    queue1_to_score  = (vector unsigned char)(16,17,2,3,4,5,6,7,8,9,10,11,12,13,14,15);
+    vector unsigned char    queue2_to_queue1 = (vector unsigned char)(0,1,18,19,4,5,6,7,8,9,10,11,12,13,14,15);
+    vector unsigned char    queue3_to_queue2 = (vector unsigned char)(16,16,16,16,16,21,16,0,16,1,16,2,16,3,16,4);
+    vector unsigned char    queue3_with_load = (vector unsigned char)(23,5,6,7,8,25,9,10,11,27,12,13,29,14,31,16);
+        
+    /* Load the bias to all elements of a constant */
+    v_bias           = vec_lde(0,&bias);
+    perm             = vec_lvsl(0,&bias);
+    v_bias           = vec_perm(v_bias,v_bias,perm);
+    v_bias           = vec_splat(v_bias,0);
+    
+    /* Load gap opening penalty to all elements of a constant */
+    v_gapopen        = vec_lde(0,&gap_open);
+    perm             = vec_lvsl(0,&gap_open);
+    v_gapopen        = vec_perm(v_gapopen,v_gapopen,perm);
+    v_gapopen        = vec_splat(v_gapopen,0);
+
+    /* Load gap extension penalty to all elements of a constant */
+    v_gapextend      = vec_lde(0,&gap_extend);  
+    perm             = vec_lvsl(0,&gap_extend);
+    v_gapextend      = vec_perm(v_gapextend,v_gapextend,perm);
+    v_gapextend      = vec_splat(v_gapextend,0);
+    
+    v_maxscore = vec_xor(v_maxscore,v_maxscore);
+   
+    // Zero out the storage vector 
+    k = 2*(db_length+7);
+        
+    for(i=0,j=0;i<k;i++,j+=16)
+    {
+        // borrow the zero value in v_maxscore to have something to store
+        vec_st(v_maxscore,j,workspace);
+    }
+    
+    for(i=0;i<query_length;i+=8)
+    {
+        // fetch first data asap.
+        p_dbseq    = db_sequence;
+        k          = *p_dbseq++;
+        v_score_load = vec_ld(16*k,query_profile_word);
+
+        // zero lots of stuff. 
+        // We use both the VPERM and VSIU unit to knock off some cycles.
+        
+        E          = vec_splat_u16(0);
+        F          = vec_xor(F,F);
+        H          = vec_splat_u16(0);
+        Hup2       = vec_xor(Hup2,Hup2);
+        v_score_q1 = vec_splat_u16(0);
+        v_score_q2 = vec_xor(v_score_q2,v_score_q2);
+        v_score_q3 = vec_splat_u16(0);
+
+        // reset pointers to the start of the saved data from the last row
+        p = workspace;
+                
+        // PROLOGUE 1
+        // prefetch next residue
+        k          = *p_dbseq++;
+        
+        // Create the actual diagonal score vector
+        // and update the queue of incomplete score vectors
+        
+        v_score    = vec_perm(v_score_q1, v_score_load, queue1_to_score);
+        v_score_q1 = vec_perm(v_score_q2, v_score_load, queue2_to_queue1);
+        v_score_q2 = vec_perm(v_score_q3, v_score_load, queue3_to_queue2);
+        v_score_q3 = vec_perm(v_score_q3, v_score_load, queue3_with_load);
+        
+        // prefetch score for next step 
+        v_score_load = vec_ld(16*k,query_profile_word);            
+        
+        // load values of F and H from previous row (one unit up)
+        Fup    = vec_ld(0,  p);
+        Hup1   = vec_ld(16, p);
+        p += 16; // move ahead 32 bytes
+        
+        // shift into place so we have complete F and H vectors
+        // that refer to the values one unit up from each cell
+        // that we are currently working on.
+        Fup    = vec_sld(Fup,F,14);
+        Hup1   = vec_sld(Hup1,H,14);            
+        
+        // do the dynamic programming 
+
+        // update E value
+        E   = vec_subs(E,v_gapextend);
+        tmp = vec_subs(H,v_gapopen);
+        E   = vec_max(E,tmp);
+        
+        // update F value
+        F   = vec_subs(Fup,v_gapextend);
+        tmp = vec_subs(Hup1,v_gapopen);
+        F   = vec_max(F,tmp);
+        
+        // add score to H
+        H   = vec_adds(Hup2,v_score);
+        H   = vec_subs(H,v_bias);
+        
+        // set H to max of H,E,F
+        H   = vec_max(H,E);
+        H   = vec_max(H,F);
+        
+        // Save value to use for next diagonal H 
+        Hup2 = Hup1;
+        
+        // Update highest score encountered this far
+        v_maxscore = vec_max(v_maxscore,H);
+        
+        
+        // PROLOGUE 2
+        // prefetch next residue
+        k          = *p_dbseq++;
+        
+        // Create the actual diagonal score vector
+        // and update the queue of incomplete score vectors
+        
+        v_score    = vec_perm(v_score_q1, v_score_load, queue1_to_score);
+        v_score_q1 = vec_perm(v_score_q2, v_score_load, queue2_to_queue1);
+        v_score_q2 = vec_perm(v_score_q3, v_score_load, queue3_to_queue2);
+        v_score_q3 = vec_perm(v_score_q3, v_score_load, queue3_with_load);
+        
+        // prefetch score for next step 
+        v_score_load = vec_ld(16*k,query_profile_word);            
+        
+        // load values of F and H from previous row (one unit up)
+        Fup    = vec_ld(0,  p);
+        Hup1   = vec_ld(16, p);
+        p += 16; // move ahead 32 bytes
+        
+        // shift into place so we have complete F and H vectors
+        // that refer to the values one unit up from each cell
+        // that we are currently working on.
+        Fup    = vec_sld(Fup,F,14);
+        Hup1   = vec_sld(Hup1,H,14);            
+        
+        // do the dynamic programming 
+
+        // update E value
+        E   = vec_subs(E,v_gapextend);
+        tmp = vec_subs(H,v_gapopen);
+        E   = vec_max(E,tmp);
+        
+        // update F value
+        F   = vec_subs(Fup,v_gapextend);
+        tmp = vec_subs(Hup1,v_gapopen);
+        F   = vec_max(F,tmp);
+        
+        // add score to H
+        H   = vec_adds(Hup2,v_score);
+        H   = vec_subs(H,v_bias);
+        
+        // set H to max of H,E,F
+        H   = vec_max(H,E);
+        H   = vec_max(H,F);
+        
+        // Save value to use for next diagonal H 
+        Hup2 = Hup1;
+        
+        // Update highest score encountered this far
+        v_maxscore = vec_max(v_maxscore,H);
+        
+
+        // PROLOGUE 3
+        // prefetch next residue
+        k          = *p_dbseq++;
+        
+        // Create the actual diagonal score vector
+        // and update the queue of incomplete score vectors
+        
+        v_score    = vec_perm(v_score_q1, v_score_load, queue1_to_score);
+        v_score_q1 = vec_perm(v_score_q2, v_score_load, queue2_to_queue1);
+        v_score_q2 = vec_perm(v_score_q3, v_score_load, queue3_to_queue2);
+        v_score_q3 = vec_perm(v_score_q3, v_score_load, queue3_with_load);
+
+        // prefetch score for next step 
+        v_score_load = vec_ld(16*k,query_profile_word);            
+        
+        // load values of F and H from previous row (one unit up)
+        Fup    = vec_ld(0,  p);
+        Hup1   = vec_ld(16, p);
+        p += 16; // move ahead 32 bytes
+        
+        // shift into place so we have complete F and H vectors
+        // that refer to the values one unit up from each cell
+        // that we are currently working on.
+        Fup    = vec_sld(Fup,F,14);
+        Hup1   = vec_sld(Hup1,H,14);            
+        
+        // do the dynamic programming 
+
+        // update E value
+        E   = vec_subs(E,v_gapextend);
+        tmp = vec_subs(H,v_gapopen);
+        E   = vec_max(E,tmp);
+        
+        // update F value
+        F   = vec_subs(Fup,v_gapextend);
+        tmp = vec_subs(Hup1,v_gapopen);
+        F   = vec_max(F,tmp);
+        
+        // add score to H
+        H   = vec_adds(Hup2,v_score);
+        H   = vec_subs(H,v_bias);
+        
+        // set H to max of H,E,F
+        H   = vec_max(H,E);
+        H   = vec_max(H,F);
+        
+        // Save value to use for next diagonal H 
+        Hup2 = Hup1;
+        
+        // Update highest score encountered this far
+        v_maxscore = vec_max(v_maxscore,H);
+        
+
+        // PROLOGUE 4
+        // prefetch next residue
+        k          = *p_dbseq++;
+        
+        // Create the actual diagonal score vector
+        // and update the queue of incomplete score vectors
+        
+        v_score    = vec_perm(v_score_q1, v_score_load, queue1_to_score);
+        v_score_q1 = vec_perm(v_score_q2, v_score_load, queue2_to_queue1);
+        v_score_q2 = vec_perm(v_score_q3, v_score_load, queue3_to_queue2);
+        v_score_q3 = vec_perm(v_score_q3, v_score_load, queue3_with_load);
+        
+        // prefetch score for next step 
+        v_score_load = vec_ld(16*k,query_profile_word);            
+        
+        // load values of F and H from previous row (one unit up)
+        Fup    = vec_ld(0,  p);
+        Hup1   = vec_ld(16, p);
+        p += 16; // move ahead 32 bytes
+        
+        // shift into place so we have complete F and H vectors
+        // that refer to the values one unit up from each cell
+        // that we are currently working on.
+        Fup    = vec_sld(Fup,F,14);
+        Hup1   = vec_sld(Hup1,H,14);            
+        
+        // do the dynamic programming 
+        
+        // update E value
+        E   = vec_subs(E,v_gapextend);
+        tmp = vec_subs(H,v_gapopen);
+        E   = vec_max(E,tmp);
+        
+        // update F value
+        F   = vec_subs(Fup,v_gapextend);
+        tmp = vec_subs(Hup1,v_gapopen);
+        F   = vec_max(F,tmp);
+        
+        // add score to H
+        H   = vec_adds(Hup2,v_score);
+        H   = vec_subs(H,v_bias);
+        
+        // set H to max of H,E,F
+        H   = vec_max(H,E);
+        H   = vec_max(H,F);
+        
+        // Save value to use for next diagonal H 
+        Hup2 = Hup1;
+        
+        // Update highest score encountered this far
+        v_maxscore = vec_max(v_maxscore,H);
+        
+
+        // PROLOGUE 5
+        // prefetch next residue
+        k          = *p_dbseq++;
+        
+        // Create the actual diagonal score vector
+        // and update the queue of incomplete score vectors
+        
+        v_score    = vec_perm(v_score_q1, v_score_load, queue1_to_score);
+        v_score_q1 = vec_perm(v_score_q2, v_score_load, queue2_to_queue1);
+        v_score_q2 = vec_perm(v_score_q3, v_score_load, queue3_to_queue2);
+        v_score_q3 = vec_perm(v_score_q3, v_score_load, queue3_with_load);
+        
+        // prefetch score for next step 
+        v_score_load = vec_ld(16*k,query_profile_word);            
+        
+        // load values of F and H from previous row (one unit up)
+        Fup    = vec_ld(0,  p);
+        Hup1   = vec_ld(16, p);
+        p += 16; // move ahead 32 bytes
+        
+        // shift into place so we have complete F and H vectors
+        // that refer to the values one unit up from each cell
+        // that we are currently working on.
+        Fup    = vec_sld(Fup,F,14);
+        Hup1   = vec_sld(Hup1,H,14);            
+        
+        // do the dynamic programming 
+        
+        // update E value
+        E   = vec_subs(E,v_gapextend);
+        tmp = vec_subs(H,v_gapopen);
+        E   = vec_max(E,tmp);
+        
+        // update F value
+        F   = vec_subs(Fup,v_gapextend);
+        tmp = vec_subs(Hup1,v_gapopen);
+        F   = vec_max(F,tmp);
+        
+        // add score to H
+        H   = vec_adds(Hup2,v_score);
+        H   = vec_subs(H,v_bias);
+        
+        // set H to max of H,E,F
+        H   = vec_max(H,E);
+        H   = vec_max(H,F);
+        
+        // Save value to use for next diagonal H 
+        Hup2 = Hup1;
+        
+        // Update highest score encountered this far
+        v_maxscore = vec_max(v_maxscore,H);
+        
+
+        // PROLOGUE 6
+        // prefetch next residue
+        k          = *p_dbseq++;
+        
+        // Create the actual diagonal score vector
+        // and update the queue of incomplete score vectors
+        
+        v_score    = vec_perm(v_score_q1, v_score_load, queue1_to_score);
+        v_score_q1 = vec_perm(v_score_q2, v_score_load, queue2_to_queue1);
+        v_score_q2 = vec_perm(v_score_q3, v_score_load, queue3_to_queue2);
+        v_score_q3 = vec_perm(v_score_q3, v_score_load, queue3_with_load);
+        
+        // prefetch score for next step 
+        v_score_load = vec_ld(16*k,query_profile_word);            
+        
+        // load values of F and H from previous row (one unit up)
+        Fup    = vec_ld(0,  p);
+        Hup1   = vec_ld(16, p);
+        p += 16; // move ahead 32 bytes
+        
+        // shift into place so we have complete F and H vectors
+        // that refer to the values one unit up from each cell
+        // that we are currently working on.
+        Fup    = vec_sld(Fup,F,14);
+        Hup1   = vec_sld(Hup1,H,14);            
+        
+        // do the dynamic programming 
+        
+        // update E value
+        E   = vec_subs(E,v_gapextend);
+        tmp = vec_subs(H,v_gapopen);
+        E   = vec_max(E,tmp);
+        
+        // update F value
+        F   = vec_subs(Fup,v_gapextend);
+        tmp = vec_subs(Hup1,v_gapopen);
+        F   = vec_max(F,tmp);
+        
+        // add score to H
+        H   = vec_adds(Hup2,v_score);
+        H   = vec_subs(H,v_bias);
+        
+        // set H to max of H,E,F
+        H   = vec_max(H,E);
+        H   = vec_max(H,F);
+        
+        // Save value to use for next diagonal H 
+        Hup2 = Hup1;
+        
+        // Update highest score encountered this far
+        v_maxscore = vec_max(v_maxscore,H);
+
+        
+        // PROLOGUE 7
+        // prefetch next residue
+        k          = *p_dbseq++;
+        
+        // Create the actual diagonal score vector
+        // and update the queue of incomplete score vectors
+        
+        v_score    = vec_perm(v_score_q1, v_score_load, queue1_to_score);
+        v_score_q1 = vec_perm(v_score_q2, v_score_load, queue2_to_queue1);
+        v_score_q2 = vec_perm(v_score_q3, v_score_load, queue3_to_queue2);
+        v_score_q3 = vec_perm(v_score_q3, v_score_load, queue3_with_load);
+        
+        // prefetch score for next step 
+        v_score_load = vec_ld(16*k,query_profile_word);            
+        
+        // load values of F and H from previous row (one unit up)
+        Fup    = vec_ld(0,  p);
+        Hup1   = vec_ld(16, p);
+        p += 16; // move ahead 32 bytes
+        
+        // shift into place so we have complete F and H vectors
+        // that refer to the values one unit up from each cell
+        // that we are currently working on.
+        Fup    = vec_sld(Fup,F,14);
+        Hup1   = vec_sld(Hup1,H,14);            
+        
+        // do the dynamic programming 
+        
+        // update E value
+        E   = vec_subs(E,v_gapextend);
+        tmp = vec_subs(H,v_gapopen);
+        E   = vec_max(E,tmp);
+        
+        // update F value
+        F   = vec_subs(Fup,v_gapextend);
+        tmp = vec_subs(Hup1,v_gapopen);
+        F   = vec_max(F,tmp);
+        
+        // add score to H
+        H   = vec_adds(Hup2,v_score);
+        H   = vec_subs(H,v_bias);
+        
+        // set H to max of H,E,F
+        H   = vec_max(H,E);
+        H   = vec_max(H,F);
+        
+        // Save value to use for next diagonal H 
+        Hup2 = Hup1;
+        
+        // Update highest score encountered this far
+        v_maxscore = vec_max(v_maxscore,H);
+        
+
+        // PROLOGUE 8
+        // prefetch next residue
+        k          = *p_dbseq++;
+        
+        // Create the actual diagonal score vector
+        // and update the queue of incomplete score vectors
+        
+        v_score    = vec_perm(v_score_q1, v_score_load, queue1_to_score);
+        v_score_q1 = vec_perm(v_score_q2, v_score_load, queue2_to_queue1);
+        v_score_q2 = vec_perm(v_score_q3, v_score_load, queue3_to_queue2);
+        v_score_q3 = vec_perm(v_score_q3, v_score_load, queue3_with_load);
+        
+        // prefetch score for next step 
+        v_score_load = vec_ld(16*k,query_profile_word);            
+        
+        // load values of F and H from previous row (one unit up)
+        Fup    = vec_ld(0,  p);
+        Hup1   = vec_ld(16, p);
+        p += 16; // move ahead 32 bytes
+        
+        // shift into place so we have complete F and H vectors
+        // that refer to the values one unit up from each cell
+        // that we are currently working on.
+        Fup    = vec_sld(Fup,F,14);
+        Hup1   = vec_sld(Hup1,H,14);            
+        
+        // do the dynamic programming 
+        
+        // update E value
+        E   = vec_subs(E,v_gapextend);
+        tmp = vec_subs(H,v_gapopen);
+        E   = vec_max(E,tmp);
+        
+        // update F value
+        F   = vec_subs(Fup,v_gapextend);
+        tmp = vec_subs(Hup1,v_gapopen);
+        F   = vec_max(F,tmp);
+        
+        // add score to H
+        H   = vec_adds(Hup2,v_score);
+        H   = vec_subs(H,v_bias);
+        
+        // set H to max of H,E,F
+        H   = vec_max(H,E);
+        H   = vec_max(H,F);
+        
+        // Save value to use for next diagonal H 
+        Hup2 = Hup1;
+        
+        // Update highest score encountered this far
+        v_maxscore = vec_max(v_maxscore,H);
+    
+
+        // reset pointers to the start of the saved data from the last row
+        p = workspace;
+
+        for(j=8;j<db_length;j+=8)
+        {           
+            // STEP 1
+            
+            // prefetch next residue
+            k          = *p_dbseq++;
+            
+            // Create the actual diagonal score vector
+            // and update the queue of incomplete score vectors
+
+            v_score    = vec_perm(v_score_q1, v_score_load, queue1_to_score);
+            v_score_q1 = vec_perm(v_score_q2, v_score_load, queue2_to_queue1);
+            v_score_q2 = vec_perm(v_score_q3, v_score_load, queue3_to_queue2);
+            v_score_q3 = vec_perm(v_score_q3, v_score_load, queue3_with_load);
+            
+            // prefetch score for next step
+            v_score_load = vec_ld(16*k,query_profile_word);
+            
+            // load values of F and H from previous row (one unit up)
+            Fup    = vec_ld(256, p);
+            Hup1   = vec_ld(272, p);
+            
+            // save old values of F and H to use on next row
+            vec_st(F, 0,  p);
+            vec_st(H, 16, p);
+            p += 16; // move ahead 32 bytes
+            
+            // shift into place so we have complete F and H vectors
+            // that refer to the values one unit up from each cell
+            // that we are currently working on.
+            Fup    = vec_sld(Fup,F,14);
+            Hup1   = vec_sld(Hup1,H,14);            
+
+            // do the dynamic programming 
+            
+            // update E value
+            E   = vec_subs(E,v_gapextend);
+            tmp = vec_subs(H,v_gapopen);
+            E   = vec_max(E,tmp);
+            
+            // update F value
+            F   = vec_subs(Fup,v_gapextend);
+            tmp = vec_subs(Hup1,v_gapopen);
+            F   = vec_max(F,tmp);
+
+            // add score to H
+            H   = vec_adds(Hup2,v_score);
+            H   = vec_subs(H,v_bias);
+            
+            // set H to max of H,E,F
+            H   = vec_max(H,E);
+            H   = vec_max(H,F); 
+            
+            
+            // Update highest score encountered this far
+            v_maxscore = vec_max(v_maxscore,H);
+            
+ 
+ 
+            // STEP 2
+            
+            // prefetch next residue
+            k          = *p_dbseq++;
+            
+            // Create the actual diagonal score vector
+            // and update the queue of incomplete score vectors
+            
+            v_score    = vec_perm(v_score_q1, v_score_load, queue1_to_score);
+            v_score_q1 = vec_perm(v_score_q2, v_score_load, queue2_to_queue1);
+            v_score_q2 = vec_perm(v_score_q3, v_score_load, queue3_to_queue2);
+            v_score_q3 = vec_perm(v_score_q3, v_score_load, queue3_with_load);
+            
+            // prefetch score for next step
+            v_score_load = vec_ld(16*k,query_profile_word);
+            
+            // load values of F and H from previous row (one unit up)
+            Fup    = vec_ld(256, p);
+            Hup2   = vec_ld(272, p);
+            
+            // save old values of F and H to use on next row
+            vec_st(F, 0,  p);
+            vec_st(H, 16, p);
+            p += 16; // move ahead 32 bytes
+            
+            // shift into place so we have complete F and H vectors
+            // that refer to the values one unit up from each cell
+            // that we are currently working on.
+            Fup    = vec_sld(Fup,F,14);
+            Hup2   = vec_sld(Hup2,H,14);            
+            
+            // do the dynamic programming 
+            
+            // update E value
+            E   = vec_subs(E,v_gapextend);
+            tmp = vec_subs(H,v_gapopen);
+            E   = vec_max(E,tmp);
+            
+            // update F value
+            F   = vec_subs(Fup,v_gapextend);
+            tmp = vec_subs(Hup2,v_gapopen);
+            F   = vec_max(F,tmp);
+            
+            // add score to H
+            H   = vec_adds(Hup1,v_score);
+            H   = vec_subs(H,v_bias);
+            
+            // set H to max of H,E,F
+            H   = vec_max(H,E);
+            H   = vec_max(H,F); 
+            
+            
+            // Update highest score encountered this far
+            v_maxscore = vec_max(v_maxscore,H);
+            
+
+
+            // STEP 3
+            
+            // prefetch next residue
+            k          = *p_dbseq++;
+            
+            // Create the actual diagonal score vector
+            // and update the queue of incomplete score vectors
+            
+            v_score    = vec_perm(v_score_q1, v_score_load, queue1_to_score);
+            v_score_q1 = vec_perm(v_score_q2, v_score_load, queue2_to_queue1);
+            v_score_q2 = vec_perm(v_score_q3, v_score_load, queue3_to_queue2);
+            v_score_q3 = vec_perm(v_score_q3, v_score_load, queue3_with_load);
+            
+            // prefetch score for next step
+            v_score_load = vec_ld(16*k,query_profile_word);
+            
+            // load values of F and H from previous row (one unit up)
+            Fup    = vec_ld(256, p);
+            Hup1   = vec_ld(272, p);
+            
+            // save old values of F and H to use on next row
+            vec_st(F, 0,  p);
+            vec_st(H, 16, p);
+            p += 16; // move ahead 32 bytes
+            
+            // shift into place so we have complete F and H vectors
+            // that refer to the values one unit up from each cell
+            // that we are currently working on.
+            Fup    = vec_sld(Fup,F,14);
+            Hup1   = vec_sld(Hup1,H,14);            
+            
+            // do the dynamic programming 
+            
+            // update E value
+            E   = vec_subs(E,v_gapextend);
+            tmp = vec_subs(H,v_gapopen);
+            E   = vec_max(E,tmp);
+            
+            // update F value
+            F   = vec_subs(Fup,v_gapextend);
+            tmp = vec_subs(Hup1,v_gapopen);
+            F   = vec_max(F,tmp);
+            
+            // add score to H
+            H   = vec_adds(Hup2,v_score);
+            H   = vec_subs(H,v_bias);
+            
+            // set H to max of H,E,F
+            H   = vec_max(H,E);
+            H   = vec_max(H,F); 
+            
+
+            
+            // Update highest score encountered this far
+            v_maxscore = vec_max(v_maxscore,H);
+            
+
+            
+            // STEP 4
+            
+            // prefetch next residue
+            k          = *p_dbseq++;
+            
+            // Create the actual diagonal score vector
+            // and update the queue of incomplete score vectors
+            
+            v_score    = vec_perm(v_score_q1, v_score_load, queue1_to_score);
+            v_score_q1 = vec_perm(v_score_q2, v_score_load, queue2_to_queue1);
+            v_score_q2 = vec_perm(v_score_q3, v_score_load, queue3_to_queue2);
+            v_score_q3 = vec_perm(v_score_q3, v_score_load, queue3_with_load);
+            
+            // prefetch score for next step
+            v_score_load = vec_ld(16*k,query_profile_word);
+            
+            // load values of F and H from previous row (one unit up)
+            Fup    = vec_ld(256, p);
+            Hup2   = vec_ld(272, p);
+            
+            // save old values of F and H to use on next row
+            vec_st(F, 0,  p);
+            vec_st(H, 16, p);
+            p += 16; // move ahead 32 bytes
+            
+            // shift into place so we have complete F and H vectors
+            // that refer to the values one unit up from each cell
+            // that we are currently working on.
+            Fup    = vec_sld(Fup,F,14);
+            Hup2   = vec_sld(Hup2,H,14);            
+            
+            // do the dynamic programming 
+            
+            // update E value
+            E   = vec_subs(E,v_gapextend);
+            tmp = vec_subs(H,v_gapopen);
+            E   = vec_max(E,tmp);
+            
+            // update F value
+            F   = vec_subs(Fup,v_gapextend);
+            tmp = vec_subs(Hup2,v_gapopen);
+            F   = vec_max(F,tmp);
+            
+            // add score to H
+            H   = vec_adds(Hup1,v_score);
+            H   = vec_subs(H,v_bias);
+            
+            // set H to max of H,E,F
+            H   = vec_max(H,E);
+            H   = vec_max(H,F); 
+
+            
+            // Update highest score encountered this far
+            v_maxscore = vec_max(v_maxscore,H);
+            
+
+
+            // STEP 5
+            
+            // prefetch next residue
+            k          = *p_dbseq++;
+            
+            // Create the actual diagonal score vector
+            // and update the queue of incomplete score vectors
+            
+            v_score    = vec_perm(v_score_q1, v_score_load, queue1_to_score);
+            v_score_q1 = vec_perm(v_score_q2, v_score_load, queue2_to_queue1);
+            v_score_q2 = vec_perm(v_score_q3, v_score_load, queue3_to_queue2);
+            v_score_q3 = vec_perm(v_score_q3, v_score_load, queue3_with_load);
+            
+            // prefetch score for next step
+            v_score_load = vec_ld(16*k,query_profile_word);
+            
+            // load values of F and H from previous row (one unit up)
+            Fup    = vec_ld(256, p);
+            Hup1   = vec_ld(272, p);
+            
+            // save old values of F and H to use on next row
+            vec_st(F, 0,  p);
+            vec_st(H, 16, p);
+            p += 16; // move ahead 32 bytes
+            
+            // shift into place so we have complete F and H vectors
+            // that refer to the values one unit up from each cell
+            // that we are currently working on.
+            Fup    = vec_sld(Fup,F,14);
+            Hup1   = vec_sld(Hup1,H,14);            
+            
+            // do the dynamic programming 
+            
+            // update E value
+            E   = vec_subs(E,v_gapextend);
+            tmp = vec_subs(H,v_gapopen);
+            E   = vec_max(E,tmp);
+            
+            // update F value
+            F   = vec_subs(Fup,v_gapextend);
+            tmp = vec_subs(Hup1,v_gapopen);
+            F   = vec_max(F,tmp);
+            
+            // add score to H
+            H   = vec_adds(Hup2,v_score);
+            H   = vec_subs(H,v_bias);
+            
+            // set H to max of H,E,F
+            H   = vec_max(H,E);
+            H   = vec_max(H,F); 
+            
+            
+            // Update highest score encountered this far
+            v_maxscore = vec_max(v_maxscore,H);
+            
+
+
+            // STEP 6
+            
+            // prefetch next residue
+            k          = *p_dbseq++;
+            
+            // Create the actual diagonal score vector
+            // and update the queue of incomplete score vectors
+            
+            v_score    = vec_perm(v_score_q1, v_score_load, queue1_to_score);
+            v_score_q1 = vec_perm(v_score_q2, v_score_load, queue2_to_queue1);
+            v_score_q2 = vec_perm(v_score_q3, v_score_load, queue3_to_queue2);
+            v_score_q3 = vec_perm(v_score_q3, v_score_load, queue3_with_load);
+            
+            // prefetch score for next step
+            v_score_load = vec_ld(16*k,query_profile_word);
+            
+            // load values of F and H from previous row (one unit up)
+            Fup    = vec_ld(256, p);
+            Hup2   = vec_ld(272, p);
+            
+            // save old values of F and H to use on next row
+            vec_st(F, 0,  p);
+            vec_st(H, 16, p);
+            p += 16; // move ahead 32 bytes
+            
+            // shift into place so we have complete F and H vectors
+            // that refer to the values one unit up from each cell
+            // that we are currently working on.
+            Fup    = vec_sld(Fup,F,14);
+            Hup2   = vec_sld(Hup2,H,14);            
+            
+            // do the dynamic programming 
+            
+            // update E value
+            E   = vec_subs(E,v_gapextend);
+            tmp = vec_subs(H,v_gapopen);
+            E   = vec_max(E,tmp);
+            
+            // update F value
+            F   = vec_subs(Fup,v_gapextend);
+            tmp = vec_subs(Hup2,v_gapopen);
+            F   = vec_max(F,tmp);
+            
+            // add score to H
+            H   = vec_adds(Hup1,v_score);
+            H   = vec_subs(H,v_bias);
+            
+            // set H to max of H,E,F
+            H   = vec_max(H,E);
+            H   = vec_max(H,F); 
+            
+
+            
+            // Update highest score encountered this far
+            v_maxscore = vec_max(v_maxscore,H);
+            
+
+            
+            // STEP 7
+            
+            // prefetch next residue
+            k          = *p_dbseq++;
+            
+            // Create the actual diagonal score vector
+            // and update the queue of incomplete score vectors
+            
+            v_score    = vec_perm(v_score_q1, v_score_load, queue1_to_score);
+            v_score_q1 = vec_perm(v_score_q2, v_score_load, queue2_to_queue1);
+            v_score_q2 = vec_perm(v_score_q3, v_score_load, queue3_to_queue2);
+            v_score_q3 = vec_perm(v_score_q3, v_score_load, queue3_with_load);
+            
+            // prefetch score for next step
+            v_score_load = vec_ld(16*k,query_profile_word);
+            
+            // load values of F and H from previous row (one unit up)
+            Fup    = vec_ld(256, p);
+            Hup1   = vec_ld(272, p);
+            
+            // save old values of F and H to use on next row
+            vec_st(F, 0,  p);
+            vec_st(H, 16, p);
+            p += 16; // move ahead 32 bytes
+            
+            // shift into place so we have complete F and H vectors
+            // that refer to the values one unit up from each cell
+            // that we are currently working on.
+            Fup    = vec_sld(Fup,F,14);
+            Hup1   = vec_sld(Hup1,H,14);            
+            
+            // do the dynamic programming 
+            
+            // update E value
+            E   = vec_subs(E,v_gapextend);
+            tmp = vec_subs(H,v_gapopen);
+            E   = vec_max(E,tmp);
+            
+            // update F value
+            F   = vec_subs(Fup,v_gapextend);
+            tmp = vec_subs(Hup1,v_gapopen);
+            F   = vec_max(F,tmp);
+            
+            // add score to H
+            H   = vec_adds(Hup2,v_score);
+            H   = vec_subs(H,v_bias);
+            
+            // set H to max of H,E,F
+            H   = vec_max(H,E);
+            H   = vec_max(H,F); 
+            
+
+            
+            // Update highest score encountered this far
+            v_maxscore = vec_max(v_maxscore,H);
+            
+
+
+            // STEP 8
+            
+            // prefetch next residue
+            k          = *p_dbseq++;
+            
+            // Create the actual diagonal score vector
+            // and update the queue of incomplete score vectors
+            
+            v_score    = vec_perm(v_score_q1, v_score_load, queue1_to_score);
+            v_score_q1 = vec_perm(v_score_q2, v_score_load, queue2_to_queue1);
+            v_score_q2 = vec_perm(v_score_q3, v_score_load, queue3_to_queue2);
+            v_score_q3 = vec_perm(v_score_q3, v_score_load, queue3_with_load);
+            
+            // prefetch score for next step
+            v_score_load = vec_ld(16*k,query_profile_word);
+            
+            // load values of F and H from previous row (one unit up)
+            Fup    = vec_ld(256, p);
+            Hup2   = vec_ld(272, p);
+            
+            // save old values of F and H to use on next row
+            vec_st(F, 0,  p);
+            vec_st(H, 16, p);
+            p += 16; // move ahead 32 bytes
+            
+            // shift into place so we have complete F and H vectors
+            // that refer to the values one unit up from each cell
+            // that we are currently working on.
+            Fup    = vec_sld(Fup,F,14);
+            Hup2   = vec_sld(Hup2,H,14);            
+            
+            // do the dynamic programming 
+            
+            // update E value
+            E   = vec_subs(E,v_gapextend);
+            tmp = vec_subs(H,v_gapopen);
+            E   = vec_max(E,tmp);
+            
+            // update F value
+            F   = vec_subs(Fup,v_gapextend);
+            tmp = vec_subs(Hup2,v_gapopen);
+            F   = vec_max(F,tmp);
+            
+            // add score to H
+            H   = vec_adds(Hup1,v_score);
+            H   = vec_subs(H,v_bias);
+            
+            // set H to max of H,E,F
+            H   = vec_max(H,E);
+            H   = vec_max(H,F); 
+            
+            
+            // Update highest score encountered this far
+            v_maxscore = vec_max(v_maxscore,H);
+        }
+        
+        v_score_load = vec_splat_u16(0);
+        
+        for(;j<db_length+7;j++)
+        {
+            // Create the actual diagonal score vector
+            // and update the queue of incomplete score vectors
+            //
+            // This could of course be done with only vec_perm or vec_sel,
+            // but since they use different execution units we have found
+            // it to be slightly faster to mix them.
+            v_score    = vec_perm(v_score_q1, v_score_load, queue1_to_score);
+            v_score_q1 = vec_perm(v_score_q2, v_score_load, queue2_to_queue1);
+            v_score_q2 = vec_perm(v_score_q3, v_score_load, queue3_to_queue2);
+            v_score_q3 = vec_perm(v_score_q3, v_score_load, queue3_with_load);
+            
+            // save old values of F and H to use on next row
+            vec_st(F, 0,  p);
+            vec_st(H, 16, p);
+            p += 16; // move ahead 32 bytes
+            
+            // v_score_load contains all zeros
+            Fup    = vec_sld(v_score_load,F,14);
+            Hup1   = vec_sld(v_score_load,H,14);            
+            
+            // do the dynamic programming 
+            
+            // update E value
+            E   = vec_subs(E,v_gapextend);
+            tmp = vec_subs(H,v_gapopen);
+            E   = vec_max(E,tmp);
+            
+            // update F value
+            F   = vec_subs(Fup,v_gapextend);
+            tmp = vec_subs(Hup1,v_gapopen);
+            F   = vec_max(F,tmp);
+            
+            // add score to H
+            H   = vec_adds(Hup2,v_score);
+            H   = vec_subs(H,v_bias);
+            
+            // set H to max of H,E,F
+            H   = vec_max(H,E);
+            H   = vec_max(H,F);
+            
+            // Save value to use for next diagonal H 
+            Hup2 = Hup1;
+            
+            // Update highest score encountered this far
+            v_maxscore = vec_max(v_maxscore,H);
+        }
+        vec_st(F, 0,  p);
+        vec_st(H, 16, p);
+
+        query_profile_word += 8*alphabet_size;
+    }
+
+    // find largest score in the v_maxscore vector
+    tmp = vec_sld(v_maxscore,v_maxscore,8);
+    v_maxscore = vec_max(v_maxscore,tmp);
+    tmp = vec_sld(v_maxscore,v_maxscore,4);
+    v_maxscore = vec_max(v_maxscore,tmp);
+    tmp = vec_sld(v_maxscore,v_maxscore,2);
+    v_maxscore = vec_max(v_maxscore,tmp);
+
+    // store in temporary variable
+    vec_ste(v_maxscore,0,&score);
+    
+    // return largest score
+    return score;
+}
+
+int
+smith_waterman_altivec_byte(unsigned char *     query_sequence,
+                            unsigned char *     query_profile_byte,
+                            int                 query_length,
+                            unsigned char *     db_sequence,
+                            int                 db_length,
+                            unsigned char       bias,
+                            unsigned char       gap_open,
+                            unsigned char       gap_extend,
+                            struct f_struct *   f_str)
+{
+    int                     i,j,k,k8;
+    int                     overflow;
+    unsigned char *         p;
+    unsigned char           score;   
+    int                     alphabet_size = f_str->alphabet_size;
+    unsigned char *         workspace     = (unsigned char *)f_str->workspace;
+    
+    vector unsigned char    Fup,Hup1,Hup2,E,F,H,tmp;
+    vector unsigned char    perm;
+    vector unsigned char    v_maxscore;
+    vector unsigned char    v_bias,v_gapopen,v_gapextend;
+    vector unsigned char    v_score;
+    vector unsigned char    v_score_q1;
+    vector unsigned char    v_score_q2;
+    vector unsigned char    v_score_q3;
+    vector unsigned char    v_score_q4;
+    vector unsigned char    v_score_q5;
+    vector unsigned char    v_score_load1;
+    vector unsigned char    v_score_load2;  
+    vector unsigned char    v_zero;  
+
+    vector unsigned char    queue1_to_score  = (vector unsigned char)(16,1,2,3,4,5,6,7,24,9,10,11,12,13,14,15);
+    vector unsigned char    queue2_to_queue1 = (vector unsigned char)(16,17,2,3,4,5,6,7,24,25,10,11,12,13,14,15);
+    vector unsigned char    queue3_to_queue2 = (vector unsigned char)(16,17,18,3,4,5,6,7,24,25,26,11,12,13,14,15);
+    vector unsigned char    queue4_to_queue3 = (vector unsigned char)(16,17,18,19,4,5,6,7,24,25,26,27,12,13,14,15);
+    vector unsigned char    queue5_to_queue4 = (vector unsigned char)(16,17,18,19,20,2,3,4,24,25,26,27,28,10,11,12);
+    vector unsigned char    queue5_with_load = (vector unsigned char)(19,20,21,5,6,22,7,23,27,28,29,13,14,30,15,31);
+    vector unsigned char    merge_score_load = (vector unsigned char)(0,1,2,3,4,5,6,7,24,25,26,27,28,29,30,31);
+
+    v_zero           = vec_splat_u8(0);
+        
+    /* Load the bias to all elements of a constant */
+    v_bias           = vec_lde(0,&bias);
+    perm             = vec_lvsl(0,&bias);
+    v_bias           = vec_perm(v_bias,v_bias,perm);
+    v_bias           = vec_splat(v_bias,0);
+    
+    /* Load gap opening penalty to all elements of a constant */
+    v_gapopen        = vec_lde(0,&gap_open);
+    perm             = vec_lvsl(0,&gap_open);
+    v_gapopen        = vec_perm(v_gapopen,v_gapopen,perm);
+    v_gapopen        = vec_splat(v_gapopen,0);
+
+    /* Load gap extension penalty to all elements of a constant */
+    v_gapextend      = vec_lde(0,&gap_extend);  
+    perm             = vec_lvsl(0,&gap_extend);
+    v_gapextend      = vec_perm(v_gapextend,v_gapextend,perm);
+    v_gapextend      = vec_splat(v_gapextend,0);
+    
+    v_maxscore = vec_xor(v_maxscore,v_maxscore);
+   
+    // Zero out the storage vector 
+    k = (db_length+15);
+    for(i=0,j=0;i<k;i++,j+=32)
+    {
+        // borrow the zero value in v_maxscore to have something to store
+        vec_st(v_maxscore,j,workspace);
+        vec_st(v_maxscore,j+16,workspace);
+    }
+    
+    for(i=0;i<query_length;i+=16)
+    {
+        // zero lots of stuff. 
+        // We use both the VPERM and VSIU unit to knock off some cycles.
+        
+        E          = vec_splat_u8(0);
+        F          = vec_xor(F,F);
+        H          = vec_splat_u8(0);
+        Hup2      = vec_xor(Hup2,Hup2);
+        v_score_q1 = vec_splat_u8(0);
+        v_score_q2 = vec_xor(v_score_q2,v_score_q2);
+        v_score_q3 = vec_splat_u8(0);
+        v_score_q4 = vec_xor(v_score_q4,v_score_q4);
+        v_score_q5 = vec_splat_u8(0);
+
+        // reset pointers to the start of the saved data from the last row
+        p = workspace;
+        
+        // start directly and prefetch score column
+        k             = db_sequence[0];
+        k8            = k;
+        v_score_load1 = vec_ld(16*k,query_profile_byte);
+        v_score_load2 = v_score_load1;
+        v_score_load1 = vec_perm(v_score_load1,v_zero,merge_score_load);
+
+        // PROLOGUE 1
+        // prefetch next residue
+        k                = db_sequence[1];
+        
+        v_score     = vec_perm(v_score_q1,  v_score_load1,  queue1_to_score);
+        v_score_q1  = vec_perm(v_score_q2,  v_score_load1,  queue2_to_queue1);
+        v_score_q2  = vec_perm(v_score_q3,  v_score_load1,  queue3_to_queue2);
+        v_score_q3  = vec_perm(v_score_q4,  v_score_load1,  queue4_to_queue3);
+        v_score_q4  = vec_perm(v_score_q5,  v_score_load1,  queue5_to_queue4);
+        v_score_q5  = vec_perm(v_score_q5,  v_score_load1,  queue5_with_load);
+        
+        // prefetch score for next step 
+        v_score_load1 = vec_ld(16*k,query_profile_byte);            
+        
+        // load values of F and H from previous row (one unit up)
+        Fup    = vec_ld(0,  p);
+        Hup1   = vec_ld(16, p);
+        p += 32; // move ahead 32 bytes
+        
+        // shift into place so we have complete F and H vectors
+        // that refer to the values one unit up from each cell
+        // that we are currently working on.
+        Fup    = vec_sld(Fup,F,15);
+        Hup1    = vec_sld(Hup1,H,15);            
+        
+        // do the dynamic programming 
+        
+        // update E value
+        E   = vec_subs(E,v_gapextend);
+        tmp = vec_subs(H,v_gapopen);
+        E   = vec_max(E,tmp);
+        
+        // update F value
+        F   = vec_subs(Fup,v_gapextend);
+        tmp = vec_subs(Hup1,v_gapopen);
+        F   = vec_max(F,tmp);
+        
+        v_score_load1 = vec_perm(v_score_load1,v_zero,merge_score_load);
+        
+        // add score to H
+        H   = vec_adds(Hup2,v_score);
+        H   = vec_subs(H,v_bias);
+        
+        // set H to max of H,E,F
+        H   = vec_max(H,E);
+        H   = vec_max(H,F);
+        
+        // Update highest score encountered this far
+        v_maxscore = vec_max(v_maxscore,H);
+        
+        
+        
+        
+        // PROLOGUE 2
+        // prefetch next residue
+        k                = db_sequence[2];
+        
+        v_score     = vec_perm(v_score_q1,  v_score_load1,  queue1_to_score);
+        v_score_q1  = vec_perm(v_score_q2,  v_score_load1,  queue2_to_queue1);
+        v_score_q2  = vec_perm(v_score_q3,  v_score_load1,  queue3_to_queue2);
+        v_score_q3  = vec_perm(v_score_q4,  v_score_load1,  queue4_to_queue3);
+        v_score_q4  = vec_perm(v_score_q5,  v_score_load1,  queue5_to_queue4);
+        v_score_q5  = vec_perm(v_score_q5,  v_score_load1,  queue5_with_load);
+        
+  
+        // prefetch score for next step 
+        v_score_load1 = vec_ld(16*k,query_profile_byte);            
+        
+        // load values of F and H from previous row (one unit up)
+        Fup    = vec_ld(0,  p);
+        Hup2   = vec_ld(16, p);
+        p += 32; // move ahead 32 bytes
+        
+        // shift into place so we have complete F and H vectors
+        // that refer to the values one unit up from each cell
+        // that we are currently working on.
+        Fup    = vec_sld(Fup,F,15);
+        Hup2   = vec_sld(Hup2,H,15);            
+        
+        // do the dynamic programming 
+        
+        // update E value
+        E   = vec_subs(E,v_gapextend);
+        tmp = vec_subs(H,v_gapopen);
+        E   = vec_max(E,tmp);
+        
+        // update F value
+        F   = vec_subs(Fup,v_gapextend);
+        tmp = vec_subs(Hup2,v_gapopen);
+        F   = vec_max(F,tmp);
+        
+        v_score_load1 = vec_perm(v_score_load1,v_zero,merge_score_load);
+        
+        // add score to H
+        H   = vec_adds(Hup1,v_score);
+        H   = vec_subs(H,v_bias);
+        
+        // set H to max of H,E,F
+        H   = vec_max(H,E);
+        H   = vec_max(H,F);
+        
+        // Update highest score encountered this far
+        v_maxscore = vec_max(v_maxscore,H);
+     
+        
+        // PROLOGUE 3
+        // prefetch next residue
+        k                = db_sequence[3];
+  
+        v_score     = vec_perm(v_score_q1,  v_score_load1,  queue1_to_score);
+        v_score_q1  = vec_perm(v_score_q2,  v_score_load1,  queue2_to_queue1);
+        v_score_q2  = vec_perm(v_score_q3,  v_score_load1,  queue3_to_queue2);
+        v_score_q3  = vec_perm(v_score_q4,  v_score_load1,  queue4_to_queue3);
+        v_score_q4  = vec_perm(v_score_q5,  v_score_load1,  queue5_to_queue4);
+        v_score_q5  = vec_perm(v_score_q5,  v_score_load1,  queue5_with_load);
+        
+
+        // prefetch score for next step 
+        v_score_load1 = vec_ld(16*k,query_profile_byte);            
+        
+        // load values of F and H from previous row (one unit up)
+        Fup    = vec_ld(0,  p);
+        Hup1   = vec_ld(16, p);
+        p += 32; // move ahead 32 bytes
+        
+        // shift into place so we have complete F and H vectors
+        // that refer to the values one unit up from each cell
+        // that we are currently working on.
+        Fup    = vec_sld(Fup,F,15);
+        Hup1    = vec_sld(Hup1,H,15);            
+        
+        // do the dynamic programming 
+        
+        // update E value
+        E   = vec_subs(E,v_gapextend);
+        tmp = vec_subs(H,v_gapopen);
+        E   = vec_max(E,tmp);
+        
+        // update F value
+        F   = vec_subs(Fup,v_gapextend);
+        tmp = vec_subs(Hup1,v_gapopen);
+        F   = vec_max(F,tmp);
+        
+        v_score_load1 = vec_perm(v_score_load1,v_zero,merge_score_load);
+        
+        // add score to H
+        H   = vec_adds(Hup2,v_score);
+        H   = vec_subs(H,v_bias);
+        
+        // set H to max of H,E,F
+        H   = vec_max(H,E);
+        H   = vec_max(H,F);
+        
+        // Update highest score encountered this far
+        v_maxscore = vec_max(v_maxscore,H);
+        
+        
+        // PROLOGUE 4
+        // prefetch next residue
+        k                = db_sequence[4];
+        
+        v_score     = vec_perm(v_score_q1,  v_score_load1,  queue1_to_score);
+        v_score_q1  = vec_perm(v_score_q2,  v_score_load1,  queue2_to_queue1);
+        v_score_q2  = vec_perm(v_score_q3,  v_score_load1,  queue3_to_queue2);
+        v_score_q3  = vec_perm(v_score_q4,  v_score_load1,  queue4_to_queue3);
+        v_score_q4  = vec_perm(v_score_q5,  v_score_load1,  queue5_to_queue4);
+        v_score_q5  = vec_perm(v_score_q5,  v_score_load1,  queue5_with_load);
+        
+
+        // prefetch score for next step 
+        v_score_load1 = vec_ld(16*k,query_profile_byte);            
+        
+        // load values of F and H from previous row (one unit up)
+        Fup    = vec_ld(0,  p);
+        Hup2   = vec_ld(16, p);
+        p += 32; // move ahead 32 bytes
+        
+        // shift into place so we have complete F and H vectors
+        // that refer to the values one unit up from each cell
+        // that we are currently working on.
+        Fup    = vec_sld(Fup,F,15);
+        Hup2   = vec_sld(Hup2,H,15);            
+        
+        // do the dynamic programming 
+        
+        // update E value
+        E   = vec_subs(E,v_gapextend);
+        tmp = vec_subs(H,v_gapopen);
+        E   = vec_max(E,tmp);
+        
+        // update F value
+        F   = vec_subs(Fup,v_gapextend);
+        tmp = vec_subs(Hup2,v_gapopen);
+        F   = vec_max(F,tmp);
+        
+        v_score_load1 = vec_perm(v_score_load1,v_zero,merge_score_load);
+        
+        // add score to H
+        H   = vec_adds(Hup1,v_score);
+        H   = vec_subs(H,v_bias);
+        
+        // set H to max of H,E,F
+        H   = vec_max(H,E);
+        H   = vec_max(H,F);
+        
+        // Update highest score encountered this far
+        v_maxscore = vec_max(v_maxscore,H);
+        
+        
+        // PROLOGUE 5
+        // prefetch next residue
+        k                = db_sequence[5];
+        
+        v_score     = vec_perm(v_score_q1,  v_score_load1,  queue1_to_score);
+        v_score_q1  = vec_perm(v_score_q2,  v_score_load1,  queue2_to_queue1);
+        v_score_q2  = vec_perm(v_score_q3,  v_score_load1,  queue3_to_queue2);
+        v_score_q3  = vec_perm(v_score_q4,  v_score_load1,  queue4_to_queue3);
+        v_score_q4  = vec_perm(v_score_q5,  v_score_load1,  queue5_to_queue4);
+        v_score_q5  = vec_perm(v_score_q5,  v_score_load1,  queue5_with_load);
+     
+
+        // prefetch score for next step 
+        v_score_load1 = vec_ld(16*k,query_profile_byte);            
+        
+        // load values of F and H from previous row (one unit up)
+        Fup    = vec_ld(0,  p);
+        Hup1   = vec_ld(16, p);
+        p += 32; // move ahead 32 bytes
+        
+        // shift into place so we have complete F and H vectors
+        // that refer to the values one unit up from each cell
+        // that we are currently working on.
+        Fup    = vec_sld(Fup,F,15);
+        Hup1    = vec_sld(Hup1,H,15);            
+        
+        // do the dynamic programming 
+        
+        // update E value
+        E   = vec_subs(E,v_gapextend);
+        tmp = vec_subs(H,v_gapopen);
+        E   = vec_max(E,tmp);
+        
+        // update F value
+        F   = vec_subs(Fup,v_gapextend);
+        tmp = vec_subs(Hup1,v_gapopen);
+        F   = vec_max(F,tmp);
+        
+        v_score_load1 = vec_perm(v_score_load1,v_zero,merge_score_load);
+        
+        // add score to H
+        H   = vec_adds(Hup2,v_score);
+        H   = vec_subs(H,v_bias);
+        
+        // set H to max of H,E,F
+        H   = vec_max(H,E);
+        H   = vec_max(H,F);
+        
+        // Update highest score encountered this far
+        v_maxscore = vec_max(v_maxscore,H);
+        
+        
+        // PROLOGUE 6
+        // prefetch next residue
+        k                = db_sequence[6];
+        
+        v_score     = vec_perm(v_score_q1,  v_score_load1,  queue1_to_score);
+        v_score_q1  = vec_perm(v_score_q2,  v_score_load1,  queue2_to_queue1);
+        v_score_q2  = vec_perm(v_score_q3,  v_score_load1,  queue3_to_queue2);
+        v_score_q3  = vec_perm(v_score_q4,  v_score_load1,  queue4_to_queue3);
+        v_score_q4  = vec_perm(v_score_q5,  v_score_load1,  queue5_to_queue4);
+        v_score_q5  = vec_perm(v_score_q5,  v_score_load1,  queue5_with_load);
+        
+
+        // prefetch score for next step 
+        v_score_load1 = vec_ld(16*k,query_profile_byte);            
+        
+        // load values of F and H from previous row (one unit up)
+        Fup    = vec_ld(0,  p);
+        Hup2   = vec_ld(16, p);
+        p += 32; // move ahead 32 bytes
+        
+        // shift into place so we have complete F and H vectors
+        // that refer to the values one unit up from each cell
+        // that we are currently working on.
+        Fup    = vec_sld(Fup,F,15);
+        Hup2   = vec_sld(Hup2,H,15);            
+        
+        // do the dynamic programming 
+        
+        // update E value
+        E   = vec_subs(E,v_gapextend);
+        tmp = vec_subs(H,v_gapopen);
+        E   = vec_max(E,tmp);
+        
+        // update F value
+        F   = vec_subs(Fup,v_gapextend);
+        tmp = vec_subs(Hup2,v_gapopen);
+        F   = vec_max(F,tmp);
+        
+        v_score_load1 = vec_perm(v_score_load1,v_zero,merge_score_load);
+        
+        // add score to H
+        H   = vec_adds(Hup1,v_score);
+        H   = vec_subs(H,v_bias);
+        
+        // set H to max of H,E,F
+        H   = vec_max(H,E);
+        H   = vec_max(H,F);
+        
+        // Update highest score encountered this far
+        v_maxscore = vec_max(v_maxscore,H);
+        
+        
+        
+        // PROLOGUE 7
+        // prefetch next residue
+        k                = db_sequence[7];
+        
+        v_score     = vec_perm(v_score_q1,  v_score_load1,  queue1_to_score);
+        v_score_q1  = vec_perm(v_score_q2,  v_score_load1,  queue2_to_queue1);
+        v_score_q2  = vec_perm(v_score_q3,  v_score_load1,  queue3_to_queue2);
+        v_score_q3  = vec_perm(v_score_q4,  v_score_load1,  queue4_to_queue3);
+        v_score_q4  = vec_perm(v_score_q5,  v_score_load1,  queue5_to_queue4);
+        v_score_q5  = vec_perm(v_score_q5,  v_score_load1,  queue5_with_load);
+        
+
+        // prefetch score for next step 
+        v_score_load1 = vec_ld(16*k,query_profile_byte);            
+        
+        // load values of F and H from previous row (one unit up)
+        Fup    = vec_ld(0,  p);
+        Hup1   = vec_ld(16, p);
+        p += 32; // move ahead 32 bytes
+        
+        // shift into place so we have complete F and H vectors
+        // that refer to the values one unit up from each cell
+        // that we are currently working on.
+        Fup    = vec_sld(Fup,F,15);
+        Hup1    = vec_sld(Hup1,H,15);            
+        
+        // do the dynamic programming 
+        
+        // update E value
+        E   = vec_subs(E,v_gapextend);
+        tmp = vec_subs(H,v_gapopen);
+        E   = vec_max(E,tmp);
+        
+        // update F value
+        F   = vec_subs(Fup,v_gapextend);
+        tmp = vec_subs(Hup1,v_gapopen);
+        F   = vec_max(F,tmp);
+        
+        v_score_load1 = vec_perm(v_score_load1,v_zero,merge_score_load);
+        
+        // add score to H
+        H   = vec_adds(Hup2,v_score);
+        H   = vec_subs(H,v_bias);
+        
+        // set H to max of H,E,F
+        H   = vec_max(H,E);
+        H   = vec_max(H,F);
+        
+        // Update highest score encountered this far
+        v_maxscore = vec_max(v_maxscore,H);
+        
+        
+        
+        // PROLOGUE 8
+        // prefetch next residue
+        k                = db_sequence[8];
+        
+        v_score     = vec_perm(v_score_q1,  v_score_load1,  queue1_to_score);
+        v_score_q1  = vec_perm(v_score_q2,  v_score_load1,  queue2_to_queue1);
+        v_score_q2  = vec_perm(v_score_q3,  v_score_load1,  queue3_to_queue2);
+        v_score_q3  = vec_perm(v_score_q4,  v_score_load1,  queue4_to_queue3);
+        v_score_q4  = vec_perm(v_score_q5,  v_score_load1,  queue5_to_queue4);
+        v_score_q5  = vec_perm(v_score_q5,  v_score_load1,  queue5_with_load);
+        
+
+        // prefetch score for next step 
+        v_score_load1 = vec_ld(16*k,query_profile_byte);            
+        
+        // load values of F and H from previous row (one unit up)
+        Fup    = vec_ld(0,  p);
+        Hup2   = vec_ld(16, p);
+        p += 32; // move ahead 32 bytes
+        
+        // shift into place so we have complete F and H vectors
+        // that refer to the values one unit up from each cell
+        // that we are currently working on.
+        Fup    = vec_sld(Fup,F,15);
+        Hup2   = vec_sld(Hup2,H,15);            
+        
+        // do the dynamic programming 
+        
+        // update E value
+        E   = vec_subs(E,v_gapextend);
+        tmp = vec_subs(H,v_gapopen);
+        E   = vec_max(E,tmp);
+        
+        // update F value
+        F   = vec_subs(Fup,v_gapextend);
+        tmp = vec_subs(Hup2,v_gapopen);
+        F   = vec_max(F,tmp);
+        
+        v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
+        
+        // add score to H
+        H   = vec_adds(Hup1,v_score);
+        H   = vec_subs(H,v_bias);
+        
+        // set H to max of H,E,F
+        H   = vec_max(H,E);
+        H   = vec_max(H,F);
+        
+        // Update highest score encountered this far
+        v_maxscore = vec_max(v_maxscore,H);
+        
+        
+        
+        
+        // PROLOGUE 9
+        // prefetch next residue
+        k                = db_sequence[9];
+        k8               = db_sequence[1];
+        
+        v_score     = vec_perm(v_score_q1,  v_score_load1,  queue1_to_score);
+        v_score_q1  = vec_perm(v_score_q2,  v_score_load1,  queue2_to_queue1);
+        v_score_q2  = vec_perm(v_score_q3,  v_score_load1,  queue3_to_queue2);
+        v_score_q3  = vec_perm(v_score_q4,  v_score_load1,  queue4_to_queue3);
+        v_score_q4  = vec_perm(v_score_q5,  v_score_load1,  queue5_to_queue4);
+        v_score_q5  = vec_perm(v_score_q5,  v_score_load1,  queue5_with_load);
+        
+
+        // prefetch score for next step 
+        v_score_load1 = vec_ld(16*k,query_profile_byte);            
+        v_score_load2 = vec_ld(16*k8,query_profile_byte);
+        
+        // load values of F and H from previous row (one unit up)
+        Fup    = vec_ld(0,  p);
+        Hup1    = vec_ld(16, p);
+        p += 32; // move ahead 32 bytes
+        
+        // shift into place so we have complete F and H vectors
+        // that refer to the values one unit up from each cell
+        // that we are currently working on.
+        Fup    = vec_sld(Fup,F,15);
+        Hup1    = vec_sld(Hup1,H,15);            
+        
+        // do the dynamic programming 
+        
+        // update E value
+        E   = vec_subs(E,v_gapextend);
+        tmp = vec_subs(H,v_gapopen);
+        E   = vec_max(E,tmp);
+        
+        // update F value
+        F   = vec_subs(Fup,v_gapextend);
+        tmp = vec_subs(Hup1,v_gapopen);
+        F   = vec_max(F,tmp);
+        
+        v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
+        
+        // add score to H
+        H   = vec_adds(Hup2,v_score);
+        H   = vec_subs(H,v_bias);
+        
+        // set H to max of H,E,F
+        H   = vec_max(H,E);
+        H   = vec_max(H,F);
+        
+        // Update highest score encountered this far
+        v_maxscore = vec_max(v_maxscore,H);
+        
+        
+        
+        // PROLOGUE 10
+        // prefetch next residue
+        k                = db_sequence[10];
+        k8               = db_sequence[2];
+        
+        v_score     = vec_perm(v_score_q1,  v_score_load1,  queue1_to_score);
+        v_score_q1  = vec_perm(v_score_q2,  v_score_load1,  queue2_to_queue1);
+        v_score_q2  = vec_perm(v_score_q3,  v_score_load1,  queue3_to_queue2);
+        v_score_q3  = vec_perm(v_score_q4,  v_score_load1,  queue4_to_queue3);
+        v_score_q4  = vec_perm(v_score_q5,  v_score_load1,  queue5_to_queue4);
+        v_score_q5  = vec_perm(v_score_q5,  v_score_load1,  queue5_with_load);
+        
+
+        // prefetch score for next step 
+        v_score_load1 = vec_ld(16*k,query_profile_byte);            
+        v_score_load2 = vec_ld(16*k8,query_profile_byte);
+        
+        // load values of F and H from previous row (one unit up)
+        Fup    = vec_ld(0,  p);
+        Hup2   = vec_ld(16, p);
+        p += 32; // move ahead 32 bytes
+        
+        // shift into place so we have complete F and H vectors
+        // that refer to the values one unit up from each cell
+        // that we are currently working on.
+        Fup    = vec_sld(Fup,F,15);
+        Hup2   = vec_sld(Hup2,H,15);            
+        
+        // do the dynamic programming 
+        
+        // update E value
+        E   = vec_subs(E,v_gapextend);
+        tmp = vec_subs(H,v_gapopen);
+        E   = vec_max(E,tmp);
+        
+        // update F value
+        F   = vec_subs(Fup,v_gapextend);
+        tmp = vec_subs(Hup2,v_gapopen);
+        F   = vec_max(F,tmp);
+        
+        v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
+        
+        // add score to H
+        H   = vec_adds(Hup1,v_score);
+        H   = vec_subs(H,v_bias);
+        
+        // set H to max of H,E,F
+        H   = vec_max(H,E);
+        H   = vec_max(H,F);
+        
+        // Update highest score encountered this far
+        v_maxscore = vec_max(v_maxscore,H);
+        
+        
+        
+        
+        // PROLOGUE 11
+        // prefetch next residue
+        k                = db_sequence[11];
+        k8               = db_sequence[3];
+        
+        v_score     = vec_perm(v_score_q1,  v_score_load1,  queue1_to_score);
+        v_score_q1  = vec_perm(v_score_q2,  v_score_load1,  queue2_to_queue1);
+        v_score_q2  = vec_perm(v_score_q3,  v_score_load1,  queue3_to_queue2);
+        v_score_q3  = vec_perm(v_score_q4,  v_score_load1,  queue4_to_queue3);
+        v_score_q4  = vec_perm(v_score_q5,  v_score_load1,  queue5_to_queue4);
+        v_score_q5  = vec_perm(v_score_q5,  v_score_load1,  queue5_with_load);
+        
+
+        // prefetch score for next step 
+        v_score_load1 = vec_ld(16*k,query_profile_byte);            
+        v_score_load2 = vec_ld(16*k8,query_profile_byte);
+        
+        // load values of F and H from previous row (one unit up)
+        Fup    = vec_ld(0,  p);
+        Hup1    = vec_ld(16, p);
+        p += 32; // move ahead 32 bytes
+        
+        // shift into place so we have complete F and H vectors
+        // that refer to the values one unit up from each cell
+        // that we are currently working on.
+        Fup    = vec_sld(Fup,F,15);
+        Hup1    = vec_sld(Hup1,H,15);            
+        
+        // do the dynamic programming 
+        
+        // update E value
+        E   = vec_subs(E,v_gapextend);
+        tmp = vec_subs(H,v_gapopen);
+        E   = vec_max(E,tmp);
+        
+        // update F value
+        F   = vec_subs(Fup,v_gapextend);
+        tmp = vec_subs(Hup1,v_gapopen);
+        F   = vec_max(F,tmp);
+        
+        v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
+        
+        // add score to H
+        H   = vec_adds(Hup2,v_score);
+        H   = vec_subs(H,v_bias);
+        
+        // set H to max of H,E,F
+        H   = vec_max(H,E);
+        H   = vec_max(H,F);
+        
+        // Update highest score encountered this far
+        v_maxscore = vec_max(v_maxscore,H);
+        
+        
+        
+        // PROLOGUE 12
+        // prefetch next residue
+        k                = db_sequence[12];
+        k8               = db_sequence[4];
+        
+        v_score     = vec_perm(v_score_q1,  v_score_load1,  queue1_to_score);
+        v_score_q1  = vec_perm(v_score_q2,  v_score_load1,  queue2_to_queue1);
+        v_score_q2  = vec_perm(v_score_q3,  v_score_load1,  queue3_to_queue2);
+        v_score_q3  = vec_perm(v_score_q4,  v_score_load1,  queue4_to_queue3);
+        v_score_q4  = vec_perm(v_score_q5,  v_score_load1,  queue5_to_queue4);
+        v_score_q5  = vec_perm(v_score_q5,  v_score_load1,  queue5_with_load);
+        
+
+        // prefetch score for next step 
+        v_score_load1 = vec_ld(16*k,query_profile_byte);            
+        v_score_load2 = vec_ld(16*k8,query_profile_byte);
+        
+        // load values of F and H from previous row (one unit up)
+        Fup    = vec_ld(0,  p);
+        Hup2   = vec_ld(16, p);
+        p += 32; // move ahead 32 bytes
+        
+        // shift into place so we have complete F and H vectors
+        // that refer to the values one unit up from each cell
+        // that we are currently working on.
+        Fup    = vec_sld(Fup,F,15);
+        Hup2   = vec_sld(Hup2,H,15);            
+        
+        // do the dynamic programming 
+        
+        // update E value
+        E   = vec_subs(E,v_gapextend);
+        tmp = vec_subs(H,v_gapopen);
+        E   = vec_max(E,tmp);
+        
+        // update F value
+        F   = vec_subs(Fup,v_gapextend);
+        tmp = vec_subs(Hup2,v_gapopen);
+        F   = vec_max(F,tmp);
+        
+        v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
+        
+        // add score to H
+        H   = vec_adds(Hup1,v_score);
+        H   = vec_subs(H,v_bias);
+        
+        // set H to max of H,E,F
+        H   = vec_max(H,E);
+        H   = vec_max(H,F);
+        
+        // Update highest score encountered this far
+        v_maxscore = vec_max(v_maxscore,H);
+        
+        
+        
+        
+        // PROLOGUE 13
+        // prefetch next residue
+        k                = db_sequence[13];
+        k8               = db_sequence[5];
+        
+        v_score     = vec_perm(v_score_q1,  v_score_load1,  queue1_to_score);
+        v_score_q1  = vec_perm(v_score_q2,  v_score_load1,  queue2_to_queue1);
+        v_score_q2  = vec_perm(v_score_q3,  v_score_load1,  queue3_to_queue2);
+        v_score_q3  = vec_perm(v_score_q4,  v_score_load1,  queue4_to_queue3);
+        v_score_q4  = vec_perm(v_score_q5,  v_score_load1,  queue5_to_queue4);
+        v_score_q5  = vec_perm(v_score_q5,  v_score_load1,  queue5_with_load);
+        
+
+        // prefetch score for next step 
+        v_score_load1 = vec_ld(16*k,query_profile_byte);            
+        v_score_load2 = vec_ld(16*k8,query_profile_byte);
+        
+        // load values of F and H from previous row (one unit up)
+        Fup    = vec_ld(0,  p);
+        Hup1    = vec_ld(16, p);
+        p += 32; // move ahead 32 bytes
+        
+        // shift into place so we have complete F and H vectors
+        // that refer to the values one unit up from each cell
+        // that we are currently working on.
+        Fup    = vec_sld(Fup,F,15);
+        Hup1    = vec_sld(Hup1,H,15);            
+        
+        // do the dynamic programming 
+        
+        // update E value
+        E   = vec_subs(E,v_gapextend);
+        tmp = vec_subs(H,v_gapopen);
+        E   = vec_max(E,tmp);
+        
+        // update F value
+        F   = vec_subs(Fup,v_gapextend);
+        tmp = vec_subs(Hup1,v_gapopen);
+        F   = vec_max(F,tmp);
+        
+        v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
+        
+        // add score to H
+        H   = vec_adds(Hup2,v_score);
+        H   = vec_subs(H,v_bias);
+        
+        // set H to max of H,E,F
+        H   = vec_max(H,E);
+        H   = vec_max(H,F);
+        
+        // Update highest score encountered this far
+        v_maxscore = vec_max(v_maxscore,H);
+        
+        
+        
+        // PROLOGUE 14
+        // prefetch next residue
+        k                = db_sequence[14];
+        k8               = db_sequence[6];
+        
+        v_score     = vec_perm(v_score_q1,  v_score_load1,  queue1_to_score);
+        v_score_q1  = vec_perm(v_score_q2,  v_score_load1,  queue2_to_queue1);
+        v_score_q2  = vec_perm(v_score_q3,  v_score_load1,  queue3_to_queue2);
+        v_score_q3  = vec_perm(v_score_q4,  v_score_load1,  queue4_to_queue3);
+        v_score_q4  = vec_perm(v_score_q5,  v_score_load1,  queue5_to_queue4);
+        v_score_q5  = vec_perm(v_score_q5,  v_score_load1,  queue5_with_load);
+        
+
+        // prefetch score for next step 
+        v_score_load1 = vec_ld(16*k,query_profile_byte);            
+        v_score_load2 = vec_ld(16*k8,query_profile_byte);
+        
+        // load values of F and H from previous row (one unit up)
+        Fup    = vec_ld(0,  p);
+        Hup2   = vec_ld(16, p);
+        p += 32; // move ahead 32 bytes
+        
+        // shift into place so we have complete F and H vectors
+        // that refer to the values one unit up from each cell
+        // that we are currently working on.
+        Fup    = vec_sld(Fup,F,15);
+        Hup2   = vec_sld(Hup2,H,15);            
+        
+        // do the dynamic programming 
+        
+        // update E value
+        E   = vec_subs(E,v_gapextend);
+        tmp = vec_subs(H,v_gapopen);
+        E   = vec_max(E,tmp);
+        
+        // update F value
+        F   = vec_subs(Fup,v_gapextend);
+        tmp = vec_subs(Hup2,v_gapopen);
+        F   = vec_max(F,tmp);
+        
+        v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
+        
+        // add score to H
+        H   = vec_adds(Hup1,v_score);
+        H   = vec_subs(H,v_bias);
+        
+        // set H to max of H,E,F
+        H   = vec_max(H,E);
+        H   = vec_max(H,F);
+        
+        // Update highest score encountered this far
+        v_maxscore = vec_max(v_maxscore,H);
+        
+        
+        
+        // PROLOGUE 15
+        // prefetch next residue
+        k                = db_sequence[15];
+        k8               = db_sequence[7];
+        
+        v_score     = vec_perm(v_score_q1,  v_score_load1,  queue1_to_score);
+        v_score_q1  = vec_perm(v_score_q2,  v_score_load1,  queue2_to_queue1);
+        v_score_q2  = vec_perm(v_score_q3,  v_score_load1,  queue3_to_queue2);
+        v_score_q3  = vec_perm(v_score_q4,  v_score_load1,  queue4_to_queue3);
+        v_score_q4  = vec_perm(v_score_q5,  v_score_load1,  queue5_to_queue4);
+        v_score_q5  = vec_perm(v_score_q5,  v_score_load1,  queue5_with_load);
+        
+
+        // prefetch score for next step 
+        v_score_load1 = vec_ld(16*k,query_profile_byte);            
+        v_score_load2 = vec_ld(16*k8,query_profile_byte);
+        
+        // load values of F and H from previous row (one unit up)
+        Fup    = vec_ld(0,  p);
+        Hup1    = vec_ld(16, p);
+        p += 32; // move ahead 32 bytes
+        
+        // shift into place so we have complete F and H vectors
+        // that refer to the values one unit up from each cell
+        // that we are currently working on.
+        Fup    = vec_sld(Fup,F,15);
+        Hup1    = vec_sld(Hup1,H,15);            
+        
+        // do the dynamic programming 
+        
+        // update E value
+        E   = vec_subs(E,v_gapextend);
+        tmp = vec_subs(H,v_gapopen);
+        E   = vec_max(E,tmp);
+        
+        // update F value
+        F   = vec_subs(Fup,v_gapextend);
+        tmp = vec_subs(Hup1,v_gapopen);
+        F   = vec_max(F,tmp);
+        
+        v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
+        
+        // add score to H
+        H   = vec_adds(Hup2,v_score);
+        H   = vec_subs(H,v_bias);
+        
+        // set H to max of H,E,F
+        H   = vec_max(H,E);
+        H   = vec_max(H,F);
+        
+        // Update highest score encountered this far
+        v_maxscore = vec_max(v_maxscore,H);
+        
+        
+        
+        // PROLOGUE 16
+        // prefetch next residue
+        k                = db_sequence[16];
+        k8               = db_sequence[8];
+        
+        v_score     = vec_perm(v_score_q1,  v_score_load1,  queue1_to_score);
+        v_score_q1  = vec_perm(v_score_q2,  v_score_load1,  queue2_to_queue1);
+        v_score_q2  = vec_perm(v_score_q3,  v_score_load1,  queue3_to_queue2);
+        v_score_q3  = vec_perm(v_score_q4,  v_score_load1,  queue4_to_queue3);
+        v_score_q4  = vec_perm(v_score_q5,  v_score_load1,  queue5_to_queue4);
+        v_score_q5  = vec_perm(v_score_q5,  v_score_load1,  queue5_with_load);
+        
+
+        // prefetch score for next step 
+        v_score_load1 = vec_ld(16*k,query_profile_byte);            
+        v_score_load2 = vec_ld(16*k8,query_profile_byte);
+        
+        // load values of F and H from previous row (one unit up)
+        Fup    = vec_ld(0,  p);
+        Hup2   = vec_ld(16, p);
+        p += 32; // move ahead 32 bytes
+        
+        // shift into place so we have complete F and H vectors
+        // that refer to the values one unit up from each cell
+        // that we are currently working on.
+        Fup    = vec_sld(Fup,F,15);
+        Hup2   = vec_sld(Hup2,H,15);            
+        
+        // do the dynamic programming 
+        
+        // update E value
+        E   = vec_subs(E,v_gapextend);
+        tmp = vec_subs(H,v_gapopen);
+        E   = vec_max(E,tmp);
+        
+        // update F value
+        F   = vec_subs(Fup,v_gapextend);
+        tmp = vec_subs(Hup2,v_gapopen);
+        F   = vec_max(F,tmp);
+        
+        v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
+        
+        // add score to H
+        H   = vec_adds(Hup1,v_score);
+        H   = vec_subs(H,v_bias);
+        
+        // set H to max of H,E,F
+        H   = vec_max(H,E);
+        H   = vec_max(H,F);
+        
+        // Update highest score encountered this far
+        v_maxscore = vec_max(v_maxscore,H);
+        
+        p = workspace;
+        
+        for(j=16;j<db_length;j+=16)
+        { 
+            // STEP 1
+            
+            // prefetch next residue 
+            k                = db_sequence[j+1];
+            k8               = db_sequence[j-7];
+            
+            v_score     = vec_perm(v_score_q1,  v_score_load1,  queue1_to_score);
+            v_score_q1  = vec_perm(v_score_q2,  v_score_load1,  queue2_to_queue1);
+            v_score_q2  = vec_perm(v_score_q3,  v_score_load1,  queue3_to_queue2);
+            v_score_q3  = vec_perm(v_score_q4,  v_score_load1,  queue4_to_queue3);
+            v_score_q4  = vec_perm(v_score_q5,  v_score_load1,  queue5_to_queue4);
+            v_score_q5  = vec_perm(v_score_q5,  v_score_load1,  queue5_with_load);
+            
+            // prefetch scores for next step
+            v_score_load1 = vec_ld(16*k,query_profile_byte);
+            v_score_load2 = vec_ld(16*k8,query_profile_byte);
+       
+            // load values of F and H from previous row (one unit up)
+            Fup    = vec_ld(512, p);
+            Hup1   = vec_ld(528, p);
+            
+            // save old values of F and H to use on next row
+            vec_st(F, 0,  p);
+            vec_st(H, 16, p);
+            p += 32;
+            
+            // shift into place so we have complete F and H vectors
+            // that refer to the values one unit up from each cell
+            // that we are currently working on.
+            Fup    = vec_sld(Fup,F,15);
+            Hup1    = vec_sld(Hup1,H,15);            
+
+            // do the dynamic programming 
+            
+            // update E value
+            E   = vec_subs(E,v_gapextend);
+            tmp = vec_subs(H,v_gapopen);
+            E   = vec_max(E,tmp);
+            
+            // update F value
+            F   = vec_subs(Fup,v_gapextend);
+            tmp = vec_subs(Hup1,v_gapopen);
+            F   = vec_max(F,tmp);
+
+            v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
+            
+            // add score to H
+            H   = vec_adds(Hup2,v_score);
+            H   = vec_subs(H,v_bias);
+            
+            // set H to max of H,E,F
+            H   = vec_max(H,E);
+            H   = vec_max(H,F);
+            
+
+            
+            // Update highest score encountered this far
+            v_maxscore = vec_max(v_maxscore,H);
+          
+
+            
+            
+            
+            // STEP 2
+            
+            // prefetch next residue
+            k                = db_sequence[j+2];
+            k8               = db_sequence[j-6];
+            
+            v_score     = vec_perm(v_score_q1,  v_score_load1,  queue1_to_score);
+            v_score_q1  = vec_perm(v_score_q2,  v_score_load1,  queue2_to_queue1);
+            v_score_q2  = vec_perm(v_score_q3,  v_score_load1,  queue3_to_queue2);
+            v_score_q3  = vec_perm(v_score_q4,  v_score_load1,  queue4_to_queue3);
+            v_score_q4  = vec_perm(v_score_q5,  v_score_load1,  queue5_to_queue4);
+            v_score_q5  = vec_perm(v_score_q5,  v_score_load1,  queue5_with_load);
+            
+            
+            // prefetch scores for next step
+            v_score_load1 = vec_ld(16*k,query_profile_byte);
+            v_score_load2 = vec_ld(16*k8,query_profile_byte);
+            
+            // load values of F and H from previous row (one unit up)
+            Fup    = vec_ld(512, p);
+            Hup2   = vec_ld(528, p);
+            
+            // save old values of F and H to use on next row
+            vec_st(F, 0,  p);
+            vec_st(H, 16, p);
+            p += 32;
+            
+            // shift into place so we have complete F and H vectors
+            // that refer to the values one unit up from each cell
+            // that we are currently working on.
+            Fup    = vec_sld(Fup,F,15);
+            Hup2   = vec_sld(Hup2,H,15);            
+            
+            // do the dynamic programming 
+            
+            // update E value
+            E   = vec_subs(E,v_gapextend);
+            tmp = vec_subs(H,v_gapopen);
+            E   = vec_max(E,tmp);
+            
+            // update F value
+            F   = vec_subs(Fup,v_gapextend);
+            tmp = vec_subs(Hup2,v_gapopen);
+            F   = vec_max(F,tmp);
+            
+            v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
+            
+            // add score to H
+            H   = vec_adds(Hup1,v_score);
+            H   = vec_subs(H,v_bias);
+            
+            // set H to max of H,E,F
+            H   = vec_max(H,E);
+            H   = vec_max(H,F);
+            
+            
+            // Update highest score encountered this far
+            v_maxscore = vec_max(v_maxscore,H);
+            
+            
+
+            
+            
+            
+            // STEP 3
+            
+            // prefetch next residue
+            k                = db_sequence[j+3];
+            k8               = db_sequence[j-5];
+            
+            v_score     = vec_perm(v_score_q1,  v_score_load1,  queue1_to_score);
+            v_score_q1  = vec_perm(v_score_q2,  v_score_load1,  queue2_to_queue1);
+            v_score_q2  = vec_perm(v_score_q3,  v_score_load1,  queue3_to_queue2);
+            v_score_q3  = vec_perm(v_score_q4,  v_score_load1,  queue4_to_queue3);
+            v_score_q4  = vec_perm(v_score_q5,  v_score_load1,  queue5_to_queue4);
+            v_score_q5  = vec_perm(v_score_q5,  v_score_load1,  queue5_with_load);
+            
+            
+            // prefetch scores for next step
+            v_score_load1 = vec_ld(16*k,query_profile_byte);
+            v_score_load2 = vec_ld(16*k8,query_profile_byte);
+            
+            // load values of F and H from previous row (one unit up)
+            Fup    = vec_ld(512, p);
+            Hup1   = vec_ld(528, p);
+            
+            // save old values of F and H to use on next row
+            vec_st(F, 0,  p);
+            vec_st(H, 16, p);
+            p += 32;
+            
+            // shift into place so we have complete F and H vectors
+            // that refer to the values one unit up from each cell
+            // that we are currently working on.
+            Fup    = vec_sld(Fup,F,15);
+            Hup1    = vec_sld(Hup1,H,15);            
+            
+            // do the dynamic programming 
+            
+            // update E value
+            E   = vec_subs(E,v_gapextend);
+            tmp = vec_subs(H,v_gapopen);
+            E   = vec_max(E,tmp);
+            
+            // update F value
+            F   = vec_subs(Fup,v_gapextend);
+            tmp = vec_subs(Hup1,v_gapopen);
+            F   = vec_max(F,tmp);
+            
+            v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
+            
+            // add score to H
+            H   = vec_adds(Hup2,v_score);
+            H   = vec_subs(H,v_bias);
+            
+            // set H to max of H,E,F
+            H   = vec_max(H,E);
+            H   = vec_max(H,F);
+            
+            // Update highest score encountered this far
+            v_maxscore = vec_max(v_maxscore,H);
+            
+      
+            
+
+            
+            
+            // STEP 4
+            
+            // prefetch next residue
+            k                = db_sequence[j+4];
+            k8               = db_sequence[j-4];
+            
+            v_score     = vec_perm(v_score_q1,  v_score_load1,  queue1_to_score);
+            v_score_q1  = vec_perm(v_score_q2,  v_score_load1,  queue2_to_queue1);
+            v_score_q2  = vec_perm(v_score_q3,  v_score_load1,  queue3_to_queue2);
+            v_score_q3  = vec_perm(v_score_q4,  v_score_load1,  queue4_to_queue3);
+            v_score_q4  = vec_perm(v_score_q5,  v_score_load1,  queue5_to_queue4);
+            v_score_q5  = vec_perm(v_score_q5,  v_score_load1,  queue5_with_load);
+            
+            
+            // prefetch scores for next step
+            v_score_load1 = vec_ld(16*k,query_profile_byte);
+            v_score_load2 = vec_ld(16*k8,query_profile_byte);
+            
+            // load values of F and H from previous row (one unit up)
+            Fup    = vec_ld(512, p);
+            Hup2   = vec_ld(528, p);
+            
+            // save old values of F and H to use on next row
+            vec_st(F, 0,  p);
+            vec_st(H, 16, p);
+            p += 32;
+            
+            // shift into place so we have complete F and H vectors
+            // that refer to the values one unit up from each cell
+            // that we are currently working on.
+            Fup    = vec_sld(Fup,F,15);
+            Hup2   = vec_sld(Hup2,H,15);            
+            
+            // do the dynamic programming 
+            
+            // update E value
+            E   = vec_subs(E,v_gapextend);
+            tmp = vec_subs(H,v_gapopen);
+            E   = vec_max(E,tmp);
+            
+            // update F value
+            F   = vec_subs(Fup,v_gapextend);
+            tmp = vec_subs(Hup2,v_gapopen);
+            F   = vec_max(F,tmp);
+            
+            v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
+            
+            // add score to H
+            H   = vec_adds(Hup1,v_score);
+            H   = vec_subs(H,v_bias);
+            
+            // set H to max of H,E,F
+            H   = vec_max(H,E);
+            H   = vec_max(H,F);
+            
+            // Update highest score encountered this far
+            v_maxscore = vec_max(v_maxscore,H);
+            
+            
+            
+
+            
+            
+            // STEP 5
+            
+            // prefetch next residue
+            k                = db_sequence[j+5];
+            k8               = db_sequence[j-3];
+            
+            v_score     = vec_perm(v_score_q1,  v_score_load1,  queue1_to_score);
+            v_score_q1  = vec_perm(v_score_q2,  v_score_load1,  queue2_to_queue1);
+            v_score_q2  = vec_perm(v_score_q3,  v_score_load1,  queue3_to_queue2);
+            v_score_q3  = vec_perm(v_score_q4,  v_score_load1,  queue4_to_queue3);
+            v_score_q4  = vec_perm(v_score_q5,  v_score_load1,  queue5_to_queue4);
+            v_score_q5  = vec_perm(v_score_q5,  v_score_load1,  queue5_with_load);
+            
+            
+            // prefetch scores for next step
+            v_score_load1 = vec_ld(16*k,query_profile_byte);
+            v_score_load2 = vec_ld(16*k8,query_profile_byte);
+            
+            // load values of F and H from previous row (one unit up)
+            Fup    = vec_ld(512, p);
+            Hup1    = vec_ld(528, p);
+            
+            // save old values of F and H to use on next row
+            vec_st(F, 0,  p);
+            vec_st(H, 16, p);
+            p += 32;
+            
+            // shift into place so we have complete F and H vectors
+            // that refer to the values one unit up from each cell
+            // that we are currently working on.
+            Fup    = vec_sld(Fup,F,15);
+            Hup1   = vec_sld(Hup1,H,15);            
+            
+            // do the dynamic programming 
+            
+            // update E value
+            E   = vec_subs(E,v_gapextend);
+            tmp = vec_subs(H,v_gapopen);
+            E   = vec_max(E,tmp);
+            
+            // update F value
+            F   = vec_subs(Fup,v_gapextend);
+            tmp = vec_subs(Hup1,v_gapopen);
+            F   = vec_max(F,tmp);
+            
+            v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
+            
+            // add score to H
+            H   = vec_adds(Hup2,v_score);
+            H   = vec_subs(H,v_bias);
+            
+            // set H to max of H,E,F
+            H   = vec_max(H,E);
+            H   = vec_max(H,F);
+            
+            // Update highest score encountered this far
+            v_maxscore = vec_max(v_maxscore,H);
+            
+            
+
+            
+            
+            
+            // STEP 6
+            
+            // prefetch next residue
+            k                = db_sequence[j+6];
+            k8               = db_sequence[j-2];
+            
+            v_score     = vec_perm(v_score_q1,  v_score_load1,  queue1_to_score);
+            v_score_q1  = vec_perm(v_score_q2,  v_score_load1,  queue2_to_queue1);
+            v_score_q2  = vec_perm(v_score_q3,  v_score_load1,  queue3_to_queue2);
+            v_score_q3  = vec_perm(v_score_q4,  v_score_load1,  queue4_to_queue3);
+            v_score_q4  = vec_perm(v_score_q5,  v_score_load1,  queue5_to_queue4);
+            v_score_q5  = vec_perm(v_score_q5,  v_score_load1,  queue5_with_load);
+            
+            
+            // prefetch scores for next step
+            v_score_load1 = vec_ld(16*k,query_profile_byte);
+            v_score_load2 = vec_ld(16*k8,query_profile_byte);
+            
+            // load values of F and H from previous row (one unit up)
+            Fup    = vec_ld(512, p);
+            Hup2   = vec_ld(528, p);
+            
+            // save old values of F and H to use on next row
+            vec_st(F, 0,  p);
+            vec_st(H, 16, p);
+            p += 32;
+            
+            // shift into place so we have complete F and H vectors
+            // that refer to the values one unit up from each cell
+            // that we are currently working on.
+            Fup    = vec_sld(Fup,F,15);
+            Hup2   = vec_sld(Hup2,H,15);            
+            
+            // do the dynamic programming 
+            
+            // update E value
+            E   = vec_subs(E,v_gapextend);
+            tmp = vec_subs(H,v_gapopen);
+            E   = vec_max(E,tmp);
+            
+            // update F value
+            F   = vec_subs(Fup,v_gapextend);
+            tmp = vec_subs(Hup2,v_gapopen);
+            F   = vec_max(F,tmp);
+            
+            v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
+            
+            // add score to H
+            H   = vec_adds(Hup1,v_score);
+            H   = vec_subs(H,v_bias);
+            
+            // set H to max of H,E,F
+            H   = vec_max(H,E);
+            H   = vec_max(H,F);
+            
+            // Update highest score encountered this far
+            v_maxscore = vec_max(v_maxscore,H);
+            
+            
+
+            
+            
+            
+            // STEP 7
+            
+            // prefetch next residue
+            k                = db_sequence[j+7];
+            k8               = db_sequence[j-1];
+            
+            v_score     = vec_perm(v_score_q1,  v_score_load1,  queue1_to_score);
+            v_score_q1  = vec_perm(v_score_q2,  v_score_load1,  queue2_to_queue1);
+            v_score_q2  = vec_perm(v_score_q3,  v_score_load1,  queue3_to_queue2);
+            v_score_q3  = vec_perm(v_score_q4,  v_score_load1,  queue4_to_queue3);
+            v_score_q4  = vec_perm(v_score_q5,  v_score_load1,  queue5_to_queue4);
+            v_score_q5  = vec_perm(v_score_q5,  v_score_load1,  queue5_with_load);
+            
+            
+            // prefetch scores for next step
+            v_score_load1 = vec_ld(16*k,query_profile_byte);
+            v_score_load2 = vec_ld(16*k8,query_profile_byte);
+            
+            // load values of F and H from previous row (one unit up)
+            Fup    = vec_ld(512, p);
+            Hup1    = vec_ld(528, p);
+            
+            // save old values of F and H to use on next row
+            vec_st(F, 0,  p);
+            vec_st(H, 16, p);
+            p += 32;
+            
+            // shift into place so we have complete F and H vectors
+            // that refer to the values one unit up from each cell
+            // that we are currently working on.
+            Fup    = vec_sld(Fup,F,15);
+            Hup1    = vec_sld(Hup1,H,15);            
+            
+            // do the dynamic programming 
+            
+            // update E value
+            E   = vec_subs(E,v_gapextend);
+            tmp = vec_subs(H,v_gapopen);
+            E   = vec_max(E,tmp);
+            
+            // update F value
+            F   = vec_subs(Fup,v_gapextend);
+            tmp = vec_subs(Hup1,v_gapopen);
+            F   = vec_max(F,tmp);
+            
+            v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
+            
+            // add score to H
+            H   = vec_adds(Hup2,v_score);
+            H   = vec_subs(H,v_bias);
+            
+            // set H to max of H,E,F
+            H   = vec_max(H,E);
+            H   = vec_max(H,F);
+            
+            // Update highest score encountered this far
+            v_maxscore = vec_max(v_maxscore,H);
+            
+            
+            
+
+            
+            
+            // STEP 8
+            
+            // prefetch next residue
+            k                = db_sequence[j+8];
+            k8               = db_sequence[j];
+            
+            v_score     = vec_perm(v_score_q1,  v_score_load1,  queue1_to_score);
+            v_score_q1  = vec_perm(v_score_q2,  v_score_load1,  queue2_to_queue1);
+            v_score_q2  = vec_perm(v_score_q3,  v_score_load1,  queue3_to_queue2);
+            v_score_q3  = vec_perm(v_score_q4,  v_score_load1,  queue4_to_queue3);
+            v_score_q4  = vec_perm(v_score_q5,  v_score_load1,  queue5_to_queue4);
+            v_score_q5  = vec_perm(v_score_q5,  v_score_load1,  queue5_with_load);
+            
+            
+            // prefetch scores for next step
+            v_score_load1 = vec_ld(16*k,query_profile_byte);
+            v_score_load2 = vec_ld(16*k8,query_profile_byte);
+            
+            // load values of F and H from previous row (one unit up)
+            Fup    = vec_ld(512, p);
+            Hup2   = vec_ld(528, p);
+            
+            // save old values of F and H to use on next row
+            vec_st(F, 0,  p);
+            vec_st(H, 16, p);
+            p += 32;
+            
+            // shift into place so we have complete F and H vectors
+            // that refer to the values one unit up from each cell
+            // that we are currently working on.
+            Fup    = vec_sld(Fup,F,15);
+            Hup2   = vec_sld(Hup2,H,15);            
+            
+            // do the dynamic programming 
+            
+            // update E value
+            E   = vec_subs(E,v_gapextend);
+            tmp = vec_subs(H,v_gapopen);
+            E   = vec_max(E,tmp);
+            
+            // update F value
+            F   = vec_subs(Fup,v_gapextend);
+            tmp = vec_subs(Hup2,v_gapopen);
+            F   = vec_max(F,tmp);
+            
+            v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
+            
+            // add score to H
+            H   = vec_adds(Hup1,v_score);
+            H   = vec_subs(H,v_bias);
+            
+            // set H to max of H,E,F
+            H   = vec_max(H,E);
+            H   = vec_max(H,F);
+            
+            // Update highest score encountered this far
+            v_maxscore = vec_max(v_maxscore,H);
+            
+            
+            
+
+            
+            
+            // STEP 9
+            
+            // prefetch next residue
+            k                = db_sequence[j+9];
+            k8               = db_sequence[j+1];
+            
+            v_score     = vec_perm(v_score_q1,  v_score_load1,  queue1_to_score);
+            v_score_q1  = vec_perm(v_score_q2,  v_score_load1,  queue2_to_queue1);
+            v_score_q2  = vec_perm(v_score_q3,  v_score_load1,  queue3_to_queue2);
+            v_score_q3  = vec_perm(v_score_q4,  v_score_load1,  queue4_to_queue3);
+            v_score_q4  = vec_perm(v_score_q5,  v_score_load1,  queue5_to_queue4);
+            v_score_q5  = vec_perm(v_score_q5,  v_score_load1,  queue5_with_load);
+            
+            
+            // prefetch scores for next step
+            v_score_load1 = vec_ld(16*k,query_profile_byte);
+            v_score_load2 = vec_ld(16*k8,query_profile_byte);
+            
+            // load values of F and H from previous row (one unit up)
+            Fup    = vec_ld(512, p);
+            Hup1   = vec_ld(528, p);
+            
+            // save old values of F and H to use on next row
+            vec_st(F, 0,  p);
+            vec_st(H, 16, p);
+            p += 32;
+            
+            // shift into place so we have complete F and H vectors
+            // that refer to the values one unit up from each cell
+            // that we are currently working on.
+            Fup    = vec_sld(Fup,F,15);
+            Hup1   = vec_sld(Hup1,H,15);            
+            
+            // do the dynamic programming 
+            
+            // update E value
+            E   = vec_subs(E,v_gapextend);
+            tmp = vec_subs(H,v_gapopen);
+            E   = vec_max(E,tmp);
+            
+            // update F value
+            F   = vec_subs(Fup,v_gapextend);
+            tmp = vec_subs(Hup1,v_gapopen);
+            F   = vec_max(F,tmp);
+            
+            v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
+            
+            // add score to H
+            H   = vec_adds(Hup2,v_score);
+            H   = vec_subs(H,v_bias);
+            
+            // set H to max of H,E,F
+            H   = vec_max(H,E);
+            H   = vec_max(H,F);
+            
+            // Update highest score encountered this far
+            v_maxscore = vec_max(v_maxscore,H);
+            
+            // STEP 10
+            
+            // prefetch next residue
+            k                = db_sequence[j+10];
+            k8               = db_sequence[j+2];
+            
+            v_score     = vec_perm(v_score_q1,  v_score_load1,  queue1_to_score);
+            v_score_q1  = vec_perm(v_score_q2,  v_score_load1,  queue2_to_queue1);
+            v_score_q2  = vec_perm(v_score_q3,  v_score_load1,  queue3_to_queue2);
+            v_score_q3  = vec_perm(v_score_q4,  v_score_load1,  queue4_to_queue3);
+            v_score_q4  = vec_perm(v_score_q5,  v_score_load1,  queue5_to_queue4);
+            v_score_q5  = vec_perm(v_score_q5,  v_score_load1,  queue5_with_load);
+            
+            
+            // prefetch scores for next step
+            v_score_load1 = vec_ld(16*k,query_profile_byte);
+            v_score_load2 = vec_ld(16*k8,query_profile_byte);
+            
+            // load values of F and H from previous row (one unit up)
+            Fup    = vec_ld(512, p);
+            Hup2   = vec_ld(528, p);
+            
+            // save old values of F and H to use on next row
+            vec_st(F, 0,  p);
+            vec_st(H, 16, p);
+            p += 32;
+            
+            // shift into place so we have complete F and H vectors
+            // that refer to the values one unit up from each cell
+            // that we are currently working on.
+            Fup    = vec_sld(Fup,F,15);
+            Hup2   = vec_sld(Hup2,H,15);            
+            
+            // do the dynamic programming 
+            
+            // update E value
+            E   = vec_subs(E,v_gapextend);
+            tmp = vec_subs(H,v_gapopen);
+            E   = vec_max(E,tmp);
+            
+            // update F value
+            F   = vec_subs(Fup,v_gapextend);
+            tmp = vec_subs(Hup2,v_gapopen);
+            F   = vec_max(F,tmp);
+            
+            v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
+            
+            // add score to H
+            H   = vec_adds(Hup1,v_score);
+            H   = vec_subs(H,v_bias);
+            
+            // set H to max of H,E,F
+            H   = vec_max(H,E);
+            H   = vec_max(H,F);
+        
+            // Update highest score encountered this far
+            v_maxscore = vec_max(v_maxscore,H);
+            
+            // STEP 11
+            
+            // prefetch next residue
+            k                = db_sequence[j+11];
+            k8               = db_sequence[j+3];
+            
+            v_score     = vec_perm(v_score_q1,  v_score_load1,  queue1_to_score);
+            v_score_q1  = vec_perm(v_score_q2,  v_score_load1,  queue2_to_queue1);
+            v_score_q2  = vec_perm(v_score_q3,  v_score_load1,  queue3_to_queue2);
+            v_score_q3  = vec_perm(v_score_q4,  v_score_load1,  queue4_to_queue3);
+            v_score_q4  = vec_perm(v_score_q5,  v_score_load1,  queue5_to_queue4);
+            v_score_q5  = vec_perm(v_score_q5,  v_score_load1,  queue5_with_load);
+            
+            
+            // prefetch scores for next step
+            v_score_load1 = vec_ld(16*k,query_profile_byte);
+            v_score_load2 = vec_ld(16*k8,query_profile_byte);
+            
+            // load values of F and H from previous row (one unit up)
+            Fup    = vec_ld(512, p);
+            Hup1   = vec_ld(528, p);
+            
+            // save old values of F and H to use on next row
+            vec_st(F, 0,  p);
+            vec_st(H, 16, p);
+            p += 32;
+            
+            // shift into place so we have complete F and H vectors
+            // that refer to the values one unit up from each cell
+            // that we are currently working on.
+            Fup    = vec_sld(Fup,F,15);
+            Hup1   = vec_sld(Hup1,H,15);            
+            
+            // do the dynamic programming 
+            
+            // update E value
+            E   = vec_subs(E,v_gapextend);
+            tmp = vec_subs(H,v_gapopen);
+            E   = vec_max(E,tmp);
+            
+            // update F value
+            F   = vec_subs(Fup,v_gapextend);
+            tmp = vec_subs(Hup1,v_gapopen);
+            F   = vec_max(F,tmp);
+            
+            v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
+            
+            // add score to H
+            H   = vec_adds(Hup2,v_score);
+            H   = vec_subs(H,v_bias);
+            
+            // set H to max of H,E,F
+            H   = vec_max(H,E);
+            H   = vec_max(H,F);
+            
+            // Update highest score encountered this far
+            v_maxscore = vec_max(v_maxscore,H);
+            
+            // STEP 12
+            
+            // prefetch next residue
+            k                = db_sequence[j+12];
+            k8               = db_sequence[j+4];
+            
+            v_score     = vec_perm(v_score_q1,  v_score_load1,  queue1_to_score);
+            v_score_q1  = vec_perm(v_score_q2,  v_score_load1,  queue2_to_queue1);
+            v_score_q2  = vec_perm(v_score_q3,  v_score_load1,  queue3_to_queue2);
+            v_score_q3  = vec_perm(v_score_q4,  v_score_load1,  queue4_to_queue3);
+            v_score_q4  = vec_perm(v_score_q5,  v_score_load1,  queue5_to_queue4);
+            v_score_q5  = vec_perm(v_score_q5,  v_score_load1,  queue5_with_load);
+            
+            
+            // prefetch scores for next step
+            v_score_load1 = vec_ld(16*k,query_profile_byte);
+            v_score_load2 = vec_ld(16*k8,query_profile_byte);
+            
+            // load values of F and H from previous row (one unit up)
+            Fup    = vec_ld(512, p);
+            Hup2   = vec_ld(528, p);
+            
+            // save old values of F and H to use on next row
+            vec_st(F, 0,  p);
+            vec_st(H, 16, p);
+            p += 32;
+            
+            // shift into place so we have complete F and H vectors
+            // that refer to the values one unit up from each cell
+            // that we are currently working on.
+            Fup    = vec_sld(Fup,F,15);
+            Hup2   = vec_sld(Hup2,H,15);            
+            
+            // do the dynamic programming 
+            
+            // update E value
+            E   = vec_subs(E,v_gapextend);
+            tmp = vec_subs(H,v_gapopen);
+            E   = vec_max(E,tmp);
+            
+            // update F value
+            F   = vec_subs(Fup,v_gapextend);
+            tmp = vec_subs(Hup2,v_gapopen);
+            F   = vec_max(F,tmp);
+            
+            v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
+            
+            // add score to H
+            H   = vec_adds(Hup1,v_score);
+            H   = vec_subs(H,v_bias);
+            
+            // set H to max of H,E,F
+            H   = vec_max(H,E);
+            H   = vec_max(H,F);
+            
+            // Update highest score encountered this far
+            v_maxscore = vec_max(v_maxscore,H);
+            
+            // STEP 13
+            
+            // prefetch next residue
+            k                = db_sequence[j+13];
+            k8               = db_sequence[j+5];
+            
+            v_score     = vec_perm(v_score_q1,  v_score_load1,  queue1_to_score);
+            v_score_q1  = vec_perm(v_score_q2,  v_score_load1,  queue2_to_queue1);
+            v_score_q2  = vec_perm(v_score_q3,  v_score_load1,  queue3_to_queue2);
+            v_score_q3  = vec_perm(v_score_q4,  v_score_load1,  queue4_to_queue3);
+            v_score_q4  = vec_perm(v_score_q5,  v_score_load1,  queue5_to_queue4);
+            v_score_q5  = vec_perm(v_score_q5,  v_score_load1,  queue5_with_load);
+            
+            
+            // prefetch scores for next step
+            v_score_load1 = vec_ld(16*k,query_profile_byte);
+            v_score_load2 = vec_ld(16*k8,query_profile_byte);
+            
+            // load values of F and H from previous row (one unit up)
+            Fup    = vec_ld(512, p);
+            Hup1   = vec_ld(528, p);
+            
+            // save old values of F and H to use on next row
+            vec_st(F, 0,  p);
+            vec_st(H, 16, p);
+            p += 32;
+            
+            // shift into place so we have complete F and H vectors
+            // that refer to the values one unit up from each cell
+            // that we are currently working on.
+            Fup    = vec_sld(Fup,F,15);
+            Hup1   = vec_sld(Hup1,H,15);            
+            
+            // do the dynamic programming 
+            
+            // update E value
+            E   = vec_subs(E,v_gapextend);
+            tmp = vec_subs(H,v_gapopen);
+            E   = vec_max(E,tmp);
+            
+            // update F value
+            F   = vec_subs(Fup,v_gapextend);
+            tmp = vec_subs(Hup1,v_gapopen);
+            F   = vec_max(F,tmp);
+            
+            v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
+            
+            // add score to H
+            H   = vec_adds(Hup2,v_score);
+            H   = vec_subs(H,v_bias);
+            
+            // set H to max of H,E,F
+            H   = vec_max(H,E);
+            H   = vec_max(H,F);
+            
+            // Update highest score encountered this far
+            v_maxscore = vec_max(v_maxscore,H);
+            
+            // STEP 14
+            
+            // prefetch next residue
+            k                = db_sequence[j+14];
+            k8               = db_sequence[j+6];
+            
+            v_score     = vec_perm(v_score_q1,  v_score_load1,  queue1_to_score);
+            v_score_q1  = vec_perm(v_score_q2,  v_score_load1,  queue2_to_queue1);
+            v_score_q2  = vec_perm(v_score_q3,  v_score_load1,  queue3_to_queue2);
+            v_score_q3  = vec_perm(v_score_q4,  v_score_load1,  queue4_to_queue3);
+            v_score_q4  = vec_perm(v_score_q5,  v_score_load1,  queue5_to_queue4);
+            v_score_q5  = vec_perm(v_score_q5,  v_score_load1,  queue5_with_load);
+            
+            
+            // prefetch scores for next step
+            v_score_load1 = vec_ld(16*k,query_profile_byte);
+            v_score_load2 = vec_ld(16*k8,query_profile_byte);
+            
+            // load values of F and H from previous row (one unit up)
+            Fup    = vec_ld(512, p);
+            Hup2   = vec_ld(528, p);
+            
+            // save old values of F and H to use on next row
+            vec_st(F, 0,  p);
+            vec_st(H, 16, p);
+            p += 32;
+            
+            // shift into place so we have complete F and H vectors
+            // that refer to the values one unit up from each cell
+            // that we are currently working on.
+            Fup    = vec_sld(Fup,F,15);
+            Hup2   = vec_sld(Hup2,H,15);            
+            
+            // do the dynamic programming 
+            
+            // update E value
+            E   = vec_subs(E,v_gapextend);
+            tmp = vec_subs(H,v_gapopen);
+            E   = vec_max(E,tmp);
+            
+            // update F value
+            F   = vec_subs(Fup,v_gapextend);
+            tmp = vec_subs(Hup2,v_gapopen);
+            F   = vec_max(F,tmp);
+            
+            v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
+            
+            // add score to H
+            H   = vec_adds(Hup1,v_score);
+            H   = vec_subs(H,v_bias);
+            
+            // set H to max of H,E,F
+            H   = vec_max(H,E);
+            H   = vec_max(H,F);
+            
+            // Update highest score encountered this far
+            v_maxscore = vec_max(v_maxscore,H);
+            
+            // STEP 15
+            
+            // prefetch next residue
+            k                = db_sequence[j+15];
+            k8               = db_sequence[j+7];
+            
+            v_score     = vec_perm(v_score_q1,  v_score_load1,  queue1_to_score);
+            v_score_q1  = vec_perm(v_score_q2,  v_score_load1,  queue2_to_queue1);
+            v_score_q2  = vec_perm(v_score_q3,  v_score_load1,  queue3_to_queue2);
+            v_score_q3  = vec_perm(v_score_q4,  v_score_load1,  queue4_to_queue3);
+            v_score_q4  = vec_perm(v_score_q5,  v_score_load1,  queue5_to_queue4);
+            v_score_q5  = vec_perm(v_score_q5,  v_score_load1,  queue5_with_load);
+                        
+            // prefetch scores for next step
+            v_score_load1 = vec_ld(16*k,query_profile_byte);
+            v_score_load2 = vec_ld(16*k8,query_profile_byte);
+            
+            // load values of F and H from previous row (one unit up)
+            Fup    = vec_ld(512, p);
+            Hup1   = vec_ld(528, p);
+            
+            // save old values of F and H to use on next row
+            vec_st(F, 0,  p);
+            vec_st(H, 16, p);
+            p += 32;
+            
+            // shift into place so we have complete F and H vectors
+            // that refer to the values one unit up from each cell
+            // that we are currently working on.
+            Fup    = vec_sld(Fup,F,15);
+            Hup1   = vec_sld(Hup1,H,15);            
+            
+            // do the dynamic programming 
+            
+            // update E value
+            E   = vec_subs(E,v_gapextend);
+            tmp = vec_subs(H,v_gapopen);
+            E   = vec_max(E,tmp);
+            
+            // update F value
+            F   = vec_subs(Fup,v_gapextend);
+            tmp = vec_subs(Hup1,v_gapopen);
+            F   = vec_max(F,tmp);
+            
+            v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
+            
+            // add score to H
+            H   = vec_adds(Hup2,v_score);
+            H   = vec_subs(H,v_bias);
+            
+            // set H to max of H,E,F
+            H   = vec_max(H,E);
+            H   = vec_max(H,F);
+            
+            // Update highest score encountered this far
+            v_maxscore = vec_max(v_maxscore,H);
+            
+            // STEP 16
+            
+            // prefetch next residue
+            k                = db_sequence[j+16];
+            k8               = db_sequence[j+8];
+            
+            v_score     = vec_perm(v_score_q1,  v_score_load1,  queue1_to_score);
+            v_score_q1  = vec_perm(v_score_q2,  v_score_load1,  queue2_to_queue1);
+            v_score_q2  = vec_perm(v_score_q3,  v_score_load1,  queue3_to_queue2);
+            v_score_q3  = vec_perm(v_score_q4,  v_score_load1,  queue4_to_queue3);
+            v_score_q4  = vec_perm(v_score_q5,  v_score_load1,  queue5_to_queue4);
+            v_score_q5  = vec_perm(v_score_q5,  v_score_load1,  queue5_with_load);
+            
+            
+            // prefetch scores for next step
+            v_score_load1 = vec_ld(16*k,query_profile_byte);
+            v_score_load2 = vec_ld(16*k8,query_profile_byte);
+            
+            // load values of F and H from previous row (one unit up)
+            Fup    = vec_ld(512, p);
+            Hup2   = vec_ld(528, p);
+            
+            // save old values of F and H to use on next row
+            vec_st(F, 0,  p);
+            vec_st(H, 16, p);
+            p += 32;
+            
+            // shift into place so we have complete F and H vectors
+            // that refer to the values one unit up from each cell
+            // that we are currently working on.
+            Fup    = vec_sld(Fup,F,15);
+            Hup2   = vec_sld(Hup2,H,15);            
+            
+            // do the dynamic programming 
+            
+            // update E value
+            E   = vec_subs(E,v_gapextend);
+            tmp = vec_subs(H,v_gapopen);
+            E   = vec_max(E,tmp);
+            
+            // update F value
+            F   = vec_subs(Fup,v_gapextend);
+            tmp = vec_subs(Hup2,v_gapopen);
+            F   = vec_max(F,tmp);
+            
+            v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
+            
+            // add score to H
+            H   = vec_adds(Hup1,v_score);
+            H   = vec_subs(H,v_bias);
+            
+            // set H to max of H,E,F
+            H   = vec_max(H,E);
+            H   = vec_max(H,F);
+            
+            // Update highest score encountered this far
+            v_maxscore = vec_max(v_maxscore,H);
+            
+        }
+        
+        for(;j<db_length+15;j++)
+        {
+            k8               = db_sequence[j-7];
+
+            v_score     = vec_perm(v_score_q1,  v_score_load1,  queue1_to_score);
+            v_score_q1  = vec_perm(v_score_q2,  v_score_load1,  queue2_to_queue1);
+            v_score_q2  = vec_perm(v_score_q3,  v_score_load1,  queue3_to_queue2);
+            v_score_q3  = vec_perm(v_score_q4,  v_score_load1,  queue4_to_queue3);
+            v_score_q4  = vec_perm(v_score_q5,  v_score_load1,  queue5_to_queue4);
+            v_score_q5  = vec_perm(v_score_q5,  v_score_load1,  queue5_with_load);
+            
+            
+            // prefetch scores for next step
+            v_score_load2 = vec_ld(16*k8,query_profile_byte);
+            v_score_load1 = vec_perm(v_zero,v_score_load2,merge_score_load);
+
+            // save old values of F and H to use on next row
+            vec_st(F, 0,  p);
+            vec_st(H, 16, p);
+            p += 32; // move ahead 32 bytes
+            
+            Fup    = vec_sld(v_zero,F,15);
+            Hup1   = vec_sld(v_zero,H,15);            
+            
+            // do the dynamic programming 
+            
+            // update E value
+            E   = vec_subs(E,v_gapextend);
+            tmp = vec_subs(H,v_gapopen);
+            E   = vec_max(E,tmp);
+            
+            // update F value
+            F   = vec_subs(Fup,v_gapextend);
+            tmp = vec_subs(Hup1,v_gapopen);
+            F   = vec_max(F,tmp);
+            
+            // add score to H
+            H   = vec_adds(Hup2,v_score);
+            H   = vec_subs(H,v_bias);
+            
+            // set H to max of H,E,F
+            H   = vec_max(H,E);
+            H   = vec_max(H,F);
+            
+            // Save value to use for next diagonal H 
+            Hup2 = Hup1;
+
+            // Update highest score encountered this far
+            v_maxscore = vec_max(v_maxscore,H);
+        }
+        vec_st(F, 512, p);
+        vec_st(H, 528, p);
+
+        query_profile_byte += 16*alphabet_size;
+
+        // End of this row (actually 16 rows due to SIMD).
+        // Before we continue, check for overflow.
+        tmp      = vec_subs(vec_splat_u8(-1),v_bias);
+        overflow = vec_any_ge(v_maxscore,tmp);
+        
+
+    }
+
+    if(overflow)
+    {
+        return 255;
+    }
+    else
+    {
+        // find largest score in the v_maxscore vector
+        tmp = vec_sld(v_maxscore,v_maxscore,8);
+        v_maxscore = vec_max(v_maxscore,tmp);
+        tmp = vec_sld(v_maxscore,v_maxscore,4);
+        v_maxscore = vec_max(v_maxscore,tmp);
+        tmp = vec_sld(v_maxscore,v_maxscore,2);
+        v_maxscore = vec_max(v_maxscore,tmp);
+        tmp = vec_sld(v_maxscore,v_maxscore,1);
+        v_maxscore = vec_max(v_maxscore,tmp);
+        
+        // store in temporary variable
+        vec_ste(v_maxscore,0,&score);
+        
+        // return largest score
+        return score;
+    }}
+
+
+#else
+
+/* No Altivec support. Avoid compiler complaints about empty object */
+
+int sw_dummy;
+
+#endif
diff --git a/src/smith_waterman_altivec.h b/src/smith_waterman_altivec.h
new file mode 100644
index 0000000..0437552
--- /dev/null
+++ b/src/smith_waterman_altivec.h
@@ -0,0 +1,26 @@
+/* $Id: smith_waterman_altivec.h 625 2011-03-23 17:21:38Z wrp $ */
+/* $Revision: 625 $  */
+/* AltiVec 16-bit Smith-Waterman entry point; defined in smith_waterman_altivec.c. NOTE(review): semantics presumably mirror the SSE2 word routine plus a bias argument - confirm against the implementation. */
+int
+smith_waterman_altivec_word(const unsigned char *     query_sequence,
+                            unsigned short *    query_profile_word,
+                            const int                 query_length,
+                            const unsigned char *     db_sequence,
+                            const int                 db_length,
+                            unsigned short      bias,
+                            unsigned short      gap_open,
+                            unsigned short      gap_extend,
+                            struct f_struct *   f_str);
+
+/* AltiVec 8-bit Smith-Waterman entry point; defined in smith_waterman_altivec.c. NOTE(review): presumably returns 255 when the 8-bit score may have overflowed - confirm against the implementation. */
+int
+smith_waterman_altivec_byte(const unsigned char *     query_sequence,
+                            unsigned char *     query_profile_byte,
+                            const int                 query_length,
+                            const unsigned char *     db_sequence,
+                            const int                 db_length,
+                            unsigned char       bias,
+                            unsigned char       gap_open,
+                            unsigned char       gap_extend,
+                            struct f_struct *   f_str);
+
diff --git a/src/smith_waterman_sse2.c b/src/smith_waterman_sse2.c
new file mode 100644
index 0000000..dbc1cfa
--- /dev/null
+++ b/src/smith_waterman_sse2.c
@@ -0,0 +1,432 @@
+/******************************************************************
+  Copyright 2006 by Michael Farrar.  All rights reserved.
+  This program may not be sold or incorporated into a commercial product,
+  in whole or in part, without written consent of Michael Farrar.  For 
+  further information regarding permission for use or reproduction, please 
+  contact: Michael Farrar at farrar.michael at gmail.com.
+*******************************************************************/
+
+/*
+  Written by Michael Farrar, 2006.
+  Please send bug reports and/or suggestions to farrar.michael at gmail.com.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "defs.h"
+#include "param.h"
+#include "dropgsw2.h"
+#include "smith_waterman_sse2.h"
+
+#ifdef __SUNPRO_C
+#include <sunmedia_intrin.h>
+#else
+#include <emmintrin.h>
+#endif
+
+#ifdef SW_SSE2
+/* 16-bit SSE2 Smith-Waterman scan of db_sequence against a precomputed query profile; returns the best score with the -32768 lane bias removed. */
+int
+smith_waterman_sse2_word(const unsigned char *     query_sequence, /* not referenced in this function */
+                         unsigned short *    query_profile_word, /* read as __m128i vectors, iter vectors per db residue code */
+                         const int                 query_length,
+                         const unsigned char *     db_sequence, /* each residue code selects a block of the profile */
+                         const int                 db_length,
+                         unsigned short      gap_open, /* subtracted from H to seed E and F */
+                         unsigned short      gap_extend, /* subtracted from E and F at each step */
+                         struct f_struct *   f_str) /* only f_str->workspace is used here */
+{
+    int     i, j, k;
+    short   score;
+
+    int     cmp;
+    int     iter = (query_length + 7) / 8; /* number of 8-lane (16-bit) vectors needed to cover the query */
+    
+
+    __m128i *p;
+    __m128i *workspace = (__m128i *) f_str->workspace; /* accessed with aligned loads/stores, so it must be 16-byte aligned */
+
+    __m128i E, F, H;
+
+    __m128i v_maxscore;
+    __m128i v_gapopen;
+    __m128i v_gapextend;
+
+    __m128i v_min;
+    __m128i v_minimums; /* assigned below but never read in this function */
+    __m128i v_temp;
+
+    __m128i *pHLoad, *pHStore;
+    __m128i *pE;
+
+    __m128i *pScore;
+
+    /* Broadcast the gap opening penalty to all 8 lanes */
+    v_gapopen = _mm_setzero_si128();	/* Apple Devel */
+    v_gapopen = _mm_insert_epi16 (v_gapopen, gap_open, 0);
+    v_gapopen = _mm_shufflelo_epi16 (v_gapopen, 0);
+    v_gapopen = _mm_shuffle_epi32 (v_gapopen, 0);
+
+    /* Broadcast the gap extension penalty to all 8 lanes */
+    v_gapextend = _mm_setzero_si128();	/* Apple Devel */
+    v_gapextend = _mm_insert_epi16 (v_gapextend, gap_extend, 0);
+    v_gapextend = _mm_shufflelo_epi16 (v_gapextend, 0);
+    v_gapextend = _mm_shuffle_epi32 (v_gapextend, 0);
+
+    /* set every lane of v_maxscore to 0x8000 (-32768): all-ones from */
+    /*  cmpeq, then shift left by 15.  since we are using signed math, */
+    /*  -32768 acts as the zero score and gives the full range of the short. */
+    v_maxscore = _mm_setzero_si128();	/* Apple Devel */
+    v_maxscore = _mm_cmpeq_epi16 (v_maxscore, v_maxscore);
+    v_maxscore = _mm_slli_epi16 (v_maxscore, 15);
+
+    v_minimums = _mm_shuffle_epi32 (v_maxscore, 0);
+
+    v_min = _mm_shuffle_epi32 (v_maxscore, 0);
+    v_min = _mm_srli_si128 (v_min, 14); /* -32768 in lane 0 only; OR-ed into the lane vacated by a 2-byte left shift */
+
+    /* Fill the first 2*iter workspace vectors (E and the first H buffer) with -32768 */
+    k = 2 * iter;
+
+    p = workspace;
+    for (i = 0; i < k; i++)
+    {
+        _mm_store_si128 (p++, v_maxscore);
+    }
+
+    pE = workspace;
+    pHStore = pE + iter;
+    pHLoad = pHStore + iter; /* third buffer is not pre-filled: after the first swap it is written before it is read */
+
+    for (i = 0; i < db_length; ++i)
+    {
+        /* fetch first data asap. */
+        pScore = (__m128i *) query_profile_word + db_sequence[i] * iter;
+
+        /* bias all elements in F to -32768 */
+        F = _mm_setzero_si128();	/* Apple Devel */
+        F = _mm_cmpeq_epi16 (F, F);
+        F = _mm_slli_epi16 (F, 15);
+
+        /* start from the last H vector of the previous column, shifted up one lane */
+        H = _mm_load_si128 (pHStore + iter - 1);
+        H = _mm_slli_si128 (H, 2);
+        H = _mm_or_si128 (H, v_min);
+
+        /* swap the two H buffers: the previous column becomes the load side */
+        p = pHLoad;
+        pHLoad = pHStore;
+        pHStore = p;
+
+        for (j = 0; j < iter; j++)
+        {
+            /* load E values */
+            E = _mm_load_si128 (pE + j);
+
+            /* add score to H (signed saturating add) */
+            H = _mm_adds_epi16 (H, *pScore++);
+
+            /* Update highest score encountered this far */
+            v_maxscore = _mm_max_epi16 (v_maxscore, H);
+
+            /* get max from H, E and F */
+            H = _mm_max_epi16 (H, E);
+            H = _mm_max_epi16 (H, F);
+
+            /* save H values */
+            _mm_store_si128 (pHStore + j, H);
+
+            /* subtract the gap open penalty from H */
+            H = _mm_subs_epi16 (H, v_gapopen);
+
+            /* update E value */
+            E = _mm_subs_epi16 (E, v_gapextend);
+            E = _mm_max_epi16 (E, H);
+
+            /* update F value */
+            F = _mm_subs_epi16 (F, v_gapextend);
+            F = _mm_max_epi16 (F, H);
+
+            /* save E values */
+            _mm_store_si128 (pE + j, E);
+
+            /* load the next h value */
+            H = _mm_load_si128 (pHLoad + j);
+        }
+
+        /* reset pointers to the start of the saved data */
+        j = 0;
+        H = _mm_load_si128 (pHStore + j);
+
+        /*  the computed F value is for the given column.  since */
+        /*  we are at the end, we need to shift the F value over */
+        /*  to the next column. */
+        F = _mm_slli_si128 (F, 2);
+        F = _mm_or_si128 (F, v_min);
+        v_temp = _mm_subs_epi16 (H, v_gapopen);
+        v_temp = _mm_cmpgt_epi16 (F, v_temp);
+        cmp  = _mm_movemask_epi8 (v_temp);
+
+        /* keep propagating F while some lane still has F > H - gap_open */
+        while (cmp != 0x0000) 
+        {
+            E = _mm_load_si128 (pE + j);
+
+            H = _mm_max_epi16 (H, F);
+
+            /* save H values */
+            _mm_store_si128 (pHStore + j, H);
+
+            /* update E in case the new H value would change it */
+            H = _mm_subs_epi16 (H, v_gapopen);
+            E = _mm_max_epi16 (E, H);
+            _mm_store_si128 (pE + j, E);
+
+            /* update F value */
+            F = _mm_subs_epi16 (F, v_gapextend);
+
+            j++;
+            if (j >= iter)
+            {
+                /* wrapped past the last vector: shift F up one lane again */
+                j = 0;
+                F = _mm_slli_si128 (F, 2);
+                F = _mm_or_si128 (F, v_min);
+            }
+            H = _mm_load_si128 (pHStore + j);
+
+            v_temp = _mm_subs_epi16 (H, v_gapopen);
+            v_temp = _mm_cmpgt_epi16 (F, v_temp);
+            cmp  = _mm_movemask_epi8 (v_temp);
+        }
+    }
+
+    /* fold the 8 lanes of v_maxscore (byte shifts of 8, 4, 2) so lane 0 holds the maximum */
+    v_temp = _mm_srli_si128 (v_maxscore, 8);
+    v_maxscore = _mm_max_epi16 (v_maxscore, v_temp);
+    v_temp = _mm_srli_si128 (v_maxscore, 4);
+    v_maxscore = _mm_max_epi16 (v_maxscore, v_temp);
+    v_temp = _mm_srli_si128 (v_maxscore, 2);
+    v_maxscore = _mm_max_epi16 (v_maxscore, v_temp);
+
+    /* extract the largest score (still offset by -32768) */
+    score = _mm_extract_epi16 (v_maxscore, 0);
+
+    /* return largest score biased by 32768 */
+
+    /* fix for Mac OSX clang 4.1 */ 
+    /*
+#ifdef __clang__
+    if (score < 0) score += 32768;
+    return score;
+#else
+    */
+    return score + 32768;
+    /* #endif */
+}
+/* 8-bit SSE2 Smith-Waterman scan using unsigned saturating arithmetic; returns the best score, or 255 when score + bias reaches 255 (possible overflow). */
+int
+smith_waterman_sse2_byte(const unsigned char *     query_sequence, /* not referenced in this function */
+                         unsigned char *     query_profile_byte, /* read as __m128i vectors, iter vectors per db residue code */
+                         const int                 query_length,
+                         const unsigned char *     db_sequence, /* each residue code selects a block of the profile */
+                         const int                 db_length,
+                         unsigned char       bias, /* subtracted from H after each profile score is added */
+                         unsigned char       gap_open, /* subtracted from H to seed E and F */
+                         unsigned char       gap_extend, /* subtracted from E and F at each step */
+                         struct f_struct *   f_str) /* only f_str->workspace is used here */
+{
+    int     i, j, k;
+    int     score;
+
+    int     dup; /* a byte value duplicated into both halves of a 16-bit word */
+    int     cmp;
+    int     iter = (query_length + 15) / 16; /* number of 16-lane (8-bit) vectors needed to cover the query */
+    
+    __m128i *p;
+    __m128i *workspace = (__m128i *) f_str->workspace; /* accessed with aligned loads/stores, so it must be 16-byte aligned */
+
+    __m128i E, F, H;
+
+    __m128i v_maxscore;
+    __m128i v_bias;
+    __m128i v_gapopen;
+    __m128i v_gapextend;
+
+    __m128i v_temp;
+    __m128i v_zero;
+
+    __m128i *pHLoad, *pHStore;
+    __m128i *pE;
+
+    __m128i *pScore;
+
+    /* Broadcast the bias to all 16 byte lanes */
+    dup    = ((short) bias << 8) | bias;
+    v_bias = _mm_setzero_si128();
+    v_bias = _mm_insert_epi16 (v_bias, dup, 0);
+    v_bias = _mm_shufflelo_epi16 (v_bias, 0);
+    v_bias = _mm_shuffle_epi32 (v_bias, 0);
+
+    /* Broadcast the gap opening penalty to all 16 byte lanes */
+    dup  = ((short) gap_open << 8) | gap_open;
+    v_gapopen = _mm_setzero_si128();
+    v_gapopen = _mm_insert_epi16 (v_gapopen, dup, 0);
+    v_gapopen = _mm_shufflelo_epi16 (v_gapopen, 0);
+    v_gapopen = _mm_shuffle_epi32 (v_gapopen, 0);
+
+    /* Broadcast the gap extension penalty to all 16 byte lanes */
+    dup  = ((short) gap_extend << 8) | gap_extend;
+    v_gapextend = _mm_setzero_si128();
+    v_gapextend = _mm_insert_epi16 (v_gapextend, dup, 0);
+    v_gapextend = _mm_shufflelo_epi16 (v_gapextend, 0);
+    v_gapextend = _mm_shuffle_epi32 (v_gapextend, 0);
+
+    /* initialize the max score */
+    /*     v_maxscore = _mm_xor_si128 (v_maxscore, v_maxscore);  - Apple Devel*/
+    v_maxscore = _mm_setzero_si128();	/* Apple Devel */
+
+    /* create a constant of all zeros for comparison */
+    /* v_zero = _mm_xor_si128 (v_zero, v_zero);   - Apple Devel */
+    v_zero = _mm_setzero_si128();	/* Apple Devel */
+
+    /* Zero the first 2*iter workspace vectors (E and the first H buffer) */
+    k = iter * 2;
+
+    p = workspace;
+    for (i = 0; i < k; i++)
+    {
+        _mm_store_si128 (p++, v_maxscore);
+    }
+
+    pE = workspace;
+    pHStore = pE + iter;
+    pHLoad = pHStore + iter; /* third buffer is not pre-zeroed: after the first swap it is written before it is read */
+
+    for (i = 0; i < db_length; ++i)
+    {
+        /* fetch first data asap. */
+        pScore = (__m128i *) query_profile_byte + db_sequence[i] * iter;
+
+        /* zero out F value. */
+        /* F = _mm_xor_si128 (F, F);  -Apple Devel */
+        F = _mm_setzero_si128();	/* Apple Devel */
+
+        /* start from the last H vector of the previous column, shifted up one byte lane */
+        H = _mm_load_si128 (pHStore + iter - 1);
+        H = _mm_slli_si128 (H, 1);
+
+        /* swap the two H buffers: the previous column becomes the load side */
+        p = pHLoad;
+        pHLoad = pHStore;
+        pHStore = p;
+
+        for (j = 0; j < iter; j++)
+        {
+            /* load values E. */
+            E = _mm_load_si128 (pE + j);
+
+            /* add score to H, then remove the bias (both unsigned saturating) */
+            H = _mm_adds_epu8 (H, *pScore++);
+            H = _mm_subs_epu8 (H, v_bias);
+
+            /* Update highest score encountered this far */
+            v_maxscore = _mm_max_epu8 (v_maxscore, H);
+
+            /* get max from H, E and F */
+            H = _mm_max_epu8 (H, E);
+            H = _mm_max_epu8 (H, F);
+
+            /* save H values */
+            _mm_store_si128 (pHStore + j, H);
+
+            /* subtract the gap open penalty from H */
+            H = _mm_subs_epu8 (H, v_gapopen);
+
+            /* update E value */
+            E = _mm_subs_epu8 (E, v_gapextend);
+            E = _mm_max_epu8 (E, H);
+
+            /* update F value */
+            F = _mm_subs_epu8 (F, v_gapextend);
+            F = _mm_max_epu8 (F, H);
+
+            /* save E values */
+            _mm_store_si128 (pE + j, E);
+
+            /* load the next h value */
+            H = _mm_load_si128 (pHLoad + j);
+        }
+
+        /* reset pointers to the start of the saved data */
+        j = 0;
+        H = _mm_load_si128 (pHStore + j);
+
+        /*  the computed F value is for the given column.  since */
+        /*  we are at the end, we need to shift the F value over */
+        /*  to the next column. */
+        F = _mm_slli_si128 (F, 1);
+        v_temp = _mm_subs_epu8 (H, v_gapopen);
+        v_temp = _mm_subs_epu8 (F, v_temp);
+        v_temp = _mm_cmpeq_epi8 (v_temp, v_zero);
+        cmp  = _mm_movemask_epi8 (v_temp);
+
+        /* cmp == 0xffff means every lane has F <= H - gap_open (saturating), so F cannot raise H */
+        while (cmp != 0xffff) 
+        {
+            E = _mm_load_si128 (pE + j);
+
+            H = _mm_max_epu8 (H, F);
+
+            /* save H values */
+            _mm_store_si128 (pHStore + j, H);
+
+            /* update E in case the new H value would change it */
+            H = _mm_subs_epu8 (H, v_gapopen);
+            E = _mm_max_epu8 (E, H);
+            _mm_store_si128 (pE + j, E);
+
+            /* update F value */
+            F = _mm_subs_epu8 (F, v_gapextend);
+
+            j++;
+            if (j >= iter)
+            {
+                /* wrapped past the last vector: shift F up one byte lane again */
+                j = 0;
+                F = _mm_slli_si128 (F, 1);
+            }
+            H = _mm_load_si128 (pHStore + j);
+
+            v_temp = _mm_subs_epu8 (H, v_gapopen);
+            v_temp = _mm_subs_epu8 (F, v_temp);
+            v_temp = _mm_cmpeq_epi8 (v_temp, v_zero);
+            cmp  = _mm_movemask_epi8 (v_temp);
+        }
+    }
+
+    /* fold the 16 byte lanes of v_maxscore (byte shifts of 8, 4, 2, 1) so byte 0 holds the maximum */
+    v_temp = _mm_srli_si128 (v_maxscore, 8);
+    v_maxscore = _mm_max_epu8 (v_maxscore, v_temp);
+    v_temp = _mm_srli_si128 (v_maxscore, 4);
+    v_maxscore = _mm_max_epu8 (v_maxscore, v_temp);
+    v_temp = _mm_srli_si128 (v_maxscore, 2);
+    v_maxscore = _mm_max_epu8 (v_maxscore, v_temp);
+    v_temp = _mm_srli_si128 (v_maxscore, 1);
+    v_maxscore = _mm_max_epu8 (v_maxscore, v_temp);
+
+    /* extract the low 16 bits and keep only the lowest byte */
+    score = _mm_extract_epi16 (v_maxscore, 0);
+    score = score & 0x00ff;
+
+    /*  check if we might have overflowed */
+    if (score + bias >= 255)
+    {
+        score = 255;
+    }
+
+    /* return largest score */
+    return score;
+}
+#else
+
+/* No SSE2 support. Avoid compiler complaints about empty object */
+
+int sw_dummy;
+
+#endif
diff --git a/src/smith_waterman_sse2.h b/src/smith_waterman_sse2.h
new file mode 100755
index 0000000..8ff004d
--- /dev/null
+++ b/src/smith_waterman_sse2.h
@@ -0,0 +1,43 @@
+
+/* $Id: smith_waterman_sse2.h 625 2011-03-23 17:21:38Z wrp $ */
+/* $Revision: 625 $  */
+
+/******************************************************************
+  Copyright 2006 by Michael Farrar.  All rights reserved.
+  This program may not be sold or incorporated into a commercial product,
+  in whole or in part, without written consent of Michael Farrar.  For 
+  further information regarding permission for use or reproduction, please 
+  contact: Michael Farrar at farrar.michael at gmail.com.
+*******************************************************************/
+
+/*
+  Written by Michael Farrar, 2006.
+  Please send bug reports and/or suggestions to farrar.michael at gmail.com.
+*/
+
+#ifndef SMITH_WATERMAN_SSE2_H
+#define SMITH_WATERMAN_SSE2_H
+/* 16-bit SSE2 Smith-Waterman score of db_sequence against the query profile; workspace comes from f_str->workspace. */
+int
+smith_waterman_sse2_word(const unsigned char *     query_sequence,
+                         unsigned short *    query_profile_word,
+                         const int                 query_length,
+                         const unsigned char *     db_sequence,
+                         const int                 db_length,
+                         unsigned short      gap_open,
+                         unsigned short      gap_extend,
+                         struct f_struct *   f_str);
+
+/* 8-bit SSE2 Smith-Waterman score; returns 255 when score + bias reaches 255, i.e. the 8-bit range may have overflowed. */
+int
+smith_waterman_sse2_byte(const unsigned char *     query_sequence,
+                         unsigned char *     query_profile_byte,
+                         const int                 query_length,
+                         const unsigned char *     db_sequence,
+                         const int                 db_length,
+                         unsigned char       bias,
+                         unsigned char       gap_open,
+                         unsigned char       gap_extend,
+                         struct f_struct *   f_str);
+
+#endif /* SMITH_WATERMAN_SSE2_H */
diff --git a/src/structs.h b/src/structs.h
new file mode 100644
index 0000000..9065b89
--- /dev/null
+++ b/src/structs.h
@@ -0,0 +1,196 @@
+/* $Id: structs.h 1259 2014-05-28 19:28:06Z wrp $ */
+
+#include "rstruct.h"
+
+#include "aln_structs.h"
+
+#include "param.h"
+
+struct hist_str {	/* embedded in struct mngmsg as 'hist'; field notes are inferred from names - TODO confirm */
+  int histflg;
+  int *hist_a;	/* presumably the array of histogram bins */
+  int histint, min_hist, max_hist, maxh;	/* presumably bin width and bounds */
+  long entries;
+  int z_calls;
+  char stat_info[MAX_STR];	/* text buffer of MAX_STR chars */
+};
+
+struct db_str {	/* database size record; struct mngmsg holds two (db, ldb) */
+  long entries;	/* presumably the number of sequences - TODO confirm */
+  unsigned long length;	/* presumably total residues - TODO confirm */
+  int carry;	/* NOTE(review): purpose not visible here */
+};
+
+struct mng_thr {	/* structure to keep track of thread buffers */
+  int max_work_buf;	/* number of thread buffers */
+  int max_buf2_res;	/* number of results per thread buffer */
+  int max_chain_seqs;	/* number of sequences per seqr chain */
+  int seq_buf_size;	/* size of sequence buffer in each max_seq_cnt chain */
+};
+
+/* values required for successive calls to getlib() */
+struct lib_seq_info {
+  int ldnaseq;
+  int term_code;
+  int maxn;
+  int dupn;
+  int l_overlap;
+  int maxt3;
+};
+
+/* used by the help system to describe optional arguments */
+struct opt_def_str {
+  char opt_char;	/* presumably the single-letter option - TODO confirm */
+  int has_arg;
+  char *opt_str;
+  char *opt_descr_s;	/* presumably the short description */
+  char *opt_descr_l;	/* presumably the long description */
+  int opt_rank;
+  int fmt_type;
+  int i_param1;
+  int i_param2;
+  double d_param1;
+  double d_param2;
+  char *s_param;
+};
+
+struct markx_str {	/* one node of a singly linked list (see 'next'); mngmsg.markx_list points to such a list */
+  /* values to be copied into m_msg to modify format */
+  int markx;
+  int nohist;
+  int ashow;
+  int show_code;
+  int long_info;
+  int aln_llen;
+  int aln_llcntx;
+  int aln_llcntx_set;
+  int std_output;
+
+  char *out_file;
+  FILE *out_fd;
+  struct markx_str *next;	/* next alternate output description */
+};
+
+struct mngmsg 		/* Message from host to manager */
+{
+  char pgm_name[MAX_FN];	/* program name from argv[0] */
+  long max_memK;	/* maximum amount of sequence buffer memory */
+  int cur_seqr_cnt;	/* current number of seqr chains */
+  int n0;		/* query length ^qm_msg */
+  int nm0;		/* number of segments ^qm_msg */
+  int nmoff;		/* length of fastf segment */
+  unsigned char *aa0a;	/* annotation array */
+  struct annot_str *annot_p;	/* annot_str for query */
+  unsigned char ann_arr[MAX_FN]; /* annotation characters */
+  int ann_arr_n;	/* number of annotation characters */
+  char *ann_arr_def[MAX_FN];	/* definitions of ann_arr characters */
+  int ann_flg;		/* have annotation array, characters */
+  int have_ann;		/* have annotation on this query */
+  char tname[MAX_FN];	/* Query sequence name */
+  int tnamesize;	/* Query name size */
+  char lname[MAX_LSTR];	/* Library  file  name */
+  char link_lname[MAX_LSTR]; /* link-library name */
+  char annot0_sname[MAX_LSTR]; /* query annotation script name */
+  char annot1_sname[MAX_LSTR]; /* library annotation script name */
+  int max_tot;		/* function defined total sequence area */
+  int qdnaseq;		/* query is protein (0)/dna (1) */
+  int q_overlap;	/* overlap when segmenting long query sequence */
+  long q_offset;	/* q_offset, l_offset are set outside getlib()
+			   and report the number of residues that were
+			   skipped/read-previously to get to this
+			   version of aa0/aa1 (0-based) */
+  long q_off;		/* q_off, l_off are set by getlib(); starting
+			   at 1 but modified by @C: the position of
+			   the first residue of aa0/aa1 in the
+			   original sequence */
+  int qframe;		/* number of possible query frames */
+  int nframe;		/* frame for TFASTA */
+  int nitt1;		/* nframe-1 */
+  int thr_fact;		/* fudge factor for threads */
+  int s_int;		/* sampling interval for statistics */
+  int ql_start;		/* starting query sequence */
+  int ql_stop;		/* ending query sequence */
+  int pbuf_siz;		/* buffer size for sequences sent in p2_complib */
+  char qtitle[MAX_STR];	/* query title */
+  char ltitle[MAX_STR];	/* library title */
+  char flstr[MAX_FN];	/* FASTLIBS string */
+  int std_output;	/* produce normal output */
+  char outfile[MAX_FN];
+  FILE *outfd;	/* indicates outfile is already open */
+  char label [MAX_SSTR];	/* Output label, "opt", "s-w", "initn init1", "initn opt"  */
+  char alabel[MAX_SSTR];	/* Output label, "Smith-Waterman", "banded Smith-Waterman", etc  */
+  char f_id0[4];	/* function id for markx==10 */
+  char f_id1[4];	/* function id for markx==10 */
+  char sqnam[4];	/* "aa" or "nt" */ 
+  char sqtype[10];	/* "DNA" or "protein" */
+  int long_info;	/* long description flag*/
+  long sq0off, sq1off;	/* virtual offset into aa0, aa1 */
+  int markx;		/* alignment display type */
+  int tot_markx;	/* markx as sum of all alternative markx */
+  struct markx_str *markx_list;	/* list of alternate outputs */
+  int nbr_seq;		/* number of library sequences */
+  int n1_high;		/* upper limit on sequence length */
+  int n1_low;		/* lower limit on sequence length */
+  double e_cut;		/* e_value for display */
+  double e_low;		/* e_value for display */
+  int e_cut_set;	/* e_value deliberately set */
+  int zsflag;		/* zsflag for all searches */
+  int zsflag2;		/* zsflag2 for all searches */
+  int pamd1;		/* 1st dimension of pam matrix */
+  int pamd2;		/* 2nd dimension of pam matrix */
+  int revcomp;		/* flag to do reverse complement */
+  int quiet;		/* quiet option */
+  int nrelv;		/* number of interesting scores */
+  int srelv;		/* number of scores to show in showbest */
+  int arelv;		/* number of scores to show at alignment */
+  int z_bits;		/* z_bits==1: show bit score, ==0 show z-score */
+  int tot_ident;	/* tot_ident=1 -> no mismatches for 100% identity */
+  char alab[3][24];	/* labels for alignment scores */
+  int nohist;		/* no histogram option */
+  int do_showbest;	/* do not showbest() */
+  int nshow;		/* number shown in showbest() */
+  int nskip;		/* number skipped by e_low */
+  int mshow;		/* number of scores to show */
+  int mshow_set;	/* mshow set with -b */
+  int mshow_min;	/* at least mshow scores must be shown, but limited by e_cut */
+  int ashow;		/* number of alignments to show */
+  int ashow_set;	/* ashow set with -d */
+  int nmlen;		/* length of name label */
+  int show_code;	/* show alignment code in -m 9;  ==1 => identity only, ==2 alignment code*/
+  int tot_show_code;	/* show alignment for all outputs */
+  int pre_load_done;	/* set after pre_load_best() call */
+  int align_done;	/* do_walign() called */
+  unsigned char *aa1save_buf_b; /* buffer for re_getlib() sequences for alignments */
+  char *bline_buf_b; /* buffer for re_getlib() sequences for alignments */
+  int thold;		/* threshold */
+  int max_repeat;	/* max number of non-intersecting local alignments */
+  int last_calc_flg;	/* needs a last calculation stage */
+  int qshuffle;		/* shuffle the query and do additional comparisons ^qm_msg*/
+  int shuff_max;	/* number of shuffles to perform */
+  int shuff_max_save;	/* number of shuffles to perform */
+  int shuff_node;	/* number of shuffles/worker node */
+  int shuff_wid;
+  int stages;		/* number of stages */
+  double Lambda, K, H;	/* (ungapped) Karlin-Altschul parameters */
+  int escore_flg;	/* use escore calculated by do_work() */
+  struct lib_seq_info ldb_info;	/* maxn, maxt, l_overlap, ldnaseq */
+  struct db_str db;	/* the database size as read */
+  struct db_str ldb;	/* the database size via save_best() */
+  struct score_count_s s_info;	/* counts of different score types */
+  struct score_count_s ss_info;	/* counts of different score types from shuffles */
+  struct hist_str hist;
+  void *pstat_void;	/* pstat structure for standard statistics */
+  void *pstat_void2;	/* pstat structure for zsflag > 2 */
+  struct a_struct aln;	/* has llen, llnctx, llnctx_flg, showall */
+  struct a_res_str a_res; /* has individual alignment coordinates */
+  char dfile [MAX_FN];	/* file for dumping scores to */
+};
+
+struct lib_struct {	/* one node of a singly linked list of library files (see 'next') */
+  char *file_name; /* this library file */
+  int lib_type;	   /* the library type can be specified here, for files in an indirect list */
+  /* struct lib_mol_info *lib_params; */ /* parameters (ldnaseq, term_code, constant for all libraries */
+  struct lmf_str *acc_file_p;
+  struct lmf_str *m_file_p;	/* magic *m_file_p for reading */
+  struct lib_struct *next;	/* next in the list */  
+};
diff --git a/src/tatstats.c b/src/tatstats.c
new file mode 100644
index 0000000..ed5413e
--- /dev/null
+++ b/src/tatstats.c
@@ -0,0 +1,583 @@
+/* $Id: tatstats.c 1202 2013-07-20 12:55:32Z wrp $  */
+/* $Revision: 1202 $  */
+
+/* copyright (c) 1996, 1997, 1998, 1999, 2014 by William R. Pearson and the
+   The Rector & Visitors of the University of Virginia */
+
+/* Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under this License is distributed on an "AS
+   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+   express or implied.  See the License for the specific language
+   governing permissions and limitations under the License. 
+*/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <string.h>
+
+#include "defs.h"
+#include "param.h"
+#define XTERNAL
+#include "upam.h"
+#undef XTERNAL
+#include "tatstats.h"
+#include "best_stats.h"
+#define XTERNAL
+#include "uascii.h"
+
+/* calc_priors() - calculate frequencies of amino-acids, possibly with counts */
+/* generate_tatprobs() - build the table of score probabilities if the
+   sequences are not too long */
+
+double
+det(double a11, double a12, double a13,
+    double a21, double a22, double a23,
+    double a31, double a32, double a33);
+
+double power(double r, int p)
+{
+  /* r**p by repeated squaring; a negative p yields the reciprocal, and 0**p is 1 only for p==0 */
+  int invert = (p < 0);
+  double acc = 1.0;
+
+  if (r == 0.0) { return (p == 0) ? 1.0 : 0.0; }
+  if (invert) { p = -p; }
+  for ( ; p > 0; p >>= 1) {
+    if (p & 1) { acc *= r; }	/* fold in the current power of r for a set bit */
+    if (p > 1) { r *= r; }	/* square only while higher bits remain */
+  }
+  if (invert) { return 1.0/acc; }
+  return acc;
+}
+
+double
+factorial (int a, int b) {
+  /* falling product a*(a-1)*...*(b+1), i.e. a!/b! when a >= b >= 0;
+     yields 1.0 when a == 0 or a <= b */
+  double product;
+  int term;
+
+  if (a == 0) return 1.0;
+
+  product = 1.0;
+  for (term = a; term > b; term--) product *= (double) term;
+
+  return product;
+}
+
+/* calc_priors() - fill priors[1..nsq-1] with residue frequencies.
+   n1 == 0: reuse f_str->priors[] if already set, else use built-in
+   background counts; n1 > 0: composition of aa1[0..n1-1] blended with
+   f_str->priors[] weighted by pseudocts.  priors[0] is never written. */
+void
+calc_priors(double *priors,
+	    struct pstruct *ppst,
+	    struct f_struct *f_str,
+	    const unsigned char *aa1, int n1,
+	    int pseudocts)
+{
+  long *counts, sum;
+  int i, n_counts;
+
+  if (ppst->nsq > MAXUC) {	/* NOTE(review): only reported, execution continues */
+    fprintf(stderr,"*** ERROR *** tatstats.c:calc_priors nsq [ %d] out of range [%d]\n", ppst->nsq, MAXUC);
+  }
+
+  /* cached priors are available: copy them before allocating anything */
+  if (n1 == 0 && f_str->priors[1] > 0.0) {
+    for(i = 1 ; i < ppst->nsq ; i++) priors[i] = f_str->priors[i];
+    return;
+  }
+
+  /* residue codes 1..nsq pass the range check below, so counts[] needs nsq+1 slots */
+  n_counts = ppst->nsq + 1;
+  if ((counts = (long *)calloc(n_counts,sizeof(long)))==NULL) {
+    fprintf(stderr,"*** ERROR *** tatstats.c:calc_priors cannot allocate counts[] for priors\n");
+    exit(1);
+  }
+
+  /* pre-initialize counts/priors for all library sequences */
+  if(n1 == 0) {
+    if (ppst->dnaseq==SEQT_PROT ) {
+      /* Robinson & Robinson residue counts from Stephen Altschul */
+      counts[pascii['A']] = 35155;    /*   A  */
+      counts[pascii['R']] = 23105;    /*   R  */  
+      counts[pascii['N']] = 20212;    /*   N  */  
+      counts[pascii['D']] = 24161;    /*   D  */  
+      counts[pascii['C']] =  8669;    /*   C  */  
+      counts[pascii['Q']] = 19208;    /*   Q  */  
+      counts[pascii['E']] = 28354;    /*   E  */  
+      counts[pascii['G']] = 33229;    /*   G  */  
+      counts[pascii['H']] =  9906;    /*   H  */  
+      counts[pascii['I']] = 23161;    /*   I  */  
+      counts[pascii['L']] = 40625;    /*   L  */  
+      counts[pascii['K']] = 25872;    /*   K  */  
+      counts[pascii['M']] = 10101;    /*   M  */  
+      counts[pascii['F']] = 17367;    /*   F  */  
+      counts[pascii['P']] = 23435;    /*   P  */  
+      counts[pascii['S']] = 32070;    /*   S  */  
+      counts[pascii['T']] = 26311;    /*   T  */  
+      counts[pascii['W']] =  5990;    /*   W  */  
+      counts[pascii['Y']] = 14488;    /*   Y  */  
+      counts[pascii['V']] = 29012;    /*   V  */  
+      counts[pascii['B']] =     0;    /*   B  */  
+      counts[pascii['X']] =     0;    /*   X   */ 
+      counts[pascii['Z']] =     0;    /*   Z  */  
+      counts[pascii['U']] =     0;    /*   U   */
+      counts[pascii['*']] =     0;    /*   *   */
+      counts[pascii['O']] =     0;    /*   O   */
+      counts[pascii['J']] =     0;    /*   J   */
+    }
+    else { /* SEQT_DNA */
+      counts[pascii['A']] = 250;
+      counts[pascii['C']] = 250;
+      counts[pascii['G']] = 250;
+      counts[pascii['T']] = 250;
+      for (i=5; i<n_counts; i++) counts[i]=0;
+    }
+  }
+  else {	/* initialize counts for THIS library sequence */
+    for(i = 0 ; i < n1 ; i++) {
+      if(aa1[i] > ppst->nsq || aa1[i] < 1) continue;	/* ignore codes outside 1..nsq */
+      counts[aa1[i]]++;
+    }
+  }
+
+  sum = 0;
+  for(i = 1 ; i < ppst->nsq ; i++) sum += counts[i];	/* counts[nsq] is tallied but never summed */
+
+  for(i = 1 ; i < ppst->nsq ; i++) {
+    if(n1 == 0) {	/* pre-initialize */
+      priors[i] = (double) counts[i] / (double) sum;
+    } else {	/* THIS library sequence */
+      priors[i] = ( ((double) pseudocts * f_str->priors[i]) + (double) counts[i] ) / ( (double) sum + (double) pseudocts );
+    }
+  }
+  free(counts);
+} 
+
+int
+max_score(int *scores, int nsq) {
+  /* largest of scores[1..nsq-1] (entry 0 is skipped); -BIGNUM if that range is empty */
+  int best = -BIGNUM;
+  int idx = 1;
+
+  while (idx < nsq) {
+    if (best < scores[idx]) { best = scores[idx]; }
+    idx++;
+  }
+  return best;
+}
+
+int
+min_score(int *scores, int nsq) {
+  /* smallest of scores[1..nsq-1] (entry 0 is skipped); BIGNUM if that range is empty */
+  int lowest = BIGNUM, idx = 1;
+
+  while (idx < nsq) {
+    if (lowest > scores[idx]) { lowest = scores[idx]; }
+    idx++;
+  }
+  return lowest;
+}
+
+/* calc_tatusov: probability of reaching at least the summed score of the
+   segment chain `last` (followed through ->prev) plus `this`.  The score
+   distribution comes from generate_tatprobs() or, under FASTS/FASTM when
+   every segment spans its aa0b..aa0e range exactly, from the tables
+   already held in f_str.  The tail probability is multiplied by a factor
+   computed from n1, the summed segment length and the segment count, and
+   results above 0.01 are mapped through 1 - exp(-x).  The distribution
+   is left in this->newtat (not freed here).  aa1, pseudocts, do_opt and
+   zsflag are not used by the current code. */
+double
+calc_tatusov ( struct slink *last,
+	       struct slink *this,
+	       const unsigned char *aa0, int n0,
+	       const unsigned char *aa1, int n1,
+	       int **pam2, int nsq,
+	       struct f_struct *f_str,
+	       int pseudocts,
+	       int do_opt,
+	       int zsflag
+	       )
+{
+  int i, is, j, N, start, stop;
+  int length, maxlength, sumlength, sumscore, tmp, seg;
+  double *priors, *tatprobsptr, my_priors[MAXUC], tatprob, left_tatprob, right_tatprob;
+  unsigned char *query = NULL;
+  struct slink *sl;
+  struct tat_str *oldtat = NULL, *newtat = NULL;
+#if defined(FASTS) || defined(FASTM)
+  int index = 0;	/* bits of the aa0i entries whose segment matched end to end */
+  int notokay = 0;	/* bits of the aa0i entries matched only in part */
+#endif
+
+#if defined(FASTS) || defined(FASTM)
+  start = this->vp->start - this->vp->dp + f_str->noff;
+  stop = this->vp->stop - this->vp->dp + f_str->noff;
+  tmp = stop - start + 1;
+#else
+  /* FASTF alignments can also hang off the end of library sequences
+     without using up query residues, so count the residues that are
+     flagged as used */
+  tmp = 0;
+  for(i = 0, j = 0 ; i < n0 ; i++) {
+    if (this->vp->used[i] == 1) {tmp++; }
+  }
+#endif
+
+  sumlength = maxlength = length = tmp;
+  seg = 1;
+  sumscore = this->vp->score;
+
+#if defined(FASTS) || defined(FASTM)
+  if(f_str->aa0b[start] == start && f_str->aa0e[stop] == stop) {
+    index |= (1 << f_str->aa0i[start]);
+  } else {
+    notokay |= (1 << f_str->aa0i[start]);
+  }
+#endif
+
+  /* add the earlier segments of the chain */
+  for(sl = last; sl != NULL ; sl = sl->prev) {
+#if defined(FASTS) || defined(FASTM)
+    start = sl->vp->start - sl->vp->dp + f_str->noff;
+    stop = sl->vp->stop - sl->vp->dp + f_str->noff;
+    tmp = stop - start + 1;
+#else
+    tmp = 0;
+    for(i = 0, j = 0 ; i < n0 ; i++) {
+      if(sl->vp->used[i] == 1) {
+	tmp++;
+      }
+    }
+#endif
+    sumlength += tmp;
+    maxlength = tmp > maxlength ? tmp : maxlength;
+    seg++;
+    sumscore += sl->vp->score;
+
+#if defined(FASTS) || defined(FASTM)
+    if(f_str->aa0b[start] == start && f_str->aa0e[stop] == stop) {
+      index |= (1 << f_str->aa0i[start]);
+    } else {
+      notokay |= (1 << f_str->aa0i[start]);
+    }
+#endif
+  }
+
+  tatprob = -1.0;	/* negative means "not yet determined" */
+
+#if defined(FASTS) || defined(FASTM)
+
+  /* for T?FASTS, we try to use what we've precalculated: */
+
+  /* with z = 3, do_opt is true, but we can use precalculated - with
+     all other z's we can use precalculated only if !do_opt */
+  if(!notokay && f_str->tatprobs != NULL) {
+    /* create our own newtat and copy f_str's tat into it; the tables
+       are indexed by the bit set collected above, minus one */
+    index--;
+
+    newtat = (struct tat_str *) malloc(sizeof(struct tat_str));
+    if(newtat == NULL) {
+      fprintf(stderr, "Couldn't malloc memory for newtat.\n");
+      exit(1);
+    }
+
+    memcpy(newtat, f_str->tatprobs[index], sizeof(struct tat_str));
+
+    newtat->probs = (double *) calloc(f_str->tatprobs[index]->highscore - f_str->tatprobs[index]->lowscore + 1, sizeof(double));
+    if(newtat->probs == NULL) {
+      fprintf(stderr, "Couldn't calloc memory for newtat->probs.\n");
+      exit(1);
+    }
+
+    memcpy(newtat->probs, f_str->tatprobs[index]->probs,
+	   (f_str->tatprobs[index]->highscore - f_str->tatprobs[index]->lowscore + 1) * sizeof(double));
+
+    tatprob = f_str->intprobs[index][sumscore - f_str->tatprobs[index]->lowscore];
+    /*
+    if (tatprob > 0.0 && tatprob < 1e-50) {
+      fprintf(stderr," tatprob[%d][%d] near zero: %lf\n",index,sumscore - f_str->tatprobs[index]->lowscore,tatprob);
+    }
+    */
+  } else { /* we need to recalculate from scratch */
+#endif
+
+    /* for T?FASTF, we're always recalculating from scratch: */
+    query = (unsigned char *) calloc(length, sizeof(unsigned char));
+    if(query == NULL) {
+      fprintf(stderr, "Couldn't calloc memory for query.\n");
+      exit(1);
+    }
+
+#if defined(FASTS) || defined(FASTM)
+    start = this->vp->start - this->vp->dp + f_str->noff;
+    for(i = 0, j = 0 ; i < length ; i++) {
+      query[j++] = aa0[start + i];
+    }
+#else
+    for(i = 0, j = 0 ; i < n0 ; i++) {
+      if (this->vp->used[i] == 1) {query[j++] = aa0[i];}
+    }
+#endif
+
+    /*  calc_priors - not currently implemented for aa1 dependent */
+    /*
+    if( (do_opt && zsflag == 2) || zsflag == 4 ) {
+      priors = &my_priors[0];
+      calc_priors(priors, f_str, aa1, n1, pseudocts);
+    } else {
+      priors = f_str->priors;
+    }
+    */
+
+    priors = f_str->priors;
+    oldtat = (last != NULL ? last->tat : NULL);
+
+    /* extend last's distribution (if any) by the residues of this segment */
+    generate_tatprobs(query, 0, length - 1, priors, pam2, nsq, &newtat, oldtat);
+    free(query);
+#if defined(FASTS) || defined(FASTM)
+  } /* close the FASTS-specific if-else from above */
+#endif
+
+  this->newtat = newtat;
+
+  if(tatprob < 0.0) { /* hasn't been set by precalculated FASTS intprobs */
+    /* integrate probabilities >= sumscore */
+    tatprobsptr = newtat->probs;
+    is = i = newtat->highscore - newtat->lowscore;
+    N = sumscore - newtat->lowscore;
+    /* keep N inside [0, is+1] so neither sum below leaves probs[0..is] */
+    if (N < 0) {N = 0;} else if (N > is + 1) {N = is + 1;}
+
+    right_tatprob = 0;
+    for ( ;  i >= N; i--) {
+      right_tatprob += tatprobsptr[i];
+    }
+
+    /* scores below sumscore are probs[0..N-1]; starting the sum at zero
+       keeps probs[0] out of both tails when N == 0 */
+    left_tatprob = 0;
+    for (i = 0 ; i < N ; i++ ) {
+      left_tatprob += tatprobsptr[i];
+    }
+
+    if (right_tatprob < left_tatprob) {tatprob = right_tatprob;}
+    else {tatprob = 1.0 - left_tatprob;}
+    tatprob /= (right_tatprob+left_tatprob);
+  }
+
+  if (maxlength > 0) {
+    n1 += 2 * (maxlength - 1);
+  }
+
+#ifndef FASTM
+  tatprob *= factorial(n1 - sumlength + seg, n1 - sumlength);
+#else
+  tatprob *= power(n1 - sumlength,seg)/(1<<seg);
+#endif
+
+  if(tatprob > 0.01)
+    tatprob = 1.0 - exp(-tatprob);
+
+  return tatprob;
+}
+
+/* generate_tatprobs: build the distribution of summed pam2 scores for
+   query[begin..end] (extending oldtat if given) and return it in *tatarg */
+void
+generate_tatprobs(const unsigned char *query,	/* residue codes; each one selects a row of pam2 */
+		  int begin,	/* first position of query that is used */
+		  int end,	/* last position of query that is used (inclusive) */
+		  double *priors,	/* weight per residue code; entries 1..nsq-1 are read */
+		  int **pam2,	/* score rows; entries 1..nsq-1 of the selected row are read */
+		  int nsq,	/* exclusive upper bound of the residue codes visited */
+		  struct tat_str **tatarg,	/* out: newly allocated result; not freed here */
+		  struct tat_str *oldtat)	/* distribution to extend, or NULL to start from scratch */
+{
+  /* every allocation failure below prints a message and calls exit(1) */
+  int i, j, k, l, m, n, N, highscore, lowscore;
+  int *lowrange = NULL, *highrange = NULL;	/* running low/high score bound after each position */
+  int show_probs = 0;	/* referenced only by the commented-out debug output */
+  double *probs = NULL, *newprobs = NULL, *priorptr, tmp;
+  struct tat_str *tatprobs = NULL;
+  int *pamptr, *pamptrsave;
+  int last_zero;	/* assigned in the final check but never read */
+
+  if((tatprobs = (struct tat_str *) calloc(1, sizeof(struct tat_str)))==NULL) {
+    fprintf(stderr, "Couldn't allocate individual tatprob struct.\n");
+    exit(1);
+  }
+
+  n = end - begin + 1;	/* number of query positions to process */
+
+  if ( (lowrange = (int *) calloc(n, sizeof(int))) == NULL ) {
+    fprintf(stderr, "Couldn't allocate memory for lowrange.\n");
+    exit(1);
+  }
+  
+  if ( (highrange = (int *) calloc(n, sizeof(int))) == NULL ) {
+    fprintf(stderr, "Couldn't allocate memory for highrange.\n");
+    exit(1);
+  }
+
+  /* calculate the absolute highest and lowest score possible for this */
+  /* segment.  Also, set the range we need to iterate over at each position */
+  /* in the query: */
+  if(oldtat == NULL) {
+    highscore = lowscore = 0;
+  } else {
+    highscore = oldtat->highscore;
+    lowscore = oldtat->lowscore;
+  }
+
+  for ( i = 0 ; i < n ; i++ ) {
+
+    if (query[begin+i] == 0) break;	/* a zero code ends the scan; later range slots keep calloc's 0 */
+
+    highscore =
+      (highrange[i] = highscore + max_score(pam2[query[begin + i]], nsq));
+
+    lowscore =
+      (lowrange[i] = lowscore + min_score(pam2[query[begin + i]], nsq));
+
+    /*
+    fprintf(stderr, "i: %d, max: %d, min: %d, high[i]: %d, low[i]: %d, high: %d, low: %d, char: %d\n",
+	    i,
+	    max_score(pam2[query[begin + i]], nsq),
+	    min_score(pam2[query[begin + i]], nsq),
+	    highrange[i], lowrange[i],
+	    highscore, lowscore, query[begin + i]); 
+    */
+  }
+
+  /*
+  if (lowscore == -55 && highscore==34) {
+    show_probs = 1;
+    fprintf(stderr,"Range: low: %d -- high %d\n",lowscore, highscore);
+  }
+  */
+  /* allocate an array of probabilities for all possible scores */
+  /* i.e. if highest score possible is 50 and lowest score possible */
+  /* is -20, then there are 50 - (-20) + 1 = 71 possible different */
+  /* scores (including 0): */
+  N = highscore - lowscore;	/* probs[s - lowscore] is the slot of score s */
+  if ( (probs = (double *) calloc(N + 1, sizeof(double))) == NULL ) {
+    fprintf(stderr, "Couldn't allocate probability matrix : %d.\n", N + 1);
+    exit(1);
+  }
+
+  if(oldtat == NULL) {
+    /* for the first position, iterate over the only possible scores, */
+    /* summing the priors for the amino acids that can yield each score. */
+    pamptr = pam2[query[begin]];
+    for ( i = 1 ; i < nsq ; i++ ) {
+      if(priors[i] > 0.0) {
+	/*
+	fprintf(stderr," updated: %d(%d)[%d]: %0.4g\n",pamptr[i]-lowscore,pamptr[i],i,priors[i]);
+	*/
+	probs[(pamptr[i] - lowscore)] += priors[i];
+      }
+    }
+  } else {
+    /* Need to copy the data out of oldtat->probs into probs */
+    memcpy( &probs[oldtat->lowscore - lowscore],
+	    oldtat->probs,
+	    (oldtat->highscore - oldtat->lowscore + 1) * sizeof(double));
+  }
+
+  if ( (newprobs = (double *) calloc(N + 1, sizeof(double))) == NULL ) {
+    fprintf(stderr, "Couldn't allocate newprobs matrix.\n");
+    exit(1);
+  }
+
+  /* now for each remaining residue in the segment ... */
+  /* i is the position in the query */
+  for ( i = (oldtat == NULL ? 1 : 0) ; i < n ; i++ ) {	/* position 0 was seeded above when oldtat is NULL */
+
+    pamptrsave = pam2[query[begin + i]];
+
+    /* ... calculate new probability distribution .... */
+    /* ... for each possible score j (limited to current range) ... */
+    /* j is the possible score */
+    for ( j = lowrange[i] - lowscore,
+	    k = highrange[i] - lowscore ;
+	  j <= k ;
+	  j++ ) {
+      
+      tmp = 0.0;
+      pamptr = &pamptrsave[1];
+      priorptr = &priors[1];
+      /* ... for each of the possible alignment scores at this position ... */
+      for ( l = 1 ;
+	    l < nsq ;
+	    l++) {
+
+	/*
+	if (*priorptr == 0.0) {
+	  priorptr++;
+	  pamptr++;
+	  continue;
+	}
+	*/
+	/* make sure we don't go past highest possible score, or past
+           the lowest possible score; not sure why this can happen */
+	m = j - *pamptr++;	/* m: slot of the earlier score that this pam value lifts to j */
+	if ( m <= N && m >= 0 ) {
+	  /* update the probability of getting score j: */
+	  tmp += probs[m] * *priorptr;
+	  /*
+	  if (show_probs && j==N) 
+	    fprintf(stderr,"probs[%d]: %lg i: %d j: %d l: %d(%c) pam2: %d this: %lg prior: %lg tmp: %lg\n",m,probs[m],i,j,l,NCBIstdaa[l],*(pamptr-1),(probs[m]* *priorptr),*priorptr, tmp);
+	  */
+	}
+	priorptr++;
+      }
+
+      /*      if (tmp >= 0.0 && tmp < 1e-50) {
+	fprintf(stderr," tmp[%d] near zero: %lg\n",j,tmp);
+	} */
+      newprobs[j] += tmp;
+    }
+
+    /* save the new set of probabilities, get rid of old; we don't
+       necessarily have to copy/clear all N+1 slots, we could use
+       high/low score boundaries -- not sure that's worth the
+       effort. */
+    memcpy(probs, newprobs, (N + 1) * sizeof(double));
+    memset(newprobs, 0, (N + 1) * sizeof(double));
+  }
+  /* the loop below runs exactly once (i == N): it warns on stderr when the top-score slot is (nearly) zero */
+  last_zero = -100;
+  for (i=N; i < N+1; i++) {
+    tmp = probs[i];
+    if (tmp >= 0.0 && tmp < 1e-200) {
+      if (i == 1 || i == N) {
+	fprintf(stderr," *** tatstats.c/generate_tatprobs() probs[%d] near zero: %lg\n",i+lowscore,tmp);
+      }
+      last_zero = i;
+    }
+  }
+
+  free(newprobs);
+  free(highrange);
+  free(lowrange);
+  /* only the scratch arrays are freed; probs is handed over inside the result */
+  tatprobs->probs = probs;
+  /*  tatprobs->intprobs = intprobs; */
+  tatprobs->lowscore = lowscore;
+  tatprobs->highscore = highscore;
+
+  *tatarg = tatprobs;
+}
diff --git a/src/tatstats.h b/src/tatstats.h
new file mode 100644
index 0000000..9c5899a
--- /dev/null
+++ b/src/tatstats.h
@@ -0,0 +1,160 @@
+/* $Id: tatstats.h 1254 2014-01-29 16:03:40Z wrp $ */
+
+#ifndef MAXSQ
+#include "param.h"
+#endif
+
+#ifndef MAXSAV
+#ifdef FASTS
+#define MAXSAV 25
+#else
+#define MAXSAV 10
+#endif
+#endif
+
+#if defined(IBM_AIX) && defined(MAXSEG)
+#undef MAXSEG
+#endif
+#define MAXSEG 30
+
+struct savestr {		/* one saved match; reached through slink.vp */
+  int     score;		/* pam score with segment optimization; summed over a chain by calc_tatusov() */
+  int     score0;		/* pam score of best single segment */
+  int     start0;		/* score from global match -- NOTE(review): field name suggests a start position; confirm */
+  int     dp;			/* diagonal of match; calc_tatusov() uses start - dp + noff as a query index */
+  int     start;		/* start of match in lib seq */
+  int     stop;                 /* end of match in lib seq */
+  int     exact;		/* exact match */
+#if defined(FASTF)
+  int     *used;                /* array of positions in aa0 that were used: used[i] == 1 marks aa0[i] */
+#endif
+};
+
+struct dstruct	{	/* diagonal structure for saving current run */
+   int     score;	/* hash score of current match */
+   int     start;	/* start of current match */
+   int     stop;	/* end of current match */
+   struct savestr *dmax;   /* location in vmax[] where best score data saved; presumably f_struct.vmax -- TODO confirm */
+};
+
+struct tat_str {	/* score distribution as filled in by generate_tatprobs() */
+  double *probs;	/* highscore - lowscore + 1 entries; probs[s - lowscore] is the slot of score s */
+  int lowscore;	/* lowest score that has a slot in probs[] */
+  int highscore;	/* highest score that has a slot in probs[] */
+};
+
+struct f_struct {	/* working state passed to calc_tatusov(), seg_tatprob() and calc_priors() */
+  struct dstruct *diag;
+  struct savestr *vmax;	/* best matches saved for one sequence */
+  struct savestr **vptr;
+  struct slink *sarr;
+  struct savestr *lowmax;
+  int maxsav;	/* max number of peptide alignments saved in search */
+  int maxsav_w;	/* max number of peptide alignments saved in alignment */
+  int shuff_cnt;
+  int nsave;
+  int ndo;
+  int noff;	/* added by calc_tatusov() to (match position - dp) to obtain a query index */
+  int nm0;		/* number of fragments */
+#if defined(FASTS) || defined(FASTM)
+  int *nmoff;		/* offset number, start */
+  int *nm_u;
+  int *aa0b;	        /* beginning of each segment; calc_tatusov() indexes it by query position */
+  int *aa0e;		/* end of each segment; calc_tatusov() indexes it by query position */
+  int *aa0i;		/* index of each segment; used as a bit number, 1 << aa0i[pos] */
+  int *aa0s;		/* max score of each segment */
+  int *aa0l;		/* longest possible peptide match */
+#else
+  int nmoff;		/* offset number, start */
+  unsigned char *aa0;
+  int aa0ix;
+#endif
+  unsigned char *aa0t;	/* temp location for peptides */
+  int aa0t_off;		/* offset between aa0 and aa0t for correct coordinates */
+  int *aa0ti;		/* temp index for peptides */
+  int hmask;			/* hash constants */
+  int *pamh1;			/* pam based array */
+  int *pamh2;			/* pam based kfact array */
+#if defined(FASTS) || defined(FASTM)
+  int *link, *harr, *l_end;		/* hash arrays */
+#else
+  struct hlstr *link, *harr;            /* hash arrays */
+#endif
+  int kshft;			/* shift width */
+  int nsav, lowscor;		/* number of saved runs, worst saved run */
+  unsigned char *aa1x;		/* contains translated codons 111222333 */
+  unsigned char *aa1y;		/* contains translated codons 123123123 */
+  int n10;
+  int *waa;
+  int *res;
+  int max_res;
+  double *priors;	/* residue weights that calc_tatusov() passes to generate_tatprobs() */
+#if defined(FASTS) || defined(FASTM)
+  struct tat_str **tatprobs;          /* array of pointers to tat structs; may be NULL (checked in calc_tatusov()) */
+  double **intprobs;                  /* array of integrated tatprobs, read as intprobs[k][score - tatprobs[k]->lowscore] */
+#endif
+  int dotat;
+  double spacefactor;	/* presumably the value produced by calc_spacefactor() -- TODO confirm */
+};
+
+struct slink {	/* one link in a chain of saved matches */
+  int     score;
+  double  tatprob;
+  struct tat_str *tat;	/* calc_tatusov() reads last->tat as the distribution to extend */
+  struct tat_str *newtat;	/* set by calc_tatusov() to the distribution it used for this link */
+  struct savestr *vp;	/* the saved match this link refers to */
+  struct slink *next;
+  struct slink *prev;	/* calc_tatusov() follows prev until it reaches NULL */
+};
+
+struct segstr {	/* NOTE(review): pairs a probability with a length; its users are not visible here -- confirm */
+  double tatprob;
+  int length;
+};
+
+void generate_tatprobs(const unsigned char *query,
+		       int begin,
+		       int end,
+		       double *priors,
+		       int **pam2,
+		       int nsq,
+		       struct tat_str **tatarg, struct tat_str *oldtat);
+
+double
+calc_tatusov ( struct slink *last,
+	       struct slink *this,
+	       const unsigned char *aa0, int n0,
+	       const unsigned char *aa1, int n1,
+	       int **pam2, int nsq,
+	       struct f_struct *f_str,
+	       int pseudocts,
+	       int do_opt,
+	       int zsflag
+	       );
+
+double seg_tatprob(struct slink *start,
+		   const unsigned char *aa0,
+		   int n0,
+		   const unsigned char *aa1,
+		   int n1,
+		   struct f_struct *f_str,
+		   struct pstruct *ppst,
+		   int do_opt);
+
+void calc_priors(double *priors,
+		 struct pstruct *ppst,
+		 struct f_struct *f_str,
+		 const unsigned char *aa1,
+		 int n1, int pseudocts);
+
+double factorial (int a, int b);
+
+int max_score(int *scores, int nsq);
+
+int min_score(int *scores, int nsq);
+
+double calc_spacefactor(struct f_struct *f_str);
+
+void linreg(double *lnx, double *x, double *lny,
+	    int n,
+	    double *a, double *b, double *c, int start);
diff --git a/src/thr_buf_structs.h b/src/thr_buf_structs.h
new file mode 100644
index 0000000..ea5324e
--- /dev/null
+++ b/src/thr_buf_structs.h
@@ -0,0 +1,119 @@
+/* thr_buf_structs.h - structures for passing buffers of sequences to threads */
+
+/* $Id: thr_buf_structs.h 793 2011-07-03 00:03:55Z wrp $ */
+
+/* copyright (c) 2007, 2014 by William R. Pearson and The Rector &
+   Visitors of the University of Virginia */
+
+/* Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under this License is distributed on an "AS
+   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+   express or implied.  See the License for the specific language
+   governing permissions and limitations under the License. 
+*/
+
+#include <sys/types.h>
+
+struct thr_str {	/* NOTE(review): looks like the argument block handed to each worker thread -- confirm */
+  int worker;
+  void *status;
+  int max_work_buf;
+  int qframe;
+  struct pstruct *ppst;
+  const struct mngmsg *m_msp;
+  int qshuffle;
+  int nrshuffle;
+  unsigned char *aa0;	/* query sequence */
+  int n0;		/* query sequence length */
+  int nm0;	/* presumably the fragment count, as f_struct.nm0 -- TODO confirm */
+  int max_tot;
+  char info_lib_range[MAX_SSTR];
+  void **f_str_ap;
+};
+
+
+/* this structure passes library sequences to the worker threads
+   and returns scores */
+
+struct buf2_hdr_s {	/* meta-information part of a buffer (buf_head.hdr) */
+  int buf2_cnt;		/* number of buf2 records */
+  /*  int buf2_stats_cnt; */	/* number of stats scores */
+  int buf2_type;	/* search, shuffle, etc; presumably a combination of the BUF2_DO* flags -- confirm */
+  int stop_work;
+  int have_data;
+  int have_results;
+  int have_best_save;
+  int shuff_cnt;
+  int worker_idx;
+  struct seq_record *seq_b;	/* pointer to contig. actual sequence data */
+  struct mseq_record *mseq_b;	/* pointer to contig. seq. meta-info */
+  struct seqr_chain *my_chain;	/* pointer to the seqr_chain providing data for this buffer */
+  int seqr_cnt;		/* count of seq_records, which can be less than buf2_cnt */
+  int seq_record_continuous;	/* contiguous seq_record/aa1b */
+  unsigned char *aa1b_start;	/* pointer to contiguous aa1b buffer */
+  int aa1b_size;	/* allocated size of aa1b buffer -- needed for PCOMPLIB */
+  int aa1b_used;	/* used size of aa1b buffer -- needed for PCOMPLIB */
+  int my_id;
+  int my_worker;
+};
+
+
+/* this structure contains a single sequence record, with all the
+   information necessary to calculate a score */
+
+struct buf2_data_s {	/* element type of buf_head.buf2_data */
+  struct seq_record *seq;	/* pointer to sequence */
+  struct mseq_record *mseq;	/* pointer to sequence meta data */
+  int frame;			/* query frame used for returning results, indexes into aa0[], f_str[]
+				   also used in best_stats.h bbp->frame */
+  int repeat_thresh;		/* threshold for sub-alignment */
+  int stats_idx;		/* where to save for statistics */
+  int seq_dup;			/* duplicate entry for alternate frame */
+  struct beststr *best_save;	/* if beststr is pointing to this record, where is it saved? */
+};
+
+struct buf2_res_s {	/* element type of buf_head.buf2_res (score type results) */
+  struct rstruct rst;
+  int is_valid_stat;
+  int qr_score;	/* NOTE(review): qr_* and r_rst look like scores for a second (reversed/shuffled?) query -- confirm */
+  struct rstruct r_rst;
+  double qr_escore;
+};
+
+/*
+struct buf2_stats_s {
+  int stats_idx;
+  int valid_stat;
+  int iscore;
+  int n1;
+  double escore;
+  double comp;
+  double H;
+};
+*/
+
+struct buf2_ares_s {	/* element type of buf_head.buf2_ares (alignment results) */
+  int have_ares;	/* presumably non-zero once a_res is filled in -- TODO confirm */
+  struct a_res_str *a_res;
+  int best_idx;
+};
+
+#define BUF2_DOWORK 0x1
+#define BUF2_DOSHUF 0x2
+#define BUF2_DOOPT 0x4
+#define BUF2_DOALIGN 0x8
+
+struct buf_head {	/* one buffer: a header plus parallel arrays of inputs and results */
+  struct buf2_hdr_s hdr;		/* meta-information */
+  struct buf2_data_s *buf2_data;	/* input (sequence) data */
+  struct buf2_res_s *buf2_res; 		/* score type results */
+  /*  struct buf2_stats_s *buf2_stats; */	/* statistics values */
+  struct buf2_ares_s *buf2_ares;	/* alignment results */
+  struct score_count_s s_cnt_info;	/* statistics info */
+};
diff --git a/src/thr_bufs2.h b/src/thr_bufs2.h
new file mode 100644
index 0000000..d881ca2
--- /dev/null
+++ b/src/thr_bufs2.h
@@ -0,0 +1,41 @@
+/***************************************/
+/* thread global variable declarations */
+/***************************************/
+
+/* $Id: thr_bufs2.h 625 2011-03-23 17:21:38Z wrp $ */
+
+#ifndef MAX_WORKERS
+#define MAX_WORKERS 2
+#endif
+
+#ifndef XTERNAL	/* without XTERNAL this header defines the globals; with it, the #else branch only declares them */
+struct buf_head **worker_buf;  /* pointers to full buffers */
+struct buf_head **reader_buf;  /* pointers to empty buffers */
+
+/* protected by worker_mutex/worker_cond_var */
+ /* indices into full-buffers ptrs */
+int worker_buf_workp;	/* modified by get_wbuf() */
+int worker_buf_readp;	/* modified by put_rbuf() */
+int num_worker_bufs;
+int reader_done;
+
+/* protected by reader_mutex/reader_cond_var */
+ /* indices into empty-buffers ptrs */
+int reader_buf_workp;	/* modified by put_wbuf() */
+int reader_buf_readp;	/* modified by get_rbuf(), main()-- after rbuf_wait */
+int num_reader_bufs;
+int reader_wait;
+
+/* protected by start_mutex/start_cond_var */
+int start_thread=1;        /* start-up predicate, 0 starts */
+#else	/* XTERNAL defined: extern declarations of the same variables */
+extern struct buf_head **worker_buf;
+extern struct buf_head **reader_buf;
+extern int num_worker_bufs, reader_done;
+extern int num_reader_bufs, reader_wait;
+extern int worker_buf_workp, worker_buf_readp;
+extern int reader_buf_workp, reader_buf_readp;
+
+extern int start_thread;
+#endif
+
diff --git a/src/uascii.h b/src/uascii.h
new file mode 100644
index 0000000..983a2dd
--- /dev/null
+++ b/src/uascii.h
@@ -0,0 +1,59 @@
+/* Concurrent read version */
+/*	ascii.gbl	ascii translation to amino acids */
+/*	modified 10-Mar-1987 for B, Z	*/
+
+/* $Id: uascii.h 989 2012-07-24 19:37:38Z wrp $ */
+
+#define NA 123	/* fills the aascii[]/nascii[] slots of characters that have no residue code */
+#define NANN 60	/* changed 24-July-2012 because NCBIstdaa_ext_n = 56 */
+#define ESS 59	/* code for ',' in FASTS,FASTF, FASTM */
+#define EL 125	/* stored for NUL, LF and CR in aascii[] and nascii[] */
+#define ES 126	/* stored for '*', ';' and '>' in nascii[] */
+#define AAMASK 127	/* NOTE(review): defined a second time, same value, in the XTERNAL branch below */
+
+#ifndef XTERNAL
+/*       0  1  2  3  4  5  6  7  8  9  10 11 12 13 14 15	*/
+/* 32	    !  "  #  $  %  &  '  (  )  *  +  ,  -  .  / 	*/
+/* 48	 0  1  2  3  4  5  6  7  8  9  :  ;  <  =  >  ? 	*/
+/* 64	 @  A  B  C  D  E  F  G  H  I  J  K  L  M  N  O		*/ 
+/* 80	 P  Q  R  S  T  U  V  W  X  Y  Z  [  \  ]  ^  _		*/ 
+/* 96	 `  a  b  c  d  e  f  g  h  i  j  k  l  m  n  o 	*/
+/*112	 p  q  r  s  t  u  v  w  x  y  z  {  |  }  ~  ^?	*/ 
+
+int aascii[128]={
+	EL,NA,NA,NA,NA,NA,NA,NA,NA,NA,EL,NA,NA,EL,NA,NA,	/* 15 */
+	NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,	/* 31 */
+	NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,24,NA,NA,NA,NA,NA,	/* 47 */
+	NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,	/* 63 */
+	NA, 1,21, 5, 4, 7,14, 8, 9,10,25,12,11,13, 3,26,	/* 79 */
+	15, 6, 2,16,17,27,20,18,23,19,22,NA,NA,NA,NA,NA,	/* 95 */
+	NA, 1,21, 5, 4, 7,14, 8, 9,10,25,12,11,13, 3,26,	/*111 */
+	15, 6, 2,16,17,27,20,18,23,19,22,NA,NA,NA,NA,NA};	/*127 */
+
+int nascii[128]={
+/*	 0  1  2  3  5  6  7  8  9 10 11 12 13 14 15 15
+	 @  A  B  C  D  E  F  G  H  I  J  K  L  M  N  O
+	 P  Q  R  S  T  U  V  W  X  Y  Z		*/
+	EL,NA,NA,NA,NA,NA,NA,NA,NA,NA,EL,NA,NA,EL,NA,NA,
+	NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,
+	NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,ES,NA,NA,16,NA,NA,
+	NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,ES,NA,NA,ES,NA,
+	NA, 1,15, 2,12,NA,NA, 3,13,NA,NA,11,NA, 8,16,NA,
+	 6, 7, 6,10, 4, 5,14, 9,17, 7,NA,NA,NA,NA,NA,NA,
+	NA, 1,15, 2,12,NA,NA, 3,13,NA,NA,11,NA, 8,16,NA,
+	 6, 7, 6,10, 4, 5,14, 9,17, 7,NA,NA,NA,NA,NA,NA};
+
+int *pascii;
+int qascii[128];
+int lascii[128];
+int l_ann_ascii[128];
+#else
+#define AAMASK 127
+extern int aascii[128];
+extern int nascii[128];
+
+extern int *pascii;
+extern int qascii[128];
+extern int lascii[128];
+extern int l_ann_ascii[128];
+#endif
diff --git a/src/upam.h b/src/upam.h
new file mode 100644
index 0000000..e36d59b
--- /dev/null
+++ b/src/upam.h
@@ -0,0 +1,872 @@
+/* Concurrent read version */
+/*	20-June-1986	universal pam file */
+
+/* $Id: upam.h 1124 2013-03-13 20:24:57Z wrp $ */
+/* $Revision: 1124 $  */
+
+/* modified to accommodate both lower and upper case amino acid numbers;
+   as a result MAXSQ was raised to 50 (it is defined as 60 below)
+*/
+
+
+#ifndef UPAM_GBL_DEF
+#define UPAM_GBL_DEF
+
+#define TERM 25
+#define EOSEQ 0
+#define MAXSQ 60	/* increased to accommodate ESS (59, see uascii.h) */
+#define MAXUC 28	/* same value as NCBIstdaa_n */
+#define MAXLC 56	/* same value as NCBIstdaa_ext_n */
+
+#define MAXHASH 32
+#define NMAP MAXHASH+1	/* "not-mapped" hash value; NOTE(review): expands without parentheses, use only as a whole operand */
+
+struct std_pam_str {	/* presumably one entry per built-in scoring matrix -- TODO confirm against the table that uses it */
+  char abbrev[6];	/* argument name */
+  char name[10];	/* canonical name */
+  int *pam;		/* data pointer */
+  float scale;		/* pam scale (ln(2)/2, ln(2)/3) */
+  float ulambda;	/* lambda (ungapped) */
+  float entropy;	/* bits/position */
+  float tfract_id;	/* target fract id */
+  int gdel, ggap;	/* gdel, ggap */
+};
+
+#ifndef XTERNAL
+
+int pamoff=0;
+
+/*extern int gdelval, ggapval;*/
+
+/* char sqnam[]="aa"; */
+/* char sqtype[]="protein"; */
+
+
+/* this alphabet covers 56 values, so that libraries can include lower-case characters which will be re-mapped unless -S */
+char *NCBIstdaa = "-ABCDEFGHIKLMNPQRSTVWXYZU*OJ";
+char *NCBIstdaa_l = "-ABCDEFGHIKLMNPQRSTVWXYZU*OJ-ABCDEFGHIKLMNPQRSTVWXYZU*OJ";
+char NCBIstdaa_n = 28;
+
+char *NCBIstdaa_ext = "-ABCDEFGHIKLMNPQRSTVWXYZU*OJ-abcdefghiklmnpqrstvwxyzu*oj";
+char NCBIstdaa_ext_n = 56;
+
+/* the residue ordering used for the internal pam matrices */
+char *pam_sq;
+int pam_sq_n;
+char *apam_sq  = "\0ARNDCQEGHILKMFPSTWYVBZX*";
+int apam_sq_n = 25;
+char *npam_sq =  "\0ACGTURYMWSKDHVBNX";
+int npam_sq_n = 17;
+
+/* these values have been replaced by NCBIstdaa and *pam_sq */
+/*
+char aa[MAXSQ+1]  = {"\0ARNDCQEGHILKMFPSTWYVBZX*JARNDCQEGHILKMFPSTWYVBZX*J\0"};
+char aax[MAXSQ+1] = {"\0ARNDCQEGHILKMFPSTWYVBZX*Jarndcqeghilkmfpstwyvbzx*j\0"};
+*/
+char pssm_aa[26]  = {"\0ARNDCQEGHILKMFPSTWYVBZX*"};
+
+char othx[MAXSQ+1] = {"OUou\0"};
+int noth = 2;
+int nothx = 4;
+
+int naa = 25;	/* this should be calculated from aa[] */
+int naax = 50;
+
+/* haa[] used to map all valid amino acid codes into a hash value;
+   now, there is an additional hash value - not-mapped - NM */
+
+/* this has been expanded to accommodate '*' */
+  /* 0 A R N D C Q E G H  I  L  K  M  F  P  S  T  W  Y  V B Z X    *    J */
+/*
+int haa[MAXSQ+1] = {
+  NMAP,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,3,7,NMAP,NMAP,10,
+       1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,3,7,NMAP,NMAP,10};
+*/
+int h_NCBIstdaa[MAXSQ+1] = {
+  /* - A  B C D E F G H I  K  L  M  N  P  Q  R  S  T  V  W    X  Y  Z  U  *    O  J */
+  NMAP,1,13,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,NMAP,21, 5, 3,NMAP,10,11,
+  NMAP,1,13,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,NMAP,21, 5, 3,NMAP,10,11
+};
+
+int h_NCBIstdaa_ext[MAXSQ+1] = {
+  /* - A  B C D E F G H I  K  L  M  N  P  Q  R  S  T  V  W    X  Y  Z  U  *    O  J */
+  NMAP,1,13,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,NMAP,21, 5, 3,NMAP,10,11,
+  NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,
+  NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,
+};
+
+/*
+int haax[MAXSQ+1] = {
+  NMAP, 1,2,3,4,5,6,7,8,9,
+  10,11,12,13,14,15,16,17,18,19,
+  20,3,7,NMAP,NMAP,10,NMAP,NMAP,NMAP,NMAP,
+  NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,
+  NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,
+  NMAP};
+*/
+
+/*
+  PAM 250 substitution matrix, scale = ln(2)/3 = 0.231049
+  Expected score = -0.844, Entropy = 0.354 bits
+  Lowest score = -8, Highest score = 17
+*/
+int apam250[450] = {
+ 2,
+-2, 6,
+ 0, 0, 2,
+ 0,-1, 2, 4,
+-2,-4,-4,-5,12,
+ 0, 1, 1, 2,-5, 4,
+ 0,-1, 1, 3,-5, 2, 4,
+ 1,-3, 0, 1,-3,-1, 0, 5,
+-1, 2, 2, 1,-3, 3, 1,-2, 6,
+-1,-2,-2,-2,-2,-2,-2,-3,-2, 5,
+-2,-3,-3,-4,-6,-2,-3,-4,-2, 2, 6,
+-1, 3, 1, 0,-5, 1, 0,-2, 0,-2,-3, 5,
+-1, 0,-2,-3,-5,-1,-2,-3,-2, 2, 4, 0, 6,
+-4,-4,-4,-6,-4,-5,-5,-5,-2, 1, 2,-5, 0, 9,
+ 1, 0,-1,-1,-3, 0,-1,-1, 0,-2,-3,-1,-2,-5, 6,
+ 1, 0, 1, 0, 0,-1, 0, 1,-1,-1,-3, 0,-2,-3, 1, 2,
+ 1,-1, 0, 0,-2,-1, 0, 0,-1, 0,-2, 0,-1,-3, 0, 1, 3,
+-6, 2,-4,-7,-8,-5,-7,-7,-3,-5,-2,-3,-4, 0,-6,-2,-5,17,
+-3,-4,-2,-4, 0,-4,-4,-5, 0,-1,-1,-4,-2, 7,-5,-3,-3, 0,10,
+ 0,-2,-2,-2,-2,-2,-2,-1,-2, 4, 2,-2, 2,-1,-1,-1, 0,-6,-2, 4,
+ 0,-1, 2, 3,-4, 1, 2, 0, 1,-2,-3, 1,-2,-5,-1, 0, 0,-5,-3,-2, 2,
+ 0, 0, 1, 3,-5, 3, 3,-1, 2,-2,-3, 0,-2,-5, 0, 0,-1,-6,-4,-2, 2, 3,
+ 0,-1, 0,-1,-3,-1,-1,-1,-1,-1,-1,-1,-1,-2,-1, 0, 0,-4,-2,-1,-1,-1,-1,
+ 0,-1, 0,-1,-3,-1,-1,-1,-1,-1,-1,-1,-1,-2,-1, 0, 0,-4,-2,-1,-1,-1,-1, 8};
+
+/*
+ This matrix was produced by "pam" Version 1.0.6 [28-Jul-93]
+ PAM 120 substitution matrix, scale = ln(2)/2 = 0.346574
+ Expected score = -1.64, Entropy = 0.979 bits
+ Lowest score = -8, Highest score = 12
+*/
+int apam120[450] = {
+  3,
+ -3, 6,
+  0,-1, 4,
+  0,-3, 2, 5,
+ -3,-4,-5,-7, 9,
+ -1, 1, 0, 1,-7, 6,
+  0,-3, 1, 3,-7, 2, 5,
+  1,-4, 0, 0,-5,-3,-1, 5,
+ -3, 1, 2, 0,-4, 3,-1,-4, 7,
+ -1,-2,-2,-3,-3,-3,-3,-4,-4, 6,
+ -3,-4,-4,-5,-7,-2,-4,-5,-3, 1, 5,
+ -2, 2, 1,-1,-7, 0,-1,-3,-2,-2,-4, 5,
+ -2,-1,-3,-4,-6,-1,-4,-4,-4, 1, 3, 0, 8,
+ -4,-4,-4,-7,-6,-6,-6,-5,-2, 0, 0,-6,-1, 8,
+  1,-1,-2,-2,-3, 0,-1,-2,-1,-3,-3,-2,-3,-5, 6,
+  1,-1, 1, 0,-1,-2,-1, 1,-2,-2,-4,-1,-2,-3, 1, 3,
+  1,-2, 0,-1,-3,-2,-2,-1,-3, 0,-3,-1,-1,-4,-1, 2, 4,
+ -7, 1,-5,-8,-8,-6,-8,-8,-5,-7,-5,-5,-7,-1,-7,-2,-6, 12,
+ -4,-6,-2,-5,-1,-5,-4,-6,-1,-2,-3,-6,-4, 4,-6,-3,-3,-1, 8,
+  0,-3,-3,-3,-2,-3,-3,-2,-3, 3, 1,-4, 1,-3,-2,-2, 0,-8,-3, 5,
+  0,-2, 3, 4,-6, 0, 3, 0, 1,-3,-4, 0,-4,-5,-2, 0, 0,-6,-3,-3, 4,
+ -1,-1, 0, 3,-7, 4, 4,-2, 1,-3,-3,-1,-2,-6,-1,-1,-2,-7,-5,-3, 2, 4,
+ -1,-2,-1,-2,-4,-1,-1,-2,-2,-1,-2,-2,-2,-3,-2,-1,-1,-5,-3,-1,-1,-1,-2,
+ -1,-2,-1,-2,-4,-1,-1,-2,-2,-1,-2,-2,-2,-3,-2,-1,-1,-5,-3,-1,-1,-1,-2, 6};
+
+/*
+#
+#  VTML_10
+#
+# This matrix was produced from: vtml_10qij.mat using robinson2.back background frequencies
+#
+# VTML_10 substitution matrix, Units = bits/2.0
+# Expected score = -3.859581 bits; Entropy = 3.462930 bits
+# Target fraction identity = 0.9107
+# Lowest Score = -20, Highest Score= 12
+#
+*/
+int a_vt10[450] = {
+   7,
+  -9,   8,
+  -8,  -7,   9,
+  -8, -16,  -3,   8,
+  -5,  -9, -10, -18,  11,
+  -7,  -4,  -6,  -6, -17,   9,
+  -7, -14,  -7,  -3, -18,  -3,   8,
+  -6,  -9,  -7,  -8, -10, -10,  -8,   7,
+  -9,  -5,  -4,  -6,  -9,  -4,  -8,  -9,  11,
+  -9, -10, -11, -15,  -8, -12, -12, -19, -11,   8,
+  -9, -10, -11, -19, -17,  -8, -10, -13,  -8,  -3,   7,
+  -8,  -2,  -5,  -7, -17,  -4,  -4,  -9,  -7, -10, -10,   8,
+  -7,  -8,  -9, -11,  -7,  -6, -10, -12, -15,  -3,  -2,  -7,  11,
+ -10, -12, -13, -20, -17, -10, -18, -13,  -6,  -6,  -5, -18,  -4,   9,
+  -6,  -8, -10,  -8, -11,  -7,  -8, -10,  -8, -12,  -9,  -7, -12, -10,   8,
+  -4,  -7,  -4,  -7,  -5,  -6,  -6,  -6,  -6, -11, -10,  -7, -10,  -8,  -6,   7,
+  -5,  -8,  -5,  -8,  -8,  -7,  -7, -10,  -7,  -6,  -9,  -6,  -6, -10,  -8,  -3,   8,
+ -11, -10, -12, -12, -20, -19, -20, -11,  -8,  -8,  -8, -10, -16,  -4, -11, -10, -19,  12,
+ -10,  -9,  -8, -17,  -7, -16,  -9, -13,  -3,  -9,  -8, -10, -15,  -1, -19,  -8, -10,  -4,  10,
+  -5, -11, -11, -11,  -6,  -9,  -9, -12, -10,  -1,  -5,  -9,  -5,  -8, -10, -10,  -6, -17,  -9,   8,
+  -8, -11,   3,   2, -14,  -6,  -5,  -7,  -5, -13, -15,  -6, -10, -16,  -9,  -5,  -6, -12, -12, -11,   8,
+  -7,  -9,  -6,  -4, -17,   3,   2,  -9,  -6, -12,  -9,  -4,  -8, -14,  -7,  -6,  -7, -19, -12,  -9,  -4,   8,
+  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,   -1
+};
+
+/*
+#
+#  VTML_20
+#
+# This matrix was produced from: vtml_20qij.mat using robinson2.back background frequencies
+#
+# VTML_20 substitution matrix, Units = bits/2.0
+# Expected score = -2.889610 bits; Entropy = 2.921119 bits
+# Target fraction identity = 0.8312
+# Lowest Score = -16, Highest Score= 12
+#
+*/
+int a_vt20[450] = {
+   7,
+  -7,   8,
+  -6,  -5,   8,
+  -6, -12,  -1,   8,
+  -3,  -7,  -8, -14,  11,
+  -5,  -2,  -4,  -4, -13,   8,
+  -5, -10,  -5,  -1, -14,  -1,   7,
+  -4,  -7,  -5,  -6,  -8,  -8,  -6,   7,
+  -7,  -3,  -3,  -5,  -7,  -2,  -6,  -7,  10,
+  -7,  -8,  -9, -13,  -6, -10, -10, -15,  -9,   8,
+  -7,  -8,  -9, -15, -13,  -7,  -8, -11,  -7,  -1,   6,
+  -6,   0,  -3,  -5, -14,  -2,  -2,  -7,  -5,  -8,  -8,   8,
+  -5,  -6,  -7,  -9,  -5,  -5,  -8, -10, -12,  -1,   0,  -5,  10,
+  -8, -10, -10, -16, -13,  -8, -14, -11,  -5,  -4,  -3, -14,  -2,   9,
+  -4,  -6,  -8,  -6,  -9,  -5,  -6,  -8,  -6, -10,  -7,  -6, -10,  -8,   8,
+  -2,  -6,  -2,  -5,  -4,  -4,  -5,  -4,  -4,  -9,  -8,  -5,  -8,  -6,  -4,   7,
+  -3,  -6,  -4,  -6,  -6,  -5,  -6,  -8,  -5,  -4,  -7,  -4,  -4,  -8,  -6,  -1,   8,
+  -9,  -8, -10, -11, -16, -15, -16,  -9,  -6,  -6,  -6,  -8, -12,  -3,  -9,  -8, -15,  12,
+  -8,  -7,  -6, -14,  -5, -12,  -7, -10,  -1,  -7,  -6,  -8, -11,   1, -15,  -6,  -8,  -2,   9,
+  -3,  -9,  -9,  -9,  -4,  -7,  -7, -10,  -8,   1,  -3,  -8,  -3,  -6,  -8,  -8,  -4, -13,  -7,   7,
+  -6,  -8,   3,   3, -11,  -4,  -3,  -5,  -4, -11, -12,  -4,  -8, -13,  -7,  -3,  -5, -10, -10,  -9,   8,
+  -5,  -6,  -4,  -3, -13,   3,   3,  -7,  -4, -10,  -7,  -2,  -6, -11,  -5,  -4,  -5, -15,  -9,  -7,  -2,   7,
+  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,   -1
+};
+
+/*
+#
+#  VTML_40
+#
+# This matrix was produced from: vtml_40qij.mat using robinson2.back background frequencies
+#
+# VTML_40 substitution matrix, Units = bits/2.0
+# Expected score = -1.963330 bits; Entropy = 2.266217 bits
+# Target fraction identity = 0.6968
+# Lowest Score = -13, Highest Score= 12
+#
+# Storage: lower-triangular, row i holds i scores; 23 rows (276 values)
+# are initialized and the rest of the 450 elements default to 0.
+# The last row given is a uniform -1.
+# NOTE(review): row order is presumably A R N D C Q E G H I L K M F P S T
+# W Y V B Z X, matching the column header given for abl50 -- confirm.
+#
+*/
+int a_vt40[450] = {
+   6,
+  -5,   8,
+  -4,  -3,   8,
+  -4,  -8,   0,   7,
+  -1,  -5,  -6, -10,  11,
+  -3,   0,  -2,  -3, -10,   8,
+  -3,  -7,  -3,   1, -11,   0,   7,
+  -2,  -5,  -3,  -4,  -6,  -6,  -5,   7,
+  -5,  -1,  -1,  -3,  -5,   0,  -4,  -5,  10,
+  -5,  -6,  -7, -10,  -4,  -7,  -8, -12,  -7,   7,
+  -5,  -6,  -7, -11,  -9,  -5,  -7,  -9,  -5,   1,   6,
+  -4,   2,  -2,  -3, -10,  -1,  -1,  -5,  -3,  -6,  -6,   7,
+  -4,  -4,  -5,  -7,  -3,  -3,  -6,  -8,  -8,   0,   1,  -4,   9,
+  -6,  -8,  -8, -13, -10,  -7, -11,  -9,  -3,  -2,  -1, -10,  -1,   9,
+  -3,  -5,  -6,  -4,  -7,  -4,  -4,  -6,  -4,  -8,  -5,  -4,  -7,  -7,   8,
+   0,  -4,  -1,  -3,  -2,  -3,  -3,  -3,  -3,  -6,  -6,  -3,  -6,  -5,  -3,   6,
+  -1,  -4,  -2,  -4,  -4,  -3,  -4,  -6,  -3,  -3,  -5,  -3,  -2,  -6,  -4,   0,   7,
+  -7,  -6,  -8,  -9, -12, -12, -12,  -7,  -4,  -4,  -4,  -7,  -9,  -1,  -7,  -6, -11,  12,
+  -6,  -5,  -4, -10,  -3,  -9,  -5,  -8,   0,  -5,  -4,  -6,  -7,   2, -11,  -5,  -6,   0,   9,
+  -1,  -7,  -7,  -7,  -2,  -5,  -6,  -8,  -6,   3,  -1,  -6,  -1,  -4,  -6,  -6,  -2, -10,  -5,   7,
+  -4,  -5,   4,   3,  -8,  -2,  -1,  -3,  -2,  -8,  -9,  -2,  -6, -10,  -5,  -2,  -3,  -8,  -7,  -7,   7,
+  -3,  -4,  -2,  -1, -10,   4,   3,  -5,  -2,  -7,  -6,  -1,  -4,  -9,  -4,  -3,  -3, -12,  -7,  -5,   0,   7,
+  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,   -1
+};
+
+/*
+#
+#  VTML_80
+#
+# This matrix was produced from: vtml_80qij.mat using robinson2.back background frequencies
+#
+# VTML_80 substitution matrix, Units = bits/2.0
+# Expected score = -1.137019 bits; Entropy = 1.390771 bits
+# Target fraction identity = 0.5024
+# Lowest Score = -9, Highest Score= 12
+#
+# Storage: lower-triangular, row i holds i scores; 23 rows (276 values)
+# are initialized and the rest of the 450 elements default to 0.
+# The last row given is a uniform -1.
+# NOTE(review): row order is presumably A R N D C Q E G H I L K M F P S T
+# W Y V B Z X, matching the column header given for abl50 -- confirm.
+#
+*/
+int a_vt80[450] = {
+   5,
+  -3,   7,
+  -2,  -1,   7,
+  -2,  -5,   1,   7,
+   0,  -4,  -4,  -7,  10,
+  -2,   1,  -1,  -1,  -6,   6,
+  -2,  -3,  -1,   2,  -7,   1,   6,
+  -1,  -3,  -1,  -2,  -4,  -4,  -3,   6,
+  -3,   0,   0,  -1,  -3,   0,  -2,  -4,   9,
+  -3,  -5,  -5,  -7,  -2,  -5,  -5,  -8,  -4,   6,
+  -3,  -4,  -5,  -8,  -6,  -3,  -5,  -7,  -3,   2,   5,
+  -2,   3,  -1,  -2,  -6,   0,   0,  -3,  -1,  -4,  -4,   6,
+  -2,  -3,  -4,  -5,  -2,  -2,  -4,  -6,  -5,   2,   2,  -2,   8,
+  -4,  -5,  -6,  -9,  -6,  -5,  -7,  -7,  -1,  -1,   0,  -7,   0,   8,
+  -1,  -3,  -4,  -3,  -5,  -2,  -2,  -4,  -3,  -5,  -4,  -2,  -5,  -5,   7,
+   1,  -2,   0,  -1,  -1,  -2,  -1,  -1,  -1,  -4,  -4,  -1,  -4,  -3,  -1,   5,
+   0,  -2,  -1,  -2,  -2,  -2,  -2,  -4,  -2,  -1,  -3,  -1,  -1,  -4,  -3,   1,   6,
+  -5,  -4,  -6,  -7,  -9,  -8,  -8,  -5,  -2,  -2,  -2,  -5,  -5,   1,  -6,  -5,  -8,  12,
+  -4,  -3,  -3,  -7,  -2,  -6,  -4,  -6,   2,  -3,  -2,  -4,  -4,   3,  -8,  -3,  -4,   1,   8,
+   0,  -4,  -5,  -5,  -1,  -4,  -4,  -6,  -4,   3,   0,  -4,   0,  -2,  -4,  -4,  -1,  -6,  -4,   6,
+  -2,  -3,   4,   4,  -5,  -1,   0,  -1,   0,  -6,  -6,  -1,  -4,  -7,  -3,   0,  -1,  -6,  -5,  -5,   7,
+  -2,  -1,  -1,   0,  -6,   4,   3,  -3,  -1,  -5,  -4,   0,  -3,  -6,  -2,  -1,  -2,  -8,  -5,  -4,   0,   6,
+  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,   -1
+};
+
+/*
+#
+#  VTML_120
+#
+# This matrix was produced from: vtml_120qij.mat using robinson2.back background frequencies
+#
+# VTML_120 substitution matrix, Units = bits/2.0
+# Expected score = -0.723803 bits; Entropy = 0.938201 bits
+# Target fraction identity = 0.3748
+# Lowest Score = -7, Highest Score= 11
+#
+# Storage: lower-triangular, row i holds i scores; 23 rows (276 values)
+# are initialized and the rest of the 450 elements default to 0.
+# The last row given is a uniform -1.
+# NOTE(review): row order is presumably A R N D C Q E G H I L K M F P S T
+# W Y V B Z X, matching the column header given for abl50 -- confirm.
+#
+*/
+int a_vt120[450] = {
+   4,
+  -2,   6,
+  -1,  -1,   6,
+  -1,  -3,   2,   6,
+   0,  -3,  -3,  -5,   9,
+  -1,   1,   0,   0,  -5,   5,
+  -1,  -2,   0,   2,  -5,   1,   5,
+   0,  -2,  -1,  -1,  -3,  -3,  -2,   6,
+  -2,   0,   1,  -1,  -3,   1,  -1,  -3,   8,
+  -2,  -3,  -4,  -5,  -1,  -4,  -4,  -6,  -3,   5,
+  -2,  -3,  -4,  -6,  -4,  -3,  -4,  -5,  -2,   2,   5,
+  -1,   3,   0,  -1,  -5,   1,   1,  -2,   0,  -3,  -3,   5,
+  -1,  -2,  -3,  -4,  -1,  -2,  -3,  -5,  -3,   2,   2,  -2,   7,
+  -3,  -4,  -4,  -7,  -5,  -4,  -5,  -5,   0,   0,   1,  -5,   1,   7,
+  -1,  -2,  -2,  -2,  -4,  -1,  -2,  -3,  -2,  -4,  -3,  -1,  -4,  -4,   7,
+   1,  -1,   1,  -1,   0,  -1,  -1,  -1,  -1,  -3,  -3,  -1,  -2,  -2,  -1,   4,
+   0,  -2,   0,  -1,  -1,  -1,  -1,  -2,  -1,   0,  -2,  -1,  -1,  -3,  -2,   1,   5,
+  -4,  -3,  -5,  -6,  -7,  -6,  -6,  -4,  -1,  -2,  -2,  -4,  -4,   2,  -5,  -4,  -6,  11,
+  -3,  -2,  -2,  -5,  -1,  -4,  -3,  -5,   2,  -2,  -1,  -3,  -3,   4,  -6,  -2,  -3,   2,   8,
+   0,  -3,  -4,  -4,   0,  -3,  -3,  -4,  -3,   3,   1,  -3,   1,  -1,  -3,  -3,   0,  -4,  -2,   5,
+  -1,  -2,   4,   4,  -4,   0,   1,  -1,   0,  -4,  -5,   0,  -3,  -5,  -2,   0,   0,  -5,  -3,  -4,   6,
+  -1,   0,   0,   0,  -5,   3,   3,  -2,   0,  -4,  -3,   1,  -2,  -4,  -1,  -1,  -1,  -6,  -3,  -3,   0,   5,
+  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,   -1
+};
+
+/*
+#     VTML160
+#
+# This matrix was produced with scripts written by
+# Tobias Mueller and Sven Rahmann [June-2001].
+#
+# VTML160 substitution matrix, Units = Third-Bits
+# Expected Score = -1.297840 Third-Bits
+# Lowest Score = -7, Highest Score = 16
+#
+# Entropy H = 0.562489 Bits
+#
+# 30-Jun-2001
+#
+# Storage: lower-triangular, row i holds i scores; 24 rows (300 values)
+# are initialized and the rest of the 450 elements default to 0.
+# The last two rows given are an all-0 row and a row of -7 ending in 6.
+# NOTE(review): row order is presumably A R N D C Q E G H I L K M F P S T
+# W Y V B Z X *, matching the column header given for abl50 -- confirm.
+*/
+int a_vt160[450] = {
+  5,
+ -2, 7,
+ -1, 0, 7,
+ -1,-3, 3, 7,
+  1,-3,-3,-5,13,
+ -1, 2, 0, 1,-4, 6,
+ -1,-1, 0, 3,-5, 2, 6,
+  0,-3, 0,-1,-2,-3,-2, 8,
+ -2, 1, 1, 0,-2, 2,-1,-3, 9,
+ -1,-4,-4,-6,-1,-4,-5,-7,-4, 6,
+ -2,-3,-4,-6,-4,-2,-4,-6,-3, 3, 6,
+ -1, 4, 0, 0,-4, 2, 1,-2, 0,-4,-3, 5,
+ -1,-2,-3,-5,-1,-1,-3,-5,-3, 2, 4,-2, 8,
+ -3,-5,-5,-7,-4,-4,-6,-6, 0, 0, 2,-5, 1, 9,
+  0,-2,-2,-1,-3,-1,-1,-3,-2,-4,-3,-1,-4,-5, 9,
+  1,-1, 1, 0, 1, 0, 0, 0,-1,-3,-3,-1,-3,-3, 0, 4,
+  1,-1, 0,-1, 0,-1,-1,-2,-1,-1,-2,-1,-1,-3,-1, 2, 5,
+ -5,-4,-5,-7,-7,-6,-7,-5,-1,-2,-1,-5,-4, 3,-5,-4,-6,16,
+ -3,-3,-2,-5,-1,-4,-3,-5, 3,-2,-1,-3,-2, 6,-6,-2,-3, 4,10,
+  0,-4,-4,-4, 1,-3,-3,-5,-3, 4, 2,-3, 1,-1,-3,-2, 0,-5,-3, 5,
+ -1,-2, 5, 6,-4, 0, 2,-1, 0,-5,-5, 0,-4,-6,-2, 1, 0,-6,-3,-4, 5,
+ -1, 0, 0, 3,-5, 4, 5,-2, 0,-4,-3, 2,-3,-5,-1, 0,-1,-7,-4,-3, 2, 5,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ -7,-7,-7,-7,-7,-7,-7,-7,-7,-7,-7,-7,-7,-7,-7,-7,-7,-7,-7,-7,-7,-7,-7, 6};
+
+/*
+#
+#  VTML_200
+#
+# This matrix was produced from: vtml_200qij.mat using vtml_P.mat background frequencies
+#
+# VTML_200 substitution matrix, Units = bits/3.0
+# Expected score = -0.358430 bits; Entropy = 0.412084 bits
+# Target fraction identity = 0.2295
+# Lowest Score = -6, Highest Score= 15
+#
+# Storage: lower-triangular, row i holds i scores; 23 rows (276 values)
+# are initialized and the rest of the 450 elements default to 0.
+# The last row given is a uniform -1.
+# NOTE(review): row order is presumably A R N D C Q E G H I L K M F P S T
+# W Y V B Z X, matching the column header given for abl50 -- confirm.
+#
+*/
+int a_vt200[450] = {
+  4,
+ -2,  7,
+ -1,  0,  6,
+ -1, -2,  3,  6,
+  1, -3, -2, -4, 12,
+ -1,  2,  1,  1, -3,  5,
+ -1, -1,  1,  3, -4,  2,  5,
+  0, -2,  0, -1, -2, -2, -1,  8,
+ -2,  1,  1,  0, -2,  2,  0, -2,  8,
+ -1, -3, -4, -5,  0, -3, -4, -6, -3,  5,
+ -2, -3, -4, -5, -3, -2, -4, -5, -2,  3,  5,
+ -1,  4,  1,  0, -4,  2,  1, -2,  0, -3, -3,  5,
+ -1, -2, -3, -4, -1, -1, -3, -4, -3,  2,  3, -2,  6,
+ -3, -4, -4, -6, -3, -3, -5, -5,  0,  0,  2, -5,  1,  8,
+  0, -1, -2, -1, -3, -1, -1, -2, -2, -4, -3, -1, -3, -4,  9,
+  1, -1,  1,  0,  1,  0,  0,  0,  0, -3, -3,  0, -2, -3,  0,  4,
+  1, -1,  0, -1,  0,  0, -1, -2, -1, -1, -2,  0, -1, -3, -1,  2,  4,
+ -4, -3, -5, -6, -6, -6, -6, -5, -1, -2, -1, -4, -3,  3, -4, -4, -5, 15,
+ -3, -2, -2, -4,  0, -3, -3, -5,  3, -2, -1, -3, -2,  5, -5, -2, -3,  4,  9,
+  0, -3, -3, -4,  1, -2, -3, -4, -3,  4,  2, -3,  2, -1, -3, -2,  0, -4, -2,  4,
+ -1, -1,  4,  4, -3,  1,  2,  0,  0, -4, -4,  0, -3, -5, -1,  0,  0, -5, -3, -3,  6,
+ -1,  0,  1,  2, -3,  3,  3, -1,  1, -3, -3,  1, -2, -4, -1,  0,  0, -6, -3, -2,  2,  5,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  -1
+};
+
+/*
+  Matrix made by matblas from blosum50.iij
+  BLOSUM Clustered Scoring Matrix in 1/3 Bit Units
+  Blocks Database = /data/blocks_5.0/blocks.dat
+  Cluster Percentage: >= 50
+  Entropy =   0.4808, Expected =  -0.3573
+
+  Storage: lower-triangular, row i holds i scores, rows and columns in
+  the residue order of the column header below; 24 rows (300 values)
+  are initialized and the rest of the 450 elements default to 0.
+*/
+/*
+ A  R  N  D  C  Q  E  G  H  I  L  K  M  F  P  S  T  W  Y  V  B  Z  X  * */
+int abl50[450] = {
+  5,
+ -2, 7,
+ -1,-1, 7,
+ -2,-2, 2, 8,
+ -1,-4,-2,-4,13,
+ -1, 1, 0, 0,-3, 7,
+ -1, 0, 0, 2,-3, 2, 6,
+  0,-3, 0,-1,-3,-2,-3, 8,
+ -2, 0, 1,-1,-3, 1, 0,-2,10,
+ -1,-4,-3,-4,-2,-3,-4,-4,-4, 5,
+ -2,-3,-4,-4,-2,-2,-3,-4,-3, 2, 5,
+ -1, 3, 0,-1,-3, 2, 1,-2, 0,-3,-3, 6,
+ -1,-2,-2,-4,-2, 0,-2,-3,-1, 2, 3,-2, 7,
+ -3,-3,-4,-5,-2,-4,-3,-4,-1, 0, 1,-4, 0, 8,
+ -1,-3,-2,-1,-4,-1,-1,-2,-2,-3,-4,-1,-3,-4,10,
+  1,-1, 1, 0,-1, 0,-1, 0,-1,-3,-3, 0,-2,-3,-1, 5,
+  0,-1, 0,-1,-1,-1,-1,-2,-2,-1,-1,-1,-1,-2,-1, 2, 5,
+ -3,-3,-4,-5,-5,-1,-3,-3,-3,-3,-2,-3,-1, 1,-4,-4,-3,15,
+ -2,-1,-2,-3,-3,-1,-2,-3, 2,-1,-1,-2, 0, 4,-3,-2,-2, 2, 8,
+  0,-3,-3,-4,-1,-3,-3,-4,-4, 4, 1,-3, 1,-1,-3,-2, 0,-3,-1, 5,
+ -2,-1, 4, 5,-3, 0, 1,-1, 0,-4,-4, 0,-3,-4,-2, 0, 0,-5,-3,-4, 5,
+ -1, 0, 0, 1,-3, 4, 5,-2, 0,-3,-3, 1,-1,-4,-1, 0,-1,-2,-2,-3, 2, 5,
+ -1,-1,-1,-1,-2,-1,-1,-2,-1,-1,-1,-1,-1,-2,-2,-1, 0,-3,-1,-1,-1,-1,-1,
+ -1,-1,-1,-1,-2,-1,-1,-2,-1,-1,-1,-1,-1,-2,-2,-1, 0,-3,-1,-1,-1,-1,-1, 7};
+
+/*
+  Storage: lower-triangular, row i holds i scores; each row is labelled
+  with its residue and the columns follow the same order (header below).
+  24 rows (300 values) are initialized and the rest of the 450 elements
+  default to 0.
+
+  A   R   N   D   C   Q   E   G   H   I   L   K   M   F   P   S   T   W   Y   V   B   Z   X   * */
+int a_md10[450]= {
+ 11,	/* A */
+-12, 12,	/* R */
+-12,-13, 13,	/* N */
+-11,-18, -3, 12,	/* D */
+-13,-10,-14,-20, 17,	/* C */
+-13, -5,-11,-13,-19, 13,	/* Q */
+-10,-15,-12, -2,-22, -5, 12,	/* E */
+ -8, -9,-11, -9,-12,-16, -9, 11,	/* G */
+-16, -5, -5,-10,-12, -3,-15,-16, 16,	/* H */
+-13,-17,-14,-19,-17,-20,-19,-21,-18, 12,	/* I */
+-15,-14,-19,-21,-16,-12,-20,-21,-13, -7, 10,	/* L */
+-14, -2, -6,-15,-21, -6, -8,-15,-13,-17,-18, 12,	/* K */
+-13,-14,-15,-18,-15,-14,-18,-19,-15, -4, -4,-12, 16,	/* M */
+-18,-22,-19,-22,-11,-22,-23,-22,-14,-11, -6,-23,-14, 14,	/* F */
+ -7,-12,-17,-18,-18, -8,-17,-16,-10,-19,-10,-16,-17,-17, 13,	/* P */
+ -5,-10, -4,-12, -7,-13,-15, -7,-11,-14,-13,-13,-15,-11, -6, 11,	/* S */
+ -4,-12, -7,-14,-14,-13,-15,-14,-13, -7,-16,-10, -7,-19, -9, -4, 12,	/* T */
+-21, -9,-21,-21,-10,-17,-21,-13,-21,-21,-13,-21,-17,-13,-21,-15,-18, 18,	/* W */
+-20,-17,-12,-13, -7,-16,-21,-20, -3,-15,-16,-20,-17, -3,-20,-12,-17,-12, 15,	/* Y */
+ -6,-17,-17,-15,-12,-17,-14,-13,-19, -1, -8,-18, -5,-12,-16,-14,-10,-16,-18, 11,	/* V */
+-12,-15,  5,  5,-17,-12, -7,-10, -7,-16,-20,-11,-17,-21,-17, -8,-10,-22,-13,-16, 13,	/* B */
+-16,-18,-17, -8,-32,  1,  9,-17,-17,-29,-26,-11,-24,-34,-21,-21,-21,-29,-29,-22, -9, 13, /* Z */
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,	/* X */
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 9};	/* * */
+
+int a_md20[450] = {	/* lower-triangular scores, row i holds i values;
+   24 rows (300 values) are initialized and the rest of the 450
+   elements default to 0.
+   NOTE(review): no header is given here; row order is presumably the
+   same as a_md10 (A R N D C Q E G H I L K M F P S T W Y V B Z X *)
+   -- confirm. */
+ 10,
+-10, 12,
+ -9,-10, 13,
+ -8,-14, -1, 12,
+-10, -7,-11,-16, 17,
+-10, -3, -8, -9,-16, 13,
+ -7,-11, -9,  1,-19, -3, 11,
+ -5, -6, -8, -6, -9,-12, -7, 11,
+-12, -3, -2, -7, -9,  0,-12,-13, 15,
+-10,-14,-11,-16,-14,-16,-16,-17,-14, 12,
+-12,-11,-15,-18,-13, -9,-17,-18,-10, -4, 10,
+-11,  0, -4,-12,-17, -3, -5,-12, -9,-14,-15, 12,
+ -9,-11,-12,-15,-12,-11,-15,-16,-12, -1, -2, -9, 15,
+-15,-19,-16,-19, -8,-18,-20,-19,-11, -8, -4,-19,-10, 13,
+ -5, -9,-13,-15,-14, -5,-14,-12, -7,-15, -7,-13,-14,-14, 12,
+ -2, -8, -1, -9, -4,-10,-12, -5, -8,-11,-10,-10,-12, -8, -3, 10,
+ -1, -9, -4,-11,-10,-10,-12,-11,-10, -4,-12, -7, -4,-15, -7, -1, 11,
+-17, -6,-18,-18, -7,-14,-18,-10,-17,-17,-10,-17,-14,-10,-18,-12,-15, 18,
+-16,-14, -9,-11, -4,-12,-18,-17,  0,-12,-12,-17,-14,  0,-16, -9,-13, -9, 14,
+ -3,-14,-14,-12, -9,-14,-11,-11,-15,  2, -5,-15, -2, -9,-13,-11, -7,-13,-14, 11,
+ -9,-12,  6,  6,-14, -9, -4, -7, -4,-13,-17, -8,-13,-18,-14, -5, -7,-19,-10,-13, 12,
+-12,-13,-13, -4,-27,  4, 10,-13,-12,-24,-21, -6,-20,-29,-17,-17,-17,-24,-24,-18, -6, 12,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 9 };
+
+int a_md40[450] = {	/* lower-triangular scores, row i holds i values;
+   24 rows (300 values) are initialized and the rest of the 450
+   elements default to 0.
+   NOTE(review): no header is given here; row order is presumably the
+   same as a_md10 (A R N D C Q E G H I L K M F P S T W Y V B Z X *)
+   -- confirm. */
+  9,
+ -7, 11,
+ -6, -6, 12,
+ -6,-10,  1, 11,
+ -7, -5, -8,-13, 16,
+ -7,  0, -5, -6,-12, 12,
+ -5, -8, -5,  3,-15,  0, 11,
+ -3, -4, -5, -4, -7, -9, -4, 10,
+ -9,  0,  0, -4, -6,  2, -8,-10, 14,
+ -6,-10, -8,-12,-11,-12,-12,-13,-11, 11,
+ -9, -9,-12,-14,-10, -6,-13,-14, -7, -1,  9,
+ -8,  3, -1, -8,-12, -1, -3, -9, -6,-11,-12, 11,
+ -6, -8, -9,-12, -9, -8,-11,-12, -9,  1,  1, -7, 14,
+-11,-15,-12,-15, -5,-14,-16,-15, -7, -5, -1,-16, -7, 13,
+ -2, -6, -9,-11,-11, -3,-11, -9, -4,-11, -5,-10,-10,-11, 12,
+  0, -5,  1, -6, -2, -7, -8, -2, -6, -8, -7, -7, -8, -6, -1,  9,
+  1, -6, -2, -8, -7, -7, -8, -7, -7, -2, -9, -5, -2,-11, -4,  1, 10,
+-14, -4,-14,-15, -4,-11,-15, -7,-13,-13, -8,-13,-11, -7,-14, -9,-12, 18,
+-13,-10, -6, -8, -2, -9,-14,-13,  2, -9, -9,-13,-11,  2,-13, -7,-10, -6, 14,
+ -1,-11,-10, -9, -7,-11, -8, -8,-12,  4, -2,-12,  0, -6, -9, -7, -4,-10,-11, 10,
+ -6, -8,  6,  6,-10, -6, -1, -4, -2,-10,-13, -5,-10,-14,-10, -3, -5,-15, -7,-10, 11,
+ -8, -8, -8,  0,-21,  6, 10, -8, -7,-18,-16, -3,-15,-23,-12,-12,-12,-19,-18,-14, -3, 11,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 9};
+
+/*
+  Matrix made by matblas from blosum62.iij
+  * column uses minimum score
+  BLOSUM Clustered Scoring Matrix in 1/2 Bit Units
+  Blocks Database = /data/blocks_5.0/blocks.dat
+  Cluster Percentage: >= 62
+  Entropy =   0.6979, Expected =  -0.5209
+
+  Storage: lower-triangular, row i holds i scores; 24 rows (300 values)
+  are initialized and the rest of the 450 elements default to 0.
+  NOTE(review): row order is presumably A R N D C Q E G H I L K M F P S
+  T W Y V B Z X *, matching the column header given for abl50 -- confirm.
+*/
+
+int abl62[450] = {
+  4,
+ -1, 5,
+ -2, 0, 6,
+ -2,-2, 1, 6,
+  0,-3,-3,-3, 9,
+ -1, 1, 0, 0,-3, 5,
+ -1, 0, 0, 2,-4, 2, 5,
+  0,-2, 0,-1,-3,-2,-2, 6,
+ -2, 0, 1,-1,-3, 0, 0,-2, 8,
+ -1,-3,-3,-3,-1,-3,-3,-4,-3, 4,
+ -1,-2,-3,-4,-1,-2,-3,-4,-3, 2, 4,
+ -1, 2, 0,-1,-3, 1, 1,-2,-1,-3,-2, 5,
+ -1,-1,-2,-3,-1, 0,-2,-3,-2, 1, 2,-1, 5,
+ -2,-3,-3,-3,-2,-3,-3,-3,-1, 0, 0,-3, 0, 6,
+ -1,-2,-2,-1,-3,-1,-1,-2,-2,-3,-3,-1,-2,-4, 7,
+  1,-1, 1, 0,-1, 0, 0, 0,-1,-2,-2, 0,-1,-2,-1, 4,
+  0,-1, 0,-1,-1,-1,-1,-2,-2,-1,-1,-1,-1,-2,-1, 1, 5,
+ -3,-3,-4,-4,-2,-2,-3,-2,-2,-3,-2,-3,-1, 1,-4,-3,-2,11,
+ -2,-2,-2,-3,-2,-1,-2,-3, 2,-1,-1,-2,-1, 3,-3,-2,-2, 2, 7,
+  0,-3,-3,-3,-1,-2,-2,-3,-3, 3, 1,-2, 1,-1,-2,-2, 0,-3,-1, 4,
+ -2,-1, 3, 4,-3, 0, 1,-1, 0,-3,-4, 0,-3,-3,-2, 0,-1,-4,-3,-3, 4,
+ -1, 0, 0, 1,-3, 3, 4,-2, 0,-3,-3, 1,-1,-3,-1, 0,-1,-3,-2,-2, 1, 4,
+  0,-1,-1,-1,-2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-2, 0, 0,-2,-1,-1,-1,-1,-1,
+  0,-1,-1,-1,-2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-2, 0, 0,-2,-1,-1,-1,-1,-1, 6};
+
+/* blosum80 in 1/2 bit units (previous versions had 1/3 bit units) */
+/*
+  Matrix made by matblas from blosum80.iij
+  * column uses minimum score
+  BLOSUM Clustered Scoring Matrix in 1/2 Bit Units
+  Blocks Database = /data/blocks_5.0/blocks.dat
+  Cluster Percentage: >= 80
+  Entropy =   0.9868, Expected =  -0.7442
+
+  Storage: lower-triangular, row i holds i scores; 24 rows (300 values)
+  are initialized and the rest of the 450 elements default to 0.
+  NOTE(review): row order is presumably A R N D C Q E G H I L K M F P S
+  T W Y V B Z X *, matching the column header given for abl50 -- confirm.
+*/
+
+int abl80[450] = {
+  5,
+ -2, 6,
+ -2,-1, 6,
+ -2,-2, 1, 6,
+ -1,-4,-3,-4, 9,
+ -1, 1, 0,-1,-4, 6,
+ -1,-1,-1, 1,-5, 2, 6,
+  0,-3,-1,-2,-4,-2,-3, 6,
+ -2, 0, 0,-2,-4, 1, 0,-3, 8,
+ -2,-3,-4,-4,-2,-3,-4,-5,-4, 5,
+ -2,-3,-4,-5,-2,-3,-4,-4,-3, 1, 4,
+ -1, 2, 0,-1,-4, 1, 1,-2,-1,-3,-3, 5,
+ -1,-2,-3,-4,-2, 0,-2,-4,-2, 1, 2,-2, 6,
+ -3,-4,-4,-4,-3,-4,-4,-4,-2,-1, 0,-4, 0, 6,
+ -1,-2,-3,-2,-4,-2,-2,-3,-3,-4,-3,-1,-3,-4, 8,
+  1,-1, 0,-1,-2, 0, 0,-1,-1,-3,-3,-1,-2,-3,-1, 5,
+  0,-1, 0,-1,-1,-1,-1,-2,-2,-1,-2,-1,-1,-2,-2, 1, 5,
+ -3,-4,-4,-6,-3,-3,-4,-4,-3,-3,-2,-4,-2, 0,-5,-4,-4,11,
+ -2,-3,-3,-4,-3,-2,-3,-4, 2,-2,-2,-3,-2, 3,-4,-2,-2, 2, 7,
+  0,-3,-4,-4,-1,-3,-3,-4,-4, 3, 1,-3, 1,-1,-3,-2, 0,-3,-2, 4,
+ -2,-2, 4, 4,-4, 0, 1,-1,-1,-4,-4,-1,-3,-4,-2, 0,-1,-5,-3,-4, 4,
+ -1, 0, 0, 1,-4, 3, 4,-3, 0,-4,-3, 1,-2,-4,-2, 0,-1,-4,-3,-3, 0, 4,
+ -1,-1,-1,-2,-3,-1,-1,-2,-2,-2,-2,-1,-1,-2,-2,-1,-1,-3,-2,-1,-2,-1,-1,
+ -1,-1,-1,-2,-3,-1,-1,-2,-2,-2,-2,-1,-1,-2,-2,-1,-1,-3,-2,-1,-2,-1,-1, 6};
+
+/* OPTIMA_5 matrix: Kann et al. (2000) Proteins 41:498-503
+
+   Storage: lower-triangular, row i holds i scores; 24 rows (300 values)
+   are initialized and the rest of the 450 elements default to 0.
+   NOTE(review): row order is presumably A R N D C Q E G H I L K M F P S
+   T W Y V B Z X *, matching the column header given for abl50 -- confirm.
+*/
+
+int aopt5[450] = {
+  7,
+ -2,11,
+ -4, 1,12,
+ -4,-4, 4,13,
+  1,-6,-6,-6,20,
+ -1, 2, 0, 0,-6, 9,
+ -2, 0, 1, 4,-8, 4, 8,
+  1,-4, 1,-2,-6,-4,-5,13,
+ -4, 1, 2,-2,-6, 0, 0,-4,17,
+ -1,-6,-6,-7,-1,-6,-7,-8,-6, 7,
+ -1,-4,-7,-9,-1,-5,-6,-8,-5, 6, 6,
+ -2, 6, 0,-1,-6, 3, 3,-4,-1,-6,-4, 7,
+ -2,-2,-4,-6,-2, 0,-4,-6,-4, 2, 5,-2,10,
+ -4,-6,-6,-7,-4,-6,-6,-6,-2, 1, 3,-6, 0,11,
+ -1,-4,-4,-1,-6,-2,-1,-4,-4,-6,-7,-2,-4,-8,15,
+  2,-2, 2, 1,-2, 0, 0, 0,-2,-4,-4, 1,-2,-4,-2, 7,
+  0,-2, 0,-2,-2,-2,-1,-3,-4,-2,-3,-2,-1,-4,-2, 4, 9,
+ -6,-6,-8,-8,-4,-4,-6,-4,-4,-6,-3,-6,-2, 3,-8,-6,-4,22,
+ -4,-3,-4,-4,-4,-2,-4,-6, 4,-2, 0,-4,-2, 7,-6,-4,-4, 4,14,
+  1,-7,-6,-6,-1,-4,-6,-6,-6, 7, 3,-5, 2, 0,-4,-5, 1,-6,-2, 8,
+ -4,-1, 8,10,-6, 0, 3, 0, 1,-7,-8, 0,-5,-6,-2, 1,-1,-8,-4,-6, 9,
+ -1, 1, 0, 3,-7, 6, 6,-4, 0,-6,-6, 3,-2,-6,-2, 0,-1,-5,-3,-5, 2, 6,
+ -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
+ -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,10};
+
+/*	DNA alphabet
+
+	A, C, G, T, U   1-4, 5
+	R, Y            6, 7
+	M (A or C)	8
+	W (A or T)	9
+	S (C or G)	10
+	K (G or T)	11
+	D (not C)	12
+	H (not G)	13
+	V (not T)	14
+	B (not A)	15
+	N		16
+	X 		17
+*/
+
+char nt[MAXSQ+1] ={"\0ACGTURYMWSKDHVBNXACGTURYMWSKDHVBNX\0"};	/* codes 1-17 = A..X; codes 18-34 repeat them in upper case */
+char ntx[MAXSQ+1]={"\0ACGTURYMWSKDHVBNXacgturymwskdhvbnx\0"};	/* as nt[], but codes 18-34 are the lower-case letters */
+char ntc[MAXSQ+1]={"\0TGCAAYRKWSMHDBVNXtgcaayrkwsmhdbvnx\0"};	/* complement letter for each ntx[] code (U pairs with A) */
+
+/* nt complement to encoding: gc_nt[code] is the code of the complement;
+   only codes 0-17 are listed */
+                     /* A:T C:G G:C T:A U:A */
+int gc_nt[MAXSQ+1]={ 0,  4,  3,  2,  1,  1,
+                     /* R:Y Y:R M:K W:W */
+ 		         7,  6, 11,  9,
+                     /* S:S K:M D:H H:D */
+		        10,  8, 13, 12,
+                     /* V:B B:V N:N X:N
+                        NOTE(review): X (17) maps to 16 (N) here, whereas
+                        ntc[] complements X to X -- confirm 16 is intended */
+		        15, 14, 16, 16};
+
+int nnt = 17;	/* number of letters in the first half of nt[]/ntx[] (A..X) */
+int nntx = 34;	/* number of letters in both halves of nt[]/ntx[] */
+
+int hnt[MAXSQ+1] = {	/* per-code lookup indexed like nt[]: A=0, C=1, G=2,
+   T=U=3; each ambiguity code R..B is given a single value in 0..2;
+   index 0, N and X are NMAP.  The same values repeat for codes 18-34.
+   NOTE(review): presumably the base value used when hashing words --
+   confirm against the callers. */
+  NMAP,0,1,2,3,3,0,1,0,0,1,2,0,0,0,1,NMAP,
+  NMAP,0,1,2,3,3,0,1,0,0,1,2,0,0,0,1,NMAP,NMAP};
+int hntx[MAXSQ+1] = {	/* same values as hnt[] for codes 0-17, but every
+   code from 18 to 34 (the lower-case half of ntx[]) is NMAP */
+  NMAP,0,1,2,3,3,0,1,0,0,1,2,0,0,0,1,NMAP,
+  NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,
+  NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP};
+
+int npam[450] = {	/* nucleotide scores, lower-triangular: row i holds i
+   values, rows/columns in the order of the header below; 17 rows (153
+   values) are initialized, the rest of the 450 elements default to 0 */
+/*       A  C  G  T  U  R  Y  M  W  S  K  D  H  V  B  N  X  */
+	 5,						/* A */
+	-4, 5,						/* C */
+	-4,-4, 5,					/* G */
+	-4,-4,-4, 5,					/* T */
+	-4,-4,-4, 5, 5,					/* U */
+	 2,-1, 2,-1,-1, 2,				/* R (A G)*/
+	-1, 2,-1, 2, 2,-2, 2,				/* Y (C T)*/
+	 2, 2,-1,-1,-1,-1,-1, 2,			/* M (A C)*/
+	 2,-1,-1, 2, 2, 1, 1, 1, 2,			/* W (A T)*/
+	-1, 2, 2,-1,-1, 1, 1, 1,-1, 2,			/* S (C G)*/
+	-1,-1, 2, 2, 2, 1, 1,-1, 1, 1, 2,		/* K (G T)*/
+	 1,-2, 1, 1, 1, 1,-1,-1, 1,-1, 1, 1,		/* D (!C) */
+	 1, 1,-2, 1, 1,-1, 1, 1, 1,-1,-1,-1, 1,		/* H (!G) */
+	 1, 1, 1,-2,-2, 1,-1, 1,-1, 1,-1,-1,-1, 1,	/* V (!T) */
+	-2, 1, 1, 1, 1,-1, 1,-1,-1, 1, 1,-1,-1,-1, 1,	/* B (!A) */
+	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, /* N */
+	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}; /* X */
+/*       A  C  G  T  U  R  Y  M  W  S  K  D  H  V  B  N  X  */
+
+int *pam;			/* Pam matrix- 1D */
+/* int *pam12; */
+/* int *pam12x; */
+int pamh1[MAXSQ+1];		/* used for kfact replacement */
+
+/* According to Reese and Pearson (2002) Bioinformatics 18:1500-1507,
+the most effective gap penalties for matrices in 1/3 bit units are:
+ open = 25 - 0.1 * Pam_distance; ext = 5
+*/
+
+/* 16-Nov-2010 modified for modern gap open/gap extend
+ */
+
+
+/* must be ordered by entropy to adjust scoring matrix for query
+   length */
+
+#include <math.h>
+#ifndef M_LN2
+#define M_LN2 0.69314718055994530942
+#endif
+
+/* ln(2) divided by 2, 3 and 5: the scale value stored in std_pams[]
+   for matrices given in 1/2, 1/3 and 1/5 bit units.  Each body is
+   parenthesized so the macro expands as a single operand; without the
+   parentheses "x / BIT2_SCALE" would expand to "x / M_LN2 / 2.0". */
+#define BIT2_SCALE (M_LN2/2.0)
+#define BIT3_SCALE (M_LN2/3.0)
+#define BIT5_SCALE (M_LN2/5.0)
+
+/* Table of the built-in protein scoring matrices, terminated by an
+   entry with empty names and a NULL matrix.  Fields, in order:
+   abbrev, name, matrix, scale, ulambda, entropy, tfract_id, gopen, gext
+
+   Several abbreviations select the same matrix ("P10", "M10" and "MD10"
+   all use a_md10; "BL62" and "BP62" both use abl62).
+
+   NOTE(review): an earlier comment in this file says the table must be
+   ordered by entropy.  The listed entropies mostly decrease but are not
+   monotonic: MD10 (3.46293) follows P10/M10 (3.4474) although all three
+   use a_md10, P20 (2.9397) follows VT20 (2.921119), and BL80 (0.9128)
+   precedes VT120 (0.938201).  The header comments of abl80 (0.9868),
+   a_vt160 (0.562489) and abl50 (0.4808) also give entropies that differ
+   from the values here -- confirm which are intended.  The name
+   "VTM200" differs from the "VTnnn" pattern of the other VT entries.
+*/
+struct std_pam_str std_pams[] = {
+  {"VT10", "VT10",   a_vt10,  BIT2_SCALE, 0.2299, 3.4474, 0.9107, -16, -2},
+  {"P10",  "MD10",   a_md10,  BIT3_SCALE, 0.2299, 3.4474, 0.9107, -23, -4},
+  {"M10",  "MD10",   a_md10,  BIT3_SCALE, 0.2299, 3.4474, 0.9107, -23, -4},
+  {"MD10", "MD10",   a_md10,  BIT3_SCALE, 0.2299, 3.46293, 0.9107, -23, -4},
+  {"VT20", "VT20",   a_vt20,  BIT2_SCALE, 0.2300, 2.921119, 0.8312, -15, -2},
+  {"P20",  "MD20",   a_md20,  BIT3_SCALE, 0.2300, 2.9397, 0.822, -22, -4},
+  {"M20",  "MD20",   a_md20,  BIT3_SCALE, 0.2300, 2.9397, 0.822, -22, -4},
+  {"MD20", "MD20",   a_md20,  BIT3_SCALE, 0.2300, 2.9397, 0.822, -22, -4},
+  {"VT40", "VT40",   a_vt40,  BIT2_SCALE, 0.2305, 2.266217, 0.6968, -13, -1},
+  {"P40",  "MD40",   a_md40,  BIT3_SCALE, 0.2305, 2.2284, 0.679, -21, -4},
+  {"M40",  "MD40",   a_md40,  BIT3_SCALE, 0.2305, 2.2284, 0.679, -21, -4},
+  {"MD40", "MD40",   a_md40,  BIT3_SCALE, 0.2305, 2.2284, 0.679, -21, -4},
+  {"VT80", "VT80",   a_vt80,  BIT2_SCALE, 0.2305, 1.390771, 0.5024, -11, -1},
+  {"BL80", "BL80",    abl80,  BIT2_SCALE, 0.2259, 0.9128, 0.392, -10, -2},
+  {"VT120","VT120", a_vt120,  BIT2_SCALE, 0.3416, 0.938201, 0.3748,  -11, -1},
+  {"P120","PAM120", apam120,  BIT3_SCALE, 0.3416, 0.9062, 0.353,  -14, -3},
+  {"BL62", "BL62",    abl62,  BIT2_SCALE, 0.3716, 0.6979, 0.302, -11, -1},
+  {"BP62", "BL62",    abl62,  BIT2_SCALE, 0.3716, 0.6979, 0.302, -11, -1},
+  {"VT160","VT160", a_vt160,  BIT3_SCALE, 0.2263, 0.617215, 0.2884, -12, -2},
+  {"BL50", "BL50",    abl50,  BIT3_SCALE, 0.2318, 0.4850, 0.273, -10, -2},
+  {"OPT5","OPTIMA5",  aopt5,  BIT5_SCALE, 0.1432, 0.4560, 0.262, -18, -2},
+  {"VT200","VTM200",a_vt200,  BIT3_SCALE, 0.2252, 0.4121, 0.2295, -10, -2},
+  {"P250", "PAM250",apam250,  BIT3_SCALE, 0.2252, 0.3207, 0.185, -10, -2},
+  {"\0",   "\0",       NULL,  0.0,         0.0,    0.0,   0.0,   0,  0}
+};
+
+/* Robinson & Robinson counts (based on old aa[] ordering).
+   Index 0 and the B, Z, X and * slots are 0; the twenty non-zero
+   counts add up to rrtotal. */
+long rrcounts[25] = {
+  0,
+  35155,	/* A */
+  23105,	/* R */
+  20212,	/* N */
+  24161,	/* D */
+  8669,		/* C */
+  19208,	/* Q */
+  28354,	/* E */
+  33229,	/* G */
+  9906,		/* H */
+  23161,	/* I */
+  40625,	/* L */
+  25872,	/* K */
+  10101,	/* M */
+  17367,	/* F */
+  23435,	/* P */
+  32070,	/* S */
+  26311,	/* T */
+  5990,		/* W */
+  14488,	/* Y */
+  29012,	/* V */
+  0,		/* B */
+  0,		/* Z */
+  0,		/* X */
+  0 		/* * */
+};
+
+long rrtotal = 450431;	/* sum of rrcounts[] */
+#else
+
+/* extern char sqnam[]; */
+/* extern char sqtype[]; */
+/* extern int gdelval, ggapval; */
+extern int pamoff;
+
+extern char *NCBIstdaa;
+extern char *NCBIstdaa_l;
+extern char NCBIstdaa_n;
+extern char *NCBIstdaa_ext;
+extern char NCBIstdaa_ext_n;
+extern char *pam_sq;
+extern char *apam_sq;
+extern char *npam_sq;
+extern int  pam_sq_n;
+extern int  apam_sq_n;
+extern int  npam_sq_n;
+
+/*
+extern char aa[MAXSQ+1];
+extern char aax[MAXSQ+1];
+*/
+extern char pssm_aa[26];
+extern char othx[MAXSQ+1];
+extern char nt[MAXSQ+1];
+extern char ntx[MAXSQ+1];
+extern char ntc[MAXSQ+1];
+extern int gc_nt[MAXSQ+1];
+
+extern  int naa;
+extern  int naax;
+extern  int noth;
+extern  int nothx;
+extern  int nnt;
+extern  int nntx;
+
+extern int h_NCBIstdaa[MAXSQ+1];
+extern int h_NCBIstdaa_ext[MAXSQ+1];
+/*
+extern  int haa[MAXSQ+1];
+extern  int haax[MAXSQ+1];
+*/
+extern  int hnt[MAXSQ+1];
+extern  int hntx[MAXSQ+1];
+/* extern  int had[MAXSQ+1]; */
+
+extern  int apam250[450];
+extern  int apam120[450];
+extern  int a_vt10[450];
+extern  int a_vt20[450];
+extern  int a_vt40[450];
+extern  int a_vt80[450];
+extern  int a_vt120[450];
+extern  int a_vt160[450];
+extern  int a_vt200[450];
+extern	int a_md10[450];
+extern  int a_md20[450];
+extern  int a_md40[450];
+extern  int abl50[450];
+extern  int abl62[450];
+extern  int abl80[450];
+extern  int aopt5[450];
+extern	int npam[450];
+extern	int *pam;
+/* extern  int *pam12; */
+/* extern  int *pam12x; */
+extern	int pamh1[MAXSQ+1];
+extern  long rrcounts[25];
+extern  long rrtotal;
+
+extern struct std_pam_str std_pams[];
+#endif
+#endif
diff --git a/src/url_subs.c b/src/url_subs.c
new file mode 100644
index 0000000..76d8070
--- /dev/null
+++ b/src/url_subs.c
@@ -0,0 +1,383 @@
+/* $Id: url_subs.c $ */
+
+/* copyright (c) 1998, 1999, 2014 by William R. Pearson and the
+   The Rector & Visitors of the University of Virginia */
+
+/* Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under this License is distributed on an "AS
+   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+   express or implied.  See the License for the specific language
+   governing permissions and limitations under the License. 
+*/
+
+/* 30 Dec 2004 - modify REF_URL to accommodate current Entrez */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "defs.h"
+#include "structs.h"
+#include "param.h"
+
+#ifndef DEF_PROT_LIB
+#define DEF_PROT_LIB "q"
+#endif
+
+extern int seq_pos(int pos, int rev, int off);
+
+char *display_domains(char, struct annot_entry **s_annot_arr_p, int n_domains);
+char *web_encode(const char *);
+
+/* encode_json_str() writes one  "label": "value"  member to fp.
+
+   fp    - output stream
+   label - member name, written as given (callers pass literal names)
+   value - member value; '"' and '\' are backslash-escaped and control
+           characters are written as \u00XX so the result stays a valid
+           string literal.  A NULL value is written as "".
+   first - non-zero for the first member of an object; otherwise a
+           ",\n" separator is written before the member
+*/
+void encode_json_str(FILE *fp, const char *label, const char *value, int first) {
+  const unsigned char *cp;
+
+  if (value == NULL) { value = ""; }
+
+  if (!first) {fprintf(fp, ",\n");}
+  fprintf(fp, " \"%s\": \"",label);
+
+  for (cp = (const unsigned char *)value; *cp != '\0'; cp++) {
+    if (*cp == '"' || *cp == '\\') {
+      fputc('\\', fp);
+      fputc(*cp, fp);
+    }
+    else if (*cp < 0x20) {
+      fprintf(fp, "\\u%04x", (unsigned int)*cp);
+    }
+    else {
+      fputc(*cp, fp);
+    }
+  }
+  fputc('"', fp);
+}
+
+/* encode_json_long() writes one  "label": value  member (value printed
+   as a decimal long) to fp, preceded by ",\n" unless first is non-zero. */
+void encode_json_long(FILE *fp, const char *label, long value, int first) {
+  const char *sep = first ? "" : ",\n";
+
+  fprintf(fp, "%s \"%s\": %ld", sep, label, value);
+}
+
+void encode_json_dfmt(FILE *fp, const char *label, double value, char *fmt, int first) {
+  fprintf(fp, fmt, label, value); /* fmt is the caller-supplied printf
+     format; it receives label (string) and then value (double).
+     NOTE(review): first is ignored, so unlike encode_json_str() and
+     encode_json_long() no ",\n" separator is written here; presumably
+     fmt is expected to carry it -- confirm. */
+}
+void encode_json_aln(FILE *fp, const struct a_struct *aln_p, long q_offset, long l_offset, int first) {
+  /* placeholder: the body is empty, so nothing is written to fp and all
+     arguments are ignored (do_url1() still calls it) */ }
+
+/* encode_json_lines() writes annot_s to fp as an array member
+
+      "label": [
+      "line 1",
+      "line 2"
+      ]
+
+   with one string per '\n'-terminated line of annot_s.
+
+   fp      - output stream
+   label   - member name
+   annot_s - text to split; it is only read, never modified
+   first   - non-zero for the first member of an object; otherwise a
+             ",\n" separator is written before the member
+
+   The lines are printed straight from annot_s with a bounded "%.*s", so
+   no temporary copy is needed and the function cannot fail on memory
+   allocation.  Text after the last '\n' (an unterminated final line) is
+   written as a final element rather than dropped.
+*/
+void encode_json_lines(FILE *fp, const char *label, const char *annot_s, int first) {
+  const char *line_p, *nl_p;
+  int n_out = 0;
+
+  if (!first) {fprintf(fp, ",\n");}
+  fprintf(fp, " \"%s\": [\n",label);
+
+  for (line_p = annot_s; (nl_p = strchr(line_p,'\n')) != NULL; line_p = nl_p+1) {
+    if (n_out++ > 0) fprintf(fp, ",\n");
+    fprintf(fp," \"%.*s\"",(int)(nl_p - line_p),line_p);
+  }
+
+  /* unterminated last line */
+  if (*line_p != '\0') {
+    if (n_out++ > 0) fprintf(fp, ",\n");
+    fprintf(fp," \"%s\"",line_p);
+  }
+
+  fprintf(fp, "\n ]");
+}
+
+/* encode_json_domains() writes the annotations of annot_p whose label is
+   '-' to fp as an array member named label; each element has the form
+   { "start":N, "stop":N, "description":"..." }  with pos and end
+   reported +1.
+
+   fp      - output stream
+   label   - member name
+   annot_p - annotation set; n_annot entries of s_annot_arr_p are examined
+   first   - non-zero for the first member of an object; otherwise a
+             ",\n" separator is written before the member
+
+   The separator between elements is driven by the number of elements
+   already written, not by the loop index, so skipping the leading
+   annotation(s) does not produce a stray leading comma.
+*/
+void encode_json_domains(FILE *fp, const char *label, const struct annot_str *annot_p, int first) {
+  int i;
+  int n_out = 0;	/* elements written so far */
+
+  if (!first) {fprintf(fp, ",\n");}
+  fprintf(fp, "\"%s\": [\n",label);
+  for (i=0; i < annot_p->n_annot; i++) {
+    if (annot_p->s_annot_arr_p[i]->label != '-') continue;
+    if (n_out++ > 0) fprintf(fp, ",\n");
+    fprintf(fp, "  { \"start\":%ld, \"stop\":%ld, \"description\":\"%s\" }",
+	    annot_p->s_annot_arr_p[i]->pos+1,annot_p->s_annot_arr_p[i]->end+1,annot_p->s_annot_arr_p[i]->comment);
+  }
+  fprintf(fp,"\n  ]");
+}
+
+/* do_url1() writes the optional link block and JSON summary for one
+   alignment to fp.
+
+   Link block: written only when at least one of the environment
+   variables REF_URL, SRCH_URL, SRCH_URL1 or DOMAIN_PLOT_URL is set.
+   Each variable is used directly as a printf() format string (the
+   conversions it must contain are listed next to each use below), so
+   these variables must come from a trusted configuration.
+
+   JSON block: written only when JSON_HTML is set.
+
+   fp          - output stream
+   m_msp       - search settings; f_id0, lname, qtitle, n0 and
+                 ldb_info.ldnaseq are read
+   ppst        - not used in this function
+   l_name      - library sequence name; the text after the first '|'
+                 (or the whole name when there is none) is the accession
+   n1          - value reported as n1= in the search URLs and used for
+                 l_cstop in the domain-plot URL
+   aln_p       - alignment offsets and boundaries
+   annot_var_s - variant annotation text; may be NULL or empty
+   q_annot_p,
+   l_annot_p   - query / library annotations; may be NULL
+*/
+void do_url1(FILE *fp, const struct mngmsg *m_msp, const struct pstruct *ppst,
+	     char *l_name, int n1,
+	     const struct a_struct *aln_p, const char *annot_var_s,
+	     const struct annot_str *q_annot_p,
+	     const struct annot_str *l_annot_p )
+{
+  char my_q_name[200], my_l_name[200], json_l_name[200];
+  char *db, *bp;
+  char pgm[10], o_pgm[10], lib[MAX_LSTR];
+  char *tmp_annot_s, *q_domain_s, *l_domain_s, *tmp_domain_s=NULL, *etmp_domain_s;
+  int  n_tmp_domain;
+  size_t l_name_len;
+  long q_offset, l_offset;
+  char *ref_url, *lbp=NULL;
+  char *srch_url, *srch_url1, *dom_url;
+
+  /* set the database */
+  if (m_msp->ldb_info.ldnaseq==SEQT_DNA) db="nucleotide";
+  else db="Protein";
+
+  /* set the program type */
+  if (strncmp(m_msp->f_id0,"rss",3)==0) {
+    strncpy(pgm,"fa",sizeof(pgm));
+  }
+  else if (strncmp(m_msp->f_id0,"rfx",3)==0) {
+    strncpy(pgm,"fx",sizeof(pgm));
+  }
+  else { strncpy(pgm,m_msp->f_id0,sizeof(pgm)); }
+  /* strncpy() leaves pgm unterminated when f_id0 fills the buffer */
+  pgm[sizeof(pgm)-1] = '\0';
+
+  SAFE_STRNCPY(o_pgm, pgm, sizeof(o_pgm));
+
+  /* get a library name (probably does not work for %, + abbreviations) */
+  if (m_msp->lname[0]!='%') {
+    SAFE_STRNCPY(lib,m_msp->lname,sizeof(lib));
+  }
+  else {
+    /* a leading '%' is written URL-encoded as %25 */
+    SAFE_STRNCPY(lib,"%25",sizeof(lib));
+    SAFE_STRNCAT(lib,&m_msp->lname[1],sizeof(lib));
+  }
+  lib[sizeof(lib)-1]='\0';
+
+  /* accession = text after the first '|', or the whole name */
+  if ((lbp = strchr(l_name,'|'))==NULL) {
+    lbp = l_name;
+  }
+  else {
+    lbp++;
+  }
+
+  /* query name = first blank-delimited word of the query title */
+  SAFE_STRNCPY(my_q_name,m_msp->qtitle,sizeof(my_q_name));
+  if ((bp=strchr(my_q_name,' '))!=NULL) *bp='\0';
+
+  SAFE_STRNCPY(my_l_name,lbp,sizeof(my_l_name));
+
+  if (pgm[0]=='t' || !strcmp(pgm,"fx") || !strcmp(pgm,"fy") ) {
+    /* drop everything from the first ':' */
+    if ((lbp=strchr(my_l_name,':'))!=NULL) *lbp='\0';
+    /* drop a trailing "_<c>" suffix; a name shorter than two characters
+       cannot have one, and must not be indexed at length-2 */
+    l_name_len = strlen(my_l_name);
+    if (l_name_len >= 2 && my_l_name[l_name_len-2] == '_') {
+      my_l_name[l_name_len-2] = '\0';
+    }
+  }
+
+  /* change the program name for fastx, tfastx, tfasta */
+  /* fastx returns proteins */
+  if (strcmp(pgm,"fx")==0 || strcmp(pgm,"fy")==0) {SAFE_STRNCPY(pgm,"fa",sizeof(pgm));}
+  else if (strcmp(pgm,"ff")==0) {SAFE_STRNCPY(pgm,"fa",sizeof(pgm));}
+  else if (pgm[0]=='t') {
+    SAFE_STRNCPY(pgm,"fx",sizeof(pgm));
+    SAFE_STRNCPY(lib,DEF_PROT_LIB,sizeof(lib));
+  }
+
+  q_offset = aln_p->q_offset;
+  l_offset = aln_p->l_offset;
+
+  /* set up ref_url, srch_url, srch_url1, dom_url */
+
+  fflush(fp);
+
+  ref_url = getenv("REF_URL");
+  srch_url = getenv("SRCH_URL");
+  srch_url1 = getenv("SRCH_URL1");
+  dom_url = getenv("DOMAIN_PLOT_URL");
+
+  if (ref_url || srch_url || srch_url1 || dom_url) {
+    fprintf(fp,"<!-- LINK_START %s -->",l_name);
+
+  /* REF_URL should provide */
+  /* "<A HREF=\"http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=%s&fcmd=Search&doptcmd1=DocSum&term=%s\">Entrez lookup</A>  " */
+  if (ref_url != NULL) {fprintf(fp,ref_url,db,my_l_name);}
+
+  /* SRCH_URL should provide */
+  /* "<A HREF=\"http://localhost/fasta_www2/searchfa.cgi?dummy=%s&query=%s&db=fasta_www.cgi&lib=%s&pgm=%s&start=%ld&stop=%ld&n1=%d&o_pgm=%s\">Re-search database</A>  " */
+  if (srch_url != NULL) {
+    fprintf(fp,srch_url,my_q_name, my_l_name,db,lib,pgm,
+	    l_offset+aln_p->amin1+1,l_offset+aln_p->amax1,n1,m_msp->f_id0);
+  }
+
+  /* SRCH_URL1 should provide: */
+  /*  "<A HREF=\"http://localhost/fasta_www2/searchxf.cgi?dummy=%s&query=%s&db=%s&lib=%s&pgm=%s&start=%ld&stop=%ld&n1=%d&o_pgm=%s\">General re-search</A>\n" */
+
+  if (srch_url1 != NULL) {
+    fprintf(fp,srch_url1,my_q_name, my_l_name,db,lib,pgm,
+	    l_offset+aln_p->amin1+1,l_offset+aln_p->amax1,n1,m_msp->f_id0);
+  }
+  
+  if (dom_url!=NULL) {
+    /* tmp_annot_s is heap-allocated only when annot_var_s is non-empty;
+       the matching free() below tests the same condition */
+    if (annot_var_s && annot_var_s[0]) {
+      tmp_annot_s = web_encode(annot_var_s);
+    }
+    else tmp_annot_s = "";
+
+    q_domain_s = l_domain_s = NULL;
+
+    if (q_annot_p && q_annot_p->n_domains > 0) {
+      q_domain_s = display_domains('q',q_annot_p->s_annot_arr_p, q_annot_p->n_annot);
+    }
+    if (l_annot_p && l_annot_p->n_domains > 0) {
+      l_domain_s = display_domains('l',l_annot_p->s_annot_arr_p, l_annot_p->n_annot);
+    }
+
+    /* combine domain strings */
+    n_tmp_domain = 0;
+    if (q_domain_s) n_tmp_domain += strlen(q_domain_s)+1;
+    if (l_domain_s) n_tmp_domain += strlen(l_domain_s)+1;
+    etmp_domain_s = "";
+    if (n_tmp_domain > 0) {
+      if ((tmp_domain_s=(char *)calloc(n_tmp_domain,sizeof(char)))==NULL) {
+	fprintf(stderr,"*** error [%s:%d] *** cannot allocate tmp_domain_s[%d]\n",
+		__FILE__, __LINE__,n_tmp_domain);
+      }
+      else {
+	tmp_domain_s[0] = '\0';
+	/* braces keep the whole SAFE_STRNCAT expansion under the if,
+	   however the macro is defined */
+	if (q_domain_s) { SAFE_STRNCAT(tmp_domain_s, q_domain_s, n_tmp_domain); }
+	if (l_domain_s) { SAFE_STRNCAT(tmp_domain_s, l_domain_s, n_tmp_domain); }
+	etmp_domain_s = web_encode(tmp_domain_s);
+      }
+    }
+
+    /* appropriate format string: */
+    /* 
+       pgm=%s	    -- program abbrev that created alignment
+       q_name=%s     -- query info
+       q_cstart=%ld
+       q_cstop=%ld
+       q_astart=%ld
+       q_astop=%ld
+       l_name=%s     -- library info
+       l_cstart=%ld
+       l_cstop=%ld
+       l_astart=%ld
+       l_astop=%ld
+       region=%s       -- aligned domain and variant information
+       doms=%s
+
+       DOMAIN_PLOT_URL = "pgm=%s;q_name=%s;q_cstart=%ld;q_cstop=%ld&q_astart=%ld&q_astop=%ld&l_name=%s&l_cstart=%ld&l_cstop=%ld&l_astart=%ld&l_astop=%ld&regions=%s&doms=%s"
+    */
+
+    /* think about the alternative of running a script
+       rather than embedding it */
+
+    fprintf(fp,dom_url,o_pgm,
+	    my_q_name, q_offset+seq_pos(1,aln_p->qlrev,2),q_offset+seq_pos(m_msp->n0,aln_p->qlrev,2),
+	    q_offset+seq_pos(aln_p->amin0+1,aln_p->qlrev,1), q_offset+seq_pos(aln_p->amax0, aln_p->qlrev,2),
+	    my_l_name, l_offset+seq_pos(1,aln_p->llrev,2), l_offset+seq_pos(n1,aln_p->llrev,2),
+	    l_offset+seq_pos(aln_p->amin1+1,aln_p->llrev,1),l_offset+seq_pos(aln_p->amax1,aln_p->llrev,2),
+	    tmp_annot_s, etmp_domain_s);
+
+    /* etmp_domain_s is heap-allocated only when tmp_domain_s was */
+    if (n_tmp_domain>0 && tmp_domain_s) {
+      free(tmp_domain_s);
+      free(etmp_domain_s);
+    }
+    if (l_annot_p && l_annot_p->n_domains && l_domain_s) {
+      free(l_domain_s);
+    }
+    if (q_annot_p && q_annot_p->n_domains && q_domain_s) {
+      free(q_domain_s);
+    }
+    if (annot_var_s && annot_var_s[0] && tmp_annot_s) free(tmp_annot_s);
+  }
+
+  fprintf(fp,"\n<!-- LINK_STOP -->");
+  fflush(fp);
+  }
+
+  /*
+    if ((srch_url2 = getenv("SRCH_URL2"))==NULL)
+    fprintf(fp,"<A HREF=\"http://fasta.bioch.virginia.edu/fasta/cgi/lalignx.cgi?seq1=\"%s\"&in_seq1=\"FASTA\"&seq2=\"%s\"&in_seq2=\"Accession\"&ssr2=%ld:%ld\">lalign</A>\n<p>\n",my_l_name,db,lib,pgm,l_offset+aln_p->amin1+1,l_offset+aln_p->amax1,n1);
+    else 
+    fprintf(fp,srch_url1,my_l_name,db,lib,pgm,
+    l_offset+aln_p->amin1+1,l_offset+aln_p->amax1,n1);
+  */
+
+
+  if (getenv("JSON_HTML")) {
+
+    /* the variable name is built from l_name: replace '|' with '_' */
+    SAFE_STRNCPY(json_l_name, l_name, sizeof(json_l_name));
+    for (bp=strchr(json_l_name,'|'); bp; bp=strchr(bp+1,'|')) { *bp = '_'; }
+
+    /* replace '.' with '_' */
+    for (bp=strchr(json_l_name,'.'); bp; bp=strchr(bp+1,'.')) { *bp = '_'; }
+
+    fprintf(fp,"\n<script type=\"text/javascript\">\n//<![CDATA[\n var json_%s = {\n",json_l_name);
+    encode_json_str(fp, "db", db, 1);
+    encode_json_str(fp, "l_acc", l_name, 0);
+    encode_json_str(fp, "acc", my_l_name, 0);
+    encode_json_str(fp, "lib", lib, 0);
+    encode_json_str(fp, "pgm", pgm, 0);
+    encode_json_str(fp, "o_pgm", m_msp->f_id0, 0);
+    encode_json_aln(fp, aln_p, q_offset, l_offset, 0);
+    if (annot_var_s && annot_var_s[0]) { encode_json_lines(fp, "annot", annot_var_s, 0); }
+    if (q_annot_p && q_annot_p->n_domains > 0) { encode_json_domains(fp, "q_domains", q_annot_p, 0); }
+    if (l_annot_p && l_annot_p->n_domains > 0) { encode_json_domains(fp, "l_domains", l_annot_p, 0); }
+
+    fprintf(fp, "\n}\n//]]>\n</script>");
+    fflush(fp);
+  }
+}
+
+/* build one "<target>Domain:\tstart-end\tcomment\n" record per '-' (domain) entry; returns a heap string the caller frees, or NULL if out of memory */
+char *display_domains(char target, struct annot_entry **annot_arr_p, int n_annots) {
+  char *domain_s, *new_s;
+  char line[MAX_STR];
+  int i, n_domain_s = MAX_LSTR;
+
+  /* since (currently) annot_var_s is MAX_LSTR, do the same for domain_s */
+  if ((domain_s = (char *)calloc(n_domain_s, sizeof(char)))==NULL) {
+    fprintf(stderr,"*** error [%s:%d] *** cannot allocate domain_s[%d]\n",__FILE__, __LINE__,n_domain_s);
+    return NULL;
+  }
+  for (i=0; i < n_annots; i++) {
+    /* annot_arr_p[] has both domains and non domains, but n_domains only counts domains */
+    if (annot_arr_p[i]->label != '-') continue;
+    sprintf(line, "%cDomain:\t%ld-%ld\t%.*s\n",	/* %.*s caps the comment so the record fits line[]; 64 bytes are reserved for the fixed fields (assumes MAX_STR > 64) */
+	    target, annot_arr_p[i]->pos+1, annot_arr_p[i]->end+1, (int)sizeof(line)-64, annot_arr_p[i]->comment);
+    while (strlen(domain_s) + strlen(line)+1 > (size_t)n_domain_s) {	/* grow (x1.5) until the record fits */
+      if ((new_s = (char *)realloc(domain_s, n_domain_s + n_domain_s/2))==NULL) { free(domain_s); return NULL; }
+      domain_s = new_s; n_domain_s += n_domain_s/2;
+    }
+    SAFE_STRNCAT(domain_s, line, n_domain_s);
+  }
+
+  /* trim to the used length; if the shrink fails, the larger (still valid) buffer is returned */
+  if ((new_s = (char *)realloc(domain_s, strlen(domain_s)+1))!=NULL) domain_s = new_s;
+
+  return domain_s;
+}
+
+/* take an annotation string *annot_var_s and convert problematic characters to their web encoding */
+/* ' ' (space) %20 */
+/* '|' 	    %7C */
+/* ';'	    %3B */
+/* '='	    %3D */
+/* '\n'	    %0A */
+
+static char bad_chars[] = "\n =;|";
+
+char *web_encode(const char *annot_var_s) {
+  /* percent-encode every byte outside [0-9A-Za-z] as %xx (lower-case hex); returns a heap string the caller frees, NULL if calloc fails */
+  int n_tmp_annot_s;
+  char *tmp_annot_s, *new_s, *dp;
+  const char *sp;
+  unsigned char ch;	/* unsigned, so bytes >= 0x80 print as exactly two hex digits */
+  
+  /* make string largest possible size: every byte may become "%xx" */
+  n_tmp_annot_s = strlen(annot_var_s)*3 + 1;
+  if ((tmp_annot_s = (char *)calloc(n_tmp_annot_s,sizeof(char)))==NULL) {
+    fprintf(stderr,"*** error [%s:%d] *** cannot allocate tmp_annot_s[%d]\n",__FILE__, __LINE__,n_tmp_annot_s);
+    return NULL;
+  }
+
+  dp = tmp_annot_s;
+  for (sp = annot_var_s; *sp ; sp++) {
+    ch = (unsigned char)*sp;
+    if ((ch < '0') ||
+	(ch > '9' &&  ch < 'A') ||
+	(ch > 'Z' &&  ch < 'a') ||
+	(ch > 'z')) { sprintf(dp,"%%%02x",ch); dp += 3;}
+    else { *dp++ = *sp; }
+  }
+
+  n_tmp_annot_s = dp - tmp_annot_s;
+  if ((new_s = (char *)realloc(tmp_annot_s, n_tmp_annot_s+1)) != NULL) tmp_annot_s = new_s;	/* shrink; keep the larger buffer if realloc fails */
+  tmp_annot_s[n_tmp_annot_s] = '\0';
+
+  return tmp_annot_s;
+}
diff --git a/src/uthr_subs.h b/src/uthr_subs.h
new file mode 100644
index 0000000..ba0a270
--- /dev/null
+++ b/src/uthr_subs.h
@@ -0,0 +1,52 @@
+/***************************************/
+/* thread global variable declarations */
+/***************************************/
+
+/* $Id: uthr_subs.h 625 2011-03-23 17:21:38Z wrp $ */
+
+#ifndef MAX_WORKERS
+#define MAX_WORKERS 2
+#endif
+#define NUM_WORK_BUF (2*MAX_WORKERS)	/* parenthesized so the macro is safe inside larger expressions */
+
+#include <synch.h>
+#include <thread.h>
+
+#define check(status,string) \
+     do { if ((status) == -1) perror(string); } while (0)   /* error macro for thread calls; do/while(0) keeps it a single statement next to if/else */
+
+#ifndef XTERNAL
+
+thread_t threads[MAX_WORKERS];
+
+/* mutex stuff */
+
+mutex_t reader_mutex;      /* empty buffer pointer structure lock */
+mutex_t worker_mutex;      /* full buffer pointer structure lock */
+
+/* condition variable stuff */
+
+cond_t reader_cond_var;    /* condition variable for reader */
+cond_t worker_cond_var;    /* condition variable for workers */
+
+mutex_t start_mutex;       /* start-up synchronisation lock */
+cond_t start_cond_var;     /* start-up synchronisation condition variable */
+
+#else
+
+extern thread_t threads[];
+
+/* mutex stuff */
+
+extern mutex_t reader_mutex;
+extern mutex_t worker_mutex;
+
+/* condition variable stuff */
+
+extern cond_t reader_cond_var;
+extern cond_t worker_cond_var;
+
+extern mutex_t start_mutex;
+extern cond_t start_cond_var;
+
+#endif
diff --git a/src/wm_align.c b/src/wm_align.c
new file mode 100644
index 0000000..3d04bcc
--- /dev/null
+++ b/src/wm_align.c
@@ -0,0 +1,581 @@
+/* $Id: wm_align.c 1166 2013-05-30 01:05:55Z wrp $  */
+
+/* algorithms and code provided by Webb Miller, Penn State
+   University */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "defs.h"
+#include "rstruct.h"
+#include "aln_structs.h"
+
+struct swstr {int H, E;};
+
+int
+NW_ALIGN(int IW, const unsigned char *B,
+	 int M, int N,
+	 int **W, int G, int H, int *S, int *NC);
+
+static int
+CHECK_SCORE(int IW, const unsigned char *B,
+	    int M, int N,
+	    int *S, int **W, int G, int H, int *nres, int *sw);
+
+/* sw_walign() is here, rather than in dropgsw2.c, because it is also
+   used by dropnfa.c
+*/
+
+struct a_res_str *
+merge_ares_chains(struct a_res_str *cur_ares,
+		  struct a_res_str *tmp_ares,
+		  int score_ix,
+		  const char *msg);
+
+/* local alignment of the n0-row profile pam2p[][] against aa1[]: scores are floored at 0; q+r opens a gap, r extends it.  Fills a_res (bounds, res[], nres) and returns the best score, or 0 (a_res untouched) if none is > 0. */
+int
+sw_walign (int **pam2p, int n0,
+	   const unsigned char *aa1, int n1,
+	   int q, int r,
+	   struct swstr *ss,
+	   struct a_res_str *a_res
+	   )
+{
+   const unsigned char *aa1p;
+   register int i, j;
+   register struct swstr *ssj;
+   int e, f, h, p;
+   int qr;
+   int     score;
+   int cost, I, J, K, L;
+
+   qr = q + r;
+   /* initialize 0th row */
+   for (ssj=ss; ssj<ss+n0; ssj++) {
+     ssj->H = 0;
+     ssj->E = -q;
+   }
+
+   /* I = saved position in aa1
+      J = saved position in aa0
+   */
+   score = I = J = 0;
+   aa1p = aa1;
+   i = 0;
+   while (*aa1p) {		/* stops at the 0 terminator of aa1; n1 is not tested here */
+     h = p = 0;
+     f = -q;
+     /* pwaa = waa + (*aa1p++ * n0); */
+     for (ssj = ss, j=0; j < n0; ssj++, j++) {
+       if ((h =   h    - qr) > (f =   f    - r)) f = h;
+       if ((h = ssj->H - qr) > (e = ssj->E - r)) e = h;
+       /* h = p + *pwaa++; */
+       h = p + pam2p[j][*aa1p];
+       if (h < 0 ) h = 0;	/* floor at 0: this is what makes the alignment local */
+       if (h < f ) h = f;
+       if (h < e ) h = e;
+       p = ssj->H;
+       ssj->H = h;
+       ssj->E = e;
+       if (h > score) {	/* remember the best cell seen so far and where it ends */
+	 score = h;
+	 I = i;
+	 /* J = (int)(ssj-ss);  */
+	 J = j;
+       }
+     }
+     i++;
+     aa1p++;
+   }				/* done with forward pass */
+   if (score <= 0) return 0;
+
+  /* to get the start point, go backwards */
+
+   /* K = begin in aa1
+      L = begin in aa0
+   */
+  cost = K = L = 0;
+  for (ssj=ss+J; ssj>=ss; ssj--) {
+    ssj->H=ssj->E= -1;
+  }
+
+  for (i=I,aa1p=aa1+I; i>=0; i--) {
+    h = f = -1;
+    p = (i == I) ? 0 : -1;	/* only the end cell (I,J) may start the reverse path */
+    for (ssj = ss+J, j=J; ssj>=ss; ssj--,j--) {
+      f = max (f,h-q)-r;
+      ssj->E=max(ssj->E,ssj->H-q)-r;
+      h = max(max(ssj->E,f), p+pam2p[j][aa1[i]]);
+      p = ssj->H;
+      ssj->H=h;
+      if (h > cost) {
+	cost = h;
+	K = i;
+	L = (int)(ssj-ss);
+	if (cost >= score) goto found;	/* reverse score reached the forward score: (K,L) is the start */
+      }
+    }
+  }
+
+found:
+
+  /*  printf(" %d: L: %3d-%3d/%3d; K: %3d-%3d/%3d\n",score,L,J,n0,K,I,n1); */
+
+  a_res->n1 = n1;
+  a_res->max0 = J+1; a_res->min0 = L; a_res->max1 = I+1; a_res->min1 = K;	/* min*: 0-based start; max*: 0-based end + 1 */
+
+  NW_ALIGN(L,&aa1[K-1],J-L+1,I-K+1,pam2p,q,r,a_res->res,&a_res->nres);	/* &aa1[K-1]: the aligner reads B[1..N] */
+
+  return score;
+}
+
+/* nsw_malign is a recursive interface to nw/sw_walign() that is called
+   from do_walign(). nsw_malign() first does an alignment, then checks
+   to see if the score is greater than the threshold. If so, it tries
+   doing a left and right alignment.
+
+   2009-Mar-22 -- This version generalizes the strategy for
+   partitioning the solution by taking the *_walign function as an
+   argument
+
+   2009-May-1 -- add code to ensure that returned a_res->chain is
+   sorted by score.  One strategy is to simply always insert at the
+   appropriate place, which requires re-searching from the top each
+   time (which is not a big deal, since the list is short).  We simply
+   need another argument to nsw_malign, which is the head of the list.
+ */
+struct a_res_str *
+nsw_malign (int ***pam2p, int pam_ix, int n0,	/* pam2p[0] is aligned with, pam2p[pam_ix] re-scores */
+	    const unsigned char *aa1, int n1,
+	    int score_thresh, int max_res,	/* min score worth a sub-search; size of each res[] array */
+	    int gdelval, int ggapval,
+	    struct swstr *ss,
+	    struct a_res_str *cur_ares,	/* caller-allocated head; the returned head comes from merge_ares_chains() when sub-alignments are kept */
+	    int (*fn_walign)
+	    (
+	     int **pam2p, int n0,
+	     const unsigned char *aa1, int n1,
+	     int q, int r,
+	     struct swstr *ss,
+	     struct a_res_str *a_res
+	     ),
+	    int do_rep			/* 0: return after the first alignment, no left/right search */
+	    )
+{
+  struct a_res_str *tmpl_ares, *tmpr_ares, *this_ares;
+  unsigned char *local_aa1;
+  int nc, score_ix;
+  int min_alen;
+  int max_sub_score = -1;
+
+  min_alen = min(MIN_LOCAL_LEN,n0);
+
+  /* now we need alignment storage - get it */
+  if ((cur_ares->res = (int *)calloc((size_t)max_res,sizeof(int)))==NULL) {
+    fprintf(stderr," *** cannot allocate alignment results array %d\n",max_res);
+    exit(1);
+  }
+
+  score_ix = 0;
+
+  cur_ares->next = NULL;
+
+  cur_ares->sw_score = (*fn_walign)(pam2p[0], n0, aa1, n1,
+				    gdelval,  ggapval,
+				    ss, cur_ares);
+
+  /* The scores in a_res->rst include low-complexity alignment.
+     Re-calculate the score of the optimal alignment using the -S matrix.
+
+     This makes sense for secondary HSP's, but not for the initial
+     HSP, because the -S score for the non-S alignment could be smaller
+     than the original optimal -S score (it cannot be higher).
+  */
+  CHECK_SCORE(cur_ares->min0, &aa1[cur_ares->min1-1],
+	      cur_ares->max0 - cur_ares->min0, cur_ares->max1-cur_ares->min1,
+	      cur_ares->res,
+	      pam2p[pam_ix], gdelval, ggapval,
+	      &nc, &cur_ares->rst.score[0]);
+
+  if (!do_rep || cur_ares->rst.score[score_ix] <= score_thresh) { return cur_ares;}
+
+  if (cur_ares->min1 >= min_alen) { /* try the left  */
+    /* allocate a_res and a private copy of aa1[0..min1-1]; calloc() leaves
+       one 0 pad byte in front of the copy and a 0 terminator behind it */
+    tmpl_ares = (struct a_res_str *)calloc(1, sizeof(struct a_res_str));
+    local_aa1 = (unsigned char *)calloc(cur_ares->min1+2,sizeof(unsigned char));
+    if (tmpl_ares == NULL || local_aa1 == NULL) {
+      fprintf(stderr," *** cannot allocate left sub-alignment storage %d\n",cur_ares->min1+2);
+      exit(1);
+    }
+    local_aa1++;
+    memcpy(local_aa1,aa1,cur_ares->min1);
+
+    /* recurse on the region left of the current alignment */
+    tmpl_ares = nsw_malign(pam2p, pam_ix, n0, local_aa1, cur_ares->min1,
+			   score_thresh, max_res,
+			   gdelval, ggapval, ss, tmpl_ares,
+			   fn_walign, do_rep);
+    free(--local_aa1);
+
+    /* keep the left chain only if its head scores above the threshold */
+    if (tmpl_ares->rst.score[score_ix] > score_thresh) {
+      max_sub_score = tmpl_ares->rst.score[score_ix];
+    }
+    else {
+      if (tmpl_ares->res) free(tmpl_ares->res);
+      free(tmpl_ares);
+      tmpl_ares=NULL;
+    }
+  }
+  else {tmpl_ares = NULL;}
+
+  if (n1 - cur_ares->max1 >= min_alen) { /* try the right  */
+    /* allocate a_res and a private copy of aa1[max1..n1-1]; calloc() leaves
+       one 0 pad byte in front of the copy and a 0 terminator behind it */
+    tmpr_ares = (struct a_res_str *)calloc(1, sizeof(struct a_res_str));
+    local_aa1 = (unsigned char *)calloc(n1-cur_ares->max1+2,sizeof(unsigned char));
+    if (tmpr_ares == NULL || local_aa1 == NULL) {
+      fprintf(stderr," *** cannot allocate right sub-alignment storage %d\n",
+	      n1-cur_ares->max1+2);
+      exit(1);
+    }
+
+    local_aa1++;
+    memcpy(local_aa1,aa1+cur_ares->max1,n1-cur_ares->max1);
+
+    /* recurse on the region right of the current alignment */
+    tmpr_ares = nsw_malign(pam2p, pam_ix, n0, local_aa1, n1 - cur_ares->max1,
+			   score_thresh, max_res,
+			   gdelval, ggapval, ss, tmpr_ares,
+			   fn_walign, do_rep);
+    free(--local_aa1);
+
+    /* keep the right chain only if its head scores above the threshold */
+    if (tmpr_ares->rst.score[score_ix] > score_thresh) {
+      /* adjust the left boundary: the recursion saw aa1+max1 as position 0,
+	 so shift every alignment in the chain back into aa1 coordinates */
+      for (this_ares = tmpr_ares; this_ares; this_ares = this_ares->next) {
+	this_ares->min1 += cur_ares->max1;
+	this_ares->max1 += cur_ares->max1;
+      }
+
+      if (tmpr_ares->rst.score[score_ix] >= max_sub_score) {
+	max_sub_score = tmpr_ares->rst.score[score_ix];
+      }
+    }
+    else {
+      /* nothing above threshold on the right: discard it */
+      if (tmpr_ares->res) free(tmpr_ares->res);
+      free(tmpr_ares);
+      tmpr_ares = NULL;
+    }
+  }
+  else {tmpr_ares = NULL;}
+
+  /* We have checked both left and right, and better score is in max_sub_score.
+     If both scores are <= score_thresh, then forget it */
+
+  if (max_sub_score <= score_thresh) {
+    if (tmpl_ares) {
+      if (tmpl_ares->res) {free(tmpl_ares->res);}
+      free(tmpl_ares);
+    }
+    if (tmpr_ares) {
+      if (tmpr_ares->res) {free(tmpr_ares->res);}
+      free(tmpr_ares);
+    }
+    return cur_ares;
+  }
+
+  cur_ares =  merge_ares_chains(cur_ares, tmpl_ares, score_ix, "left");
+  cur_ares =  merge_ares_chains(cur_ares, tmpr_ares, score_ix, "right");
+
+  return cur_ares;
+}
+
+#define gap(k)  ((k) <= 0 ? 0 : q+r*(k))	/* k-symbol indel cost */
+
+/* Append "Delete k" op */
+#define DEL(k)				\
+{ if (*last < 0)			\
+    *last = (*sapp)[-1] -= (k);		\
+  else {				\
+    *last = (*sapp)[0] = -(k);		\
+    (*sapp)++;				\
+  }					\
+}
+
+/* Append "Insert k" op */
+#define INS(k)				\
+{ if (*last > 0)			\
+    *last = (*sapp)[-1] += (k);		\
+  else {				\
+    *last = (*sapp)[0] = (k);		\
+    (*sapp)++;				\
+  }					\
+}
+
+/* align(A,B,M,N,tb,te,last) returns the cost of an optimum conversion between
+   A[1..M] and B[1..N] that begins(ends) with a delete if tb(te) is zero
+   and appends such a conversion to the current script.                   */
+
+static int
+nw_align(int iw,	/* beginning of alignment in pam2p profile */
+	 const unsigned char *B,	/* second sequence aa1 */
+	 int M, int N,			/* length of profile, aa1 */
+	 int tb, int te,			/* boundary gap terms for the start / end; see the comment above */
+	 int **w, int q, int r, 	/* pam2p profile, open, ext */
+	 struct swstr *f_ss,		/* forward, reverse row matrix */
+	 struct swstr *r_ss,
+	 int dir,			/* dir [0..3] is not currently used */
+	 int **sapp, int *last)		/* script write pointer (advanced by DEL/INS), last op written */
+{
+  /* linear-space divide and conquer: find where the best path crosses row M/2, then emit the two halves recursively */
+  int midi, midj, type;	/* Midpoint, type, and cost */
+  int midc;
+  int c1, c2;		/* scores of the two halves; assigned but not otherwise used */
+
+  register int   i, j;
+  register int c, e, d, s;
+  int qr, t, *wa;
+
+/*   print_seq_prof(A,M,B,N,w,iw); */
+
+/*  m = g + h;  */
+  qr = q + r;
+
+/* Boundary cases: M <= 1 or N == 0 */
+
+  if (N <= 0) {
+    if (M > 0) {DEL(M)}
+    return -gap(M);
+  }
+
+  if (M <= 1) {
+    if (M <= 0) {
+      INS(N)
+      return -gap(N);
+    }
+
+    if (tb < te) tb = te;
+    midc = (tb-r) - gap(N);	/* baseline: delete the single row and insert all of B */
+    midj = 0;
+/*  wa = w[A[1]]; */
+    wa = w[iw];
+    for (j = 1; j <= N; j++) {	/* or match the row against B[j], inserting the rest */
+      c = -gap(j-1) + wa[B[j]] - gap(N-j);
+      if (c > midc) { midc = c; midj = j;}
+    }
+    if (midj == 0) { DEL(1) INS(N) }
+    else  {
+      if (midj > 1) { INS(midj-1)}
+      *last = (*sapp)[0] = 0;	/* op 0 = aligned pair */
+      (*sapp)++;
+      if (midj < N) { INS(N-midj)}
+    }
+    return midc;
+  }
+
+/* Divide: Find optimum midpoint (midi,midj) of cost midc */
+
+  midi = M/2;		/* Forward phase:                          */
+  f_ss[0].H = 0;	/*   Compute H(M/2,k) & E(M/2,k) for all k */
+  f_ss[0].E = t = -q;
+  for (j = 1; j <= N; j++) {
+    f_ss[j].H = t = t-r;
+    f_ss[j].E = t-q;
+  }
+  t = tb;
+  for (i = 1; i <= midi; i++) {
+    s = f_ss[0].H;
+    f_ss[0].H = c = t = t-r;
+    e = t-q;
+/*    wa = w[A[i]]; */
+    wa = w[iw+i-1];
+    for (j = 1; j <= N; j++) {
+      if ((c =   c   - qr) > (e =   e   - r)) e = c;
+      if ((c = f_ss[j].H - qr) > (d = f_ss[j].E - r)) d = c;
+      c = s + wa[B[j]];
+      if (e > c) c = e;
+      if (d > c) c = d;
+      s = f_ss[j].H;
+      f_ss[j].H = c;
+      f_ss[j].E = d;
+    }
+  }
+  f_ss[0].E = f_ss[0].H;
+
+  r_ss[N].H = 0;		/* Reverse phase:                  */
+  t = -q;			/*   Compute R(M/2,k) & S(M/2,k) for all k */
+
+  for (j = N-1; j >= 0; j--) {
+    r_ss[j].H = t = t-r;
+    r_ss[j].E = t-q;
+  }
+
+  t = te;
+  for (i = M-1; i >= midi; i--) {
+    s = r_ss[N].H;
+    r_ss[N].H = c = t = t-r;
+    e = t-q;
+/*    wa = w[A[i+1]]; */
+    wa = w[iw+i];
+    for (j = N-1; j >= 0; j--) {
+      if ((c =   c   - qr) > (e =   e   - r)) { e = c; }
+      if ((c = r_ss[j].H - qr) > (d = r_ss[j].E - r)) { d = c; }
+      c = s + wa[B[j+1]];
+      if (e > c) c = e;
+      if (d > c) c = d;
+      s = r_ss[j].H;
+      r_ss[j].H = c;
+      r_ss[j].E = d;
+    }
+  }
+  r_ss[N].E = r_ss[N].H;
+
+  midc = f_ss[0].H+r_ss[0].H;		/* Find optimal midpoint */
+  midj = 0;
+  type = 1;
+
+  for (j = 0; j <= N; j++) {	/* type 1: the two halves meet in the H rows */
+    if ((c = f_ss[j].H + r_ss[j].H) >= midc) {
+      if (c > midc || (f_ss[j].H != f_ss[j].E && r_ss[j].H == r_ss[j].E)) {
+	midc = c;
+	midj = j;
+      }
+    }
+  }
+
+  for (j = N; j >= 0; j--) {	/* type 2: both halves are in the E state at the midpoint; +q because the open cost was charged on each side */
+    if ((c = f_ss[j].E + r_ss[j].E + q) > midc) {
+      midc = c;
+      midj = j;
+      type = 2;
+    }
+  }
+
+/* Conquer: recursively around midpoint */
+
+  if (type == 1) {
+    c1 = nw_align(iw,B,midi,midj,tb,-q,w,q,r,f_ss, r_ss,0,sapp,last);
+    c2 = nw_align(iw+midi,B+midj,M-midi,N-midj,-q,te,w,q,r,f_ss, r_ss,1,sapp,last);
+  }
+  else {
+    nw_align(iw,B,midi-1,midj,tb,0,w,q,r,f_ss, r_ss,2,sapp,last);
+    DEL(2);	/* the two profile rows around the midpoint are emitted as one delete */
+    nw_align(iw+midi+1,B+midj,M-midi-1,N-midj,0,te,w,q,r,f_ss,r_ss,3,sapp,last);
+  }
+  return midc;
+}
+
+/* Interface and top level of comparator */
+
+int
+NW_ALIGN(int IW, const unsigned char *B,
+	 int M, int N,
+	 int **W, int G, int H, int *S, int *NC)
+{
+  /* Top-level driver: allocate the two work rows, build the edit script in S
+     with nw_align(), re-score it with CHECK_SCORE() (which also sets *NC),
+     warn on stderr if the two scores differ, and return the nw_align() score. */
+  struct swstr *fwd_base, *rev_base;	/* start of each allocation, kept for free() */
+  int *script_p = S;			/* nw_align() advances this as it appends ops */
+  int last_op = 0;
+  int align_score, check_score, local_score;
+  size_t n_rows = (size_t)(N+2);
+
+  fwd_base = (struct swstr *) calloc (n_rows, sizeof (struct swstr));
+  if (fwd_base == NULL) {
+    fprintf (stderr, " *** cannot allocate f_ss array %3d\n", N+2);
+    exit (1);
+  }
+
+  rev_base = (struct swstr *) calloc (n_rows, sizeof (struct swstr));
+  if (rev_base == NULL) {
+    fprintf (stderr, " *** cannot allocate r_ss array %3d\n", N+2);
+    exit (1);
+  }
+
+  /* nw_align() receives each row starting at its second element */
+  align_score = nw_align(IW,B,M,N,-G,-G,W,G,H,fwd_base+1,rev_base+1,0,&script_p,&last_op);
+
+  check_score = CHECK_SCORE(IW,B,M,N,S,W,G,H,NC,&local_score);
+  if (align_score != check_score) {
+    fprintf(stderr," *** Check_score error. %d != %d ***\n",align_score,check_score);
+  }
+
+  free(rev_base);
+  free(fwd_base);
+
+  return align_score;
+}
+
+/* CHECK_SCORE - return the score of the alignment stored in S (op 0 = aligned pair, k>0 = skip k residues of B, k<0 = skip |k| profile rows; each gap costs g+|k|*h).
+   *NC gets the number of alignment columns; *sw_score gets the best floored-at-0 running score (the plain total when GGSEARCH is defined) */
+static int
+CHECK_SCORE(int iw, const unsigned char *B,
+	    int M, int N,
+	    int *S, int **w,
+	    int g, int h, int *NC, int *sw_score)
+{
+  register int   i,  j, op, nc;
+  int itmp;
+  int score;
+  int l_score, mx_l_score;
+
+  /*  print_seq_prof(A,M,w,iw); */
+
+  score = i = j = nc = l_score = mx_l_score = 0;
+#ifdef SHOW_ALIGN_SCORE
+  printf("====start\n");
+  printf("#i j pam2 score l_score mx_l_score\n");
+#endif
+  while (i < M || j < N) {	/* the script is trusted to consume exactly M rows and N residues */
+    op = *S++;
+    if (op == 0) {
+      itmp = w[iw+i][B[++j]];	/* B is indexed from 1 */
+      score += itmp;
+      i++;
+      nc++;
+      l_score += itmp;
+      if (l_score < 0) l_score = 0;
+      if (l_score > mx_l_score) mx_l_score = l_score;
+#ifdef SHOW_ALIGN_SCORE
+      printf("%d\t%d\t%d\t%d\t%d\t%d\n",i, j, itmp, score, l_score, mx_l_score);
+#endif
+    }
+    else if (op > 0) {
+      score = score - (g+op*h);
+      j += op;
+      nc += op;
+      l_score -= (g+op*h);
+      if (l_score < 0) l_score = 0;
+#ifdef SHOW_ALIGN_SCORE
+      printf("%d\t%d\t%d\t%d\t%d\t%d\n",i, j, -(g+op*h) ,score, l_score, mx_l_score);
+#endif
+    } else {
+      score = score - (g-op*h);	/* op < 0, so g-op*h == g+|op|*h */
+      i -= op;
+      nc -= op;
+      l_score -= (g-op*h);
+      if (l_score < 0) l_score = 0;
+#ifdef SHOW_ALIGN_SCORE
+      printf("%d\t%d\t%d\t%d\t%d\t%d\n",i, j, -(g-op*h), score, l_score, mx_l_score);
+#endif
+    }
+  }
+#ifdef SHOW_ALIGN_SCORE
+  printf("%d\t%d\tend\t%d\t%d\n====\n",i, j, score, mx_l_score);
+#endif
+  *NC = nc;
+  /* used to return mx_l_score, which is wrong when CHECK_SCORE is used for global alignments */
+#ifndef GGSEARCH
+  *sw_score = mx_l_score;
+#else
+  *sw_score = score;
+#endif
+  return score;
+}
+
+
diff --git a/src/work_thr2.c b/src/work_thr2.c
new file mode 100644
index 0000000..f335042
--- /dev/null
+++ b/src/work_thr2.c
@@ -0,0 +1,492 @@
+/*  $Id: work_thr2.c $ */
+
+/* copyright (c) 1996, 1997, 1998, 1999, 2014 by William R. Pearson
+   and The Rector & Visitors of the University of Virginia */
+
+/* Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under this License is distributed on an "AS
+   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+   express or implied.  See the License for the specific language
+   governing permissions and limitations under the License. 
+*/
+
+/* work_thr.c - threaded worker */
+
+/* modified 21-Oct-1998 to work with reverse complement for DNA */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <sys/types.h>
+#include <signal.h>
+
+#include "defs.h"		/* various constants */
+#include "best_stats.h"			/* defines beststr */
+#include "structs.h"
+#include "param.h"		/* pstruct rstruct */
+#include "thr_buf_structs.h"
+
+/***************************************/
+/* thread global variable declarations */
+/***************************************/
+
+#ifndef PCOMPLIB
+#define XTERNAL
+#include "thr_bufs2.h"
+#undef XTERNAL
+#else
+#include "msg.h"
+#define XTERNAL
+#include "uascii.h"
+#undef XTERNAL
+#ifdef MPI_SRC
+#include "mpi.h"
+#endif
+#endif
+
+void alloc_pam (int, int, struct pstruct *);
+int **alloc_pam2p(int **,int, int);
+void revcomp(unsigned char *seq, int n, int *c_nt);
+
+#if defined(WIN32) || !defined(THR_EXIT)
+void pthread_exit(void *);
+#define THR_EXIT pthread_exit
+#else
+void THR_EXIT(void *);
+#endif
+
+#ifdef DEBUG
+extern struct buf_head *lib_buf2_list;
+#endif
+
+/* functions getting/sending buffers to threads (thr_sub.c) */
+extern void wait_thr(void);
+extern int get_wbuf(struct buf_head **cur_buf, int max_work_buf);
+extern void put_wbuf(struct buf_head *cur_buf, int max_work_buf);
+
+/* dropxx.c functions */
+#include "drop_func.h"
+
+extern void *my_srand();
+extern unsigned int my_nrand(int, void *);
+extern void qshuffle(unsigned char *aa0, int n0, int nm0, void *);
+extern void free_pam2p(int **);
+
+void init_aa0(unsigned char **aa0, int n0, int nm0,
+	      unsigned char **aa0s, unsigned char **aa1s, 
+	      int qframe, int qshuffle_flg, int max_tot,
+	      struct pstruct *ppst, void **f_str, void **qf_str,
+	      void *my_rand_state);
+
+extern void
+buf_do_work(unsigned char **aa0, int n0, struct buf_head *lib_bhead_p,
+	    int max_frame, struct pstruct *ppst, void **f_str);
+extern void
+buf_qshuf_work(unsigned char *aa0s, int n0, struct buf_head *lib_bhead_p,
+	       int max_frame, struct pstruct *ppst, void *qf_str, int score_ix);
+extern void
+buf_shuf_work(unsigned char **aa0, int n0, unsigned char *aa1s,
+	      struct buf_head *lib_bhead_p, int max_frame, struct pstruct *ppst,
+	      void **f_str, int score_ix, void *);
+
+void
+buf_do_align(unsigned char **aa0,  int n0,
+	     struct buf_head *lib_bhead_p, 
+	     struct pstruct *ppst, const struct mngmsg *my_msp,
+	     void **f_str);
+
+/* worker entry point (work_thread() for threads, work_comp() when PCOMPLIB is defined): set up per-worker scoring parameters and query copies, process buffers from get_wbuf() until none remain, then free the per-worker storage */
+#ifndef PCOMPLIB
+#define FIRSTNODE 0
+void
+work_thread (struct thr_str *work_info)
+#else
+#if defined(TFAST)
+extern void aainit(int tr_type, int debug);
+#endif
+
+int g_worker;
+
+void work_comp(int my_worker)
+#endif
+{
+  struct buf_head *cur_buf, *my_cur_buf;
+  char info_lib_range[MAX_FN];
+  unsigned char *aa1s=NULL;
+#ifndef PCOMPLIB
+  const struct mngmsg *my_msp;
+  int my_worker;
+#else
+#ifdef MPI_SRC
+  struct mngmsg *my_msp;
+  MPI_Status mpi_status;
+  int buf_alloc_flag = 0;
+#endif
+  struct mngmsg my_msg;
+  int int_msg_b[4];
+  struct buf2_data_s *my_buf2_data;
+  struct buf2_res_s *my_buf2_res;
+  struct buf2_ares_s *my_buf2_ares;
+  struct seq_record *my_seq_buf;
+  unsigned char *my_aa1b_buf;
+#endif
+  int i, j, npam, n0, nm0;
+  int max_work_buf, max_buf2_res, max_chain_seqs, seq_buf_size;
+  void *my_rand_state;
+
+  struct pstruct my_pst, *my_ppst;
+  unsigned char *aa0[6], *aa0s;
+  void *f_str[6], *qf_str;
+
+  my_rand_state=my_srand();
+
+#ifndef PCOMPLIB
+  my_worker = work_info->worker;
+  max_work_buf = work_info->max_work_buf;
+  wait_thr();	/* wait for start_thread predicate to drop to  0 */
+
+  my_msp = work_info->m_msp;
+#else 	/* PCOMPLIB */
+#ifdef DEBUG
+/*  fprintf(stderr,"%d: work_comp started\n",my_worker); */
+#endif
+  g_worker = my_worker;
+  my_msp = &my_msg;
+
+#ifdef MPI_SRC
+ pcomp_loop:
+
+  MPI_Recv(int_msg_b,4,MPI_INT,0, STARTTYPE0,MPI_COMM_WORLD,
+	   &mpi_status);
+  
+  max_work_buf = int_msg_b[0];
+  max_buf2_res = int_msg_b[1];
+  max_chain_seqs = int_msg_b[2];
+  seq_buf_size = int_msg_b[3];
+
+  /* quit the main loop with a message of 0 max_work_buf */
+  if (max_work_buf == 0) { goto pcomp_final;}
+
+  MPI_Recv((void *)my_msp,sizeof(struct mngmsg),MPI_BYTE,0,STARTTYPE1,MPI_COMM_WORLD,
+	   &mpi_status);
+
+  MPI_Recv((void *)&my_pst,(int)sizeof(struct pstruct),MPI_BYTE,0,STARTTYPE2,MPI_COMM_WORLD,
+	   &mpi_status);
+  my_ppst = &my_pst;
+
+#endif	/* MPI_SRC */
+
+  if (!buf_alloc_flag) {
+    buf_alloc_flag = 1;
+    /* must allocate buffers for data, sequences, results */
+    if ((my_cur_buf = cur_buf = (struct buf_head *)calloc(1,sizeof(struct buf_head)))==NULL) {
+      fprintf(stderr,"cannot allocate buf_head\n");
+      exit(1);
+    }
+
+    /* allocate results array */
+    if ((my_buf2_res = (struct buf2_res_s*)calloc(max_buf2_res+1,sizeof(struct buf2_res_s)))==NULL) {
+      fprintf(stderr,"cannot allocate buf2_res[%d]\n",max_buf2_res);
+      exit(1);
+    }
+    cur_buf->buf2_res = my_buf2_res;
+
+    /* allocate buffers for ares alignment encodings */
+    if ((my_buf2_ares = (struct buf2_ares_s*)calloc(max_buf2_res+1,sizeof(struct buf2_ares_s)))==NULL) {
+      fprintf(stderr,"cannot allocate buf2_ares[%d]\n",max_buf2_res);
+      exit(1);
+    }
+    cur_buf->buf2_ares = my_buf2_ares;
+
+    /* allocate buffers for data */
+    if ((my_buf2_data = (struct buf2_data_s*)calloc(max_buf2_res+1,sizeof(struct buf2_data_s)))==NULL) {
+      fprintf(stderr,"cannot allocate buf2_data[%d]\n",max_buf2_res);
+      exit(1);
+    }
+    cur_buf->buf2_data = my_buf2_data;
+
+    /* also must allocate seq_records */
+    if ((my_seq_buf = 
+	 (struct seq_record *)calloc((size_t)(max_buf2_res+1), sizeof(struct seq_record)))
+        ==NULL) {
+      fprintf(stderr,"%d: cannot allocate seq_record buffer[%d]\n",my_worker,max_buf2_res+1);
+      exit(1);
+    }
+    cur_buf->buf2_data[0].seq = cur_buf->hdr.seq_b = my_seq_buf;
+
+    if ((my_aa1b_buf = (unsigned char *)calloc((size_t)(seq_buf_size+1),sizeof(unsigned char)))
+        ==NULL) {
+      fprintf(stderr,"%d: cannot allocate sequence buffer[%d]\n",my_worker, seq_buf_size);
+      exit(1);
+    }
+    else {	  /* now associate the my_aa1b_buf with cur_buf */
+      my_aa1b_buf++;
+      cur_buf->hdr.aa1b_start = cur_buf->buf2_data[0].seq->aa1b = my_aa1b_buf;
+      cur_buf->hdr.aa1b_size = seq_buf_size;
+   }
+  }
+  else {
+    cur_buf = my_cur_buf;
+    cur_buf->buf2_data = my_buf2_data;
+    cur_buf->buf2_data[0].seq = cur_buf->hdr.seq_b = my_seq_buf;
+    cur_buf->buf2_res = my_buf2_res;
+    cur_buf->buf2_ares = my_buf2_ares;
+    cur_buf->hdr.aa1b_start = cur_buf->buf2_data[0].seq->aa1b = my_aa1b_buf;
+    cur_buf->hdr.aa1b_size = seq_buf_size;
+  }
+
+#if defined(TFAST)
+    /* set up translation tables: faatran.c */
+  aainit(my_ppst->tr_type,my_ppst->debug_lib);
+#endif
+
+#endif	/* PCOMPLIB */
+
+  /* the pam allocation stuff is very different for threaded vs PCOMPLIB,
+     so the code is separate */
+#if !defined(PCOMPLIB)
+  /* make certain that all but 0 have their own copy of pst */
+  if (my_worker== 0) {
+    my_ppst=work_info->ppst;
+  }
+  else {
+    my_ppst = &my_pst;
+    memcpy(my_ppst,work_info->ppst,sizeof(struct pstruct));
+    /* #else we already have the stuff in my_pst from initialization */
+
+    my_ppst->pam2p[0] = my_ppst->pam2p[1] = NULL;
+
+    alloc_pam(MAXSQ, MAXSQ, my_ppst);
+
+    npam = my_pst.nsqx;
+
+    /* allocate local copy of pam2[][] */
+    for (i=0; i<npam; i++) {
+      for (j=0; j<npam; j++) {
+	my_pst.pam2[0][i][j] = work_info->ppst->pam2[0][i][j];
+	my_pst.pam2[1][i][j] = work_info->ppst->pam2[1][i][j];
+      }
+    }
+  }
+#endif
+#if defined(PCOMPLIB)	/* PCOMPLIB */
+  my_ppst = &my_pst;	/* for all workers */
+  alloc_pam(my_msg.pamd1,my_msg.pamd2,my_ppst);
+#ifdef MPI_SRC
+  MPI_Recv(&my_pst.pam2[0][0][0],my_msg.pamd1*my_msg.pamd2,MPI_INT,0,
+	   STARTTYPE3, MPI_COMM_WORLD,&mpi_status);
+
+  MPI_Recv(&my_pst.pam2[1][0][0],my_msg.pamd1*my_msg.pamd2,MPI_INT,0,
+	   STARTTYPE3, MPI_COMM_WORLD,&mpi_status);
+  /* no code for profiles */
+
+  /* get pascii (only for fasty/tfasty) */
+  pascii = aascii;
+  MPI_Recv(pascii, sizeof(aascii), MPI_BYTE, 0, STARTTYPE4, MPI_COMM_WORLD, &mpi_status);
+#endif
+#endif
+
+  /* fill in info_lib_range */
+  if (my_worker == FIRSTNODE) {
+    /* label library size limits */
+    if (my_ppst->n1_low > 0 && my_ppst->n1_high < BIGNUM) {
+      sprintf(info_lib_range," (range: %d-%d)",my_ppst->n1_low,my_ppst->n1_high);}
+    else if (my_ppst->n1_low > 0) {
+      sprintf(info_lib_range," (range: >%d)",my_ppst->n1_low);}
+    else if (my_ppst->n1_high < BIGNUM) {
+      sprintf(info_lib_range," (range: <%d)",my_ppst->n1_high);}
+    else {
+      info_lib_range[0]='\0';
+    }
+    info_lib_range[sizeof(info_lib_range)-1]='\0';
+#ifndef PCOMPLIB
+    strncpy(work_info->info_lib_range,info_lib_range,MAX_SSTR);
+    work_info->info_lib_range[MAX_SSTR-1]='\0';	/* strncpy() does not terminate when the source fills all MAX_SSTR bytes */
+    /* this does not work on some architectures */
+    work_info->f_str_ap = &f_str[0];
+#endif
+  }
+
+#ifdef PCOMPLIB
+#ifdef MPI_SRC
+  /* send back sync message */
+  int_msg_b[0]=my_worker;
+  MPI_Send(int_msg_b,1,MPI_INT,0,MSEQTYPE0,MPI_COMM_WORLD);
+  if (my_worker == FIRSTNODE) {
+    MPI_Send(info_lib_range,MAX_FN,MPI_BYTE,0,MSEQTYPE0,MPI_COMM_WORLD);
+  }
+#endif
+#endif
+
+  /* do the aa0[] stuff after m_msg/my_pst are initialized, for later
+     inclusion in a loop */
+
+#ifdef PCOMPLIB
+#ifdef MPI_SRC
+  MPI_Recv(int_msg_b,2,MPI_INT,0,
+	   QSEQTYPE0, MPI_COMM_WORLD, &mpi_status);
+
+  n0 = int_msg_b[0];
+  nm0 = int_msg_b[1];
+#endif
+#else	/* COMP_THR */
+  n0 = my_msp->n0;
+  nm0 = my_msp->nm0;
+  if (my_worker != FIRSTNODE) {
+    /* if this is a pssm search, allocate local copy of pam2p[][]*/
+    if (work_info->ppst->pam_pssm && work_info->ppst->pam2p[0]) {
+      my_ppst->pam2p[0] = alloc_pam2p(my_ppst->pam2p[0],n0,npam);
+      my_ppst->pam2p[1] = alloc_pam2p(my_ppst->pam2p[1],n0,npam);
+
+      for (i=0; i<n0; i++) {
+	for (j=0; j < npam; j++) {
+	  my_pst.pam2p[0][i][j] = work_info->ppst->pam2p[0][i][j];
+	  my_pst.pam2p[1][i][j] = work_info->ppst->pam2p[1][i][j];
+	}
+      }
+    }
+  }
+#endif
+
+  if ((aa0[0]=(unsigned char *)calloc((size_t)n0+2+SEQ_PAD,sizeof(unsigned char)))
+      ==NULL) {
+    fprintf(stderr," cannot allocate aa00[%d] for worker %d\n",
+	    n0, my_worker);
+    exit(1);
+  }
+  *aa0[0]='\0';
+  aa0[0]++;	/* the sequence starts one byte in; aa0[0]-1 is the allocation base freed at clean-up */
+
+#ifndef PCOMPLIB
+  memcpy(aa0[0],work_info->aa0,n0+1);
+#else
+#ifdef MPI_SRC
+  /* get aa0[0] from host */
+  MPI_Recv(aa0[0],n0+1,MPI_BYTE,0,
+	   QSEQTYPE1,MPI_COMM_WORLD, &mpi_status);
+
+  /* also get annotation if available */
+  if (my_msp->ann_flg && my_msp->aa0a != NULL) {
+    if ((my_msp->aa0a = (unsigned char *)calloc(my_msp->n0+2,sizeof(char)))==NULL) {
+      fprintf(stderr, "*** error -- cannot allocate annotation array\n");
+      exit(1);
+    }
+    MPI_Recv(my_msp->aa0a, (my_msp->n0+2)*sizeof(char), MPI_BYTE, 0,
+	     QSEQTYPE1, MPI_COMM_WORLD, &mpi_status);
+  }
+#endif
+#endif
+
+  init_aa0(aa0, n0, nm0, &aa0s, &aa1s,
+	   my_msp->qframe, my_msp->qshuffle, my_msp->max_tot,
+	   my_ppst,  &f_str[0], &qf_str, my_rand_state);
+
+/* **************************************************************** */
+/* main work loop */
+
+  while (get_wbuf(&cur_buf,max_work_buf)) {
+
+    if (cur_buf->hdr.stop_work) break;
+
+    /* exit thread on specific command -- this option is not used 
+       for threads - get_wbuf() stops when rbuf_done() sets reader_done==1
+       but it is used for PCOMPLIB
+    */
+
+    if (cur_buf->hdr.buf2_cnt <= 0) {	/* buffers can be empty */
+      cur_buf->hdr.have_results = 0;
+      goto res_done;
+    }
+
+    if (cur_buf->hdr.buf2_type & BUF2_DOWORK) {
+
+      buf_do_work(aa0, n0, cur_buf, my_msp->nitt1, my_ppst, f_str);
+
+      if (my_msp->qshuffle) {
+	buf_qshuf_work(aa0s, n0, cur_buf, my_msp->nitt1,
+		       my_ppst, qf_str, my_ppst->score_ix);
+      }
+    }
+
+    if (cur_buf->hdr.buf2_type & BUF2_DOSHUF) {
+      buf_shuf_work(aa0, n0, aa1s,  cur_buf, my_msp->nitt1,
+		    my_ppst, f_str, my_ppst->score_ix, my_rand_state);
+    }
+
+    /*
+    if (cur_buf->hdr.buf2_type & BUF2_DOOPT) {
+      buf_do_opt(aa0, n0, cur_buf, my_ppst, f_str);
+    }
+    */
+
+    if (cur_buf->hdr.buf2_type & BUF2_DOALIGN) {
+      buf_do_align(aa0, n0, cur_buf, my_ppst, my_msp, f_str);
+    }
+    cur_buf->hdr.have_results = 1;
+
+  res_done:
+    cur_buf->hdr.have_data = 0;
+
+    put_wbuf(cur_buf,max_work_buf);
+
+  } /* end main while */
+
+/* **************************************************************** */
+/* all done - clean-up */
+
+  close_work(aa0[0], n0, my_ppst, &f_str[0]);
+  free(aa0[0]-1);
+  if (my_msp->qframe == 2) {
+    close_work(aa0[1], n0, my_ppst, &f_str[1]);
+    free(aa0[1]-1);
+  }
+
+  if (my_msp->qshuffle) {
+    close_work(aa0s, n0, my_ppst, &qf_str);
+    free(aa0s-1);
+  }
+
+  if (aa1s) free(aa1s-1);	/* aa1s is still NULL unless init_aa0() assigned it */
+
+#ifdef PCOMPLIB
+  if (my_msp->ann_flg && my_msp->aa0a) { free(my_msp->aa0a);}
+#endif
+
+  if (my_worker) {
+    free(my_pst.pam2[1][0]);
+    free(my_pst.pam2[0][0]);
+    free(my_pst.pam2[1]);
+    free(my_pst.pam2[0]);
+  }
+
+  if (my_worker && my_pst.pam_pssm) {
+    free_pam2p(my_pst.pam2p[0]);
+    free_pam2p(my_pst.pam2p[1]);
+  }
+
+/* **************************************************************** */
+/* and exit */
+
+#ifdef DEBUG
+  /*   fprintf(stderr,"worker [%d] done\n",my_worker); */
+#endif
+
+#ifndef PCOMPLIB
+  free(my_rand_state);
+  THR_EXIT(&work_info->status);
+#else
+  /* the PCOMPLIB version loops after a search, waiting for another max_work_buf */
+  /* max_work_buf==0 signals end of queries */
+  goto pcomp_loop;
+
+ pcomp_final:
+  free(my_rand_state);
+#endif
+}  /* end work_thread */
diff --git a/test/results/README b/test/results/README
new file mode 100644
index 0000000..e8131ca
--- /dev/null
+++ b/test/results/README
@@ -0,0 +1 @@
+Placeholder file to create test/results/ directory.
diff --git a/test/test.bat b/test/test.bat
new file mode 100644
index 0000000..61fc390
--- /dev/null
+++ b/test/test.bat
@@ -0,0 +1,73 @@
+rem ""
+rem "starting fasta36_t - protein on win32"
+rem ""
+..\bin\fasta36_t -q -m 6 -Z 100000 ..\seq\mgstm1.aa:1-100 q > results\test_m1.ok2_t.html
+..\bin\fasta36_t -S -q -z 11 -O results\test_m1.ok2_t_p25 -s P250 ..\seq\mgstm1.aa:100-218 q
+rem "done"
+rem "starting fastxy36_t"
+..\bin\fastx36_t -m 9c -S -q ..\seq\mgtt2_x.seq q 1 > results\test_t2.xk1_t
+..\bin\fasty36_t -S -q ..\seq\mgtt2_x.seq q > results\test_t2.yk2_t
+..\bin\fastx36_t -m 9c -S -q -z 2 ..\seq\mgstm1.esq a > results\test_m1.xk2_tz2
+..\bin\fasty36_t -S -q -z 2 ..\seq\mgstm1.esq a > results\test_m1.yk2_tz2
+rem "done"
+rem "starting fastxy36_t rev"
+..\bin\fastx36_t -m 9c -q -m 5 ..\seq\mgstm1.rev q > results\test_m1.xk2r_t
+..\bin\fasty36_t -q -m 5 -M 200-300 -z 2 ..\seq\mgstm1.rev q > results\test_m1.yk2r_tz2
+..\bin\fasty36_t -q -m 5 -z 11 ..\seq\mgstm1.rev q > results\test_m1.yk2rz11_t
+rem "done"
+rem "starting ssearch36_t"
+..\bin\ssearch36_t -m 9c -S -z 3 -q ..\seq\mgstm1.aa  q > results\test_m1.ss_tz3
+..\bin\ssearch36_t -q -M 200-300 -z 2 -Z 100000 -s P250 ..\seq\mgstm1.aa q > results\test_m1.ss_t_p25
+rem "done"
+rem "starting prss/prfx36"
+..\bin\ssearch36_t -q -k 1000 -A ..\seq\mgstm1.aa ..\seq\xurt8c.aa  > results\test_m1.rss
+..\bin\fastx36_t -q -k 1000 -A ..\seq\mgstm1.esq ..\seq\xurt8c.aa > results\test_m1.rfx
+rem "done"
+rem "starting fasta36_t - DNA"
+..\bin\fasta36_t -S -q -z 2 ..\seq\mgstm1.seq %M 4 > results\test_m1.ok4_tz2
+..\bin\fasta36_t -S -q ..\seq\mgstm1.rev %M 4 > results\test_m1.ok4r_t
+rem "done"
+rem "starting tfastxy36_t"
+..\bin\tfastx36_t -m 9c -q -i -3 -m 6 ..\seq\mgstm1.aa %m > results\test_m1.tx2_t.html
+..\bin\tfasty36_t -q -i -3 -N 5000 ..\seq\mgstm1.aa %m > results\test_m1.ty2_t
+rem "done"
+rem "starting fastf36_t"
+..\bin\fastf36_t -q ..\seq\m1r.aa q > results\test_mf.ff_t
+..\bin\fastf36 -q ..\seq\m1r.aa q > results\test_mf.ff_s
+rem "done"
+rem "starting tfastf36_t"
+..\bin\tfastf36_t -q ..\seq\m1r.aa %m > results\test_mf.tf_tr
+rem "done"
+rem "starting fasts36_t"
+..\bin\fasts36_t -q -V '*?@' ..\seq\ngts.aa q > results\test_m1.fs1_t
+..\bin\fasts36_t -q ..\seq\ngt.aa q > results\test_m1.fs_t
+..\bin\fasts36_t -q -n ..\seq\mgstm1.nts m > results\test_m1.nfs_t
+rem "done"
+rem "starting tfasts36_t"
+..\bin\tfasts36_t -q ..\seq\n0.aa %m > results\test_m1.ts_r
+rem "done"
+rem "starting fasta36 - protein"
+..\bin\fasta36 -q -z 2 ..\seq\mgstm1.aa q 1 > results\test_m1.ok1z2
+..\bin\fasta36 -q -s P250 ..\seq\mgstm1.aa q > results\test_m1.ok2_p25 
+rem "done"
+rem "starting fastx3"
+..\bin\fastx36 -m 9c -q ..\seq\mgstm1.esq q > results\test_m1.ok2x 
+rem "done"
+rem "starting fasty3"
+..\bin\fasty36 -q ..\seq\mgstm1.esq q > results\test_m1.ok2y 
+rem "done"
+rem "starting fasta36 - DNA "
+..\bin\fasta36 -m 9c -q ..\seq\mgstm1.seq M 4 > results\test_m1.ok4 
+rem "done"
+rem "starting ssearch3"
+..\bin\ssearch36 -S -q -z 2 ..\seq\mgstm1.aa q > results\test_m1.ss_z2
+..\bin\ssearch36 -q -s P250 ..\seq\mgstm1.aa q > results\test_m1.ss_p25 
+rem "done"
+rem "starting tfastxy3"
+..\bin\tfastx36 -q ..\seq\mgstm1.aa M > results\test_m1.tx2 
+..\bin\tfasty36 -m 9c -q ..\seq\mgstm1.aa M > results\test_m1.ty2 
+rem "done"
+rem "starting fasts36"
+..\bin\fasts36 -q -V '@?*' ..\seq\ngts.aa q > results\test_m1.fs1
+..\bin\fasts36 -q ..\seq\ngt.aa q > results\test_m1.fs
+rem "done"
diff --git a/test/test.sh b/test/test.sh
new file mode 100755
index 0000000..15fd49d
--- /dev/null
+++ b/test/test.sh
@@ -0,0 +1,79 @@
+#!/bin/sh
+echo ""
+echo "STARTING FASTA36" `date` "on" `hostname`
+echo `uname -a`
+echo ""
+if [ ! -d results ]; then
+  mkdir results
+fi
+echo "starting fasta36 - protein" `date`
+../bin/fasta36 -q -m 6 -Z 100000 ../seq/mgstm1.aa:1-100 q > results/test_m1.ok2.html
+../bin/fasta36 -S -q -z 11 -O results/test_m1.ok2_p25 -s P250 ../seq/mgstm1.aa:100-218 q
+echo "done"
+echo "starting fastxy36" `date`
+../bin/fastx36 -m 9c -S -q ../seq/mgtt2_x.seq q 1 > results/test_t2.xk1
+../bin/fasty36 -S -q ../seq/mgtt2_x.seq q > results/test_t2.yk2
+../bin/fastx36 -m 9c -S -q -z 2 ../seq/mgstm1.esq a > results/test_m1.xk2z2
+../bin/fasty36 -S -q -z 2 ../seq/mgstm1.esq a > results/test_m1.yk2z2
+echo "done"
+echo "starting fastxy36 rev" `date`
+../bin/fastx36 -m 9c -q -m 5 ../seq/mgstm1.rev q > results/test_m1.xk2r
+../bin/fasty36 -q -m 5 -M 200-300 -z 2 ../seq/mgstm1.rev q > results/test_m1.yk2rz2
+../bin/fasty36 -q -m 5 -z 11 ../seq/mgstm1.rev q > results/test_m1.yk2rz11
+echo "done"
+echo "starting ssearch36" `date`
+../bin/ssearch36 -m 9c -S -z 3 -q ../seq/mgstm1.aa  q > results/test_m1.ssz3
+../bin/ssearch36 -q -M 200-300 -z 2 -Z 100000 -s P250 ../seq/mgstm1.aa q > results/test_m1.ss_p25
+echo "done"
+if [ -e ../bin/ssearch36s ]; then
+    echo "starting ssearch36s" `date`
+    ../bin/ssearch36s -m 9c -S -z 3 -q ../seq/mgstm1.aa  q > results/test_m1.sssz3
+    ../bin/ssearch36s -q -M 200-300 -z 2 -Z 100000 -s P250 ../seq/mgstm1.aa q > results/test_m1.sss_p25
+    echo "done"
+fi
+echo "starting prss36(ssearch/fastx)" `date`
+../bin/ssearch36 -q -k 1000 -a ../seq/mgstm1.aa ../seq/xurt8c.aa  > results/test_m1.rss
+../bin/fastx36 -q -k 1000 ../seq/mgstm1.esq ../seq/xurt8c.aa > results/test_m1.rfx
+echo "done"
+echo "starting ggsearch36/glsearch36" `date`
+../bin/ggsearch36 -q -m 9i -w 80 ../seq/hahu.aa q > results/test_h1.gg
+../bin/glsearch36 -q -m 9i -w 80 ../seq/hahu.aa q > results/test_h1.gl
+../bin/ggsearch36 -q ../seq/gtt1_drome.aa q > results/test_t1.gg
+../bin/glsearch36 -q ../seq/gtt1_drome.aa q > results/test_t1.gl
+echo "done"
+echo "starting fasta36 - DNA" `date`
+../bin/fasta36 -S -q ../seq/mgstm1.nt %RMB 4 > results/test_m1.ok4
+../bin/fasta36 -S -q ../seq/mgstm1.rev %RMB 4 > results/test_m1.ok4r
+echo "done"
+#echo "starting tfasta36" `date`
+#tfasta36 -q ../seq/mgstm1.aa %RMB > results/test_m1.tk2
+#echo "done"
+echo "starting tfastxy36" `date`
+../bin/tfastx36 -m 9c -q -i -3 -m 6 ../seq/mgstm1.aa %p > results/test_m1.tx2.html
+../bin/tfasty36 -q -i -3 -N 5000 ../seq/mgstm1.aa %p > results/test_m1.ty2
+echo "done"
+echo "starting fastf36" `date`
+../bin/fastf36 -q ../seq/m1r.aa q > results/test_mf.ff
+../bin/fastf36 -q ../seq/m1r.aa q > results/test_mf.ff_s
+echo "done"
+echo "starting tfastf36" `date`
+../bin/tfastf36 -q ../seq/m1r.aa %r > results/test_mf.tfr
+echo "done"
+echo "starting fasts36" `date`
+../bin/fasts36 -q -V '*?@' ../seq/ngts.aa q > results/test_m1.fs1
+../bin/fasts36 -q ../seq/ngt.aa q > results/test_m1.fs
+../bin/fasts36 -q -n ../seq/mgstm1.nts m > results/test_m1.nfs
+echo "starting fastm36" `date`
+../bin/fastm36 -q ../seq/ngts.aa q > results/test_m1.fm
+../bin/fastm36 -q -n ../seq/mgstm1.nts m > results/test_m1.nfm
+echo "done"
+echo "starting tfasts36" `date`
+../bin/tfasts36 -q ../seq/n0.aa %r > results/test_m1.ts_r
+echo "starting lalign36" `date`
+../bin/lalign36 -k 1000 -q ../seq/mchu.aa ../seq/mchu.aa > results/test_mc.lal
+../bin/lalign36 -z 3 -q ../seq/mchu.aa ../seq/mchu.aa > results/test_mc.lal_z3
+../bin/lalign36 -s BL62 -f -11 -g -1  -q ../seq/mchu.aa ../seq/mchu.aa > results/test_mc.lal_bl62
+../bin/lalign36 -k 1000 -q ../seq/mwkw.aa ../seq/mwkw.aa > results/test_mw.lal
+../bin/lalign36 -z 3 -q ../seq/mwkw.aa ../seq/mwkw.aa > results/test_mw.lal_z3
+../bin/lalign36 -s BL62 -f -11 -g -1  -q ../seq/mwkw.aa ../seq/mwkw.aa > results/test_mw.lal_bl62
+echo "FINISHED" `date`
diff --git a/test/test2.sh b/test/test2.sh
new file mode 100755
index 0000000..dc499b6
--- /dev/null
+++ b/test/test2.sh
@@ -0,0 +1,53 @@
+#!/bin/sh
+echo ""
+echo "starting FASTA36" `date` "on" `hostname`
+echo `uname -a`
+echo ""
+echo "starting fasta36 - protein" `date`
+if [ ! -d results ]; then
+ mkdir results
+fi
+../bin/fasta36 -S -z 21 -s BP62 -c O ../seq/mgstm1.aa q > results/test2o_m1.ok2_bp62
+../bin/fasta36 -S -z 21 -s BP62 ../seq/mgstm1.aa q > results/test2_m1.ok2_bp62
+../bin/fasta36 -S -z 21 -c O ../seq/mgstm1.aa q > results/test2o_m1.ok2_z21
+../bin/fasta36 -S -z 21 ../seq/mgstm1.aa q > results/test2_m1.ok2_z21
+../bin/fasta36 -S -m BB  ../seq/mgstm1.aa q > results/test2_m1.ok2mB
+../bin/fasta36 -S -m 8CC  ../seq/mgstm1.aa q > results/test2_m1.ok2m8CC
+../bin/fasta36 -S -m 8CB ../seq/mgstm1.aa q > results/test2_m1.ok2m8CB
+echo "done"
+echo "starting fastxy36" `date`
+../bin/fastx36 -m 9c -S -c O -q ../seq/mgtt2_x.seq q > results/test2o_t2.xk2m9c
+../bin/fastx36 -m 8C -S -q ../seq/mgtt2_x.seq q > results/test2_t2.xk2m8C
+../bin/fastx36 -m 8CB -S -q ../seq/mgtt2_x.seq q > results/test2_t2.xk2m8CB
+../bin/fastx36 -m BB -S -q -H ../seq/mgtt2_x.seq q > results/test2_t2.xk2mB
+../bin/fasty36 -S -c O -q ../seq/mgtt2_x.seq q > results/test2o_t2.yk2
+../bin/fasty36 -S -q ../seq/mgtt2_x.seq q > results/test2_t2.yk2  # test2_ (vs test2o_) runs omit "-c O"
+../bin/fasty36 -S -m8 -q ../seq/mgtt2_x.seq q > results/test2_t2.yk2m8
+../bin/fastx36 -m 9c -c O  -S -q -z 22 ../seq/mgstm1.esq q > results/test2o_m1.xk2m9cz22
+../bin/fastx36 -m 8C -S -q  ../seq/mgstm1.esq q > results/test2_m1.xk2m8Cz22
+../bin/fastx36 -m 8CB -S -q ../seq/mgstm1.esq q > results/test2_m1.xk2m8CBz22
+../bin/fasty36 -S -c O -q -z 21 ../seq/mgstm1.esq q > results/test2o_m1.yk2z21
+../bin/fasty36 -S -q -z 21 ../seq/mgstm1.esq q > results/test2_m1.yk2z21
+echo "done"
+echo "starting ssearch36" `date`
+../bin/ssearch36 -m 9c -S -z 22 -q ../seq/mgstm1.aa  q > results/test2_m1.ssm9cz22
+../bin/ssearch36 -m 9C -S -z 21 -q ../seq/mgstm1.aa  q > results/test2_m1.ssm9Cz21
+../bin/ssearch36 -m 8C -S -q ../seq/mgstm1.aa  q > results/test2_m1.ssm8C
+../bin/ssearch36 -m 8CB -S -q ../seq/mgstm1.aa  q > results/test2_m1.ssm8CB
+echo "done"
+echo "starting fasta36 - DNA" `date`
+../bin/fasta36 -S -q -c O -r "+2/-4" ../seq/mgstm1.nt %RMB 4 > results/test2o_m1.ok4z1r24
+../bin/fasta36 -S -q -r "+2/-4" ../seq/mgstm1.nt %RMB 4 > results/test2_m1.ok4z1r24
+../bin/fasta36 -S -q -c O ../seq/mgstm1.rev %RMB 4 > results/test2o_m1.ok4r
+../bin/fasta36 -S -q  ../seq/mgstm1.rev %RMB 4 > results/test2_m1.ok4r
+../bin/fasta36 -m BB -q  ../seq/mgstm1.rev %RMB > results/test2_m1.ok6mB
+../bin/fasta36 -m 8C -q  ../seq/mgstm1.rev %RMB > results/test2_m1.ok6m8C
+../bin/fasta36 -m 8CB -q  ../seq/mgstm1.rev %RMB > results/test2_m1.ok6m8CB
+echo "done"
+echo "starting tfastxy36" `date`
+../bin/tfastx36 -c O -m 9c -q -i -3 ../seq/mgstm1.aa %p > results/test2o_m1.tx2
+../bin/tfastx36 -m 8C -q -i -3 ../seq/mgstm1.aa %p > results/test2_m1.tx2_m8C
+../bin/tfastx36 -m 8CB -q -i -3 ../seq/mgstm1.aa %p > results/test2_m1.tx2_m8CB
+../bin/tfasty36 -c O  -q -i -3 -N 5000 ../seq/mgstm1.aa %p > results/test2o_m1.ty2
+../bin/tfasty36 -q -i -3 -N 5000 ../seq/mgstm1.aa %p > results/test2_m1.ty2
+echo "done" `date`
diff --git a/test/test2G.sh b/test/test2G.sh
new file mode 100755
index 0000000..64b6e9a
--- /dev/null
+++ b/test/test2G.sh
@@ -0,0 +1,79 @@
+#!/bin/sh
+echo ""
+echo "STARTING FASTA36" `date` "on" `hostname`
+echo `uname -a`
+echo ""
+if [ ! -d results ]; then
+  mkdir results
+fi
+echo "starting fasta36 - protein" `date`
+../bin/fasta36 -XM2G -q -m 6 -Z 100000 ../seq/mgstm1.aa:1-100 q > results/test2G_m1.ok2.html
+../bin/fasta36 -q -XM2G -S -z 11 -O results/test2G_m1.ok2_p25 -s P250 ../seq/mgstm1.aa:100-218 q
+echo "done"
+echo "starting fastxy36" `date`
+../bin/fastx36 -q -XM2G -m 9c -S ../seq/mgtt2_x.seq q 1 > results/test2G_t2.xk1
+../bin/fasty36 -q -XM2G -S ../seq/mgtt2_x.seq q > results/test2G_t2.yk2
+../bin/fastx36 -q -XM2G -m 9c -S -z 2 ../seq/mgstm1.esq a > results/test2G_m1.xk2z2
+../bin/fasty36 -q -XM2G -S -z 2 ../seq/mgstm1.esq a > results/test2G_m1.yk2z2
+echo "done"
+echo "starting fastxy36 rev" `date`
+../bin/fastx36 -q -XM2G -m 9c -m 5 ../seq/mgstm1.rev q > results/test2G_m1.xk2r
+../bin/fasty36 -q -XM2G -m 5 -M 200-300 -z 2 ../seq/mgstm1.rev q > results/test2G_m1.yk2rz2
+../bin/fasty36 -q -XM2G -m 5 -z 11 ../seq/mgstm1.rev q > results/test2G_m1.yk2rz11
+echo "done"
+echo "starting ssearch36" `date`
+../bin/ssearch36 -q -XM2G -m 9c -S -z 3 ../seq/mgstm1.aa  q > results/test2G_m1.ssz3
+../bin/ssearch36 -q -XM2G -M 200-300 -z 2 -Z 100000 -s P250 ../seq/mgstm1.aa q > results/test2G_m1.ss_p25
+echo "done"
+if [ -e ../bin/ssearch36s ]; then
+    echo "starting ssearch36s" `date`
+    ../bin/ssearch36s -q -XM2G -m 9c -S -z 3 ../seq/mgstm1.aa  q > results/test2G_m1.sssz3
+    ../bin/ssearch36s -q -XM2G -M 200-300 -z 2 -Z 100000 -s P250 ../seq/mgstm1.aa q > results/test2G_m1.sss_p25
+    echo "done"
+fi
+echo "starting prss36(ssearch/fastx)" `date`
+../bin/ssearch36 -q -XM2G -k 1000 -a ../seq/mgstm1.aa ../seq/xurt8c.aa  > results/test2G_m1.rss
+../bin/fastx36 -q -XM2G -k 1000 ../seq/mgstm1.esq ../seq/xurt8c.aa > results/test2G_m1.rfx
+echo "done"
+echo "starting ggsearch36/glsearch36" `date`
+../bin/ggsearch36 -q -XM2G -m 9i -w 80 ../seq/hahu.aa q > results/test2G_h1.gg
+../bin/glsearch36 -q -XM2G -m 9i -w 80 ../seq/hahu.aa q > results/test2G_h1.gl
+../bin/ggsearch36 -q -XM2G ../seq/gtt1_drome.aa q > results/test2G_t1.gg
+../bin/glsearch36 -q -XM2G ../seq/gtt1_drome.aa q > results/test2G_t1.gl
+echo "done"
+echo "starting fasta36 - DNA" `date`
+../bin/fasta36 -S -q -XM2G ../seq/mgstm1.nt %RMB 4 > results/test2G_m1.ok4
+../bin/fasta36 -S -q -XM2G ../seq/mgstm1.rev %RMB 4 > results/test2G_m1.ok4r
+echo "done"
+#echo "starting tfasta36" `date`
+#tfasta36 -q -XM2G ../seq/mgstm1.aa %RMB > results/test2G_m1.tk2
+#echo "done"
+echo "starting tfastxy36" `date`
+../bin/tfastx36 -m 9c -q -XM2G -i -3 -m 6 ../seq/mgstm1.aa %p > results/test2G_m1.tx2.html
+../bin/tfasty36 -q -XM2G -i -3 -N 5000 ../seq/mgstm1.aa %p > results/test2G_m1.ty2
+echo "done"
+echo "starting fastf36" `date`
+../bin/fastf36 -q -XM2G ../seq/m1r.aa q > results/test2G_mf.ff
+../bin/fastf36 -q -XM2G ../seq/m1r.aa q > results/test2G_mf.ff_s
+echo "done"
+echo "starting tfastf36" `date`
+../bin/tfastf36 -q -XM2G ../seq/m1r.aa %r > results/test2G_mf.tfr
+echo "done"
+echo "starting fasts36" `date`
+../bin/fasts36 -q -XM2G -V '*?@' ../seq/ngts.aa q > results/test2G_m1.fs1
+../bin/fasts36 -q -XM2G ../seq/ngt.aa q > results/test2G_m1.fs
+../bin/fasts36 -q -XM2G -n ../seq/mgstm1.nts m > results/test2G_m1.nfs
+echo "starting fastm36" `date`
+../bin/fastm36 -q -XM2G ../seq/ngts.aa q > results/test2G_m1.fm
+../bin/fastm36 -q -XM2G -n ../seq/mgstm1.nts m > results/test2G_m1.nfm
+echo "done"
+echo "starting tfasts36" `date`
+../bin/tfasts36 -q -XM2G ../seq/n0.aa %r > results/test2G_m1.ts_r
+echo "starting lalign36" `date`
+../bin/lalign36 -q -XM2G -k 1000 ../seq/mchu.aa ../seq/mchu.aa > results/test2G_mc.lal
+../bin/lalign36 -q -XM2G -z 3 ../seq/mchu.aa ../seq/mchu.aa > results/test2G_mc.lal_z3
+../bin/lalign36 -q -XM2G -s BL62 -f -11 -g -1  ../seq/mchu.aa ../seq/mchu.aa > results/test2G_mc.lal_bl62
+../bin/lalign36 -q -XM2G -k 1000 ../seq/mwkw.aa ../seq/mwkw.aa > results/test2G_mw.lal
+../bin/lalign36 -z 3 -q -XM2G ../seq/mwkw.aa ../seq/mwkw.aa > results/test2G_mw.lal_z3
+../bin/lalign36 -q -XM2G -s BL62 -f -11 -g -1 ../seq/mwkw.aa ../seq/mwkw.aa > results/test2G_mw.lal_bl62
+echo "FINISHED" `date`
diff --git a/test/test2V.sh b/test/test2V.sh
new file mode 100755
index 0000000..26bf7ed
--- /dev/null
+++ b/test/test2V.sh
@@ -0,0 +1,28 @@
+#!/bin/sh
+echo ""
+echo "starting FASTA36" `date` "on" `hostname`
+echo `uname -a`
+echo ""
+echo "starting fasta36 - protein" `date`
+if [ ! -d results ]; then
+ mkdir results
+fi
+../bin/fasta36 -V q\!../scripts/ann_feats_up_www2.pl -V \!../scripts/ann_feats_up_www2.pl -S -z 21 -s BP62 ../seq/gstm1_human.vaa q > results/test2V_m1.ok2_bp62
+../bin/fasta36 -V q\!../scripts/ann_feats_up_www2.pl -V \!../scripts/ann_feats_up_www2.pl -S -z 21 ../seq/gstm1_human.vaa q > results/test2V_m1.ok2_z21
+../bin/fasta36 -V q\!../scripts/ann_feats_up_www2.pl -V \!../scripts/ann_feats_up_www2.pl -S -m BB  ../seq/gstm1_human.vaa q > results/test2V_m1.ok2mB
+echo "done"
+echo "starting fastxy36" `date`
+../bin/fastx36 -V \!../scripts/ann_feats_up_www2.pl -m 9c -S -q ../seq/mgtt2_x.seq q > results/test2V_t2.xk2m9c
+../bin/fastx36 -V \!../scripts/ann_feats_up_www2.pl -m BB -S -q ../seq/mgtt2_x.seq q > results/test2V_t2.xk2mB
+../bin/fastx36 -V \!../scripts/ann_feats_up_www2.pl -m 9c -S -q -z 22 ../seq/gstm1b_human.nt q > results/test2V_m1.xk2m9cz22
+../bin/fasty36 -V \!../scripts/ann_feats_up_www2.pl -S -q -z 21 ../seq/gstm1b_human.nt q > results/test2V_m1.yk2z21
+echo "done"
+echo "starting ssearch36" `date`
+../bin/ssearch36 -V q\!../scripts/ann_pfam_www.pl -V \!../scripts/ann_pfam_www.pl -m 9c -S -z 22 -q ../seq/gstm1_human.vaa  q > results/test2V_m1.ssm9cz22
+../bin/ssearch36 -V q\!../scripts/ann_pfam_www.pl -V \!../scripts/ann_pfam_www.pl -m 9C -S -z 21 -q ../seq/gstm1_human.vaa  q > results/test2V_m1.ssm9Cz21
+../bin/ssearch36 -V q\!../scripts/ann_pfam_www.pl -V \!../scripts/ann_pfam_www.pl -m 8CC -S  -q ../seq/gstm1_human.vaa  q > results/test2V_m1.ssm8CC
+echo "done" `date`
+echo "starting ggsearch36" `date`
+../bin/ggsearch36 -V q\!../scripts/ann_feats_up_www2.pl -V \!../scripts/ann_feats_up_www2.pl -m 9c -S -q ../seq/gstm1_human.vaa  q > results/test2V_m1.ggm9c
+../bin/ggsearch36 -V q\!../scripts/ann_feats_up_www2.pl -V \!../scripts/ann_feats_up_www2.pl -m 9C -S -z 21 -q ../seq/gstm1_human.vaa  q > results/test2V_m1.ggm9Cz21
+echo "done" `date`
diff --git a/test/test_mpi.pbs b/test/test_mpi.pbs
new file mode 100644
index 0000000..a09c9e1
--- /dev/null
+++ b/test/test_mpi.pbs
@@ -0,0 +1,59 @@
+#!/bin/sh
+#PBS -l nodes=4:ppn=4
+#PBS -l mem=16GB
+#PBS -l walltime=2:00:00
+
+echo ""
+echo "starting FASTA36" `date` "on" `hostname`
+echo `uname -a`
+echo ""
+if [ ! -d "${TEST_DIR:?TEST_DIR must be set to the FASTA source tree}/test/mpi_results" ]; then  # abort instead of probing /test
+  mkdir -p "${TEST_DIR}/test/mpi_results"
+fi
+echo "starting fasta36_mpi - protein" `date`
+mpiexec -comm mpich2-pmi ${TEST_DIR}/bin/fasta36_mpi -q -m 6 -Z 100000 ${TEST_DIR}/seq/mgstm1.aa:1-100 q > ${TEST_DIR}/test/mpi_results/test_m1.ok2_mpi.html
+mpiexec -comm mpich2-pmi ${TEST_DIR}/bin/fasta36_mpi -S -q -z 11 -O ${TEST_DIR}/test/mpi_results/test_m1.ok2_mpi_p25 -s P250 ${TEST_DIR}/seq/mgstm1.aa:100-218 q
+echo "done"
+echo "starting fastxy36_mpi" `date`
+mpiexec -comm mpich2-pmi ${TEST_DIR}/bin/fastx36_mpi -m 9c -S -q ${TEST_DIR}/seq/mgtt2_x.seq q 1 > ${TEST_DIR}/test/mpi_results/test_t2.xk1_mpi
+mpiexec -comm mpich2-pmi ${TEST_DIR}/bin/fasty36_mpi -S -q ${TEST_DIR}/seq/mgtt2_x.seq q > ${TEST_DIR}/test/mpi_results/test_t2.yk2_mpi
+mpiexec -comm mpich2-pmi ${TEST_DIR}/bin/fastx36_mpi -m 9c -S -q -z 2 ${TEST_DIR}/seq/mgstm1.esq a > ${TEST_DIR}/test/mpi_results/test_m1.xk2_mpi_z2
+mpiexec -comm mpich2-pmi ${TEST_DIR}/bin/fasty36_mpi -S -q -z 2 ${TEST_DIR}/seq/mgstm1.esq a > ${TEST_DIR}/test/mpi_results/test_m1.yk2_mpi_z2
+echo "done"
+echo "starting fastxy36_mpi rev" `date`
+mpiexec -comm mpich2-pmi ${TEST_DIR}/bin/fastx36_mpi -m 9c -q -m 5 ${TEST_DIR}/seq/mgstm1.rev q > ${TEST_DIR}/test/mpi_results/test_m1.xk2r_mpi
+mpiexec -comm mpich2-pmi ${TEST_DIR}/bin/fasty36_mpi -q -m 5 -M 200-300 -z 2 ${TEST_DIR}/seq/mgstm1.rev q > ${TEST_DIR}/test/mpi_results/test_m1.yk2r_mpi_z2
+mpiexec -comm mpich2-pmi ${TEST_DIR}/bin/fasty36_mpi -q -m 5 -z 11 ${TEST_DIR}/seq/mgstm1.rev q > ${TEST_DIR}/test/mpi_results/test_m1.yk2rz11_mpi
+echo "done"
+echo "starting ssearch36_mpi" `date`
+mpiexec -comm mpich2-pmi ${TEST_DIR}/bin/ssearch36_mpi -m 9c -S -z 3 -q ${TEST_DIR}/seq/mgstm1.aa  q > ${TEST_DIR}/test/mpi_results/test_m1.ss_mpi_z3
+mpiexec -comm mpich2-pmi ${TEST_DIR}/bin/ssearch36_mpi -q -M 200-300 -z 2 -Z 100000 -s P250 ${TEST_DIR}/seq/mgstm1.aa q > ${TEST_DIR}/test/mpi_results/test_m1.ss_mpi_p25
+echo "done"
+echo "starting ggsearch36/glsearch36" `date`
+mpiexec -comm mpich2-pmi ${TEST_DIR}/bin/ggsearch36_t -q -m 9i -w 80 ${TEST_DIR}/seq/hahu.aa q > ${TEST_DIR}/test/mpi_results/test_h1.gg_t  # same mpi_results dir the script creates; a relative results/ depends on the job's cwd
+mpiexec -comm mpich2-pmi ${TEST_DIR}/bin/glsearch36_t -q -m 9i -w 80 ${TEST_DIR}/seq/hahu.aa q > ${TEST_DIR}/test/mpi_results/test_h1.gl_t
+mpiexec -comm mpich2-pmi ${TEST_DIR}/bin/ggsearch36_t -q ${TEST_DIR}/seq/gtt1_drome.aa q > ${TEST_DIR}/test/mpi_results/test_t1.gg_t
+mpiexec -comm mpich2-pmi ${TEST_DIR}/bin/glsearch36_t -q ${TEST_DIR}/seq/gtt1_drome.aa q > ${TEST_DIR}/test/mpi_results/test_t1.gl_t
+echo "done"
+echo "starting fasta36_t - DNA" `date`
+mpiexec -comm mpich2-pmi ${TEST_DIR}/bin/fasta36_t -S -q ${TEST_DIR}/seq/mgstm1.nt %R 4 > ${TEST_DIR}/test/mpi_results/test_m1.ok4_t
+echo "done"
+echo "starting tfastxy36_t" `date`
+mpiexec -comm mpich2-pmi ${TEST_DIR}/bin/tfastx36_t -m 9c -q -i -3 -m 6 ${TEST_DIR}/seq/mgstm1.aa %p > ${TEST_DIR}/test/mpi_results/test_m1.tx2_t.html
+mpiexec -comm mpich2-pmi ${TEST_DIR}/bin/tfasty36_t -q -i -3 -N 5000 ${TEST_DIR}/seq/mgstm1.aa %p > ${TEST_DIR}/test/mpi_results/test_m1.ty2_t
+echo "done"
+echo "starting fastf36_t" `date`
+mpiexec -comm mpich2-pmi ${TEST_DIR}/bin/fastf36_t -q ${TEST_DIR}/seq/m1r.aa q > ${TEST_DIR}/test/mpi_results/test_mf.ff_t
+mpiexec -comm mpich2-pmi ${TEST_DIR}/bin/fastf36 -q ${TEST_DIR}/seq/m1r.aa q > ${TEST_DIR}/test/mpi_results/test_mf.ff_s
+echo "done"
+echo "starting tfastf36_t" `date`
+mpiexec -comm mpich2-pmi ${TEST_DIR}/bin/tfastf36_t -q ${TEST_DIR}/seq/m1r.aa %r > ${TEST_DIR}/test/mpi_results/test_mf.tf_tr
+echo "done"
+echo "starting fasts36_t" `date`
+mpiexec -comm mpich2-pmi ${TEST_DIR}/bin/fasts36_t -q -V '*?@' ${TEST_DIR}/seq/ngts.aa q > ${TEST_DIR}/test/mpi_results/test_m1.fs1_t
+mpiexec -comm mpich2-pmi ${TEST_DIR}/bin/fasts36_t -q ${TEST_DIR}/seq/ngt.aa q > ${TEST_DIR}/test/mpi_results/test_m1.fs_t
+mpiexec -comm mpich2-pmi ${TEST_DIR}/bin/fasts36_t -q -n ${TEST_DIR}/seq/mgstm1.nts m > ${TEST_DIR}/test/mpi_results/test_m1.nfs_t
+echo "done"
+echo "starting tfasts36_t" `date`
+mpiexec -comm mpich2-pmi ${TEST_DIR}/bin/tfasts36_t -q ${TEST_DIR}/seq/n0.aa %r > ${TEST_DIR}/test/mpi_results/test_m1.ts_r
+echo "done" `date`
diff --git a/test/test_mpi1.pbs b/test/test_mpi1.pbs
new file mode 100644
index 0000000..8a1b7cb
--- /dev/null
+++ b/test/test_mpi1.pbs
@@ -0,0 +1,27 @@
+#!/bin/sh
+#PBS -l nodes=4:ppn=4
+#PBS -l mem=8GB
+#PBS -l walltime=60:00
+
+echo ""
+echo "starting FASTA36" `date` "on" `hostname`
+echo `uname -a`
+echo ""
+if [ ! -d "${TEST_DIR:?TEST_DIR must be set to the FASTA source tree}/test/mpi_results" ]; then  # abort instead of probing /test
+  mkdir -p "${TEST_DIR}/test/mpi_results"
+fi
+echo "starting fasta36_mpi - protein" `date`
+mpiexec -comm mpich2-pmi ${TEST_DIR}/bin/fasta36_mpi -q -m 9c -Z 100000 -d 10 ${TEST_DIR}/seq/prot_test.lseg q > ${TEST_DIR}/test/mpi_results/test_plib.ok2_mpi
+mpiexec -comm mpich2-pmi ${TEST_DIR}/bin/fasta36_mpi -S -q -z 21 -s BP62 -d 10 -E 1e-6 ${TEST_DIR}/seq/prot_test.lseg q >${TEST_DIR}/test/mpi_results/test_plib.ok2_mpi_BP62
+echo "done"
+echo "starting fastxy36_mpi" `date`
+mpiexec -comm mpich2-pmi ${TEST_DIR}/bin/fastx36_mpi -m 9c -S -q ${TEST_DIR}/seq/gst.nlib q 1 > ${TEST_DIR}/test/mpi_results/test_nlib.xk1_mpi
+mpiexec -comm mpich2-pmi ${TEST_DIR}/bin/fasty36_mpi -S -q ${TEST_DIR}/seq/gst.nlib q > ${TEST_DIR}/test/mpi_results/test_nlib.yk2_mpi
+echo "done"
+echo "starting ssearch36_mpi" `date`
+mpiexec -comm mpich2-pmi ${TEST_DIR}/bin/ssearch36_mpi -m 9c -S -z 21 -d 10 -E 1e-6 -q ${TEST_DIR}/seq/prot_test.lseg  q > ${TEST_DIR}/test/mpi_results/test_plib.ss_mpi_z21
+echo "done"
+echo "starting ggsearch36/glsearch36" `date`
+mpiexec -comm mpich2-pmi ${TEST_DIR}/bin/ggsearch36_mpi -q -m 9i -w 80 -d 5 ${TEST_DIR}/seq/prot_test.lseg q >${TEST_DIR}/test/mpi_results/test_plib.gg_mpi
+mpiexec -comm mpich2-pmi ${TEST_DIR}/bin/glsearch36_mpi -q -m 9i -w 80 -d 5 ${TEST_DIR}/seq/prot_test.lseg q >${TEST_DIR}/test/mpi_results/test_plib.gl_mpi
+echo "done" `date`
diff --git a/test/test_mpi2.pbs b/test/test_mpi2.pbs
new file mode 100644
index 0000000..ae86eb8
--- /dev/null
+++ b/test/test_mpi2.pbs
@@ -0,0 +1,33 @@
+#!/bin/sh
+#PBS -l nodes=4:ppn=4
+#PBS -l mem=16GB
+#PBS -l walltime=1:00:00
+
+echo ""
+echo "starting FASTA36" `date` "on" `hostname`
+echo `uname -a`
+echo ""
+if [ ! -d "${TEST_DIR:?TEST_DIR must be set to the FASTA source tree}/test/mpi_results" ]; then  # abort instead of probing /test
+  mkdir -p "${TEST_DIR}/test/mpi_results"
+fi
+echo "starting fasta36_mpi - DNA" `date`
+mpiexec -comm mpich2-pmi ${TEST_DIR}/bin/fasta36_mpi -S -q ${TEST_DIR}/seq/dna_test_s.nlib %R 4 >${TEST_DIR}/test/mpi_results/test_nlib.ok4_mpi
+echo "done"
+echo "starting tfastxy36_mpi" `date`
+mpiexec -comm mpich2-pmi ${TEST_DIR}/bin/tfastx36_mpi -m 9c -q -i -3 ${TEST_DIR}/seq/prot_test_s.lseg %p >${TEST_DIR}/test/mpi_results/test_plib.tx2_mpi
+mpiexec -comm mpich2-pmi ${TEST_DIR}/bin/tfasty36_mpi -q -i -3 -N 5000 ${TEST_DIR}/seq/prot_test_s.lseg %p >${TEST_DIR}/test/mpi_results/test_plib.ty2_mpi
+echo "done"
+echo "starting fastf36_mpi" `date`
+mpiexec -comm mpich2-pmi ${TEST_DIR}/bin/fastf36_mpi -q ${TEST_DIR}/seq/m1r.aa q >${TEST_DIR}/test/mpi_results/test_mf.ff_mpi
+echo "done"
+echo "starting tfastf36_mpi" `date`
+mpiexec -comm mpich2-pmi ${TEST_DIR}/bin/tfastf36_mpi -q ${TEST_DIR}/seq/m1r.aa %r >${TEST_DIR}/test/mpi_results/test_mf.tf_mpir
+echo "done"
+echo "starting fasts36_mpi" `date`
+mpiexec -comm mpich2-pmi ${TEST_DIR}/bin/fasts36_mpi -q -V '*?@' ${TEST_DIR}/seq/ngts.aa q >${TEST_DIR}/test/mpi_results/test_m1.fs1_mpi
+mpiexec -comm mpich2-pmi ${TEST_DIR}/bin/fasts36_mpi -q ${TEST_DIR}/seq/ngt.aa q >${TEST_DIR}/test/mpi_results/test_m1.fs_mpi
+mpiexec -comm mpich2-pmi ${TEST_DIR}/bin/fasts36_mpi -q -n ${TEST_DIR}/seq/mgstm1.nts m >${TEST_DIR}/test/mpi_results/test_m1.nfs_mpi
+echo "done"
+echo "starting tfasts36_mpi" `date`
+mpiexec -comm mpich2-pmi ${TEST_DIR}/bin/tfasts36_mpi -q ${TEST_DIR}/seq/n0.aa %r >${TEST_DIR}/test/mpi_results/test_m1.ts_r
+echo "done" `date`
diff --git a/test/test_s.sh b/test/test_s.sh
new file mode 100755
index 0000000..4d90841
--- /dev/null
+++ b/test/test_s.sh
@@ -0,0 +1,47 @@
+#!/bin/csh -f
+echo ""
+echo "starting fasta36 - protein" `date` "on" `hostname`
+echo `uname -a`
+echo ""
+fasta36 -q -m 6 -Z 100000 ../seq/mgstm1.aa:1-100 q > test_m1.ok2.html
+fasta36 -S -q -z 11 -O test_m1.ok2_p25 -s P250 ../seq/mgstm1.aa:100-218 q
+echo "done"
+echo "starting fastxy36" `date`
+fastx36 -m 9 -S -q ../seq/mgtt2_x.seq q > test_t2.xk2
+fasty36 -S -q ../seq/mgtt2_x.seq q > test_t2.yk2
+fastx36 -m 9 -S -q -z 2 ../seq/mgstm1.esq a > test_m1.xk2z2
+fasty36 -S -q -z 2 ../seq/mgstm1.esq a > test_m1.yk2z2
+echo "done"
+echo "starting fastxy36 rev" `date`
+fastx36 -m 9 -q -m 5 mgstm1.rev q > test_m1.xk2r
+fasty36 -q -m 5 -M 200-300 -z 2 mgstm1.rev q > test_m1.yk2rz2
+fasty36 -q -m 5 -z 11 mgstm1.rev q > test_m1.yk2rz11
+echo "done"
+echo "starting ssearch36" `date`
+ssearch36 -m 9 -S -z 3 -q mgstm1.aa  q > test_m1.ssz3
+ssearch36 -q -M 200-300 -z 2 -Z 100000 -s P250 mgstm1.aa q > test_m1.ss_p25
+echo "done"
+echo "starting fasta36 - DNA" `date`
+fasta36 -q -z 2 mgstm1.seq %RMB 4 > test_m1.ok4z2
+fasta36 -q mgstm1.rev %RMB 4 > test_m1.ok4r
+echo "done"
+echo "starting tfasta36" `date`
+tfasta36 -q mgstm1.aa %RMB > test_m1.tk2
+echo "done"
+echo "starting tfastxy36" `date`
+tfastx36 -m 9 -q -i -3 -m 6 mgstm1.aa %p > test_m1.tx2.html
+tfasty36 -q -i -3 -N 5000 mgstm1.aa %p > test_m1.ty2
+echo "done"
+echo "starting fastf36" `date`
+fastf36 -q m1r.aa q > test_mf.ff_s
+echo "done"
+echo "starting tfastf36" `date`
+tfastf36 -q -E 0.0001 m1r.aa %r > test_mf.tf_r
+echo "done"
+echo "starting fasts36" `date`
+fasts36 -q n0.aa q > test_m1.fs_s
+echo "done"
+echo "starting tfasts36" `date`
+tfasts36 -q n0.aa %r > test_m1.ts_r
+echo "done"
+echo "done" `date`
diff --git a/test/test_z.sh b/test/test_z.sh
new file mode 100755
index 0000000..b4507b2
--- /dev/null
+++ b/test/test_z.sh
@@ -0,0 +1,22 @@
+#!/bin/csh -f
+echo "starting fasta36 - protein" `date`
+foreach z ( 1 2 3 11 12 21 22)
+../bin/fasta36 -q  -z $z -d 0 ../seq/mgstm1.aa a > results/test_m1_a.ok2_${z}
+../bin/fasta36 -q  -z $z -d 0 ../seq/oohu.aa a > results/test_m1_b.ok2_${z}
+../bin/fasta36 -q -S -z $z -d 0 ../seq/prio_atepa.aa a > results/test_m1_c.ok2S_${z}
+../bin/fasta36 -q -S -z $z -d 0 ../seq/h10_human.aa a > results/test_m1_d.ok2S_${z}
+../bin/fasta36 -c -1 -q  -z $z -d 0 ../seq/mgstm1.aa a > results/test_m1_a.c1_ok2_${z}
+../bin/fasta36 -c -1 -q  -z $z -d 0 ../seq/oohu.aa a > results/test_m1_b.c1_ok2_${z}
+../bin/fasta36 -c -1 -q -S -z $z -d 0 ../seq/prio_atepa.aa a > results/test_m1_c.c1_ok2S_${z}
+../bin/fasta36 -c -1 -q -S -z $z -d 0 ../seq/h10_human.aa a > results/test_m1_d.c1_ok2S_${z}
+end
+echo "done"
+echo "starting ssearch36" `date`
+foreach z ( 1 2 3 11 21 22)
+../bin/ssearch36 -q  -z $z -d 0 ../seq/mgstm1.aa a > results/test_m1_a.ssS_${z}
+../bin/ssearch36 -q  -z $z -d 0 ../seq/oohu.aa a > results/test_m1_b.ssS_${z}
+../bin/ssearch36 -q -sBL62 -d 0 -S -f -11 -z $z ../seq/prio_atepa.aa a > results/test_m1_c.ssSbl62_${z}
+../bin/ssearch36 -q -sBL62 -d 0 -S -f -11 -z $z ../seq/h10_human.aa a > results/test_m1_d.ssSbl62_${z}
+end
+echo "done"
+echo "done" `date`

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/fasta3.git



More information about the debian-med-commit mailing list