[med-svn] [pcma] 08/10: New upstream version 2.0+20040626
Andreas Tille
tille at debian.org
Mon Dec 18 13:03:48 UTC 2017
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository pcma.
commit c29ae55b33eb8e69fbd5fa45b9fd41419bf6af63
Author: Andreas Tille <tille at debian.org>
Date: Mon Dec 18 14:02:21 2017 +0100
New upstream version 2.0+20040626
---
README | 123 ++
README_clustalw | 240 +++
alcomp2.c | 3382 +++++++++++++++++++++++++++++++++
alnscore.c | 114 ++
amenu.c | 1298 +++++++++++++
blosum62.h | 47 +
calcgapcoeff.c | 497 +++++
calcprf1.c | 99 +
calcprf2.c | 73 +
calctree.c | 1264 +++++++++++++
dayhoff.h | 45 +
debian/changelog | 5 -
debian/compat | 1 -
debian/control | 28 -
debian/copyright | 14 -
debian/get-orig-source | 20 -
debian/rules | 10 -
debian/source/format | 1 -
debian/upstream/metadata | 12 -
debian/watch | 9 -
example/1aboA_ref2.dnd | 40 +
example/1aboA_ref2.fa | 30 +
example/1aboA_ref2.pcma50.aln | 36 +
example/alnlist | 4 +
example/alnlist.aln | 70 +
example/alnlist.dnd | 4 +
example/ggdef1.aln | 23 +
example/ggdef2.aln | 26 +
example/ggdef3.aln | 20 +
example/pcma_command | 7 +
gcgcheck.c | 15 +
general.h | 50 +
interface.c | 4124 +++++++++++++++++++++++++++++++++++++++++
lib_extension.c | 136 ++
lib_generation.c | 348 ++++
lsim1.c | 1258 +++++++++++++
makefile | 60 +
malign.c | 924 +++++++++
matrices.h | 852 +++++++++
new.h | 58 +
pairalign.c | 818 ++++++++
param.h | 381 ++++
pcma.c | 123 ++
pcma.h | 310 ++++
prfalign.c | 1153 ++++++++++++
prfalign1.c | 1349 ++++++++++++++
prfalign2.c | 1267 +++++++++++++
prfalignabs.c | 665 +++++++
random.c | 81 +
readmat.c | 477 +++++
sequence.c | 1705 +++++++++++++++++
showpair.c | 489 +++++
subtrees.c | 49 +
trees.c | 1618 ++++++++++++++++
util.c | 405 ++++
55 files changed, 26157 insertions(+), 100 deletions(-)
diff --git a/README b/README
new file mode 100644
index 0000000..209e615
--- /dev/null
+++ b/README
@@ -0,0 +1,123 @@
+
+ PCMA - Profile Consistency Multiple sequence Alignment
+
+
+******************************************************************************
+
+Please send bug reports, comments etc. to one of:
+ jpei at mednet.swmed.edu
+ grishin at chop.swmed.edu
+
+******************************************************************************
+
+ POLICY ON DISTRIBUTION OF PCMA
+
+PCMA includes code adapted from Clustal W, version 1.81. Clustal W,
+developed by Thompson JD, Higgins DG and Gibson TJ, is freely available to the
+user community. Commercial distributors of Clustal W must take out a
+NON-EXCLUSIVE LICENCE from the authors of Clustal W (gibson at embl-heidelberg.de,
+thompson at embl-heidelberg.de or d.higgins at ucc.ie). According to this policy,
+PCMA is free for non-commercial uses. Commercial uses are disallowed.
+
+******************************************************************************
+
+Version: 2.0
+
+What is new in version 2.0:
+
+ - pcma can now make an alignment by combining a number of input alignments.
+ - output alignment looks nicer for the N-terminal gaps.
+ - pcma is now faster due to modifications of library generation and
+ local profile-profile alignment, especially for large numbers of
+ sequences. Below is a comparison of performance on 49 large SMART
+ database alignments with sequence number between 100 and 200. PCMA
+ ave_grp_id threshold set to 50. Alignment evaluation routines are
+ available at: ftp://iole.swmed.edu/pub/PCMA/evalscore.
+
+ PCMA-v2.0 PCMA-v1.0 T-Coffee ClustalW
+ Sum-of-pairs accuracy 0.870 0.870 0.841 0.780
+ Column-score accuracy 0.263 0.258 0.246 0.210
+ Average CPU time (s) 732 1334 16284 28
+ Median CPU time (s) 311 565 15386 15
+
+
+******************************************************************************
+
+ PCMA help
+
+PCMA - Profile Consistency Multiple sequence Alignment
+
+1.A quick start
+ To align a sequence set in fasta format, use the following command:
+ pcma <target_sequences>
+ Two output files will be generated:
+ <target_sequences>.aln - A multiple sequence alignment in clustal format
+ <target_sequences>.dnd - A dendrogram in phylip format
+
+2.Usage: pcma <target_sequences> <options>
+ The first command line argument <target_sequences> should be the name of the
+ file containing FASTA format sequences. One IMPORTANT notice is that the
+ sequences should not contain gap characters in them, otherwise the results
+ might be incorrect.
+
+ Options are in the format of -optionName or -optionName=option.
+ NOTE that there should be no space(s) between "optionName", "=" and "option".
+ Although many of the original ClustalW options are supported in PCMA,
+ changes from default parameters are not recommended for most of them.
+
+ An example:
+ pcma yfp.fa -ave_grp_id=50 -outfile=yfp.pcma50.aln
+
+3.Commonly used options
+ -ave_grp_id= Threshold of PERCENTAGE sequence identity above which
+ neighboring groups are aligned by ClustalW and below which
+ neighboring groups are subject to profile consistency measure.
+ If the sequence number is very large, a decrease of the
+ threshold from the default value is recommended.
+ Range [0..100]
+ Default: -ave_grp_id=40
+
+ -outfile= Name of the output alignment.
+ If this option is not used, the output alignment will be
+ in clustal format with .aln suffix
+
+ -output= The output alignment format.
+ Default: -output=clustal
+ Other formats include gcg, phylip and pir.
+
+ -help or -options Help and options.
+
+ 4.Newly added function for PCMA
+ PCMA now supports the alignment of several alignments. In this case, the first
+ command-line parameter should be a file containing a list of file names of the
+ alignments to be aligned together. To make this format distinct from a fasta
+ format sequence file, the first line should start with a character "@". Each of
+ the other lines contains the file name of an alignment. Here, input alignments
+ CAN have gap characters (usually they do). There should NOT be any two sequences
+ with the same name in all these alignments.
+ For example, below is the content of file "alnlist":
+
+ @
+ alignment1.aln
+ alignment2.aln
+ alignment3.aln
+
+ PCMA (command: pcma alnlist) will generate a new alignment named "alnlist.aln".
+
+
+******************************************************************************
+
+ References
+
+Pei, J., Sadreyev, R., Grishin, N.V., (2003) PCMA: a program for fast
+and accurate multiple sequence alignment, Bioinformatics, 19(3):427-428.
+
+Thompson, J.D., Higgins, D.G. and Gibson, T.J. (1994) CLUSTAL W: improving the
+sensitivity of progressive multiple sequence alignment through sequence
+weighting, positions-specific gap penalties and weight matrix choice. Nucleic
+Acids Research, 22:4673-4680.
+
+Notredame, C., Higgins, D.G., and Heringa, J. (2000). T-Coffee: A novel method
+for fast and accurate multiple sequence alignment, J Mol Biol 302, 205-17.
+
+
diff --git a/README_clustalw b/README_clustalw
new file mode 100644
index 0000000..35bc8e8
--- /dev/null
+++ b/README_clustalw
@@ -0,0 +1,240 @@
+******************************************************************************
+
+ CLUSTAL W Multiple Sequence Alignment Program
+ (version 1.8, June 1999)
+
+******************************************************************************
+
+
+Please send bug reports, comments etc. to one of:-
+ gibson at embl-heidelberg.de
+ thompson at embl-heidelberg.de
+ d.higgins at ucc.ie
+
+
+******************************************************************************
+
+ POLICY ON COMMERCIAL DISTRIBUTION OF CLUSTAL W
+
+Clustal W is freely available to the user community. However, Clustal W is
+increasingly being distributed as part of commercial sequence analysis
+packages. To help us safeguard future maintenance and development, commercial
+distributors of Clustal W must take out a NON-EXCLUSIVE LICENCE. Anyone
+wishing to commercially distribute version 1.8 of Clustal W should contact the
+authors unless they have previously taken out a licence.
+
+******************************************************************************
+
+Clustal W is written in ANSI-C and can be run on any machine with an ANSI-C
+compiler. Executables are provided for several major platforms.
+
+
+Changes since version 1.74
+--------------------------
+
+1. Some work has been done to automatically select the optimal parameters
+depending on the set of sequences to be aligned. The Gonnet series of residue
+comparison matrices are now used by default. The Blosum series remains as an
+option. The default gap extension penalty for proteins has been changed to 0.2
+(was 0.05). The 'delay divergent sequences' option has been changed to 30%
+residue identity (was 40%).
+
+2. The default parameters used when the 'Negative matrix' option is selected
+have been optimised. This option may help when the sequences to be aligned are
+not superposable over their whole lengths (e.g. in the presence of N/C terminal
+extensions).
+
+3. A bug in the calculation of phylogenetic trees for 2 sequences has been
+fixed.
+
+4. A command line option has been added to turn off the sequence weighting
+calculation.
+
+5. The phylogenetic tree calculation now ignores any ambiguity codes in the
+sequences.
+
+6. A bug in the memory access during the calculation of profiles has been
+fixed. (Thanks to Haruna Cofer at SGI).
+
+7. A bug has been fixed in the 'transition weight' option for nucleic acid
+sequences. (Thanks to Chanan Rubin at Compugen).
+
+8. An option has been added to read in a series of comparison matrices from a
+file. This option is only applicable for protein sequences. For details of the
+file format, see the on-line documentation.
+
+9. The MSF output file format has been changed. The sequence weights
+calculated by Clustal W are now included in the header.
+
+10. Two bugs in the FAST/APPROXIMATE pairwise alignments have been fixed. One
+involved the alignment of new sequences to an existing profile using the fast
+pairwise alignment option; the second was caused by changing the default
+options for the fast pairwise alignments.
+
+11. A bug in the alignment of a small number of sequences has been fixed.
+Previously a Guide Tree was not calculated for fewer than 4 sequences.
+
+
+Changes since version 1.6
+-------------------------
+
+1. The static arrays used by clustalw for storing the alignment data have been
+replaced by dynamically allocated memory. There is now no limit on the number
+or length of sequences which can be input.
+
+2. The alignment of DNA sequences now offers a new hard-coded matrix, as well
+as the identity matrix used previously. The new matrix is the default scoring
+matrix used by the BESTFIT program of the GCG package for the comparison of
+nucleic acid sequences. X's and N's are treated as matches to any IUB ambiguity
+symbol. All matches score 1.9; all mismatches for IUB symbols score 0.0.
+
+3. The transition weight option for aligning nucleotide sequences has been
+changed from an on/off toggle to a weight between 0 and 1. A weight of zero
+means that the transitions are scored as mismatches; a weight of 1 gives
+transitions the full match score. For distantly related DNA sequences, the
+weight should be near to zero; for closely related sequences it can be useful
+to assign a higher score.
+
+4. The RSF sequence alignment file format used by GCG Version 9 can now be
+read.
+
+5. The clustal sequence alignment file format has been changed to allow
+sequence names longer than 10 characters. The maximum length allowed is set in
+clustalw.h by the statement:
+#define MAXNAMES 10
+
+For the fasta format, the name is taken as the first string after the '>'
+character, stopping at the first white space. (Previously, the first 10
+characters were taken, replacing blanks by underscores).
+
+6. The bootstrap values written in the phylip tree file format can be assigned
+either to branches or nodes. The default is to write the values on the nodes,
+as this can be read by several commonly-used tree display programs. But note
+that this can lead to confusion if the tree is rooted and the bootstraps may
+be better attached to the internal branches: Software developers should ensure
+they can read the branch label format.
+
+7. The sequence weighting used during sequence to profile alignments has been
+changed. The tree weight is now multiplied by the percent identity of the
+new sequence compared with the most closely related sequence in the profile.
+
+8. The sequence weighting used during profile to profile alignments has been
+changed. A guide tree is now built for each profile separately and the
+sequence weights calculated from the two trees. The weights for each
+sequence are then multiplied by the percent identity of the sequence compared
+with the most closely related sequence in the opposite profile.
+
+9. The adjustment of the Gap Opening and Gap Extension Penalties for sequences
+of unequal length has been improved.
+
+10. The default order of the sequences in the output alignment file has been
+changed. Previously the default was to output the sequences in the same order
+as the input file. Now the default is to use the order in which the sequences
+were aligned (from the guide tree/dendrogram), thus automatically grouping
+closely related sequences.
+
+11. The option to 'Reset Gaps between alignments' has been switched off by
+default.
+
+12. The conservation line output in the clustal format alignment file has been
+changed. Three characters are now used:
+'*' indicates positions which have a single, fully conserved residue
+':' indicates that one of the following 'strong' groups is fully conserved:-
+ STA
+ NEQK
+ NHQK
+ NDEQ
+ QHRK
+ MILV
+ MILF
+ HY
+ FYW
+
+'.' indicates that one of the following 'weaker' groups is fully conserved:-
+ CSA
+ ATV
+ SAG
+ STNK
+ STPA
+ SGND
+ SNDEQK
+ NDEQHK
+ NEQHRK
+ FVLIM
+ HFY
+
+These are all the positively scoring groups that occur in the Gonnet Pam250
+matrix. The strong and weak groups are defined as strong score >0.5 and weak
+score =<0.5 respectively.
+
+13. A bug in the modification of the Myers and Miller alignment algorithm
+for residue-specific gap penalties has been fixed. This occasionally caused
+new gaps to be opened a few residues away from the optimal position.
+
+14. The GCG/MSF input format no longer needs the word PILEUP on the first
+line. Several versions can now be recognised:-
+ 1. The word PILEUP as the first word in the file
+ 2. The word !!AA_MULTIPLE_ALIGNMENT or !!NA_MULTIPLE_ALIGNMENT
+ as the first word in the file
+ 3. The characters MSF on the first line of the file, and the
+ characters .. at the end of that line.
+
+15. The standard command line separator for UNIX systems has been changed from
+'/' to '-'. ie. to give options on the command line, you now type
+
+ clustalw input.aln -gapopen=8.0
+
+instead of clustalw input.aln /gapopen=8.0
+
+
+ ATTENTION SOFTWARE DEVELOPERS!!
+ -------------------------------
+
+The CLUSTAL sequence alignment output format was modified from version 1.7:
+
+1. Names longer than 10 chars are now allowed. (The maximum is specified in
+clustalw.h by '#define MAXNAMES'.)
+
+2. The consensus line now consists of three characters: '*',':' and '.'. (Only
+the '*' and '.' were previously used.)
+
+3. An option (not the default) has been added, allowing the user to print out
+sequence numbers at the end of each line of the alignment output.
+
+4. Both RNA bases (U) and base ambiguities are now supported in nucleic acid
+sequences. In the past, all characters (upper or lower case) other than
+a,c,g,t or u were converted to N. Now the following characters are recognised
+and retained in the alignment output: ABCDGHKMNRSTUVWXY (upper or lower case).
+
+5. A Blank line inadvertently added in the version 1.6 header has been taken
+out again.
+
+ CLUSTAL REFERENCES
+ ------------------
+
+Details of algorithms, implementation and useful tips on usage of Clustal
+programs can be found in the following publications:
+
+Jeanmougin,F., Thompson,J.D., Gouy,M., Higgins,D.G. and Gibson,T.J. (1998)
+Multiple sequence alignment with Clustal X. Trends Biochem Sci, 23, 403-5.
+
+Thompson,J.D., Gibson,T.J., Plewniak,F., Jeanmougin,F. and Higgins,D.G. (1997)
+The ClustalX windows interface: flexible strategies for multiple sequence
+alignment aided by quality analysis tools. Nucleic Acids Research, 25:4876-4882.
+
+Higgins, D. G., Thompson, J. D. and Gibson, T. J. (1996) Using CLUSTAL for
+multiple sequence alignments. Methods Enzymol., 266, 383-402.
+
+Thompson, J.D., Higgins, D.G. and Gibson, T.J. (1994) CLUSTAL W: improving the
+sensitivity of progressive multiple sequence alignment through sequence
+weighting, positions-specific gap penalties and weight matrix choice. Nucleic
+Acids Research, 22:4673-4680.
+
+Higgins,D.G., Bleasby,A.J. and Fuchs,R. (1992) CLUSTAL V: improved software for
+multiple sequence alignment. CABIOS 8,189-191.
+
+Higgins,D.G. and Sharp,P.M. (1989) Fast and sensitive multiple sequence
+alignments on a microcomputer. CABIOS 5,151-153.
+
+Higgins,D.G. and Sharp,P.M. (1988) CLUSTAL: a package for performing multiple
+sequence alignment on a microcomputer. Gene 73,237-244.
diff --git a/alcomp2.c b/alcomp2.c
new file mode 100644
index 0000000..1e7545e
--- /dev/null
+++ b/alcomp2.c
@@ -0,0 +1,3382 @@
+/*** Program alcomp2.c for comparison of two alignments by local profile-profile alignment
+***/
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <ctype.h>
+#include <string.h>
+#include <malloc.h>
+#include <stddef.h>
+#include "pcma.h"
+#include "blosum62.h"
+
+#define NR_END 1
+#define FREE_ARG char*
+/* Small helpers and fixed limits. SQUARE evaluates its argument twice, so do
+ * not pass an expression with side effects. MAXSTR sizes the static line
+ * buffer `str` (declared as MAXSTR+1 chars further down in this file). */
+#define SQUARE(a) ((a)*(a))
+#define NUM_METHOD 9
+#define MAX_WINDOW 20
+#define MAX_DELTASITE 20
+#define MAXSTR 100001
+#define INDI -100 /* expands without parentheses: write (INDI) when combining it with other operators */
+/* NOTE(review): IA..RNMX look like the constants of a multiplicative
+ * congruential random generator, presumably consumed by ran1() (prototyped
+ * in this file) -- confirm against its definition. */
+#define JMAX 40
+#define IA 16807
+#define IM 2147483647
+#define AM (1.0/IM)
+#define IQ 127773
+#define IR 2836
+#define NTAB 32
+#define NDIV (1+(IM-1)/NTAB)
+#define EPS 1.2e-7
+#define RNMX (1.0-EPS)
+/* SWAP expands to three separate statements, already ends in ';', and needs a
+ * variable named `temp` in scope where it is used; it is therefore unsafe as
+ * the sole body of an unbraced if/else/loop. NOTE(review): M and NSTACK are
+ * presumably tuning constants of sort() (prototyped in this file) -- confirm. */
+#define NRANSI
+#define SWAP(a,b) temp=(a);(a)=(b);(b)=temp;
+#define M 7
+#define NSTACK 50
+/* LN2 is the natural logarithm of 2. LAMB_UNG is the initial value of ARG_L,
+ * which this file labels "Ungapped lambda". */
+#define LN2 0.69314718055994528623
+#define LAMB_UNG 0.3176
+/* Upper bounds. NOTE(review): the code that uses them is outside this excerpt. */
+#define NCOLMAX 100000
+#define NFILES_MAX 1500
+#define NSEQ_OUT 1
+
+
+char *digit="0123456789";
+void nrerror(char error_text[]);
+char *cvector(long nl, long nh);
+int *ivector(long nl, long nh);
+double *dvector(long nl, long nh);
+char **cmatrix(long nrl, long nrh, long ncl, long nch);
+int **imatrix(long nrl, long nrh, long ncl, long nch);
+double **dmatrix(long nrl, long nrh, long ncl, long nch);
+char **cmatrix(long nrl, long nrh, long ncl, long nch);
+double ***d3tensor(long nrl,long nrh,long ncl,long nch,long ndl,long ndh);
+
+void free_ivector(int *v, long nl, long nh);
+void free_dvector(double *v, long nl, long nh);
+void free_cvector(char *v, long nl, long nh);
+void free_dmatrix(double **m, long nrl, long nrh, long ncl, long nch);
+void free_imatrix(int **m, long nrl, long nrh, long ncl, long nch);
+void free_cmatrix(char **m, long nrl, long nrh, long ncl, long nch);
+
+int a3let2num(char *let);
+int am2num_c(int c);
+int am2num(int c);
+int am2numBZX(int c);
+
+char am2lower(char inchr);
+char am2upper(char inchr);
+
+static void *mymalloc(int size);
+char *strsave(char *str);
+char *strnsave(char *str, int l);
+static char **incbuf(int n, char **was);
+static int *incibuf(int n, int *was);
+
+void err_readali(int err_num);
+void readali(char *filename);
+static void printali_ali(char *argt, int chunk, int n1, int n2, int len, char **aname1, char **aname2,
+char **aseqGap1, char **aseqGap2, int *start1, int *start2, int *positive, int **col_score, int score);
+int **ali_char2int(char **aseq,int start_num, int start_seq);
+int **read_alignment2int(char *filename,int start_num,int start_seq);
+
+char **traceback(char **aseq_mat1, char **aseq_mat2, int n1, int n2, int start_ali1, int start_ali2, int
+end_ali1, int end_ali2, int **tracebackDir, int **flagNewGapQuery, int **flagNewGapDb, char **aseqGapTr1, char **aseqGapTr2);
+
+void **traceback_outputPos(int start_ali1, int start_ali2, int end_ali1, int end_ali2, int **tracebackDir, int **flagNewGapQuery, int **flagNewGapDb, int *apos1, int *apos2);
+
+void counter(int b);
+double effective_number(int **ali, int *marks, int n, int start, int end);
+double effective_number_nogaps(int **ali, int *marks, int n, int start, int end);
+double effective_number_nogaps_expos(int **ali, int *marks, int n, int start, int end, int pos);
+
+void **freqInt(int **ali,int nal, int alilen, int **f,int *num_gaps,int
+*effindiarr,double gap_threshold, double *p_comp);
+void **freqIntMaskGaps(int **ali,int nal, int alilen, int **f, double gap_threshold, double gapRegionMin, double *p_comp);
+
+double *overall_freq(int **ali, int startp, int endp, int *mark);
+double *overall_freq_wgt(int **ali,int startp,int endp,int *mark,double *wgt);
+double *h_weight(int **ali, int ip);
+double **h_freq(int **ali, double **f, double **hfr);
+double *entro_conv(double **f, int **ali, double *econv);
+
+double *variance_conv(double **f, int **ali, double **oaf, double *vconv);
+double *pairs_conv(double **f,int **ali,int **matrix1,int indx,double *pconv);
+
+ typedef struct _score_Vector{
+ int *noGap, *gapExists, *noGapOld, *gapExistsOld, *prevScoreGapQueryOld, *noGapStore, *gapExistsStore, *prevScoreGapQueryOldStore;
+ } score_Vector;
+
+ int *dbSequence, queryEnd, dbEnd, queryStart,dbStart;
+ int gapOpen, gapExtend, dbLength, queryLength;
+/***** double **query, **matrix; *****/
+ static int SmithWatermanScore( double **score_matrix, int queryLength, int dbLength, int gapOpen, int gapextend, int queryEnd, int dbEnd, int **tracebackDir,
+int **flagNewGapQuery, int **flagNewGapDb);
+
+static int SmithWatermanFindStart( double **score_matrix, int queryLength, int dbLength, int gapOpen, int gapExtend, int queryEnd, int dbEnd, int
+score, int queryStart, int dbStart);
+
+int score, End1, End2, Start1, Start2;
+
+int gap__open, gap__extend;
+
+int *Sequence2;
+double lambda_al, score_scale;
+
+int ScoreForTwoRows(double *subjectRow, double *queryRow);
+double ScoreForTwoRows_Model(int *cntRow1, int *cntRow2, double *pseudoCntRow1, double *pseudoCntRow2);
+
+double ScoreForTwoRows_Model2(int *cntRow1, int *cntRow2, double *pseudoCntRow1, double *pseudoCntRow2);
+double ScoreForTwoRows_Model3(int *cntRow1, int *cntRow2, double *pseudoCntRow1, double *pseudoCntRow2, double n_effColumn1, double n_effColumn2);
+double ScoreForTwoRows_Model4(int *cntRow1, int *cntRow2, double *pseudoCntRow1, double *pseudoCntRow2, double n_effColumn1, double n_effColumn2);
+double ScoreForTwoRows_Model5(int *cntRow1, int *cntRow2, double *pseudoCntRow1, double *pseudoCntRow2, double n_effColumn1, double n_effColumn2, double score_scale, double b);
+double ScoreForTwoRows_Model6(int pos1, int pos2, double score_scale, double b);
+
+double ScoreForTwoRows_smat3_21(int pos1, int pos2);
+double ScoreForTwoRows_smat3_22(int pos1, int pos2);
+double ScoreForTwoRows_smat3_23(int pos1, int pos2);
+double ScoreForTwoRows_smat3_27(int pos1, int pos2);
+double ScoreForTwoRows_smat3_28(int pos1, int pos2);
+
+double Sgap6_smat(int pos1, int pos2, double b);
+double Sgap6_smat_off(int pos1, int pos2, double b);
+double (*sgapfcn)(int pos1, int pos2, double b);
+
+double GapExtend1(int pos2, double b);
+double GapExtend1_off(int pos2, double b);
+double (*g_e1)(int pos2, double b);
+double GapExtend2(int pos1, double b);
+double GapExtend2_off(int pos1, double b);
+double (*g_e2)(int pos1, double b);
+
+int *ScoreOverColumn (int colScore, int flag1, int flag2, int flag3, int flag4, int flag5, int flag6, int *column_Score);
+
+void *ReadRef (char *inputfile);
+int CompareAlnVsReferenceAln (int *apos1, int *apos2, int *aposref1, int *aposref2, int start_ref1, int start_ref2, int end_ref1, int end_ref2 /* , int coverage1, int coverage2, int accuracy1, int accuracy2*/ );
+
+double p_dayhoff[]={0, 0.0143859, 0.0384319, 0.0352742, 0.0195027, 0.086209, 0.048466, 0.0708956, 0.0866279, 0.0193078,
+0.0832518, 0.0457631, 0.0610127, 0.0695179, 0.0390894, 0.0367281, 0.0570451, 0.0580589, 0.0244313, 0.043972, 0.0620286};
+
+double p_rbnsn[]={0.013298, 0.038556, 0.032165, 0.022425, 0.090191, 0.05142, 0.064409, 0.078047, 0.019246, 0.073772, 0.052028, 0.058413,
+0.071198, 0.044873, 0.042644, 0.05364, 0.062949, 0.021992, 0.051295, 0.057438};
+
+double **blosum, **bl_ctrl;
+double **qBlosum, *p_blsm;
+
+double smatrix[21][21];
+
+/* JP */
+extern sint debug;
+
+/* The freqs in other (Dayhoff) order
+
+
+A R N D C Q
+0.0866279 0.043972 0.0390894 0.0570451 0.0193078 0.0367281
+
+E
+0.0580589
+
+G H I L K M
+0.0832518 0.0244313 0.048466 0.086209 0.0620286 0.0195027
+
+F
+0.0384319
+
+
+P S T W Y V
+0.0457631 0.0695179 0.0610127 0.0143859 0.0352742 0.0708956
+
+A R N D C Q
+E
+G H I L K M
+F
+P S T W Y V */
+
+
+char **aname, **aname1, **aname2, **aseq, **aseq1, **aseq2;
+int nal, nal1, nal2, nalmerge, alilen, alilen1, alilen2, maxalilen,
+*astart, *astart1, *astart2, *alen;
+
+int **align_mat1, **align_mat2;
+int n_lowgaps, alilen_mat1, alilen_mat2;
+char **aseq_mat1, **aseq_mat2;
+char **aseqGapTr1, **aseqGapTr2;
+int **tracebackDir;
+int **flagNewGapQuery, **flagNewGapDb;
+int *positive, **col_score;
+int posGp, segment_len;
+
+int *apos1, *apos2, *aposref1, *aposref2;
+int start_ref1, start_ref2, end_ref1, end_ref2, reflen_nogp;
+double coverage1, coverage2, falsecov, accuracy1, accuracy2;
+
+static int **alignment1, **alignment2;
+double **u_oaf,**h_oaf;
+char *am="-WFYMLIVACGPTSNQDEHRKBZX*.wfymlivacgptsnqdehrkbzx";
+/* Three-letter residue codes, one per symbol, in the same order as the first
+ * 26 characters of `am` ("-WFYMLIVACGPTSNQDEHRKBZX*."): index 0 is the gap,
+ * 1..20 the amino acids, 21..23 ASX/GLX/UNK, 24 "***" and 25 "...". */
+char *am3[]={
+"---",
+"TRP",
+"PHE",
+"TYR",
+"MET",
+"LEU",
+"ILE",
+"VAL",
+"ALA",
+"CYS",
+"GLY",
+"PRO",
+"THR",
+"SER",
+"ASN",
+"GLN",
+"ASP",
+"GLU",
+"HIS",
+"ARG",
+"LYS",
+"ASX",
+"GLX",
+"UNK",
+"***",	/* the comma here was missing, which fused "***" and "..." into the single entry "***..." */
+"...",
+};
+
+double **read_aa_dmatrix(FILE *fmat);
+int **identity_imat(long n);
+void print_parameters(FILE *outfile,char *argi,char *argo,int nt,char *argt,int argb,char *args,int argm,int argf,int argc, int argw, char *argn,char *arga,double argg, char *argp,char *argd);
+
+double gammln(double xx);
+float ran1(long *idum);
+double ScoreForRow(int *cntRow, double *pseudoCntRow);
+
+double funcAl(double x,int len1,int len2, double *score_matrix_srt);
+
+double funcAaFreq(double x, double *p_comp1, double *p_comp2);
+
+double func_precise1(double x,int ntot, double *fixedPseudoCountRow);
+
+double lambdaAl1(int len1, int len2, double *score_matrix_srt);
+
+void **pseudoCounts(double **matrix, double n_eff, int len, double **pseudoCnt);
+void **neffsForEachCol_maskGapReg(int **ali, int n, int len, double effgapmax, double effgapRegionMin, double **n_effAa, double *sum_eff_let, int *maskgapRegion, int *apos_filtr, int *len_lowgaps, double *n_eff);
+
+void currdirlimits_walk (int nlow, int nhigh, void (*fcn)(char *));
+void get_Neff_andQ(char *filename);
+
+void sort(int n, double arr[]);
+
+/*double *score_matrix_srt; */
+double **score_matrix, *score_matrix_srt;
+int **newScore_mat;
+int **matrix1, **matrix2;
+
+double *ident1, *ident2;
+int **count;
+int setsize = 1000;
+int *maskgaps, *maskgaps1, *maskgaps2, *maskgapRegion, *maskgapRegion1, *maskgapRegion2;
+double **pseudoCnt1, **pseudoCnt2;
+double *p_comp1, *p_comp2;
+double n_c;
+double n_eff1, n_eff2;
+double **n_effAa1, **n_effAa2;
+double *sum_eff_let1, *sum_eff_let2;
+int *apos_filtr1, *apos_filtr2;
+
+/* int *ali1seq1, *ali1seq2; */
+
+int scoreGivenEnd, score_final;
+
+double Evalue;
+double lambda_len[] = {0.277, 2.25}; /* coefficients for linear approximation of lambda(len) and K(len) */
+double K_len[] = {0.044, 7.4};
+
+double lambda_est, K_est; /* estimated lambda_g and K_g values */
+
+double scorebylambda;
+
+double lambda_u;
+double b = 1.0;
+double f= 32.0;
+
+int **fV_RepeatOpenGapQuery, **fV_RepeatOpenGapDb;
+int **fV_DbInClosestNewGapDb, **fV_QInClosestNewGapQuery;
+
+int flag_errread = 0;
+
+char **aseq_out1, **aseq_out2;
+int *positive_out;
+int inputpos1, inputpos2, outputpos, pos_beforegap1, pos_beforegap2, step1, step2;
+
+
+static char str[MAXSTR+1];
+
+ char ARG_I1[200],ARG_I2[200],ARG_O[100],ARG_P[100],ARG_D[50],ARG_S[100],ARG_Q[100],ARG_N[50],ARG_A[50];
+ // change ARG_F from 3 to 1
+ int ARG_F=1,ARG_C=0,ARG_V=0,ARG_M=0,ARG_B=60;
+ int ARG_GO = 10, ARG_GE = 1; /* penalties for gap opening and extension */
+ int ARG_E = 0; /* switch for adjustment of gap extension penalty according to gap content in opposite column */
+ int ARG_R = 0; /* switch for reduction of column-column scores according to gap content */
+ double ARG_G=1.0, ARG_T=1.0; /* thresholds of gap content for column excision and for "gapped regions" with waiving of 1st gap__open penalty */
+ double ARG_L = LAMB_UNG; /* Ungapped lambda */
+
+extern double **sumefflet, ***neffAa, ***pscnt;
+
+/*
+ * generatematrix - compute the per-alignment profile data for table slot `indi`.
+ *
+ *   align1      alignment as an integer matrix; saved in the file-scope `alignment1`
+ *   alnlength1  alignment length; saved in the file-scope `alilen1`
+ *   nali1       number of aligned sequences; saved in the file-scope `nal1`
+ *   indi        index into the external tables neffAa, sumefflet and pscnt that
+ *               this call reads its output buffers from
+ *
+ * NOTE(review): the work buffers are allocated with 1-based ranges
+ * (ivector(1, alilen1)), so align1 is presumably 1-based in both dimensions --
+ * confirm with the caller.
+ *
+ * Side effects: overwrites the file-scope smatrix, alignment1, alilen1, nal1,
+ * apos_filtr1, maskgapRegion1, n_effAa1, sum_eff_let1 and pseudoCnt1; passes
+ * alilen_mat1 and n_eff1 by address to neffsForEachCol_maskGapReg(); prints a
+ * "." progress mark to stdout and flushes it.
+ *
+ * The two ivector() buffers are not released here. NOTE(review): whether later
+ * stages still read them through the globals is not visible from this
+ * function, so they are deliberately left allocated.
+ *
+ * Terminates the process with EXIT_FAILURE when ARG_F, ARG_E, ARG_R or ARG_G
+ * is out of range (previously these usage errors exited with status 0).
+ */
+void generatematrix(int **align1, int alnlength1, int nali1, int indi)
+{
+    int i, j;
+
+    /* Reject out-of-range option values before doing any work. */
+    if ((ARG_F > 8) || (ARG_F < 0)) {
+        fprintf(stderr,"column-column score calculation method(-f): \n1, 3_21; 2, 3_22 ...., 8, 3_28\n");
+        exit(EXIT_FAILURE);
+    }
+    if ((ARG_E > 1) || (ARG_E < 0)) {
+        fprintf(stderr,"adjustment of gap extension penalty depending on gap content in opposite column (-e): \n0, no adjustment; 1, adjust gap extension\n");
+        exit(EXIT_FAILURE);
+    }
+    if ((ARG_R > 1) || (ARG_R < 0)) {
+        fprintf(stderr,"reduction of column-column scores according to gap content (-r): \n0, no reduction; 1, adjust scores\n");
+        exit(EXIT_FAILURE);
+    }
+    if ((ARG_G > 1.0) || (ARG_G <= 0)) {
+        fprintf(stderr,"gap content(-g) to eliminate a column must be no more than 1 and more than 0 \n");
+        exit(EXIT_FAILURE);
+    }
+
+    /* Substitution matrix: start from the built-in BLOSUM62 table. */
+    for (i = 0; i <= 20; i++) {
+        for (j = 0; j <= 20; j++) {
+            smatrix[i][j] = blosum62_smatrix[i][j];
+        }
+    }
+
+    /* Publish the input alignment through the file-scope variables. */
+    alignment1 = align1;
+    alilen1 = alnlength1;
+    nal1 = nali1;
+
+    /* Filter "gapped" columns, get total n_eff, n_eff for each aa in each
+     * column and mask moderately "gapped regions". The result buffers for
+     * n_effAa1 and sum_eff_let1 come from the shared tables at index indi. */
+    apos_filtr1 = ivector(1, alilen1);
+    n_effAa1 = neffAa[indi];
+    sum_eff_let1 = sumefflet[indi];
+    maskgapRegion1 = ivector(0, alilen1 + 1);
+    neffsForEachCol_maskGapReg(alignment1, nal1, alilen1, ARG_G, ARG_T, n_effAa1, sum_eff_let1, maskgapRegion1, apos_filtr1, &alilen_mat1, &n_eff1);
+
+    /* Calculate target frequencies into the shared pseudo-count table. */
+    pseudoCnt1 = pscnt[indi];
+    pseudoCounts(n_effAa1, n_eff1, alilen_mat1, pseudoCnt1);
+
+    /* Progress mark; the flush is unconditional, only the extra marker
+     * line depends on the debug level. */
+    fprintf(stdout,".");
+    if (debug > 4) {
+        fprintf(stdout, "++++++++\n");
+    }
+    fflush(stdout);
+}
+
+/*int main(int argc, char *argv[]) */
+/*
+ * prfprfmatrix: fill prfprfmat[1..alnlength1][1..alnlength2] with the
+ * column-to-column scores of two profiles.
+ *
+ * indi1, indi2     indices into the file-level tables neffAa[], sumefflet[]
+ *                  and pscnt[] selecting the two profiles
+ * alnlength1/2     column counts, copied into the globals alilen1/alilen2
+ * nali1, nali2     sequence counts, copied into the globals nal1/nal2
+ * prfprfmat        caller-owned matrix that receives scorefcn(i, j)
+ *
+ * The function works through file-level state (ARG_*, smatrix, gap__open,
+ * gap__extend, lambda_u, n_effAa1/2, sum_eff_let1/2, pseudoCnt1/2, ...).
+ * It calls exit(0) when ARG_F, ARG_E, ARG_R or ARG_G is out of range.
+ */
+void prfprfmatrix(int indi1, int indi2, int alnlength1, int alnlength2, int nali1, int nali2, double **prfprfmat)
+{
+ /* NOTE(review): the FILE pointers and the locals l, nt, jposnogp, jmat, fcount, fi,
+    av_sumlet1/2, sqbuf, jj, Sequence2, nentry1/2, len_out and int_scale are declared
+    but never referenced in this function. */
+ FILE *smatrixfile,*qmatrixfile, *bl62file, *qbl62file;
+ FILE *fout, *fpdb,*matrixfile,*fpdbout,*fp,*ft;
+ int i,j,k,l,nt=0;
+ int jposnogp, jmat;
+ int fcount=0, fi=0;
+
+ double av_sumlet1, av_sumlet2;
+
+ char *sqbuf;
+ int jj;
+ int *Sequence2;
+
+ char *bl62qijLocation = "blosum62.qij";
+ char *bl62Location = "blosum62.sij";
+
+ double (*scorefcn)(int pos1, int pos2);
+
+ int nentry1, nentry2;
+
+ int len_out;
+
+ /* JP */
+ double **score_matrix;
+ double miniscore;
+ static int int_scale=100;
+
+ /*read input arguments */
+ /* The two file names are only copied into ARG_S / ARG_Q; no file is opened here. */
+ strcpy(ARG_S, bl62Location);
+ strcpy(ARG_Q, bl62qijLocation);
+
+
+ /* Range checks on the option globals; each failure prints a usage hint and exits with status 0. */
+ if((ARG_F>8)||(ARG_F<0)){fprintf(stderr,"column-column score calculation method(-f): \n1, 3_21; 2, 3_22 ...., 8, 3_28\n");
+ exit(0);}
+ if((ARG_E>1)||(ARG_E<0)){fprintf(stderr,"adjustment of gap extension penalty depending on gap content in opposite column (-e): \n0, no adjustment; 1, adjust gap extension\n");
+ exit(0);}
+ if((ARG_R>1)||(ARG_R<0)){fprintf(stderr,"reduction of column-column scores according to gap content (-r): \n0, no reduction; 1, adjust scores\n");
+ exit(0);}
+ if((ARG_G>1.0)||(ARG_G<=0)){fprintf(stderr,"gap content(-g) to eliminate a column must be no more than 1 and more than 0 \n");
+ exit(0);}
+
+ /* f is not a local here; it is a scale factor declared elsewhere in the file. */
+ gap__open = f*ARG_GO;
+ gap__extend = f*ARG_GE;
+ lambda_u = ARG_L/f;
+ /* substitution matrix: should be bits (BLOSUM62 if reading from input failed) */
+
+ for(i=0;i<=20;i++) for(j=0;j<=20;j++) smatrix[i][j] = blosum62_smatrix[i][j];
+
+
+ /* Rescale every smatrix entry by LN2. */
+ for(i=0;i<=20;i++) {
+ for(j=0;j<=20;j++) {
+ smatrix[i][j] *= LN2;
+ }
+ }
+
+
+ /* q_ij matrix: qij for BLOSUM62 if reading from input failed */
+
+ alilen1 = alnlength1;
+ alilen2 = alnlength2;
+ nal1 = nali1;
+ nal2 = nali2;
+ /* Only the fprintf is guarded by debug>0; the fflush always runs. */
+ if(debug>0) fprintf(stdout, "alcomp2: %d %d %d %d \n", alilen1, alilen2, nal1, nal2); fflush(stdout);
+
+/* check switch for optional modes of comparison:
+ 1st seq VS full ali; full ali VS 1st seq; 1st seq VS 1st seq
+*/
+ switch (ARG_V) {
+ case 1:
+ nal1 = 1; break;
+ case 2:
+ nal2 = 1; break;
+ case 3:
+ nal1 =1; nal2= 1; break;
+ }
+
+/* filter "gapped" columns, get total n_eff, n_eff for each aa in each column and mask moderately "gapped regions" */
+ /* These four vectors are allocated here and freed at the end; this function never
+    reads or writes their elements itself. NOTE(review): presumably the scoring
+    functions read them through the globals - confirm. */
+ apos_filtr1 = ivector(1,alilen1);
+ maskgapRegion1 = ivector(0,alilen1+1);
+ apos_filtr2 = ivector(1,alilen2);
+ maskgapRegion2 = ivector(0,alilen2+1);
+
+
+/* Calculate target frequencies */
+
+ /* Point the per-profile working globals at the tables selected by indi1 / indi2. */
+ n_effAa1 = neffAa[indi1];
+ n_effAa2 = neffAa[indi2];
+ sum_eff_let1 = sumefflet[indi1];
+
+ //fprintf(stdout, "=======\n"); fflush(stdout);
+ //for(i=1;i<=alilen1;i++) fprintf(stdout, "sum_eff_let1: %d %f\n", i, sum_eff_let1[i]);
+ sum_eff_let2 = sumefflet[indi2];
+ //for(i=1;i<=alilen2;i++) fprintf(stdout, "sum_eff_let2: %d %f\n", i, sum_eff_let2[i]);
+ pseudoCnt1 = pscnt[indi1];
+ pseudoCnt2 = pscnt[indi2];
+
+/* Choose column-to-column score formula to use */
+
+ /* ARG_F values 0, 4, 5 and 6 pass the range check above and fall to the default (3_23). */
+ switch (ARG_F) {
+ case 1:
+ scorefcn = ScoreForTwoRows_smat3_21; break;
+ case 2:
+ scorefcn = ScoreForTwoRows_smat3_22; break;
+ case 3:
+ scorefcn = ScoreForTwoRows_smat3_23; break;
+ case 7:
+ scorefcn = ScoreForTwoRows_smat3_27; break;
+ case 8:
+ scorefcn = ScoreForTwoRows_smat3_28; break;
+ default:
+ scorefcn = ScoreForTwoRows_smat3_23;
+ }
+
+
+/* compute matrix of scores for column pairs, correct them with lambda */
+
+ /* score_matrix aliases the caller's matrix; score_matrix_srt is a flat scratch copy freed below. */
+ score_matrix = prfprfmat;
+ score_matrix_srt = dvector(1,alilen1*alilen2);
+
+
+
+ k=1;
+ //fprintf(stdout, "alilens: %d %d \n", alilen1, alilen2);
+ for (i=1; i<=alilen1; i++){
+ for (j=1;j<=alilen2;j++){
+ score_matrix[i][j] = score_matrix_srt[k] = scorefcn(i, j);
+ if(debug>4)fprintf(stdout, "%6.1f", score_matrix[i][j]);
+ fflush(stdout);
+ k++;
+ }
+ if(debug>0)fprintf(stdout, "*\n");
+ }
+
+ //lambda_al = lambdaAl1(alilen_mat1,alilen_mat2,score_matrix_srt);
+ //score_scale = lambda_al/ lambda_u;
+ //fprintf(stdout, "score_scale: %f\n", score_scale);
+ /* With the lambda estimation commented out, the scale is fixed at 1, so the loop below leaves the scores unchanged. */
+ score_scale = 1;
+
+ /* miniscore tracks the smallest score but is not read again in this function. */
+ miniscore = 100000;
+ for(i=1; i<=alilen1; i++){
+ for (j=1;j<=alilen2;j++){
+ score_matrix[i][j] = score_matrix[i][j]*score_scale;
+ if(miniscore > score_matrix[i][j]) miniscore = score_matrix[i][j];
+ /*fprintf(stdout, "%5d%c%c", (int)(score_matrix[i][j]/30), am[alignment1[1][i]], am[alignment2[1][j]]); */
+ }
+ /*fprintf(stdout, "*\n"); */
+ }
+ /* lambda_al is printed here but is not assigned anywhere in this function. */
+ if(debug>0) fprintf(stdout, "score_scale: %8.5f; lambda_al: %8.5f; lambda_u: %8.5f\n", score_scale, lambda_al, lambda_u);
+
+ free_dvector(score_matrix_srt,1,alilen1*alilen2);
+ free_ivector(apos_filtr1, 1,alilen1);
+ free_ivector(apos_filtr2, 1,alilen2);
+ free_ivector(maskgapRegion1, 0,alilen1+1);
+ free_ivector(maskgapRegion2, 0,alilen2+1);
+
+ /*exit(0);*/
+ return;
+
+}
+
+
+/* from given alignment with aa as numbers, computes effective aa counts (PSIC->our formula)
+and marks the columns with EFFECTIVE content of gaps > threshold (effgapmax) */
+
+/*
+ * neffsForEachCol_maskGapReg: per-column effective residue counts with
+ * filtering of gap-rich columns.
+ *
+ * ali[1..n][1..len]   alignment as integers; a cell matches symbol k when it
+ *                     equals k or k+25 (k = 0 is the gap, 1..20 the residues)
+ * effgapmax           a column is kept only if its effective gap fraction
+ *                     effnu[0]/(sum_let+effnu[0]) is below this value
+ * effgapRegionMin     kept columns whose gap fraction is >= this value get
+ *                     maskgapRegion[] = 1, the others 0
+ * n_effAa             out: n_effAa[c][0..20] effective counts of kept column c
+ * sum_eff_let         out: sum of the residue counts (k = 1..20) per kept column
+ * maskgapRegion       out: flags for kept columns; entries 0 and kept+1 are set to 0
+ * apos_filtr          out: original column index of each kept column
+ * len_lowgaps         out: number of kept columns
+ * nef                 out: (symbols seen in kept columns) / (kept columns);
+ *                     this is 0.0/0 (NaN) when no column is kept
+ *
+ * Returns NULL; the return type is kept as void ** for existing callers.
+ */
+void **neffsForEachCol_maskGapReg(int **ali, int n, int len, double effgapmax, double effgapRegionMin, double **n_effAa, double *sum_eff_let, int *maskgapRegion, int *apos_filtr, int *len_lowgaps, double *nef)
+{
+    int i, j, k;
+    int alilen_mat = 0;      /* kept columns so far */
+    int nsymbols = 0;        /* distinct symbols summed over kept columns */
+    int nsymbols_col;
+    int flagmark;
+    int *mark;
+    double *effnu;
+    double sum_let, gapfrac;
+
+    if(debug>1) fprintf(stderr,".");
+
+    effnu = dvector(0,20);
+    mark = ivector(0,n+10);
+
+    for (j = 1; j <= len; j++) {
+        nsymbols_col = 0;
+        sum_let = 0;
+
+        for (k = 0; k <= 20; ++k) {
+            /* Mark the sequences that carry symbol k (or k+25) in column j. */
+            flagmark = 0;
+            for (i = 1; i <= n; ++i) {
+                mark[i] = (ali[i][j] == k || ali[i][j] - 25 == k) ? 1 : 0;
+                if (mark[i]) flagmark = 1;
+            }
+
+            /* Effective count of the marked sequences, over the whole alignment. */
+            if (flagmark == 1) {
+                effnu[k] = effective_number_nogaps(ali,mark,n,1,len);
+                nsymbols_col++;
+            } else {
+                effnu[k] = 0.0;
+            }
+
+            if (k > 0) sum_let += effnu[k];
+        }
+
+        if (sum_let > 0) {
+            gapfrac = 1.0*effnu[0]/(sum_let + effnu[0]);
+            if (gapfrac < effgapmax) {
+                alilen_mat++;
+                for (k = 0; k <= 20; k++) {
+                    n_effAa[alilen_mat][k] = effnu[k];
+                }
+                sum_eff_let[alilen_mat] = sum_let;
+                apos_filtr[alilen_mat] = j;
+                nsymbols += nsymbols_col;
+                maskgapRegion[alilen_mat] = (gapfrac < effgapRegionMin) ? 0 : 1;
+            }
+        }
+    }
+
+    *nef = 1.0*nsymbols/alilen_mat;
+    *len_lowgaps = alilen_mat;
+
+    maskgapRegion[0] = maskgapRegion[alilen_mat+1] = 0;
+
+    free_dvector(effnu,0,20);
+    free_ivector(mark,0,n+10);
+
+    /* The original fell off the end of a non-void function. */
+    return NULL;
+}
+
+
+/*
+ * pseudoCounts: for each row i = 1..len, mix the observed residue
+ * frequencies f[j] = matrix[i][j]/sum_j matrix[i][j] with the term
+ * g[j] = sum_k qmatrix[j][k]*f[k]/p_rbnsn[k-1]:
+ *
+ *     pseudoCnt[i][j] = (alpha*f[j] + beta*g[j]) / (alpha + beta)
+ *
+ * with alpha = n_eff - 1 and beta = 10.  j and k run over 1..20.
+ * A row whose entries 1..20 sum to zero yields NaN, as before.
+ *
+ * Returns NULL; the return type is kept as void ** for existing callers.
+ * The two scratch vectors are now released (they leaked on every call).
+ */
+void **pseudoCounts(double **matrix, double n_eff, int len, double **pseudoCnt)
+{
+    int i, j, k;
+    double *f, *g;
+    double sumN;
+    const double alpha = n_eff-1;
+    const double beta = 10.0;
+
+    f = dvector(0,20);
+    g = dvector(0,20);
+
+    for (i = 1; i <= len; i++) {
+        sumN = 0;
+        for (j = 1; j <= 20; j++) sumN += matrix[i][j];
+        for (j = 1; j <= 20; j++) {
+            f[j] = 1.0*matrix[i][j]/sumN;
+        }
+        for (j = 1; j <= 20; j++) {
+            g[j] = 0;
+            for (k = 1; k <= 20; k++) g[j] += qmatrix[j][k]*f[k]/p_rbnsn[k-1];
+
+            pseudoCnt[i][j] = (alpha*f[j] + beta*g[j])/(alpha+beta);
+        }
+    }
+
+    free_dvector(g,0,20);
+    free_dvector(f,0,20);
+    return NULL;
+}
+
+/** Lambda finding - in version3_21, from all-to-all column comparisons (using funcAl()) ***/
+/*
+ * lambdaAl1: bisection search on [1e-6, 1.0] for the root of
+ * funcAl(x, len1, len2, score_matrix_srt).
+ *
+ * score_matrix_srt[1..len1*len2] is sorted in place first.  nrerror() is
+ * called when the two end points do not bracket a root or when JMAX
+ * halvings are not enough.  The search stops once the interval is narrower
+ * than 1e-10 or the function value is exactly zero.
+ */
+double lambdaAl1(int len1, int len2, double *score_matrix_srt)
+{
+    const double lower = 1e-6;
+    const double upper = 1.0;
+    const double tol = 1e-10;
+    double step, f_lower, f_mid, mid, root;
+    int iter;
+
+    /* Sort the scores before the exponentials are summed. */
+    sort(len1*len2, score_matrix_srt);
+
+    f_lower = funcAl(lower,len1,len2,score_matrix_srt);
+    f_mid = funcAl(upper,len1,len2,score_matrix_srt);
+
+    if (f_lower*f_mid >= 0.0) nrerror("Root must be bracketed for bisection in rtbis");
+
+    /* Orient the search so that the function is negative at `root`. */
+    if (f_lower < 0.0) {
+        step = upper-lower;
+        root = lower;
+    } else {
+        step = lower-upper;
+        root = upper;
+    }
+
+    for (iter = 1; iter <= JMAX; iter++) {
+        step *= 0.5;
+        mid = root+step;
+        f_mid = funcAl(mid,len1,len2,score_matrix_srt);
+
+        if (f_mid <= 0.0) root = mid;
+        if (fabs(step) < tol || f_mid == 0.0) return root;
+    }
+    nrerror("Too many bisections in rtbis");
+    return 0.0;
+}
+
+/** Calculation of the function: funcAl(lambda)=0 **/
+
+/** Calculation of func based on all column combinations from two alignments **/
+
+/*
+ * funcAl: mean of exp(x * score) over score_matrix_srt[1..len1*len2],
+ * minus 1.  lambdaAl1() looks for the x at which this is zero.
+ */
+double funcAl(double x, int len1, int len2, double *score_matrix_srt) {
+    double total = 0.0;
+    int idx;
+
+    for (idx = 1; idx <= len1*len2; idx++) {
+        total += exp(x*score_matrix_srt[idx]);
+    }
+
+    return total/(len1*len2) - 1.0;
+}
+
+
+/*
+ * funcAaFreq: sum over i, j = 1..20 of
+ *     p_comp1[i] * p_comp2[j] * exp(x * smatrix[i][j] / lambda_u)
+ * minus 1.  smatrix and lambda_u are file-level variables.
+ */
+double funcAaFreq(double x,double *p_comp1, double *p_comp2)
+{
+    double total = 0.0;
+    int a, b;
+
+    for (a = 1; a <= 20; a++) {
+        const double pa = p_comp1[a];
+        for (b = 1; b <= 20; b++) {
+            total += pa*p_comp2[b]*exp(x*smatrix[a][b]/lambda_u);
+        }
+    }
+    return total - 1;
+}
+
+
+
+
+
+
+/*
+ * ScoreForRow: returns sum over i = 1..20 of
+ *     cntRow[i] * log(pseudoCntRow[i] / p_rbnsn[i-1]).
+ *
+ * The former debug temporaries s1/s2/s3 were never used and read
+ * p_dayhoff[i] for i up to 20.  NOTE(review): the commented-out line next
+ * to them indexed p_dayhoff with i-1, which suggests that read may run one
+ * past the table - confirm against its declaration.  They are removed here;
+ * the returned value is unchanged.
+ */
+double ScoreForRow(int *cntRow, double *pseudoCntRow) {
+    double sc = 0;
+    int i;
+
+    for (i = 1; i <= 20; i++) {
+        sc += cntRow[i]*log(pseudoCntRow[i]/p_rbnsn[i-1]);
+    }
+    return sc;
+}
+
+
+
+
+/*
+ * ran1: pseudo-random float generator with a shuffle table of NTAB
+ * entries.  *idum is the generator state and is updated on every call;
+ * calling with *idum <= 0 (or before the first use) re-initialises the
+ * table.  The returned value is capped at RNMX.
+ */
+float ran1(long *idum)
+{
+    static long iy=0;
+    static long iv[NTAB];
+    long seed = *idum;
+    long q;
+    int slot;
+    float result;
+
+    if (seed <= 0 || !iy) {
+        /* Force a positive seed, then warm up and fill the shuffle table. */
+        seed = (-seed < 1) ? 1 : -seed;
+        for (slot = NTAB+7; slot >= 0; slot--) {
+            q = seed/IQ;
+            seed = IA*(seed-q*IQ)-IR*q;
+            if (seed < 0) seed += IM;
+            if (slot < NTAB) iv[slot] = seed;
+        }
+        iy = iv[0];
+    }
+
+    /* One step of the generator. */
+    q = seed/IQ;
+    seed = IA*(seed-q*IQ)-IR*q;
+    if (seed < 0) seed += IM;
+    *idum = seed;
+
+    /* Output the shuffled entry and refill its slot. */
+    slot = iy/NDIV;
+    iy = iv[slot];
+    iv[slot] = seed;
+
+    result = AM*iy;
+    if (result > RNMX) return RNMX;
+    return result;
+}
+
+/*
+ * gammln: returns log(Gamma(xx)) using a six-coefficient series.
+ * The expression divides by xx and takes logs, so xx > 0 is expected.
+ */
+double gammln(double xx)
+{
+    static double cof[6]={76.18009172947146,-86.50532032941677,
+        24.01409824083091,-1.231739572450155,
+        0.1208650973866179e-2,-0.5395239384953e-5};
+    double denom = xx;
+    double series = 1.000000000190015;
+    double base = xx+5.5;
+    int term;
+
+    base = base - (xx+0.5)*log(base);
+    for (term = 0; term <= 5; term++) {
+        denom += 1.0;
+        series += cof[term]/denom;
+    }
+    return -base+log(2.5066282746310005*series/xx);
+}
+
+/*
+ * read_aa_dmatrix: parse a residue matrix from the open stream fmat into a
+ * freshly allocated mat[0..25][0..25] (zero-filled first), print
+ * mat[1..ncol][1..ncol] to stdout and return mat.
+ *
+ * The input is consumed one character at a time by a small state machine
+ * driven by `flag`:
+ *    0  at the start of a line
+ *   -1  inside a '#' comment, until the next newline
+ *    1  on a line that began with a space: every non-space character is a
+ *       column label, stored as col[++ncol] = am2numBZX(c)
+ *    2  on a data row, between tokens
+ *    3  on a data row, inside a token being collected in stri[]
+ * A line starting with any other character opens row rj (row[rj] = col[rj]).
+ * Each finished token is converted with atof() and stored at
+ * mat[row[rj]][col[ri]].
+ */
+double **read_aa_dmatrix(FILE *fmat){
+
+/* read matrix from file *fmat */
+
+int i,ncol,ri,rj,c,flag,j;
+int col[31],row[31];
+char stri[31];
+double t;
+double **mat;
+
+mat=dmatrix(0,25,0,25);
+for(i=0;i<=25;++i)for(j=0;j<=25;++j)mat[i][j]=0.0;
+
+ncol=0;
+i=0;
+ri=0;
+rj=0;
+flag=0;
+
+while( (c=getc(fmat)) != EOF){
+
+if(flag==0 && c=='#'){flag=-1;continue;}
+else if(flag==-1 && c=='\n'){flag=0;continue;}
+else if(flag==-1){continue;}
+else if(flag==0 && c==' '){flag=1;continue;}
+else if(flag==1 && c=='\n'){flag=0;continue;}
+else if(flag==1 && c==' '){continue;}
+else if(flag==1){
+ ++ncol;
+ /* nrerror() itself calls exit(1), so the exit(0) after it is never reached. */
+ if(ncol>=25){nrerror("matrix has more than 24 columns: FATAL");exit(0);}
+ col[ncol]=am2numBZX(c);
+ continue;
+ }
+else if(flag==0 && c!=' ' && c!='#'){
+ /* Start of a data row.  NOTE(review): the row label character itself is put in
+    stri[0] and the state goes to 3, so the first atof() on each row parses the
+    label and uses up ri==1; the first number then lands in col[2].  Confirm
+    this against the expected file layout. */
+ ri=0;
+ ++rj;
+ if(rj>=25){nrerror("matrix has more than 24 rows: FATAL");exit(0);}
+ row[rj]=col[rj];
+ /* stri is blank-filled and never NUL-terminated. */
+ for(i=0;i<=30;++i){stri[i]=' ';}
+ stri[0]=c;
+ j=0;
+ flag=3;
+ continue;
+ }
+else if (flag==2 && c==' '){for(i=0;i<=30;++i){stri[i]=' ';}j=0;continue;}
+else if (flag==2 && c=='\n'){flag=0;continue;}
+/* j is always 0 here, so the j>30 test below can never fire. */
+else if (flag==2){flag=3;stri[j]=c;if(j>30){nrerror("string too long:FATAL");exit(0);}continue;}
+else if (flag==3 && c==' ' || flag==3 && c=='\n'){
+ /* End of a token: convert it and store it in the next column of the current row. */
+ j=0;
+ ++ri;
+ t=atof(stri);
+ mat[row[rj]][col[ri]]=t;
+ if (c=='\n')flag=0;else flag=2;
+ continue;
+ }
+/* NOTE(review): no bound check on j here; a token longer than 31 characters writes past stri. */
+else if (flag==3){stri[++j]=c;continue;}
+
+}
+
+/* Copy mat[col[j]][col[i]] into mat[col[i]][col[j]] for every i < j. */
+for(i=1;i<=ncol;i++) {
+ for(j=i+1;j<=ncol;j++) {
+/* fprintf (stderr, "col[%d]=%d col[%d]=%d ", i,col[i],j,col[j]);*/
+ mat[col[i]][col[j]]=mat[col[j]][col[i]];
+ }
+ }
+
+/* Dump rows and columns 1..ncol to stdout, indexed directly rather than through col[]. */
+for(i=1;i<=ncol;i++) {
+ for(j=1;j<=ncol;j++) {
+ fprintf(stdout, "%7.4f,", mat[i][j]);
+ }
+ fprintf(stdout, "\n");
+}
+fprintf(stdout, "\n");
+return mat;
+}
+
+/*
+ * identity_imat: allocate m[0..n][0..n] with imatrix() and fill it with
+ * ones on the diagonal and zeros elsewhere.
+ */
+int **identity_imat(long n){
+    int **ident = imatrix(0,n,0,n);
+    int row, col;
+
+    for (row = 0; row <= n; ++row) {
+        for (col = 0; col <= n; ++col) {
+            ident[row][col] = (row == col) ? 1 : 0;
+        }
+    }
+    return ident;
+}
+
+/*
+ * nrerror: print error_text and a termination notice to stderr, then
+ * end the process with exit status 1.  Does not return.
+ */
+void nrerror(char error_text[]){
+    FILE *err = stderr;
+
+    fprintf(err, "%s\n", error_text);
+    fprintf(err, "FATAL - execution terminated\n");
+    exit(1);
+}
+
+
+/*
+ * cvector: allocate a char vector addressable as v[nl..nh].
+ * Calls nrerror() (which exits) when the allocation fails.
+ * Release with free_cvector(v, nl, nh).
+ *
+ * The element size used for the allocation is sizeof(int), as in the
+ * original, so the buffer is larger than strictly needed.
+ * NOTE(review): kept as is because callers are not visible here; shrink to
+ * sizeof(char) only after checking that none relies on the slack.
+ */
+char *cvector(long nl, long nh){
+    char *v;
+
+    v = (char *)malloc((size_t) ((nh-nl+1+NR_END)*sizeof(int)));
+    /* The message used to name ivector(), which hid the real call site. */
+    if (!v) nrerror("allocation failure in cvector()");
+    return v-nl+NR_END;
+}
+
+
+/*
+ * ivector: allocate an int vector addressable as v[nl..nh].
+ * Calls nrerror() when the allocation fails.
+ * Release with free_ivector(v, nl, nh).
+ */
+int *ivector(long nl, long nh){
+    const size_t bytes = (size_t) ((nh-nl+1+NR_END)*sizeof(int));
+    int *base = (int *)malloc(bytes);
+
+    if (base == NULL) nrerror("allocation failure in ivector()");
+    return base-nl+NR_END;
+}
+
+/**** DUMP IN FAVOR OF NRUTIL.H ****/
+
+/*
+ * lvector: allocate a long vector addressable as v[nl..nh].
+ * Calls nrerror() when the allocation fails.
+ */
+long *lvector(long nl, long nh){
+    const size_t bytes = (size_t) ((nh-nl+1+NR_END)*sizeof(long int));
+    long int *base = (long int *)malloc(bytes);
+
+    if (base == NULL) nrerror("allocation failure in lvector()");
+    return base-nl+NR_END;
+}
+
+/*
+ * dvector: allocate a double vector addressable as v[nl..nh].
+ * Calls nrerror() when the allocation fails.
+ * Release with free_dvector(v, nl, nh).
+ */
+double *dvector(long nl, long nh){
+    const size_t bytes = (size_t) ((nh-nl+1+NR_END)*sizeof(double));
+    double *base = (double *)malloc(bytes);
+
+    if (base == NULL) nrerror("allocation failure in dvector()");
+    return base-nl+NR_END;
+}
+
+/*
+ * cmatrix: allocate a char matrix addressable as m[nrl..nrh][ncl..nch].
+ * One block holds the row pointers and a second, contiguous block holds
+ * all the cells.  Calls nrerror() when either allocation fails.
+ * Release with free_cmatrix().
+ */
+char **cmatrix(long nrl, long nrh, long ncl, long nch){
+    const long rows = nrh-nrl+1;
+    const long cols = nch-ncl+1;
+    char **rowptr;
+    char *cells;
+    long r;
+
+    rowptr = (char **)malloc((size_t)((rows+NR_END)*sizeof(char*)));
+    if (rowptr == NULL) nrerror("allocation failure 1 in cmatrix()");
+    rowptr = rowptr + NR_END - nrl;
+
+    cells = (char *)malloc((size_t)((rows*cols+NR_END)*sizeof(char)));
+    rowptr[nrl] = cells;
+    if (rowptr[nrl] == NULL) nrerror("allocation failure 2 in cmatrix()");
+    rowptr[nrl] = rowptr[nrl] + NR_END - ncl;
+
+    /* Each further row starts `cols` cells after the previous one. */
+    for (r = nrl; r < nrh; r++) rowptr[r+1] = rowptr[r]+cols;
+
+    return rowptr;
+}
+
+/*
+ * imatrix: allocate an int matrix addressable as m[nrl..nrh][ncl..nch].
+ * One block holds the row pointers and a second, contiguous block holds
+ * all the cells.  Calls nrerror() when either allocation fails.
+ * Release with free_imatrix().
+ */
+int **imatrix(long nrl, long nrh, long ncl, long nch){
+    const long rows = nrh-nrl+1;
+    const long cols = nch-ncl+1;
+    int **rowptr;
+    int *cells;
+    long r;
+
+    rowptr = (int **)malloc((size_t)((rows+NR_END)*sizeof(int*)));
+    if (rowptr == NULL) nrerror("allocation failure 1 in imatrix()");
+    rowptr = rowptr + NR_END - nrl;
+
+    cells = (int *)malloc((size_t)((rows*cols+NR_END)*sizeof(int)));
+    rowptr[nrl] = cells;
+    if (rowptr[nrl] == NULL) nrerror("allocation failure 2 in imatrix()");
+    rowptr[nrl] = rowptr[nrl] + NR_END - ncl;
+
+    /* Each further row starts `cols` cells after the previous one. */
+    for (r = nrl; r < nrh; r++) rowptr[r+1] = rowptr[r]+cols;
+
+    return rowptr;
+}
+
+/*
+ * dmatrix: allocate a double matrix addressable as m[nrl..nrh][ncl..nch].
+ * One block holds the row pointers and a second, contiguous block holds
+ * all the cells.  Calls nrerror() when either allocation fails.
+ * Release with free_dmatrix().
+ */
+double **dmatrix(long nrl, long nrh, long ncl, long nch){
+    const long rows = nrh-nrl+1;
+    const long cols = nch-ncl+1;
+    double **rowptr;
+    double *cells;
+    long r;
+
+    rowptr = (double **)malloc((size_t)((rows+NR_END)*sizeof(double*)));
+    if (rowptr == NULL) nrerror("allocation failure 1 in dmatrix()");
+    rowptr = rowptr + NR_END - nrl;
+
+    cells = (double *)malloc((size_t)((rows*cols+NR_END)*sizeof(double)));
+    rowptr[nrl] = cells;
+    if (rowptr[nrl] == NULL) nrerror("allocation failure 2 in dmatrix()");
+    rowptr[nrl] = rowptr[nrl] + NR_END - ncl;
+
+    /* Each further row starts `cols` cells after the previous one. */
+    for (r = nrl; r < nrh; r++) rowptr[r+1] = rowptr[r]+cols;
+
+    return rowptr;
+}
+
+
+/*
+ * d3tensor: allocate a double 3-D array addressable as
+ * t[nrl..nrh][ncl..nch][ndl..ndh].  Three blocks are allocated: the row
+ * pointers, the row*column pointers, and one contiguous block of cells.
+ * Calls nrerror() when any allocation fails.
+ */
+double ***d3tensor(long nrl,long nrh,long ncl,long nch,long ndl,long ndh){
+    const long rows = nrh-nrl+1;
+    const long cols = nch-ncl+1;
+    const long depth = ndh-ndl+1;
+    double ***t;
+    long r, c;
+
+    t = (double ***) malloc((size_t)((rows+NR_END)*sizeof(double**)));
+    if (t == NULL) nrerror("allocation failure 1 in d3tensor()");
+    t = t + NR_END - nrl;
+
+    t[nrl] = (double **) malloc((size_t)((rows*cols+NR_END)*sizeof(double*)));
+    if (t[nrl] == NULL) nrerror("allocation failure 2 in d3tensor()");
+    t[nrl] = t[nrl] + NR_END - ncl;
+
+    t[nrl][ncl] = (double *) malloc((size_t)((rows*cols*depth+NR_END)*sizeof(double)));
+    if (t[nrl][ncl] == NULL) nrerror("allocation failure 3 in d3tensor()");
+    t[nrl][ncl] = t[nrl][ncl] + NR_END - ndl;
+
+    for (r = nrl; r <= nrh; r++) {
+        if (r > nrl) {
+            /* Rows after the first are derived from the previous row. */
+            t[r] = t[r-1]+cols;
+            t[r][ncl] = t[r-1][ncl]+cols*depth;
+        }
+        for (c = ncl+1; c <= nch; c++) t[r][c] = t[r][c-1]+depth;
+    }
+    return t;
+}
+
+/* free_ivector: release an int vector obtained from ivector(nl, nh); nh is not used. */
+void free_ivector(int *v, long nl, long nh)
+{
+    int *base = v+nl-NR_END;
+
+    free((FREE_ARG) base);
+}
+
+/* free_cvector: release a char vector obtained from cvector(nl, nh); nh is not used. */
+void free_cvector(char *v, long nl, long nh)
+{
+    char *base = v+nl-NR_END;
+
+    free((FREE_ARG) base);
+}
+
+/* free_dvector: release a double vector obtained from dvector(nl, nh); nh is not used. */
+void free_dvector(double *v, long nl, long nh)
+{
+    double *base = v+nl-NR_END;
+
+    free((FREE_ARG) base);
+}
+
+
+
+/*
+ * free_dmatrix: release a double matrix obtained from dmatrix() with the
+ * same nrl and ncl.  The cell block is freed first, then the row pointers.
+ * nrh and nch are not used.
+ */
+void free_dmatrix(double **m, long nrl, long nrh, long ncl, long nch)
+{
+    double *cells = m[nrl]+ncl-NR_END;
+    double **rowptr = m+nrl-NR_END;
+
+    free((FREE_ARG) cells);
+    free((FREE_ARG) rowptr);
+}
+
+/*
+ * free_imatrix: release an int matrix obtained from imatrix() with the
+ * same nrl and ncl.  The cell block is freed first, then the row pointers.
+ * nrh and nch are not used.
+ */
+void free_imatrix(int **m, long nrl, long nrh, long ncl, long nch)
+{
+    int *cells = m[nrl]+ncl-NR_END;
+    int **rowptr = m+nrl-NR_END;
+
+    free((FREE_ARG) cells);
+    free((FREE_ARG) rowptr);
+}
+
+/*
+ * free_cmatrix: release a char matrix obtained from cmatrix() with the
+ * same nrl and ncl.  The cell block is freed first, then the row pointers.
+ * nrh and nch are not used.
+ */
+void free_cmatrix(char **m, long nrl, long nrh, long ncl, long nch)
+{
+    char *cells = m[nrl]+ncl-NR_END;
+    char **rowptr = m+nrl-NR_END;
+
+    free((FREE_ARG) cells);
+    free((FREE_ARG) rowptr);
+}
+
+
+/*
+ * am2num: map a one-letter amino-acid code (either case) to 1..20 in the
+ * order W F Y M L I V A C G P T S N Q D E H R K.
+ * Ambiguous codes are folded onto standard residues: X and U -> A (8),
+ * B -> N (14), Z -> Q (15).  Every other character gives 0.
+ */
+int am2num(int c)
+{
+    static const char upper[] = "WFYMLIVACGPTSNQDEHRK";
+    static const char lower[] = "wfymlivacgptsnqdehrk";
+    int idx;
+
+    for (idx = 0; idx < 20; idx++) {
+        if (c == upper[idx] || c == lower[idx]) return idx+1;
+    }
+
+    if (c == 'X' || c == 'x' || c == 'U' || c == 'u') return 8;
+    if (c == 'B' || c == 'b') return 14;
+    if (c == 'Z' || c == 'z') return 15;
+    return 0;
+}
+
+
+/*
+ * am2numBZX: map a one-letter amino-acid code (either case) to 1..20 in
+ * the order W F Y M L I V A C G P T S N Q D E H R K, with the extra codes
+ * B -> 21, Z -> 22, X -> 23 and '*' -> 24.  Every other character gives 0.
+ *
+ * The parameter now has an explicit int type; the original relied on the
+ * implicit-int rule, which is no longer valid C.
+ */
+int am2numBZX(int c)
+{
+    static const char upper[] = "WFYMLIVACGPTSNQDEHRKBZX";
+    static const char lower[] = "wfymlivacgptsnqdehrkbzx";
+    int idx;
+
+    for (idx = 0; idx < 23; idx++) {
+        if (c == upper[idx] || c == lower[idx]) return idx+1;
+    }
+    if (c == '*') return 24;
+    return 0;
+}
+
+
+/*
+ * am2lower: convert '-' to '.' and the upper-case residue letters
+ * W F Y M L I V A C G P T S N Q D E H R K B Z X to lower case.
+ * Every other character (including J, O and U) is returned unchanged.
+ */
+char am2lower(char inchr)
+{
+    static const char from[] = "-WFYMLIVACGPTSNQDEHRKBZX";
+    static const char to[]   = ".wfymlivacgptsnqdehrkbzx";
+    int idx;
+
+    for (idx = 0; idx < 24; idx++) {
+        if (inchr == from[idx]) return to[idx];
+    }
+    return inchr;
+}
+
+/*
+ * am2upper: convert '.' to '-' and the lower-case residue letters
+ * w f y m l i v a c g p t s n q d e h r k b z x to upper case.
+ * Every other character (including j, o and u) is returned unchanged.
+ */
+char am2upper(char inchr)
+{
+    static const char from[] = ".wfymlivacgptsnqdehrkbzx";
+    static const char to[]   = "-WFYMLIVACGPTSNQDEHRKBZX";
+    int idx;
+
+    for (idx = 0; idx < 24; idx++) {
+        if (inchr == from[idx]) return to[idx];
+    }
+    return inchr;
+}
+
+
+
+
+
+/* File-level alignment matrix. */
+int **alignment;
+
+/* Small allocation and string helpers, defined just below. */
+static void *mymalloc(int size);
+char *strsave(char *str);
+char *strnsave(char *str, int l);
+static char **incbuf(int n, char **was);
+static int *incibuf(int n, int *was);
+
+/* Alignment reading and character-to-integer conversion. */
+void readali(char *filename);
+int **ali_char2int(char **aseq,int start_num, int start_seq);
+int **read_alignment2int(char *filename,int start_num,int start_seq);
+
+/* Effective sequence number calculations. */
+void counter(int b);
+double effective_number(int **ali, int *marks, int n, int start, int end);
+double effective_number_nogaps(int **ali, int *marks, int n, int start, int end);
+double effective_number_nogaps_expos(int **ali, int *marks, int n, int start, int end, int pos);
+
+
+
+/*
+ * mymalloc: malloc() wrapper that never returns NULL.  On failure it
+ * prints the requested size to stderr and ends the process with status 1.
+ *
+ * Rewritten from an old-style (K&R) definition to prototype form, which
+ * matches the prototype declared earlier in the file.
+ */
+static void *mymalloc(int size)
+{
+    void *buf = malloc(size);
+
+    if (buf == NULL) {
+        fprintf(stderr, "Not enough memory: %d\n", size);
+        exit(1);
+    }
+    return buf;
+}
+
+/*
+ * strsave: return a newly allocated copy of the NUL-terminated string
+ * str.  The memory comes from mymalloc(); the caller releases it with
+ * free().
+ *
+ * Rewritten from an old-style (K&R) definition to prototype form.
+ */
+char *strsave(char *str)
+{
+    int l = strlen(str);
+    char *buf = mymalloc(l + 1);
+
+    strcpy(buf, str);
+    return buf;
+}
+
+/*
+ * strnsave: return a newly allocated, NUL-terminated copy of the first l
+ * bytes of str.  Exactly l bytes are copied with memcpy(), so str must
+ * provide at least l readable bytes.  The memory comes from mymalloc();
+ * the caller releases it with free().
+ *
+ * Rewritten from an old-style (K&R) definition to prototype form.
+ */
+char *strnsave(char *str, int l)
+{
+    char *buf = mymalloc(l + 1);
+
+    memcpy(buf, str, l);
+    buf[l] = '\0';
+    return buf;
+}
+
+/*
+ * incbuf: grow an array of n string pointers to n+1 entries.
+ * The first n pointers are copied from `was`, which is then freed, and the
+ * new last entry is set to NULL.  When n == 0, `was` is neither read nor
+ * freed.  Returns the new array.
+ *
+ * Rewritten from an old-style (K&R) definition to prototype form; the
+ * unused local was dropped.
+ */
+static char **incbuf(int n, char **was)
+{
+    char **buf = mymalloc((n+1) * sizeof(buf[0]));
+
+    if (n > 0) {
+        memcpy(buf, was, n * sizeof(was[0]));
+        free(was);
+    }
+    buf[n] = NULL;
+    return buf;
+}
+
+/*
+ * incibuf: grow an array of n ints to n+1 entries.
+ * The first n values are copied from `was`, which is then freed, and the
+ * new last entry is set to 0.  When n == 0, `was` is neither read nor
+ * freed.  Returns the new array.
+ *
+ * Rewritten from an old-style (K&R) definition to prototype form.
+ */
+static int *incibuf(int n, int *was)
+{
+    int *ibuf = mymalloc((n+1) * sizeof(ibuf[0]));
+
+    if (n > 0) {
+        memcpy(ibuf, was, n * sizeof(was[0]));
+        free(was);
+    }
+    ibuf[n] = 0;
+    return ibuf;
+}
+/* err_readali: report the numeric code of an alignment reading failure on stderr. */
+void err_readali(int err_num)
+{
+    FILE *err = stderr;
+
+    fprintf(err,"Error with reading alignment: %d\n",err_num);
+}
+
+/*
+ * readali: read a blocked multiple alignment from `filename` into the
+ * file-level variables aname[], aseq[], astart[], alen[], nal and alilen.
+ *
+ * Format handled by the code below:
+ *   - an optional first line starting with "CLUSTAL " is skipped;
+ *   - lines starting with '#' or "//" are ignored;
+ *   - blank lines separate blocks; every block must list the same names in
+ *     the same order as the first block;
+ *   - a sequence line is "name [start] residues"; the optional number is
+ *     stored in astart[] for the first block only;
+ *   - once the first sequence line has been seen, lines indented at least
+ *     as far as its sequence column are skipped.
+ * Letters, '-' and '.' count toward the column length; only letters count
+ * toward alen[].
+ *
+ * On any failure a message is printed, err_readali() is called with a
+ * code 1..8, flag_errread is set to 1 and the function returns.
+ *
+ * Changes from the original:
+ *   - the file is now closed on every error path;
+ *   - ii and len0 are initialised (ii was read before being set);
+ *   - the name terminator is not stepped over when the name ends the line;
+ *   - <ctype.h> calls receive unsigned char values;
+ *   - an unreachable "No alignments read" branch (code 2) was removed.
+ */
+void readali(char *filename)
+{
+    FILE *fp;
+    char *s, *ss, *seqbuf;
+    int n, l, len;
+    int len0 = 0;    /* column count of the first sequence in the current block */
+    int ii = 0;      /* offset of the sequence column on the first sequence line */
+    int mark = 1;    /* 1 until ii has been recorded */
+
+    if ((fp = fopen(filename, "r")) == NULL) {
+        fprintf(stderr, "No such file: \"%s\"\n", filename);
+        err_readali(1);
+        flag_errread=1;
+        return;
+    }
+
+    alilen = 0;
+    nal = 0;
+    n = 0;
+
+    /* Skip an optional "CLUSTAL ..." banner; otherwise start again from the top. */
+    if (fgets(str, MAXSTR, fp) != NULL && strncmp(str,"CLUSTAL ",8) != 0) {
+        rewind(fp);
+    }
+
+    while (fgets(str, MAXSTR, fp) != NULL) {
+
+        if (*str=='#' || strncmp(str,"//",2) == 0) continue;
+        for (ss = str; isspace((unsigned char)*ss); ss++) ;
+        /* Lines indented up to the sequence column carry no name: skip them. */
+        if (mark == 0 && ii <= ss-str) continue;
+
+        if (*ss == '\0') {
+            /* Blank line: closes the current block, if one was open. */
+            if (n == 0) continue;
+            if (nal == 0) {
+                nal = n;
+            } else if (n != nal) {
+                fprintf(stderr, "Wrong nal, was: %d, now: %d\n", nal, n);
+                err_readali(3);
+                goto fail;
+            }
+            n = 0;
+            continue;
+        }
+
+        /* Cut out the sequence name. */
+        for (s = ss; *s != '\0' && !isspace((unsigned char)*s); s++) ;
+        if (*s != '\0') *s++ = '\0';
+
+        if (nal == 0) {
+            /* First block: grow the per-sequence tables by one entry. */
+            astart = incibuf(n, astart);
+            alen = incibuf(n, alen);
+            aseq = incbuf(n, aseq);
+            aname = incbuf(n, aname);
+            aname[n] = strsave(ss);
+        } else {
+            if (n < 0 || n >= nal) {
+                fprintf(stderr, "Bad sequence number: %d of %d\n", n, nal);
+                err_readali(4);
+                goto fail;
+            }
+            if (strcmp(ss, aname[n]) != 0) {
+                fprintf(stderr, "Names do not match");
+                fprintf(stderr, ", was: %s, now: %s\n", aname[n], ss);
+                err_readali(5);
+                goto fail;
+            }
+        }
+
+        for (ss = s; isspace((unsigned char)*ss); ss++) ;
+        if (mark == 1) {
+            ii = ss-str;
+            mark = 0;
+        }
+
+        /* Optional start position in front of the residues. */
+        for (s = ss; isdigit((unsigned char)*s); s++) ;
+        if (isspace((unsigned char)*s)) {
+            if (nal == 0) {
+                astart[n] = atoi(ss);
+            }
+            for (ss = s; isspace((unsigned char)*ss); ss++) ;
+        }
+
+        /* len counts residues and gap symbols; l counts residues only. */
+        for (s = ss, len = 0, l = 0; *s != '\0' && !isspace((unsigned char)*s); s++) {
+            if (isalpha((unsigned char)*s)) {
+                l++;
+            }
+            if (isalpha((unsigned char)*s) || *s == '-' || *s == '.') {
+                len++;
+            }
+        }
+
+        if (n == 0) {
+            len0 = len;
+            alilen += len;
+        } else if (len != len0) {
+            fprintf(stderr, "wrong len for %s", aname[n]);
+            fprintf(stderr, ", was: %d, now: %d\n", len0, len);
+            err_readali(6);
+            goto fail;
+        }
+
+        alen[n] += l;
+        if (aseq[n] == NULL) {
+            aseq[n] = strnsave(ss, len);
+        } else {
+            /* Append this block's chunk to what was read for the sequence so far. */
+            seqbuf = mymalloc(alilen+1);
+            memcpy(seqbuf, aseq[n], alilen-len);
+            free(aseq[n]);
+            aseq[n] = seqbuf;
+            memcpy(seqbuf+alilen-len, ss, len);
+            seqbuf[alilen] = '\0';
+        }
+        n++;
+    }
+
+    /* The last block may end at end-of-file without a blank line. */
+    if (nal == 0) {
+        if (n == 0) {
+            fprintf(stderr, "No alignments read\n");
+            err_readali(7);
+            goto fail;
+        }
+        nal = n;
+    } else if (n != 0 && n != nal) {
+        fprintf(stderr, "Wrong nal, was: %d, now: %d\n", nal, n);
+        err_readali(8);
+        goto fail;
+    }
+
+    fclose(fp);
+    return;
+
+fail:
+    flag_errread=1;
+    fclose(fp);
+}
+
+/*** Append ali to the file named by argo ****/
+/* Write one chunk of every sequence in an alignment: padded name, start
+   position (first chunk only), padding to 7 columns, then the residues. */
+static void printali_rows(FILE *fpp, int nseq, char **names, char **seqs, int *starts, int width, int jj, int len, int chunk)
+{
+    int i, j, k, len_start, ratio;
+
+    for (i = 0; i < nseq; i++) {
+        fprintf(fpp, "%-*s", width, names[i]);
+
+        if (jj == 0) {
+            /* Number of digits in the start position (0 for values <= 0). */
+            for (len_start = 0, ratio = starts[i]; ratio > 0; ratio /= 10, len_start++) ;
+            fprintf(fpp, "%d", starts[i]);
+        } else {
+            len_start = 0;
+        }
+        for (k = len_start; k < 7; k++) fputc(' ', fpp);
+
+        /* Sequences are 1-based: column c of the alignment is seqs[i][c]. */
+        for (j = 1; j+jj <= len && j <= chunk; j++) {
+            fputc(seqs[i][jj+j], fpp);
+        }
+        fputc('\n', fpp);
+    }
+}
+
+/*
+ * printali_ali: append two aligned alignments to the file named by argo,
+ * in chunks of `chunk` columns.  Each chunk shows the n1 rows of the first
+ * alignment, a marker line with '+' wherever positive[column] is non-zero,
+ * and the n2 rows of the second alignment.
+ *
+ * Side effect, kept from the original: start1[] is shifted by apos1[1]-1
+ * and start2[] by apos2[1]-1 before printing.
+ * col_score and score are accepted but not used.
+ *
+ * Changes from the original:
+ *   - start2[] is shifted for its own n2 entries (it was shifted under the
+ *     n1 bound, which skipped or overran entries when n1 != n2);
+ *   - names are written as data, not passed as a printf format string;
+ *   - the fixed 100-byte copies of argo and of the names are gone;
+ *   - a failed fopen() is reported instead of being used.
+ */
+static void printali_ali(char *argo, int chunk, int n1, int n2, int len, char **aname1, char **aname2,
+char **aseqGap1, char **aseqGap2, int *start1, int *start2, int *positive, int **col_score, int score)
+{
+    int i, j, k, jj, mlen;
+    FILE *fpp;
+
+    for (i = 0; i < n1; i++) start1[i] += apos1[1]-1;
+    for (i = 0; i < n2; i++) start2[i] += apos2[1]-1;
+
+    fpp = fopen(argo, "a");
+    if (fpp == NULL) {
+        fprintf(stderr, "Cannot open \"%s\" for appending\n", argo);
+        return;
+    }
+
+    /* Longest name over both alignments sets the width of the name column. */
+    mlen = 0;
+    for (i = 0; i < n1; i++) {
+        if (mlen < (int)strlen(aname1[i])) mlen = (int)strlen(aname1[i]);
+    }
+    for (i = 0; i < n2; i++) {
+        if (mlen < (int)strlen(aname2[i])) mlen = (int)strlen(aname2[i]);
+    }
+
+    jj = 0;
+    do {
+        if (jj > 0) {
+            fprintf(fpp, "\n");
+        }
+
+        /* Chunk of the first alignment. */
+        printali_rows(fpp, n1, aname1, aseqGap1, start1, mlen+3, jj, len, chunk);
+
+        /* Marker line, indented past the name and start columns. */
+        for (k = 0; k < mlen+10; k++) fputc(' ', fpp);
+        for (j = 1; j+jj <= len && j <= chunk; j++) {
+            fputc(positive[jj+j] ? '+' : ' ', fpp);
+        }
+        fprintf(fpp, "\n");
+
+        /* Chunk of the second alignment. */
+        printali_rows(fpp, n2, aname2, aseqGap2, start2, mlen+3, jj, len, chunk);
+        fprintf(fpp, "\n");
+
+        jj += chunk;
+    } while (jj < len);
+
+    fclose(fpp);
+}
+
+/*
+ * ali_char2int: convert the character alignment aseq[0..nal-1][0..alilen-1]
+ * to integers with am2num() and return it as a new matrix
+ * ali[start_num..start_num+nal-1][start_seq..start_seq+alilen-1].
+ * nal and alilen are the file-level alignment dimensions.
+ */
+int **ali_char2int(char **aseq, int start_num, int start_seq){
+    const int last_num = start_num+nal-1;
+    const int last_seq = start_seq+alilen-1;
+    int **ali = imatrix(start_num,last_num,start_seq,last_seq);
+    int row, col;
+
+    for (row = 0; row < nal; ++row) {
+        for (col = 0; col < alilen; ++col) {
+            ali[start_num+row][start_seq+col] = am2num(aseq[row][col]);
+        }
+    }
+    return ali;
+}
+
+/*
+ * read_alignment2int: read the alignment in `filename` with readali() and
+ * return it converted to integers by ali_char2int(), indexed from
+ * [start_num][start_seq].
+ *
+ * Returns NULL when readali() reported a failure through flag_errread.
+ * The original used a bare `return;` there, leaving the returned pointer
+ * undefined.
+ */
+int **read_alignment2int(char *filename,int start_num,int start_seq){
+    readali(filename);
+
+    if (flag_errread==1) return NULL;
+
+    return ali_char2int(aseq,start_num,start_seq);
+}
+
+/*
+ * effective_number_nogaps: effective number of the sequences flagged with
+ * marks[i] == 1 (i = 1..n), measured over columns start..end of ali.
+ *
+ * For every column the number of distinct residues (values 1..20) among
+ * the marked sequences is counted.  Columns showing no residue at all are
+ * left out.  With L the mean number of distinct residues per counted
+ * column (0 when no column counts), the result is
+ *     -log(1 - 0.05*L) / 0.05129329438755
+ */
+double effective_number_nogaps(int **ali, int *marks, int n, int start, int end){
+    int *seen = ivector(0,20);
+    int distinct_total = 0;
+    int sites = 0;
+    int col, seq, aa;
+    double per_site, neff;
+
+    for (col = start; col <= end; ++col) {
+        int distinct = 0;
+
+        for (aa = 0; aa <= 20; ++aa) seen[aa] = 0;
+        for (seq = 1; seq <= n; ++seq) {
+            if (marks[seq] == 1) seen[ali[seq][col]]++;
+        }
+        for (aa = 1; aa <= 20; ++aa) {
+            if (seen[aa] > 0) distinct++;
+        }
+
+        distinct_total += distinct;
+        if (distinct > 0) sites++;
+    }
+
+    if (sites == 0) per_site = 0;
+    else per_site = 1.0*distinct_total/sites;
+
+    neff = -log(1.0-0.05*per_site)/0.05129329438755;
+
+    free_ivector(seen,0,20);
+    return neff;
+}
+
+int *letters; /* allocated as ivector(0, alilen+1) by freqInt() and freqIntMaskGaps(); letters[j] = non-gap count of column j */
+
+/*
+ * freqInt: count the symbols in every column of ali[1..nal][1..alilen].
+ *
+ * Values 0..20 are counted directly and values 25..45 are counted as
+ * value-25; any other value above 20 prints a message and calls exit(0).
+ *
+ * Outputs:
+ *   f[j][0]            gap count of column j
+ *   f[j][1..20]        residue counts of column j (counts, not frequencies)
+ *   num_gaps[j]        gap count of column j
+ *   effindiarr[0]      number of columns recorded
+ *   effindiarr[1..]    the recorded column indices (every column here),
+ *                      followed by one INDI entry as terminator
+ *   p_comp[1..20]      overall residue counts divided by the number of
+ *                      non-gap cells (alilen*nal minus the total gap count)
+ *   letters[j]         (global) nal minus the gap count of column j
+ *   n_c                (global) total of distinct symbols per column,
+ *                      gap included, divided by alilen
+ *
+ * gap_threshold only appears in commented-out code.
+ * NOTE(review): declared void ** but there is no return statement.
+ * NOTE(review): `letters` is re-allocated on every call and the previous
+ * pointer is not freed here.
+ * NOTE(review): a negative cell value passes the <=20 test and would index
+ * count[] below zero - confirm that the inputs are never negative.
+ */
+void **freqInt(int **ali,int nal, int alilen, int **f,int *num_gaps,int
+*effindiarr,double gap_threshold, double *p_comp)
+{
+ int i,j,k,effnumind, sumNC, fullCountNogaps;
+ int count[21], sum_comp[21];
+
+ fprintf (stderr, "freqInt started...\n");
+
+ letters = ivector(0, alilen+1);
+ letters[0]=1;
+
+ /* find the number of frequences at each position */
+ effnumind=0;
+ sumNC = 0;
+ for(i=0;i<=20;i++) sum_comp[i]=0;
+ for(j=1;j<=alilen;j++){
+ for(i=0;i<=20;i++) count[i]=0;
+ for(i=1;i<=nal;i++) {
+ if(ali[i][j]<=20) {
+ count[ali[i][j]]++;
+ }
+ else {if(ali[i][j]>=25&&ali[i][j]<=45)
+ {count[ali[i][j]-25]++;}
+ else {
+ fprintf(stderr,"not good number for AA\n");
+ fprintf(stderr,"%d", i);
+ fprintf(stderr,"\n");
+ fprintf(stderr,"%d", j);
+ exit(0);
+ }
+ }
+ }
+ /*** Adding to the sum of different symbols in the columns over the alignment, to derive N_C , and to the overall aa counts ***/
+
+ for (i=0; i<=20; i++) {
+ if(count[i]>0) {
+ sumNC++;
+ sum_comp[i]+=count[i];
+ }
+
+/* fprintf (stderr, "%d_%d ",count[i], sumNC); */
+ }
+/* fprintf (stderr, "\n\n"); */
+
+ num_gaps[j] = count[0];
+/*** f[0][j] = count[0]*1.0/nal; ***/
+ f[j][0] = count[0];
+ if(f[j][0]>nal) {
+ fprintf(stderr,"gap number>total number\n");
+ exit(0);
+ }
+
+/* Eliminate the condition for small enough number of gaps */
+
+/* if(f[0][j]>=gap_threshold) { /* ignore the case where gaps occur >= gap_threshold(percentage of gaps)
+ f[0][j]=INDI;
+ continue;
+ } */
+
+ effnumind++;
+ effindiarr[effnumind]=j;
+ /* From here on count[0] holds the number of non-gap cells in the column. */
+ count[0]=nal-count[0];
+ letters[j] = count[0];
+/*** if(count[0]<=0){
+ fprintf(stderr, "count[0] less than 0: %d column = %d\n",count[0],j);
+ exit(0);
+ }
+***/
+ for(k=1;k<=20;k++){
+/* Eliminate the division by count[0] - not freqs but counts !!! */
+/* f[k][j]=count[k]*1.0/count[0]; */
+ f[j][k]=count[k];
+ }
+ }
+
+
+ n_c = sumNC;
+ n_c = n_c/alilen;
+
+ fullCountNogaps = alilen*nal-sum_comp[0];
+ for (i=1;i<=20;i++) p_comp[i] = 1.0*sum_comp[i]/fullCountNogaps;
+
+ fprintf (stderr, "\n sumNC = %d alilen = %d \n",sumNC, alilen);
+ effindiarr[effnumind+1]=INDI;/*set the last element negative*/
+ effindiarr[0]=effnumind;
+}
+
+/* Version for alscr_wwm1.c : no deleting columns with gaps in 1st seq */
+/*
+ * freqIntMaskGaps: count the symbols in every column of
+ * ali[1..nal][1..alilen] and keep only the columns whose raw gap fraction
+ * count[0]/nal is below gapmax.
+ *
+ * Values 0..20 are counted directly and values 25..45 are counted as
+ * value-25; any other value above 20 prints a message and calls exit(0).
+ *
+ * Outputs:
+ *   f[c][0..20]        symbol counts of the c-th kept column
+ *   p_comp[1..20]      overall residue counts (all columns) divided by the
+ *                      number of non-gap cells
+ *   maskgaps[j]        (global) 1 when column j is dropped, else 0
+ *   maskgapRegion[j]   (global) for kept columns, 1 when the gap fraction
+ *                      is >= gapRegionMin, else 0; indexed by the original
+ *                      column j and left untouched for dropped columns
+ *   letters[j]         (global) nal minus the gap count of column j
+ *   n_lowgaps          (global) number of kept columns
+ *   n_c                (global) total of distinct symbols per column,
+ *                      gap included, divided by alilen
+ *
+ * NOTE(review): declared void ** but there is no return statement.
+ * NOTE(review): `letters` is re-allocated on every call and the previous
+ * pointer is not freed here.
+ */
+void **freqIntMaskGaps(int **ali,int nal, int alilen, int **f, double gapmax, double gapRegionMin, double *p_comp)
+{
+ int i,j,k, jnew, fullCountNogaps;
+ int sumNC; /*sum of different symbols in columns over alignment, used to derived effective size n_c*/
+ int count[21];
+ int sum_comp[21]; /*overall aa counts*/
+ letters = ivector(0, alilen+1);
+ letters[0]=1;
+
+ /* find the number of frequences at each position */
+ sumNC = 0;
+ for(i=0;i<=20;i++) sum_comp[i]=0;
+ jnew = 0;
+ for(j=1;j<=alilen;j++){
+ for(i=0;i<=20;i++) count[i]=0;
+ for(i=1;i<=nal;i++) {
+ if(ali[i][j]<=20) {count[ali[i][j]]++;}
+ else {if(ali[i][j]>=25&&ali[i][j]<=45) {count[ali[i][j]-25]++;}
+ else {
+ fprintf(stderr,"not good number for AA\n");
+ fprintf(stderr,"%d", i);
+ fprintf(stderr,"\n");
+ fprintf(stderr,"%d", j);
+ exit(0);
+ }
+ }
+ }
+
+ for (i=0; i<=20; i++) {
+ if(count[i]>0) {
+ sumNC++;
+ sum_comp[i]+=count[i];
+ }
+ }
+
+ if(count[0]>nal) {
+ fprintf(stderr,"gap number>total number\n");
+ exit(0);
+ }
+
+/* Eliminate the condition for small enough number of gaps */
+
+/* if(f[0][j]>=gap_threshold) { /* ignore the case where gaps occur >= gap_threshold(percentage of gaps)
+ f[0][j]=INDI;
+ continue;
+ } */
+
+ letters[j]=nal-count[0];
+
+/* Raise flags for further deletion over higly gapped columns; mark "moderately gapped" regions */
+ if(1.0*count[0]/nal >= gapmax /* || ali[1][j] == 0 || ali[1][j]==25 */ ) maskgaps[j] = 1;
+ else {
+ maskgaps[j] = 0;
+ if(1.0*count[0]/nal >= gapRegionMin) maskgapRegion[j] = 1;
+ else maskgapRegion[j] = 0;
+ /* Kept column: store its counts at the next compact index jnew. */
+ jnew++;
+ for(k=0;k<=20;k++){f[jnew][k]=count[k];}
+ }
+ }
+ n_lowgaps = jnew;
+ n_c = sumNC;
+ n_c = n_c/alilen;
+
+ fullCountNogaps = alilen*nal-sum_comp[0];
+ for (i=1;i<=20;i++) p_comp[i] = 1.0*sum_comp[i]/fullCountNogaps;
+
+
+}
+
+void **neffsForEachCol(int **ali, int n, int len, double **n_effAa, double *sum_eff_let)
+{ /* For every column j in 1..len and every symbol k in 0..20 (0 = gap): n_effAa[j][k] receives effective_number_nogaps() computed over the sequences that carry k at column j (0.0 when none does); sum_eff_let[j] receives the sum over k = 1..20. NOTE(review): declared void ** but returns no value. */
+ int i,j,k;
+ int ele;
+ double *effnu; /* results for the 21 symbols of the current column */
+ double sum_let;
+ int *mark; /* mark[i] = 1 when sequence i carries symbol k in the current column */
+ int flagmark; /* 1 when at least one sequence is marked */
+
+ effnu = dvector(0,20); /* NOTE(review): effnu and mark are not released anywhere in this function */
+ mark = ivector(0,n+10);
+ for(j=1;j<=len;j++) {
+
+ sum_eff_let[j] = 0;
+ for(k=0;k<=20;++k){
+ n_effAa[j][k]=0;
+ }
+ }
+
+ for(j=1;j<=len;j++) {
+ sum_let=0;
+
+ for(k=0;k<=20;++k){
+/* Mark sequences that have amino acid k (or gap, k=0) in this jth position */
+ flagmark =0;
+ for(i=1;i<=n;++i){
+ mark[i]=0;
+
+ ele=ali[i][j];
+ if(ele==k){mark[i]=1; flagmark =1;}
+ ele=ali[i][j]-25; /* a code of k+25 is treated as symbol k as well */
+ if(ele==k) {mark[i]=1; flagmark =1;}
+ }
+
+/* If aa k (or gap) is present in this position call compute k-th effective count */
+ if (flagmark == 1) effnu[k]=effective_number_nogaps(ali,mark,n,1,len); /* helper defined elsewhere; presumably restricted to the marked sequences over columns 1..len - TODO confirm */
+ else effnu[k] = 0.0;
+
+ if (k>0) sum_let += effnu[k]; /* the gap (k=0) is left out of the column sum */
+ }
+
+ for (k=0; k<=20; k++) {
+ n_effAa[j][k] = effnu[k];
+ }
+ sum_eff_let[j] = sum_let;
+
+ }
+}
+
+
+/*computes the Smith-Waterman local alignment score over score_matrix and
+ returns it (no e-value is computed here)
+ score_matrix holds the position-vs-position scores, indexed [queryPos][dbPos]
+ queryLength is the number of positions along the first index
+ dbLength is the number of positions along the second index
+ gapOpen is the cost of opening a gap
+ gapextend is not used: the extension cost is recomputed for every
+ cell from g_e2() / g_e1()
+ queryEnd and dbEnd are passed by value and not used; the final position of
+ the best local alignment is stored in End1 (query) and End2 (db),
+ which are not declared in this function
+ tracebackDir receives, for each cell, the code of the winning move
+ (1-6, see the TraceBackDir comment in the body)
+ flagNewGapQuery, flagNewGapDb receive 1 in the cells where opening a
+ new gap scored better than extending an existing one, else 0
+ the fV_... flag matrices are allocated again on every call; four of them
+ are not declared in this function
+ maskgapRegion1/maskgapRegion2, b, score_scale, g_e1, g_e2 and sgapfcn are
+ likewise taken from outside this function
+ NOTE(review): scoreVector and the two locally declared fV_ matrices are
+ never freed here */
+
+
+ static int SmithWatermanScore(double **score_matrix, int queryLength, int dbLength, int gapOpen, int gapextend, int queryEnd, int dbEnd, int **tracebackDir,
+int **flagNewGapQuery, int **flagNewGapDb)
+{
+ int bestScore; /*best score seen so far*/
+ int newScore; /* score of next entry*/
+ int bestQueryPos, bestDbPos; /*position ending best score in
+ query and database sequences*/
+ int newGapCost; /*cost to have a gap of one character*/
+ int gapExtend; /* recomputed for every cell; the parameter gapextend (lower-case e) is a different identifier */
+ int prevScoreNoGapQuery; /*score one row and column up
+ with no gaps*/
+ int prevScoreGapQuery; /*score if a gap already started in query*/
+ int continueGapScore; /*score for continuing a gap in dbSequence*/
+ int queryPos, dbPos; /*positions in query and dbSequence*/
+/* Nlm_FloatHi returnEvalue; /*e-value to return*/
+ score_Vector scoreVector; /*keeps one row of the Smith-Waterman matrix
+ overwrite old row with new row*/
+ int RowScore; /*score for match of two positions*/
+ int gapDb2NoGap, gapQuery2NoGap, noGap2NoGap, score2NoGap; /* only noGap2NoGap is used: it carries the score of the diagonal predecessor */
+
+
+
+/********************** Introduce arrays and variables (NOTE: the list below describes row vectors; in this function every flag is a full matrix allocated with imatrix(0,queryLength,0,dbLength) and indexed [queryPos][dbPos]):
+
+int *fV_RepeatOpenGapQuery[queryPos] -- the current row of flagRepeatOpenGapQuery;
+
+int *fV_RepeatOpenGapDb[queryPos] -- the current row of flagRepeatOpenGapDb;
+
+int *fV_RepeatOpenGapDb1[queryPos] -- previous row of flagRepeatOpenGapDb, overwrite with the new row after
+the SW matrix row is passed;
+
+int *fV_RepeatOpenGapDb2[queryPos] -- the row of flagRepeatOpenGapDb 2 positions higher, overwrite with the
+fV_RepeatOpenGapQuery1 after the SW matrix row is passed;
+
+int *fV_RepeatOpenGapQuery1[queryPos] -- previous row of flagRepeatOpenGapQuery, overwrite with the new row after
+the SW matrix row is passed;
+
+int *fV_RepeatOpenGapQuery2[queryPos] -- the row of flagRepeatOpenGapQuery 2 positions higher, overwrite with the
+fV_RepeatOpenGapQuery1 after the SW matrix row is passed;
+
+int *fV_QInClosestNewGapDb, *fV_DbInClosestNewGapQuery - current rows of the flags equal to the flags in the closest starting points of new gaps in Db (closest at the vertical queryPos = const) and Query (closest at the horizontal dbPos = const). They are used to calculate flagRepeatOpenGapQuery and flagRepeatOpenGapDb, respectively.
+
+int *fV_QInClosestNewGapDb1 - previous row of flagInClosestNewGapDb, overwrite with the new flagInClosestNewGapDb after the SW matrix row is passed;
+
+int *fV_DbInClosestNewGapQuery1 -- previous row of flagInClosestNewGapQuery, overwrite with the new flagInClosestNewGapDb after the SW matrix row is passed;
+
+??? Some of arrays fV_InClosestNewGap... can be probably replaced by some variables, since we use only positions (-1,-1), (0,-1), (-1,0) and (0,0), or we can use the scheme analogous to scoreVector.nogap and scoreVector.gapExist.
+
+*****************************/
+/***** int **fV_RepeatOpenGapQuery, **fV_RepeatOpenGapDb; *****/
+int **fV_QInClosestNewGapDb, **fV_DbInClosestNewGapQuery; /* local to this function; the other four fV_ matrices assigned below are not declared here */
+
+/*****int **fV_DbInClosestNewGapDb, **fV_QInClosestNewGapQuery; *****/
+
+
+int flagRepeatOpenGapDb, flagRepeatOpenGapQuery;
+
+ scoreVector.noGap = ivector (1,queryLength);
+ scoreVector.gapExists = ivector (1,queryLength);
+
+ fV_RepeatOpenGapQuery = imatrix (0,queryLength,0, dbLength);
+ fV_RepeatOpenGapDb = imatrix (0,queryLength,0, dbLength);
+ fV_QInClosestNewGapDb = imatrix (0,queryLength,0, dbLength);
+ fV_DbInClosestNewGapQuery = imatrix (0,queryLength,0, dbLength);
+
+ fV_DbInClosestNewGapDb = imatrix (0,queryLength,0, dbLength);
+ fV_QInClosestNewGapQuery = imatrix (0,queryLength,0, dbLength);
+
+ bestQueryPos = 0;
+ bestDbPos = 0;
+ bestScore = 0;
+/*** newGapCost = gapOpen + gapExtend; ***/
+ for (queryPos = 1; queryPos <= queryLength; queryPos++) {
+ scoreVector.noGap[queryPos] = 0;
+ scoreVector.gapExists[queryPos] = -(gapOpen);
+ }
+
+ for (queryPos = 0; queryPos <= queryLength; queryPos++) { /* clear columns 0 and 1 of the flag matrices; NOTE(review): column 1 is written even when dbLength is 0 */
+
+ fV_RepeatOpenGapDb[queryPos][0] = 0;
+ fV_RepeatOpenGapQuery[queryPos][0] = 0;
+
+ fV_QInClosestNewGapDb[queryPos][0] = 0;
+ fV_DbInClosestNewGapQuery[queryPos][0] = 0;
+ fV_DbInClosestNewGapDb[queryPos][0] = 0;
+ fV_QInClosestNewGapQuery[queryPos][0] = 0;
+
+ fV_QInClosestNewGapDb[queryPos][1] = 0;
+ fV_DbInClosestNewGapQuery[queryPos][1] = 0;
+ fV_DbInClosestNewGapDb[queryPos][1] = 0;
+ fV_QInClosestNewGapQuery[queryPos][1] = 0;
+ }
+
+ for(dbPos = 1; dbPos <= dbLength; dbPos++) {
+
+ newScore = 0;
+ noGap2NoGap = 0;
+ prevScoreGapQuery = -(gapOpen);
+
+ fV_RepeatOpenGapDb[0][dbPos]= 0;
+ fV_RepeatOpenGapQuery[0][dbPos]= 0;
+
+ fV_QInClosestNewGapDb[0][dbPos]= 0;
+ fV_DbInClosestNewGapQuery[0][dbPos]= 0;
+ fV_DbInClosestNewGapDb[0][dbPos]= 0;
+ fV_QInClosestNewGapQuery[0][dbPos]= 0;
+
+ for(queryPos = 1; queryPos <= queryLength; queryPos++) {
+
+ flagNewGapQuery[queryPos][dbPos] = 0;
+ flagNewGapDb[queryPos][dbPos] = 0;
+
+/*** Check if we are in the gapped region of query and no gaps in db were opened against this region before;
+ if TRUE eliminate gapOpen penalty; if this is the first position of the gapped region, reward the extending
+ previous gap by compensating gapOpen in gapExtend ***/
+
+ gapExtend = rint(g_e2(queryPos,b));
+
+ flagRepeatOpenGapDb = fV_RepeatOpenGapDb[queryPos-1][dbPos];
+
+ if (maskgapRegion1[queryPos]==1 && flagRepeatOpenGapDb==0) {newGapCost = gapExtend;}
+ else { newGapCost = gapOpen + gapExtend; }
+
+ if (maskgapRegion1[queryPos-1]==0 && maskgapRegion1[queryPos]==1 && fV_DbInClosestNewGapDb[queryPos-1][dbPos]==0) {
+ gapExtend -= gapOpen;
+ }
+
+ /*testing scores with a gap in DB, either starting a new
+ gap or extending an existing gap*/
+
+ if ((newScore = newScore - newGapCost) >
+ (prevScoreGapQuery = prevScoreGapQuery - gapExtend)) {
+ prevScoreGapQuery = newScore;
+ flagNewGapQuery[queryPos][dbPos] = 1;
+ }
+
+
+/*** Check if we are in the gapped region of Db and no gaps were opened in query against this region before;
+ if TRUE, eliminate gapOpen penalty ***/
+ gapExtend = rint(g_e1(dbPos,b));
+
+ flagRepeatOpenGapQuery = fV_RepeatOpenGapQuery[queryPos][dbPos-1];
+
+ if (maskgapRegion2[dbPos]==1 && flagRepeatOpenGapQuery==0) {newGapCost = gapExtend;}
+ else {newGapCost = gapOpen + gapExtend;}
+
+/*** if this is the first position of the gapped region
+ reward the extending previous gap by compensating gapOpen in gapExtend ***/
+ if (maskgapRegion2[dbPos]==1 && maskgapRegion2[dbPos-1]==0 && fV_QInClosestNewGapQuery[queryPos][dbPos-1]==0) {
+ gapExtend -= gapOpen;
+ }
+
+ /*testing scores with a gap in Query, either starting a new
+ gap or extending an existing gap*/
+
+ if ((newScore = scoreVector.noGap[queryPos] - newGapCost) >
+ (continueGapScore = scoreVector.gapExists[queryPos] - gapExtend)) {
+ continueGapScore = newScore;
+ flagNewGapDb[queryPos][dbPos] = 1;
+ }
+
+ /*compute new score extending one position in query and db*/
+
+ RowScore = rint(score_matrix[queryPos][dbPos]*score_scale - sgapfcn(queryPos,dbPos,b));
+
+ newScore = noGap2NoGap + RowScore;
+
+ if (newScore < 0)
+ newScore = 0; /*Smith-Waterman locality condition*/
+
+/*** TraceBackDir: 6 - match with RowScore>0; 5 - match with RowScore<=0;
+ the value is replaced below by 1/2 (first gap test wins) or 3/4 (second gap test wins) ***/
+
+ if (RowScore>0) {tracebackDir[queryPos][dbPos] = 6;}
+ else { tracebackDir[queryPos][dbPos] = 5;}
+
+ if (maskgapRegion1[queryPos] == 0) fV_RepeatOpenGapDb[queryPos][dbPos] = 0;
+ else fV_RepeatOpenGapDb[queryPos][dbPos] = fV_RepeatOpenGapDb[queryPos-1][dbPos-1];
+
+ if (maskgapRegion2[dbPos] == 0) fV_RepeatOpenGapQuery[queryPos][dbPos] = 0;
+ else fV_RepeatOpenGapQuery[queryPos][dbPos] = fV_RepeatOpenGapQuery[queryPos-1][dbPos-1];
+
+/**** Assign the flags coming from the closest NewGap in Query ******/
+
+ if (flagNewGapDb[queryPos][dbPos] == 1) {
+ fV_DbInClosestNewGapQuery[queryPos][dbPos] = fV_RepeatOpenGapDb[queryPos][dbPos-1];
+ fV_QInClosestNewGapQuery[queryPos][dbPos] = fV_RepeatOpenGapQuery[queryPos][dbPos-1];
+ } else {
+ fV_DbInClosestNewGapQuery[queryPos][dbPos] = fV_DbInClosestNewGapQuery[queryPos][dbPos-1];
+ fV_QInClosestNewGapQuery[queryPos][dbPos] = fV_QInClosestNewGapQuery[queryPos][dbPos-1];
+ }
+
+ if (maskgapRegion2[dbPos]==1) {fV_QInClosestNewGapQuery[queryPos][dbPos] = 1;}
+ if (maskgapRegion1[queryPos]==1) {fV_DbInClosestNewGapQuery[queryPos][dbPos] = 1;}
+
+/**** Assign the flags coming from the closest NewGap in Db ******/
+
+ if (flagNewGapQuery[queryPos][dbPos] == 1) {
+ fV_QInClosestNewGapDb[queryPos][dbPos] = fV_RepeatOpenGapQuery[queryPos-1][dbPos];
+ fV_DbInClosestNewGapDb[queryPos][dbPos] = fV_RepeatOpenGapDb[queryPos-1][dbPos];
+ } else {
+ fV_QInClosestNewGapDb[queryPos][dbPos] = fV_QInClosestNewGapDb[queryPos-1][dbPos];
+ fV_DbInClosestNewGapDb[queryPos][dbPos] = fV_DbInClosestNewGapDb[queryPos-1][dbPos];
+ }
+
+ if (maskgapRegion1[queryPos]==1) {fV_DbInClosestNewGapDb[queryPos][dbPos] = 1;}
+ if (maskgapRegion2[dbPos]==1) {fV_QInClosestNewGapDb[queryPos][dbPos] = 1;}
+
+
+ /*test two alternatives*/
+
+/*** Gap in Db ***/
+
+ if (newScore < prevScoreGapQuery) {
+ newScore = prevScoreGapQuery;
+
+/**** Determine tracebackDir pointer and the flags fV_RepeatOpenGapQuery and fV_RepeatOpenGapDb ******/
+
+ if (flagNewGapQuery[queryPos][dbPos] == 1) { tracebackDir[queryPos][dbPos] = 1;}
+ else {tracebackDir[queryPos][dbPos] = 2;}
+
+ fV_RepeatOpenGapQuery[queryPos][dbPos] = fV_QInClosestNewGapDb[queryPos][dbPos];
+ fV_RepeatOpenGapDb[queryPos][dbPos] = fV_DbInClosestNewGapDb[queryPos][dbPos];
+
+ }
+
+/*** Gap in Query ***/
+
+ if (newScore < continueGapScore) {
+ newScore = continueGapScore;
+
+/**** Determine tracebackDir pointer and the flags fV_RepeatOpenGapQuery and fV_RepeatOpenGapDb ******/
+
+ if (flagNewGapDb[queryPos][dbPos] == 1) {tracebackDir[queryPos][dbPos] = 3;}
+ else {tracebackDir[queryPos][dbPos] = 4;}
+
+ fV_RepeatOpenGapDb[queryPos][dbPos] = fV_DbInClosestNewGapQuery[queryPos][dbPos];
+ fV_RepeatOpenGapQuery[queryPos][dbPos] = fV_QInClosestNewGapQuery[queryPos][dbPos];
+
+ }
+
+ noGap2NoGap = scoreVector.noGap[queryPos]; /* save the previous row's value before it is overwritten: diagonal predecessor for queryPos+1 */
+ scoreVector.noGap[queryPos] = newScore;
+ scoreVector.gapExists[queryPos] = continueGapScore;
+
+ if (newScore > bestScore) {
+ bestScore = newScore;
+ bestDbPos = dbPos;
+ bestQueryPos = queryPos;
+ }
+
+ fprintf(stderr,""); /* writes an empty string: no visible output */
+ }
+
+ }
+
+ fprintf(stderr,"."); /* one dot on stderr per call */
+
+ /* Closed assignments of arrays *score and *...End */
+
+ /* MemFree(scoreVector); -- NOTE(review): scoreVector.noGap and scoreVector.gapExists are not released in this function */
+ if (bestScore < 0) /* cannot trigger: bestScore starts at 0 and is only ever raised */
+ bestScore = 0;
+ End1 = bestQueryPos;
+ End2 = bestDbPos;
+
+ return (bestScore);
+
+/* returnEvalue = scoreToEvalue(effSearchSpace, bestScore, kbp); */
+}
+
+
+/*computes where the optimal Smith-Waterman local alignment starts given the
+ ending positions, by running the recursion backwards from them
+ score_matrix holds the position-vs-position scores, indexed [queryPos][dbPos]
+ queryLength is the number of query positions; it sizes the work vectors
+ dbLength is not used in the body
+ gapOpen is the cost of opening a gap
+ gapExtend is overwritten for every cell with rint(g_e2(...)) / rint(g_e1(...)),
+ so the value passed in has no effect
+ queryEnd is the final position in the query of an optimal
+ local alignment
+ dbEnd is the final position in the db direction of an optimal
+ local alignment
+ score is the optimal score; the backward scan stops as soon as
+ it has been reached
+ queryStart and dbStart are passed by value and not used: the start
+ positions are stored in Start1 (query) and Start2 (db), and the score
+ in scoreGivenEnd, none of which is declared in this function
+ the score reached is also returned
+ NOTE(review): the fV_... work vectors allocated below are never freed
+ */
+
+ static int SmithWatermanFindStart( double **score_matrix, int
+queryLength, int dbLength, int gapOpen, int gapExtend,int queryEnd, int dbEnd, int score, int queryStart, int dbStart)
+{
+
+ int bestScore; /*best score seen so far*/
+ int newScore; /* score of next entry*/
+ int bestQueryPos, bestDbPos; /*position starting best score in
+ query and database sequences*/
+ int newGapCost; /*cost to have a gap of one character*/
+ int prevScoreNoGapQuery; /*score one row and column up
+ with no gaps*/
+ int prevScoreGapQuery; /*score if a gap already started in query*/
+ int continueGapScore; /*score for continuing a gap in dbSequence*/
+ int queryPos, dbPos; /*positions in query and dbSequence*/
+
+ score_Vector scoreVector; /*keeps one row of the Smith-Waterman matrix
+ overwrite old row with new row*/
+
+ int flagNewGapQuery_Rev, flagNewGapDb_Rev; /* per-cell flags: 1 when opening a new gap beat extending one */
+
+
+
+/* scoreVector = (SWpairs *) MemNew(queryLength * sizeof(SWpairs)); */
+/********************** Introduce arrays and variables:
+
+int *fV_RepeatOpenGapQuery[queryPos] -- the current row of flagRepeatOpenGapQuery;
+
+int *fV_RepeatOpenGapDb[queryPos] -- the current row of flagRepeatOpenGapDb;
+
+int *fV_RepeatOpenGapDb1[queryPos] -- previous row of flagRepeatOpenGapDb, overwrite with the new row after
+the SW matrix row is passed;
+
+int *fV_RepeatOpenGapDb2[queryPos] -- the row of flagRepeatOpenGapDb 2 positions higher, overwrite with the
+fV_RepeatOpenGapDb1 after the SW matrix row is passed;
+
+int *fV_RepeatOpenGapQuery1[queryPos] -- previous row of flagRepeatOpenGapQuery, overwrite with the new row after
+the SW matrix row is passed;
+
+int *fV_RepeatOpenGapQuery2[queryPos] -- the row of flagRepeatOpenGapQuery 2 positions higher, overwrite with the
+fV_RepeatOpenGapQuery1 after the SW matrix row is passed;
+
+int *fV_QInClosestNewGapDb, *fV_DbInClosestNewGapQuery - current rows of the flags equal to the flags in the closest starting points of new gaps in Db (closest at the vertical queryPos = const) and Query (closest at the horizontal dbPos = const). They are used to calculate flagRepeatOpenGapQuery and flagRepeatOpenGapDb, respectively.
+
+int *fV_QInClosestNewGapDb1 - previous row of flagInClosestNewGapDb, overwrite with the new flagInClosestNewGapDb after the SW matrix row is passed;
+
+int *fV_DbInClosestNewGapQuery1 -- previous row of flagInClosestNewGapQuery, overwrite with the new flagInClosestNewGapQuery after the SW matrix row is passed;
+
+??? Some of arrays fV_InClosestNewGap... can be probably replaced by some variables, since we use only positions (-1,-1), (0,-1), (-1,0) and (0,0), or we can use the scheme analogous to scoreVector.nogap and scoreVector.gapExist.
+
+*****************************/
+int *fV_RepeatOpenGapQuery, *fV_RepeatOpenGapDb, *fV_RepeatOpenGapDb1, *fV_RepeatOpenGapDb2, *fV_RepeatOpenGapQuery1, *fV_RepeatOpenGapQuery2; /* int * locals; some share their names with the [][]-indexed objects used by SmithWatermanScore and traceback_outputPos */
+int *fV_QInClosestNewGapDb, *fV_DbInClosestNewGapQuery, *fV_QInClosestNewGapDb1 , *fV_DbInClosestNewGapQuery1;
+
+int *fV_DbInClosestNewGapDb, *fV_QInClosestNewGapQuery, *fV_DbInClosestNewGapDb1 , *fV_QInClosestNewGapQuery1;
+
+int flagRepeatOpenGapDb, flagRepeatOpenGapQuery;
+
+
+ scoreVector.noGap = ivector (1,queryLength);
+ scoreVector.gapExists = ivector (1,queryLength);
+
+ fV_RepeatOpenGapQuery = ivector (1,queryLength+1); /* one extra slot because index queryPos+1 is read below */
+ fV_RepeatOpenGapDb = ivector (1,queryLength+1);
+ fV_RepeatOpenGapDb1 = ivector (1,queryLength+1);
+ fV_RepeatOpenGapDb2 = ivector (1,queryLength+1);
+ fV_RepeatOpenGapQuery1 = ivector (1,queryLength+1);
+ fV_RepeatOpenGapQuery2 = ivector (1,queryLength+1);
+ fV_QInClosestNewGapDb = ivector (1,queryLength+1);
+ fV_QInClosestNewGapDb1 = ivector (1,queryLength+1);
+ fV_DbInClosestNewGapQuery = ivector (1,queryLength+1);
+ fV_DbInClosestNewGapQuery1 = ivector (1,queryLength+1);
+
+ fV_DbInClosestNewGapDb = ivector (1,queryLength+1);
+ fV_DbInClosestNewGapDb1 = ivector (1,queryLength+1);
+ fV_QInClosestNewGapQuery = ivector (1,queryLength+1);
+ fV_QInClosestNewGapQuery1 = ivector (1,queryLength+1);
+
+
+ bestQueryPos = 0;
+ bestDbPos = 0;
+ bestScore = 0;
+
+ for (queryPos = 1; queryPos <= queryLength; queryPos++) {
+ scoreVector.noGap[queryPos] = 0;
+ scoreVector.gapExists[queryPos] = -(gapOpen);
+ }
+
+ for (queryPos = 1; queryPos <= queryLength+1; queryPos++) {
+ fV_RepeatOpenGapDb[queryPos] = 0;
+ fV_RepeatOpenGapQuery[queryPos] = 0;
+
+ fV_RepeatOpenGapDb1[queryPos] = 0;
+ fV_RepeatOpenGapDb2[queryPos] = 0;
+ fV_RepeatOpenGapQuery1[queryPos] = 0;
+ fV_RepeatOpenGapQuery2[queryPos] = 0;
+
+ fV_QInClosestNewGapDb1[queryPos] = 0;
+ fV_DbInClosestNewGapQuery1[queryPos] = 0;
+ fV_DbInClosestNewGapDb1[queryPos] = 0;
+ fV_QInClosestNewGapQuery1[queryPos] = 0;
+
+ fV_QInClosestNewGapDb[queryPos] = 0;
+ fV_DbInClosestNewGapQuery[queryPos] = 0;
+ fV_DbInClosestNewGapDb[queryPos] = 0;
+ fV_QInClosestNewGapQuery[queryPos] = 0;
+
+ }
+
+ for(dbPos = dbEnd; dbPos >= 1; dbPos--) { /* scan backwards, starting at the given end cell */
+
+ newScore = 0;
+ prevScoreNoGapQuery = 0;
+ prevScoreGapQuery = -(gapOpen);
+
+ for(queryPos = queryEnd; queryPos >= 1; queryPos--) {
+ flagNewGapQuery_Rev = 0;
+ flagNewGapDb_Rev = 0;
+
+/*** Check if we are in the gapped region of query and no gaps were opened against this region before;
+ if TRUE, eliminate gapOpen penalty;
+ if this is the first position of the gapped region, reward the extending
+ previous gap by compensating gapOpen in gapExtend ***/
+
+ gapExtend = rint(g_e2(queryPos,b));
+
+ flagRepeatOpenGapDb = fV_RepeatOpenGapDb[queryPos+1]; /* current row, neighbouring position queryPos+1 */
+
+ if (maskgapRegion1[queryPos]==1 && flagRepeatOpenGapDb==0) {newGapCost = gapExtend;}
+ else {newGapCost = gapOpen + gapExtend;}
+
+ if (maskgapRegion1[queryPos+1]==0 && maskgapRegion1[queryPos]==1 && fV_DbInClosestNewGapDb[queryPos+1]==0) {
+ gapExtend -= gapOpen;
+ }
+
+ /*testing scores with a gap in DB, either starting a new
+ gap or extending an existing gap*/
+
+ if ((newScore = newScore - newGapCost) >
+ (prevScoreGapQuery = prevScoreGapQuery - gapExtend)) {
+ prevScoreGapQuery = newScore;
+ flagNewGapQuery_Rev = 1;
+ }
+
+
+/*** Check if we are in the gapped region of Db and no gaps were opened against this region before;
+ if TRUE, eliminate gapOpen penalty;
+ if this is the first position of the gapped region, reward the extending
+ previous gap by compensating gapOpen in gapExtend ***/
+ gapExtend = rint(g_e1(dbPos,b));
+
+ flagRepeatOpenGapQuery = fV_RepeatOpenGapQuery1[queryPos]; /* row processed before this one (dbPos+1), same queryPos */
+
+ if (maskgapRegion2[dbPos]==1 && flagRepeatOpenGapQuery==0) {newGapCost = gapExtend;}
+ else { newGapCost = gapOpen + gapExtend;}
+
+ if (maskgapRegion2[dbPos]==1 && maskgapRegion2[dbPos+1]==0 && fV_QInClosestNewGapQuery1[queryPos]==0) {
+ gapExtend -= gapOpen;
+ }
+
+ /*testing scores with a gap in Query, either starting a new
+ gap or extending an existing gap*/
+
+ if ((newScore = scoreVector.noGap[queryPos] - newGapCost) >
+ (continueGapScore = scoreVector.gapExists[queryPos] -gapExtend)) {
+ continueGapScore = newScore;
+ flagNewGapDb_Rev = 1;
+ }
+
+ /*compute new score extending one position in query and dbSequence*/
+
+ newScore = prevScoreNoGapQuery + rint(score_matrix[queryPos][dbPos]*score_scale - sgapfcn(queryPos, dbPos, b));
+
+ if (newScore < 0)
+ newScore = 0; /*Smith-Waterman locality condition*/
+
+ if (maskgapRegion1[queryPos] == 0) fV_RepeatOpenGapDb[queryPos] = 0;
+ else fV_RepeatOpenGapDb[queryPos] = fV_RepeatOpenGapDb1[queryPos+1];
+
+ if (maskgapRegion2[dbPos] == 0) fV_RepeatOpenGapQuery[queryPos] = 0;
+ else fV_RepeatOpenGapQuery[queryPos] = fV_RepeatOpenGapQuery1[queryPos+1];
+
+/**** Assign the flags coming from the closest NewGap in Query ******/
+
+ if (flagNewGapDb_Rev == 1) {
+ fV_DbInClosestNewGapQuery[queryPos] = fV_RepeatOpenGapDb1[queryPos];
+ fV_QInClosestNewGapQuery[queryPos] = fV_RepeatOpenGapQuery1[queryPos];
+ } else {
+ fV_DbInClosestNewGapQuery[queryPos] = fV_DbInClosestNewGapQuery1[queryPos];
+ fV_QInClosestNewGapQuery[queryPos] = fV_QInClosestNewGapQuery1[queryPos];
+ }
+
+ if (maskgapRegion2[dbPos]==1) {fV_QInClosestNewGapQuery[queryPos] = 1;}
+ if (maskgapRegion1[queryPos]==1) {fV_DbInClosestNewGapQuery[queryPos] = 1;}
+
+
+/**** Assign the flags coming from the closest NewGap in Db ******/
+
+ if (flagNewGapQuery_Rev == 1) {
+ fV_QInClosestNewGapDb[queryPos] = fV_RepeatOpenGapQuery[queryPos+1];
+ fV_DbInClosestNewGapDb[queryPos] = fV_RepeatOpenGapDb[queryPos+1];
+ } else {
+ fV_QInClosestNewGapDb[queryPos] = fV_QInClosestNewGapDb[queryPos+1];
+ fV_DbInClosestNewGapDb[queryPos] = fV_DbInClosestNewGapDb[queryPos+1];
+ }
+
+ if (maskgapRegion1[queryPos]==1) {fV_DbInClosestNewGapDb[queryPos] = 1;}
+ if (maskgapRegion2[dbPos]==1) {fV_QInClosestNewGapDb[queryPos] = 1;}
+
+ /*test two alternatives*/
+ if (newScore < prevScoreGapQuery) {
+ newScore = prevScoreGapQuery;
+ fV_RepeatOpenGapQuery[queryPos] = fV_QInClosestNewGapDb[queryPos];
+ fV_RepeatOpenGapDb[queryPos] = fV_DbInClosestNewGapDb[queryPos];
+ }
+
+
+ if (newScore < continueGapScore) {
+ newScore = continueGapScore;
+ fV_RepeatOpenGapDb[queryPos] = fV_DbInClosestNewGapQuery[queryPos];
+ fV_RepeatOpenGapQuery[queryPos] = fV_QInClosestNewGapQuery[queryPos];
+ }
+
+ prevScoreNoGapQuery = scoreVector.noGap[queryPos]; /* keep the previous row's value: diagonal predecessor for queryPos-1 */
+ scoreVector.noGap[queryPos]= newScore;
+ scoreVector.gapExists[queryPos] = continueGapScore;
+
+ if (newScore > bestScore) {
+ bestScore = newScore;
+ bestDbPos = dbPos;
+ bestQueryPos = queryPos;
+ }
+
+ if (bestScore >= score) break; /* target score reached: stop the inner scan */
+
+ }
+ if (bestScore >= score) break; /* ...and the outer scan */
+
+ /***** Reassignments in "the array stack" of flags for previous rows of SW matrix (the ...2 vectors are written here but never read in this function) *************/
+
+ for (queryPos=1; queryPos<=queryLength; queryPos++) {
+ fV_RepeatOpenGapQuery2[queryPos] = fV_RepeatOpenGapQuery1[queryPos];
+ fV_RepeatOpenGapQuery1[queryPos] = fV_RepeatOpenGapQuery[queryPos];
+ fV_RepeatOpenGapDb2[queryPos] = fV_RepeatOpenGapDb1[queryPos];
+ fV_RepeatOpenGapDb1[queryPos] = fV_RepeatOpenGapDb[queryPos];
+
+ fV_QInClosestNewGapDb1[queryPos] = fV_QInClosestNewGapDb[queryPos];
+ fV_DbInClosestNewGapQuery1[queryPos] = fV_DbInClosestNewGapQuery[queryPos];
+
+ fV_DbInClosestNewGapDb1[queryPos] = fV_DbInClosestNewGapDb[queryPos];
+ fV_QInClosestNewGapQuery1[queryPos] = fV_QInClosestNewGapQuery[queryPos];
+ }
+
+ }
+
+ free(scoreVector.noGap); /* NOTE(review): allocated with ivector(1,queryLength); plain free() is only valid if ivector() hands back the malloc'ed pointer unchanged for lower bound 1 - confirm, traceback_outputPos uses free_ivector() instead */
+ free(scoreVector.gapExists);
+
+ if (bestScore < 0) /* cannot trigger: bestScore starts at 0 and is only ever raised */
+ bestScore = 0;
+
+ Start1 = bestQueryPos;
+ Start2 = bestDbPos;
+
+ scoreGivenEnd = bestScore;
+
+ return(bestScore);
+}
+
+/* Traces back the best alignment path using tracebackDir[][], flagNewGapDb[][] and flagNewGapQuery[][];
+walks from (end_ali1, end_ali2) until the running score sctrl reaches score or either position drops to 0;
+output: apos1[]/apos2[] get the aligned positions (apos_filtr1/apos_filtr2 values, 0 opposite a gap), from index 1;
+positive[] gets the flags for positive matches and col_score[0..9][] the per-column ScoreOverColumn results;
+segment_len and posGp are set as well; start_ali1 and start_ali2 are not used; no value is returned despite void **
+*/
+
+ void **traceback_outputPos(int start_ali1, int start_ali2, int end_ali1, int end_ali2, int **tracebackDir, int **flagNewGapQuery, int **flagNewGapDb, int *apos1, int *apos2)
+{
+ int pos1, pos2, posGapped, dir, i, j;
+ char **aseqGapTrInt1, **aseqGapTrInt2; /* not used */
+ int *positiveInt, *apos1Int, *apos2Int; /* scratch arrays, filled from the high end downwards */
+ int **col_scoreInt;
+ int gapOpen, gapExtend, newGapCost, colScore, d0, d1, d2, d3; /* d0..d3 are not used */
+ int sctrl; /* running score along the traced path, clamped at 0 */
+ int ascore[10]; /* filled by ScoreOverColumn() for one column */
+ int jnogp1, jnogp2;
+
+ int flagRepeatOpenGap1, flagRepeatOpenGap2;
+
+ positiveInt = ivector(0,alilen_mat1+alilen_mat2);
+
+ apos1Int = ivector(0,alilen_mat1+alilen_mat2);
+ apos2Int = ivector(0,alilen_mat1+alilen_mat2);
+
+ col_scoreInt = imatrix(0,alilen_mat1+alilen_mat2, 0,9);
+
+ gapOpen = gap__open;
+
+ sctrl = 0;
+ segment_len = 0;
+ flagRepeatOpenGap1 = 0;
+ flagRepeatOpenGap2 = 0;
+ pos1 = end_ali1;
+ pos2 = end_ali2;
+ posGapped = alilen_mat1+alilen_mat2; /* start writing at the last slot of the scratch arrays */
+
+
+/*** TraceBackDir: 6 - positive match; 5 - non-positive match;
+1 - previousScoreGapQuery wins, the gap is new; 2- previousScoreGapQuery wins, the gap is extended from existing;
+3 - continueGapScore wins, the gap is new; 4- continueGapScore wins, the gap is extended from existing;
+***/
+
+ do {
+ dir = tracebackDir[pos1][pos2]; /* read once per pass: the branches below test this saved value, not the cell reached after moving */
+ if (dir==3) {
+
+ positiveInt[posGapped]=0;
+
+ apos1Int[posGapped] = 0;
+ apos2Int[posGapped] = apos_filtr2[pos2];
+
+ gapExtend = rint(g_e1(pos2,b));
+ newGapCost = gapOpen + gapExtend;
+ colScore = -newGapCost;
+ if (maskgapRegion2[pos2]==1 && flagRepeatOpenGap1==0) {
+ colScore += gapOpen;
+ flagRepeatOpenGap1 = 1;
+ }
+ sctrl += colScore;
+ if (sctrl < 0) sctrl = 0;
+
+ ScoreOverColumn (colScore, fV_QInClosestNewGapQuery[pos1][pos2], fV_DbInClosestNewGapDb[pos1][pos2], fV_RepeatOpenGapDb[pos1][pos2], fV_RepeatOpenGapQuery[pos1][pos2], flagRepeatOpenGap1, flagRepeatOpenGap2, ascore);
+ for (i=0; i<=9; i++) col_scoreInt[posGapped][i] = ascore[i];
+
+ pos2--;
+ posGapped--;
+ segment_len ++;
+
+ }
+
+ if (dir==4) {
+ do {
+ positiveInt[posGapped]=0;
+
+ apos1Int[posGapped] = 0;
+ apos2Int[posGapped] = apos_filtr2[pos2];
+
+ gapExtend = rint(g_e1(pos2,b));
+ colScore = -gapExtend;
+ if (maskgapRegion2[pos2]==1 && maskgapRegion2[pos2-1]==0 && flagRepeatOpenGap1 == 0) {
+ colScore += gapOpen;
+ flagRepeatOpenGap1 = 1;
+ }
+
+ sctrl += colScore;
+ if (sctrl < 0) sctrl = 0;
+
+ ScoreOverColumn (colScore, fV_QInClosestNewGapQuery[pos1][pos2], fV_DbInClosestNewGapDb[pos1][pos2], fV_RepeatOpenGapDb[pos1][pos2], fV_RepeatOpenGapQuery[pos1][pos2], flagRepeatOpenGap1, flagRepeatOpenGap2, ascore);
+ for (i=0; i<=9; i++) col_scoreInt[posGapped][i] = ascore[i];
+
+ pos2--;
+ posGapped--;
+ segment_len ++;
+
+ } while (flagNewGapDb[pos1][pos2]!= 1); /* NOTE(review): unlike the dir==2 loop there is no sctrl<score test, and pos2 is not bounds-checked here */
+
+ positiveInt[posGapped]=0;
+
+ apos1Int[posGapped] = 0;
+ apos2Int[posGapped] = apos_filtr2[pos2];
+
+ gapExtend = rint(g_e1(pos2,b));
+ newGapCost = gapOpen + gapExtend;
+ colScore = -newGapCost;
+ if (maskgapRegion2[pos2]==1 && flagRepeatOpenGap1==0) {
+ colScore += gapOpen;
+ flagRepeatOpenGap1 = 1;
+ }
+
+ sctrl += colScore;
+ if (sctrl < 0) sctrl = 0;
+
+ ScoreOverColumn (colScore, fV_QInClosestNewGapQuery[pos1][pos2], fV_DbInClosestNewGapDb[pos1][pos2], fV_RepeatOpenGapDb[pos1][pos2], fV_RepeatOpenGapQuery[pos1][pos2], flagRepeatOpenGap1, flagRepeatOpenGap2, ascore);
+ for (i=0; i<=9; i++) col_scoreInt[posGapped][i] = ascore[i];
+
+ pos2--;
+ posGapped--;
+ segment_len ++;
+
+ }
+
+ if (dir==1) {
+
+ positiveInt[posGapped]=0;
+
+ apos1Int[posGapped] = apos_filtr1[pos1];
+ apos2Int[posGapped] = 0;
+
+ gapExtend = rint(g_e2(pos1,b));
+ newGapCost = gapOpen + gapExtend;
+ colScore = -newGapCost;
+ if (maskgapRegion1[pos1]==1 && flagRepeatOpenGap2==0) {
+ colScore += gapOpen;
+ flagRepeatOpenGap2 = 1;
+ }
+
+ sctrl += colScore;
+ if (sctrl < 0) sctrl = 0;
+
+ ScoreOverColumn (colScore, fV_QInClosestNewGapQuery[pos1][pos2], fV_DbInClosestNewGapDb[pos1][pos2], fV_RepeatOpenGapDb[pos1][pos2], fV_RepeatOpenGapQuery[pos1][pos2], flagRepeatOpenGap1, flagRepeatOpenGap2, ascore);
+ for (i=0; i<=9; i++) col_scoreInt[posGapped][i] = ascore[i];
+
+ pos1--;
+ posGapped--;
+ segment_len ++;
+ }
+
+ if (dir==2) {
+ do {
+
+ positiveInt[posGapped]=0;
+
+ apos1Int[posGapped] = apos_filtr1[pos1];
+ apos2Int[posGapped] = 0;
+
+ gapExtend = rint(g_e2(pos1,b));
+ colScore = -gapExtend;
+ if (maskgapRegion1[pos1]==1 && maskgapRegion1[pos1-1]==0 && flagRepeatOpenGap2 == 0) {
+ colScore += gapOpen;
+ flagRepeatOpenGap2 = 1;
+ }
+
+ sctrl += colScore;
+ if (sctrl < 0) sctrl = 0;
+
+ ScoreOverColumn (colScore, fV_QInClosestNewGapQuery[pos1][pos2], fV_DbInClosestNewGapDb[pos1][pos2], fV_RepeatOpenGapDb[pos1][pos2], fV_RepeatOpenGapQuery[pos1][pos2], flagRepeatOpenGap1, flagRepeatOpenGap2, ascore);
+ for (i=0; i<=9; i++) col_scoreInt[posGapped][i] = ascore[i];
+
+ pos1--;
+ posGapped--;
+ segment_len ++;
+
+ } while (flagNewGapQuery[pos1][pos2] != 1 && sctrl<score); /* NOTE(review): pos1 is not bounds-checked inside this loop */
+
+ positiveInt[posGapped]=0;
+
+ apos1Int[posGapped] = apos_filtr1[pos1];
+ apos2Int[posGapped] = 0;
+
+ gapExtend = rint(g_e2(pos1,b));
+ newGapCost = gapOpen + gapExtend;
+ colScore = -newGapCost;
+ if (maskgapRegion1[pos1]==1 && flagRepeatOpenGap2==0) {
+ colScore += gapOpen;
+ flagRepeatOpenGap2 = 1;
+ }
+
+ sctrl += colScore;
+ if (sctrl < 0) sctrl = 0;
+
+ ScoreOverColumn (colScore, fV_QInClosestNewGapQuery[pos1][pos2], fV_DbInClosestNewGapDb[pos1][pos2], fV_RepeatOpenGapDb[pos1][pos2], fV_RepeatOpenGapQuery[pos1][pos2], flagRepeatOpenGap1, flagRepeatOpenGap2, ascore);
+ for (i=0; i<=9; i++) col_scoreInt[posGapped][i] = ascore[i];
+
+ pos1--;
+ posGapped--;
+ segment_len ++;
+
+ }
+
+ if (dir==5) { /* NOTE(review): unlike dir==6, this branch does not call ScoreOverColumn, so col_scoreInt[posGapped] is not written for this column */
+
+ positiveInt[posGapped]=0;
+
+ apos1Int[posGapped] = apos_filtr1[pos1];
+ apos2Int[posGapped] = apos_filtr2[pos2];
+
+ colScore = rint(score_matrix[pos1][pos2]*score_scale - sgapfcn(pos1,pos2,b));
+
+ sctrl += colScore;
+ if (sctrl < 0) sctrl = 0;
+
+ if (maskgapRegion2[pos2] == 0) flagRepeatOpenGap1 = 0;
+ if (maskgapRegion1[pos1] == 0) flagRepeatOpenGap2 = 0;
+
+ pos2--;
+ pos1--;
+ posGapped--;
+
+ segment_len++;
+ }
+
+ if (dir==6) {
+
+ positiveInt[posGapped]=1;
+
+ apos1Int[posGapped] = apos_filtr1[pos1];
+ apos2Int[posGapped] = apos_filtr2[pos2];
+
+ colScore = rint(score_matrix[pos1][pos2]*score_scale - sgapfcn(pos1,pos2,b));
+
+ sctrl += colScore;
+ if (sctrl < 0) sctrl = 0;
+
+ ScoreOverColumn (colScore, fV_QInClosestNewGapQuery[pos1][pos2], fV_DbInClosestNewGapDb[pos1][pos2], fV_RepeatOpenGapDb[pos1][pos2], fV_RepeatOpenGapQuery[pos1][pos2], flagRepeatOpenGap1, flagRepeatOpenGap2, ascore);
+ for (i=0; i<=9; i++) col_scoreInt[posGapped][i] = ascore[i];
+
+ if (maskgapRegion2[pos2] == 0) flagRepeatOpenGap1 = 0;
+ if (maskgapRegion1[pos1] == 0) flagRepeatOpenGap2 = 0;
+
+ pos2--;
+ pos1--;
+ posGapped--;
+
+ segment_len++;
+ }
+
+
+/*** } while ((pos1>=start_ali1) && (pos2>=start_ali2)); ***/
+ } while (sctrl<score && pos1>0 && pos2>0);
+
+ posGp = posGapped+1; /* index of the first (leftmost) filled slot */
+ jnogp1 = jnogp2 = 1; /* assigned but not used afterwards */
+ for (j=posGp;j<posGp+segment_len;j++) { /* copy the filled tail of the scratch arrays to the outputs, starting at index 1 */
+ positive[j-posGp+1] = positiveInt[j];
+
+ apos1[j-posGp+1] = apos1Int[j];
+ apos2[j-posGp+1] = apos2Int[j];
+
+ for (i=0;i<=9; i++) col_score[i][j-posGp+1] = col_scoreInt[j][i]; /* note the transposed index order of col_score */
+ }
+
+ free_ivector(positiveInt, 0,alilen_mat1+alilen_mat2);
+ free_imatrix(col_scoreInt, 0,alilen_mat1+alilen_mat2, 0,9);
+ free_ivector(apos1Int, 0,alilen_mat1+alilen_mat2);
+ free_ivector(apos2Int, 0,alilen_mat1+alilen_mat2);
+
+}
+
+/* Column-match score computed with the "beta-function" formula.
+ *
+ * subjectRow, queryRow - residue counts of the two columns being compared;
+ *                        only entries 1..20 are read, so each array needs
+ *                        at least 21 elements.  The inputs are not modified.
+ * Returns the log-odds value rounded to an integer with rint().
+ *
+ * With n[i] = subjectRow[i] + queryRow[i] and N1, N2 the two row totals,
+ * the quantity accumulated below is
+ *     lgamma(21) - lgamma(N1 + N2 + 21)
+ *       + sum over i = 1..20 of ( lgamma(n[i] + 1) - n[i]*log(p_dayhoff[i-1]) )
+ * p_dayhoff[] is declared outside this function (presumably the background
+ * amino-acid frequencies).
+ */
+ int ScoreForTwoRows(double *subjectRow, double *queryRow)
+ {
+   double total_subj, total_query;
+   double log_odds;
+   double pooled_lgamma;
+   int aa;
+
+   /* Row totals N1 and N2. */
+   total_subj = total_query = 0;
+   aa = 1;
+   while (aa <= 20) {
+     total_subj += subjectRow[aa];
+     total_query += queryRow[aa];
+     aa++;
+   }
+
+   /* Term that depends on the totals only. */
+   log_odds = -lgamma(total_subj+total_query+21);
+
+   /* One contribution per residue type. */
+   for (aa=1; aa<=20; aa++) {
+     pooled_lgamma = lgamma(subjectRow[aa]+queryRow[aa]+1.0);
+     log_odds += -(subjectRow[aa]+queryRow[aa])*log(p_dayhoff[aa-1]) + pooled_lgamma;
+   }
+
+   /* Constant term, added after the loop so the order of additions is kept. */
+   log_odds += lgamma(21);
+
+   /* Round with rint() and hand the value back as an int,
+    * which is the function's return type. */
+   return (int) rint(log_odds);
+}
+
+
+/* Column-column score ("Model6"): substitution part scaled by score_scale/lambda_u, minus a gap-content term weighted by b. */
+double ScoreForTwoRows_Model6(int pos1, int pos2, double score_scale, double b)
+{
+  int aa, res1 = 1, res2 = 1;
+  double score = 0.0;
+  double gapfrac1, gapfrac2;
+
+  if (!((sum_eff_let1[pos1]==1.0) && (sum_eff_let2[pos2] == 1.0))) {
+    /* General case: weighted cross terms of one column's n_effAa against the log ratio of the other's pseudoCnt. */
+    for (aa=1; aa<=20; aa++) {
+      score += n_effAa1[pos1][aa]*(sum_eff_let2[pos2]-1)/sum_eff_let1[pos1]*log(pseudoCnt2[pos2][aa]/p_rbnsn[aa-1])
+             + n_effAa2[pos2][aa]*(sum_eff_let1[pos1]-1)/sum_eff_let2[pos2]*log(pseudoCnt1[pos1][aa]/p_rbnsn[aa-1]);
+    }
+  } else {
+    /* Both effective letter counts are exactly 1: smatrix entry of the last non-zero residue in each column (index 1 if none). */
+    for (aa=1; aa<=20; aa++) {
+      if (matrix1[pos1][aa]!= 0.0) res1=aa;
+      if (matrix2[pos2][aa]!= 0.0) res2=aa;
+    }
+    score = smatrix[res1][res2];
+  }
+  score = score*score_scale/lambda_u;
+  /* Gap share of each column; n_effAa[..][0] is presumably the effective gap count - confirm. */
+  gapfrac1 = n_effAa1[pos1][0]/(n_effAa1[pos1][0]+sum_eff_let1[pos1]);
+  gapfrac2 = n_effAa2[pos2][0]/(n_effAa2[pos2][0]+sum_eff_let2[pos2]);
+  score -= b*((1-gapfrac1)*gapfrac2 + (1-gapfrac2)*gapfrac1);
+  return score;
+}
+
+/* Column-column score, formula variant 3_21 (normalised).
+ * pos1, pos2 index the columns of profile 1 and profile 2; the result is
+ * divided by lambda_u before being returned.
+ * When both effective letter counts equal 1 (within 1e-5) the score is the
+ * smatrix entry for the last residue with a non-zero effective count in each
+ * column; otherwise the two cross sums are weighted and normalised.
+ * Fix: the tolerance test used abs(), whose parameter is int, so the double
+ * difference was truncated and any |x| < 1 passed; fabs() is used instead. */
+double ScoreForTwoRows_smat3_21(int pos1, int pos2)
+{
+  int i, k1, k2;
+  double s;
+  double comp1, comp2;   /* deviation of each effective letter count from 1 */
+  double s1, s2;         /* cross sums: n_effAa of one column against log(pseudoCnt/p_rbnsn) of the other */
+
+  s1 = s2 = 0.0;
+  comp1 = sum_eff_let1[pos1]-1.0;
+  comp2 = sum_eff_let2[pos2]-1.0;
+
+  if (fabs(comp1)<1e-5 && fabs(comp2)<1e-5) {
+    /* Both counts are 1: direct matrix lookup (index 1 if no non-zero entry is found). */
+    k1 = k2 = 1;
+    for (i=1;i<=20;i++) {
+      if (n_effAa1[pos1][i]!= 0.0) k1=i;
+      if (n_effAa2[pos2][i]!= 0.0) k2=i;
+    }
+    s = smatrix[k1][k2];
+  } else {
+    for (i=1;i<=20;i++) {
+      s1 += n_effAa1[pos1][i]*log(pseudoCnt2[pos2][i]/p_rbnsn[i-1]);
+      s2 += n_effAa2[pos2][i]*log(pseudoCnt1[pos1][i]/p_rbnsn[i-1]);
+    }
+
+    /* Weight each cross sum, then normalise by (sum1 + sum2 - 2). */
+    s = s1*(sum_eff_let2[pos2]-1)/sum_eff_let1[pos1] + s2*(sum_eff_let1[pos1]-1)/sum_eff_let2[pos2];
+    s = s/(sum_eff_let1[pos1]+sum_eff_let2[pos2]-2);
+  }
+  s = s/lambda_u;
+
+  return s;
+}
+
+/* Column-column score, formula variant 3_22 (weighted cross terms, no normalisation); the result is divided by lambda_u.
+ * Fix: fabs() replaces abs() in the tolerance test; abs() takes an int, so the double was truncated and any |x| < 1 passed. */
+double ScoreForTwoRows_smat3_22(int pos1, int pos2)
+{
+  int i, k1, k2;
+  double s;
+  double comp1, comp2;   /* deviation of each effective letter count from 1 */
+
+  s=0.0;
+  comp1 = sum_eff_let1[pos1]-1.0;
+  comp2 = sum_eff_let2[pos2]-1.0;
+  if (fabs(comp1)<1e-5 && fabs(comp2)<1e-5) {
+    /* Both effective letter counts are 1 (within 1e-5): direct smatrix lookup. */
+    k1=k2=1;
+    for (i=1;i<=20;i++) {
+      if (matrix1[pos1][i]!= 0.0) k1=i;
+      if (matrix2[pos2][i]!= 0.0) k2=i;
+    }
+    s = smatrix[k1][k2];
+  } else {
+    for (i=1;i<=20;i++) {
+      s+= n_effAa1[pos1][i]*(sum_eff_let2[pos2]-1)/sum_eff_let1[pos1]*log(pseudoCnt2[pos2][i]/p_rbnsn[i-1]) + n_effAa2[pos2][i]*(sum_eff_let1[pos1]-1)/sum_eff_let2[pos2]*log(pseudoCnt1[pos1][i]/p_rbnsn[i-1]);
+    }
+  }
+  s = s/lambda_u;
+
+  return s;
+}
+
+/* Column-column score, formula variant 3_23: the cross terms are not divided by the
+ * opposite sum_eff_let, the sum is normalised by (sum1+sum2-2), then divided by lambda_u.
+ * Fix: fabs() replaces abs() in the tolerance test; abs() takes an int, so the
+ * double argument was truncated and any |x| < 1 passed the 1e-5 test. */
+double ScoreForTwoRows_smat3_23(int pos1, int pos2)
+{
+  int i, k1, k2;
+  double s;
+  double comp1, comp2;   /* deviation of each effective letter count from 1 */
+
+  s=0.0;
+  comp1 = sum_eff_let1[pos1]-1.0;
+  comp2 = sum_eff_let2[pos2]-1.0;
+  /* NOTE(review): this trace goes to stdout on every call and looks like leftover debugging; kept unchanged. */
+  fprintf(stdout, "%f %f\n", comp1, comp2); fflush(stdout);
+  if (fabs(comp1)<1e-5 && fabs(comp2)<1e-5) {
+    k1=k2=1;
+    for (i=1;i<=20;i++) {
+      if (n_effAa1[pos1][i]!= 0.0) k1=i;
+      if (n_effAa2[pos2][i]!= 0.0) k2=i;
+    }
+    s = smatrix[k1][k2];
+  } else {
+    for (i=1;i<=20;i++) {
+      s+= n_effAa1[pos1][i]*(sum_eff_let2[pos2]-1)*log(pseudoCnt2[pos2][i]/p_rbnsn[i-1]) + n_effAa2[pos2][i]*(sum_eff_let1[pos1]-1)*log(pseudoCnt1[pos1][i]/p_rbnsn[i-1]);
+    }
+    s= s/(sum_eff_let1[pos1]+sum_eff_let2[pos2]-2);
+  }
+  s = s/lambda_u;
+  return s;
+}
+
+
+/* Column-column score, formula variant 3_27 (the simplest one): unweighted cross terms,
+ * no normalisation; the result is divided by lambda_u.
+ * Fix: fabs() replaces abs() in the tolerance test; abs() takes an int, so the
+ * double argument was truncated and any |x| < 1 passed the 1e-5 test. */
+double ScoreForTwoRows_smat3_27(int pos1, int pos2)
+{
+  int i, k1, k2;
+  double s;
+  double comp1, comp2;   /* deviation of each effective letter count from 1 */
+
+  s=0.0;
+  comp1 = sum_eff_let1[pos1]-1.0;
+  comp2 = sum_eff_let2[pos2]-1.0;
+  if (fabs(comp1)<1e-5 && fabs(comp2)<1e-5) {
+    k1=k2=1;
+    for (i=1;i<=20;i++) {
+      if (matrix1[pos1][i]!= 0.0) k1=i;
+      if (matrix2[pos2][i]!= 0.0) k2=i;
+    }
+    s = smatrix[k1][k2];
+  } else {
+    for (i=1;i<=20;i++) {
+/* Simplest scoring formula 3_27 */
+      s+= n_effAa1[pos1][i]*log(pseudoCnt2[pos2][i]/p_rbnsn[i-1]) + n_effAa2[pos2][i]*log(pseudoCnt1[pos1][i]/p_rbnsn[i-1]);
+    }
+  }
+  s = s/lambda_u;
+
+  return s;
+}
+
+/* Column-column score, formula variant 3_28: cross terms not divided by the opposite sum_eff_let and not normalised; divided by lambda_u.
+ * Fix: fabs() replaces abs() in the tolerance test; abs() takes an int, so the double was truncated and any |x| < 1 passed. */
+double ScoreForTwoRows_smat3_28(int pos1, int pos2)
+{
+  int i, k1, k2;
+  double s;
+  double comp1, comp2;   /* deviation of each effective letter count from 1 */
+
+  s=0.0;
+  comp1 = sum_eff_let1[pos1]-1.0;
+  comp2 = sum_eff_let2[pos2]-1.0;
+  if (fabs(comp1)<1e-5 && fabs(comp2)<1e-5) {
+    /* Both effective letter counts are 1 (within 1e-5): direct smatrix lookup. */
+    k1=k2=1;
+    for (i=1;i<=20;i++) {
+      if (matrix1[pos1][i]!= 0.0) k1=i;
+      if (matrix2[pos2][i]!= 0.0) k2=i;
+    }
+    s = smatrix[k1][k2];
+  } else {
+    for (i=1;i<=20;i++) {
+      s+= n_effAa1[pos1][i]*(sum_eff_let2[pos2]-1)*log(pseudoCnt2[pos2][i]/p_rbnsn[i-1]) + n_effAa2[pos2][i]*(sum_eff_let1[pos1]-1)*log(pseudoCnt1[pos1][i]/p_rbnsn[i-1]);
+    }
+  }
+  s = s/lambda_u;
+
+  return s;
+}
+
+/* Switched-off variant of the gap-content reduction: S_g is always 0. */
+double Sgap6_smat_off(int pos1, int pos2, double b)
+{
+  /* The arguments are accepted but never consulted. */
+  (void)pos1; (void)pos2; (void)b;
+  return 0.0;
+}
+
+/* S_g: reduction of a column-column score caused by gap content.
+ * Returns f*b*((1-g1)*g2 + (1-g2)*g1), where g = n_effAa[..][0]/(n_effAa[..][0]+sum_eff_let[..]) for each column. */
+double Sgap6_smat(int pos1, int pos2, double b)
+{
+  double gapfrac1 = n_effAa1[pos1][0]/(n_effAa1[pos1][0]+sum_eff_let1[pos1]);
+  double gapfrac2 = n_effAa2[pos2][0]/(n_effAa2[pos2][0]+sum_eff_let2[pos2]);
+
+  return f*b*((1-gapfrac1)*gapfrac2 + (1-gapfrac2)*gapfrac1);
+}
+
+
+/* Extension penalty for a gap in alignment 1, scaled by b and by the letter share of column pos2 in alignment 2. */
+double GapExtend1(int pos2, double b)
+{
+  /* sum_eff_let2/(n_effAa2[..][0]+sum_eff_let2); index 0 presumably holds the gap count - confirm */
+  double penalty = gap__extend*b*(sum_eff_let2[pos2]/(n_effAa2[pos2][0]+sum_eff_let2[pos2]));
+  return penalty;
+}
+
+/* Extension penalty for a gap in alignment 1 that ignores gap content: always gap__extend. */
+double GapExtend1_off(int pos2, double b)
+{
+  (void)pos2;   /* neither argument is consulted */
+  (void)b;
+  return gap__extend;
+}
+
+/* Extension penalty for a gap in alignment 2, scaled by b and by the letter share of column pos1 in alignment 1. */
+double GapExtend2(int pos1, double b)
+{
+  /* sum_eff_let1/(n_effAa1[..][0]+sum_eff_let1); index 0 presumably holds the gap count - confirm */
+  double penalty = gap__extend*b*(sum_eff_let1[pos1]/(n_effAa1[pos1][0]+sum_eff_let1[pos1]));
+  return penalty;
+}
+
+/* Extension penalty for a gap in alignment 2 that ignores gap content: always gap__extend. */
+double GapExtend2_off(int pos1, double b)
+{
+  (void)pos1;   /* neither argument is consulted */
+  (void)b;
+  return gap__extend;
+}
+
+/*
+ * Sorts arr[1..n] into ascending order in place (the array is indexed from 1).
+ * Quicksort with a median-of-three pivot and an explicit stack of pending
+ * sub-ranges; a range with ir-l < M is finished by straight insertion.
+ * M, NSTACK and SWAP come from outside this block (SWAP presumably uses `temp`).
+ * Fixes: n < 2 returns at once (for n == 0 the unsigned ir-l wrapped around), and
+ * the stack is released with free_ivector() to match its ivector() allocation.
+ */
+void sort(int n, double arr[])
+{
+  unsigned long i,ir=n,j,k,l=1;
+  int jstack=0,*istack;
+  double a,temp;
+  if (n < 2) return; /* nothing to sort */
+  istack=ivector(1,NSTACK);
+  for (;;) {
+    if (ir-l < M) { /* small range: insertion sort */
+      for (j=l+1;j<=ir;j++) {
+        a=arr[j];
+        for (i=j-1;i>=1;i--) {
+          if (arr[i] <= a) break;
+          arr[i+1]=arr[i];
+        }
+        arr[i+1]=a;
+      }
+      if (jstack == 0) break;
+      ir=istack[jstack--]; /* pop the next pending range */
+      l=istack[jstack--];
+    } else {
+      k=(l+ir) >> 1;
+      SWAP(arr[k],arr[l+1])
+      if (arr[l+1] > arr[ir]) { SWAP(arr[l+1],arr[ir]) }
+      if (arr[l] > arr[ir]) { SWAP(arr[l],arr[ir]) }
+      if (arr[l+1] > arr[l]) { SWAP(arr[l+1],arr[l]) }
+      i=l+1; j=ir;
+      a=arr[l]; /* pivot: now arr[l+1] <= arr[l] <= arr[ir] */
+      for (;;) {
+        do i++; while (arr[i] < a);
+        do j--; while (arr[j] > a);
+        if (j < i) break;
+        SWAP(arr[i],arr[j]);
+      }
+      arr[l]=arr[j];
+      arr[j]=a;
+      jstack += 2;
+      if (jstack > NSTACK) nrerror("NSTACK too small in sort.");
+      if (ir-i+1 >= j-l) { /* push the larger part, continue with the smaller */
+        istack[jstack]=ir;
+        istack[jstack-1]=i;
+        ir=j-1;
+      } else {
+        istack[jstack]=j-1;
+        istack[jstack-1]=l;
+        l=i;
+      }
+    }
+  }
+  free_ivector(istack,1,NSTACK);
+}
+
+/* Packs one column's score and flags into column_score[0..9]: [0] = 1 if colScore >= 0 else 0;
+ * [1] = |colScore|/100 (not capped); [2],[3] = tens and units digits of |colScore|;
+ * [4]=flag1 [5]=flag2 [6]=flag4 [7]=flag3 (stored crosswise, as before) [8]=flag5 [9]=flag6.
+ * Returns column_score; the int * function previously ended without a return statement. */
+int *ScoreOverColumn (int colScore, int flag1, int flag2, int flag3, int flag4, int flag5, int flag6, int *column_score)
+{
+  int magnitude = abs(colScore);
+  int hundreds = magnitude/100;
+  int tens = magnitude/10 - 10*hundreds;
+
+  column_score[0] = (colScore>=0) ? 1 : 0;
+  column_score[1] = hundreds;
+  column_score[2] = tens;
+  column_score[3] = magnitude - 100*hundreds - 10*tens;
+  column_score[4] = flag1;
+  column_score[5] = flag2;
+  column_score[6] = flag4;
+  column_score[7] = flag3;
+  column_score[8] = flag5;
+  column_score[9] = flag6;
+  return column_score;
+}
+
+/*
+ * Reads the reference FSSP alignment from `inputfile` with readali() and records every
+ * column in which both sequence 0 and sequence 1 carry an upper-case letter.
+ * Fills the globals aposref1[]/aposref2[] (1-based list of the paired positions),
+ * start_ref1/2, end_ref1/2 and reflen_nogp (number of pairs found).
+ * Always returns NULL: the function is declared void * but previously had no return statement.
+ */
+void *ReadRef (char *inputfile)
+{
+  int i, pos, pos1, pos2;
+  readali (inputfile);
+  aposref1 = ivector (0,alilen);
+  aposref2 = ivector (0,alilen);
+  pos=pos1=pos2=0;
+  for (i=0; i<alilen; i++) {
+    if (isalpha(aseq[0][i])) pos1++;   /* letters seen so far in sequence 0 */
+    if (isalpha(aseq[1][i])) pos2++;   /* letters seen so far in sequence 1 */
+    if (isupper(aseq[0][i]) && isupper(aseq[1][i])) {
+      if (pos==0) {start_ref1 = pos1; start_ref2 = pos2;}
+      pos++;
+      aposref1[pos]=pos1;
+      aposref2[pos]=pos2;
+      end_ref1 = pos1; end_ref2 = pos2;
+    }
+  }
+  reflen_nogp = pos;
+  return NULL;
+}
+
+/*
+ * Compares the computed alignment (apos1[], apos2[]; entries 1..segment_len, where 0
+ * marks a gap) with the reference alignment (aposref1[], aposref2[]; entries
+ * 1..reflen_nogp).  start_ref1/2 and end_ref1/2 are the first and last reference
+ * positions in each sequence.
+ * Results go to the globals coverage1, coverage2, falsecov, accuracy1 and accuracy2;
+ * segment_len and reflen_nogp are read from globals.
+ * Returns 0.  The function is declared int but used a bare "return;" and ran off its
+ * end, so the value seen by a caller was undefined; both exits now return 0.
+ */
+int CompareAlnVsReferenceAln (int *apos1, int *apos2, int *aposref1, int *aposref2, int start_ref1, int start_ref2, int end_ref1, int end_ref2 /*, int coverage1, int coverage2, int accuracy1, int accuracy2 */)
+{
+  int i,j,k;
+  int start_ali1, start_ali2, end_ali1, end_ali2;
+  int len_common1, len_common2;
+/*** Starting/ending positions of intersections between ali1,2 and ref1,2 : ****/
+  int start_common1, end_common1, start_common2, end_common2;
+/*** Indexes of elements closest to the starting/ending points of intersections between ali1,2 and ref1,2 : ****/
+  int ind_ali1_startc1, ind_ali1_endc1, ind_ali2_startc2, ind_ali2_endc2;
+  int ind_ref1_startc1, ind_ref1_endc1, ind_ref2_startc2, ind_ref2_endc2;
+  int ind_ref_startc, ind_ref_endc;
+  double factor = 0.6931472; /* ln 2: a pair shifted by d columns adds 2^-d to accuracy2 */
+
+/*** Calculation of coverage1 and coverage2.
+In parallel to finding common points of starts and ends corresponding to the intersection,
+find the indexes in arrays (apos[] and aposref[]) of the elements closest to these points *****/
+
+  for (i=1; apos1[i]==0; i++);
+  start_ali1 = apos1[i];
+
+  for (i=1; apos2[i]==0; i++);
+  start_ali2 = apos2[i];
+
+  for (i=segment_len; apos1[i]==0; i--);
+  end_ali1 = apos1[i];
+
+  for (i=segment_len; apos2[i]==0; i--);
+  end_ali2 = apos2[i];
+
+  if (end_ali1<end_ref1) {
+    end_common1 = end_ali1;
+    ind_ali1_endc1 = segment_len;
+    for (i=reflen_nogp; aposref1[i]>end_common1 && i>1; i--);
+    ind_ref1_endc1 = i;
+  } else {
+    end_common1 = end_ref1;
+    ind_ref1_endc1 = reflen_nogp;
+    for (i=segment_len; (apos1[i]>end_common1 || apos1[i]==0) && i>1; i--);
+    ind_ali1_endc1 = i;
+  }
+
+  if (end_ali2<end_ref2) {
+    end_common2 = end_ali2;
+    ind_ali2_endc2 = segment_len;
+    for (i=reflen_nogp; aposref2[i]>end_common2 && i>1; i--);
+    ind_ref2_endc2 = i;
+  } else {
+    end_common2 = end_ref2;
+    ind_ref2_endc2 = reflen_nogp;
+    for (i=segment_len; (apos2[i]>end_common2 || apos2[i]==0) && i>1 ; i--);
+    ind_ali2_endc2 = i;
+  }
+
+  if (start_ali1>start_ref1) {
+    start_common1 = start_ali1;
+    ind_ali1_startc1 = 1;
+    for (i=1; aposref1[i]<start_common1 && i<reflen_nogp; i++);
+    ind_ref1_startc1 = i;
+  } else {
+    start_common1 = start_ref1;
+    ind_ref1_startc1 = 1;
+    for (i=1; (apos1[i]<start_common1 || apos1[i]==0) && i<segment_len ; i++);
+    ind_ali1_startc1 = i;
+  }
+
+  if (start_ali2>start_ref2) {
+    start_common2 = start_ali2;
+    ind_ali2_startc2 = 1;
+    for (i=1; aposref2[i]<start_common2 && i<reflen_nogp; i++);
+    ind_ref2_startc2 = i;
+  } else {
+    start_common2 = start_ref2;
+    ind_ref2_startc2 = 1;
+    for (i=1; (apos2[i]<start_common2 || apos2[i]==0) && i<segment_len ; i++);
+    ind_ali2_startc2 = i;
+  }
+
+  len_common1 = end_common1 - start_common1 + 1;
+  len_common2 = end_common2 - start_common2 + 1;
+
+/* No overlap between alignment and reference: every measure is reported as 0. */
+  if (len_common1<=0 || len_common2<=0) {coverage1 = coverage2 = accuracy1 = accuracy2 = 0.0; return 0;}
+
+  coverage1 = 1.0*(len_common1 + len_common2)/((end_ref1 - start_ref1 + 1) + (end_ref2 - start_ref2 + 1));
+  coverage2 = 0.5*len_common1/(end_ref1 - start_ref1 + 1) + 0.5*len_common2/(end_ref2 - start_ref2 + 1);
+  falsecov = 1.0*((end_ali1-start_ali1+1) - len_common1 + (end_ali2-start_ali2+1) - len_common2)/ (len_common1+len_common2);
+
+/*** Calculation of accuracy1 and 2 ***/
+
+  if (ind_ref1_startc1 < ind_ref2_startc2) ind_ref_startc = ind_ref1_startc1;
+  else ind_ref_startc = ind_ref2_startc2;
+
+  if (ind_ref1_endc1 > ind_ref2_endc2) ind_ref_endc = ind_ref1_endc1;
+  else ind_ref_endc = ind_ref2_endc2;
+
+  accuracy1 = accuracy2 = 0.0;
+/* The search runs over all of aposref1,2 and apos1,2 rather than over the intersection regions only. */
+
+  for (i=1; i<=reflen_nogp; i++) {
+    for (j=1; j<=segment_len; j++) {
+
+      if (apos1[j]>aposref1[i]) break;
+      if (apos1[j] == aposref1[i]) {
+        for (k=1; k<=segment_len; k++) {
+          if (apos2[k]>aposref2[i]) break;
+          if (apos2[k] == aposref2[i]) {
+            if (k==j) {
+              accuracy1 += 1.0;
+              accuracy2 +=1.0;
+            }
+            else { accuracy2 += exp(-factor*(abs(k-j))); }
+          }
+        }
+      }
+    }
+  }
+
+  accuracy1 /= ind_ref_endc - ind_ref_startc + 1.0;
+  accuracy2 /= ind_ref_endc - ind_ref_startc + 1.0;
+
+  return 0;
+}
+
diff --git a/alnscore.c b/alnscore.c
new file mode 100644
index 0000000..8ade3c5
--- /dev/null
+++ b/alnscore.c
@@ -0,0 +1,114 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include "pcma.h"
+/* Larger / smaller of two values; each argument may be evaluated twice, so pass expressions without side effects. */
+#define MAX(a,b) ((a)>(b)?(a):(b))
+#define MIN(a,b) ((a)<(b)?(a):(b))
+
+/*
+ * Prototypes
+ */
+
+static sint count_gaps(sint s1, sint s2, sint l);
+
+/*
+ * Global Variables
+ */
+
+extern float gap_open;
+extern sint nseqs;
+extern sint *seqlen_array;
+extern short blosum45mt[];
+extern short def_aa_xref[];
+extern sint debug;
+extern sint max_aa;
+extern char **seq_array;
+
+
+/* Reports an overall score for the current alignment: for every pair of sequences the
+ * matrix scores of the aligned residues are summed and 100*gap_open is charged for each
+ * gap counted by count_gaps(); the total is divided by 100 and passed to info().
+ * Fix: the failure message named blosum30 although the matrix requested is blosum45mt. */
+void aln_score(void)
+{
+  static short *mat_xref, *matptr;
+  static sint maxres;
+  static sint s1,s2,c1,c2;
+  static sint ngaps;
+  static sint i,l1,l2;
+  static lint score;
+  static sint matrix[NUMRES][NUMRES];
+
+  matptr = blosum45mt;
+  mat_xref = def_aa_xref;
+  maxres = get_matrix(matptr, mat_xref, matrix, TRUE, 100);
+  if (maxres == 0)
+  {
+    fprintf(stdout,"Error: matrix blosum45 not found\n");
+    return;
+  }
+
+  score=0;
+  for (s1=1;s1<=nseqs;s1++)
+  {
+    for (s2=1;s2<s1;s2++)
+    {
+      l1 = seqlen_array[s1];
+      l2 = seqlen_array[s2];
+      /* NOTE(review): columns 1..min(l1,l2)-1 are scored; confirm that leaving out the last column is intended. */
+      for (i=1;i<l1 && i<l2;i++)
+      {
+        c1 = seq_array[s1][i];
+        c2 = seq_array[s2][i];
+        if ((c1>=0) && (c1<=max_aa) && (c2>=0) && (c2<=max_aa))
+          score += matrix[c1][c2];
+      }
+
+      /* The gap count uses the length of sequence s1 for both sequences. */
+      ngaps = count_gaps(s1, s2, l1);
+
+      score -= 100 * gap_open * ngaps;
+    }
+  }
+
+  score /= 100;
+
+  info("Alignment Score %d", (pint)score);
+}
+
+/* Counts gap openings between sequences s1 and s2 over columns 1..l-1.
+ * A residue code greater than max_aa is treated as a gap.  A gap is counted when one
+ * sequence has a gap opposite a residue and its gap run up to the previous column is
+ * not longer than the other sequence's run. */
+static sint count_gaps(sint s1, sint s2, sint l)
+{
+  sint col, opened;
+  sint gap1, gap2;          /* 1 if the sequence has a gap in this column */
+  sint *run1, *run2;        /* length of the gap run ending at each column */
+
+  run1 = (sint *)ckalloc((l+2) * sizeof(sint));
+  run2 = (sint *)ckalloc((l+2) * sizeof(sint));
+
+  run1[0] = run2[0] = opened = 0;
+
+  for (col=1; col<l; col++)
+  {
+    gap1 = (seq_array[s1][col] > max_aa) ? 1 : 0;
+    gap2 = (seq_array[s2][col] > max_aa) ? 1 : 0;
+
+    if ((gap1 && !gap2 && run1[col-1] <= run2[col-1]) ||
+        (gap2 && !gap1 && run1[col-1] >= run2[col-1]))
+      opened++;
+
+    run1[col] = gap1 ? run1[col-1]+1 : 0;
+    run2[col] = gap2 ? run2[col-1]+1 : 0;
+  }
+
+  run1=ckfree((void *)run1);
+  run2=ckfree((void *)run2);
+
+  return(opened);
+}
+
+
diff --git a/amenu.c b/amenu.c
new file mode 100644
index 0000000..93c1c7c
--- /dev/null
+++ b/amenu.c
@@ -0,0 +1,1298 @@
+/* Menus and command line interface for Clustal W */
+/* DES was here MARCH. 1994 */
+/* DES was here SEPT. 1994 */
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <signal.h>
+#include <setjmp.h>
+#include "pcma.h"
+
+static jmp_buf jmpbuf; /* context saved by setjmp() in the menu functions; jumper() longjmp()s to it */
+#ifndef VMS
+#ifndef AIX
+#define BADSIG (void (*)())-1 /* compared with signal()'s return value to detect failure */
+#endif
+#endif
+
+static void jumper(int);
+/* Installed by the menus as the SIGINT handler: jumps back to the context saved in jmpbuf, making setjmp() return 1. */
+static void jumper(int signum)
+{
+  (void)signum; longjmp(jmpbuf,1);
+}
+
+
+/*
+* Prototypes
+*/
+
+
+static void pair_menu(void);
+static void multi_menu(void);
+static void gap_penalties_menu(void);
+static void multiple_align_menu(void); /* multiple alignments menu */
+static void profile_align_menu(void); /* profile " " */
+static void phylogenetic_tree_menu(void); /* NJ trees/distances menu */
+static void format_options_menu(void); /* format of alignment output */
+static void tree_format_options_menu(void); /* format of tree output */
+static void ss_options_menu(void);
+static sint secstroutput_options(void);
+static sint read_matrix(char *title,MatMenu menu, char *matnam, sint matn, short *mat, short *xref);
+
+/*
+* Global variables
+*/
+
+extern float gap_open, gap_extend;
+extern float dna_gap_open, dna_gap_extend;
+extern float prot_gap_open, prot_gap_extend;
+extern float pw_go_penalty, pw_ge_penalty;
+extern float dna_pw_go_penalty, dna_pw_ge_penalty;
+extern float prot_pw_go_penalty, prot_pw_ge_penalty;
+extern float transition_weight;
+extern char revision_level[];
+extern sint wind_gap,ktup,window,signif;
+extern sint dna_wind_gap, dna_ktup, dna_window, dna_signif;
+extern sint prot_wind_gap,prot_ktup,prot_window,prot_signif;
+extern sint nseqs;
+extern sint divergence_cutoff;
+extern sint debug;
+extern Boolean neg_matrix;
+extern Boolean quick_pairalign;
+extern Boolean reset_alignments_new; /* DES */
+extern Boolean reset_alignments_all; /* DES */
+extern sint gap_dist;
+extern Boolean no_var_penalties, no_hyd_penalties, no_pref_penalties;
+extern sint output_order;
+extern sint profile_no;
+extern short usermat[], pw_usermat[];
+extern short aa_xref[], pw_aa_xref[];
+extern short userdnamat[], pw_userdnamat[];
+extern short dna_xref[], pw_dna_xref[];
+
+extern Boolean lowercase; /* Flag for GDE output - set on comm. line*/
+extern Boolean cl_seq_numbers;
+extern Boolean output_clustal, output_nbrf, output_phylip, output_gcg, output_gde, output_nexus;
+extern Boolean output_tree_clustal, output_tree_phylip, output_tree_distances,output_tree_nexus;
+extern sint bootstrap_format;
+extern Boolean tossgaps, kimura;
+extern Boolean percent;
+extern Boolean usemenu;
+extern Boolean showaln, save_parameters;
+extern Boolean dnaflag;
+extern Boolean use_ambiguities;
+
+
+extern char hyd_residues[];
+extern char mtrxname[], pw_mtrxname[];
+extern char dnamtrxname[], pw_dnamtrxname[];
+extern char seqname[];
+
+extern sint output_struct_penalties;
+extern Boolean use_ss1, use_ss2;
+
+extern Boolean empty;
+extern Boolean profile1_empty, profile2_empty; /* whether or not profiles */
+
+extern char profile1_name[FILENAMELEN+1];
+extern char profile2_name[FILENAMELEN+1];
+
+extern Boolean use_endgaps;
+extern sint matnum,pw_matnum;
+extern sint dnamatnum,pw_dnamatnum;
+
+extern sint helix_penalty;
+extern sint strand_penalty;
+extern sint loop_penalty;
+extern sint helix_end_minus;
+extern sint helix_end_plus;
+extern sint strand_end_minus;
+extern sint strand_end_plus;
+extern sint helix_end_penalty;
+extern sint strand_end_penalty;
+
+extern MatMenu matrix_menu;
+extern MatMenu pw_matrix_menu;
+extern MatMenu dnamatrix_menu;
+/* File names passed by the menus to the alignment and tree routines; the first four are reset to "" when new sequences are read. */
+static char phylip_name[FILENAMELEN]="";
+static char clustal_name[FILENAMELEN]="";
+static char dist_name[FILENAMELEN]="";
+static char nexus_name[FILENAMELEN]="";
+static char p1_tree_name[FILENAMELEN]="";
+static char p2_tree_name[FILENAMELEN]="";
+/* Labels of the secondary-structure output choices, indexed by output_struct_penalties. */
+static char *secstroutput_txt[] = {
+ "Secondary Structure",
+ "Gap Penalty Mask",
+ "Structure and Penalty Mask",
+ "None" };
+
+/* Line buffers for menu input; allocated in init_amenu(). */
+static char *lin1, *lin2, *lin3;
+
+/* Allocates the three menu input buffers lin1, lin2 and lin3, MAXLINE+1 characters each. */
+void init_amenu(void)
+{
+  const size_t nbytes = (MAXLINE+1) * sizeof (char);
+  lin1 = (char *)ckalloc( nbytes );
+  lin2 = (char *)ckalloc( nbytes );
+  lin3 = (char *)ckalloc( nbytes );
+}
+/* Top-level interactive menu: loops forever, reads a choice into lin1 and dispatches it; 'Q'/'X' leave through exit(0). */
+void main_menu(void)
+{
+ int catchint;
+/* signal() returns the previous handler: catchint is non-zero when SIGINT was not already being ignored. */
+ catchint = signal(SIGINT, SIG_IGN) != SIG_IGN;
+ if (catchint) {
+ if (setjmp(jmpbuf) != 0) /* non-zero only after jumper() has longjmp()ed back here */
+ fprintf(stdout,"\n.. Interrupt\n");
+#ifdef UNIX
+ if (signal(SIGINT,jumper) == BADSIG)
+ fprintf(stdout,"Error: signal\n");
+#else
+ if (signal(SIGINT,SIG_DFL) == BADSIG)
+ fprintf(stdout,"Error: signal\n");
+#endif
+ }
+/* NOTE(review): the sub-menus call setjmp(jmpbuf) themselves and then return, so afterwards jmpbuf refers to a finished call frame - confirm a later SIGINT cannot longjmp into it. */
+ while(TRUE) {
+ fprintf(stdout, "\n\n");
+ fprintf(stdout, " ********************************************************************");
+ fprintf(stdout,"\n\tPCMA - Profile Consistency Multiple sequence Alignment\n"); /* JP */
+ fprintf(stdout, " ********************************************************************");
+ fprintf(stdout,"\n\n");
+
+ fprintf(stdout," 1. Sequence Input From Disc\n");
+ fprintf(stdout," 2. Multiple Alignments\n");
+ fprintf(stdout," 3. Profile / Structure Alignments\n");
+ fprintf(stdout," 4. Phylogenetic trees\n");
+ fprintf(stdout,"\n");
+ fprintf(stdout," S. Execute a system command\n");
+ fprintf(stdout," H. HELP\n");
+ fprintf(stdout," X. EXIT (leave program)\n\n\n");
+
+ getstr("Your choice",lin1);
+/* Only the first character of the reply is examined, case-insensitively. */
+ switch(toupper(*lin1)) {
+ case '1': seq_input(FALSE);
+ phylip_name[0]=EOS; /* new input: forget the output file names chosen earlier */
+ clustal_name[0]=EOS;
+ dist_name[0]=EOS;
+ nexus_name[0]=EOS;
+ break;
+ case '2': multiple_align_menu();
+ break;
+ case '3': profile_align_menu();
+ break;
+ case '4': phylogenetic_tree_menu();
+ break;
+ case 'S': do_system();
+ break;
+ case '?':
+ case 'H': get_help('1');
+ break;
+ case 'Q':
+ case 'X': exit(0);
+ break;
+ default: fprintf(stdout,"\n\nUnrecognised Command\n\n");
+ break;
+ }
+ }
+}
+
+
+
+
+
+
+
+
+/* Multiple-alignment sub-menu: shows the current settings, reads a choice into lin1 and dispatches it; an empty reply, 'Q' or 'X' returns to the caller. */
+static void multiple_align_menu(void)
+{
+ int catchint;
+/* signal() returns the previous handler: catchint is non-zero when SIGINT was not already being ignored. */
+ catchint = signal(SIGINT, SIG_IGN) != SIG_IGN;
+ if (catchint) {
+ if (setjmp(jmpbuf) != 0) /* non-zero only after jumper() has longjmp()ed back here */
+ fprintf(stdout,"\n.. Interrupt\n");
+#ifdef UNIX
+ if (signal(SIGINT,jumper) == BADSIG)
+ fprintf(stdout,"Error: signal\n");
+#else
+ if (signal(SIGINT,SIG_DFL) == BADSIG)
+ fprintf(stdout,"Error: signal\n");
+#endif
+ }
+
+
+ while(TRUE)
+ {
+ fprintf(stdout,"\n\n\n");
+ fprintf(stdout,"****** MULTIPLE ALIGNMENT MENU ******\n");
+ fprintf(stdout,"\n\n");
+
+
+ fprintf(stdout," 1. Do complete multiple alignment now (%s)\n",
+ (!quick_pairalign) ? "Slow/Accurate" : "Fast/Approximate");
+ fprintf(stdout," 2. Produce guide tree file only\n");
+ fprintf(stdout," 3. Do alignment using old guide tree file\n\n");
+ fprintf(stdout," 4. Toggle Slow/Fast pairwise alignments = %s\n\n",
+ (!quick_pairalign) ? "SLOW" : "FAST");
+ fprintf(stdout," 5. Pairwise alignment parameters\n");
+ fprintf(stdout," 6. Multiple alignment parameters\n\n");
+ fprintf(stdout," 7. Reset gaps before alignment?");
+ if(reset_alignments_new)
+ fprintf(stdout," = ON\n");
+ else
+ fprintf(stdout," = OFF\n");
+ fprintf(stdout," 8. Toggle screen display = %s\n",
+ (!showaln) ? "OFF" : "ON");
+ fprintf(stdout," 9. Output format options\n");
+ fprintf(stdout,"\n");
+
+ fprintf(stdout," S. Execute a system command\n");
+ fprintf(stdout," H. HELP\n");
+ fprintf(stdout," or press [RETURN] to go back to main menu\n\n\n");
+
+ getstr("Your choice",lin1);
+ if(*lin1 == EOS) return;
+
+ switch(toupper(*lin1))
+ {
+ case '1': align(phylip_name);
+ break;
+ case '2': make_tree(phylip_name);
+ break;
+ case '3': get_tree(phylip_name);
+ break;
+ case '4': quick_pairalign ^= TRUE; /* toggle */
+ break;
+ case '5': pair_menu();
+ break;
+ case '6': multi_menu();
+ break;
+ case '7': reset_alignments_new ^= TRUE;
+ if(reset_alignments_new==TRUE) /* switching this option on switches reset_alignments_all off */
+ reset_alignments_all=FALSE;
+ break;
+ case '8': showaln ^= TRUE;
+ break;
+ case '9': format_options_menu();
+ break;
+ case 'S': do_system();
+ break;
+ case '?':
+ case 'H': get_help('2');
+ break;
+ case 'Q':
+ case 'X': return;
+
+ default: fprintf(stdout,"\n\nUnrecognised Command\n\n");
+ break;
+ }
+ }
+}
+
+
+
+
+
+
+
+
+/* Profile/structure alignment sub-menu: shows which profiles are loaded, reads a choice into lin1 and dispatches it; an empty reply, 'Q' or 'X' returns. */
+static void profile_align_menu(void)
+{
+ int catchint;
+/* signal() returns the previous handler: catchint is non-zero when SIGINT was not already being ignored. */
+ catchint = signal(SIGINT, SIG_IGN) != SIG_IGN;
+ if (catchint) {
+ if (setjmp(jmpbuf) != 0) /* non-zero only after jumper() has longjmp()ed back here */
+ fprintf(stdout,"\n.. Interrupt\n");
+#ifdef UNIX
+ if (signal(SIGINT,jumper) == BADSIG)
+ fprintf(stdout,"Error: signal\n");
+#else
+ if (signal(SIGINT,SIG_DFL) == BADSIG)
+ fprintf(stdout,"Error: signal\n");
+#endif
+ }
+
+
+ while(TRUE)
+ {
+ fprintf(stdout,"\n\n\n");
+ fprintf(stdout,"****** PROFILE AND STRUCTURE ALIGNMENT MENU ******\n");
+ fprintf(stdout,"\n\n");
+
+ fprintf(stdout," 1. Input 1st. profile ");
+ if (!profile1_empty) fprintf(stdout,"(loaded)");
+ fprintf(stdout,"\n");
+ fprintf(stdout," 2. Input 2nd. profile/sequences ");
+ if (!profile2_empty) fprintf(stdout,"(loaded)");
+ fprintf(stdout,"\n\n");
+ fprintf(stdout," 3. Align 2nd. profile to 1st. profile\n");
+ fprintf(stdout," 4. Align sequences to 1st. profile (%s)\n\n",
+ (!quick_pairalign) ? "Slow/Accurate" : "Fast/Approximate");
+ fprintf(stdout," 5. Toggle Slow/Fast pairwise alignments = %s\n\n",
+ (!quick_pairalign) ? "SLOW" : "FAST");
+ fprintf(stdout," 6. Pairwise alignment parameters\n");
+ fprintf(stdout," 7. Multiple alignment parameters\n\n");
+ fprintf(stdout," 8. Toggle screen display = %s\n",
+ (!showaln) ? "OFF" : "ON");
+ fprintf(stdout," 9. Output format options\n");
+ fprintf(stdout," 0. Secondary structure options\n");
+ fprintf(stdout,"\n");
+ fprintf(stdout," S. Execute a system command\n");
+ fprintf(stdout," H. HELP\n");
+ fprintf(stdout," or press [RETURN] to go back to main menu\n\n\n");
+
+ getstr("Your choice",lin1);
+ if(*lin1 == EOS) return;
+
+ switch(toupper(*lin1))
+ {
+ case '1': profile_no = 1; /* 1 => 1st profile */
+ profile_input();
+ strcpy(profile1_name, seqname); /* NOTE(review): unbounded copy; the size of seqname is not visible here */
+ break;
+ case '2': profile_no = 2; /* 2 => 2nd profile */
+ profile_input();
+ strcpy(profile2_name, seqname); /* NOTE(review): unbounded copy; the size of seqname is not visible here */
+ break;
+ case '3': profile_align(p1_tree_name,p2_tree_name); /* align the 2 alignments now */
+ break;
+ case '4': new_sequence_align(phylip_name); /* align new sequences to profile 1 */
+ break;
+ case '5': quick_pairalign ^= TRUE; /* toggle */
+ break;
+ case '6': pair_menu();
+ break;
+ case '7': multi_menu();
+ break;
+ case '8': showaln ^= TRUE;
+ break;
+ case '9': format_options_menu();
+ break;
+ case '0': ss_options_menu();
+ break;
+ case 'S': do_system();
+ break;
+ case '?':
+ case 'H': get_help('6');
+ break;
+ case 'Q':
+ case 'X': return;
+
+ default: fprintf(stdout,"\n\nUnrecognised Command\n\n");
+ break;
+ }
+ }
+}
+
+/* Secondary-structure options sub-menu: shows the current values, reads a choice into lin2 and updates the selected setting; an empty reply returns. */
+static void ss_options_menu(void)
+{
+ int catchint;
+/* signal() returns the previous handler: catchint is non-zero when SIGINT was not already being ignored. */
+ catchint = signal(SIGINT, SIG_IGN) != SIG_IGN;
+ if (catchint) {
+ if (setjmp(jmpbuf) != 0) /* non-zero only after jumper() has longjmp()ed back here */
+ fprintf(stdout,"\n.. Interrupt\n");
+#ifdef UNIX
+ if (signal(SIGINT,jumper) == BADSIG)
+ fprintf(stdout,"Error: signal\n");
+#else
+ if (signal(SIGINT,SIG_DFL) == BADSIG)
+ fprintf(stdout,"Error: signal\n");
+#endif
+ }
+
+
+ while(TRUE) {
+
+ fprintf(stdout,"\n\n\n");
+ fprintf(stdout," ********* SECONDARY STRUCTURE OPTIONS *********\n");
+ fprintf(stdout,"\n\n");
+
+ fprintf(stdout," 1. Use profile 1 secondary structure / penalty mask ");
+ if(use_ss1)
+ fprintf(stdout,"= YES\n");
+ else
+ fprintf(stdout,"= NO\n");
+ fprintf(stdout," 2. Use profile 2 secondary structure / penalty mask ");
+ if(use_ss2)
+ fprintf(stdout,"= YES\n");
+ else
+ fprintf(stdout,"= NO\n");
+ fprintf(stdout,"\n");
+ fprintf(stdout," 3. Output in alignment ");
+ fprintf(stdout,"= %s\n",secstroutput_txt[output_struct_penalties]);
+ fprintf(stdout,"\n");
+
+ fprintf(stdout," 4. Helix gap penalty :%d\n",(pint)helix_penalty);
+ fprintf(stdout," 5. Strand gap penalty :%d\n",(pint)strand_penalty);
+ fprintf(stdout," 6. Loop gap penalty :%d\n",(pint)loop_penalty);
+
+ fprintf(stdout," 7. Secondary structure terminal penalty :%d\n",(pint)helix_end_penalty);
+ fprintf(stdout," 8. Helix terminal positions within :%d outside :%d\n",
+ (pint)helix_end_minus,(pint)helix_end_plus);
+ fprintf(stdout," 9. Strand terminal positions within :%d outside :%d\n",
+ (pint)strand_end_minus,(pint)strand_end_plus);
+
+ fprintf(stdout,"\n\n");
+ fprintf(stdout," H. HELP\n\n\n");
+
+ getstr("Enter number (or [RETURN] to exit)",lin2);
+ if( *lin2 == EOS) {
+ return;
+ }
+
+ switch(toupper(*lin2)) {
+ case '1': use_ss1 ^= TRUE; /* toggle */
+ break;
+ case '2': use_ss2 ^= TRUE; /* toggle */
+ break;
+ case '3': output_struct_penalties = secstroutput_options();
+ break;
+ case '4':
+ fprintf(stdout,"Helix Penalty Currently: %d\n",(pint)helix_penalty);
+ helix_penalty=getint("Enter number",1,9,helix_penalty);
+ break;
+ case '5':
+ fprintf(stdout,"Strand Gap Penalty Currently: %d\n",(pint)strand_penalty);
+ strand_penalty=getint("Enter number",1,9,strand_penalty);
+ break;
+ case '6':
+ fprintf(stdout,"Loop Gap Penalty Currently: %d\n",(pint)loop_penalty);
+ loop_penalty=getint("Enter number",1,9,loop_penalty);
+ break;
+ case '7':
+ fprintf(stdout,"Secondary Structure Terminal Penalty Currently: %d\n",
+ (pint)helix_end_penalty);
+ helix_end_penalty=getint("Enter number",1,9,helix_end_penalty);
+ strand_end_penalty = helix_end_penalty; /* one menu entry sets both terminal penalties */
+ break;
+ case '8':
+ fprintf(stdout,"Helix Terminal Positions Currently: \n");
+ fprintf(stdout," within helix: %d outside helix: %d\n",
+ (pint)helix_end_minus,(pint)helix_end_plus);
+ helix_end_minus=getint("Enter number of residues within helix",0,3,helix_end_minus);
+ helix_end_plus=getint("Enter number of residues outside helix",0,3,helix_end_plus);
+ break;
+ case '9':
+ fprintf(stdout,"Strand Terminal Positions Currently: \n");
+ fprintf(stdout," within strand: %d outside strand: %d\n",
+ (pint)strand_end_minus,(pint)strand_end_plus);
+ strand_end_minus=getint("Enter number of residues within strand",0,3,strand_end_minus);
+ strand_end_plus=getint("Enter number of residues outside strand",0,3,strand_end_plus);
+ break;
+ case '?':
+ case 'H':
+ get_help('B');
+ break;
+ default:
+ fprintf(stdout,"\n\nUnrecognised Command\n\n");
+ break;
+ }
+ }
+}
+
+/* Secondary-structure output sub-menu.  Returns the chosen index into secstroutput_txt (0..3); an empty reply keeps the current output_struct_penalties, 'Q'/'X' return 0.
+ * Fix: the help case had no break and fell through to "return(0)", so asking for help silently selected option 0; help now redisplays the menu. */
+static sint secstroutput_options(void)
+{
+ while(TRUE)
+ {
+ fprintf(stdout,"\n\n\n");
+ fprintf(stdout," ********* Secondary Structure Output Menu *********\n");
+ fprintf(stdout,"\n\n");
+
+ fprintf(stdout," 1. %s\n",secstroutput_txt[0]);
+ fprintf(stdout," 2. %s\n",secstroutput_txt[1]);
+ fprintf(stdout," 3. %s\n",secstroutput_txt[2]);
+ fprintf(stdout," 4. %s\n",secstroutput_txt[3]);
+ fprintf(stdout," H. HELP\n\n");
+ fprintf(stdout,
+" -- Current output is %s ",secstroutput_txt[output_struct_penalties]);
+ fprintf(stdout,"--\n");
+
+ getstr("\n\nEnter number (or [RETURN] to exit)",lin2);
+ if(*lin2 == EOS) return(output_struct_penalties);
+
+ switch(toupper(*lin2))
+ {
+ case '1': return(0);
+ case '2': return(1);
+ case '3': return(2);
+ case '4': return(3);
+ case '?':
+ case 'H': get_help('C');
+ break; /* show the menu again after the help text */
+/* NOTE(review): 'Q'/'X' return 0 rather than the current setting, unlike an empty reply - confirm this is intended. */
+ case 'Q':
+ case 'X': return(0);
+
+ default: fprintf(stdout,"\n\nUnrecognised Command\n\n");
+ break;
+ }
+ }
+}
+
+
+
+
+/* Phylogenetic tree menu: prints the menu and dispatches on the first character of */
+/* the reply (upper-cased) in a loop; returns on an empty reply or on Q/X. */
+/* Takes no arguments; reads and updates non-local state such as tossgaps and kimura. */
+static void phylogenetic_tree_menu(void)
+{
+ int catchint;
+
+ catchint = signal(SIGINT, SIG_IGN) != SIG_IGN; /* true unless SIGINT was already being ignored */
+ if (catchint) {
+ if (setjmp(jmpbuf) != 0) /* non-zero: control came back here through a longjmp on jmpbuf */
+ fprintf(stdout,"\n.. Interrupt\n");
+#ifdef UNIX
+ if (signal(SIGINT,jumper) == BADSIG) /* jumper is defined elsewhere; presumably it longjmps to jmpbuf -- confirm */
+ fprintf(stdout,"Error: signal\n");
+#else
+ if (signal(SIGINT,SIG_DFL) == BADSIG)
+ fprintf(stdout,"Error: signal\n");
+#endif
+ }
+
+ /* Menu loop: left only through the return statements below. */
+ while(TRUE)
+ {
+ fprintf(stdout,"\n\n\n");
+ fprintf(stdout,"****** PHYLOGENETIC TREE MENU ******\n");
+ fprintf(stdout,"\n\n");
+
+ fprintf(stdout," 1. Input an alignment\n");
+ fprintf(stdout," 2. Exclude positions with gaps? ");
+ if(tossgaps)
+ fprintf(stdout,"= ON\n");
+ else
+ fprintf(stdout,"= OFF\n");
+ fprintf(stdout," 3. Correct for multiple substitutions? ");
+ if(kimura)
+ fprintf(stdout,"= ON\n");
+ else
+ fprintf(stdout,"= OFF\n");
+ fprintf(stdout," 4. Draw tree now\n");
+ fprintf(stdout," 5. Bootstrap tree\n");
+ fprintf(stdout," 6. Output format options\n");
+ fprintf(stdout,"\n");
+ fprintf(stdout," S. Execute a system command\n");
+ fprintf(stdout," H. HELP\n");
+ fprintf(stdout," or press [RETURN] to go back to main menu\n\n\n");
+
+ getstr("Your choice",lin1);
+ if(*lin1 == EOS) return;
+
+ switch(toupper(*lin1))
+ {
+ case '1': seq_input(FALSE); /* after new input the four output file names are cleared */
+ phylip_name[0]=EOS;
+ clustal_name[0]=EOS;
+ dist_name[0]=EOS;
+ nexus_name[0]=EOS;
+ break;
+ case '2': tossgaps ^= TRUE; /* flip the flag shown as option 2 */
+ break;
+ case '3': kimura ^= TRUE;; /* flip the flag shown as option 3 */
+ break;
+ case '4': phylogenetic_tree(phylip_name,clustal_name,dist_name,nexus_name);
+ break;
+ case '5': bootstrap_tree(phylip_name,clustal_name,nexus_name);
+ break;
+ case '6': tree_format_options_menu();
+ break;
+ case 'S': do_system();
+ break;
+ case '?':
+ case 'H': get_help('7');
+ break;
+ case 'Q':
+ case 'X': return;
+
+ default: fprintf(stdout,"\n\nUnrecognised Command\n\n");
+ break;
+ }
+ }
+}
+
+
+
+/* Menu for the tree output settings: options 1-4 flip the output_tree_* flags and */
+/* option 5 switches bootstrap_format between BS_NODE_LABELS and BS_BRANCH_LABELS. */
+/* Loops until the user gives an empty reply. */
+static void tree_format_options_menu(void) /* format of tree output */
+{
+ int catchint;
+
+ catchint = signal(SIGINT, SIG_IGN) != SIG_IGN; /* true unless SIGINT was already being ignored */
+ if (catchint) {
+ if (setjmp(jmpbuf) != 0) /* non-zero: control came back here through a longjmp on jmpbuf */
+ fprintf(stdout,"\n.. Interrupt\n");
+#ifdef UNIX
+ if (signal(SIGINT,jumper) == BADSIG) /* jumper is defined elsewhere; presumably it longjmps to jmpbuf -- confirm */
+ fprintf(stdout,"Error: signal\n");
+#else
+ if (signal(SIGINT,SIG_DFL) == BADSIG)
+ fprintf(stdout,"Error: signal\n");
+#endif
+ }
+
+ /* Menu loop: each pass prints the current state of every setting. */
+ while(TRUE) {
+ fprintf(stdout,"\n\n\n");
+ fprintf(stdout," ****** Format of Phylogenetic Tree Output ******\n");
+ fprintf(stdout,"\n\n");
+ fprintf(stdout," 1. Toggle CLUSTAL format tree output = %s\n",
+ (!output_tree_clustal) ? "OFF" : "ON");
+ fprintf(stdout," 2. Toggle Phylip format tree output = %s\n",
+ (!output_tree_phylip) ? "OFF" : "ON");
+ fprintf(stdout," 3. Toggle Phylip distance matrix output = %s\n",
+ (!output_tree_distances)? "OFF" : "ON");
+ fprintf(stdout," 4. Toggle Nexus format tree output = %s\n\n",
+ (!output_tree_nexus)? "OFF" : "ON");
+ fprintf(stdout," 5. Toggle Phylip bootstrap positions = %s\n\n",
+(bootstrap_format==BS_NODE_LABELS) ? "NODE LABELS" : "BRANCH LABELS");
+ fprintf(stdout,"\n");
+ fprintf(stdout," H. HELP\n\n\n");
+
+ getstr("Enter number (or [RETURN] to exit)",lin2);
+ if(*lin2 == EOS) return;
+
+ switch(toupper(*lin2)) {
+ case '1':
+ output_tree_clustal ^= TRUE;
+ break;
+ case '2':
+ output_tree_phylip ^= TRUE;
+ break;
+ case '3':
+ output_tree_distances ^= TRUE;
+ break;
+ case '4':
+ output_tree_nexus ^= TRUE;
+ break;
+ case '5': /* two-state switch between the label placements */
+ if (bootstrap_format == BS_NODE_LABELS)
+ bootstrap_format = BS_BRANCH_LABELS;
+ else
+ bootstrap_format = BS_NODE_LABELS;
+ break;
+ case '?':
+ case 'H':
+ get_help('0');
+ break;
+ default:
+ fprintf(stdout,"\n\nUnrecognised Command\n\n");
+ break;
+ }
+ }
+}
+/* Menu for the alignment output settings: toggles the output_* format flags and related */
+/* options, and (option 0) writes the alignment output files. Returns on an empty reply. */
+static void format_options_menu(void) /* format of alignment output */
+{
+ sint i; /* not referenced anywhere in this function */
+ sint length = 0; /* not referenced anywhere in this function */
+ char path[FILENAMELEN+1]; /* filled by get_path() for option 0 */
+ int catchint;
+
+ catchint = signal(SIGINT, SIG_IGN) != SIG_IGN; /* true unless SIGINT was already being ignored */
+ if (catchint) {
+ if (setjmp(jmpbuf) != 0) /* non-zero: control came back here through a longjmp on jmpbuf */
+ fprintf(stdout,"\n.. Interrupt\n");
+#ifdef UNIX
+ if (signal(SIGINT,jumper) == BADSIG) /* jumper is defined elsewhere; presumably it longjmps to jmpbuf -- confirm */
+ fprintf(stdout,"Error: signal\n");
+#else
+ if (signal(SIGINT,SIG_DFL) == BADSIG)
+ fprintf(stdout,"Error: signal\n");
+#endif
+ }
+
+ /* Menu loop: each pass prints the current state of every setting. */
+ while(TRUE) {
+ fprintf(stdout,"\n\n\n");
+ fprintf(stdout," ********* Format of Alignment Output *********\n");
+ fprintf(stdout,"\n\n");
+ fprintf(stdout," 1. Toggle CLUSTAL format output = %s\n",
+ (!output_clustal) ? "OFF" : "ON");
+ fprintf(stdout," 2. Toggle NBRF/PIR format output = %s\n",
+ (!output_nbrf) ? "OFF" : "ON");
+ fprintf(stdout," 3. Toggle GCG/MSF format output = %s\n",
+ (!output_gcg) ? "OFF" : "ON");
+ fprintf(stdout," 4. Toggle PHYLIP format output = %s\n",
+ (!output_phylip) ? "OFF" : "ON");
+ fprintf(stdout," 5. Toggle NEXUS format output = %s\n",
+ (!output_nexus) ? "OFF" : "ON");
+ fprintf(stdout," 6. Toggle GDE format output = %s\n\n",
+ (!output_gde) ? "OFF" : "ON");
+ fprintf(stdout," 7. Toggle GDE output case = %s\n",
+ (!lowercase) ? "UPPER" : "LOWER");
+ fprintf(stdout," 8. Toggle CLUSTALW sequence numbers = %s\n",
+ (!cl_seq_numbers) ? "OFF" : "ON");
+ fprintf(stdout," 9. Toggle output order = %s\n\n",
+ (output_order==0) ? "INPUT FILE" : "ALIGNED"); /* NOTE(review): compares with 0 while case '9' uses INPUT -- presumably INPUT is 0, confirm */
+ fprintf(stdout," 0. Create alignment output file(s) now?\n\n");
+ fprintf(stdout," T. Toggle parameter output = %s\n",
+ (!save_parameters) ? "OFF" : "ON");
+ fprintf(stdout,"\n");
+ fprintf(stdout," H. HELP\n\n\n");
+
+ getstr("Enter number (or [RETURN] to exit)",lin2);
+ if(*lin2 == EOS) return;
+
+ switch(toupper(*lin2)) {
+ case '1':
+ output_clustal ^= TRUE;
+ break;
+ case '2':
+ output_nbrf ^= TRUE;
+ break;
+ case '3':
+ output_gcg ^= TRUE;
+ break;
+ case '4':
+ output_phylip ^= TRUE;
+ break;
+ case '5':
+ output_nexus ^= TRUE;
+ break;
+ case '6':
+ output_gde ^= TRUE;
+ break;
+ case '7':
+ lowercase ^= TRUE;
+ break;
+ case '8':
+ cl_seq_numbers ^= TRUE;
+ break;
+ case '9':
+ if (output_order == INPUT) output_order = ALIGNED;
+ else output_order = INPUT;
+ break;
+ case '0': /* DES */
+ if(empty) { /* nothing to write: report and redisplay the menu */
+ error("No sequences loaded");
+ break;
+ }
+ get_path(seqname,path);
+ if(!open_alignment_output(path)) break; /* opening failed: skip the write */
+ create_alignment_output(1,nseqs);
+ break;
+ case 'T': save_parameters ^= TRUE;
+ break;
+ case '?':
+ case 'H':
+ get_help('5');
+ break;
+ default:
+ fprintf(stdout,"\n\nUnrecognised Command\n\n");
+ break;
+ }
+ }
+}
+
+
+
+
+
+
+
+
+
+/* Pairwise alignment parameter menu.  Works on the working values pw_go_penalty, */
+/* pw_ge_penalty, ktup, window, signif and wind_gap: they are loaded from the dna_* or */
+/* prot_* set on entry (by dnaflag) and stored back when the user exits with [RETURN]. */
+static void pair_menu(void)
+{
+    int catchint;
+
+    catchint = signal(SIGINT, SIG_IGN) != SIG_IGN; /* true unless SIGINT was already being ignored */
+    if (catchint) {
+        if (setjmp(jmpbuf) != 0) /* non-zero: control came back here through a longjmp on jmpbuf */
+            fprintf(stdout,"\n.. Interrupt\n");
+#ifdef UNIX
+        if (signal(SIGINT,jumper) == BADSIG)
+            fprintf(stdout,"Error: signal\n");
+#else
+        if (signal(SIGINT,SIG_DFL) == BADSIG)
+            fprintf(stdout,"Error: signal\n");
+#endif
+    }
+
+    /* Load the working parameters for the current sequence type. */
+    if(dnaflag) {
+        pw_go_penalty = dna_pw_go_penalty;
+        pw_ge_penalty = dna_pw_ge_penalty;
+        ktup = dna_ktup;
+        window = dna_window;
+        signif = dna_signif;
+        wind_gap = dna_wind_gap;
+
+    }
+    else {
+        pw_go_penalty = prot_pw_go_penalty;
+        pw_ge_penalty = prot_pw_ge_penalty;
+        ktup = prot_ktup;
+        window = prot_window;
+        signif = prot_signif;
+        wind_gap = prot_wind_gap;
+
+    }
+
+    while(TRUE) {
+
+        fprintf(stdout,"\n\n\n");
+        fprintf(stdout," ********* PAIRWISE ALIGNMENT PARAMETERS *********\n");
+        fprintf(stdout,"\n\n");
+
+        fprintf(stdout," Slow/Accurate alignments:\n\n");
+
+        fprintf(stdout," 1. Gap Open Penalty :%4.2f\n",pw_go_penalty);
+        fprintf(stdout," 2. Gap Extension Penalty :%4.2f\n",pw_ge_penalty);
+        fprintf(stdout," 3. Protein weight matrix :%s\n" ,
+            pw_matrix_menu.opt[pw_matnum-1].title); /* was matrix_menu; pw_matnum indexes pw_matrix_menu (see case '3') */
+        fprintf(stdout," 4. DNA weight matrix :%s\n" ,
+            dnamatrix_menu.opt[pw_dnamatnum-1].title);
+        fprintf(stdout,"\n");
+
+        fprintf(stdout," Fast/Approximate alignments:\n\n");
+
+        fprintf(stdout," 5. Gap penalty :%d\n",(pint)wind_gap);
+        fprintf(stdout," 6. K-tuple (word) size :%d\n",(pint)ktup);
+        fprintf(stdout," 7. No. of top diagonals :%d\n",(pint)signif);
+        fprintf(stdout," 8. Window size :%d\n\n",(pint)window);
+
+        fprintf(stdout," 9. Toggle Slow/Fast pairwise alignments ");
+        if(quick_pairalign)
+            fprintf(stdout,"= FAST\n\n");
+        else
+            fprintf(stdout,"= SLOW\n\n");
+
+
+        fprintf(stdout," H. HELP\n\n\n");
+
+        getstr("Enter number (or [RETURN] to exit)",lin2);
+        if( *lin2 == EOS) {
+            if(dnaflag) {
+                dna_pw_go_penalty = pw_go_penalty;
+                dna_pw_ge_penalty = pw_ge_penalty;
+                dna_ktup = ktup;
+                dna_window = window;
+                dna_signif = signif;
+                dna_wind_gap = wind_gap;
+
+            }
+            else {
+                prot_pw_go_penalty = pw_go_penalty;
+                prot_pw_ge_penalty = pw_ge_penalty;
+                prot_ktup = ktup;
+                prot_window = window;
+                prot_signif = signif;
+                prot_wind_gap = wind_gap;
+
+            }
+            /* Working values saved for the current sequence type; leave the menu. */
+            return;
+        }
+
+        switch(toupper(*lin2)) {
+            case '1':
+                fprintf(stdout,"Gap Open Penalty Currently: %4.2f\n",pw_go_penalty);
+                pw_go_penalty=(float)getreal("Enter number",(double)0.0,(double)100.0,(double)pw_go_penalty);
+                break;
+            case '2':
+                fprintf(stdout,"Gap Extension Penalty Currently: %4.2f\n",pw_ge_penalty);
+                pw_ge_penalty=(float)getreal("Enter number",(double)0.0,(double)10.0,(double)pw_ge_penalty);
+                break;
+            case '3':
+                pw_matnum = read_matrix("PROTEIN",pw_matrix_menu,pw_mtrxname,pw_matnum,pw_usermat,pw_aa_xref);
+                break;
+            case '4':
+                pw_dnamatnum = read_matrix("DNA",dnamatrix_menu,pw_dnamtrxname,pw_dnamatnum,pw_userdnamat,pw_dna_xref);
+                break;
+            case '5':
+                fprintf(stdout,"Gap Penalty Currently: %d\n",(pint)wind_gap);
+                wind_gap=getint("Enter number",1,500,wind_gap);
+                break;
+            case '6':
+                fprintf(stdout,"K-tuple Currently: %d\n",(pint)ktup);
+                if(dnaflag) /* the accepted upper limit is 4 for DNA, 2 otherwise */
+                    ktup=getint("Enter number",1,4,ktup);
+                else
+                    ktup=getint("Enter number",1,2,ktup);
+                break;
+            case '7':
+                fprintf(stdout,"Top diagonals Currently: %d\n",(pint)signif);
+                signif=getint("Enter number",1,50,signif);
+                break;
+            case '8':
+                fprintf(stdout,"Window size Currently: %d\n",(pint)window);
+                window=getint("Enter number",1,50,window);
+                break;
+            case '9': quick_pairalign ^= TRUE;
+                break;
+            case '?':
+            case 'H':
+                get_help('3');
+                break;
+            default:
+                fprintf(stdout,"\n\nUnrecognised Command\n\n");
+                break;
+        }
+    }
+}
+
+
+/* Multiple alignment parameter menu.  gap_open and gap_extend are working values: */
+/* loaded from the dna_* or prot_* pair on entry (by dnaflag) and stored back when the */
+/* user exits with [RETURN].  The other settings are changed in place. */
+static void multi_menu(void)
+{
+    int catchint;
+
+    catchint = signal(SIGINT, SIG_IGN) != SIG_IGN; /* true unless SIGINT was already being ignored */
+    if (catchint) {
+        if (setjmp(jmpbuf) != 0) /* non-zero: control came back here through a longjmp on jmpbuf */
+            fprintf(stdout,"\n.. Interrupt\n");
+#ifdef UNIX
+        if (signal(SIGINT,jumper) == BADSIG)
+            fprintf(stdout,"Error: signal\n");
+#else
+        if (signal(SIGINT,SIG_DFL) == BADSIG)
+            fprintf(stdout,"Error: signal\n");
+#endif
+    }
+
+    /* Load the working gap penalties for the current sequence type. */
+    if(dnaflag) {
+        gap_open = dna_gap_open;
+        gap_extend = dna_gap_extend;
+    }
+    else {
+        gap_open = prot_gap_open;
+        gap_extend = prot_gap_extend;
+    }
+
+    while(TRUE) {
+
+        fprintf(stdout,"\n\n\n");
+        fprintf(stdout," ********* MULTIPLE ALIGNMENT PARAMETERS *********\n");
+        fprintf(stdout,"\n\n");
+
+        fprintf(stdout," 1. Gap Opening Penalty :%4.2f\n",gap_open);
+        fprintf(stdout," 2. Gap Extension Penalty :%4.2f\n",gap_extend);
+
+        fprintf(stdout," 3. Delay divergent sequences :%d %%\n\n",(pint)divergence_cutoff);
+
+        fprintf(stdout," 4. DNA Transitions Weight :%1.2f\n\n",transition_weight);
+        fprintf(stdout," 5. Protein weight matrix :%s\n"
+            ,matrix_menu.opt[matnum-1].title);
+        fprintf(stdout," 6. DNA weight matrix :%s\n"
+            ,dnamatrix_menu.opt[dnamatnum-1].title);
+        fprintf(stdout," 7. Use negative matrix :%s\n\n",(!neg_matrix) ? "OFF" : "ON");
+        fprintf(stdout," 8. Protein Gap Parameters\n\n");
+        fprintf(stdout," H. HELP\n\n\n");
+
+        getstr("Enter number (or [RETURN] to exit)",lin2);
+
+        if(*lin2 == EOS) {
+            if(dnaflag) {
+                dna_gap_open = gap_open;
+                dna_gap_extend = gap_extend;
+            }
+            else {
+                prot_gap_open = gap_open;
+                prot_gap_extend = gap_extend;
+            }
+            return;
+        }
+
+        switch(toupper(*lin2)) {
+            case '1':
+                fprintf(stdout,"Gap Opening Penalty Currently: %4.2f\n",gap_open);
+                gap_open=(float)getreal("Enter number",(double)0.0,(double)100.0,(double)gap_open);
+                break;
+            case '2':
+                fprintf(stdout,"Gap Extension Penalty Currently: %4.2f\n",gap_extend);
+                gap_extend=(float)getreal("Enter number",(double)0.0,(double)10.0,(double)gap_extend);
+                break;
+            case '3': /* the value read here is discarded: the cutoff is forced back to 0 below */
+                fprintf(stdout,"Min Identity Currently: %d\n",(pint)divergence_cutoff);
+                divergence_cutoff=getint("Enter number",0,100,divergence_cutoff);
+                fprintf(stdout, "This value is not to be changed in ClustalCoffee\n"); /* JP */
+                divergence_cutoff=0; /* JP */
+                break;
+            case '4': /* %f needs a floating argument; the old (pint) cast passed an integer (undefined behaviour) */
+                fprintf(stdout,"Transition Weight Currently: %1.2f\n",transition_weight);
+                transition_weight=(float)getreal("Enter number",(double)0.0,(double)1.0,(double)transition_weight);
+                break;
+            case '5':
+                matnum = read_matrix("PROTEIN",matrix_menu,mtrxname,matnum,usermat,aa_xref);
+                break;
+            case '6':
+                dnamatnum = read_matrix("DNA",dnamatrix_menu,dnamtrxname,dnamatnum,userdnamat,dna_xref);
+                break;
+            case '7':
+                neg_matrix ^= TRUE;
+                break;
+            case '8':
+                gap_penalties_menu();
+                break;
+            case '?':
+            case 'H':
+                get_help('4');
+                break;
+            default:
+                fprintf(stdout,"\n\nUnrecognised Command\n\n");
+                break;
+        }
+    }
+}
+
+
+
+/* Protein gap parameter menu: toggles the residue-specific, hydrophilic and end-gap */
+/* flags, edits the hyd_residues string and sets gap_dist. */
+/* Loops until the user gives an empty reply. */
+static void gap_penalties_menu(void)
+{
+    char c;
+    sint i;
+    int catchint;
+
+    catchint = signal(SIGINT, SIG_IGN) != SIG_IGN; /* true unless SIGINT was already being ignored */
+    if (catchint) {
+        if (setjmp(jmpbuf) != 0) /* non-zero: control came back here through a longjmp on jmpbuf */
+            fprintf(stdout,"\n.. Interrupt\n");
+#ifdef UNIX
+        if (signal(SIGINT,jumper) == BADSIG)
+            fprintf(stdout,"Error: signal\n");
+#else
+        if (signal(SIGINT,SIG_DFL) == BADSIG)
+            fprintf(stdout,"Error: signal\n");
+#endif
+    }
+
+
+    while(TRUE) {
+
+        fprintf(stdout,"\n\n\n");
+        fprintf(stdout," ********* PROTEIN GAP PARAMETERS *********\n");
+        fprintf(stdout,"\n\n\n");
+
+        fprintf(stdout," 1. Toggle Residue-Specific Penalties :%s\n\n",(no_pref_penalties) ? "OFF" : "ON");
+        fprintf(stdout," 2. Toggle Hydrophilic Penalties :%s\n",(no_hyd_penalties) ? "OFF" : "ON");
+        fprintf(stdout," 3. Hydrophilic Residues :%s\n\n"
+            ,hyd_residues);
+        fprintf(stdout," 4. Gap Separation Distance :%d\n",(pint)gap_dist);
+        fprintf(stdout," 5. Toggle End Gap Separation :%s\n\n",(!use_endgaps) ? "OFF" : "ON");
+        fprintf(stdout," H. HELP\n\n\n");
+
+        getstr("Enter number (or [RETURN] to exit)",lin2);
+
+        if(*lin2 == EOS) return;
+
+        switch(toupper(*lin2)) {
+            case '1':
+                no_pref_penalties ^= TRUE;
+                break;
+            case '2':
+                no_hyd_penalties ^= TRUE;
+                break;
+            case '3':
+                fprintf(stdout,"Hydrophilic Residues Currently: %s\n",hyd_residues);
+                /* NOTE(review): the loop is capped by the OLD list length, so the list can never grow; hyd_residues' size is not visible here -- confirm before widening */
+                getstr("Enter residues (or [RETURN] to quit)",lin1);
+                if (*lin1 != EOS) {
+                    for (i=0;i<strlen(hyd_residues) && i<26;i++) {
+                        c = lin1[i];
+                        if (isalpha((unsigned char)c)) /* <ctype.h> functions are undefined for negative char values */
+                            hyd_residues[i] = (char)toupper((unsigned char)c);
+                        else
+                            break; /* stops at the first non-letter, including the end of the reply */
+                    }
+                    hyd_residues[i] = EOS;
+                }
+                break;
+            case '4':
+                fprintf(stdout,"Gap Separation Distance Currently: %d\n",(pint)gap_dist);
+                gap_dist=getint("Enter number",0,100,gap_dist);
+                break;
+            case '5':
+                use_endgaps ^= TRUE;
+                break;
+            case '?':
+            case 'H':
+                get_help('A');
+                break;
+            default:
+                fprintf(stdout,"\n\nUnrecognised Command\n\n");
+                break;
+        }
+    }
+}
+
+/* Weight matrix selection menu.  Lists the entries of menu under the heading title and */
+/* lets the user pick one by number; the last entry loads a matrix through user_mat(). */
+/* Returns the number of the matrix in effect when the user exits with [RETURN]. */
+static sint read_matrix(char *title,MatMenu menu, char *matnam, sint matn, short *mat, short *xref)
+{
+    static char userfile[FILENAMELEN+1]; /* static: keeps its contents between calls */
+    int n, key, choice;
+
+    for(;;)
+    {
+        fprintf(stdout,"\n\n\n");
+        fprintf(stdout," ********* %s WEIGHT MATRIX MENU *********\n",title);
+        fprintf(stdout,"\n\n");
+
+        n = 0;
+        while(n < menu.noptions) {
+            fprintf(stdout," %d. %s\n",n+1,menu.opt[n].title);
+            n++;
+        }
+        fprintf(stdout," H. HELP\n\n");
+        fprintf(stdout,
+" -- Current matrix is the %s ",menu.opt[matn-1].title);
+        if(matn == menu.noptions) fprintf(stdout,"(file = %s)",userfile);
+        fprintf(stdout,"--\n");
+
+        getstr("\n\nEnter number (or [RETURN] to exit)",lin2);
+        if(*lin2 == EOS)
+            break;
+
+        key = toupper(*lin2);
+        choice = key-'0'; /* distance of the reply's first character from '0' */
+        if(choice == menu.noptions) {
+            /* Last entry: only switch if user_mat() reports success. */
+            if(user_mat(userfile, mat, xref)) {
+                strcpy(matnam,userfile);
+                matn=choice;
+            }
+        } else if(choice > 0 && choice < menu.noptions) {
+            strcpy(matnam,menu.opt[choice-1].string);
+            matn=choice;
+        } else if(key == '?' || key == 'H')
+            get_help('8');
+        else
+            fprintf(stdout,"\n\nUnrecognised Command\n\n");
+    }
+    return(matn);
+}
+/* Print title, ask "<prompt>(y/n) ? [y]" and return 'n' if the reply starts with 'n' */
+/* or 'N', otherwise 'y'.  A prompt too long for line[] is truncated, not overrun. */
+char prompt_for_yes_no(char *title,char *prompt)
+{
+    static const char suffix[] = "(y/n) ? [y]";
+    char line[80];
+    char lin2[80]; /* local reply buffer; NOTE(review): getstr's write bound is not visible here -- confirm 80 is enough */
+
+    fprintf(stdout,"\n%s\n",title);
+    line[0] = EOS; /* was strcpy+strcat with no bound: a prompt over 68 chars overflowed line[] */
+    strncat(line, prompt, sizeof(line) - sizeof(suffix)); /* leaves room for suffix and its NUL */
+    strcat(line, suffix);
+    getstr(line,lin2);
+    if ((*lin2 != 'n') && (*lin2 != 'N'))
+        return('y');
+    return('n');
+}
+
+
+/*
+ * fatal()
+ *
+ * Writes "FATAL ERROR: " followed by the formatted message to stdout,
+ * then ends the program with exit status 1.
+ * msg is a printf-style format string; its arguments follow it.
+ *
+ * Does not return.
+ */
+
+void fatal( char *msg,...)
+{
+    va_list args;
+
+    fputs("\n\nFATAL ERROR: ",stdout);
+    va_start(args,msg);
+    vfprintf(stdout,msg,args);
+    va_end(args);
+    fputs("\n\n",stdout);
+    exit(1);
+}
+
+/*
+ * error()
+ *
+ * Writes "ERROR: " followed by the formatted message to stdout.
+ * msg is a printf-style format string; its arguments follow it.
+ *
+ * Returns nothing; execution continues in the caller.
+ *
+ */
+
+void error( char *msg,...)
+{
+    va_list args;
+
+    fputs("\n\nERROR: ",stdout);
+    va_start(args,msg);
+    vfprintf(stdout,msg,args);
+    va_end(args);
+    fputs("\n\n",stdout);
+}
+
+/*
+ * warning()
+ *
+ * Writes "WARNING: " followed by the formatted message to stdout.
+ * msg is a printf-style format string; its arguments follow it.
+ *
+ * Returns nothing; execution continues in the caller.
+ *
+ */
+
+void warning( char *msg,...)
+{
+    va_list args;
+
+    fputs("\n\nWARNING: ",stdout);
+    va_start(args,msg);
+    vfprintf(stdout,msg,args);
+    va_end(args);
+    fputs("\n\n",stdout);
+}
+
+/*
+ * info()
+ *
+ * Writes a newline and then the formatted message to stdout.
+ * No trailing newline is added after the message.
+ * msg is a printf-style format string; its arguments follow it.
+ *
+ * Returns nothing.
+ */
+
+void info( char *msg,...)
+{
+    va_list args;
+
+    fputs("\n",stdout);
+    va_start(args,msg);
+    vfprintf(stdout,msg,args);
+    va_end(args);
+}
diff --git a/blosum62.h b/blosum62.h
new file mode 100644
index 0000000..47de699
--- /dev/null
+++ b/blosum62.h
@@ -0,0 +1,47 @@
+double qmatrix[21][21] = { /* 21x21 table; row 0 and column 0 are all zero, so the data sits at indices 1..20. NOTE(review): presumably pair frequencies for 20 residue types -- confirm meaning and ordering */
+{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
+{0, 0.0065, 0.0008, 0.0009, 0.0002, 0.0007, 0.0004, 0.0004, 0.0004, 0.0001, 0.0004, 0.0001, 0.0003, 0.0003, 0.0002, 0.0002, 0.0002, 0.0003, 0.0002, 0.0003, 0.0003},
+{0, 0.0008, 0.0183, 0.0042, 0.0012, 0.0054, 0.0030, 0.0026, 0.0016, 0.0005, 0.0012, 0.0005, 0.0012, 0.0012, 0.0008, 0.0005, 0.0008, 0.0009, 0.0008, 0.0009, 0.0009},
+{0, 0.0009, 0.0042, 0.0102, 0.0006, 0.0022, 0.0014, 0.0015, 0.0013, 0.0003, 0.0008, 0.0005, 0.0009, 0.0010, 0.0007, 0.0007, 0.0006, 0.0009, 0.0015, 0.0009, 0.0010},
+{0, 0.0002, 0.0012, 0.0006, 0.0040, 0.0049, 0.0025, 0.0023, 0.0013, 0.0004, 0.0007, 0.0004, 0.0010, 0.0009, 0.0005, 0.0007, 0.0005, 0.0007, 0.0004, 0.0008, 0.0009},
+{0, 0.0007, 0.0054, 0.0022, 0.0049, 0.0371, 0.0114, 0.0095, 0.0044, 0.0016, 0.0021, 0.0014, 0.0033, 0.0024, 0.0014, 0.0016, 0.0015, 0.0020, 0.0010, 0.0024, 0.0025},
+{0, 0.0004, 0.0030, 0.0014, 0.0025, 0.0114, 0.0184, 0.0120, 0.0032, 0.0011, 0.0014, 0.0010, 0.0027, 0.0017, 0.0010, 0.0009, 0.0012, 0.0012, 0.0006, 0.0012, 0.0016},
+{0, 0.0004, 0.0026, 0.0015, 0.0023, 0.0095, 0.0120, 0.0196, 0.0051, 0.0014, 0.0018, 0.0012, 0.0036, 0.0024, 0.0012, 0.0012, 0.0013, 0.0017, 0.0006, 0.0016, 0.0019},
+{0, 0.0004, 0.0016, 0.0013, 0.0013, 0.0044, 0.0032, 0.0051, 0.0215, 0.0016, 0.0058, 0.0022, 0.0037, 0.0063, 0.0019, 0.0019, 0.0022, 0.0030, 0.0011, 0.0023, 0.0033},
+{0, 0.0001, 0.0005, 0.0003, 0.0004, 0.0016, 0.0011, 0.0014, 0.0016, 0.0119, 0.0008, 0.0004, 0.0009, 0.0010, 0.0004, 0.0003, 0.0004, 0.0004, 0.0002, 0.0004, 0.0005},
+{0, 0.0004, 0.0012, 0.0008, 0.0007, 0.0021, 0.0014, 0.0018, 0.0058, 0.0008, 0.0378, 0.0014, 0.0022, 0.0038, 0.0029, 0.0014, 0.0025, 0.0019, 0.0010, 0.0017, 0.0025},
+{0, 0.0001, 0.0005, 0.0005, 0.0004, 0.0014, 0.0010, 0.0012, 0.0022, 0.0004, 0.0014, 0.0191, 0.0014, 0.0017, 0.0009, 0.0008, 0.0012, 0.0014, 0.0005, 0.0010, 0.0016},
+{0, 0.0003, 0.0012, 0.0009, 0.0010, 0.0033, 0.0027, 0.0036, 0.0037, 0.0009, 0.0022, 0.0014, 0.0125, 0.0047, 0.0022, 0.0014, 0.0019, 0.0020, 0.0007, 0.0018, 0.0023},
+{0, 0.0003, 0.0012, 0.0010, 0.0009, 0.0024, 0.0017, 0.0024, 0.0063, 0.0010, 0.0038, 0.0017, 0.0047, 0.0126, 0.0031, 0.0019, 0.0028, 0.0030, 0.0011, 0.0023, 0.0031},
+{0, 0.0002, 0.0008, 0.0007, 0.0005, 0.0014, 0.0010, 0.0012, 0.0019, 0.0004, 0.0029, 0.0009, 0.0022, 0.0031, 0.0141, 0.0015, 0.0037, 0.0022, 0.0014, 0.0020, 0.0024},
+{0, 0.0002, 0.0005, 0.0007, 0.0007, 0.0016, 0.0009, 0.0012, 0.0019, 0.0003, 0.0014, 0.0008, 0.0014, 0.0019, 0.0015, 0.0073, 0.0016, 0.0035, 0.0010, 0.0025, 0.0031},
+{0, 0.0002, 0.0008, 0.0006, 0.0005, 0.0015, 0.0012, 0.0013, 0.0022, 0.0004, 0.0025, 0.0012, 0.0019, 0.0028, 0.0037, 0.0016, 0.0213, 0.0049, 0.0010, 0.0016, 0.0024},
+{0, 0.0003, 0.0009, 0.0009, 0.0007, 0.0020, 0.0012, 0.0017, 0.0030, 0.0004, 0.0019, 0.0014, 0.0020, 0.0030, 0.0022, 0.0035, 0.0049, 0.0161, 0.0014, 0.0027, 0.0041},
+{0, 0.0002, 0.0008, 0.0015, 0.0004, 0.0010, 0.0006, 0.0006, 0.0011, 0.0002, 0.0010, 0.0005, 0.0007, 0.0011, 0.0014, 0.0010, 0.0010, 0.0014, 0.0093, 0.0012, 0.0012},
+{0, 0.0003, 0.0009, 0.0009, 0.0008, 0.0024, 0.0012, 0.0016, 0.0023, 0.0004, 0.0017, 0.0010, 0.0018, 0.0023, 0.0020, 0.0025, 0.0016, 0.0027, 0.0012, 0.0178, 0.0062},
+{0, 0.0003, 0.0009, 0.0010, 0.0009, 0.0025, 0.0016, 0.0019, 0.0033, 0.0005, 0.0025, 0.0016, 0.0023, 0.0031, 0.0024, 0.0031, 0.0024, 0.0041, 0.0012, 0.0062, 0.0161}
+};
+
+double blosum62_smatrix[21][21] = { /* 21x21 table; row 0 and column 0 are all zero, so the data sits at indices 1..20. NOTE(review): presumably substitution scores in the same residue order as qmatrix -- confirm */
+{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
+{0, 5.2520, 0.4588, 1.0771,-0.7124,-0.8159,-1.2903,-1.4171,-1.2634,-1.1521,-1.2457,-1.8271,-1.2145,-1.3759,-1.8480,-0.9732,-2.1072,-1.4177,-1.1711,-1.3397,-1.4782},
+{0, 0.4588, 3.0230, 1.4696, 0.0063, 0.2074,-0.0804,-0.4245,-1.1050,-1.1877,-1.5537,-1.7986,-1.0538,-1.1845,-1.4970,-1.5822,-1.7419,-1.5962,-0.6171,-1.3932,-1.5393},
+{0, 1.0771, 1.4696, 3.2975,-0.4974,-0.5310,-0.6657,-0.6038,-0.8820,-1.2036,-1.5199,-1.4599,-0.8030,-0.8429,-1.0409,-0.7105,-1.5325,-1.0102, 0.8463,-0.8469,-0.9100},
+{0,-0.7124, 0.0063,-0.4974, 2.6963, 0.9959, 0.5634, 0.3436,-0.4676,-0.7099,-1.3383,-1.2382,-0.3331,-0.7404,-1.0754,-0.2105,-1.5293,-0.9990,-0.7756,-0.6836,-0.6774},
+{0,-0.8159, 0.2074,-0.5310, 0.9959, 1.9247, 0.7608, 0.3942,-0.7323,-0.6387,-1.8135,-1.4300,-0.5987,-1.2213,-1.6895,-1.0670,-1.8028,-1.4232,-1.3934,-1.0773,-1.2234},
+{0,-1.2903,-0.0804,-0.6657, 0.5634, 0.7608, 1.9993, 1.2735,-0.6609,-0.6138,-1.8624,-1.3783,-0.3588,-1.1741,-1.6085,-1.3848,-1.5606,-1.5972,-1.6158,-1.4951,-1.3351},
+{0,-1.4171,-0.4245,-0.6038, 0.3436, 0.3942, 1.2735, 1.8845,-0.0947,-0.4038,-1.5694,-1.1744,-0.0278,-0.8231,-1.4382,-1.0992,-1.5713,-1.2211,-1.5587,-1.2513,-1.1312},
+{0,-1.2634,-1.1050,-0.8820,-0.4676,-0.7323,-0.6609,-0.0947, 1.9646,-0.2043, 0.0798,-0.4071,-0.0227, 0.5579,-0.7654,-0.4020,-0.8767,-0.4319,-0.8126,-0.7068,-0.3670},
+{0,-1.1521,-1.1877,-1.2036,-0.7099,-0.6387,-0.6138,-0.4038,-0.2043, 4.2911,-1.2502,-1.3976,-0.4333,-0.4375,-1.3299,-1.4509,-1.7300,-1.8062,-1.4939,-1.6946,-1.5182},
+{0,-1.2457,-1.5537,-1.5199,-1.3383,-1.8135,-1.8624,-1.5694, 0.0798,-1.2502, 2.7816,-1.0668,-0.7877,-0.1462,-0.2114,-0.8926,-0.6568,-1.0551,-1.0204,-1.1521,-0.7640},
+{0,-1.8271,-1.7986,-1.4599,-1.2382,-1.4300,-1.3783,-1.1744,-0.4071,-1.3976,-1.0668, 3.6823,-0.5376,-0.4045,-1.0002,-0.6410,-0.7401,-0.5581,-1.0805,-1.0543,-0.5068},
+{0,-1.2145,-1.0538,-0.8030,-0.3331,-0.5987,-0.3588,-0.0278,-0.0227,-0.4333,-0.7877,-0.5376, 2.2727, 0.6906,-0.0230,-0.3377,-0.5254,-0.4316,-0.8429,-0.5612,-0.3348},
+{0,-1.3759,-1.1845,-0.8429,-0.7404,-1.2213,-1.1741,-0.8231, 0.5579,-0.4375,-0.1462,-0.4045, 0.6906, 1.9422, 0.3005,-0.0506,-0.1305,-0.0735,-0.4408,-0.3824,-0.1017},
+{0,-1.8480,-1.4970,-1.0409,-1.0754,-1.6895,-1.6085,-1.4382,-0.7654,-1.3299,-0.2114,-1.0002,-0.0230, 0.3005, 2.8266, 0.0008, 0.6358,-0.1340, 0.2892,-0.2199,-0.0895},
+{0,-0.9732,-1.5822,-0.7105,-0.2105,-1.0670,-1.3848,-1.0992,-0.4020,-1.4509,-0.8926,-0.6410,-0.3377,-0.0506, 0.0008, 2.6426,-0.1567, 0.9273, 0.2240, 0.4914, 0.6363},
+{0,-2.1072,-1.7419,-1.5325,-1.5293,-1.8028,-1.5606,-1.5713,-0.8767,-1.7300,-0.6568,-0.7401,-0.5254,-0.1305, 0.6358,-0.1567, 2.8871, 0.7552,-0.5595,-0.8029,-0.3509},
+{0,-1.4177,-1.5962,-1.0102,-0.9990,-1.4232,-1.5972,-1.2211,-0.4319,-1.8062,-1.0551,-0.5581,-0.4316,-0.0735,-0.1340, 0.9273, 0.7552, 2.4514,-0.0588,-0.0577, 0.3877},
+{0,-1.1711,-0.6171, 0.8463,-0.7756,-1.3934,-1.6158,-1.5587,-0.8126,-1.4939,-1.0204,-1.0805,-0.8429,-0.4408, 0.2892, 0.2240,-0.5595,-0.0588, 3.7555,-0.1249,-0.3605},
+{0,-1.3397,-1.3932,-0.8469,-0.6836,-1.0773,-1.4951,-1.2513,-0.7068,-1.6946,-1.1521,-1.0543,-0.5612,-0.3824,-0.2199, 0.4914,-0.8029,-0.0577,-0.1249, 2.7367, 1.0544},
+{0,-1.4782,-1.5393,-0.9100,-0.6774,-1.2234,-1.3351,-1.1312,-0.3670,-1.5182,-0.7640,-0.5068,-0.3348,-0.1017,-0.0895, 0.6363,-0.3509, 0.3877,-0.3605, 1.0544, 2.2523}
+};
diff --git a/calcgapcoeff.c b/calcgapcoeff.c
new file mode 100644
index 0000000..9703578
--- /dev/null
+++ b/calcgapcoeff.c
@@ -0,0 +1,497 @@
+#include <stdio.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include <string.h>
+#include "pcma.h"
+
+
+/*
+ * Prototypes
+ */
+void calc_p_penalties(char **aln, sint n, sint fs, sint ls, sint *weight);
+void calc_h_penalties(char **aln, sint n, sint fs, sint ls, sint *weight);
+void calc_v_penalties(char **aln, sint n, sint fs, sint ls, sint *weight);
+sint local_penalty(sint penalty, sint n, sint *pweight, sint *hweight, sint *vweight);
+float percentid(char *s1, char *s2,sint length);
+/*
+ * Global variables
+ */
+
+/* Settings defined in other translation units; this file only reads them. */
+extern sint gap_dist;
+extern sint max_aa;
+extern sint debug;
+extern Boolean dnaflag;
+extern Boolean use_endgaps;
+extern Boolean endgappenalties;
+extern Boolean no_var_penalties, no_hyd_penalties, no_pref_penalties;
+extern char hyd_residues[];
+extern char *amino_acid_codes;
+
+/* vwindow is the number of residues used for a window for the variable zone penalties */
+/* (calc_v_penalties scans columns i-vwindow .. i+vwindow-1 around column i) */
+/* vll is the lower limit for the variable zone penalties, in percent: */
+/* calc_v_penalties clamps each weight to >= vll and local_penalty divides it by 100 */
+int vll=50;
+int vwindow=5;
+
+/*
+ * Residue-pair lookup used by calc_v_penalties, indexed by
+ * [letter - 'A'][letter - 'A'].  As initialised here it is the identity
+ * matrix: 1 for a pair of identical letters, 0 for every other pair.
+ */
+sint vlut[26][26] = {
+/* A B C D E F G H I J K L M N O P Q R S T U V W X Y Z */
+/*A*/ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/*B*/ 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/*C*/ 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/*D*/ 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/*E*/ 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/*F*/ 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/*G*/ 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/*H*/ 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/*I*/ 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/*J*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/*K*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/*L*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/*M*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/*N*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/*O*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/*P*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/*Q*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/*R*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
+/*S*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
+/*T*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
+/*U*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
+/*V*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
+/*W*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
+/*X*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
+/*Y*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
+/*Z*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1
+ };
+
+/* pascarella probabilities for opening a gap at specific residues */
+/* pr[] holds 20 residue letters; pas_op[], pas_op2[] and pal_op[] each hold 20 */
+/* values in the same order as pr[].  Within this file only pr[] and pas_op[] */
+/* are read (by calc_p_penalties). */
+char pr[] = {'A' , 'C', 'D', 'E', 'F', 'G', 'H', 'K', 'I', 'L',
+ 'M' , 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'Y', 'W'};
+sint pas_op[] = { 87, 87,104, 69, 80,139,100,104, 68, 79,
+ 71,137,126, 93,128,124,111, 75,100, 77};
+sint pas_op2[] ={ 88, 57,111, 98, 75,126, 95, 97, 70, 90,
+ 60,122,110,107, 91,125,124, 81,106, 88};
+sint pal_op[] = { 84, 69,128, 78, 88,176, 53, 95, 55, 49,
+ 52,148,147,100, 91,129,105, 51,128, 88};
+
+/* factor applied by calc_gap_coeff to the opening penalty at columns that already contain gaps */
+float reduced_gap = 1.0;
+Boolean nvar_pen,nhyd_pen,npref_pen; /* per-call working copies of no_var_penalties, no_hyd_penalties, no_pref_penalties (set in calc_gap_coeff) */
+sint gdist; /* local copy of gap_dist */
+
+/*
+ * calc_gap_coeff
+ *
+ * Counts, for every column of the alignment rows first_seq..last_seq-1, how
+ * many sequences have a gap there (stored in gaps[0..prf_length-1]) and
+ * derives position-specific gap opening (GAPCOL) and gap extension (LENCOL)
+ * penalties, written to profile[1..prf_length].  profile[0] -- and, when end
+ * gap penalties are off, profile[prf_length] -- receive the terminal values.
+ *
+ *   alignment         residue codes; a code < 0 or > max_aa counts as a gap
+ *   gaps              output: gap count per column
+ *   profile           output: rows 0..prf_length, columns GAPCOL and LENCOL
+ *   struct_penalties  any value other than NONE applies gap_penalty_mask and
+ *                     disables the variable-zone, hydrophilic and
+ *                     residue-specific modifiers as well as the gap distance
+ *   gap_penalty_mask  one character per column; '1'..'9' multiply both penalties
+ *   gapcoef, lencoef  base opening and extension penalties
+ *
+ * Side effects: sets the file-level flags nvar_pen, nhyd_pen, npref_pen and
+ * (in two of the three branches) gdist, which local_penalty() reads.
+ */
+void calc_gap_coeff(char **alignment, sint *gaps, sint **profile, Boolean struct_penalties,
+                    char *gap_penalty_mask, sint first_seq, sint last_seq,
+                    sint prf_length, sint gapcoef, sint lencoef)
+{
+
+    char c;
+    sint i, j;
+    sint is, ie;
+    sint numseq, val, pcid;
+    sint *gap_pos = NULL;
+    /* NULL-initialised: local_penalty() receives all three even when unused */
+    sint *v_weight = NULL, *p_weight = NULL, *h_weight = NULL;
+    float scale;
+
+    numseq = last_seq - first_seq;
+    if (numseq == 2)
+    {
+        pcid = percentid(alignment[first_seq], alignment[first_seq+1], prf_length);
+    }
+    else pcid = 0;
+
+    for (j = 0; j < prf_length; j++)
+        gaps[j] = 0;
+/*
+   Check for a gap penalty mask
+*/
+    if (struct_penalties != NONE)
+    {
+        nvar_pen = nhyd_pen = npref_pen = TRUE;
+        gdist = 0;
+    }
+    else if (no_var_penalties == FALSE && pcid > 60)
+    {
+        /* NOTE(review): gdist is not assigned in this branch, so it keeps
+           whatever value an earlier call left behind. */
+        if (debug > 3) fprintf(stderr,"Using variable zones to set gap penalties (pcid = %d)\n",pcid);
+        nhyd_pen = npref_pen = TRUE;
+        nvar_pen = FALSE;
+    }
+    else
+    {
+        nvar_pen = TRUE;
+        nhyd_pen = no_hyd_penalties;
+        npref_pen = no_pref_penalties;
+        gdist = gap_dist;
+    }
+
+    for (i = first_seq; i < last_seq; i++)
+    {
+/*
+   Include end gaps as gaps ?
+   If not, [is, ie) is narrowed to the span between the first and last residue.
+*/
+        is = 0;
+        ie = prf_length;
+        if (use_endgaps == FALSE && endgappenalties == FALSE)
+        {
+            for (j = 0; j < prf_length; j++)
+            {
+                c = alignment[i][j];
+                if ((c < 0) || (c > max_aa))
+                    is++;
+                else
+                    break;
+            }
+            for (j = prf_length-1; j >= 0; j--)
+            {
+                c = alignment[i][j];
+                if ((c < 0) || (c > max_aa))
+                    ie--;
+                else
+                    break;
+            }
+        }
+
+        for (j = is; j < ie; j++)
+        {
+            if ((alignment[i][j] < 0) || (alignment[i][j] > max_aa))
+                gaps[j]++;
+        }
+    }
+
+    if ((!dnaflag) && (nvar_pen == FALSE))
+    {
+        v_weight = (sint *) ckalloc( (prf_length+2) * sizeof (sint) );
+        calc_v_penalties(alignment, prf_length, first_seq, last_seq, v_weight);
+    }
+
+    if ((!dnaflag) && (npref_pen == FALSE))
+    {
+        p_weight = (sint *) ckalloc( (prf_length+2) * sizeof (sint) );
+        calc_p_penalties(alignment, prf_length, first_seq, last_seq, p_weight);
+    }
+
+    if ((!dnaflag) && (nhyd_pen == FALSE))
+    {
+        h_weight = (sint *) ckalloc( (prf_length+2) * sizeof (sint) );
+        calc_h_penalties(alignment, prf_length, first_seq, last_seq, h_weight);
+    }
+
+    gap_pos = (sint *) ckalloc( (prf_length+2) * sizeof (sint) );
+/*
+   mark the residues close to an existing gap (set gap_pos[i] = -ve)
+*/
+    if (dnaflag || (gdist <= 0))
+    {
+        for (i = 0; i < prf_length; i++) gap_pos[i] = gaps[i];
+    }
+    else
+    {
+        i = 0;
+        while (i < prf_length)
+        {
+            if (gaps[i] <= 0)
+            {
+                gap_pos[i] = gaps[i];
+                i++;
+            }
+            else
+            {
+                /* columns in front of the gapped run */
+                for (j = -gdist+1; j < 0; j++)
+                {
+                    if ((i+j >= 0) && (i+j < prf_length) &&
+                        ((gaps[i+j] == 0) || (gaps[i+j] < j))) gap_pos[i+j] = j;
+                }
+                /* the gapped run itself; the bound is tested before gaps[i]
+                   is read so the scan never looks past the profile */
+                while ((i < prf_length) && (gaps[i] > 0))
+                {
+                    gap_pos[i] = gaps[i];
+                    i++;
+                }
+                /* columns behind the run, up to the next gapped column or
+                   the end of the profile (bound checked before the read) */
+                for (j = 0; j < gdist; j++)
+                {
+                    if (i+j >= prf_length) break;
+                    if (gaps[i+j] > 0) break;
+                    if ((gaps[i+j] == 0) || (gaps[i+j] < -j)) gap_pos[i+j] = -j-1;
+                }
+                i += j;
+            }
+        }
+    }
+    if (debug > 3)
+    {
+        fprintf(stdout,"gap open %d gap ext %d\n",(pint)gapcoef,(pint)lencoef);
+        fprintf(stdout,"gaps:\n");
+        for (i = 0; i < prf_length; i++) fprintf(stdout,"%d ", (pint)gaps[i]);
+        fprintf(stdout,"\n");
+        fprintf(stdout,"gap_pos:\n");
+        for (i = 0; i < prf_length; i++) fprintf(stdout,"%d ", (pint)gap_pos[i]);
+        fprintf(stdout,"\n");
+    }
+
+    for (j = 0; j < prf_length; j++)
+    {
+        if (gap_pos[j] <= 0)
+        {
+/*
+   apply residue-specific and hydrophilic gap penalties.
+*/
+            if (!dnaflag) {
+                profile[j+1][GAPCOL] = local_penalty(gapcoef, j,
+                                                     p_weight, h_weight, v_weight);
+                profile[j+1][LENCOL] = lencoef;
+            }
+            else {
+                profile[j+1][GAPCOL] = gapcoef;
+                profile[j+1][LENCOL] = lencoef;
+            }
+
+/*
+   increase gap penalty near to existing gaps.
+   (gap_pos is only negative when gdist > 0, so the division is safe)
+*/
+            if (gap_pos[j] < 0)
+            {
+                profile[j+1][GAPCOL] *= 2.0+2.0*(gdist+gap_pos[j])/gdist;
+            }
+        }
+        else
+        {
+            /* column already contains gaps: scale by the fraction of ungapped sequences */
+            scale = ((float)(numseq-gaps[j])/(float)numseq) * reduced_gap;
+            profile[j+1][GAPCOL] = scale*gapcoef;
+            profile[j+1][LENCOL] = 0.5 * lencoef;
+        }
+/*
+   apply the gap penalty mask
+*/
+        if (struct_penalties != NONE)
+        {
+            val = gap_penalty_mask[j]-'0';
+            if (val > 0 && val < 10)
+            {
+                profile[j+1][GAPCOL] *= val;
+                profile[j+1][LENCOL] *= val;
+            }
+        }
+/*
+   make sure no penalty is zero - even for all-gap positions
+*/
+        if (profile[j+1][GAPCOL] <= 0) profile[j+1][GAPCOL] = 1;
+        if (profile[j+1][LENCOL] <= 0) profile[j+1][LENCOL] = 1;
+    }
+
+/* set the penalties at the beginning and end of the profile */
+    if (endgappenalties == TRUE)
+    {
+        profile[0][GAPCOL] = gapcoef;
+        profile[0][LENCOL] = lencoef;
+    }
+    else
+    {
+        profile[0][GAPCOL] = 0;
+        profile[0][LENCOL] = 0;
+        profile[prf_length][GAPCOL] = 0;
+        profile[prf_length][LENCOL] = 0;
+    }
+    if (debug > 3)
+    {
+        fprintf(stdout,"Opening penalties:\n");
+        for (i = 0; i <= prf_length; i++) fprintf(stdout," %d:%d ",i, (pint)profile[i][GAPCOL]);
+        fprintf(stdout,"\n");
+    }
+    if (debug > 3)
+    {
+        fprintf(stdout,"Extension penalties:\n");
+        for (i = 0; i <= prf_length; i++) fprintf(stdout,"%d:%d ",i, (pint)profile[i][LENCOL]);
+        fprintf(stdout,"\n");
+    }
+
+    /* release the work arrays under the same conditions they were allocated */
+    if ((!dnaflag) && (nvar_pen == FALSE))
+        v_weight = ckfree((void *)v_weight);
+
+    if ((!dnaflag) && (npref_pen == FALSE))
+        p_weight = ckfree((void *)p_weight);
+
+    if ((!dnaflag) && (nhyd_pen == FALSE))
+        h_weight = ckfree((void *)h_weight);
+
+    gap_pos = ckfree((void *)gap_pos);
+}
+
+/*
+ * calc_v_penalties: variable-zone weights for a pair of sequences.
+ *
+ * For each column i of the n columns, the rows aln[fs] and aln[fs+1] are
+ * compared over the window i-vwindow .. i+vwindow-1 (clipped to the
+ * alignment).  Columns where either row holds a code outside 0..max_aa are
+ * ignored.  With s the sum of vlut[][] over the t remaining columns, the
+ * weight is (s+t)*100/(2*t), or 100 when t is 0, and never less than vll.
+ * ls is not used.
+ */
+void calc_v_penalties(char **aln, sint n, sint fs, sint ls, sint *weight)
+{
+    sint col;
+
+    for (col = 0; col < n; col++)
+    {
+        sint from = col - vwindow;
+        sint to = col + vwindow;      /* exclusive */
+        sint score = 0;
+        sint counted = 0;
+        sint w;
+
+        if (from < 0) from = 0;
+        if (to > n) to = n;
+
+        for (w = from; w < to; w++)
+        {
+            char a = aln[fs][w];
+            char b = aln[fs+1][w];
+
+            if ((a >= 0) && (a <= max_aa) && (b >= 0) && (b <= max_aa))
+            {
+                score += vlut[amino_acid_codes[a]-'A'][amino_acid_codes[b]-'A'];
+                counted++;
+            }
+        }
+
+        /* shift by the number of compared columns, then rescale to a percentage */
+        score += counted;
+        if (counted > 0)
+            score = (score*100)/(2*counted);
+        else
+            score = 100;
+
+        /* never drop below the configured lower limit */
+        weight[col] = (score < vll) ? vll : score;
+    }
+}
+
+/*
+ * calc_p_penalties: residue-specific gap weights.
+ *
+ * For each of the n columns, every sequence fs..ls-1 whose residue letter
+ * (amino_acid_codes[code]) appears in pr[] contributes 180 - pas_op[k], k
+ * being the letter's position in pr[].  Codes outside 0..max_aa and letters
+ * missing from pr[] contribute nothing.  The column total is divided by the
+ * number of sequences (ls - fs); with no sequences the weight stays 0.
+ *
+ * The table scan is bounded by the real size of pr[]; the previous fixed
+ * bound of 22 read past the 20-entry tables for letters not listed in pr[].
+ */
+void calc_p_penalties(char **aln, sint n, sint fs, sint ls, sint *weight)
+{
+    const sint ntable = (sint)(sizeof(pr) / sizeof(pr[0]));
+    char ix;
+    sint j,k,numseq;
+    sint i;
+
+    numseq = ls - fs;
+    for (i=0;i<n;i++)
+    {
+        weight[i] = 0;
+        for (k=fs;k<ls;k++)
+        {
+            ix = aln[k][i];
+            if ((ix < 0) || (ix > max_aa)) continue;   /* gap or unknown code */
+            for (j=0;j<ntable;j++)
+            {
+                if (amino_acid_codes[ix] == pr[j])
+                {
+                    weight[i] += (180-pas_op[j]);
+                    break;
+                }
+            }
+        }
+        /* average over the sequences; skip when there are none to avoid /0 */
+        if (numseq > 0)
+            weight[i] /= numseq;
+    }
+
+}
+
+/*
+ * calc_h_penalties: hydrophilic-run weights.
+ *
+ * For each sequence fs..ls-1, columns whose residue letter is listed in
+ * hyd_residues are flagged; every run of more than three consecutive flagged
+ * columns adds 100 to weight[] at each column of the run.  The totals are
+ * then divided by the number of sequences (ls - fs).  With debug > 3 the
+ * weights are printed to stdout.
+ */
+void calc_h_penalties(char **aln, sint n, sint fs, sint ls, sint *weight)
+{
+
+    char ix;
+    sint nh,j,k;
+    sint i,e,s;
+    sint *hyd;
+    float scale;
+
+    hyd = (sint *)ckalloc((n+2) * sizeof(sint));
+    nh = (sint)strlen(hyd_residues);
+    for (i=0;i<n;i++)
+        weight[i] = 0;
+
+    for (k=fs;k<ls;k++)
+    {
+        /* flag the hydrophilic columns of sequence k */
+        for (i=0;i<n;i++)
+        {
+            hyd[i] = 0;
+            ix = aln[k][i];
+            if ((ix < 0) || (ix > max_aa)) continue;
+            for (j=0;j<nh;j++)
+            {
+                if (amino_acid_codes[ix] == hyd_residues[j])
+                {
+                    hyd[i] = 1;
+                    break;
+                }
+            }
+        }
+        /* credit every run longer than three columns */
+        i = 0;
+        while (i < n)
+        {
+            if (hyd[i] == 0) i++;
+            else
+            {
+                s = i;
+                /* test the bound first: hyd[n] is never written above */
+                while ((i<n) && (hyd[i] != 0)) i++;
+                e = i;
+                if (e-s > 3)
+                    for (j=s; j<e; j++) weight[j] += 100;
+            }
+        }
+    }
+
+    /* average over the sequences; nothing to scale when there are none */
+    scale = ls - fs;
+    if (scale > 0)
+    {
+        for (i=0;i<n;i++)
+            weight[i] /= scale;
+    }
+
+    hyd=ckfree((void *)hyd);
+
+    if (debug>3)
+    {
+        for(i=0;i<n;i++) fprintf(stdout,"%d ", (pint)weight[i]);
+        fprintf(stdout,"\n");
+    }
+
+}
+
+/*
+ * local_penalty: scale a gap opening penalty for column n.
+ *
+ * Returns 1 for DNA.  Otherwise the penalty is multiplied by
+ *   - vweight[n]/100 when variable-zone penalties are active (nvar_pen FALSE);
+ *   - 0.5 when hydrophilic penalties are active (nhyd_pen FALSE) and
+ *     hweight[n] > 0;
+ *   - pweight[n]/100 when residue-specific penalties are active (npref_pen
+ *     FALSE) and the hydrophilic factor was not applied.
+ * The product is truncated to sint.  A weight array is only dereferenced when
+ * its modifier is active.
+ */
+sint local_penalty(sint penalty, sint n, sint *pweight, sint *hweight, sint *vweight)
+{
+    Boolean in_hyd_run = FALSE;
+    float factor = 1.0;
+
+    if (dnaflag)
+        return(1);
+
+    if (nvar_pen == FALSE)
+        factor *= (float)vweight[n]/100.0;
+
+    if ((nhyd_pen == FALSE) && (hweight[n] > 0))
+    {
+        factor *= 0.5;
+        in_hyd_run = TRUE;
+    }
+
+    /* the residue-specific factor is skipped inside a hydrophilic run */
+    if ((in_hyd_run == FALSE) && (npref_pen == FALSE))
+        factor *= ((float)pweight[n]/100.0);
+
+    factor *= penalty;
+    return((sint)factor);
+}
+
+/*
+ * percentid: percentage of identical positions between s1 and s2.
+ *
+ * Scans up to `length` positions.  A position is counted when the code in s1
+ * lies in 0..max_aa-1 (a code equal to max_aa is not counted); it is a match
+ * when s2 holds the same code.  The scan stops after the first position where
+ * either sequence holds -3.  Returns 100*matches/counted, or 0 when nothing
+ * was counted.
+ */
+float percentid(char *s1, char *s2,sint length)
+{
+    sint pos;
+    sint same = 0;
+    sint compared = 0;
+    float pct;
+
+    for (pos = 0; pos < length; pos++)
+    {
+        char a = s1[pos];
+        char b = s2[pos];
+
+        if ((a >= 0) && (a < max_aa))
+        {
+            compared++;
+            if (a == b) same++;
+        }
+        if (a == (-3) || b == (-3))
+            break;
+    }
+
+    if (compared == 0)
+        return(0);
+
+    pct = 100.0 * (float)same / (float)compared;
+    return(pct);
+}
+
diff --git a/calcprf1.c b/calcprf1.c
new file mode 100644
index 0000000..3dbb63b
--- /dev/null
+++ b/calcprf1.c
@@ -0,0 +1,99 @@
+#include <stdio.h>
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include "pcma.h"
+
+
+/*
+ * Prototypes
+ */
+
+/*
+ * Global variables
+ */
+
+extern sint max_aa,gap_pos1,gap_pos2;
+
+/* Sum of seq_weight[] over the sequences first_seq..last_seq-1 that hold `code` in column `col`. */
+static sint prf1_code_weight(char **alignment, sint *seq_weight,
+                             sint first_seq, sint last_seq, sint col, sint code)
+{
+    sint s;
+    sint total = 0;
+
+    for (s = first_seq; s < last_seq; s++)
+        if (code == alignment[s][col]) total += seq_weight[s];
+    return total;
+}
+
+/*
+ * Weighted matrix score of column `pos` against residue/gap code `target`,
+ * divided by the total sequence weight and multiplied by `scale`.
+ */
+static sint prf1_column_score(sint **weighting, sint matrix[NUMRES][NUMRES],
+                              sint pos, sint target, sint total_weight, float scale)
+{
+    int acc = 0;
+    sint d;
+
+    for (d = 0; d <= max_aa; d++)
+        acc += (weighting[d][pos] * matrix[d][target]);
+    acc += (weighting[gap_pos1][pos] * matrix[gap_pos1][target]);
+    acc += (weighting[gap_pos2][pos] * matrix[gap_pos2][target]);
+
+    return (sint )(((float)acc / (float)total_weight)*scale);
+}
+
+/*
+ * calc_prf1: build profile rows 1..prf_length from the alignment rows
+ * first_seq..last_seq-1.
+ *
+ * For a column where every sequence has a gap (gaps[pos] == numseq) each
+ * entry is the matrix score of that code against gap_pos1.  Otherwise each
+ * entry is the sequence-weighted average matrix score of the column against
+ * the code, multiplied by the fraction of sequences without a gap.  Entries
+ * are produced for codes 0..max_aa, gap_pos1 and gap_pos2.
+ */
+void calc_prf1(sint **profile, char **alignment, sint *gaps,
+               sint matrix[NUMRES][NUMRES],
+               sint *seq_weight, sint prf_length, sint first_seq, sint last_seq)
+{
+    sint **weighting;
+    sint total_weight = 0;
+    sint numseq = last_seq-first_seq;
+    sint row, col, code, s;
+    float scale;
+
+    /* weighting[code][column] = summed weight of the sequences showing that code */
+    weighting = (sint **) ckalloc( (NUMRES+2) * sizeof (sint *) );
+    for (row = 0; row < NUMRES+2; row++)
+        weighting[row] = (sint *) ckalloc( (prf_length+2) * sizeof (sint) );
+
+    for (s = first_seq; s < last_seq; s++)
+        total_weight += seq_weight[s];
+
+    for (col = 0; col < prf_length; col++)
+    {
+        for (code = 0; code <= max_aa; code++)
+            weighting[code][col] = prf1_code_weight(alignment, seq_weight,
+                                                    first_seq, last_seq, col, code);
+        weighting[gap_pos1][col] = prf1_code_weight(alignment, seq_weight,
+                                                    first_seq, last_seq, col, gap_pos1);
+        weighting[gap_pos2][col] = prf1_code_weight(alignment, seq_weight,
+                                                    first_seq, last_seq, col, gap_pos2);
+    }
+
+    for (col = 0; col < prf_length; col++)
+    {
+        if (gaps[col] == numseq)
+        {
+            /* all-gap column: score every code against the gap symbol */
+            for (code = 0; code <= max_aa; code++)
+                profile[col+1][code] = matrix[code][gap_pos1];
+            profile[col+1][gap_pos1] = matrix[gap_pos1][gap_pos1];
+            profile[col+1][gap_pos2] = matrix[gap_pos2][gap_pos1];
+            continue;
+        }
+
+        scale = (float)(numseq-gaps[col]) / (float)numseq;
+        for (code = 0; code <= max_aa; code++)
+            profile[col+1][code] = prf1_column_score(weighting, matrix, col, code,
+                                                     total_weight, scale);
+        profile[col+1][gap_pos1] = prf1_column_score(weighting, matrix, col, gap_pos1,
+                                                     total_weight, scale);
+        profile[col+1][gap_pos2] = prf1_column_score(weighting, matrix, col, gap_pos2,
+                                                     total_weight, scale);
+    }
+
+    for (row = 0; row < NUMRES+2; row++)
+        weighting[row]=ckfree((void *)weighting[row]);
+    weighting=ckfree((void *)weighting);
+}
+
+
diff --git a/calcprf2.c b/calcprf2.c
new file mode 100644
index 0000000..e57ea22
--- /dev/null
+++ b/calcprf2.c
@@ -0,0 +1,73 @@
+#include <stdio.h>
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include "pcma.h"
+
+/*
+ * Prototypes
+ */
+/*
+ * Global variables
+ */
+
+extern sint max_aa,gap_pos1,gap_pos2;
+
+/* Sum of seq_weight[] over the sequences first_seq..last_seq-1 that hold `code` in column `col`. */
+static sint prf2_code_weight(char **alignment, sint *seq_weight,
+                             sint first_seq, sint last_seq, sint col, sint code)
+{
+    sint s;
+    sint total = 0;
+
+    for (s = first_seq; s < last_seq; s++)
+    {
+        if (code == alignment[s][col]) total += seq_weight[s];
+    }
+    return total;
+}
+
+/*
+ * calc_prf2: fill profile rows 1..prf_length with, for every code
+ * (0..max_aa, gap_pos1, gap_pos2), ten times the fraction of the total
+ * sequence weight carried by the sequences that show that code in the
+ * column, truncated to sint.  When the total weight is zero all entries of
+ * the row are set to 0.
+ */
+void calc_prf2(sint **profile, char **alignment,
+               sint *seq_weight,sint prf_length, sint first_seq, sint last_seq)
+{
+    sint col, code, s;
+    sint part, whole;
+
+    for (col = 0; col < prf_length; col++)
+    {
+        /* total weight of the sequences in this profile */
+        whole = 0;
+        for (s = first_seq; s < last_seq; s++)
+            whole += seq_weight[s];
+
+        if (whole == 0)
+        {
+            for (code = 0; code <= max_aa; code++)
+                profile[col+1][code] = 0;
+            profile[col+1][gap_pos1] = 0;
+            profile[col+1][gap_pos2] = 0;
+            continue;
+        }
+
+        for (code = 0; code <= max_aa; code++)
+        {
+            part = prf2_code_weight(alignment, seq_weight, first_seq, last_seq, col, code);
+            profile[col+1][code] = (sint)(10 * (float)part / (float)whole);
+        }
+
+        part = prf2_code_weight(alignment, seq_weight, first_seq, last_seq, col, gap_pos1);
+        profile[col+1][gap_pos1] = (sint)(10 * (float)part / (float)whole);
+
+        part = prf2_code_weight(alignment, seq_weight, first_seq, last_seq, col, gap_pos2);
+        profile[col+1][gap_pos2] = (sint)(10 * (float)part / (float)whole);
+    }
+}
+
+
diff --git a/calctree.c b/calctree.c
new file mode 100644
index 0000000..b60c868
--- /dev/null
+++ b/calctree.c
@@ -0,0 +1,1264 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <stdarg.h>
+#include <ctype.h>
+#include "pcma.h"
+/*#include "new.h" JP */
+
+/*
+ * Prototypes
+ */
+static void create_tree(treeptr ptree, treeptr parent);
+static void create_node(treeptr pptr, treeptr parent);
+static treeptr insert_node(treeptr pptr);
+static void skip_space(FILE *fd);
+static treeptr avail(void);
+static void set_info(treeptr p, treeptr parent, sint pleaf, char *pname, float pdist);
+static treeptr reroot(treeptr ptree, sint nseqs);
+static treeptr insert_root(treeptr p, float diff);
+static float calc_root_mean(treeptr root, float *maxdist);
+static float calc_mean(treeptr nptr, float *maxdist, sint nseqs);
+static void order_nodes(void);
+static sint calc_weight(sint leaf);
+/*
+static void group_seqs(treeptr p, sint *next_groups, sint nseqs);
+static void mark_group1(treeptr p, sint *groups, sint n);
+static void mark_group2(treeptr p, sint *groups, sint n);
+static void save_set(sint n, sint *groups);
+*/
+static void clear_tree_nodes(treeptr p);
+
+/* JP */
+static streeptr savail(void);
+static void copy_tree(treeptr t1, streeptr t2);
+static void copy_content(treeptr t1, streeptr t2);
+static void group_seqs(streeptr p, sint *next_groups, sint nseqs);
+static void mark_group1(streeptr p, sint *groups, sint n);
+static void mark_group2(streeptr p, sint *groups, sint n);
+static void save_set(sint n, sint *groups, streeptr p);
+extern int am2num_c(int c);
+
+
+/*
+ * Global variables
+ */
+extern Boolean interactive;
+extern Boolean distance_tree;
+extern Boolean usemenu;
+extern sint debug;
+extern double **tmat;
+extern sint **sets;
+extern sint nsets;
+extern char **names;
+extern sint *seq_weight;
+extern Boolean no_weights;
+extern char **seq_array;
+/* NOTE(review): unlike its neighbours this line has no `extern`, so it is a */
+/* definition here; confirm that no other file also defines seqlen_array. */
+sint *seqlen_array;
+extern char *amino_acid_codes;
+
+/* Parser state shared by read_tree, create_tree and skip_space */
+char ch;            /* character most recently read from the tree file */
+FILE *fd;           /* tree file being parsed */
+treeptr *lptr;      /* leaves, in the order create_tree meets them */
+treeptr *olptr;     /* leaves indexed by alignment sequence number (filled by read_tree) */
+treeptr *nptr;      /* internal nodes, in creation order */
+treeptr *ptrs;      /* every node (internal and leaf), in creation order */
+sint nnodes = 0;    /* entries used in nptr */
+sint ntotal = 0;    /* entries used in ptrs */
+Boolean rooted_tree = TRUE;   /* create_tree clears this when a node has a third branch */
+static treeptr seq_tree,root; /* tree as parsed / tree after optional re-rooting */
+static sint *groups, numseq;  /* scratch group vector / number of leaves parsed */
+static sint nseqshere;        /* set by calc_seq_weights to last_seq-first_seq */
+
+/* JP */
+streeptr sroot;
+streeptr *grp_ancestor; /* ancestor nodes for the groups */
+streeptr *solptr;
+streeptr *groupptr;
+sint ngroups;
+extern sint gap_pos1, gap_pos2;
+
+/*
+ * calc_seq_weights: fill sweight[first_seq..last_seq-1] with integer
+ * sequence weights.
+ *
+ * With at least two sequences, a distance tree and weighting enabled, each
+ * weight comes from calc_weight() and is scaled so the weights sum to about
+ * INT_SCALE_FACTOR (each at least 1).  If the tree weights sum to zero, all
+ * sequences are weighted equally.  In every other case each sequence gets
+ * INT_SCALE_FACTOR / nseqs.  Also records the sequence count in nseqshere.
+ */
+void calc_seq_weights(sint first_seq, sint last_seq, sint *sweight)
+{
+    sint i, nseqs;
+    sint temp, sum, *weight;
+
+    nseqs = last_seq-first_seq; nseqshere = nseqs;
+    if ((nseqs >= 2) && (distance_tree == TRUE) && (no_weights == FALSE))
+    {
+/*
+   Calculate sequence weights based on Phylip tree.
+*/
+        weight = (sint *)ckalloc((last_seq+1) * sizeof(sint));
+
+        for (i=first_seq; i<last_seq; i++)
+            weight[i] = calc_weight(i);
+
+/*
+   Normalise the weights, such that the sum of the weights = INT_SCALE_FACTOR
+*/
+        sum = 0;
+        for (i=first_seq; i<last_seq; i++)
+            sum += weight[i];
+
+        if (sum == 0)
+        {
+            for (i=first_seq; i<last_seq; i++)
+                weight[i] = 1;
+            /* nseqs weights of 1 were just assigned; the old `sum = i` used
+               last_seq, which is only right when first_seq is 0 */
+            sum = nseqs;
+        }
+
+        for (i=first_seq; i<last_seq; i++)
+        {
+            sweight[i] = (weight[i] * INT_SCALE_FACTOR) / sum;
+            if (sweight[i] < 1) sweight[i] = 1;
+        }
+
+        weight=ckfree((void *)weight);
+    }
+    else
+    {
+/*
+   Otherwise, use identity weights.
+*/
+        if (nseqs <= 0) return;     /* nothing to weight; avoids dividing by zero */
+        temp = INT_SCALE_FACTOR / nseqs;
+        for (i=first_seq; i<last_seq; i++)
+            sweight[i] = temp;
+    }
+
+}
+
+/*
+ * create_sets: derive the alignment sets from the guide tree.
+ *
+ * Resets nsets, copies the tree under `root` into `sroot` with copy_tree(),
+ * allocates grp_ancestor (one slot per sequence plus one) and, when there are
+ * at least two sequences, lets group_seqs() record one set per internal node
+ * through save_set().
+ */
+void create_sets(sint first_seq, sint last_seq)
+{
+    sint i, j, nseqs;
+
+    nsets = 0;
+    nseqs = last_seq-first_seq;
+
+    /* JP: generate the tree with sequences */
+    copy_tree(root, sroot);
+    /* an array of streeptr: size it from the element type, not a pointer to it */
+    grp_ancestor = ckalloc((nseqs+1) * sizeof(*grp_ancestor));
+
+    if (nseqs >= 2)
+    {
+/*
+   Two or more sequences: walk the tree.
+*/
+        groups = (sint *)ckalloc((nseqs+1) * sizeof(sint));
+        group_seqs(sroot, groups, nseqs);
+        groups=ckfree((void *)groups);
+    }
+    else
+    {
+        /* NOTE(review): with nseqs < 2 the outer loop below never runs, so
+           this branch saves no set. */
+        groups = (sint *)ckalloc((nseqs+1) * sizeof(sint));
+        for (i=0;i<nseqs-1;i++)
+        {
+            for (j=0;j<nseqs;j++)
+                if (j<=i) groups[j] = 1;
+                else if (j==i+1) groups[j] = 2;
+                else groups[j] = 0;
+            save_set(nseqs, groups, sroot);
+        }
+        groups=ckfree((void *)groups);
+    }
+
+}
+
+/*
+ * read_tree: parse the tree in `treefile` and match its leaves to the
+ * alignment sequences first_seq..last_seq-1.
+ *
+ * Returns 1 on success and 0 after reporting an error (file cannot be
+ * opened, first non-blank character is not '(', leaf count differs from the
+ * sequence count, tree is unrooted without distances, or a sequence name has
+ * no leaf).  Names are compared case-insensitively for A-Z, over at most
+ * MAXNAMES characters, with spaces in alignment names treated as '_'.
+ *
+ * Side effects: resets the parser globals, allocates nptr, ptrs, lptr, olptr
+ * and solptr, sets seq_tree and root, fills olptr[] and updates node orders.
+ * NOTE(review): the arrays allocated here are not released on the error
+ * returns that follow the allocation; confirm callers clean up.
+ */
+sint read_tree(char *treefile, sint first_seq, sint last_seq)
+{
+
+    char c;
+    char name1[MAXNAMES+1], name2[MAXNAMES+1];
+    sint i, j, k;
+    sint len;
+    Boolean found;
+
+    numseq = 0;
+    nnodes = 0;
+    ntotal = 0;
+    rooted_tree = TRUE;
+
+#ifdef VMS
+    if ((fd = fopen(treefile,"r","rat=cr","rfm=var")) == NULL)
+#else
+    if ((fd = fopen(treefile, "r")) == NULL)
+#endif
+    {
+        error("cannot open %s", treefile);
+        return((sint)0);
+    }
+
+    skip_space(fd);
+    ch = (char)getc(fd);
+    if (ch != '(')
+    {
+        error("Wrong format in tree file %s", treefile);
+        fclose(fd);                 /* do not leak the handle on this path */
+        return((sint)0);
+    }
+    rewind(fd);
+
+    distance_tree = TRUE;
+
+/*
+   Allocate memory for tree
+*/
+    nptr = (treeptr *)ckalloc(3*(last_seq-first_seq+1) * sizeof(treeptr));
+    ptrs = (treeptr *)ckalloc(3*(last_seq-first_seq+1) * sizeof(treeptr));
+    lptr = (treeptr *)ckalloc((last_seq-first_seq+1) * sizeof(treeptr));
+    olptr = (treeptr *)ckalloc((last_seq+1) * sizeof(treeptr));
+    solptr = (streeptr *)ckalloc((last_seq+1) * sizeof(streeptr));
+
+    seq_tree = avail();
+    set_info(seq_tree, NULL, 0, "", 0.0);
+
+    create_tree(seq_tree,NULL);
+    fclose(fd);
+
+    if (numseq != last_seq-first_seq)
+    {
+        error("tree not compatible with alignment\n(%d sequences in alignment and %d in tree", (pint)(last_seq-first_seq),(pint)numseq);
+        return((sint)0);
+    }
+
+/*
+   If the tree is unrooted, reroot the tree - ie. minimise the difference
+   between the mean root->leaf distances for the left and right branches of
+   the tree.
+*/
+    if (distance_tree == FALSE)
+    {
+        if (rooted_tree == FALSE)
+        {
+            error("input tree is unrooted and has no distances.\nCannot align sequences");
+            return((sint)0);
+        }
+    }
+
+    if (rooted_tree == FALSE)
+    {
+        root = reroot(seq_tree, last_seq-first_seq+1);
+    }
+    else
+    {
+        root = seq_tree;
+    }
+
+/*
+   calculate the 'order' of each node.
+*/
+    order_nodes();
+
+    if (numseq >= 2)
+    {
+/*
+   assign the sequence nodes (in the same order as in the alignment file)
+*/
+        for (i=first_seq; i<last_seq; i++)
+        {
+            len = (sint)strlen(names[i+1]);
+            if (len > MAXNAMES)
+                warning("name %s is too long for PHYLIP tree format (max %d chars)", names[i+1],MAXNAMES);
+
+            /* normalised copy of the alignment name: A-Z lower-cased, ' ' -> '_' */
+            for (k=0; k<len && k<MAXNAMES ; k++)
+            {
+                c = names[i+1][k];
+                if ((c>0x40) && (c<0x5b)) c=c | 0x20;
+                if (c == ' ') c = '_';
+                name2[k] = c;
+            }
+            name2[k]='\0';
+            found = FALSE;
+            for (j=0; j<numseq; j++)
+            {
+                len = (sint)strlen(lptr[j]->name);
+                for (k=0; k<len && k<MAXNAMES ; k++)
+                {
+                    c = lptr[j]->name[k];
+                    if ((c>0x40) && (c<0x5b)) c=c | 0x20;
+                    name1[k] = c;
+                }
+                name1[k]='\0';
+                /* no early exit: if several leaves match, the last one wins */
+                if (strcmp(name1, name2) == 0)
+                {
+                    olptr[i] = lptr[j];
+                    found = TRUE;
+                }
+            }
+            if (found == FALSE)
+            {
+                error("tree not compatible with alignment:\n%s not found", name2);
+                return((sint)0);
+            }
+        }
+
+    }
+    return((sint)1);
+}
+
+/*
+ * create_tree: recursive-descent parser for one subtree of the tree file.
+ *
+ * Reads from the global stream fd, keeping the most recent delimiter in the
+ * global ch.  A '(' starts an internal node whose branches are parsed
+ * recursively; a third branch makes the tree unrooted (rooted_tree = FALSE)
+ * and is attached through an extra node from insert_node().  Anything else
+ * starts a leaf name, which runs up to ':', ',' or ')' and is truncated to
+ * MAXNAMES characters.  An optional ":distance" follows either form; a leaf
+ * without one clears distance_tree.  The node is registered in ptrs[] and in
+ * nptr[] or lptr[], and its fields are set with set_info().
+ */
+static void create_tree(treeptr ptree, treeptr parent)
+{
+    treeptr p;
+
+    sint i, type;
+    float dist;
+    char name[MAXNAMES+1];
+    int c;
+
+/*
+   is this a node or a leaf ?
+*/
+    skip_space(fd);
+    ch = (char)getc(fd);
+    if (ch == '(')
+    {
+/*
+   this must be a node....
+*/
+        type = NODE;
+        name[0] = '\0';
+        ptrs[ntotal] = nptr[nnodes] = ptree;
+        nnodes++;
+        ntotal++;
+
+        create_node(ptree, parent);
+
+        p = ptree->left;
+        create_tree(p, ptree);
+
+        if ( ch == ',')
+        {
+            p = ptree->right;
+            create_tree(p, ptree);
+            if ( ch == ',')
+            {
+                /* third branch: hang the node under a new one */
+                ptree = insert_node(ptree);
+                ptrs[ntotal] = nptr[nnodes] = ptree;
+                nnodes++;
+                ntotal++;
+                p = ptree->right;
+                create_tree(p, ptree);
+                rooted_tree = FALSE;
+            }
+        }
+
+        skip_space(fd);
+        ch = (char)getc(fd);
+    }
+/*
+   ...otherwise, this is a leaf
+*/
+    else
+    {
+        type = LEAF;
+        ptrs[ntotal++] = lptr[numseq++] = ptree;
+/*
+   get the sequence name; the character is kept as an int so that end of
+   file ends the name instead of looping forever on a truncated file
+*/
+        name[0] = ch;
+        i = 1;
+        c = getc(fd);
+        while ((c != EOF) && (c != ':') && (c != ',') && (c != ')'))
+        {
+            if (i < MAXNAMES) name[i++] = (char)c;
+            c = getc(fd);
+        }
+        ch = (char)c;
+        name[i] = '\0';
+        if (ch != ':')
+        {
+            distance_tree = FALSE;
+            dist = 0.0;
+        }
+    }
+
+/*
+   get the distance information
+*/
+    dist = 0.0;
+    if (ch == ':')
+    {
+        skip_space(fd);
+        if (fscanf(fd,"%f",&dist) != 1)
+            dist = 0.0;             /* unreadable number: keep the default */
+        skip_space(fd);
+        ch = (char)getc(fd);
+    }
+    set_info(ptree, parent, type, name, dist);
+
+}
+
+/*
+ * create_node: record `parent` on pptr and give pptr two freshly allocated
+ * children (left first, then right).
+ */
+static void create_node(treeptr pptr, treeptr parent)
+{
+    pptr->parent = parent;
+    pptr->left = avail();
+    pptr->right = avail();
+}
+
+/*
+ * insert_node: create a new internal node that takes pptr as its left child
+ * and gets a freshly allocated right child; returns the new node.
+ *
+ * The new node's parent is pptr's former parent.  Only the right child is
+ * allocated here: the previous version also allocated a left child through
+ * create_node() and overwrote it at once, leaking it, and it left the new
+ * node pointing at itself as parent.
+ */
+static treeptr insert_node(treeptr pptr)
+{
+
+    treeptr newnode;
+
+    newnode = avail();
+    /* set the fields before linking so set_info cannot disturb the children */
+    set_info(newnode, pptr->parent, NODE, "", 0.0);
+
+    newnode->left = pptr;
+    newnode->right = avail();
+    pptr->parent = newnode;
+
+    return(newnode);
+}
+
+/*
+ * skip_space: consume whitespace from fd; the first non-whitespace
+ * character read is pushed back with ungetc().
+ */
+static void skip_space(FILE *fd)
+{
+    int next = getc(fd);
+
+    while (isspace(next))
+        next = getc(fd);
+
+    ungetc(next, fd);
+}
+
+/*
+ * avail: allocate one tree node with no links, zero distance, leaf and
+ * order fields of 0, and an empty name.
+ */
+static treeptr avail(void)
+{
+    treeptr node = ckalloc(sizeof(stree));
+
+    node->parent = NULL;
+    node->left = NULL;
+    node->right = NULL;
+    node->name[0] = '\0';
+    node->dist = 0.0;
+    node->order = 0;
+    node->leaf = 0;
+
+    return(node);
+}
+
+
+/* JP */
+/*
+ * savail: allocate one sequence-tree node with every pointer field NULL and
+ * every numeric field zero.
+ */
+static streeptr savail(void)
+{
+    streeptr node = ckalloc(sizeof(sstree));
+
+    /* tree links */
+    node->parent = NULL;
+    node->left = NULL;
+    node->right = NULL;
+
+    /* node attributes */
+    node->dist = 0.0;
+    node->leaf = 0;
+    node->order = 0;
+    node->name = NULL;
+
+    /* sequence payload */
+    node->seq = NULL;
+    node->seqnum = 0;
+    node->abstractseq = NULL;
+    node->abseqnum = 0;
+    node->abseqlength = 0;
+
+    return(node);
+}
+
+
+
+/*
+ * clear_tree: free the nodes below p (or below the current root when p is
+ * NULL) and then the bookkeeping arrays nptr, ptrs, lptr and olptr.
+ */
+void clear_tree(treeptr p)
+{
+    clear_tree_nodes(p);
+
+    /* the node arrays only held pointers into the tree that was just freed */
+    olptr = ckfree((void *)olptr);
+    lptr = ckfree((void *)lptr);
+    ptrs = ckfree((void *)ptrs);
+    nptr = ckfree((void *)nptr);
+}
+
+/*
+ * clear_tree_nodes: free the subtree rooted at p, children before parent.
+ * A NULL p means "the current root"; if there is no root either, nothing is
+ * done (this case used to dereference NULL).
+ */
+static void clear_tree_nodes(treeptr p)
+{
+    if (p==NULL) p = root;
+    if (p==NULL) return;
+
+    if (p->left != NULL)
+    {
+        clear_tree_nodes(p->left);
+    }
+    if (p->right != NULL)
+    {
+        clear_tree_nodes(p->right);
+    }
+    p->left = NULL;
+    p->right = NULL;
+    p=ckfree((void *)p);
+}
+
+/*
+ * set_info: store parent, leaf flag, branch length and name on node p and
+ * reset its order to 0.  When the stored leaf flag equals TRUE both child
+ * links are cleared.
+ */
+static void set_info(treeptr p, treeptr parent, sint pleaf, char *pname, float pdist)
+{
+    strcpy(p->name, pname);
+    p->order = 0;
+    p->dist = pdist;
+    p->leaf = pleaf;
+    p->parent = parent;
+
+    if (p->leaf == TRUE)
+        p->left = p->right = NULL;
+}
+
+/*
+ * reroot: choose a branch on which to place a new root and insert it.
+ *
+ * Every node in ptrs[] is scored with calc_root_mean() (node without parent)
+ * or calc_mean() (any other node).  A node is a candidate when the returned
+ * difference is 0, or positive and smaller than twice its branch length.
+ * The first candidate is taken, and later ones replace it when their maximum
+ * leaf distance is smaller.  If no candidate replaces ptree itself, the root
+ * goes above ptree's right child with the sum of both child branch lengths
+ * as the difference.  Returns the node created by insert_root().
+ */
+static treeptr reroot(treeptr ptree, sint nseqs)
+{
+    treeptr best = ptree;
+    treeptr candidate, newroot;
+    float balance, best_balance = 0.0, best_depth = 1.0, depth;
+    Boolean have_best = FALSE;
+    sint idx;
+
+    for (idx = 0; idx < ntotal; idx++)
+    {
+        Boolean acceptable;
+
+        candidate = ptrs[idx];
+        balance = (candidate->parent == NULL)
+                      ? calc_root_mean(candidate, &depth)
+                      : calc_mean(candidate, &depth, nseqs);
+
+        acceptable = (balance == 0) ||
+                     ((balance > 0) && (balance < 2 * candidate->dist));
+        if (!acceptable)
+            continue;
+
+        /* keep the candidate giving the shallowest tree */
+        if ((have_best == FALSE) || (depth < best_depth))
+        {
+            have_best = TRUE;
+            best = candidate;
+            best_depth = depth;
+            best_balance = balance;
+        }
+    }
+
+    /* fall back to the branch leading to the right child of the old top node */
+    if (best == ptree)
+    {
+        best_balance = best->left->dist + best->right->dist;
+        best = best->right;
+    }
+    newroot = insert_root(best, best_balance);
+
+    /* result unused; the call is kept so the execution trace is unchanged */
+    balance = calc_root_mean(newroot, &depth);
+
+    return(newroot);
+}
+
+/*
+ * insert_root: place a new root on the branch between p and its parent.
+ *
+ * The branch length of p is split: p keeps diff/2, clamped to the range
+ * 0 .. original length, and the former parent t receives the remainder.
+ * The parent links from t up to the old top node are then reversed, each
+ * node taking over the branch length that belonged to the node below it.
+ * Finally the old top node, left with a single child, is unlinked and its
+ * branch length is added to that child.  The unlinked node is not freed here.
+ *
+ * p must have a parent: p->parent is dereferenced without a check.
+ * Returns the newly allocated root.
+ */
+static treeptr insert_root(treeptr p, float diff)
+{
+ treeptr newp, prev, q, t;
+ float dist, prevdist,td;
+
+ newp = avail();
+
+ t = p->parent;
+ prevdist = t->dist;
+
+ p->parent = newp;
+
+ dist = p->dist;
+
+ /* split p's branch between p and its former parent t */
+ p->dist = diff / 2;
+ if (p->dist < 0.0) p->dist = 0.0;
+ if (p->dist > dist) p->dist = dist;
+
+ t->dist = dist - p->dist;
+
+ newp->left = t;
+ newp->right = p;
+ newp->parent = NULL;
+ newp->dist = 0.0;
+ newp->leaf = NODE;
+
+ /* the child slot of t that held p now holds t's old parent */
+ if (t->left == p) t->left = t->parent;
+ else t->right = t->parent;
+
+ prev = t;
+ q = t->parent;
+
+ t->parent = newp;
+
+ /* walk up the old ancestor chain, turning each parent link into a child link */
+ while (q != NULL)
+ {
+ if (q->left == prev)
+ {
+ q->left = q->parent;
+ q->parent = prev;
+ td = q->dist;
+ q->dist = prevdist;
+ prevdist = td;
+ prev = q;
+ q = q->left;
+ }
+ else
+ {
+ q->right = q->parent;
+ q->parent = prev;
+ td = q->dist;
+ q->dist = prevdist;
+ prevdist = td;
+ prev = q;
+ q = q->right;
+ }
+ }
+
+/*
+ remove the old root node
+*/
+ /* prev is the last node visited; one of its child slots is now NULL */
+ q = prev;
+ if (q->left == NULL)
+ {
+ dist = q->dist;
+ q = q->right;
+ q->dist += dist;
+ q->parent = prev->parent;
+ if (prev->parent->left == prev)
+ prev->parent->left = q;
+ else
+ prev->parent->right = q;
+ prev->right = NULL;
+ }
+ else
+ {
+ dist = q->dist;
+ q = q->left;
+ q->dist += dist;
+ q->parent = prev->parent;
+ if (prev->parent->left == prev)
+ prev->parent->left = q;
+ else
+ prev->parent->right = q;
+ prev->left = NULL;
+ }
+
+ return(newp);
+}
+
+/*
+ * calc_root_mean: difference between the mean root-to-leaf distance of the
+ * leaves under root->left and that of all other leaves (left minus right).
+ * *maxdist receives the largest root-to-leaf distance found.
+ */
+static float calc_root_mean(treeptr root, float *maxdist)
+{
+    float left_total = 0.0, right_total = 0.0;
+    float left_mean, right_mean, balance;
+    float path_len;
+    sint left_count = 0, right_count = 0;
+    sint leaf_idx;
+    treeptr walker;
+
+    *maxdist = 0;
+    for (leaf_idx = 0; leaf_idx < numseq; leaf_idx++)
+    {
+        /* climb from the leaf to the child of root it hangs under */
+        path_len = 0.0;
+        for (walker = lptr[leaf_idx]; walker->parent != root; walker = walker->parent)
+            path_len += walker->dist;
+        path_len += walker->dist;
+
+        if (walker == root->left)
+        {
+            left_total += path_len;
+            left_count++;
+        }
+        else
+        {
+            right_total += path_len;
+            right_count++;
+        }
+
+        if (path_len > (*maxdist)) *maxdist = path_len;
+    }
+
+    left_mean = left_total / left_count;
+    right_mean = right_total / right_count;
+
+    balance = left_mean - right_mean;
+    return(balance);
+}
+
+
+/*
+ * calc_mean: for the branch above node nptr, return the mean distance of the
+ * leaves that are not below nptr ("left") minus the mean distance of the
+ * leaves below it ("right").  *maxdist receives the largest per-leaf
+ * distance accumulated in the loop (before dist2node is added).
+ *
+ * nseqs sizes the two scratch arrays holding the path from nptr to the top
+ * of the tree.
+ * NOTE(review): the path length is not checked against nseqs -- presumably
+ * callers pass a value at least as large as the tree depth; confirm.
+ */
+static float calc_mean(treeptr nptr, float *maxdist, sint nseqs)
+{
+ float dist , lsum = 0.0, rsum = 0.0, lmean,rmean,diff;
+ treeptr p, *path2root;
+ float *dist2node;
+ sint depth = 0, i,j , n = 0;
+ sint nl , nr;
+ sint direction, found;
+
+ path2root = (treeptr *)ckalloc(nseqs * sizeof(treeptr));
+ dist2node = (float *)ckalloc(nseqs * sizeof(float));
+/*
+ determine all nodes between the selected node and the root;
+*/
+ /* dist2node[k] = sum of branch lengths from nptr up to and including path2root[k] */
+ depth = (*maxdist) = dist = 0;
+ nl = nr = 0;
+ p = nptr;
+ while (p != NULL)
+ {
+ path2root[depth] = p;
+ dist += p->dist;
+ dist2node[depth] = dist;
+ p = p->parent;
+ depth++;
+ }
+
+/*
+ *nl = *nr = 0;
+ for each leaf, determine whether the leaf is left or right of the node.
+ (RIGHT = descendant, LEFT = not descendant)
+*/
+ for (i=0; i< numseq; i++)
+ {
+ p = lptr[i];
+ if (p == nptr)
+ {
+ direction = RIGHT;
+ dist = 0.0;
+ }
+ else
+ {
+ direction = LEFT;
+ dist = 0.0;
+/*
+ find the common ancestor.
+*/
+ found = FALSE;
+ n = 0;
+ /* climb from the leaf until its parent lies on the nptr-to-top path */
+ while ((found == FALSE) && (p->parent != NULL))
+ {
+ for (j=0; j< depth; j++)
+ if (p->parent == path2root[j])
+ {
+ found = TRUE;
+ n = j;
+ }
+ dist += p->dist;
+ p = p->parent;
+ }
+ if (p == nptr) direction = RIGHT;
+ }
+
+ if (direction == LEFT)
+ {
+ lsum += dist;
+ /* NOTE(review): n stays 0 if no common ancestor was found, which would index dist2node[-1] -- confirm this cannot happen */
+ lsum += dist2node[n-1];
+ nl++;
+ }
+ else
+ {
+ rsum += dist;
+ nr++;
+ }
+
+ if (dist > (*maxdist)) *maxdist = dist;
+ }
+
+ dist2node=ckfree((void *)dist2node);
+ path2root=ckfree((void *)path2root);
+
+ /* nl or nr may be 0 here; the division is floating point */
+ lmean = lsum / nl;
+ rmean = rsum / nr;
+
+ diff = lmean - rmean;
+ return(diff);
+}
+
+/*
+ * order_nodes: for every leaf in lptr[], add one to the order field of the
+ * leaf and of each of its ancestors, so a node's order ends up as the number
+ * of leaves at or below it.
+ */
+static void order_nodes(void)
+{
+    sint leaf_idx;
+    treeptr node;
+
+    for (leaf_idx = 0; leaf_idx < numseq; leaf_idx++)
+        for (node = lptr[leaf_idx]; node != NULL; node = node->parent)
+            node->order++;
+}
+
+
+/*
+ * calc_weight: weight of sequence `leaf` from the tree.
+ *
+ * Sums dist/order over the leaf and each ancestor that has a parent (the
+ * top node is excluded), multiplies by 100 and truncates to sint.
+ */
+static sint calc_weight(sint leaf)
+{
+    treeptr node;
+    float total = 0.0;
+
+    for (node = olptr[leaf]; node->parent != NULL; node = node->parent)
+        total += node->dist / node->order;
+
+    total *= 100.0;
+
+    return((sint)total);
+}
+
+/* JP change a little bit: treeptr -> streeptr
+static void group_seqs(treeptr p, sint *next_groups, sint nseqs)
+*/
+
+/*
+ * group_seqs: post-order walk that records one set per node having a right
+ * child.
+ *
+ * On return next_groups[0..nseqs-1] holds the membership vector of p: 1 for
+ * sequences coming from the left branch, 2 for the right branch, 0 for
+ * sequences outside p.  The vector is saved with save_set() before being
+ * handed back.
+ */
+static void group_seqs(streeptr p, sint *next_groups, sint nseqs)
+{
+    sint k;
+    sint *members;
+    streeptr child;
+
+    members = (sint *)ckalloc((nseqs+1) * sizeof(sint));
+    for (k = 0; k < nseqs; k++)
+        members[k] = 0;
+
+    child = p->left;
+    if (child != NULL)
+    {
+        if (child->leaf != NODE)
+        {
+            mark_group1(child, members, nseqs);
+        }
+        else
+        {
+            /* everything found below the left branch becomes group 1 */
+            group_seqs(child, next_groups, nseqs);
+            for (k = 0; k < nseqs; k++)
+                if (next_groups[k] != 0) members[k] = 1;
+        }
+    }
+
+    child = p->right;
+    if (child != NULL)
+    {
+        if (child->leaf != NODE)
+        {
+            mark_group2(child, members, nseqs);
+        }
+        else
+        {
+            /* everything found below the right branch becomes group 2 */
+            group_seqs(child, next_groups, nseqs);
+            for (k = 0; k < nseqs; k++)
+                if (next_groups[k] != 0) members[k] = 2;
+        }
+        save_set(nseqs, members, p);
+    }
+
+    for (k = 0; k < nseqs; k++)
+        next_groups[k] = members[k];
+
+    members=ckfree((void *)members);
+}
+
+/* JP: change a little bit: treeptr -> streeptr
+static void mark_group1(treeptr p, sint *groups, sint n)
+*/
+/*
+ * mark_group1: overwrite groups[0..n-1] so that every position whose
+ * solptr[] entry is the node p holds 1 and every other position holds 0.
+ */
+static void mark_group1(streeptr p, sint *groups, sint n)
+{
+    sint k;
+
+    for (k = 0; k < n; k++)
+        groups[k] = (solptr[k] == p) ? 1 : 0;
+}
+
+/* JP: change a little bit: treeptr -> streeptr
+static void mark_group2(treeptr p, sint *groups, sint n)
+*/
+/*
+ * mark_group2: label the position(s) whose solptr[] entry is the node p
+ * with 2.  Every other position that is already non-zero is set to 1;
+ * zero entries are left alone.
+ */
+static void mark_group2(streeptr p, sint *groups, sint n)
+{
+    sint k;
+
+    for (k = 0; k < n; k++)
+    {
+        if (solptr[k] == p)
+        {
+            groups[k] = 2;
+            continue;
+        }
+        if (groups[k] != 0)
+            groups[k] = 1;
+    }
+}
+
+/* JP : adding a parameter of the tree node
+static void save_set(sint n, sint *groups)
+*/
+/*
+ * save_set: store one grouping in the next free slot of the global tables.
+ *
+ * groups[0..n-1] is copied into sets[nsets+1][1..n] (the destination is
+ * 1-based), p is remembered in grp_ancestor[] for the same slot, and
+ * nsets is advanced by one.
+ */
+static void save_set(sint n, sint *groups, streeptr p)
+{
+    sint col;
+    sint slot = nsets + 1;
+
+    for (col = 1; col <= n; col++)
+        sets[slot][col] = groups[col-1];
+
+    grp_ancestor[slot] = p;
+    nsets = slot;
+}
+
+
+
+/* Append text to the NUL-terminated string held in buf (cap bytes in all),
+ * truncating rather than writing past the end of the buffer. */
+static void append_bounded(char *buf, size_t cap, const char *text)
+{
+    size_t used = strlen(buf);
+
+    if (used + 1 < cap)
+        strncat(buf, text, cap - used - 1);
+}
+
+/*
+ * calc_similarities: turn tree path lengths into percent similarities.
+ *
+ * For nseqs >= 2 the distance of each pair (i, j), j < i, is the length of
+ * the path through the tree: the branch lengths summed from olptr[j] up to
+ * the first node that lies on the root path of olptr[i], plus the summed
+ * branch lengths from olptr[i] up to the node just below that ancestor.
+ * Distances are clamped to [0.01, 1.0].  Pairs whose raw distance exceeds
+ * 1.1 are counted; when there are any and the program is interactive, the
+ * first few are listed and the user is asked whether to continue.
+ * For nseqs < 2 the distances are read from tmat instead.
+ *
+ * On success tmat[i+1][j+1] (1-based, symmetric, zero diagonal) is set to
+ * 100 - 100 * distance and 1 is returned.  If the user declines to
+ * continue, tmat is left untouched and 0 is returned.
+ *
+ * Changes from the previous version:
+ *  - the offending pairs are kept in fixed arrays of MAX_REPORTED entries
+ *    and only the first MAX_REPORTED are stored; the old nseqs-sized heap
+ *    arrays were overrun as soon as more than nseqs pairs were too distant;
+ *  - all work arrays are released on the "user declined" path as well;
+ *  - the warning text is assembled with bounded writes;
+ *  - dist2node[n-1] is no longer read when no common ancestor was found.
+ */
+sint calc_similarities(sint nseqs)
+{
+    enum { MAX_REPORTED = 5 };      /* pairs named in the warning text */
+
+    sint depth = 0, i, j, k, n;
+    sint found;
+    sint nerrs;
+    sint status = 1;
+    sint seq1[MAX_REPORTED], seq2[MAX_REPORTED];
+    float bad_dist[MAX_REPORTED];
+    treeptr p, *path2root;
+    float dist, tail;
+    float *dist2node;
+    double **dmat;
+    char err_mess[1024], err1[MAXLINE];
+    char answer;
+
+    path2root = (treeptr *)ckalloc((nseqs) * sizeof(treeptr));
+    dist2node = (float *)ckalloc((nseqs) * sizeof(float));
+    dmat = (double **)ckalloc((nseqs) * sizeof(double *));
+    for (i=0;i<nseqs;i++)
+        dmat[i] = (double *)ckalloc((nseqs) * sizeof(double));
+
+    if (nseqs >= 2)
+    {
+        for (i = 0; i < nseqs; i++)
+        {
+            /* every node between leaf i and the root, with running distance */
+            depth = dist = 0;
+            p = olptr[i];
+            while (p != NULL)
+            {
+                path2root[depth] = p;
+                dist += p->dist;
+                dist2node[depth] = dist;
+                p = p->parent;
+                depth++;
+            }
+
+            for (j = 0; j < i; j++)
+            {
+                /* climb from leaf j until the parent is on leaf i's path */
+                p = olptr[j];
+                dist = 0.0;
+                found = FALSE;
+                n = 0;
+                while ((found == FALSE) && (p->parent != NULL))
+                {
+                    for (k = 0; k < depth; k++)
+                        if (p->parent == path2root[k])
+                        {
+                            found = TRUE;
+                            n = k;
+                        }
+                    dist += p->dist;
+                    p = p->parent;
+                }
+
+                /* n stays 0 when nothing matched; index -1 must not be read */
+                tail = (n > 0) ? dist2node[n-1] : 0.0f;
+                dmat[i][j] = dist + tail;
+            }
+        }
+
+        nerrs = 0;
+        for (i = 0; i < nseqs; i++)
+        {
+            dmat[i][i] = 0.0;
+            for (j = 0; j < i; j++)
+            {
+                if (dmat[i][j] < 0.01) dmat[i][j] = 0.01;
+                if (dmat[i][j] > 1.0)
+                {
+                    if (dmat[i][j] > 1.1)
+                    {
+                        /* count every pair, remember only those reported */
+                        if (nerrs < MAX_REPORTED)
+                        {
+                            seq1[nerrs] = i;
+                            seq2[nerrs] = j;
+                            bad_dist[nerrs] = dmat[i][j];
+                        }
+                        nerrs++;
+                    }
+                    dmat[i][j] = 1.0;
+                }
+            }
+        }
+
+        if (nerrs > 0)
+        {
+            answer = 'y';
+            if (interactive)
+            {
+                err_mess[0] = '\0';
+                append_bounded(err_mess, sizeof(err_mess),
+                    "The following sequences are too divergent to be aligned:\n");
+                for (i = 0; i < nerrs && i < MAX_REPORTED; i++)
+                {
+                    snprintf(err1, sizeof(err1), " %s and %s (distance %1.3f)\n",
+                        names[seq1[i]+1],
+                        names[seq2[i]+1], bad_dist[i]);
+                    append_bounded(err_mess, sizeof(err_mess), err1);
+                }
+                append_bounded(err_mess, sizeof(err_mess),
+                    "(All distances should be between 0.0 and 1.0)\n");
+                append_bounded(err_mess, sizeof(err_mess),
+                    "This may not be fatal but you have been warned!\n");
+                append_bounded(err_mess, sizeof(err_mess),
+                    "SUGGESTION: Remove one or more problem sequences and try again");
+                answer = prompt_for_yes_no(err_mess,"Continue ");
+            }
+            if ((answer != 'y') && (answer != 'Y'))
+                status = 0;
+        }
+    }
+    else
+    {
+        for (i = 0; i < nseqs; i++)
+            for (j = 0; j < i; j++)
+                dmat[i][j] = tmat[i+1][j+1];
+    }
+
+    path2root=ckfree((void *)path2root);
+    dist2node=ckfree((void *)dist2node);
+
+    /* tmat is only rewritten when the run is going to continue */
+    if (status)
+    {
+        for (i = 0; i < nseqs; i++)
+        {
+            tmat[i+1][i+1] = 0.0;
+            for (j = 0; j < i; j++)
+            {
+                tmat[i+1][j+1] = 100.0 - (dmat[i][j]) * 100.0;
+                tmat[j+1][i+1] = tmat[i+1][j+1];
+            }
+        }
+    }
+
+    for (i=0;i<nseqs;i++) dmat[i]=ckfree((void *)dmat[i]);
+    dmat=ckfree((void *)dmat);
+
+    return(status);
+}
+
+/* JP */
+/*
+ * copy_tree: recursively mirror the tree below t1 into streeptr nodes.
+ *
+ * When t1 is the global root, the t2 argument is ignored: a node is taken
+ * from savail(), published in sroot and filled from t1.  For any node with
+ * a left child, two fresh nodes are attached to t2, filled with
+ * copy_content() and then expanded in turn.  A node without a left child
+ * ends the recursion.
+ */
+static void copy_tree(treeptr t1, streeptr t2)
+{
+    if (t1 == root)
+    {
+        t2 = savail();
+        sroot = t2;
+        copy_content(t1, t2);
+    }
+
+    if (t1->left != NULL)
+    {
+        t2->left = savail();
+        t2->right = savail();
+        copy_content(t1->left, t2->left);
+        copy_content(t1->right, t2->right);
+        copy_tree(t1->left, t2->left);
+        copy_tree(t1->right, t2->right);
+    }
+}
+
+/* JP: for_align_list -- globals defined in other source files; of these,
+ * copy_content() below reads seqFormat, seqnumlist, seq_array_all and
+ * names_all. */
+extern int seqFormat;
+extern int *seqnumlist;
+extern int filecount;
+extern int *seqlen_array_all;
+extern char **seq_array_all; /* for all the sequences */
+extern char **names_all;
+extern char *am;
+
+static void copy_content(treeptr t1, streeptr t2)
+{ /*
+ * copy_content: fill the node t2 from the tree node t1.
+ *
+ * dist, leaf and order are always copied.  Nothing else is done unless
+ * t1->leaf is non-zero; for such a node the name and sequence arrays of
+ * t2 are allocated (both are used from index 1) and filled:
+ *
+ *  - seqFormat != CLUSTALIST: one name (a copy of t1->name) and, for the
+ *    slot i where olptr[i] == t1, one sequence of seqlen_array[i+1]
+ *    residues taken from seq_array[i+1] and converted with am2num().
+ *  - seqFormat == CLUSTALIST: the entry i of names[] equal to t1->name
+ *    selects a group of seqnumlist[i] sequences; their names come from
+ *    names_all[] and their residues from seq_array_all[], starting after
+ *    the sequences of groups 1..i-1.  Residues equal to gap_pos2 are
+ *    stored as 0.
+ *
+ * In both cases t2 is entered into solptr[] at the position t1 has in
+ * olptr[].
+ */
+ sint i,j,k;
+
+ t2->dist = t1->dist;
+ t2->leaf = t1->leaf;
+ t2->order = t1->order;
+
+ /*fprintf(stdout, "%2.1f %d %d\n", t2->dist, t2->leaf, t2->order); */
+ if(t1->leaf) {
+
+ /* JP: for_align_list -- this branch gives the node exactly one sequence */
+ if(seqFormat!=CLUSTALIST) {
+ t2->name = ckalloc(2*sizeof(char *));
+ t2->name[1] = ckalloc(100* sizeof(char )); /* NOTE(review): fixed 100-byte buffer and the strcpy below is unchecked -- confirm t1->name always fits */
+ strcpy(t2->name[1], t1->name);
+ /*fprintf(stdout, "%s ", t2->name[1]);*/
+ t2->seqnum = 1;
+
+ /*fprintf(stdout, "nseqshere %d \n", nseqshere);*/
+ for(i=0;i<nseqshere;i++) {
+ if(olptr[i]==t1) {/*fprintf(stdout,"i: %d----------\n",i);*/
+ solptr[i] = t2;
+ t2->seq = ckalloc(2*sizeof(int *));
+ t2->seqlength = seqlen_array[i+1];
+ /*fprintf(stdout, "length: %d\n", seqlen_array[i+1]);*/
+ t2->seq[1] = ckalloc((seqlen_array[i+1]+1)*sizeof(int));
+ for(j=1;j<=seqlen_array[i+1];j++)
+ {
+ t2->seq[1][j] = am2num(amino_acid_codes[seq_array[i+1][j]]);
+ /*fprintf(stdout, "%c", am[t2->seq[1][j]]);*/
+ if(debug>1) fprintf(stdout, "%d ", am2num(amino_acid_codes[seq_array[i+1][j]] ));
+ }
+ if(debug>1)fprintf(stdout, "\n");
+ }
+ }
+ /*fprintf(stdout, "+++\n");*/
+ }
+ /* JP: for_align_list -- CLUSTALIST (presumably the alignment-list input
+ * format; confirm): the node receives a group of seqnumlist[i] sequences */
+ else {
+ /* find the name that matches t1->name
+ * NOTE(review): if no entry matches, i is left at nseqshere+1 and is
+ * still used to index seqnumlist[] and seqlen_array[] below */
+ for(i=1;i<=nseqshere;i++) {
+ if(strcmp(names[i], t1->name)==0) break;
+ }
+
+ t2->name = ckalloc((seqnumlist[i]+1)*sizeof(char *));
+ for(j=1;j<=seqnumlist[i];j++) t2->name[j] = ckalloc(100* sizeof(char ));
+ t2->seqnum = seqnumlist[i];
+ t2->seqlength = seqlen_array[i];
+ t2->seq = ckalloc((seqnumlist[i]+1)*sizeof(int *));
+ for(j=1;j<=seqnumlist[i];j++) t2->seq[j] = ckalloc((t2->seqlength+1)*sizeof(int));
+ /* find the starting sequence number in the seq_array_all list:
+ * tmpcount is the number of sequences in groups 1..i-1 */
+ int tmpcount = 0;
+ for(j=1;j<=i-1;j++) tmpcount+= seqnumlist[j];
+ for(j=1;j<=seqnumlist[i];j++) {
+ strcpy(t2->name[j], names_all[tmpcount+j]);
+ for(k=1;k<=t2->seqlength;k++) {
+ /* pay special attention to gaps in input sequences: gap_pos2 is stored as 0 */
+ if(seq_array_all[tmpcount+j][k]==gap_pos2) t2->seq[j][k] = 0;
+ else t2->seq[j][k] = am2num(amino_acid_codes[seq_array_all[tmpcount+j][k]]);
+ }
+ }
+ /* test the content: output the sequences */
+ if(debug>11) {
+ fprintf(stdout, "group: %d\n", i);
+ for(j=1;j<=seqnumlist[i];j++) {
+ fprintf(stdout, ">%s\n", t2->name[j]);
+ for(k=1;k<=t2->seqlength;k++) {
+ fprintf(stdout, "%c", amino_acid_codes[seq_array_all[tmpcount+j][k]]);
+ /*fprintf(stdout, "%d%c%d ", seq_array_all[tmpcount+j][k], amino_acid_codes[seq_array_all[tmpcount+j][k]], t2->seq[j][k]);*/
+ }
+ fprintf(stdout, "\n");
+ }
+ fprintf(stdout, "\n");
+ }
+
+ for(i=0;i<nseqshere;i++) { /* i is reused here as a 0-based olptr/solptr index */
+ if(olptr[i]==t1) {
+ solptr[i] = t2; break;
+ }
+ }
+
+ }
+
+ }
+}
+
+/* JP */
+/*
+ * average_group_identity: mean of tmat[a][b] over every pair (a, b) with
+ * group[a] == 1 and group[b] == 2, indices running over 1..nseqshere.
+ *
+ * The mean is echoed to stdout when debug > 1.
+ * NOTE(review): when no such pair exists the division is 0.0 / 0, so the
+ * result is not a usable number (NaN on IEEE systems) -- callers should
+ * confirm both groups are non-empty.
+ */
+double average_group_identity(sint *group)
+{
+    sint a, b;
+    sint pairs = 0;
+    double total = 0;
+    double mean;
+
+    for (a = 1; a <= nseqshere; a++)
+    {
+        if (group[a] != 1)
+            continue;
+        for (b = 1; b <= nseqshere; b++)
+        {
+            if (group[b] != 2)
+                continue;
+            total += tmat[a][b];
+            pairs++;
+        }
+    }
+
+    mean = total / pairs;
+    if (debug > 1)
+        fprintf(stdout, "sum: %5.3f\n", mean);
+    return mean;
+}
+
+/* JP */
+/*
+ * assign_node: give node p its own copy of the sequences flagged in
+ * aligned[1..nseqshere].
+ *
+ * p->seqnum becomes the number of flagged sequences and p->seqlength the
+ * largest seqlen_array[] value among them.  Names are copied from names[]
+ * and residues from seq_array[], converted with am2num(); positions equal
+ * to gap_pos1 or gap_pos2 are stored as 0.  Both p->name and p->seq are
+ * filled from index 1.  Only the first seqlen_array[i] positions of each
+ * row are written.
+ *
+ * If p already has a sequence array the function prints a message to
+ * stdout and terminates the program with exit(0).
+ */
+void assign_node(streeptr p, sint *aligned)
+{
+    sint s, pos, row;
+    sint members = 0;
+    sint longest = 0;
+
+    if (p->seq)
+    {
+        fprintf(stdout, "assign nodes: sequences already exist\n");
+        exit(0);
+    }
+
+    /* first pass: how many sequences, and how long is the longest */
+    for (s = 1; s <= nseqshere; s++)
+    {
+        if (!aligned[s])
+            continue;
+        members++;
+        if (seqlen_array[s] > longest)
+            longest = seqlen_array[s];
+    }
+    p->seqlength = longest;
+    p->seqnum = members;
+
+    p->seq = ckalloc( (members+1) * sizeof( sint *));
+    for (row = 1; row <= members; row++)
+        p->seq[row] = ckalloc((longest+1) * sizeof(sint ));
+
+    p->name = ckalloc( (members+1) *sizeof( char *));
+    for (row = 1; row <= members; row++)
+        p->name[row] = ckalloc(54*sizeof(char));
+
+    /* second pass: copy names and encode residues */
+    row = 0;
+    for (s = 1; s <= nseqshere; s++)
+    {
+        if (!aligned[s])
+            continue;
+        row++;
+        strcpy(p->name[row], names[s]);
+        for (pos = 1; pos <= seqlen_array[s]; pos++)
+        {
+            if ((seq_array[s][pos] == gap_pos1) || (seq_array[s][pos] == gap_pos2))
+                p->seq[row][pos] = 0;
+            else
+                p->seq[row][pos] = am2num(amino_acid_codes[seq_array[s][pos]]);
+        }
+    }
+}
diff --git a/dayhoff.h b/dayhoff.h
new file mode 100644
index 0000000..ae967ab
--- /dev/null
+++ b/dayhoff.h
@@ -0,0 +1,45 @@
+/* DAYHOFF.H
+
+ Table of estimated PAMS (actual no. of substitutions per 100 residues)
+ for a range of observed amino acid distances from 75.0% (the first entry
+ in the array), in 0.1% increments, up to 93.0% (181 entries in all, so
+ entry k is for an observed distance of 75.0% + k * 0.1%).
+
+ These values are used to correct for multiple hits in protein alignments.
+ The values below are for observed distances above 74.9%. For values above
+ 93%, an arbitrary value of 1000 PAMS (1000% substitution) is used.
+
+ These values are derived from a Dayhoff model (1978) of amino acid
+ substitution and assume average amino acid composition and that amino
+ acids replace each other at the same rate as in the original Dayhoff model.
+
+ Up to 75% observed distance, use Kimura's empirical formula to derive
+ the correction. For 75% or greater, use this table. Kimura's formula
+ is accurate up to about 75% and fails completely above 85%.
+*/
+
+int dayhoff_pams[]={
+ 195, /* 75.0% observed d; 195 PAMs estimated = 195% estimated d */
+ 196, /* 75.1% observed d; 196 PAMs estimated */
+ 197, 198, 199, 200, 200, 201, 202, 203,
+ 204, 205, 206, 207, 208, 209, 209, 210, 211, 212,
+ 213, 214, 215, 216, 217, 218, 219, 220, 221, 222,
+ 223, 224, 226, 227, 228, 229, 230, 231, 232, 233,
+ 234, 236, 237, 238, 239, 240, 241, 243, 244, 245,
+ 246, 248, 249, 250, /* 250 PAMs = 80.3% observed d */
+ 252, 253, 254, 255, 257, 258,
+ 260, 261, 262, 264, 265, 267, 268, 270, 271, 273,
+ 274, 276, 277, 279, 281, 282, 284, 285, 287, 289,
+ 291, 292, 294, 296, 298, 299, 301, 303, 305, 307,
+ 309, 311, 313, 315, 317, 319, 321, 323, 325, 328,
+ 330, 332, 335, 337, 339, 342, 344, 347, 349, 352,
+ 354, 357, 360, 362, 365, 368, 371, 374, 377, 380,
+ 383, 386, 389, 393, 396, 399, 403, 407, 410, 414,
+ 418, 422, 426, 430, 434, 438, 442, 447, 451, 456,
+ 461, 466, 471, 476, 482, 487, 493, 498, 504, 511,
+ 517, 524, 531, 538, 545, 553, 560, 569, 577, 586,
+ 595, 605, 615, 626, 637, 649, 661, 675, 688, 703,
+ 719, 736, 754, 775, 796, 819, 845, 874, 907, 945,
+ /* 92.9% observed; 945 PAMs */
+ 988 /* 93.0% observed; 988 PAMs */
+};
+
diff --git a/debian/changelog b/debian/changelog
deleted file mode 100644
index 28894d5..0000000
--- a/debian/changelog
+++ /dev/null
@@ -1,5 +0,0 @@
-pcma (2.0+20040626-1) UNRELEASED; urgency=low
-
- * Initial release
-
- -- Andreas Tille <tille at debian.org> Thu, 24 May 2012 14:30:13 +0200
diff --git a/debian/compat b/debian/compat
deleted file mode 100644
index f599e28..0000000
--- a/debian/compat
+++ /dev/null
@@ -1 +0,0 @@
-10
diff --git a/debian/control b/debian/control
deleted file mode 100644
index 1010ea8..0000000
--- a/debian/control
+++ /dev/null
@@ -1,28 +0,0 @@
-Source: pcma
-Maintainer: Debian Med Packaging Team <debian-med-packaging at lists.alioth.debian.org>
-Uploaders: Andreas Tille <tille at debian.org>
-Section: science
-Priority: optional
-Build-Depends: debhelper (>= 10)
-Standards-Version: 3.9.8
-Vcs-Browser: http://anonscm.debian.org/viewvc/debian-med/trunk/packages/pcma/trunk/
-Vcs-Svn: svn://anonscm.debian.org/debian-med/trunk/packages/pcma/trunk/
-Homepage: http://prodata.swmed.edu/pcma/pcma.php
-
-Package: pcma
-Architecture: any
-Depends: ${shlibs:Depends},
- ${misc:Depends}
-Enhances: t-coffee
-Description: fast and accurate multiple sequence alignment based on profile consistency
- PCMA (profile consistency multiple sequence alignment) is a progressive
- multiple sequence alignment program that combines two different
- alignment strategies. Highly similar sequences are aligned in a fast way
- as in ClustalW, forming pre-aligned groups. The T-Coffee strategy is
- applied to align the relatively divergent groups based on
- profile–profile comparison and consistency. The scoring function for
- local alignments of pre-aligned groups is based on a novel
- profile–profile comparison method that is a generalization of the
- PSI-BLAST approach to profile–sequence comparison. PCMA balances speed
- and accuracy in a flexible way and is suitable for aligning large
- numbers of sequences.
diff --git a/debian/copyright b/debian/copyright
deleted file mode 100644
index 2703541..0000000
--- a/debian/copyright
+++ /dev/null
@@ -1,14 +0,0 @@
-Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
-Source: http://prodata.swmed.edu/download/pub/PCMA/pcma.tar.gz
-
-Files: *
-Copyright: © 2003-2004 Jimin Pei, Ruslan Sadreyev and Nick V. Grishin
-License: non-free for commercial
-Comment: Check with authors about licensing, they adopted code from clustalw which is now
- free. Thus a change might be possible
-
-
-Files: debian/*
-Copyright: © 2012 Andreas Tille <tille at debian.org>
-License: LGPL-3+
-Comment: Take over license of recent ClustalW for the packaging
diff --git a/debian/get-orig-source b/debian/get-orig-source
deleted file mode 100755
index 6d126e0..0000000
--- a/debian/get-orig-source
+++ /dev/null
@@ -1,20 +0,0 @@
-#!/bin/sh -e
-
-NAME=`dpkg-parsechangelog | awk '/^Source/ { print $2 }'`
-DVERSION=`dpkg-parsechangelog | awk '/^Version:/ { print $2 }' | sed -e 's/^[0-9]*://' -e 's/-.*//'`
-MVERSION=`dpkg-parsechangelog | awk '/^Version:/ { print $2 }' | sed 's/^\([0-9\.]\+\)[+~][-0-9]\+$/\1/'`
-
-mkdir -p ../tarballs
-cd ../tarballs
-wget -q http://prodata.swmed.edu/download/pub/PCMA/pcma.tar.gz
-tar xaf ${NAME}.tar.gz
-VERSION=${MVERSION}+`find $NAME -type f -printf '%T@ %p\n' | sort -n | tail -1 | sed 's/ .*//' | gawk '{print strftime("%Y%m%d", $0)}'`
-
-if [ "$DVERSION" = "$VERSION" ] ; then
- echo "No newer upstream version than $DVERSION available."
- rm -rf ${NAME} ${NAME}.tar.gz
- exit
-fi
-
-mv pcma.tar.gz "$NAME"_"$VERSION".orig.tar.gz
-rm -rf pcma
diff --git a/debian/rules b/debian/rules
deleted file mode 100755
index 4859fcd..0000000
--- a/debian/rules
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/usr/bin/make -f
-
-# DH_VERBOSE := 1
-
-%:
- dh $@
-
-get-orig-source:
- mkdir -p ../tarballs
- uscan --verbose --force-download --destdir=../tarballs
diff --git a/debian/source/format b/debian/source/format
deleted file mode 100644
index 163aaf8..0000000
--- a/debian/source/format
+++ /dev/null
@@ -1 +0,0 @@
-3.0 (quilt)
diff --git a/debian/upstream/metadata b/debian/upstream/metadata
deleted file mode 100644
index 3eeb5ee..0000000
--- a/debian/upstream/metadata
+++ /dev/null
@@ -1,12 +0,0 @@
-Reference:
- Author: Jimin Pei and Ruslan Sadreyev and Nick V. Grishin
- Title: "PCMA: fast and accurate multiple sequence alignment based on profile consistency"
- Journal: Bioinformatics
- Year: 2003
- Volume: 19
- Number: 3
- Pages: 427-428
- DOI: 10.1093/bioinformatics/btg008
- PMID: 12584134
- URL: http://bioinformatics.oxfordjournals.org/content/19/3/427.short
- eprint: http://bioinformatics.oxfordjournals.org/content/19/3/427.full.pdf+html
diff --git a/debian/watch b/debian/watch
deleted file mode 100644
index f061df1..0000000
--- a/debian/watch
+++ /dev/null
@@ -1,9 +0,0 @@
-## Source code vanished
-
-version=3
-
-# the tar file does not contain any version number
-# the version is hiddenin 00README
-# we set a static value here
-opts=pasv,uversionmangle=s/^/2/ \
-ftp://iole.swmed.edu/pub/PCMA/pcma(.*)\.tar\.gz
diff --git a/example/1aboA_ref2.dnd b/example/1aboA_ref2.dnd
new file mode 100644
index 0000000..75d4342
--- /dev/null
+++ b/example/1aboA_ref2.dnd
@@ -0,0 +1,40 @@
+(
+(
+(
+(
+(
+1aboA:0.34755,
+(
+1ad5:0.20034,
+1efn:0.18562)
+:0.09105)
+:0.02842,
+(
+(
+1aey:0.30907,
+1awj:0.34006)
+:0.00885,
+1ycsB:0.34992)
+:0.03123)
+:0.01569,
+(
+(
+1gfc:0.23295,
+1sem:0.22319)
+:0.07245,
+1hsp:0.35738)
+:0.01118)
+:0.01517,
+(
+1gbq:0.26371,
+1csk:0.29770)
+:0.06441)
+:0.00483,
+1ark:0.34749,
+(
+(
+1ckb:0.26735,
+1pht:0.34668)
+:0.02403,
+1ihvA:0.55671)
+:0.01408);
diff --git a/example/1aboA_ref2.fa b/example/1aboA_ref2.fa
new file mode 100644
index 0000000..7e0dadc
--- /dev/null
+++ b/example/1aboA_ref2.fa
@@ -0,0 +1,30 @@
+>1aboA
+NLFVALYDFVASGDNTLSITKGEKLRVLGYNHNGEWCEAQTKNGQGWVPSNYITPVN
+>1ark
+TAGKIFRAMYDYMAADADEVSFKDGDAIINVQAIDEGWMYGTVQRTGRTGMLPANYVEAI
+>1gbq
+MEAIAKYDFKATADDELSFKRGDILKVLNEECDQNWYKAELNGKDGFIPKNYIEMKP
+>1ckb
+AEYVRALFDFNGNDEEDLPFKKGDILRIRDKPEEQWWNAEDSEGKRGMIPVPYVEKY
+>1gfc
+GSTYVQALFDFDPQEDGELGFRRGDFIHVMDNSDPNWWKGACHGQTGMFPRNYVTPV
+>1hsp
+GSPTFKCAVKALFDYKAQREDELTFIKSAIIQNVEKQEGGWWRGDYGGKKQLWFPSNYVEEMV
+>1aey
+GKELVLALYDYQEKSPREVTMKKGDILTLLNSTNKDWWKVEVNDRQGFVPAAYVKKL
+>1csk
+GTECIAKYNFHGTAEQDLPFCKGDVLTIVAVTKDPNWYKAKNKVGREGIIPANYVQKR
+>1ad5
+EDIIVVALYDYEAIHHEDLSFQKGDQMVVLEESGEWWKARSLATRKEGYIPSNYVARVD
+>1awj
+RRSFQEPEETLVIALYDYQTNDPQELALRCDEEYYLLDSSEIHWWRVQDKNGHEGYAPSSYLVEKS
+>1efn
+ALFVALYDYEAITEDDLSFHKGEKFQILNSSEGDWWEARSLTTGETGYIPSNYVAPV
+>1sem
+ETKFVQALFDFNPQESGELAFKRGDVITLINKDDPNWWEGQLNNRRGIFPSNYVCPY
+>1ycsB
+KGVIYALWDYEPQNDDELPMKEGDCMTIIHREDEDEIEWWWARLNDKEGYVPRNLLGLYP
+>1pht
+GYQYRALYDYKKEREEDIDLHLGDILTVNKGSLVALGFSDGQEARPEEIGWLNGYNETTGERGDFPGTYVEYIGRKKISP
+>1ihvA
+NFRVYYRDSRDPVWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRD
diff --git a/example/1aboA_ref2.pcma50.aln b/example/1aboA_ref2.pcma50.aln
new file mode 100644
index 0000000..fd56b2d
--- /dev/null
+++ b/example/1aboA_ref2.pcma50.aln
@@ -0,0 +1,36 @@
+CLUSTAL W (1.81) multiple sequence alignment
+
+
+1ckb -------A--EYVRALFDFNGNDEEDLPFKKGDILRIR-------DK--------P-E--
+1pht -------G--YQYRALYDYKKEREEDIDLHLGDILTVNKGSLVALGFSDGQEARPE-E--
+1ad5 -------E-DIIVVALYDYEAIHHEDLSFQKGDQMVVL-------EE--------S----
+1efn ----------ALFVALYDYEAITEDDLSFHKGEKFQIL-------NS--------S-E--
+1aboA ----------NLFVALYDFVASGDNTLSITKGEKLRVL-------GY--------N-H--
+1aey -------G-KELVLALYDYQEKSPREVTMKKGDILTLL-------NS--------T-N--
+1awj RRSFQEPE-ETLVIALYDYQTNDPQELALRCDEEYYLL-------DS--------S-E--
+1ycsB -------K--GVIYALWDYEPQNDDELPMKEGDCMTII-------HR--------E-DED
+1gfc -------G-STYVQALFDFDPQEDGELGFRRGDFIHVM-------DN--------S-D--
+1sem -------E-TKFVQALFDFNPQESGELAFKRGDVITLI-------NK--------D-D--
+1hsp GSPT---F-KCAVKALFDYKAQREDELTFIKSAIIQNV-------EK--------Q-E--
+1gbq ----------MEAIAKYDFKATADDELSFKRGDILKVL-------NE--------ECD--
+1csk -------G--TECIAKYNFHGTAEQDLPFCKGDVLTIV-------AV--------TKD--
+1ark -------TAGKIFRAMYDYMAADADEVSFKDGDAIINV-------QA--------I-D--
+1ihvA -------N-----FRVYY-RDSRDP---VWKGPAKLL--------WK--------G-E--
+ : . .
+
+1ckb -E-QWWNAE-DSE-GKRG-MIPVPYVEKY--------
+1pht -I-GWLNGYNETT-GERG-DFPGTYVEYIGRKKISP-
+1ad5 -G-EWWKARSLAT-RKEG-YIPSNYVARV------D-
+1efn -G-DWWEARSLTT-GETG-YIPSNYVAPV--------
+1aboA -NGEWCEAQ--TK-NGQG-WVPSNYITPV------N-
+1aey -K-DWWKVE--VN-DRQG-FVPAAYVKKL--------
+1awj -I-HWWRVQD-KN-GHEG-YAPSSYLVEK------S-
+1ycsB EI-EWWWAR--LN-DKEG-YVPRNLLGLY------P-
+1gfc -P-NWWKGA--CH-GQTG-MFPRNYVTPV--------
+1sem -P-NWWEGQ--LN-NRRG-IFPSNYVCPY--------
+1hsp -G-GWWRGD--YG-GKKQLWFPSNYVEEM------V-
+1gbq -Q-NWYKAE--LN-GKDG-FIPKNYIEMK------P-
+1csk -P-NWYKAK--NKVGREG-IIPANYVQKR--------
+1ark -E-GWMYGTVQRT-GRTG-MLPANYVEAI--------
+1ihvA -G-AVVI-Q--DN-SDIK-VVPRRKAKII------RD
+ *
diff --git a/example/alnlist b/example/alnlist
new file mode 100644
index 0000000..cf81b61
--- /dev/null
+++ b/example/alnlist
@@ -0,0 +1,4 @@
+@ ALIGNMENT LIST
+ggdef1.aln
+ggdef2.aln
+ggdef3.aln
diff --git a/example/alnlist.aln b/example/alnlist.aln
new file mode 100644
index 0000000..e87344c
--- /dev/null
+++ b/example/alnlist.aln
@@ -0,0 +1,70 @@
+CLUSTAL W (1.81) multiple sequence alignment
+
+
+gi|2983285_168_335 ---KELD----ALKKQAFIDYLTGLKNRRSIQKALEDYFKDYKNYG-YPFSVIMMDLNNF
+gi|2982975_168_338 ---KQIRSFSKALKVDP----LTGLLNRRVLPYILRDVLELSLYTE-TPFSIAMVDIDNF
+gi|2982791_257_426 -----KTLYERMALTDP----LTGLYNRRVFTEMAEKELAKAKRYG-YNFSILMIDIDNF
+gi|4589924_133_299 ---K-------KMGENLIKDYLTGVYNKRYIDQRFSKEAHRNLKES-IPTTVIMTDIDSF
+gi|1742448_283_456 ----LRELFEEVSRHEVGMDVLTKLLNRRFLPTIFKREIAHANRTG-TPLSVLIIDVDKF
+gi|1119215_278_450 LRNNLDHSL-ELAVT----DQLTGLHNRRYMTGQLDSLVKRATLGG-DPVSALLIDIDFF
+gi|4732058_286_453 -----KQTI-ELAVT----DPLTGLYNRRYLDNHLNVLFNRSMARG-RPLSVLITDIDRF
+gi|2983209_436_608 K--LMEKLK-MQSYI----DGLTGAFNRRFLEEIADKIVAQTLRRN-SNLGILMIDVDFF
+gi|4982165_1033_1196 L---RERLYRDRSMK----DPLTGVLSRWYFMERLEEEAYKSSRYK-SPLSIIMCDADDF
+gi|3860799_281_448 L----EQSV-NLAAK----DGLTGLFNRRYFDIHLKQMIEKANKEN-IKLYLLMCDIDNF
+gi|4378845_396_562 Q--ANKQLS-ELANT----DVLTGLLNRRALMAKLSELHQDALLNE-NAISIAMLDVDHF
+gi|1336656_24_194 ---L-FKQYKFQAHF----DFLTGVYNRRKFEETTKALYQQAADTPHFQFALIYMDIDHF
+gi|1724005_184_354 ---L-FKQYKFQAHF----DFLTGVYNRRKFEETTKALYQQAADTPHFQFALIYMDIDHF
+gi|1652841_792_962 ---KLRERLENQSIR----DPLTGLFNRRYLEQFFLQEIGRAKRYNH-SIGVIMGDIDHF
+gi|4980635_289_450 ---R---DYVIASET----DGLTGLFNKRAIMRFLEEVLRSEKN-----IAVAMMDIDDF
+ ** .: : * : *
+
+gi|2983285_168_335 KEINDEYGHLVGDCILKGIGEILRKYLRAK-DAIGRYGGDEFLIILPGVKLEDAVNIARR
+gi|2982975_168_338 KKINDTYGHLFGDKVLKEVAKIIKKNLRRS-DYVFRYGGEEFLILMPSTELKDAVRILEK
+gi|2982791_257_426 KKINDTYGHDVGDLVLKKISEILKRNVRGA-DLVARFGGEEFIVMLSNTNLNGAVKKAEQ
+gi|4589924_133_299 KKVNDTYGHLVGDKILRGFAKVLNNNIRENSDWIGRYGGEEFIIVLNNTNLKNGVKVAEK
+gi|1742448_283_456 KEINDTWGHNTGDEILRKVSQAFYDNVRSS-DYVFRYGGDEFIIVLTEASENETLRTAER
+gi|1119215_278_450 KKINDTFGHDIGDEVLREFALRLASNVR-AIDLPCRYGGEEFVVIMPDTALADALRIAER
+gi|4732058_286_453 KHVNDTYGHDGGDEVLREFSSRVRSTIR-GADLACRYGGEEFVVVMPDTSPEIAAAVAER
+gi|2983209_436_608 KQVNDTYGHDAGDEVLRQIARVIKDNIR-KADYLIRYGGEEFLVLLTDVKEGYAEKVAEK
+gi|4982165_1033_1196 KKINDQFGHVAGDKTLGWLGRKMKSVLR-KSDLVGRYGGEEFIIALPGTSLEEAKIVAEK
+gi|3860799_281_448 KHVNDTYGHQAGDKVLTTVSRILKNTLR-VTDLIARFGGEEFTILLTDIDISKAIETAER
+gi|4378845_396_562 KKINDRYGHLVGDDVMRNAASIIRNNIR-QEDFAGRYGGDEFIIAISA-SIEMSRHIAER
+gi|1336656_24_194 KTINDQYGHHEGDQVLKELGLRLKQTIR-NTDPAARIGGEEFAVLLPNCSLDKAARIAER
+gi|1724005_184_354 KTINDQYGHHEGDQVLKELGLRLKQTIR-NTDPAARIGGEEFAVLLPNCSLDKAARIAER
+gi|1652841_792_962 KQFNDQLGHDAGDHVLKTIGRILQSNIR-GSDIACRYGGEEMTIVLPQTSLEDTLVKAES
+gi|4980635_289_450 KKINDTFGHPVGDEVLRVVANILRETVK-IGKVG-RYGGEEFMVVFETGERDAVVKTMDN
+ * .** ** ** : . . :: . * **:*: : :
+
+gi|2983285_168_335 LKTVIQNHTFYC-ED-KELKVSASFGILE-VNENFNSPEEILKEVDKKLYEAKKNPDHIA
+gi|2982975_168_338 IRKEVENTPICWQ-G-KEIRVTISVGVCSDVYNGLKSPEEYIKCADEKLYLAKRTGKNRV
+gi|2982791_257_426 LRRMIEQTPIELPNG-EKLRVTVSIGVS--TYRGHESLEELIKEADQALYEAKRKGKNRV
+gi|4589924_133_299 LRKIIEKMSFDY-GD-LSLKITSSFGVCE-VSEK-EDPFDTIKNADEKLYMAKMTGRNKT
+gi|1742448_283_456 IRSRVEKTKLKAANG-EDIALSLSIGAA--MFNGHPDYERLIQIADEALYIAKRRGRNRV
+gi|1119215_278_450 IRMHVSGSPFTVAHGREMLNVTISIGVSATAGE-GDTPEALLKRADEGVYQAKASGRNAV
+gi|4732058_286_453 LRAAIESAPFMLKHSGEALNVTASFGIASRIAS-VLTPGQLMKQADLALYEAKNTGRNRV
+gi|2983209_436_608 LRKIIEETPITLPNC-QTIRKTVSIGISEFPKDCEGKFWKCVKFADVALYKAKEEGRNRV
+gi|4982165_1033_1196 LRKAVMEDPEN------TYHITLSFGVAEYKN--GEDPFETIKRADEALYLAKILGKNSV
+gi|3860799_281_448 VRVKIEYMDFYIEGQIEPLKKTISIGVTEYKK--EESIESFIKRADKAMYEAKTTGKNKV
+gi|4378845_396_562 IRNE--FTKIVIKNGETELKITVSIGLFEITT--EESISIALDKADSLLYWAKNNGRNRV
+gi|1336656_24_194 IRSTVSDAPIVLTNG-DELSVTISLGAAHYPNN-TEQPGSLPILADQMLYKAKETGRNRV
+gi|1724005_184_354 IRSTVSDAPIVLTNG-DELSVTISLGAAHYPNN-TEQPGSLPILADQMLYKAKETGRNRV
+gi|1652841_792_962 LRQAIASMEVEYKGK-ELGTLTVSLGVACYPNH-GETMVNIIQAADRALYQAKAAGRNRV
+gi|4980635_289_450 IMENIRNFDWQKIFG-SEKKVTLSGGVA-FSKK-ESSPVELIEEADKKLYTAKRSGKDRY
+ : : * * .* :* ** .
+
+gi|2983285_168_335 ----
+gi|2982975_168_338 VF--
+gi|2982791_257_426 EVFR
+gi|4589924_133_299 VF--
+gi|1742448_283_456 ELW-
+gi|1119215_278_450 V---
+gi|4732058_286_453 V---
+gi|2983209_436_608 VRF-
+gi|4982165_1033_1196 V---
+gi|3860799_281_448 V---
+gi|4378845_396_562 E---
+gi|1336656_24_194 CF--
+gi|1724005_184_354 CF--
+gi|1652841_792_962 VM--
+gi|4980635_289_450 VI--
+
diff --git a/example/alnlist.dnd b/example/alnlist.dnd
new file mode 100644
index 0000000..cadd857
--- /dev/null
+++ b/example/alnlist.dnd
@@ -0,0 +1,4 @@
+(
+gi|2982975_168_338:0.31313,
+gi|4732058_286_453:0.29997,
+gi|1336656_24_194:0.28336);
diff --git a/example/ggdef1.aln b/example/ggdef1.aln
new file mode 100644
index 0000000..6aa0d3b
--- /dev/null
+++ b/example/ggdef1.aln
@@ -0,0 +1,23 @@
+CLUSTAL W (1.81) multiple sequence alignment
+
+
+gi|2983285_168_335 KELD----ALKKQAFIDYLTGLKNRRSIQKALEDYFKDYKNYGYPFSVIMMDLNNFKEIN
+gi|2982975_168_338 KQIRSFSKALKVDP----LTGLLNRRVLPYILRDVLELSLYTETPFSIAMVDIDNFKKIN
+gi|2982791_257_426 --KTLYERMALTDP----LTGLYNRRVFTEMAEKELAKAKRYGYNFSILMIDIDNFKKIN
+gi|4589924_133_299 K-------KMGENLIKDYLTGVYNKRYIDQRFSKEAHRNLKESIPTTVIMTDIDSFKKVN
+gi|1742448_283_456 -LRELFEEVSRHEVGMDVLTKLLNRRFLPTIFKREIAHANRTGTPLSVLIIDVDKFKEIN
+ : ** : *:* : :: : *::.**::*
+
+gi|2983285_168_335 DEYGHLVGDCILKGIGEILRKYLRAK-DAIGRYGGDEFLIILPGVKLEDAVNIARRLKTV
+gi|2982975_168_338 DTYGHLFGDKVLKEVAKIIKKNLRRS-DYVFRYGGEEFLILMPSTELKDAVRILEKIRKE
+gi|2982791_257_426 DTYGHDVGDLVLKKISEILKRNVRGA-DLVARFGGEEFIVMLSNTNLNGAVKKAEQLRRM
+gi|4589924_133_299 DTYGHLVGDKILRGFAKVLNNNIRENSDWIGRYGGEEFIIVLNNTNLKNGVKVAEKLRKI
+gi|1742448_283_456 DTWGHNTGDEILRKVSQAFYDNVRSS-DYVFRYGGDEFIIVLTEASENETLRTAERIRSR
+ * :** ** :*: ..: : :* * : *:**:**:::: .. : :. .:::
+
+gi|2983285_168_335 IQNHTFYC-EDKELKVSASFGILE-VNENFNSPEEILKEVDKKLYEAKKNPDHIA----
+gi|2982975_168_338 VENTPICWQ-GKEIRVTISVGVCSDVYNGLKSPEEYIKCADEKLYLAKRTGKNRVVF--
+gi|2982791_257_426 IEQTPIELPNGEKLRVTVSIGVS--TYRGHESLEELIKEADQALYEAKRKGKNRVEVFR
+gi|4589924_133_299 IEKMSFDY-GDLSLKITSSFGVCE-VSEK-EDPFDTIKNADEKLYMAKMTGRNKTVF--
+gi|1742448_283_456 VEKTKLKAANGEDIALSLSIGAA--MFNGHPDYERLIQIADEALYIAKRRGRNRVELW-
+ ::: : . .: :: *.* . . :: .*: ** ** : .
diff --git a/example/ggdef2.aln b/example/ggdef2.aln
new file mode 100644
index 0000000..d076feb
--- /dev/null
+++ b/example/ggdef2.aln
@@ -0,0 +1,26 @@
+CLUSTAL W (1.81) multiple sequence alignment
+
+
+gi|1119215_278_450 LRNNLDHSL-ELAVTDQLTGLHNRRYMTGQLDSLVKRATLGGDPVSALLIDIDFFKKIND
+gi|4732058_286_453 -----KQTI-ELAVTDPLTGLYNRRYLDNHLNVLFNRSMARGRPLSVLITDIDRFKHVND
+gi|2983209_436_608 K--LMEKLK-MQSYIDGLTGAFNRRFLEEIADKIVAQTLRRNSNLGILMIDVDFFKQVND
+gi|4982165_1033_1196 L---RERLYRDRSMKDPLTGVLSRWYFMERLEEEAYKSSRYKSPLSIIMCDADDFKKIND
+gi|3860799_281_448 L----EQSV-NLAAKDGLTGLFNRRYFDIHLKQMIEKANKENIKLYLLMCDIDNFKHVND
+gi|4378845_396_562 Q--ANKQLS-ELANTDVLTGLLNRRALMAKLSELHQDALLNENAISIAMLDVDHFKKIND
+ .: : * *** .* : . : : : * * **::**
+
+gi|1119215_278_450 TFGHDIGDEVLREFALRLASNVRAIDLPCRYGGEEFVVIMPDTALADALRIAERIRMHVS
+gi|4732058_286_453 TYGHDGGDEVLREFSSRVRSTIRGADLACRYGGEEFVVVMPDTSPEIAAAVAERLRAAIE
+gi|2983209_436_608 TYGHDAGDEVLRQIARVIKDNIRKADYLIRYGGEEFLVLLTDVKEGYAEKVAEKLRKIIE
+gi|4982165_1033_1196 QFGHVAGDKTLGWLGRKMKSVLRKSDLVGRYGGEEFIIALPGTSLEEAKIVAEKLRKAVM
+gi|3860799_281_448 TYGHQAGDKVLTTVSRILKNTLRVTDLIARFGGEEFTILLTDIDISKAIETAERVRVKIE
+gi|4378845_396_562 RYGHLVGDDVMRNAASIIRNNIRQEDFAGRYGGDEFIIAISA-SIEMSRHIAERIRNE--
+ :** **..: . : . :* * *:**:** : :. : **::*
+
+gi|1119215_278_450 GSPFTVAHGREMLNVTISIGVSATAGE-GDTPEALLKRADEGVYQAKASGRNAVV--
+gi|4732058_286_453 SAPFMLKHSGEALNVTASFGIASRIAS-VLTPGQLMKQADLALYEAKNTGRNRVV--
+gi|2983209_436_608 ETPITLPNC-QTIRKTVSIGISEFPKDCEGKFWKCVKFADVALYKAKEEGRNRVVRF
+gi|4982165_1033_1196 EDPEN------TYHITLSFGVAEYKN--GEDPFETIKRADEALYLAKILGKNSVV--
+gi|3860799_281_448 YMDFYIEGQIEPLKKTISIGVTEYKK--EESIESFIKRADKAMYEAKTTGKNKVV--
+gi|4378845_396_562 FTKIVIKNGETELKITVSIGLFEITT--EESISIALDKADSLLYWAKNNGRNRVE--
+ . * *:*: :. ** :* ** *:* *
diff --git a/example/ggdef3.aln b/example/ggdef3.aln
new file mode 100644
index 0000000..af5192b
--- /dev/null
+++ b/example/ggdef3.aln
@@ -0,0 +1,20 @@
+CLUSTAL W (1.81) multiple sequence alignment
+
+
+gi|1336656_24_194 L-FKQYKFQAHFDFLTGVYNRRKFEETTKALYQQAADTPHFQFALIYMDIDHFKTINDQY
+gi|1724005_184_354 L-FKQYKFQAHFDFLTGVYNRRKFEETTKALYQQAADTPHFQFALIYMDIDHFKTINDQY
+gi|1652841_792_962 KLRERLENQSIRDPLTGLFNRRYLEQFFLQEIGRAKRYNH-SIGVIMGDIDHFKQFNDQL
+gi|4980635_289_450 R---DYVIASETDGLTGLFNKRAIMRFLEEVLRSEKN-----IAVAMMDIDDFKKINDTF
+ : * ***::*:* : . :.: ***.** :**
+
+gi|1336656_24_194 GHHEGDQVLKELGLRLKQTIRNTDPAARIGGEEFAVLLPNCSLDKAARIAERIRSTVSDA
+gi|1724005_184_354 GHHEGDQVLKELGLRLKQTIRNTDPAARIGGEEFAVLLPNCSLDKAARIAERIRSTVSDA
+gi|1652841_792_962 GHDAGDHVLKTIGRILQSNIRGSDIACRYGGEEMTIVLPQTSLEDTLVKAESLRQAIASM
+gi|4980635_289_450 GHPVGDEVLRVVANILRETVKIGKVG-RYGGEEFMVVFETGERDAVVKTMDNIMENIRNF
+ ** **.**: :. *:..:: . . * ****: ::: . : . : : . : .
+
+gi|1336656_24_194 PIVLTNGDELSVTISLGAAHYPNNTEQPGSLPILADQMLYKAKETGRNRVCF
+gi|1724005_184_354 PIVLTNGDELSVTISLGAAHYPNNTEQPGSLPILADQMLYKAKETGRNRVCF
+gi|1652841_792_962 EVEYKGKELGTLTVSLGVACYPNHGETMVNIIQAADRALYQAKAAGRNRVVM
+gi|4980635_289_450 DWQKIFGSEKKVTLSGGVA-FSKKESSPVELIEEADKKLYTAKRSGKDRYVI
+ . .:*:* *.* :.:: . .: **: ** ** :*::* :
diff --git a/example/pcma_command b/example/pcma_command
new file mode 100644
index 0000000..8ffd486
--- /dev/null
+++ b/example/pcma_command
@@ -0,0 +1,7 @@
+Example 1: to align a set of fasta format sequences:
+ command: pcma 1aboA_ref2.fa -ave_grp_id=50 -outfile=1aboA_ref2.pcma50.aln
+
+Example 2: to align a list of alignments, alnlist contains the names of
+ the input alignments. The first line of alnlist begins with
+ a '@' character.
+ command: pcma alnlist
diff --git a/gcgcheck.c b/gcgcheck.c
new file mode 100644
index 0000000..7d4b60c
--- /dev/null
+++ b/gcgcheck.c
@@ -0,0 +1,15 @@
+#include <ctype.h> /* because of toupper() */
+int SeqGCGCheckSum(char *seq, int len);
+
+/*
+ * SeqGCGCheckSum: checksum over the first len characters of seq (the name
+ * suggests the GCG sequence-file checksum).
+ *
+ * Each character is upper-cased and multiplied by a weight that cycles
+ * through 1..57 with its position; the sum is reduced modulo 10000.
+ * Returns 0 when len <= 0.
+ */
+int SeqGCGCheckSum(char *seq, int len)
+{
+    int i;
+    long check = 0;
+
+    for (i = 0; i < len; i++)
+    {
+        /* toupper() is only defined for unsigned char values and EOF, so a
+           plain char with the high bit set must be converted first */
+        check += ((i % 57) + 1) * toupper((unsigned char)seq[i]);
+    }
+
+    return (int)(check % 10000);
+}
+
+
diff --git a/general.h b/general.h
new file mode 100644
index 0000000..c09f750
--- /dev/null
+++ b/general.h
@@ -0,0 +1,50 @@
+/* General purpose header file - rf 12/90
+ *
+ * Portability macros: the integer aliases pint/sint/lint, Boolean,
+ * TRUE/FALSE, EOS and MAXLINE.  When MAC is defined, const, signed and
+ * volatile are defined away and int is redefined as long.
+ */
+
+#ifndef _H_general
+#define _H_general
+
+
+
+/* Macintosh specific */
+#ifdef MAC /* rf 12/9/94 */
+
+#define const /* THINK C doesn't know about these identifiers */
+#define signed
+#define volatile
+#define int long
+#ifndef Boolean
+#define Boolean char
+#endif
+#define pint short /* cast ints in printf statements as pint */
+#define sint int /* cast ints for sequence lengths */
+#define lint int /* cast ints for profile scores */
+
+#else /* not Macintosh */
+
+#define pint int /* cast ints in printf statements as pint */
+#define sint int /* cast ints for sequence lengths */
+#define lint int /* cast ints for profile scores */
+#ifndef Boolean
+#define Boolean char
+#endif
+
+#endif /* ifdef MAC */
+
+/* definitions for all machines */
+
+#undef TRUE /* Boolean values; first undef them, just in case */
+#undef FALSE
+#define TRUE 1
+#define FALSE 0
+
+#define EOS '\0' /* End-Of-String */
+#define MAXLINE 512 /* Max. line length */
+
+
+#ifdef VMS
+#define signed
+#endif
+
+
+#endif /* ifndef _H_general */
+
diff --git a/interface.c b/interface.c
new file mode 100644
index 0000000..8c72212
--- /dev/null
+++ b/interface.c
@@ -0,0 +1,4124 @@
+/* command line interface for Clustal W */
+/* DES was here MARCH. 1994 */
+/* DES was here SEPT. 1994 */
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <setjmp.h>
+#include "pcma.h"
+#include "param.h"
+
+/*
+* Prototypes
+*/
+
+#ifdef UNIX
+FILE *open_path(char *);
+#endif
+
+
+static sint check_param(char **args,char *params[], char *param_arg[]);
+static void set_optional_param(void);
+static sint find_match(char *probe, char *list[], sint n);
+static void show_aln(void);
+static void create_parameter_output(void);
+static void reset_align(void);
+static void reset_prf1(void);
+static void reset_prf2(void);
+static void calc_gap_penalty_mask(int prf_length,char *struct_mask,char *gap_mask);
+void print_sec_struct_mask(int prf_length,char *mask,char *struct_mask);
+/*
+* Global variables
+*/
+
+extern sint max_names;
+
+extern Boolean interactive;
+
+extern double **tmat;
+extern float gap_open, gap_extend;
+extern float dna_gap_open, dna_gap_extend;
+extern float prot_gap_open, prot_gap_extend;
+extern float pw_go_penalty, pw_ge_penalty;
+extern float dna_pw_go_penalty, dna_pw_ge_penalty;
+extern float prot_pw_go_penalty, prot_pw_ge_penalty;
+extern char revision_level[];
+extern sint wind_gap,ktup,window,signif;
+extern sint dna_wind_gap, dna_ktup, dna_window, dna_signif;
+extern sint prot_wind_gap,prot_ktup,prot_window,prot_signif;
+extern sint boot_ntrials; /* number of bootstrap trials */
+extern sint nseqs;
+extern sint new_seq;
+extern sint *seqlen_array;
+extern sint divergence_cutoff;
+extern sint debug;
+extern Boolean no_weights;
+extern Boolean neg_matrix;
+extern Boolean quick_pairalign;
+extern Boolean reset_alignments_new; /* DES */
+extern Boolean reset_alignments_all; /* DES */
+extern sint gap_dist;
+extern Boolean no_hyd_penalties, no_pref_penalties;
+extern sint max_aa;
+extern sint gap_pos1, gap_pos2;
+extern sint max_aln_length;
+extern sint *output_index, output_order;
+extern sint profile_no;
+extern short usermat[], pw_usermat[];
+extern short aa_xref[], pw_aa_xref[];
+extern short userdnamat[], pw_userdnamat[];
+extern short dna_xref[], pw_dna_xref[];
+extern sint *seq_weight;
+
+extern Boolean lowercase; /* Flag for GDE output - set on comm. line*/
+extern Boolean cl_seq_numbers;
+extern Boolean output_clustal, output_nbrf, output_phylip, output_gcg, output_gde, output_nexus;
+extern Boolean output_tree_clustal, output_tree_phylip, output_tree_distances, output_tree_nexus;
+extern sint bootstrap_format;
+extern Boolean tossgaps, kimura;
+extern Boolean percent;
+extern Boolean explicit_dnaflag; /* Explicit setting of sequence type on comm.line*/
+extern Boolean usemenu;
+extern Boolean showaln, save_parameters;
+extern Boolean dnaflag;
+extern float transition_weight;
+extern unsigned sint boot_ran_seed;
+
+
+extern FILE *tree;
+extern FILE *clustal_outfile, *gcg_outfile, *nbrf_outfile, *phylip_outfile, *nexus_outfile;
+extern FILE *gde_outfile;
+
+extern char hyd_residues[];
+extern char *amino_acid_codes;
+extern char **args;
+extern char seqname[];
+
+extern char **seq_array;
+extern char **names, **titles;
+
+extern char *gap_penalty_mask1,*gap_penalty_mask2;
+extern char *sec_struct_mask1,*sec_struct_mask2;
+extern sint struct_penalties,struct_penalties1,struct_penalties2;
+extern sint output_struct_penalties;
+extern Boolean use_ss1, use_ss2;
+extern char *ss_name1,*ss_name2;
+
+
+/* Pointers defined here and initialised to NULL. */
+char *ss_name = NULL;
+char *sec_struct_mask = NULL;
+char *gap_penalty_mask = NULL;
+
+/* File names of the two profiles (FILENAMELEN characters plus terminator). */
+char profile1_name[FILENAMELEN+1];
+char profile2_name[FILENAMELEN+1];
+
+/* Set to TRUE by init_interface(). */
+Boolean empty;
+Boolean profile1_empty, profile2_empty; /* whether or not the profiles have been filled */
+
+/* Output file name given with the outfile option; empty by default. */
+char outfile_name[FILENAMELEN+1]="";
+
+/* One alignment output file name per output format; show_aln() reads the
+   one whose output_* flag is set. */
+static char clustal_outname[FILENAMELEN+1], gcg_outname[FILENAMELEN+1];
+static char phylip_outname[FILENAMELEN+1],nbrf_outname[FILENAMELEN+1];
+static char gde_outname[FILENAMELEN+1],nexus_outname[FILENAMELEN+1];
+
+/* Tree file names, all empty by default. */
+char clustal_tree_name[FILENAMELEN+1]="";
+char dist_tree_name[FILENAMELEN+1]="";
+char phylip_tree_name[FILENAMELEN+1]="";
+char nexus_tree_name[FILENAMELEN+1]="";
+char p1_tree_name[FILENAMELEN+1]="";
+char p2_tree_name[FILENAMELEN+1]="";
+
+/* Command-line words as split by check_param(): params[i] holds the keyword
+   of word i and param_arg[i] the text after '=' (NULL when there is none). */
+static char *params[MAXARGS];
+static char *param_arg[MAXARGS];
+
+/* Suffix strings printed after an option name in the option listing; the
+   listing code indexes this table by an option's type field. */
+static char *cmd_line_type[] = {
+ " ",
+ "=n ",
+ "=f ",
+ "=string ",
+ "=filename ",
+ ""};
+
+/* Value returned by check_param(), stored by parse_params(). */
+static sint numparams;
+static Boolean check_tree = TRUE;
+
+sint profile1_nseqs; /* the no. of seqs in profile 1 */
+Boolean use_tree_file = FALSE,new_tree_file = FALSE;
+Boolean use_tree1_file = FALSE, use_tree2_file = FALSE;
+Boolean new_tree1_file = FALSE, new_tree2_file = FALSE;
+
+/* Scratch line buffer of MAXLINE+1 bytes, allocated in init_interface() and
+   used to receive the answers to the "Press [RETURN]" prompts. */
+static char *lin2;
+
+/* Matrix menus: an entry count followed by that many pairs of strings
+   (a display title and a short lower-case name; the name of the
+   "User defined" entry is empty). */
+MatMenu dnamatrix_menu = {3,
+ "IUB","iub",
+ "CLUSTALW(1.6)","clustalw",
+ "User defined",""
+ };
+
+MatMenu matrix_menu = {5,
+ "BLOSUM series","blosum",
+ "PAM series","pam",
+ "Gonnet series","gonnet",
+ "Identity matrix","id",
+ "User defined",""
+ };
+
+MatMenu pw_matrix_menu = {5,
+ "BLOSUM 30","blosum",
+ "PAM 350","pam",
+ "Gonnet 250","gonnet",
+ "Identity matrix","id",
+ "User defined",""
+ };
+
+/*
+ * init_interface - allocate the MAXLINE+1 byte scratch buffer lin2 and set
+ * the empty, profile1_empty and profile2_empty flags to TRUE.
+ */
+void init_interface(void)
+{
+    lin2 = (char *)ckalloc( sizeof (char) * (MAXLINE+1) );
+
+    profile2_empty = TRUE;
+    profile1_empty = TRUE;
+    empty = TRUE;
+}
+
+
+
+
+/*
+ * check_param - split the command-line words into keywords and values and
+ * record which recognised options were given.
+ *
+ * args      : list of command-line words, terminated by a NULL pointer.
+ * params    : receives one newly allocated string per word.  For the first
+ *             word the leading COMMANDSEP (if present) is dropped; for every
+ *             later word the first character is dropped unconditionally.
+ *             For a "key=value" word the string is cut at '=' and the key is
+ *             lower-cased (a word without '=' keeps its case).
+ * param_arg : param_arg[i] receives a newly allocated copy of the text after
+ *             '=' in word i, or NULL when word i has no '='.
+ *
+ * If the first word does not start with COMMANDSEP it is treated as the
+ * input file name and copied to seqname.  Every other word is looked up in
+ * cmd_line_para, then cmd_line_file, then cmd_line_verb; on a match the
+ * table entry's flag is set to the index of the word.
+ *
+ * Returns the number of words, 0 when args is empty, or -1 (after printing
+ * an error) when the scan reaches MAXARGS without finding the terminating
+ * NULL.  Calls exit(1) for an unknown option or for an option that requires
+ * a value but has none.
+ */
+static sint check_param(char **args,char *params[], char *param_arg[])
+{
+    sint len,i,j,k,s,n,match[MAXARGS];
+    size_t wordlen, pos;
+    Boolean name1 = FALSE;
+    Boolean keep_case;
+
+    for(i=0;i<MAXARGS;i++) {
+        match[i] = 0;
+    }
+
+    /* no words at all: return a defined count instead of falling off a
+       bare "return;" in a function that returns a value */
+    if(args[0]==NULL) return(0);
+
+    params[0]=(char *)ckalloc((strlen(args[0])+1)*sizeof(char));
+    if (args[0][0]!=COMMANDSEP)
+    {
+        name1 = TRUE;
+        strcpy(params[0],args[0]);
+        fprintf(stdout, "%s\n", params[0]);
+    }
+    else
+        strcpy(params[0],&args[0][1]);
+    params[0][strlen(args[0])] = '\0';
+
+    for (i=1;i<MAXARGS;i++) {
+        if(args[i]==NULL) break;
+        wordlen = strlen(args[i]);
+        /* wordlen+1 bytes always leaves room for the terminator, even for an
+           empty word; the loop bound "pos+1 < wordlen" cannot wrap around
+           the way "strlen(...)-1" does when the word is empty */
+        params[i]=(char *)ckalloc((wordlen+1)*sizeof(char));
+        for(pos=0;pos+1<wordlen;pos++) {
+            /* a non-printable character ends the keyword at that position */
+            if(isprint((unsigned char)args[i][pos+1]))
+                params[i][pos]=args[i][pos+1];
+            else
+                params[i][pos]='\0';
+        }
+        params[i][pos]='\0';
+    }
+    if (i==MAXARGS) {
+        fprintf(stdout,"Error: too many command line arguments\n");
+        return(-1);
+    }
+/*
+   special case - first parameter is input filename
+*/
+    s = 0;
+    if(name1 == TRUE) {
+        /* NOTE(review): seqname's size is not visible here, so this copy is
+           not bounded - confirm the buffer can hold any file name given */
+        strcpy(seqname, params[0]);
+/* JULIE
+   convert to lower case now
+*/
+#ifndef UNIX
+        for(k=0;k<(sint)strlen(params[0]);++k)
+            seqname[k]=tolower((unsigned char)params[0][k]);
+#else
+        for(k=0;k<(sint)strlen(params[0]);++k) seqname[k]=params[0][k];
+#endif
+        s++;
+    }
+
+    /* split "key=value" words at the first '=' */
+    n = i;
+    for (i=s;i<n;i++) {
+        param_arg[i] = NULL;
+        len = (sint)strlen(params[i]);
+        for(j=0; j<len; j++)
+            if(params[i][j] == '=') {
+                param_arg[i] = (char *)ckalloc((len-j) * sizeof(char));
+                strncpy(param_arg[i],&params[i][j+1],len-j-1);
+                params[i][j] = EOS;
+/* JULIE
+   convert keywords to lower case now
+*/
+                for(k=0;k<j;++k)
+                    params[i][k]=tolower((unsigned char)params[i][k]);
+                param_arg[i][len-j-1] = EOS;
+                break;
+            }
+    }
+
+/*
+   for each parameter given on the command line, first search the list of recognised optional
+   parameters....
+*/
+
+    for (i=0;i<n;i++) {
+        if ((i==0) && (name1 == TRUE)) continue;
+        match[i] = -1;
+        for (j=0; cmd_line_para[j].str[0] != '\0'; j++) {
+            if (strcmp(params[i],cmd_line_para[j].str) != 0) continue;
+            match[i] = j;
+            *cmd_line_para[j].flag = i;
+            if ((cmd_line_para[j].type != NOARG) &&
+                (param_arg[i] == NULL)) {
+                fprintf(stdout,
+                    "Error: parameter required for /%s\n",params[i]);
+                print_help();
+                exit(1);
+            }
+/* JULIE
+   convert parameters to lower case now, unless the parameter is a filename
+   (file names keep their case only when built for UNIX)
+*/
+#ifdef UNIX
+            keep_case = (cmd_line_para[j].type == FILARG);
+#else
+            keep_case = FALSE;
+#endif
+            if (!keep_case && param_arg[i] != NULL) {
+                for(k=0;k<(sint)strlen(param_arg[i]);++k)
+                    param_arg[i][k]=tolower((unsigned char)param_arg[i][k]);
+            }
+            break;
+        }
+    }
+/*
+   ....then the list of recognised input files,....
+*/
+    for (i=0;i<n;i++) {
+        if ((i==0) && (name1 == TRUE)) continue;
+        if (match[i] != -1) continue;
+        for (j=0; cmd_line_file[j].str[0] != '\0'; j++) {
+            if (strcmp(params[i],cmd_line_file[j].str) != 0) continue;
+            match[i] = j;
+            *cmd_line_file[j].flag = i;
+            if ((cmd_line_file[j].type != NOARG) &&
+                (param_arg[i] == NULL)) {
+                fprintf(stdout,
+                    "Error: parameter required for /%s\n",params[i]);
+                print_help();
+                exit(1);
+            }
+            break;
+        }
+    }
+/*
+   ....and finally the recognised verbs.
+*/
+    for (i=0;i<n;i++) {
+        if ((i==0) && (name1 == TRUE)) continue;
+        if (match[i] != -1) continue;
+        for (j=0; cmd_line_verb[j].str[0] != '\0'; j++) {
+            if (strcmp(params[i],cmd_line_verb[j].str) != 0) continue;
+            match[i] = j;
+            *cmd_line_verb[j].flag = i;
+            if ((cmd_line_verb[j].type != NOARG) &&
+                (param_arg[i] == NULL)) {
+                fprintf(stdout,
+                    "Error: parameter required for /%s\n",params[i]);
+                print_help();
+                exit(1);
+            }
+            break;
+        }
+    }
+
+/*
+   check for any unrecognised parameters.
+*/
+    for (i=0;i<n;i++) {
+        if (match[i] == -1) {
+            fprintf(stdout,
+                "Error: unknown option %c%s\n",COMMANDSEP,params[i]);
+            fprintf(stdout,
+                " use -options to see the parameters\n");
+            exit(1);
+        }
+    }
+    return(n);
+}
+
+/* Return the integer value given for the option whose word index is idx.
+   An empty value yields 0; a value that sscanf cannot read as an integer
+   prints "Bad option for /<optname>" and also yields 0. */
+static int sop_int_value(sint idx, const char *optname)
+{
+    int value = 0;
+
+    if (strlen(param_arg[idx]) > 0 && sscanf(param_arg[idx],"%d",&value) != 1) {
+        fprintf(stdout,"Bad option for /%s (must be integer)\n",optname);
+        value = 0;
+    }
+    return value;
+}
+
+/* Same as sop_int_value() but for a real-valued option. */
+static float sop_float_value(sint idx, const char *optname)
+{
+    float value = 0.0;
+
+    if (strlen(param_arg[idx]) > 0 && sscanf(param_arg[idx],"%f",&value) != 1) {
+        fprintf(stdout,"Bad option for /%s (must be real number)\n",optname);
+        value = 0.0;
+    }
+    return value;
+}
+
+/* Copy src to dst in lower case, writing at most dstsize bytes including
+   the terminator (dstsize must be at least 1). */
+static void sop_lower_copy(char *dst, size_t dstsize, const char *src)
+{
+    size_t i;
+
+    for (i = 0; i + 1 < dstsize && src[i] != '\0'; i++)
+        dst[i] = (char)tolower((unsigned char)src[i]);
+    dst[i] = '\0';
+}
+
+/* Return the 1-based position of name in names[0..count-1], or 0 if it is
+   not one of them. */
+static int sop_builtin_matrix(const char *name, const char *const names[], int count)
+{
+    int i;
+
+    for (i = 0; i < count; i++)
+        if (strcmp(name, names[i]) == 0) return i + 1;
+    return 0;
+}
+
+/*
+ * set_optional_param - apply the optional command-line parameters.
+ *
+ * For every option flag (set... variable) that is not -1, the flag is the
+ * index into param_arg of the option's value; the value is parsed, range
+ * checked and stored in the corresponding global.  Values that fail to
+ * parse or fall outside the accepted range leave the global unchanged.
+ * The options are processed in a fixed order because later ones (pairgap,
+ * topdiags, window) are compared against ktup, which /ktuple may change.
+ * Calls exit(1) when a user matrix file cannot be loaded.
+ */
+static void set_optional_param(void)
+{
+    static const char *const prot_matrices[] = { "blosum", "pam", "gonnet", "id" };
+    static const char *const dna_matrices[] = { "iub", "clustalw" };
+    int i,temp;
+    int c;
+    int nres;
+    float ftemp;
+    char tstr[100];
+
+/****************************************************************************/
+/* look for parameters on command line e.g. gap penalties, k-tuple etc.    */
+/****************************************************************************/
+
+/*** ? /score=percent or /score=absolute */
+    if(setscore != -1 && strlen(param_arg[setscore]) > 0) {
+        temp = find_match(param_arg[setscore],score_arg,2);
+        if(temp == 0)
+            percent = TRUE;
+        else if(temp == 1)
+            percent = FALSE;
+        else
+            fprintf(stdout,"\nUnknown SCORE type: %s\n",
+                param_arg[setscore]);
+    }
+
+/*** ? /seed=n */
+    if(setseed != -1) {
+        temp = sop_int_value(setseed,"seed");
+        if(temp > 0) boot_ran_seed = temp;
+        fprintf(stdout,"\ntemp = %d; seed = %u;\n",(pint)temp,boot_ran_seed);
+    }
+
+/*** ? /output=GCG, GDE, PIR, PHYLIP or NEXUS */
+    if(setoutput != -1 && strlen(param_arg[setoutput]) > 0) {
+        temp = find_match(param_arg[setoutput],output_arg,5);
+        /* NOTE(review): the other formats are switched off only for choices
+           0..3; choosing NEXUS (4) leaves the previously enabled formats
+           on - confirm whether that is intended */
+        if (temp >= 0 && temp <= 3) {
+            output_clustal = FALSE;
+            output_gcg = FALSE;
+            output_phylip = FALSE;
+            output_nbrf = FALSE;
+            output_gde = FALSE;
+            output_nexus = FALSE;
+        }
+        switch (temp) {
+            case 0: /* GCG */
+                output_gcg = TRUE;
+                break;
+            case 1: /* GDE */
+                output_gde = TRUE;
+                break;
+            case 2: /* PIR */
+                output_nbrf = TRUE;
+                break;
+            case 3: /* PHYLIP */
+                output_phylip = TRUE;
+                break;
+            case 4: /* NEXUS */
+                output_nexus = TRUE;
+                break;
+            default:
+                fprintf(stdout,"\nUnknown OUTPUT type: %s\n",
+                    param_arg[setoutput]);
+        }
+    }
+
+/*** ? /outputtree=NJ or PHYLIP or DIST or NEXUS */
+    if(setoutputtree != -1 && strlen(param_arg[setoutputtree]) > 0) {
+        temp = find_match(param_arg[setoutputtree],outputtree_arg,4);
+        switch (temp) {
+            case 0: /* NJ */
+                output_tree_clustal = TRUE;
+                break;
+            case 1: /* PHYLIP */
+                output_tree_phylip = TRUE;
+                break;
+            case 2: /* DIST */
+                output_tree_distances = TRUE;
+                break;
+            case 3: /* NEXUS */
+                output_tree_nexus = TRUE;
+                break;
+            default:
+                fprintf(stdout,"\nUnknown OUTPUT TREE type: %s\n",
+                    param_arg[setoutputtree]);
+        }
+    }
+
+/*** ? /profile (sets type of second input file to profile) */
+    if(setprofile != -1)
+        profile_type = PROFILE;
+
+/*** ? /sequences (sets type of second input file to list of sequences) */
+    if(setsequences != -1)
+        profile_type = SEQUENCE;
+
+/*** ? /ktuple=n  (1..4 for DNA, 1..2 for protein; also resets wind_gap) */
+    if(setktuple != -1) {
+        temp = sop_int_value(setktuple,"ktuple");
+        if(temp > 0) {
+            if(dnaflag) {
+                if(temp <= 4) {
+                    ktup = temp;
+                    dna_ktup = ktup;
+                    wind_gap = ktup + 4;
+                    dna_wind_gap = wind_gap;
+                }
+            }
+            else {
+                if(temp <= 2) {
+                    ktup = temp;
+                    prot_ktup = ktup;
+                    wind_gap = ktup + 3;
+                    prot_wind_gap = wind_gap;
+                }
+            }
+        }
+    }
+
+/*** ? /pairgap=n  (must be positive and greater than ktup) */
+    if(setpairgap != -1) {
+        temp = sop_int_value(setpairgap,"pairgap");
+        if(temp > 0 && temp > ktup) {
+            wind_gap = temp;
+            if(dnaflag)
+                dna_wind_gap = wind_gap;
+            else
+                prot_wind_gap = wind_gap;
+        }
+    }
+
+/*** ? /topdiags=n  (must be positive and greater than ktup) */
+    if(settopdiags != -1) {
+        temp = sop_int_value(settopdiags,"topdiags");
+        if(temp > 0 && temp > ktup) {
+            signif = temp;
+            if(dnaflag)
+                dna_signif = signif;
+            else
+                prot_signif = signif;
+        }
+    }
+
+/*** ? /window=n  (must be positive and greater than ktup) */
+    if(setwindow != -1) {
+        temp = sop_int_value(setwindow,"window");
+        if(temp > 0 && temp > ktup) {
+            window = temp;
+            if(dnaflag)
+                dna_window = window;
+            else
+                prot_window = window;
+        }
+    }
+
+/*** ? /kimura */
+    if(setkimura != -1)
+        kimura = TRUE;
+
+/*** ? /tossgaps */
+    if(settossgaps != -1)
+        tossgaps = TRUE;
+
+/*** ? /negative */
+    if(setnegative != -1)
+        neg_matrix = TRUE;
+
+/*** ? /noweights */
+    if(setnoweights!= -1)
+        no_weights = TRUE;
+
+    /* For the four matrix options below the value is first compared, in
+       lower case, with the built-in matrix names; anything else is taken
+       as the name of a user matrix file.  The lower-case copy is bounded
+       by sizeof tstr: a longer value cannot equal a built-in name, so
+       truncating it only affects the comparison.
+       NOTE(review): the sizes of the *mtrxname buffers are not visible
+       here, so the strcpy calls into them remain unbounded. */
+
+/*** ? /pwmatrix=ID (user's file) */
+    if(setpwmatrix != -1 && strlen(param_arg[setpwmatrix]) > 0) {
+        sop_lower_copy(tstr,sizeof tstr,param_arg[setpwmatrix]);
+        temp = sop_builtin_matrix(tstr,prot_matrices,4);
+        if (temp > 0) {
+            strcpy(pw_mtrxname, tstr);
+            pw_matnum = temp;
+        }
+        else if(user_mat(param_arg[setpwmatrix], pw_usermat, pw_aa_xref)) {
+            strcpy(pw_mtrxname,param_arg[setpwmatrix]);
+            strcpy(pw_usermtrxname,param_arg[setpwmatrix]);
+            pw_matnum=5;
+        }
+        else exit(1);
+    }
+
+/*** ? /matrix=ID (user's file) */
+    if(setmatrix != -1 && strlen(param_arg[setmatrix]) > 0) {
+        sop_lower_copy(tstr,sizeof tstr,param_arg[setmatrix]);
+        temp = sop_builtin_matrix(tstr,prot_matrices,4);
+        if (temp > 0) {
+            strcpy(mtrxname, tstr);
+            matnum = temp;
+        }
+        else if(user_mat_series(param_arg[setmatrix], usermat, aa_xref)) {
+            strcpy(mtrxname,param_arg[setmatrix]);
+            strcpy(usermtrxname,param_arg[setmatrix]);
+            matnum=5;
+        }
+        else exit(1);
+    }
+
+/*** ? /pwdnamatrix=ID (user's file) */
+    if(setpwdnamatrix != -1 && strlen(param_arg[setpwdnamatrix]) > 0) {
+        sop_lower_copy(tstr,sizeof tstr,param_arg[setpwdnamatrix]);
+        temp = sop_builtin_matrix(tstr,dna_matrices,2);
+        if (temp > 0) {
+            strcpy(pw_dnamtrxname, tstr);
+            pw_dnamatnum = temp;
+        }
+        else if(user_mat(param_arg[setpwdnamatrix], pw_userdnamat, pw_dna_xref)) {
+            strcpy(pw_dnamtrxname,param_arg[setpwdnamatrix]);
+            strcpy(pw_dnausermtrxname,param_arg[setpwdnamatrix]);
+            pw_dnamatnum=3;
+        }
+        else exit(1);
+    }
+
+/*** ? /dnamatrix=ID (user's file) */
+    if(setdnamatrix != -1 && strlen(param_arg[setdnamatrix]) > 0) {
+        sop_lower_copy(tstr,sizeof tstr,param_arg[setdnamatrix]);
+        temp = sop_builtin_matrix(tstr,dna_matrices,2);
+        if (temp > 0) {
+            strcpy(dnamtrxname, tstr);
+            dnamatnum = temp;
+        }
+        else if(user_mat(param_arg[setdnamatrix], userdnamat, dna_xref)) {
+            strcpy(dnamtrxname,param_arg[setdnamatrix]);
+            strcpy(dnausermtrxname,param_arg[setdnamatrix]);
+            dnamatnum=3;
+        }
+        else exit(1);
+    }
+
+/*** ? /maxdiv= n */
+    if(setmaxdiv != -1) {
+        /* the value is validated but deliberately not applied */
+        temp = sop_int_value(setmaxdiv,"maxdiv");
+        /* if (temp >= 0) divergence_cutoff = temp; */ /* JP: do not change the divergence cutoff */
+    }
+
+/*** ? /gapdist= n */
+    if(setgapdist != -1) {
+        temp = sop_int_value(setgapdist,"gapdist");
+        if (temp >= 0)
+            gap_dist = temp;
+    }
+
+/*** ? /debug= n */
+    if(setdebug != -1) {
+        temp = sop_int_value(setdebug,"debug");
+        if (temp >= 0)
+            debug = temp;
+    }
+
+/*** ? /outfile= (user's file) */
+    if(setoutfile != -1 && strlen(param_arg[setoutfile]) > 0) {
+        /* outfile_name holds FILENAMELEN characters plus the terminator;
+           a longer name is truncated instead of overrunning the buffer */
+        strncpy(outfile_name, param_arg[setoutfile], FILENAMELEN);
+        outfile_name[FILENAMELEN] = EOS;
+    }
+
+/*** ? /case= lower/upper */
+    if(setcase != -1 && strlen(param_arg[setcase]) > 0) {
+        temp = find_match(param_arg[setcase],case_arg,2);
+        if(temp == 0) {
+            lowercase = TRUE;
+        }
+        else if(temp == 1) {
+            lowercase = FALSE;
+        }
+        else
+            fprintf(stdout,"\nUnknown case %s\n",
+                param_arg[setcase]);
+    }
+
+/*** ? /seqnos=off/on */
+    if(setseqno != -1 && strlen(param_arg[setseqno]) > 0) {
+        temp = find_match(param_arg[setseqno],seqno_arg,2);
+        if(temp == 0) {
+            cl_seq_numbers = FALSE;
+        }
+        else if(temp == 1) {
+            cl_seq_numbers = TRUE;
+        }
+        else
+            fprintf(stdout,"\nUnknown SEQNO option %s\n",
+                param_arg[setseqno]);
+    }
+
+/*** ? /gapopen=n */
+    if(setgapopen != -1) {
+        ftemp = sop_float_value(setgapopen,"gapopen");
+        if(ftemp >= 0.0) {
+            gap_open = ftemp;
+            if(dnaflag)
+                dna_gap_open = gap_open;
+            else
+                prot_gap_open = gap_open;
+        }
+    }
+
+/*** ? /gapext=n */
+    if(setgapext != -1) {
+        ftemp = sop_float_value(setgapext,"gapext");
+        if(ftemp >= 0) {
+            gap_extend = ftemp;
+            if(dnaflag)
+                dna_gap_extend = gap_extend;
+            else
+                prot_gap_extend = gap_extend;
+        }
+    }
+
+/*** ? /transweight=n  (stored without a range check) */
+    if(settransweight != -1) {
+        ftemp = sop_float_value(settransweight,"transweight");
+        transition_weight=ftemp;
+    }
+
+/*** ? /pwgapopen=n */
+    if(setpwgapopen != -1) {
+        ftemp = sop_float_value(setpwgapopen,"pwgapopen");
+        if(ftemp >= 0.0) {
+            pw_go_penalty = ftemp;
+            if(dnaflag)
+                dna_pw_go_penalty = pw_go_penalty;
+            else
+                prot_pw_go_penalty = pw_go_penalty;
+        }
+    }
+
+/*** ? /pwgapext=n */
+    if(setpwgapext != -1) {
+        ftemp = sop_float_value(setpwgapext,"pwgapext");
+        if(ftemp >= 0) {
+            pw_ge_penalty = ftemp;
+            if(dnaflag)
+                dna_pw_ge_penalty = pw_ge_penalty;
+            else
+                prot_pw_ge_penalty = pw_ge_penalty;
+        }
+    }
+
+/*** ? /outorder=n */
+    /* an empty value is ignored, as for /case and /seqnos, rather than
+       acting on whatever temp held from an earlier option */
+    if(setoutorder != -1 && strlen(param_arg[setoutorder]) > 0) {
+        temp = find_match(param_arg[setoutorder],outorder_arg,2);
+        if(temp == 0) {
+            output_order = INPUT;
+        }
+        else if(temp == 1) {
+            output_order = ALIGNED;
+        }
+        else
+            fprintf(stdout,"\nUnknown OUTPUT ORDER type %s\n",
+                param_arg[setoutorder]);
+    }
+
+/*** ? /bootlabels=n */
+    if(setbootlabels != -1 && strlen(param_arg[setbootlabels]) > 0) {
+        temp = find_match(param_arg[setbootlabels],bootlabels_arg,2);
+        if(temp == 0) {
+            bootstrap_format = BS_NODE_LABELS;
+        }
+        else if(temp == 1) {
+            bootstrap_format = BS_BRANCH_LABELS;
+        }
+        else
+            /* report this option's own value (setoutorder may be -1) */
+            fprintf(stdout,"\nUnknown bootlabels type %s\n",
+                param_arg[setbootlabels]);
+    }
+
+/*** ? /endgaps */
+    if(setuseendgaps != -1)
+        use_endgaps = FALSE;
+
+/*** ? /nopgap */
+    if(setnopgap != -1)
+        no_pref_penalties = TRUE;
+
+/*** ? /nohgap */
+    if(setnohgap != -1)
+        no_hyd_penalties = TRUE;
+
+/*** ? /novgap */
+    if(setnovgap != -1)
+        no_var_penalties = FALSE;
+
+/*** ? /hgapresidues="string" */
+    /* overwrites hyd_residues in place, upper-cased, stopping at the first
+       non-letter, at the current length of hyd_residues, or at 26 */
+    if(sethgapres != -1 && strlen(param_arg[sethgapres]) > 0) {
+        nres = (int)strlen(hyd_residues);
+        for (i=0;i<nres && i<26;i++) {
+            c = (unsigned char)param_arg[sethgapres][i];
+            if (!isalpha(c))
+                break;
+            hyd_residues[i] = (char)toupper(c);
+        }
+    }
+
+/*** ? /nosecstr1 */
+    if(setsecstr1 != -1)
+        use_ss1 = FALSE;
+
+/*** ? /nosecstr2 */
+    if(setsecstr2 != -1)
+        use_ss2 = FALSE;
+
+/*** ? /secstroutput */
+    if(setsecstroutput != -1 && strlen(param_arg[setsecstroutput]) > 0) {
+        temp = find_match(param_arg[setsecstroutput],outputsecstr_arg,4);
+        if(temp >= 0 && temp <= 3)
+            output_struct_penalties = temp;
+        else
+            fprintf(stdout,"\nUnknown case %s\n",
+                param_arg[setsecstroutput]);
+    }
+
+/*** ? /helixgap= n  (1..9) */
+    if(sethelixgap != -1) {
+        temp = sop_int_value(sethelixgap,"helixgap");
+        if (temp >= 1 && temp <= 9)
+            helix_penalty = temp;
+    }
+
+/*** ? /strandgap= n  (1..9) */
+    if(setstrandgap != -1) {
+        temp = sop_int_value(setstrandgap,"strandgap");
+        if (temp >= 1 && temp <= 9)
+            strand_penalty = temp;
+    }
+
+/*** ? /loopgap= n  (1..9) */
+    if(setloopgap != -1) {
+        temp = sop_int_value(setloopgap,"loopgap");
+        if (temp >= 1 && temp <= 9)
+            loop_penalty = temp;
+    }
+
+/*** ? /terminalgap= n  (1..9, applied to both helix and strand ends) */
+    if(setterminalgap != -1) {
+        temp = sop_int_value(setterminalgap,"terminalgap");
+        if (temp >= 1 && temp <= 9) {
+            helix_end_penalty = temp;
+            strand_end_penalty = temp;
+        }
+    }
+
+/*** ? /helixendin= n  (0..3) */
+    if(sethelixendin != -1) {
+        temp = sop_int_value(sethelixendin,"helixendin");
+        if (temp >= 0 && temp <= 3)
+            helix_end_minus = temp;
+    }
+
+/*** ? /helixendout= n  (0..3) */
+    if(sethelixendout != -1) {
+        temp = sop_int_value(sethelixendout,"helixendout");
+        if (temp >= 0 && temp <= 3)
+            helix_end_plus = temp;
+    }
+
+/*** ? /strandendin= n  (0..3) */
+    if(setstrandendin != -1) {
+        temp = sop_int_value(setstrandendin,"strandendin");
+        if (temp >= 0 && temp <= 3)
+            strand_end_minus = temp;
+    }
+
+/*** ? /strandendout= n  (0..3) */
+    if(setstrandendout != -1) {
+        temp = sop_int_value(setstrandendout,"strandendout");
+        if (temp >= 0 && temp <= 3)
+            strand_end_plus = temp;
+    }
+
+/* JP below */
+/*** ? /ave_grp_id= n */
+    if(setave_grp_id != -1) {
+        temp = sop_int_value(setave_grp_id,"ave_grp_id");
+        if (temp >= 0)
+            ave_grp_id = temp;
+    }
+
+/*** ? /kk= n */
+    if(setKK != -1) {
+        temp = sop_int_value(setKK,"kk");
+        if (temp >= 1)
+            KK = temp;
+    }
+
+/*** ? /cosmetic_penalty= n */
+    if(setcosmetic != -1) {
+        temp = sop_int_value(setcosmetic,"cosmetic penalty");
+        if (temp >= 0)
+            cosmetic_penalty = temp;
+    }
+
+/*** ? /outputfirst= n */
+    if(setoutputfirst != -1) {
+        temp = sop_int_value(setoutputfirst,"outputfirst");
+        if (temp >= 1)
+            outputfirst = temp;
+    }
+}
+
+#ifdef UNIX
+#define Mxdir 70
+
+/* Try to open fname for reading in each directory of the ':'-separated
+   list dirs, in order.  A directory is skipped when "dir/fname" would not
+   fit in Mxdir characters.  Returns the first stream opened, or NULL. */
+static FILE *open_path_in_list(const char *dirs, const char *fname)
+{
+    char dir[Mxdir+1];
+    const char *deb = dirs;
+    const char *fin;
+    size_t ltot;
+    size_t lf = strlen(fname);
+    FILE *fich;
+
+    for (;;) {
+        fin = strchr(deb,':');
+        ltot = (fin != NULL) ? (size_t)(fin - deb) : strlen(deb);
+        /* check the length BEFORE copying, so that an over-long
+           directory can never overrun dir[] */
+        if (ltot + lf + 1 <= (size_t)Mxdir) {
+            memcpy(dir,deb,ltot);
+            dir[ltot]='/';
+            strcpy(dir+ltot+1,fname); /* now dir is appended with filename */
+            if ((fich = fopen(dir,"r")) != NULL) return fich;
+        }
+        if (fin == NULL) return NULL;
+        deb = fin+1;
+    }
+}
+
+/*
+ * open_path - open the file fname read-only, searching for it through all
+ * directories of the PATH environment variable and then through
+ * /usr/share/clustalx and /usr/local/share/clustalx (added for File System
+ * Standards - Francois).
+ *
+ * Returns the open stream, or NULL when the file is found in none of the
+ * directories.  PATH may be unset, in which case only the two share
+ * directories are searched.
+ */
+FILE *open_path(char *fname)
+{
+    const char *path = getenv("PATH"); /* list of directories separated by : */
+    FILE *fich = NULL;
+
+    if (path != NULL)
+        fich = open_path_in_list(path,fname);
+    /* searched as a separate list: appending these to PATH without a ':'
+       in between would fuse the last PATH entry with the first of them */
+    if (fich == NULL)
+        fich = open_path_in_list("/usr/share/clustalx:/usr/local/share/clustalx",fname);
+    return fich;
+}
+#endif
+
+
+/*
+ * get_help - print one section of the help file to stdout.
+ *
+ * help_pointer : the character identifying the wanted section.
+ *
+ * The help file (help_file_name) is read line by line.  A line containing
+ * "HELP" starts a section; the section's token is the first character from
+ * "0-9A-Z" found at positions 4..7 of that line, or ' ' when there is
+ * none.  When the token equals help_pointer, the following lines are
+ * printed up to the next "HELP" line or the end of the file; lines
+ * starting with '<' are not printed.  With usemenu set, the output pauses
+ * every PAGE_LEN lines and once more at the end.
+ *
+ * Reports an error when the file cannot be opened or when no section
+ * matched.  The file is closed exactly once on every path.
+ */
+void get_help(char help_pointer) /* Help procedure */
+{
+    FILE *help_file;
+    sint i, number, nlines;
+    Boolean found_help;
+    char temp[MAXLINE+1];
+    char token = '\0';
+    char *digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+    char *help_marker = "HELP";
+
+    extern char *help_file_name;
+
+#ifdef VMS
+    if((help_file=fopen(help_file_name,"r","rat=cr","rfm=var"))==NULL) {
+        error("Cannot open help file [%s]",help_file_name);
+        return;
+    }
+#else
+
+#ifdef UNIX
+    /* search the PATH directories first, then fall back to the bare name */
+    if((help_file=open_path(help_file_name))==NULL) {
+        if((help_file=fopen(help_file_name,"r"))==NULL) {
+            error("Cannot open help file [%s]",help_file_name);
+            return;
+        }
+    }
+
+#else
+    if((help_file=fopen(help_file_name,"r"))==NULL) {
+        error("Cannot open help file [%s]",help_file_name);
+        return;
+    }
+#endif
+
+#endif
+    nlines = 0;
+    number = -1;
+    found_help = FALSE;
+
+    while(TRUE) {
+        if(fgets(temp,MAXLINE+1,help_file) == NULL) {
+            if(!found_help)
+                error("No help found in help file");
+            fclose(help_file);
+            return;
+        }
+        if(strstr(temp,help_marker)) {
+            token = ' ';
+            /* stop at the end of the line: strchr() would otherwise
+               "find" the string terminator inside digits */
+            for(i=(sint)strlen(help_marker); i<8 && temp[i]!='\0'; i++)
+                if(strchr(digits, temp[i])) {
+                    token = temp[i];
+                    break;
+                }
+        }
+        if(token == help_pointer) {
+            found_help = TRUE;
+            while(fgets(temp,MAXLINE+1,help_file)) {
+                if(strstr(temp, help_marker)){
+                    /* next section reached: this one is finished */
+                    if(usemenu) {
+                        fprintf(stdout,"\n");
+                        getstr("Press [RETURN] to continue",lin2);
+                    }
+                    fclose(help_file);
+                    return;
+                }
+                if(temp[0]!='<') {
+                    fputs(temp,stdout);
+                    ++nlines;
+                }
+                if(usemenu) {
+                    if(nlines >= PAGE_LEN) {
+                        fprintf(stdout,"\n");
+                        getstr("Press [RETURN] to continue or X to stop",lin2);
+                        if(toupper((unsigned char)*lin2) == 'X') {
+                            fclose(help_file);
+                            return;
+                        }
+                        else
+                            nlines = 0;
+                    }
+                }
+            }
+            /* end of file inside the wanted section */
+            if(usemenu) {
+                fprintf(stdout,"\n");
+                getstr("Press [RETURN] to continue",lin2);
+            }
+            fclose(help_file);
+            /* must return here: going round the outer loop again would
+               read from, and then close, an already closed stream */
+            return;
+        }
+    }
+}
+
+/*
+ * show_aln - page the alignment output file to stdout.
+ *
+ * The file shown is the one belonging to the first output format whose
+ * flag is set, tested in the order clustal, nbrf, gcg, phylip, gde, nexus.
+ * After every PAGE_LEN lines the user is prompted and may stop with X.
+ * When no format flag is set the file name is empty, so the open fails and
+ * an error is reported.
+ */
+static void show_aln(void) /* Alignment screen display procedure */
+{
+    FILE *file;
+    sint nlines;
+    char temp[MAXLINE+1];
+    char file_name[FILENAMELEN+1];
+    const char *chosen = ""; /* stays empty when no format is selected */
+
+    if(output_clustal) chosen = clustal_outname;
+    else if(output_nbrf) chosen = nbrf_outname;
+    else if(output_gcg) chosen = gcg_outname;
+    else if(output_phylip) chosen = phylip_outname;
+    else if(output_gde) chosen = gde_outname;
+    else if(output_nexus) chosen = nexus_outname;
+    /* every candidate is a FILENAMELEN+1 byte array, as is file_name */
+    strcpy(file_name,chosen);
+
+#ifdef VMS
+    if((file=fopen(file_name,"r","rat=cr","rfm=var"))==NULL) {
+#else
+    if((file=fopen(file_name,"r"))==NULL) {
+#endif
+        error("Cannot open file [%s]",file_name);
+        return;
+    }
+
+    fprintf(stdout,"\n\n");
+    nlines = 0;
+
+    while(fgets(temp,MAXLINE+1,file)) {
+        fputs(temp,stdout);
+        ++nlines;
+        if(nlines >= PAGE_LEN) {
+            fprintf(stdout,"\n");
+            getstr("Press [RETURN] to continue or X to stop",lin2);
+            if(toupper((unsigned char)*lin2) == 'X') {
+                fclose(file);
+                return;
+            }
+            else
+                nlines = 0;
+        }
+    }
+    fclose(file);
+    fprintf(stdout,"\n");
+    getstr("Press [RETURN] to continue",lin2);
+}
+
+
+void parse_params(Boolean xmenus)
+{
+ sint i,j,len,temp;
+ static sint cl_error_code=0;
+ char path[FILENAMELEN];
+
+
+ Boolean do_align, do_convert, do_align_only, do_tree_only, do_tree, do_boot, do_profile, do_something;
+
+ /*JP */
+ /*
+ if (!xmenus)
+ {
+ fprintf(stdout,"\n\n\n");
+ fprintf(stdout,"PCMA - Profile Consistency Multiple sequence Alignment\n\n");
+ }*/
+
+ do_align = do_convert = do_align_only = do_tree_only = do_tree = do_boot = do_profile = do_something = FALSE;
+
+ *seqname=EOS;
+
+/* JULIE
+ len=(sint)strlen(paramstr);
+ Stop converting command line to lower case - unix, mac, pc are case sensitive
+ for(i=0;i<len;++i) paramstr[i]=tolower(paramstr[i]);
+*/
+
+ numparams = check_param(args, params, param_arg);
+ if (numparams <0) exit(1);
+
+ if(sethelp != -1) {
+ /*JP: disable help */
+ /*
+ get_help('9');
+ */
+ print_help();
+ exit(1);
+ }
+
+ if(setoptions != -1) {
+ //fprintf(stdout,"pcma options:\n");
+ print_help();
+ /*JP: disable options*/
+ /*for (i=0;cmd_line_verb[i].str[0] != '\0';i++) {
+ fprintf(stdout,"$ \t\t%c%s%s",COMMANDSEP,cmd_line_verb[i].str,cmd_line_type[cmd_line_verb[i].type]);
+ if (cmd_line_verb[i].type == OPTARG) {
+ if (cmd_line_verb[i].arg[0][0] != '\0')
+ fprintf(stdout,"=%s",cmd_line_verb[i].arg[0]);
+ for (j=1;cmd_line_verb[i].arg[j][0] != '\0';j++)
+ fprintf(stdout," OR %s",cmd_line_verb[i].arg[j]);
+ }
+ fprintf(stdout,"\n");
+ }
+ for (i=0;cmd_line_file[i].str[0] != '\0';i++) {
+ fprintf(stdout,"\t\t%c%s%s",COMMANDSEP,cmd_line_file[i].str,cmd_line_type[cmd_line_file[i].type]);
+ if (cmd_line_file[i].type == OPTARG) {
+ if (cmd_line_file[i].arg[0][0] != '\0')
+ fprintf(stdout,"=%s",cmd_line_file[i].arg[0]);
+ for (j=1;cmd_line_file[i].arg[j][0] != '\0';j++)
+ fprintf(stdout," OR %s",cmd_line_file[i].arg[j]);
+ }
+ fprintf(stdout,"\n");
+ }
+
+ for (i=0;cmd_line_para[i].str[0] != '\0';i++) {
+ if( (strcmp(cmd_line_para[i].str, "kk")==0) || (strcmp(cmd_line_para[i].str, "ave_grp_id")==0 )
+ || (strcmp(cmd_line_para[i].str, "cosmetic")==0) || (strcmp(cmd_line_para[i].str, "outfile")==0 ) ){
+ fprintf(stdout,"\t\t%c%s%s",COMMANDSEP,cmd_line_para[i].str,cmd_line_type[cmd_line_para[i].type]);
+ if (cmd_line_para[i].type == OPTARG) {
+ if (cmd_line_para[i].arg[0][0] != '\0')
+ fprintf(stdout,"=%s",cmd_line_para[i].arg[0]);
+ for (j=1;cmd_line_para[i].arg[j][0] != '\0';j++)
+ fprintf(stdout," OR %s",cmd_line_para[i].arg[j]);
+ }
+ fprintf(stdout,"\n");
+ }
+ } */
+ /*fprintf(stdout, "* newly introdunced options in PCMA: ave_grp_id, cosmetic, kk.\n");*/
+ exit(1);
+ }
+
+
+/*****************************************************************************/
+/* Check to see if sequence type is explicitely stated..override ************/
+/* the automatic checking (DNA or Protein). /type=d or /type=p *************/
+/*****************************************************************************/
+ if(settype != -1)
+ if(strlen(param_arg[settype])>0) {
+ temp = find_match(param_arg[settype],type_arg,2);
+ if(temp == 0) {
+ dnaflag = FALSE;
+ explicit_dnaflag = TRUE;
+ info("Sequence type explicitly set to Protein");
+ }
+ else if(temp == 1) {
+ info("Sequence type explicitly set to DNA");
+ dnaflag = TRUE;
+ explicit_dnaflag = TRUE;
+ }
+ else
+ fprintf(stdout,"\nUnknown sequence type %s\n",
+ param_arg[settype]);
+ }
+
+
+/***************************************************************************
+* check to see if 1st parameter does not start with '/' i.e. look for an *
+* input file as first parameter. The input file can also be specified *
+* by /infile=fname. *
+****************************************************************************/
+/* JULIE - moved to check_param()
+ if(paramstr[0] != '/') {
+ strcpy(seqname, params[0]);
+ }
+*/
+
+/**************************************************/
+/* Look for /infile=file.ext on the command line */
+/**************************************************/
+
+ if(setinfile != -1) {
+ if(strlen(param_arg[setinfile]) <= 0) {
+ error("Bad sequence file name");
+ exit(1);
+ }
+ strcpy(seqname, param_arg[setinfile]);
+ }
+
+ if(*seqname != EOS) {
+ profile_no = 0;
+ nseqs = readseqs((sint)1);
+ if(nseqs < 2) {
+ if(nseqs < 0) cl_error_code = 2;
+ else if(nseqs == 0) cl_error_code = 3;
+ else cl_error_code = 4;
+ fprintf(stdout,
+ "\nNo. of seqs. read = %d. No alignment!\n",(pint)nseqs);
+ exit(cl_error_code);
+ }
+ /* JP: disable info */
+ /* for(i = 1; i<=nseqs; i++)
+ info("Sequence %d: %-*s %6.d %s",
+ (pint)i,max_names,names[i],(pint)seqlen_array[i],dnaflag?"bp":"aa");
+ */
+ empty = FALSE;
+ do_something = TRUE;
+ }
+
+ set_optional_param();
+
+/*********************************************************/
+/* Look for /profile1=file.ext AND /profile2=file2.ext */
+/* You must give both file names OR neither. */
+/*********************************************************/
+
+ if(setprofile1 != -1) {
+ if(strlen(param_arg[setprofile1]) <= 0) {
+ error("Bad profile 1 file name");
+ exit(1);
+ }
+ strcpy(seqname, param_arg[setprofile1]);
+ profile_no = 1;
+ profile_input();
+ if(nseqs <= 0) {
+ if(nseqs<0) cl_error_code=2;
+ else if(nseqs==0) cl_error_code=3;
+ exit(cl_error_code);
+ }
+ strcpy(profile1_name,seqname);
+ }
+
+ if(setprofile2 != -1) {
+ if(strlen(param_arg[setprofile2]) <= 0) {
+ error("Bad profile 2 file name");
+ exit(1);
+ }
+ if(profile1_empty) {
+ error("Only 1 profile file (profile 2) specified.");
+ exit(1);
+ }
+ strcpy(seqname, param_arg[setprofile2]);
+ profile_no = 2;
+ profile_input();
+ if(nseqs > profile1_nseqs)
+ do_something = do_profile = TRUE;
+ else {
+ if(nseqs<0) cl_error_code=2;
+ else if(nseqs==0) cl_error_code=3;
+ error("No sequences read from profile 2");
+ exit(cl_error_code);
+ }
+ strcpy(profile2_name,seqname);
+ }
+
+/*************************************************************************/
+/* Look for /tree or /bootstrap or /align or /usetree ******************/
+/*************************************************************************/
+
+ if (setbatch != -1)
+ interactive=FALSE;
+
+ if (setinteractive != -1)
+ interactive=TRUE;
+
+ if (interactive) {
+ settree = -1;
+ setbootstrap = -1;
+ setalign = -1;
+ setusetree = -1;
+ setusetree1 = -1;
+ setusetree2 = -1;
+ setnewtree = -1;
+ setconvert = -1;
+ }
+
+ if(settree != -1 )
+ if(empty) {
+ error("Cannot draw tree. No input alignment file");
+ exit(1);
+ }
+ else
+ do_tree = TRUE;
+
+ if(setbootstrap != -1)
+ if(empty) {
+ error("Cannot bootstrap tree. No input alignment file");
+ exit(1);
+ }
+ else {
+ temp = 0;
+ if(param_arg[setbootstrap] != NULL)
+ if (sscanf(param_arg[setbootstrap],"%d",&temp)!=1) {
+ fprintf(stdout,"Bad option for /bootstrap (must be integer)\n");
+ temp = 0;
+ };
+ if(temp > 0) boot_ntrials = temp;
+ do_boot = TRUE;
+ }
+
+ if(setalign != -1)
+ if(empty) {
+ error("Cannot align sequences. No input file");
+ exit(1);
+ }
+ else
+ do_align = TRUE;
+
+ if(setconvert != -1)
+ if(empty) {
+ error("Cannot convert sequences. No input file");
+ exit(1);
+ }
+ else
+ do_convert = TRUE;
+
+ if(setusetree != -1)
+ if(empty) {
+ error("Cannot align sequences. No input file");
+ exit(1);
+ }
+ else {
+ if(strlen(param_arg[setusetree]) == 0) {
+ error("Cannot align sequences. No tree file specified");
+ exit(1);
+ }
+ else {
+ strcpy(phylip_tree_name, param_arg[setusetree]);
+ }
+ use_tree_file = TRUE;
+ do_align_only = TRUE;
+ }
+
+ if(setnewtree != -1)
+ if(empty) {
+ error("Cannot align sequences. No input file");
+ exit(1);
+ }
+ else {
+ if(strlen(param_arg[setnewtree]) == 0) {
+ error("Cannot align sequences. No tree file specified");
+ exit(1);
+ }
+ else {
+ strcpy(phylip_tree_name, param_arg[setnewtree]);
+ }
+ new_tree_file = TRUE;
+ do_tree_only = TRUE;
+ }
+
+ if(setusetree1 != -1)
+ if(profile1_empty) {
+ error("Cannot align profiles. No input file");
+ exit(1);
+ }
+ else if(profile_type == SEQUENCE) {
+ error("Invalid option /usetree1.");
+ exit(1);
+ }
+ else {
+ if(strlen(param_arg[setusetree1]) == 0) {
+ error("Cannot align profiles. No tree file specified");
+ exit(1);
+ }
+ else {
+ strcpy(p1_tree_name, param_arg[setusetree1]);
+ }
+ use_tree1_file = TRUE;
+ do_align_only = TRUE;
+ }
+
+ if(setnewtree1 != -1)
+ if(profile1_empty) {
+ error("Cannot align profiles. No input file");
+ exit(1);
+ }
+ else if(profile_type == SEQUENCE) {
+ error("Invalid option /newtree1.");
+ exit(1);
+ }
+ else {
+ if(strlen(param_arg[setnewtree1]) == 0) {
+ error("Cannot align profiles. No tree file specified");
+ exit(1);
+ }
+ else {
+ strcpy(p1_tree_name, param_arg[setnewtree1]);
+ }
+ new_tree1_file = TRUE;
+ }
+
+ if(setusetree2 != -1)
+ if(profile2_empty) {
+ error("Cannot align profiles. No input file");
+ exit(1);
+ }
+ else if(profile_type == SEQUENCE) {
+ error("Invalid option /usetree2.");
+ exit(1);
+ }
+ else {
+ if(strlen(param_arg[setusetree2]) == 0) {
+ error("Cannot align profiles. No tree file specified");
+ exit(1);
+ }
+ else {
+ strcpy(p2_tree_name, param_arg[setusetree2]);
+ }
+ use_tree2_file = TRUE;
+ do_align_only = TRUE;
+ }
+
+ if(setnewtree2 != -1)
+ if(profile2_empty) {
+ error("Cannot align profiles. No input file");
+ exit(1);
+ }
+ else if(profile_type == SEQUENCE) {
+ error("Invalid option /newtree2.");
+ exit(1);
+ }
+ else {
+ if(strlen(param_arg[setnewtree2]) == 0) {
+ error("Cannot align profiles. No tree file specified");
+ exit(1);
+ }
+ else {
+ strcpy(p2_tree_name, param_arg[setnewtree2]);
+ }
+ new_tree2_file = TRUE;
+ }
+
+
+ if( (!do_tree) && (!do_boot) && (!empty) && (!do_profile) && (!do_align_only) && (!do_tree_only) && (!do_convert))
+ do_align = TRUE;
+
+/*** ? /quicktree */
+ if(setquicktree != -1)
+ quick_pairalign = TRUE;
+
+ if(dnaflag) {
+ gap_open = dna_gap_open;
+ gap_extend = dna_gap_extend;
+ pw_go_penalty = dna_pw_go_penalty;
+ pw_ge_penalty = dna_pw_ge_penalty;
+ ktup = dna_ktup;
+ window = dna_window;
+ signif = dna_signif;
+ wind_gap = dna_wind_gap;
+
+ }
+ else {
+ gap_open = prot_gap_open;
+ gap_extend = prot_gap_extend;
+ pw_go_penalty = prot_pw_go_penalty;
+ pw_ge_penalty = prot_pw_ge_penalty;
+ ktup = prot_ktup;
+ window = prot_window;
+ signif = prot_signif;
+ wind_gap = prot_wind_gap;
+ }
+
+ if(interactive) {
+ if (!xmenus) usemenu = TRUE;
+ return;
+ }
+
+
+ if(!do_something) {
+ error("No input file(s) specified");
+ exit(1);
+ }
+
+
+
+
+/****************************************************************************/
+/* Now do whatever has been requested ***************************************/
+/****************************************************************************/
+
+
+ if(do_profile) {
+ if (profile_type == PROFILE) profile_align(p1_tree_name,p2_tree_name);
+ else new_sequence_align(phylip_tree_name);
+ }
+
+ else if(do_align)
+ align(phylip_tree_name);
+
+ else if(do_convert) {
+ get_path(seqname,path);
+ if(!open_alignment_output(path)) exit(1);
+ create_alignment_output(1,nseqs);
+ }
+
+ else if (do_align_only)
+ get_tree(phylip_tree_name);
+
+ else if(do_tree_only)
+ make_tree(phylip_tree_name);
+
+ else if(do_tree)
+ phylogenetic_tree(phylip_tree_name,clustal_tree_name,dist_tree_name,nexus_tree_name);
+
+ else if(do_boot)
+ bootstrap_tree(phylip_tree_name,clustal_tree_name,nexus_tree_name);
+
+ fprintf(stdout,"\n");
+ exit(0);
+
+/*******whew!***now*go*home****/
+}
+
+
+/*
+ * Load a user-supplied comparison matrix from a file.
+ *
+ * str   in menu mode the name is asked for with getstr() and str is only
+ *       an output; otherwise str supplies the file name.  On success the
+ *       name actually used is copied back into str.
+ * mat   passed through unchanged to read_user_matrix().
+ * xref  passed through unchanged to read_user_matrix().
+ *
+ * Returns TRUE when read_user_matrix() reports a positive value, FALSE
+ * when the name is empty, the file cannot be opened, or the reader
+ * reports <= 0.
+ */
+Boolean user_mat(char *str, short *mat, short *xref)
+{
+    sint maxres;
+    FILE *infile;
+
+    if(usemenu)
+        getstr("Enter name of the matrix file",lin2);
+    else
+        strcpy(lin2,str);
+
+    if(*lin2 == EOS) return FALSE;
+
+    /*
+     * The fopen() is only a readability probe: read_user_matrix() is given
+     * the file name, not this handle.  Close the handle straight away so it
+     * is not leaked on every call.
+     */
+    if((infile=fopen(lin2,"r"))==NULL) {
+        error("Cannot find matrix file [%s]",lin2);
+        return FALSE;
+    }
+    fclose(infile);
+
+    strcpy(str, lin2);
+
+    maxres = read_user_matrix(str, mat, xref);
+    if (maxres <= 0) return FALSE;
+
+    return TRUE;
+}
+
+/*
+ * Load a user-supplied series of comparison matrices from a file.
+ *
+ * str   in menu mode the name is asked for with getstr() and str is only
+ *       an output; otherwise str supplies the file name.  On success the
+ *       name actually used is copied back into str.
+ * mat   passed through unchanged to read_matrix_series().
+ * xref  passed through unchanged to read_matrix_series().
+ *
+ * Returns TRUE when read_matrix_series() reports a positive value, FALSE
+ * when the name is empty, the file cannot be opened, or the reader
+ * reports <= 0.
+ */
+Boolean user_mat_series(char *str, short *mat, short *xref)
+{
+    sint maxres;
+    FILE *infile;
+
+    if(usemenu)
+        getstr("Enter name of the matrix file",lin2);
+    else
+        strcpy(lin2,str);
+
+    if(*lin2 == EOS) return FALSE;
+
+    /*
+     * The fopen() is only a readability probe: read_matrix_series() is
+     * given the file name, not this handle.  Close the handle straight
+     * away so it is not leaked on every call.
+     */
+    if((infile=fopen(lin2,"r"))==NULL) {
+        error("Cannot find matrix file [%s]",lin2);
+        return FALSE;
+    }
+    fclose(infile);
+
+    strcpy(str, lin2);
+
+    maxres = read_matrix_series(str, mat, xref);
+    if (maxres <= 0) return FALSE;
+
+    return TRUE;
+}
+
+
+
+
+
+
+/*
+ * Read sequences from the current input file into memory.
+ *
+ * append  when true the new sequences are numbered after the ones already
+ *         loaded and nseqs grows by the number read; otherwise numbering
+ *         starts at 1 and nseqs is replaced.
+ *
+ * Returns whatever readseqs() returned: negative when it failed, 0 when no
+ * sequences were found (an error is reported), otherwise the number of
+ * sequences read by this call.
+ */
+sint seq_input(Boolean append)
+{
+    sint first_seq;
+    sint n_read;
+
+    if(usemenu) {
+        fprintf(stdout,"\n\nSequences should all be in 1 file.\n");
+        fprintf(stdout,"\n7 formats accepted: \n");
+        fprintf(stdout,
+            "NBRF/PIR, EMBL/SwissProt, Pearson (Fasta), GDE, Clustal, GCG/MSF, RSF.\n\n\n");
+    }
+
+    /* 1 is the first seq to be read unless we are appending */
+    first_seq = append ? nseqs+(sint)1 : (sint)1;
+    n_read = readseqs(first_seq);
+
+    if(n_read < 0)          /* readseqs failed; nothing else to do */
+        return n_read;
+
+    if(n_read == 0) {       /* no sequences */
+        error("No sequences in file! Bad format?");
+        return n_read;
+    }
+
+    /* a fresh set of sequences invalidates any per-profile structure data */
+    struct_penalties1 = NONE;
+    struct_penalties2 = NONE;
+    if (sec_struct_mask1 != NULL) sec_struct_mask1=ckfree(sec_struct_mask1);
+    if (sec_struct_mask2 != NULL) sec_struct_mask2=ckfree(sec_struct_mask2);
+    if (gap_penalty_mask1 != NULL) gap_penalty_mask1=ckfree(gap_penalty_mask1);
+    if (gap_penalty_mask2 != NULL) gap_penalty_mask2=ckfree(gap_penalty_mask2);
+    if (ss_name1 != NULL) ss_name1=ckfree(ss_name1);
+    if (ss_name2 != NULL) ss_name2=ckfree(ss_name2);
+
+    nseqs = append ? nseqs+n_read : n_read;
+
+    info("Sequences assumed to be %s",
+        dnaflag?"DNA":"PROTEIN");
+
+    /* JP: the per-sequence listing is disabled; only the spacer is printed */
+    if (usemenu)
+        fprintf(stdout,"\n\n");
+
+    if(dnaflag) {
+        gap_open = dna_gap_open;
+        gap_extend = dna_gap_extend;
+    }
+    else {
+        gap_open = prot_gap_open;
+        gap_extend = prot_gap_extend;
+    }
+
+    empty=FALSE;
+    return n_read;
+}
+
+
+
+
+
+
+
+/*
+ * Read one profile into memory; the global profile_no (1 or 2) selects
+ * which one.
+ *
+ * Profile 1 is read starting at sequence 1.  Profile 2 is read starting at
+ * sequence profile1_nseqs+1 and is refused (error reported, 0 returned)
+ * while profile 1 is still empty.
+ *
+ * On a successful read, the structure data set by the reader
+ * (struct_penalties, sec_struct_mask, gap_penalty_mask, ss_name) is copied
+ * into the per-profile variables and the shared buffers are released.
+ *
+ * Returns the readseqs() result when it is <= 0, otherwise the total
+ * number of sequences now in memory (nseqs).
+ */
+sint profile_input(void) /* read a profile */
+{ /* profile_no is 1 or 2 */
+ sint local_nseqs, i;
+
+ if(profile_no == 2 && profile1_empty)
+ {
+ error("You must read in profile number 1 first");
+ return 0;
+ }
+
+ if(profile_no == 1) /* for the 1st profile */
+ {
+ local_nseqs = readseqs((sint)1); /* (1) means 1st seq to be read = no. 1 */
+ /* both failure returns below leave before the shared masks are freed */
+ if(local_nseqs < 0) /* file could not be opened */
+ {
+ return local_nseqs;
+ }
+ else if(local_nseqs == 0) /* no sequences */
+ {
+ error("No sequences in file! Bad format?");
+ return local_nseqs;
+ }
+ else if (local_nseqs > 0)
+ { /* success; found some seqs. */
+ /* discard whatever structure data profile 1 held before */
+ struct_penalties1 = NONE;
+ if (sec_struct_mask1 != NULL) sec_struct_mask1=ckfree(sec_struct_mask1);
+ if (gap_penalty_mask1 != NULL) gap_penalty_mask1=ckfree(gap_penalty_mask1);
+ if (ss_name1 != NULL) ss_name1=ckfree(ss_name1);
+ if (struct_penalties != NONE) /* feature table / mask in alignment */
+ {
+ struct_penalties1 = struct_penalties;
+ /* the secondary structure mask is only copied for SECST input */
+ if (struct_penalties == SECST) {
+ sec_struct_mask1 = (char *)ckalloc((max_aln_length) * sizeof (char));
+ for (i=0;i<max_aln_length;i++)
+ sec_struct_mask1[i] = sec_struct_mask[i];
+ }
+ gap_penalty_mask1 = (char *)ckalloc((max_aln_length) * sizeof (char));
+ for (i=0;i<max_aln_length;i++)
+ gap_penalty_mask1[i] = gap_penalty_mask[i];
+ ss_name1 = (char *)ckalloc( (MAXNAMES+1) * sizeof (char));
+
+ strcpy(ss_name1,ss_name);
+if (debug>0) {
+for (i=0;i<seqlen_array[1];i++)
+ fprintf(stdout,"%c",gap_penalty_mask1[i]);
+fprintf(stdout,"\n");
+}
+ }
+ nseqs = profile1_nseqs = local_nseqs;
+ info("No. of seqs=%d",(pint)nseqs);
+ /* loading profile 1 marks profile 2 as empty again */
+ profile1_empty=FALSE;
+ profile2_empty=TRUE;
+ }
+ }
+ else
+ { /* first seq to be read = profile1_nseqs + 1 */
+ local_nseqs = readseqs(profile1_nseqs+(sint)1);
+ if(local_nseqs < 0) /* file could not be opened */
+ {
+ return local_nseqs;
+ }
+ else if(local_nseqs == 0) /* no sequences */
+ {
+ error("No sequences in file! Bad format?");
+ return local_nseqs;
+ }
+ else if(local_nseqs > 0)
+ {
+ /* discard whatever structure data profile 2 held before */
+ struct_penalties2 = NONE;
+ if (sec_struct_mask2 != NULL) sec_struct_mask2=ckfree(sec_struct_mask2);
+ if (gap_penalty_mask2 != NULL) gap_penalty_mask2=ckfree(gap_penalty_mask2);
+ if (ss_name2 != NULL) ss_name2=ckfree(ss_name2);
+ if (struct_penalties != NONE) /* feature table / mask in alignment */
+ {
+ struct_penalties2 = struct_penalties;
+ if (struct_penalties == SECST) {
+ sec_struct_mask2 = (char *)ckalloc((max_aln_length) * sizeof (char));
+ for (i=0;i<max_aln_length;i++)
+ sec_struct_mask2[i] = sec_struct_mask[i];
+ }
+ gap_penalty_mask2 = (char *)ckalloc((max_aln_length) * sizeof (char));
+ for (i=0;i<max_aln_length;i++)
+ gap_penalty_mask2[i] = gap_penalty_mask[i];
+ ss_name2 = (char *)ckalloc( (MAXNAMES+1) * sizeof (char));
+ strcpy(ss_name2,ss_name);
+if (debug>0) {
+for (i=0;i<seqlen_array[profile1_nseqs+1];i++)
+ fprintf(stdout,"%c",gap_penalty_mask2[i]);
+fprintf(stdout,"\n");
+}
+ }
+ info("No. of seqs in profile=%d",(pint)local_nseqs);
+ nseqs = profile1_nseqs + local_nseqs;
+ info("Total no. of seqs =%d",(pint)nseqs);
+ profile2_empty=FALSE;
+ empty = FALSE;
+ }
+
+ }
+ /* the shared reader-side buffers have been copied; release them */
+ if (sec_struct_mask != NULL) sec_struct_mask=ckfree(sec_struct_mask);
+ if (gap_penalty_mask != NULL) gap_penalty_mask=ckfree(gap_penalty_mask);
+ if (ss_name != NULL) ss_name=ckfree(ss_name);
+
+ /* defensive only: both branches above already return when local_nseqs <= 0 */
+ if(local_nseqs<=0) return local_nseqs;
+
+ info("Sequences assumed to be %s",
+ dnaflag?"DNA":"PROTEIN");
+ if (usemenu) fprintf(stdout,"\n\n");
+ /* list every sequence after profile 1, or only the new ones after profile 2 */
+ for(i=profile2_empty?1:profile1_nseqs+1; i<=nseqs; i++) {
+ info("Sequence %d: %-*s %6.d %s",
+ (pint)i,max_names,names[i],(pint)seqlen_array[i],dnaflag?"bp":"aa");
+ }
+ if(dnaflag) {
+ gap_open = dna_gap_open;
+ gap_extend = dna_gap_extend;
+ }
+ else {
+ gap_open = prot_gap_open;
+ gap_extend = prot_gap_extend;
+ }
+
+ return nseqs;
+}
+
+
+
+/*
+ * Derive a per-position gap penalty mask from a secondary structure mask.
+ *
+ * prf_length  number of positions to process.
+ * mask        secondary structure mask: 'a'/'A' or '$' marks a helix
+ *             position, 'b'/'B' or '%' a strand position; '$' and '%'
+ *             also terminate the run they belong to.
+ * gap_mask    output, one character per position: the matching penalty
+ *             value (helix_penalty, helix_end_penalty, strand_penalty,
+ *             strand_end_penalty or loop_penalty) plus '0'.
+ *
+ * A working copy (struct_mask) is first filled with 'A'/'a'/'B'/'b'
+ * markers and then translated into gap_mask.  Within each run the first
+ * and last *_end_minus positions are marked lower case, the *_end_plus
+ * positions on either side of the run are marked lower case too, and the
+ * remaining positions are copied verbatim from mask.
+ */
+static void calc_gap_penalty_mask(int prf_length, char *mask, char *gap_mask)
+{
+    int i,j;
+    char *struct_mask;
+
+    /*
+     * NOTE(review): struct_mask is read below (the "before the run" loops
+     * and the final switch) at positions that may not have been written;
+     * this presumably relies on ckalloc() returning zeroed memory - confirm.
+     */
+    struct_mask = (char *)ckalloc((prf_length+1) * sizeof(char));
+
+    i=0;
+    while (i<prf_length) {
+        if (tolower(mask[i]) == 'a' || mask[i] == '$') {
+            /* positions just before the helix, unless already marked */
+            for (j = -helix_end_plus; j<0; j++) {
+                if ((i+j>=0) && (tolower(struct_mask[i+j]) != 'a')
+                             && (tolower(struct_mask[i+j]) != 'b'))
+                    struct_mask[i+j] = 'a';
+            }
+            /* first helix_end_minus positions inside the helix */
+            for (j = 0; j<helix_end_minus; j++) {
+                if (i+j>=prf_length || (tolower(mask[i+j]) != 'a'
+                                    && mask[i+j] != '$')) break;
+                struct_mask[i+j] = 'a';
+            }
+            i += j;
+            /*
+             * Remainder of the helix.  The bound is tested before mask[i]
+             * is read so the loop never looks past prf_length.
+             */
+            while (i<prf_length && (tolower(mask[i]) == 'a'
+                                || mask[i] == '$')) {
+                if (mask[i] == '$') {
+                    struct_mask[i] = 'A';
+                    i++;
+                    break;
+                }
+                else struct_mask[i] = mask[i];
+                i++;
+            }
+            /* last helix_end_minus positions inside the helix */
+            for (j = 0; j<helix_end_minus; j++) {
+                if ((i-j-1>=0) && (tolower(mask[i-j-1]) == 'a'
+                                    || mask[i-j-1] == '$'))
+                    struct_mask[i-j-1] = 'a';
+            }
+            /* positions just after the helix */
+            for (j = 0; j<helix_end_plus; j++) {
+                if (i+j>=prf_length) break;
+                struct_mask[i+j] = 'a';
+            }
+        }
+        else if (tolower(mask[i]) == 'b' || mask[i] == '%') {
+            /* positions just before the strand, unless already marked */
+            for (j = -strand_end_plus; j<0; j++) {
+                if ((i+j>=0) && (tolower(struct_mask[i+j]) != 'a')
+                             && (tolower(struct_mask[i+j]) != 'b'))
+                    struct_mask[i+j] = 'b';
+            }
+            /* first strand_end_minus positions inside the strand */
+            for (j = 0; j<strand_end_minus; j++) {
+                if (i+j>=prf_length || (tolower(mask[i+j]) != 'b'
+                                    && mask[i+j] != '%')) break;
+                struct_mask[i+j] = 'b';
+            }
+            i += j;
+            /* remainder of the strand; bound tested before the read */
+            while (i<prf_length && (tolower(mask[i]) == 'b'
+                                || mask[i] == '%')) {
+                if (mask[i] == '%') {
+                    struct_mask[i] = 'B';
+                    i++;
+                    break;
+                }
+                else struct_mask[i] = mask[i];
+                i++;
+            }
+            /* last strand_end_minus positions inside the strand */
+            for (j = 0; j<strand_end_minus; j++) {
+                if ((i-j-1>=0) && (tolower(mask[i-j-1]) == 'b'
+                                    || mask[i-j-1] == '%'))
+                    struct_mask[i-j-1] = 'b';
+            }
+            /* positions just after the strand */
+            for (j = 0; j<strand_end_plus; j++) {
+                if (i+j>=prf_length) break;
+                struct_mask[i+j] = 'b';
+            }
+        }
+        else i++;
+    }
+
+    /* translate the structure markers into penalty characters */
+    for(i=0;i<prf_length;i++) {
+        switch (struct_mask[i]) {
+            case 'A':
+                gap_mask[i] = helix_penalty+'0';
+                break;
+            case 'a':
+                gap_mask[i] = helix_end_penalty+'0';
+                break;
+            case 'B':
+                gap_mask[i] = strand_penalty+'0';
+                break;
+            case 'b':
+                gap_mask[i] = strand_end_penalty+'0';
+                break;
+            default:
+                gap_mask[i] = loop_penalty+'0';
+                break;
+        }
+    }
+
+    struct_mask=ckfree(struct_mask);
+
+}
+
+/*
+ * Fill struct_mask with display markers for the secondary structure
+ * described by mask.
+ *
+ * prf_length   number of positions to process.
+ * mask         secondary structure mask: 'a'/'A' or '$' marks a helix
+ *              position, 'b'/'B' or '%' a strand position; '$' and '%'
+ *              also terminate the run they belong to.
+ * struct_mask  output.  Within each run the first and last *_end_minus
+ *              positions are written as lower case 'a'/'b', a terminating
+ *              '$'/'%' as upper case 'A'/'B', and the other positions are
+ *              copied verbatim from mask.  Positions outside any run are
+ *              not written, so the caller must initialise the buffer.
+ */
+void print_sec_struct_mask(int prf_length, char *mask, char *struct_mask)
+{
+    int i,j;
+
+    i=0;
+    while (i<prf_length) {
+        if (tolower(mask[i]) == 'a' || mask[i] == '$') {
+            /* first helix_end_minus positions inside the helix */
+            for (j = 0; j<helix_end_minus; j++) {
+                if (i+j>=prf_length || (tolower(mask[i+j]) != 'a'
+                                    && mask[i+j] != '$')) break;
+                struct_mask[i+j] = 'a';
+            }
+            i += j;
+            /*
+             * Remainder of the helix.  The bound is tested before mask[i]
+             * is read so the loop never looks past prf_length.
+             */
+            while (i<prf_length && (tolower(mask[i]) == 'a'
+                                || mask[i] == '$')) {
+                if (mask[i] == '$') {
+                    struct_mask[i] = 'A';
+                    i++;
+                    break;
+                }
+                else struct_mask[i] = mask[i];
+                i++;
+            }
+            /* last helix_end_minus positions inside the helix */
+            for (j = 0; j<helix_end_minus; j++) {
+                if ((i-j-1>=0) && (tolower(mask[i-j-1]) == 'a'
+                                    || mask[i-j-1] == '$'))
+                    struct_mask[i-j-1] = 'a';
+            }
+        }
+        else if (tolower(mask[i]) == 'b' || mask[i] == '%') {
+            /* first strand_end_minus positions inside the strand */
+            for (j = 0; j<strand_end_minus; j++) {
+                if (i+j>=prf_length || (tolower(mask[i+j]) != 'b'
+                                    && mask[i+j] != '%')) break;
+                struct_mask[i+j] = 'b';
+            }
+            i += j;
+            /* remainder of the strand; bound tested before the read */
+            while (i<prf_length && (tolower(mask[i]) == 'b'
+                                || mask[i] == '%')) {
+                if (mask[i] == '%') {
+                    struct_mask[i] = 'B';
+                    i++;
+                    break;
+                }
+                else struct_mask[i] = mask[i];
+                i++;
+            }
+            /* last strand_end_minus positions inside the strand */
+            for (j = 0; j<strand_end_minus; j++) {
+                if ((i-j-1>=0) && (tolower(mask[i-j-1]) == 'b'
+                                    || mask[i-j-1] == '%'))
+                    struct_mask[i-j-1] = 'b';
+            }
+        }
+        else i++;
+    }
+}
+
+
+
+/*
+ * Build an output file name, optionally let the user override it, and
+ * open the file for writing.
+ *
+ * prompt          text shown before the default name in menu mode.
+ * path            leading part of the default name.
+ * file_name       output buffer; receives path + file_extension, or the
+ *                 name typed by the user.  Its capacity is not known here,
+ *                 so the caller must make it large enough.
+ * file_extension  appended to path to form the default name.
+ *
+ * A warning is issued when the default name equals the input file name
+ * (seqname).  In menu mode the user is asked for a replacement; an empty
+ * reply (or end of input) keeps the default.
+ *
+ * Returns the open stream, or NULL after reporting an error.
+ */
+FILE * open_output_file(char *prompt, char *path,
+                char *file_name, char *file_extension)
+
+{
+    char temp[FILENAMELEN+1];
+    FILE * file_handle;
+    Boolean ask = FALSE;
+    size_t len;
+
+    strcpy(file_name,path);
+    strcat(file_name,file_extension);
+
+    /*
+     * The prompts are printed with fixed format strings; the caller's
+     * prompt text is passed as data so a '%' in it cannot be interpreted.
+     */
+    if(strcmp(file_name,seqname)==0) {
+        warning("Output file name is the same as input file.");
+        if (usemenu) {
+            fprintf(stdout,"\n\nEnter new name to avoid overwriting  [%s]: ",file_name);
+            ask = TRUE;
+        }
+    }
+    else if (usemenu) {
+        fprintf(stdout,"%s [%s]: ",prompt,file_name);
+        ask = TRUE;
+    }
+
+    if (ask) {
+        /* bounded read instead of gets(); end of input counts as an empty reply */
+        if (fgets(temp,sizeof temp,stdin) == NULL) temp[0] = EOS;
+        len = strlen(temp);
+        if (len > 0 && temp[len-1] == '\n') temp[len-1] = EOS;
+        if(*temp != EOS) strcpy(file_name,temp);
+    }
+
+#ifdef VMS
+    file_handle = fopen(file_name,"w","rat=cr","rfm=var");
+#else
+    file_handle = fopen(file_name,"w");
+#endif
+    if(file_handle == NULL) {
+        error("Cannot open output file [%s]",file_name);
+        return NULL;
+    }
+    return file_handle;
+}
+
+
+
+/*
+ * Open the named file for writing, without any prompting.
+ *
+ * file_name  name of the file to create; must not be empty.
+ *
+ * Returns the open stream, or NULL after reporting an error when the name
+ * is empty or the file cannot be opened.
+ */
+FILE * open_explicit_file(char *file_name)
+{
+    FILE * out;
+
+    if (file_name[0] == EOS) {
+        error("Bad output file [%s]",file_name);
+        return NULL;
+    }
+
+#ifdef VMS
+    out = fopen(file_name,"w","rat=cr","rfm=var");
+#else
+    out = fopen(file_name,"w");
+#endif
+
+    if (out != NULL)
+        return out;
+
+    error("Cannot open output file [%s]",file_name);
+    return NULL;
+}
+
+
+
+
+
+
+
+/* JP: for_align_list */
+/*
+ * Both are declared here and defined elsewhere.  The alignment routines
+ * below compare seqFormat against CLUSTALIST, and align() replaces nseqs
+ * with nseqs_all in that case before writing its output.
+ */
+extern int nseqs_all;
+extern sint seqFormat;
+
+/*
+ * Run a complete multiple alignment of the sequences in memory:
+ * pairwise alignments, guide tree, multiple alignment, output.
+ *
+ * phylip_name  name of the guide tree file.  When it is empty on entry a
+ *              default name (path + "dnd") is built through
+ *              open_output_file(), which writes it into this buffer.
+ *              It is reset to the empty string after a successful run.
+ *
+ * The function returns early, without output, when no sequences are
+ * loaded in menu mode, when an output or tree file cannot be opened, or
+ * when malign() returns a value <= 0.
+ */
+void align(char *phylip_name)
+{
+ char path[FILENAMELEN+1];
+ FILE *tree;
+ sint count;
+ int i;
+ /* NOTE(review): i is declared but never used in this function */
+
+ if(empty && usemenu) {
+ error("No sequences in memory. Load sequences first.");
+ return;
+ }
+
+ /* a plain alignment does not use per-profile structure data; drop it */
+ struct_penalties1 = struct_penalties2 = NONE;
+ if (sec_struct_mask1 != NULL) sec_struct_mask1=ckfree(sec_struct_mask1);
+ if (sec_struct_mask2 != NULL) sec_struct_mask2=ckfree(sec_struct_mask2);
+ if (gap_penalty_mask1 != NULL) gap_penalty_mask1=ckfree(gap_penalty_mask1);
+ if (gap_penalty_mask2 != NULL) gap_penalty_mask2=ckfree(gap_penalty_mask2);
+ if (ss_name1 != NULL) ss_name1=ckfree(ss_name1);
+ if (ss_name2 != NULL) ss_name2=ckfree(ss_name2);
+
+
+ get_path(seqname,path);
+/* DES DEBUG
+ fprintf(stdout,"\n\n Seqname = %s \n Path = %s \n\n",seqname,path);
+*/
+ if(usemenu || !interactive) {
+ if(!open_alignment_output(path)) return;
+ }
+
+ /* tree is only assigned, and later only used, when nseqs >= 2 */
+ if (nseqs >= 2) {
+
+ get_path(seqname,path);
+ if (phylip_name[0]!=EOS) {
+ if((tree = open_explicit_file(
+ phylip_name))==NULL) return;
+ }
+ else {
+ if((tree = open_output_file(
+ "\nEnter name for new GUIDE TREE file ",path,
+ phylip_name,"dnd")) == NULL) return;
+ }
+ }
+
+ if (save_parameters) create_parameter_output();
+
+ if(reset_alignments_new || reset_alignments_all) reset_align();
+
+ info("Start making pairwise alignments\n");
+ /*JP disable info */
+ /* info("Aligning..."); */
+ /* load the DNA or protein parameter set into the working variables */
+ if(dnaflag) {
+ gap_open = dna_gap_open;
+ gap_extend = dna_gap_extend;
+ pw_go_penalty = dna_pw_go_penalty;
+ pw_ge_penalty = dna_pw_ge_penalty;
+ ktup = dna_ktup;
+ window = dna_window;
+ signif = dna_signif;
+ wind_gap = dna_wind_gap;
+
+ }
+ else {
+ gap_open = prot_gap_open;
+ gap_extend = prot_gap_extend;
+ pw_go_penalty = prot_pw_go_penalty;
+ pw_ge_penalty = prot_pw_ge_penalty;
+ ktup = prot_ktup;
+ window = prot_window;
+ signif = prot_signif;
+ wind_gap = prot_wind_gap;
+
+ }
+
+ /* quick_pairalign selects show_pair() instead of pairalign() */
+ if (quick_pairalign)
+ show_pair((sint)0,nseqs,(sint)0,nseqs);
+ else
+ pairalign((sint)0,nseqs,(sint)0,nseqs);
+
+ if (nseqs >= 2) {
+
+ guide_tree(tree,1,nseqs);
+ if(seqFormat!=CLUSTALIST)info("Guide tree file created: [%s]\n",
+ phylip_name);
+ }
+
+
+ count = malign((sint)0,phylip_name);
+
+
+ if (count <= 0) return;
+
+ if (usemenu) fprintf(stdout,"\n\n\n");
+
+ /* for CLUSTALIST input the output covers nseqs_all sequences */
+ if(seqFormat==CLUSTALIST) nseqs = nseqs_all;
+ if(debug>1) fprintf(stdout, "nseqs: %d\n", nseqs);
+ create_alignment_output(1, nseqs);
+ /* create_alignment_output(1,nseqs); */
+ if (showaln && usemenu) show_aln();
+
+ phylip_name[0]=EOS;
+}
+
+
+
+
+
+/*
+ * Align the sequences of the second input file, one by one, to the
+ * existing profile 1.
+ *
+ * phylip_name  name of the guide tree file.  When it is empty and a new
+ *              tree has to be written, open_output_file() stores the
+ *              default name (path + "dnd") in it.  It is reset to the
+ *              empty string after a successful run.
+ *
+ * Secondary structure / gap penalty mask use for the second file (use_ss2)
+ * is switched off for the duration of the call when that file holds more
+ * than one sequence, and restored on every exit path after that point.
+ *
+ * The function returns early when a profile is missing in menu mode, when
+ * an output or tree file cannot be opened, when only a new tree was
+ * requested (new_tree_file), or when seqalign() returns a value <= 0.
+ */
+void new_sequence_align(char *phylip_name)
+{
+    char path[FILENAMELEN+1];
+    char tree_name[FILENAMELEN+1],temp[MAXLINE+1];
+    Boolean use_tree;
+    FILE *tree;
+    sint i,j,count;
+    float dscore;
+    Boolean save_ss2;
+
+    if(profile1_empty && usemenu) {
+        error("No profile in memory. Input 1st profile first.");
+        return;
+    }
+
+    if(profile2_empty && usemenu) {
+        error("No sequences in memory. Input sequences first.");
+        return;
+    }
+
+    get_path(profile2_name,path);
+
+    if(usemenu || !interactive) {
+        if(!open_alignment_output(path)) return;
+    }
+
+    new_seq = profile1_nseqs+1;
+
+/* check for secondary structure information for list of sequences */
+
+    save_ss2 = use_ss2;
+    if (struct_penalties2 != NONE && use_ss2 == TRUE && (nseqs - profile1_nseqs > 1)) {
+        if (struct_penalties2 == SECST)
+            warning("Warning: ignoring secondary structure for a list of sequences");
+        else if (struct_penalties2 == GMASK)
+            warning("Warning: ignoring gap penalty mask for a list of sequences");
+        use_ss2 = FALSE;
+    }
+
+    /* distances within profile 1 plus the first new sequence, from percent identity */
+    for (i=1;i<=new_seq;i++) {
+        for (j=i+1;j<=new_seq;j++) {
+            dscore = countid(i,j);
+            tmat[i][j] = ((double)100.0 - (double)dscore)/(double)100.0;
+            tmat[j][i] = tmat[i][j];
+        }
+    }
+
+    tree_name[0] = EOS;
+    use_tree = FALSE;
+    if (nseqs >= 2) {
+        if (check_tree && usemenu) {
+            strcpy(tree_name,path);
+            strcat(tree_name,"dnd");
+#ifdef VMS
+            if((tree=fopen(tree_name,"r","rat=cr","rfm=var"))!=NULL) {
+#else
+            if((tree=fopen(tree_name,"r"))!=NULL) {
+#endif
+                if (usemenu)
+                    fprintf(stdout,"\nUse the existing GUIDE TREE file, %s (y/n) ? [y]: ",
+                        tree_name);
+                /*
+                 * Bounded read instead of gets().  Only the first character
+                 * is inspected, so the trailing newline need not be removed;
+                 * end of input counts as the default answer.
+                 */
+                if (fgets(temp,sizeof temp,stdin) == NULL) temp[0] = EOS;
+                if(*temp != 'n' && *temp != 'N') {
+                    strcpy(phylip_name,tree_name);
+                    use_tree = TRUE;
+                }
+                fclose(tree);
+            }
+        }
+        else if (!usemenu && use_tree_file) {
+            use_tree = TRUE;
+        }
+    }
+
+    if (save_parameters) create_parameter_output();
+
+    if(reset_alignments_new || reset_alignments_all) {
+/*
+        reset_prf1();
+*/
+        reset_prf2();
+    }
+    else fix_gaps();
+
+    if (struct_penalties1 == SECST)
+        calc_gap_penalty_mask(seqlen_array[1],sec_struct_mask1,gap_penalty_mask1);
+
+    if (struct_penalties2 == SECST)
+        calc_gap_penalty_mask(seqlen_array[profile1_nseqs+1],sec_struct_mask2,gap_penalty_mask2);
+
+
+/* create the new tree file, if necessary */
+
+    if (use_tree == FALSE) {
+
+        if (nseqs >= 2) {
+            get_path(profile2_name,path);
+            if (phylip_name[0]!=EOS) {
+                if((tree = open_explicit_file(
+                    phylip_name))==NULL) {
+                    use_ss2 = save_ss2;   /* undo the temporary override */
+                    return;
+                }
+            }
+            else {
+                if((tree = open_output_file(
+                    "\nEnter name for new GUIDE TREE file ",path,
+                    phylip_name,"dnd")) == NULL) {
+                    use_ss2 = save_ss2;   /* undo the temporary override */
+                    return;
+                }
+            }
+        }
+        info("Start making pairwise alignments\n");
+        /* JP: disable info */
+        /*
+        info("Aligning...");
+        */
+        /* load the DNA or protein parameter set into the working variables */
+        if(dnaflag) {
+            gap_open = dna_gap_open;
+            gap_extend = dna_gap_extend;
+            pw_go_penalty = dna_pw_go_penalty;
+            pw_ge_penalty = dna_pw_ge_penalty;
+            ktup = dna_ktup;
+            window = dna_window;
+            signif = dna_signif;
+            wind_gap = dna_wind_gap;
+
+        }
+        else {
+            gap_open = prot_gap_open;
+            gap_extend = prot_gap_extend;
+            pw_go_penalty = prot_pw_go_penalty;
+            pw_ge_penalty = prot_pw_ge_penalty;
+            ktup = prot_ktup;
+            window = prot_window;
+            signif = prot_signif;
+            wind_gap = prot_wind_gap;
+
+        }
+
+        if (quick_pairalign)
+            show_pair((sint)0,nseqs,new_seq-2,nseqs);
+        else
+            pairalign((sint)0,nseqs,new_seq-2,nseqs);
+
+        if (nseqs >= 2) {
+            guide_tree(tree,1,nseqs);
+            if(seqFormat!=CLUSTALIST)info("Guide tree file created: [%s]",
+                phylip_name);
+        }
+    }
+
+    if (new_tree_file) {
+        use_ss2 = save_ss2;   /* undo the temporary override */
+        return;
+    }
+
+    count = seqalign(new_seq-2,phylip_name);
+
+    use_ss2 = save_ss2;
+
+    if (count <= 0) return;
+
+    if (usemenu) fprintf(stdout,"\n\n\n");
+
+    create_alignment_output(1,nseqs);
+    if (showaln && usemenu) show_aln();
+
+    phylip_name[0]=EOS;
+
+}
+
+
+
+
+
+/*
+ * Produce a guide tree file for the sequences in memory without going on
+ * to the multiple alignment.
+ *
+ * phylip_name  name of the guide tree file.  When it is empty on entry a
+ *              default name (path + "dnd") is built through
+ *              open_output_file(), which writes it into this buffer.
+ *              It is reset to the empty string at the end.
+ *
+ * The function returns early after reporting an error when no sequences
+ * are loaded or fewer than two are in memory, and silently when the tree
+ * file cannot be opened.
+ */
+void make_tree(char *phylip_name)
+{
+ char path[FILENAMELEN+1];
+ FILE *tree;
+
+ if(empty) {
+ error("No sequences in memory. Load sequences first.");
+ return;
+ }
+
+ /* per-profile structure data is not used here; drop it */
+ struct_penalties1 = struct_penalties2 = NONE;
+ if (sec_struct_mask1 != NULL) sec_struct_mask1=ckfree(sec_struct_mask1);
+ if (sec_struct_mask2 != NULL) sec_struct_mask2=ckfree(sec_struct_mask2);
+ if (gap_penalty_mask1 != NULL) gap_penalty_mask1=ckfree(gap_penalty_mask1);
+ if (gap_penalty_mask2 != NULL) gap_penalty_mask2=ckfree(gap_penalty_mask2);
+ if (ss_name1 != NULL) ss_name1=ckfree(ss_name1);
+ if (ss_name2 != NULL) ss_name2=ckfree(ss_name2);
+
+ if(reset_alignments_new || reset_alignments_all) reset_align();
+
+ get_path(seqname,path);
+
+ if (nseqs < 2) {
+ error("Less than 2 sequences in memory. Phylogenetic tree cannot be built.");
+ return;
+ }
+
+ if (save_parameters) create_parameter_output();
+
+ info("Start making pairwise alignments\n");
+ /*JP: disable info*/
+ /*
+ info("Aligning...");
+ */
+ /* load the DNA or protein parameter set into the working variables */
+ if(dnaflag) {
+ gap_open = dna_gap_open;
+ gap_extend = dna_gap_extend;
+ pw_go_penalty = dna_pw_go_penalty;
+ pw_ge_penalty = dna_pw_ge_penalty;
+ ktup = dna_ktup;
+ window = dna_window;
+ signif = dna_signif;
+ wind_gap = dna_wind_gap;
+
+ }
+ else {
+ gap_open = prot_gap_open;
+ gap_extend = prot_gap_extend;
+ pw_go_penalty = prot_pw_go_penalty;
+ pw_ge_penalty = prot_pw_ge_penalty;
+ ktup = prot_ktup;
+ window = prot_window;
+ signif = prot_signif;
+ wind_gap = prot_wind_gap;
+
+
+ }
+
+ /* quick_pairalign selects show_pair() instead of pairalign() */
+ if (quick_pairalign)
+ show_pair((sint)0,nseqs,(sint)0,nseqs);
+ else
+ pairalign((sint)0,nseqs,(sint)0,nseqs);
+
+ /* always true here: the nseqs < 2 case returned above */
+ if (nseqs >= 2) {
+ get_path(seqname,path);
+ if (phylip_name[0]!=EOS) {
+ if((tree = open_explicit_file(
+ phylip_name))==NULL) return;
+ }
+ else {
+ if((tree = open_output_file(
+ "\nEnter name for new GUIDE TREE file ",path,
+ phylip_name,"dnd")) == NULL) return;
+ }
+
+ guide_tree(tree,1,nseqs);
+ if(seqFormat!=CLUSTALIST)info("Guide tree file created: [%s]",
+ phylip_name);
+ }
+
+ if(reset_alignments_new || reset_alignments_all) reset_align();
+
+ phylip_name[0]=EOS;
+}
+
+
+
+
+
+
+
+
+
+/*
+ * Align the sequences in memory using an existing guide tree file.
+ *
+ * phylip_name  name of the guide tree file.  In menu mode a default
+ *              (path + "dnd") is offered and may be replaced by the user;
+ *              otherwise the name is used as given.  It is reset to the
+ *              empty string after a successful run.
+ *
+ * The function returns early when no sequences are loaded, when the
+ * alignment output or the tree file cannot be opened, or when malign()
+ * returns a value <= 0.
+ */
+void get_tree(char *phylip_name)
+{
+    char path[FILENAMELEN+1],temp[MAXLINE+1];
+    sint count;
+    size_t len;
+
+    if(empty) {
+        error("No sequences in memory. Load sequences first.");
+        return;
+    }
+
+    /* per-profile structure data is not used here; drop it */
+    struct_penalties1 = struct_penalties2 = NONE;
+    if (sec_struct_mask1 != NULL) sec_struct_mask1=ckfree(sec_struct_mask1);
+    if (sec_struct_mask2 != NULL) sec_struct_mask2=ckfree(sec_struct_mask2);
+    if (gap_penalty_mask1 != NULL) gap_penalty_mask1=ckfree(gap_penalty_mask1);
+    if (gap_penalty_mask2 != NULL) gap_penalty_mask2=ckfree(gap_penalty_mask2);
+    if (ss_name1 != NULL) ss_name1=ckfree(ss_name1);
+    if (ss_name2 != NULL) ss_name2=ckfree(ss_name2);
+
+
+    get_path(seqname,path);
+
+    if(usemenu || !interactive) {
+        if(!open_alignment_output(path)) return;
+    }
+
+    if(reset_alignments_new || reset_alignments_all) reset_align();
+
+    get_path(seqname,path);
+
+    if (nseqs >= 2) {
+
+        if(usemenu) {
+            strcpy(phylip_name,path);
+            strcat(phylip_name,"dnd");
+
+            fprintf(stdout,"\nEnter a name for the guide tree file [%s]: ",
+                phylip_name);
+            /*
+             * Bounded read instead of gets(); the trailing newline is
+             * removed and end of input counts as an empty reply, which
+             * keeps the default name.
+             * NOTE(review): temp can hold up to MAXLINE characters while
+             * the capacity of phylip_name is not visible here - confirm
+             * the caller's buffer is large enough for the strcpy below.
+             */
+            if (fgets(temp,sizeof temp,stdin) == NULL) temp[0] = EOS;
+            len = strlen(temp);
+            if (len > 0 && temp[len-1] == '\n') temp[len-1] = EOS;
+            if(*temp != EOS)
+                strcpy(phylip_name,temp);
+        }
+
+        /*
+         * NOTE(review): tree is not declared in this function, so it
+         * presumably refers to a file-scope FILE pointer.  The stream
+         * opened here is not closed in this function - verify who owns it.
+         */
+        if(usemenu || !interactive) {
+#ifdef VMS
+            if((tree=fopen(phylip_name,"r","rat=cr","rfm=var"))==NULL) {
+#else
+            if((tree=fopen(phylip_name,"r"))==NULL) {
+#endif
+                error("Cannot open tree file [%s]",phylip_name);
+                return;
+            }
+        }
+    }
+    else {
+        info("Start making pairwise alignments\n");
+        /*JP: disable info */
+        /*
+        info("Aligning...");
+        */
+        /* load the DNA or protein parameter set into the working variables */
+        if(dnaflag) {
+            gap_open = dna_gap_open;
+            gap_extend = dna_gap_extend;
+            pw_go_penalty = dna_pw_go_penalty;
+            pw_ge_penalty = dna_pw_ge_penalty;
+            ktup = dna_ktup;
+            window = dna_window;
+            signif = dna_signif;
+            wind_gap = dna_wind_gap;
+
+        }
+        else {
+            gap_open = prot_gap_open;
+            gap_extend = prot_gap_extend;
+            pw_go_penalty = prot_pw_go_penalty;
+            pw_ge_penalty = prot_pw_ge_penalty;
+            ktup = prot_ktup;
+            window = prot_window;
+            signif = prot_signif;
+            wind_gap = prot_wind_gap;
+
+        }
+
+        if (quick_pairalign)
+            show_pair((sint)0,nseqs,(sint)0,nseqs);
+        else
+            pairalign((sint)0,nseqs,(sint)0,nseqs);
+    }
+
+    if (save_parameters) create_parameter_output();
+
+    count = malign(0,phylip_name);
+    if (count <= 0) return;
+
+    if (usemenu) fprintf(stdout,"\n\n\n");
+
+    create_alignment_output(1,nseqs);
+    if (showaln && usemenu) show_aln();
+
+    phylip_name[0]=EOS;
+}
+
+
+
+
+
+
+
+
+
+
+/*
+ * Read one reply line from stdin into buf (capacity `size` bytes) and strip
+ * the line terminator.  On end-of-file or a read error buf is set to the
+ * empty string, so callers fall back to their default answer.
+ */
+static void prf_read_reply(char *buf, int size)
+{
+    char *p;
+
+    if (fgets(buf, size, stdin) == NULL) {
+        buf[0] = EOS;
+        return;
+    }
+    for (p = buf; *p != EOS && *p != '\n' && *p != '\r'; p++)
+        ;
+    *p = EOS;
+}
+
+/*
+ * Align the sequences of profile 2 against those of profile 1.
+ *
+ * p1_tree_name / p2_tree_name: guide tree file names for the two profiles.
+ * An empty name means the name is obtained through open_output_file() when
+ * a new tree has to be written.  Both names are reset to the empty string
+ * once the alignment has been written.
+ *
+ * Steps: open the alignment output file(s), strip or fix old gaps, decide
+ * for each profile whether an existing guide tree is reused, write new
+ * guide trees where needed, then run palign1() followed by palign2() and
+ * write the alignment.  The function returns early (without a result) when
+ * either profile is empty, a file cannot be opened, only new tree files
+ * were requested, or one of the alignment passes returns 0.
+ */
+void profile_align(char *p1_tree_name,char *p2_tree_name)
+{
+    char path[FILENAMELEN+1];
+    char tree_name[FILENAMELEN+1];
+    char temp[MAXLINE+1];
+    Boolean use_tree1,use_tree2;
+    FILE *tree;
+    sint count,i,j,dscore;
+
+    if(profile1_empty || profile2_empty) {
+        error("No sequences in memory. Load sequences first.");
+        return;
+    }
+
+    get_path(profile1_name,path);
+
+    if(usemenu || !interactive) {
+        if(!open_alignment_output(path)) return;
+    }
+
+    if(reset_alignments_new || reset_alignments_all) {
+        reset_prf1();
+        reset_prf2();
+    }
+    else fix_gaps();
+
+    /* Profile 1: offer to reuse an existing <path>dnd guide tree. */
+    tree_name[0] = EOS;
+    use_tree1 = FALSE;
+    if (profile1_nseqs >= 2) {
+        if (check_tree && usemenu) {
+            strcpy(tree_name,path);
+            strcat(tree_name,"dnd");
+#ifdef VMS
+            if((tree=fopen(tree_name,"r","rat=cr","rfm=var"))!=NULL) {
+#else
+            if((tree=fopen(tree_name,"r"))!=NULL) {
+#endif
+                fprintf(stdout,"\nUse the existing GUIDE TREE file for Profile 1, %s (y/n) ? [y]: ",
+                        tree_name);
+                /* bounded read; gets() could overrun temp */
+                prf_read_reply(temp,(int)sizeof(temp));
+                if(*temp != 'n' && *temp != 'N') {
+                    strcpy(p1_tree_name,tree_name);
+                    use_tree1 = TRUE;
+                }
+                fclose(tree);
+            }
+        }
+        else if (!usemenu && use_tree1_file) {
+            use_tree1 = TRUE;
+        }
+    }
+
+    /* Profile 2: same decision, using the path of the second profile. */
+    tree_name[0] = EOS;
+    use_tree2 = FALSE;
+    get_path(profile2_name,path);
+    if (nseqs-profile1_nseqs >= 2) {
+        if (check_tree && usemenu) {
+            strcpy(tree_name,path);
+            strcat(tree_name,"dnd");
+#ifdef VMS
+            if((tree=fopen(tree_name,"r","rat=cr","rfm=var"))!=NULL) {
+#else
+            if((tree=fopen(tree_name,"r"))!=NULL) {
+#endif
+                fprintf(stdout,"\nUse the existing GUIDE TREE file for Profile 2, %s (y/n) ? [y]: ",
+                        tree_name);
+                /* bounded read; gets() could overrun temp */
+                prf_read_reply(temp,(int)sizeof(temp));
+                if(*temp != 'n' && *temp != 'N') {
+                    strcpy(p2_tree_name,tree_name);
+                    use_tree2 = TRUE;
+                }
+                fclose(tree);
+            }
+        }
+        else if (!usemenu && use_tree2_file) {
+            use_tree2 = TRUE;
+        }
+    }
+
+    if (save_parameters) create_parameter_output();
+
+    if (struct_penalties1 == SECST)
+        calc_gap_penalty_mask(seqlen_array[1],sec_struct_mask1,gap_penalty_mask1);
+
+    if (struct_penalties2 == SECST)
+        calc_gap_penalty_mask(seqlen_array[profile1_nseqs+1],sec_struct_mask2,gap_penalty_mask2);
+
+    /* No reusable tree for profile 1: fill the distance matrix from the
+       pairwise identities and write a new guide tree. */
+    if (use_tree1 == FALSE)
+        if (profile1_nseqs >= 2) {
+            for (i=1;i<=profile1_nseqs;i++) {
+                for (j=i+1;j<=profile1_nseqs;j++) {
+                    dscore = countid(i,j);
+                    tmat[i][j] = (100.0 - dscore)/100.0;
+                    tmat[j][i] = tmat[i][j];
+                }
+            }
+            get_path(profile1_name,path);
+            if (p1_tree_name[0]!=EOS) {
+                if((tree = open_explicit_file(p1_tree_name))==NULL) return;
+            }
+            else {
+                if((tree = open_output_file(
+                    "\nEnter name for new GUIDE TREE file for profile 1 ",path,
+                    p1_tree_name,"dnd")) == NULL) return;
+            }
+
+            guide_tree(tree,1,profile1_nseqs);
+            if(seqFormat!=CLUSTALIST)info("Guide tree file created: [%s]",
+                p1_tree_name);
+        }
+
+    /* Same for profile 2 (sequences profile1_nseqs+1 .. nseqs). */
+    if (use_tree2 == FALSE)
+        if(nseqs-profile1_nseqs >= 2) {
+            for (i=1+profile1_nseqs;i<=nseqs;i++) {
+                for (j=i+1;j<=nseqs;j++) {
+                    dscore = countid(i,j);
+                    tmat[i][j] = (100.0 - dscore)/100.0;
+                    tmat[j][i] = tmat[i][j];
+                }
+            }
+            if (p2_tree_name[0]!=EOS) {
+                if((tree = open_explicit_file(p2_tree_name))==NULL) return;
+            }
+            else {
+                get_path(profile2_name,path);
+                if((tree = open_output_file(
+                    "\nEnter name for new GUIDE TREE file for profile 2 ",path,
+                    p2_tree_name,"dnd")) == NULL) return;
+            }
+            guide_tree(tree,profile1_nseqs+1,nseqs-profile1_nseqs);
+            if(seqFormat!=CLUSTALIST)info("Guide tree file created: [%s]",
+                p2_tree_name);
+        }
+
+    /* Only the tree files were requested: stop before aligning. */
+    if (new_tree1_file || new_tree2_file) return;
+
+/* do an initial alignment to get the pairwise identities between the two
+profiles - used to set parameters for the final alignment */
+    count = palign1();
+    if (count == 0) return;
+
+    reset_prf1();
+    reset_prf2();
+
+    count = palign2(p1_tree_name,p2_tree_name);
+
+    if (count == 0) return;
+
+    if(usemenu) fprintf(stdout,"\n\n\n");
+
+    create_alignment_output(1,nseqs);
+    if (showaln && usemenu) show_aln();
+
+    p1_tree_name[0]=EOS;
+    p2_tree_name[0]=EOS;
+}
+
+
+
+
+
+/*
+ * Write sequences fseq..lseq in CLUSTAL format to clusout.
+ *
+ * fres: first alignment column to print (1-based); len: number of columns.
+ * The alignment is written in chunks of at most line_length columns.  Each
+ * chunk optionally starts with the secondary structure (!SS_) and gap
+ * penalty mask (!GM_) lines of the two profiles, then one line per
+ * sequence, then a conservation line ('*' identical column, ':' and '.'
+ * for columns falling entirely inside one res_cat1 / res_cat2 group).
+ */
+void clustal_out(FILE *clusout, sint fres, sint len, sint fseq, sint lseq)
+{
+    static char *seq1;
+    static sint *seq_no;
+    static sint *print_seq_no;
+    char *ss_mask1 = NULL, *ss_mask2 = NULL;
+    char temp[MAXLINE];
+    char c;
+    sint val;
+    sint ii,lv1,catident1[NUMRES],catident2[NUMRES],ident,chunks;
+    sint i,j,k,l;
+    sint pos,ptr;
+    sint line_length;
+
+    /* the third value used to repeat fres; fseq is what was meant */
+    if(debug>1) fprintf(stdout, "%d %d %d %d\n", fres, len, fseq, lseq);
+    fflush(stdout);
+
+/*
+ stop doing this ...... opens duplicate files in VMS DES
+fclose(clusout);
+if ((clusout=fopen(clustal_outname,"w")) == NULL)
+ {
+ fprintf(stdout,"Error opening %s\n",clustal_outfile);
+ return;
+ }
+*/
+
+    seq_no = (sint *)ckalloc((nseqs+1) * sizeof(sint));
+    print_seq_no = (sint *)ckalloc((nseqs+1) * sizeof(sint));
+    for (i=fseq;i<=lseq;i++)
+    {
+        print_seq_no[i] = seq_no[i] = 0;
+        /* count the residues that precede the first printed column;
+           the test was '||', which is true for every value */
+        for(j=1;j<fres;j++) {
+            val = seq_array[i][j];
+            if((val >=0) && (val <=max_aa)) seq_no[i]++;
+        }
+    }
+
+    seq1 = (char *)ckalloc((max_aln_length+1) * sizeof(char));
+
+    if (struct_penalties1 == SECST && use_ss1 == TRUE) {
+        ss_mask1 = (char *)ckalloc((seqlen_array[1]+10) * sizeof(char));
+        for (i=0;i<seqlen_array[1];i++)
+            ss_mask1[i] = sec_struct_mask1[i];
+        print_sec_struct_mask(seqlen_array[1],sec_struct_mask1,ss_mask1);
+    }
+    if (struct_penalties2 == SECST && use_ss2 == TRUE) {
+        ss_mask2 = (char *)ckalloc((seqlen_array[profile1_nseqs+1]+10) * sizeof(char));
+        for (i=0;i<seqlen_array[profile1_nseqs+1];i++)
+            ss_mask2[i] = sec_struct_mask2[i];
+        print_sec_struct_mask(seqlen_array[profile1_nseqs+1],sec_struct_mask2,ss_mask2);
+    }
+
+    /* JP */
+    fprintf(clusout,"CLUSTAL %s multiple sequence alignment\n\n",
+        revision_level);
+
+/* decide the line length for this alignment - maximum is LINELENGTH */
+    line_length=PAGEWIDTH-max_names;
+    line_length=line_length-line_length % 10; /* round to a multiple of 10*/
+    if (line_length > LINELENGTH) line_length=LINELENGTH;
+
+    chunks = len/line_length;
+    if(len % line_length != 0)
+        ++chunks;
+
+    for(lv1=1;lv1<=chunks;++lv1) {
+        /* pos..ptr are the 1-based columns covered by this chunk */
+        pos = ((lv1-1)*line_length)+1;
+        ptr = (len<pos+line_length-1) ? len : pos+line_length-1;
+
+        fprintf(clusout,"\n"); fflush(stdout);
+
+        if (output_struct_penalties == 0 || output_struct_penalties == 2) {
+            if (struct_penalties1 == SECST && use_ss1 == TRUE) {
+                for(i=pos;i<=ptr;++i) {
+                    val=ss_mask1[i+fres-2];
+                    if (val == gap_pos1 || val == gap_pos2)
+                        temp[i-pos]='-';
+                    else
+                        temp[i-pos]=val;
+                }
+                temp[ptr-pos+1]=EOS;
+                fprintf(clusout,"!SS_%-*s %s\n",max_names,ss_name1,temp);
+            }
+        }
+        if (output_struct_penalties == 1 || output_struct_penalties == 2) {
+            if (struct_penalties1 != NONE && use_ss1 == TRUE) {
+                for(i=pos;i<=ptr;++i) {
+                    val=gap_penalty_mask1[i+fres-2];
+                    if (val == gap_pos1 || val == gap_pos2)
+                        temp[i-pos]='-';
+                    else
+                        temp[i-pos]=val;
+                }
+                temp[ptr-pos+1]=EOS;
+                fprintf(clusout,"!GM_%-*s %s\n",max_names,ss_name1,temp);
+            }
+        }
+        if (output_struct_penalties == 0 || output_struct_penalties == 2) {
+            if (struct_penalties2 == SECST && use_ss2 == TRUE) {
+                for(i=pos;i<=ptr;++i) {
+                    val=ss_mask2[i+fres-2];
+                    if (val == gap_pos1 || val == gap_pos2)
+                        temp[i-pos]='-';
+                    else
+                        temp[i-pos]=val;
+                }
+                temp[ptr-pos+1]=EOS;
+                fprintf(clusout,"!SS_%-*s %s\n",max_names,ss_name2,temp);
+            }
+        }
+        if (output_struct_penalties == 1 || output_struct_penalties == 2) {
+            if (struct_penalties2 != NONE && use_ss2 == TRUE) {
+                for(i=pos;i<=ptr;++i) {
+                    val=gap_penalty_mask2[i+fres-2];
+                    if (val == gap_pos1 || val == gap_pos2)
+                        temp[i-pos]='-';
+                    else
+                        temp[i-pos]=val;
+                }
+                temp[ptr-pos+1]=EOS;
+                fprintf(clusout,"!GM_%-*s %s\n",max_names,ss_name2,temp);
+            }
+        }
+
+        /* one line per sequence */
+        for(ii=fseq;ii<=lseq;++ii) {
+            /* JP: for_aln_list */
+            if(seqFormat==CLUSTALIST) i = ii;
+            else i=output_index[ii];
+            print_seq_no[i] = 0;
+            for(j=pos;j<=ptr;++j) {
+                if (j+fres-1<=seqlen_array[i])
+                    val = seq_array[i][j+fres-1];
+                else val = -3;
+                if((val == -3) || (val == 253)) break;
+                else if((val < 0) || (val > max_aa))
+                    seq1[j]='-';
+                else {
+                    seq1[j]=amino_acid_codes[val];
+                    seq_no[i]++;
+                    print_seq_no[i]=1;
+                }
+            }
+            /* pad a sequence that ended early */
+            for(;j<=ptr;++j) seq1[j]='-';
+            strncpy(temp,&seq1[pos],ptr-pos+1);
+            temp[ptr-pos+1]=EOS;
+            fprintf(clusout,"%-*s %s",max_names+5,names[i],temp);
+            if (cl_seq_numbers && print_seq_no[i])
+                fprintf(clusout," %d",seq_no[i]);
+            fprintf(clusout,"\n");
+        }
+
+        /* conservation line */
+        for(i=pos;i<=ptr;++i) {
+            seq1[i]=' ';
+            ident=0;
+            for(j=1;res_cat1[j-1]!=NULL;j++) catident1[j-1] = 0;
+            for(j=1;res_cat2[j-1]!=NULL;j++) catident2[j-1] = 0;
+            for(j=fseq;j<=lseq;++j) {
+                if((seq_array[fseq][i+fres-1] >=0) &&
+                   (seq_array[fseq][i+fres-1] <= max_aa)) {
+                    if(seq_array[fseq][i+fres-1] == seq_array[j][i+fres-1])
+                        ++ident;
+                    /* only real residue codes may index amino_acid_codes;
+                       gaps and other codes belong to no category */
+                    val = seq_array[j][i+fres-1];
+                    if((val < 0) || (val > max_aa)) continue;
+                    for(k=1;res_cat1[k-1]!=NULL;k++) {
+                        for(l=0;(c=res_cat1[k-1][l]);l++) {
+                            if (amino_acid_codes[val]==c)
+                            {
+                                catident1[k-1]++;
+                                break;
+                            }
+                        }
+                    }
+                    for(k=1;res_cat2[k-1]!=NULL;k++) {
+                        for(l=0;(c=res_cat2[k-1][l]);l++) {
+                            if (amino_acid_codes[val]==c)
+                            {
+                                catident2[k-1]++;
+                                break;
+                            }
+                        }
+                    }
+                }
+            }
+            if(ident==lseq-fseq+1)
+                seq1[i]='*';
+            else if (!dnaflag) {
+                for(k=1;res_cat1[k-1]!=NULL;k++) {
+                    if (catident1[k-1]==lseq-fseq+1) {
+                        seq1[i]=':';
+                        break;
+                    }
+                }
+                if(seq1[i]==' ')
+                    for(k=1;res_cat2[k-1]!=NULL;k++) {
+                        if (catident2[k-1]==lseq-fseq+1) {
+                            seq1[i]='.';
+                            break;
+                        }
+                    }
+            }
+        }
+        strncpy(temp,&seq1[pos],ptr-pos+1);
+        temp[ptr-pos+1]=EOS;
+        for(k=0;k<max_names+6;k++) fprintf(clusout," ");
+        fprintf(clusout,"%s\n",temp);
+    }
+
+    seq1=ckfree((void *)seq1);
+    /* the two counters were allocated on every call but never released */
+    seq_no=ckfree((void *)seq_no);
+    print_seq_no=ckfree((void *)print_seq_no);
+    if (struct_penalties1 == SECST && use_ss1 == TRUE) ckfree(ss_mask1);
+    if (struct_penalties2 == SECST && use_ss2 == TRUE) ckfree(ss_mask2);
+/* DES ckfree(output_index); */
+
+}
+
+
+
+
+/* JP: for_align_list */
+/* gcg_out_first: only the first sequence of each subalignment is printed */
+
+/* JP: for_align_list */
+extern int *seqnumlist;
+extern int filecount;
+extern int nseqs_all;
+extern int *seqlen_array_all;
+extern char **seq_array_all; /* for all the sequences */
+extern char **names_all;
+extern int max_names;
+extern sint seqFormat;
+
+
+/*
+ * Write a GCG/MSF alignment that contains only the first sequence of each
+ * input subalignment (alignment-list mode).
+ *
+ * fres: first alignment column (1-based); len: number of columns;
+ * fseq..lseq: range of sequence numbers considered.  Sequences are taken in
+ * input order; output_index is not consulted.
+ */
+void gcg_out_first(FILE *gcgout, sint fres, sint len, sint fseq, sint lseq)
+{
+/* static char *aacids = "XCSTPAGNDEQHRKMILVFYW";*/
+/* static char *nbases = "XACGT"; */
+    char *seq, residue;
+    sint val;
+    sint *all_checks;
+    sint i,chunks,block;
+    sint j,pos1,pos2;
+    long grand_checksum;
+
+    /* JP: for_align_list */
+    /* mark[s] != 0 when sequence s is the first one of a subalignment;
+       seqnumlist[f] is the number of sequences in subalignment f */
+    int *mark;
+    int tmp=1;
+    mark = (int *) ckalloc((nseqs+1) * sizeof(int) );
+    for(i=1;i<=nseqs;i++) mark[i] = 0;
+    for(i=1;i<filecount;i++) {
+        if(tmp>=1 && tmp<=nseqs) mark[tmp] = 1;
+        tmp+= seqnumlist[i];
+    }
+    if(tmp>=1 && tmp<=nseqs) mark[tmp] = 1;
+
+    seq = (char *)ckalloc((max_aln_length+1) * sizeof(char));
+    all_checks = (sint *)ckalloc((lseq+1) * sizeof(sint));
+
+    /* per-sequence checksums, marked sequences only */
+    for(i=fseq; i<=lseq; i++) {
+        /* JP: for_align_list */
+        if(mark[i]==0) continue;
+        for(j=fres; j<=fres+len-1; j++) {
+            val = seq_array[i][j];
+            if((val == -3) || (val == 253)) break;
+            else if((val < 0) || (val > max_aa))
+                residue = '.';
+            else {
+                residue = amino_acid_codes[val];
+            }
+            seq[j-fres+1] = residue;
+        }
+/* pad any short sequences with gaps, to make all sequences the same length */
+        for(; j<=fres+len-1; j++)
+            seq[j-fres+1] = '.';
+        all_checks[i] = SeqGCGCheckSum(seq+1, (int)len);
+    }
+
+    /* Sum only the checksums computed above.  The previous loop ran over
+       all_checks[output_index[1..nseqs]], which also read the entries of
+       unmarked sequences that were never assigned. */
+    grand_checksum = 0;
+    for(i=fseq; i<=lseq; i++)
+        if(mark[i]!=0) grand_checksum += all_checks[i];
+    grand_checksum = grand_checksum % 10000;
+    fprintf(gcgout,"PileUp\n\n");
+    fprintf(gcgout,"\n\n MSF:%5d Type: ",(pint)len);
+    if(dnaflag)
+        fprintf(gcgout,"N");
+    else
+        fprintf(gcgout,"P");
+    fprintf(gcgout," Check:%6ld .. \n\n", (long)grand_checksum);
+    for(i=fseq; i<=lseq; i++) {
+        /* JP: for_align_list - input order, marked sequences only */
+        if(mark[i]==0) continue;
+/* for(j=0; j<max_names; j++)
+ if(names[i][j] == ' ') names[i][j] = '_'; */
+        fprintf(gcgout,
+            " Name: %s oo Len:%5d Check:%6ld Weight: %.1f\n",
+            names[i],(pint)len,(long)all_checks[i],(float)seq_weight[i-1]*100.0/(float)INT_SCALE_FACTOR);
+    }
+    fprintf(gcgout,"\n//\n");
+
+    chunks = len/GCG_LINELENGTH;
+    if(len % GCG_LINELENGTH != 0) ++chunks;
+    for(block=1; block<=chunks; block++) {
+        fprintf(gcgout,"\n\n");
+        pos1 = ((block-1) * GCG_LINELENGTH) + 1;
+        pos2 = (len<pos1+GCG_LINELENGTH-1)? len : pos1+GCG_LINELENGTH-1;
+        for(i=fseq; i<=lseq; i++) {
+            /* JP: for_align_list */
+            if(mark[i]==0) continue;
+            fprintf(gcgout,"\n%-*s ",max_names+5,names[i]);
+            for(j=pos1; j<=pos2; j++) {
+/*
+ JULIE -
+ check for short sequences - pad out with '.' characters to end of alignment
+*/
+                if (j+fres-1<=seqlen_array[i])
+                    val = seq_array[i][j+fres-1];
+                else val = -3;
+                if((val == -3) || (val == 253))
+                    residue = '.';
+                else if((val < 0) || (val > max_aa))
+                    residue = '.';
+                else {
+                    residue = amino_acid_codes[val];
+                }
+                fprintf(gcgout,"%c",residue);
+                if(j % 10 == 0) fprintf(gcgout," ");
+            }
+        }
+    }
+/* DES ckfree(output_index); */
+
+    seq=ckfree((void *)seq);
+    all_checks=ckfree((void *)all_checks);
+    mark=ckfree((void*)mark);
+
+    fprintf(gcgout,"\n\n");
+}
+
+
+
+/*
+ * Write sequences fseq..lseq as a GCG/MSF alignment.
+ *
+ * fres: first alignment column (1-based); len: number of columns.
+ * Sequences are written in output_index order, except in alignment-list
+ * mode (seqFormat == CLUSTALIST) where input order is used.
+ */
+void gcg_out(FILE *gcgout, sint fres, sint len, sint fseq, sint lseq)
+{
+/* static char *aacids = "XCSTPAGNDEQHRKMILVFYW";*/
+/* static char *nbases = "XACGT"; */
+    char *seq, residue;
+    sint val;
+    sint *all_checks;
+    sint i,ii,chunks,block;
+    sint j,pos1,pos2;
+    long grand_checksum;
+
+    seq = (char *)ckalloc((max_aln_length+1) * sizeof(char));
+    all_checks = (sint *)ckalloc((lseq+1) * sizeof(sint));
+
+    /* per-sequence checksums over the printed columns */
+    for(i=fseq; i<=lseq; i++) {
+        for(j=fres; j<=fres+len-1; j++) {
+            val = seq_array[i][j];
+            if((val == -3) || (val == 253)) break;
+            else if((val < 0) || (val > max_aa))
+                residue = '.';
+            else {
+                residue = amino_acid_codes[val];
+            }
+            seq[j-fres+1] = residue;
+        }
+/* pad any short sequences with gaps, to make all sequences the same length */
+        for(; j<=fres+len-1; j++)
+            seq[j-fres+1] = '.';
+        all_checks[i] = SeqGCGCheckSum(seq+1, (int)len);
+    }
+
+    /* Sum exactly the entries filled in above.  The sum does not depend on
+       order, so output_index is not needed; the old 1..nseqs loop could
+       index outside all_checks when fseq..lseq is a sub-range. */
+    grand_checksum = 0;
+    for(i=fseq; i<=lseq; i++) grand_checksum += all_checks[i];
+    grand_checksum = grand_checksum % 10000;
+    fprintf(gcgout,"PileUp\n\n");
+    fprintf(gcgout,"\n\n MSF:%5d Type: ",(pint)len);
+    if(dnaflag)
+        fprintf(gcgout,"N");
+    else
+        fprintf(gcgout,"P");
+    fprintf(gcgout," Check:%6ld .. \n\n", (long)grand_checksum);
+    for(ii=fseq; ii<=lseq; ii++) {
+        /* JP: for_align_list */
+        if(seqFormat==CLUSTALIST) i = ii;
+        else i = output_index[ii];
+/* for(j=0; j<max_names; j++)
+ if(names[i][j] == ' ') names[i][j] = '_'; */
+        fprintf(gcgout,
+            " Name: %s oo Len:%5d Check:%6ld Weight: %.1f\n",
+            names[i],(pint)len,(long)all_checks[i],(float)seq_weight[i-1]*100.0/(float)INT_SCALE_FACTOR);
+    }
+    fprintf(gcgout,"\n//\n");
+
+    chunks = len/GCG_LINELENGTH;
+    if(len % GCG_LINELENGTH != 0) ++chunks;
+    for(block=1; block<=chunks; block++) {
+        fprintf(gcgout,"\n\n");
+        pos1 = ((block-1) * GCG_LINELENGTH) + 1;
+        pos2 = (len<pos1+GCG_LINELENGTH-1)? len : pos1+GCG_LINELENGTH-1;
+        for(ii=fseq; ii<=lseq; ii++) {
+            /* JP: for_align_list */
+            if(seqFormat==CLUSTALIST) i = ii;
+            else i = output_index[ii];
+            fprintf(gcgout,"\n%-*s ",max_names+5,names[i]);
+            for(j=pos1; j<=pos2; j++) {
+/*
+ JULIE -
+ check for short sequences - pad out with '.' characters to end of alignment
+*/
+                if (j+fres-1<=seqlen_array[i])
+                    val = seq_array[i][j+fres-1];
+                else val = -3;
+                if((val == -3) || (val == 253))
+                    residue = '.';
+                else if((val < 0) || (val > max_aa))
+                    residue = '.';
+                else {
+                    residue = amino_acid_codes[val];
+                }
+                fprintf(gcgout,"%c",residue);
+                if(j % 10 == 0) fprintf(gcgout," ");
+            }
+        }
+    }
+/* DES ckfree(output_index); */
+
+    seq=ckfree((void *)seq);
+    all_checks=ckfree((void *)all_checks);
+
+    fprintf(gcgout,"\n\n");
+}
+
+
+
+/*
+ * Write sequences fseq..lseq (in output_index order) as an interleaved
+ * NEXUS data block.
+ *
+ * fres: first alignment column (1-based); len: number of columns.
+ * A sequence that ends before the end of a block is cut short there; it is
+ * not padded.
+ */
+void nexus_out(FILE *nxsout, sint fres, sint len, sint fseq, sint lseq)
+{
+/* static char *aacids = "XCSTPAGNDEQHRKMILVFYW";*/
+/* static char *nbases = "XACGT"; */
+    char residue;
+    sint val;
+    sint i,ii,chunks,block;
+    sint j,pos1,pos2;
+
+    chunks = len/GCG_LINELENGTH;
+    if(len % GCG_LINELENGTH != 0) ++chunks;
+
+    fprintf(nxsout,"#NEXUS\n");
+
+    fprintf(nxsout,"BEGIN DATA;\n");
+    /* ntax is the number of rows actually written (was nseqs, which is
+       only right when the whole sequence range is output) */
+    fprintf(nxsout,"dimensions ntax=%d nchar=%d;\n",(pint)(lseq-fseq+1),(pint)len);
+    fprintf(nxsout,"format missing=?\n");
+    fprintf(nxsout,"symbols=\"");
+    for(i=0;i<=max_aa;i++)
+        fprintf(nxsout,"%c",amino_acid_codes[i]);
+    fprintf(nxsout,"\"\n");
+    fprintf(nxsout,"interleave datatype=");
+    fprintf(nxsout, dnaflag ? "DNA " : "PROTEIN ");
+    fprintf(nxsout,"gap= -;\n");
+    fprintf(nxsout,"\nmatrix");
+
+    for(block=1; block<=chunks; block++) {
+        /* pos1..pos2 are the 1-based columns of this block */
+        pos1 = ((block-1) * GCG_LINELENGTH)+1;
+        pos2 = (len<pos1+GCG_LINELENGTH-1)? len : pos1+GCG_LINELENGTH-1;
+        for(ii=fseq; ii<=lseq; ii++) {
+            i = output_index[ii];
+            fprintf(nxsout,"\n%-*s ",max_names+1,names[i]);
+            for(j=pos1; j<=pos2; j++) {
+                if (j+fres-1<=seqlen_array[i])
+                    val = seq_array[i][j+fres-1];
+                else val = -3;
+                if((val == -3) || (val == 253))
+                    break;
+                else if((val < 0) || (val > max_aa))
+                    residue = '-';
+                else {
+                    residue = amino_acid_codes[val];
+                }
+                fprintf(nxsout,"%c",residue);
+            }
+        }
+        fprintf(nxsout,"\n");
+    }
+    fprintf(nxsout,";\nend;\n");
+/* DES ckfree(output_index); */
+
+}
+
+
+
+
+/*
+ * Write sequences fseq..lseq (in output_index order) as an interleaved
+ * PHYLIP alignment.
+ *
+ * fres: first alignment column (1-based); len: number of columns.
+ * Names are cut to 10 characters; a warning is issued when any name is
+ * longer than that, with an extra note when two cut names coincide.
+ */
+void phylip_out(FILE *phyout, sint fres, sint len, sint fseq, sint lseq)
+{
+/* static char *aacids = "XCSTPAGNDEQHRKMILVFYW";*/
+/* static char *nbases = "XACGT"; */
+    char residue;
+    sint val;
+    sint i,ii,chunks,block;
+    sint j,pos1,pos2;
+    sint name_len;
+    Boolean warn;
+    char **snames;
+
+    /* snames is indexed relative to fseq: the table has lseq-fseq+2 slots,
+       so absolute sequence numbers would overrun it whenever fseq > 1 */
+    snames=(char **)ckalloc((lseq-fseq+2)*sizeof(char *));
+    name_len=0;
+    for(i=fseq; i<=lseq; i++) {
+        snames[i-fseq]=(char *)ckalloc((11)*sizeof(char));
+        ii=strlen(names[i]);
+        strncpy(snames[i-fseq],names[i],10);
+        /* strncpy does not terminate a name of 10 or more characters */
+        snames[i-fseq][10]=EOS;
+        if(name_len<ii) name_len=ii;
+    }
+    if(name_len>10) {
+        warn=FALSE;
+        for(i=fseq; i<=lseq; i++) {
+            for(j=i+1;j<=lseq;j++) {
+                if (strcmp(snames[i-fseq],snames[j-fseq]) == 0)
+                    warn=TRUE;
+            }
+        }
+        if(warn)
+            warning("Truncating sequence names to 10 characters for PHYLIP output.\n"
+            "Names in the PHYLIP format file are NOT unambiguous.");
+        else
+            warning("Truncating sequence names to 10 characters for PHYLIP output.");
+    }
+
+
+    chunks = len/GCG_LINELENGTH;
+    if(len % GCG_LINELENGTH != 0) ++chunks;
+
+    /* header: number of sequences written, then alignment length */
+    fprintf(phyout,"%6d %6d",(pint)(lseq-fseq+1),(pint)len);
+
+    for(block=1; block<=chunks; block++) {
+        pos1 = ((block-1) * GCG_LINELENGTH)+1;
+        pos2 = (len<pos1+GCG_LINELENGTH-1)? len : pos1+GCG_LINELENGTH-1;
+        for(ii=fseq; ii<=lseq; ii++) {
+            i = output_index[ii];
+            /* names appear in the first block only */
+            if(block == 1) {
+                fprintf(phyout,"\n%-10s ",snames[i-fseq]);
+            }
+            else
+                fprintf(phyout,"\n ");
+            for(j=pos1; j<=pos2; j++) {
+                if (j+fres-1<=seqlen_array[i])
+                    val = seq_array[i][j+fres-1];
+                else val = -3;
+                if((val == -3) || (val == 253))
+                    break;
+                else if((val < 0) || (val > max_aa))
+                    residue = '-';
+                else {
+                    residue = amino_acid_codes[val];
+                }
+                fprintf(phyout,"%c",residue);
+                if(j % 10 == 0) fprintf(phyout," ");
+            }
+        }
+        fprintf(phyout,"\n");
+    }
+/* DES ckfree(output_index); */
+
+    for(i=fseq;i<=lseq;i++)
+        ckfree(snames[i-fseq]);
+    ckfree(snames);
+}
+
+
+
+
+
+/*
+ * Write sequences fseq..lseq (in output_index order) in NBRF/PIR format:
+ * a ">DL;" or ">P1;" header with the name, the title line, the residues
+ * wrapped at the computed line width, and a closing "*" line.
+ *
+ * fres: first alignment column (1-based); len: number of columns.
+ */
+void nbrf_out(FILE *nbout, sint fres, sint len, sint fseq, sint lseq)
+{
+    char *buffer, symbol;
+    sint code;
+    sint row, idx;
+    sint col, count, n;
+    sint width;
+
+    buffer = (char *)ckalloc((max_aln_length+1) * sizeof(char));
+
+    /* line width: page width minus the name column, rounded down to a
+       multiple of 10 and capped at LINELENGTH */
+    width = PAGEWIDTH - max_names;
+    width -= width % 10;
+    if (width > LINELENGTH)
+        width = LINELENGTH;
+
+    for (row = fseq; row <= lseq; row++) {
+        idx = output_index[row];
+        if (dnaflag)
+            fprintf(nbout, ">DL;");
+        else
+            fprintf(nbout, ">P1;");
+        fprintf(nbout, "%s\n%s\n", names[idx], titles[idx]);
+
+        /* translate residue codes until the end-of-sequence code */
+        count = 0;
+        for (col = fres; col < fres+len; col++) {
+            code = seq_array[idx][col];
+            if (code == -3 || code == 253)
+                break;
+            if (code < 0 || code > max_aa)
+                symbol = '-';
+            else
+                symbol = amino_acid_codes[code];
+            buffer[col-fres] = symbol;
+            count++;
+        }
+
+        /* wrapped output; the last line is always terminated */
+        for (n = 1; n <= count; n++) {
+            fprintf(nbout, "%c", buffer[n-1]);
+            if (n % width == 0 || n == count)
+                fprintf(nbout, "\n");
+        }
+        fprintf(nbout, "*\n");
+    }
+
+    buffer = ckfree((void *)buffer);
+}
+
+
+/*
+ * Write sequences fseq..lseq (in output_index order) in GDE format, then
+ * the secondary structure ("SS_) and gap penalty mask ("GM_) records of the
+ * two profiles when they are in use and selected by
+ * output_struct_penalties.
+ *
+ * fres: first alignment column (1-based); len: number of columns.
+ */
+void gde_out(FILE *gdeout, sint fres, sint len, sint fseq, sint lseq)
+{
+/* static char *aacids = "XCSTPAGNDEQHRKMILVFYW";*/
+/* static char *nbases = "XACGT"; */
+    char *seq, residue;
+    sint val;
+    char *ss_mask1 = NULL, *ss_mask2 = NULL;
+    sint i,ii;
+    sint j,slen;
+    sint line_length;
+
+    seq = (char *)ckalloc((max_aln_length+1) * sizeof(char));
+
+/* decide the line length for this alignment - maximum is LINELENGTH */
+    line_length=PAGEWIDTH-max_names;
+    line_length=line_length-line_length % 10; /* round to a multiple of 10*/
+    if (line_length > LINELENGTH) line_length=LINELENGTH;
+
+    if (struct_penalties1 == SECST && use_ss1 == TRUE) {
+        ss_mask1 = (char *)ckalloc((seqlen_array[1]+10) * sizeof(char));
+        for (i=0;i<seqlen_array[1];i++)
+            ss_mask1[i] = sec_struct_mask1[i];
+        print_sec_struct_mask(seqlen_array[1],sec_struct_mask1,ss_mask1);
+    }
+    if (struct_penalties2 == SECST && use_ss2 == TRUE) {
+        ss_mask2 = (char *)ckalloc((seqlen_array[profile1_nseqs+1]+10) *
+            sizeof(char));
+        for (i=0;i<seqlen_array[profile1_nseqs+1];i++)
+            ss_mask2[i] = sec_struct_mask2[i];
+        print_sec_struct_mask(seqlen_array[profile1_nseqs+1],sec_struct_mask2,ss_mask2);
+    }
+
+    /* sequence records: '#' introduces DNA, '%' protein */
+    for(ii=fseq; ii<=lseq; ii++) {
+        i = output_index[ii];
+        fprintf(gdeout, dnaflag ? "#" : "%%");
+        fprintf(gdeout, "%s\n", names[i]);
+        slen = 0;
+        for(j=fres; j<fres+len; j++) {
+            val = seq_array[i][j];
+            if((val == -3) || (val == 253))
+                break;
+            else if((val < 0) || (val > max_aa))
+                residue = '-';
+            else {
+                residue = amino_acid_codes[val];
+            }
+            if (lowercase)
+                seq[j-fres] = (char)tolower((int)residue);
+            else
+                seq[j-fres] = residue;
+            slen++;
+        }
+        for(j=1; j<=slen; j++) {
+            fprintf(gdeout,"%c",seq[j-1]);
+            if((j % line_length == 0) || (j == slen))
+                fprintf(gdeout,"\n");
+        }
+    }
+/* DES ckfree(output_index); */
+
+    /* In every mask section below the terminator goes to seq[i-fres], the
+       slot right after the last character written.  Three of the four
+       sections used seq[i], i.e. index fres+len, which can lie beyond the
+       max_aln_length+1 bytes of seq. */
+    if (output_struct_penalties == 0 || output_struct_penalties == 2) {
+        if (struct_penalties1 == SECST && use_ss1 == TRUE) {
+            fprintf(gdeout,"\"SS_%-*s\n",max_names,ss_name1);
+            for(i=fres; i<fres+len; i++) {
+                val=ss_mask1[i-1];
+                if (val == gap_pos1 || val == gap_pos2)
+                    seq[i-fres]='-';
+                else
+                    seq[i-fres]=val;
+            }
+            seq[i-fres]=EOS;
+            for(i=1; i<=len; i++) {
+                fprintf(gdeout,"%c",seq[i-1]);
+                if((i % line_length == 0) || (i == len))
+                    fprintf(gdeout,"\n");
+            }
+        }
+
+        if (struct_penalties2 == SECST && use_ss2 == TRUE) {
+            fprintf(gdeout,"\"SS_%-*s\n",max_names,ss_name2);
+            for(i=fres; i<fres+len; i++) {
+                val=ss_mask2[i-1];
+                if (val == gap_pos1 || val == gap_pos2)
+                    seq[i-fres]='-';
+                else
+                    seq[i-fres]=val;
+            }
+            seq[i-fres]=EOS;
+            for(i=1; i<=len; i++) {
+                fprintf(gdeout,"%c",seq[i-1]);
+                if((i % line_length == 0) || (i == len))
+                    fprintf(gdeout,"\n");
+            }
+        }
+    }
+    if (output_struct_penalties == 1 || output_struct_penalties == 2) {
+        if (struct_penalties1 != NONE && use_ss1 == TRUE) {
+            fprintf(gdeout,"\"GM_%-*s\n",max_names,ss_name1);
+            for(i=fres; i<fres+len; i++) {
+                val=gap_penalty_mask1[i-1];
+                if (val == gap_pos1 || val == gap_pos2)
+                    seq[i-fres]='-';
+                else
+                    seq[i-fres]=val;
+            }
+            seq[i-fres]=EOS;
+            for(i=1; i<=len; i++) {
+                fprintf(gdeout,"%c",seq[i-1]);
+                if((i % line_length == 0) || (i == len))
+                    fprintf(gdeout,"\n");
+            }
+        }
+
+        if (struct_penalties2 != NONE && use_ss2 == TRUE) {
+            fprintf(gdeout,"\"GM_%-*s\n",max_names,ss_name2);
+            for(i=fres; i<fres+len; i++) {
+                val=gap_penalty_mask2[i-1];
+                if (val == gap_pos1 || val == gap_pos2)
+                    seq[i-fres]='-';
+                else
+                    seq[i-fres]=val;
+            }
+            seq[i-fres]=EOS;
+            for(i=1; i<=len; i++) {
+                fprintf(gdeout,"%c",seq[i-1]);
+                if((i % line_length == 0) || (i == len))
+                    fprintf(gdeout,"\n");
+            }
+        }
+    }
+
+    if (struct_penalties1 == SECST && use_ss1 == TRUE) ckfree(ss_mask1);
+    if (struct_penalties2 == SECST && use_ss2 == TRUE) ckfree(ss_mask2);
+
+    seq=ckfree((void *)seq);
+}
+
+
+/*
+ * Open one output file for every selected alignment format.
+ *
+ * When outfile_name is set, that name is copied into the format's name
+ * buffer and opened with open_explicit_file(); otherwise the file is
+ * obtained from open_output_file() with `path` and the format's default
+ * extension.  Returns FALSE when no format is selected or as soon as one
+ * open fails, TRUE otherwise.
+ */
+Boolean open_alignment_output(char *path)
+{
+    if (!(output_clustal || output_nbrf || output_gcg ||
+          output_phylip || output_gde || output_nexus)) {
+        error("You must select an alignment output format");
+        return FALSE;
+    }
+
+    if (output_clustal) {
+        if (outfile_name[0] == EOS) {
+            clustal_outfile = open_output_file(
+                "\nEnter a name for the CLUSTAL output file ",path,
+                clustal_outname,"aln");
+        }
+        else {
+            strcpy(clustal_outname,outfile_name);
+            clustal_outfile = open_explicit_file(clustal_outname);
+        }
+        if (clustal_outfile == NULL) return FALSE;
+    }
+
+    if (output_nbrf) {
+        if (outfile_name[0] == EOS) {
+            nbrf_outfile = open_output_file(
+                "\nEnter a name for the NBRF/PIR output file",path,
+                nbrf_outname,"pir");
+        }
+        else {
+            strcpy(nbrf_outname,outfile_name);
+            nbrf_outfile = open_explicit_file(nbrf_outname);
+        }
+        if (nbrf_outfile == NULL) return FALSE;
+    }
+
+    if (output_gcg) {
+        if (outfile_name[0] == EOS) {
+            gcg_outfile = open_output_file(
+                "\nEnter a name for the GCG output file ",path,
+                gcg_outname,"msf");
+        }
+        else {
+            strcpy(gcg_outname,outfile_name);
+            gcg_outfile = open_explicit_file(gcg_outname);
+        }
+        if (gcg_outfile == NULL) return FALSE;
+    }
+
+    if (output_phylip) {
+        if (outfile_name[0] == EOS) {
+            phylip_outfile = open_output_file(
+                "\nEnter a name for the PHYLIP output file ",path,
+                phylip_outname,"phy");
+        }
+        else {
+            strcpy(phylip_outname,outfile_name);
+            phylip_outfile = open_explicit_file(phylip_outname);
+        }
+        if (phylip_outfile == NULL) return FALSE;
+    }
+
+    if (output_gde) {
+        if (outfile_name[0] == EOS) {
+            gde_outfile = open_output_file(
+                "\nEnter a name for the GDE output file ",path,
+                gde_outname,"gde");
+        }
+        else {
+            strcpy(gde_outname,outfile_name);
+            gde_outfile = open_explicit_file(gde_outname);
+        }
+        if (gde_outfile == NULL) return FALSE;
+    }
+
+    if (output_nexus) {
+        if (outfile_name[0] == EOS) {
+            nexus_outfile = open_output_file(
+                "\nEnter a name for the NEXUS output file ",path,
+                nexus_outname,"nxs");
+        }
+        else {
+            strcpy(nexus_outname,outfile_name);
+            nexus_outfile = open_explicit_file(nexus_outname);
+        }
+        if (nexus_outfile == NULL) return FALSE;
+    }
+
+    return TRUE;
+}
+
+
+
+
+
+/*
+ * Write sequences fseq..lseq to every selected output format and close the
+ * corresponding files.  The number of columns written is the longest
+ * seqlen_array entry in that range.
+ */
+void create_alignment_output(sint fseq, sint lseq)
+{
+    sint s;
+    sint longest = 0;
+
+    for (s = fseq; s <= lseq; s++) {
+        if (seqlen_array[s] > longest)
+            longest = seqlen_array[s];
+    }
+    if (usemenu)
+        info("Consensus length = %d",(pint)longest);
+
+    if (output_clustal) {
+        clustal_out(clustal_outfile, 1, longest, fseq, lseq);
+        fclose(clustal_outfile);
+        info("CLUSTAL-Alignment file created [%s]",clustal_outname);
+    }
+
+    if (output_nbrf) {
+        nbrf_out(nbrf_outfile, 1, longest, fseq, lseq);
+        fclose(nbrf_outfile);
+        info("NBRF/PIR-Alignment file created [%s]",nbrf_outname);
+    }
+
+    if (output_gcg) {
+        /* alignment-list input with outputfirst set: write only the first
+           sequence of each subalignment */
+        if (outputfirst && (seqFormat==CLUSTALIST)) {
+            fprintf(stdout, "output first sequence only\n");
+            gcg_out_first(gcg_outfile, 1, longest, fseq, lseq);
+        }
+        else {
+            gcg_out(gcg_outfile, 1, longest, fseq, lseq);
+        }
+        fclose(gcg_outfile);
+        info("GCG-Alignment file created [%s]",gcg_outname);
+    }
+
+    if (output_phylip) {
+        phylip_out(phylip_outfile, 1, longest, fseq, lseq);
+        fclose(phylip_outfile);
+        info("PHYLIP-Alignment file created [%s]",phylip_outname);
+    }
+
+    if (output_gde) {
+        gde_out(gde_outfile, 1, longest, fseq, lseq);
+        fclose(gde_outfile);
+        info("GDE-Alignment file created [%s]",gde_outname);
+    }
+
+    if (output_nexus) {
+        nexus_out(nexus_outfile, 1, longest, fseq, lseq);
+        fclose(nexus_outfile);
+        info("NEXUS-Alignment file created [%s]",nexus_outname);
+    }
+}
+
+
+
+
+
+
+
+/*
+ * Remove gap codes from all sequences, compacting each one in place and
+ * updating seqlen_array.  gap_pos1 codes are removed when
+ * reset_alignments_new or reset_alignments_all is set; gap_pos2 codes only
+ * when reset_alignments_all is set.
+ */
+static void reset_align(void)
+{
+    Boolean strip_new = (reset_alignments_new || reset_alignments_all);
+    sint s, src, kept;
+
+    for (s = 1; s <= nseqs; s++) {
+        kept = 0;
+        for (src = 1; src <= seqlen_array[s]; src++) {
+            if (strip_new && seq_array[s][src] == gap_pos1)
+                continue;
+            if (reset_alignments_all && seq_array[s][src] == gap_pos2)
+                continue;
+            kept++;
+            seq_array[s][kept] = seq_array[s][src];
+        }
+        seqlen_array[s] = kept;
+    }
+}
+
+
+
+/*
+ * Remove gap codes from profile 1: its gap penalty mask, its secondary
+ * structure mask and sequences 1..profile1_nseqs, all compacted in place.
+ * gap_pos1 codes are removed when reset_alignments_new or
+ * reset_alignments_all is set; gap_pos2 codes only when
+ * reset_alignments_all is set.
+ *
+ * The masks are processed first because they are scanned up to
+ * seqlen_array[1], which the sequence pass overwrites.
+ */
+static void reset_prf1(void)
+{
+    Boolean strip_new = (reset_alignments_new || reset_alignments_all);
+    sint s, src, kept;
+
+    if (struct_penalties1 != NONE) {
+        kept = 0;
+        for (src = 0; src < seqlen_array[1]; src++) {
+            if (strip_new && gap_penalty_mask1[src] == gap_pos1)
+                continue;
+            if (reset_alignments_all && gap_penalty_mask1[src] == gap_pos2)
+                continue;
+            gap_penalty_mask1[kept] = gap_penalty_mask1[src];
+            kept++;
+        }
+    }
+
+    if (struct_penalties1 == SECST) {
+        kept = 0;
+        for (src = 0; src < seqlen_array[1]; src++) {
+            if (strip_new && sec_struct_mask1[src] == gap_pos1)
+                continue;
+            if (reset_alignments_all && sec_struct_mask1[src] == gap_pos2)
+                continue;
+            sec_struct_mask1[kept] = sec_struct_mask1[src];
+            kept++;
+        }
+    }
+
+    /* sequences are 1-based, the masks above 0-based */
+    for (s = 1; s <= profile1_nseqs; s++) {
+        kept = 0;
+        for (src = 1; src <= seqlen_array[s]; src++) {
+            if (strip_new && seq_array[s][src] == gap_pos1)
+                continue;
+            if (reset_alignments_all && seq_array[s][src] == gap_pos2)
+                continue;
+            kept++;
+            seq_array[s][kept] = seq_array[s][src];
+        }
+        seqlen_array[s] = kept;
+    }
+}
+
+
+
+/*
+ * Remove gap codes from profile 2: its gap penalty mask, its secondary
+ * structure mask and sequences profile1_nseqs+1..nseqs, all compacted in
+ * place.  gap_pos1 codes are removed when reset_alignments_new or
+ * reset_alignments_all is set; gap_pos2 codes only when
+ * reset_alignments_all is set.
+ *
+ * The masks are processed first because they are scanned up to
+ * seqlen_array[profile1_nseqs+1], which the sequence pass overwrites.
+ */
+static void reset_prf2(void)
+{
+    Boolean strip_new = (reset_alignments_new || reset_alignments_all);
+    sint first = profile1_nseqs+1;
+    sint s, src, kept;
+
+    if (struct_penalties2 != NONE) {
+        kept = 0;
+        for (src = 0; src < seqlen_array[first]; src++) {
+            if (strip_new && gap_penalty_mask2[src] == gap_pos1)
+                continue;
+            if (reset_alignments_all && gap_penalty_mask2[src] == gap_pos2)
+                continue;
+            gap_penalty_mask2[kept] = gap_penalty_mask2[src];
+            kept++;
+        }
+    }
+
+    if (struct_penalties2 == SECST) {
+        kept = 0;
+        for (src = 0; src < seqlen_array[first]; src++) {
+            if (strip_new && sec_struct_mask2[src] == gap_pos1)
+                continue;
+            if (reset_alignments_all && sec_struct_mask2[src] == gap_pos2)
+                continue;
+            sec_struct_mask2[kept] = sec_struct_mask2[src];
+            kept++;
+        }
+    }
+
+    /* sequences are 1-based, the masks above 0-based */
+    for (s = first; s <= nseqs; s++) {
+        kept = 0;
+        for (src = 1; src <= seqlen_array[s]; src++) {
+            if (strip_new && seq_array[s][src] == gap_pos1)
+                continue;
+            if (reset_alignments_all && seq_array[s][src] == gap_pos2)
+                continue;
+            kept++;
+            seq_array[s][kept] = seq_array[s][src];
+        }
+        seqlen_array[s] = kept;
+    }
+}
+
+
+
+/*
+ * Turn every gap_pos1 code into gap_pos2 in the profile 1 gap penalty and
+ * secondary structure masks (when in use) and in all sequences 1..nseqs.
+ * Only the profile 1 masks are touched here.
+ */
+void fix_gaps(void)
+{
+    sint s, col;
+
+    if (struct_penalties1 != NONE) {
+        for (col = 0; col < seqlen_array[1]; col++) {
+            if (gap_penalty_mask1[col] == gap_pos1) {
+                gap_penalty_mask1[col] = gap_pos2;
+            }
+        }
+    }
+
+    if (struct_penalties1 == SECST) {
+        for (col = 0; col < seqlen_array[1]; col++) {
+            if (sec_struct_mask1[col] == gap_pos1) {
+                sec_struct_mask1[col] = gap_pos2;
+            }
+        }
+    }
+
+    /* sequence positions are 1-based */
+    for (s = 1; s <= nseqs; s++) {
+        for (col = 1; col <= seqlen_array[s]; col++) {
+            if (seq_array[s][col] == gap_pos1) {
+                seq_array[s][col] = gap_pos2;
+            }
+        }
+    }
+}
+
+/*
+ * Find the entry of list[0..n-1] that probe selects.
+ *
+ * The probe is compared one leading character at a time.  At each length
+ * the entries that agree with the probe on ALL characters so far are
+ * counted: none -> -1; exactly one -> its index (the rest of the probe is
+ * not checked); several -> try one more character.  Returns -1 when the
+ * whole probe is still ambiguous, or when the probe is empty.
+ *
+ * The previous version compared only the character at the current
+ * position, so an entry that had already mismatched earlier could be
+ * returned, and list[j][i] was read past the end of entries shorter than
+ * the probe.  strncmp() stops at a terminating NUL, which avoids both.
+ */
+static sint find_match(char *probe, char *list[], sint n)
+{
+    sint i,j,len;
+    sint count,match=0;
+
+    len = (sint)strlen(probe);
+    for (i=0;i<len;i++) {
+        count = 0;
+        for (j=0;j<n;j++) {
+            /* entry j is still a candidate if its first i+1 characters
+               equal those of probe */
+            if (strncmp(probe,list[j],(size_t)(i+1)) == 0) {
+                match = j;
+                count++;
+            }
+        }
+        if (count == 0) return((sint)-1);
+        if (count == 1) return(match);
+    }
+    return((sint)-1);
+}
+
+/*
+ * create_parameter_output - write the current settings as a command line.
+ *
+ * The default file name is the path derived from seqname by get_path()
+ * with "par" appended; in menu mode the user may type a different name.
+ * The file is opened through open_explicit_file() and receives one option
+ * per line, each line ending in " \" so that the shell joins them into a
+ * single command.  Nothing is written if the file cannot be opened.
+ */
+static void create_parameter_output(void)
+{
+    char parname[FILENAMELEN+1], temp[FILENAMELEN+1];
+    char path[FILENAMELEN+1];
+    FILE *parout;
+    size_t len;
+    int ch;
+
+    get_path(seqname,path);
+    strcpy(parname,path);
+    /* bounded append: never write past parname[FILENAMELEN] */
+    strncat(parname,"par",FILENAMELEN-strlen(parname));
+
+    if(usemenu) {
+        fprintf(stdout,"\nEnter a name for the parameter output file [%s]: ",
+                parname);
+        temp[0] = EOS;
+        /* fgets() instead of gets(): input is limited to the buffer size */
+        if (fgets(temp,(int)sizeof(temp),stdin) != NULL) {
+            len = strlen(temp);
+            if (len > 0 && temp[len-1] == '\n') {
+                temp[len-1] = EOS;
+            }
+            else {
+                /* over-long line: drop the rest so the next prompt starts clean */
+                while ((ch = getchar()) != '\n' && ch != EOF)
+                    ;
+            }
+        }
+        if(*temp != EOS)
+            strcpy(parname,temp);
+    }
+
+    /* start from a fresh file */
+    remove(parname);
+
+    if((parout = open_explicit_file(parname))==NULL) return;
+
+    fprintf(parout,"clustalw \\\n");
+    if (!empty && profile1_empty) fprintf(parout,"-infile=%s \\\n",seqname);
+    /* the space before the backslash keeps this option separate from the next */
+    if (!profile1_empty) fprintf(parout,"-profile1=%s \\\n",profile1_name);
+    if (!profile2_empty) fprintf(parout,"-profile2=%s \\\n",profile2_name);
+    if (dnaflag == TRUE) fprintf(parout,"-type=dna \\\n");
+    else fprintf(parout,"-type=protein \\\n");
+
+    /* pairwise alignment settings */
+    if (quick_pairalign) {
+        fprintf(parout,"-quicktree \\\n");
+        fprintf(parout,"-ktuple=%d \\\n",(pint)ktup);
+        fprintf(parout,"-window=%d \\\n",(pint)window);
+        fprintf(parout,"-pairgap=%d \\\n",(pint)wind_gap);
+        fprintf(parout,"-topdiags=%d \\\n",(pint)signif);
+        if (percent) fprintf(parout,"-score=percent \\\n");
+        else fprintf(parout,"-score=absolute \\\n");
+    }
+    else {
+        if (!dnaflag) {
+            fprintf(parout,"-pwmatrix=%s \\\n",pw_mtrxname);
+            fprintf(parout,"-pwgapopen=%.2f \\\n",prot_pw_go_penalty);
+            fprintf(parout,"-pwgapext=%.2f \\\n",prot_pw_ge_penalty);
+        }
+        else {
+            fprintf(parout,"-pwgapopen=%.2f \\\n",pw_go_penalty);
+            fprintf(parout,"-pwgapext=%.2f \\\n",pw_ge_penalty);
+        }
+    }
+
+    /* multiple alignment settings */
+    if (!dnaflag) {
+        fprintf(parout,"-matrix=%s \\\n",mtrxname);
+        fprintf(parout,"-gapopen=%.2f \\\n",prot_gap_open);
+        fprintf(parout,"-gapext=%.2f \\\n",prot_gap_extend);
+    }
+    else {
+        fprintf(parout,"-gapopen=%.2f \\\n",dna_gap_open);
+        fprintf(parout,"-gapext=%.2f \\\n",dna_gap_extend);
+    }
+
+    fprintf(parout,"-maxdiv=%d \\\n",(pint)divergence_cutoff);
+    if (!use_endgaps) fprintf(parout,"-endgaps \\\n");
+
+    if (!dnaflag) {
+        if (neg_matrix) fprintf(parout,"-negative \\\n");
+        if (no_pref_penalties) fprintf(parout,"-nopgap \\\n");
+        if (no_hyd_penalties) fprintf(parout,"-nohgap \\\n");
+        if (no_var_penalties) fprintf(parout,"-novgap \\\n");
+        fprintf(parout,"-hgapresidues=%s \\\n",hyd_residues);
+        fprintf(parout,"-gapdist=%d \\\n",(pint)gap_dist);
+    }
+    else {
+        fprintf(parout,"-transweight=%.2f \\\n",transition_weight);
+    }
+
+    /* output settings */
+    if (output_gcg) fprintf(parout,"-output=gcg \\\n");
+    else if (output_gde) fprintf(parout,"-output=gde \\\n");
+    else if (output_nbrf) fprintf(parout,"-output=pir \\\n");
+    else if (output_phylip) fprintf(parout,"-output=phylip \\\n");
+    else if (output_nexus) fprintf(parout,"-output=nexus \\\n");
+    if (outfile_name[0]!=EOS) fprintf(parout,"-outfile=%s \\\n",outfile_name);
+    if (output_order==ALIGNED) fprintf(parout,"-outorder=aligned \\\n");
+    else fprintf(parout,"-outorder=input \\\n");
+    /* the case option is only written for GDE output; braces make the
+       pairing of the else with the inner test explicit */
+    if (output_gde) {
+        if (lowercase) fprintf(parout,"-case=lower \\\n");
+        else fprintf(parout,"-case=upper \\\n");
+    }
+
+    /* last line: no continuation backslash */
+    fprintf(parout,"-interactive\n");
+
+    fclose(parout);
+}
+
+
+/* JP: print help */
+/* Writes the PCMA usage text to stdout, one table entry per output line. */
+void print_help()
+{
+    /* The text is kept as a table so the wording is easy to scan and edit. */
+    static const char *const help_text[] = {
+        "\n",
+        "PCMA - Profile Consistency Multiple sequence Alignment\n",
+        "\n",
+        "1.A quick start\n",
+        " To align a sequence set in fasta format, use the following command:\n",
+        " pcma <target_sequences>\n",
+        " Two output files will be generated:\n",
+        " <target_sequences>.aln - A multiple sequence alignment in clustal format\n",
+        " <target_sequences>.dnd - A dendrogram in phylip format \n",
+        "\n",
+        "2.Usage: pcma <target_sequences> <options> \n",
+        " The first command line argument <target_sequences> should be the name of the \n",
+        " file containing FASTA format sequences. One IMPORTANT notice is that the \n",
+        " sequences should not contain gap characters in them, otherwise the results \n",
+        " might be incorrect.\n",
+        " \n",
+        " Options are in the format of -optionName or -optionName=option.\n",
+        " NOTE that there should be no space(s) between \"optionName\", \"=\" and \"option\".\n",
+        " Although many of the original ClustalW options are supported in PCMA,\n",
+        " changes from default parameters are not recommended for most of them.\n",
+        " \n",
+        " An example:\n",
+        " pcma yfp.fa -ave_grp_id=50 -outfile=yfp.pcma50.aln\n",
+        "\n",
+        "3.Commonly used options\n",
+        " -ave_grp_id= Threshold of PERCENTAGE sequence identity above which \n",
+        " neighboring groups are aligned by ClustalW and below which \n",
+        " neighboring groups are subject to profile consistency measure.\n",
+        " If the sequence number is very large, a decrease of the\n",
+        " threshold from the default value is recommended.\n",
+        " Range [0..100]\n",
+        " Default: -ave_grp_id=40\n",
+        "\n",
+        " -outfile= Name of the output alignment.\n",
+        " If this option is not used, the output alignment will be \n",
+        " in clustal format with .aln suffix\n",
+        " \n",
+        " -output= The output alignment format.\n",
+        " Default: -output=clustal\n",
+        " Other formats include gcg, phylip and pir.\n",
+        " \n",
+        " -help or -options Help and options.\n",
+        " \n",
+        " 4.Newly added function for PCMA\n",
+        " PCMA now supports the alignment of several alignments. In this case, the first \n",
+        " command-line parameter should be a file containing a list of file names of the\n",
+        " alignments to be aligned together. To make this format distinct from a fasta\n",
+        " format sequence file, the first line should start with a character \"@\". Each of\n",
+        " the other lines contains the file name of an alignment. Here, input alignments\n",
+        " CAN have gap characters (usually they do).\n",
+        " For example, below is the content of file \"alnlist\": \n",
+        " \n",
+        " @\n",
+        " alignment1.aln\n",
+        " alignment2.aln\n",
+        " alignment3.aln\n",
+        " \n",
+        " PCMA (command: pcma alnlist) will generate a new alignment named \"alnlist.aln\". \n",
+        " \n"
+    };
+    const size_t line_count = sizeof(help_text) / sizeof(help_text[0]);
+    size_t idx;
+
+    for (idx = 0; idx < line_count; idx++)
+        fputs(help_text[idx], stdout);
+}
diff --git a/lib_extension.c b/lib_extension.c
new file mode 100644
index 0000000..fbde046
--- /dev/null
+++ b/lib_extension.c
@@ -0,0 +1,136 @@
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include "pcma.h"
+/*#include "new.h"*/
+
+extern SN ****glib;
+extern sint *seqlen_array;
+extern char **seq_array;
+extern sint ngroups;
+extern streeptr *groupptr;
+
+#define min(x,y) ((x)<=(y) ? (x) : (y))
+
+int HIGHEST_SCORE = 1000; /* do normalization: make the highest score to be 1000 */
+
+/*
+ * lib_extension - extend the pairwise library through third groups and
+ * normalise the resulting scores.
+ *
+ * Step 1: for every group pair (i,j) with i<j and every other group k, each
+ *         library entry (ni -> nk) of glib[i][k] with a non-zero sbe is
+ *         combined with each entry (nk -> nj) of glib[k][j] with a non-zero
+ *         sbe; min(sbe, sbe) of the two entries is added to the sae of the
+ *         entry (ni -> nj) in glib[i][j], which is created when missing.
+ * Step 2: the direct score sbe is added to sae and the largest sae is found.
+ * Step 3: every sae is rescaled so that the largest becomes HIGHEST_SCORE.
+ *
+ * The bridging position nk is taken from the node currently being visited
+ * (ndi->ind).  Reading it from the head of the list instead would pair the
+ * weight of one library entry with the column of another.
+ */
+void lib_extension()
+{
+    sint i, j, k;
+    sint ni, nj;
+    SN *ndi, *ndk, *ndj, *tail, *nd;
+    int bonus;
+    int Max_score = 0;
+
+    /* lib extension: going over all group triplets, additional scores stored in sae */
+    for (i = 0; i < ngroups; i++) {
+        for (j = i + 1; j < ngroups; j++) {
+            for (k = 0; k < ngroups; k++) {
+                if (k == i || k == j) continue;
+
+                for (ni = 1; ni <= groupptr[i]->seqlength; ni++) {
+                    for (ndi = glib[i][k][ni]; ndi != NULL; ndi = ndi->next) {
+                        if (ndi->sbe == 0) continue;
+
+                        /* follow this entry, not the list head, into group k */
+                        for (ndk = glib[k][j][ndi->ind]; ndk != NULL; ndk = ndk->next) {
+                            if (ndk->sbe == 0) continue;
+                            nj = ndk->ind;
+                            bonus = min(ndi->sbe, ndk->sbe);
+
+                            /* find the (ni -> nj) entry, remembering the list tail */
+                            tail = NULL;
+                            for (ndj = glib[i][j][ni]; ndj != NULL; ndj = ndj->next) {
+                                if (ndj->ind == nj) break;
+                                tail = ndj;
+                            }
+                            if (ndj == NULL) {
+                                /* not present yet: append a fresh node */
+                                ndj = SNavail();
+                                ndj->ind = nj;
+                                if (tail == NULL) glib[i][j][ni] = ndj;
+                                else tail->next = ndj;
+                            }
+                            ndj->sae += bonus;
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    /* adding sbe to sae and recording the largest score for normalization */
+    for (i = 0; i < ngroups; i++) {
+        for (j = i + 1; j < ngroups; j++) {
+            for (ni = 1; ni <= groupptr[i]->seqlength; ni++) {
+                for (nd = glib[i][j][ni]; nd != NULL; nd = nd->next) {
+                    nd->sae += nd->sbe;
+                    if (nd->sae > Max_score) {
+                        Max_score = nd->sae;
+                    }
+                }
+            }
+        }
+    }
+
+    /* nothing positive to scale by: leave the scores untouched rather than
+       divide by zero */
+    if (Max_score <= 0) return;
+
+    /* do normalization: make the highest score HIGHEST_SCORE */
+    for (i = 0; i < ngroups; i++) {
+        for (j = i + 1; j < ngroups; j++) {
+            for (ni = 1; ni <= groupptr[i]->seqlength; ni++) {
+                for (nd = glib[i][j][ni]; nd != NULL; nd = nd->next) {
+                    nd->sae = nd->sae*HIGHEST_SCORE/Max_score;
+                }
+            }
+        }
+    }
+
+}
diff --git a/lib_generation.c b/lib_generation.c
new file mode 100644
index 0000000..f6716a6
--- /dev/null
+++ b/lib_generation.c
@@ -0,0 +1,348 @@
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include "pcma.h"
+/*#include "new.h"*/
+
+extern SN ****glib;
+extern sint *seqlen_array;
+extern char **seq_array;
+extern sint ngroups;
+extern streeptr *groupptr;
+extern streeptr *grp_ancestor;
+extern sint KK;
+extern sint debug;
+extern sint nseqs;
+extern char **names;
+
+extern void generatematrix(int **align1, int alnlength1, int nali1, int indi);
+
+
+static double **scoremt;
+static int **iscoremt;
+
+extern char *am;
+long int totalSN = 0;
+
+static sint av = 0;
+static SN *snarray;
+
+double **sumefflet, ***neffAa, ***pscnt;
+/* lib_generation: build the primary library for every pair of groups.
+ *
+ * 1. Allocates the global tables sumefflet, neffAa and pscnt: one entry per
+ *    group, sized by that group's seqlength; neffAa and pscnt get 21 doubles
+ *    for each position 1..seqlength.
+ * 2. Calls generatematrix() once per group.
+ * 3. Resets glib[i][j][k] to NULL for every group pair and position.
+ * 4. For each pair i<j: allocates scoremt/iscoremt, calls prfprfmatrix(),
+ *    copies the scores truncated to int, then calls SIM() (local) and
+ *    prfalign1() (global), and frees the two matrices again.
+ */
+void lib_generation()
+{
+ sint i,j,k,l;
+
+ sint ni;
+
+ sint *grp;
+
+ sint local = 1;
+ sint global = 1;
+ /* both switches are set to 1 here, so both library sources are used */
+ sint prc=0, prc1=0;
+
+ sumefflet = ckalloc ( (ngroups+1) * sizeof(double *) );
+ for(i=0;i<ngroups;i++)
+ sumefflet[i] = ckalloc ( (groupptr[i]->seqlength+1) * sizeof(double) );
+ neffAa = ckalloc ( (ngroups+1) * sizeof(double **) );
+ for(i=0;i<ngroups;i++) {
+ neffAa[i] = ckalloc ( (groupptr[i]->seqlength+1) * sizeof(double *) );
+ for(j=1;j<=groupptr[i]->seqlength;j++) {
+ neffAa[i][j] = ckalloc ( 21 * sizeof(double) );
+ }
+ }
+
+ pscnt = ckalloc ( (ngroups+1) * sizeof(double **) );
+ for(i=0;i<ngroups;i++) {
+ pscnt[i] = ckalloc ( (groupptr[i]->seqlength+1) * sizeof(double *) );
+ for(j=1;j<=groupptr[i]->seqlength;j++) {
+ pscnt[i][j] = ckalloc ( 21 * sizeof(double) );
+ }
+ }
+
+ //fprintf(stdout, "==\n"); fflush(stdout);
+ /* per-group pass; with debug>1 the neffAa and pscnt rows are dumped */
+ for(i=0;i<ngroups;i++) {
+ generatematrix(groupptr[i]->seq, groupptr[i]->seqlength, groupptr[i]->seqnum, i);
+ prc1++;
+ if(prc1%LINELENGTH==0) { fprintf(stdout, "\n"); } // start a new line in the output
+
+ if(debug>1) {
+ fprintf(stdout, "******* %d ********\n", i);
+ for(j=1; j<=groupptr[i]->seqlength; j++) {
+ for(k=1;k<=20;k++) {
+ fprintf(stdout, "%8.5f", neffAa[i][j][k]);
+ }
+ fprintf(stdout, "\n");
+ for(k=1;k<=20;k++) {
+ fprintf(stdout, "%8.5f", pscnt[i][j][k]);
+ }
+ fprintf(stdout, "\n");
+ }
+ fprintf(stdout, "\n");
+ }
+ }
+ fprintf(stdout, "\n");
+
+
+ //fprintf(stdout, "==\n"); fflush(stdout);
+
+ scoremt = NULL;
+ iscoremt = NULL;
+
+ /* allocate for the libs [0..ngroups-1][0..ngroups-1] */
+ for(i=0;i<ngroups;i++) {
+ for(j=0;j<ngroups;j++) {
+ /*glib[i][j] = ckalloc(groupptr[i]->seqlength*sizeof(SN *) );*/
+ for(k=1;k<=groupptr[i]->seqlength;k++)
+ glib[i][j][k]=NULL;
+ }
+ }
+ /* pairwise pass over all group pairs i<j */
+ for(i=0;i<ngroups;i++) {
+ for(j=i+1;j<ngroups;j++) {
+
+
+ if(debug>1)fprintf(stdout, "Preparing lib for group i: %d j: %d\n", i, j);
+ if(debug>1)fprintf(stdout, "seqnum: %d %d seqlength: %d %d\n", groupptr[i]->seqnum, groupptr[j]->seqnum, groupptr[i]->seqlength, groupptr[j]->seqlength);
+
+
+ /* allocate memory for the scoring matrix between two pre-aligned groups */
+ scoremt = ckalloc( (groupptr[i]->seqlength+1)*sizeof(double *) );
+ for(k=1;k<=groupptr[i]->seqlength;k++)
+ scoremt[k] = ckalloc( (groupptr[j]->seqlength+1)*sizeof(double));
+ iscoremt = ckalloc( (groupptr[i]->seqlength+1)*sizeof(int *) );
+ for(k=1;k<=groupptr[i]->seqlength;k++)
+ iscoremt[k] = ckalloc( (groupptr[j]->seqlength+1)*sizeof(int));
+
+
+ /* profile-profile alignment to obtain the scoring matrix */
+ prfprfmatrix(i, j, groupptr[i]->seqlength, groupptr[j]->seqlength, groupptr[i]->seqnum, groupptr[j]->seqnum, scoremt);
+
+ if(debug>1)fprintf(stdout, "++++++++++\n");
+ /* integer copy of the score matrix (values truncated by the cast) */
+ for(k=1;k<=groupptr[i]->seqlength;k++) {
+ for(l=1;l<=groupptr[j]->seqlength;l++) {
+ iscoremt[k][l] = (int) scoremt[k][l];
+ }
+ }
+
+ if(debug>4) {
+ for(k=1;k<=groupptr[i]->seqlength;k++){
+ fprintf(stdout, "i%d ", k);
+ for(l=1;l<=groupptr[j]->seqlength;l++) {
+ fprintf(stdout, "%d ", iscoremt[k][l]);
+ }
+ fprintf(stdout, "\n");
+ }
+ }
+
+ if(local==1) {
+ /* lib generation */
+ fflush(stdout);
+ SIM(groupptr[i]->seqlength, groupptr[j]->seqlength, KK,iscoremt, 320,32, 2, i,j);
+
+ /* SIM(groupptr[j]->seqlength, groupptr[i]->seqlength, KK,iscoremt, 320 ,32, 2, j,i); */
+ /*SIM(groupptr[i]->seqlength, groupptr[j]->seqlength, KK,iscoremt, 320,128, 2, i,j); */
+ }
+
+
+ if(global==1) {
+
+ /* lib generation from global alignment */
+ grp = ckalloc( (nseqs+1) * sizeof(sint) );
+ for(k=1;k<=nseqs;k++) grp[k] = 0;
+ for(k=1;k<=nseqs;k++) grp[k] = 0; /* NOTE(review): repeats the previous line */
+ /* mark by name: 1 = sequence belongs to group i, 2 = to group j */
+ for(ni=1;ni<=groupptr[i]->seqnum;ni++) {
+ for(k=1;k<=nseqs;k++) {
+ if(!strcmp(names[k], groupptr[i]->name[ni]) ) {
+ grp[k] = 1;
+ break;
+ }
+ }
+ }
+ for(ni=1;ni<=groupptr[j]->seqnum;ni++) {
+ for(k=1;k<=nseqs;k++) {
+ if(!strcmp(names[k], groupptr[j]->name[ni]) ) {
+ grp[k] = 2;
+ break;
+ }
+ }
+ }
+
+ if(debug>1)fprintf(stdout, "i %d j %d ============\n",i,j); fflush(stdout);
+ prfalign1(grp, i, j);
+ /* prfalign2(grp, i, j, 352, 32, iscoremt); */
+ ckfree((void *)grp);
+ }
+ /* release the per-pair score matrices */
+ for(k=1;k<=groupptr[i]->seqlength;k++){
+ ckfree((void *)scoremt[k]);
+ ckfree((void *)iscoremt[k]);
+ }
+ ckfree((void *)scoremt);
+ ckfree((void *)iscoremt);
+
+ prc++;
+ if((prc%LINELENGTH)==0) fprintf(stdout, "\n");
+ }
+ }
+
+}
+
+/* SN * SNavail(){
+ SN *cur = ckalloc(sizeof(SN));
+ cur->sbe = 0;
+ cur->sae = 0;
+ cur->ind = 0;
+ cur->next = NULL;
+ totalSN++;
+ if(totalSN%100000==0) {
+ if(debug>1) fprintf(stdout, "memory for SN: %d * %d\n", totalSN, sizeof(SN) );
+ }
+ return cur;
+} */
+
+/*
+ * SNavail - hand out one zero-initialised SN node.
+ *
+ * Nodes are carved out of blocks of SN_CHUNK entries obtained from
+ * ckalloc(); a new block is requested whenever the current one is used up.
+ * The returned node has sbe = sae = ind = 0 and next = NULL.  totalSN
+ * counts every node handed out; with debug>1 a progress line is printed
+ * every 100000 nodes.
+ */
+SN * SNavail(){
+
+    enum { SN_CHUNK = 1000 };   /* nodes per allocated block */
+    SN *cur;
+
+    /* av is 0 on the first call and SN_CHUNK when the block is exhausted */
+    if (av%SN_CHUNK==0) {
+        snarray = ckalloc(SN_CHUNK*sizeof(SN) );
+        av = 0;
+    }
+
+    cur = &snarray[av];
+    cur->sbe = 0;
+    cur->sae = 0;
+    cur->ind = 0;
+    cur->next = NULL;
+
+    av++;
+    totalSN++;
+    if(totalSN%100000==0) {
+        /* totalSN is long and sizeof yields size_t: print them as such */
+        if(debug>1) fprintf(stdout, "memory for SN: %ld * %lu\n", totalSN, (unsigned long)sizeof(SN) );
+    }
+
+    return cur;
+
+}
+
+
+
+
+/*
+ * AddSbe - add score s for column indi to the list that starts at node.
+ *
+ * node : head of an existing list (must not be NULL, see below)
+ * indi : column index to look for
+ * s    : score to add
+ *
+ * If an entry with ind == indi exists anywhere in the list (including the
+ * last node) its sbe is increased by s.  Otherwise a new node carrying
+ * (indi, s) is linked to the end of the list.
+ *
+ * The head is received by value, so an empty list cannot be given its
+ * first node from here; in that case the call does nothing and the caller
+ * has to create the head itself.
+ */
+void AddSbe(SN *node, int indi, int s)
+{
+    SN *nd;
+
+    if(!node) {
+        /* a node allocated here would be unreachable for the caller */
+        return;
+    }
+
+    /* walk to the matching node or stop on the last one */
+    for(nd = node; ; nd = nd->next) {
+        if(nd->ind == indi) {
+            nd->sbe += s;
+            return;
+        }
+        if(nd->next == NULL) break;
+    }
+
+    /* no entry for indi yet: append one */
+    nd->next = SNavail();
+    nd->next->ind = indi;
+    nd->next->sbe = s;
+}
+
+/*
+ * printLib - dump the library entries stored for the group pair (gi, gj).
+ * For each position of group gi every list node is printed as
+ * "position ind sae sbe".  If either index is not below ngroups a
+ * message is printed and the program exits.
+ */
+void printLib(int gi, int gj)
+{
+    int pos;
+    SN *cur;
+
+    if( !(gi < ngroups && gj < ngroups) ) {
+        printf("group number exceeds the boundary: max(%d, %d) >= %d", gi, gj, ngroups);
+        exit(0);
+    }
+
+    printf("Group %d and Group %d\n", gi, gj);
+    for(pos = 1; pos <= groupptr[gi]->seqlength; pos++) {
+        for(cur = glib[gi][gj][pos]; cur; cur = cur->next) {
+            printf("%d %d %d %d\n", pos, cur->ind, cur->sae, cur->sbe);
+        }
+    }
+}
+
+/*
+ * printMatrix - print the sae scores of the pair (gi, gj) as a table.
+ * Rows follow the positions of group gi and columns those of group gj;
+ * cells with no library entry show 0.  Row and column labels are the
+ * residues of the first sequence of each group, translated through am[].
+ */
+void printMatrix(int gi, int gj)
+{
+    int row, col;
+    SN *cur;
+    int **table;
+
+    /* rows 1..seqlength(gi), each cleared as soon as it is allocated */
+    table = ckalloc( (groupptr[gi]->seqlength+1) *sizeof(int *) );
+    for(row = 1; row <= groupptr[gi]->seqlength; row++) {
+        table[row] = ckalloc( (groupptr[gj]->seqlength+1)*sizeof(int) );
+        for(col = 1; col <= groupptr[gj]->seqlength; col++)
+            table[row][col] = 0;
+    }
+
+    /* copy the library scores into the table */
+    for(row = 1; row <= groupptr[gi]->seqlength; row++) {
+        for(cur = glib[gi][gj][row]; cur; cur = cur->next)
+            table[row][cur->ind] = cur->sae;
+    }
+
+    /* header line */
+    printf(" ");
+    for(col = 1; col <= groupptr[gj]->seqlength; col++)
+        printf("%5c", am[groupptr[gj]->seq[1][col]]);
+    printf("\n");
+
+    /* one line per row */
+    for(row = 1; row <= groupptr[gi]->seqlength; row++) {
+        printf("%c ", am[groupptr[gi]->seq[1][row]]);
+        for(col = 1; col <= groupptr[gj]->seqlength; col++)
+            printf("%5d", table[row][col]);
+        printf("\n");
+    }
+
+    for(row = 1; row <= groupptr[gi]->seqlength; row++)
+        ckfree((void *)table[row]);
+    ckfree((void *)table);
+}
+
+
+/*
+ * printAbstract - print the abstract alignment held by grp_ancestor[set].
+ * Each abstract sequence names a group in its element 0; every member of
+ * that group is printed on its own line as "name<TAB>residues", where an
+ * abstract value of 0 is shown as '-' and any other value is used as the
+ * position to read from the member sequence.
+ */
+void printAbstract(int set)
+{
+    int a, col, member, gid, pos;
+    streeptr tr = grp_ancestor[set];
+
+    for(a = 1; a <= tr->abseqnum; a++) {
+        gid = tr->abstractseq[a][0];
+        for(member = 1; member <= groupptr[gid]->seqnum; member++) {
+
+            printf("%s\t", groupptr[gid]->name[member]);
+            for(col = 1; col <= tr->abseqlength; col++) {
+                pos = tr->abstractseq[a][col];
+                if(pos != 0)
+                    fputc(am[groupptr[gid]->seq[member][pos]], stdout);
+                else
+                    fputs("-", stdout);
+            }
+            printf("\n");
+        }
+    }
+}
diff --git a/lsim1.c b/lsim1.c
new file mode 100644
index 0000000..36cea50
--- /dev/null
+++ b/lsim1.c
@@ -0,0 +1,1258 @@
+#include <stdio.h>
+#include <math.h>
+#include "pcma.h"
+/*#include "new.h"*/
+
+extern SN ****glib;
+extern streeptr *groupptr;
+extern char *am;
+extern sint debug;
+
+/* extern char name0[], name1[]; */
+/* extern int match, mismh; */
+/* extern char *sq, sqnam[], *seqc0, *seqc1; */
+/* extern char ttitle[], ltitle[]; */
+int min0,min1,max0,max1;
+int smin0, smin1;
+int markx = 0;
+int gscore;
+
+#define min(x,y) ((x)<=(y) ? (x) : (y))
+
+
+static int **vv; /* substitution scores */
+static int q, r; /* gap penalties */
+static int qr; /* qr = q + r */
+
+#ifdef FAR_PTR
+typedef struct ONE
+ { int COL ; struct ONE far * NEXT ;}
+ pair, far * pairptr;
+pairptr far *row, z; /* for saving used aligned pairs */
+#define PAIRNULL (pairptr)NULL
+#else
+typedef struct ONE { int COL ; struct ONE *NEXT ;} pair, *pairptr;
+pairptr *row, z; /* for saving used aligned pairs */
+#define PAIRNULL (pairptr)NULL
+#endif
+static int tt;
+
+typedef struct LNODE
+ { int SCORE;
+ int STARI;
+ int STARJ;
+ int ENDI;
+ int ENDJ;
+ int TOP;
+ int BOT;
+ int LLEFT;
+ int LRIGHT; } vertex,
+#ifdef FAR_PTR
+ far *vertexptr;
+#else
+ *vertexptr;
+#endif
+
+vertexptr *LIST; /* an array for saving k best scores */
+vertexptr low = 0; /* lowest score node in LIST */
+vertexptr most = 0; /* latestly accessed node in LIST */
+static int numnode; /* the number of nodes in LIST */
+
+static int *CC, *DD; /* saving matrix scores */
+static int *RR, *SS, *EE, *FF; /* saving start-points */
+static int *HH, *WW; /* saving matrix scores */
+static int *II, *JJ, *XX, *YY; /* saving start-points */
+static int m1, mm, n1, nn; /* boundaries of recomputed area */
+static int rl, cl; /* left and top boundaries */
+static int lmin; /* minimum score in LIST */
+static int flag; /* indicate if recomputation necessary*/
+
+/* DIAG() assigns value to x only if the pair (ii,jj) is not yet in the list row[ii] */
+#define DIAG(ii, jj, x, value) \
+{ for ( tt = 1, z = row[(ii)]; z != PAIRNULL; z = z->NEXT ) \
+ if ( z->COL == (jj) ) \
+ { tt = 0; break; } \
+ if ( tt ) \
+ x = ( value ); \
+}
+
+/* replace (ss1, xx1, yy1) by (ss2, xx2, yy2) if the latter is larger (score first, then xx, then yy) */
+#define ORDER(ss1, xx1, yy1, ss2, xx2, yy2) \
+{ if ( ss1 < ss2 ) \
+ { ss1 = ss2; xx1 = xx2; yy1 = yy2; } \
+ else \
+ if ( ss1 == ss2 ) \
+ { if ( xx1 < xx2 ) \
+ { xx1 = xx2; yy1 = yy2; } \
+ else \
+ if ( xx1 == xx2 && yy1 < yy2 ) \
+ yy1 = yy2; \
+ } \
+}
+
+/* The following definitions are for function diff() */
+
+int diff(), display();
+static int zero = 0; /* int type zero */
+
+#define gap(k) ((k) <= 0 ? 0 : q+r*(k)) /* k-symbol indel score */
+
+static int *sapp; /* Current script append ptr */
+static int last; /* Last script op appended */
+
+static int I, J; /* current positions of A ,B */
+static int no_mat; /* number of matches */
+static int no_mis; /* number of mismatches */
+static int al_len; /* length of alignment */
+ /* Append "Delete k" op */
+#define DEL(k) \
+{ I += k; \
+ al_len += k; \
+ if (last < 0) \
+ last = sapp[-1] -= (k); \
+ else \
+ last = *sapp++ = -(k); \
+}
+ /* Append "Insert k" op */
+#define INS(k) \
+{ J += k; \
+ al_len += k; \
+ if (last < 0) \
+ { sapp[-1] = (k); *sapp++ = last; } \
+ else \
+ last = *sapp++ = (k); \
+}
+
+ /* Append "Replace" op */
+#define REP \
+{ last = *sapp++ = 0; \
+ al_len += 1; \
+}
+
+#ifndef FAR_PTR
+#define FCKALLOC lckalloc
+#else
+#define FCKALLOC flckalloc
+#endif
+
+/* SIM(M,N,K,V,Q,R,nseq,gi,gj) reports K best non-intersecting alignments
+ between the groups gi and gj in order of similarity scores and records
+ the aligned position pairs in glib[gi][gj] and glib[gj][gi].
+ M, N : the score vectors are allocated with N+1 and M+1 entries and
+ V is read as V[i][j] with i in 1..M and j in 1..N
+ K : number of alignments kept in LIST and reported
+ V : V[i][j] is the score of aligning position i with position j
+ Q, R : -(Q+R*i) is the score of an i-symbol indel
+ nseq : if 2 the per-row lists of used pairs start empty, otherwise
+ every row i starts with the pair (i,i) already marked
+ gi, gj : group indices used with glib, groupptr and am
+ The work vectors, LIST and row are file-level variables that are
+ allocated here and released again before returning. */
+
+void SIM(int M,int N,int K,int **V,int Q,int R,int nseq,int gi, int gj)
+{
+ int endi, endj, stari, starj; /* endpoint and startpoint */
+ int score; /* the max score in LIST */
+ int count; /* maximum size of list */
+ register int i, j; /* row and column indices */
+ char *lckalloc(); /* space-allocating function */
+#ifdef FAR_PTR
+ char far *flckalloc();
+#endif
+ int *S; /* saving operations for diff */
+ int nc, nd, ns, nident; /* for display */
+ int tmp; /* for switching min0,min1 */
+ vertexptr cur; /* temporary pointer */
+ vertexptr findmax(); /* return the largest score node */
+ double percent;
+
+ /* JP */
+ int first, second;
+ SN **snd;
+ int *aln_path1, *aln_path2;
+ int tmpfirst, tmpsecond;
+ int n, nblocks;
+ int ii, jj;
+ int tpairs, idpairs;
+ int k;
+ int ngaps, nonegaps;
+
+ /* allocate space for all vectors */
+ j = (N + 1) * sizeof(int);
+ CC = ( int * ) lckalloc(j);
+ DD = ( int * ) lckalloc(j);
+ RR = ( int * ) lckalloc(j);
+ SS = ( int * ) lckalloc(j);
+ EE = ( int * ) lckalloc(j);
+ FF = ( int * ) lckalloc(j);
+ i = (M + 1) * sizeof(int);
+ HH = ( int * ) lckalloc(i);
+ WW = ( int * ) lckalloc(i);
+ II = ( int * ) lckalloc(i);
+ JJ = ( int * ) lckalloc(i);
+ XX = ( int * ) lckalloc(i);
+ YY = ( int * ) lckalloc(i);
+ S = ( int * ) lckalloc(min(i,j)*5/4); /* NOTE(review): i and j hold byte counts at this point */
+#ifdef FAR_PTR
+ row = ( pairptr far * ) FCKALLOC( (M + 1) * sizeof(pairptr));
+#else
+ row = ( pairptr * ) lckalloc( (M + 1) * sizeof(pairptr));
+#endif
+
+ /* set up list for each row */
+ if (nseq == 2) for ( i = 1; i <= M; i++ ) row[i]= PAIRNULL;
+ else {
+ z = ( pairptr )FCKALLOC((int)sizeof(pair)*M); /* NOTE(review): no ckfree of this block is visible in SIM */
+ for ( i = 1; i <= M; i++,z++) {
+ row[i] = z;
+ z->COL = i;
+ z->NEXT = PAIRNULL;
+ }
+ }
+
+ vv = V;
+ /*for(i=1;i<=M;i++) {
+ for(j=1;j<=N;j++) {
+ fprintf(stdout, "%d ", vv[i][j]);
+ }
+ fprintf(stdout, "\n");
+ }*/
+ q = Q;
+ r = R;
+ qr = q + r;
+
+ /*LIST = ( vertexptr * ) lckalloc( K * sizeof(vertexptr));*/
+ LIST = ckalloc(K*sizeof(vertexptr));
+ for ( i = 0; i < K ; i++ )
+ /*LIST[i] = ( vertexptr ) FCKALLOC( (int) sizeof(vertex));*/
+ LIST[i] = ckalloc(sizeof(vertex));
+
+ numnode = lmin = 0;
+ big_pass(M,N,K,nseq);
+
+ /* Report the K best alignments one by one. After each alignment is
+ output, recompute part of the matrix. First determine the size
+ of the area to be recomputed, then do the recomputation */
+
+ for ( count = K - 1; count >= 0 ; count-- )
+ { if ( numnode == 0 )
+ lfatal("The number of alignments computed is too large");
+ cur = findmax(); /* Return a pointer to a node with max score*/
+ score = cur->SCORE;
+ stari = ++cur->STARI;
+ starj = ++cur->STARJ;
+ endi = cur->ENDI;
+ endj = cur->ENDJ;
+ m1 = cur->TOP;
+ mm = cur->BOT;
+ n1 = cur->LLEFT;
+ nn = cur->LRIGHT;
+ rl = endi - stari + 1;
+ cl = endj - starj + 1;
+ if(debug>1){fprintf(stdout, "score: %d\n", score);
+ fprintf(stdout, "rl: %d; stari: %d; endi: %d\n", rl, stari, endi);
+ fprintf(stdout, "cl: %d; starj: %d; endj: %d\n", cl, starj, endj);
+ }
+
+ /*I = stari - 1;
+ J = starj - 1; */
+ I = stari-1;
+ J = starj-1;
+ sapp = S;
+ last = 0;
+ al_len = 0;
+ no_mat = 0;
+ no_mis = 0;
+ /*diff(&A[stari]-1, &B[starj]-1,rl,cl,q,q);*/
+ /*fprintf(stdout, "%d %d %d %d \n", stari, starj, rl, cl);*/
+ fflush(stdout); /*rl--; cl--;*/
+ diff(stari-1, starj-1, rl,cl,q,q);
+ /*fprintf(stdout, "============\n");fflush(stdout);*/
+
+ min0 = stari;
+ min1 = starj;
+ max0 = stari+rl-1;
+ max1 = starj+cl-1;
+ /*for(i=0;i<rl;i++) {
+ fprintf(stdout, "%d ", *(S+i));} */
+
+
+ fflush(stdout);
+
+ /* JP: determine the alignment paths */
+ aln_path1 = ckalloc( (al_len+1) * sizeof(int));
+ aln_path2 = ckalloc( (al_len+1) *sizeof(int) );
+ first = stari; second = starj;
+ ii=0;jj=0;
+ for(i=0;i<rl,(ii<al_len&&jj<al_len);i++) { /* NOTE(review): comma operator - only the second condition controls the loop */
+ if(*(S+i)==0) {
+ ii++;jj++;
+ aln_path1[ii] = 0;
+ aln_path2[jj] = 0;
+ }
+ if(*(S+i)>0) {
+ for(j=1;j<=*(S+i);j++) {
+ jj++;ii++;
+ aln_path2[jj]=0;
+ aln_path1[ii] = 1;
+ }
+ }
+ if(*(S+i)<0) {
+ for(j=1;j<=(-*(S+i));j++) {
+ ii++;jj++;
+ aln_path2[jj]=1;
+ aln_path1[ii]=0;
+ }
+ }
+ }
+
+ /* JP: calculate the average sequence identity between the two blocks */
+ tpairs = 0; idpairs = 0;
+ first = stari; second = starj;
+ for(i=1;i<=al_len;i++) {
+
+ if(aln_path1[i]==1) { second++; continue; }
+ if(aln_path2[i]==1) { first++; continue; }
+
+ /* count sequence pairs with a non-zero code on both sides of this column */
+ for(j=1;j<=groupptr[gi]->seqnum;j++) {
+ for(k=1;k<=groupptr[gj]->seqnum;k++) {
+
+ if(groupptr[gi]->seq[j][first] && groupptr[gj]->seq[k][second]) {
+ tpairs++;
+ if(am[groupptr[gi]->seq[j][first]]==am[groupptr[gj]->seq[k][second]])
+ idpairs++;
+ }
+ }
+ }
+ first++; second++;
+ }
+
+
+ /* JP calculate the gap fraction */
+ ngaps = 0; nonegaps = 0;
+ for(i=1;i<=al_len;i++) {
+ if(aln_path1[i]==1) {ngaps++; continue; }
+ if(aln_path2[i]==1) {ngaps++; continue; }
+ nonegaps++;
+ }
+
+ if(debug>11) {
+ fprintf(stdout, "gaps %d nonegaps %d", ngaps, nonegaps);
+ fprintf(stdout, "identity: %d %d \n", idpairs, tpairs);
+ }
+
+
+
+ /* JP NOTICE: normalized score or not */
+ /*score = score/al_len; */ /* using average score */
+ /*score = (int) (100.0*idpairs/tpairs); */
+ /*score = (int) (score/sqrt(al_len) );*/
+ score = endi - stari + 1;
+ if(score > endj -starj +1) score = endj - starj + 1;
+ score = (int) ( (log(score) * 100.0*idpairs/tpairs)*(1.0*nonegaps/(nonegaps+ngaps)) ) ; /* NOTE(review): divides by tpairs and by nonegaps+ngaps; no zero guard is visible here */
+
+
+ /* JP: lib generation */
+ first = stari; second = starj;
+ for(i=0;i<rl, first<=endi;i++) { /* NOTE(review): comma operator - only first<=endi controls the loop */
+ if(*(S+i)==0) {
+ /* fprintf(stdout, "first: %d; second: %d\n", first, second); */
+
+ if(glib[gi][gj][first] == NULL) {
+ glib[gi][gj][first] = SNavail();
+ glib[gi][gj][first]->ind = second;
+ glib[gi][gj][first]->sbe = score;
+ }
+ else {
+ snd = &glib[gi][gj][first];
+ /*fprintf(stdout, "snd: %d; glib: %d\n", snd, glib[gi][gj][first]);*/
+ while(*snd) {
+ if((*snd)->ind == second) {
+ (*snd)->sbe += score;
+ break;
+ }
+ else {
+ snd = &((*snd)->next);
+ /*fprintf(stdout, "snd: %d\n", snd);*/
+ }
+ }
+ if(!(*snd)) {
+ (*snd) = SNavail();
+ (*snd)->ind = second;
+ (*snd)->sbe = score;
+ /*fprintf(stdout, "====%d %d %d \n", first, (*snd)->ind, (*snd)->sbe);*/
+ }
+ }
+ /* AddSbe(glib[gi][gj][first], second, score/rl);*/
+ if(glib[gj][gi][second]==NULL) {
+ glib[gj][gi][second] = SNavail();
+ glib[gj][gi][second]->ind = first;
+ glib[gj][gi][second]->sbe = score;
+ }
+ else {
+ snd = &glib[gj][gi][second];
+ while(*snd) {
+ if((*snd)->ind == first) {
+ (*snd)->sbe += score;
+ break;
+ }
+ else snd = &((*snd)->next);
+ }
+ if(!*snd) {
+ *snd = SNavail();
+ (*snd)->ind = first;
+ (*snd)->sbe = score;
+ }
+ }
+ /* AddSbe(glib[gj][gi][second], first, score/rl); */
+ first++; second++;
+ }
+ if(*(S+i) > 0) {
+ second+=*(S+i);
+ /*first++; second++; */
+ }
+ if(*(S+i) < 0) {
+ first-=*(S+i);
+ /*first++; second++; */
+ }
+ }
+
+ /* JP: print out the subalignments */
+ if(debug > 11) {
+
+ first = stari;second = starj;
+ nblocks = (al_len-1)/80+1;
+ fprintf(stdout, "subalignment %d score %d average score %d average id %d\n", K-count, score, score/al_len, (int) (100.0*idpairs/tpairs) );
+ fprintf(stdout, "tpairs %d idpairs %d \n", tpairs, idpairs);
+ for(n=0;n<nblocks;n++) {
+ fflush(stdout);
+ fprintf(stdout, "first: %d\n", first);
+ for(j=1;j<=groupptr[gi]->seqnum;j++) {
+ tmpfirst = first;
+ fprintf(stdout, "%s\t", groupptr[gi]->name[j]);
+ for(i=n*80+1;(i<=(n+1)*80&&i<=al_len);i++){
+ if(aln_path1[i]==0) {
+ fprintf(stdout, "%c", am[groupptr[gi]->seq[j][tmpfirst]]);
+ tmpfirst++;
+ }
+ else {fprintf(stdout, "-");}
+ }
+ fprintf(stdout, "\n");
+ }
+ first = tmpfirst;
+
+ fprintf(stdout, "second: %d\n", second);
+ for(j=1;j<=groupptr[gj]->seqnum;j++){
+ tmpsecond = second;
+ fprintf(stdout, "%s\t", groupptr[gj]->name[j]);
+ for(i=n*80+1;(i<=(n+1)*80&&i<=al_len);i++) {
+ if(aln_path2[i]==0) {
+ fprintf(stdout, "%c", am[groupptr[gj]->seq[j][tmpsecond]]);
+ tmpsecond++;
+ }
+ else{fprintf(stdout,"-");}
+ }
+ fprintf(stdout, "\n");
+ }
+ second = tmpsecond;
+ fprintf(stdout, "\n\n");
+ }
+ }
+
+ ckfree(aln_path1);
+ ckfree(aln_path2);
+
+ /*first = i;
+ for(i=0;i<first;i++) {
+ fprintf(stdout, "%d ", *(S+i));}
+ fprintf(stdout, "\n");*/
+
+ /* more alignments to report: find the area to recompute and redo it if needed */
+ if ( count )
+ { flag = 0;
+ locate(nseq);
+ if ( flag )
+ small_pass(count,nseq);
+ }
+ }
+
+ /* JP: reinitiate and free memories */
+ low = 0; most = 0;
+ for ( i = 0; i < K ; i++ )
+ LIST[i] = ckfree(LIST[i]);
+ LIST = ckfree(LIST);
+
+ ckfree(CC);
+ ckfree(DD);
+ ckfree(RR);
+ ckfree(SS);
+ ckfree(EE);
+ ckfree(FF);
+ ckfree(HH);
+ ckfree(WW);
+ ckfree(II);
+ ckfree(JJ);
+ ckfree(XX);
+ ckfree(YY);
+ ckfree(S);
+ ckfree(row);
+
+
+}
+
+/* A big pass to compute K best classes.
+ Fills the score matrix row by row for i in 1..M and, within a row, for
+ j from 1 (nseq == 2) or i+1 (otherwise) up to N, using the file-level
+ vectors CC, DD, RR, SS, EE, FF, the scores vv[i][j] and the penalties
+ q, r, qr. Every cell score c above the current threshold lmin is
+ passed to addnode(), whose return value becomes the new lmin.
+ Declared in old-style form without a return type; no value is returned. */
+
+big_pass(M,N,K,nseq) int M,N,K,nseq;
+{ register int i, j; /* row and column indices */
+ register int c; /* best score at current point */
+ register int f; /* best score ending with insertion */
+ register int d; /* best score ending with deletion */
+ register int p; /* best score at (i-1, j-1) */
+ register int ci, cj; /* end-point associated with c */
+ register int di, dj; /* end-point associated with d */
+ register int fi, fj; /* end-point associated with f */
+ register int pi, pj; /* end-point associated with p */
+ int *va; /* pointer to vv(A[i], B[j]) */
+ int addnode(); /* function for inserting a node */
+
+
+ /* Compute the matrix and save the top K best scores in LIST
+ CC : the scores of the current row
+ RR and EE : the starting point that leads to score CC
+ DD : the scores of the current row, ending with deletion
+ SS and FF : the starting point that leads to score DD */
+ /* Initialize the 0 th row */
+ for ( j = 1; j <= N ; j++ )
+ { CC[j] = 0;
+ RR[j] = 0;
+ EE[j] = j;
+ DD[j] = - (q);
+ SS[j] = 0;
+ FF[j] = j;
+ }
+ for ( i = 1; i <= M; i++)
+ { c = 0; /* Initialize column 0 */
+ f = - (q);
+ ci = fi = i;
+ /*va = vv[A[i]];*/
+ if ( nseq == 2 )
+ { p = 0;
+ pi = i - 1;
+ cj = fj = pj = 0;
+ }
+ else
+ { p = CC[i];
+ pi = RR[i];
+ pj = EE[i];
+ cj = fj = i;
+ }
+ for ( j = (nseq == 2 ? 1 : (i+1)) ; j <= N ; j++ )
+ { f = f - r; /* extend the insertion ... */
+ c = c - qr; /* ... or open a new one from the cell to the left */
+ ORDER(f, fi, fj, c, ci, cj)
+ c = CC[j] - qr; /* open a deletion from the cell above */
+ ci = RR[j];
+ cj = EE[j];
+ d = DD[j] - r; /* or extend the deletion */
+ di = SS[j];
+ dj = FF[j];
+ ORDER(d, di, dj, c, ci, cj)
+ c = 0;
+ DIAG(i, j, c, p+vv[i][j])
+ /*fprintf(stdout, "I %d J %d c %d\n", i,j,c);*/
+ /*DIAG(i, j, c, p+va[B[j]])*/ /* diagonal */
+ if ( c <= 0 )
+ { c = 0; ci = i; cj = j; }
+ else
+ { ci = pi; cj = pj; }
+ ORDER(c, ci, cj, d, di, dj)
+ ORDER(c, ci, cj, f, fi, fj)
+ p = CC[j]; /* keep the old row value as the next diagonal predecessor */
+ CC[j] = c;
+ pi = RR[j];
+ pj = EE[j];
+ RR[j] = ci;
+ EE[j] = cj;
+ DD[j] = d;
+ SS[j] = di;
+ FF[j] = dj;
+ if ( c > lmin ){ /* add the score into list */
+ /*fprintf(stdout, "c: %d; lmin before: %d; ", c, lmin);*/
+ /*fprintf(stdout, "K: %d", K);*/
+ lmin = addnode(c, ci, cj, i, j, K, lmin);
+ /*fprintf(stdout, "lmin after: %d\n", lmin); */
+ }
+ }
+ }
+}
+
+/* Determine the left and top boundaries of the recomputed area.
+
+ Works entirely on file-level state: it reads mm, nn, q, r, qr, lmin and vv,
+ and updates m1, n1, rl, cl, flag and the row/column work arrays.
+
+ 1. Run the recurrence backwards over rows mm..m1 and columns nn..n1.
+ 2. Repeatedly extend the area by one row (m1--) and/or one column (n1--)
+    for as long as a freshly computed cell records an end-point strictly
+    beyond (rl, cl).
+ 3. Stop when the area has reached (1,1) or no_cross() returns non-zero.
+
+ flag is set to 1 whenever a computed score exceeds lmin. On exit m1 and n1
+ are each decremented one more time.
+
+ nseq == 2 uses the full rectangle; for any other value the column range of
+ a row is limited to j > i (see `limit`) -- presumably the self-comparison
+ case; confirm against the caller. */
+
+locate(nseq) int nseq;
+{ register int i, j; /* row and column indices */
+ register int c; /* best score at current point */
+ register int f; /* best score ending with insertion */
+ register int d; /* best score ending with deletion */
+ register int p; /* best score at (i-1, j-1) */
+ register int ci, cj; /* end-point associated with c */
+ register int di, dj; /* end-point associated with d */
+ register int fi, fj; /* end-point associated with f */
+ register int pi, pj; /* end-point associated with p */
+ int cflag, rflag; /* for recomputation */
+ int *va; /* pointer to vv(A[i], B[j]) -- not used in this function */
+ int addnode(); /* function for inserting a node -- not called here */
+ int limit; /* the bound on j */
+
+ /* Reverse pass
+ rows
+ CC : the scores on the current row
+ RR and EE : the endpoints that lead to CC
+ DD : the deletion scores
+ SS and FF : the endpoints that lead to DD
+
+ columns
+ HH : the scores on the current columns
+ II and JJ : the endpoints that lead to HH
+ WW : the deletion scores
+ XX and YY : the endpoints that lead to WW
+ */
+ /* Initialise the virtual row below row mm */
+ for ( j = nn; j >= n1 ; j-- )
+ { CC[j] = 0;
+ EE[j] = j;
+ DD[j] = - (q);
+ FF[j] = j;
+ if ( nseq == 2 || j > mm )
+ RR[j] = SS[j] = mm + 1;
+ else
+ RR[j] = SS[j] = j;
+ }
+
+ /* Backward recurrence over the current area, one row at a time */
+ for ( i = mm; i >= m1; i-- )
+ { c = p = 0;
+ f = - (q);
+ ci = fi = i;
+ pi = i + 1;
+ cj = fj = pj = nn + 1;
+ /*va = vv[A[i]];*/
+ if ( nseq == 2 || n1 > i )
+ limit = n1;
+ else
+ limit = i + 1;
+ for ( j = nn; j >= limit ; j-- )
+ { f = f - r;
+ c = c - qr;
+ ORDER(f, fi, fj, c, ci, cj)
+ c = CC[j] - qr;
+ ci = RR[j];
+ cj = EE[j];
+ d = DD[j] - r;
+ di = SS[j];
+ dj = FF[j];
+ ORDER(d, di, dj, c, ci, cj)
+ c = 0;
+ /* DIAG(i, j, c, p+va[B[j]]) */
+ DIAG(i, j, c, p+vv[i][j]) /* diagonal */
+ if ( c <= 0 )
+ { c = 0; ci = i; cj = j; }
+ else
+ { ci = pi; cj = pj; }
+ ORDER(c, ci, cj, d, di, dj)
+ ORDER(c, ci, cj, f, fi, fj)
+ p = CC[j];
+ CC[j] = c;
+ pi = RR[j];
+ pj = EE[j];
+ RR[j] = ci;
+ EE[j] = cj;
+ DD[j] = d;
+ SS[j] = di;
+ FF[j] = dj;
+ if ( c > lmin )
+ flag = 1;
+ }
+ /* Save the values at column n1 so that a later column pass can use them */
+ if ( nseq == 2 || i < n1 )
+ { HH[i] = CC[n1];
+ II[i] = RR[n1];
+ JJ[i] = EE[n1];
+ WW[i] = DD[n1];
+ XX[i] = SS[n1];
+ YY[i] = FF[n1];
+ }
+ }
+
+ /* Grow the area: rl/cl start at the current top-left corner and may be
+ lowered by no_cross(); the outer loop has no condition and ends only
+ through the break below. */
+ for ( rl = m1, cl = n1; ; )
+ { for ( rflag = cflag = 1; ( rflag && m1 > 1 ) || ( cflag && n1 > 1 ) ; )
+ { if ( rflag && m1 > 1 ) /* Compute one row */
+ { rflag = 0;
+ m1--;
+ c = p = 0;
+ f = - (q);
+ ci = fi = m1;
+ pi = m1 + 1;
+ cj = fj = pj = nn + 1;
+ /*va = vv[A[m1]]; */
+ for ( j = nn; j >= n1 ; j-- )
+ { f = f - r;
+ c = c - qr;
+ ORDER(f, fi, fj, c, ci, cj)
+ c = CC[j] - qr;
+ ci = RR[j];
+ cj = EE[j];
+ d = DD[j] - r;
+ di = SS[j];
+ dj = FF[j];
+ ORDER(d, di, dj, c, ci, cj)
+ c = 0;
+ /*DIAG(m1, j, c, p+va[B[j]])*/ /* diagonal */
+ DIAG(m1, j, c, p+vv[m1][j]);
+ if ( c <= 0 )
+ { c = 0; ci = m1; cj = j; }
+ else
+ { ci = pi; cj = pj; }
+ ORDER(c, ci, cj, d, di, dj)
+ ORDER(c, ci, cj, f, fi, fj)
+ p = CC[j];
+ CC[j] = c;
+ pi = RR[j];
+ pj = EE[j];
+ RR[j] = ci;
+ EE[j] = cj;
+ DD[j] = d;
+ SS[j] = di;
+ FF[j] = dj;
+ if ( c > lmin )
+ flag = 1;
+ /* another row is needed if any end-point lies beyond (rl, cl) */
+ if ( ! rflag && ( ci > rl && cj > cl || di > rl && dj > cl
+ || fi > rl && fj > cl ) )
+ rflag = 1;
+ }
+ HH[m1] = CC[n1];
+ II[m1] = RR[n1];
+ JJ[m1] = EE[n1];
+ WW[m1] = DD[n1];
+ XX[m1] = SS[n1];
+ YY[m1] = FF[n1];
+ /* the same test on the last cell of the row decides about a new column */
+ if ( ! cflag && ( ci > rl && cj > cl || di > rl && dj > cl
+ || fi > rl && fj > cl ) )
+ cflag = 1;
+ }
+
+ if ( nseq == 1 && n1 == (m1 + 1) && ! rflag )
+ cflag = 0;
+ if ( cflag && n1 > 1 ) /* Compute one column */
+ { cflag = 0;
+ n1--;
+ c = 0;
+ f = - (q);
+ cj = fj = n1;
+ /*va = vv[B[n1]]; */
+ if ( nseq == 2 || mm < n1 )
+ { p = 0;
+ ci = fi = pi = mm + 1;
+ pj = n1 + 1;
+ limit = mm;
+ }
+ else
+ { p = HH[n1];
+ pi = II[n1];
+ pj = JJ[n1];
+ ci = fi = n1;
+ limit = n1 - 1;
+ }
+ for ( i = limit; i >= m1 ; i-- )
+ { f = f - r;
+ c = c - qr;
+ ORDER(f, fi, fj, c, ci, cj)
+ c = HH[i] - qr;
+ ci = II[i];
+ cj = JJ[i];
+ d = WW[i] - r;
+ di = XX[i];
+ dj = YY[i];
+ ORDER(d, di, dj, c, ci, cj)
+ c = 0;
+ /*DIAG(i, n1, c, p+va[A[i]]) */
+ DIAG(i, n1, c, p+vv[i][n1])
+ if ( c <= 0 )
+ { c = 0; ci = i; cj = n1; }
+ else
+ { ci = pi; cj = pj; }
+ ORDER(c, ci, cj, d, di, dj)
+ ORDER(c, ci, cj, f, fi, fj)
+ p = HH[i];
+ HH[i] = c;
+ pi = II[i];
+ pj = JJ[i];
+ II[i] = ci;
+ JJ[i] = cj;
+ WW[i] = d;
+ XX[i] = di;
+ YY[i] = dj;
+ if ( c > lmin )
+ flag = 1;
+ if ( ! cflag && ( ci > rl && cj > cl || di > rl && dj > cl
+ || fi > rl && fj > cl ) )
+ cflag = 1;
+ }
+ /* Copy the top of the new column back into the row arrays */
+ CC[n1] = HH[m1];
+ RR[n1] = II[m1];
+ EE[n1] = JJ[m1];
+ DD[n1] = WW[m1];
+ SS[n1] = XX[m1];
+ FF[n1] = YY[m1];
+ if ( ! rflag && ( ci > rl && cj > cl || di > rl && dj > cl
+ || fi > rl && fj > cl ) )
+ rflag = 1;
+ }
+ }
+ /* Done once the whole matrix is covered or no LIST node crosses the area */
+ if ( m1 == 1 && n1 == 1 || no_cross() )
+ break;
+ }
+ m1--;
+ n1--;
+}
+
+/* Recompute the area on the forward pass.
+
+ Rows m1+1..mm are processed top to bottom and columns up to nn left to
+ right, using the file-level bounds m1, n1, mm, nn. Every score greater than
+ lmin is handed to addnode() with list capacity `count`, and lmin is replaced
+ by the value addnode() returns.
+
+ count : list capacity passed through to addnode() as its K argument
+ nseq  : 2 selects the full rectangle; otherwise rows with i > n1 start at
+         column i+1 and are seeded from CC/RR/EE at index i */
+small_pass(count,nseq) int count, nseq;
+{ register int i, j; /* row and column indices */
+ register int c; /* best score at current point */
+ register int f; /* best score ending with insertion */
+ register int d; /* best score ending with deletion */
+ register int p; /* best score at (i-1, j-1) */
+ register int ci, cj; /* end-point associated with c */
+ register int di, dj; /* end-point associated with d */
+ register int fi, fj; /* end-point associated with f */
+ register int pi, pj; /* end-point associated with p */
+ int *va; /* pointer to vv(A[i], B[j]) -- not used in this function */
+ int addnode(); /* function for inserting a node */
+ int limit; /* lower bound on j */
+
+ /* Initialise the virtual row m1 just above the area */
+ for ( j = n1 + 1; j <= nn ; j++ )
+ { CC[j] = 0;
+ RR[j] = m1;
+ EE[j] = j;
+ DD[j] = - (q);
+ SS[j] = m1;
+ FF[j] = j;
+ }
+ for ( i = m1 + 1; i <= mm; i++)
+ { c = 0; /* Initialize column 0 */
+ f = - (q);
+ ci = fi = i;
+ /*va = vv[A[i]];*/
+ if ( nseq == 2 || i <= n1 )
+ { p = 0;
+ pi = i - 1;
+ cj = fj = pj = n1;
+ limit = n1 + 1;
+ }
+ else
+ { p = CC[i];
+ pi = RR[i];
+ pj = EE[i];
+ cj = fj = i;
+ limit = i + 1;
+ }
+ for ( j = limit ; j <= nn ; j++ )
+ { f = f - r;
+ c = c - qr;
+ ORDER(f, fi, fj, c, ci, cj)
+ c = CC[j] - qr;
+ ci = RR[j];
+ cj = EE[j];
+ d = DD[j] - r;
+ di = SS[j];
+ dj = FF[j];
+ ORDER(d, di, dj, c, ci, cj)
+ c = 0;
+ /*DIAG(i, j, c, p+va[B[j]])*/ /* diagonal */
+ DIAG(i, j, c, p+vv[i][j])
+ if ( c <= 0 )
+ { c = 0; ci = i; cj = j; }
+ else
+ { ci = pi; cj = pj; }
+ ORDER(c, ci, cj, d, di, dj)
+ ORDER(c, ci, cj, f, fi, fj)
+ /* remember the old row values as the diagonal for column j+1 */
+ p = CC[j];
+ CC[j] = c;
+ pi = RR[j];
+ pj = EE[j];
+ RR[j] = ci;
+ EE[j] = cj;
+ DD[j] = d;
+ SS[j] = di;
+ FF[j] = dj;
+ if ( c > lmin ) /* add the score into list */
+ lmin = addnode(c, ci, cj, i, j, count, lmin);
+ }
+ }
+}
+
+/* Add a new node into list.
+
+ (ci, cj) identifies the node by its start point; (i, j) is the cell where
+ score c was found. If a node with that start point is already in LIST its
+ best score / end point and bounding box are updated; otherwise a node is
+ taken from LIST (or the lowest-scoring node `low` is recycled when the list
+ already holds K nodes) and initialised.
+
+ Returns the lowest score in the list once it holds K nodes, else `cost`.
+ Updates the file-level variables most, low and numnode. */
+
+int addnode(c, ci, cj, i, j, K, cost) int c, ci, cj, i, j, K, cost;
+{ register int idx;
+ int hit; /* non-zero once `most` points at the matching node */
+
+ /* try the most recently used node first, then scan the list */
+ hit = ( most != 0 && most->STARI == ci && most->STARJ == cj );
+ for ( idx = 0; ! hit && idx < numnode ; idx++ )
+ { most = LIST[idx];
+ hit = ( most->STARI == ci && most->STARJ == cj );
+ }
+
+ if ( ! hit )
+ { /* recycle the lowest node when the list is full, else take a new slot */
+ most = ( numnode == K ) ? low : LIST[numnode++];
+ most->SCORE = c;
+ most->STARI = ci;
+ most->STARJ = cj;
+ most->ENDI = i;
+ most->ENDJ = j;
+ most->TOP = most->BOT = i;
+ most->LLEFT = most->LRIGHT = j;
+ }
+ else
+ { if ( most->SCORE < c )
+ { most->SCORE = c;
+ most->ENDI = i;
+ most->ENDJ = j;
+ }
+ /* widen the bounding box to include (i, j) */
+ if ( most->TOP > i ) most->TOP = i;
+ if ( most->BOT < i ) most->BOT = i;
+ if ( most->LLEFT > j ) most->LLEFT = j;
+ if ( most->LRIGHT < j ) most->LRIGHT = j;
+ }
+
+ if ( numnode != K )
+ return cost;
+
+ /* the cached minimum is stale if it was just touched or never computed */
+ if ( low == most || ! low )
+ { low = LIST[0];
+ for ( idx = 1; idx < numnode ; idx++ )
+ if ( LIST[idx]->SCORE < low->SCORE )
+ low = LIST[idx];
+ }
+ return ( low->SCORE ) ;
+}
+
+/* Find and remove the largest score in list.
+
+ The winning node is swapped into the last used slot and numnode is
+ decremented, so the node storage stays inside LIST for reuse. `most` is
+ reset to LIST[0], and `low` as well if it pointed at the removed node. */
+
+vertexptr findmax()
+{ vertexptr best;
+ register int k, top;
+
+ top = 0;
+ for ( k = 1; k < numnode ; k++ )
+ if ( LIST[k]->SCORE > LIST[top]->SCORE )
+ top = k;
+ best = LIST[top];
+
+ numnode--;
+ if ( top != numnode )
+ { LIST[top] = LIST[numnode];
+ LIST[numnode] = best;
+ }
+ most = LIST[0];
+ if ( low == best )
+ low = LIST[0];
+ return ( best );
+}
+
+/* return 1 if no node in LIST share vertices with the area
+
+ A node crosses the area when its start point is inside (mm, nn), its
+ bounding box reaches row m1-1 and column n1-1, and its start point lies
+ before rl or cl. For the first such node rl/cl are lowered to that start
+ point, flag is set to 1 and 0 is returned. */
+
+no_cross()
+{ register int k;
+
+ for ( k = 0; k < numnode; k++ )
+ { vertexptr node = LIST[k];
+
+ if ( node->STARI > mm || node->STARJ > nn )
+ continue;
+ if ( node->BOT < m1-1 || node->LRIGHT < n1-1 )
+ continue;
+ if ( node->STARI >= rl && node->STARJ >= cl )
+ continue;
+
+ if ( node->STARI < rl ) rl = node->STARI;
+ if ( node->STARJ < cl ) cl = node->STARJ;
+ flag = 1;
+ return 0;
+ }
+ return 1;
+}
+
+/* diff(A,B,M,N,tb,te) returns the score of an optimum conversion between
+ A[1..M] and B[1..N] that begins(ends) with a delete if tb(te) is zero
+ and appends such a conversion to the current script.
+
+ In this version A and B are plain int offsets, not sequences: substitution
+ scores are read as vv[A+i][B+j]. The file-level counters I and J are added
+ to the local indices when row[] is consulted (directly and through DIAG)
+ to skip pairs already used by an earlier alignment.
+
+ The script is emitted through the DEL / INS / REP macros. M <= 1 and
+ N <= 0 are handled directly; otherwise the problem is split at row M/2 by
+ a forward and a reverse pass and solved recursively. */
+
+int diff(A,B,M,N,tb,te) int A, B, M, N; int tb, te;
+
+{ int midi, midj, type; /* Midpoint, type, and cost */
+ int midc;
+
+{ register int i, j;
+ register int c, e, d, s;
+ int t, *va; /* va is not used in this function */
+#ifdef FAR_PTR
+ char far * flckalloc();
+#else
+ char *lckalloc();
+#endif
+
+/* Boundary cases: M <= 1 or N == 0 */
+
+ if (N <= 0)
+ { if (M > 0) DEL(M)
+ return - gap(M);
+ }
+ if (M <= 1)
+ { if (M <= 0)
+ { INS(N);
+ return - gap(N);
+ }
+ /* M == 1: either delete the single row, or match it to the best free j */
+ if (tb > te) tb = te;
+ midc = - (tb + r + gap(N) );
+ midj = 0;
+ /*va = vv[A[1]];*/
+ for (j = 1; j <= N; j++)
+ { /* tt stays 1 only if (I+1, j+J) is not already recorded in row[] */
+ for ( tt = 1, z = row[I+1]; z != PAIRNULL; z = z->NEXT )
+ if ( z->COL == j+J )
+ { tt = 0; break; }
+ if ( tt )
+ { /*c = va[B[j]] - ( gap(j-1) + gap(N-j) );*/
+ c= vv[A+1][B+j] - ( gap(j-1) + gap(N-j) );
+ /*fprintf(stdout, "1 j: 1 %d; score: %d\n", j, vv[1][j]); */
+ if (c > midc)
+ { midc = c;
+ midj = j;
+ }
+ }
+ }
+ if (midj == 0)
+ { INS(N) DEL(1) }
+ else
+ { if (midj > 1) INS(midj-1)
+ REP
+ /*if ( A[1] == B[midj] )
+ no_mat += 1;
+ else
+ no_mis += 1;
+ */
+ /* mark (A[I],B[J]) as used: put J into list row[I] */
+ I++; J++;
+ z = ( pairptr ) FCKALLOC( (int) sizeof(pair));
+ z->COL = J;
+ z->NEXT = row[I];
+ row[I] = z;
+ if (midj < N) INS(N-midj)
+ }
+ return midc;
+ }
+
+/* Divide: Find optimum midpoint (midi,midj) of cost midc */
+
+ midi = M/2; /* Forward phase: */
+ CC[0] = 0; /* Compute C(M/2,k) & D(M/2,k) for all k */
+ t = -q;
+ for (j = 1; j <= N; j++)
+ { CC[j] = t = t-r;
+ DD[j] = t-q;
+ }
+ t = -tb;
+ for (i = 1; i <= midi; i++)
+ { s = CC[0];
+ CC[0] = c = t = t-r;
+ e = t-q;
+ /*va = vv[A[i]];*/
+ for (j = 1; j <= N; j++)
+ { if ((c = c - qr) > (e = e - r)) e = c;
+ if ((c = CC[j] - qr) > (d = DD[j] - r)) d = c;
+ /*DIAG(i+I, j+J, c, s+va[B[j]]) */
+ DIAG(i+I, j+J, c, s+vv[A+i][B+j])
+ /*fprintf(stdout, "i j: %d %d; score: %d\n", i, j, vv[A+i][B+j]); */
+ if (c < d) c = d;
+ if (c < e) c = e;
+ s = CC[j];
+ CC[j] = c;
+ DD[j] = d;
+ }
+ }
+ DD[0] = CC[0];
+
+ RR[N] = 0; /* Reverse phase: */
+ t = -q; /* Compute R(M/2,k) & S(M/2,k) for all k */
+ for (j = N-1; j >= 0; j--)
+ { RR[j] = t = t-r;
+ SS[j] = t-q;
+ }
+ t = -te;
+ for (i = M-1; i >= midi; i--)
+ { s = RR[N];
+ RR[N] = c = t = t-r;
+ e = t-q;
+ /*va = vv[A[i+1]];*/
+ for (j = N-1; j >= 0; j--)
+ { if ((c = c - qr) > (e = e - r)) e = c;
+ if ((c = RR[j] - qr) > (d = SS[j] - r)) d = c;
+ /*DIAG(i+1+I, j+1+J, c, s+va[B[j+1]]) */
+ DIAG(i+1+I, j+1+J, c, s+vv[A+i+1][B+j+1])
+ /*fprintf(stdout, "i+1, j+1: %d %d; score: %d\n", i+1, j+1, vv[i+1][j+1]);*/
+ if (c < d) c = d;
+ if (c < e) c = e;
+ s = RR[j];
+ RR[j] = c;
+ SS[j] = d;
+ }
+ }
+ SS[N] = RR[N];
+
+ /* type 1: the halves meet at (midi, midj) without an open deletion;
+ type 2: a deletion spans the midpoint, so one gap-open q is added back */
+ midc = CC[0]+RR[0]; /* Find optimal midpoint */
+ midj = 0;
+ type = 1;
+ for (j = 0; j <= N; j++)
+ if ((c = CC[j] + RR[j]) >= midc)
+ if (c > midc || CC[j] != DD[j] && RR[j] == SS[j])
+ { midc = c;
+ midj = j;
+ }
+ for (j = N; j >= 0; j--)
+ if ((c = DD[j] + SS[j] + q) > midc)
+ { midc = c;
+ midj = j;
+ type = 2;
+ }
+}
+
+/* Conquer: recursively around midpoint */
+
+ if (type == 1)
+ { diff(A,B,midi,midj,tb,q);
+ diff(A+midi,B+midj,M-midi,N-midj,q,te);
+ }
+ else
+ { /* rows midi and midi+1 are covered by the explicit DEL(2) */
+ diff(A,B,midi-1,midj,tb,zero);
+ DEL(2);
+ diff(A+midi+1,B+midj,M-midi-1,N-midj,zero,te);
+ }
+ return midc;
+}
+
+/* Alignment display routine */
+/*
+static char ALINE[51], BLINE[51], CLINE[51];
+
+int display(A,B,M,N,S,AP,BP) char A[], B[]; int M, N; int S[], AP, BP;
+{ register char *a, *b, *c;
+ register int i, j, op;
+ int lines, ap, bp;
+
+ i = j = op = lines = 0;
+ ap = AP;
+ bp = BP;
+ a = ALINE;
+ b = BLINE;
+ c = CLINE;
+ while (i < M || j < N)
+ { if (op == 0 && *S == 0)
+ { op = *S++;
+ *a = sq[A[++i]];
+ *b = sq[B[++j]];
+ *c++ = (*a++ == *b++) ? '|' : ' ';
+ }
+ else
+ { if (op == 0)
+ op = *S++;
+ if (op > 0)
+ { *a++ = ' ';
+ *b++ = sq[B[++j]];
+ op--;
+ }
+ else
+ { *a++ = sq[A[++i]];
+ *b++ = ' ';
+ op++;
+ }
+ *c++ = '-';
+ }
+ if (a >= ALINE+50 || i >= M && j >= N)
+ { *a = *b = *c = '\0';
+ printf("\n%5d ",50*lines++);
+ for (b = ALINE+10; b <= a; b += 10)
+ printf(" . :");
+ if (b <= a+5)
+ printf(" .");
+ printf("\n%5d %s\n %s\n%5d %s\n",ap,ALINE,CLINE,bp,BLINE);
+ ap = AP + i;
+ bp = BP + j;
+ a = ALINE;
+ b = BLINE;
+ c = CLINE;
+ }
+ }
+}
+*/
+
+/* lib.c - library of C procedures. */
+
+/* lfatal - print message and die
+ Writes msg and a newline to stderr, then exits with status 1. */
+lfatal(msg)
+char *msg;
+{
+ fprintf(stderr, "%s", msg);
+ fputc('\n', stderr);
+ exit(1);
+}
+
+/* lfatalf - format message, print it, and die
+ msg is used as the printf format and val as its single argument; a newline
+ is appended and the process exits with status 1. */
+lfatalf(msg, val)
+char *msg, *val;
+{
+ fprintf(stderr, msg, val);
+ fputc('\n', stderr);
+ exit(1);
+}
+
+/* ckopen - open file; check for success
+ Returns the stream from fopen(name, mode); on failure it reports
+ "Cannot open <name>." through lfatalf(). */
+FILE *ckopen(name, mode)
+char *name, *mode;
+{
+ FILE *fopen();
+ FILE *stream;
+
+ stream = fopen(name, mode);
+ if (stream == NULL)
+ lfatalf("Cannot open %s.", name);
+ return(stream);
+}
+
+/* lckalloc - allocate space; check for success
+ Returns `amount` bytes from malloc(). A running total of all requested
+ bytes is kept and printed, together with the failed request, before the
+ process exits with status 1 when malloc() returns NULL. */
+char *lckalloc(amount)
+int amount;
+{
+ static long mtotal; /* bytes requested so far, including this call */
+ char *malloc();
+ char *block;
+
+ mtotal += (long)amount;
+ block = malloc( (unsigned) amount);
+ if (block == NULL) {
+ fprintf(stderr,"Ran out of near memory: %d/%ld\n",amount,mtotal);
+ exit(1);
+ }
+ return(block);
+}
+
+#ifdef FAR_PTR
+/* Far-pointer build only: allocation goes through farmalloc()/farfree(),
+ with the request size passed as a long. */
+#define FMALLOC farmalloc
+#define MTYPE long
+#define FFREE farfree
+
+/* flckalloc - allocate space; check for success
+ Far-memory counterpart of lckalloc(): keeps a running byte total and a call
+ count, and exits with status 1 after printing both if FMALLOC fails. */
+char far *flckalloc(amount)
+ int amount;
+{
+ static long ftotal; /* bytes requested so far */
+ static int nf; /* number of calls so far */
+
+ char far * FMALLOC(), far * p;
+
+ ftotal += (long)amount;
+ nf++;
+
+ if ((p = FMALLOC( (MTYPE) amount)) == (char far *)NULL) {
+ fprintf(stderr,"Ran out of far memory: %d/%ld (%d)\n",
+ amount,ftotal,nf);
+ exit(1);
+ }
+ return(p);
+}
+#endif
diff --git a/makefile b/makefile
new file mode 100644
index 0000000..da0dc21
--- /dev/null
+++ b/makefile
@@ -0,0 +1,60 @@
+# Build rules for the pcma executable.
+# `install` is the first target, so a plain `make` builds pcma.
+# install and clean do not name files, hence .PHONY.
+.PHONY: install clean
+
+install: pcma
+
+# -f keeps `make clean` from failing when there are no object files.
+clean:
+	rm -f *.o
+
+OBJECTS = interface.o sequence.o showpair.o malign.o \
+ util.o trees.o gcgcheck.o prfalign.o pairalign.o \
+ calcgapcoeff.o calcprf1.o calcprf2.o calctree.o \
+ readmat.o alnscore.o random.o alcomp2.o lib_generation.o \
+ lsim1.o lib_extension.o prfalignabs.o prfalign1.o prfalign2.o
+
+HEADERS = general.h pcma.h
+
+CC = gcc
+CFLAGS = -c -O
+LFLAGS = -O -lm -g
+
+pcma : $(OBJECTS) amenu.o pcma.o
+	$(CC) -o $@ $(OBJECTS) amenu.o pcma.o $(LFLAGS)
+
+# Objects with explicit header dependencies.
+interface.o : interface.c $(HEADERS) param.h
+	$(CC) $(CFLAGS) $*.c
+
+amenu.o : amenu.c $(HEADERS) param.h
+	$(CC) $(CFLAGS) $*.c
+
+readmat.o : readmat.c $(HEADERS) matrices.h
+	$(CC) $(CFLAGS) $*.c
+
+trees.o : trees.c $(HEADERS) dayhoff.h
+	$(CC) $(CFLAGS) $*.c
+
+lib_generation.o : lib_generation.c $(HEADERS)
+	$(CC) $(CFLAGS) $*.c
+
+lib_extension.o : lib_extension.c $(HEADERS)
+	$(CC) $(CFLAGS) $*.c
+
+malign.o : malign.c $(HEADERS)
+	$(CC) $(CFLAGS) $*.c
+
+calctree.o : calctree.c $(HEADERS)
+	$(CC) $(CFLAGS) $*.c
+
+lsim1.o : lsim1.c $(HEADERS)
+	$(CC) $(CFLAGS) $*.c
+
+prfalignabs.o : prfalignabs.c $(HEADERS)
+	$(CC) $(CFLAGS) $*.c
+
+prfalign1.o: prfalign1.c $(HEADERS)
+	$(CC) $(CFLAGS) $*.c
+
+prfalign2.o: prfalign2.c $(HEADERS)
+	$(CC) $(CFLAGS) $*.c
+
+# Default rule for every other object: compile the matching .c file.
+# $< names that source file regardless of which prerequisites are newer.
+.c.o :
+	$(CC) $(CFLAGS) $<
+
+
diff --git a/malign.c b/malign.c
new file mode 100644
index 0000000..367c38f
--- /dev/null
+++ b/malign.c
@@ -0,0 +1,924 @@
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include "pcma.h"
+/*#include "new.h"*/
+
+/*
+ * Prototypes
+ */
+
+/*
+ * Global Variables
+ *
+ * Everything below except glib is only declared here; the definitions are
+ * in other source files.
+ */
+
+extern double **tmat; /* pairwise matrix; holds percent similarity after calc_similarities() */
+extern Boolean no_weights;
+extern sint debug;
+extern sint max_aa;
+extern sint nseqs;
+extern sint profile1_nseqs;
+extern sint nsets;
+extern sint **sets;
+extern sint divergence_cutoff;
+extern sint *seq_weight;
+extern sint output_order, *output_index;
+extern Boolean distance_tree;
+extern char seqname[];
+extern char **names;
+extern sint *seqlen_array;
+extern char **seq_array;
+extern sint ngroups;
+extern streeptr *groupptr;
+extern char *amino_acid_codes;
+extern sint gap_pos1, gap_pos2;
+
+/* JP */
+extern sint res_index(char *,char);
+extern void assign_node(streeptr p, sint *aligned);
+extern streeptr *grp_ancestor;
+extern sint ngroups; /* repeated declaration of ngroups (also declared above) */
+extern sint ave_grp_id;
+/* Defined here (not extern). malign() allocates it as ngroups x ngroups
+ tables, each with groupptr[i]->seqlength+1 SN pointers. */
+SN **** glib;
+extern sint KK;
+extern sint cosmetic_penalty;
+extern sint seqFormat;
+
+/* JP: for_align_list */
+extern int *seqnumlist;
+extern int filecount;
+extern int nseqs_all;
+extern int *seqlen_array_all;
+extern char **seq_array_all; /* for all the sequences */
+extern char **names_all;
+extern int max_names;
+
+/*
+ * malign - full progressive alignment.
+ *
+ * istart      : 0 builds the alignment from scratch (groups of similar
+ *               sequences are pre-aligned and recorded in groupptr);
+ *               otherwise sequences 1..istart+1 are marked as already
+ *               aligned and the grouping step is skipped.
+ * phylip_name : guide-tree file name handed to read_tree().
+ *
+ * Returns nseqs on success, 0 if read_tree() or calc_similarities()
+ * reports status 0. Consistency errors found later terminate the process.
+ *
+ * The numbered "step" markers below follow the original author's outline.
+ */
+sint malign(sint istart,char *phylip_name) /* full progressive alignment*/
+{
+ static sint *aligned;
+ static sint *group;
+ static sint ix;
+
+ sint *maxid, max, sum;
+ sint *tree_weight;
+ sint i,j,k,l,set,iseq=0;
+ sint status,entries;
+ lint score = 0;
+
+ /* JP */
+ sint **tmpseq_array;
+ char **tmpnames;
+
+ streeptr nd;
+
+ extern char * am;
+
+ info("Start making multiple sequence alignment\n");
+
+/* get the phylogenetic tree from *.ph */ /************* step 1 ***************/
+
+ if (nseqs >= 2)
+ {
+ status = read_tree(phylip_name, (sint)0, nseqs);
+ if (status == 0) return((sint)0);
+ }
+
+/* calculate sequence weights according to branch lengths of the tree -
+ weights in global variable seq_weight normalised to sum to 100 */ /************* step 2 ***************/
+
+ calc_seq_weights((sint)0, nseqs, seq_weight);
+
+/* recalculate tmat matrix as percent similarity matrix */ /************* step 3 ***************/
+
+ status = calc_similarities(nseqs);
+ if (status == 0) return((sint)0);
+
+/* for each sequence, find the most closely related sequence */
+
+ maxid = (sint *)ckalloc( (nseqs+1) * sizeof (sint));
+ for (i=1;i<=nseqs;i++)
+ {
+ maxid[i] = -1;
+ for (j=1;j<=nseqs;j++)
+ if (j!=i && maxid[i] < tmat[i][j]) maxid[i] = tmat[i][j];
+ }
+
+/* JP */
+ if(debug>1)fprintf(stdout, "ave_grp_id: %d; KK: %d cosmetic: %d\n", ave_grp_id, KK, cosmetic_penalty);fflush(stdout);
+
+/* group the sequences according to their relative divergence */
+
+ if (istart == 0)
+ {
+ sets = (sint **) ckalloc( (nseqs+1) * sizeof (sint *) );
+ for(i=0;i<=nseqs;i++)
+ sets[i] = (sint *)ckalloc( (nseqs+1) * sizeof (sint) );
+
+ create_sets((sint)0,nseqs); /************* step 4 ***************/
+ /*JP */
+ /*info("There are %d groups\n",(pint)nsets);*/
+ if(seqFormat!=CLUSTALIST) info("There are %d sequences\n\n", nseqs);
+
+/* clear the memory used for the phylogenetic tree */
+
+ if (nseqs >= 2)
+ clear_tree(NULL);
+
+/* start the multiple alignments......... */
+
+ /* JP */
+ /*info("Aligning...");*/
+
+/* first pass, align closely related sequences first.... */
+
+ ix = 0;
+ aligned = (sint *)ckalloc( (nseqs+1) * sizeof (sint) );
+ for (i=0;i<=nseqs;i++) aligned[i] = 0;
+
+ /* JP: forming the groups ngroups */
+ ngroups = 0; score = 0;
+ groupptr = ckalloc(nseqs*sizeof(streeptr *)); /************* step 5 ***************/
+
+ /* set ouput order */ /************* step 6 ***************/
+ if(debug>1) fprintf(stdout, "OUTPUT_ORDER: %d\n", output_order);
+ for(set=1;set<=nsets;++set)
+ {
+ entries=0;
+ for (i=1;i<=nseqs;i++)
+ {
+ if ((sets[set][i] != 0) && (maxid[i] >= divergence_cutoff))
+ {
+ entries++;
+ if (aligned[i] == 0)
+ {
+ if (output_order==INPUT)
+ {
+ ++ix;
+ output_index[i] = i;
+ }
+ else output_index[++ix] = i;
+ aligned[i] = 1;
+ }
+ }
+ }
+
+ /*fprintf(stdout, "entries: %d\n", entries);*/
+ /* JP: conditionally align the sequences to form groups */
+ /*if(entries > 0) {score = prfalign(sets[set], aligned);
+ else score=0.0;*/
+
+ /* JP: for_alignlist_set the average group identity threshold to be larger than 100 so that no sequences are merged */
+ if(seqFormat==CLUSTALIST) {ave_grp_id = 101;}
+
+ /* align highly similar sequences by ClustalW */ /************* step 7 ***************/
+ if(entries > 0) {
+ /*fprintf(stdout,"set %d %d*********\n", set, grp_ancestor[set]->left->seq[1][1]);*/
+ /*if(grp_ancestor[set]->right->seq) fprintf(stdout, "--------\n");*/
+ if(grp_ancestor[set]->left->seq && grp_ancestor[set]->right->seq) {
+ if(debug> 1) fprintf(stdout,"average score: %5.3f \n", average_group_identity(sets[set]));
+ /* similar enough: merge both children into one pre-aligned group;
+ otherwise each child becomes a group of its own */
+ if(average_group_identity(sets[set])> ave_grp_id) {
+ if(debug>1)fprintf(stdout,"%5.3f *********\n", average_group_identity(sets[set]));
+ score = prfalign(sets[set], aligned);
+ if(debug>1)fprintf(stdout, "======\n");
+ assign_node(grp_ancestor[set], sets[set]);
+ }
+ else {
+ groupptr[ngroups]=grp_ancestor[set]->left;
+ ngroups++;
+ groupptr[ngroups]=grp_ancestor[set]->right;
+ ngroups++;
+ }
+ }
+
+ if(grp_ancestor[set]->left->seq && (!grp_ancestor[set]->right->seq) ) {
+ /*fprintf(stdout, "left name: %s\n",grp_ancestor[set]->left->name[1]);*/
+ groupptr[ngroups]=grp_ancestor[set]->left;
+ ngroups++;
+ }
+
+ if( (!grp_ancestor[set]->left->seq) && (grp_ancestor[set]->right->seq) ) {
+ /*fprintf(stdout, "right name: %s\n",grp_ancestor[set]->right->name[1]);*/
+ groupptr[ngroups]=grp_ancestor[set]->right;
+ ngroups++;
+ }
+
+ /*if( (!grp_ancestor[set]->left->seq) && (!grp_ancestor[set]->right->seq) ) {
+ groupptr[ngroups]=grp_ancestor[set]->left;
+ ngroups++;
+ }*/
+ }
+
+
+ /* negative score means fatal error... exit now! */
+ /*if (score < 0)
+ {
+ return(-1);
+ }
+ */
+ /*if ((entries > 0) && (score > 0))
+ info("Group %d: Sequences:%4d Score:%d\n",
+ (pint)set,(pint)entries,(pint)score);
+ else
+ info("Group %d: Delayed",
+ (pint)set); */
+
+ }
+
+ /* JP: set still useful */
+ /*for (i=0;i<=nseqs;i++)
+ sets[i]=ckfree((void *)sets[i]);
+ sets=ckfree(sets);
+ */
+
+ /* JP: print out the pre-aligned groups */
+ if(seqFormat!=CLUSTALIST) fprintf(stdout, "Average percentage group identity threshold is %d\n\n", ave_grp_id);
+ fprintf(stdout, "There are %d pre-aligned groups \n\n", ngroups);
+ if(debug>1) {
+ for(i=0;i<ngroups;i++) {
+ fprintf(stdout, "group %d\t", i);
+ fprintf(stdout, "alignment length: %d\n", groupptr[i]->seqlength);
+ for(j=1;j<=groupptr[i]->seqnum;j++) {
+ fprintf(stdout, "%s\t", groupptr[i]->name[j]);
+ for(k=1;k<=groupptr[i]->seqlength;k++) {
+ fprintf(stdout, "%c", am[groupptr[i]->seq[j][k]]);
+ }
+ fprintf(stdout, "\n");
+ }
+ fprintf(stdout,"\n");
+ }
+ }
+
+ }
+ else
+ {
+/* clear the memory used for the phylogenetic tree */
+
+ if (nseqs >= 2)
+ clear_tree(NULL);
+
+ aligned = (sint *)ckalloc( (nseqs+1) * sizeof (sint) );
+ ix = 0;
+ for (i=1;i<=istart+1;i++)
+ {
+ aligned[i] = 1;
+ ++ix;
+ output_index[i] = i;
+ }
+ for (i=istart+2;i<=nseqs;i++) aligned[i] = 0;
+ }
+
+ /* JP: assigning abstract sequences */ /************* step 8 ***************/
+ /* each group starts as one abstract sequence: position k maps to column k
+ of the group, and slot 0 stores the group's index in groupptr */
+ for(i=0;i<ngroups;i++) {
+ j = groupptr[i]->seqlength;
+ groupptr[i]->abseqnum = 1;
+ groupptr[i]->abseqlength = j;
+ groupptr[i]->abstractseq = ckalloc( 2 * sizeof(sint *) );
+ groupptr[i]->abstractseq[1] = ckalloc( (j+1) * sizeof (sint) );
+ for(k=1;k<=j;k++) groupptr[i]->abstractseq[1][k] = k;
+ groupptr[i]->abstractseq[1][0] = i;
+ }
+
+
+ /* JP: lib generation */
+ /* allocation for glib, a three dimensional array */ /************* step 9 ***************/
+ if(ngroups>0) {
+ glib = ckalloc(ngroups*sizeof(SN ***) );
+ for(i=0;i<ngroups;i++) {
+ glib[i] = ckalloc(ngroups*sizeof(SN **));
+ for(j=0;j<ngroups;j++)
+ glib[i][j] = ckalloc( (groupptr[i]->seqlength+1)*sizeof(SN *));
+ }
+
+
+
+ fprintf(stdout, "Start generating libray\n");
+ lib_generation(); /************* step 10 ***************/
+ /*printLib(0,1);
+ printLib(1,0);
+ printLib(0,2);
+ printLib(2,0);
+ printLib(1,2);
+ printLib(2,1); */
+
+
+ if(debug>1)fprintf(stdout, "++++++++++++\n"); fflush(stdout);
+
+ /* JP: lib extension */ /************* step 11 ***************/
+ fprintf(stdout, "\n\nStart extending library\n\n");
+ lib_extension();
+ }
+
+ /* printMatrix(0,1); */
+ /* exit(0); */
+
+ /* JP: progressive alignment of the abstract sequences */ /************* step 12 ***************/
+ fprintf(stdout, "Start progressive alignment of pre-aligned groups\n");
+ for(set=1;set<=nsets;set++) {
+ if( (!grp_ancestor[set]->left->abstractseq)&&(!grp_ancestor[set]->right->abstractseq) ) {
+ continue;
+ }
+ if((grp_ancestor[set]->left->abstractseq)&&(grp_ancestor[set]->right->abstractseq) ) {
+ if(debug>1)fprintf(stdout, "SET: %d\n", set);
+ prfalignabs(set); continue;
+ }
+ if((grp_ancestor[set]->left->abstractseq)&&(!grp_ancestor[set]->right->abstractseq) ) {
+ fprintf(stdout,"Error: abstract sequence of the right child do not exist. set %d\n", set);
+ exit(0);
+ }
+ /* Left child missing, right child present. The original repeated the
+ "both missing" test here, which made this check unreachable. */
+ if((!grp_ancestor[set]->left->abstractseq)&&(grp_ancestor[set]->right->abstractseq) ) {
+ fprintf(stdout, "Error: abstract sequence of the left child do not exist. set %d\n", set);
+ exit(0);
+ }
+ }
+
+ /* change the seq_array */
+ if(seqFormat!=CLUSTALIST) {
+ /* keep a copy of the unaligned residues; seq_array is rewritten below */
+ tmpseq_array = ckalloc( (nseqs+1) *sizeof(sint *));
+ for(j=1;j<=nseqs;j++) {
+ tmpseq_array[j] = ckalloc((seqlen_array[j]+1)*sizeof(sint));
+ for(i=1;i<=seqlen_array[j];i++) {
+ tmpseq_array[j][i] = seq_array[j][i];
+ }
+ }
+ for(j=1;j<=grp_ancestor[nsets]->abseqnum;j++) {
+ nd = groupptr[grp_ancestor[nsets]->abstractseq[j][0]];
+
+ for(i=1;i<=nd->seqnum;i++) {
+ /* find the global index k of this group member by name */
+ for(k=1;k<=nseqs;k++) {
+ if(!strcmp(nd->name[i], names[k]) ) break;
+ }
+ if(k>nseqs) {
+ fprintf(stdout, "Error: name does not match: %s\n", nd->name[i]);
+ exit(0);
+ }
+ seqlen_array[k] = grp_ancestor[nsets]->abseqlength;
+ realloc_seq(k, seqlen_array[k]);
+ /* abstract position 0 is a gap, otherwise an index into the old row */
+ for(l=1;l<=seqlen_array[k];l++) {
+ if(grp_ancestor[nsets]->abstractseq[j][l]==0) {
+ seq_array[k][l] = gap_pos1;
+ if(debug>1)fprintf(stdout, "-");
+ }
+ else {
+ seq_array[k][l] = tmpseq_array[k][grp_ancestor[nsets]->abstractseq[j][l]];
+ /*seq_array[k][l] = nd->seq[i][grp_ancestor[nsets]->abstractseq[j][l]];*/
+ if(debug>1)fprintf(stdout, "%c", am[nd->seq[i][grp_ancestor[nsets]->abstractseq[j][l]]]);
+ }
+ /*if(seq_array[k][l]==0) {
+ seq_array[k][l] = gap_pos1;
+ }
+ else {
+ seq_array[k][l] = res_index(amino_acid_codes, am[seq_array[k][l]]);
+ }*/
+ }
+ }
+ }
+
+ for(j=1;j<=nseqs;j++) ckfree(tmpseq_array[j]);
+ ckfree(tmpseq_array);
+ }
+ /* JP: for_align_list */
+ else {
+ int Nd, tmpcount;
+ int mark;
+ /* Copy the per-file names before free_aln() is called. Each copy is
+ sized from the name itself rather than a fixed 101-byte buffer. */
+ tmpnames = ckalloc((filecount+1) * sizeof(char *) );
+ for(j=1;j<=filecount;j++) {
+ tmpnames[j] = ckalloc((strlen(names[j])+1) * sizeof(char));
+ strcpy(tmpnames[j], names[j]);
+ }
+ free_aln(filecount);
+ alloc_aln(nseqs_all);
+
+ //fprintf(stdout, "%d %d\n", nseqs_all, grp_ancestor[nsets]->abseqnum);
+
+ for(j=1;j<=grp_ancestor[nsets]->abseqnum;j++) {
+ mark = 0; /* the index of the corresponding filecount */
+ tmpcount = 0; /* the starting index of the group */
+ nd = groupptr[grp_ancestor[nsets]->abstractseq[j][0]];
+ /* locate the index of the subalignment */
+ for(i=1;i<=filecount;i++) {
+ for(k=1;k<=nd->seqnum;k++) {
+ if(debug>11) fprintf(stdout, "%d %s %d %s\n", k, nd->name[k], i, tmpnames[i]);
+ if(strcmp(nd->name[k], tmpnames[i])==0) {mark = i; break;}
+ }
+ if(mark > 0) break;
+ }
+ if(debug>11) fprintf(stdout, "%d\n", mark);
+ for(i=1;i<=mark-1;i++) tmpcount += seqnumlist[i];
+
+ for(i=1;i<=seqnumlist[mark];i++) {
+ k = tmpcount + i;
+ seqlen_array[k] = grp_ancestor[nsets]->abseqlength;
+ alloc_seq(k, seqlen_array[k]);
+ strcpy(names[k], names_all[k]);
+ for(l=1;l<=seqlen_array[k];l++) {
+ if(grp_ancestor[nsets]->abstractseq[j][l]==0) {
+ seq_array[k][l] = gap_pos1;
+ if(debug>11)fprintf(stdout, "-");
+ }
+ else {
+ seq_array[k][l] = seq_array_all[k][grp_ancestor[nsets]->abstractseq[j][l]];
+ /*seq_array[k][l] = nd->seq[i][grp_ancestor[nsets]->abstractseq[j][l]];*/
+ if(debug>11)fprintf(stdout, "%c", am[nd->seq[i][grp_ancestor[nsets]->abstractseq[j][l]]]);
+ }
+ }
+ if(debug>11) fprintf(stdout, "\n");
+ }
+ }
+ /* the name copies are no longer needed (previously leaked) */
+ for(j=1;j<=filecount;j++) ckfree(tmpnames[j]);
+ ckfree(tmpnames);
+ for(i=1;i<=nseqs_all;i++) {
+ if(strlen(names[i])>max_names) max_names=strlen(names[i]);
+ }
+ }
+
+/* second pass - align remaining, more divergent sequences..... */
+
+/* if not all sequences were aligned, for each unaligned sequence,
+ find it's closest pair amongst the aligned sequences. */
+
+ group = (sint *)ckalloc( (nseqs+1) * sizeof (sint));
+ tree_weight = (sint *) ckalloc( (nseqs) * sizeof(sint) );
+ for (i=0;i<nseqs;i++)
+ tree_weight[i] = seq_weight[i];
+
+/* if we haven't aligned any sequences, in the first pass - align the
+two most closely related sequences now */
+ if(ix==0)
+ {
+ max = -1;
+ iseq = 0;
+ for (i=1;i<=nseqs;i++)
+ {
+ for (j=i+1;j<=nseqs;j++)
+ {
+ if (max < tmat[i][j])
+ {
+ max = tmat[i][j];
+ iseq = i;
+ }
+ }
+ }
+ aligned[iseq]=1;
+ if (output_order == INPUT)
+ {
+ ++ix;
+ output_index[iseq] = iseq;
+ }
+ else
+ output_index[++ix] = iseq;
+ }
+
+ while (ix < nseqs)
+ {
+ for (i=1;i<=nseqs;i++) {
+ if (aligned[i] == 0)
+ {
+ maxid[i] = -1;
+ for (j=1;j<=nseqs;j++)
+ if ((maxid[i] < tmat[i][j]) && (aligned[j] != 0))
+ maxid[i] = tmat[i][j];
+ }
+ }
+/* find the most closely related sequence to those already aligned */
+
+ max = -1;
+ iseq = 0;
+ for (i=1;i<=nseqs;i++)
+ {
+ if ((aligned[i] == 0) && (maxid[i] > max))
+ {
+ max = maxid[i];
+ iseq = i;
+ }
+ }
+
+
+/* align this sequence to the existing alignment */
+/* weight sequences with percent identity with profile*/
+/* OR...., multiply sequence weights from tree by percent identity with new sequence */
+ if(no_weights==FALSE) {
+ for (j=0;j<nseqs;j++)
+ if (aligned[j+1] != 0)
+ seq_weight[j] = tree_weight[j] * tmat[j+1][iseq];
+/*
+ Normalise the weights, such that the sum of the weights = INT_SCALE_FACTOR
+*/
+
+ sum = 0;
+ for (j=0;j<nseqs;j++)
+ if (aligned[j+1] != 0)
+ sum += seq_weight[j];
+ if (sum == 0)
+ {
+ for (j=0;j<nseqs;j++)
+ seq_weight[j] = 1;
+ sum = j;
+ }
+ for (j=0;j<nseqs;j++)
+ if (aligned[j+1] != 0)
+ {
+ seq_weight[j] = (seq_weight[j] * INT_SCALE_FACTOR) / sum;
+ if (seq_weight[j] < 1) seq_weight[j] = 1;
+ }
+ }
+
+ /* group 1 = sequences already aligned, group 2 = the sequence being added */
+ entries = 0;
+ for (j=1;j<=nseqs;j++)
+ if (aligned[j] != 0)
+ {
+ group[j] = 1;
+ entries++;
+ }
+ else if (iseq==j)
+ {
+ group[j] = 2;
+ entries++;
+ }
+ aligned[iseq] = 1;
+
+ score = prfalign(group, aligned);
+ info("Sequence:%d Score:%d",(pint)iseq,(pint)score);
+ if (output_order == INPUT)
+ {
+ ++ix;
+ output_index[iseq] = iseq;
+ }
+ else
+ output_index[++ix] = iseq;
+ }
+
+ group=ckfree((void *)group);
+ aligned=ckfree((void *)aligned);
+ maxid=ckfree((void *)maxid);
+ tree_weight=ckfree((void *)tree_weight);
+
+ /* aln_score(); */
+
+/* make the rest (output stuff) into routine clustal_out in file amenu.c */
+
+ return(nseqs);
+
+}
+
+sint seqalign(sint istart,char *phylip_name) /* add the not-yet-aligned sequences, one at a time, to the existing profile */
+{
+   static sint *aligned, *tree_weight;
+   static sint *group;
+   static sint ix;
+
+   sint *maxid, max;
+   sint i,j,status,iseq;
+   sint sum,entries;
+   lint score = 0;
+/* Sequences 1..istart+1 are taken as already aligned; the rest are added one per loop pass, most similar first. */
+/* Returns nseqs, or 0 when read_tree() or calc_similarities() reports failure. */
+   info("Start making multiple alignment\n");
+
+/* get the phylogenetic tree from *.ph */
+
+   if (nseqs >= 2)
+   {
+      status = read_tree(phylip_name, (sint)0, nseqs);
+      if (status == 0) return(0);
+   }
+
+/* calculate sequence weights according to branch lengths of the tree -
+   weights in global variable seq_weight normalised to sum to 100 */
+
+   calc_seq_weights((sint)0, nseqs, seq_weight);
+
+   tree_weight = (sint *) ckalloc( (nseqs) * sizeof(sint) );
+   for (i=0;i<nseqs;i++)
+      tree_weight[i] = seq_weight[i];
+
+/* recalculate tmat matrix as percent similarity matrix */
+
+   status = calc_similarities(nseqs);
+   if (status == 0) { tree_weight=ckfree((void *)tree_weight); return((sint)0); } /* do not leak the copy made above */
+
+/* for each sequence, find the most closely related sequence */
+
+   maxid = (sint *)ckalloc( (nseqs+1) * sizeof (sint));
+   for (i=1;i<=nseqs;i++)
+   {
+      maxid[i] = -1;
+      for (j=1;j<=nseqs;j++)
+         if (maxid[i] < tmat[i][j]) maxid[i] = tmat[i][j];
+   }
+
+/* clear the memory used for the phylogenetic tree */
+
+   if (nseqs >= 2)
+      clear_tree(NULL);
+
+   aligned = (sint *)ckalloc( (nseqs+1) * sizeof (sint) );
+   ix = 0;
+   for (i=1;i<=istart+1;i++)
+   {
+      aligned[i] = 1;
+      ++ix;
+      output_index[i] = i;
+   }
+   for (i=istart+2;i<=nseqs;i++) aligned[i] = 0;
+
+/* for each unaligned sequence, find its closest pair amongst the
+   aligned sequences. */
+
+   group = (sint *)ckalloc( (nseqs+1) * sizeof (sint));
+
+   while (ix < nseqs)
+   {
+      if (ix > 0)
+      {
+         for (i=1;i<=nseqs;i++) {
+            if (aligned[i] == 0)
+            {
+               maxid[i] = -1;
+               for (j=1;j<=nseqs;j++)
+                  if ((maxid[i] < tmat[i][j]) && (aligned[j] != 0))
+                     maxid[i] = tmat[i][j];
+            }
+         }
+      }
+
+/* find the most closely related sequence to those already aligned */
+
+      max = -1;
+      for (i=1;i<=nseqs;i++)
+      {
+         if ((aligned[i] == 0) && (maxid[i] > max))
+         {
+            max = maxid[i];
+            iseq = i;
+         }
+      }
+/* NOTE(review): iseq is assigned only if some unaligned maxid[] exceeds -1; confirm tmat entries are never negative */
+/* align this sequence to the existing alignment */
+
+      entries = 0;
+      for (j=1;j<=nseqs;j++)
+         if (aligned[j] != 0)
+         {
+            group[j] = 1;
+            entries++;
+         }
+         else if (iseq==j)
+         {
+            group[j] = 2;
+            entries++;
+         }
+      aligned[iseq] = 1;
+
+
+/* EITHER....., set sequence weights equal to percent identity with new sequence */
+/*
+      for (j=0;j<nseqs;j++)
+         seq_weight[j] = tmat[j+1][iseq];
+*/
+/* OR...., multiply sequence weights from tree by percent identity with new sequence */
+      for (j=0;j<nseqs;j++)
+         seq_weight[j] = tree_weight[j] * tmat[j+1][iseq];
+      if (debug>1)
+         for (j=0;j<nseqs;j++) if (group[j+1] == 1)fprintf (stdout,"sequence %d: %d\n", j+1,tree_weight[j]);
+/*
+   Normalise the weights, such that the sum of the weights = INT_SCALE_FACTOR
+*/
+
+      sum = 0;
+      for (j=0;j<nseqs;j++)
+         if (group[j+1] == 1) sum += seq_weight[j];
+      if (sum == 0)
+      {
+         for (j=0;j<nseqs;j++)
+            seq_weight[j] = 1;
+         sum = j;
+      }
+      for (j=0;j<nseqs;j++)
+      {
+         seq_weight[j] = (seq_weight[j] * INT_SCALE_FACTOR) / sum;
+         if (seq_weight[j] < 1) seq_weight[j] = 1;
+      }
+
+      if (debug > 1) {
+         fprintf(stdout,"new weights\n");
+         for (j=0;j<nseqs;j++) if (group[j+1] == 1)fprintf( stdout,"sequence %d: %d\n", j+1,seq_weight[j]);
+      }
+
+      score = prfalign(group, aligned);
+      info("Sequence:%d Score:%d",(pint)iseq,(pint)score);
+      if (output_order == INPUT)
+      {
+         ++ix;
+         output_index[iseq] = iseq;
+      }
+      else
+         output_index[++ix] = iseq;
+   }
+
+   group=ckfree((void *)group);
+   aligned=ckfree((void *)aligned);
+   maxid=ckfree((void *)maxid);
+   tree_weight=ckfree((void *)tree_weight);   /* was never released: one array leaked per call */
+   /* aln_score(); */
+
+/* make the rest (output stuff) into routine clustal_out in file amenu.c */
+
+   return(nseqs);
+
+}
+
+
+sint palign1(void) /* first profile-to-profile alignment, run with equal sequence weights */
+{
+   sint a,b,share;
+   sint members;
+   sint *is_aligned, *profile_id;
+   float pct_identity;
+   lint total_score;
+
+   info("Start of Initial Alignment\n");
+
+/* no tree is consulted here: every entry of the global seq_weight array
+   receives the same share, INT_SCALE_FACTOR/nseqs */
+
+   share = INT_SCALE_FACTOR/nseqs;
+   for (a=0;a<nseqs;a++) seq_weight[a] = share;
+
+   distance_tree = FALSE;
+
+/* tag sequences 1..profile1_nseqs as profile 1 and the remainder as profile 2 */
+
+   profile_id = (sint *)ckalloc( (nseqs+1) * sizeof (sint));
+
+   for (a=profile1_nseqs; a>=1; a--)
+      profile_id[a] = 1;
+   for (a=nseqs; a>profile1_nseqs; a--)
+      profile_id[a] = 2;
+   members = nseqs;
+
+   is_aligned = (sint *)ckalloc( (nseqs+1) * sizeof (sint) );
+   for (a=nseqs; a>=1; a--) is_aligned[a] = 1;
+
+   total_score = prfalign(profile_id, is_aligned);
+   info("Sequences:%d Score:%d",(pint)members,(pint)total_score);
+   profile_id=ckfree((void *)profile_id);
+   is_aligned=ckfree((void *)is_aligned);
+
+/* refill tmat symmetrically with 1 - (percent identity)/100 for each pair */
+   for (a=1;a<=nseqs;a++)
+      for (b=a+1;b<=nseqs;b++) {
+         pct_identity = countid(a,b);
+         tmat[b][a] = tmat[a][b] = ((double)100.0 - (double)pct_identity)/(double)100.0;
+      }
+
+
+   return(nseqs);
+}
+
+float countid(sint s1, sint s2)
+{
+/* Percentage of identical codes between sequences s1 and s2, taken over the
+   positions 1..min(length) where s1 holds a code in [0,max_aa); 0 if none. */
+   char res1,res2;
+   sint pos;
+   sint same = 0, scored = 0;
+   float pct = 0;
+
+   for (pos=1; pos<=seqlen_array[s1] && pos<=seqlen_array[s2]; pos++) {
+      res1 = seq_array[s1][pos];
+      res2 = seq_array[s2][pos];
+
+/* only the code from s1 decides whether a column is counted */
+      if ((res1<0) || (res1>=max_aa)) continue;
+
+      scored++;
+      if (res1 == res2) same++;
+   }
+
+   if (scored != 0)
+      pct = 100.0 * (float)same / (float)scored;
+   return(pct);
+}
+
+sint palign2(char *p1_tree_name,char *p2_tree_name) /* align profile 1 against profile 2, weighting sequences from the two trees */
+{
+   sint i,j,sum,entries,status;
+   lint score;
+   sint *aligned, *group;
+   sint *maxid,*p1_weight,*p2_weight;
+/* Returns nseqs, or 0 when either tree file cannot be read. */
+
+   info("Start making multiple alignment\n");
+
+/* get the phylogenetic trees from *.ph */
+
+   if (profile1_nseqs >= 2)
+   {
+      status = read_tree(p1_tree_name, (sint)0, profile1_nseqs);
+      if (status == 0) return(0);
+   }
+
+/* calculate sequence weights according to branch lengths of the tree -
+   weights in global variable seq_weight normalised to sum to 100 */
+
+   p1_weight = (sint *) ckalloc( (profile1_nseqs) * sizeof(sint) );
+
+   calc_seq_weights((sint)0, profile1_nseqs, p1_weight);
+
+/* clear the memory for the phylogenetic tree */
+
+   if (profile1_nseqs >= 2)
+      clear_tree(NULL);
+
+   if (nseqs-profile1_nseqs >= 2)
+   {
+      status = read_tree(p2_tree_name, profile1_nseqs, nseqs);
+      if (status == 0) { p1_weight=ckfree((void *)p1_weight); return(0); } /* p1_weight used to leak on this path */
+   }
+
+   p2_weight = (sint *) ckalloc( (nseqs) * sizeof(sint) );
+
+   calc_seq_weights(profile1_nseqs,nseqs, p2_weight);
+
+
+/* clear the memory for the phylogenetic tree */
+
+   if (nseqs-profile1_nseqs >= 2)
+      clear_tree(NULL);
+
+/* convert tmat distances to similarities */
+
+   for (i=1;i<nseqs;i++)
+      for (j=i+1;j<=nseqs;j++) {
+         tmat[i][j]=100.0-tmat[i][j]*100.0;
+         tmat[j][i]=tmat[i][j];
+      }
+
+
+/* weight sequences with max percent identity with other profile*/
+
+   maxid = (sint *)ckalloc( (nseqs+1) * sizeof (sint));
+   for (i=0;i<profile1_nseqs;i++) {
+      maxid[i] = 0;
+      for (j=profile1_nseqs+1;j<=nseqs;j++)
+         if(maxid[i]<tmat[i+1][j]) maxid[i] = tmat[i+1][j];
+      seq_weight[i] = maxid[i]*p1_weight[i];
+   }
+/* NOTE(review): the loop above starts maxid at 0, the one below at -1; confirm the difference is intended */
+   for (i=profile1_nseqs;i<nseqs;i++) {
+      maxid[i] = -1;
+      for (j=1;j<=profile1_nseqs;j++)
+         if(maxid[i]<tmat[i+1][j]) maxid[i] = tmat[i+1][j];
+      seq_weight[i] = maxid[i]*p2_weight[i];
+   }
+/*
+   Normalise the weights, such that the sum of the weights = INT_SCALE_FACTOR
+*/
+
+   sum = 0;
+   for (j=0;j<nseqs;j++)
+      sum += seq_weight[j];
+   if (sum == 0)
+   {
+      for (j=0;j<nseqs;j++)
+         seq_weight[j] = 1;
+      sum = j;
+   }
+   for (j=0;j<nseqs;j++)
+   {
+      seq_weight[j] = (seq_weight[j] * INT_SCALE_FACTOR) / sum;
+      if (seq_weight[j] < 1) seq_weight[j] = 1;
+   }
+   if (debug > 1) {
+      fprintf(stdout,"new weights\n");
+      for (j=0;j<nseqs;j++) fprintf( stdout,"sequence %d: %d\n", j+1,seq_weight[j]);
+   }
+
+
+/* do the alignment......... */
+
+   /* JP: disable info */
+   /*info("Aligning...");*/
+
+   group = (sint *)ckalloc( (nseqs+1) * sizeof (sint));
+
+   for(i=1; i<=profile1_nseqs; ++i)
+      group[i] = 1;
+   for(i=profile1_nseqs+1; i<=nseqs; ++i)
+      group[i] = 2;
+   entries = nseqs;
+
+   aligned = (sint *)ckalloc( (nseqs+1) * sizeof (sint) );
+   for (i=1;i<=nseqs;i++) aligned[i] = 1;
+
+   score = prfalign(group, aligned);
+   info("Sequences:%d Score:%d",(pint)entries,(pint)score);
+   group=ckfree((void *)group);
+   p1_weight=ckfree((void *)p1_weight);
+   p2_weight=ckfree((void *)p2_weight);
+   aligned=ckfree((void *)aligned);
+   maxid=ckfree((void *)maxid);
+
+/* DES output_index = (int *)ckalloc( (nseqs+1) * sizeof (int)); */
+   for (i=1;i<=nseqs;i++) output_index[i] = i;
+
+   return(nseqs);
+}
+
diff --git a/matrices.h b/matrices.h
new file mode 100644
index 0000000..f0e14a3
--- /dev/null
+++ b/matrices.h
@@ -0,0 +1,852 @@
+char *amino_acid_order = "ABCDEFGHIKLMNPQRSTVWXYZ"; /* 23 one-letter residue codes; the score tables that follow are written as 23-row lower triangles (row k holds k entries), presumably indexed in this order - confirm against the matrix loader */
+
+short blosum30mt[]={
+ 4,
+ 0, 5,
+ -3, -2, 17,
+ 0, 5, -3, 9,
+ 0, 0, 1, 1, 6,
+ -2, -3, -3, -5, -4, 10,
+ 0, 0, -4, -1, -2, -3, 8,
+ -2, -2, -5, -2, 0, -3, -3, 14,
+ 0, -2, -2, -4, -3, 0, -1, -2, 6,
+ 0, 0, -3, 0, 2, -1, -1, -2, -2, 4,
+ -1, -1, 0, -1, -1, 2, -2, -1, 2, -2, 4,
+ 1, -2, -2, -3, -1, -2, -2, 2, 1, 2, 2, 6,
+ 0, 4, -1, 1, -1, -1, 0, -1, 0, 0, -2, 0, 8,
+ -1, -2, -3, -1, 1, -4, -1, 1, -3, 1, -3, -4, -3, 11,
+ 1, -1, -2, -1, 2, -3, -2, 0, -2, 0, -2, -1, -1, 0, 8,
+ -1, -2, -2, -1, -1, -1, -2, -1, -3, 1, -2, 0, -2, -1, 3, 8,
+ 1, 0, -2, 0, 0, -1, 0, -1, -1, 0, -2, -2, 0, -1, -1, -1, 4,
+ 1, 0, -2, -1, -2, -2, -2, -2, 0, -1, 0, 0, 1, 0, 0, -3, 2, 5,
+ 1, -2, -2, -2, -3, 1, -3, -3, 4, -2, 1, 0, -2, -4, -3, -1, -1, 1, 5,
+ -5, -5, -2, -4, -1, 1, 1, -5, -3, -2, -2, -3, -7, -3, -1, 0, -3, -5, -3, 20,
+ 0, -1, -2, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, -1, 0, -1, 0, 0, 0, -2, -1,
+ -4, -3, -6, -1, -2, 3, -3, 0, -1, -1, 3, -1, -4, -2, -1, 0, -2, -1, 1, 5, -1, 9,
+ 0, 0, 0, 0, 5, -4, -2, 0, -3, 1, -1, -1, -1, 0, 4, 0, -1, -1, -3, -1, 0, -2, 4};
+
+
+short blosum35mt[]={
+ 5,
+ -1, 5,
+ -2, -2, 15,
+ -1, 5, -3, 8,
+ -1, 0, -1, 2, 6,
+ -2, -2, -4, -3, -3, 8,
+ 0, 0, -3, -2, -2, -3, 7,
+ -2, 0, -4, 0, -1, -3, -2, 12,
+ -1, -2, -4, -3, -3, 1, -3, -3, 5,
+ 0, 0, -2, -1, 1, -1, -1, -2, -2, 5,
+ -2, -2, -2, -2, -1, 2, -3, -2, 2, -2, 5,
+ 0, -2, -4, -3, -2, 0, -1, 1, 1, 0, 3, 6,
+ -1, 4, -1, 1, -1, -1, 1, 1, -1, 0, -2, -1, 7,
+ -2, -1, -4, -1, 0, -4, -2, -1, -1, 0, -3, -3, -2, 10,
+ 0, 0, -3, -1, 2, -4, -2, -1, -2, 0, -2, -1, 1, 0, 7,
+ -1, -1, -3, -1, -1, -1, -2, -1, -3, 2, -2, 0, -1, -2, 2, 8,
+ 1, 0, -3, -1, 0, -1, 1, -1, -2, 0, -2, -1, 0, -2, 0, -1, 4,
+ 0, -1, -1, -1, -1, -1, -2, -2, -1, 0, 0, 0, 0, 0, 0, -2, 2, 5,
+ 0, -2, -2, -2, -2, 1, -3, -4, 4, -2, 2, 1, -2, -3, -3, -1, -1, 1, 5,
+ -2, -3, -5, -3, -1, 1, -1, -4, -1, 0, 0, 1, -2, -4, -1, 0, -2, -2, -2, 16,
+ 0, -1, -2, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, -1, -1, -1, 0, 0, 0, -1, -1,
+ -1, -2, -5, -2, -1, 3, -2, 0, 0, -1, 0, 0, -2, -3, 0, 0, -1, -2, 0, 3, -1, 8,
+ -1, 0, -2, 1, 5, -3, -2, -1, -3, 1, -2, -2, 0, 0, 4, 0, 0, -1, -2, -1, 0, -1, 4};
+
+short blosum40mt[]={
+ 5,
+ -1, 5,
+ -2, -2, 16,
+ -1, 6, -2, 9,
+ -1, 1, -2, 2, 7,
+ -3, -3, -2, -4, -3, 9,
+ 1, -1, -3, -2, -3, -3, 8,
+ -2, 0, -4, 0, 0, -2, -2, 13,
+ -1, -3, -4, -4, -4, 1, -4, -3, 6,
+ -1, 0, -3, 0, 1, -3, -2, -1, -3, 6,
+ -2, -3, -2, -3, -2, 2, -4, -2, 2, -2, 6,
+ -1, -3, -3, -3, -2, 0, -2, 1, 1, -1, 3, 7,
+ -1, 4, -2, 2, -1, -3, 0, 1, -2, 0, -3, -2, 8,
+ -2, -2, -5, -2, 0, -4, -1, -2, -2, -1, -4, -2, -2, 11,
+ 0, 0, -4, -1, 2, -4, -2, 0, -3, 1, -2, -1, 1, -2, 8,
+ -2, -1, -3, -1, -1, -2, -3, 0, -3, 3, -2, -1, 0, -3, 2, 9,
+ 1, 0, -1, 0, 0, -2, 0, -1, -2, 0, -3, -2, 1, -1, 1, -1, 5,
+ 0, 0, -1, -1, -1, -1, -2, -2, -1, 0, -1, -1, 0, 0, -1, -2, 2, 6,
+ 0, -3, -2, -3, -3, 0, -4, -4, 4, -2, 2, 1, -3, -3, -3, -2, -1, 1, 5,
+ -3, -4, -6, -5, -2, 1, -2, -5, -3, -2, -1, -2, -4, -4, -1, -2, -5, -4, -3, 19,
+ 0, -1, -2, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -2, -1, -1, 0, 0, -1, -2, -1,
+ -2, -3, -4, -3, -2, 4, -3, 2, 0, -1, 0, 1, -2, -3, -1, -1, -2, -1, -1, 3, -1, 9,
+ -1, 2, -3, 1, 5, -4, -2, 0, -4, 1, -2, -2, 0, -1, 4, 0, 0, -1, -3, -2, -1, -2, 5};
+
+short blosum45mt[]={
+ 5,
+ -1, 4,
+ -1, -2, 12,
+ -2, 5, -3, 7,
+ -1, 1, -3, 2, 6,
+ -2, -3, -2, -4, -3, 8,
+ 0, -1, -3, -1, -2, -3, 7,
+ -2, 0, -3, 0, 0, -2, -2, 10,
+ -1, -3, -3, -4, -3, 0, -4, -3, 5,
+ -1, 0, -3, 0, 1, -3, -2, -1, -3, 5,
+ -1, -3, -2, -3, -2, 1, -3, -2, 2, -3, 5,
+ -1, -2, -2, -3, -2, 0, -2, 0, 2, -1, 2, 6,
+ -1, 4, -2, 2, 0, -2, 0, 1, -2, 0, -3, -2, 6,
+ -1, -2, -4, -1, 0, -3, -2, -2, -2, -1, -3, -2, -2, 9,
+ -1, 0, -3, 0, 2, -4, -2, 1, -2, 1, -2, 0, 0, -1, 6,
+ -2, -1, -3, -1, 0, -2, -2, 0, -3, 3, -2, -1, 0, -2, 1, 7,
+ 1, 0, -1, 0, 0, -2, 0, -1, -2, -1, -3, -2, 1, -1, 0, -1, 4,
+ 0, 0, -1, -1, -1, -1, -2, -2, -1, -1, -1, -1, 0, -1, -1, -1, 2, 5,
+ 0, -3, -1, -3, -3, 0, -3, -3, 3, -2, 1, 1, -3, -3, -3, -2, -1, 0, 5,
+ -2, -4, -5, -4, -3, 1, -2, -3, -2, -2, -2, -2, -4, -3, -2, -2, -4, -3, -3, 15,
+ 0, -1, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, -1, -2, -1,
+ -2, -2, -3, -2, -2, 3, -3, 2, 0, -1, 0, 0, -2, -3, -1, -1, -2, -1, -1, 3, -1, 8,
+ -1, 2, -3, 1, 4, -3, -2, 0, -3, 1, -2, -1, 0, -1, 4, 0, 0, -1, -3, -2, -1, -2, 4};
+
+
+short blosum50mt[]={
+ 5,
+ -2, 5,
+ -1, -3, 13,
+ -2, 5, -4, 8,
+ -1, 1, -3, 2, 6,
+ -3, -4, -2, -5, -3, 8,
+ 0, -1, -3, -1, -3, -4, 8,
+ -2, 0, -3, -1, 0, -1, -2, 10,
+ -1, -4, -2, -4, -4, 0, -4, -4, 5,
+ -1, 0, -3, -1, 1, -4, -2, 0, -3, 6,
+ -2, -4, -2, -4, -3, 1, -4, -3, 2, -3, 5,
+ -1, -3, -2, -4, -2, 0, -3, -1, 2, -2, 3, 7,
+ -1, 4, -2, 2, 0, -4, 0, 1, -3, 0, -4, -2, 7,
+ -1, -2, -4, -1, -1, -4, -2, -2, -3, -1, -4, -3, -2, 10,
+ -1, 0, -3, 0, 2, -4, -2, 1, -3, 2, -2, 0, 0, -1, 7,
+ -2, -1, -4, -2, 0, -3, -3, 0, -4, 3, -3, -2, -1, -3, 1, 7,
+ 1, 0, -1, 0, -1, -3, 0, -1, -3, 0, -3, -2, 1, -1, 0, -1, 5,
+ 0, 0, -1, -1, -1, -2, -2, -2, -1, -1, -1, -1, 0, -1, -1, -1, 2, 5,
+ 0, -4, -1, -4, -3, -1, -4, -4, 4, -3, 1, 1, -3, -3, -3, -3, -2, 0, 5,
+ -3, -5, -5, -5, -3, 1, -3, -3, -3, -3, -2, -1, -4, -4, -1, -3, -4, -3, -3, 15,
+ -1, -1, -2, -1, -1, -2, -2, -1, -1, -1, -1, -1, -1, -2, -1, -1, -1, 0, -1, -3, -1,
+ -2, -3, -3, -3, -2, 4, -3, 2, -1, -2, -1, 0, -2, -3, -1, -1, -2, -2, -1, 2, -1, 8,
+ -1, 2, -3, 1, 5, -4, -2, 0, -3, 1, -3, -1, 0, -1, 4, 0, 0, -1, -3, -2, -1, -2, 5};
+
+short blosum55mt[]={
+ 5,
+ -2, 5,
+ 0, -4, 13,
+ -2, 5, -4, 8,
+ -1, 1, -4, 2, 7,
+ -3, -5, -3, -5, -4, 9,
+ 0, -1, -3, -2, -3, -4, 8,
+ -2, 0, -4, -1, -1, -1, -2, 11,
+ -2, -4, -2, -4, -4, 0, -5, -4, 6,
+ -1, 0, -4, -1, 1, -4, -2, 0, -4, 6,
+ -2, -4, -2, -5, -4, 1, -5, -3, 2, -3, 6,
+ -1, -3, -2, -4, -3, 0, -3, -2, 2, -2, 3, 8,
+ -2, 4, -3, 2, 0, -4, 0, 1, -4, 0, -4, -3, 8,
+ -1, -2, -3, -2, -1, -5, -3, -3, -3, -1, -4, -3, -2, 10,
+ -1, 0, -4, 0, 2, -4, -2, 1, -4, 2, -3, 0, 0, -1, 7,
+ -2, -1, -4, -2, 0, -3, -3, 0, -4, 3, -3, -2, -1, -3, 1, 8,
+ 2, 0, -1, 0, 0, -3, 0, -1, -3, 0, -3, -2, 1, -1, 0, -1, 5,
+ 0, -1, -1, -1, -1, -3, -2, -2, -1, -1, -2, -1, 0, -1, -1, -1, 2, 6,
+ 0, -4, -1, -4, -3, -1, -4, -4, 4, -3, 1, 1, -4, -3, -3, -3, -2, 0, 5,
+ -4, -5, -4, -5, -3, 2, -3, -3, -3, -4, -3, -2, -5, -5, -2, -3, -4, -3, -4, 15,
+ -1, -1, -2, -2, -1, -2, -2, -1, -1, -1, -1, -1, -1, -2, -1, -1, -1, -1, -1, -3, -1,
+ -2, -3, -3, -3, -2, 4, -4, 2, -1, -2, -1, -1, -2, -4, -1, -2, -2, -2, -2, 3, -1, 9,
+ -1, 2, -4, 1, 5, -4, -3, 0, -4, 1, -3, -2, 0, -1, 4, 0, 0, -1, -3, -3, -1, -2, 5};
+
+
+short blosum62mt[]={
+ 4,
+ -2, 4,
+ 0, -3, 9,
+ -2, 4, -3, 6,
+ -1, 1, -4, 2, 5,
+ -2, -3, -2, -3, -3, 6,
+ 0, -1, -3, -1, -2, -3, 6,
+ -2, 0, -3, -1, 0, -1, -2, 8,
+ -1, -3, -1, -3, -3, 0, -4, -3, 4,
+ -1, 0, -3, -1, 1, -3, -2, -1, -3, 5,
+ -1, -4, -1, -4, -3, 0, -4, -3, 2, -2, 4,
+ -1, -3, -1, -3, -2, 0, -3, -2, 1, -1, 2, 5,
+ -2, 3, -3, 1, 0, -3, 0, 1, -3, 0, -3, -2, 6,
+ -1, -2, -3, -1, -1, -4, -2, -2, -3, -1, -3, -2, -2, 7,
+ -1, 0, -3, 0, 2, -3, -2, 0, -3, 1, -2, 0, 0, -1, 5,
+ -1, -1, -3, -2, 0, -3, -2, 0, -3, 2, -2, -1, 0, -2, 1, 5,
+ 1, 0, -1, 0, 0, -2, 0, -1, -2, 0, -2, -1, 1, -1, 0, -1, 4,
+ 0, -1, -1, -1, -1, -2, -2, -2, -1, -1, -1, -1, 0, -1, -1, -1, 1, 5,
+ 0, -3, -1, -3, -2, -1, -3, -3, 3, -2, 1, 1, -3, -2, -2, -3, -2, 0, 4,
+ -3, -4, -2, -4, -3, 1, -2, -2, -3, -3, -2, -1, -4, -4, -2, -3, -3, -2, -3, 11,
+ 0, -1, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -1, -1, 0, 0, -1, -2, -1,
+ -2, -3, -2, -3, -2, 3, -3, 2, -1, -2, -1, -1, -2, -3, -1, -2, -2, -2, -1, 2, -1, 7,
+ -1, 1, -3, 1, 4, -3, -2, 0, -3, 1, -3, -1, 0, -1, 3, 0, 0, -1, -2, -3, -1, -2, 4};
+
+short blosum62mt2[]={ /* 23-row lower triangle; every entry is exactly twice the corresponding blosum62mt entry */
+ 8,
+ -4, 8,
+ 0, -6, 18,
+ -4, 8, -6, 12,
+ -2, 2, -8, 4, 10,
+ -4, -6, -4, -6, -6, 12,
+ 0, -2, -6, -2, -4, -6, 12,
+ -4, 0, -6, -2, 0, -2, -4, 16,
+ -2, -6, -2, -6, -6, 0, -8, -6, 8,
+ -2, 0, -6, -2, 2, -6, -4, -2, -6, 10,
+ -2, -8, -2, -8, -6, 0, -8, -6, 4, -4, 8,
+ -2, -6, -2, -6, -4, 0, -6, -4, 2, -2, 4, 10,
+ -4, 6, -6, 2, 0, -6, 0, 2, -6, 0, -6, -4, 12,
+ -2, -4, -6, -2, -2, -8, -4, -4, -6, -2, -6, -4, -4, 14,
+ -2, 0, -6, 0, 4, -6, -4, 0, -6, 2, -4, 0, 0, -2, 10,
+ -2, -2, -6, -4, 0, -6, -4, 0, -6, 4, -4, -2, 0, -4, 2, 10,
+ 2, 0, -2, 0, 0, -4, 0, -2, -4, 0, -4, -2, 2, -2, 0, -2, 8,
+ 0, -2, -2, -2, -2, -4, -4, -4, -2, -2, -2, -2, 0, -2, -2, -2, 2, 10,
+ 0, -6, -2, -6, -4, -2, -6, -6, 6, -4, 2, 2, -6, -4, -4, -6, -4, 0, 8,
+ -6, -8, -4, -8, -6, 2, -4, -4, -6, -6, -4, -2, -8, -8, -4, -6, -6, -4, -6, 22,
+ 0, -2, -4, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -4, -2, -2, 0, 0, -2, -4, -2,
+ -4, -6, -4, -6, -4, 6, -6, 4, -2, -4, -2, -2, -4, -6, -2, -4, -4, -4, -2, 4, -2, 14,
+ -2, 2, -6, 2, 8, -6, -4, 0, -6, 2, -6, -2, 0, -2, 6, 0, 0, -2, -4, -6, -2, -4, 8};
+
+short blosum65mt[]={
+ 4,
+ -2, 4,
+ 0, -3, 9,
+ -2, 4, -4, 6,
+ -1, 1, -4, 2, 5,
+ -2, -3, -2, -4, -3, 6,
+ 0, -1, -3, -1, -2, -3, 6,
+ -2, 0, -3, -1, 0, -1, -2, 8,
+ -1, -3, -1, -3, -3, 0, -4, -3, 4,
+ -1, 0, -3, -1, 1, -3, -2, -1, -3, 5,
+ -2, -4, -1, -4, -3, 0, -4, -3, 2, -3, 4,
+ -1, -3, -2, -3, -2, 0, -3, -2, 1, -2, 2, 6,
+ -2, 3, -3, 1, 0, -3, -1, 1, -3, 0, -4, -2, 6,
+ -1, -2, -3, -2, -1, -4, -2, -2, -3, -1, -3, -3, -2, 8,
+ -1, 0, -3, 0, 2, -3, -2, 1, -3, 1, -2, 0, 0, -1, 6,
+ -1, -1, -4, -2, 0, -3, -2, 0, -3, 2, -2, -2, 0, -2, 1, 6,
+ 1, 0, -1, 0, 0, -2, 0, -1, -2, 0, -3, -2, 1, -1, 0, -1, 4,
+ 0, -1, -1, -1, -1, -2, -2, -2, -1, -1, -1, -1, 0, -1, -1, -1, 1, 5,
+ 0, -3, -1, -3, -3, -1, -3, -3, 3, -2, 1, 1, -3, -2, -2, -3, -2, 0, 4,
+ -3, -4, -2, -5, -3, 1, -3, -2, -2, -3, -2, -2, -4, -4, -2, -3, -3, -3, -3, 10,
+ -1, -1, -2, -1, -1, -2, -2, -1, -1, -1, -1, -1, -1, -2, -1, -1, -1, -1, -1, -2, -1,
+ -2, -3, -2, -3, -2, 3, -3, 2, -1, -2, -1, -1, -2, -3, -2, -2, -2, -2, -1, 2, -1, 7,
+ -1, 1, -4, 1, 4, -3, -2, 0, -3, 1, -3, -2, 0, -1, 3, 0, 0, -1, -2, -3, -1, -2, 4};
+
+short blosum70mt[]={
+ 4,
+ -2, 4,
+ -1, -4, 9,
+ -2, 4, -4, 6,
+ -1, 1, -4, 1, 5,
+ -2, -4, -2, -4, -4, 6,
+ 0, -1, -3, -2, -2, -4, 6,
+ -2, -1, -4, -1, 0, -1, -2, 8,
+ -2, -4, -1, -4, -4, 0, -4, -4, 4,
+ -1, -1, -4, -1, 1, -3, -2, -1, -3, 5,
+ -2, -4, -2, -4, -3, 0, -4, -3, 2, -3, 4,
+ -1, -3, -2, -3, -2, 0, -3, -2, 1, -2, 2, 6,
+ -2, 3, -3, 1, 0, -3, -1, 0, -4, 0, -4, -2, 6,
+ -1, -2, -3, -2, -1, -4, -3, -2, -3, -1, -3, -3, -2, 8,
+ -1, 0, -3, -1, 2, -3, -2, 1, -3, 1, -2, 0, 0, -2, 6,
+ -2, -1, -4, -2, 0, -3, -3, 0, -3, 2, -3, -2, -1, -2, 1, 6,
+ 1, 0, -1, 0, 0, -3, -1, -1, -3, 0, -3, -2, 0, -1, 0, -1, 4,
+ 0, -1, -1, -1, -1, -2, -2, -2, -1, -1, -2, -1, 0, -1, -1, -1, 1, 5,
+ 0, -3, -1, -4, -3, -1, -4, -3, 3, -3, 1, 1, -3, -3, -2, -3, -2, 0, 4,
+ -3, -4, -3, -5, -4, 1, -3, -2, -3, -3, -2, -2, -4, -4, -2, -3, -3, -3, -3, 11,
+ -1, -1, -2, -2, -1, -2, -2, -1, -1, -1, -1, -1, -1, -2, -1, -1, -1, -1, -1, -3, -1,
+ -2, -3, -3, -4, -3, 3, -4, 2, -1, -2, -1, -1, -2, -3, -2, -2, -2, -2, -2, 2, -2, 7,
+ -1, 0, -4, 1, 4, -4, -2, 0, -3, 1, -3, -2, 0, -1, 3, 0, 0, -1, -3, -3, -1, -2, 4};
+
+short blosum75mt[]={
+ 4,
+ -2, 4,
+ -1, -4, 9,
+ -2, 4, -4, 6,
+ -1, 1, -5, 1, 5,
+ -3, -4, -2, -4, -4, 6,
+ 0, -1, -3, -2, -3, -4, 6,
+ -2, -1, -4, -1, 0, -2, -2, 8,
+ -2, -4, -1, -4, -4, 0, -5, -4, 4,
+ -1, -1, -4, -1, 1, -4, -2, -1, -3, 5,
+ -2, -4, -2, -4, -4, 0, -4, -3, 1, -3, 4,
+ -1, -3, -2, -4, -2, 0, -3, -2, 1, -2, 2, 6,
+ -2, 3, -3, 1, -1, -4, -1, 0, -4, 0, -4, -3, 6,
+ -1, -2, -4, -2, -1, -4, -3, -2, -3, -1, -3, -3, -3, 8,
+ -1, 0, -3, -1, 2, -4, -2, 1, -3, 1, -3, 0, 0, -2, 6,
+ -2, -1, -4, -2, 0, -3, -3, 0, -3, 2, -3, -2, -1, -2, 1, 6,
+ 1, 0, -1, -1, 0, -3, -1, -1, -3, 0, -3, -2, 0, -1, 0, -1, 5,
+ 0, -1, -1, -1, -1, -2, -2, -2, -1, -1, -2, -1, 0, -1, -1, -1, 1, 5,
+ 0, -4, -1, -4, -3, -1, -4, -4, 3, -3, 1, 1, -3, -3, -2, -3, -2, 0, 4,
+ -3, -5, -3, -5, -4, 1, -3, -2, -3, -4, -2, -2, -4, -5, -2, -3, -3, -3, -3, 11,
+ -1, -2, -2, -2, -1, -2, -2, -1, -2, -1, -1, -1, -1, -2, -1, -1, -1, -1, -1, -3, -1,
+ -2, -3, -3, -4, -3, 3, -4, 2, -2, -2, -1, -2, -3, -4, -2, -2, -2, -2, -2, 2, -2, 7,
+ -1, 0, -4, 1, 4, -4, -2, 0, -4, 1, -3, -2, 0, -2, 3, 0, 0, -1, -3, -3, -1, -3, 4};
+
+
+short blosum80mt[]={
+ 7,
+ -3, 6,
+ -1, -6, 13,
+ -3, 6, -7, 10,
+ -2, 1, -7, 2, 8,
+ -4, -6, -4, -6, -6, 10,
+ 0, -2, -6, -3, -4, -6, 9,
+ -3, -1, -7, -2, 0, -2, -4, 12,
+ -3, -6, -2, -7, -6, -1, -7, -6, 7,
+ -1, -1, -6, -2, 1, -5, -3, -1, -5, 8,
+ -3, -7, -3, -7, -6, 0, -7, -5, 2, -4, 6,
+ -2, -5, -3, -6, -4, 0, -5, -4, 2, -3, 3, 9,
+ -3, 5, -5, 2, -1, -6, -1, 1, -6, 0, -6, -4, 9,
+ -1, -4, -6, -3, -2, -6, -5, -4, -5, -2, -5, -4, -4, 12,
+ -2, -1, -5, -1, 3, -5, -4, 1, -5, 2, -4, -1, 0, -3, 9,
+ -3, -2, -6, -3, -1, -5, -4, 0, -5, 3, -4, -3, -1, -3, 1, 9,
+ 2, 0, -2, -1, -1, -4, -1, -2, -4, -1, -4, -3, 1, -2, -1, -2, 7,
+ 0, -1, -2, -2, -2, -4, -3, -3, -2, -1, -3, -1, 0, -3, -1, -2, 2, 8,
+ -1, -6, -2, -6, -4, -2, -6, -5, 4, -4, 1, 1, -5, -4, -4, -4, -3, 0, 7,
+ -5, -8, -5, -8, -6, 0, -6, -4, -5, -6, -4, -3, -7, -7, -4, -5, -6, -5, -5, 16,
+ -1, -3, -4, -3, -2, -3, -3, -2, -2, -2, -2, -2, -2, -3, -2, -2, -1, -1, -2, -5, -2,
+ -4, -5, -5, -6, -5, 4, -6, 3, -3, -4, -2, -3, -4, -6, -3, -4, -3, -3, -3, 3, -3, 11,
+ -2, 0, -7, 1, 6, -6, -4, 0, -6, 1, -5, -3, -1, -2, 5, 0, -1, -2, -4, -5, -1, -4, 6};
+
+
+short blosum85mt[]={
+ 5,
+ -2, 4,
+ -1, -4, 9,
+ -2, 4, -5, 7,
+ -1, 0, -5, 1, 6,
+ -3, -4, -3, -4, -4, 7,
+ 0, -1, -4, -2, -3, -4, 6,
+ -2, -1, -5, -2, -1, -2, -3, 8,
+ -2, -5, -2, -5, -4, -1, -5, -4, 5,
+ -1, -1, -4, -1, 0, -4, -2, -1, -3, 6,
+ -2, -5, -2, -5, -4, 0, -5, -3, 1, -3, 4,
+ -2, -4, -2, -4, -3, -1, -4, -3, 1, -2, 2, 7,
+ -2, 4, -4, 1, -1, -4, -1, 0, -4, 0, -4, -3, 7,
+ -1, -3, -4, -2, -2, -4, -3, -3, -4, -2, -4, -3, -3, 8,
+ -1, -1, -4, -1, 2, -4, -3, 1, -4, 1, -3, 0, 0, -2, 6,
+ -2, -2, -4, -2, -1, -4, -3, 0, -4, 2, -3, -2, -1, -2, 1, 6,
+ 1, 0, -2, -1, -1, -3, -1, -1, -3, -1, -3, -2, 0, -1, -1, -1, 5,
+ 0, -1, -2, -2, -1, -3, -2, -2, -1, -1, -2, -1, 0, -2, -1, -2, 1, 5,
+ -1, -4, -1, -4, -3, -1, -4, -4, 3, -3, 0, 0, -4, -3, -3, -3, -2, 0, 5,
+ -3, -5, -4, -6, -4, 0, -4, -3, -3, -5, -3, -2, -5, -5, -3, -4, -4, -4, -3, 11,
+ -1, -2, -3, -2, -1, -2, -2, -2, -2, -1, -2, -1, -2, -2, -1, -2, -1, -1, -1, -3, -2,
+ -3, -4, -3, -4, -4, 3, -5, 2, -2, -3, -2, -2, -3, -4, -2, -3, -2, -2, -2, 2, -2, 7,
+ -1, 0, -5, 1, 4, -4, -3, 0, -4, 1, -4, -2, -1, -2, 4, 0, -1, -1, -3, -4, -1, -3, 4};
+
+short blosum90mt[]={
+ 5,
+ -2, 4,
+ -1, -4, 9,
+ -3, 4, -5, 7,
+ -1, 0, -6, 1, 6,
+ -3, -4, -3, -5, -5, 7,
+ 0, -2, -4, -2, -3, -5, 6,
+ -2, -1, -5, -2, -1, -2, -3, 8,
+ -2, -5, -2, -5, -4, -1, -5, -4, 5,
+ -1, -1, -4, -1, 0, -4, -2, -1, -4, 6,
+ -2, -5, -2, -5, -4, 0, -5, -4, 1, -3, 5,
+ -2, -4, -2, -4, -3, -1, -4, -3, 1, -2, 2, 7,
+ -2, 4, -4, 1, -1, -4, -1, 0, -4, 0, -4, -3, 7,
+ -1, -3, -4, -3, -2, -4, -3, -3, -4, -2, -4, -3, -3, 8,
+ -1, -1, -4, -1, 2, -4, -3, 1, -4, 1, -3, 0, 0, -2, 7,
+ -2, -2, -5, -3, -1, -4, -3, 0, -4, 2, -3, -2, -1, -3, 1, 6,
+ 1, 0, -2, -1, -1, -3, -1, -2, -3, -1, -3, -2, 0, -2, -1, -1, 5,
+ 0, -1, -2, -2, -1, -3, -3, -2, -1, -1, -2, -1, 0, -2, -1, -2, 1, 6,
+ -1, -4, -2, -5, -3, -2, -5, -4, 3, -3, 0, 0, -4, -3, -3, -3, -2, -1, 5,
+ -4, -6, -4, -6, -5, 0, -4, -3, -4, -5, -3, -2, -5, -5, -3, -4, -4, -4, -3, 11,
+ -1, -2, -3, -2, -2, -2, -2, -2, -2, -1, -2, -1, -2, -2, -1, -2, -1, -1, -2, -3, -2,
+ -3, -4, -4, -4, -4, 3, -5, 1, -2, -3, -2, -2, -3, -4, -3, -3, -3, -2, -3, 2, -2, 8,
+ -1, 0, -5, 0, 4, -4, -3, 0, -4, 1, -4, -2, -1, -2, 4, 0, -1, -1, -3, -4, -1, -3, 4};
+
+
+short pam20mt[]={
+ 6,
+ -5, 6,
+ -8,-14, 10,
+ -4, 6,-16, 8,
+ -3, 0,-16, 2, 8,
+ -9,-12,-15,-17,-16, 9,
+ -3, -4,-11, -4, -5,-10, 7,
+ -8, -2, -8, -5, -6, -7,-10, 9,
+ -6, -7, -7, -9, -6, -3,-13,-11, 9,
+ -8, -3,-16, -6, -5,-16, -8, -8, -7, 7,
+ -7,-10,-17,-15,-10, -4,-12, -7, -2, -9, 7,
+ -6,-12,-16,-13, -8, -5,-10,-13, -2, -3, 0, 11,
+ -5, 6,-13, 1, -3,-10, -4, -1, -6, -2, -8,-11, 8,
+ -2, -8, -9, -9, -7,-11, -7, -5,-10, -8, -8, -9, -7, 8,
+ -5, -4,-16, -4, 0,-15, -8, 0, -9, -4, -6, -5, -5, -4, 9,
+ -8, -9, -9,-12,-11,-10,-11, -3, -6, -1,-10, -5, -7, -5, -2, 9,
+ -1, -2, -4, -5, -5, -7, -3, -7, -8, -5, -9, -6, -1, -3, -6, -4, 7,
+ -1, -4, -9, -6, -7,-10, -7, -8, -3, -4, -8, -5, -3, -5, -7, -8, 0, 7,
+ -3, -9, -7, -9, -8, -9, -7, -7, 1,-10, -3, -2, -9, -7, -8, -9, -8, -4, 7,
+-16,-11,-18,-17,-19, -6,-17, -8,-16,-14, -7,-15, -9,-16,-15, -3, -6,-15,-18, 13,
+ -4, -6,-11, -7, -6, -9, -6, -6, -6, -6, -7, -6, -4, -6, -6, -7, -4, -5, -6,-13, -6,
+ -9, -7, -5,-13, -9, 1,-16, -4, -7,-10, -8,-13, -5,-16,-14,-11, -8, -7, -8, -6, -9, 10,
+ -4, -1,-16, 0, 6,-16, -6, -2, -7, -5, -8, -6, -4, -5, 7, -5, -6, -7, -8,-17, -6,-11, 6};
+
+short pam60mt[]={
+ 5,
+ -2, 5,
+ -5, -9, 9,
+ -2, 5,-10, 7,
+ -1, 2,-10, 3, 7,
+ -6, -8, -9,-11,-10, 8,
+ 0, -2, -7, -2, -2, -7, 6,
+ -5, 0, -6, -2, -3, -4, -6, 8,
+ -3, -4, -4, -5, -4, -1, -7, -6, 7,
+ -5, -1,-10, -2, -3,-10, -5, -4, -4, 6,
+ -4, -7,-11, -9, -7, -1, -8, -4, 0, -6, 6,
+ -3, -6,-10, -7, -5, -2, -6, -7, 1, 0, 2, 10,
+ -2, 5, -7, 2, 0, -6, -1, 1, -4, 0, -5, -6, 6,
+ 0, -4, -6, -5, -3, -7, -4, -2, -6, -4, -5, -6, -4, 7,
+ -3, -1,-10, -1, 2, -9, -5, 2, -5, -1, -3, -2, -2, -1, 7,
+ -5, -5, -6, -6, -6, -7, -7, 0, -4, 2, -6, -2, -3, -2, 0, 8,
+ 1, 0, -1, -2, -2, -5, 0, -4, -4, -2, -6, -4, 1, 0, -3, -2, 5,
+ 1, -2, -5, -3, -4, -6, -3, -5, -1, -2, -5, -2, -1, -2, -4, -4, 1, 6,
+ -1, -5, -4, -6, -4, -5, -4, -5, 3, -6, -1, 0, -5, -4, -5, -5, -4, -1, 6,
+-10, -8,-12,-11,-12, -3,-11, -5,-10, -8, -4, -9, -6,-10, -9, 0, -4, -9,-11, 13,
+ -2, -3, -6, -3, -3, -5, -3, -3, -3, -3, -4, -3, -2, -3, -3, -4, -2, -2, -3, -8, -3,
+ -6, -5, -2, -8, -7, 3,-10, -2, -4, -7, -5, -7, -3,-10, -8, -8, -5, -5, -5, -3, -5, 9,
+ -2, 1,-10, 2, 5,-10, -3, 0, -4, -2, -5, -4, -1, -2, 6, -2, -3, -4, -5,-11, -3, -7, 5};
+
+short pam120mt[]={
+ 3,
+ 0, 4,
+ -3, -6, 9,
+ 0, 4, -7, 5,
+ 0, 3, -7, 3, 5,
+ -4, -5, -6, -7, -7, 8,
+ 1, 0, -4, 0, -1, -5, 5,
+ -3, 1, -4, 0, -1, -3, -4, 7,
+ -1, -3, -3, -3, -3, 0, -4, -4, 6,
+ -2, 0, -7, -1, -1, -7, -3, -2, -3, 5,
+ -3, -4, -7, -5, -4, 0, -5, -3, 1, -4, 5,
+ -2, -4, -6, -4, -3, -1, -4, -4, 1, 0, 3, 8,
+ -1, 3, -5, 2, 1, -4, 0, 2, -2, 1, -4, -3, 4,
+ 1, -2, -4, -3, -2, -5, -2, -1, -3, -2, -3, -3, -2, 6,
+ -1, 0, -7, 1, 2, -6, -3, 3, -3, 0, -2, -1, 0, 0, 6,
+ -3, -2, -4, -3, -3, -5, -4, 1, -2, 2, -4, -1, -1, -1, 1, 6,
+ 1, 0, 0, 0, -1, -3, 1, -2, -2, -1, -4, -2, 1, 1, -2, -1, 3,
+ 1, 0, -3, -1, -2, -4, -1, -3, 0, -1, -3, -1, 0, -1, -2, -2, 2, 4,
+ 0, -3, -3, -3, -3, -3, -2, -3, 3, -4, 1, 1, -3, -2, -3, -3, -2, 0, 5,
+ -7, -6, -8, -8, -8, -1, -8, -3, -6, -5, -3, -6, -4, -7, -6, 1, -2, -6, -8, 12,
+ -1, -1, -4, -2, -1, -3, -2, -2, -1, -2, -2, -2, -1, -2, -1, -2, -1, -1, -1, -5, -2,
+ -4, -3, -1, -5, -5, 4, -6, -1, -2, -5, -2, -4, -2, -6, -5, -5, -3, -3, -3, -2, -3, 8,
+ -1, 2, -7, 3, 4, -6, -2, 1, -3, -1, -3, -2, 0, -1, 4, -1, -1, -2, -3, -7, -1, -5, 4};
+
+
+short pam160mt[]={
+ 2,
+ 0, 3,
+ -2, -4, 9,
+ 0, 3, -5, 4,
+ 0, 2, -5, 3, 4,
+ -3, -4, -5, -6, -5, 7,
+ 1, 0, -3, 0, 0, -4, 4,
+ -2, 1, -3, 0, 0, -2, -3, 6,
+ -1, -2, -2, -3, -2, 0, -3, -3, 5,
+ -2, 0, -5, 0, -1, -5, -2, -1, -2, 4,
+ -2, -4, -6, -4, -3, 1, -4, -2, 2, -3, 5,
+ -1, -3, -5, -3, -2, 0, -3, -3, 2, 0, 3, 7,
+ 0, 2, -4, 2, 1, -3, 0, 2, -2, 1, -3, -2, 3,
+ 1, -1, -3, -2, -1, -4, -1, -1, -2, -2, -3, -2, -1, 5,
+ -1, 1, -5, 1, 2, -5, -2, 2, -2, 0, -2, -1, 0, 0, 5,
+ -2, -1, -3, -2, -2, -4, -3, 1, -2, 3, -3, -1, -1, -1, 1, 6,
+ 1, 0, 0, 0, 0, -3, 1, -1, -2, -1, -3, -2, 1, 1, -1, -1, 2,
+ 1, 0, -2, -1, -1, -3, -1, -2, 0, 0, -2, -1, 0, 0, -1, -1, 1, 3,
+ 0, -2, -2, -3, -2, -2, -2, -2, 3, -3, 1, 1, -2, -2, -2, -3, -1, 0, 4,
+ -5, -5, -7, -6, -7, -1, -7, -3, -5, -4, -2, -4, -4, -5, -5, 1, -2, -5, -6, 12,
+ 0, -1, -3, -1, -1, -3, -1, -1, -1, -1, -2, -1, 0, -1, -1, -1, 0, 0, -1, -4, -1,
+ -3, -3, 0, -4, -4, 5, -5, 0, -2, -4, -2, -3, -2, -5, -4, -4, -3, -3, -3, -1, -3, 8,
+ 0, 2, -5, 2, 3, -5, -1, 1, -2, 0, -3, -2, 1, -1, 3, 0, -1, -1, -2, -6, -1, -4, 3};
+
+short pam250mt[]={
+ 2,
+ 0, 3,
+ -2, -4, 12,
+ 0, 3, -5, 4,
+ 0, 3, -5, 3, 4,
+ -3, -4, -4, -6, -5, 9,
+ 1, 0, -3, 1, 0, -5, 5,
+ -1, 1, -3, 1, 1, -2, -2, 6,
+ -1, -2, -2, -2, -2, 1, -3, -2, 5,
+ -1, 1, -5, 0, 0, -5, -2, 0, -2, 5,
+ -2, -3, -6, -4, -3, 2, -4, -2, 2, -3, 6,
+ -1, -2, -5, -3, -2, 0, -3, -2, 2, 0, 4, 6,
+ 0, 2, -4, 2, 1, -3, 0, 2, -2, 1, -3, -2, 2,
+ 1, -1, -3, -1, -1, -5, 0, 0, -2, -1, -3, -2, 0, 6,
+ 0, 1, -5, 2, 2, -5, -1, 3, -2, 1, -2, -1, 1, 0, 4,
+ -2, -1, -4, -1, -1, -4, -3, 2, -2, 3, -3, 0, 0, 0, 1, 6,
+ 1, 0, 0, 0, 0, -3, 1, -1, -1, 0, -3, -2, 1, 1, -1, 0, 2,
+ 1, 0, -2, 0, 0, -3, 0, -1, 0, 0, -2, -1, 0, 0, -1, -1, 1, 3,
+ 0, -2, -2, -2, -2, -1, -1, -2, 4, -2, 2, 2, -2, -1, -2, -2, -1, 0, 4,
+ -6, -5, -8, -7, -7, 0, -7, -3, -5, -3, -2, -4, -4, -6, -5, 2, -2, -5, -6, 17,
+ 0, -1, -3, -1, -1, -2, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, 0, 0, -1, -4, -1,
+ -3, -3, 0, -4, -4, 7, -5, 0, -1, -4, -1, -2, -2, -5, -4, -4, -3, -3, -2, 0, -2, 10,
+ 0, 2, -5, 3, 3, -5, 0, 2, -2, 0, -3, -2, 1, 0, 3, 0, 0, -1, -2, -6, -1, -4, 3};
+
+short pam350mt[]={
+ 2,
+ 1, 3,
+ -2, -5, 18,
+ 1, 3, -6, 4,
+ 1, 3, -6, 4, 4,
+ -4, -5, -5, -6, -6, 13,
+ 2, 1, -4, 1, 1, -6, 5,
+ -1, 1, -4, 1, 1, -2, -2, 7,
+ 0, -2, -3, -2, -2, 2, -2, -2, 5,
+ -1, 1, -6, 1, 0, -6, -1, 1, -2, 5,
+ -2, -4, -7, -4, -4, 3, -4, -2, 4, -3, 8,
+ -1, -2, -6, -3, -2, 1, -3, -2, 3, 0, 5, 6,
+ 0, 2, -4, 2, 2, -4, 1, 2, -2, 1, -3, -2, 2,
+ 1, 0, -3, 0, 0, -5, 0, 0, -2, -1, -3, -2, 0, 6,
+ 0, 2, -6, 2, 3, -5, -1, 3, -2, 1, -2, -1, 1, 1, 4,
+ -1, 0, -4, -1, 0, -5, -2, 2, -2, 4, -3, 0, 1, 0, 2, 7,
+ 1, 1, 0, 1, 0, -4, 1, -1, -1, 0, -3, -2, 1, 1, 0, 0, 1,
+ 1, 0, -2, 0, 0, -3, 1, -1, 0, 0, -2, -1, 1, 1, 0, -1, 1, 2,
+ 0, -2, -2, -2, -2, -1, -1, -2, 4, -2, 3, 2, -2, -1, -2, -3, -1, 0, 5,
+ -7, -6,-10, -8, -8, 1, -8, -3, -6, -4, -2, -5, -5, -7, -5, 4, -3, -6, -7, 27,
+ 0, 0, -3, -1, 0, -2, -1, 0, 0, -1, -1, 0, 0, 0, 0, -1, 0, 0, 0, -5, -1,
+ -4, -4, 1, -5, -5, 11, -6, 0, 0, -5, 0, -2, -3, -6, -5, -5, -3, -3, -2, 1, -2, 14,
+ 0, 2, -6, 3, 3, -6, 0, 2, -2, 1, -3, -2, 2, 0, 3, 1, 0, 0, -2, -7, 0, -5, 3};
+
+
+short md_40mt[]={
+ 9,
+ 0, 0,
+ -7, 0, 16,
+ -6, 0,-13, 11,
+ -5, 0,-15, 3, 11,
+-11, 0, -5,-15,-16, 13,
+ -3, 0, -7, -4, -4,-15, 10,
+ -9, 0, -6, -4, -8, -7,-10, 14,
+ -6, 0,-11,-12,-12, -5,-13,-11, 11,
+ -8, 0,-12, -8, -3,-16, -9, -6,-11, 11,
+ -9, 0,-10,-14,-13, -1,-14, -7, -1,-12, 9,
+ -6, 0, -9,-12,-11, -7,-12, -9, 1, -7, 1, 14,
+ -6, 0, -8, 1, -5,-12, -5, 0, -8, -1,-12, -9, 12,
+ -2, 0,-11,-11,-11,-11, -9, -4,-11,-10, -5,-10, -9, 12,
+ -7, 0,-12, -6, 0,-14, -9, 2,-12, -1, -6, -8, -5, -3, 12,
+ -7, 0, -5,-10, -8,-15, -4, 0,-10, 3, -9, -8, -6, -6, 0, 11,
+ 0, 0, -2, -6, -8, -6, -2, -6, -8, -7, -7, -8, 1, -1, -7, -5, 9,
+ 1, 0, -7, -8, -8,-11, -7, -7, -2, -5, -9, -2, -2, -4, -7, -6, 1, 10,
+ -1, 0, -7, -9, -8, -6, -8,-12, 4,-12, -2, 0,-10, -9,-11,-11, -7, -4, 10,
+-14, 0, -4,-15,-15, -7, -7,-13,-13,-13, -8,-11,-14,-14,-11, -4, -9,-12,-10, 18,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+-13, 0, -2, -8,-14, 2,-13, 2, -9,-13, -9,-11, -6,-13, -9,-10, -7,-10,-11, -6, 0, 14,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+short md_120mt[]={
+ 6,
+ 0, 0,
+ -3, 0, 14,
+ -2, 0, -7, 8,
+ -2, 0, -8, 5, 8,
+ -6, 0, -2, -9,-10, 11,
+ 0, 0, -3, 0, -1, -9, 8,
+ -4, 0, -2, -1, -3, -2, -4, 11,
+ -1, 0, -5, -7, -7, -1, -6, -6, 7,
+ -4, 0, -6, -2, 0, -9, -4, -1, -6, 8,
+ -4, 0, -5, -8, -8, 2, -8, -4, 2, -6, 7,
+ -2, 0, -5, -7, -6, -2, -6, -5, 3, -4, 3, 10,
+ -1, 0, -3, 3, -1, -6, -1, 2, -4, 1, -6, -5, 8,
+ 0, 0, -5, -5, -5, -5, -4, -1, -5, -4, -2, -5, -3, 9,
+ -3, 0, -6, -1, 2, -7, -4, 4, -6, 2, -3, -4, -1, 0, 9,
+ -3, 0, -2, -4, -3, -8, -1, 2, -6, 4, -5, -4, -2, -2, 2, 8,
+ 2, 0, 0, -2, -3, -3, 0, -2, -3, -3, -3, -3, 2, 1, -3, -2, 5,
+ 2, 0, -3, -3, -4, -6, -2, -3, 0, -2, -4, 0, 1, 0, -3, -3, 2, 6,
+ 1, 0, -3, -5, -5, -2, -4, -6, 5, -6, 1, 2, -5, -4, -6, -6, -3, 0, 7,
+ -8, 0, 0, -9, -9, -3, -3, -6, -7, -6, -4, -6, -8, -8, -6, -1, -5, -7, -6, 17,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ -7, 0, 2, -4, -7, 5, -8, 4, -5, -7, -4, -6, -2, -7, -4, -5, -3, -6, -6, -2, 0, 12,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+short md_250mt[]={
+ 2,
+ 0, 0,
+ -1, 0, 11,
+ -1, 0, -3, 5,
+ -1, 0, -4, 4, 5,
+ -3, 0, 0, -5, -5, 8,
+ 1, 0, -1, 1, 1, -5, 5,
+ -2, 0, 0, 0, 0, 0, -2, 6,
+ 0, 0, -2, -3, -3, 0, -3, -3, 4,
+ -1, 0, -3, 0, 1, -5, -1, 1, -3, 5,
+ -1, 0, -2, -4, -4, 2, -4, -2, 2, -3, 5,
+ 0, 0, -2, -3, -3, 0, -3, -2, 3, -2, 3, 6,
+ 0, 0, -1, 2, 1, -3, 0, 1, -2, 1, -3, -2, 3,
+ 1, 0, -2, -2, -2, -2, -1, 0, -2, -1, 0, -2, -1, 6,
+ -1, 0, -3, 0, 2, -4, -1, 3, -3, 2, -2, -2, 0, 0, 5,
+ -1, 0, -1, -1, 0, -4, 0, 2, -3, 4, -3, -2, 0, -1, 2, 5,
+ 1, 0, 1, 0, -1, -2, 1, -1, -1, -1, -2, -1, 1, 1, -1, -1, 2,
+ 2, 0, -1, -1, -1, -2, 0, -1, 1, -1, -1, 0, 1, 1, -1, -1, 1, 2,
+ 1, 0, -2, -3, -2, 0, -2, -3, 4, -3, 2, 2, -2, -1, -3, -3, -1, 0, 4,
+ -4, 0, 1, -5, -5, -1, -1, -3, -4, -3, -2, -3, -4, -4, -3, 0, -3, -4, -3, 15,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ -3, 0, 2, -2, -4, 5, -4, 4, -2, -3, -1, -3, -1, -3, -2, -2, -1, -3, -3, 0, 0, 9,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+short md_350mt[]={
+ 1,
+ 0, 0,
+ 0, 0, 9,
+ 0, 0, -2, 3,
+ 0, 0, -2, 3, 3,
+ -2, 0, 1, -3, -4, 6,
+ 1, 0, 0, 1, 1, -3, 4,
+ -1, 0, 0, 0, 0, 0, -1, 3,
+ 0, 0, -1, -2, -2, 1, -2, -2, 3,
+ -1, 0, -1, 0, 1, -3, 0, 1, -2, 3,
+ -1, 0, -1, -3, -3, 2, -2, -1, 2, -2, 3,
+ 0, 0, -1, -2, -2, 1, -2, -1, 2, -2, 2, 3,
+ 0, 0, -1, 1, 1, -2, 0, 1, -1, 1, -2, -1, 2,
+ 1, 0, -1, -1, -1, -2, -1, 0, -1, -1, 0, -1, 0, 4,
+ -1, 0, -2, 1, 1, -2, 0, 2, -2, 2, -1, -1, 0, 0, 3,
+ -1, 0, 0, 0, 0, -3, 0, 1, -2, 3, -2, -1, 0, 0, 2, 3,
+ 1, 0, 0, 0, 0, -1, 1, 0, -1, 0, -1, -1, 1, 1, 0, 0, 1,
+ 1, 0, 0, 0, -1, -1, 0, -1, 0, 0, -1, 0, 0, 1, -1, 0, 1, 1,
+ 0, 0, -1, -2, -2, 0, -1, -2, 2, -2, 1, 2, -1, -1, -2, -2, 0, 0, 2,
+ -3, 0, 1, -4, -3, 0, -1, -2, -3, -2, -1, -2, -3, -3, -2, 0, -2, -3, -2, 14,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ -2, 0, 2, -2, -2, 5, -3, 3, -1, -2, 0, -1, -1, -2, -1, -1, -1, -2, -2, 0, 0, 7,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+/* Identity scores: 23-row lower triangle with 10 on the diagonal and 0 elsewhere; pairalign.c selects it for the "id" pairwise matrix */
+short idmat[]={
+10,
+ 0, 10,
+ 0, 0, 10,
+ 0, 0, 0, 10,
+ 0, 0, 0, 0, 10,
+ 0, 0, 0, 0, 0, 10,
+ 0, 0, 0, 0, 0, 0, 10,
+ 0, 0, 0, 0, 0, 0, 0, 10,
+ 0, 0, 0, 0, 0, 0, 0, 0, 10,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 10,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,10,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,10,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,10,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,10,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,10};
+
+short gon40mt[]={
+ 92,
+ 0, 0,
+ -31, 0, 163,
+ -56, 0,-135, 111,
+ -37, 0,-140, 16, 105,
+ -92, 0, -64,-152,-143, 126,
+ -32, 0, -91, -51, -76,-152, 105,
+ -65, 0, -67, -41, -40, -50, -81, 145,
+ -76, 0, -87,-150,-106, -39,-158, -94, 104,
+ -54, 0,-132, -47, -13,-127, -79, -34, -86, 103,
+ -68, 0, -85,-155,-108, -13,-141, -85, 5, -85, 89,
+ -45, 0, -63,-130, -80, -16,-114, -60, 10, -57, 16, 140,
+ -62, 0, -83, 6, -38,-104, -40, -7, -99, -20,-112, -91, 115,
+ -37, 0,-137, -69, -60,-128, -87, -71,-108, -62, -83,-119, -78, 124,
+ -43, 0,-113, -32, 10,-100, -71, 0, -91, 2, -60, -35, -25, -46, 118,
+ -61, 0, -86, -77, -50,-130, -69, -31,-103, 19, -84, -81, -47, -73, -6, 112,
+ 0, 0, -35, -36, -41,-111, -37, -48, -95, -43, -95, -64, -11, -35, -35, -51, 99,
+ -25, 0, -59, -47, -52, -90, -85, -46, -51, -34, -78, -44, -27, -42, -39, -52, 13, 100,
+ -22, 0, -43,-133, -74, -58,-122, -98, 28, -82, -18, -22,-103, -86, -79, -88, -74, -25, 97,
+-120, 0, -68,-171,-131, -6,-108, -70, -93,-127, -71, -72,-119,-149, -87, -63, -98,-120,-115, 181,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ -95, 0, -56, -98,-107, 31,-129, 5, -76, -88, -64, -66, -62,-106, -81, -75, -69, -87, -73, 1, 0, 135,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+short gon80mt[]={
+ 75,
+ 0, 0,
+ -10, 0, 154,
+ -31, 0, -93, 96,
+ -17, 0, -94, 31, 88,
+ -64, 0, -39,-111,-102, 114,
+ -11, 0, -61, -26, -47,-115, 97,
+ -39, 0, -43, -17, -17, -26, -53, 127,
+ -43, 0, -54,-106, -73, -15,-114, -64, 86,
+ -30, 0, -88, -21, 4, -89, -50, -12, -59, 85,
+ -43, 0, -55,-109, -75, 7,-104, -57, 22, -58, 77,
+ -26, 0, -39, -88, -53, 3, -83, -38, 25, -37, 31, 117,
+ -34, 0, -55, 21, -13, -75, -18, 9, -71, -2, -79, -62, 97,
+ -16, 0, -93, -42, -35, -93, -58, -45, -75, -37, -58, -78, -48, 114,
+ -22, 0, -76, -9, 23, -70, -44, 14, -60, 17, -39, -19, -6, -24, 95,
+ -36, 0, -60, -44, -23, -90, -43, -10, -71, 33, -58, -53, -22, -45, 11, 97,
+ 14, 0, -15, -14, -19, -77, -16, -25, -62, -20, -64, -41, 5, -14, -15, -27, 78,
+ -5, 0, -34, -24, -27, -62, -52, -24, -28, -15, -49, -25, -7, -20, -18, -27, 25, 81,
+ -6, 0, -21, -89, -51, -31, -86, -65, 41, -54, 3, 1, -69, -57, -51, -60, -43, -9, 80,
+ -87, 0, -43,-124, -98, 16, -81, -43, -63, -89, -44, -45, -86,-112, -62, -41, -72, -87, -80, 173,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ -65, 0, -32, -69, -74, 49, -94, 21, -47, -60, -35, -37, -39, -76, -53, -50, -46, -58, -47, 23, 0, 123,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+short gon120mt[]={
+ 59,
+ 0, 0,
+ -1, 0, 144,
+ -18, 0, -69, 82,
+ -9, 0, -68, 35, 72,
+ -48, 0, -26, -87, -78, 102,
+ -3, 0, -45, -14, -31, -92, 90,
+ -26, 0, -31, -7, -6, -14, -37, 110,
+ -27, 0, -36, -80, -55, -3, -87, -48, 72,
+ -19, 0, -64, -8, 11, -67, -34, -2, -44, 69,
+ -30, 0, -39, -82, -57, 15, -82, -42, 28, -44, 66,
+ -17, 0, -26, -64, -40, 11, -65, -28, 29, -27, 34, 95,
+ -20, 0, -41, 26, -1, -58, -7, 14, -55, 5, -61, -46, 80,
+ -6, 0, -68, -28, -22, -72, -41, -31, -56, -24, -44, -56, -32, 105,
+ -12, 0, -56, 1, 25, -53, -30, 17, -43, 20, -30, -14, 1, -14, 74,
+ -23, 0, -45, -27, -10, -68, -30, -1, -53, 36, -44, -38, -10, -30, 16, 83,
+ 16, 0, -7, -5, -9, -58, -6, -14, -44, -10, -47, -29, 10, -5, -7, -15, 60,
+ 2, 0, -21, -13, -15, -47, -35, -14, -17, -6, -34, -16, 0, -10, -9, -16, 26, 64,
+ 0, 0, -11, -65, -38, -17, -65, -47, 42, -39, 13, 10, -50, -42, -36, -44, -28, -3, 65,
+ -68, 0, -29, -96, -78, 27, -66, -28, -46, -68, -29, -31, -68, -89, -49, -30, -57, -67, -59, 166,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ -48, 0, -20, -53, -56, 55, -74, 26, -31, -44, -20, -22, -28, -59, -38, -37, -35, -42, -33, 33, 0, 111,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+short gon160mt[]={
+ 46,
+ 0, 0,
+ 3, 0, 135,
+ -11, 0, -53, 70,
+ -4, 0, -52, 34, 59,
+ -38, 0, -18, -70, -62, 91,
+ 2, 0, -34, -7, -21, -76, 82,
+ -18, 0, -23, -1, -1, -7, -27, 93,
+ -18, 0, -25, -62, -43, 3, -70, -37, 59,
+ -12, 0, -48, -1, 13, -53, -24, 2, -35, 55,
+ -22, 0, -29, -65, -45, 19, -67, -32, 30, -34, 57,
+ -12, 0, -19, -50, -31, 14, -52, -21, 29, -21, 34, 76,
+ -12, 0, -31, 26, 5, -47, -2, 15, -44, 8, -48, -36, 65,
+ -1, 0, -52, -19, -14, -58, -30, -22, -43, -16, -35, -42, -22, 96,
+ -7, 0, -42, 6, 23, -41, -21, 17, -32, 20, -24, -12, 5, -8, 56,
+ -16, 0, -35, -16, -3, -53, -21, 3, -41, 35, -35, -29, -4, -21, 17, 71,
+ 16, 0, -2, 0, -3, -45, -1, -8, -33, -4, -36, -23, 11, 0, -2, -9, 44,
+ 5, 0, -14, -6, -8, -36, -24, -8, -12, -2, -24, -11, 3, -4, -4, -9, 23, 50,
+ 1, 0, -6, -49, -30, -8, -52, -35, 40, -30, 17, 14, -38, -32, -27, -34, -20, 0, 53,
+ -55, 0, -21, -78, -64, 32, -55, -19, -34, -54, -20, -22, -55, -74, -40, -24, -47, -54, -45, 158,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ -37, 0, -13, -42, -44, 56, -60, 27, -20, -35, -11, -13, -22, -48, -29, -29, -28, -32, -24, 38, 0, 100,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+/* 23-row lower-triangular protein table; pairalign.c loads it for the "gonnet" pairwise matrix and divides int_scale by 10 when it does (presumably because these values are stored at 10x scale -- confirm) */
+short gon250mt[]={
+ 24,
+ 0, 0,
+ 5, 0, 115,
+ -3, 0, -32, 47,
+ 0, 0, -30, 27, 36,
+ -23, 0, -8, -45, -39, 70,
+ 5, 0, -20, 1, -8, -52, 66,
+ -8, 0, -13, 4, 4, -1, -14, 60,
+ -8, 0, -11, -38, -27, 10, -45, -22, 40,
+ -4, 0, -28, 5, 12, -33, -11, 6, -21, 32,
+ -12, 0, -15, -40, -28, 20, -44, -19, 28, -21, 40,
+ -7, 0, -9, -30, -20, 16, -35, -13, 25, -14, 28, 43,
+ -3, 0, -18, 22, 9, -31, 4, 12, -28, 8, -30, -22, 38,
+ 3, 0, -31, -7, -5, -38, -16, -11, -26, -6, -23, -24, -9, 76,
+ -2, 0, -24, 9, 17, -26, -10, 12, -19, 15, -16, -10, 7, -2, 27,
+ -6, 0, -22, -3, 4, -32, -10, 6, -24, 27, -22, -17, 3, -9, 15, 47,
+ 11, 0, 1, 5, 2, -28, 4, -2, -18, 1, -21, -14, 9, 4, 2, -2, 22,
+ 6, 0, -5, 0, -1, -22, -11, -3, -6, 1, -13, -6, 5, 1, 0, -2, 15, 25,
+ 1, 0, 0, -29, -19, 1, -33, -20, 31, -17, 18, 16, -22, -18, -15, -20, -10, 0, 34,
+ -36, 0, -10, -52, -43, 36, -40, -8, -18, -35, -7, -10, -36, -50, -27, -16, -33, -35, -26, 142,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ -22, 0, -5, -28, -27, 51, -40, 22, -7, -21, 0, -2, -14, -31, -17, -18, -19, -19, -11, 41, 0, 78,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+short gon300mt[]={
+ 16,
+ 0, 0,
+ 5, 0, 104,
+ -1, 0, -24, 37,
+ 1, 0, -23, 23, 27,
+ -18, 0, -5, -37, -31, 60,
+ 5, 0, -15, 3, -4, -42, 58,
+ -6, 0, -10, 5, 4, 0, -10, 45,
+ -6, 0, -7, -30, -21, 11, -36, -16, 33,
+ -2, 0, -21, 6, 11, -26, -7, 5, -17, 24,
+ -9, 0, -10, -32, -22, 19, -36, -14, 25, -17, 33,
+ -5, 0, -6, -24, -16, 15, -28, -10, 22, -11, 24, 31,
+ -1, 0, -14, 18, 9, -25, 5, 10, -22, 8, -24, -17, 27,
+ 3, 0, -23, -4, -2, -30, -11, -8, -20, -3, -18, -19, -6, 66,
+ -1, 0, -18, 9, 14, -20, -6, 9, -15, 13, -13, -8, 7, -1, 18,
+ -4, 0, -17, 0, 5, -25, -6, 6, -19, 22, -18, -13, 4, -6, 13, 37,
+ 8, 0, 1, 5, 3, -22, 4, -1, -14, 2, -17, -11, 7, 4, 2, 0, 15,
+ 5, 0, -3, 1, 1, -17, -7, -1, -4, 2, -9, -5, 4, 2, 1, -1, 11, 17,
+ 0, 0, 1, -23, -15, 4, -26, -15, 26, -13, 17, 15, -17, -14, -12, -15, -8, 0, 26,
+ -29, 0, -7, -42, -36, 36, -34, -5, -13, -28, -4, -6, -30, -41, -23, -14, -27, -28, -19, 132,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ -17, 0, -3, -22, -22, 46, -33, 18, -3, -17, 3, 1, -12, -25, -14, -14, -15, -15, -7, 40, 0, 67,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+short gon350mt[]={
+ 10,
+ 0, 0,
+ 4, 0, 93,
+ 0, 0, -19, 29,
+ 1, 0, -17, 19, 20,
+ -14, 0, -3, -30, -25, 51,
+ 5, 0, -12, 4, -2, -35, 51,
+ -4, 0, -8, 5, 4, 1, -7, 33,
+ -4, 0, -5, -24, -17, 11, -29, -13, 27,
+ -1, 0, -16, 6, 9, -21, -4, 5, -13, 18,
+ -7, 0, -7, -25, -18, 18, -30, -11, 22, -14, 28,
+ -4, 0, -4, -19, -13, 14, -23, -8, 19, -9, 21, 23,
+ 0, 0, -11, 15, 9, -20, 5, 8, -18, 7, -19, -14, 20,
+ 3, 0, -18, -2, 0, -25, -7, -5, -16, -2, -15, -14, -3, 56,
+ 0, 0, -14, 8, 11, -16, -4, 7, -11, 10, -11, -7, 6, 0, 12,
+ -2, 0, -13, 2, 6, -20, -4, 6, -15, 18, -14, -11, 4, -4, 10, 28,
+ 6, 0, 1, 5, 3, -18, 5, 0, -11, 2, -13, -9, 6, 4, 2, 1, 10,
+ 4, 0, -2, 2, 1, -13, -5, -1, -3, 2, -7, -4, 4, 2, 1, 0, 8, 11,
+ 0, 0, 2, -18, -12, 5, -21, -11, 22, -10, 16, 14, -13, -11, -9, -12, -6, 0, 21,
+ -24, 0, -4, -35, -29, 35, -30, -3, -9, -23, -1, -3, -24, -34, -19, -12, -22, -23, -14, 124,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ -14, 0, -1, -18, -17, 42, -27, 15, -1, -14, 5, 2, -10, -20, -11, -12, -12, -12, -4, 39, 0, 57,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+/* 17 nucleotide/ambiguity codes; the two DNA tables after this line each have 17 rows (presumably indexed in this order -- confirm against get_matrix) */
+char *nucleic_acid_order = "ABCDGHKMNRSTUVWXY";
+/* 17-row lower-triangular DNA table: 10 on the diagonal of rows 0, 2, 4, 11 and 12 (A, C, G, T, U if rows follow nucleic_acid_order), 0 everywhere else; pairalign.c picks it for the "clustalw" DNA matrix */
+short clustalvdnamt[]={
+ 10,
+ 0, 0,
+ 0, 0, 10,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0, 10,
+ 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+/* 17-row lower-triangular DNA table whose entries are all either 10 or -9; pairalign.c picks it for the "iub" DNA matrix */
+short swgapdnamt[]={
+ 10,
+ -9, 10,
+ -9, 10, 10,
+ 10, 10, -9, 10,
+ -9, 10, -9, 10, 10,
+ 10, 10, 10, 10, -9, 10,
+ -9, 10, -9, 10, 10, 10, 10,
+ 10, 10, 10, 10, -9, 10, -9, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, -9, 10, 10, 10, 10, 10, 10, 10,
+ -9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+ -9, 10, -9, 10, -9, 10, 10, -9, 10, -9, -9, 10,
+ -9, 10, -9, 10, -9, 10, 10, -9, 10, -9, -9, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, -9, -9, 10,
+ 10, 10, -9, 10, -9, 10, 10, 10, 10, 10, -9, 10, 10, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+ -9, 10, 10, 10, -9, 10, 10, 10, 10, -9, 10, 10, 10, 10, 10, 10, 10};
+
diff --git a/new.h b/new.h
new file mode 100644
index 0000000..09a8bae
--- /dev/null
+++ b/new.h
@@ -0,0 +1,58 @@
+/* new structures and functions introduced by Jimin Pei */
+
+/* node for a tree containing the sequences; usable as sstree (the struct) or streeptr (pointer to it) */
+typedef struct snode {
+ struct snode *left; /* child link */
+ struct snode *right; /* child link */
+ struct snode *parent; /* link to the enclosing node */
+ float dist; /* presumably a branch length -- TODO confirm in calctree.c */
+ sint leaf;
+ int order;
+ char **name; /* presumably one name per sequence held at this node -- confirm */
+ int **seq; /* presumably seqnum rows of seqlength entries -- confirm */
+ int seqnum;
+ int seqlength;
+ int **abstractseq; /* presumably abseqnum rows of abseqlength entries -- confirm */
+ short abseqnum;
+ int abseqlength;
+} sstree, *streeptr;
+
+/* node for a linked list of scores; usable as SN */
+typedef struct scorenode {
+ struct scorenode *next; /* following node in the list */
+ short sbe; /* score before extension */
+ short ind; /* index of the second sequence */
+ int sae; /* score after extension */
+} SN;
+
+
+/* structure of element of an alignment */
+/*typedef struct AlignmentElement {
+ int gid;
+ int pid;
+} AE;
+*/
+
+/* function prototypes */
+/* lib_generation.c */
+SN * SNavail(); /* empty parentheses: before C23 this declares no parameter information, so call arguments are not checked */
+void lib_generation(); /* same: unprototyped declaration */
+void AddSbe(SN *node, int indi, int s);
+void printLib(int gi, int gj);
+
+
+/* calctree.c */
+void assign_node(streeptr p, sint *aligned);
+double average_group_identity(sint *group);
+
+/* alcomp2.c */
+void prfprfmatrix(int **align1, int **align2, int alnlength1, int alnlength2, int nali1, int nali2, double **prfprfmat);
+
+/* lsim1.c */
+void SIM(int M,int N,int K,int **V,int Q,int R,int nseq,int gi, int gj);
+
+/* lib_extension.c */
+void lib_extension(); /* unprototyped declaration, as above */
+
+/* prfalign1.c */
+lint prfalign1(sint *group, sint gi, sint gj);
diff --git a/pairalign.c b/pairalign.c
new file mode 100644
index 0000000..5d76b8d
--- /dev/null
+++ b/pairalign.c
@@ -0,0 +1,818 @@
+/* Change int h to int gh everywhere DES June 1994 */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include "pcma.h"
+
+#define MIN(a,b) ((a)<(b)?(a):(b)) /* each argument is evaluated twice: pass side-effect-free expressions */
+#define MAX(a,b) ((a)>(b)?(a):(b)) /* same double-evaluation caveat as MIN */
+/* Cost of a gap of k positions (0 when k <= 0): an opening term plus gh per position; g, gh, tb and te are taken from the scope where the macro is expanded */
+#define gap(k) ((k) <= 0 ? 0 : g + gh * (k))
+#define tbgap(k) ((k) <= 0 ? 0 : tb + gh * (k))
+#define tegap(k) ((k) <= 0 ? 0 : te + gh * (k))
+
+/*
+* Prototypes
+*/
+static void add(sint v);
+static sint calc_score(sint iat, sint jat, sint v1, sint v2);
+static float tracepath(sint tsb1,sint tsb2);
+static void forward_pass(char *ia, char *ib, sint n, sint m);
+static void reverse_pass(char *ia, char *ib);
+static sint diff(sint A, sint B, sint M, sint N, sint tb, sint te);
+static void del(sint k);
+
+/*
+ * Global variables
+ */
+#ifdef MAC
+#define pwint short
+#else
+#define pwint int
+#endif
+static sint int_scale;
+
+extern double **tmat;
+extern float pw_go_penalty;
+extern float pw_ge_penalty;
+extern float transition_weight;
+extern sint nseqs;
+extern sint max_aa;
+extern sint gap_pos1,gap_pos2;
+extern sint max_aln_length;
+extern sint *seqlen_array;
+extern sint debug;
+extern sint mat_avscore;
+extern short blosum30mt[],pam350mt[],idmat[],pw_usermat[],pw_userdnamat[];
+extern short clustalvdnamt[],swgapdnamt[];
+extern short gon250mt[];
+extern short def_dna_xref[],def_aa_xref[],pw_dna_xref[],pw_aa_xref[];
+extern Boolean dnaflag;
+extern char **seq_array;
+extern char *amino_acid_codes;
+extern char pw_mtrxname[];
+extern char pw_dnamtrxname[];
+
+static float mm_score; /* tracepath() result, then divided by the shorter ungapped length */
+static sint print_ptr,last_print; /* next free slot in displ / last value written by add() or del() */
+static sint *displ; /* edit script: 0 = aligned pair, k>0 = k residues of seq2 only, k<0 = -k residues of seq1 only */
+static pwint *HH, *DD, *RR, *SS; /* work rows shared by forward_pass(), reverse_pass() and diff() */
+static sint g, gh; /* gap terms set per pair from pw_go_penalty and pw_ge_penalty */
+static sint seq1, seq2; /* seq_array indices of the pair being aligned */
+static sint matrix[NUMRES][NUMRES]; /* substitution scores filled by get_matrix() */
+static pwint maxscore; /* best score from forward_pass(); later overwritten with the diff() result */
+static sint sb1, sb2, se1, se2; /* start (reverse_pass) and end (forward_pass) of the aligned region in seq1/seq2 */
+
+/* Align every pair si in [max(0,istart), min(nseqs,iend)) with sj in [max(si+1,jstart+1), min(nseqs,jend)) and store 1 - (fraction of identical residues) in tmat[si+1][sj+1] and its mirror; returns 1, or -1 when the scoring matrix cannot be loaded. */
+sint pairalign(sint istart, sint iend, sint jstart, sint jend)
+{
+ short *mat_xref;
+ static sint si, sj, i;
+ static sint n,m,len1,len2;
+ static sint maxres;
+ static short *matptr;
+ static char c;
+ static float gscale,ghscale;
+
+#ifdef MAC
+ int_scale = 10;
+#else
+ int_scale = 100;
+#endif
+ gscale=ghscale=1.0;
+ if (dnaflag)
+ {
+if (debug>1) fprintf(stdout,"matrix %s\n",pw_dnamtrxname);
+ if (strcmp(pw_dnamtrxname, "iub") == 0)
+ {
+ matptr = swgapdnamt;
+ mat_xref = def_dna_xref;
+ }
+ else if (strcmp(pw_dnamtrxname, "clustalw") == 0)
+ {
+ matptr = clustalvdnamt;
+ mat_xref = def_dna_xref;
+ gscale=0.6667;
+ ghscale=0.751;
+ }
+ else
+ {
+ matptr = pw_userdnamat;
+ mat_xref = pw_dna_xref;
+ }
+ maxres = get_matrix(matptr, mat_xref, matrix, TRUE, int_scale);
+ if (maxres == 0) return((sint)-1);
+/* six cells are overwritten with transition_weight times the [0][0] score */
+ matrix[0][4]=transition_weight*matrix[0][0];
+ matrix[4][0]=transition_weight*matrix[0][0];
+ matrix[2][11]=transition_weight*matrix[0][0];
+ matrix[11][2]=transition_weight*matrix[0][0];
+ matrix[2][12]=transition_weight*matrix[0][0];
+ matrix[12][2]=transition_weight*matrix[0][0];
+ }
+ else
+ {
+if (debug>1) fprintf(stdout,"matrix %s\n",pw_mtrxname);
+ if (strcmp(pw_mtrxname, "blosum") == 0)
+ {
+ matptr = blosum30mt;
+ mat_xref = def_aa_xref;
+ }
+ else if (strcmp(pw_mtrxname, "pam") == 0)
+ {
+ matptr = pam350mt;
+ mat_xref = def_aa_xref;
+ }
+ else if (strcmp(pw_mtrxname, "gonnet") == 0)
+ {
+ matptr = gon250mt;
+ int_scale /= 10;
+ mat_xref = def_aa_xref;
+ }
+ else if (strcmp(pw_mtrxname, "id") == 0)
+ {
+ matptr = idmat;
+ mat_xref = def_aa_xref;
+ }
+ else
+ {
+ matptr = pw_usermat;
+ mat_xref = pw_aa_xref;
+ }
+
+ maxres = get_matrix(matptr, mat_xref, matrix, TRUE, int_scale);
+ if (maxres == 0) return((sint)-1);
+ }
+
+/* work arrays are allocated only once the matrix has loaded, so the early returns above have nothing to release */
+ displ = (sint *)ckalloc((2*max_aln_length+1) * sizeof(sint));
+ HH = (pwint *)ckalloc((max_aln_length) * sizeof(pwint));
+ DD = (pwint *)ckalloc((max_aln_length) * sizeof(pwint));
+ RR = (pwint *)ckalloc((max_aln_length) * sizeof(pwint));
+ SS = (pwint *)ckalloc((max_aln_length) * sizeof(pwint));
+
+ for (si=MAX(0,istart);si<nseqs && si<iend;si++)
+ {
+ n = seqlen_array[si+1];
+ len1 = 0;
+ for (i=1;i<=n;i++) {
+ c = seq_array[si+1][i];
+ if ((c!=gap_pos1) && (c != gap_pos2)) len1++;
+ }
+
+ for (sj=MAX(si+1,jstart+1);sj<nseqs && sj<jend;sj++)
+ {
+ m = seqlen_array[sj+1];
+ if(n==0 || m==0) {
+ tmat[si+1][sj+1]=1.0;
+ tmat[sj+1][si+1]=1.0;
+ continue;
+ }
+ len2 = 0;
+ for (i=1;i<=m;i++) {
+ c = seq_array[sj+1][i];
+ if ((c!=gap_pos1) && (c != gap_pos2)) len2++;
+ }
+/* gap terms for this pair: g from pw_go_penalty, gh from pw_ge_penalty */
+ if (dnaflag) {
+ g = 2 * (float)pw_go_penalty * int_scale*gscale;
+ gh = pw_ge_penalty * int_scale*ghscale;
+ }
+ else {
+ if (mat_avscore <= 0)
+ g = 2 * (float)(pw_go_penalty + log((double)(MIN(n,m))))*int_scale;
+ else
+ g = 2 * mat_avscore * (float)(pw_go_penalty +
+ log((double)(MIN(n,m))))*gscale;
+ gh = pw_ge_penalty * int_scale;
+ }
+
+if (debug>1) fprintf(stdout,"go %d ge %d\n",(pint)g,(pint)gh);
+
+/*
+ align the sequences
+*/
+ seq1 = si+1;
+ seq2 = sj+1;
+
+ forward_pass(&seq_array[seq1][0], &seq_array[seq2][0],
+ n, m);
+
+ reverse_pass(&seq_array[seq1][0], &seq_array[seq2][0]);
+
+ last_print = 0;
+ print_ptr = 1;
+/*
+ sb1 = sb2 = 1;
+ se1 = n-1;
+ se2 = m-1;
+*/
+
+/* use Myers and Miller to align two sequences */
+
+ maxscore = diff(sb1-1, sb2-1, se1-sb1+1, se2-sb2+1,
+ (sint)0, (sint)0);
+
+/* calculate percentage residue identity */
+
+ mm_score = tracepath(sb1,sb2);
+
+ if(len1==0 || len2==0) mm_score=0;
+ else
+ mm_score /= (float)MIN(len1,len2);
+
+ tmat[si+1][sj+1] = ((float)100.0 - mm_score)/(float)100.0;
+ tmat[sj+1][si+1] = ((float)100.0 - mm_score)/(float)100.0;
+
+if (debug>4)
+{
+ fprintf(stdout,"Sequences (%d:%d) Aligned. Score: %d CompScore: %d\n",
+ (pint)si+1,(pint)sj+1,
+ (pint)mm_score,
+ (pint)((MIN(len1,len2) > 0) ? maxscore/(MIN(len1,len2)*100) : 0)); /* all-gap sequence: avoid dividing by zero */
+}
+else
+{
+ /* JP: disable info */
+ /*info("Sequences (%d:%d) Aligned. Score: %d",
+ (pint)si+1,(pint)sj+1,
+ (pint)mm_score);
+ */
+}
+
+ }
+ }
+ displ=ckfree((void *)displ);
+ HH=ckfree((void *)HH);
+ DD=ckfree((void *)DD);
+ RR=ckfree((void *)RR);
+ SS=ckfree((void *)SS);
+
+
+ return((sint)1);
+}
+
+/* JP: pairwise alignments between consensus (representative) sequences
+ from sub-alignments
+ consensus_index: contains the indices of consensus sequences in the seq_array
+ nsub: number of subalignments (filecount)
+*/
+sint pairalign_consensus(int *consensus_index, int nsub)
+{
+ short *mat_xref;
+ static sint si, sj, i;
+ static sint n,m,len1,len2;
+ static sint maxres;
+ static short *matptr;
+ static char c;
+ static float gscale,ghscale;
+/* returns 1, or -1 when the scoring matrix cannot be loaded; distances go to tmat[si+1][sj+1] for 0 <= si < sj < nsub, sequences are looked up through consensus_index[1..nsub] */
+#ifdef MAC
+ int_scale = 10;
+#else
+ int_scale = 100;
+#endif
+ gscale=ghscale=1.0;
+ if (dnaflag)
+ {
+if (debug>1) fprintf(stdout,"matrix %s\n",pw_dnamtrxname);
+ if (strcmp(pw_dnamtrxname, "iub") == 0)
+ {
+ matptr = swgapdnamt;
+ mat_xref = def_dna_xref;
+ }
+ else if (strcmp(pw_dnamtrxname, "clustalw") == 0)
+ {
+ matptr = clustalvdnamt;
+ mat_xref = def_dna_xref;
+ gscale=0.6667;
+ ghscale=0.751;
+ }
+ else
+ {
+ matptr = pw_userdnamat;
+ mat_xref = pw_dna_xref;
+ }
+ maxres = get_matrix(matptr, mat_xref, matrix, TRUE, int_scale);
+ if (maxres == 0) return((sint)-1);
+/* six cells are overwritten with transition_weight times the [0][0] score */
+ matrix[0][4]=transition_weight*matrix[0][0];
+ matrix[4][0]=transition_weight*matrix[0][0];
+ matrix[2][11]=transition_weight*matrix[0][0];
+ matrix[11][2]=transition_weight*matrix[0][0];
+ matrix[2][12]=transition_weight*matrix[0][0];
+ matrix[12][2]=transition_weight*matrix[0][0];
+ }
+ else
+ {
+if (debug>1) fprintf(stdout,"matrix %s\n",pw_mtrxname);
+ if (strcmp(pw_mtrxname, "blosum") == 0)
+ {
+ matptr = blosum30mt;
+ mat_xref = def_aa_xref;
+ }
+ else if (strcmp(pw_mtrxname, "pam") == 0)
+ {
+ matptr = pam350mt;
+ mat_xref = def_aa_xref;
+ }
+ else if (strcmp(pw_mtrxname, "gonnet") == 0)
+ {
+ matptr = gon250mt;
+ int_scale /= 10;
+ mat_xref = def_aa_xref;
+ }
+ else if (strcmp(pw_mtrxname, "id") == 0)
+ {
+ matptr = idmat;
+ mat_xref = def_aa_xref;
+ }
+ else
+ {
+ matptr = pw_usermat;
+ mat_xref = pw_aa_xref;
+ }
+
+ maxres = get_matrix(matptr, mat_xref, matrix, TRUE, int_scale);
+ if (maxres == 0) return((sint)-1);
+ }
+
+/* work arrays are allocated only once the matrix has loaded, so the early returns above have nothing to release */
+ displ = (sint *)ckalloc((2*max_aln_length+1) * sizeof(sint));
+ HH = (pwint *)ckalloc((max_aln_length) * sizeof(pwint));
+ DD = (pwint *)ckalloc((max_aln_length) * sizeof(pwint));
+ RR = (pwint *)ckalloc((max_aln_length) * sizeof(pwint));
+ SS = (pwint *)ckalloc((max_aln_length) * sizeof(pwint));
+
+ /* for (si=MAX(0,istart);si<nseqs && si<iend;si++) */
+ /* JP */
+ for (si=0;si<nsub;si++)
+ {
+ n = seqlen_array[consensus_index[si+1]];
+ len1 = 0;
+ for (i=1;i<=n;i++) {
+ c = seq_array[consensus_index[si+1]][i];
+ if ((c!=gap_pos1) && (c != gap_pos2)) len1++;
+ }
+
+ /* for (sj=MAX(si+1,jstart+1);sj<nseqs && sj<jend;sj++) */
+ for (sj=si+1;sj<nsub;sj++)
+ {
+ m = seqlen_array[consensus_index[sj+1]];
+ if(n==0 || m==0) {
+ tmat[si+1][sj+1]=1.0;
+ tmat[sj+1][si+1]=1.0;
+ continue;
+ }
+ len2 = 0;
+ for (i=1;i<=m;i++) {
+ c = seq_array[consensus_index[sj+1]][i];
+ if ((c!=gap_pos1) && (c != gap_pos2)) len2++;
+ }
+/* gap terms for this pair: g from pw_go_penalty, gh from pw_ge_penalty */
+ if (dnaflag) {
+ g = 2 * (float)pw_go_penalty * int_scale*gscale;
+ gh = pw_ge_penalty * int_scale*ghscale;
+ }
+ else {
+ if (mat_avscore <= 0)
+ g = 2 * (float)(pw_go_penalty + log((double)(MIN(n,m))))*int_scale;
+ else
+ g = 2 * mat_avscore * (float)(pw_go_penalty +
+ log((double)(MIN(n,m))))*gscale;
+ gh = pw_ge_penalty * int_scale;
+ }
+
+if (debug>1) fprintf(stdout,"go %d ge %d\n",(pint)g,(pint)gh);
+
+/*
+ align the sequences
+*/
+ seq1 = consensus_index[si+1];
+ seq2 = consensus_index[sj+1];
+
+ forward_pass(&seq_array[seq1][0], &seq_array[seq2][0],
+ n, m);
+
+ reverse_pass(&seq_array[seq1][0], &seq_array[seq2][0]);
+
+ last_print = 0;
+ print_ptr = 1;
+/*
+ sb1 = sb2 = 1;
+ se1 = n-1;
+ se2 = m-1;
+*/
+
+/* use Myers and Miller to align two sequences */
+
+ maxscore = diff(sb1-1, sb2-1, se1-sb1+1, se2-sb2+1,
+ (sint)0, (sint)0);
+
+/* calculate percentage residue identity */
+
+ mm_score = tracepath(sb1,sb2);
+
+ if(len1==0 || len2==0) mm_score=0;
+ else
+ mm_score /= (float)MIN(len1,len2);
+/* the value printed next is whatever tmat held before this pair was scored */
+ fprintf(stdout, "tmat: %d %d %f\n", (pint)(si+1), (pint)(sj+1), tmat[si+1][sj+1]);
+
+ tmat[si+1][sj+1] = ((float)100.0 - mm_score)/(float)100.0;
+ tmat[sj+1][si+1] = ((float)100.0 - mm_score)/(float)100.0;
+
+ fprintf(stdout, "tmat: %d %d %f\n", (pint)(si+1), (pint)(sj+1), tmat[si+1][sj+1]);
+
+if (debug>4)
+{
+ fprintf(stdout,"Sequences (%d:%d) Aligned. Score: %d CompScore: %d\n",
+ (pint)si+1,(pint)sj+1,
+ (pint)mm_score,
+ (pint)((MIN(len1,len2) > 0) ? maxscore/(MIN(len1,len2)*100) : 0)); /* all-gap sequence: avoid dividing by zero */
+}
+else
+{
+ /* JP: disable info */
+ /*info("Sequences (%d:%d) Aligned. Score: %d",
+ (pint)si+1,(pint)sj+1,
+ (pint)mm_score);
+ */
+}
+
+ }
+ }
+ displ=ckfree((void *)displ);
+ HH=ckfree((void *)HH);
+ DD=ckfree((void *)DD);
+ RR=ckfree((void *)RR);
+ SS=ckfree((void *)SS);
+
+
+ return((sint)1);
+}
+
+
+static void add(sint v)
+{
+ /* record a run of v (positive entry) in displ; when the last entry written was negative, the new run is placed in front of it and last_print is left unchanged */
+ if (last_print >= 0) {
+ last_print = v;
+ displ[print_ptr++] = v;
+ return;
+ }
+ displ[print_ptr-1] = v; displ[print_ptr++] = last_print;
+}
+
+static sint calc_score(sint iat,sint jat,sint v1,sint v2)
+{
+ /* matrix score for position v1+iat of sequence seq1 against position v2+jat of sequence seq2 */
+ const char *first = seq_array[seq1];
+ const char *second = seq_array[seq2];
+ int a, b;
+
+ a = (int)first[v1 + iat];
+ b = (int)second[v2 + jat];
+
+ return matrix[a][b];
+}
+
+
+static float tracepath(sint tsb1,sint tsb2)
+{
+ char c1,c2;
+ sint i1,i2,r;
+ sint i,k,pos,to_do;
+ sint count,width;
+ float score;
+ char *s1 = NULL, *s2 = NULL; /* debug rendering of the two aligned rows; sized from displ (the former char[100] was overrun by longer alignments) */
+ const int dump = (debug>4); /* sampled once so allocation, use and release always agree */
+/* Walk displ[1..print_ptr-1] starting at (tsb1,tsb2), count aligned positions holding the same non-gap code, and return 100 times that count. */
+ to_do=print_ptr-1;
+ i1 = tsb1;
+ i2 = tsb2;
+ pos = 0;
+ count = 0;
+ if (dump) {
+ /* a 0 entry fills one output column, any other entry fills |entry| columns */
+ for (width=0,i=1;i<=to_do;++i) width += (displ[i]==0) ? 1 : (displ[i]>0 ? displ[i] : -displ[i]);
+ s1 = (char *)ckalloc((width+1) * sizeof(char));
+ s2 = (char *)ckalloc((width+1) * sizeof(char));
+ }
+ for(i=1;i<=to_do;++i) {
+if (dump) fprintf(stdout,"%d ",(pint)displ[i]);
+ if(displ[i]==0) {
+ c1 = seq_array[seq1][i1];
+ c2 = seq_array[seq2][i2];
+if (dump)
+{
+if (c1>max_aa) s1[pos] = '-';
+else s1[pos]=amino_acid_codes[c1];
+if (c2>max_aa) s2[pos] = '-';
+else s2[pos]=amino_acid_codes[c2];
+}
+ if ((c1!=gap_pos1) && (c1 != gap_pos2) &&
+ (c1 == c2)) count++;
+ ++i1;
+ ++i2;
+ ++pos;
+ }
+ else {
+ if((k=displ[i])>0) {
+if (dump)
+for (r=0;r<k;r++)
+{
+s1[pos+r]='-';
+if (seq_array[seq2][i2+r]>max_aa) s2[pos+r] = '-';
+else s2[pos+r]=amino_acid_codes[seq_array[seq2][i2+r]];
+}
+ i2 += k;
+ pos += k;
+ }
+ else {
+if (dump)
+for (r=0;r<(-k);r++)
+{
+s2[pos+r]='-';
+if (seq_array[seq1][i1+r]>max_aa) s1[pos+r] = '-';
+else s1[pos+r]=amino_acid_codes[seq_array[seq1][i1+r]];
+}
+ i1 -= k;
+ pos -= k;
+ }
+ }
+ }
+if (dump)
+{
+fprintf(stdout,"\n");
+for (i=0;i<pos;i++) fprintf(stdout,"%c",s1[i]);
+fprintf(stdout,"\n");
+for (i=0;i<pos;i++) fprintf(stdout,"%c",s2[i]);
+fprintf(stdout,"\n");
+s1=ckfree((void *)s1); s2=ckfree((void *)s2);
+}
+/*
+ if (count <= 0) count = 1;
+*/
+ score = 100.0 * (float)count;
+ return(score);
+}
+
+
+static void forward_pass(char *ia, char *ib, sint n, sint m)
+{
+/* Score pass over ia[1..n] against ib[1..m] with cell scores floored at 0: leaves the highest cell score in maxscore and that cell's coordinates in se1 (index into ia) and se2 (index into ib). */
+ sint i,j;
+ pwint f,hh,p,t;
+
+ maxscore = 0;
+ se1 = se2 = 0;
+ for (i=0;i<=m;i++)
+ {
+ HH[i] = 0;
+ DD[i] = -g;
+ }
+
+ for (i=1;i<=n;i++)
+ {
+ hh = p = 0;
+ f = -g;
+
+ for (j=1;j<=m;j++)
+ {
+/* f: best score ending in a gap run along j within row i */
+ f -= gh;
+ t = hh - g - gh;
+ if (f<t) f = t;
+/* DD[j]: best score ending in a gap run along i in column j */
+ DD[j] -= gh;
+ t = HH[j] - g - gh;
+ if (DD[j]<t) DD[j] = t;
+/* p holds the previous row's HH[j-1], i.e. the diagonal predecessor */
+ hh = p + matrix[(int)ia[i]][(int)ib[j]];
+ if (hh<f) hh = f;
+ if (hh<DD[j]) hh = DD[j];
+ if (hh<0) hh = 0;
+
+ p = HH[j];
+ HH[j] = hh;
+
+ if (hh > maxscore)
+ {
+ maxscore = hh;
+ se1 = i;
+ se2 = j;
+ }
+ }
+ }
+
+}
+
+
+static void reverse_pass(char *ia, char *ib)
+{
+/* Backward pass from cell (se1,se2) towards index 1 of both sequences: records in sb1/sb2 the cell where the running best score first reaches maxscore (both default to 1). */
+ sint i,j;
+ pwint f,hh,p,t;
+ pwint cost;
+
+ cost = 0;
+ sb1 = sb2 = 1;
+ for (i=se2;i>0;i--)
+ {
+ HH[i] = -1;
+ DD[i] = -1;
+ }
+
+ for (i=se1;i>0;i--)
+ {
+ hh = f = -1;
+ if (i == se1) p = 0; /* only the row of the end cell starts with a diagonal predecessor of 0 */
+ else p = -1;
+
+ for (j=se2;j>0;j--)
+ {
+
+ f -= gh;
+ t = hh - g - gh;
+ if (f<t) f = t;
+
+ DD[j] -= gh;
+ t = HH[j] - g - gh;
+ if (DD[j]<t) DD[j] = t;
+/* unlike forward_pass(), the cell score is not floored at 0 here */
+ hh = p + matrix[(int)ia[i]][(int)ib[j]];
+ if (hh<f) hh = f;
+ if (hh<DD[j]) hh = DD[j];
+
+ p = HH[j];
+ HH[j] = hh;
+
+ if (hh > cost)
+ {
+ cost = hh;
+ sb1 = i;
+ sb2 = j;
+ if (cost >= maxscore) break;
+ }
+ }
+ if (cost >= maxscore) break; /* leave the outer loop too once maxscore has been reached */
+ }
+
+}
+/* Recursive alignment of M positions of seq1 (after offset A) with N positions of seq2 (after offset B); tb and te replace g as the gap-opening term at the two ends. Appends the result to displ through add()/del() and returns its score. Return type now matches the prototype. */
+static sint diff(sint A,sint B,sint M,sint N,sint tb,sint te)
+{
+ sint type;
+ sint midi,midj,i,j;
+ int midh;
+ static pwint f, hh, e, s, t; /* static scratch shared by every recursion level; none of them is read after a recursive call returns */
+/* nothing left in seq2: the M positions of seq1 become one deletion run */
+ if(N<=0) {
+ if(M>0) {
+ del(M);
+ }
+
+ return(-(int)tbgap(M));
+ }
+
+ if(M<=1) {
+ if(M<=0) {
+ add(N);
+ return(-(int)tbgap(N));
+ }
+/* M == 1: either leave the single seq1 position unaligned (midj stays 0) or pair it with the best-scoring j */
+ midh = -(tb+gh) - tegap(N);
+ hh = -(te+gh) - tbgap(N);
+ if (hh>midh) midh = hh;
+ midj = 0;
+ for(j=1;j<=N;j++) {
+ hh = calc_score(1,j,A,B)
+ - tegap(N-j) - tbgap(j-1);
+ if(hh>midh) {
+ midh = hh;
+ midj = j;
+ }
+ }
+
+ if(midj==0) {
+ del(1);
+ add(N);
+ }
+ else {
+ if(midj>1)
+ add(midj-1);
+ displ[print_ptr++] = last_print = 0;
+ if(midj<N)
+ add(N-midj);
+ }
+ return midh;
+ }
+
+/* Divide: Find optimum midpoint (midi,midj) of cost midh */
+
+ midi = M / 2;
+ HH[0] = 0;
+ t = -tb;
+ for(j=1;j<=N;j++) {
+ HH[j] = t = t-gh;
+ DD[j] = t-g;
+ }
+/* forward sweep over rows 1..midi, leaving row midi in HH/DD */
+ t = -tb;
+ for(i=1;i<=midi;i++) {
+ s=HH[0];
+ HH[0] = hh = t = t-gh;
+ f = t-g;
+ for(j=1;j<=N;j++) {
+ if ((hh=hh-g-gh) > (f=f-gh)) f=hh;
+ if ((hh=HH[j]-g-gh) > (e=DD[j]-gh)) e=hh;
+ hh = s + calc_score(i,j,A,B);
+ if (f>hh) hh = f;
+ if (e>hh) hh = e;
+
+ s = HH[j];
+ HH[j] = hh;
+ DD[j] = e;
+ }
+ }
+
+ DD[0]=HH[0];
+
+ RR[N]=0;
+ t = -te;
+ for(j=N-1;j>=0;j--) {
+ RR[j] = t = t-gh;
+ SS[j] = t-g;
+ }
+/* backward sweep over rows M..midi+1, leaving its last row in RR/SS */
+ t = -te;
+ for(i=M-1;i>=midi;i--) {
+ s = RR[N];
+ RR[N] = hh = t = t-gh;
+ f = t-g;
+
+ for(j=N-1;j>=0;j--) {
+
+ if ((hh=hh-g-gh) > (f=f-gh)) f=hh;
+ if ((hh=RR[j]-g-gh) > (e=SS[j]-gh)) e=hh;
+ hh = s + calc_score(i+1,j+1,A,B);
+ if (f>hh) hh = f;
+ if (e>hh) hh = e;
+
+ s = RR[j];
+ RR[j] = hh;
+ SS[j] = e;
+
+ }
+ }
+
+ SS[N]=RR[N];
+/* type 1: best join of the HH and RR rows at some column midj */
+ midh=HH[0]+RR[0];
+ midj=0;
+ type=1;
+ for(j=0;j<=N;j++) {
+ hh = HH[j] + RR[j];
+ if(hh>=midh)
+ if(hh>midh || (HH[j]!=DD[j] && RR[j]==SS[j])) {
+ midh=hh;
+ midj=j;
+ }
+ }
+/* type 2: join of the DD and SS rows, with one g added back since both halves charged it; handled below with del(2) */
+ for(j=N;j>=0;j--) {
+ hh = DD[j] + SS[j] + g;
+ if(hh>midh) {
+ midh=hh;
+ midj=j;
+ type=2;
+ }
+ }
+
+ /* Conquer recursively around midpoint */
+
+
+ if(type==1) { /* Type 1 gaps */
+ diff(A,B,midi,midj,tb,g);
+ diff(A+midi,B+midj,M-midi,N-midj,g,te);
+ }
+ else {
+ diff(A,B,midi-1,midj,tb,(sint)0);
+ del(2);
+ diff(A+midi+1,B+midj,M-midi-1,N-midj,(sint)0,te);
+ }
+
+ return midh; /* Return the score of the best alignment */
+}
+
+static void del(sint k)
+{
+ /* store a run of k as a negative displ entry; when the last entry written was already negative, k is subtracted from that entry instead of appending a new one */
+ if (last_print >= 0) displ[print_ptr++] = -k;
+ else displ[print_ptr-1] -= k;
+ last_print = displ[print_ptr-1];
+}
+
+
diff --git a/param.h b/param.h
new file mode 100644
index 0000000..c52dff8
--- /dev/null
+++ b/param.h
@@ -0,0 +1,381 @@
+#define MAXARGS 100
+
+typedef struct {	/* one entry of the cmd_line_file / cmd_line_verb / cmd_line_para tables below */
+ char *str;	/* switch name, e.g. "infile"; "" in the closing entry of each table */
+ sint *flag;	/* address of the matching set* flag; NULL in the closing entry */
+ int type;	/* one of NOARG..OPTARG (defined below); -1 in the closing entry */
+ char **arg;	/* one of the *_arg option-string lists for OPTARG entries, NULL otherwise */
+} cmd_line_data;
+
+/*
+ command line switches: one flag per switch, each initialised to -1; the cmd_line_* tables below store the flags' addresses
+*/
+sint setoptions = -1;
+sint sethelp = -1;
+sint setinteractive = -1;
+sint setbatch = -1;
+sint setgapopen = -1;
+sint setgapext = -1;
+sint setpwgapopen = -1;
+sint setpwgapext = -1;
+sint setoutorder = -1;
+sint setbootlabels = -1;
+sint setpwmatrix = -1;
+sint setmatrix = -1;
+sint setpwdnamatrix = -1;
+sint setdnamatrix = -1;
+sint setnegative = -1;
+sint setnoweights = -1;
+sint setoutput = -1;
+sint setoutputtree = -1;
+sint setquicktree = -1;
+sint settype = -1;
+sint setcase = -1;
+sint setseqno = -1;
+sint settransweight = -1;
+sint setseed = -1;
+sint setscore = -1;
+sint setwindow = -1;
+sint setktuple = -1;
+sint setkimura = -1;
+sint settopdiags = -1;
+sint setpairgap = -1;
+sint settossgaps = -1;
+sint setnopgap = -1;
+sint setnohgap = -1;
+sint setnovgap = -1;
+sint sethgapres = -1;
+sint setvgapres = -1;	/* NOTE(review): no cmd_line_* table in this file takes this flag's address */
+sint setuseendgaps = -1;
+sint setmaxdiv = -1;
+sint setgapdist = -1;
+sint setdebug = -1;
+sint setoutfile = -1;
+sint setinfile = -1;
+sint setprofile1 = -1;
+sint setprofile2 = -1;
+sint setalign = -1;
+sint setconvert = -1;
+sint setnewtree = -1;
+sint setusetree = -1;
+sint setnewtree1 = -1;
+sint setusetree1 = -1;
+sint setnewtree2 = -1;
+sint setusetree2 = -1;
+sint setbootstrap = -1;
+sint settree = -1;
+sint setprofile = -1;
+sint setsequences = -1;
+sint setsecstr1 = -1;
+sint setsecstr2 = -1;
+sint setsecstroutput = -1;
+sint sethelixgap = -1;
+sint setstrandgap = -1;
+sint setloopgap = -1;
+sint setterminalgap = -1;
+sint sethelixendin = -1;
+sint sethelixendout = -1;
+sint setstrandendin = -1;
+sint setstrandendout = -1;
+
+/* JP: flags for the "ave_grp_id", "cosmetic", "kk" and "outputfirst" entries of cmd_line_para */
+sint setave_grp_id = -1;
+sint setcosmetic = -1;
+sint setKK = -1;
+sint setoutputfirst = -1;
+
+/*
+ multiple alignment parameters (initial values)
+*/
+float dna_gap_open = 15.0, dna_gap_extend = 6.66;
+float prot_gap_open = 10.0, prot_gap_extend = 0.2;
+sint profile_type = PROFILE;
+sint gap_dist = 4;
+sint output_order = ALIGNED;
+
+/* JP */
+/* sint divergence_cutoff = 30; */
+sint divergence_cutoff = 0;	/* the commented-out line above keeps the earlier initial value of 30 */
+sint ave_grp_id = 40;
+sint cosmetic_penalty = 100;
+sint KK = 10;
+sint outputfirst = 0;
+/* JP */
+
+sint matnum = 3;
+char mtrxname[FILENAMELEN+1] = "gonnet";	/* prfalign() compares this with "blosum", "pam", "gonnet" and "id" */
+sint dnamatnum = 1;
+char dnamtrxname[FILENAMELEN+1] = "iub";	/* prfalign() compares this with "iub" and "clustalw" */
+char hyd_residues[] = "GPSNDQEKR";
+Boolean no_weights = FALSE;
+Boolean neg_matrix = FALSE;
+Boolean no_hyd_penalties = FALSE;
+Boolean no_var_penalties = TRUE;
+Boolean no_pref_penalties = FALSE;
+Boolean use_endgaps = FALSE;
+Boolean endgappenalties = FALSE;
+Boolean reset_alignments_new = FALSE; /* DES */
+Boolean reset_alignments_all = FALSE; /* DES */
+sint output_struct_penalties = 0;
+sint struct_penalties1 = NONE;
+sint struct_penalties2 = NONE;
+Boolean use_ss1 = TRUE;
+Boolean use_ss2 = TRUE;
+sint helix_penalty = 4;
+sint strand_penalty = 4;
+sint loop_penalty = 1;
+sint helix_end_minus = 3;
+sint helix_end_plus = 0;
+sint strand_end_minus = 1;
+sint strand_end_plus = 1;
+sint helix_end_penalty = 2;
+sint strand_end_penalty = 2;
+Boolean use_ambiguities = FALSE;
+
+/*
+ pairwise alignment parameters (initial values)
+*/
+float dna_pw_go_penalty = 15.0, dna_pw_ge_penalty = 6.66;
+float prot_pw_go_penalty = 10.0, prot_pw_ge_penalty = 0.1;
+sint pw_matnum = 3;
+char pw_mtrxname[FILENAMELEN+1] = "gonnet";
+sint pw_dnamatnum = 1;
+char pw_dnamtrxname[FILENAMELEN+1] = "iub";
+char usermtrxname[FILENAMELEN+1], pw_usermtrxname[FILENAMELEN+1];
+char dnausermtrxname[FILENAMELEN+1], pw_dnausermtrxname[FILENAMELEN+1];
+
+Boolean quick_pairalign = FALSE;
+float transition_weight = 0.5;	/* prfalign() multiplies matrix[0][0] by this to fill six DNA matrix cells */
+sint new_seq;
+
+/*
+ quick pairwise alignment parameters
+*/
+sint dna_ktup = 2; /* default parameters for DNA */
+sint dna_wind_gap = 5;
+sint dna_signif = 4;
+sint dna_window = 4;
+
+sint prot_ktup = 1; /* default parameters for proteins */
+sint prot_wind_gap = 3;
+sint prot_signif = 5;
+sint prot_window = 5;
+Boolean percent=TRUE;
+Boolean tossgaps = FALSE;
+Boolean kimura = FALSE;
+
+
+sint boot_ntrials = 1000;
+unsigned sint boot_ran_seed = 111;
+
+
+sint debug = 0;	/* prfalign() prints extra diagnostics to stdout when this is > 0 */
+
+Boolean explicit_dnaflag = FALSE; /* Explicit setting of sequence type on the command line */
+Boolean lowercase = TRUE; /* Flag for GDE output - set on the command line */
+Boolean cl_seq_numbers = FALSE;
+
+
+Boolean output_clustal = TRUE;
+Boolean output_gcg = FALSE;
+Boolean output_phylip = FALSE;
+Boolean output_nbrf = FALSE;
+Boolean output_gde = FALSE;
+Boolean output_nexus = FALSE;
+Boolean showaln = TRUE;
+Boolean save_parameters = FALSE;
+
+/* DES */
+Boolean output_tree_clustal = FALSE;
+Boolean output_tree_phylip = TRUE;
+Boolean output_tree_distances = FALSE;
+Boolean output_tree_nexus = FALSE;
+sint bootstrap_format = BS_BRANCH_LABELS;
+
+/* These are all the positively scoring groups that occur in the Gonnet Pam250
+matrix. There are strong and weak groups, defined as strong score >0.5 and
+weak score <=0.5. Strongly matching columns are assigned ':' and weakly matching
+columns are assigned '.' in the clustal output format.
+*/
+
+char *res_cat1[] = {	/* NULL-terminated; presumably the strong groups - TODO confirm against the output code */
+ "STA",
+ "NEQK",
+ "NHQK",
+ "NDEQ",
+ "QHRK",
+ "MILV",
+ "MILF",
+ "HY",
+ "FYW",
+ NULL };
+
+char *res_cat2[] = {	/* NULL-terminated; presumably the weak groups - TODO confirm against the output code */
+ "CSA",
+ "ATV",
+ "SAG",
+ "STNK",
+ "STPA",
+ "SGND",
+ "SNDEQK",
+ "NDEQHK",
+ "NEQHRK",
+ "FVLIM",
+ "HFY",
+ NULL };
+
+
+
+static char *type_arg[] = {	/* option strings for the "type" switch; "" is the last element */
+ "protein",
+ "dna",
+ ""};
+
+static char *bootlabels_arg[] = {	/* option strings for the "bootlabels" switch; "" is the last element */
+ "node",
+ "branch",
+ ""};
+
+static char *outorder_arg[] = {	/* option strings for the "outorder" switch; "" is the last element */
+ "input",
+ "aligned",
+ ""};
+
+static char *case_arg[] = {	/* option strings for the "case" switch; "" is the last element */
+ "lower",
+ "upper",
+ ""};
+
+static char *seqno_arg[] = {	/* option strings for the "seqnos" switch; "" is the last element */
+ "off",
+ "on",
+ ""};
+
+static char *score_arg[] = {	/* option strings for the "score" switch; "" is the last element */
+ "percent",
+ "absolute",
+ ""};
+
+static char *output_arg[] = {	/* option strings for the "output" switch; "" is the last element */
+ "gcg",
+ "gde",
+ "pir",
+ "phylip",
+ "nexus",
+ ""};
+
+static char *outputtree_arg[] = {	/* option strings for the "outputtree" switch; "" is the last element */
+ "nj",
+ "phylip",
+ "dist",
+ "nexus",
+ ""};
+
+static char *outputsecstr_arg[] = {	/* option strings for the "secstrout" switch; "" is the last element */
+ "structure",
+ "mask",
+ "both",
+ "none",
+ ""};
+
+/*
+ command line initialisation: values of the 'type' field of cmd_line_data
+
+ type = 0 no argument
+ type = 1 integer argument
+ type = 2 float argument
+ type = 3 string argument
+ type = 4 filename
+ type = 5 opts (the table entry names one of the *_arg option-string lists above)
+*/
+#define NOARG 0
+#define INTARG 1
+#define FLTARG 2
+#define STRARG 3
+#define FILARG 4
+#define OPTARG 5
+
+
+/* DATA switches (input file names); the last entry has an empty name, a NULL flag and type -1 */
+cmd_line_data cmd_line_file[] = {
+	{ "infile",	&setinfile,	FILARG,	NULL },
+	{ "profile1",	&setprofile1,	FILARG,	NULL },
+	{ "profile2",	&setprofile2,	FILARG,	NULL },
+	{ "",		NULL,		-1,	NULL }};
+/* VERB switches; "help" and "check" share one flag; the last entry has an empty name, a NULL flag and type -1 */
+cmd_line_data cmd_line_verb[] = {
+	{ "help",		&sethelp,		NOARG,	NULL },
+	{ "check",		&sethelp,		NOARG,	NULL },
+	{ "options",		&setoptions,		NOARG,	NULL },
+	{ "align",		&setalign,		NOARG,	NULL },
+	{ "newtree",		&setnewtree,		FILARG,	NULL },
+	{ "usetree",		&setusetree,		FILARG,	NULL },
+	{ "newtree1",		&setnewtree1,		FILARG,	NULL },
+	{ "usetree1",		&setusetree1,		FILARG,	NULL },
+	{ "newtree2",		&setnewtree2,		FILARG,	NULL },
+	{ "usetree2",		&setusetree2,		FILARG,	NULL },
+	{ "bootstrap",		&setbootstrap,		NOARG,	NULL },
+	{ "tree",		&settree,		NOARG,	NULL },
+	{ "quicktree",		&setquicktree,		NOARG,	NULL },
+	{ "convert",		&setconvert,		NOARG,	NULL },
+	{ "interactive",	&setinteractive,	NOARG,	NULL },
+	{ "batch",		&setbatch,		NOARG,	NULL },
+	{ "",			NULL,			-1,	NULL }};
+/* PARAMETER switches; OPTARG entries name their option-string list; the last entry has an empty name, a NULL flag and type -1 */
+cmd_line_data cmd_line_para[] = {
+	{ "type",		&settype,		OPTARG,	type_arg },
+	{ "profile",		&setprofile,		NOARG,	NULL },
+	{ "sequences",		&setsequences,		NOARG,	NULL },
+	{ "matrix",		&setmatrix,		FILARG,	NULL },
+	{ "dnamatrix",		&setdnamatrix,		FILARG,	NULL },
+	{ "negative",		&setnegative,		NOARG,	NULL },
+	{ "noweights",		&setnoweights,		NOARG,	NULL },
+	{ "gapopen",		&setgapopen,		FLTARG,	NULL },
+	{ "gapext",		&setgapext,		FLTARG,	NULL },
+	{ "endgaps",		&setuseendgaps,		NOARG,	NULL },
+	{ "nopgap",		&setnopgap,		NOARG,	NULL },
+	{ "nohgap",		&setnohgap,		NOARG,	NULL },
+	{ "novgap",		&setnovgap,		NOARG,	NULL },
+	{ "hgapresidues",	&sethgapres,		STRARG,	NULL },
+	{ "maxdiv",		&setmaxdiv,		INTARG,	NULL },
+	{ "gapdist",		&setgapdist,		INTARG,	NULL },
+	{ "pwmatrix",		&setpwmatrix,		FILARG,	NULL },
+	{ "pwdnamatrix",	&setpwdnamatrix,	FILARG,	NULL },
+	{ "pwgapopen",		&setpwgapopen,		FLTARG,	NULL },
+	{ "pwgapext",		&setpwgapext,		FLTARG,	NULL },
+	{ "ktuple",		&setktuple,		INTARG,	NULL },
+	{ "window",		&setwindow,		INTARG,	NULL },
+	{ "pairgap",		&setpairgap,		INTARG,	NULL },
+	{ "topdiags",		&settopdiags,		INTARG,	NULL },
+	{ "score",		&setscore,		OPTARG,	score_arg },
+	{ "transweight",	&settransweight,	FLTARG,	NULL },
+	{ "seed",		&setseed,		INTARG,	NULL },
+	{ "kimura",		&setkimura,		NOARG,	NULL },
+	{ "tossgaps",		&settossgaps,		NOARG,	NULL },
+	{ "bootlabels",		&setbootlabels,		OPTARG,	bootlabels_arg },
+	{ "debug",		&setdebug,		INTARG,	NULL },
+	{ "output",		&setoutput,		OPTARG,	output_arg },
+	{ "outputtree",		&setoutputtree,		OPTARG,	outputtree_arg },
+	{ "outfile",		&setoutfile,		FILARG,	NULL },
+	{ "outorder",		&setoutorder,		OPTARG,	outorder_arg },
+	{ "case",		&setcase,		OPTARG,	case_arg },
+	{ "seqnos",		&setseqno,		OPTARG,	seqno_arg },
+	{ "nosecstr1",		&setsecstr1,		NOARG,	NULL },
+	{ "nosecstr2",		&setsecstr2,		NOARG,	NULL },
+	{ "secstrout",		&setsecstroutput,	OPTARG,	outputsecstr_arg },
+	{ "helixgap",		&sethelixgap,		INTARG,	NULL },
+	{ "strandgap",		&setstrandgap,		INTARG,	NULL },
+	{ "loopgap",		&setloopgap,		INTARG,	NULL },
+	{ "terminalgap",	&setterminalgap,	INTARG,	NULL },
+	{ "helixendin",		&sethelixendin,		INTARG,	NULL },
+	{ "helixendout",	&sethelixendout,	INTARG,	NULL },
+	{ "strandendin",	&setstrandendin,	INTARG,	NULL },
+	{ "strandendout",	&setstrandendout,	INTARG,	NULL },
+	{ "ave_grp_id",		&setave_grp_id,		INTARG,	NULL },
+	{ "cosmetic",		&setcosmetic,		INTARG,	NULL },
+	{ "kk",			&setKK,			INTARG,	NULL },
+	{ "outputfirst",	&setoutputfirst,	INTARG,	NULL },
+
+	{ "",			NULL,			-1,	NULL }};
+
+
diff --git a/pcma.c b/pcma.c
new file mode 100644
index 0000000..d6e8c9c
--- /dev/null
+++ b/pcma.c
@@ -0,0 +1,123 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#ifdef MAC
+#include <console.h>
+#endif
+#include "pcma.h"
+/*#include "new.h" */
+
+/*
+* Prototypes (pcma.h, included above, declares these functions as well, apart from ccommand)
+*/
+
+#ifdef MAC
+extern int ccommand(char ***);
+#endif
+
+extern void *ckalloc(size_t);
+extern void init_amenu(void);
+extern void init_interface(void);
+extern void init_matrix(void);
+extern void fill_chartab(void);
+extern void parse_params(Boolean);
+extern void main_menu(void);
+
+/*
+* Global variables (defined here; other files reach them through extern declarations)
+*/
+double **tmat;	/* read as tmat[i+1][j+1] in prfalign(), which averages the entries as percent identities */
+
+char revision_level[] = "W (1.81)"; /* JULIE march 2000*/
+Boolean interactive=FALSE;
+
+char *help_file_name = "pcma_help";
+
+sint max_names; /* maximum length of names in current alignment file */
+
+float gap_open, gap_extend;
+float pw_go_penalty, pw_ge_penalty;
+
+FILE *tree;
+FILE *clustal_outfile, *gcg_outfile, *nbrf_outfile, *phylip_outfile,
+ *gde_outfile, *nexus_outfile;
+sint *seqlen_array;	/* read as seqlen_array[i+1] for i in 0..nseqs-1 in prfalign() */
+sint max_aln_length;
+short usermat[NUMRES][NUMRES], pw_usermat[NUMRES][NUMRES];	/* NOTE(review): prfalign.c declares usermat as "extern short usermat[]" */
+short def_aa_xref[NUMRES+1], aa_xref[NUMRES+1], pw_aa_xref[NUMRES+1];
+short userdnamat[NUMRES][NUMRES], pw_userdnamat[NUMRES][NUMRES];	/* NOTE(review): prfalign.c declares userdnamat as "extern short userdnamat[]" */
+short def_dna_xref[NUMRES+1], dna_xref[NUMRES+1], pw_dna_xref[NUMRES+1];
+sint nseqs;
+sint nsets;
+sint *output_index;
+sint **sets;
+sint *seq_weight;	/* read as seq_weight[i] (no +1 offset) in prfalign() */
+sint max_aa;
+sint gap_pos1;	/* prfalign() counts a residue only if it differs from both gap_pos1 and gap_pos2 */
+sint gap_pos2;
+sint mat_avscore;
+sint profile_no;
+
+Boolean usemenu;	/* main() sets this to FALSE before calling parse_params() */
+Boolean dnaflag;
+Boolean distance_tree;
+
+char **seq_array;	/* read as seq_array[i+1][j+1] in prfalign() */
+char **names,**titles;
+char **args;	/* main() fills this with copies of argv[1..argc-1] and frees it after parse_params() */
+char seqname[FILENAMELEN+1];
+
+char *gap_penalty_mask1 = NULL, *gap_penalty_mask2 = NULL;
+char *sec_struct_mask1 = NULL, *sec_struct_mask2 = NULL;
+sint struct_penalties;
+char *ss_name1 = NULL, *ss_name2 = NULL;
+
+Boolean user_series = FALSE;
+UserMatSeries matseries;
+short usermatseries[MAXMAT][NUMRES][NUMRES];
+short aa_xrefseries[MAXMAT][NUMRES+1];
+
+int main(int argc,char **argv)
+{
+	sint n;		/* index into args[] */
+
+	/* Start-up calls; all four functions are defined in other files. */
+	init_amenu();
+	init_interface();
+	init_matrix();
+	fill_chartab();
+
+	if(argc<=1) {
+		/* JP: no command line arguments - print the help text */
+		print_help();
+	}
+	else {
+		/* Copy argv[1..argc-1] into the global args[0..argc-2]. */
+		args = (char **)ckalloc(argc * sizeof(char *));
+		for(n=0;n<argc-1;n++)
+		{
+			args[n]=(char *)ckalloc((strlen(argv[n+1])+1) * sizeof(char));
+			strcpy(args[n],argv[n+1]);
+		}
+
+		usemenu=FALSE;
+		parse_params(FALSE);
+
+		/* The copies are released once parse_params() has returned. */
+		for(n=0;n<argc-1;n++)
+			ckfree(args[n]);
+		ckfree(args);
+	}
+
+	/* JP: the menu interface is disabled, so the program always ends here. */
+	exit(0);
+
+	/* Never reached: interactive menu start-up left in place below. */
+	usemenu=TRUE;
+	interactive=TRUE;
+
+	main_menu();
+
+	exit(0);
+}
+
diff --git a/pcma.h b/pcma.h
new file mode 100644
index 0000000..03c4bb7
--- /dev/null
+++ b/pcma.h
@@ -0,0 +1,310 @@
+/*#include "/us1/user/julie/dmalloc/malloc.h"*/
+/*********************PCMA.H (banner formerly read CLUSTALW.H)***************/
+/****************************************************************************/
+
+ /*
+ Main header file for PCMA. Uncomment ONE of the following 4 lines
+ depending on which compiler you wish to use.
+ */
+
+/*#define VMS 1 VAX or ALPHA VMS */
+
+/*#define MAC 1 Think_C for Macintosh */
+
+/*#define MSDOS 1 Turbo C for PC's */
+
+#define UNIX 1 /*Ultrix/Decstation, Gnu C for
+ Sun, IRIX/SGI, OSF1/ALPHA */
+
+/***************************************************************************/
+/***************************************************************************/
+
+
+#include "general.h"
+
+#define MAXNAMES 30 /* Max chars read for seq. names */
+#define MAXTITLES 60 /* Title length */
+#define FILENAMELEN 256 /* Max. file name length */
+
+#define UNKNOWN 0
+#define EMBLSWISS 1
+#define PIR 2
+#define PEARSON 3
+#define GDE 4
+#define CLUSTAL 5 /* DES */
+#define MSF 6 /* DES */
+#define RSF 7 /* JULIE */
+#define USER 8 /* DES */
+#define PHYLIP 9 /* DES */
+#define NEXUS 10/* DES */
+#define CLUSTALIST 11 /* JP */
+
+#define NONE 0
+#define SECST 1
+#define GMASK 2
+
+#define PROFILE 0
+#define SEQUENCE 1
+
+#define BS_NODE_LABELS 2
+#define BS_BRANCH_LABELS 1
+
+#define PAGE_LEN 22 /* Number of lines of help sent to screen */
+
+#define PAGEWIDTH 80 /* maximum characters on output file page */
+#define LINELENGTH 60 /* Output file line length */
+#define GCG_LINELENGTH 50
+
+#ifdef VMS /* Defaults for VAX VMS */
+#define COMMANDSEP '/'
+#define DIRDELIM ']' /* Last character before file name in full file
+ specs */
+#define INT_SCALE_FACTOR 1000 /* Scaling factor to convert float to integer for profile scores */
+
+#elif MAC
+#define COMMANDSEP '/'
+#define DIRDELIM ':'
+#define INT_SCALE_FACTOR 100 /* Scaling factor to convert float to integer for profile scores */
+
+#elif MSDOS
+#define COMMANDSEP '/'
+#define DIRDELIM '\\'
+#define INT_SCALE_FACTOR 100 /* Scaling factor to convert float to integer for profile scores */
+
+#elif UNIX	/* the branch taken with the "#define UNIX 1" setting above */
+#define COMMANDSEP '-'
+#define DIRDELIM '/'
+#define INT_SCALE_FACTOR 1000 /* Scaling factor to convert float to integer for profile scores */
+#endif
+
+#define NUMRES 32 /* max size of comparison matrix */
+
+#define INPUT 0
+#define ALIGNED 1
+
+#define LEFT 1
+#define RIGHT 2
+
+#define NODE 0
+#define LEAF 1
+
+#define GAPCOL 32 /* position of gap open penalty in profile */
+#define LENCOL 33 /* position of gap extension penalty in profile */
+
+typedef struct node { /* phylogenetic tree structure */
+ struct node *left;
+ struct node *right;
+ struct node *parent;
+ float dist;
+ sint leaf;	/* presumably NODE or LEAF (defined above) - TODO confirm in calctree.c */
+ int order;
+ char name[64];
+} stree, *treeptr;
+
+typedef struct {	/* one entry of a MatMenu */
+ char title[30];
+ char string[30];
+} MatMenuEntry;
+
+typedef struct {
+ int noptions;
+ MatMenuEntry opt[10];	/* room for 10 entries */
+} MatMenu;
+
+#define MAXMAT 10	/* capacity of UserMatSeries.mat[] below */
+
+typedef struct {	/* one matrix of a user-supplied series */
+ int llimit;	/* prfalign() selects the first entry with llimit <= percent identity <= ulimit */
+ int ulimit;
+ short *matptr;	/* handed to get_matrix() by prfalign() as the matrix data */
+ short *aa_xref;	/* handed to get_matrix() by prfalign() as the cross-reference table */
+} SeriesMat;
+
+typedef struct {
+ int nmat;	/* number of mat[] entries that prfalign() searches */
+ SeriesMat mat[MAXMAT];
+} UserMatSeries;
+
+
+/*
+ Prototypes (grouped by the source file that defines them)
+*/
+
+/* alnscore.c */
+void aln_score(void);
+/* interface.c */
+void parse_params(Boolean);
+void init_amenu(void);
+void init_interface(void);
+void main_menu(void);
+FILE *open_output_file(char *, char *, char *, char *);
+FILE *open_explicit_file(char *);
+sint seq_input(Boolean);
+Boolean open_alignment_output(char *);
+void create_alignment_output(sint fseq,sint lseq);
+void align(char *phylip_name);
+void profile_align(char *p1_tree_name,char *p2_tree_name);/* Align 2 alignments */
+void make_tree(char *phylip_name);
+void get_tree(char *phylip_name);
+sint profile_input(void); /* read a profile */
+void new_sequence_align(char *phylip_name);
+Boolean user_mat(char *, short *, short *);
+Boolean user_mat_series(char *, short *, short *);
+void get_help(char);
+void clustal_out(FILE *, sint, sint, sint, sint);
+void nbrf_out(FILE *, sint, sint, sint, sint);
+void gcg_out(FILE *, sint, sint, sint, sint);
+void phylip_out(FILE *, sint, sint, sint, sint);
+void gde_out(FILE *, sint, sint, sint, sint);
+void nexus_out(FILE *, sint, sint, sint, sint);
+void print_sec_struct_mask(int prf_length,char *mask,char *struct_mask);
+void fix_gaps(void);
+
+/*JP*/
+void print_help();	/* called by main() in pcma.c when no arguments are given */
+
+
+/* calcgapcoeff.c */
+void calc_gap_coeff(char **alignment, sint *gaps, sint **profile, Boolean struct_penalties,
+ char *gap_penalty_mask, sint first_seq, sint last_seq,
+ sint prf_length, sint gapcoef, sint lencoef);
+/* calcprf1.c */
+void calc_prf1(sint **profile, char **alignment, sint *gaps, sint matrix[NUMRES ][NUMRES ],
+ sint *seq_weight, sint prf_length, sint first_seq, sint last_seq);
+/* calcprf2.c */
+void calc_prf2(sint **profile, char **alignment, sint *seq_weight, sint prf_length,
+ sint first_seq, sint last_seq);
+/* calctree.c */
+void calc_seq_weights(sint first_seq, sint last_seq,sint *seq_weight);
+void create_sets(sint first_seq, sint last_seq);
+sint read_tree(char *treefile, sint first_seq, sint last_seq);
+void clear_tree(treeptr p);
+sint calc_similarities(sint nseqs);
+/* pcma.c (this label formerly read clustalw.c) */
+int main(int argc, char **argv);
+/* gcgcheck.c */
+int SeqGCGCheckSum(char *seq, sint len);
+/* malign.c */
+sint malign(sint istart,char *phylip_name);
+sint seqalign(sint istart,char *phylip_name);
+sint palign1(void);
+float countid(sint s1, sint s2);
+sint palign2(char *p1_tree_name,char *p2_tree_name);
+/* pairalign.c */
+sint pairalign(sint istart, sint iend, sint jstart, sint jend);
+/* prfalign.c */
+lint prfalign(sint *group, sint *aligned);
+/* random.c */
+unsigned long linrand(unsigned long r);
+unsigned long addrand(unsigned long r);
+void addrandinit(unsigned long s);
+/* readmat.c */
+void init_matrix(void);
+sint get_matrix(short *matptr, short *xref, sint matrix[NUMRES ][NUMRES ], Boolean neg_flag,
+ sint scale);
+sint read_user_matrix(char *filename, short *usermat, short *xref);
+sint read_matrix_series(char *filename, short *usermat, short *xref);
+int getargs(char *inline1, char *args[], int max);
+/* sequence.c */
+void fill_chartab(void);
+sint readseqs(sint first_seq);
+/* showpair.c */
+void show_pair(sint istart, sint iend, sint jstart, sint jend);
+/* trees.c */
+void phylogenetic_tree(char *phylip_name,char *clustal_name,char *dist_name, char *nexus_name);
+void bootstrap_tree(char *phylip_name,char *clustal_name, char *nexus_name);
+sint dna_distance_matrix(FILE *tree);
+sint prot_distance_matrix(FILE *tree);
+void guide_tree(FILE *tree,int first_seq,sint nseqs);
+
+/* util.c */
+
+void alloc_aln(sint nseqs);
+void realloc_aln(sint first_seq,sint nseqs);
+void free_aln(sint nseqs);
+void alloc_seq(sint seq_no,sint length);
+void realloc_seq(sint seq_no,sint length);
+void free_seq(sint seq_no);
+
+void *ckalloc(size_t bytes);
+void *ckrealloc(void *ptr, size_t bytes);
+void *ckfree(void *ptr);
+char prompt_for_yes_no(char *title,char *prompt);
+void fatal(char *msg, ...);
+void error(char *msg, ...);
+void warning(char *msg, ...);
+void info(char *msg, ...);
+char *rtrim(char *str);
+char *blank_to_(char *str);
+char *upstr(char *str);
+char *lowstr(char *str);
+void getstr(char *instr, char *outstr);
+double getreal(char *instr, double minx, double maxx, double def);
+int getint(char *instr, int minx, int maxx, int def);
+void do_system(void);
+Boolean linetype(char *line, char *code);
+Boolean keyword(char *line, char *code);
+Boolean blankline(char *line);
+void get_path(char *str, char *path);
+
+
+/*********************************************************/
+/* new structures and functions introduced by Jimin Pei */
+/*********************************************************/
+
+/* node for a tree containing the sequences */
+typedef struct snode {	/* the first six members match those of struct node (stree) */
+ struct snode *left;
+ struct snode *right;
+ struct snode *parent;
+ float dist;
+ sint leaf;
+ int order;
+ char **name;
+ int **seq;
+ int seqnum;
+ int seqlength;
+ int **abstractseq;
+ short abseqnum;
+ int abseqlength;
+} sstree, *streeptr;
+
+/* node for a linked list of scores */
+typedef struct scorenode {
+ struct scorenode *next;
+ short sbe; /* score before extension */
+ short ind; /* index of the second sequence */
+ int sae; /* score after extension */
+} SN;
+
+
+/* structure of element of an alignment (commented out, not compiled) */
+/*typedef struct AlignmentElement {
+ int gid;
+ int pid;
+} AE;
+*/
+
+/* function prototypes */
+/* lib_generation.c */
+SN * SNavail();
+void lib_generation();
+void AddSbe(SN *node, int indi, int s);
+void printLib(int gi, int gj);
+
+
+/* calctree.c */
+void assign_node(streeptr p, sint *aligned);
+double average_group_identity(sint *group);
+
+/* alcomp2.c */
+void prfprfmatrix(int indi1, int indi2, int alnlength1, int alnlength2, int nali1, int nali2, double **prfprfmat);
+
+/*lsim1.c */
+void SIM(int M,int N,int K,int **V,int Q,int R,int nseq,int gi, int gj);
+
+/*lib_extension.c */
+void lib_extension();
+
+/*prfalign1.c */
+lint prfalign1(sint *group, sint gi, sint gj);
diff --git a/prfalign.c b/prfalign.c
new file mode 100644
index 0000000..295a778
--- /dev/null
+++ b/prfalign.c
@@ -0,0 +1,1153 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include "pcma.h"
+#define ENDALN 127	/* marker that prfalign() stores after the last column of each alignment row */
+
+#define MAX(a,b) ((a)>(b)?(a):(b))	/* evaluates one argument twice - avoid arguments with side effects */
+#define MIN(a,b) ((a)<(b)?(a):(b))	/* evaluates one argument twice - avoid arguments with side effects */
+
+/*
+ * Prototypes (functions private to this file)
+ */
+static lint pdiff(sint A,sint B,sint i,sint j,sint go1,sint go2);
+static lint prfscore(sint n, sint m);
+static sint gap_penalty1(sint i, sint j,sint k);
+static sint open_penalty1(sint i, sint j);
+static sint ext_penalty1(sint i, sint j);
+static sint gap_penalty2(sint i, sint j,sint k);
+static sint open_penalty2(sint i, sint j);
+static sint ext_penalty2(sint i, sint j);
+static void padd(sint k);
+static void pdel(sint k);
+static void palign(void);
+static void ptracepath(sint *alen);
+static void add_ggaps(void);
+static char * add_ggaps_mask(char *mask, int len, char *path1, char *path2);
+
+/*
+ * Global variables (defined in other files)
+ */
+extern double **tmat;
+extern float gap_open, gap_extend;
+extern float transition_weight;
+extern sint gap_pos1, gap_pos2;
+extern sint max_aa;
+extern sint nseqs;
+extern sint *seqlen_array;
+extern sint *seq_weight;
+extern sint debug;
+extern Boolean neg_matrix;
+extern sint mat_avscore;
+extern short blosum30mt[], blosum40mt[], blosum45mt[];
+extern short blosum62mt2[], blosum80mt[];
+extern short pam20mt[], pam60mt[];
+extern short pam120mt[], pam160mt[], pam350mt[];
+extern short gon40mt[], gon80mt[];
+extern short gon120mt[], gon160mt[], gon250mt[], gon350mt[];
+extern short clustalvdnamt[],swgapdnamt[];
+extern short idmat[];
+extern short usermat[];	/* NOTE(review): pcma.c defines this as short usermat[NUMRES][NUMRES] */
+extern short userdnamat[];	/* NOTE(review): pcma.c defines this as short userdnamat[NUMRES][NUMRES] */
+extern Boolean user_series;
+extern UserMatSeries matseries;
+
+extern short def_dna_xref[],def_aa_xref[],dna_xref[],aa_xref[];
+extern sint max_aln_length;
+extern Boolean distance_tree;
+extern Boolean dnaflag;
+extern char mtrxname[];
+extern char dnamtrxname[];
+extern char **seq_array;
+extern char *amino_acid_codes;
+extern char *gap_penalty_mask1,*gap_penalty_mask2;
+extern char *sec_struct_mask1,*sec_struct_mask2;
+extern sint struct_penalties1, struct_penalties2;
+extern Boolean use_ss1, use_ss2;
+extern Boolean endgappenalties;
+
+static sint print_ptr,last_print;
+static sint *displ;
+
+static char **alignment;	/* allocated in prfalign(): nseqs row pointers, profile 1 rows first, then profile 2 */
+static sint *aln_len;	/* allocated in prfalign(): one length per alignment row */
+static sint *aln_weight;	/* allocated in prfalign(): one weight per alignment row, copied from seq_weight */
+static char *aln_path1, *aln_path2;
+static sint alignment_len;
+static sint **profile1, **profile2;	/* allocated in prfalign(): prf_length+2 rows of LENCOL+2 entries each */
+static lint *HH, *DD, *RR, *SS;
+static lint *gS;
+static sint matrix[NUMRES][NUMRES];	/* filled by get_matrix() in prfalign() */
+static sint nseqs1, nseqs2;	/* number of sequences with group value 1 and 2 respectively */
+static sint prf_length1, prf_length2;	/* longest seqlen_array value within each group */
+static sint *gaps;
+static sint gapcoef1,gapcoef2;
+static sint lencoef1,lencoef2;
+static Boolean switch_profiles;	/* TRUE when prfalign() swapped group labels 1 and 2 because nseqs2 > nseqs1 */
+
+lint prfalign(sint *group, sint *aligned)
+{
+
+ static Boolean found;
+ static Boolean negative;
+ static Boolean error_given=FALSE;
+ static sint i, j, count = 0;
+ static sint NumSeq;
+ static sint len, len1, len2, is, minlen;
+ static sint se1, se2, sb1, sb2;
+ static sint maxres;
+ static sint int_scale;
+ static short *matptr;
+ static short *mat_xref;
+ static char c;
+ static lint score;
+ static float scale;
+ static double logmin,logdiff;
+ static double pcid;
+
+
+ alignment = (char **) ckalloc( nseqs * sizeof (char *) );
+ aln_len = (sint *) ckalloc( nseqs * sizeof (sint) );
+ aln_weight = (sint *) ckalloc( nseqs * sizeof (sint) );
+
+ for (i=0;i<nseqs;i++)
+ if (aligned[i+1] == 0) group[i+1] = 0;
+
+ nseqs1 = nseqs2 = 0;
+ for (i=0;i<nseqs;i++)
+ {
+ if (group[i+1] == 1) nseqs1++;
+ else if (group[i+1] == 2) nseqs2++;
+ }
+
+ if ((nseqs1 == 0) || (nseqs2 == 0)) return(0.0);
+
+ if (nseqs2 > nseqs1)
+ {
+ switch_profiles = TRUE;
+ for (i=0;i<nseqs;i++)
+ {
+ if (group[i+1] == 1) group[i+1] = 2;
+ else if (group[i+1] == 2) group[i+1] = 1;
+ }
+ }
+ else
+ switch_profiles = FALSE;
+
+ int_scale = 100;
+
+/*
+ calculate the mean of the sequence pc identities between the two groups
+*/
+ count = 0;
+ pcid = 0.0;
+ negative=neg_matrix;
+ for (i=0;i<nseqs;i++)
+ {
+ if (group[i+1] == 1)
+ for (j=0;j<nseqs;j++)
+ if (group[j+1] == 2)
+ {
+ count++;
+ pcid += tmat[i+1][j+1];
+ }
+ }
+
+ pcid = pcid/(float)count;
+if (debug > 0) fprintf(stdout,"mean tmat %3.1f\n", pcid);
+
+/* JP */
+/*fprintf(stdout, "%d %c %d %c\n", gap_pos1, amino_acid_codes[gap_pos1], gap_pos2, amino_acid_codes[gap_pos2]);
+for(i=0;i<NUMRES;i++) {
+ fprintf(stdout, "%c ", amino_acid_codes[i]);
+}*/
+
+
+/*
+ Make the first profile.
+*/
+ prf_length1 = 0;
+ for (i=0;i<nseqs;i++)
+ if (group[i+1] == 1)
+ if(seqlen_array[i+1]>prf_length1) prf_length1=seqlen_array[i+1];
+
+ nseqs1 = 0;
+if (debug>0) fprintf(stdout,"sequences profile 1:\n");
+ for (i=0;i<nseqs;i++)
+ {
+ if (group[i+1] == 1)
+ {
+if (debug>0) {
+extern char **names;
+fprintf(stdout,"%s\n",names[i+1]);
+}
+ len = seqlen_array[i+1];
+ alignment[nseqs1] = (char *) ckalloc( (prf_length1+2) * sizeof (char) );
+ /*JP */
+ if(debug>1)fprintf(stdout, "\n");
+ for (j=0;j<len;j++){
+ alignment[nseqs1][j] = seq_array[i+1][j+1];
+ /* JP */
+ /* fprintf(stdout, "%d", alignment[nseqs1][j]); */
+ if(debug>1)fprintf(stdout, "%c", amino_acid_codes[alignment[nseqs1][j]]);
+ /* JP */
+ }
+ for(j=len;j<prf_length1;j++) {
+ alignment[nseqs1][j+1]=gap_pos1;
+ /* JP */
+ if(debug>1)fprintf(stdout, "%c", amino_acid_codes[alignment[nseqs1][j]]);
+ /* JP */
+ }
+ /*JP */
+ if(debug>1)fprintf(stdout, "\n");
+ alignment[nseqs1][prf_length1+1] = ENDALN;
+ aln_len[nseqs1] = prf_length1;
+ aln_weight[nseqs1] = seq_weight[i];
+ nseqs1++;
+ }
+ }
+
+/*
+ Make the second profile.
+*/
+ prf_length2 = 0;
+ for (i=0;i<nseqs;i++)
+ if (group[i+1] == 2)
+ if(seqlen_array[i+1]>prf_length2) prf_length2=seqlen_array[i+1];
+
+ nseqs2 = 0;
+if (debug>0) fprintf(stdout,"sequences profile 2:\n");
+ for (i=0;i<nseqs;i++)
+ {
+ if (group[i+1] == 2)
+ {
+if (debug>0) {
+extern char **names;
+fprintf(stdout,"%s\n",names[i+1]);
+}
+ len = seqlen_array[i+1];
+ alignment[nseqs1+nseqs2] =
+ (char *) ckalloc( (prf_length2+2) * sizeof (char) );
+ for (j=0;j<len;j++)
+ alignment[nseqs1+nseqs2][j] = seq_array[i+1][j+1];
+ for(j=len;j<prf_length2;j++)
+ alignment[nseqs1+nseqs2][j+1]=gap_pos1;
+ alignment[nseqs1+nseqs2][j] = ENDALN;
+ aln_len[nseqs1+nseqs2] = prf_length2;
+ aln_weight[nseqs1+nseqs2] = seq_weight[i];
+ nseqs2++;
+ }
+ }
+
+ max_aln_length = prf_length1 + prf_length2+2;
+
+/*
+ calculate real length of profiles - removing gaps!
+*/
+ len1=0;
+ for (i=0;i<nseqs1;i++)
+ {
+ is=0;
+ for (j=0; j<MIN(aln_len[i],prf_length1); j++)
+ {
+ c = alignment[i][j];
+ if ((c !=gap_pos1) && (c != gap_pos2)) is++;
+ }
+ len1+=is;
+ }
+ len1/=(float)nseqs1;
+
+ len2=0;
+ for (i=nseqs1;i<nseqs2+nseqs1;i++)
+ {
+ is=0;
+ for (j=0; j<MIN(aln_len[i],prf_length2); j++)
+ {
+ c = alignment[i][j];
+ if ((c !=gap_pos1) && (c != gap_pos2)) is++;
+ }
+ len2+=is;
+ }
+ len2/=(float)nseqs2;
+
+ if (dnaflag)
+ {
+ scale=1.0;
+ if (strcmp(dnamtrxname, "iub") == 0)
+ {
+ matptr = swgapdnamt;
+ mat_xref = def_dna_xref;
+ }
+ else if (strcmp(dnamtrxname, "clustalw") == 0)
+ {
+ matptr = clustalvdnamt;
+ mat_xref = def_dna_xref;
+ scale=0.66;
+ }
+ else
+ {
+ matptr = userdnamat;
+ mat_xref = dna_xref;
+ }
+ maxres = get_matrix(matptr, mat_xref, matrix, neg_matrix, int_scale);
+ if (maxres == 0) return((sint)-1);
+/*
+ matrix[0][4]=transition_weight*matrix[0][0];
+ matrix[4][0]=transition_weight*matrix[0][0];
+ matrix[2][11]=transition_weight*matrix[0][0];
+ matrix[11][2]=transition_weight*matrix[0][0];
+ matrix[2][12]=transition_weight*matrix[0][0];
+ matrix[12][2]=transition_weight*matrix[0][0];
+*/
+/* fix suggested by Chanan Rubin at Compugen */
+ matrix[mat_xref[0]][mat_xref[4]]=transition_weight*matrix[0][0];
+ matrix[mat_xref[4]][mat_xref[0]]=transition_weight*matrix[0][0];
+ matrix[mat_xref[2]][mat_xref[11]]=transition_weight*matrix[0][0];
+ matrix[mat_xref[11]][mat_xref[2]]=transition_weight*matrix[0][0];
+ matrix[mat_xref[2]][mat_xref[12]]=transition_weight*matrix[0][0];
+ matrix[mat_xref[12]][mat_xref[2]]=transition_weight*matrix[0][0];
+
+ gapcoef1 = gapcoef2 = 100.0 * gap_open *scale;
+ lencoef1 = lencoef2 = 100.0 * gap_extend *scale;
+ }
+ else
+ {
+ if(len1==0 || len2==0) {
+ logmin=1.0;
+ logdiff=1.0;
+ }
+ else {
+ minlen = MIN(len1,len2);
+ logmin = 1.0/log10((double)minlen);
+ if (len2<len1)
+ logdiff = 1.0+0.5*log10((double)((float)len2/(float)len1));
+ else if (len1<len2)
+ logdiff = 1.0+0.5*log10((double)((float)len1/(float)len2));
+ else logdiff=1.0;
+ if(logdiff<0.9) logdiff=0.9;
+ }
+if(debug>0) fprintf(stdout,"%d %d logmin %f logdiff %f\n",
+(pint)len1,(pint)len2, logmin,logdiff);
+ scale=0.75;
+ if (strcmp(mtrxname, "blosum") == 0)
+ {
+ scale=0.75;
+ if (negative || distance_tree == FALSE) matptr = blosum40mt;
+ else if (pcid > 80.0)
+ {
+ matptr = blosum80mt;
+ }
+ else if (pcid > 60.0)
+ {
+ matptr = blosum62mt2;
+ }
+ else if (pcid > 40.0)
+ {
+ matptr = blosum45mt;
+ }
+ else if (pcid > 30.0)
+ {
+ scale=0.5;
+ matptr = blosum45mt;
+ }
+ else if (pcid > 20.0)
+ {
+ scale=0.6;
+ matptr = blosum45mt;
+ }
+ else
+ {
+ scale=0.6;
+ matptr = blosum30mt;
+ }
+ mat_xref = def_aa_xref;
+
+ }
+ else if (strcmp(mtrxname, "pam") == 0)
+ {
+ scale=0.75;
+ if (negative || distance_tree == FALSE) matptr = pam120mt;
+ else if (pcid > 80.0) matptr = pam20mt;
+ else if (pcid > 60.0) matptr = pam60mt;
+ else if (pcid > 40.0) matptr = pam120mt;
+ else matptr = pam350mt;
+ mat_xref = def_aa_xref;
+ }
+ else if (strcmp(mtrxname, "gonnet") == 0)
+ {
+ scale/=2.0;
+ if (negative || distance_tree == FALSE) matptr = gon250mt;
+ else if (pcid > 35.0)
+ {
+ matptr = gon80mt;
+ scale/=2.0;
+ }
+ else if (pcid > 25.0)
+ {
+ if(minlen<100) matptr = gon250mt;
+ else matptr = gon120mt;
+ }
+ else
+ {
+ if(minlen<100) matptr = gon350mt;
+ else matptr = gon160mt;
+ }
+ mat_xref = def_aa_xref;
+ int_scale /= 10;
+ }
+ else if (strcmp(mtrxname, "id") == 0)
+ {
+ matptr = idmat;
+ mat_xref = def_aa_xref;
+ }
+ else if(user_series)
+ {
+ matptr=NULL;
+ found=FALSE;
+ for(i=0;i<matseries.nmat;i++)
+ if(pcid>=matseries.mat[i].llimit && pcid<=matseries.mat[i].ulimit)
+ {
+ j=i;
+ found=TRUE;
+ break;
+ }
+ if(found==FALSE)
+ {
+ if(!error_given)
+ warning(
+"\nSeries matrix not found for sequence percent identity = %d.\n"
+"(Using first matrix in series as a default.)\n"
+"This alignment may not be optimal!\n"
+"SUGGESTION: Check your matrix series input file and try again.",(int)pcid);
+ error_given=TRUE;
+ j=0;
+ }
+if (debug>0) fprintf(stdout,"pcid %d matrix %d\n",(pint)pcid,(pint)j+1);
+
+ matptr = matseries.mat[j].matptr;
+ mat_xref = matseries.mat[j].aa_xref;
+/* this gives a scale of 0.5 for pcid=llimit and 1.0 for pcid=ulimit */
+ scale=0.5+(pcid-matseries.mat[j].llimit)/((matseries.mat[j].ulimit-matseries.mat[j].llimit)*2.0);
+ }
+ else
+ {
+ matptr = usermat;
+ mat_xref = aa_xref;
+ }
+if(debug>0) fprintf(stdout,"pcid %3.1f scale %3.1f\n",pcid,scale);
+ maxres = get_matrix(matptr, mat_xref, matrix, negative, int_scale);
+ if (maxres == 0)
+ {
+ fprintf(stdout,"Error: matrix %s not found\n", mtrxname);
+ return(-1);
+ }
+
+ if (negative) {
+ gapcoef1 = gapcoef2 = 100.0 * (float)(gap_open);
+ lencoef1 = lencoef2 = 100.0 * gap_extend;
+ }
+ else {
+ if (mat_avscore <= 0)
+ gapcoef1 = gapcoef2 = 100.0 * (float)(gap_open + logmin);
+ else
+ gapcoef1 = gapcoef2 = scale * mat_avscore * (float)(gap_open/(logdiff*logmin));
+ lencoef1 = lencoef2 = 100.0 * gap_extend;
+ }
+ }
+if (debug>0)
+{
+fprintf(stdout,"matavscore %d\n",mat_avscore);
+fprintf(stdout,"Gap Open1 %d Gap Open2 %d Gap Extend1 %d Gap Extend2 %d\n",
+ (pint)gapcoef1,(pint)gapcoef2, (pint)lencoef1,(pint)lencoef2);
+fprintf(stdout,"Matrix %s\n", mtrxname);
+}
+
+ profile1 = (sint **) ckalloc( (prf_length1+2) * sizeof (sint *) );
+ for(i=0; i<prf_length1+2; i++)
+ profile1[i] = (sint *) ckalloc( (LENCOL+2) * sizeof(sint) );
+
+ profile2 = (sint **) ckalloc( (prf_length2+2) * sizeof (sint *) );
+ for(i=0; i<prf_length2+2; i++)
+ profile2[i] = (sint *) ckalloc( (LENCOL+2) * sizeof(sint) );
+
+/*
+ calculate the Gap Coefficients.
+*/
+ gaps = (sint *) ckalloc( (max_aln_length+1) * sizeof (sint) );
+
+ if (switch_profiles == FALSE)
+ calc_gap_coeff(alignment, gaps, profile1, (struct_penalties1 && use_ss1), gap_penalty_mask1,
+ (sint)0, nseqs1, prf_length1, gapcoef1, lencoef1);
+ else
+ calc_gap_coeff(alignment, gaps, profile1, (struct_penalties2 && use_ss2), gap_penalty_mask2,
+ (sint)0, nseqs1, prf_length1, gapcoef1, lencoef1);
+/*
+ calculate the profile matrix.
+*/
+ calc_prf1(profile1, alignment, gaps, matrix,
+ aln_weight, prf_length1, (sint)0, nseqs1);
+
+if (debug>4)
+{
+extern char *amino_acid_codes;
+ for (j=0;j<=max_aa;j++)
+ fprintf(stdout,"%c ", amino_acid_codes[j]);
+ fprintf(stdout,"\n");
+ for (i=0;i<prf_length1;i++)
+ {
+ for (j=0;j<=max_aa;j++)
+ fprintf(stdout,"%d ", (pint)profile1[i+1][j]);
+ fprintf(stdout,"%d ", (pint)profile1[i+1][gap_pos1]);
+ fprintf(stdout,"%d ", (pint)profile1[i+1][gap_pos2]);
+ fprintf(stdout,"%d %d\n",(pint)profile1[i+1][GAPCOL],(pint)profile1[i+1][LENCOL]);
+ }
+}
+
+/*
+ calculate the Gap Coefficients.
+*/
+
+ if (switch_profiles == FALSE)
+ calc_gap_coeff(alignment, gaps, profile2, (struct_penalties2 && use_ss2), gap_penalty_mask2,
+ nseqs1, nseqs1+nseqs2, prf_length2, gapcoef2, lencoef2);
+ else
+ calc_gap_coeff(alignment, gaps, profile2, (struct_penalties1 && use_ss1), gap_penalty_mask1,
+ nseqs1, nseqs1+nseqs2, prf_length2, gapcoef2, lencoef2);
+/*
+ calculate the profile matrix.
+*/
+ calc_prf2(profile2, alignment, aln_weight,
+ prf_length2, nseqs1, nseqs1+nseqs2);
+
+ aln_weight=ckfree((void *)aln_weight);
+
+if (debug>4)
+{
+extern char *amino_acid_codes;
+ for (j=0;j<=max_aa;j++)
+ fprintf(stdout,"%c ", amino_acid_codes[j]);
+ fprintf(stdout,"\n");
+ for (i=0;i<prf_length2;i++)
+ {
+ for (j=0;j<=max_aa;j++)
+ fprintf(stdout,"%d ", (pint)profile2[i+1][j]);
+ fprintf(stdout,"%d ", (pint)profile2[i+1][gap_pos1]);
+ fprintf(stdout,"%d ", (pint)profile2[i+1][gap_pos2]);
+ fprintf(stdout,"%d %d\n",(pint)profile2[i+1][GAPCOL],(pint)profile2[i+1][LENCOL]);
+ }
+}
+
+ aln_path1 = (char *) ckalloc( (max_aln_length+1) * sizeof(char) );
+ aln_path2 = (char *) ckalloc( (max_aln_length+1) * sizeof(char) );
+
+
+/*
+ align the profiles
+*/
+/* use Myers and Miller to align two sequences */
+
+ last_print = 0;
+ print_ptr = 1;
+
+ sb1 = sb2 = 0;
+ se1 = prf_length1;
+ se2 = prf_length2;
+
+ HH = (lint *) ckalloc( (max_aln_length+1) * sizeof (lint) );
+ DD = (lint *) ckalloc( (max_aln_length+1) * sizeof (lint) );
+ RR = (lint *) ckalloc( (max_aln_length+1) * sizeof (lint) );
+ SS = (lint *) ckalloc( (max_aln_length+1) * sizeof (lint) );
+ gS = (lint *) ckalloc( (max_aln_length+1) * sizeof (lint) );
+ displ = (sint *) ckalloc( (max_aln_length+1) * sizeof (sint) );
+
+ score = pdiff(sb1, sb2, se1-sb1, se2-sb2, profile1[0][GAPCOL], profile1[prf_length1][GAPCOL]);
+
+ HH=ckfree((void *)HH);
+ DD=ckfree((void *)DD);
+ RR=ckfree((void *)RR);
+ SS=ckfree((void *)SS);
+ gS=ckfree((void *)gS);
+
+ ptracepath( &alignment_len);
+ /*for(i=0;i<=alignment_len;i++) {
+ fprintf(stdout, "%d\t%d\n", i, displ[i]);
+ }*/
+
+ displ=ckfree((void *)displ);
+
+ add_ggaps();
+
+ for (i=0;i<prf_length1+2;i++)
+ profile1[i]=ckfree((void *)profile1[i]);
+ profile1=ckfree((void *)profile1);
+
+ for (i=0;i<prf_length2+2;i++)
+ profile2[i]=ckfree((void *)profile2[i]);
+ profile2=ckfree((void *)profile2);
+
+ prf_length1 = alignment_len;
+
+ aln_path1=ckfree((void *)aln_path1);
+ aln_path2=ckfree((void *)aln_path2);
+
+ NumSeq = 0;
+ for (j=0;j<nseqs;j++)
+ {
+ if (group[j+1] == 1)
+ {
+ seqlen_array[j+1] = prf_length1;
+ realloc_seq(j+1,prf_length1);
+ for (i=0;i<prf_length1;i++)
+ seq_array[j+1][i+1] = alignment[NumSeq][i];
+ NumSeq++;
+ }
+ }
+ for (j=0;j<nseqs;j++)
+ {
+ if (group[j+1] == 2)
+ {
+ seqlen_array[j+1] = prf_length1;
+ seq_array[j+1] = (char *)realloc(seq_array[j+1], (prf_length1+2) * sizeof (char));
+ realloc_seq(j+1,prf_length1);
+ for (i=0;i<prf_length1;i++)
+ seq_array[j+1][i+1] = alignment[NumSeq][i];
+ NumSeq++;
+ }
+ }
+
+ for (i=0;i<nseqs1+nseqs2;i++)
+ alignment[i]=ckfree((void *)alignment[i]);
+ alignment=ckfree((void *)alignment);
+
+ aln_len=ckfree((void *)aln_len);
+ gaps=ckfree((void *)gaps);
+
+ return(score/100);
+}
+
+/*
+ * Helper for add_ggaps(): rebuild rows first..last-1 of alignment[] so that
+ * each one follows the traceback in path.
+ *
+ * For every column of the merged alignment a path code of 2 consumes the next
+ * stored residue of the row (ENDALN once the row has run out) and a code of 1
+ * writes gap_pos1.  Any other code is reported and the scratch column is left
+ * as it was.  Each rebuilt row is alignment_len long, is terminated with
+ * ENDALN and has its aln_len[] entry updated.
+ *
+ * scratch must have room for alignment_len+1 chars.
+ */
+static void add_ggaps_rows(sint first, sint last, char *path, char *scratch)
+{
+    sint row, col, src, newlen;
+
+    for (row = first; row < last; row++)
+    {
+        src = 0;
+        for (col = 0; col < alignment_len; col++)
+        {
+            switch (path[col])
+            {
+            case 2:
+                scratch[col] = (src < aln_len[row]) ? alignment[row][src] : ENDALN;
+                src++;
+                break;
+            case 1:
+                /* insertion in this group of sequences */
+                scratch[col] = gap_pos1;
+                break;
+            default:
+                fprintf(stdout,"Error in aln_path\n");
+                break;
+            }
+        }
+        scratch[col] = ENDALN;
+
+        /* grow the row to the merged length and copy the expanded text back */
+        newlen = alignment_len;
+        alignment[row] = (char *)realloc(alignment[row], (newlen+2) * sizeof (char));
+        for (col = 0; col < newlen; col++)
+            alignment[row][col] = scratch[col];
+        alignment[row][col] = ENDALN;
+        aln_len[row] = newlen;
+    }
+}
+
+/*
+ * Insert the gaps chosen by the profile alignment into every sequence of both
+ * groups, and into the gap-penalty / secondary-structure masks when those are
+ * in use.  Rows 0..nseqs1-1 follow aln_path1, rows nseqs1..nseqs1+nseqs2-1
+ * follow aln_path2.  With debug>0 the merged alignment is echoed to stdout.
+ */
+static void add_ggaps(void)
+{
+    sint seq, col;
+    char *scratch;
+
+    scratch = (char *) ckalloc( (alignment_len+1) * sizeof (char) );
+    add_ggaps_rows(0, nseqs1, aln_path1, scratch);
+    add_ggaps_rows(nseqs1, nseqs1+nseqs2, aln_path2, scratch);
+    scratch = ckfree((void *)scratch);
+
+    /* masks of the first profile */
+    if (struct_penalties1 != NONE)
+        gap_penalty_mask1 = add_ggaps_mask(gap_penalty_mask1,alignment_len,aln_path1,aln_path2);
+    if (struct_penalties1 == SECST)
+        sec_struct_mask1 = add_ggaps_mask(sec_struct_mask1,alignment_len,aln_path1,aln_path2);
+
+    /* masks of the second profile: note the swapped path arguments */
+    if (struct_penalties2 != NONE)
+        gap_penalty_mask2 = add_ggaps_mask(gap_penalty_mask2,alignment_len,aln_path2,aln_path1);
+    if (struct_penalties2 == SECST)
+        sec_struct_mask2 = add_ggaps_mask(sec_struct_mask2,alignment_len,aln_path2,aln_path1);
+
+    if (debug>0)
+    {
+        extern char *amino_acid_codes;
+        char c;
+
+        for (seq = 0; seq < nseqs1+nseqs2; seq++)
+        {
+            for (col = 0; col < alignment_len; col++)
+            {
+                char code = alignment[seq][col];
+
+                if (code == ENDALN)
+                    break;
+                if ((code == gap_pos1) || (code == gap_pos2))
+                    c = '-';
+                else
+                    c = amino_acid_codes[code];
+                fprintf(stdout,"%c", c);
+            }
+            fprintf(stdout,"\n\n");
+        }
+    }
+}
+
+/*
+ * Expand a per-column mask so that it lines up with the merged alignment.
+ *
+ * mask   - mask text to expand; it is realloc'ed to len+2 chars
+ * len    - length of the merged alignment
+ * path1  - traceback followed when switch_profiles is FALSE
+ * path2  - traceback followed otherwise
+ *
+ * A path code of 2 copies the next mask character, a code of 1 stores
+ * gap_pos1; other codes leave the column as allocated by ckalloc().
+ * Returns the (possibly moved) mask, terminated with '\0'.
+ */
+static char * add_ggaps_mask(char *mask, int len, char *path1, char *path2)
+{
+    char *path = (switch_profiles == FALSE) ? path1 : path2;
+    char *expanded;
+    int col, src;
+
+    expanded = (char *) ckalloc( (len+1) * sizeof (char) );
+
+    src = 0;
+    for (col = 0; col < len; col++)
+    {
+        if (path[col] == 2)
+        {
+            expanded[col] = mask[src];
+            src++;
+        }
+        else if (path[col] == 1)
+        {
+            expanded[col] = gap_pos1;
+        }
+    }
+
+    mask = (char *)realloc(mask,(len+2) * sizeof (char));
+    for (col = 0; col < len; col++)
+        mask[col] = expanded[col];
+    mask[col] ='\0';
+
+    expanded = ckfree((void *)expanded);
+
+    return mask;
+}
+
+/*
+ * Score row n of profile1 against row m of profile2: the sum of the products
+ * of matching columns 0..max_aa plus the gap_pos1 and gap_pos2 columns,
+ * divided by 10.
+ */
+static lint prfscore(sint n, sint m)
+{
+    const sint *row1 = profile1[n];
+    const sint *row2 = profile2[m];
+    lint total = 0;
+    sint col;
+
+    for (col = 0; col <= max_aa; col++)
+        total += (row1[col] * row2[col]);
+
+    total += (row1[gap_pos1] * row2[gap_pos1]);
+    total += (row1[gap_pos2] * row2[gap_pos2]);
+
+    return total/10;
+}
+
+/*
+ * Turn the edit script in displ[1..print_ptr-1] into the two column paths.
+ *
+ *   entry == 0 : one column, code 2 in both paths
+ *   entry  > 0 : that many columns, code 1 in aln_path1 and 2 in aln_path2
+ *   entry  < 0 : |entry| columns, code 2 in aln_path1 and 1 in aln_path2
+ *
+ * The total number of columns written is stored in *alen.
+ */
+static void ptracepath(sint *alen)
+{
+    sint op, last_op, run, step, col;
+    char code1, code2;
+
+    col = 0;
+    last_op = print_ptr-1;
+
+    for (op = 1; op <= last_op; ++op)
+    {
+        if (debug>1) fprintf(stdout,"%d ",(pint)displ[op]);
+
+        run = displ[op];
+        if (run == 0)
+        {
+            code1 = 2;
+            code2 = 2;
+            run = 1;
+        }
+        else if (run > 0)
+        {
+            code1 = 1;
+            code2 = 2;
+        }
+        else
+        {
+            code1 = 2;
+            code2 = 1;
+            run = -run;
+        }
+
+        for (step = 0; step < run; ++step)
+        {
+            aln_path1[col] = code1;
+            aln_path2[col] = code2;
+            ++col;
+        }
+    }
+    if (debug>1) fprintf(stdout,"\n");
+
+    (*alen) = col;
+}
+
+/*
+ * Append a deletion of k residues to the edit script in displ[].
+ * If the previous entry is already negative (last_print < 0) that entry is
+ * extended by k instead of starting a new one.  last_print always ends up
+ * holding the entry just written.
+ */
+static void pdel(sint k)
+{
+    if (last_print >= 0)
+    {
+        displ[print_ptr] = -(k);
+        last_print = displ[print_ptr];
+        print_ptr++;
+        return;
+    }
+
+    displ[print_ptr-1] -= k;
+    last_print = displ[print_ptr-1];
+}
+
+/*
+ * Append an insertion of k residues to the edit script in displ[].
+ * When the previous entry is negative (last_print < 0) the insertion takes
+ * its slot and the negative entry is re-appended after it; last_print is
+ * left unchanged in that case.
+ */
+static void padd(sint k)
+{
+    if (last_print >= 0)
+    {
+        displ[print_ptr] = k;
+        last_print = displ[print_ptr];
+        print_ptr++;
+        return;
+    }
+
+    displ[print_ptr-1] = k;
+    displ[print_ptr] = last_print;
+    print_ptr++;
+}
+
+/*
+ * Append a 0 entry (one aligned column) to the edit script in displ[] and
+ * reset last_print to 0.
+ */
+static void palign(void)
+{
+    last_print = 0;
+    displ[print_ptr] = last_print;
+    print_ptr++;
+}
+
+/*
+ * pdiff: recursively align rows A+1..A+M of profile1 with rows B+1..B+N of
+ * profile2 (the caller describes this step as "use Myers and Miller"),
+ * appending the edit operations to displ[] through padd(), pdel() and
+ * palign().
+ *
+ * go1 == 0 suppresses the gap-opening charge at the start of the sub-problem
+ * and go2 == 0 suppresses it at the end.  The recursive calls at the bottom
+ * pass 0 on the side of the split where a gap runs across it (type 2) and 1
+ * otherwise.
+ *
+ * Returns midh, the best score found for this sub-problem.  The file-scope
+ * arrays HH, DD, RR, SS and gS are used as scratch and are overwritten.
+ *
+ * The scalar work variables are declared static, so one copy is shared by
+ * every level of the recursion; after the recursive calls only the automatic
+ * variables midi, midj, type and midh (and the parameters) are relied on.
+ */
+static lint pdiff(sint A,sint B,sint M,sint N,sint go1, sint go2)
+{
+ sint midi,midj,type;
+ lint midh;
+
+ static lint t, tl, g, h;
+
+{ static sint i,j; /* inner scope: scratch for the boundary cases and the two scans */
+ static lint hh, f, e, s;
+
+/* Boundary cases: M <= 1 or N == 0 */
+if (debug>2) fprintf(stdout,"A %d B %d M %d N %d midi %d go1 %d go2 %d\n",
+(pint)A,(pint)B,(pint)M,(pint)N,(pint)M/2,(pint)go1,(pint)go2);
+
+/* if sequence B is empty.... */
+
+ if(N<=0) {
+
+/* if sequence A is not empty.... */
+
+ if(M>0) {
+
+/* delete residues A[1] to A[M] */
+
+ pdel(M);
+ }
+ return(-gap_penalty1(A,B,M));
+ }
+
+/* if sequence A is empty.... */
+
+ if(M<=1) {
+ if(M<=0) {
+
+/* insert residues B[1] to B[N] */
+
+ padd(N);
+ return(-gap_penalty2(A,B,N));
+ }
+
+/* if sequence A has just one residue....
+ * start from the score of leaving it unaligned (midj == 0), then try
+ * aligning it with each B[j] in turn and keep the best. */
+
+ if (go1 == 0)
+ midh = -gap_penalty1(A+1,B+1,N);
+ else
+ midh = -gap_penalty2(A+1,B,1)-gap_penalty1(A+1,B+1,N);
+ midj = 0;
+ for(j=1;j<=N;j++) {
+ hh = -gap_penalty1(A,B+1,j-1) + prfscore(A+1,B+j)
+ -gap_penalty1(A+1,B+j+1,N-j);
+ if(hh>midh) {
+ midh = hh;
+ midj = j;
+ }
+ }
+
+ if(midj==0) {
+ padd(N);
+ pdel(1);
+ }
+ else {
+ if(midj>1) padd(midj-1);
+ palign();
+ if(midj<N) padd(N-midj);
+ }
+ return midh;
+ }
+
+
+/* Divide sequence A in half: midi */
+
+ midi = M / 2;
+
+/* In a forward phase, calculate all HH[j] and DD[j]
+ * (rows 1..midi of the sub-problem, scanned top-down) */
+
+ HH[0] = 0.0;
+ t = -open_penalty1(A,B+1);
+ tl = -ext_penalty1(A,B+1);
+ for(j=1;j<=N;j++) {
+ HH[j] = t = t+tl;
+ DD[j] = t-open_penalty2(A+1,B+j);
+ }
+
+ if (go1 == 0) t = 0;
+ else t = -open_penalty2(A+1,B);
+ tl = -ext_penalty2(A+1,B);
+ for(i=1;i<=midi;i++) {
+ s = HH[0]; /* s carries the previous row's value one column to the left */
+ HH[0] = hh = t = t+tl;
+ f = t-open_penalty1(A+i,B+1);
+
+ for(j=1;j<=N;j++) {
+ g = open_penalty1(A+i,B+j);
+ h = ext_penalty1(A+i,B+j);
+ if ((hh=hh-g-h) > (f=f-h)) f=hh;
+ g = open_penalty2(A+i,B+j);
+ h = ext_penalty2(A+i,B+j);
+ if ((hh=HH[j]-g-h) > (e=DD[j]-h)) e=hh;
+ hh = s + prfscore(A+i, B+j);
+ if (f>hh) hh = f;
+ if (e>hh) hh = e;
+
+ s = HH[j];
+ HH[j] = hh;
+ DD[j] = e;
+
+ }
+ }
+
+ DD[0]=HH[0];
+
+/* In a reverse phase, calculate all RR[j] and SS[j]
+ * (rows M-1 down to midi, scanned bottom-up; gS[j] records the
+ * open_penalty2 value that goes with SS[j]) */
+
+ RR[N]=0.0;
+ tl = 0.0;
+ for(j=N-1;j>=0;j--) {
+ g = -open_penalty1(A+M,B+j+1);
+ tl -= ext_penalty1(A+M,B+j+1);
+ RR[j] = g+tl;
+ SS[j] = RR[j]-open_penalty2(A+M,B+j);
+ gS[j] = open_penalty2(A+M,B+j);
+ }
+
+ tl = 0.0;
+ for(i=M-1;i>=midi;i--) {
+ s = RR[N];
+ if (go2 == 0) g = 0;
+ else g = -open_penalty2(A+i+1,B+N);
+ tl -= ext_penalty2(A+i+1,B+N);
+ RR[N] = hh = g+tl;
+ t = open_penalty1(A+i,B+N);
+ f = RR[N]-t;
+
+ for(j=N-1;j>=0;j--) {
+ g = open_penalty1(A+i,B+j+1);
+ h = ext_penalty1(A+i,B+j+1);
+ if ((hh=hh-g-h) > (f=f-h-g+t)) f=hh;
+ t = g;
+ g = open_penalty2(A+i+1,B+j);
+ h = ext_penalty2(A+i+1,B+j);
+ hh=RR[j]-g-h;
+ if (i==(M-1)) {
+ e=SS[j]-h;
+ }
+ else {
+ e=SS[j]-h-g+open_penalty2(A+i+2,B+j);
+ gS[j] = g;
+ }
+ if (hh > e) e=hh;
+ hh = s + prfscore(A+i+1, B+j+1);
+ if (f>hh) hh = f;
+ if (e>hh) hh = e;
+
+ s = RR[j];
+ RR[j] = hh;
+ SS[j] = e;
+
+ }
+ }
+ SS[N]=RR[N];
+ gS[N] = open_penalty2(A+midi+1,B+N);
+
+/* find midj, such that HH[j]+RR[j] or DD[j]+SS[j]+gap is the maximum
+ * (type 1 = HH+RR, type 2 = DD+SS+gS; gS[j] is presumably added back because
+ * DD[j] and SS[j] each already include an opening charge -- confirm) */
+
+ midh=HH[0]+RR[0];
+ midj=0;
+ type=1;
+ for(j=0;j<=N;j++) {
+ hh = HH[j] + RR[j];
+ if(hh>=midh)
+ if(hh>midh || (HH[j]!=DD[j] && RR[j]==SS[j])) {
+ midh=hh;
+ midj=j;
+ }
+ }
+
+ for(j=N;j>=0;j--) {
+ hh = DD[j] + SS[j] + gS[j];
+ if(hh>midh) {
+ midh=hh;
+ midj=j;
+ type=2;
+ }
+ }
+}
+
+/* Conquer recursively around midpoint */
+
+
+ if(type==1) { /* Type 1 gaps */
+if (debug>2) fprintf(stdout,"Type 1,1: midj %d\n",(pint)midj);
+ pdiff(A,B,midi,midj,go1,1);
+if (debug>2) fprintf(stdout,"Type 1,2: midj %d\n",(pint)midj);
+ pdiff(A+midi,B+midj,M-midi,N-midj,1,go2);
+ }
+ else { /* Type 2: the halves cover midi-1 and M-midi-1 rows; pdel(2) emits the two rows in between */
+if (debug>2) fprintf(stdout,"Type 2,1: midj %d\n",(pint)midj);
+ pdiff(A,B,midi-1,midj,go1, 0);
+ pdel(2);
+if (debug>2) fprintf(stdout,"Type 2,2: midj %d\n",(pint)midj);
+ pdiff(A+midi+1,B+midj,M-midi-1,N-midj,0,go2);
+ }
+
+ return midh; /* Return the score of the best alignment */
+}
+
+/*
+ * Gap-opening score at A[i] / B[j]: the sum of the GAPCOL entries of
+ * profile1 row i and profile2 row j.  Returns 0 when end gaps are not
+ * penalised and i is the first (0) or last (prf_length1) position of
+ * profile 1.
+ */
+static sint open_penalty1(sint i, sint j)
+{
+    if (!endgappenalties && (i == 0 || i == prf_length1))
+        return 0;
+
+    return profile2[j][GAPCOL] + profile1[i][GAPCOL];
+}
+
+/*
+ * Gap-extension score at A[i] / B[j]: the LENCOL entry of profile2 row j.
+ * Returns 0 when end gaps are not penalised and i is the first (0) or last
+ * (prf_length1) position of profile 1.
+ */
+static sint ext_penalty1(sint i, sint j)
+{
+    if (!endgappenalties && (i == 0 || i == prf_length1))
+        return 0;
+
+    return profile2[j][LENCOL];
+}
+
+/*
+ * Score for a gap of length k at A[i] / B[j]:
+ *     GAPCOL(profile2 row j) + GAPCOL(profile1 row i) + k * extension
+ * Returns 0 for k <= 0, and also when end gaps are not penalised and i is
+ * the first (0) or last (prf_length1) position of profile 1.
+ */
+static sint gap_penalty1(sint i, sint j, sint k)
+{
+    sint covered;
+    sint extension = 0;
+
+    if (k <= 0)
+        return 0;
+    if (!endgappenalties && (i == 0 || i == prf_length1))
+        return 0;
+
+    /*
+     * The extension term is the LENCOL entry of the last profile2 row the gap
+     * covers, counting from row j and never reaching row prf_length2; it stays
+     * 0 when no such row exists.
+     */
+    covered = prf_length2 - j;
+    if (covered > k)
+        covered = k;
+    if (covered > 0)
+        extension = profile2[j + covered - 1][LENCOL];
+
+    return profile2[j][GAPCOL] + profile1[i][GAPCOL] + extension * k;
+}
+/*
+ * Gap-opening score at A[i] / B[j]: the sum of the GAPCOL entries of
+ * profile1 row i and profile2 row j.  Returns 0 when end gaps are not
+ * penalised and j is the first (0) or last (prf_length2) position of
+ * profile 2.
+ */
+static sint open_penalty2(sint i, sint j)
+{
+    if (!endgappenalties && (j == 0 || j == prf_length2))
+        return 0;
+
+    return profile1[i][GAPCOL] + profile2[j][GAPCOL];
+}
+
+/*
+ * Gap-extension score at A[i] / B[j]: the LENCOL entry of profile1 row i.
+ * Returns 0 when end gaps are not penalised and j is the first (0) or last
+ * (prf_length2) position of profile 2.
+ */
+static sint ext_penalty2(sint i, sint j)
+{
+    if (!endgappenalties && (j == 0 || j == prf_length2))
+        return 0;
+
+    return profile1[i][LENCOL];
+}
+
+/*
+ * Score for a gap of length k at A[i] / B[j]:
+ *     GAPCOL(profile1 row i) + GAPCOL(profile2 row j) + k * extension
+ * Returns 0 for k <= 0, and also when end gaps are not penalised and j is
+ * the first (0) or last (prf_length2) position of profile 2.
+ */
+static sint gap_penalty2(sint i, sint j, sint k)
+{
+    sint covered;
+    sint extension = 0;
+
+    if (k <= 0)
+        return 0;
+    if (!endgappenalties && (j == 0 || j == prf_length2))
+        return 0;
+
+    /*
+     * The extension term is the LENCOL entry of the last profile1 row the gap
+     * covers, counting from row i and never reaching row prf_length1; it stays
+     * 0 when no such row exists.
+     */
+    covered = prf_length1 - i;
+    if (covered > k)
+        covered = k;
+    if (covered > 0)
+        extension = profile1[i + covered - 1][LENCOL];
+
+    return profile1[i][GAPCOL] + profile2[j][GAPCOL] + extension * k;
+}
diff --git a/prfalign1.c b/prfalign1.c
new file mode 100644
index 0000000..450bb94
--- /dev/null
+++ b/prfalign1.c
@@ -0,0 +1,1349 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include "pcma.h"
+/*#include "new.h" JP */
+#define ENDALN 127
+
+#define MAX(a,b) ((a)>(b)?(a):(b))
+#define MIN(a,b) ((a)<(b)?(a):(b))
+
+/*
+ * Prototypes
+ */
+static lint pdiff(sint A,sint B,sint i,sint j,sint go1,sint go2);
+static lint prfscore(sint n, sint m);
+static sint gap_penalty1(sint i, sint j,sint k);
+static sint open_penalty1(sint i, sint j);
+static sint ext_penalty1(sint i, sint j);
+static sint gap_penalty2(sint i, sint j,sint k);
+static sint open_penalty2(sint i, sint j);
+static sint ext_penalty2(sint i, sint j);
+static void padd(sint k);
+static void pdel(sint k);
+static void palign(void);
+static void ptracepath(sint *alen);
+static void add_ggaps(void);
+static char * add_ggaps_mask(char *mask, int len, char *path1, char *path2);
+
+/*
+ * Global variables
+ */
+extern double **tmat;
+extern float gap_open, gap_extend;
+extern float transition_weight;
+extern sint gap_pos1, gap_pos2;
+extern sint max_aa;
+extern sint nseqs;
+extern sint *seqlen_array;
+extern sint *seq_weight;
+extern sint debug;
+extern Boolean neg_matrix;
+extern sint mat_avscore;
+extern short blosum30mt[], blosum40mt[], blosum45mt[];
+extern short blosum62mt2[], blosum80mt[];
+extern short pam20mt[], pam60mt[];
+extern short pam120mt[], pam160mt[], pam350mt[];
+extern short gon40mt[], gon80mt[];
+extern short gon120mt[], gon160mt[], gon250mt[], gon350mt[];
+extern short clustalvdnamt[],swgapdnamt[];
+extern short idmat[];
+extern short usermat[];
+extern short userdnamat[];
+extern Boolean user_series;
+extern UserMatSeries matseries;
+
+extern short def_dna_xref[],def_aa_xref[],dna_xref[],aa_xref[];
+extern sint max_aln_length;
+extern Boolean distance_tree;
+extern Boolean dnaflag;
+extern char mtrxname[];
+extern char dnamtrxname[];
+extern char **seq_array;
+extern char *amino_acid_codes;
+extern char *gap_penalty_mask1,*gap_penalty_mask2;
+extern char *sec_struct_mask1,*sec_struct_mask2;
+extern sint struct_penalties1, struct_penalties2;
+extern Boolean use_ss1, use_ss2;
+extern Boolean endgappenalties;
+
+/* JP */
+extern SN ****glib;
+extern streeptr *groupptr;
+extern char *am;
+
+static sint print_ptr,last_print; /* edit-script cursor and last entry; prfalign1() resets them to 1 and 0 before calling pdiff() */
+static sint *displ; /* edit script; prfalign1() allocates max_aln_length+1 entries before calling pdiff() */
+
+static char **alignment; /* working copy of the sequences: group-1 rows first, then group-2 rows */
+static sint *aln_len; /* length of each row of alignment[] */
+static sint *aln_weight; /* per-row weight copied from seq_weight[]; freed once calc_prf2() has run */
+static char *aln_path1, *aln_path2; /* per-column traceback codes for each profile (2 = residue, 1 = gap), max_aln_length+1 chars each */
+static sint alignment_len; /* number of columns in the merged alignment, filled in by ptracepath() */
+static sint **profile1, **profile2; /* profile matrices: prf_length+2 rows of LENCOL+2 entries */
+static lint *HH, *DD, *RR, *SS; /* score work arrays of max_aln_length+1 entries, allocated around the pdiff() call */
+static lint *gS; /* allocated and freed together with HH/DD/RR/SS */
+static sint matrix[NUMRES][NUMRES]; /* substitution matrix filled in by get_matrix() */
+static sint nseqs1, nseqs2; /* number of sequences whose group[] entry is 1 / 2 */
+static sint prf_length1, prf_length2; /* longest sequence length in group 1 / group 2 */
+static sint *gaps; /* max_aln_length+1 entries, passed to calc_gap_coeff() and calc_prf1() */
+static sint gapcoef1,gapcoef2; /* gap-opening coefficients handed to calc_gap_coeff() */
+static sint lencoef1,lencoef2; /* gap-extension coefficients handed to calc_gap_coeff() */
+static Boolean switch_profiles; /* prfalign1() always sets this to FALSE (the swapping code is commented out) */
+
+lint prfalign1(sint *group, sint gi, sint gj)
+{
+
+ static Boolean found;
+ static Boolean negative;
+ static Boolean error_given=FALSE;
+ static sint i, j, count = 0;
+ static sint NumSeq;
+ static sint len, len1, len2, is, minlen;
+ static sint se1, se2, sb1, sb2;
+ static sint maxres;
+ static sint int_scale;
+ static short *matptr;
+ static short *mat_xref;
+ static char c;
+ static lint score;
+ static float scale;
+ static double logmin,logdiff;
+ static double pcid;
+
+ /* JP */
+ static sint k, n;
+ static sint first, second;
+ static sint tmpfirst, tmpsecond, nblocks;
+ static sint tpairs, idpairs;
+ static sint *S;
+ static SN **snd;
+ static sint ngaps, nonegaps;
+
+
+ alignment = (char **) ckalloc( nseqs * sizeof (char *) );
+ aln_len = (sint *) ckalloc( nseqs * sizeof (sint) );
+ aln_weight = (sint *) ckalloc( nseqs * sizeof (sint) );
+
+ /*for (i=0;i<nseqs;i++)
+ if (aligned[i+1] == 0) group[i+1] = 0;
+ */
+
+ nseqs1 = nseqs2 = 0;
+ for (i=0;i<nseqs;i++)
+ {
+ if (group[i+1] == 1) nseqs1++;
+ else if (group[i+1] == 2) nseqs2++;
+ }
+
+if(debug>1) {
+ for(i=0;i<nseqs;i++)
+ {
+ fprintf(stdout, "%d ", group[i+1]);
+ }
+ fprintf(stdout, "\n");
+}
+
+ if ((nseqs1 == 0) || (nseqs2 == 0)) return(0.0);
+
+ /*if (nseqs2 > nseqs1)
+ {
+ switch_profiles = TRUE;
+ for (i=0;i<nseqs;i++)
+ {
+ if (group[i+1] == 1) group[i+1] = 2;
+ else if (group[i+1] == 2) group[i+1] = 1;
+ }
+ gi = k; gi = gj; gj = k;
+ }
+ else
+ switch_profiles = FALSE;
+ */
+ switch_profiles = FALSE;
+
+
+ int_scale = 100;
+
+/*
+ calculate the mean of the sequence pc identities between the two groups
+*/
+ count = 0;
+ pcid = 0.0;
+ negative=neg_matrix;
+ for (i=0;i<nseqs;i++)
+ {
+ if (group[i+1] == 1)
+ for (j=0;j<nseqs;j++)
+ if (group[j+1] == 2)
+ {
+ count++;
+ pcid += tmat[i+1][j+1];
+ }
+ }
+
+ pcid = pcid/(float)count;
+if (debug > 3) fprintf(stdout,"mean tmat %3.1f\n", pcid);
+
+/* JP */
+/*fprintf(stdout, "%d %c %d %c\n", gap_pos1, amino_acid_codes[gap_pos1], gap_pos2, amino_acid_codes[gap_pos2]);
+for(i=0;i<NUMRES;i++) {
+ fprintf(stdout, "%c ", amino_acid_codes[i]);
+}*/
+
+
+/*
+ Make the first profile.
+*/
+ prf_length1 = 0;
+ for (i=0;i<nseqs;i++)
+ if (group[i+1] == 1)
+ if(seqlen_array[i+1]>prf_length1) prf_length1=seqlen_array[i+1];
+
+ nseqs1 = 0;
+if (debug>3) fprintf(stdout,"sequences profile 1:\n");
+ for (i=0;i<nseqs;i++)
+ {
+ if (group[i+1] == 1)
+ {
+if (debug>3) {
+extern char **names;
+fprintf(stdout,"%s\n",names[i+1]);
+}
+ len = seqlen_array[i+1];
+ alignment[nseqs1] = (char *) ckalloc( (prf_length1+2) * sizeof (char) );
+ /*JP */
+ if(debug>3)fprintf(stdout, "\n");
+ for (j=0;j<len;j++){
+ alignment[nseqs1][j] = seq_array[i+1][j+1];
+ /* JP */
+ /* fprintf(stdout, "%d", alignment[nseqs1][j]); */
+ if(debug>3)fprintf(stdout, "%c", amino_acid_codes[alignment[nseqs1][j]]);
+ /* JP */
+ }
+ for(j=len;j<prf_length1;j++) {
+ alignment[nseqs1][j+1]=gap_pos1;
+ /* JP */
+ if(debug>3)fprintf(stdout, "%c", amino_acid_codes[alignment[nseqs1][j]]);
+ /* JP */
+ }
+ /*JP */
+ if(debug>3)fprintf(stdout, "\n");
+ alignment[nseqs1][prf_length1+1] = ENDALN;
+ aln_len[nseqs1] = prf_length1;
+ aln_weight[nseqs1] = seq_weight[i];
+ nseqs1++;
+ }
+ }
+
+/*
+ Make the second profile.
+*/
+ prf_length2 = 0;
+ for (i=0;i<nseqs;i++)
+ if (group[i+1] == 2)
+ if(seqlen_array[i+1]>prf_length2) prf_length2=seqlen_array[i+1];
+
+ nseqs2 = 0;
+if (debug>3) fprintf(stdout,"sequences profile 2:\n");
+ for (i=0;i<nseqs;i++)
+ {
+ if (group[i+1] == 2)
+ {
+if (debug>3) {
+extern char **names;
+fprintf(stdout,"%s\n",names[i+1]);
+}
+ len = seqlen_array[i+1];
+ alignment[nseqs1+nseqs2] =
+ (char *) ckalloc( (prf_length2+2) * sizeof (char) );
+ for (j=0;j<len;j++)
+ alignment[nseqs1+nseqs2][j] = seq_array[i+1][j+1];
+ for(j=len;j<prf_length2;j++)
+ alignment[nseqs1+nseqs2][j+1]=gap_pos1;
+ alignment[nseqs1+nseqs2][j] = ENDALN;
+ aln_len[nseqs1+nseqs2] = prf_length2;
+ aln_weight[nseqs1+nseqs2] = seq_weight[i];
+ nseqs2++;
+ }
+ }
+
+ max_aln_length = prf_length1 + prf_length2+2;
+
+/*
+ calculate real length of profiles - removing gaps!
+*/
+ len1=0;
+ for (i=0;i<nseqs1;i++)
+ {
+ is=0;
+ for (j=0; j<MIN(aln_len[i],prf_length1); j++)
+ {
+ c = alignment[i][j];
+ if ((c !=gap_pos1) && (c != gap_pos2)) is++;
+ }
+ len1+=is;
+ }
+ len1/=(float)nseqs1;
+
+ len2=0;
+ for (i=nseqs1;i<nseqs2+nseqs1;i++)
+ {
+ is=0;
+ for (j=0; j<MIN(aln_len[i],prf_length2); j++)
+ {
+ c = alignment[i][j];
+ if ((c !=gap_pos1) && (c != gap_pos2)) is++;
+ }
+ len2+=is;
+ }
+ len2/=(float)nseqs2;
+
+ if (dnaflag)
+ {
+ scale=1.0;
+ if (strcmp(dnamtrxname, "iub") == 0)
+ {
+ matptr = swgapdnamt;
+ mat_xref = def_dna_xref;
+ }
+ else if (strcmp(dnamtrxname, "clustalw") == 0)
+ {
+ matptr = clustalvdnamt;
+ mat_xref = def_dna_xref;
+ scale=0.66;
+ }
+ else
+ {
+ matptr = userdnamat;
+ mat_xref = dna_xref;
+ }
+ maxres = get_matrix(matptr, mat_xref, matrix, neg_matrix, int_scale);
+ if (maxres == 0) return((sint)-1);
+/*
+ matrix[0][4]=transition_weight*matrix[0][0];
+ matrix[4][0]=transition_weight*matrix[0][0];
+ matrix[2][11]=transition_weight*matrix[0][0];
+ matrix[11][2]=transition_weight*matrix[0][0];
+ matrix[2][12]=transition_weight*matrix[0][0];
+ matrix[12][2]=transition_weight*matrix[0][0];
+*/
+/* fix suggested by Chanan Rubin at Compugen */
+ matrix[mat_xref[0]][mat_xref[4]]=transition_weight*matrix[0][0];
+ matrix[mat_xref[4]][mat_xref[0]]=transition_weight*matrix[0][0];
+ matrix[mat_xref[2]][mat_xref[11]]=transition_weight*matrix[0][0];
+ matrix[mat_xref[11]][mat_xref[2]]=transition_weight*matrix[0][0];
+ matrix[mat_xref[2]][mat_xref[12]]=transition_weight*matrix[0][0];
+ matrix[mat_xref[12]][mat_xref[2]]=transition_weight*matrix[0][0];
+
+ gapcoef1 = gapcoef2 = 100.0 * gap_open *scale;
+ lencoef1 = lencoef2 = 100.0 * gap_extend *scale;
+ }
+ else
+ {
+ if(len1==0 || len2==0) {
+ logmin=1.0;
+ logdiff=1.0;
+ }
+ else {
+ minlen = MIN(len1,len2);
+ logmin = 1.0/log10((double)minlen);
+ if (len2<len1)
+ logdiff = 1.0+0.5*log10((double)((float)len2/(float)len1));
+ else if (len1<len2)
+ logdiff = 1.0+0.5*log10((double)((float)len1/(float)len2));
+ else logdiff=1.0;
+ if(logdiff<0.9) logdiff=0.9;
+ }
+if(debug>3) fprintf(stdout,"%d %d logmin %f logdiff %f\n",
+(pint)len1,(pint)len2, logmin,logdiff);
+ scale=0.75;
+ if (strcmp(mtrxname, "blosum") == 0)
+ {
+ scale=0.75;
+ if (negative || distance_tree == FALSE) matptr = blosum40mt;
+ else if (pcid > 80.0)
+ {
+ matptr = blosum80mt;
+ }
+ else if (pcid > 60.0)
+ {
+ matptr = blosum62mt2;
+ }
+ else if (pcid > 40.0)
+ {
+ matptr = blosum45mt;
+ }
+ else if (pcid > 30.0)
+ {
+ scale=0.5;
+ matptr = blosum45mt;
+ }
+ else if (pcid > 20.0)
+ {
+ scale=0.6;
+ matptr = blosum45mt;
+ }
+ else
+ {
+ scale=0.6;
+ matptr = blosum30mt;
+ }
+ mat_xref = def_aa_xref;
+
+ }
+ else if (strcmp(mtrxname, "pam") == 0)
+ {
+ scale=0.75;
+ if (negative || distance_tree == FALSE) matptr = pam120mt;
+ else if (pcid > 80.0) matptr = pam20mt;
+ else if (pcid > 60.0) matptr = pam60mt;
+ else if (pcid > 40.0) matptr = pam120mt;
+ else matptr = pam350mt;
+ mat_xref = def_aa_xref;
+ }
+ else if (strcmp(mtrxname, "gonnet") == 0)
+ {
+ scale/=2.0;
+ if (negative || distance_tree == FALSE) matptr = gon250mt;
+ else if (pcid > 35.0)
+ {
+ matptr = gon80mt;
+ scale/=2.0;
+ }
+ else if (pcid > 25.0)
+ {
+ if(minlen<100) matptr = gon250mt;
+ else matptr = gon120mt;
+ }
+ else
+ {
+ if(minlen<100) matptr = gon350mt;
+ else matptr = gon160mt;
+ }
+ mat_xref = def_aa_xref;
+ int_scale /= 10;
+ }
+ else if (strcmp(mtrxname, "id") == 0)
+ {
+ matptr = idmat;
+ mat_xref = def_aa_xref;
+ }
+ else if(user_series)
+ {
+ matptr=NULL;
+ found=FALSE;
+ for(i=0;i<matseries.nmat;i++)
+ if(pcid>=matseries.mat[i].llimit && pcid<=matseries.mat[i].ulimit)
+ {
+ j=i;
+ found=TRUE;
+ break;
+ }
+ if(found==FALSE)
+ {
+ if(!error_given)
+ warning(
+"\nSeries matrix not found for sequence percent identity = %d.\n"
+"(Using first matrix in series as a default.)\n"
+"This alignment may not be optimal!\n"
+"SUGGESTION: Check your matrix series input file and try again.",(int)pcid);
+ error_given=TRUE;
+ j=0;
+ }
+if (debug>3) fprintf(stdout,"pcid %d matrix %d\n",(pint)pcid,(pint)j+1);
+
+ matptr = matseries.mat[j].matptr;
+ mat_xref = matseries.mat[j].aa_xref;
+/* this gives a scale of 0.5 for pcid=llimit and 1.0 for pcid=ulimit */
+ scale=0.5+(pcid-matseries.mat[j].llimit)/((matseries.mat[j].ulimit-matseries.mat[j].llimit)*2.0);
+ }
+ else
+ {
+ matptr = usermat;
+ mat_xref = aa_xref;
+ }
+if(debug>3) fprintf(stdout,"pcid %3.1f scale %3.1f\n",pcid,scale);
+ maxres = get_matrix(matptr, mat_xref, matrix, negative, int_scale);
+ if (maxres == 0)
+ {
+ fprintf(stdout,"Error: matrix %s not found\n", mtrxname);
+ return(-1);
+ }
+
+ if (negative) {
+ gapcoef1 = gapcoef2 = 100.0 * (float)(gap_open);
+ lencoef1 = lencoef2 = 100.0 * gap_extend;
+ }
+ else {
+ if (mat_avscore <= 0)
+ gapcoef1 = gapcoef2 = 100.0 * (float)(gap_open + logmin);
+ else
+ gapcoef1 = gapcoef2 = scale * mat_avscore * (float)(gap_open/(logdiff*logmin));
+ lencoef1 = lencoef2 = 100.0 * gap_extend;
+ }
+ }
+if (debug>3)
+{
+fprintf(stdout,"matavscore %d\n",mat_avscore);
+fprintf(stdout,"Gap Open1 %d Gap Open2 %d Gap Extend1 %d Gap Extend2 %d\n",
+ (pint)gapcoef1,(pint)gapcoef2, (pint)lencoef1,(pint)lencoef2);
+fprintf(stdout,"Matrix %s\n", mtrxname);
+}
+
+ profile1 = (sint **) ckalloc( (prf_length1+2) * sizeof (sint *) );
+ for(i=0; i<prf_length1+2; i++)
+ profile1[i] = (sint *) ckalloc( (LENCOL+2) * sizeof(sint) );
+
+ profile2 = (sint **) ckalloc( (prf_length2+2) * sizeof (sint *) );
+ for(i=0; i<prf_length2+2; i++)
+ profile2[i] = (sint *) ckalloc( (LENCOL+2) * sizeof(sint) );
+
+/*
+ calculate the Gap Coefficients.
+*/
+ gaps = (sint *) ckalloc( (max_aln_length+1) * sizeof (sint) );
+
+ if (switch_profiles == FALSE)
+ calc_gap_coeff(alignment, gaps, profile1, (struct_penalties1 && use_ss1), gap_penalty_mask1,
+ (sint)0, nseqs1, prf_length1, gapcoef1, lencoef1);
+ else
+ calc_gap_coeff(alignment, gaps, profile1, (struct_penalties2 && use_ss2), gap_penalty_mask2,
+ (sint)0, nseqs1, prf_length1, gapcoef1, lencoef1);
+/*
+ calculate the profile matrix.
+*/
+ calc_prf1(profile1, alignment, gaps, matrix,
+ aln_weight, prf_length1, (sint)0, nseqs1);
+
+if (debug>4)
+{
+extern char *amino_acid_codes;
+ for (j=0;j<=max_aa;j++)
+ fprintf(stdout,"%c ", amino_acid_codes[j]);
+ fprintf(stdout,"\n");
+ for (i=0;i<prf_length1;i++)
+ {
+ for (j=0;j<=max_aa;j++)
+ fprintf(stdout,"%d ", (pint)profile1[i+1][j]);
+ fprintf(stdout,"%d ", (pint)profile1[i+1][gap_pos1]);
+ fprintf(stdout,"%d ", (pint)profile1[i+1][gap_pos2]);
+ fprintf(stdout,"%d %d\n",(pint)profile1[i+1][GAPCOL],(pint)profile1[i+1][LENCOL]);
+ }
+}
+
+/*
+ calculate the Gap Coefficients.
+*/
+
+ if (switch_profiles == FALSE)
+ calc_gap_coeff(alignment, gaps, profile2, (struct_penalties2 && use_ss2), gap_penalty_mask2,
+ nseqs1, nseqs1+nseqs2, prf_length2, gapcoef2, lencoef2);
+ else
+ calc_gap_coeff(alignment, gaps, profile2, (struct_penalties1 && use_ss1), gap_penalty_mask1,
+ nseqs1, nseqs1+nseqs2, prf_length2, gapcoef2, lencoef2);
+/*
+ calculate the profile matrix.
+*/
+ calc_prf2(profile2, alignment, aln_weight,
+ prf_length2, nseqs1, nseqs1+nseqs2);
+
+ aln_weight=ckfree((void *)aln_weight);
+
+if (debug>4)
+{
+extern char *amino_acid_codes;
+ for (j=0;j<=max_aa;j++)
+ fprintf(stdout,"%c ", amino_acid_codes[j]);
+ fprintf(stdout,"\n");
+ for (i=0;i<prf_length2;i++)
+ {
+ for (j=0;j<=max_aa;j++)
+ fprintf(stdout,"%d ", (pint)profile2[i+1][j]);
+ fprintf(stdout,"%d ", (pint)profile2[i+1][gap_pos1]);
+ fprintf(stdout,"%d ", (pint)profile2[i+1][gap_pos2]);
+ fprintf(stdout,"%d %d\n",(pint)profile2[i+1][GAPCOL],(pint)profile2[i+1][LENCOL]);
+ }
+}
+
+ aln_path1 = (char *) ckalloc( (max_aln_length+1) * sizeof(char) );
+ aln_path2 = (char *) ckalloc( (max_aln_length+1) * sizeof(char) );
+
+
+/*
+ align the profiles
+*/
+/* use Myers and Miller to align two sequences */
+
+ last_print = 0;
+ print_ptr = 1;
+
+ sb1 = sb2 = 0;
+ se1 = prf_length1;
+ se2 = prf_length2;
+
+ HH = (lint *) ckalloc( (max_aln_length+1) * sizeof (lint) );
+ DD = (lint *) ckalloc( (max_aln_length+1) * sizeof (lint) );
+ RR = (lint *) ckalloc( (max_aln_length+1) * sizeof (lint) );
+ SS = (lint *) ckalloc( (max_aln_length+1) * sizeof (lint) );
+ gS = (lint *) ckalloc( (max_aln_length+1) * sizeof (lint) );
+ displ = (sint *) ckalloc( (max_aln_length+1) * sizeof (sint) );
+
+ score = pdiff(sb1, sb2, se1-sb1, se2-sb2, profile1[0][GAPCOL], profile1[prf_length1][GAPCOL]);
+
+ HH=ckfree((void *)HH);
+ DD=ckfree((void *)DD);
+ RR=ckfree((void *)RR);
+ SS=ckfree((void *)SS);
+ gS=ckfree((void *)gS);
+
+ ptracepath( &alignment_len);
+
+ /*for(i=0;i<alignment_len;i++) {
+ fprintf(stdout, "%d ", aln_path1[i]);
+ }fprintf(stdout, "\n");
+ for(i=0;i<alignment_len;i++) {
+ fprintf(stdout, "%d ", aln_path2[i]);
+ }fprintf(stdout, "\n"); fflush(stdout);
+ */
+
+ /* JP: calculate the the fraction of gapped positions */
+ ngaps=0, nonegaps = 0;
+ for(i=0;i<alignment_len;i++) {
+ if(aln_path1[i]==1) {ngaps++; continue;}
+ if(aln_path2[i]==1) {ngaps++; continue;}
+ nonegaps++;
+ }
+ if(debug>1) fprintf(stdout, "gaps: %d nonegaps: %d\n", ngaps, nonegaps );
+
+ /* JP: calculate the average sequence identity between the two blocks */
+ tpairs = 0; idpairs = 0;
+ first = 1; second = 1;
+ if(debug>1)fprintf(stdout, "seqlengths %d %d \n", groupptr[gi]->seqlength, groupptr[gj]->seqlength);
+ for (i=0;i<alignment_len;i++) {
+
+ /*fprintf(stdout, "%d %d \n", first, second); fflush(stdout);*/
+
+ if(aln_path1[i]==1) { second++; continue; }
+ if(aln_path2[i]==1) { first++; continue; }
+
+ for(j=1;j<=groupptr[gi]->seqnum;j++) {
+ for(k=1;k<=groupptr[gj]->seqnum;k++) {
+
+ if(groupptr[gi]->seq[j][first] && groupptr[gj]->seq[k][second]) {
+ tpairs++;
+ if(am[groupptr[gi]->seq[j][first]]==am[groupptr[gj]->seq[k][second]])
+ idpairs++;
+ }
+ }
+ }
+ first++; second++;
+ }
+
+ if(debug>1)fprintf(stdout, "%d %d \n", tpairs, idpairs); fflush(stdout);
+
+ /* JP: print out the subalignments */
+ if(debug > 1) {
+
+ first = 1;second = 1;
+ nblocks = (alignment_len-1)/80+1;
+ fprintf(stdout, "global alignment %d %d score %d average score %d average id %d\n", gi, gj, score, score/alignment_len, (int) (100.0*idpairs/tpairs) );
+ fprintf(stdout, "tpairs %d idpairs %d \n", tpairs, idpairs);
+ for(n=0;n<nblocks;n++) {
+ fflush(stdout);
+ fprintf(stdout, "first: %d\n", first);
+ for(j=1;j<=groupptr[gi]->seqnum;j++) {
+ tmpfirst = first;
+ fprintf(stdout, "%s\t", groupptr[gi]->name[j]);
+ for(i=n*80+1;(i<=(n+1)*80&&i<=alignment_len);i++){
+ if(aln_path1[i-1]==2) {
+ fprintf(stdout, "%c", am[groupptr[gi]->seq[j][tmpfirst]]);
+ tmpfirst++;
+ }
+ else {fprintf(stdout, "-");}
+ }
+ fprintf(stdout, "\n");
+ }
+ first = tmpfirst;
+
+ fprintf(stdout, "second: %d\n", second);
+ for(j=1;j<=groupptr[gj]->seqnum;j++){
+ tmpsecond = second;
+ fprintf(stdout, "%s\t", groupptr[gj]->name[j]);
+ for(i=n*80+1;(i<=(n+1)*80&&i<=alignment_len);i++) {
+ if(aln_path2[i-1]==2) {
+ fprintf(stdout, "%c", am[groupptr[gj]->seq[j][tmpsecond]]);
+ tmpsecond++;
+ }
+ else{fprintf(stdout,"-");}
+ }
+ fprintf(stdout, "\n");
+ }
+ second = tmpsecond;
+ fprintf(stdout, "\n\n");
+ }
+ }
+
+ /* JP: lib generation */
+ if(groupptr[gi]->seqlength > groupptr[gj]->seqlength) {
+ score = groupptr[gj]->seqlength;
+ }
+ else score = groupptr[gi]->seqlength;
+ /*score *= (int) (100.0*idpairs/tpairs);*/
+ score = (int) ( (log(score) * 100.0*idpairs/tpairs)*(1.0*nonegaps/(ngaps+nonegaps) ) );
+
+ /*score = (int) (100.0*idpairs/tpairs);*/
+ if(debug>1) fprintf(stdout, "score %d\n", score);
+ first = 1; second = 1;
+ S = displ;
+ for(i=0;i<alignment_len, first<=groupptr[gi]->seqlength, second<=groupptr[gj]->seqlength;i++) {
+ if(first>groupptr[gi]->seqlength) break;
+ if(second>groupptr[gj]->seqlength) break;
+ if(*(S+i)==0) {
+ if(debug>1)fprintf(stdout, "first: %d; second: %d\n", first, second); fflush(stdout);
+
+ if(glib[gi][gj][first] == NULL) {
+ glib[gi][gj][first] = SNavail();
+ glib[gi][gj][first]->ind = second;
+ glib[gi][gj][first]->sbe = score;
+ }
+ else {
+ snd = &glib[gi][gj][first];
+ /*fprintf(stdout, "snd: %d; glib: %d\n", snd, glib[gi][gj][first]);*/
+ while(*snd) {
+ if((*snd)->ind == second) {
+ (*snd)->sbe += score;
+ break;
+ }
+ else {
+ snd = &((*snd)->next);
+ /*fprintf(stdout, "snd: %d\n", snd);*/
+ }
+ }
+ if(!(*snd)) {
+ (*snd) = SNavail();
+ (*snd)->ind = second;
+ (*snd)->sbe = score;
+ /*fprintf(stdout, "====%d %d %d \n", first, (*snd)->ind, (*snd)->sbe);*/
+ }
+ }
+ /* AddSbe(glib[gi][gj][first], second, score/rl);*/
+ if(glib[gj][gi][second]==NULL) {
+ glib[gj][gi][second] = SNavail();
+ glib[gj][gi][second]->ind = first;
+ glib[gj][gi][second]->sbe = score;
+ }
+ else {
+ snd = &glib[gj][gi][second];
+ while(*snd) {
+ if((*snd)->ind == first) {
+ (*snd)->sbe += score;
+ break;
+ }
+ else snd = &((*snd)->next);
+ }
+ if(!*snd) {
+ *snd = SNavail();
+ (*snd)->ind = first;
+ (*snd)->sbe = score;
+ }
+ }
+ /* AddSbe(glib[gj][gi][second], first, score/rl); */
+ first++; second++;
+ }
+ if(*(S+i) > 0) {
+ second+=*(S+i);
+ /*first++; second++; */
+ }
+ if(*(S+i) < 0) {
+ first-=*(S+i);
+ /*first++; second++; */
+ }
+ }
+
+ fprintf(stdout, "*");
+
+ /*for(i=0;i<=alignment_len;i++) {
+ fprintf(stdout, "%d\t%d\n", i, displ[i]);
+ }*/
+
+ displ=ckfree((void *)displ);
+
+ /*add_ggaps();*/
+
+ for (i=0;i<prf_length1+2;i++)
+ profile1[i]=ckfree((void *)profile1[i]);
+ profile1=ckfree((void *)profile1);
+
+ for (i=0;i<prf_length2+2;i++)
+ profile2[i]=ckfree((void *)profile2[i]);
+ profile2=ckfree((void *)profile2);
+
+ prf_length1 = alignment_len;
+
+ aln_path1=ckfree((void *)aln_path1);
+ aln_path2=ckfree((void *)aln_path2);
+
+ /* JP: do not change the sequences */
+ /*
+ NumSeq = 0;
+ for (j=0;j<nseqs;j++)
+ {
+ if (group[j+1] == 1)
+ {
+ seqlen_array[j+1] = prf_length1;
+ realloc_seq(j+1,prf_length1);
+ for (i=0;i<prf_length1;i++)
+ seq_array[j+1][i+1] = alignment[NumSeq][i];
+ NumSeq++;
+ }
+ }
+ for (j=0;j<nseqs;j++)
+ {
+ if (group[j+1] == 2)
+ {
+ seqlen_array[j+1] = prf_length1;
+ seq_array[j+1] = (char *)realloc(seq_array[j+1], (prf_length1+2) * sizeof (char));
+ realloc_seq(j+1,prf_length1);
+ for (i=0;i<prf_length1;i++)
+ seq_array[j+1][i+1] = alignment[NumSeq][i];
+ NumSeq++;
+ }
+ }
+ */
+
+ for (i=0;i<nseqs1+nseqs2;i++)
+ alignment[i]=ckfree((void *)alignment[i]);
+ alignment=ckfree((void *)alignment);
+
+ aln_len=ckfree((void *)aln_len);
+ gaps=ckfree((void *)gaps);
+
+ return(score/100);
+}
+
+/*
+ * Rewrite one row of alignment[] so that it follows an alignment path.
+ *
+ *   row   index into alignment[] and aln_len[] of the row to rewrite
+ *   path  aln_path1 or aln_path2: a 2 copies the next character of the
+ *         row (ENDALN once aln_len[row] characters have been used), a 1
+ *         writes gap_pos1, any other value is reported on stdout and the
+ *         scratch byte for that column is left as it was
+ *   ta    scratch buffer with room for alignment_len+1 characters
+ *
+ * On return alignment[row] holds alignment_len characters followed by
+ * ENDALN, and aln_len[row] is alignment_len.  The row is grown with
+ * realloc(); if that fails the program reports the error and exits
+ * instead of writing through a NULL pointer.
+ */
+static void add_ggaps_row(sint row, char *path, char *ta)
+{
+    sint i, ix;
+    sint len;
+    char *grown;
+
+    ix = 0;
+    for (i=0;i<alignment_len;i++)
+    {
+        if (path[i] == 2)
+        {
+            if (ix < aln_len[row])
+                ta[i] = alignment[row][ix];
+            else
+                ta[i] = ENDALN;
+            ix++;
+        }
+        else if (path[i] == 1)
+        {
+            /* insertion in this profile: emit a gap */
+            ta[i] = gap_pos1;
+        }
+        else
+        {
+            fprintf(stdout,"Error in aln_path\n");
+        }
+    }
+    ta[i] = ENDALN;
+
+    len = alignment_len;
+    /* keep the old pointer until the new one is known to be valid */
+    grown = (char *)realloc(alignment[row], (len+2) * sizeof (char));
+    if (grown == NULL)
+    {
+        fprintf(stderr,"Error: out of memory in add_ggaps\n");
+        exit(1);
+    }
+    alignment[row] = grown;
+    for (i=0;i<len;i++)
+        alignment[row][i] = ta[i];
+    alignment[row][i] = ENDALN;
+    aln_len[row] = len;
+}
+
+/*
+ * Insert the gaps described by aln_path1 / aln_path2 into alignment[].
+ *
+ * Rows 0..nseqs1-1 are re-gapped along aln_path1 and rows
+ * nseqs1..nseqs1+nseqs2-1 along aln_path2; afterwards every row is
+ * alignment_len characters long.  The gap penalty masks are re-gapped
+ * with add_ggaps_mask() when struct_penalties1/2 is not NONE, and the
+ * secondary structure masks when it is SECST.  With debug>3 the gapped
+ * rows are printed to stdout.
+ */
+static void add_ggaps(void)
+{
+    sint j;
+    sint i;
+    char *ta;
+
+    /* one scratch buffer shared by all rows */
+    ta = (char *) ckalloc( (alignment_len+1) * sizeof (char) );
+
+    for (j=0;j<nseqs1;j++)
+        add_ggaps_row(j, aln_path1, ta);
+
+    for (j=nseqs1;j<nseqs1+nseqs2;j++)
+        add_ggaps_row(j, aln_path2, ta);
+
+    ta=ckfree((void *)ta);
+
+    if (struct_penalties1 != NONE)
+        gap_penalty_mask1 = add_ggaps_mask(gap_penalty_mask1,alignment_len,aln_path1,aln_path2);
+    if (struct_penalties1 == SECST)
+        sec_struct_mask1 = add_ggaps_mask(sec_struct_mask1,alignment_len,aln_path1,aln_path2);
+
+    if (struct_penalties2 != NONE)
+        gap_penalty_mask2 = add_ggaps_mask(gap_penalty_mask2,alignment_len,aln_path2,aln_path1);
+    if (struct_penalties2 == SECST)
+        sec_struct_mask2 = add_ggaps_mask(sec_struct_mask2,alignment_len,aln_path2,aln_path1);
+
+    if (debug>3)
+    {
+        char c;
+        extern char *amino_acid_codes;
+
+        /* dump each row up to its ENDALN marker, gaps shown as '-' */
+        for (i=0;i<nseqs1+nseqs2;i++)
+        {
+            for (j=0;j<alignment_len;j++)
+            {
+                if (alignment[i][j] == ENDALN) break;
+                else if ((alignment[i][j] == gap_pos1) || (alignment[i][j] == gap_pos2)) c = '-';
+                else c = amino_acid_codes[alignment[i][j]];
+                fprintf(stdout,"%c", c);
+            }
+            fprintf(stdout,"\n\n");
+        }
+    }
+
+}
+
+/*
+ * Re-gap a mask string so that it lines up with the new alignment.
+ *
+ *   mask   heap buffer holding the ungapped mask; it is resized with
+ *          realloc(), so the caller must use the returned pointer
+ *   len    length of the new alignment
+ *   path1  alignment path used when switch_profiles is FALSE
+ *   path2  alignment path used otherwise
+ *
+ * For every column a path value of 2 copies the next mask character and
+ * a value of 1 writes gap_pos1; other values leave the scratch byte as
+ * ckalloc() returned it.  Returns the resized mask, len characters long
+ * and terminated with '\0'.  If realloc() fails the program reports the
+ * error and exits instead of writing through a NULL pointer.
+ */
+static char * add_ggaps_mask(char *mask, int len, char *path1, char *path2)
+{
+    int i,ix;
+    char *ta;
+    char *path;
+    char *grown;
+
+    ta = (char *) ckalloc( (len+1) * sizeof (char) );
+
+    /* the same expansion is applied to whichever path is selected */
+    path = (switch_profiles == FALSE) ? path1 : path2;
+
+    ix = 0;
+    for (i=0;i<len;i++)
+    {
+        if (path[i] == 2)
+        {
+            ta[i] = mask[ix];
+            ix++;
+        }
+        else if (path[i] == 1)
+            ta[i] = gap_pos1;
+    }
+
+    /* keep the old pointer until the new one is known to be valid */
+    grown = (char *)realloc(mask,(len+2) * sizeof (char));
+    if (grown == NULL)
+    {
+        fprintf(stderr,"Error: out of memory in add_ggaps_mask\n");
+        exit(1);
+    }
+    mask = grown;
+    for (i=0;i<len;i++)
+        mask[i] = ta[i];
+    mask[i] ='\0';
+
+    ta=ckfree((void *)ta);
+
+    return(mask);
+}
+
+/*
+ * Score row n of profile1 against row m of profile2.
+ *
+ * The products of the matching columns 0..max_aa, plus the two gap
+ * columns gap_pos1 and gap_pos2, are summed; the sum divided by 10 is
+ * returned.
+ */
+static lint prfscore(sint n, sint m)
+{
+    lint total = 0;
+    sint col = 0;
+
+    while (col <= max_aa)
+    {
+        total += (profile1[n][col] * profile2[m][col]);
+        col++;
+    }
+
+    /* the gap columns are stored outside the 0..max_aa residue range */
+    total += (profile1[n][gap_pos1] * profile2[m][gap_pos1]);
+    total += (profile1[n][gap_pos2] * profile2[m][gap_pos2]);
+
+    return(total/10);
+}
+
+/*
+ * Expand the edit script in displ[1..print_ptr-1] into the two column
+ * paths aln_path1 and aln_path2.
+ *
+ * A script entry of 0 marks an aligned column (2 in both paths).  A
+ * positive entry k produces k columns with 1 in aln_path1 and 2 in
+ * aln_path2; a negative entry -k produces k columns with 2 in aln_path1
+ * and 1 in aln_path2.  The number of columns written is stored in *alen.
+ * With debug>1 the script entries are echoed to stdout.
+ */
+static void ptracepath(sint *alen)
+{
+    sint step, op, run;
+    sint out = 0;
+    sint last = print_ptr - 1;
+    char fill1, fill2;
+
+    for (step = 1; step <= last; step++)
+    {
+        op = displ[step];
+        if (debug>1) fprintf(stdout,"%d ",(pint)op);
+
+        if (op == 0)
+        {
+            aln_path1[out] = 2;
+            aln_path2[out] = 2;
+            out++;
+            continue;
+        }
+
+        if (op > 0)
+        {
+            run = op;
+            fill1 = 1;
+            fill2 = 2;
+        }
+        else
+        {
+            run = -op;
+            fill1 = 2;
+            fill2 = 1;
+        }
+
+        while (run > 0)
+        {
+            aln_path1[out] = fill1;
+            aln_path2[out] = fill2;
+            out++;
+            run--;
+        }
+    }
+    if (debug>1) fprintf(stdout,"\n");
+
+    *alen = out;
+}
+
+/*
+ * Record a deletion of k residues in the displ edit script.
+ *
+ * When the previous entry is itself a deletion (last_print < 0) that
+ * entry is extended by k; otherwise a new entry -k is appended.
+ * last_print is left holding the entry just written.
+ */
+static void pdel(sint k)
+{
+    if (last_print < 0)
+    {
+        displ[print_ptr-1] -= k;
+        last_print = displ[print_ptr-1];
+    }
+    else
+    {
+        displ[print_ptr] = -(k);
+        last_print = displ[print_ptr];
+        print_ptr++;
+    }
+}
+
+/*
+ * Record an insertion of k residues in the displ edit script.
+ *
+ * When the previous entry is a deletion (last_print < 0) the insertion
+ * takes over that slot and the deletion is re-appended after it, so
+ * last_print keeps the deletion value.  Otherwise k is appended and
+ * becomes last_print.
+ */
+static void padd(sint k)
+{
+    if (last_print >= 0)
+    {
+        displ[print_ptr] = k;
+        last_print = displ[print_ptr];
+        print_ptr++;
+        return;
+    }
+
+    /* put the insertion in front of the pending deletion */
+    displ[print_ptr-1] = k;
+    displ[print_ptr] = last_print;
+    print_ptr++;
+}
+
+/*
+ * Record one aligned column: append a 0 to the displ edit script and
+ * reset last_print to 0.
+ */
+static void palign(void)
+{
+    last_print = 0;
+    displ[print_ptr] = 0;
+    print_ptr++;
+}
+
+
+/*
+ * Recursively align rows A+1..A+M of profile1 with rows B+1..B+N of
+ * profile2 (the caller describes this as the Myers and Miller method)
+ * and append the resulting edit script to displ through pdel(), padd()
+ * and palign().
+ *
+ *   A, B      offsets of the sub-problem in profile1 / profile2
+ *   M, N      number of rows of profile1 / profile2 in the sub-problem
+ *   go1, go2  when 0, the gap opening term at the start (go1) or at the
+ *             end (go2) of the sub-problem is not charged
+ *
+ * Returns the score of the best alignment found for the sub-problem;
+ * in the boundary cases this is the negated gap penalty.  The values
+ * returned by the recursive calls are discarded.
+ *
+ * The scratch variables are static and therefore shared by all
+ * recursion levels; only midi, midj, type and midh, which are read
+ * after the recursive calls have started, are automatic.  HH, DD, RR,
+ * SS and gS are file-scope work arrays indexed by 0..N.
+ */
+static lint pdiff(sint A,sint B,sint M,sint N,sint go1, sint go2)
+{
+ sint midi,midj,type;
+ lint midh;
+
+ static lint t, tl, g, h;
+
+{ static sint i,j;
+ static lint hh, f, e, s;
+
+/* Boundary cases: M <= 1 or N == 0 */
+if (debug>3) fprintf(stdout,"A %d B %d M %d N %d midi %d go1 %d go2 %d\n",
+(pint)A,(pint)B,(pint)M,(pint)N,(pint)M/2,(pint)go1,(pint)go2);
+
+/* if sequence B is empty.... */
+
+ if(N<=0) {
+
+/* if sequence A is not empty.... */
+
+ if(M>0) {
+
+/* delete residues A[1] to A[M] */
+
+ pdel(M);
+ }
+ return(-gap_penalty1(A,B,M));
+ }
+
+/* if sequence A is empty.... */
+
+ if(M<=1) {
+ if(M<=0) {
+
+/* insert residues B[1] to B[N] */
+
+ padd(N);
+ return(-gap_penalty2(A,B,N));
+ }
+
+/* if sequence A has just one residue.... */
+
+/* baseline: the single row of A is left unaligned (midj stays 0) */
+ if (go1 == 0)
+ midh = -gap_penalty1(A+1,B+1,N);
+ else
+ midh = -gap_penalty2(A+1,B,1)-gap_penalty1(A+1,B+1,N);
+ midj = 0;
+/* try pairing the row with each B[j]; keep the best strictly better score */
+ for(j=1;j<=N;j++) {
+ hh = -gap_penalty1(A,B+1,j-1) + prfscore(A+1,B+j)
+ -gap_penalty1(A+1,B+j+1,N-j);
+ if(hh>midh) {
+ midh = hh;
+ midj = j;
+ }
+ }
+
+ if(midj==0) {
+/* no pairing won: insert all of B, then delete the row of A */
+ padd(N);
+ pdel(1);
+ }
+ else {
+/* insert B[1..midj-1], align the row with B[midj], insert the rest */
+ if(midj>1) padd(midj-1);
+ palign();
+ if(midj<N) padd(N-midj);
+ }
+ return midh;
+ }
+
+
+/* Divide sequence A in half: midi */
+
+ midi = M / 2;
+
+/* In a forward phase, calculate all HH[j] and DD[j] */
+
+/* row 0: an open term from A,B+1 plus one extension term per column */
+ HH[0] = 0.0;
+ t = -open_penalty1(A,B+1);
+ tl = -ext_penalty1(A,B+1);
+ for(j=1;j<=N;j++) {
+ HH[j] = t = t+tl;
+ DD[j] = t-open_penalty2(A+1,B+j);
+ }
+
+/* column 0: the opening term is skipped when go1 is 0 */
+ if (go1 == 0) t = 0;
+ else t = -open_penalty2(A+1,B);
+ tl = -ext_penalty2(A+1,B);
+ for(i=1;i<=midi;i++) {
+/* s carries the previous row's HH[j-1] (the diagonal predecessor) */
+ s = HH[0];
+ HH[0] = hh = t = t+tl;
+ f = t-open_penalty1(A+i,B+1);
+
+ for(j=1;j<=N;j++) {
+ g = open_penalty1(A+i,B+j);
+ h = ext_penalty1(A+i,B+j);
+ if ((hh=hh-g-h) > (f=f-h)) f=hh;
+ g = open_penalty2(A+i,B+j);
+ h = ext_penalty2(A+i,B+j);
+ if ((hh=HH[j]-g-h) > (e=DD[j]-h)) e=hh;
+ hh = s + prfscore(A+i, B+j);
+ if (f>hh) hh = f;
+ if (e>hh) hh = e;
+
+ s = HH[j];
+ HH[j] = hh;
+ DD[j] = e;
+
+ }
+ }
+
+ DD[0]=HH[0];
+
+/* In a reverse phase, calculate all RR[j] and SS[j] */
+
+/* last row: gS[j] remembers the open_penalty2 term included in SS[j] */
+ RR[N]=0.0;
+ tl = 0.0;
+ for(j=N-1;j>=0;j--) {
+ g = -open_penalty1(A+M,B+j+1);
+ tl -= ext_penalty1(A+M,B+j+1);
+ RR[j] = g+tl;
+ SS[j] = RR[j]-open_penalty2(A+M,B+j);
+ gS[j] = open_penalty2(A+M,B+j);
+ }
+
+ tl = 0.0;
+ for(i=M-1;i>=midi;i--) {
+ s = RR[N];
+/* last column: the opening term is skipped when go2 is 0 */
+ if (go2 == 0) g = 0;
+ else g = -open_penalty2(A+i+1,B+N);
+ tl -= ext_penalty2(A+i+1,B+N);
+ RR[N] = hh = g+tl;
+ t = open_penalty1(A+i,B+N);
+ f = RR[N]-t;
+
+ for(j=N-1;j>=0;j--) {
+ g = open_penalty1(A+i,B+j+1);
+ h = ext_penalty1(A+i,B+j+1);
+/* f swaps the previous column's open term (t) for this column's (g) */
+ if ((hh=hh-g-h) > (f=f-h-g+t)) f=hh;
+ t = g;
+ g = open_penalty2(A+i+1,B+j);
+ h = ext_penalty2(A+i+1,B+j);
+ hh=RR[j]-g-h;
+ if (i==(M-1)) {
+ e=SS[j]-h;
+ }
+ else {
+/* replace the open term of the row below with this row's and record it */
+ e=SS[j]-h-g+open_penalty2(A+i+2,B+j);
+ gS[j] = g;
+ }
+ if (hh > e) e=hh;
+ hh = s + prfscore(A+i+1, B+j+1);
+ if (f>hh) hh = f;
+ if (e>hh) hh = e;
+
+ s = RR[j];
+ RR[j] = hh;
+ SS[j] = e;
+
+ }
+ }
+ SS[N]=RR[N];
+ gS[N] = open_penalty2(A+midi+1,B+N);
+
+/* find midj, such that HH[j]+RR[j] or DD[j]+SS[j]+gap is the maximum */
+
+ midh=HH[0]+RR[0];
+ midj=0;
+ type=1;
+/* on a tie a later j replaces the current one only if HH[j]!=DD[j] and RR[j]==SS[j] */
+ for(j=0;j<=N;j++) {
+ hh = HH[j] + RR[j];
+ if(hh>=midh)
+ if(hh>midh || (HH[j]!=DD[j] && RR[j]==SS[j])) {
+ midh=hh;
+ midj=j;
+ }
+ }
+
+/* a DD[j]+SS[j]+gS[j] candidate must be strictly better to switch to type 2 */
+ for(j=N;j>=0;j--) {
+ hh = DD[j] + SS[j] + gS[j];
+ if(hh>midh) {
+ midh=hh;
+ midj=j;
+ type=2;
+ }
+ }
+}
+
+/* Conquer recursively around midpoint */
+
+
+ if(type==1) { /* Type 1 gaps */
+if (debug>3) fprintf(stdout,"Type 1,1: midj %d\n",(pint)midj);
+ pdiff(A,B,midi,midj,go1,1);
+if (debug>3) fprintf(stdout,"Type 1,2: midj %d\n",(pint)midj);
+ pdiff(A+midi,B+midj,M-midi,N-midj,1,go2);
+ }
+ else {
+/* Type 2: rows midi and midi+1 of A are emitted as one 2-residue deletion; */
+/* the halves on either side are aligned with the joining gap-open flag set to 0 */
+if (debug>3) fprintf(stdout,"Type 2,1: midj %d\n",(pint)midj);
+ pdiff(A,B,midi-1,midj,go1, 0);
+ pdel(2);
+if (debug>3) fprintf(stdout,"Type 2,2: midj %d\n",(pint)midj);
+ pdiff(A+midi+1,B+midj,M-midi-1,N-midj,0,go2);
+ }
+
+ return midh; /* Return the score of the best alignment */
+}
+
+/*
+ * Gap opening score at row i of profile1 and row j of profile2: the sum
+ * of the GAPCOL entries of both rows.  Returns 0 when endgappenalties is
+ * off and i is 0 or prf_length1.
+ */
+static sint open_penalty1(sint i, sint j)
+{
+    int at_end = (i == 0) || (i == prf_length1);
+
+    if (at_end && !endgappenalties)
+        return(0);
+
+    return(profile2[j][GAPCOL] + profile1[i][GAPCOL]);
+}
+
+/*
+ * Gap extension score at row i of profile1 and row j of profile2: the
+ * LENCOL entry of profile2 row j.  Returns 0 when endgappenalties is off
+ * and i is 0 or prf_length1.
+ */
+static sint ext_penalty1(sint i, sint j)
+{
+    int at_end = (i == 0) || (i == prf_length1);
+
+    if (at_end && !endgappenalties)
+        return(0);
+
+    return(profile2[j][LENCOL]);
+}
+
+/*
+ * Score of a gap of length k at row i of profile1 and row j of profile2.
+ *
+ * Returns 0 for k <= 0, and also when endgappenalties is off and i is 0
+ * or prf_length1.  Otherwise the result is the sum of both GAPCOL
+ * entries plus k times an extension value.  The extension value is the
+ * LENCOL entry of the last profile2 row visited while walking rows
+ * j..j+k-1 (stopping before prf_length2), or 0 if none is visited.
+ */
+static sint gap_penalty1(sint i, sint j, sint k)
+{
+    sint open_cost;
+    sint ext_cost = 0;
+    sint step = 0;
+
+    if (k <= 0)
+        return(0);
+    if (!endgappenalties && (i == 0 || i == prf_length1))
+        return(0);
+
+    open_cost = profile2[j][GAPCOL] + profile1[i][GAPCOL];
+
+    /* only the value from the last row reached is kept */
+    while (step < k && step + j < prf_length2)
+    {
+        ext_cost = profile2[step+j][LENCOL];
+        step++;
+    }
+
+    return(open_cost + ext_cost * k);
+}
+/*
+ * Gap opening score at row i of profile1 and row j of profile2: the sum
+ * of the GAPCOL entries of both rows.  Returns 0 when endgappenalties is
+ * off and j is 0 or prf_length2.
+ */
+static sint open_penalty2(sint i, sint j)
+{
+    int at_end = (j == 0) || (j == prf_length2);
+
+    if (at_end && !endgappenalties)
+        return(0);
+
+    return(profile1[i][GAPCOL] + profile2[j][GAPCOL]);
+}
+
+/*
+ * Gap extension score at row i of profile1 and row j of profile2: the
+ * LENCOL entry of profile1 row i.  Returns 0 when endgappenalties is off
+ * and j is 0 or prf_length2.
+ */
+static sint ext_penalty2(sint i, sint j)
+{
+    int at_end = (j == 0) || (j == prf_length2);
+
+    if (at_end && !endgappenalties)
+        return(0);
+
+    return(profile1[i][LENCOL]);
+}
+
+/*
+ * Score of a gap of length k at row i of profile1 and row j of profile2.
+ *
+ * Returns 0 for k <= 0, and also when endgappenalties is off and j is 0
+ * or prf_length2.  Otherwise the result is the sum of both GAPCOL
+ * entries plus k times an extension value.  The extension value is the
+ * LENCOL entry of the last profile1 row visited while walking rows
+ * i..i+k-1 (stopping before prf_length1), or 0 if none is visited.
+ */
+static sint gap_penalty2(sint i, sint j, sint k)
+{
+    sint open_cost;
+    sint ext_cost = 0;
+    sint step = 0;
+
+    if (k <= 0)
+        return(0);
+    if (!endgappenalties && (j == 0 || j == prf_length2))
+        return(0);
+
+    open_cost = profile1[i][GAPCOL] + profile2[j][GAPCOL];
+
+    /* only the value from the last row reached is kept */
+    while (step < k && step + i < prf_length1)
+    {
+        ext_cost = profile1[step+i][LENCOL];
+        step++;
+    }
+
+    return(open_cost + ext_cost * k);
+}
diff --git a/prfalign2.c b/prfalign2.c
new file mode 100644
index 0000000..2fa67d9
--- /dev/null
+++ b/prfalign2.c
@@ -0,0 +1,1267 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include "pcma.h"
+/*#include "new.h" JP */
+#define ENDALN 127
+
+#define MAX(a,b) ((a)>(b)?(a):(b))
+#define MIN(a,b) ((a)<(b)?(a):(b))
+
+/*
+ * Prototypes
+ */
+static lint pdiff1(sint A,sint B,sint i,sint j,sint go1,sint go2);
+static lint prfscore(sint n, sint m);
+static sint gap_penalty1(sint i, sint j,sint k);
+static sint open_penalty1(sint i, sint j);
+static sint ext_penalty1(sint i, sint j);
+static sint gap_penalty2(sint i, sint j,sint k);
+static sint open_penalty2(sint i, sint j);
+static sint ext_penalty2(sint i, sint j);
+static void padd(sint k);
+static void pdel(sint k);
+static void palign(void);
+static void ptracepath(sint *alen);
+static void add_ggaps(void);
+static char * add_ggaps_mask(char *mask, int len, char *path1, char *path2);
+
+/*
+ * Global variables
+ */
+extern double **tmat;
+extern float gap_open, gap_extend;
+extern float transition_weight;
+extern sint gap_pos1, gap_pos2;
+extern sint max_aa;
+extern sint nseqs;
+extern sint *seqlen_array;
+extern sint *seq_weight;
+extern sint debug;
+extern Boolean neg_matrix;
+extern sint mat_avscore;
+extern short blosum30mt[], blosum40mt[], blosum45mt[];
+extern short blosum62mt2[], blosum80mt[];
+extern short pam20mt[], pam60mt[];
+extern short pam120mt[], pam160mt[], pam350mt[];
+extern short gon40mt[], gon80mt[];
+extern short gon120mt[], gon160mt[], gon250mt[], gon350mt[];
+extern short clustalvdnamt[],swgapdnamt[];
+extern short idmat[];
+extern short usermat[];
+extern short userdnamat[];
+extern Boolean user_series;
+extern UserMatSeries matseries;
+
+extern short def_dna_xref[],def_aa_xref[],dna_xref[],aa_xref[];
+extern sint max_aln_length;
+extern Boolean distance_tree;
+extern Boolean dnaflag;
+extern char mtrxname[];
+extern char dnamtrxname[];
+extern char **seq_array;
+extern char *amino_acid_codes;
+extern char *gap_penalty_mask1,*gap_penalty_mask2;
+extern char *sec_struct_mask1,*sec_struct_mask2;
+extern sint struct_penalties1, struct_penalties2;
+extern Boolean use_ss1, use_ss2;
+extern Boolean endgappenalties;
+
+/* JP */
+extern SN ****glib;
+extern streeptr *groupptr;
+extern char *am;
+
+/*
+ * File-scope working state of prfalign2().
+ */
+
+/* edit script: prfalign2() resets print_ptr to 1 and last_print to 0 */
+/* and allocates displ with max_aln_length+1 entries before pdiff1() */
+static sint print_ptr,last_print;
+static sint *displ;
+
+/* alignment rows: group 1 sequences first (0..nseqs1-1), then group 2 */
+static char **alignment;
+/* per-row length, set to prf_length1 or prf_length2 */
+static sint *aln_len;
+/* per-row weight copied from seq_weight; freed after calc_prf2() */
+static sint *aln_weight;
+/* per-column paths, max_aln_length+1 chars each; after ptracepath() */
+/* prfalign2() treats 1 as a gap and 2 as a residue */
+static char *aln_path1, *aln_path2;
+/* number of alignment columns, written by ptracepath() */
+static sint alignment_len;
+/* profiles: prf_length+2 rows of LENCOL+2 entries each */
+static sint **profile1, **profile2;
+/* work arrays of max_aln_length+1 entries, allocated around pdiff1() */
+static lint *HH, *DD, *RR, *SS;
+static lint *gS;
+/* substitution matrix filled in by get_matrix() */
+static sint matrix[NUMRES][NUMRES];
+/* number of sequences with group value 1 and 2 */
+static sint nseqs1, nseqs2;
+/* longest seqlen_array entry within each group */
+static sint prf_length1, prf_length2;
+/* buffer of max_aln_length+1 entries passed to calc_gap_coeff() and calc_prf1() */
+static sint *gaps;
+/* gap opening / extension coefficients passed to calc_gap_coeff() */
+static sint gapcoef1,gapcoef2;
+static sint lencoef1,lencoef2;
+/* always set to FALSE by prfalign2() (the swapping code is commented out) */
+static Boolean switch_profiles;
+/* copies of the gapo and gape arguments of prfalign2() */
+static sint go, ge;
+/* copy of the V argument of prfalign2() */
+static sint **vv;
+
+lint prfalign2(sint *group, sint gi, sint gj, sint gapo, sint gape, sint **V)
+{
+
+ static Boolean found;
+ static Boolean negative;
+ static Boolean error_given=FALSE;
+ static sint i, j, count = 0;
+ static sint NumSeq;
+ static sint len, len1, len2, is, minlen;
+ static sint se1, se2, sb1, sb2;
+ static sint maxres;
+ static sint int_scale;
+ static short *matptr;
+ static short *mat_xref;
+ static char c;
+ static lint score;
+ static float scale;
+ static double logmin,logdiff;
+ static double pcid;
+
+ /* JP */
+ static sint k, n;
+ static sint first, second;
+ static sint tmpfirst, tmpsecond, nblocks;
+ static sint tpairs, idpairs;
+ static sint *S;
+ static SN **snd;
+ static sint ngaps, nonegaps;
+
+
+ alignment = (char **) ckalloc( nseqs * sizeof (char *) );
+ aln_len = (sint *) ckalloc( nseqs * sizeof (sint) );
+ aln_weight = (sint *) ckalloc( nseqs * sizeof (sint) );
+
+ /*for (i=0;i<nseqs;i++)
+ if (aligned[i+1] == 0) group[i+1] = 0;
+ */
+
+ nseqs1 = nseqs2 = 0;
+ for (i=0;i<nseqs;i++)
+ {
+ if (group[i+1] == 1) nseqs1++;
+ else if (group[i+1] == 2) nseqs2++;
+ }
+
+if(debug>1) {
+ for(i=0;i<nseqs;i++)
+ {
+ fprintf(stdout, "%d ", group[i+1]);
+ }
+ fprintf(stdout, "\n");
+}
+
+ if ((nseqs1 == 0) || (nseqs2 == 0)) return(0.0);
+
+ /*if (nseqs2 > nseqs1)
+ {
+ switch_profiles = TRUE;
+ for (i=0;i<nseqs;i++)
+ {
+ if (group[i+1] == 1) group[i+1] = 2;
+ else if (group[i+1] == 2) group[i+1] = 1;
+ }
+ gi = k; gi = gj; gj = k;
+ }
+ else
+ switch_profiles = FALSE;
+ */
+ switch_profiles = FALSE;
+
+
+ int_scale = 100;
+
+/*
+ calculate the mean of the sequence pc identities between the two groups
+*/
+ count = 0;
+ pcid = 0.0;
+ negative=neg_matrix;
+ for (i=0;i<nseqs;i++)
+ {
+ if (group[i+1] == 1)
+ for (j=0;j<nseqs;j++)
+ if (group[j+1] == 2)
+ {
+ count++;
+ pcid += tmat[i+1][j+1];
+ }
+ }
+
+ pcid = pcid/(float)count;
+if (debug > 3) fprintf(stdout,"mean tmat %3.1f\n", pcid);
+
+/* JP */
+/*fprintf(stdout, "%d %c %d %c\n", gap_pos1, amino_acid_codes[gap_pos1], gap_pos2, amino_acid_codes[gap_pos2]);
+for(i=0;i<NUMRES;i++) {
+ fprintf(stdout, "%c ", amino_acid_codes[i]);
+}*/
+
+
+/*
+ Make the first profile.
+*/
+ prf_length1 = 0;
+ for (i=0;i<nseqs;i++)
+ if (group[i+1] == 1)
+ if(seqlen_array[i+1]>prf_length1) prf_length1=seqlen_array[i+1];
+
+ nseqs1 = 0;
+if (debug>3) fprintf(stdout,"sequences profile 1:\n");
+ for (i=0;i<nseqs;i++)
+ {
+ if (group[i+1] == 1)
+ {
+if (debug>3) {
+extern char **names;
+fprintf(stdout,"%s\n",names[i+1]);
+}
+ len = seqlen_array[i+1];
+ alignment[nseqs1] = (char *) ckalloc( (prf_length1+2) * sizeof (char) );
+ /*JP */
+ if(debug>3)fprintf(stdout, "\n");
+ for (j=0;j<len;j++){
+ alignment[nseqs1][j] = seq_array[i+1][j+1];
+ /* JP */
+ /* fprintf(stdout, "%d", alignment[nseqs1][j]); */
+ if(debug>3)fprintf(stdout, "%c", amino_acid_codes[alignment[nseqs1][j]]);
+ /* JP */
+ }
+ for(j=len;j<prf_length1;j++) {
+ alignment[nseqs1][j+1]=gap_pos1;
+ /* JP */
+ if(debug>3)fprintf(stdout, "%c", amino_acid_codes[alignment[nseqs1][j]]);
+ /* JP */
+ }
+ /*JP */
+ if(debug>3)fprintf(stdout, "\n");
+ alignment[nseqs1][prf_length1+1] = ENDALN;
+ aln_len[nseqs1] = prf_length1;
+ aln_weight[nseqs1] = seq_weight[i];
+ nseqs1++;
+ }
+ }
+
+/*
+ Make the second profile.
+*/
+ prf_length2 = 0;
+ for (i=0;i<nseqs;i++)
+ if (group[i+1] == 2)
+ if(seqlen_array[i+1]>prf_length2) prf_length2=seqlen_array[i+1];
+
+ nseqs2 = 0;
+if (debug>3) fprintf(stdout,"sequences profile 2:\n");
+ for (i=0;i<nseqs;i++)
+ {
+ if (group[i+1] == 2)
+ {
+if (debug>3) {
+extern char **names;
+fprintf(stdout,"%s\n",names[i+1]);
+}
+ len = seqlen_array[i+1];
+ alignment[nseqs1+nseqs2] =
+ (char *) ckalloc( (prf_length2+2) * sizeof (char) );
+ for (j=0;j<len;j++)
+ alignment[nseqs1+nseqs2][j] = seq_array[i+1][j+1];
+ for(j=len;j<prf_length2;j++)
+ alignment[nseqs1+nseqs2][j+1]=gap_pos1;
+ alignment[nseqs1+nseqs2][j] = ENDALN;
+ aln_len[nseqs1+nseqs2] = prf_length2;
+ aln_weight[nseqs1+nseqs2] = seq_weight[i];
+ nseqs2++;
+ }
+ }
+
+ max_aln_length = prf_length1 + prf_length2+2;
+
+/*
+ calculate real length of profiles - removing gaps!
+*/
+ len1=0;
+ for (i=0;i<nseqs1;i++)
+ {
+ is=0;
+ for (j=0; j<MIN(aln_len[i],prf_length1); j++)
+ {
+ c = alignment[i][j];
+ if ((c !=gap_pos1) && (c != gap_pos2)) is++;
+ }
+ len1+=is;
+ }
+ len1/=(float)nseqs1;
+
+ len2=0;
+ for (i=nseqs1;i<nseqs2+nseqs1;i++)
+ {
+ is=0;
+ for (j=0; j<MIN(aln_len[i],prf_length2); j++)
+ {
+ c = alignment[i][j];
+ if ((c !=gap_pos1) && (c != gap_pos2)) is++;
+ }
+ len2+=is;
+ }
+ len2/=(float)nseqs2;
+
+ if (dnaflag)
+ {
+ scale=1.0;
+ if (strcmp(dnamtrxname, "iub") == 0)
+ {
+ matptr = swgapdnamt;
+ mat_xref = def_dna_xref;
+ }
+ else if (strcmp(dnamtrxname, "clustalw") == 0)
+ {
+ matptr = clustalvdnamt;
+ mat_xref = def_dna_xref;
+ scale=0.66;
+ }
+ else
+ {
+ matptr = userdnamat;
+ mat_xref = dna_xref;
+ }
+ maxres = get_matrix(matptr, mat_xref, matrix, neg_matrix, int_scale);
+ if (maxres == 0) return((sint)-1);
+/*
+ matrix[0][4]=transition_weight*matrix[0][0];
+ matrix[4][0]=transition_weight*matrix[0][0];
+ matrix[2][11]=transition_weight*matrix[0][0];
+ matrix[11][2]=transition_weight*matrix[0][0];
+ matrix[2][12]=transition_weight*matrix[0][0];
+ matrix[12][2]=transition_weight*matrix[0][0];
+*/
+/* fix suggested by Chanan Rubin at Compugen */
+ matrix[mat_xref[0]][mat_xref[4]]=transition_weight*matrix[0][0];
+ matrix[mat_xref[4]][mat_xref[0]]=transition_weight*matrix[0][0];
+ matrix[mat_xref[2]][mat_xref[11]]=transition_weight*matrix[0][0];
+ matrix[mat_xref[11]][mat_xref[2]]=transition_weight*matrix[0][0];
+ matrix[mat_xref[2]][mat_xref[12]]=transition_weight*matrix[0][0];
+ matrix[mat_xref[12]][mat_xref[2]]=transition_weight*matrix[0][0];
+
+ gapcoef1 = gapcoef2 = 100.0 * gap_open *scale;
+ lencoef1 = lencoef2 = 100.0 * gap_extend *scale;
+ }
+ else
+ {
+ if(len1==0 || len2==0) {
+ logmin=1.0;
+ logdiff=1.0;
+ }
+ else {
+ minlen = MIN(len1,len2);
+ logmin = 1.0/log10((double)minlen);
+ if (len2<len1)
+ logdiff = 1.0+0.5*log10((double)((float)len2/(float)len1));
+ else if (len1<len2)
+ logdiff = 1.0+0.5*log10((double)((float)len1/(float)len2));
+ else logdiff=1.0;
+ if(logdiff<0.9) logdiff=0.9;
+ }
+if(debug>3) fprintf(stdout,"%d %d logmin %f logdiff %f\n",
+(pint)len1,(pint)len2, logmin,logdiff);
+ scale=0.75;
+ if (strcmp(mtrxname, "blosum") == 0)
+ {
+ scale=0.75;
+ if (negative || distance_tree == FALSE) matptr = blosum40mt;
+ else if (pcid > 80.0)
+ {
+ matptr = blosum80mt;
+ }
+ else if (pcid > 60.0)
+ {
+ matptr = blosum62mt2;
+ }
+ else if (pcid > 40.0)
+ {
+ matptr = blosum45mt;
+ }
+ else if (pcid > 30.0)
+ {
+ scale=0.5;
+ matptr = blosum45mt;
+ }
+ else if (pcid > 20.0)
+ {
+ scale=0.6;
+ matptr = blosum45mt;
+ }
+ else
+ {
+ scale=0.6;
+ matptr = blosum30mt;
+ }
+ mat_xref = def_aa_xref;
+
+ }
+ else if (strcmp(mtrxname, "pam") == 0)
+ {
+ scale=0.75;
+ if (negative || distance_tree == FALSE) matptr = pam120mt;
+ else if (pcid > 80.0) matptr = pam20mt;
+ else if (pcid > 60.0) matptr = pam60mt;
+ else if (pcid > 40.0) matptr = pam120mt;
+ else matptr = pam350mt;
+ mat_xref = def_aa_xref;
+ }
+ else if (strcmp(mtrxname, "gonnet") == 0)
+ {
+ scale/=2.0;
+ if (negative || distance_tree == FALSE) matptr = gon250mt;
+ else if (pcid > 35.0)
+ {
+ matptr = gon80mt;
+ scale/=2.0;
+ }
+ else if (pcid > 25.0)
+ {
+ if(minlen<100) matptr = gon250mt;
+ else matptr = gon120mt;
+ }
+ else
+ {
+ if(minlen<100) matptr = gon350mt;
+ else matptr = gon160mt;
+ }
+ mat_xref = def_aa_xref;
+ int_scale /= 10;
+ }
+ else if (strcmp(mtrxname, "id") == 0)
+ {
+ matptr = idmat;
+ mat_xref = def_aa_xref;
+ }
+ else if(user_series)
+ {
+ matptr=NULL;
+ found=FALSE;
+ for(i=0;i<matseries.nmat;i++)
+ if(pcid>=matseries.mat[i].llimit && pcid<=matseries.mat[i].ulimit)
+ {
+ j=i;
+ found=TRUE;
+ break;
+ }
+ if(found==FALSE)
+ {
+ if(!error_given)
+ warning(
+"\nSeries matrix not found for sequence percent identity = %d.\n"
+"(Using first matrix in series as a default.)\n"
+"This alignment may not be optimal!\n"
+"SUGGESTION: Check your matrix series input file and try again.",(int)pcid);
+ error_given=TRUE;
+ j=0;
+ }
+if (debug>3) fprintf(stdout,"pcid %d matrix %d\n",(pint)pcid,(pint)j+1);
+
+ matptr = matseries.mat[j].matptr;
+ mat_xref = matseries.mat[j].aa_xref;
+/* this gives a scale of 0.5 for pcid=llimit and 1.0 for pcid=ulimit */
+ scale=0.5+(pcid-matseries.mat[j].llimit)/((matseries.mat[j].ulimit-matseries.mat[j].llimit)*2.0);
+ }
+ else
+ {
+ matptr = usermat;
+ mat_xref = aa_xref;
+ }
+if(debug>3) fprintf(stdout,"pcid %3.1f scale %3.1f\n",pcid,scale);
+ maxres = get_matrix(matptr, mat_xref, matrix, negative, int_scale);
+ if (maxres == 0)
+ {
+ fprintf(stdout,"Error: matrix %s not found\n", mtrxname);
+ return(-1);
+ }
+
+ if (negative) {
+ gapcoef1 = gapcoef2 = 100.0 * (float)(gap_open);
+ lencoef1 = lencoef2 = 100.0 * gap_extend;
+ }
+ else {
+ if (mat_avscore <= 0)
+ gapcoef1 = gapcoef2 = 100.0 * (float)(gap_open + logmin);
+ else
+ gapcoef1 = gapcoef2 = scale * mat_avscore * (float)(gap_open/(logdiff*logmin));
+ lencoef1 = lencoef2 = 100.0 * gap_extend;
+ }
+ }
+if (debug>3)
+{
+fprintf(stdout,"matavscore %d\n",mat_avscore);
+fprintf(stdout,"Gap Open1 %d Gap Open2 %d Gap Extend1 %d Gap Extend2 %d\n",
+ (pint)gapcoef1,(pint)gapcoef2, (pint)lencoef1,(pint)lencoef2);
+fprintf(stdout,"Matrix %s\n", mtrxname);
+}
+
+ profile1 = (sint **) ckalloc( (prf_length1+2) * sizeof (sint *) );
+ for(i=0; i<prf_length1+2; i++)
+ profile1[i] = (sint *) ckalloc( (LENCOL+2) * sizeof(sint) );
+
+ profile2 = (sint **) ckalloc( (prf_length2+2) * sizeof (sint *) );
+ for(i=0; i<prf_length2+2; i++)
+ profile2[i] = (sint *) ckalloc( (LENCOL+2) * sizeof(sint) );
+
+/*
+ calculate the Gap Coefficients.
+*/
+ gaps = (sint *) ckalloc( (max_aln_length+1) * sizeof (sint) );
+
+ if (switch_profiles == FALSE)
+ calc_gap_coeff(alignment, gaps, profile1, (struct_penalties1 && use_ss1), gap_penalty_mask1,
+ (sint)0, nseqs1, prf_length1, gapcoef1, lencoef1);
+ else
+ calc_gap_coeff(alignment, gaps, profile1, (struct_penalties2 && use_ss2), gap_penalty_mask2,
+ (sint)0, nseqs1, prf_length1, gapcoef1, lencoef1);
+/*
+ calculate the profile matrix.
+*/
+ calc_prf1(profile1, alignment, gaps, matrix,
+ aln_weight, prf_length1, (sint)0, nseqs1);
+
+if (debug>4)
+{
+extern char *amino_acid_codes;
+ for (j=0;j<=max_aa;j++)
+ fprintf(stdout,"%c ", amino_acid_codes[j]);
+ fprintf(stdout,"\n");
+ for (i=0;i<prf_length1;i++)
+ {
+ for (j=0;j<=max_aa;j++)
+ fprintf(stdout,"%d ", (pint)profile1[i+1][j]);
+ fprintf(stdout,"%d ", (pint)profile1[i+1][gap_pos1]);
+ fprintf(stdout,"%d ", (pint)profile1[i+1][gap_pos2]);
+ fprintf(stdout,"%d %d\n",(pint)profile1[i+1][GAPCOL],(pint)profile1[i+1][LENCOL]);
+ }
+}
+
+/*
+ calculate the Gap Coefficients.
+*/
+
+ if (switch_profiles == FALSE)
+ calc_gap_coeff(alignment, gaps, profile2, (struct_penalties2 && use_ss2), gap_penalty_mask2,
+ nseqs1, nseqs1+nseqs2, prf_length2, gapcoef2, lencoef2);
+ else
+ calc_gap_coeff(alignment, gaps, profile2, (struct_penalties1 && use_ss1), gap_penalty_mask1,
+ nseqs1, nseqs1+nseqs2, prf_length2, gapcoef2, lencoef2);
+/*
+ calculate the profile matrix.
+*/
+ calc_prf2(profile2, alignment, aln_weight,
+ prf_length2, nseqs1, nseqs1+nseqs2);
+
+ aln_weight=ckfree((void *)aln_weight);
+
+if (debug>4)
+{
+extern char *amino_acid_codes;
+ for (j=0;j<=max_aa;j++)
+ fprintf(stdout,"%c ", amino_acid_codes[j]);
+ fprintf(stdout,"\n");
+ for (i=0;i<prf_length2;i++)
+ {
+ for (j=0;j<=max_aa;j++)
+ fprintf(stdout,"%d ", (pint)profile2[i+1][j]);
+ fprintf(stdout,"%d ", (pint)profile2[i+1][gap_pos1]);
+ fprintf(stdout,"%d ", (pint)profile2[i+1][gap_pos2]);
+ fprintf(stdout,"%d %d\n",(pint)profile2[i+1][GAPCOL],(pint)profile2[i+1][LENCOL]);
+ }
+}
+
+ aln_path1 = (char *) ckalloc( (max_aln_length+1) * sizeof(char) );
+ aln_path2 = (char *) ckalloc( (max_aln_length+1) * sizeof(char) );
+
+
+/*
+ align the profiles
+*/
+/* use Myers and Miller to align two sequences */
+
+ last_print = 0;
+ print_ptr = 1;
+
+ sb1 = sb2 = 0;
+ se1 = prf_length1;
+ se2 = prf_length2;
+
+ HH = (lint *) ckalloc( (max_aln_length+1) * sizeof (lint) );
+ DD = (lint *) ckalloc( (max_aln_length+1) * sizeof (lint) );
+ RR = (lint *) ckalloc( (max_aln_length+1) * sizeof (lint) );
+ SS = (lint *) ckalloc( (max_aln_length+1) * sizeof (lint) );
+ gS = (lint *) ckalloc( (max_aln_length+1) * sizeof (lint) );
+ displ = (sint *) ckalloc( (max_aln_length+1) * sizeof (sint) );
+
+ go = gapo; ge = gape;
+ vv = V;
+ score = pdiff1(sb1, sb2, se1-sb1, se2-sb2, go, go);
+
+ if(debug>1) fprintf(stdout, "prfalign2 score: %d\n", score);
+
+ HH=ckfree((void *)HH);
+ DD=ckfree((void *)DD);
+ RR=ckfree((void *)RR);
+ SS=ckfree((void *)SS);
+ gS=ckfree((void *)gS);
+
+ ptracepath( &alignment_len);
+
+ /*for(i=0;i<alignment_len;i++) {
+ fprintf(stdout, "%d ", aln_path1[i]);
+ }fprintf(stdout, "\n");
+ for(i=0;i<alignment_len;i++) {
+ fprintf(stdout, "%d ", aln_path2[i]);
+ }fprintf(stdout, "\n"); fflush(stdout);
+ */
+
+ /* JP: calculate the fraction of gapped positions */
+ ngaps=0, nonegaps = 0;
+ for(i=0;i<alignment_len;i++) {
+ if(aln_path1[i]==1) {ngaps++; continue;}
+ if(aln_path2[i]==1) {ngaps++; continue;}
+ nonegaps++;
+ }
+ if(debug>1) fprintf(stdout, "gaps: %d nonegaps: %d\n", ngaps, nonegaps );
+
+ /* JP: calculate the average sequence identity between the two blocks */
+ tpairs = 0; idpairs = 0;
+ first = 1; second = 1;
+ if(debug>1)fprintf(stdout, "seqlengths %d %d \n", groupptr[gi]->seqlength, groupptr[gj]->seqlength);
+ for (i=0;i<alignment_len;i++) {
+
+ /*fprintf(stdout, "%d %d \n", first, second); fflush(stdout);*/
+
+ if(aln_path1[i]==1) { second++; continue; }
+ if(aln_path2[i]==1) { first++; continue; }
+
+ for(j=1;j<=groupptr[gi]->seqnum;j++) {
+ for(k=1;k<=groupptr[gj]->seqnum;k++) {
+
+ if(groupptr[gi]->seq[j][first] && groupptr[gj]->seq[k][second]) {
+ tpairs++;
+ if(am[groupptr[gi]->seq[j][first]]==am[groupptr[gj]->seq[k][second]])
+ idpairs++;
+ }
+ }
+ }
+ first++; second++;
+ }
+
+ if(debug>1)fprintf(stdout, "%d %d \n", tpairs, idpairs); fflush(stdout);
+
+ /* JP: print out the subalignments */
+ if(debug > 1) {
+
+ first = 1;second = 1;
+ nblocks = (alignment_len-1)/80+1;
+ fprintf(stdout, "global alignment %d %d score %d average score %d average id %d\n", gi, gj, score, score/alignment_len, (int) (100.0*idpairs/tpairs) );
+ fprintf(stdout, "tpairs %d idpairs %d \n", tpairs, idpairs);
+ for(n=0;n<nblocks;n++) {
+ fflush(stdout);
+ fprintf(stdout, "first: %d\n", first);
+ for(j=1;j<=groupptr[gi]->seqnum;j++) {
+ tmpfirst = first;
+ fprintf(stdout, "%s\t", groupptr[gi]->name[j]);
+ for(i=n*80+1;(i<=(n+1)*80&&i<=alignment_len);i++){
+ if(aln_path1[i-1]==2) {
+ fprintf(stdout, "%c", am[groupptr[gi]->seq[j][tmpfirst]]);
+ tmpfirst++;
+ }
+ else {fprintf(stdout, "-");}
+ }
+ fprintf(stdout, "\n"); fflush(stdout);
+ }
+ first = tmpfirst;
+
+ fprintf(stdout, "second: %d\n", second); fflush(stdout);
+ for(j=1;j<=groupptr[gj]->seqnum;j++){
+ tmpsecond = second;
+ fprintf(stdout, "%s\t", groupptr[gj]->name[j]);
+ for(i=n*80+1;(i<=(n+1)*80&&i<=alignment_len);i++) {
+ if(aln_path2[i-1]==2) {
+ fprintf(stdout, "%c", am[groupptr[gj]->seq[j][tmpsecond]]);
+ tmpsecond++;
+ }
+ else{fprintf(stdout,"-");}
+ }
+ fprintf(stdout, "\n");
+ }
+ second = tmpsecond;
+ fprintf(stdout, "\n\n");
+ }
+ }
+
+ /* JP: lib generation */
+ if(groupptr[gi]->seqlength > groupptr[gj]->seqlength) {
+ score = groupptr[gj]->seqlength;
+ }
+ else score = groupptr[gi]->seqlength;
+ /* score *= (int) (100.0*idpairs/tpairs); */
+ score = (int) ( (log(score) * 100.0*idpairs/tpairs)*(1.0*nonegaps/(ngaps+nonegaps) ) );
+
+
+ /*score = (int) (100.0*idpairs/tpairs);*/
+ if(debug>1) fprintf(stdout, "score %d\n", score);
+ first = 1; second = 1;
+ S = displ;
+ for(i=0;i<alignment_len, first<=groupptr[gi]->seqlength, second<=groupptr[gj]->seqlength;i++) {
+ if(score<=0) break;
+ if(first>groupptr[gi]->seqlength) break;
+ if(second>groupptr[gj]->seqlength) break;
+ if(*(S+i)==0) {
+ if(debug>1)fprintf(stdout, "first: %d; second: %d\n", first, second); fflush(stdout);
+
+ if(glib[gi][gj][first] == NULL) {
+ glib[gi][gj][first] = SNavail();
+ glib[gi][gj][first]->ind = second;
+ glib[gi][gj][first]->sbe = score;
+ }
+ else {
+ snd = &glib[gi][gj][first];
+ /*fprintf(stdout, "snd: %d; glib: %d\n", snd, glib[gi][gj][first]);*/
+ while(*snd) {
+ if((*snd)->ind == second) {
+ (*snd)->sbe += score;
+ break;
+ }
+ else {
+ snd = &((*snd)->next);
+ /*fprintf(stdout, "snd: %d\n", snd);*/
+ }
+ }
+ if(!(*snd)) {
+ (*snd) = SNavail();
+ (*snd)->ind = second;
+ (*snd)->sbe = score;
+ /*fprintf(stdout, "====%d %d %d \n", first, (*snd)->ind, (*snd)->sbe);*/
+ }
+ }
+ /* AddSbe(glib[gi][gj][first], second, score/rl);*/
+ if(glib[gj][gi][second]==NULL) {
+ glib[gj][gi][second] = SNavail();
+ glib[gj][gi][second]->ind = first;
+ glib[gj][gi][second]->sbe = score;
+ }
+ else {
+ snd = &glib[gj][gi][second];
+ while(*snd) {
+ if((*snd)->ind == first) {
+ (*snd)->sbe += score;
+ break;
+ }
+ else snd = &((*snd)->next);
+ }
+ if(!*snd) {
+ *snd = SNavail();
+ (*snd)->ind = first;
+ (*snd)->sbe = score;
+ }
+ }
+ /* AddSbe(glib[gj][gi][second], first, score/rl); */
+ first++; second++;
+ }
+ if(*(S+i) > 0) {
+ second+=*(S+i);
+ /*first++; second++; */
+ }
+ if(*(S+i) < 0) {
+ first-=*(S+i);
+ /*first++; second++; */
+ }
+ }
+
+ fprintf(stdout, "*");
+
+ /*for(i=0;i<=alignment_len;i++) {
+ fprintf(stdout, "%d\t%d\n", i, displ[i]);
+ }*/
+
+ displ=ckfree((void *)displ);
+
+ /*add_ggaps();*/
+
+ for (i=0;i<prf_length1+2;i++)
+ profile1[i]=ckfree((void *)profile1[i]);
+ profile1=ckfree((void *)profile1);
+
+ for (i=0;i<prf_length2+2;i++)
+ profile2[i]=ckfree((void *)profile2[i]);
+ profile2=ckfree((void *)profile2);
+
+ prf_length1 = alignment_len;
+
+ aln_path1=ckfree((void *)aln_path1);
+ aln_path2=ckfree((void *)aln_path2);
+
+ /* JP: do not change the sequences */
+ /*
+ NumSeq = 0;
+ for (j=0;j<nseqs;j++)
+ {
+ if (group[j+1] == 1)
+ {
+ seqlen_array[j+1] = prf_length1;
+ realloc_seq(j+1,prf_length1);
+ for (i=0;i<prf_length1;i++)
+ seq_array[j+1][i+1] = alignment[NumSeq][i];
+ NumSeq++;
+ }
+ }
+ for (j=0;j<nseqs;j++)
+ {
+ if (group[j+1] == 2)
+ {
+ seqlen_array[j+1] = prf_length1;
+ seq_array[j+1] = (char *)realloc(seq_array[j+1], (prf_length1+2) * sizeof (char));
+ realloc_seq(j+1,prf_length1);
+ for (i=0;i<prf_length1;i++)
+ seq_array[j+1][i+1] = alignment[NumSeq][i];
+ NumSeq++;
+ }
+ }
+ */
+
+ for (i=0;i<nseqs1+nseqs2;i++)
+ alignment[i]=ckfree((void *)alignment[i]);
+ alignment=ckfree((void *)alignment);
+
+ aln_len=ckfree((void *)aln_len);
+ gaps=ckfree((void *)gaps);
+
+ return(score/100);
+}
+
+/*
+ * Re-write every row of alignment[] so that it follows the traced path:
+ * rows 0..nseqs1-1 are expanded along aln_path1, rows nseqs1..nseqs1+nseqs2-1
+ * along aln_path2.  A path value of 2 copies the next residue of the row
+ * (or ENDALN once the row is exhausted), a value of 1 inserts gap_pos1, and
+ * any other value is reported on stdout.  Each row is resized to
+ * alignment_len+2 bytes, terminated with ENDALN, and aln_len[] is updated.
+ * Afterwards the gap-penalty / secondary-structure masks are expanded the
+ * same way, and with debug>3 the resulting alignment is printed.
+ */
+static void add_ggaps(void)
+{
+ sint grp, seq, col, src;
+ sint first_seq, end_seq;
+ char *path;
+ char *row;
+
+ row = (char *) ckalloc( (alignment_len+1) * sizeof (char) );
+
+ for (grp=0; grp<2; grp++)
+ {
+/*
+   pick the row range and the path that steers this profile
+*/
+  if (grp == 0)
+  {
+   first_seq = 0;
+   end_seq = nseqs1;
+   path = aln_path1;
+  }
+  else
+  {
+   first_seq = nseqs1;
+   end_seq = nseqs1+nseqs2;
+   path = aln_path2;
+  }
+
+  for (seq=first_seq; seq<end_seq; seq++)
+  {
+   src = 0;
+   for (col=0; col<alignment_len; col++)
+   {
+    switch (path[col])
+    {
+     case 2:
+      row[col] = (src < aln_len[seq]) ? alignment[seq][src] : ENDALN;
+      src++;
+      break;
+     case 1:
+/*
+   insertion in this alignment...
+*/
+      row[col] = gap_pos1;
+      break;
+     default:
+      fprintf(stdout,"Error in aln_path\n");
+      break;
+    }
+   }
+   row[col] = ENDALN;
+
+   alignment[seq] = (char *)realloc(alignment[seq], (alignment_len+2) * sizeof (char));
+   for (col=0; col<alignment_len; col++)
+    alignment[seq][col] = row[col];
+   alignment[seq][col] = ENDALN;
+   aln_len[seq] = alignment_len;
+  }
+ }
+
+ row=ckfree((void *)row);
+
+ if (struct_penalties1 != NONE)
+  gap_penalty_mask1 = add_ggaps_mask(gap_penalty_mask1,alignment_len,aln_path1,aln_path2);
+ if (struct_penalties1 == SECST)
+  sec_struct_mask1 = add_ggaps_mask(sec_struct_mask1,alignment_len,aln_path1,aln_path2);
+
+ if (struct_penalties2 != NONE)
+  gap_penalty_mask2 = add_ggaps_mask(gap_penalty_mask2,alignment_len,aln_path2,aln_path1);
+ if (struct_penalties2 == SECST)
+  sec_struct_mask2 = add_ggaps_mask(sec_struct_mask2,alignment_len,aln_path2,aln_path1);
+
+ if (debug>3)
+ {
+  char shown;
+  char code;
+  extern char *amino_acid_codes;
+
+  for (seq=0; seq<nseqs1+nseqs2; seq++)
+  {
+   for (col=0; col<alignment_len; col++)
+   {
+    code = alignment[seq][col];
+    if (code == ENDALN) break;
+    if ((code == gap_pos1) || (code == gap_pos2))
+     shown = '-';
+    else
+     shown = amino_acid_codes[code];
+    fprintf(stdout,"%c", shown);
+   }
+   fprintf(stdout,"\n\n");
+  }
+ }
+
+}
+
+/*
+ * Expand a mask to len columns along an alignment path and return the
+ * (re-allocated) mask.  The steering path is path1 when switch_profiles is
+ * FALSE and path2 otherwise.  A path value of 2 copies the next mask
+ * character, a value of 1 inserts gap_pos1; other values leave the column
+ * of the scratch buffer as ckalloc() delivered it.  The result is
+ * terminated with '\0'.
+ */
+static char * add_ggaps_mask(char *mask, int len, char *path1, char *path2)
+{
+ char *steer;
+ char *gapped;
+ int col;
+ int src = 0;
+
+ gapped = (char *) ckalloc( (len+1) * sizeof (char) );
+
+ steer = (switch_profiles == FALSE) ? path1 : path2;
+
+ for (col=0; col<len; col++)
+ {
+  switch (steer[col])
+  {
+   case 2:
+    gapped[col] = mask[src];
+    src++;
+    break;
+   case 1:
+    gapped[col] = gap_pos1;
+    break;
+   default:
+    break;
+  }
+ }
+
+ mask = (char *)realloc(mask,(len+2) * sizeof (char));
+ col = 0;
+ while (col < len)
+ {
+  mask[col] = gapped[col];
+  col++;
+ }
+ mask[col] ='\0';
+
+ gapped=ckfree((void *)gapped);
+
+ return(mask);
+}
+
+/*
+ * Score for pairing position n of the first profile with position m of the
+ * second one.  The value is read straight from the vv table, which the
+ * caller points at its score matrix before pdiff1() is started.
+ */
+static lint prfscore(sint n, sint m)
+{
+ return vv[n][m];
+}
+
+/*
+ * Turn the edit script in displ[1..print_ptr-1] into the two column paths
+ * aln_path1 / aln_path2 (2 = residue, 1 = gap) and store the number of
+ * columns produced in *alen.
+ *   displ == 0 : one column with a residue in both paths
+ *   displ  > 0 : that many columns, residue in path 2, gap in path 1
+ *   displ  < 0 : -displ columns, residue in path 1, gap in path 2
+ * With debug>1 the script is echoed to stdout.
+ */
+static void ptracepath(sint *alen)
+{
+ sint op, run, step;
+ sint col = 0;
+ sint last_op = print_ptr-1;
+ char *filled, *gapped;
+
+ for (op=1; op<=last_op; op++) {
+if (debug>1) fprintf(stdout,"%d ",(pint)displ[op]);
+  run = displ[op];
+  if (run == 0) {
+   aln_path1[col] = 2;
+   aln_path2[col] = 2;
+   col++;
+   continue;
+  }
+  if (run > 0) {
+   filled = aln_path2;
+   gapped = aln_path1;
+  }
+  else {
+   run = -run;
+   filled = aln_path1;
+   gapped = aln_path2;
+  }
+  for (step=0; step<run; step++) {
+   filled[col+step] = 2;
+   gapped[col+step] = 1;
+  }
+  col += run;
+ }
+if (debug>1) fprintf(stdout,"\n");
+
+ *alen = col;
+
+}
+
+/*
+ * Record a deletion of k positions in the edit script.  If the previous
+ * entry was already a deletion (last_print < 0) it is extended by k,
+ * otherwise a new entry -k is appended.  last_print tracks the entry written.
+ */
+static void pdel(sint k)
+{
+ if (last_print >= 0) {
+  displ[print_ptr] = -k;
+  last_print = displ[print_ptr];
+  print_ptr++;
+  return;
+ }
+ displ[print_ptr-1] -= k;
+ last_print = displ[print_ptr-1];
+}
+
+/*
+ * Record an insertion of k positions in the edit script.  When the previous
+ * entry was a deletion (last_print < 0) the insertion is written into that
+ * slot and the deletion value is appended after it; last_print is left
+ * unchanged in that case.  Otherwise k is simply appended.
+ */
+static void padd(sint k)
+{
+ if (last_print >= 0) {
+  displ[print_ptr] = k;
+  last_print = k;
+  print_ptr++;
+  return;
+ }
+ displ[print_ptr-1] = k;
+ displ[print_ptr] = last_print;
+ print_ptr++;
+}
+
+/* Append a match (value 0) to the edit script and remember it in last_print. */
+static void palign(void)
+{
+ last_print = 0;
+ displ[print_ptr] = 0;
+ print_ptr++;
+}
+
+
+/*
+ * pdiff1: recursive divide-and-conquer alignment of one sub-problem.
+ *
+ *   A, B    offsets of the sub-problem in the first / second profile;
+ *           cell scores are read with prfscore(A+i, B+j) for 1-based i, j
+ *   M, N    number of positions of the first / second profile to align
+ *   go1     gap-open value applied along the leading edge (forward pass)
+ *   go2     gap-open value applied along the trailing edge (reverse pass)
+ *
+ * The edit script is emitted through pdel() / padd() / palign().  The value
+ * returned is the best score found for this sub-problem (midh); the return
+ * values of the two recursive calls are not used.
+ *
+ * Note: t, i, j, hh, f and s are static and therefore shared by every
+ * recursion level; they are only used before the recursive calls are made.
+ * tl is assigned but never read, and g, h and e are unused.
+ */
+static lint pdiff1(sint A,sint B,sint M,sint N,sint go1, sint go2)
+{
+ sint midi,midj,type;
+ lint midh;
+
+ static lint t, tl, g, h;
+
+{ static sint i,j;
+ static lint hh, f, e, s;
+
+/* Boundary cases: M <= 1 or N == 0 */
+if (debug>2) fprintf(stdout,"A %d B %d M %d N %d midi %d go1 %d go2 %d\n",
+(pint)A,(pint)B,(pint)M,(pint)N,(pint)M/2,(pint)go1,(pint)go2);
+
+/* if sequence B is empty.... */
+
+ if(N<=0) {
+
+/* if sequence A is not empty.... */
+
+ if(M>0) {
+
+/* delete residues A[1] to A[M] */
+
+ pdel(M);
+ }
+ return (-(go+(M-1)*ge));
+ /*return(-gap_penalty1(A,B,M)); */
+ }
+
+/* if sequence A is empty.... */
+
+ if(M<=1) {
+ if(M<=0) {
+
+/* insert residues B[1] to B[N] */
+
+ padd(N);
+ return( -(go+(N-1)*ge) );
+ /* return(-gap_penalty2(A,B,N)); */
+ }
+
+/* if sequence A has just one residue.... */
+
+/* midh starts as the score of leaving that residue unaligned (midj stays 0) */
+ if (go1 == 0)
+ /*midh = -gap_penalty1(A+1,B+1,N); */
+ midh = -(ge + go + N*ge);
+ else
+ midh = -(go2 + ge + go + N*ge);
+ /*midh = -gap_penalty2(A+1,B,1)-gap_penalty1(A+1,B+1,N); */
+
+/* try pairing the single residue with each B[j]; a gap is charged on a side only when that side is non-empty */
+ midj = 0;
+ for(j=1;j<=N;j++) {
+ /*hh = -gap_penalty1(A,B+1,j-1) + prfscore(A+1,B+j) -gap_penalty1(A+1,B+j+1,N-j); */
+ if(j>1) hh = -(go+(j-1)*ge) + prfscore(A+1, B+j);
+ else hh = prfscore(A+1, B+j);
+ if(j<N) hh += -(go+(N-j)*ge);
+ /* hh = -(go+(j-1)*ge) + absprfscore(setnum, A+1, B+j) -(go+(N-j)*ge); */
+ if(hh>midh) {
+ midh = hh;
+ midj = j;
+ }
+ }
+
+ if(midj==0) {
+ padd(N);
+ pdel(1);
+ }
+ else {
+ if(midj>1) padd(midj-1);
+ palign();
+ if(midj<N) padd(N-midj);
+ }
+ return midh;
+ }
+
+
+/* Divide sequence A in half: midi */
+
+ midi = M / 2;
+
+/* In a forward phase, calculate all HH[j] and DD[j] */
+
+ HH[0] = 0.0;
+ t = -go; tl = -ge;
+ for(j=1;j<=N;j++) {
+ HH[j] = t = t-ge;
+ DD[j] = t-go;
+ }
+
+ t = -go1;
+ for(i=1;i<=midi;i++) {
+ s = HH[0];
+ HH[0] = hh = t = t-ge;
+ f = t-go;
+
+ for(j=1;j<=N;j++) {
+
+ f = f - ge;
+ if( (hh - go -ge) > f ) f = hh -go - ge;
+ DD[j] = DD[j] - ge;
+ if(DD[j] < HH[j] - go -ge) DD[j] = HH[j] -go - ge;
+ hh = s + prfscore(A+i, B+j);
+ if(f>hh) hh = f;
+ if(DD[j]>hh) hh = DD[j];
+
+ s = HH[j];
+ HH[j] = hh;
+
+
+ }
+ }
+
+ DD[0]=HH[0];
+
+/* In a reverse phase, calculate all RR[j] and SS[j] */
+
+ RR[N]=0.0;
+ t = -go;
+ for(j=N-1;j>=0;j--) {
+ RR[j] = t = t -ge;
+ SS[j] = t -go;
+ }
+
+ t = -go2;
+ for(i=M-1;i>=midi;i--) {
+ s = RR[N];
+ RR[N] = hh = t = t - ge;
+ f = t - go;
+
+ for(j=N-1;j>=0;j--) {
+
+ f = f - ge;
+ if(hh-go-ge > f) f = hh -go -ge;
+ SS[j] = SS[j] - ge;
+ if(RR[j] - go -ge > SS[j] ) SS[j] = RR[j] - go -ge;
+
+ hh = s + prfscore(A+i+1, B+j+1);
+ if(f>hh) hh = f;
+ if(SS[j] > hh) hh = SS[j];
+
+ s = RR[j];
+ RR[j] = hh;
+
+ }
+ }
+ SS[N]=RR[N];
+
+/* find midj, such that HH[j]+RR[j] or DD[j]+SS[j]+gap is the maximum */
+
+ midh=HH[0]+RR[0];
+ midj=0;
+ type=1;
+ for(j=0;j<=N;j++) {
+ hh = HH[j] + RR[j];
+ if(hh>=midh)
+ if(hh>midh || (HH[j]!=DD[j] && RR[j]==SS[j])) {
+ midh=hh;
+ midj=j;
+ }
+ }
+
+/* type 2: the best path crosses row midi inside a gap (DD/SS); only a strictly better score replaces type 1 */
+ for(j=N;j>=0;j--) {
+ hh = DD[j] + SS[j] - go;
+ if(hh>midh) {
+ midh=hh;
+ midj=j;
+ type=2;
+ }
+ }
+}
+
+/* Conquer recursively around midpoint */
+
+
+ /*fprintf(stdout, "%d %d %d %d %d %d %d \n", A, midi, M, B, midj, N, type);*/
+
+
+ if(type==1) { /* Type 1 gaps */
+if (debug>2) fprintf(stdout,"Type 1,1: midj %d\n",(pint)midj);
+ pdiff1(A,B,midi,midj,go1,go);
+if (debug>2) fprintf(stdout,"Type 1,2: midj %d\n",(pint)midj);
+ pdiff1(A+midi,B+midj,M-midi,N-midj,go,go2);
+ }
+ else {
+/* type 2: rows midi and midi+1 are emitted as a deletion of 2, the halves on either side are solved recursively */
+if (debug>2) fprintf(stdout,"Type 2,1: midj %d\n",(pint)midj);
+ pdiff1(A,B,midi-1,midj,go1, 0);
+ pdel(2);
+if (debug>2) fprintf(stdout,"Type 2,2: midj %d\n",(pint)midj);
+ pdiff1(A+midi+1,B+midj,M-midi-1,N-midj,0,go2);
+ }
+
+ return midh; /* Return the score of the best alignment */
+}
+
+
+
diff --git a/prfalignabs.c b/prfalignabs.c
new file mode 100644
index 0000000..d27faeb
--- /dev/null
+++ b/prfalignabs.c
@@ -0,0 +1,665 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include "pcma.h"
+/*#include "new.h"*/
+#define ENDALN 127
+
+#define MAX(a,b) ((a)>(b)?(a):(b))
+#define MIN(a,b) ((a)<(b)?(a):(b))
+
+/*
+ * Prototypes
+ */
+static lint pdiff1(sint A,sint B,sint M,sint N,sint go1, sint go2);
+static lint absprfscore(sint set, sint n, sint m);
+static sint open_penalty1(sint i, sint j);
+static void padd(sint k);
+static void pdel(sint k);
+static void palign(void);
+static void ptracepath(sint *alen);
+static void add_ggaps(void);
+static char * add_ggaps_mask(char *mask, int len, char *path1, char *path2);
+
+/*
+ * Global variables
+ */
+extern double **tmat;
+extern float gap_open, gap_extend;
+extern float transition_weight;
+extern sint gap_pos1, gap_pos2;
+extern sint max_aa;
+extern sint nseqs;
+extern sint *seqlen_array;
+extern sint *seq_weight;
+extern sint debug;
+extern Boolean neg_matrix;
+extern sint mat_avscore;
+extern short blosum30mt[], blosum40mt[], blosum45mt[];
+extern short blosum62mt2[], blosum80mt[];
+extern short pam20mt[], pam60mt[];
+extern short pam120mt[], pam160mt[], pam350mt[];
+extern short gon40mt[], gon80mt[];
+extern short gon120mt[], gon160mt[], gon250mt[], gon350mt[];
+extern short clustalvdnamt[],swgapdnamt[];
+extern short idmat[];
+extern short usermat[];
+extern short userdnamat[];
+extern Boolean user_series;
+extern UserMatSeries matseries;
+
+extern short def_dna_xref[],def_aa_xref[],dna_xref[],aa_xref[];
+extern sint max_aln_length;
+extern Boolean distance_tree;
+extern Boolean dnaflag;
+extern char mtrxname[];
+extern char dnamtrxname[];
+extern char **seq_array;
+extern char *amino_acid_codes;
+extern char *gap_penalty_mask1,*gap_penalty_mask2;
+extern char *sec_struct_mask1,*sec_struct_mask2;
+extern sint struct_penalties1, struct_penalties2;
+extern Boolean use_ss1, use_ss2;
+extern Boolean endgappenalties;
+
+/* JP */
+extern sint ngroups;
+extern streeptr *groupptr;
+extern void assign_node(streeptr p, sint *aligned);
+extern streeptr *grp_ancestor;
+extern sint ngroups;
+extern sint ave_grp_id;
+extern SN **** glib;
+extern sint **sets;
+extern sint cosmetic_penalty;
+static sint setnum;
+static sint go;
+static sint ge;
+
+
+
+static sint print_ptr,last_print;
+static sint *displ;
+
+static char **alignment;
+static sint *aln_len;
+static sint *aln_weight;
+static char *aln_path1, *aln_path2;
+static sint alignment_len;
+static sint **profile1, **profile2;
+static lint *HH, *DD, *RR, *SS;
+static lint *gS;
+static sint matrix[NUMRES][NUMRES];
+static sint nseqs1, nseqs2;
+static sint prf_length1, prf_length2;
+static sint *gaps;
+static sint gapcoef1,gapcoef2;
+static sint lencoef1,lencoef2;
+static Boolean switch_profiles;
+
+/*
+ * Align the abstract sequences of the left and right children of
+ * grp_ancestor[set] and store the merged result in grp_ancestor[set].
+ *
+ * Steps: set the gap parameters (go = cosmetic_penalty, ge = 0), allocate
+ * the path and work arrays, run pdiff1() over the two children, turn the
+ * edit script into aln_path1 / aln_path2 with ptracepath(), and let
+ * add_ggaps() build the parent's abstractseq rows (which also releases the
+ * children's rows).  The parent's abseqnum and abseqlength are updated.
+ *
+ * Side effects: overwrites the global max_aln_length with se1 + se2 + 2.
+ * Always returns 0; the alignment score is only printed when debug > 1.
+ */
+lint prfalignabs(sint set)
+{
+ sint i, j;
+ sint se1, se2;
+ lint score;
+
+ setnum = set;
+ go = cosmetic_penalty; ge = 0;
+
+ nseqs1 = grp_ancestor[set]->left->abseqnum;
+ nseqs2 = grp_ancestor[set]->right->abseqnum;
+ grp_ancestor[set]->abseqnum = nseqs1 + nseqs2;
+ grp_ancestor[set]->abstractseq = ckalloc( (nseqs1 + nseqs2 + 2) * sizeof(int *));
+
+/*
+   align the profiles
+*/
+/* use Myers and Miller to align two sequences */
+
+ last_print = 0;
+ print_ptr = 1;
+
+ se1 = grp_ancestor[set]->left->abseqlength;
+ se2 = grp_ancestor[set]->right->abseqlength;
+
+ if(debug > 1)fprintf(stdout, "se1: %d; se2: %d\n", se1, se2);
+
+ max_aln_length = se1 + se2 + 2;
+
+ aln_path1 = (char *) ckalloc( (max_aln_length+1) * sizeof(char) );
+ aln_path2 = (char *) ckalloc( (max_aln_length+1) * sizeof(char) );
+
+ HH = (lint *) ckalloc( (max_aln_length+1) * sizeof (lint) );
+ DD = (lint *) ckalloc( (max_aln_length+1) * sizeof (lint) );
+ RR = (lint *) ckalloc( (max_aln_length+1) * sizeof (lint) );
+ SS = (lint *) ckalloc( (max_aln_length+1) * sizeof (lint) );
+ gS = (lint *) ckalloc( (max_aln_length+1) * sizeof (lint) );
+ displ = (sint *) ckalloc( (max_aln_length+1) * sizeof (sint) );
+
+ if(debug > 1) {
+  fprintf(stdout, "gap penalties: %d %d %d \n", go, ge, cosmetic_penalty);
+
+  /* print the set number itself (this used to print the literal word "set") */
+  fprintf(stdout, "SET: %d\n", (pint)set);
+  /* score-table dump, enabled only for the hard-wired set number 1111 */
+  if(set==1111) {
+   for(i=1;i<=100;i++) {
+    for(j=1;j<=100;j++) {
+     if(absprfscore(set, i,j)==0) fprintf(stdout, "-");
+     else
+      fprintf(stdout, "%d", absprfscore(set, i,j)) ;
+    }
+    fprintf(stdout, "\n");
+   }
+  }
+ }
+
+ score = pdiff1(0, 0, se1, se2, cosmetic_penalty, cosmetic_penalty);
+ if(debug>1) fprintf(stdout, "%d\n", score);
+
+ HH=ckfree((void *)HH);
+ DD=ckfree((void *)DD);
+ RR=ckfree((void *)RR);
+ SS=ckfree((void *)SS);
+ gS=ckfree((void *)gS);
+
+ /* NEW: test the displ -- if the script starts with a match, move the
+    second entry in front of it.
+    NOTE(review): the reason for this swap is not visible here -- confirm. */
+ if(displ[1]==0) {
+  displ[1] = displ[2];
+  displ[2] = 0;
+ }
+
+ ptracepath( &alignment_len);
+
+ displ=ckfree((void *)displ);
+
+ if(debug>1)
+  for(i=0;i<alignment_len;i++) {
+   fprintf(stdout, "path %d: %d %d\n", i, aln_path1[i], aln_path2[i]);
+  }
+
+ add_ggaps();
+
+ grp_ancestor[set]->abseqlength = alignment_len;
+
+ if(debug>1)printAbstract(set);
+ fflush(stdout);
+
+ aln_path1=ckfree((void *)aln_path1);
+ aln_path2=ckfree((void *)aln_path2);
+ return 0;
+
+}
+
+/*
+ * Build the abstract sequences of grp_ancestor[setnum] from those of its
+ * left and right children, following aln_path1 / aln_path2.
+ *
+ * Rows 1..nseqs1 come from the left child, rows nseqs1+1..nseqs1+nseqs2
+ * from the right child.  Element [0] of each row is copied from the child
+ * row; column i+1 receives the child's next element when the path value is
+ * 2 and 0 (gap) when it is 1.  Any other path value is reported on stderr
+ * and terminates the program.  Finally the children's rows and row tables
+ * are released with ckfree().
+ */
+static void add_ggaps(void)
+{
+ sint j;
+ sint i,ix;
+
+ if(debug>1)fprintf(stdout, "#########alignment length: %d\n", alignment_len);
+ for(i=1;i<=grp_ancestor[setnum]->abseqnum;i++) {
+  grp_ancestor[setnum]->abstractseq[i] = ckalloc((alignment_len+3)*sizeof(sint));
+ }
+
+ for(j=1;j<=nseqs1;j++) {
+  grp_ancestor[setnum]->abstractseq[j][0] = grp_ancestor[setnum]->left->abstractseq[j][0];
+  ix = 1;
+  for(i=0;i<alignment_len;i++) {
+   if(aln_path1[i]==2) {
+    grp_ancestor[setnum]->abstractseq[j][i+1]=grp_ancestor[setnum]->left->abstractseq[j][ix];
+    ix++;
+   }
+   else if(aln_path1[i]==1) {
+    /* gap column: assign 0 (this used to be a no-op `==` comparison) */
+    grp_ancestor[setnum]->abstractseq[j][i+1]=0;
+   }
+   else {
+    /* NOTE(review): exits with status 0 on an error path -- confirm intent */
+    fprintf(stderr, "Error in aln_path: %d\n", aln_path1[i]); exit(0);
+   }
+  }
+ }
+
+ for(j=nseqs1+1;j<=nseqs1+nseqs2;j++) {
+  grp_ancestor[setnum]->abstractseq[j][0]=grp_ancestor[setnum]->right->abstractseq[j-nseqs1][0];
+  ix = 1;
+  for(i=0;i<alignment_len;i++) {
+   if(aln_path2[i]==2) {
+    grp_ancestor[setnum]->abstractseq[j][i+1]=grp_ancestor[setnum]->right->abstractseq[j-nseqs1][ix];
+    ix++;
+   }
+   else if(aln_path2[i]==1) {
+    /* gap column: assign 0 (this used to be a no-op `==` comparison) */
+    grp_ancestor[setnum]->abstractseq[j][i+1]=0;
+   }
+   else {
+    fprintf(stderr, "Error in aln path: %d\n", aln_path2[i]); exit(0);
+   }
+  }
+ }
+
+ /* the children's rows have been merged into the parent; release them */
+ for(i=1;i<=nseqs1;i++)
+  ckfree(grp_ancestor[setnum]->left->abstractseq[i]);
+ for(j=1;j<=nseqs2;j++)
+  ckfree(grp_ancestor[setnum]->right->abstractseq[j]);
+ ckfree(grp_ancestor[setnum]->left->abstractseq);
+ ckfree(grp_ancestor[setnum]->right->abstractseq);
+}
+
+/*
+ * Expand a mask to len columns along an alignment path and return the
+ * (re-allocated) mask.  The steering path is path1 when switch_profiles is
+ * FALSE and path2 otherwise.  A path value of 2 copies the next mask
+ * character, a value of 1 inserts gap_pos1; other values leave the column
+ * of the scratch buffer as ckalloc() delivered it.  The result is
+ * terminated with '\0'.
+ */
+static char * add_ggaps_mask(char *mask, int len, char *path1, char *path2)
+{
+ char *steer;
+ char *gapped;
+ int col;
+ int src = 0;
+
+ gapped = (char *) ckalloc( (len+1) * sizeof (char) );
+
+ steer = (switch_profiles == FALSE) ? path1 : path2;
+
+ for (col=0; col<len; col++)
+ {
+  switch (steer[col])
+  {
+   case 2:
+    gapped[col] = mask[src];
+    src++;
+    break;
+   case 1:
+    gapped[col] = gap_pos1;
+    break;
+   default:
+    break;
+  }
+ }
+
+ mask = (char *)realloc(mask,(len+2) * sizeof (char));
+ col = 0;
+ while (col < len)
+ {
+  mask[col] = gapped[col];
+  col++;
+ }
+ mask[col] ='\0';
+
+ gapped=ckfree((void *)gapped);
+
+ return(mask);
+}
+
+/*static lint absprfscore(sint set, sint n, sint m)
+{
+ sint ix;
+ lint score;
+ sint i,j,k;
+ sint gid1, gid2;
+ SN *nd;
+
+ score = 0;
+ for(i=1;i<=nseqs1;i++) {
+ if(grp_ancestor[set]->left->abstractseq[i][n]==0) continue;
+ gid1 = grp_ancestor[set]->left->abstractseq[i][0];
+ for(j=1;j<=nseqs2;j++) {
+ if(grp_ancestor[set]->right->abstractseq[j][m]==0) continue;
+ gid2 = grp_ancestor[set]->right->abstractseq[j][0];
+ nd = glib[gid1][gid2][grp_ancestor[set]->left->abstractseq[i][n]];
+ while(nd!=NULL) {
+ if(nd->ind == grp_ancestor[set]->right->abstractseq[j][m]) {
+ score += nd->sae;
+ break;
+ }
+ else nd = nd->next;
+ }
+ }
+ }
+
+ return(score);
+}
+*/
+
+
+/*
+ * Score for pairing column n of the left child with column m of the right
+ * child of grp_ancestor[set].
+ *
+ * For every pair of rows (one from each child) whose entries in these
+ * columns are both non-zero, the glib list is searched for the matching
+ * entry and its sae value is added to the total.  The list is always
+ * addressed with the smaller group id (element [0] of a row) first: when
+ * the left id is smaller it is indexed by the left entry and searched for
+ * the right entry, otherwise the roles are swapped.
+ */
+static lint absprfscore(sint set, sint n, sint m)
+{
+ lint total = 0;
+ sint lrow, rrow;
+
+ for(lrow=1; lrow<=nseqs1; lrow++) {
+  sint lpos = grp_ancestor[set]->left->abstractseq[lrow][n];
+  sint lgrp;
+
+  if(lpos == 0) continue;
+  lgrp = grp_ancestor[set]->left->abstractseq[lrow][0];
+
+  for(rrow=1; rrow<=nseqs2; rrow++) {
+   sint rpos = grp_ancestor[set]->right->abstractseq[rrow][m];
+   sint rgrp, wanted;
+   SN *node;
+
+   if(rpos == 0) continue;
+   rgrp = grp_ancestor[set]->right->abstractseq[rrow][0];
+
+   if(lgrp < rgrp) {
+    node = glib[lgrp][rgrp][lpos];
+    wanted = rpos;
+   }
+   else {
+    node = glib[rgrp][lgrp][rpos];
+    wanted = lpos;
+   }
+
+   /* first list entry that points at the partner position, if any */
+   for(; node != NULL; node = node->next) {
+    if(node->ind == wanted) {
+     total += node->sae;
+     break;
+    }
+   }
+  }
+ }
+
+ return(total);
+}
+
+
+/*
+ * Turn the edit script in displ[1..print_ptr-1] into the two column paths
+ * aln_path1 / aln_path2 (2 = residue, 1 = gap) and store the number of
+ * columns produced in *alen.
+ *   displ == 0 : one column with a residue in both paths
+ *   displ  > 0 : that many columns, residue in path 2, gap in path 1
+ *   displ  < 0 : -displ columns, residue in path 1, gap in path 2
+ * With debug>1 the script is echoed to stdout.
+ */
+static void ptracepath(sint *alen)
+{
+ sint op, run, step;
+ sint col = 0;
+ sint last_op = print_ptr-1;
+ char *filled, *gapped;
+
+ for (op=1; op<=last_op; op++) {
+if (debug>1) fprintf(stdout,"%d ",(pint)displ[op]);
+  run = displ[op];
+  if (run == 0) {
+   aln_path1[col] = 2;
+   aln_path2[col] = 2;
+   col++;
+   continue;
+  }
+  if (run > 0) {
+   filled = aln_path2;
+   gapped = aln_path1;
+  }
+  else {
+   run = -run;
+   filled = aln_path1;
+   gapped = aln_path2;
+  }
+  for (step=0; step<run; step++) {
+   filled[col+step] = 2;
+   gapped[col+step] = 1;
+  }
+  col += run;
+ }
+if (debug>1) fprintf(stdout,"\n");
+
+ *alen = col;
+
+}
+
+/*
+ * Record a deletion of k positions in the edit script.  If the previous
+ * entry was already a deletion (last_print < 0) it is extended by k,
+ * otherwise a new entry -k is appended.  last_print tracks the entry written.
+ */
+static void pdel(sint k)
+{
+ if (last_print >= 0) {
+  displ[print_ptr] = -k;
+  last_print = displ[print_ptr];
+  print_ptr++;
+  return;
+ }
+ displ[print_ptr-1] -= k;
+ last_print = displ[print_ptr-1];
+}
+
+/*
+ * Record an insertion of k positions in the edit script.  When the previous
+ * entry was a deletion (last_print < 0) the insertion is written into that
+ * slot and the deletion value is appended after it; last_print is left
+ * unchanged in that case.  Otherwise k is simply appended.
+ */
+static void padd(sint k)
+{
+ if (last_print >= 0) {
+  displ[print_ptr] = k;
+  last_print = k;
+  print_ptr++;
+  return;
+ }
+ displ[print_ptr-1] = k;
+ displ[print_ptr] = last_print;
+ print_ptr++;
+}
+
+/* Append a match (value 0) to the edit script and remember it in last_print. */
+static void palign(void)
+{
+ last_print = 0;
+ displ[print_ptr] = 0;
+ print_ptr++;
+}
+
+
+/*
+ * pdiff1: recursive divide-and-conquer alignment of one sub-problem of the
+ * two abstract profiles belonging to the current set (setnum).
+ *
+ *   A, B    offsets of the sub-problem in the left / right profile; cell
+ *           scores are read with absprfscore(setnum, A+i, B+j), 1-based i, j
+ *   M, N    number of positions of the left / right profile to align
+ *   go1     gap-open value applied along the leading edge (forward pass)
+ *   go2     gap-open value applied along the trailing edge (reverse pass)
+ *
+ * The edit script is emitted through pdel() / padd() / palign().  The value
+ * returned is the best score found for this sub-problem (midh), except that
+ * the empty-sequence boundary cases return 0; the return values of the two
+ * recursive calls are not used.
+ *
+ * Note: t, i, j, hh, f and s are static and therefore shared by every
+ * recursion level; they are only used before the recursive calls are made.
+ * tl is assigned but never read, and g, h and e are unused.
+ */
+static lint pdiff1(sint A,sint B,sint M,sint N,sint go1, sint go2)
+{
+ sint midi,midj,type;
+ lint midh;
+
+ static lint t, tl, g, h;
+
+{ static sint i,j;
+ static lint hh, f, e, s;
+
+/* Boundary cases: M <= 1 or N == 0 */
+if (debug>2) fprintf(stdout,"A %d B %d M %d N %d midi %d go1 %d go2 %d\n",
+(pint)A,(pint)B,(pint)M,(pint)N,(pint)M/2,(pint)go1,(pint)go2);
+
+/* if sequence B is empty.... */
+
+ if(N<=0) {
+
+/* if sequence A is not empty.... */
+
+ if(M>0) {
+
+/* delete residues A[1] to A[M] */
+
+ pdel(M);
+ }
+ return (0);
+ /*return(-gap_penalty1(A,B,M)); */
+ }
+
+/* if sequence A is empty.... */
+
+ if(M<=1) {
+ if(M<=0) {
+
+/* insert residues B[1] to B[N] */
+
+ padd(N);
+ return( 0 );
+ /* return(-gap_penalty2(A,B,N)); */
+ }
+
+/* if sequence A has just one residue.... */
+
+/* midh starts as the score of leaving that residue unaligned (midj stays 0) */
+ if (go1 == 0)
+ /*midh = -gap_penalty1(A+1,B+1,N); */
+ midh = -(ge + go + N*ge);
+ else
+ midh = -(go2 + ge + go + N*ge);
+ /*midh = -gap_penalty2(A+1,B,1)-gap_penalty1(A+1,B+1,N); */
+
+/* try pairing the single residue with each B[j]; a gap is charged on a side only when that side is non-empty */
+ midj = 0;
+ for(j=1;j<=N;j++) {
+ /*hh = -gap_penalty1(A,B+1,j-1) + prfscore(A+1,B+j) -gap_penalty1(A+1,B+j+1,N-j); */
+ if(j>1) hh = -(go+(j-1)*ge) + absprfscore(setnum, A+1, B+j);
+ else hh = absprfscore(setnum, A+1, B+j);
+ if(j<N) hh += -(go+(N-j)*ge);
+ /* hh = -(go+(j-1)*ge) + absprfscore(setnum, A+1, B+j) -(go+(N-j)*ge); */
+ if(hh>midh) {
+ midh = hh;
+ midj = j;
+ }
+ }
+
+ if(midj==0) {
+ padd(N);
+ pdel(1);
+ }
+ else {
+ if(midj>1) padd(midj-1);
+ palign();
+ if(midj<N) padd(N-midj);
+ }
+ return midh;
+ }
+
+
+/* Divide sequence A in half: midi */
+
+ midi = M / 2;
+
+/* In a forward phase, calculate all HH[j] and DD[j] */
+
+ HH[0] = 0.0;
+ t = -go; tl = -ge;
+ for(j=1;j<=N;j++) {
+ HH[j] = t = t-ge;
+ DD[j] = t-go;
+ }
+
+ t = -go1;
+ for(i=1;i<=midi;i++) {
+ s = HH[0];
+ HH[0] = hh = t = t-ge;
+ f = t-go;
+
+ for(j=1;j<=N;j++) {
+
+ f = f - ge;
+ if( (hh - go -ge) > f ) f = hh -go - ge;
+ DD[j] = DD[j] - ge;
+ if(DD[j] < HH[j] - go -ge) DD[j] = HH[j] -go - ge;
+ hh = s + absprfscore(setnum, A+i, B+j);
+ if(f>hh) hh = f;
+ if(DD[j]>hh) hh = DD[j];
+
+ s = HH[j];
+ HH[j] = hh;
+
+
+ }
+ }
+
+ DD[0]=HH[0];
+
+/* In a reverse phase, calculate all RR[j] and SS[j] */
+
+ RR[N]=0.0;
+ t = -go;
+ for(j=N-1;j>=0;j--) {
+ RR[j] = t = t -ge;
+ SS[j] = t -go;
+ }
+
+ t = -go2;
+ for(i=M-1;i>=midi;i--) {
+ s = RR[N];
+ RR[N] = hh = t = t - ge;
+ f = t - go;
+
+ for(j=N-1;j>=0;j--) {
+
+ f = f - ge;
+ if(hh-go-ge > f) f = hh -go -ge;
+ SS[j] = SS[j] - ge;
+ if(RR[j] - go -ge > SS[j] ) SS[j] = RR[j] - go -ge;
+
+ hh = s + absprfscore(setnum, A+i+1, B+j+1);
+ if(f>hh) hh = f;
+ if(SS[j] > hh) hh = SS[j];
+
+ s = RR[j];
+ RR[j] = hh;
+
+ }
+ }
+ SS[N]=RR[N];
+
+/* find midj, such that HH[j]+RR[j] or DD[j]+SS[j]+gap is the maximum */
+
+ midh=HH[0]+RR[0];
+ midj=0;
+ type=1;
+ for(j=0;j<=N;j++) {
+ hh = HH[j] + RR[j];
+ if(hh>=midh)
+ if(hh>midh || (HH[j]!=DD[j] && RR[j]==SS[j])) {
+ midh=hh;
+ midj=j;
+ }
+ }
+
+/* type 2: the best path crosses row midi inside a gap (DD/SS); only a strictly better score replaces type 1 */
+ for(j=N;j>=0;j--) {
+ hh = DD[j] + SS[j] - go;
+ if(hh>midh) {
+ midh=hh;
+ midj=j;
+ type=2;
+ }
+ }
+}
+
+/* Conquer recursively around midpoint */
+
+
+ /*fprintf(stdout, "%d %d %d %d %d %d %d \n", A, midi, M, B, midj, N, type);*/
+
+
+ if(type==1) { /* Type 1 gaps */
+if (debug>2) fprintf(stdout,"Type 1,1: midj %d\n",(pint)midj);
+ pdiff1(A,B,midi,midj,go1,go);
+if (debug>2) fprintf(stdout,"Type 1,2: midj %d\n",(pint)midj);
+ pdiff1(A+midi,B+midj,M-midi,N-midj,go,go2);
+ }
+ else {
+/* type 2: rows midi and midi+1 are emitted as a deletion of 2, the halves on either side are solved recursively */
+if (debug>2) fprintf(stdout,"Type 2,1: midj %d\n",(pint)midj);
+ pdiff1(A,B,midi-1,midj,go1, 0);
+ pdel(2);
+if (debug>2) fprintf(stdout,"Type 2,2: midj %d\n",(pint)midj);
+ pdiff1(A+midi+1,B+midj,M-midi-1,N-midj,0,go2);
+ }
+
+ return midh; /* Return the score of the best alignment */
+}
+
+
diff --git a/random.c b/random.c
new file mode 100644
index 0000000..c2bbc51
--- /dev/null
+++ b/random.c
@@ -0,0 +1,81 @@
+/*
+*
+* Rand.c
+*
+* - linear and additive congruential random number generators
+* (see R. Sedgewick, Algorithms, Chapter 35)
+*
+* Implementation: R. Fuchs, EMBL Data Library, 1991
+*
+*/
+#include <stdio.h>
+
+unsigned long linrand(unsigned long r);
+unsigned long addrand(unsigned long r);
+void addrandinit(unsigned long s);
+
+static unsigned long mult(unsigned long p,unsigned long q);
+
+
+#define m1 10000
+#define m 100000000
+
+static unsigned long mult(unsigned long p, unsigned long q);
+
+/* linear congruential method
+*
+* linrand() returns an unsigned long random number in the range 0 to r-1
+*/
+
+
+/*
+ * Advance the linear congruential generator by one step and scale the new
+ * state (kept below m) to the range 0 .. r-1.
+ */
+unsigned long linrand(unsigned long r)
+{
+ static unsigned long state=1234567;
+ unsigned long scaled;
+
+ state = mult(state,31415821) + 1;
+ state %= m;
+
+ /* scale in two divisions by m1 (m1*m1 == m) */
+ scaled = (state / m1) * r;
+ return( scaled / m1 );
+}
+
+/*
+ * Multiply p by q modulo m, working on base-m1 halves of the operands so
+ * that the intermediate products stay small.
+ */
+static unsigned long mult(unsigned long p, unsigned long q)
+{
+ unsigned long p_hi = p / m1, p_lo = p % m1;
+ unsigned long q_hi = q / m1, q_lo = q % m1;
+ unsigned long cross = (p_lo*q_hi + p_hi*q_lo) % m1;
+
+ return( (cross * m1 + p_lo*q_lo) % m );
+}
+
+
+/* additive congruential method
+*
+* addrand() returns an unsigned long random number in the range 0 to r-1
+* The random number generator is initialized by addrandinit()
+*/
+
+static unsigned long j;
+static unsigned long a[55];
+
+/*
+ * Advance the additive congruential generator: step the cursor j through
+ * the 55-entry table, replace a[j] by the sum (mod m) of the entries 23 and
+ * 54 slots ahead (cyclically), and scale the new value to 0 .. r-1.
+ */
+unsigned long addrand(unsigned long r)
+{
+ unsigned long tap_near, tap_far;
+
+ j = (j + 1) % 55;
+ tap_near = a[(j+23)%55];
+ tap_far = a[(j+54)%55];
+ a[j] = (tap_near + tap_far) % m;
+
+ return( ((a[j] / m1) * r) / m1 );
+}
+
+/*
+ * Seed the additive generator: a[0] takes the seed s and every further
+ * entry is derived from its predecessor as (31*prev + 1) mod m.  The cursor
+ * j is left on the last entry (54), so the first addrand() call wraps to 0.
+ */
+void addrandinit(unsigned long s)
+{
+ unsigned long slot;
+
+ a[0] = s;
+ for (slot = 1; slot <= 54; slot++)
+  a[slot] = (mult(31,a[slot-1]) + 1) % m;
+ j = 54;
+}
+
diff --git a/readmat.c b/readmat.c
new file mode 100644
index 0000000..05a67df
--- /dev/null
+++ b/readmat.c
@@ -0,0 +1,477 @@
+#include <stdio.h>
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include "pcma.h"
+#include "matrices.h"
+
+
+/*
+ * Prototypes
+ */
+static Boolean commentline(char *line);
+
+
+/*
+ * Global variables
+ */
+
+extern char *amino_acid_codes;
+extern sint gap_pos1, gap_pos2;
+extern sint max_aa;
+extern short def_dna_xref[],def_aa_xref[];
+extern sint mat_avscore;
+extern sint debug;
+extern Boolean dnaflag;
+
+extern Boolean user_series;
+extern UserMatSeries matseries;
+extern short usermatseries[MAXMAT][NUMRES][NUMRES];
+extern short aa_xrefseries[MAXMAT][NUMRES+1];
+
+
+/*
+ * Set max_aa and the two gap codes, then build the cross-reference tables
+ * that map positions in the default residue orders (amino_acid_order and
+ * nucleic_acid_order) to positions in amino_acid_codes.  Unmatched entries
+ * stay at -1; an unmatched residue other than '*' is reported via error().
+ */
+void init_matrix(void)
+{
+    short row, col, maxres;
+    char wanted, code;
+
+    max_aa = strlen(amino_acid_codes)-2;
+    gap_pos1 = NUMRES-2;    /* code for gaps inserted by clustalw */
+    gap_pos2 = NUMRES-1;    /* code for gaps already in alignment */
+
+    /* start with every cross-reference slot marked "not found" */
+    for (row = 0; row < NUMRES; row++) {
+        def_aa_xref[row] = -1;
+        def_dna_xref[row] = -1;
+    }
+
+    /* protein matrices */
+    maxres = 0;
+    for (row = 0; (wanted = amino_acid_order[row]) != '\0'; row++) {
+        for (col = 0; (code = amino_acid_codes[col]) != '\0'; col++) {
+            if (wanted != code)
+                continue;
+            def_aa_xref[row] = col;
+            maxres++;
+            break;
+        }
+        if (def_aa_xref[row] == -1 && amino_acid_order[row] != '*')
+            error("residue %c in matrices.h is not recognised",
+                  amino_acid_order[row]);
+    }
+
+    /* nucleic acid matrices */
+    maxres = 0;
+    for (row = 0; (wanted = nucleic_acid_order[row]) != '\0'; row++) {
+        for (col = 0; (code = amino_acid_codes[col]) != '\0'; col++) {
+            if (wanted != code)
+                continue;
+            def_dna_xref[row] = col;
+            maxres++;
+            break;
+        }
+        if (def_dna_xref[row] == -1 && nucleic_acid_order[row] != '*')
+            error("nucleic acid %c in matrices.h is not recognised",
+                  nucleic_acid_order[row]);
+    }
+}
+
+/*
+ * Expand a packed lower-triangular matrix (matptr) into the full table
+ * `matrix`, mapping row/column positions through xref (-1 = skip) and
+ * multiplying every score by `scale`.
+ *
+ * Side effects: sets the global mat_avscore to minus the average
+ * off-diagonal score.  When neg_flag is FALSE and the lowest score is
+ * negative, that lowest score is subtracted from every cross-referenced
+ * entry so the matrix becomes non-negative.  Gap rows/columns are filled
+ * with gr_score / gg_score (both 0 here).
+ *
+ * Returns the number of diagonal entries filled, less one, plus two.
+ */
+sint get_matrix(short *matptr, short *xref, sint matrix[NUMRES][NUMRES], Boolean neg_flag, sint scale)
+{
+    sint gg_score = 0;
+    sint gr_score = 0;
+    sint i, j, k, ix = 0;
+    sint ti, tj;
+    sint maxres;
+    sint av1, av2, av3, min, max;
+    sint denom;
+
+    /* default - set all scores to 0 */
+    for (i = 0; i <= max_aa; i++)
+        for (j = 0; j <= max_aa; j++)
+            matrix[i][j] = 0;
+
+    /* unpack the triangle; ix only advances for cross-referenced pairs */
+    ix = 0;
+    maxres = 0;
+    for (i = 0; i <= max_aa; i++) {
+        ti = xref[i];
+        for (j = 0; j <= i; j++) {
+            tj = xref[j];
+            if ((ti != -1) && (tj != -1)) {
+                k = matptr[ix];
+                if (ti == tj) {
+                    matrix[ti][ti] = k * scale;
+                    maxres++;
+                }
+                else {
+                    matrix[ti][tj] = k * scale;
+                    matrix[tj][ti] = k * scale;
+                }
+                ix++;
+            }
+        }
+    }
+
+    --maxres;
+
+    /* av1: all entries, av2: diagonal, av3: off-diagonal */
+    av1 = av2 = av3 = 0;
+    for (i = 0; i <= max_aa; i++) {
+        for (j = 0; j <= i; j++) {
+            av1 += matrix[i][j];
+            if (i == j)
+                av2 += matrix[i][j];
+            else
+                av3 += matrix[i][j];
+        }
+    }
+
+    /* guard each division: maxres may be 0 or 1 for a degenerate matrix */
+    denom = (maxres*maxres)/2;
+    if (denom != 0) av1 /= denom;
+    else av1 = 0;
+    if (maxres != 0) av2 /= maxres;
+    else av2 = 0;
+    denom = maxres*maxres - maxres;
+    if (denom != 0) av3 /= ((float)denom)/2;
+    else av3 = 0;
+    mat_avscore = -av3;
+
+    /* lowest/highest score over the whole lower triangle, column 0 included */
+    min = max = matrix[0][0];
+    for (i = 0; i <= max_aa; i++)
+        for (j = 0; j <= i; j++) {
+            if (matrix[i][j] < min) min = matrix[i][j];
+            if (matrix[i][j] > max) max = matrix[i][j];
+        }
+
+    if (debug>1) fprintf(stdout,"maxres %d\n",(pint)max_aa);
+    if (debug>1) fprintf(stdout,"average mismatch score %d\n",(pint)av3);
+    if (debug>1) fprintf(stdout,"average match score %d\n",(pint)av2);
+    if (debug>1) fprintf(stdout,"average score %d\n",(pint)av1);
+
+    /* if requested, make a positive matrix - add -(lowest score) to every entry */
+    if (neg_flag == FALSE) {
+        if (debug>1) fprintf(stdout,"min %d max %d\n",(pint)min,(pint)max);
+        if (min < 0) {
+            for (i = 0; i <= max_aa; i++) {
+                ti = xref[i];
+                if (ti == -1)
+                    continue;
+                for (j = 0; j <= max_aa; j++) {
+                    tj = xref[j];
+                    if (tj != -1) matrix[ti][tj] -= min;
+                }
+            }
+        }
+    }
+
+    /* scores involving either gap code */
+    for (i = 0; i < gap_pos1; i++) {
+        matrix[i][gap_pos1] = gr_score;
+        matrix[gap_pos1][i] = gr_score;
+        matrix[i][gap_pos2] = gr_score;
+        matrix[gap_pos2][i] = gr_score;
+    }
+    matrix[gap_pos1][gap_pos1] = gg_score;
+    matrix[gap_pos2][gap_pos2] = gg_score;
+    matrix[gap_pos2][gap_pos1] = gg_score;
+    matrix[gap_pos1][gap_pos2] = gg_score;
+
+    maxres += 2;
+
+    return(maxres);
+}
+
+
+/*
+ * Read a user matrix file that is either a single matrix or a
+ * "CLUSTAL_SERIES" file listing several matrices with identity limits.
+ *
+ * For a single matrix the work is passed to read_user_matrix() with the
+ * caller's usermat/xref.  For a series, each "MATRIX llimit ulimit file"
+ * line is loaded into the next slot of usermatseries/aa_xrefseries and
+ * recorded in matseries; the global user_series is set accordingly.
+ *
+ * Returns the value returned by read_user_matrix() for the (last) matrix
+ * read, or 0 on any error or when a series file lists no matrix.
+ */
+sint read_matrix_series(char *filename, short *usermat, short *xref)
+{
+    FILE *fd = NULL;
+    char mat_filename[FILENAMELEN];
+    char name_buf[1024];        /* same size as inline1, so %1023s cannot overflow */
+    char inline1[1024];
+    sint maxres = 0;
+    sint nmat;
+    sint n = 0, llimit, ulimit;
+
+    if (filename[0] == '\0') {
+        error("comparison matrix not specified");
+        return((sint)0);
+    }
+    if ((fd = fopen(filename,"r")) == NULL) {
+        error("cannot open %s", filename);
+        return((sint)0);
+    }
+
+    /* check the first non-comment line to see if it's a series or a single
+       matrix; an empty file counts as "not a series" */
+    user_series = FALSE;
+    while (fgets(inline1,1024,fd) != NULL) {
+        if (commentline(inline1)) continue;
+        if (linetype(inline1,"CLUSTAL_SERIES"))
+            user_series = TRUE;
+        break;
+    }
+
+    /* it's a single matrix */
+    if (user_series == FALSE) {
+        fclose(fd);
+        maxres = read_user_matrix(filename,usermat,xref);
+        return(maxres);
+    }
+
+    /* it's a series of matrices, find the next MATRIX line */
+    nmat = 0;
+    matseries.nmat = 0;
+    while (fgets(inline1,1024,fd) != NULL) {
+        if (commentline(inline1)) continue;
+        if (!linetype(inline1,"MATRIX")) continue;
+
+        /* the series tables hold MAXMAT matrices at most */
+        if (nmat >= MAXMAT) {
+            error("Bad format in file %s\n",filename);
+            fclose(fd);
+            return((sint)0);
+        }
+        if (sscanf(inline1+6,"%d %d %1023s",&llimit,&ulimit,name_buf) != 3
+            || strlen(name_buf) >= FILENAMELEN) {
+            error("Bad format in file %s\n",filename);
+            fclose(fd);
+            return((sint)0);
+        }
+        strcpy(mat_filename,name_buf);
+        if (llimit < 0 || llimit > 100 || ulimit < 0 || ulimit > 100) {
+            error("Bad format in file %s\n",filename);
+            fclose(fd);
+            return((sint)0);
+        }
+        if (ulimit <= llimit) {
+            error("in file %s: lower limit is greater than upper (%d-%d)\n",filename,llimit,ulimit);
+            fclose(fd);
+            return((sint)0);
+        }
+        n = read_user_matrix(mat_filename,&usermatseries[nmat][0][0],&aa_xrefseries[nmat][0]);
+        if (n <= 0) {
+            error("Bad format in matrix file %s\n",mat_filename);
+            fclose(fd);
+            return((sint)0);
+        }
+        matseries.mat[nmat].llimit = llimit;
+        matseries.mat[nmat].ulimit = ulimit;
+        matseries.mat[nmat].matptr = &usermatseries[nmat][0][0];
+        matseries.mat[nmat].aa_xref = &aa_xrefseries[nmat][0];
+        nmat++;
+    }
+    fclose(fd);
+    matseries.nmat = nmat;
+
+    /* n stays 0 when the file contained no MATRIX line */
+    maxres = n;
+    return(maxres);
+}
+
+/*
+ * Read a single user-supplied comparison matrix.
+ *
+ * The first non-comment line lists the residue letters; xref[i] is set to
+ * the position of the i-th letter in amino_acid_codes (or -1).  The
+ * following lines are matrix rows: row r contributes columns 0..r, and only
+ * columns whose xref is not -1 are stored, consecutively, in usermat.
+ * Values containing a '.' are scaled by 10 before truncation to short.
+ * Exactly k+1 rows are required, k being the number of letters on the
+ * header line.
+ *
+ * Returns the number of recognised residues plus 2, or 0 on error.
+ */
+sint read_user_matrix(char *filename, short *usermat, short *xref)
+{
+    double f;
+    FILE *fd;
+    sint numargs, farg;
+    sint i, j, k = 0;
+    char codes[NUMRES];
+    char inline1[1024];
+    char *args[NUMRES+4];
+    char c1, c2;
+    sint ix1, ix = 0;
+    sint maxres = 0;
+    sint linelen;
+    float scale;
+
+    if (filename[0] == '\0') {
+        error("comparison matrix not specified");
+        return((sint)0);
+    }
+
+    if ((fd = fopen(filename,"r")) == NULL) {
+        error("cannot open %s", filename);
+        return((sint)0);
+    }
+
+    maxres = 0;
+    while (fgets(inline1,1024,fd) != NULL) {
+        if (commentline(inline1)) continue;
+        if (linetype(inline1,"CLUSTAL_SERIES")) {
+            error("in %s - single matrix expected.", filename);
+            fclose(fd);
+            return((sint)0);
+        }
+
+        /* read residue characters; keep room for the terminator and for
+           the extra (k+1)-th row, which indexes xref[k] */
+        k = 0;
+        linelen = (sint)strlen(inline1);
+        for (j = 0; j < linelen; j++) {
+            if (!isalpha((unsigned char)inline1[j])) continue;
+            if (k >= NUMRES-1) {
+                error("too many entries in matrix %s",filename);
+                fclose(fd);
+                return((sint)0);
+            }
+            codes[k++] = inline1[j];
+        }
+        codes[k] = '\0';
+        break;
+    }
+
+    if (k == 0) {
+        error("wrong format in matrix %s",filename);
+        fclose(fd);
+        return((sint)0);
+    }
+
+    /* cross-reference the residues */
+    for (i = 0; i < NUMRES; i++) xref[i] = -1;
+
+    maxres = 0;
+    for (i = 0; (c1 = codes[i]); i++) {
+        for (j = 0; (c2 = amino_acid_codes[j]); j++)
+            if (c1 == c2) {
+                xref[i] = j;
+                maxres++;
+                break;
+            }
+        if ((xref[i] == -1) && (codes[i] != '*')) {
+            warning("residue %c in matrix %s not recognised",
+                    codes[i],filename);
+        }
+    }
+
+    /* get the weights */
+    ix = ix1 = 0;
+    while (fgets(inline1,1024,fd) != NULL) {
+        if (inline1[0] == '\n') continue;
+        if (inline1[0] == '#' ||
+            inline1[0] == '!') break;
+        numargs = getargs(inline1, args, (int)(k+1));
+        /* too few columns, an empty row, or more rows than the header allows */
+        if (numargs < maxres || numargs == 0 || ix > k) {
+            error("wrong format in matrix %s",filename);
+            fclose(fd);
+            return((sint)0);
+        }
+        /* a leading letter is the row label, not a value */
+        farg = isalpha((unsigned char)args[0][0]) ? 1 : 0;
+
+        /* decide whether the matrix values are float or decimal */
+        scale = 1.0;
+        if (farg < numargs && strchr(args[farg],'.') != NULL)
+            scale = 10.0;
+
+        for (i = 0; i <= ix; i++) {
+            if (xref[i] == -1) continue;
+            if (i+farg >= numargs) {
+                /* row is shorter than its position in the triangle */
+                error("wrong format in matrix %s",filename);
+                fclose(fd);
+                return((sint)0);
+            }
+            f = atof(args[i+farg]);
+            usermat[ix1++] = (short)(f*scale);
+        }
+        ix++;
+    }
+    if (ix != k+1) {
+        error("wrong format in matrix %s",filename);
+        fclose(fd);
+        return((sint)0);
+    }
+
+    maxres += 2;
+    fclose(fd);
+
+    return(maxres);
+}
+
+/*
+ * Split inline1 in place on blanks, tabs and newlines.  Pointers to the
+ * tokens go into args[0], args[1], ...; at most max+1 tokens are taken.
+ * When the tokens run out first, the slot after the last token is set to
+ * NULL.  Returns the number of tokens stored.
+ */
+int getargs(char *inline1,char *args[],int max)
+{
+    int count = 0;
+    char *next = inline1;   /* first strtok call gets the string, later ones NULL */
+
+    while (count <= max) {
+        char *token = strtok(next," \t\n");
+
+        args[count] = token;
+        if (token == NULL)
+            return(count);
+        next = NULL;
+        count++;
+    }
+
+    return(count);
+}
+
+
+/*
+ * Return TRUE when line starts with '#' or holds nothing but white space
+ * up to its newline/terminator; FALSE otherwise.
+ */
+static Boolean commentline(char *line)
+{
+    int i;
+
+    if (line[0] == '#') return TRUE;
+    for (i = 0; line[i] != '\n' && line[i] != EOS; i++) {
+        /* <ctype.h> functions require an unsigned char value */
+        if (!isspace((unsigned char)line[i]))
+            return FALSE;
+    }
+    return TRUE;
+}
+
diff --git a/sequence.c b/sequence.c
new file mode 100644
index 0000000..d815936
--- /dev/null
+++ b/sequence.c
@@ -0,0 +1,1705 @@
+/********* Sequence input routines for CLUSTAL W *******************/
+/* DES was here. FEB. 1994 */
+/* Now reads PILEUP/MSF and CLUSTAL alignment files */
+
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include "pcma.h"
+
+#define MIN(a,b) ((a)<(b)?(a):(b))
+
+
+
+/*
+* Prototypes
+*/
+
+static char * get_seq(char *,sint *,char *);
+static char * get_clustal_seq(char *,sint *,char *,sint);
+static char * get_msf_seq(char *,sint *,char *,sint);
+static void check_infile(sint *);
+static void p_encode(char *, char *, sint);
+static void n_encode(char *, char *, sint);
+/*JP: res_index made global */
+sint res_index(char *,char);
+static Boolean check_dnaflag(char *, sint);
+static sint count_clustal_seqs(void);
+static sint count_pir_seqs(void);
+static sint count_msf_seqs(void);
+static sint count_rsf_seqs(void);
+static void get_swiss_feature(char *line,sint len);
+static void get_rsf_feature(char *line,sint len);
+static void get_swiss_mask(char *line,sint len);
+static void get_clustal_ss(sint length);
+static void get_embl_ss(sint length);
+static void get_rsf_ss(sint length);
+static void get_gde_ss(sint length);
+static Boolean cl_blankline(char *line);
+
+
+/*
+ * Global variables
+ */
+extern sint max_names;
+FILE *fin;
+extern Boolean usemenu, dnaflag, explicit_dnaflag;
+extern Boolean interactive;
+extern char seqname[];
+extern sint nseqs;
+extern sint *seqlen_array;
+extern sint *output_index;
+extern char **names,**titles;
+extern char **seq_array;
+extern Boolean profile1_empty, profile2_empty;
+extern sint gap_pos2;
+extern sint max_aln_length;
+extern char *gap_penalty_mask, *sec_struct_mask;
+extern sint struct_penalties;
+extern char *ss_name;
+extern sint profile_no;
+extern sint debug;
+
+char *amino_acid_codes = "ABCDEFGHIKLMNPQRSTUVWXYZ-"; /* DES */
+sint seqFormat;
+/* static sint seqFormat; */
+static char chartab[128];
+static char *formatNames[] = {"unknown","EMBL/Swiss-Prot","PIR",
+ "Pearson","GDE","Clustal","Pileup/MSF","RSF","USER","PHYLIP","NEXUS"};
+
+/* JP: for input files being the list of alignments */
+char **clusfilelist;
+int *seqnumlist;
+int filecount;
+int *consensus_index;
+int nseqs_all;
+static sint count_clustalist_seqs(void);
+static void retrieve_file_names();
+int get_consensus_index(int pb, int pe);
+int *seqlen_array_all;
+char **seq_array_all; /* for all the sequences */
+char **names_all;
+void alloc_aln_all(sint nseqs);
+void alloc_seq_all(sint seq_no,sint length);
+static void copy_seq(char *seq, char *naseq, sint l);
+
+/*
+ * Build the translation/check table: every entry is cleared, then each
+ * residue code and its lower-case form map to the (upper-case) code.
+ */
+void fill_chartab(void)
+{
+    sint slot;
+    const char *code;
+
+    for (slot = 0; slot < 128; slot++)
+        chartab[slot] = 0;
+
+    for (code = amino_acid_codes; *code; code++) {
+        chartab[tolower(*code)] = *code;
+        chartab[(int)*code] = *code;
+    }
+}
+
+static char * get_msf_seq(char *sname,sint *len,char *tit,sint seqno)
+/*
+ * Read the seqno_th. sequence from a PILEUP multiple alignment file.
+ * The name is copied to sname (at most MAXNAMES characters), *len receives
+ * the number of residues, and the residues are stored from index 1 of the
+ * returned buffer.  Returns NULL if no "//" line is found.  tit is unused.
+ */
+{
+    static char line[MAXLINE+1];
+    char *seq = NULL;
+    sint i, j, k, linelen;
+    unsigned char c;
+
+    fseek(fin,0,0);             /* start at the beginning */
+
+    *len = 0;                   /* initialise length to zero */
+    for (;;) {                  /* skip the header, up to the "//" line */
+        if (fgets(line,MAXLINE+1,fin) == NULL) return NULL;
+        if (linetype(line,"//")) break;
+    }
+
+    while (fgets(line,MAXLINE+1,fin) != NULL) {
+        if (blankline(line)) continue;
+
+        /* move to the seqno_th. line of this block; stop cleanly at EOF */
+        for (i = 1; i < seqno; i++)
+            if (fgets(line,MAXLINE+1,fin) == NULL) return seq;
+
+        /* name = first run of non-blank characters */
+        linelen = (sint)strlen(line);
+        for (j = 0; j < linelen; j++) if (line[j] != ' ') break;
+        for (k = j; k < linelen; k++) if (line[k] == ' ') break;
+        strncpy(sname,line+j,MIN(MAXNAMES,k-j));
+        sname[MIN(MAXNAMES,k-j)] = EOS;
+        rtrim(sname);
+        blank_to_(sname);
+
+        if (seq == NULL)
+            seq = (char *)ckalloc((MAXLINE+2)*sizeof(char));
+        else
+            seq = (char *)ckrealloc(seq,((*len)+MAXLINE+2)*sizeof(char));
+
+        /* k never passes the terminator, so the scan stays inside this line */
+        for (i = k; i <= MAXLINE; i++) {
+            c = line[i];
+            if (c == '.' || c == '~') c = '-';
+            if (c == '*') c = 'X';
+            if (c == '\n' || c == EOS) break;   /* EOL */
+            c = (c < 128) ? chartab[c] : 0;     /* chartab has 128 entries */
+            if (c) seq[++(*len)] = c;
+        }
+
+        /* skip the rest of the block */
+        for (;;) {
+            if (fgets(line,MAXLINE+1,fin) == NULL) return seq;
+            if (blankline(line)) break;
+        }
+    }
+    return seq;
+}
+
+/*
+ * Return TRUE for lines that carry no sequence data in a clustal file:
+ * lines starting with '!' and lines made only of digits, white space,
+ * '*', ':' and '.'.
+ */
+static Boolean cl_blankline(char *line)
+{
+    int i;
+    unsigned char ch;
+
+    if (line[0] == '!') return TRUE;
+
+    for (i = 0; line[i] != '\n' && line[i] != EOS; i++) {
+        /* <ctype.h> functions require an unsigned char value */
+        ch = (unsigned char)line[i];
+        if (isdigit(ch) || isspace(ch))
+            continue;
+        if (ch == '*' || ch == ':' || ch == '.')
+            continue;
+        return FALSE;
+    }
+    return TRUE;
+}
+
+static char * get_clustal_seq(char *sname,sint *len,char *tit,sint seqno)
+/*
+ * Read the seqno_th. sequence from a clustal multiple alignment file.
+ * The name is copied to sname (at most MAXNAMES characters), *len receives
+ * the number of residues, and the residues are stored from index 1 of the
+ * returned buffer.  Returns NULL when the file has no sequence lines.
+ * tit is unused.
+ */
+{
+    static char line[MAXLINE+1];
+    static char tseq[MAXLINE+1];
+    static char name[MAXLINE+1];    /* line-sized, so "%s" cannot overflow it */
+    char *seq = NULL;
+    sint i;
+    unsigned char c;
+
+    fseek(fin,0,0);             /* start at the beginning */
+
+    *len = 0;                   /* initialise length to zero */
+    if (fgets(line,MAXLINE+1,fin) == NULL)  /* title line...ignore it */
+        return NULL;
+
+    while (fgets(line,MAXLINE+1,fin) != NULL) {
+        if (cl_blankline(line)) continue;
+
+        /* move to the seqno_th. line of this block; stop cleanly at EOF */
+        for (i = 1; i < seqno; i++)
+            if (fgets(line,MAXLINE+1,fin) == NULL) return seq;
+
+        /* clear both fields so a short line cannot reuse earlier data */
+        name[0] = tseq[0] = EOS;
+        sscanf(line,"%s%s",name,tseq);
+        strncpy(sname,name,MAXNAMES);
+        sname[MAXNAMES] = EOS;
+        rtrim(sname);
+        blank_to_(sname);
+
+        if (seq == NULL)
+            seq = (char *)ckalloc((MAXLINE+2)*sizeof(char));
+        else
+            seq = (char *)ckrealloc(seq,((*len)+MAXLINE+2)*sizeof(char));
+
+        for (i = 0; i <= MAXLINE; i++) {
+            c = tseq[i];
+            if (isspace(c) || c == EOS) break;  /* EOL */
+            c = (c < 128) ? chartab[c] : 0;     /* chartab has 128 entries */
+            if (c) seq[++(*len)] = c;
+        }
+
+        /* skip the rest of the block */
+        for (;;) {
+            if (fgets(line,MAXLINE+1,fin) == NULL) return seq;
+            if (cl_blankline(line)) break;
+        }
+    }
+
+    return seq;
+}
+
+/* JP: add the option to read file that contains a list of file names that are
+ in clustalw format alignments
+*/
+
+static char * get_clustal_list(char *sname,sint *len,char *tit,sint seqno)
+/*
+ * Read the seqno_th. sequence from a clustal multiple alignment file.
+ * The parsing is the same as get_clustal_seq(), so the work is delegated
+ * to it rather than kept as a second copy of the code.
+ */
+{
+    return get_clustal_seq(sname,len,tit,seqno);
+}
+
+/* Append the non-blank characters of src to mask until EOL or *len reaches limit. */
+static void append_mask_chars(char *mask, const char *src, sint *len, sint limit)
+{
+    sint i;
+    char c;
+
+    for (i = 0; *len < limit; i++) {
+        c = src[i];
+        if (c == '\n' || c == EOS) break;   /* EOL */
+        if (!isspace((unsigned char)c)) mask[(*len)++] = c;
+    }
+}
+
+static void get_clustal_ss(sint length)
+/*
+ * Read the structure data from a clustal multiple alignment file.
+ * Looks for "!SS" (secondary structure) or "!GM" (gap penalty mask) lines;
+ * the first one accepted sets struct_penalties to SECST or GMASK, stores
+ * its name in ss_name and fills sec_struct_mask / gap_penalty_mask with at
+ * most `length` characters, block after block.
+ */
+{
+    static char title[MAXLINE+1];
+    static char line[MAXLINE+1];
+    static char lin2[MAXLINE+1];
+    static char tseq[MAXLINE+1];
+    /* line-sized: the "%s" conversions below are unbounded */
+    static char sname[MAXLINE+1];
+    sint i,j,len,ix,struct_index=0;
+
+    fseek(fin,0,0);             /* start at the beginning */
+
+    len = 0;                    /* initialise length to zero */
+    if (fgets(line,MAXLINE+1,fin) == NULL) return;  /* read the title line...ignore it */
+
+    if (fgets(line,MAXLINE+1,fin) == NULL) return;  /* read the next line... */
+    /* skip any blank lines */
+    for (;;) {
+        if (fgets(line,MAXLINE+1,fin) == NULL) return;
+        if (!blankline(line)) break;
+    }
+
+    /* look for structure table lines */
+    ix = -1;
+    for (;;) {
+        if (line[0] != '!') break;
+        if (strncmp(line,"!SS",3) == 0) {
+            ix++;
+            sname[0] = tseq[0] = EOS;
+            sscanf(line+4,"%s%s",sname,tseq);
+            /* names are limited to MAXNAMES characters */
+            for (j = 0; j < MAXNAMES; j++) if (sname[j] == ' ') break;
+            sname[j] = EOS;
+            rtrim(sname);
+            blank_to_(sname);
+            if (interactive) {
+                strcpy(title,"Found secondary structure in alignment file: ");
+                strcat(title,sname);
+                (*lin2) = prompt_for_yes_no(title,"Use it to set local gap penalties ");
+            }
+            else (*lin2) = 'y';
+            if ((*lin2 != 'n') && (*lin2 != 'N')) {
+                struct_penalties = SECST;
+                struct_index = ix;
+                for (i = 0; i < length; i++) {
+                    sec_struct_mask[i] = '.';
+                    gap_penalty_mask[i] = '.';
+                }
+                strcpy(ss_name,sname);
+                append_mask_chars(sec_struct_mask,tseq,&len,length);
+            }
+        }
+        else if (strncmp(line,"!GM",3) == 0) {
+            ix++;
+            sname[0] = tseq[0] = EOS;
+            sscanf(line+4,"%s%s",sname,tseq);
+            for (j = 0; j < MAXNAMES; j++) if (sname[j] == ' ') break;
+            sname[j] = EOS;
+            rtrim(sname);
+            blank_to_(sname);
+            if (interactive) {
+                strcpy(title,"Found gap penalty mask in alignment file: ");
+                strcat(title,sname);
+                (*lin2) = prompt_for_yes_no(title,"Use it to set local gap penalties ");
+            }
+            else (*lin2) = 'y';
+            if ((*lin2 != 'n') && (*lin2 != 'N')) {
+                struct_penalties = GMASK;
+                struct_index = ix;
+                for (i = 0; i < length; i++)
+                    gap_penalty_mask[i] = '1';
+                strcpy(ss_name,sname);
+                append_mask_chars(gap_penalty_mask,tseq,&len,length);
+            }
+        }
+        if (struct_penalties != NONE) break;
+        if (fgets(line,MAXLINE+1,fin) == NULL) return;
+    }
+
+    if (struct_penalties == NONE) return;
+
+    /* skip any more comment lines */
+    while (line[0] == '!') {
+        if (fgets(line,MAXLINE+1,fin) == NULL) return;
+    }
+
+    /* skip the sequence lines and any comments after the alignment */
+    for (;;) {
+        if (isspace((unsigned char)line[0])) break;
+        if (fgets(line,MAXLINE+1,fin) == NULL) return;
+    }
+
+    /* read the rest of the alignment */
+    for (;;) {
+        /* skip any blank lines */
+        for (;;) {
+            if (!blankline(line)) break;
+            if (fgets(line,MAXLINE+1,fin) == NULL) return;
+        }
+        /* get structure table line: it is the struct_index-th '!' line of the block */
+        for (ix = 0; ix < struct_index; ix++) {
+            if (line[0] != '!') {
+                if (struct_penalties == SECST)
+                    error("bad secondary structure format");
+                else
+                    error("bad gap penalty mask format");
+                struct_penalties = NONE;
+                return;
+            }
+            if (fgets(line,MAXLINE+1,fin) == NULL) return;
+        }
+        if (struct_penalties == SECST) {
+            if (strncmp(line,"!SS",3) != 0) {
+                error("bad secondary structure format");
+                struct_penalties = NONE;
+                return;
+            }
+            sname[0] = tseq[0] = EOS;
+            sscanf(line+4,"%s%s",sname,tseq);
+            append_mask_chars(sec_struct_mask,tseq,&len,length);
+        }
+        else if (struct_penalties == GMASK) {
+            if (strncmp(line,"!GM",3) != 0) {
+                error("bad gap penalty mask format");
+                struct_penalties = NONE;
+                return;
+            }
+            sname[0] = tseq[0] = EOS;
+            sscanf(line+4,"%s%s",sname,tseq);
+            append_mask_chars(gap_penalty_mask,tseq,&len,length);
+        }
+
+        /* skip any more comment lines */
+        while (line[0] == '!') {
+            if (fgets(line,MAXLINE+1,fin) == NULL) return;
+        }
+
+        /* skip the sequence lines */
+        for (;;) {
+            if (isspace((unsigned char)line[0])) break;
+            if (fgets(line,MAXLINE+1,fin) == NULL) return;
+        }
+    }
+}
+
+/*
+ * Scan an EMBL/Swiss-Prot style file for secondary structure ("FT" lines
+ * with HELIX/STRAND) or gap penalty mask ("GM" lines) data.  For each "ID"
+ * entry the name is remembered; accepted FT lines are handed to
+ * get_swiss_feature() and GM lines to get_swiss_mask(), and
+ * struct_penalties / ss_name are set.  Returns at end of file.
+ */
+static void get_embl_ss(sint length)
+{
+    static char title[MAXLINE+1];
+    static char line[MAXLINE+1];
+    static char lin2[MAXLINE+1];
+    static char sname[MAXNAMES+1];
+    char feature[MAXLINE+1];
+    sint i, linelen;
+    Boolean use_it, at_eof;
+
+    /* find the start of the sequence entry */
+    for (;;) {
+        while (!linetype(line,"ID"))
+            if (fgets(line,MAXLINE+1,fin) == NULL) return;
+
+        /* the name starts at the first non-blank from column 5, but never
+           past the end of a short line */
+        linelen = (sint)strlen(line);
+        for (i = 5; i < linelen; i++)   /* DES */
+            if (line[i] != ' ') break;
+        if (i > linelen) i = linelen;
+        strncpy(sname,line+i,MAXNAMES); /* remember entryname */
+        sname[MAXNAMES] = EOS;
+        for (i = 0; sname[i] != EOS; i++)
+            if (sname[i] == ' ') {
+                sname[i] = EOS;
+                break;
+            }
+        rtrim(sname);
+        blank_to_(sname);
+
+        /* look for secondary structure feature table / gap penalty mask */
+        for (;;) {
+            if (fgets(line,MAXLINE+1,fin) == NULL) return;
+
+            if (linetype(line,"FT")) {
+                feature[0] = EOS;       /* sscanf may find no token */
+                sscanf(line+2,"%s",feature);
+                if (strcmp(feature,"HELIX") == 0 ||
+                    strcmp(feature,"STRAND") == 0) {
+
+                    if (interactive) {
+                        strcpy(title,"Found secondary structure in alignment file: ");
+                        strcat(title,sname);
+                        (*lin2) = prompt_for_yes_no(title,"Use it to set local gap penalties ");
+                    }
+                    else (*lin2) = 'y';
+                    use_it = ((*lin2 != 'n') && (*lin2 != 'N'));
+                    if (use_it) {
+                        struct_penalties = SECST;
+                        for (i = 0; i < length; i++)
+                            sec_struct_mask[i] = '.';
+                    }
+                    /* consume the run of FT lines; stop at EOF instead of
+                       re-testing the same line forever */
+                    at_eof = FALSE;
+                    do {
+                        if (use_it) get_swiss_feature(&line[2],length);
+                        if (fgets(line,MAXLINE+1,fin) == NULL) {
+                            at_eof = TRUE;
+                            break;
+                        }
+                    } while (linetype(line,"FT"));
+                    strcpy(ss_name,sname);
+                    if (at_eof) return;
+                }
+            }
+            else if (linetype(line,"GM")) {
+                if (interactive) {
+                    strcpy(title,"Found gap penalty mask in alignment file: ");
+                    strcat(title,sname);
+                    (*lin2) = prompt_for_yes_no(title,"Use it to set local gap penalties ");
+                }
+                else (*lin2) = 'y';
+                use_it = ((*lin2 != 'n') && (*lin2 != 'N'));
+                if (use_it) {
+                    struct_penalties = GMASK;
+                    for (i = 0; i < length; i++)
+                        gap_penalty_mask[i] = '1';
+                }
+                /* consume the run of GM lines, same EOF rule as above */
+                at_eof = FALSE;
+                do {
+                    if (use_it) get_swiss_mask(&line[2],length);
+                    if (fgets(line,MAXLINE+1,fin) == NULL) {
+                        at_eof = TRUE;
+                        break;
+                    }
+                } while (linetype(line,"GM"));
+                strcpy(ss_name,sname);
+                if (at_eof) return;
+            }
+            if (linetype(line,"SQ"))
+                break;
+
+            if (struct_penalties != NONE) break;
+        }
+    }
+}
+
+/*
+ * Scan an RSF file for "feature" lines.  For each entry ('{' ... "name")
+ * the name is remembered; when features are accepted they are handed to
+ * get_rsf_feature() up to the "sequence" keyword, and struct_penalties /
+ * ss_name are set.  Returns at end of file.
+ */
+static void get_rsf_ss(sint length)
+{
+    static char title[MAXLINE+1];
+    static char line[MAXLINE+1];
+    static char lin2[MAXLINE+1];
+    static char sname[MAXNAMES+1];
+    sint i, linelen;
+    size_t n;
+    Boolean use_it;
+
+    /* skip the comments: stop at a line whose last two characters before
+       the final one are ".." */
+    while (fgets(line,MAXLINE+1,fin) != NULL) {
+        n = strlen(line);
+        if (n >= 3 && line[n-2] == '.' && line[n-3] == '.')
+            break;
+    }
+
+    /* find the start of the sequence entry */
+    for (;;) {
+        for (;;) {
+            if (fgets(line,MAXLINE+1,fin) == NULL) return;
+            if (*line == '{') break;
+        }
+
+        while (!keyword(line,"name"))
+            if (fgets(line,MAXLINE+1,fin) == NULL) return;
+
+        /* the name starts at the first non-blank from column 5, but never
+           past the end of a short line */
+        linelen = (sint)strlen(line);
+        for (i = 5; i < linelen; i++)   /* DES */
+            if (line[i] != ' ') break;
+        if (i > linelen) i = linelen;
+        strncpy(sname,line+i,MAXNAMES); /* remember entryname */
+        sname[MAXNAMES] = EOS;
+        for (i = 0; sname[i] != EOS; i++)
+            if (sname[i] == ' ') {
+                sname[i] = EOS;
+                break;
+            }
+        rtrim(sname);
+        blank_to_(sname);
+
+        /* look for secondary structure feature table / gap penalty mask */
+        for (;;) {
+            if (fgets(line,MAXLINE+1,fin) == NULL) return;
+
+            if (keyword(line,"feature")) {
+                if (interactive) {
+                    strcpy(title,"Found secondary structure in alignment file: ");
+                    strcat(title,sname);
+                    (*lin2) = prompt_for_yes_no(title,"Use it to set local gap penalties ");
+                }
+                else (*lin2) = 'y';
+                use_it = ((*lin2 != 'n') && (*lin2 != 'N'));
+                if (use_it) {
+                    struct_penalties = SECST;
+                    for (i = 0; i < length; i++)
+                        sec_struct_mask[i] = '.';
+                }
+                /* read on to the "sequence" keyword; give up at EOF instead
+                   of re-testing the same line forever */
+                do {
+                    if (use_it && keyword(line,"feature"))
+                        get_rsf_feature(&line[7],length);
+                    if (fgets(line,MAXLINE+1,fin) == NULL) {
+                        strcpy(ss_name,sname);
+                        return;
+                    }
+                } while (!keyword(line,"sequence"));
+                strcpy(ss_name,sname);
+            }
+            else if (keyword(line,"sequence"))
+                break;
+
+            if (struct_penalties != NONE) break;
+        }
+    }
+}
+
+/*
+ * Scan a GDE file for a comment entry (line starting with '"') named
+ * "SS_..." (secondary structure) or "GM_..." (gap penalty mask).  When one
+ * is accepted, the lines that follow are copied into sec_struct_mask or
+ * gap_penalty_mask (never more than `length` characters), and
+ * struct_penalties / ss_name are set.  Returns at end of file or after the
+ * first accepted entry.
+ */
+static void get_gde_ss(sint length)
+{
+    static char title[MAXLINE+1];
+    static char line[MAXLINE+1];
+    static char lin2[MAXLINE+1];
+    static char sname[MAXNAMES+1];
+    sint i, len, offset = 0;
+    unsigned char c;
+    Boolean is_ss, is_gm;
+    char *mask;
+    char fill;
+
+    for (;;) {
+        line[0] = '\0';
+        /* search for the next comment line */
+        while (*line != '"')
+            if (fgets(line,MAXLINE+1,fin) == NULL) return;
+
+        /* is it a secondary structure or a gap penalty mask entry? */
+        is_ss = (strncmp(&line[1],"SS_",3) == 0);
+        is_gm = (!is_ss && strncmp(&line[1],"GM_",3) == 0);
+
+        if (is_ss || is_gm) {
+            /* the name follows the 3-letter prefix, up to '(' or end of line */
+            for (i = 1; i <= MAXNAMES-3; i++) {
+                if (line[i+3] == '(' || line[i+3] == '\n' || line[i+3] == EOS)
+                    break;
+                sname[i-1] = line[i+3];
+            }
+            i--;
+            sname[i] = EOS;
+            /* i > 0 keeps the test inside sname when the name is empty */
+            if (i > 0 && sname[i-1] == '(') sscanf(&line[i+3],"%d",&offset);
+            else offset = 0;
+            for (i--; i > 0; i--)
+                if (isspace((unsigned char)sname[i])) {
+                    sname[i] = EOS;
+                }
+                else break;
+            blank_to_(sname);
+
+            if (interactive) {
+                if (is_ss)
+                    strcpy(title,"Found secondary structure in alignment file: ");
+                else
+                    strcpy(title,"Found gap penalty mask in alignment file: ");
+                strcat(title,sname);
+                (*lin2) = prompt_for_yes_no(title,"Use it to set local gap penalties ");
+            }
+            else (*lin2) = 'y';
+
+            if ((*lin2 != 'n') && (*lin2 != 'N')) {
+                if (is_ss) {
+                    struct_penalties = SECST;
+                    mask = sec_struct_mask;
+                    fill = '.';
+                }
+                else {
+                    struct_penalties = GMASK;
+                    mask = gap_penalty_mask;
+                    fill = '1';
+                }
+                for (i = 0; i < length; i++)
+                    mask[i] = fill;
+                len = 0;
+                while (fgets(line,MAXLINE+1,fin)) {
+                    if (*line == '%' || *line == '#' || *line == '"') break;
+                    /* len is bounded as well as i: several lines together
+                       must not write past `length` mask characters */
+                    for (i = offset; i < length && len < length; i++) {
+                        c = line[i];
+                        if (c == '\n' || c == EOS)
+                            break;      /* EOL */
+                        mask[len++] = c;
+                    }
+                    if (len >= length) break;
+                }
+                strcpy(ss_name,sname);
+            }
+        }
+        if (struct_penalties != NONE) break;
+    }
+}
+
+/*
+ * Parse "<feature> <start> <end>" from line and, for HELIX or STRAND,
+ * mark the 1-based range in sec_struct_mask: '$'/'%' at both ends and
+ * 'A'/'B' in between.  Lines that do not parse, other features, and
+ * positions outside 1..len-1 are ignored.
+ */
+static void get_swiss_feature(char *line, sint len)
+{
+    char c, s, feature[MAXLINE+1];
+    int i, start_pos, end_pos;
+
+    if (sscanf(line,"%s%d%d",feature,&start_pos,&end_pos) != 3) {
+        return;
+    }
+
+    if (strcmp(feature,"HELIX") == 0) {
+        c = 'A';
+        s = '$';
+    }
+    else if (strcmp(feature,"STRAND") == 0) {
+        c = 'B';
+        s = '%';
+    }
+    else
+        return;
+
+    /* positions are 1-based; zero or negative ones would index before the mask */
+    if (start_pos < 1 || end_pos < 1) return;
+    if (start_pos >= len || end_pos >= len) return;
+
+    sec_struct_mask[start_pos-1] = s;
+    for (i = start_pos; i < end_pos-1; i++)
+        sec_struct_mask[i] = c;
+    sec_struct_mask[end_pos-1] = s;
+}
+
+/*
+ * Parse "<start> <end> <int> <str> <str> <feature>" from line and, for
+ * HELIX or STRAND, mark the 1-based range in sec_struct_mask: '$'/'%' at
+ * both ends and 'A'/'B' in between.  Lines that do not parse, other
+ * features, and positions outside 1..len-1 are ignored.
+ */
+static void get_rsf_feature(char *line, sint len)
+{
+    char c, s;
+    char str1[MAXLINE+1],str2[MAXLINE+1],feature[MAXLINE+1];
+    int i, tmp, start_pos, end_pos;
+
+    if (sscanf(line,"%d%d%d%s%s%s",&start_pos,&end_pos,&tmp,str1,str2,feature) != 6) {
+        return;
+    }
+
+    if (strcmp(feature,"HELIX") == 0) {
+        c = 'A';
+        s = '$';
+    }
+    else if (strcmp(feature,"STRAND") == 0) {
+        c = 'B';
+        s = '%';
+    }
+    else
+        return;
+
+    /* positions are 1-based; zero or negative ones would index before the mask */
+    if (start_pos < 1 || end_pos < 1) return;
+    if (start_pos >= len || end_pos >= len) return;
+
+    sec_struct_mask[start_pos-1] = s;
+    for (i = start_pos; i < end_pos-1; i++)
+        sec_struct_mask[i] = c;
+    sec_struct_mask[end_pos-1] = s;
+}
+
+/*
+ * Parse "<value> <start> <end>" from line and write the digit for value
+ * (1..9) into gap_penalty_mask over the 1-based range start..end.  Lines
+ * that do not parse, values outside 1..9, and positions outside 1..len-1
+ * are ignored.
+ */
+static void get_swiss_mask(char *line, sint len)
+{
+    int i, value, start_pos, end_pos;
+
+    if (sscanf(line,"%d%d%d",&value,&start_pos,&end_pos) != 3) {
+        return;
+    }
+
+    if (value < 1 || value > 9) return;
+
+    /* positions are 1-based; zero or negative ones would index before the mask */
+    if (start_pos < 1 || end_pos < 1) return;
+    if (start_pos >= len || end_pos >= len) return;
+
+    for (i = start_pos-1; i < end_pos; i++)
+        gap_penalty_mask[i] = value+'0';
+}
+
+/* Make room in seq for one more input line after `used` residues. */
+static char * grow_seq_buffer(char *seq, sint used)
+{
+    if (seq == NULL)
+        return (char *)ckalloc((MAXLINE+2)*sizeof(char));
+    return (char *)ckrealloc(seq,(used+MAXLINE+2)*sizeof(char));
+}
+
+/*
+ * Read the next sequence entry from fin in the format given by seqFormat
+ * (EMBLSWISS, PIR, PEARSON, GDE or RSF).  The entry name goes to sname
+ * (at most MAXNAMES characters), the title to tit where the format has
+ * one, and the residue count to *len.  Residues are stored from index 1
+ * of the returned buffer, followed by EOS.
+ *
+ * `line` is static on purpose: the line that ended one entry is still
+ * there when the next call starts looking for a header.
+ *
+ * Returns NULL at end of file, for an unhandled format, or when no
+ * sequence line could be read.
+ * NOTE(review): callers are assumed to test the result for NULL - confirm.
+ */
+static char * get_seq(char *sname,sint *len,char *tit)
+{
+    static char line[MAXLINE+1];
+    char *seq = NULL;
+    sint i, n, offset = 0;
+    unsigned char c = EOS;
+    Boolean got_seq = FALSE;
+    char marker;
+
+    switch (seqFormat) {
+
+/************************************/
+    case EMBLSWISS:
+        while (!linetype(line,"ID"))
+            if (fgets(line,MAXLINE+1,fin) == NULL) return NULL;
+
+        n = (sint)strlen(line);
+        for (i = 5; i < n; i++)         /* DES */
+            if (line[i] != ' ') break;
+        if (i > n) i = n;               /* short line: stay inside it */
+        strncpy(sname,line+i,MAXNAMES); /* remember entryname */
+        sname[MAXNAMES] = EOS;
+        for (i = 0; sname[i] != EOS; i++)
+            if (sname[i] == ' ') {
+                sname[i] = EOS;
+                break;
+            }
+        rtrim(sname);
+        blank_to_(sname);
+
+        while (!linetype(line,"SQ"))
+            if (fgets(line,MAXLINE+1,fin) == NULL) return NULL;
+
+        *len = 0;
+        while (fgets(line,MAXLINE+1,fin)) {
+            if (got_seq && blankline(line)) break;
+            n = (sint)strlen(line);
+            if (n > 2 && line[n-2] == '.' && line[n-3] == '.')
+                continue;
+            seq = grow_seq_buffer(seq,*len);
+            for (i = 0; i <= MAXLINE; i++) {
+                c = line[i];
+                if (c == '\n' || c == EOS || c == '/')
+                    break;              /* EOL */
+                c = (c < 128) ? chartab[c] : 0; /* chartab has 128 entries */
+                if (c) {
+                    got_seq = TRUE;
+                    seq[++(*len)] = c;
+                }
+            }
+            if (c == '/') break;
+        }
+        break;
+
+/************************************/
+    case PIR:
+        while (*line != '>')
+            if (fgets(line,MAXLINE+1,fin) == NULL) return NULL;
+
+        n = (sint)strlen(line);
+        for (i = 4; i < n; i++)         /* DES */
+            if (line[i] != ' ') break;
+        if (i > n) i = n;
+        strncpy(sname,line+i,MAXNAMES); /* remember entryname */
+        sname[MAXNAMES] = EOS;
+        rtrim(sname);
+        blank_to_(sname);
+
+        /* the line after the header is the title */
+        if (fgets(line,MAXLINE+1,fin) == NULL) return NULL;
+        strncpy(tit,line,MAXTITLES);
+        tit[MAXTITLES] = EOS;
+        i = (sint)strlen(tit);
+        if (i > 0 && tit[i-1] == '\n') tit[i-1] = EOS;
+
+        *len = 0;
+        while (fgets(line,MAXLINE+1,fin)) {
+            seq = grow_seq_buffer(seq,*len);
+            for (i = 0; i <= MAXLINE; i++) {
+                c = line[i];
+                if (c == '\n' || c == EOS || c == '*')
+                    break;              /* EOL */
+                c = (c < 128) ? chartab[c] : 0;
+                if (c) seq[++(*len)] = c;
+            }
+            if (c == '*') break;
+        }
+        break;
+
+/***********************************************/
+    case PEARSON:
+        while (*line != '>')
+            if (fgets(line,MAXLINE+1,fin) == NULL) return NULL;
+
+        n = (sint)strlen(line);
+        for (i = 1; i < n; i++)         /* DES */
+            if (line[i] != ' ') break;
+        if (i > n) i = n;
+        strncpy(sname,line+i,MAXNAMES); /* remember entryname */
+        sname[MAXNAMES] = EOS;
+        /* cut the name at the first blank after its first character */
+        n = (sint)strlen(sname);
+        for (i = 1; i < n; i++)         /* DES */
+            if (sname[i] == ' ') break;
+        if (i > n) i = n;
+        sname[i] = EOS;
+        rtrim(sname);
+        blank_to_(sname);
+
+        *tit = EOS;
+
+        *len = 0;
+        while (fgets(line,MAXLINE+1,fin)) {
+            seq = grow_seq_buffer(seq,*len);
+            for (i = 0; i <= MAXLINE; i++) {
+                c = line[i];
+                if (c == '\n' || c == EOS || c == '>')
+                    break;              /* EOL */
+                c = (c < 128) ? chartab[c] : 0;
+                if (c) seq[++(*len)] = c;
+            }
+            if (c == '>') break;
+        }
+        break;
+
+/**********************************************/
+    case GDE:
+        /* entries start with '#' for DNA and '%' otherwise */
+        marker = dnaflag ? '#' : '%';
+        while (*line != marker)
+            if (fgets(line,MAXLINE+1,fin) == NULL) return NULL;
+
+        for (i = 1; i <= MAXNAMES; i++) {
+            if (line[i] == '(' || line[i] == '\n' || line[i] == EOS)
+                break;
+            sname[i-1] = line[i];
+        }
+        i--;
+        sname[i] = EOS;
+        /* i > 0 keeps the test inside sname when the name is empty */
+        if (i > 0 && sname[i-1] == '(') sscanf(&line[i],"%d",&offset);
+        else offset = 0;
+        for (i--; i > 0; i--)
+            if (isspace((unsigned char)sname[i])) {
+                sname[i] = EOS;
+            }
+            else break;
+        blank_to_(sname);
+
+        *tit = EOS;
+
+        *len = 0;
+        if (offset > 0) {
+            /* leading gaps need a buffer before the first line is read */
+            seq = (char *)ckalloc((offset+MAXLINE+2)*sizeof(char));
+            for (i = 0; i < offset; i++) seq[++(*len)] = '-';
+        }
+        while (fgets(line,MAXLINE+1,fin)) {
+            if (*line == '%' || *line == '#' || *line == '"') break;
+            seq = grow_seq_buffer(seq,*len);
+            for (i = 0; i <= MAXLINE; i++) {
+                c = line[i];
+                if (c == '\n' || c == EOS)
+                    break;              /* EOL */
+                c = (c < 128) ? chartab[c] : 0;
+                if (c) seq[++(*len)] = c;
+            }
+        }
+        break;
+
+/***********************************************/
+    case RSF:
+        while (*line != '{')
+            if (fgets(line,MAXLINE+1,fin) == NULL) return NULL;
+
+        while (!keyword(line,"name"))
+            if (fgets(line,MAXLINE+1,fin) == NULL) return NULL;
+
+        n = (sint)strlen(line);
+        for (i = 5; i < n; i++)         /* DES */
+            if (line[i] != ' ') break;
+        if (i > n) i = n;
+        strncpy(sname,line+i,MAXNAMES); /* remember entryname */
+        sname[MAXNAMES] = EOS;
+        for (i = 0; sname[i] != EOS; i++)
+            if (sname[i] == ' ') {
+                sname[i] = EOS;
+                break;
+            }
+        rtrim(sname);
+        blank_to_(sname);
+
+        while (!keyword(line,"sequence"))
+            if (fgets(line,MAXLINE+1,fin) == NULL) return NULL;
+
+        *len = 0;
+        while (fgets(line,MAXLINE+1,fin)) {
+            seq = grow_seq_buffer(seq,*len);
+            for (i = 0; i <= MAXLINE; i++) {
+                c = line[i];
+                if (c == EOS || c == '}')
+                    break;              /* EOL */
+                if (c == '.')
+                    seq[++(*len)] = '-';
+                c = (c < 128) ? chartab[c] : 0;
+                if (c) seq[++(*len)] = c;
+            }
+            if (c == '}') break;
+        }
+        break;
+
+/***********************************************/
+    default:
+        break;
+    }
+
+    /* nothing was read: there is no buffer to terminate */
+    if (seq == NULL) return NULL;
+
+    seq[*len+1] = EOS;
+
+    return seq;
+}
+
+
+/*
+ * Read the sequences of the input file into the global alignment arrays.
+ *
+ * first_seq is the 1-based slot of the first sequence to fill: 1 starts a
+ * new alignment (the previous one is freed first), any other value appends
+ * after the sequences already held (second profile).
+ *
+ * CLUSTALIST input (a list of CLUSTAL alignment files) is handled apart:
+ * every sequence of every listed file goes into the *_all arrays and one
+ * representative per file, chosen by get_consensus_index(), is copied into
+ * the ordinary arrays.
+ *
+ * Returns the number of sequences read by this call (the number of listed
+ * files for CLUSTALIST input), 0 when there is nothing to read or two
+ * sequences share a name, and -1 when no file name was given or a file
+ * could not be opened.
+ */
+sint readseqs(sint first_seq) /*first_seq is the #no. of the first seq. to read */
+{
+    char line[FILENAMELEN+1];
+    static char *seq1,sname1[MAXNAMES+1],title[MAXTITLES+1];
+    sint i,j,k,m;
+    sint no_seqs;
+    static sint l1;
+    static Boolean dnaflag1;
+    int currpoint; /* JP */
+
+    if(usemenu)
+        getstr("Enter the name of the sequence file",line);
+    else
+        strcpy(line,seqname);
+    if(*line == EOS) return -1;
+
+    if((fin=fopen(line,"r"))==NULL) {
+        error("Could not open sequence file %s",line);
+        /* every later step reads from fin, so do not continue with NULL */
+        return -1; /* DES -1 => file not found */
+    }
+    strcpy(seqname,line);
+    no_seqs=0;
+    check_infile(&no_seqs);
+
+    /* JP */
+    if(seqFormat==CLUSTALIST) {
+        k = 1;
+        filecount = no_seqs;
+        if(debug>1) fprintf(stdout, "filecount: %d\n", filecount);
+        clusfilelist = (char **)ckalloc((filecount+1)*sizeof(char*));
+        for(i=0;i<=filecount;i++) clusfilelist[i] = (char *)ckalloc((300+1)*sizeof(char));
+        seqnumlist = (int *) ckalloc((filecount+1)*sizeof(int));
+        consensus_index = (int *) ckalloc((filecount+1)*sizeof(int));
+        fseek(fin,0,0);
+        retrieve_file_names();
+
+        /* first pass: count the sequences held by each listed file */
+        no_seqs=0;
+        for(i=1;i<=filecount;i++) {
+            fclose(fin);
+            if((fin=fopen(clusfilelist[i],"r"))==NULL) {
+                error("Could not open the sequence file [%s]", clusfilelist[i]);
+                return -1;
+            }
+            fgets(line,FILENAMELEN,fin);
+            seqnumlist[i] = count_clustal_seqs();
+            no_seqs += seqnumlist[i];
+        }
+        fprintf(stdout, "number of input alignments: %d total number of sequences: %d\n", filecount,no_seqs);
+        for(i=1;i<=filecount;i++) {
+            fprintf(stdout, " %s\n", clusfilelist[i]);
+        }
+        nseqs_all = no_seqs;
+        alloc_aln_all(no_seqs);
+        no_seqs = filecount;
+        nseqs = no_seqs;
+        alloc_aln(filecount);
+
+        /* second pass: load every sequence, then pick one per file */
+        currpoint = 1;
+        for(i=1;i<=filecount;i++) {
+            fclose(fin);
+            if((fin=fopen(clusfilelist[i],"r"))==NULL) {
+                error("Could not open the sequence file [%s]", clusfilelist[i]);
+                return -1;
+            }
+            for(j=1;j<=seqnumlist[i];j++) {
+                seq1 = get_clustal_seq(sname1,&l1,title,j);
+                if (l1 > max_aln_length) max_aln_length = l1;
+                seqlen_array_all[k]=l1; /* store the length */
+                strcpy(names_all[k],sname1); /* " " name */
+                alloc_seq_all(k,l1);
+                p_encode(seq1,seq_array_all[k],l1);
+                if(debug>1) for(m=1;m<=l1;m++) {
+                    fprintf(stdout, "%d ", seq_array_all[k][m]);
+                }
+                if(debug>1) fprintf(stdout, "\n");
+                if(seq1!=NULL) seq1=ckfree(seq1);
+                if(debug>1) fprintf(stdout, "%d %s\n", k, names_all[k]);
+                k++;
+            }
+            /* get the index of the sequence (consensus sequence) that shows the largest
+               average sequence identity to other sequences in the same group (i) */
+            consensus_index[i] = get_consensus_index(currpoint, currpoint+seqnumlist[i]-1);
+            currpoint += seqnumlist[i];
+            seqlen_array[i] = seqlen_array_all[consensus_index[i]]; /* the length */
+            strcpy(names[i], names_all[consensus_index[i]]); /* name */
+            alloc_seq(i, seqlen_array[i]); /* sequence */
+            copy_seq(seq_array[i],seq_array_all[consensus_index[i]],seqlen_array[i]);
+        }
+
+        max_aln_length *= 2;
+        for(i=first_seq;i<=first_seq+no_seqs-1;i++)
+        {
+            if(seqlen_array[i]>max_aln_length)
+                max_aln_length=seqlen_array[i];
+        }
+
+        fclose(fin);
+
+        for(i=first_seq;i<=first_seq+no_seqs-1;i++)
+        {
+            if(strlen(names[i])>max_names)
+                max_names=strlen(names[i]);
+        }
+
+        if(max_names<10) max_names=10;
+
+        /* names must be unique across all files of the list */
+        for(i=1;i<k;i++) {
+            if(debug>1) fprintf(stdout, "%d %s\n", i, names_all[i]);
+            for(j=i+1;j<k;j++) {
+                if(strcmp(names_all[i], names_all[j]) ==0 ) {
+                    error("There are two sequences with the same name: %s\n", names_all[i]);
+                    exit(1);
+                }
+            }
+        }
+        return no_seqs;
+
+    }
+
+
+    /* JP: disable info */
+    if(seqFormat==NEXUS)
+        error("Cannot read nexus format");
+
+    if(no_seqs == 0)
+        return 0; /* return the number of seqs. (zero here)*/
+
+    max_aln_length = 0;
+
+/* if this is a multiple alignment, or profile 1 - free any memory used
+by previous alignments, then allocate memory for the new alignment */
+    if(first_seq == 1) {
+        max_names = 0;
+        free_aln(nseqs);
+        alloc_aln(no_seqs);
+    }
+/* otherwise, this is a profile 2, and we need to reallocate the arrays,
+leaving the data for profile 1 intact */
+    else realloc_aln(first_seq,no_seqs);
+
+    for(i=1;i<first_seq;i++)
+    {
+        if(seqlen_array[i]>max_aln_length)
+            max_aln_length=seqlen_array[i];
+        if(strlen(names[i])>max_names)
+            max_names=strlen(names[i]);
+    }
+
+    for(i=first_seq;i<=first_seq+no_seqs-1;i++) { /* get the seqs now*/
+        output_index[i] = i; /* default output order */
+        if(seqFormat == CLUSTAL)
+            seq1=get_clustal_seq(sname1,&l1,title,i-first_seq+1);
+        else if(seqFormat == MSF)
+            seq1=get_msf_seq(sname1,&l1,title,i-first_seq+1);
+        else
+            seq1=get_seq(sname1,&l1,title);
+
+        if(seq1==NULL) break;
+/* JULIE */
+/* Set max length of dynamically allocated arrays in prfalign.c */
+        if (l1 > max_aln_length) max_aln_length = l1;
+        seqlen_array[i]=l1; /* store the length */
+        strcpy(names[i],sname1); /* " " name */
+        strcpy(titles[i],title); /* " " title */
+
+        if(!explicit_dnaflag) {
+            dnaflag1 = check_dnaflag(seq1,l1); /* check DNA/Prot */
+            if(i == 1) dnaflag = dnaflag1;
+        } /* type decided by first seq*/
+        else
+            dnaflag1 = dnaflag;
+
+        alloc_seq(i,l1);
+
+        if(dnaflag)
+            n_encode(seq1,seq_array[i],l1); /* encode the sequence*/
+        else /* as ints */
+            p_encode(seq1,seq_array[i],l1);
+        if(seq1!=NULL) seq1=ckfree(seq1);
+    }
+
+
+    max_aln_length *= 2;
+/*
+    JULIE
+    check sequence names are all different - otherwise phylip tree is
+    confused.
+*/
+    for(i=1;i<=first_seq+no_seqs-1;i++) {
+        for(j=i+1;j<=first_seq+no_seqs-1;j++) {
+            if (strncmp(names[i],names[j],MAXNAMES) == 0) {
+                error("Multiple sequences found with same name, %s (first %d chars are significant)", names[i],MAXNAMES);
+                return 0;
+            }
+        }
+    }
+    for(i=first_seq;i<=first_seq+no_seqs-1;i++)
+    {
+        if(seqlen_array[i]>max_aln_length)
+            max_aln_length=seqlen_array[i];
+    }
+
+/* look for a feature table / gap penalty mask (only if this is a profile) */
+    if (profile_no > 0) {
+        rewind(fin);
+        struct_penalties = NONE;
+        gap_penalty_mask = (char *)ckalloc((max_aln_length+1) * sizeof (char));
+        sec_struct_mask = (char *)ckalloc((max_aln_length+1) * sizeof (char));
+        ss_name = (char *)ckalloc((MAXNAMES+1) * sizeof (char));
+
+        if (seqFormat == CLUSTAL) {
+            get_clustal_ss(max_aln_length);
+        }
+        else if (seqFormat == GDE) {
+            get_gde_ss(max_aln_length);
+        }
+        else if (seqFormat == EMBLSWISS) {
+            get_embl_ss(max_aln_length);
+        }
+        else if (seqFormat == RSF) {
+            get_rsf_ss(max_aln_length);
+        }
+    }
+
+    for(i=first_seq;i<=first_seq+no_seqs-1;i++)
+    {
+        if(strlen(names[i])>max_names)
+            max_names=strlen(names[i]);
+    }
+
+    if(max_names<10) max_names=10;
+
+    fclose(fin);
+
+    return no_seqs; /* return the number of seqs. read in this call */
+}
+
+
+/*
+ * Decide whether a sequence is DNA or protein.
+ * Gap characters ('-') are ignored; every other residue that is one of
+ * A,C,G,T,U,N counts as a base.  The sequence (indexed 1..slen) is called
+ * DNA when at least 85% of its residues are bases.
+ * Returns TRUE for DNA, FALSE for protein or for a sequence without bases.
+ */
+static Boolean check_dnaflag(char *seq, sint slen)
+{
+    char *base_letters="ACGTUN";
+    sint pos;
+    sint total = 0;
+    sint base_count = 0;
+    float fraction;
+
+    for(pos=1; pos <= slen; pos++) {
+        if(seq[pos] == '-')
+            continue;
+        total++;
+        if(seq[pos] == 'N' || res_index(base_letters, seq[pos]) >= 0)
+            base_count++;
+    }
+
+    if(base_count == 0 || total == 0)
+        return FALSE;
+
+    fraction = (float)base_count/(float)total;
+    return (fraction >= 0.85) ? TRUE : FALSE;
+}
+
+
+
+/*
+ * Identify the format of the file open on fin and count its sequences.
+ *
+ * The first non-blank line decides the format, which is stored in the
+ * global seqFormat; for some formats it also sets dnaflag.  *nseqs receives
+ * the number of sequences (the number of listed files for CLUSTALIST).
+ * On return the stream has been rewound, except for the NEXUS and UNKNOWN
+ * formats, where the function returns as soon as the format is known.
+ */
+static void check_infile(sint *nseqs)
+{
+    char line[MAXLINE+1];
+    sint i;
+
+    *nseqs=0;
+    line[0]=EOS; /* fgets leaves the buffer untouched on an empty file */
+    while (fgets(line,MAXLINE+1,fin) != NULL) {
+        if(!blankline(line))
+            break;
+    }
+
+    /* strip trailing non-printing characters */
+    for(i=(sint)strlen(line)-1;i>=0;i--)
+        if(isgraph((unsigned char)line[i])) break;
+    line[i+1]=EOS;
+
+    /* upper-case the leading keyword, never touching bytes after the end */
+    for(i=0;i<=6 && line[i]!=EOS;i++)
+        line[i] = toupper((unsigned char)line[i]);
+
+    if( linetype(line,"ID") ) { /* EMBL/Swiss-Prot format ? */
+        seqFormat=EMBLSWISS;
+        (*nseqs)++;
+    }
+    else if( linetype(line,"CLUSTAL") ) {
+        seqFormat=CLUSTAL;
+    }
+    else if( linetype(line,"PILEUP") ) {
+        seqFormat = MSF;
+    }
+    else if( linetype(line,"!!AA_MULTIPLE_ALIGNMENT") ) {
+        seqFormat = MSF;
+        dnaflag = FALSE;
+    }
+    else if( linetype(line,"!!NA_MULTIPLE_ALIGNMENT") ) {
+        seqFormat = MSF;
+        dnaflag = TRUE;
+    }
+    else if( strstr(line,"MSF") && line[strlen(line)-1]=='.' &&
+             line[strlen(line)-2]=='.' ) {
+        seqFormat = MSF;
+    }
+    else if( linetype(line,"!!RICH_SEQUENCE") ) {
+        seqFormat = RSF;
+    }
+    else if( linetype(line,"#NEXUS") ) {
+        seqFormat=NEXUS;
+        return;
+    }
+    else if(*line == '>') { /* no */
+        /* distinguish PIR and Pearson; a header shorter than four
+           characters cannot carry the PIR ';' */
+        seqFormat=(strlen(line) > 3 && line[3] == ';')?PIR:PEARSON;
+        (*nseqs)++;
+    }
+    else if( *line == '@' ) {
+        seqFormat = CLUSTALIST;
+        fprintf(stdout, "CLUSTALIST format input\n");
+        dnaflag = FALSE;
+    }
+    else if((*line == '"') || (*line == '%') || (*line == '#')) {
+        seqFormat=GDE; /* GDE format */
+        if (*line == '%') {
+            (*nseqs)++;
+            dnaflag = FALSE;
+        }
+        else if (*line == '#') {
+            (*nseqs)++;
+            dnaflag = TRUE;
+        }
+    }
+    /* JP: CLUSTALIST */
+    /* NOTE(review): any line starting with '@' is already taken by the
+       branch above, so this test looks unreachable - confirm before removing */
+    else if( linetype(line, "@ alignment list") ) {
+        seqFormat = CLUSTALIST;
+        fprintf(stdout, "%d\n", seqFormat);
+        dnaflag = FALSE;
+    }
+    else {
+        seqFormat=UNKNOWN;
+        return;
+    }
+
+    while(fgets(line,MAXLINE+1,fin) != NULL) {
+        switch(seqFormat) {
+            case EMBLSWISS:
+                if( linetype(line,"ID") )
+                    (*nseqs)++;
+                break;
+            case PIR:
+                *nseqs = count_pir_seqs();
+                fseek(fin,0,0);
+                return;
+            case PEARSON:
+                if( *line == '>' )
+                    (*nseqs)++;
+                break;
+            case GDE:
+                if(( *line == '%' ) && ( dnaflag == FALSE))
+                    (*nseqs)++;
+                else if (( *line == '#') && ( dnaflag == TRUE))
+                    (*nseqs)++;
+                break;
+            case CLUSTAL:
+                *nseqs = count_clustal_seqs();
+                fseek(fin,0,0);
+                return;
+            case MSF:
+                *nseqs = count_msf_seqs();
+                fseek(fin,0,0);
+                return;
+            case RSF:
+                fseek(fin,0,0);
+                *nseqs = count_rsf_seqs();
+                fseek(fin,0,0);
+                return;
+            case USER:
+                /* falls through: handled exactly like CLUSTALIST */
+            /* JP: count the file number */
+            case CLUSTALIST:
+                fseek(fin, 0,0);
+                *nseqs = count_clustalist_seqs();
+                fseek(fin, 0,0);
+                return;
+            default:
+                break;
+        }
+    }
+    fseek(fin,0,0);
+}
+
+
+/*
+ * Count the sequences in a PIR alignment file read from fin.
+ * Every entry must be closed by a '*' before the next '>' header; when an
+ * end marker is missing an error is reported and 0 is returned.
+ */
+static sint count_pir_seqs(void)
+{
+    char buf[MAXLINE+1];
+    char *p;
+    sint total;
+    Boolean terminated = FALSE;
+
+    /* first entry: scan line by line until its '*' shows up */
+    while (!terminated && fgets(buf,MAXLINE+1,fin) != NULL) {
+        if(buf[0] == '>')
+            break;
+        for(p=buf; *p != '*' && *p != '\n' && *p != EOS; p++)
+            ;
+        if(*p == '*')
+            terminated = TRUE; /* ok - end of sequence found */
+    }
+    if (terminated == FALSE) {
+        error("PIR format sequence end marker '*'\nmissing for one or more sequences.");
+        return (sint)0; /* funny format*/
+    }
+
+    total = 1;
+
+    /* remaining entries: each '>' header must be followed by a '*' */
+    while (fgets(buf,MAXLINE+1,fin) != NULL) {
+        if(buf[0] != '>')
+            continue;
+        terminated = FALSE;
+        while (fgets(buf,MAXLINE+1,fin) != NULL) {
+            if(buf[0] == '>') {
+                error("PIR format sequence end marker '*' missing for one or more sequences.");
+                return (sint)0; /* funny format*/
+            }
+            for(p=buf; *p != '*' && *p != '\n' && *p != EOS; p++)
+                ;
+            if(*p == '*') {
+                terminated = TRUE; /* ok - sequence found */
+                total++;
+                break;
+            }
+        }
+    }
+    return (sint)total;
+}
+
+
+/*
+ * Count the sequences in a clustal alignment file read from fin.
+ * The first block of consecutive non-blank lines is counted; 0 is returned
+ * when the file ends before a blank line closes that block.
+ */
+static sint count_clustal_seqs(void)
+{
+    char buf[MAXLINE+1];
+    sint count;
+
+    /* move to the first line of the first block */
+    for(;;) {
+        if(fgets(buf,MAXLINE+1,fin) == NULL) break;
+        if(!cl_blankline(buf)) break;
+    }
+
+    /* that line is sequence 1; count on until the block is closed */
+    for(count = 1; fgets(buf,MAXLINE+1,fin) != NULL; count++) {
+        if(cl_blankline(buf))
+            return count;
+    }
+
+    return (sint)0; /* if you got to here-funny format/no seqs.*/
+}
+
+/* JP */
+/*
+ * Count the files named in a clustalist file read from fin.
+ * Every line holding at least one non-blank character counts; the first
+ * such line is the "@ alignment list" header, hence the start value of -1.
+ * The scan of a line stops at its newline or at the end of the string, so
+ * a last line without a newline is handled like any other.
+ */
+static sint count_clustalist_seqs(void)
+{
+    char *s;
+    char line[MAXLINE+1];
+    sint nseqs=-1; /* ignore the first line, which is a header "@ alignment list" */
+
+    while(fgets(line,MAXLINE+1,fin) != NULL) {
+        for(s=line; *s!='\n' && *s!='\0'; s++) {
+            if(!isspace((unsigned char)*s)) {
+                nseqs++;
+                break;
+            }
+        }
+    }
+
+    return nseqs;
+}
+/* JP */
+/*
+ * Copy the entries of a clustalist file read from fin into clusfilelist.
+ * Entry 0 receives the header line, entries 1..filecount the file names.
+ * Each entry starts at the first non-blank character of its line and is
+ * cut at the next white space.  Reading stops once filecount+1 entries
+ * are stored, which is the number of slots the caller allocates.
+ */
+static void retrieve_file_names() {
+    sint i = 0,j;
+    char line[300];
+    char *s;
+
+    while(i <= filecount && fgets(line, 300, fin)!=NULL) {
+        for(s=line; *s!='\n' && *s!='\0'; s++) {
+            if(!isspace((unsigned char)*s)) {
+                strcpy(clusfilelist[i], s);
+                if(debug>1) fprintf(stdout, "%s", clusfilelist[i]);
+                /* keep only the first white-space delimited word */
+                for(j=0; clusfilelist[i][j]!='\0'; j++) {
+                    if(isspace((unsigned char)clusfilelist[i][j])) {
+                        clusfilelist[i][j] = '\0';
+                        break;
+                    }
+                }
+                i++;
+                break;
+            }
+        }
+    }
+}
+
+/* JP */
+/*
+ * Pick the representative ("consensus") sequence of one group.
+ *
+ * pb and pe are the first and last index of the group in seq_array_all.
+ * For every member the number of positions where it carries the same
+ * non-gap residue as another member is summed over all other members; the
+ * member with the largest sum wins, the earliest one on a tie.
+ * Returns the index of the winner in seq_array_all (pb when the group has
+ * no identities at all).
+ *
+ * The totals are kept in scalars: the earlier per-pair identity matrix was
+ * never read and needed a stack array quadratic in the group size.
+ */
+int get_consensus_index(int pb, int pe) {
+
+    int i,j,k;
+    int members = pe-pb+1;
+    int best = 1;
+    double best_total = 0;
+    double total;
+    char *si, *sj;
+
+    for(i=1;i<=members;i++) {
+        si = seq_array_all[pb+i-1];
+        total = 0;
+        for(j=1;j<=members;j++) {
+            if(i==j) continue;
+            sj = seq_array_all[pb+j-1];
+            for(k=1;k<=seqlen_array_all[pb+i-1];k++) {
+                /* equal residues where si is not a gap: sj is no gap either */
+                if( (si[k]!=gap_pos2) && (sj[k]==si[k]) )
+                    total += 1;
+            }
+        }
+        if(debug>1) fprintf(stdout, "%d %f\n", i, total/(pe-pb)/seqlen_array_all[pb+i-1]);
+        if(total>best_total) { best = i; best_total = total; }
+    }
+
+    return best+pb-1;
+}
+
+/*
+ * Count the sequences in a PILEUP (MSF) alignment file read from fin.
+ * The header is skipped up to the "//" line; the first block of non-blank
+ * lines after it is counted.  Returns 0 when the file ends before a blank
+ * line closes that block.
+ */
+static sint count_msf_seqs(void)
+{
+    char buf[MAXLINE+1];
+    sint count;
+
+    /* skip the header */
+    for(;;) {
+        if(fgets(buf,MAXLINE+1,fin) == NULL) break;
+        if(linetype(buf,"//")) break;
+    }
+
+    /* move to the first line of the first block */
+    for(;;) {
+        if(fgets(buf,MAXLINE+1,fin) == NULL) break;
+        if(!blankline(buf)) break;
+    }
+
+    /* that line is sequence 1; count on until the block is closed */
+    for(count = 1; fgets(buf,MAXLINE+1,fin) != NULL; count++) {
+        if(blankline(buf))
+            return count;
+    }
+
+    return (sint)0; /* if you got to here-funny format/no seqs.*/
+}
+
+/*
+ * Count the sequences in a GCG RSF alignment file read from fin.
+ * The comment header ends at the first line whose last two characters
+ * before the final one are ".."; after it every line starting with '{'
+ * opens one sequence record.
+ */
+static sint count_rsf_seqs(void)
+{
+    char line[MAXLINE+1];
+    sint nseqs;
+    size_t len;
+
+    nseqs = 0;
+/* skip the comments */
+    while (fgets(line,MAXLINE+1,fin) != NULL) {
+        len = strlen(line);
+        /* lines shorter than three characters cannot hold the marker and
+           must not be indexed from their end */
+        if(len >= 3 &&
+           line[len-2]=='.' &&
+           line[len-3]=='.')
+            break;
+    }
+
+    while (fgets(line,MAXLINE+1,fin) != NULL) {
+        if( *line == '{' )
+            nseqs++;
+    }
+    return (sint)nseqs;
+}
+
+/*
+ * Encode a protein sequence as residue codes.
+ * seq[1..l] holds the characters; naseq[1..l] receives gap_pos2 for '-'
+ * and otherwise the position of the character in amino_acid_codes
+ * (-1 when it is not listed).  naseq[l+1] is set to the end marker -3.
+ */
+static void p_encode(char *seq, char *naseq, sint l)
+{
+    register sint pos = 1;
+
+    while(pos <= l) {
+        if(seq[pos] != '-')
+            naseq[pos] = res_index(amino_acid_codes,seq[pos]);
+        else
+            naseq[pos] = gap_pos2;
+        pos++;
+    }
+    naseq[pos] = -3;
+}
+
+/*
+ * Encode a nucleotide sequence as residue codes.
+ * seq[1..l] holds the characters; naseq[1..l] receives gap_pos2 for the gap
+ * character '-' of the input files and otherwise the position of the
+ * character in amino_acid_codes (-1 when it is not listed).
+ * naseq[l+1] is set to the end marker -3.
+ */
+static void n_encode(char *seq,char *naseq,sint l)
+{
+    register sint pos;
+    char symbol;
+
+    pos = 1;
+    while(pos <= l) {
+        symbol = seq[pos];
+        if(symbol != '-')
+            naseq[pos] = res_index(amino_acid_codes,symbol);
+        else
+            naseq[pos] = gap_pos2;
+        pos++;
+    }
+    naseq[pos] = -3;
+}
+
+/*
+ * Return the zero-based position of the first occurrence of c in the
+ * string t, or -1 when c does not occur (the terminator never matches).
+ */
+sint res_index(char *t,char c)
+{
+    register char *p;
+
+    for(p=t; *p != '\0'; p++) {
+        if(*p == c)
+            return (sint)(p-t);
+    }
+    return -1;
+}
+
+/*
+ * Allocate the "all sequences" arrays for nseqs sequences, indexed from 1:
+ * the length table, the table of sequence pointers (every entry NULL) and
+ * one name buffer of MAXNAMES+1 characters for each of entries 1..nseqs.
+ */
+void alloc_aln_all(sint nseqs)
+{
+    sint idx;
+
+    seqlen_array_all = (sint *)ckalloc( (nseqs+1) * sizeof (sint));
+
+    seq_array_all = (char **)ckalloc( (nseqs + 1) * sizeof (char *) );
+    idx = 0;
+    while(idx <= nseqs) {
+        seq_array_all[idx] = NULL;
+        idx++;
+    }
+
+    names_all = (char **)ckalloc( (nseqs+1) * sizeof (char *) );
+    idx = 1;
+    while(idx <= nseqs) {
+        names_all[idx] = (char *)ckalloc((MAXNAMES+1) * sizeof (char));
+        idx++;
+    }
+}
+
+/*
+ * Allocate the storage for entry seq_no of seq_array_all: length+2
+ * characters, i.e. room for indices 0..length+1.
+ */
+void alloc_seq_all(sint seq_no,sint length)
+{
+    char *storage;
+
+    storage = (char *)ckalloc((length+2) * sizeof (char));
+    seq_array_all[seq_no] = storage;
+}
+
+/*
+ * Copy an encoded sequence.
+ * naseq[1..l] is copied to seq[1..l] and seq[l+1] receives the end marker
+ * -3, the same marker p_encode() and n_encode() append.  seq must
+ * therefore have room for index l+1.  The source is left untouched; the
+ * marker used to be written to the source, leaving the copy unterminated.
+ */
+static void copy_seq(char *seq, char *naseq, sint l)
+{
+    register sint i;
+
+    for(i=1;i<=l;i++)
+        seq[i] = naseq[i];
+    seq[i] = -3;
+}
diff --git a/showpair.c b/showpair.c
new file mode 100644
index 0000000..a3412fc
--- /dev/null
+++ b/showpair.c
@@ -0,0 +1,489 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <math.h>
+#include "pcma.h"
+
+/* Helpers private to this file; show_pair() is the entry point. */
+static void make_p_ptrs(sint *tptr, sint *pl, sint naseq, sint l);
+static void make_n_ptrs(sint *tptr, sint *pl, sint naseq, sint len);
+static void put_frag(sint fs, sint v1, sint v2, sint flen);
+static sint frag_rel_pos(sint a1, sint b1, sint a2, sint b2);
+static void des_quick_sort(sint *array1, sint *array2, sint array_size);
+static void pair_align(sint seq_no, sint l1, sint l2);
+
+
+/*
+* Prototypes
+*/
+
+/*
+* Global variables
+*/
+extern sint *seqlen_array;
+extern char **seq_array;
+extern sint dna_ktup, dna_window, dna_wind_gap, dna_signif; /* params for DNA */
+extern sint prot_ktup,prot_window,prot_wind_gap,prot_signif; /* params for prots */
+extern sint nseqs;
+extern Boolean dnaflag;
+extern double **tmat;
+extern sint max_aa;
+extern sint max_aln_length;
+
+/* Working state shared by pair_align() and put_frag(). */
+static sint next; /* cursor into the fragment chain */
+static sint curr_frag,maxsf,vatend; /* newest fragment, head of the chain (0 = empty), "storage full" flag */
+static sint **accum; /* five rows per fragment: score, two positions, predecessor, chain link */
+static sint *diag_index; /* diagonal numbers, reordered together with displ by the sort */
+static char *slopes; /* set to 1 for the diagonals selected in pair_align() */
+
+sint ktup,window,wind_gap,signif; /* Pairwise aln. params */
+sint *displ; /* per-diagonal counts, later the latest fragment on each diagonal */
+sint *zza, *zzb, *zzc, *zzd; /* tables filled by make_p_ptrs()/make_n_ptrs(): zza/zzc for one sequence, zzb/zzd for the other */
+
+extern Boolean percent;
+
+
+/*
+ * Build the k-tuple position tables of protein sequence naseq (length l).
+ * Every window of ktup residues is turned into a number in base max_aa+1,
+ * plus one.  pl[code] ends up holding the last position where that word
+ * starts and tptr[pos] the previous position with the same word (0 when
+ * there is none).  Windows containing a residue outside 0..max_aa are
+ * skipped.
+ */
+static void make_p_ptrs(sint *tptr,sint *pl,sint naseq,sint l)
+{
+    static sint weight[10];
+    sint start,k,table_size,word;
+    char res;
+
+    for (k=1;k<=ktup;k++)
+        weight[k] = (sint) pow((double)(max_aa+1),(double)(k-1));
+
+    table_size = (sint) pow((double)(max_aa+1),(double)ktup);
+    for(k=1;k<=table_size;++k)
+        pl[k]=0;
+    for(k=1;k<=l;++k)
+        tptr[k]=0;
+
+    for(start=1;start<=(l-ktup+1);++start) {
+        word=0;
+        for(k=1;k<=ktup;++k) {
+            res = seq_array[naseq][start+k-1];
+            if((res<0) || (res > max_aa))
+                break;
+            word += ((res) * weight[k]);
+        }
+        if(k<=ktup) /* the scan was cut short by an unusable residue */
+            continue;
+        ++word;
+        if(pl[word]!=0)
+            tptr[start]=pl[word];
+        pl[word]=start;
+    }
+}
+
+
+/*
+ * Build the k-tuple position tables of nucleotide sequence naseq (length
+ * len).  Every window of ktup residues is turned into a number in base 4,
+ * plus one.  pl[code] ends up holding the last position where that word
+ * starts and tptr[pos] the previous position with the same word (0 when
+ * there is none).  Windows containing a residue outside 0..4 are skipped.
+ */
+static void make_n_ptrs(sint *tptr,sint *pl,sint naseq,sint len)
+{
+    static sint pot[]={ 0, 1, 4, 16, 64, 256, 1024, 4096 };
+    sint start,k,table_size,word;
+    char res;
+
+    table_size = (sint) pow((double)4,(double)ktup);
+
+    for(k=1;k<=table_size;++k)
+        pl[k]=0;
+    for(k=1;k<=len;++k)
+        tptr[k]=0;
+
+    for(start=1;start<=len-ktup+1;++start) {
+        word=0;
+        for(k=1;k<=ktup;++k) {
+            res = seq_array[naseq][start+k-1];
+            if((res<0) || (res>4))
+                break;
+            word += ((res) * pot[k]); /* DES */
+        }
+        if(k<=ktup) /* the scan was cut short by an unusable residue */
+            continue;
+        ++word;
+        if(pl[word]!=0)
+            tptr[start]=pl[word];
+        pl[word]=start;
+    }
+}
+
+
+/*
+ * Store fragment curr_frag in accum (rows 0..3 = fs, v1, v2, flen) and
+ * link it into the chain kept through row 4.  The chain starts at maxsf
+ * and is ordered by decreasing row-0 value, so maxsf always names the
+ * fragment with the largest fs.
+ */
+static void put_frag(sint fs,sint v1,sint v2,sint flen)
+{
+    sint prev;
+
+    accum[0][curr_frag]=fs;
+    accum[1][curr_frag]=v1;
+    accum[2][curr_frag]=v2;
+    accum[3][curr_frag]=flen;
+
+    /* empty chain: this fragment becomes its only member */
+    if(maxsf == 0) {
+        maxsf=1;
+        accum[4][curr_frag]=0;
+        return;
+    }
+
+    /* new best: put it in front of the chain */
+    if(fs >= accum[0][maxsf]) {
+        accum[4][curr_frag]=maxsf;
+        maxsf=curr_frag;
+        return;
+    }
+
+    /* otherwise walk down to the first entry that fs is not below */
+    next=maxsf;
+    do {
+        prev=next;
+        next=accum[4][next];
+    } while(fs<accum[0][next]);
+
+    accum[4][curr_frag]=next;
+    accum[4][prev]=curr_frag;
+}
+
+
+/*
+ * Tell whether the k-tuple at (a2,b2) lies before the one at (a1,b1).
+ * On the same diagonal (a1-b1 == a2-b2) it only has to start earlier;
+ * on another diagonal it must end before (a1,b1) in both sequences.
+ * Returns TRUE or FALSE.
+ */
+static sint frag_rel_pos(sint a1,sint b1,sint a2,sint b2)
+{
+    if(a1-b1==a2-b2)
+        return (a2<a1) ? TRUE : FALSE;
+
+    if(a2+ktup-1<a1 && b2+ktup-1<b1)
+        return TRUE;
+
+    return FALSE;
+}
+
+
+static void des_quick_sort(sint *array1, sint *array2, sint array_size)
+/* */
+/* Quicksort routine, adapted from chapter 4, page 115 of software tools */
+/* by Kernighan and Plauger, (1986) */
+/* Sort the elements of array1 into ascending order and apply the same */
+/* exchanges to array2. */
+/* The range sorted is array1[1] .. array1[array_size-1]. */
+/* Non-recursive: pending sub-ranges are kept on the lst/ust stacks. */
+/* */
+{
+ sint temp1, temp2;
+ sint p, pivlin;
+ sint i, j;
+ sint lst[50], ust[50]; /* lower/upper bounds of the pending sub-ranges; */
+ /* the smaller part is always pushed on top, so the */
+ /* depth grows with log2 of the range and must stay < 50 */
+
+ lst[1] = 1;
+ ust[1] = array_size-1;
+ p = 1;
+
+ while(p > 0) {
+ if(lst[p] >= ust[p])
+ p--; /* range of at most one element: nothing to do */
+ else {
+ /* partition around the last element of the range */
+ i = lst[p] - 1;
+ j = ust[p];
+ pivlin = array1[j];
+ while(i < j) {
+ for(i=i+1; array1[i] < pivlin; i++)
+ ;
+ for(j=j-1; j > i; j--)
+ if(array1[j] <= pivlin) break;
+ if(i < j) {
+ temp1 = array1[i];
+ array1[i] = array1[j];
+ array1[j] = temp1;
+
+ temp2 = array2[i];
+ array2[i] = array2[j];
+ array2[j] = temp2;
+ }
+ }
+
+ /* move the pivot to position i */
+ j = ust[p];
+
+ temp1 = array1[i];
+ array1[i] = array1[j];
+ array1[j] = temp1;
+
+ temp2 = array2[i];
+ array2[i] = array2[j];
+ array2[j] = temp2;
+
+ /* the smaller part goes on top of the stack and is handled next */
+ if(i-lst[p] < ust[p] - i) {
+ lst[p+1] = lst[p];
+ ust[p+1] = i - 1;
+ lst[p] = i + 1;
+ }
+ else {
+ lst[p+1] = i + 1;
+ ust[p+1] = ust[p];
+ ust[p] = i - 1;
+ }
+ p = p + 1;
+ }
+ }
+ return;
+
+}
+
+
+
+
+
+/*
+ * Quick pairwise scoring of two sequences from their k-tuple tables.
+ *
+ * seq_no is the index in seq_array of the first sequence, l1 its length
+ * and l2 the length of the second one.  The tables zza/zzc (first
+ * sequence) and zzb/zzd (second sequence) must already have been filled
+ * by make_p_ptrs()/make_n_ptrs().
+ *
+ * Steps: count the k-tuple matches on every diagonal, mark the diagonals
+ * within `window` of the `signif` best ones, then chain the matches lying
+ * on marked diagonals into fragments through put_frag().
+ *
+ * Results are left in the file-level state: accum and maxsf describe the
+ * fragment chain, and vatend is 1 when the routine stopped early because
+ * 2*max_aln_length fragments were reached, else 0.
+ */
+static void pair_align(sint seq_no,sint l1,sint l2)
+{
+ sint pot[8],i,j,l,m,flag,limit,pos,tl1,vn1,vn2,flen,osptr,fs;
+ sint tv1,tv2,encrypt,subt1,subt2,rmndr;
+ char residue;
+
+ /* pot[j] = weight of the j-th residue of a word; limit = number of words */
+ if(dnaflag) {
+ for(i=1;i<=ktup;++i)
+ pot[i] = (sint) pow((double)4,(double)(i-1));
+ limit = (sint) pow((double)4,(double)ktup);
+ }
+ else {
+ for (i=1;i<=ktup;i++)
+ pot[i] = (sint) pow((double)(max_aa+1),(double)(i-1));
+ limit = (sint) pow((double)(max_aa+1),(double)ktup);
+ }
+
+ /* number of diagonals of the l1 x l2 comparison */
+ tl1 = (l1+l2)-1;
+
+ for(i=1;i<=tl1;++i) {
+ slopes[i]=displ[i]=0;
+ diag_index[i] = i;
+ }
+
+
+/* increment diagonal score for each k_tuple match */
+
+ for(i=1;i<=limit;++i) {
+ vn1=zzc[i];
+ while(TRUE) {
+ if(!vn1) break;
+ vn2=zzd[i];
+ while(vn2 != 0) {
+ /* diagonal of the match at (vn1,vn2), shifted into 1..tl1 */
+ osptr=vn1-vn2+l2;
+ ++displ[osptr];
+ vn2=zzb[vn2];
+ }
+ vn1=zza[vn1];
+ }
+ }
+
+/* choose the top SIGNIF diagonals */
+
+ des_quick_sort(displ, diag_index, tl1);
+
+ j = tl1 - signif + 1;
+ if(j < 1) j = 1;
+
+/* flag all diagonals within WINDOW of a top diagonal */
+
+ for(i=tl1; i>=j; i--)
+ if(displ[i] > 0) {
+ pos = diag_index[i];
+ l = (1 >pos-window) ? 1 : pos-window;
+ m = (tl1<pos+window) ? tl1 : pos+window;
+ for(; l <= m; l++)
+ slopes[l] = 1;
+ }
+
+ /* from here on displ[d] holds the latest fragment stored on diagonal d */
+ for(i=1; i<=tl1; i++) displ[i] = 0;
+
+
+ curr_frag=maxsf=0;
+
+ for(i=1;i<=(l1-ktup+1);++i) {
+ /* code of the word starting at position i of the first sequence */
+ encrypt=flag=0;
+ for(j=1;j<=ktup;++j) {
+ residue = seq_array[seq_no][i+j-1];
+ if((residue<0) || (residue>max_aa)) {
+ flag=TRUE;
+ break;
+ }
+ encrypt += ((residue)*pot[j]);
+ }
+ if(flag) continue;
+ ++encrypt;
+
+ /* walk over every occurrence of that word in the second sequence */
+ vn2=zzd[encrypt];
+
+ flag=FALSE;
+ while(TRUE) {
+ if(!vn2) {
+ flag=TRUE;
+ break;
+ }
+ osptr=i-vn2+l2;
+ if(slopes[osptr]!=1) {
+ /* match on a diagonal that was not selected: ignore it */
+ vn2=zzb[vn2];
+ continue;
+ }
+ flen=0;
+ fs=ktup;
+ next=maxsf;
+
+ /*
+ * A-loop
+ * Walk the fragment chain from its head until a stored fragment
+ * lying before (i,vn2) is found; it fixes the score fs and the
+ * predecessor flen.  next is then set to 0 and the following
+ * pass stores the new fragment.
+ */
+
+ while(TRUE) {
+ if(!next) {
+ ++curr_frag;
+ if(curr_frag>=2*max_aln_length) {
+ info("(Partial alignment)");
+ vatend=1;
+ return;
+ }
+ displ[osptr]=curr_frag;
+ put_frag(fs,i,vn2,flen);
+ }
+ else {
+ tv1=accum[1][next];
+ tv2=accum[2][next];
+ if(frag_rel_pos(i,vn2,tv1,tv2)) {
+ if(i-vn2==accum[1][next]-accum[2][next]) {
+ /* same diagonal: extend the stored fragment */
+ if(i>accum[1][next]+(ktup-1))
+ fs=accum[0][next]+ktup;
+ else {
+ rmndr=i-accum[1][next];
+ fs=accum[0][next]+rmndr;
+ }
+ flen=next;
+ next=0;
+ continue;
+ }
+ else {
+ /* other diagonal: either extend the latest fragment on this */
+ /* diagonal (subt1) or jump from `next` at a cost of wind_gap */
+ /* (subt2), whichever scores higher */
+ if(displ[osptr]==0)
+ subt1=ktup;
+ else {
+ if(i>accum[1][displ[osptr]]+(ktup-1))
+ subt1=accum[0][displ[osptr]]+ktup;
+ else {
+ rmndr=i-accum[1][displ[osptr]];
+ subt1=accum[0][displ[osptr]]+rmndr;
+ }
+ }
+ subt2=accum[0][next]-wind_gap+ktup;
+ if(subt2>subt1) {
+ flen=next;
+ fs=subt2;
+ }
+ else {
+ flen=displ[osptr];
+ fs=subt1;
+ }
+ next=0;
+ continue;
+ }
+ }
+ else {
+ next=accum[4][next];
+ continue;
+ }
+ }
+ break;
+ }
+ /*
+ * End of Aloop
+ */
+
+ vn2=zzb[vn2];
+ }
+ }
+ vatend=0;
+}
+
+/*
+ * Fill the distance matrix tmat with quick pairwise (k-tuple) distances.
+ *
+ * Sequence i runs over istart+1..iend and sequence j over jstart+2..jend.
+ * For every pair, pair_align() leaves the best fragment score in accum;
+ * with the global `percent` set it is expressed as a percentage of the
+ * shorter sequence.  tmat[i][j] and tmat[j][i] receive
+ * (100 - score)/100.  All working arrays are allocated on entry and freed
+ * before returning.
+ */
+void show_pair(sint istart, sint iend, sint jstart, sint jend)
+{
+    sint i,j,dsr;
+    sint table_len;
+    double calc_score;
+
+    if(dnaflag) {
+        ktup = dna_ktup;
+        window = dna_window;
+        signif = dna_signif;
+        wind_gap = dna_wind_gap;
+    }
+    else {
+        ktup = prot_ktup;
+        window = prot_window;
+        signif = prot_signif;
+        wind_gap = prot_wind_gap;
+    }
+
+    /* zzc and zzd are indexed by word code, 1..alphabet^ktup, in
+       make_p_ptrs(), make_n_ptrs() and pair_align(); for short sequences
+       that exceeds max_aln_length, so size them by the larger of the two */
+    if(dnaflag)
+        table_len = (sint) pow((double)4,(double)ktup);
+    else
+        table_len = (sint) pow((double)(max_aa+1),(double)ktup);
+    if(table_len < max_aln_length)
+        table_len = max_aln_length;
+
+    accum = (sint **)ckalloc( 5*sizeof (sint *) );
+    for (i=0;i<5;i++)
+        accum[i] = (sint *) ckalloc((2*max_aln_length+1) * sizeof (sint) );
+
+    displ = (sint *) ckalloc( (2*max_aln_length +1) * sizeof (sint) );
+    slopes = (char *)ckalloc( (2*max_aln_length +1) * sizeof (char));
+    diag_index = (sint *) ckalloc( (2*max_aln_length +1) * sizeof (sint) );
+
+    /* zza and zzb are indexed by sequence position */
+    zza = (sint *)ckalloc( (max_aln_length+1) * sizeof (sint) );
+    zzb = (sint *)ckalloc( (max_aln_length+1) * sizeof (sint) );
+
+    zzc = (sint *)ckalloc( (table_len+1) * sizeof (sint) );
+    zzd = (sint *)ckalloc( (table_len+1) * sizeof (sint) );
+
+    fprintf(stdout,"\n\n");
+
+    for(i=istart+1;i<=iend;++i) {
+        if(dnaflag)
+            make_n_ptrs(zza,zzc,i,seqlen_array[i]);
+        else
+            make_p_ptrs(zza,zzc,i,seqlen_array[i]);
+        for(j=jstart+2;j<=jend;++j) {
+            if(dnaflag)
+                make_n_ptrs(zzb,zzd,j,seqlen_array[j]);
+            else
+                make_p_ptrs(zzb,zzd,j,seqlen_array[j]);
+            pair_align(i,seqlen_array[i],seqlen_array[j]);
+            if(!maxsf)
+                calc_score=0.0;
+            else {
+                calc_score=(double)accum[0][maxsf];
+                if(percent) {
+                    dsr=(seqlen_array[i]<seqlen_array[j]) ?
+                        seqlen_array[i] : seqlen_array[j];
+                    calc_score = (calc_score/(double)dsr) * 100.0;
+                }
+            }
+
+            /* store a distance, not a similarity */
+            tmat[i][j] = (100.0 - calc_score)/100.0;
+            tmat[j][i] = (100.0 - calc_score)/100.0;
+            /* JP: disable info */
+        }
+    }
+
+    for (i=0;i<5;i++)
+        accum[i]=ckfree((void *)accum[i]);
+    accum=ckfree((void *)accum);
+
+    displ=ckfree((void *)displ);
+    slopes=ckfree((void *)slopes);
+    diag_index=ckfree((void *)diag_index);
+
+    zza=ckfree((void *)zza);
+    zzb=ckfree((void *)zzb);
+    zzc=ckfree((void *)zzc);
+    zzd=ckfree((void *)zzd);
+}
+
diff --git a/subtrees.c b/subtrees.c
new file mode 100644
index 0000000..816f2dd
--- /dev/null
+++ b/subtrees.c
@@ -0,0 +1,49 @@
+/* State describing a list of input alignment files; the per-file arrays are indexed from 1. */
+char **clusfilelist; /* file names, one string per listed file */
+int *seqnumlist; /* seqnumlist[i] = number of sequences in file i */
+int filecount; /* number of listed files */
+int *consensus_index; /* one sequence index per file - presumably the representative sequence; confirm against the code that fills it */
+
+/*
+ * For every input file, compute the distance matrix of its sequences and
+ * build a guide tree from it.  The sequences of file i occupy a run of
+ * seqnumlist[i] consecutive indices, the first run starting at 1.
+ */
+void generate_subtree_roots() {
+
+    int group;
+    int first = 1; /* index of the first sequence of the current file */
+    int last; /* index of its last sequence */
+
+    group = 1;
+    while(group<=filecount) {
+        last = first+seqnumlist[group]-1;
+        calculate_tmat(first, last);
+        guide_tree(tree, seqnumlist[group]);
+        first = last+1;
+        group++;
+    }
+}
+
+/*
+ * Build the distance matrix of the aligned sequences startnum..endnum.
+ *
+ * tmat is allocated here as a square matrix indexed 1..n in both
+ * dimensions, n = endnum-startnum+1.  The distance of two sequences is
+ * 1 - identities/positions, counted over the columns where neither has a
+ * gap; it is 1 when no such column exists and 0 on the diagonal.
+ *
+ * NOTE(review): any matrix tmat pointed to before the call is not freed
+ * here - confirm who owns it.
+ */
+void calculate_tmat(int startnum, int endnum)
+{
+    int i,j,k,m,n;
+    int identitycount, nongapcount;
+    int count = endnum-startnum+1; /* number of sequences in the range */
+
+    if(count < 0) count = 0;
+
+    /* one extra slot per dimension because indexing starts at 1 */
+    tmat = (double **) ckalloc( (count+1) * sizeof(double *) );
+    for(i=1;i<=count;i++) {
+        tmat[i] = (double *) ckalloc( (count+1) * sizeof(double) );
+        tmat[i][i] = 0.0;
+    }
+
+    for(i=startnum;i<=endnum;i++) {
+        for(j=i+1;j<=endnum;j++) {
+
+            m = i-startnum+1;
+            n = j-startnum+1;
+
+            identitycount = nongapcount = 0;
+            for(k=1;k<=seqlen_array[i];k++) {
+                if( (seq_array[i][k]!=gap_pos2) && (seq_array[j][k]!=gap_pos2) )
+                {
+                    nongapcount++;
+                    if(seq_array[i][k]==seq_array[j][k])
+                        identitycount++;
+                }
+            }
+            if(nongapcount>0) tmat[m][n] = tmat[n][m] = 1.0 - 1.0*identitycount/nongapcount;
+            else tmat[m][n] = tmat[n][m] = 1; /* keep the matrix symmetric */
+        }
+    }
+}
diff --git a/trees.c b/trees.c
new file mode 100644
index 0000000..dd8af30
--- /dev/null
+++ b/trees.c
@@ -0,0 +1,1618 @@
+/* Phyle of filogenetic tree calculating functions for CLUSTAL W */
+/* DES was here FEB. 1994 */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <math.h>
+#include "pcma.h"
+#include "dayhoff.h" /* set correction for amino acid distances >= 75% */
+
+
+/*
+ * Prototypes
+ */
+Boolean transition(sint base1, sint base2);
+void tree_gap_delete(void);
+void distance_matrix_output(FILE *ofile);
+void nj_tree(char **tree_description, FILE *tree);
+void compare_tree(char **tree1, char **tree2, sint *hits, sint n);
+void print_phylip_tree(char **tree_description, FILE *tree, sint bootstrap);
+void print_nexus_tree(char **tree_description, FILE *tree, sint bootstrap);
+sint two_way_split(char **tree_description, FILE *tree, sint start_row, sint flag, sint bootstrap);
+sint two_way_split_nexus(char **tree_description, FILE *tree, sint start_row, sint flag, sint bootstrap);
+void print_tree(char **tree_description, FILE *tree, sint *totals);
+static Boolean is_ambiguity(char c);
+static void overspill_message(sint overspill,sint total_dists);
+
+
+/*
+ * Global variables
+ */
+
+extern sint max_names;
+
+extern double **tmat; /* general nxn array of reals; allocated from main */
+ /* this is used as a distance matrix */
+extern Boolean dnaflag; /* TRUE for DNA seqs; FALSE for proteins */
+extern Boolean tossgaps; /* Ignore places in align. where ANY seq. has a gap*/
+extern Boolean kimura; /* Use correction for multiple substitutions */
+extern Boolean output_tree_clustal; /* clustal text output for trees */
+extern Boolean output_tree_phylip; /* phylip nested parentheses format */
+extern Boolean output_tree_distances; /* phylip distance matrix */
+extern Boolean output_tree_nexus; /* nexus format tree */
+extern sint bootstrap_format; /* bootstrap file format */
+extern Boolean empty; /* any sequences in memory? */
+extern Boolean usemenu; /* interactive (TRUE) or command line (FALSE) */
+extern sint nseqs;
+extern sint max_aln_length;
+extern sint *seqlen_array; /* the lengths of the sequences */
+extern char **seq_array; /* the sequences */
+extern char **names; /* the seq. names */
+extern char seqname[]; /* name of input file */
+extern sint gap_pos1,gap_pos2;
+extern Boolean use_ambiguities;
+extern char *amino_acid_codes;
+
+extern FILE *tree;
+
+/* File-scope working state shared by the tree routines below. */
+
+/* av[i]: set to half the joining distance when cluster i is formed in
+   nj_tree(); a value > 0 is used there to tell a node from a single sequence */
+static double *av;
+/* branch lengths recorded per neighbor-joining cycle; allocated in nj_tree() */
+static double *left_branch, *right_branch;
+/* copies of the standard tree's branch lengths kept by bootstrap_tree() */
+static double *save_left_branch, *save_right_branch;
+/* per-tree-row hit counters filled through compare_tree() during bootstrapping */
+static sint *boot_totals;
+/* tkill[i] == 1 once row/column i has been merged away in nj_tree() */
+static sint *tkill;
+/*
+ The next line is a fossil from the days of using the cc ran()
+static int ran_factor;
+*/
+/* boot_positions[j]: alignment position selected for slot j (identity for
+   the standard tree, random draws for bootstrap samples) */
+static sint *boot_positions;
+/* output streams, one per selectable output format */
+static FILE *phylip_phy_tree_file;
+static FILE *clustal_phy_tree_file;
+static FILE *distances_phy_tree_file;
+static FILE *nexus_phy_tree_file;
+/* when TRUE, nj_tree() writes its cycle-by-cycle report to the tree file */
+static Boolean verbose;
+/* tree_gaps[posn] == 1 if any sequence has a gap at posn (see tree_gap_delete()) */
+static char *tree_gaps;
+/* range of sequences the tree is built for; set to 1 and nseqs by the entry points */
+static sint first_seq, last_seq;
+ /* (legacy note, apparently orphaned:) array of weights; 1 for use this posn.; 0 don't */
+
+extern sint boot_ntrials; /* number of bootstrap trials */
+extern unsigned sint boot_ran_seed; /* random number generator seed */
+
+/* JP */
+extern char **clusfilelist;
+extern int *seqnumlist;
+extern int filecount;
+extern int *consensus_index;
+
+
+/*
+ * Calculate a tree using the distances in the nseqs*nseqs array tmat.
+ * This is the routine for getting the REAL trees after alignment.
+ *
+ * For every output format that is switched on (clustal, phylip, distance
+ * matrix, nexus) the corresponding file is opened -- under the given name if
+ * it is non-empty, otherwise via open_output_file() -- the distance matrix
+ * is computed, the neighbor-joining tree is built and written, and the file
+ * is closed again.
+ *
+ * phylip_name, clustal_name, dist_name, nexus_name: output file names; an
+ * empty string selects the open_output_file() path, which is also passed
+ * the name buffer.
+ *
+ * If an output file cannot be opened the function returns without doing
+ * anything, after closing the files it had already opened.
+ */
+void phylogenetic_tree(char *phylip_name,char *clustal_name,char *dist_name, char *nexus_name)
+{
+    char path[FILENAMELEN+1];
+    sint i, j;
+    sint overspill = 0;
+    sint total_dists;
+    static char **standard_tree;
+    static char **save_tree;
+
+    if(empty) {
+        error("You must load an alignment first");
+        return;
+    }
+
+    if(nseqs<2) {
+        error("Alignment has only %d sequences",nseqs);
+        return;
+    }
+    first_seq=1;
+    last_seq=nseqs;
+
+    get_path(seqname,path);
+
+    /* ---- open the requested output files ---- */
+
+    if(output_tree_clustal) {
+        if (clustal_name[0]!=EOS)
+            clustal_phy_tree_file = open_explicit_file(clustal_name);
+        else
+            clustal_phy_tree_file = open_output_file(
+                "\nEnter name for CLUSTAL tree output file ",path,
+                clustal_name,"nj");
+        if (clustal_phy_tree_file == NULL) return;
+    }
+
+    if(output_tree_phylip) {
+        if (phylip_name[0]!=EOS)
+            phylip_phy_tree_file = open_explicit_file(phylip_name);
+        else
+            phylip_phy_tree_file = open_output_file(
+                "\nEnter name for PHYLIP tree output file ",path,
+                phylip_name,"ph");
+        if (phylip_phy_tree_file == NULL) {
+            /* do not leak the files opened above */
+            if(output_tree_clustal) fclose(clustal_phy_tree_file);
+            return;
+        }
+    }
+
+    if(output_tree_distances) {
+        if (dist_name[0]!=EOS)
+            distances_phy_tree_file = open_explicit_file(dist_name);
+        else
+            distances_phy_tree_file = open_output_file(
+                "\nEnter name for distance matrix output file ",path,
+                dist_name,"dst");
+        if (distances_phy_tree_file == NULL) {
+            if(output_tree_clustal) fclose(clustal_phy_tree_file);
+            if(output_tree_phylip) fclose(phylip_phy_tree_file);
+            return;
+        }
+    }
+
+    if(output_tree_nexus) {
+        if (nexus_name[0]!=EOS)
+            nexus_phy_tree_file = open_explicit_file(nexus_name);
+        else
+            nexus_phy_tree_file = open_output_file(
+                "\nEnter name for NEXUS tree output file ",path,
+                nexus_name,"tre");
+        if (nexus_phy_tree_file == NULL) {
+            if(output_tree_clustal) fclose(clustal_phy_tree_file);
+            if(output_tree_phylip) fclose(phylip_phy_tree_file);
+            if(output_tree_distances) fclose(distances_phy_tree_file);
+            return;
+        }
+    }
+
+    /* ---- distance matrix over all alignment positions ---- */
+
+    boot_positions = (sint *)ckalloc( (seqlen_array[first_seq]+2) * sizeof (sint) );
+
+    for(j=1; j<=seqlen_array[first_seq]; ++j)
+        boot_positions[j] = j;
+
+    if(output_tree_clustal) {
+        verbose = TRUE;     /* Turn on file output */
+        if(dnaflag)
+            overspill = dna_distance_matrix(clustal_phy_tree_file);
+        else
+            overspill = prot_distance_matrix(clustal_phy_tree_file);
+    }
+
+    if(output_tree_phylip) {
+        verbose = FALSE;    /* Turn off file output */
+        if(dnaflag)
+            overspill = dna_distance_matrix(phylip_phy_tree_file);
+        else
+            overspill = prot_distance_matrix(phylip_phy_tree_file);
+    }
+
+    if(output_tree_nexus) {
+        verbose = FALSE;    /* Turn off file output */
+        if(dnaflag)
+            overspill = dna_distance_matrix(nexus_phy_tree_file);
+        else
+            overspill = prot_distance_matrix(nexus_phy_tree_file);
+    }
+
+    if(output_tree_distances) {
+        verbose = FALSE;    /* Turn off file output */
+        if(dnaflag)
+            overspill = dna_distance_matrix(distances_phy_tree_file);
+        else
+            overspill = prot_distance_matrix(distances_phy_tree_file);
+        distance_matrix_output(distances_phy_tree_file);
+    }
+
+    /* check if any distances overflowed the distance corrections */
+    if ( overspill > 0 ) {
+        total_dists = (nseqs*(nseqs-1))/2;
+        overspill_message(overspill,total_dists);
+    }
+
+    if(output_tree_clustal) verbose = TRUE;     /* Turn on file output */
+
+    /* ---- build and print the tree ---- */
+
+    standard_tree = (char **) ckalloc( (nseqs+1) * sizeof (char *) );
+    for(i=0; i<nseqs+1; i++)
+        standard_tree[i] = (char *) ckalloc( (nseqs+1) * sizeof(char) );
+    save_tree = (char **) ckalloc( (nseqs+1) * sizeof (char *) );
+    for(i=0; i<nseqs+1; i++)
+        save_tree[i] = (char *) ckalloc( (nseqs+1) * sizeof(char) );
+
+    if(output_tree_clustal || output_tree_phylip || output_tree_nexus)
+        nj_tree(standard_tree,clustal_phy_tree_file);
+
+    /* the print routines consume the tree description, so keep a copy
+       and restore it between the two writers */
+    for(i=1; i<nseqs+1; i++)
+        for(j=1; j<nseqs+1; j++)
+            save_tree[i][j] = standard_tree[i][j];
+
+    if(output_tree_phylip)
+        print_phylip_tree(standard_tree,phylip_phy_tree_file,0);
+
+    for(i=1; i<nseqs+1; i++)
+        for(j=1; j<nseqs+1; j++)
+            standard_tree[i][j] = save_tree[i][j];
+
+    if(output_tree_nexus)
+        print_nexus_tree(standard_tree,nexus_phy_tree_file,0);
+
+    /* ---- release working storage ---- */
+
+    /* tree_gaps is not allocated in this function; presumably the
+       distance-matrix routines set it up -- TODO confirm */
+    tree_gaps=ckfree((void *)tree_gaps);
+    boot_positions=ckfree((void *)boot_positions);
+    /* nj_tree() allocates these only when it actually runs with more
+       than two sequences */
+    if (left_branch != NULL) left_branch=ckfree((void *)left_branch);
+    if (right_branch != NULL) right_branch=ckfree((void *)right_branch);
+    if (tkill != NULL) tkill=ckfree((void *)tkill);
+    if (av != NULL) av=ckfree((void *)av);
+    for (i=0;i<nseqs+1;i++)
+        standard_tree[i]=ckfree((void *)standard_tree[i]);
+    standard_tree=ckfree((void *)standard_tree);
+
+    for (i=0;i<nseqs+1;i++)
+        save_tree[i]=ckfree((void *)save_tree[i]);
+    save_tree=ckfree((void *)save_tree);
+
+    /* ---- close the files and report ---- */
+
+    if(output_tree_clustal) {
+        fclose(clustal_phy_tree_file);
+        info("Phylogenetic tree file created: [%s]",clustal_name);
+    }
+
+    if(output_tree_phylip) {
+        fclose(phylip_phy_tree_file);
+        info("Phylogenetic tree file created: [%s]",phylip_name);
+    }
+
+    if(output_tree_distances) {
+        fclose(distances_phy_tree_file);
+        info("Distance matrix file created: [%s]",dist_name);
+    }
+
+    if(output_tree_nexus) {
+        fclose(nexus_phy_tree_file);
+        info("Nexus tree file created: [%s]",nexus_name);
+    }
+}
+
+/*
+ * Warn that `overspill` of the `total_dists` pairwise distances were out of
+ * range for the distance correction, together with a fixed list of
+ * suggestions.  The whole text is assembled in a local buffer and handed to
+ * warning() in one piece.
+ */
+static void overspill_message(sint overspill,sint total_dists)
+{
+    static const char *const trailer[] = {
+        "\n were out of range for the distance correction.",
+        "\n",
+        "\n SUGGESTIONS: 1) remove the most distant sequences",
+        "\n or 2) use the PHYLIP package",
+        "\n or 3) turn off the correction.",
+        "\n Note: Use option 3 with caution! With this degree",
+        "\n of divergence you will have great difficulty",
+        "\n getting robust and reliable trees.",
+        "\n\n"
+    };
+    char text[1024]="";
+    int part;
+    int nparts = (int)(sizeof trailer / sizeof trailer[0]);
+
+    /* leading sentence with the two counts ... */
+    sprintf(text,"%d of the distances out of a total of %d",
+        (pint)overspill,(pint)total_dists);
+
+    /* ... followed by the fixed explanatory lines */
+    for(part=0; part<nparts; part++)
+        strcat(text,trailer[part]);
+
+    warning(text);
+}
+
+
+
+/*
+ * Return TRUE if the substitution base1 <-> base2 is a transition, FALSE
+ * otherwise (i.e. for transversions and for identical bases).
+ *
+ * The comparison is made on the numeric base codes
+ *      a,A = 0;   c,C = 2;   g,G = 6;   t,T,u,U = 17
+ * so A <--> G is the pair {0,6} and T <--> C is the pair {2,17}.
+ * The order of the two arguments does not matter.
+ */
+Boolean transition(sint base1, sint base2)
+{
+    /* order the pair so that each transition needs a single test */
+    sint low  = (base1 < base2) ? base1 : base2;
+    sint high = (base1 < base2) ? base2 : base1;
+
+    if(low == 0 && high == 6)
+        return TRUE;        /* A <--> G */
+    if(low == 2 && high == 17)
+        return TRUE;        /* T <--> C */
+
+    return FALSE;
+}
+
+
+/*
+ * Flag all positions in the alignment that have a gap in ANY sequence.
+ *
+ * Allocates tree_gaps (max_aln_length+1 chars) and, for every position
+ * 1..seqlen_array[first_seq], stores 1 if at least one of the sequences
+ * first_seq..last_seq holds gap_pos1 or gap_pos2 there, 0 otherwise.
+ * The buffer is not freed here.
+ */
+void tree_gap_delete(void)
+{
+    sint column;
+    sint which;
+    char residue;
+
+    tree_gaps = (char *)ckalloc( (max_aln_length+1) * sizeof (char) );
+
+    for(column=1; column<=seqlen_array[first_seq]; ++column) {
+        tree_gaps[column] = 0;
+
+        /* stop at the first sequence that shows a gap in this column */
+        which = first_seq;
+        while(which <= last_seq) {
+            residue = seq_array[which][column];
+            if(residue == gap_pos1 || residue == gap_pos2) {
+                tree_gaps[column] = 1;
+                break;
+            }
+            ++which;
+        }
+    }
+}
+
+/*
+ * Write the distance matrix tmat to ofile: first the number of sequences,
+ * then one record per sequence consisting of its name (left-justified in
+ * max_names columns) followed by its row of distances.  After every eighth
+ * value the row is continued on a new line, unless it was the last value.
+ */
+void distance_matrix_output(FILE *ofile)
+{
+    sint i,j;
+    sint n = last_seq-first_seq+1;      /* number of sequences written */
+
+    /* cast the whole count, not just last_seq, to match %d */
+    fprintf(ofile,"%6d",(pint)n);
+    for(i=1;i<=n;i++) {
+        /* the '*' field width must be passed as an int */
+        fprintf(ofile,"\n%-*s ",(int)max_names,names[i]);
+        for(j=1;j<=n;j++) {
+            fprintf(ofile,"%6.3f ",tmat[i][j]);
+            if((j % 8 == 0) && (j != n))
+                fprintf(ofile,"\n ");   /* continuation line */
+        }
+    }
+}
+
+
+
+/*
+ * Build a neighbor-joining tree from the global distance matrix tmat.
+ *
+ * tree_description: output; row nc receives a 1 in every column whose
+ *                   sequence belongs to the cluster formed in cycle nc.
+ *                   The row written by the last cycle (number of
+ *                   sequences - 2) labels the three remaining groups
+ *                   with 1, 2 and 3.
+ * tree:             stream that receives the cycle-by-cycle report; it is
+ *                   written to only while the file-scope flag `verbose`
+ *                   is set.
+ *
+ * Side effects: tmat is modified in place; left_branch, right_branch, tkill
+ * and av are allocated here and are NOT freed here.  With exactly two
+ * sequences the function returns early (after the optional report) without
+ * allocating anything.
+ */
+void nj_tree(char **tree_description, FILE *tree)
+{
+ register int i;
+ sint l[4],nude,k;
+ sint nc,mini,minj,j,ii,jj;
+ double fnseqs,fnseqs2=0,sumd;
+ double diq,djq,dij,d2r,dr,dio,djo,da;
+ double tmin,total,dmin;
+ double bi,bj,b1,b2,b3,branch[4];
+ sint typei,typej; /* 0 = node; 1 = OTU */
+
+ /* number of clusters still alive; decremented once per cycle */
+ fnseqs = (double)last_seq-first_seq+1;
+
+/*********************** First initialisation ***************************/
+
+ if(verbose) {
+ fprintf(tree,"\n\n\t\t\tNeighbor-joining Method\n");
+ fprintf(tree,"\n Saitou, N. and Nei, M. (1987)");
+ fprintf(tree," The Neighbor-joining Method:");
+ fprintf(tree,"\n A New Method for Reconstructing Phylogenetic Trees.");
+ fprintf(tree,"\n Mol. Biol. Evol., 4(4), 406-425\n");
+ fprintf(tree,"\n\n This is an UNROOTED tree\n");
+ fprintf(tree,"\n Numbers in parentheses are branch lengths\n\n");
+ }
+
+ /* two sequences: nothing to join, just report the single distance */
+ if (fnseqs == 2) {
+ if (verbose) fprintf(tree,"Cycle 1 = SEQ: 1 (%9.5f) joins SEQ: 2 (%9.5f)",tmat[first_seq][first_seq+1],tmat[first_seq][first_seq+1]);
+ return;
+ }
+
+ mini = minj = 0;
+
+ left_branch = (double *) ckalloc( (nseqs+2) * sizeof (double) );
+ right_branch = (double *) ckalloc( (nseqs+2) * sizeof (double) );
+ tkill = (sint *) ckalloc( (nseqs+1) * sizeof (sint) );
+ av = (double *) ckalloc( (nseqs+1) * sizeof (double) );
+
+ for(i=1;i<=last_seq-first_seq+1;++i)
+ {
+ tmat[i][i] = av[i] = 0.0;
+ tkill[i] = 0;
+ }
+
+/*********************** Enter The Main Cycle ***************************/
+
+ /* for(nc=1; nc<=(last_seq-first_seq+1-3); ++nc) { */ /**start main cycle**/
+ /* one join per cycle until only three clusters are left */
+ for(nc=1; nc<=(last_seq-first_seq+1-3); ++nc) {
+ /* mirror the upper triangle into the lower one and sum all distances */
+ sumd = 0.0;
+ for(j=2; j<=last_seq-first_seq+1; ++j)
+ for(i=1; i<j; ++i) {
+ tmat[j][i] = tmat[i][j];
+ sumd = sumd + tmat[i][j];
+ }
+
+ tmin = 99999.0;
+
+/*.................compute SMATij values and find the smallest one ........*/
+
+ for(jj=2; jj<=last_seq-first_seq+1; ++jj)
+ if(tkill[jj] != 1)
+ for(ii=1; ii<jj; ++ii)
+ if(tkill[ii] != 1) {
+ diq = djq = 0.0;
+
+ /* column sums for ii and jj (killed rows were zeroed, so they add nothing) */
+ for(i=1; i<=last_seq-first_seq+1; ++i) {
+ diq = diq + tmat[i][ii];
+ djq = djq + tmat[i][jj];
+ }
+
+ dij = tmat[ii][jj];
+ d2r = diq + djq - (2.0*dij);
+ dr = sumd - dij -d2r;
+ fnseqs2 = fnseqs - 2.0;
+ total= d2r+ fnseqs2*dij +dr*2.0;
+ total= total / (2.0*fnseqs2);
+
+ /* remember the pair with the smallest criterion value */
+ if(total < tmin) {
+ tmin = total;
+ mini = ii;
+ minj = jj;
+ }
+ }
+
+
+/*.................compute branch lengths and print the results ........*/
+
+
+ dio = djo = 0.0;
+ for(i=1; i<=last_seq-first_seq+1; ++i) {
+ dio = dio + tmat[i][mini];
+ djo = djo + tmat[i][minj];
+ }
+
+ dmin = tmat[mini][minj];
+ dio = (dio - dmin) / fnseqs2;
+ djo = (djo - dmin) / fnseqs2;
+ bi = (dmin + dio - djo) * 0.5;
+ bj = dmin - bi;
+ /* subtract the part of the length already assigned inside each cluster */
+ bi = bi - av[mini];
+ bj = bj - av[minj];
+
+ /* av > 0 marks an entry that already represents a joined cluster */
+ if( av[mini] > 0.0 )
+ typei = 0;
+ else
+ typei = 1;
+ if( av[minj] > 0.0 )
+ typej = 0;
+ else
+ typej = 1;
+
+ if(verbose)
+ fprintf(tree,"\n Cycle%4d = ",(pint)nc);
+
+/*
+ Set tiny branch lengths (absolute value below 0.0001, either sign)
+ to zero.
+*/ if( fabs(bi) < 0.0001) bi = 0.0;
+ if( fabs(bj) < 0.0001) bj = 0.0;
+
+ if(verbose) {
+ if(typei == 0)
+ fprintf(tree,"Node:%4d (%9.5f) joins ",(pint)mini,bi);
+ else
+ fprintf(tree," SEQ:%4d (%9.5f) joins ",(pint)mini,bi);
+
+ if(typej == 0)
+ fprintf(tree,"Node:%4d (%9.5f)",(pint)minj,bj);
+ else
+ fprintf(tree," SEQ:%4d (%9.5f)",(pint)minj,bj);
+
+ fprintf(tree,"\n");
+ }
+
+
+ left_branch[nc] = bi;
+ right_branch[nc] = bj;
+
+ /* start this cycle's membership row empty */
+ for(i=1; i<=last_seq-first_seq+1; i++)
+ tree_description[nc][i] = 0;
+
+ /* a node contributes the members of the most recent row containing it;
+    a single sequence contributes just itself */
+ if(typei == 0) {
+ for(i=nc-1; i>=1; i--)
+ if(tree_description[i][mini] == 1) {
+ for(j=1; j<=last_seq-first_seq+1; j++)
+ if(tree_description[i][j] == 1)
+ tree_description[nc][j] = 1;
+ break;
+ }
+ }
+ else
+ tree_description[nc][mini] = 1;
+
+ if(typej == 0) {
+ for(i=nc-1; i>=1; i--)
+ if(tree_description[i][minj] == 1) {
+ for(j=1; j<=last_seq-first_seq+1; j++)
+ if(tree_description[i][j] == 1)
+ tree_description[nc][j] = 1;
+ break;
+ }
+ }
+ else
+ tree_description[nc][minj] = 1;
+
+
+/*
+ Here is where the -0.00005 branch lengths come from for 3 or more
+ identical seqs.
+*/
+/* if(dmin <= 0.0) dmin = 0.0001; */
+ /* keep av strictly positive so the merged entry is recognised as a node */
+ if(dmin <= 0.0) dmin = 0.000001;
+ av[mini] = dmin * 0.5;
+
+/*........................Re-initialisation................................*/
+
+ /* the merged cluster lives on in slot mini; slot minj is retired */
+ fnseqs = fnseqs - 1.0;
+ tkill[minj] = 1;
+
+ /* distance from the new cluster to j = mean of the two old distances,
+    stored in the upper triangle only */
+ for(j=1; j<=last_seq-first_seq+1; ++j)
+ if( tkill[j] != 1 ) {
+ da = ( tmat[mini][j] + tmat[minj][j] ) * 0.5;
+ if( (mini - j) < 0 )
+ tmat[mini][j] = da;
+ if( (mini - j) > 0)
+ tmat[j][mini] = da;
+ }
+
+ for(j=1; j<=last_seq-first_seq+1; ++j)
+ tmat[minj][j] = tmat[j][minj] = 0.0;
+
+
+/****/ } /**end main cycle**/
+
+/******************************Last Cycle (3 Seqs. left)********************/
+
+ nude = 1;
+
+ /* collect the indices of the three surviving clusters in l[1..3] */
+ for(i=1; i<=last_seq-first_seq+1; ++i)
+ if( tkill[i] != 1 ) {
+ l[nude] = i;
+ nude = nude + 1;
+ }
+
+ b1 = (tmat[l[1]][l[2]] + tmat[l[1]][l[3]] - tmat[l[2]][l[3]]) * 0.5;
+ b2 = tmat[l[1]][l[2]] - b1;
+ b3 = tmat[l[1]][l[3]] - b1;
+
+ branch[1] = b1 - av[l[1]];
+ branch[2] = b2 - av[l[2]];
+ branch[3] = b3 - av[l[3]];
+
+/* Reset tiny negative and positive branch lengths to zero */
+ if( fabs(branch[1]) < 0.0001) branch[1] = 0.0;
+ if( fabs(branch[2]) < 0.0001) branch[2] = 0.0;
+ if( fabs(branch[3]) < 0.0001) branch[3] = 0.0;
+
+ /* the three final branches are all stored in left_branch */
+ left_branch[last_seq-first_seq+1-2] = branch[1];
+ left_branch[last_seq-first_seq+1-1] = branch[2];
+ left_branch[last_seq-first_seq+1] = branch[3];
+
+ for(i=1; i<=last_seq-first_seq+1; i++)
+ tree_description[last_seq-first_seq+1-2][i] = 0;
+
+ if(verbose)
+ fprintf(tree,"\n Cycle%4d (Last cycle, trichotomy):\n",(pint)nc);
+
+ /* mark the members of group i with the value i in the final row */
+ for(i=1; i<=3; ++i) {
+ if( av[l[i]] > 0.0) {
+ if(verbose)
+ fprintf(tree,"\n\t\t Node:%4d (%9.5f) ",(pint)l[i],branch[i]);
+ for(k=last_seq-first_seq+1-3; k>=1; k--)
+ if(tree_description[k][l[i]] == 1) {
+ for(j=1; j<=last_seq-first_seq+1; j++)
+ if(tree_description[k][j] == 1)
+ tree_description[last_seq-first_seq+1-2][j] = i;
+ break;
+ }
+ }
+ else {
+ if(verbose)
+ fprintf(tree,"\n\t\t SEQ:%4d (%9.5f) ",(pint)l[i],branch[i]);
+ tree_description[last_seq-first_seq+1-2][l[i]] = i;
+ }
+ if(i < 3) {
+ if(verbose)
+ fprintf(tree,"joins");
+ }
+ }
+
+ if(verbose)
+ fprintf(tree,"\n");
+
+}
+
+
+
+
+/*
+ * Build the neighbor-joining tree for the loaded alignment and attach
+ * bootstrap counts to it.
+ *
+ * The "standard" tree is computed from all alignment positions.  Then
+ * boot_ntrials resampled position sets are drawn, a tree is built for each
+ * and compared with the standard tree through compare_tree(), which
+ * accumulates the hits in boot_totals.  The result is written in every
+ * selected format (clustal, phylip, nexus).
+ *
+ * phylip_name, clustal_name, nexus_name: output file names; an empty string
+ * selects the open_output_file() path, which is also passed the name
+ * buffer.
+ *
+ * Requires at least four sequences and at least one selected output format.
+ * If an output file cannot be opened the function returns without doing
+ * anything, after closing the files it had already opened.
+ */
+void bootstrap_tree(char *phylip_name,char *clustal_name, char *nexus_name)
+{
+    sint i,j;
+    int ranno;
+    char path[MAXLINE+1];
+    char dummy[10];
+    static char **sample_tree;
+    static char **standard_tree;
+    static char **save_tree;
+    sint total_dists, overspill = 0, total_overspill = 0;
+    sint nfails = 0;
+
+    if(empty) {
+        error("You must load an alignment first");
+        return;
+    }
+
+    if(nseqs<4) {
+        error("Alignment has only %d sequences",nseqs);
+        return;
+    }
+
+    if(!output_tree_clustal && !output_tree_phylip && !output_tree_nexus) {
+        error("You must select either clustal or phylip or nexus tree output format");
+        return;
+    }
+    get_path(seqname, path);
+
+    /* ---- open the requested output files ---- */
+
+    if (output_tree_clustal) {
+        if (clustal_name[0]!=EOS)
+            clustal_phy_tree_file = open_explicit_file(clustal_name);
+        else
+            clustal_phy_tree_file = open_output_file(
+                "\nEnter name for bootstrap output file ",path,
+                clustal_name,"njb");
+        if (clustal_phy_tree_file == NULL) return;
+    }
+
+    first_seq=1;
+    last_seq=nseqs;
+
+    if (output_tree_phylip) {
+        if (phylip_name[0]!=EOS)
+            phylip_phy_tree_file = open_explicit_file(phylip_name);
+        else
+            phylip_phy_tree_file = open_output_file(
+                "\nEnter name for bootstrap output file ",path,
+                phylip_name,"phb");
+        if (phylip_phy_tree_file == NULL) {
+            /* do not leak the file opened above */
+            if (output_tree_clustal) fclose(clustal_phy_tree_file);
+            return;
+        }
+    }
+
+    if (output_tree_nexus) {
+        if (nexus_name[0]!=EOS)
+            nexus_phy_tree_file = open_explicit_file(nexus_name);
+        else
+            nexus_phy_tree_file = open_output_file(
+                "\nEnter name for bootstrap output file ",path,
+                nexus_name,"treb");
+        if (nexus_phy_tree_file == NULL) {
+            if (output_tree_clustal) fclose(clustal_phy_tree_file);
+            if (output_tree_phylip) fclose(phylip_phy_tree_file);
+            return;
+        }
+    }
+
+    /* ---- standard tree from all alignment positions ---- */
+
+    boot_totals = (sint *)ckalloc( (nseqs+1) * sizeof (sint) );
+    for(i=0;i<nseqs+1;i++)
+        boot_totals[i]=0;
+
+    boot_positions = (sint *)ckalloc( (seqlen_array[first_seq]+2) * sizeof (sint) );
+
+    for(j=1; j<=seqlen_array[first_seq]; ++j)   /* First select all positions for */
+        boot_positions[j] = j;                  /* the "standard" tree */
+
+    if(output_tree_clustal) {
+        verbose = TRUE;     /* Turn on file output */
+        if(dnaflag)
+            overspill = dna_distance_matrix(clustal_phy_tree_file);
+        else
+            overspill = prot_distance_matrix(clustal_phy_tree_file);
+    }
+
+    if(output_tree_phylip) {
+        verbose = FALSE;    /* Turn off file output */
+        if(dnaflag)
+            overspill = dna_distance_matrix(phylip_phy_tree_file);
+        else
+            overspill = prot_distance_matrix(phylip_phy_tree_file);
+    }
+
+    if(output_tree_nexus) {
+        verbose = FALSE;    /* Turn off file output */
+        if(dnaflag)
+            overspill = dna_distance_matrix(nexus_phy_tree_file);
+        else
+            overspill = prot_distance_matrix(nexus_phy_tree_file);
+    }
+
+    /* check if any distances overflowed the distance corrections */
+    if ( overspill > 0 ) {
+        total_dists = (nseqs*(nseqs-1))/2;
+        overspill_message(overspill,total_dists);
+    }
+
+    tree_gaps=ckfree((void *)tree_gaps);
+
+    if (output_tree_clustal) verbose = TRUE;    /* Turn on screen output */
+
+    standard_tree = (char **) ckalloc( (nseqs+1) * sizeof (char *) );
+    for(i=0; i<nseqs+1; i++)
+        standard_tree[i] = (char *) ckalloc( (nseqs+1) * sizeof(char) );
+
+    /* compute the standard tree */
+
+    if(output_tree_clustal || output_tree_phylip || output_tree_nexus)
+        nj_tree(standard_tree,clustal_phy_tree_file);
+
+    if (output_tree_clustal)
+        fprintf(clustal_phy_tree_file,"\n\n\t\t\tBootstrap Confidence Limits\n\n");
+
+    /* save the left_branch and right_branch for phylip output */
+    save_left_branch = (double *) ckalloc( (nseqs+2) * sizeof (double) );
+    save_right_branch = (double *) ckalloc( (nseqs+2) * sizeof (double) );
+    for (i=1;i<=nseqs;i++) {
+        save_left_branch[i] = left_branch[i];
+        save_right_branch[i] = right_branch[i];
+    }
+
+    /* ---- random number generator and trial count ---- */
+
+    if(usemenu)
+        boot_ran_seed =
+            getint("\n\nEnter seed no. for random number generator ",1,1000,boot_ran_seed);
+
+    /* the native cc ran()/srand() is deliberately not used */
+    addrandinit((unsigned long) boot_ran_seed);
+
+    if (output_tree_clustal)
+        fprintf(clustal_phy_tree_file,"\n Random number generator seed = %7u\n",
+            boot_ran_seed);
+
+    if(usemenu)
+        boot_ntrials =
+            getint("\n\nEnter number of bootstrap trials ",1,10000,boot_ntrials);
+
+    if (output_tree_clustal) {
+        fprintf(clustal_phy_tree_file,"\n Number of bootstrap trials = %7d\n",
+            (pint)boot_ntrials);
+
+        fprintf(clustal_phy_tree_file,
+            "\n\n Diagrammatic representation of the above tree: \n");
+        fprintf(clustal_phy_tree_file,"\n Each row represents 1 tree cycle;");
+        fprintf(clustal_phy_tree_file," defining 2 groups.\n");
+        fprintf(clustal_phy_tree_file,"\n Each column is 1 sequence; ");
+        fprintf(clustal_phy_tree_file,"the stars in each line show 1 group; ");
+        fprintf(clustal_phy_tree_file,"\n the dots show the other\n");
+        fprintf(clustal_phy_tree_file,"\n Numbers show occurences in bootstrap samples.");
+    }
+
+    verbose = FALSE;    /* Turn OFF screen output */
+
+    /* nj_tree() allocates these afresh on every call */
+    left_branch=ckfree((void *)left_branch);
+    right_branch=ckfree((void *)right_branch);
+    tkill=ckfree((void *)tkill);
+    av=ckfree((void *)av);
+
+    /* ---- bootstrap trials ---- */
+
+    sample_tree = (char **) ckalloc( (nseqs+1) * sizeof (char *) );
+    for(i=0; i<nseqs+1; i++)
+        sample_tree[i] = (char *) ckalloc( (nseqs+1) * sizeof(char) );
+
+    if (usemenu)
+        fprintf(stdout,"\n\nEach dot represents 10 trials\n\n");
+    total_overspill = 0;
+    nfails = 0;
+    for(i=1; i<=boot_ntrials; ++i) {
+        /* select alignment positions for the bootstrap sample */
+        for(j=1; j<=seqlen_array[first_seq]; ++j) {
+            ranno = addrand( (unsigned long) seqlen_array[1]) + 1;
+            boot_positions[j] = ranno;
+        }
+        if(output_tree_clustal) {
+            if(dnaflag)
+                overspill = dna_distance_matrix(clustal_phy_tree_file);
+            else
+                overspill = prot_distance_matrix(clustal_phy_tree_file);
+        }
+
+        if(output_tree_phylip) {
+            if(dnaflag)
+                overspill = dna_distance_matrix(phylip_phy_tree_file);
+            else
+                overspill = prot_distance_matrix(phylip_phy_tree_file);
+        }
+
+        if(output_tree_nexus) {
+            if(dnaflag)
+                overspill = dna_distance_matrix(nexus_phy_tree_file);
+            else
+                overspill = prot_distance_matrix(nexus_phy_tree_file);
+        }
+
+        if( overspill > 0) {
+            total_overspill = total_overspill + overspill;
+            nfails++;
+        }
+
+        tree_gaps=ckfree((void *)tree_gaps);
+
+        if(output_tree_clustal || output_tree_phylip || output_tree_nexus)
+            nj_tree(sample_tree,clustal_phy_tree_file);
+
+        left_branch=ckfree((void *)left_branch);
+        right_branch=ckfree((void *)right_branch);
+        tkill=ckfree((void *)tkill);
+        av=ckfree((void *)av);
+
+        compare_tree(standard_tree, sample_tree, boot_totals, last_seq-first_seq+1);
+        if (usemenu) {
+            if(i % 10 == 0) fprintf(stdout,".");
+            if(i % 100 == 0) fprintf(stdout,"\n");
+        }
+    }
+
+    /* check if any distances overflowed the distance corrections */
+    if ( nfails > 0 ) {
+        total_dists = (nseqs*(nseqs-1))/2;
+        fprintf(stdout,"\n");
+        fprintf(stdout,"\n WARNING: %ld of the distances out of a total of %ld times %ld",
+            (long)total_overspill,(long)total_dists,(long)boot_ntrials);
+        fprintf(stdout,"\n were out of range for the distance correction.");
+        fprintf(stdout,"\n This affected %d out of %d bootstrap trials.",
+            (pint)nfails,(pint)boot_ntrials);
+        fprintf(stdout,"\n This may not be fatal but you have been warned!");
+        fprintf(stdout,"\n");
+        fprintf(stdout,"\n SUGGESTIONS: 1) turn off the correction");
+        fprintf(stdout,"\n or 2) remove the most distant sequences");
+        fprintf(stdout,"\n or 3) use the PHYLIP package.");
+        fprintf(stdout,"\n\n");
+        if (usemenu)
+            getstr("Press [RETURN] to continue",dummy);
+    }
+
+
+    boot_positions=ckfree((void *)boot_positions);
+
+    /* rows were allocated from index 0, so free them from index 0 too */
+    for (i=0;i<nseqs+1;i++)
+        sample_tree[i]=ckfree((void *)sample_tree[i]);
+    sample_tree=ckfree((void *)sample_tree);
+
+    /* ---- write the standard tree with its bootstrap counts ---- */
+
+    if (output_tree_clustal)
+        print_tree(standard_tree, clustal_phy_tree_file, boot_totals);
+
+    /* the print routines consume the tree description, so keep a copy
+       and restore it between the two writers */
+    save_tree = (char **) ckalloc( (nseqs+1) * sizeof (char *) );
+    for(i=0; i<nseqs+1; i++)
+        save_tree[i] = (char *) ckalloc( (nseqs+1) * sizeof(char) );
+
+    for(i=1; i<nseqs+1; i++)
+        for(j=1; j<nseqs+1; j++)
+            save_tree[i][j] = standard_tree[i][j];
+
+    if(output_tree_phylip) {
+        /* bring back the standard tree's branch lengths for printing */
+        left_branch = (double *) ckalloc( (nseqs+2) * sizeof (double) );
+        right_branch = (double *) ckalloc( (nseqs+2) * sizeof (double) );
+        for (i=1;i<=nseqs;i++) {
+            left_branch[i] = save_left_branch[i];
+            right_branch[i] = save_right_branch[i];
+        }
+        print_phylip_tree(standard_tree,phylip_phy_tree_file,
+            bootstrap_format);
+        left_branch=ckfree((void *)left_branch);
+        right_branch=ckfree((void *)right_branch);
+    }
+
+    for(i=1; i<nseqs+1; i++)
+        for(j=1; j<nseqs+1; j++)
+            standard_tree[i][j] = save_tree[i][j];
+
+    if(output_tree_nexus) {
+        left_branch = (double *) ckalloc( (nseqs+2) * sizeof (double) );
+        right_branch = (double *) ckalloc( (nseqs+2) * sizeof (double) );
+        for (i=1;i<=nseqs;i++) {
+            left_branch[i] = save_left_branch[i];
+            right_branch[i] = save_right_branch[i];
+        }
+        print_nexus_tree(standard_tree,nexus_phy_tree_file,
+            bootstrap_format);
+        left_branch=ckfree((void *)left_branch);
+        right_branch=ckfree((void *)right_branch);
+    }
+
+    /* ---- release working storage ---- */
+
+    boot_totals=ckfree((void *)boot_totals);
+    save_left_branch=ckfree((void *)save_left_branch);
+    save_right_branch=ckfree((void *)save_right_branch);
+
+    /* as above: include row 0, which was allocated as well */
+    for (i=0;i<nseqs+1;i++)
+        standard_tree[i]=ckfree((void *)standard_tree[i]);
+    standard_tree=ckfree((void *)standard_tree);
+
+    for (i=0;i<nseqs+1;i++)
+        save_tree[i]=ckfree((void *)save_tree[i]);
+    save_tree=ckfree((void *)save_tree);
+
+    /* ---- close the files and report ---- */
+
+    if (output_tree_clustal)
+        fclose(clustal_phy_tree_file);
+
+    if (output_tree_phylip)
+        fclose(phylip_phy_tree_file);
+
+    if (output_tree_nexus)
+        fclose(nexus_phy_tree_file);
+
+    if (output_tree_clustal)
+        info("Bootstrap output file completed [%s]"
+            ,clustal_name);
+    if (output_tree_phylip)
+        info("Bootstrap output file completed [%s]"
+            ,phylip_name);
+    if (output_tree_nexus)
+        info("Bootstrap output file completed [%s]"
+            ,nexus_name);
+}
+
+
+/*
+ * Count, for every row of tree1, how many rows of tree2 describe the same
+ * split of the n sequences.
+ *
+ * tree1, tree2: tree descriptions; rows 1..n-3 and columns 1..n are read.
+ * hits:         hits[i] is incremented once for every row j of tree2 that
+ *               matches row i of tree1.
+ * n:            number of sequences.
+ *
+ * Two rows match when they agree in all n columns or differ in all n
+ * columns (the same split with the two groups labelled the other way
+ * round).  The column count used for this test is the parameter n, so the
+ * function no longer depends on the file-scope first_seq/last_seq.
+ */
+void compare_tree(char **tree1, char **tree2, sint *hits, sint n)
+{
+    sint i,j,k;
+    sint nsame;     /* columns in which the two rows agree */
+
+    for(i=1; i<=n-3; i++) {
+        for(j=1; j<=n-3; j++) {
+            nsame = 0;
+            for(k=1; k<=n; k++) {
+                if(tree1[i][k] == tree2[j][k]) nsame++;
+            }
+            /* nsame == 0 means all n columns differ */
+            if((nsame == n) || (nsame == 0)) hits[i]++;
+        }
+    }
+}
+
+
+/*
+ * Write the tree held in tree_description to `tree` as a NEXUS TREES block.
+ *
+ * A TRANSLATE table numbering all nseqs sequence names is written first,
+ * followed by a single unrooted tree.  With two sequences the tree is just
+ * the pair with its distance; otherwise the three groups of the final
+ * trichotomy are expanded in turn through two_way_split_nexus().
+ *
+ * bootstrap selects the bootstrap labelling: BS_BRANCH_LABELS appends
+ * "[count]" to branches, BS_NODE_LABELS labels nodes (the root being
+ * labelled "TRICHOTOMY").
+ *
+ * tree_description is modified by the expansion.
+ */
+void print_nexus_tree(char **tree_description, FILE *tree, sint bootstrap)
+{
+    sint seq;
+    sint group;
+    sint sub_row;
+    sint nleaves = last_seq-first_seq+1;
+    sint top_row = nleaves-2;       /* row describing the final trichotomy */
+
+    fprintf(tree,"#NEXUS\n\n");
+
+    fprintf(tree,"BEGIN TREES;\n\n");
+    fprintf(tree,"\tTRANSLATE\n");
+    /* every entry but the last is followed by a comma */
+    for(seq=1; seq<=nseqs; seq++) {
+        if(seq < nseqs)
+            fprintf(tree,"\t\t%d %s,\n",(pint)seq,names[seq]);
+        else
+            fprintf(tree,"\t\t%d %s\n",(pint)nseqs,names[nseqs]);
+    }
+    fprintf(tree,"\t\t;\n");
+
+    fprintf(tree,"\tUTREE PAUP_1= ");
+
+    if(nleaves==2) {
+        fprintf(tree,"(%d:%7.5f,%d:%7.5f);",first_seq,tmat[first_seq][first_seq+1],first_seq+1,tmat[first_seq][first_seq+1]);
+    }
+    else {
+        fprintf(tree,"(");
+
+        /* groups 1..3 use left_branch[top_row], [top_row+1], [top_row+2] */
+        for(group=1; group<=3; group++) {
+            sub_row=two_way_split_nexus(tree_description, tree, top_row,group,bootstrap);
+            fprintf(tree,":%7.5f",left_branch[top_row+group-1]);
+            if ((bootstrap==BS_BRANCH_LABELS) && (sub_row>0) && (boot_totals[sub_row]>0))
+                fprintf(tree,"[%d]",(pint)boot_totals[sub_row]);
+            if(group < 3)
+                fprintf(tree,",");
+            else
+                fprintf(tree,")");
+        }
+
+        if (bootstrap==BS_NODE_LABELS) fprintf(tree,"TRICHOTOMY");
+        fprintf(tree,";");
+    }
+    fprintf(tree,"\nENDBLOCK;\n");
+}
+
+
+/*
+ * Recursively write the subtree rooted at row start_row of tree_description
+ * in nested-parentheses form, using sequence numbers as leaf labels.
+ *
+ * flag is the value that marks the wanted group in row start_row.  For the
+ * top row (number of sequences - 2) only the first member is written and no
+ * parentheses are added: the function returns 0 if that member is a single
+ * sequence, otherwise the row of the subtree that was written.  For any
+ * other row both members are written, enclosed in "(" ... ")", and
+ * start_row is returned.
+ *
+ * bootstrap selects the labelling with boot_totals (BS_BRANCH_LABELS:
+ * "[count]" after branch lengths, BS_NODE_LABELS: count after the closing
+ * parenthesis).
+ *
+ * Entries of tree_description are cleared as they are consumed.
+ */
+sint two_way_split_nexus
+(char **tree_description, FILE *tree, sint start_row, sint flag, sint bootstrap)
+{
+    sint nleaves = last_seq-first_seq+1;
+    sint top_row = nleaves-2;
+    char *cur = tree_description[start_row];    /* the row being expanded */
+    sint r, c;
+    sint pick = 0;          /* column of the member currently handled */
+    sint child_row;         /* row holding that member's subtree, 0 = leaf */
+    sint done_row;          /* row reported by the recursive call */
+
+    if(start_row != top_row) fprintf(tree,"(");
+
+    /* ---------- first member ---------- */
+
+    /* first column still carrying the flag */
+    for(c=1; c<=nleaves; c++) {
+        if(cur[c] == flag) {
+            pick = c;
+            break;
+        }
+    }
+
+    /* nearest earlier row that contains this column, if any */
+    child_row = 0;
+    for(r=start_row-1; r>=1; r--) {
+        if(tree_description[r][pick] == 1) {
+            child_row = r;
+            break;
+        }
+    }
+
+    if(child_row == 0) {
+        /* a single sequence */
+        cur[pick] = 0;
+        fprintf(tree,"%d",pick+first_seq-1);
+        if(start_row == top_row)
+            return(0);
+
+        fprintf(tree,":%7.5f,",left_branch[start_row]);
+    }
+    else {
+        /* a subtree: remove its members from this row, then descend */
+        for(c=1; c<=nleaves; c++) {
+            if((cur[c]==1) && (tree_description[child_row][c]==1))
+                cur[c] = 0;
+        }
+        done_row=two_way_split_nexus(tree_description, tree, child_row, (sint)1, bootstrap);
+        if(start_row == top_row)
+            return(child_row);
+
+        fprintf(tree,":%7.5f",left_branch[start_row]);
+        if ((bootstrap==BS_BRANCH_LABELS) && (boot_totals[done_row]>0))
+            fprintf(tree,"[%d]",(pint)boot_totals[done_row]);
+
+        fprintf(tree,",");
+    }
+
+    /* ---------- second member ---------- */
+
+    /* pick keeps its previous value if no flagged column is left */
+    for(c=1; c<=nleaves; c++) {
+        if(cur[c] == flag) {
+            pick = c;
+            break;
+        }
+    }
+
+    child_row = 0;
+    for(r=start_row-1; r>=1; r--) {
+        if(tree_description[r][pick] == 1) {
+            child_row = r;
+            break;
+        }
+    }
+
+    if(child_row == 0) {
+        cur[pick] = 0;
+        fprintf(tree,"%d",pick+first_seq-1);
+        fprintf(tree,":%7.5f)",right_branch[start_row]);
+    }
+    else {
+        for(c=1; c<=nleaves; c++) {
+            if((cur[c]==1) && (tree_description[child_row][c]==1))
+                cur[c] = 0;
+        }
+        done_row=two_way_split_nexus(tree_description, tree, child_row, (sint)1, bootstrap);
+        fprintf(tree,":%7.5f",right_branch[start_row]);
+        if ((bootstrap==BS_BRANCH_LABELS) && (boot_totals[done_row]>0))
+            fprintf(tree,"[%d]",(pint)boot_totals[done_row]);
+
+        fprintf(tree,")");
+    }
+
+    if ((bootstrap==BS_NODE_LABELS) && (boot_totals[start_row]>0))
+        fprintf(tree,"%d",(pint)boot_totals[start_row]);
+
+    return(start_row);
+}
+
+
+/*
+ * Write the tree held in tree_description to `tree` in PHYLIP
+ * nested-parentheses format, using the sequence names as leaf labels.
+ *
+ * With two sequences the tree is just the pair with its distance;
+ * otherwise the three groups of the final trichotomy are expanded in turn
+ * through two_way_split().
+ *
+ * bootstrap selects the bootstrap labelling: BS_BRANCH_LABELS appends
+ * "[count]" to branches, BS_NODE_LABELS labels nodes (the root being
+ * labelled "TRICHOTOMY").
+ *
+ * tree_description is handed to two_way_split(), as a non-const pointer.
+ */
+void print_phylip_tree(char **tree_description, FILE *tree, sint bootstrap)
+{
+    sint group;
+    sint sub_row;
+    sint nleaves = last_seq-first_seq+1;
+    sint top_row = nleaves-2;       /* row describing the final trichotomy */
+
+    if(nleaves==2) {
+        fprintf(tree,"(%s:%7.5f,%s:%7.5f);",names[first_seq],tmat[first_seq][first_seq+1],names[first_seq+1],tmat[first_seq][first_seq+1]);
+        return;
+    }
+
+    fprintf(tree,"(\n");
+
+    /* groups 1..3 use left_branch[top_row], [top_row+1], [top_row+2] */
+    for(group=1; group<=3; group++) {
+        sub_row=two_way_split(tree_description, tree, top_row,group,bootstrap);
+        fprintf(tree,":%7.5f",left_branch[top_row+group-1]);
+        if ((bootstrap==BS_BRANCH_LABELS) && (sub_row>0) && (boot_totals[sub_row]>0))
+            fprintf(tree,"[%d]",(pint)boot_totals[sub_row]);
+        if(group < 3)
+            fprintf(tree,",\n");
+        else
+            fprintf(tree,")");
+    }
+
+    if (bootstrap==BS_NODE_LABELS) fprintf(tree,"TRICHOTOMY");
+    fprintf(tree,";\n");
+}
+
+/*
+ * two_way_split()
+ *
+ * Recursively writes the subtree selected by (start_row, flag) to "tree"
+ * as nested parentheses, using names[] for leaves and left_branch[] /
+ * right_branch[] for branch lengths.  Entries of tree_description are
+ * set to 0 as they are consumed, so the matrix is modified by the call.
+ *
+ * The top row is last_seq-first_seq+1-2; for that row only the first
+ * (left) half is processed and the function returns early.
+ *
+ * Return value:
+ *   0          top row, and the selected group is a single sequence
+ *   new_row    top row, and the group was expanded by recursion
+ *   start_row  every other case
+ */
+sint two_way_split
+(char **tree_description, FILE *tree, sint start_row, sint flag, sint bootstrap)
+{
+ sint row, new_row = 0, old_row, col, test_col = 0;
+ Boolean single_seq;
+
+ if(start_row != last_seq-first_seq+1-2) fprintf(tree,"(\n"); /* no extra "(" for the top row */
+
+ for(col=1; col<=last_seq-first_seq+1; col++) { /* first column of this row marked with flag */
+ if(tree_description[start_row][col] == flag) {
+ test_col = col;
+ break;
+ }
+ }
+
+ single_seq = TRUE;
+ for(row=start_row-1; row>=1; row--) /* nearest lower row that also contains test_col */
+ if(tree_description[row][test_col] == 1) {
+ single_seq = FALSE;
+ new_row = row;
+ break;
+ }
+
+ if(single_seq) { /* leaf: print the sequence name, truncated to max_names characters */
+ tree_description[start_row][test_col] = 0;
+ fprintf(tree,"%.*s",max_names,names[test_col+first_seq-1]);
+ if(start_row == last_seq-first_seq+1-2) {
+ return(0);
+ }
+
+ fprintf(tree,":%7.5f,\n",left_branch[start_row]);
+ }
+ else { /* internal node: clear the columns shared with new_row, then recurse into it */
+ for(col=1; col<=last_seq-first_seq+1; col++) {
+ if((tree_description[start_row][col]==1)&&
+ (tree_description[new_row][col]==1))
+ tree_description[start_row][col] = 0;
+ }
+ old_row=two_way_split(tree_description, tree, new_row, (sint)1, bootstrap);
+ if(start_row == last_seq-first_seq+1-2) {
+ return(new_row);
+ }
+
+ fprintf(tree,":%7.5f",left_branch[start_row]);
+ if ((bootstrap==BS_BRANCH_LABELS) && (boot_totals[old_row]>0))
+ fprintf(tree,"[%d]",(pint)boot_totals[old_row]);
+
+ fprintf(tree,",\n");
+ }
+
+
+ for(col=1; col<=last_seq-first_seq+1; col++) /* second half: next column still marked with flag */
+ if(tree_description[start_row][col] == flag) {
+ test_col = col;
+ break;
+ }
+
+ single_seq = TRUE;
+ new_row = 0;
+ for(row=start_row-1; row>=1; row--)
+ if(tree_description[row][test_col] == 1) {
+ single_seq = FALSE;
+ new_row = row;
+ break;
+ }
+
+ if(single_seq) { /* leaf on the right: name, right branch length, close the group */
+ tree_description[start_row][test_col] = 0;
+ fprintf(tree,"%.*s",max_names,names[test_col+first_seq-1]);
+ fprintf(tree,":%7.5f)\n",right_branch[start_row]);
+ }
+ else { /* internal node on the right: same clearing and recursion as above */
+ for(col=1; col<=last_seq-first_seq+1; col++) {
+ if((tree_description[start_row][col]==1)&&
+ (tree_description[new_row][col]==1))
+ tree_description[start_row][col] = 0;
+ }
+ old_row=two_way_split(tree_description, tree, new_row, (sint)1, bootstrap);
+ fprintf(tree,":%7.5f",right_branch[start_row]);
+ if ((bootstrap==BS_BRANCH_LABELS) && (boot_totals[old_row]>0))
+ fprintf(tree,"[%d]",(pint)boot_totals[old_row]);
+
+ fprintf(tree,")\n");
+ }
+ if ((bootstrap==BS_NODE_LABELS) && (boot_totals[start_row]>0)) /* node label is written after the closing ")" */
+ fprintf(tree,"%d",(pint)boot_totals[start_row]);
+
+ return(start_row);
+}
+
+
+
+/*
+ * print_tree()
+ *
+ * Dumps tree_description to "tree" as text.  Rows 1 .. N-3 (N being
+ * last_seq-first_seq+1) are drawn with '*' for a 0 entry and '.' for
+ * anything else, followed by totals[row] when it is positive.  Row N-2
+ * is then printed as raw digits.
+ */
+void print_tree(char **tree_description, FILE *tree, sint *totals)
+{
+    const sint width = last_seq - first_seq + 1;
+    sint r, c;
+
+    fputs("\n", tree);
+
+    for (r = 1; r <= width - 3; r++) {
+        fputs(" \n", tree);
+        for (c = 1; c <= width; c++)
+            fputc(tree_description[r][c] == 0 ? '*' : '.', tree);
+        if (totals[r] > 0)
+            fprintf(tree, "%7d", (pint)totals[r]);
+    }
+
+    fputs(" \n", tree);
+    for (c = 1; c <= width; c++)
+        fprintf(tree, "%1d", (pint)tree_description[width - 2][c]);
+    fputs("\n", tree);
+}
+
+
+/*
+ * dna_distance_matrix()
+ *
+ * Fills tmat[m][n] (and tmat[n][m]) with a pairwise distance for every
+ * pair m < n among the last_seq-first_seq+1 sequences starting at
+ * first_seq.  For each pair it counts compared sites (e), transitions
+ * (p) and other mismatches (q).  Without "kimura" the distance is
+ * (p+q)/e; with it the distance is 0.5*log(a) + 0.25*log(b) where
+ * a = 1/(1-2p-q) and b = 1/(1-2q), p and q being proportions.
+ * When "verbose" is set, a header and one line per pair go to "tree";
+ * the non-overlap warning always goes to stdout.
+ *
+ * Return value:
+ *   Number of pairs whose corrected distance was off scale (those pairs
+ *   are given the fixed distance 3.5).
+ */
+sint dna_distance_matrix(FILE *tree)
+{
+ sint m,n;
+ sint j,i;
+ sint res1, res2;
+ sint overspill = 0;
+ double p,q,e,a,b,k;
+
+ tree_gap_delete(); /* flag positions with gaps (tree_gaps[i] = 1 ) */
+
+ if(verbose) {
+ fprintf(tree,"\n");
+ fprintf(tree,"\n DIST = percentage divergence (/100)");
+ fprintf(tree,"\n p = rate of transition (A <-> G; C <-> T)");
+ fprintf(tree,"\n q = rate of transversion");
+ fprintf(tree,"\n Length = number of sites used in comparison");
+ fprintf(tree,"\n");
+ if(tossgaps) {
+ fprintf(tree,"\n All sites with gaps (in any sequence) deleted!");
+ fprintf(tree,"\n");
+ }
+ if(kimura) {
+ fprintf(tree,"\n Distances corrected by Kimura's 2 parameter model:");
+ fprintf(tree,"\n\n Kimura, M. (1980)");
+ fprintf(tree," A simple method for estimating evolutionary ");
+ fprintf(tree,"rates of base");
+ fprintf(tree,"\n substitutions through comparative studies of ");
+ fprintf(tree,"nucleotide sequences.");
+ fprintf(tree,"\n J. Mol. Evol., 16, 111-120.");
+ fprintf(tree,"\n\n");
+ }
+ }
+
+ for(m=1; m<last_seq-first_seq+1; ++m) /* for every pair of sequence */
+ for(n=m+1; n<=last_seq-first_seq+1; ++n) {
+ p = q = e = 0.0;
+ tmat[m][n] = tmat[n][m] = 0.0;
+ for(i=1; i<=seqlen_array[first_seq]; ++i) { /* the length of the first sequence bounds the scan */
+ j = boot_positions[i]; /* column actually read; presumably the bootstrap sampling map - confirm */
+ if(tossgaps && (tree_gaps[j] > 0) )
+ goto skip; /* gap position */
+ res1 = seq_array[m+first_seq-1][j];
+ res2 = seq_array[n+first_seq-1][j];
+ if( (res1 == gap_pos1) || (res1 == gap_pos2) ||
+ (res2 == gap_pos1) || (res2 == gap_pos2))
+ goto skip; /* gap in a seq*/
+ if(!use_ambiguities)
+ if( is_ambiguity(res1) || is_ambiguity(res2))
+ goto skip; /* ambiguity code in a seq*/
+ e = e + 1.0;
+ if(res1 != res2) {
+ if(transition(res1,res2))
+ p = p + 1.0;
+ else
+ q = q + 1.0;
+ }
+ skip:;
+ }
+
+
+ /* Kimura's 2 parameter correction for multiple substitutions */
+
+ if(!kimura) { /* uncorrected branch: plain proportion of mismatches */
+ if (e == 0) {
+ fprintf(stdout,"\n WARNING: sequences %d and %d are non-overlapping\n",m,n);
+ k = 0.0;
+ p = 0.0;
+ q = 0.0;
+ }
+ else {
+ k = (p+q)/e;
+ if(p > 0.0)
+ p = p/e;
+ else
+ p = 0.0;
+ if(q > 0.0)
+ q = q/e;
+ else
+ q = 0.0;
+ }
+ tmat[m][n] = tmat[n][m] = k;
+ if(verbose) /* if screen output */
+ fprintf(tree,
+ "%4d vs.%4d: DIST = %7.4f; p = %6.4f; q = %6.4f; length = %6.0f\n"
+ ,(pint)m,(pint)n,k,p,q,e);
+ }
+ else { /* corrected branch: p and q become proportions before a and b are formed */
+ if (e == 0) {
+ fprintf(stdout,"\n WARNING: sequences %d and %d are non-overlapping\n",m,n);
+ p = 0.0;
+ q = 0.0;
+ }
+ else {
+ if(p > 0.0)
+ p = p/e;
+ else
+ p = 0.0;
+ if(q > 0.0)
+ q = q/e;
+ else
+ q = 0.0;
+ }
+
+ if( ((2.0*p)+q) == 1.0 ) /* avoid dividing by zero; a = 0 is caught as off scale below */
+ a = 0.0;
+ else
+ a = 1.0/(1.0-(2.0*p)-q);
+
+ if( q == 0.5 ) /* same guard for the second term */
+ b = 0.0;
+ else
+ b = 1.0/(1.0-(2.0*q));
+
+/* watch for values going off the scale for the correction. */
+ if( (a<=0.0) || (b<=0.0) ) {
+ overspill++;
+ k = 3.5; /* arbitrary high score */
+ }
+ else
+ k = 0.5*log(a) + 0.25*log(b);
+ tmat[m][n] = tmat[n][m] = k;
+ if(verbose) /* if screen output */
+ fprintf(tree,
+ "%4d vs.%4d: DIST = %7.4f; p = %6.4f; q = %6.4f; length = %6.0f\n"
+ ,(pint)m,(pint)n,k,p,q,e);
+
+ }
+ }
+ return overspill; /* return the number of off-scale values */
+}
+
+
+/*
+ * prot_distance_matrix()
+ *
+ * Fills tmat[][] with the proportion of differing sites for every pair
+ * of sequences 1..nseqs, scanning seqlen_array[1] columns through
+ * boot_positions[].  With "kimura" set, proportions below 0.75 are
+ * corrected by formula, proportions up to 0.930 are looked up in
+ * dayhoff_pams[], and anything larger is set to 10.0 and counted.
+ *
+ * Return value:
+ *   Number of pairs whose distance was off scale.
+ */
+sint prot_distance_matrix(FILE *tree)
+{
+    sint s1, s2;
+    sint pos, site;
+    sint a, b;
+    sint off_scale = 0;
+    double diffs, sites, dist, table_entry;
+
+    tree_gap_delete();   /* flag positions with gaps (tree_gaps[i] = 1 ) */
+
+    if (verbose) {
+        fputs("\n"
+              "\n DIST = percentage divergence (/100)"
+              "\n Length = number of sites used in comparison"
+              "\n\n", tree);
+        if (tossgaps)
+            fputs("\n All sites with gaps (in any sequence) deleted"
+                  "\n", tree);
+        if (kimura)
+            fputs("\n Distances up tp 0.75 corrected by Kimura's empirical method:"
+                  "\n\n Kimura, M. (1983)"
+                  " The Neutral Theory of Molecular Evolution."
+                  "\n Page 75. Cambridge University Press, Cambridge, England."
+                  "\n\n", tree);
+    }
+
+    for (s1 = 1; s1 < nseqs; ++s1) {
+        for (s2 = s1 + 1; s2 <= nseqs; ++s2) {
+            diffs = sites = 0.0;
+            tmat[s1][s2] = tmat[s2][s1] = 0.0;
+
+            for (pos = 1; pos <= seqlen_array[1]; ++pos) {
+                site = boot_positions[pos];
+                if (tossgaps && (tree_gaps[site] > 0))
+                    continue;                       /* gap position */
+                a = seq_array[s1][site];
+                b = seq_array[s2][site];
+                if (a == gap_pos1 || a == gap_pos2 ||
+                    b == gap_pos1 || b == gap_pos2)
+                    continue;                       /* gap in a seq */
+                sites = sites + 1.0;
+                if (a != b)
+                    diffs = diffs + 1.0;
+            }
+
+            dist = (diffs <= 0.0) ? 0.0 : diffs / sites;
+
+            if (kimura) {
+                if (dist < 0.75) {
+                    /* use Kimura's formula */
+                    if (dist > 0.0)
+                        dist = - log(1.0 - dist - (dist * dist/5.0) );
+                }
+                else if (dist > 0.930) {
+                    off_scale++;
+                    dist = 10.0;                    /* arbitrarily set to 1000% */
+                }
+                else {
+                    table_entry = (dist*1000.0) - 750.0;
+                    dist = (double)dayhoff_pams[(int)table_entry];
+                    dist = dist/100.0;
+                }
+            }
+
+            tmat[s1][s2] = tmat[s2][s1] = dist;
+            if (verbose)                            /* if screen output */
+                fprintf(tree,
+                        "%4d vs.%4d DIST = %6.4f; length = %6.0f\n",
+                        (pint)s1, (pint)s2, dist, sites);
+        }
+    }
+    return off_scale;
+}
+
+
+void guide_tree(FILE *tree,sint firstseq,sint numseqs)
+/*
+ Routine for producing unrooted NJ trees from separately aligned
+ pairwise distances. This produces the GUIDE DENDROGRAMS in
+ PHYLIP format.
+
+ Sets first_seq/last_seq from the arguments, switches verbose off,
+ writes the tree to "tree" and closes that stream before returning.
+ With exactly two sequences each branch gets half of
+ tmat[firstseq][firstseq+1]; otherwise nj_tree() and
+ print_phylip_tree() do the work.
+*/
+{
+    static char **standard_tree;
+    sint i;
+    sint dim;
+    float dist;
+
+    phylip_phy_tree_file = tree;
+    verbose = FALSE;
+    first_seq = firstseq;
+    last_seq = first_seq + numseqs - 1;
+
+    if (numseqs == 2) {
+        dist = tmat[firstseq][firstseq+1]/2.0;
+        fprintf(tree,"(%s:%0.5f,%s:%0.5f);\n",
+                names[firstseq],dist,names[firstseq+1],dist);
+    }
+    else {
+        dim = last_seq - first_seq + 2;
+        standard_tree = (char **) ckalloc( dim * sizeof (char *) );
+        for (i = 0; i < dim; i++)
+            standard_tree[i] = (char *) ckalloc( dim * sizeof(char));
+
+        nj_tree(standard_tree,clustal_phy_tree_file);
+
+        print_phylip_tree(standard_tree,phylip_phy_tree_file,0);
+
+        if (left_branch != NULL) left_branch = ckfree((void *)left_branch);
+        if (right_branch != NULL) right_branch = ckfree((void *)right_branch);
+        if (tkill != NULL) tkill = ckfree((void *)tkill);
+        if (av != NULL) av = ckfree((void *)av);
+        /* rows are allocated from index 0, so release from 0 as well;
+           starting at 1 leaked standard_tree[0] on every call */
+        for (i = 0; i < dim; i++)
+            standard_tree[i] = ckfree((void *)standard_tree[i]);
+        standard_tree = ckfree((void *)standard_tree);
+    }
+    fclose(phylip_phy_tree_file);
+
+}
+
+/* JP */
+void guide_tree_consensus(FILE *tree,sint firstseq,sint numseqs)
+/*
+ Routine for producing unrooted NJ trees from separately aligned
+ pairwise distances. This produces the GUIDE DENDROGRAMS in
+ PHYLIP format.
+
+ This entry point performed exactly the same steps as guide_tree()
+ (same globals set, same output, same stream closed), so it now
+ forwards to it instead of carrying a second copy of the code.
+*/
+{
+    guide_tree(tree, firstseq, numseqs);
+}
+
+/*
+ * is_ambiguity()
+ *
+ * Returns FALSE when use_ambiguities is TRUE, or when
+ * amino_acid_codes[c] is one of the letters A, C, G, T or U.
+ * Returns TRUE for every other code.
+ */
+static Boolean is_ambiguity(char c)
+{
+    static const char plain_bases[] = "ACGTU";
+    const char *base;
+
+    if (use_ambiguities == TRUE)
+        return FALSE;
+
+    for (base = plain_bases; *base != '\0'; base++) {
+        if (amino_acid_codes[c] == *base)
+            return FALSE;
+    }
+    return TRUE;
+}
+
+void calculate_tmat(int startnum, int endnum);
+/*
+ * generate_subtree_roots()
+ *
+ * For each of the "filecount" input groups, builds a file name from
+ * seqname plus the group number, opens it with open_explicit_file(),
+ * computes the group's distance matrix with calculate_tmat() and writes
+ * its guide tree with guide_tree().  Stops at the first file that cannot
+ * be opened or whose name does not fit the name buffer.
+ *
+ * The phylip_file_name argument is not used by this function.
+ */
+void generate_subtree_roots(char *phylip_file_name) {
+
+    int i, written;
+    int startn, endn; // first and last seq_array index of the current subtree
+    char phylipname[400];
+
+    startn = 1;
+    for (i = 1; i <= filecount; i++) {
+        endn = startn + seqnumlist[i] - 1;
+        /* bounded formatting: seqname has no visible length limit here */
+        written = snprintf(phylipname, sizeof phylipname, "%s%d", seqname, i);
+        if (written < 0 || (size_t)written >= sizeof phylipname) {
+            fprintf(stderr, "Tree file name too long for subtree %d\n", i);
+            return;
+        }
+        fprintf(stdout, "%s\n", phylipname); fflush(stdout);
+        if ((tree = open_explicit_file(phylipname)) == NULL) return;
+        calculate_tmat(startn, endn);
+        /* calculate_tmat() indexes its matrix from 1, hence first sequence 1 */
+        guide_tree(tree, 1, seqnumlist[i]);
+        startn += seqnumlist[i];
+    }
+}
+
+/*
+ * calculate_tmat()
+ *
+ * Allocates a fresh tmat[][] for sequences startnum..endnum (indexed
+ * from 1) and stores, for every pair, 1 minus the fraction of identical
+ * residues among the columns where neither sequence holds gap_pos2.
+ * A pair with no such column gets distance 1.  One progress line per
+ * pair is written to stdout.
+ *
+ * NOTE(review): the previous tmat is overwritten without being freed,
+ * and the scan length is seqlen_array of the second sequence only;
+ * presumably all sequences in a group have equal length - confirm.
+ */
+void calculate_tmat(int startnum, int endnum)
+{
+    const int count = endnum - startnum + 1;
+    int first, second, pos;
+    int row, col;
+    int same, compared;
+
+    tmat = (double **) ckalloc( (count + 1) * sizeof(double *) );
+    for (row = 1; row <= count; row++)
+        tmat[row] = (double *) ckalloc( (count + 1) * sizeof(double) );
+
+    for (first = startnum; first <= endnum; first++) {
+        row = first - startnum + 1;
+        for (second = first + 1; second <= endnum; second++) {
+            col = second - startnum + 1;
+            fprintf(stdout, "m: %d n: %d %d %d\n", row, col,
+                    seqlen_array[second], seqlen_array[first]);
+
+            same = compared = 0;
+            for (pos = 1; pos <= seqlen_array[second]; pos++) {
+                if (seq_array[first][pos] == gap_pos2 ||
+                    seq_array[second][pos] == gap_pos2)
+                    continue;
+                compared++;
+                if (seq_array[first][pos] == seq_array[second][pos])
+                    same++;
+            }
+
+            if (compared > 0)
+                tmat[row][col] = tmat[col][row] = 1.0 - 1.0*same/compared;
+            else
+                tmat[row][col] = tmat[col][row] = 1;
+        }
+    }
+}
diff --git a/util.c b/util.c
new file mode 100644
index 0000000..16dc36e
--- /dev/null
+++ b/util.c
@@ -0,0 +1,405 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <stdarg.h>
+#include <ctype.h>
+#include "pcma.h"
+
+extern char **seq_array;
+extern sint *seqlen_array;
+extern char **names,**titles;
+extern sint *output_index;
+extern sint *seq_weight;
+extern double **tmat;
+
+
+/*
+ * ckalloc()
+ *
+ * Allocates "bytes" bytes of zero-filled memory with calloc().
+ * Calls fatal("Out of memory\n") when the allocation fails.
+ * Return value:
+ *   Generic pointer to the newly allocated memory.
+ */
+
+void *ckalloc(size_t bytes)
+{
+    void *mem = calloc(bytes, sizeof(char));
+
+    if (mem == NULL)
+        fatal("Out of memory\n");
+
+    return mem;
+}
+
+/*
+ * ckrealloc()
+ *
+ * Resizes the block at "ptr" to "bytes" bytes with realloc().
+ * Calls fatal() when ptr is NULL ("Bad call to ckrealloc") or when
+ * realloc() fails ("Out of memory").
+ * Return value:
+ *   Generic pointer to the re-allocated memory.
+ */
+
+void *ckrealloc(void *ptr, size_t bytes)
+{
+    void *grown;
+
+    if (ptr == NULL) {
+        fatal("Bad call to ckrealloc\n");
+        return NULL;
+    }
+
+    grown = realloc(ptr, bytes);
+    if (grown == NULL)
+        fatal("Out of memory\n");
+
+    return grown;
+}
+
+/*
+ * ckfree()
+ *
+ * Releases memory obtained from ckalloc()/ckrealloc().  A NULL argument
+ * is reported through warning() and otherwise ignored.
+ * Return value:
+ *   Always NULL, so callers can write  p = ckfree(p);
+ */
+
+void *ckfree(void *ptr)
+{
+    if (ptr != NULL) {
+        free(ptr);
+        return NULL;
+    }
+
+    warning("Bad call to ckfree\n");
+    return NULL;
+}
+
+
+/*
+ * rtrim()
+ *
+ * Removes trailing white space (as classified by isspace()) from a
+ * string, in place.
+ *
+ * The scan is bounded by the start of the string, so an empty string
+ * or one made only of white space becomes "" instead of reading before
+ * the buffer.
+ *
+ * Return values:
+ *   Pointer to the processed string
+ */
+
+char * rtrim(char *str)
+{
+    size_t len = strlen(str);
+
+    /* cast: <ctype.h> functions are undefined for negative char values */
+    while (len > 0 && isspace((unsigned char)str[len - 1]))
+        len--;
+
+    str[len] = EOS;
+
+    return str;
+}
+
+
+/*
+ * blank_to_()
+ *
+ * Rewrites a string in place so that every blank, and every one of
+ * the characters  , ; : ( )  becomes an underscore.
+ *
+ * Return value:
+ *   Pointer to the processed string
+ */
+
+char * blank_to_(char *str)
+{
+    char *p;
+
+    for (p = str; *p != '\0'; p++) {
+        switch (*p) {
+        case ' ':
+        case ';':
+        case ',':
+        case '(':
+        case ')':
+        case ':':
+            *p = '_';
+            break;
+        default:
+            break;
+        }
+    }
+
+    return str;
+}
+
+
+/*
+ * upstr()
+ *
+ * Converts string str to uppercase, in place.
+ * Return values:
+ *   Pointer to the converted string.
+ */
+
+char * upstr(char *str)
+{
+    char *s;
+
+    /* cast: toupper() is undefined for negative char values */
+    for (s = str; *s != '\0'; s++)
+        *s = (char)toupper((unsigned char)*s);
+
+    return str;
+}
+
+/*
+ * lowstr()
+ *
+ * Converts string str to lower case, in place.
+ * Return values:
+ *   Pointer to the converted string.
+ */
+
+char * lowstr(char *str)
+{
+    char *s;
+
+    /* cast: tolower() is undefined for negative char values */
+    for (s = str; *s != '\0'; s++)
+        *s = (char)tolower((unsigned char)*s);
+
+    return str;
+}
+
+/*
+ * getstr()
+ *
+ * Prints the prompt "instr: " on stdout and reads one line from stdin
+ * into outstr, without the trailing newline.  On end of input outstr
+ * is set to the empty string.
+ *
+ * gets() has been replaced by a bounded read.
+ * NOTE(review): the interface carries no buffer size; MAXLINE is used
+ * because do_system() passes a MAXLINE buffer - confirm that every
+ * other caller provides at least MAXLINE bytes.
+ */
+void getstr(char *instr,char *outstr)
+{
+    size_t len;
+    int c;
+
+    fprintf(stdout,"%s: ",instr);
+
+    if (fgets(outstr, MAXLINE, stdin) == NULL) {
+        outstr[0] = EOS;
+        return;
+    }
+
+    len = strlen(outstr);
+    if (len > 0 && outstr[len - 1] == '\n') {
+        outstr[len - 1] = EOS;
+    }
+    else {
+        /* over-long line: discard the rest so it is not taken as the next answer */
+        while ((c = getchar()) != '\n' && c != EOF)
+            ;
+    }
+}
+
+/*
+ * getreal()
+ *
+ * Prompts on stdout for a real number between minx and maxx and reads
+ * it from stdin, asking again until the value is in range.
+ *
+ * An empty line or end of input returns "def".  Input that does not
+ * start with a number is rejected and the prompt repeated; previously
+ * such input left the result uninitialised.  gets() has been replaced
+ * by a bounded read.
+ *
+ * Return value:
+ *   The accepted value, or def.
+ */
+double getreal(char *instr,double minx,double maxx,double def)
+{
+    int status, c;
+    float ret;
+    char line[MAXLINE];
+
+    while (TRUE) {
+        fprintf(stdout,"%s (%.1f-%.1f) [%.1f]: ",instr,minx,maxx,def);
+        if (fgets(line, sizeof line, stdin) == NULL)
+            return def;
+        if (strchr(line, '\n') == NULL) {
+            /* over-long line: discard the remainder */
+            while ((c = getchar()) != '\n' && c != EOF)
+                ;
+        }
+        status = sscanf(line,"%f",&ret);
+        if (status == EOF) return def;      /* blank line: keep the default */
+        if (status != 1) {
+            fprintf(stdout,"ERROR: Not a number\n\n");
+            continue;
+        }
+        if (ret > maxx) {
+            fprintf(stdout,"ERROR: Max. value=%.1f\n\n",maxx);
+            continue;
+        }
+        if (ret < minx) {
+            fprintf(stdout,"ERROR: Min. value=%.1f\n\n",minx);
+            continue;
+        }
+        break;
+    }
+    return (double)ret;
+}
+
+
+/*
+ * getint()
+ *
+ * Prompts on stdout for an integer between minx and maxx and reads it
+ * from stdin, asking again until the value is in range.
+ *
+ * An empty line or end of input returns "def".  Input that does not
+ * start with an integer is rejected and the prompt repeated; previously
+ * such input left the result uninitialised.  gets() has been replaced
+ * by a bounded read.
+ *
+ * Return value:
+ *   The accepted value, or def.
+ */
+int getint(char *instr,int minx,int maxx, int def)
+{
+    int ret, status, c;
+    char line[MAXLINE];
+
+    while (TRUE) {
+        fprintf(stdout,"%s (%d..%d) [%d]: ",
+                instr,(pint)minx,(pint)maxx,(pint)def);
+        if (fgets(line, sizeof line, stdin) == NULL)
+            return def;
+        if (strchr(line, '\n') == NULL) {
+            /* over-long line: discard the remainder */
+            while ((c = getchar()) != '\n' && c != EOF)
+                ;
+        }
+        status = sscanf(line,"%d",&ret);
+        if (status == EOF) return def;      /* blank line: keep the default */
+        if (status != 1) {
+            fprintf(stdout,"ERROR: Not a number\n\n");
+            continue;
+        }
+        if (ret > maxx) {
+            fprintf(stdout,"ERROR: Max. value=%d\n\n",(pint)maxx);
+            continue;
+        }
+        if (ret < minx) {
+            fprintf(stdout,"ERROR: Min. value=%d\n\n",(pint)minx);
+            continue;
+        }
+        break;
+    }
+    return ret;
+}
+
+/*
+ * do_system()
+ *
+ * Asks the user for a command line through getstr() and, unless the
+ * answer is empty, hands it to system().  Two newlines are written to
+ * stdout afterwards.
+ */
+void do_system(void)
+{
+    char command[MAXLINE];
+
+    getstr("\n\nEnter system command", command);
+
+    if (command[0] != EOS)
+        system(command);
+
+    fprintf(stdout, "\n\n");
+}
+
+
+/*
+ * linetype()
+ *
+ * Tests whether "line" begins with the string "code".
+ */
+Boolean linetype(char *line,char *code)
+{
+    size_t prefix_len = strlen(code);
+    Boolean matches = ( strncmp(line, code, prefix_len) == 0 );
+
+    return matches;
+}
+
+/*
+ * keyword()
+ *
+ * Tests whether the first white-space delimited word of "line" is
+ * exactly "code".
+ *
+ * The word is compared in place; it used to be copied into a MAXLINE
+ * buffer with no bound, which overflowed for longer words.
+ */
+Boolean keyword(char *line,char *code)
+{
+    size_t len = 0;
+
+    /* cast: isspace() is undefined for negative char values */
+    while (line[len] != EOS && !isspace((unsigned char)line[len]))
+        len++;
+
+    return( strlen(code) == len && strncmp(line, code, len) == 0 );
+}
+
+/*
+ * blankline()
+ *
+ * Returns TRUE when "line", up to its newline or end, holds nothing
+ * but digits, white space and the characters  * : .   and FALSE as
+ * soon as any other character is met.
+ */
+Boolean blankline(char *line)
+{
+    int i;
+    unsigned char ch;
+
+    for (i = 0; line[i] != '\n' && line[i] != EOS; i++) {
+        /* unsigned char: <ctype.h> functions are undefined for negative values */
+        ch = (unsigned char)line[i];
+        if (isdigit(ch) || isspace(ch))
+            continue;
+        if (line[i] == '*' || line[i] == ':' || line[i] == '.')
+            continue;
+        return FALSE;
+    }
+    return TRUE;
+}
+
+
+/*
+ * get_path()
+ *
+ * Copies "str" to "path" and then scans it from the end.  If a '.' is
+ * met before any DIRDELIM, path is cut just after that '.'.  Otherwise
+ * (DIRDELIM met first, or neither present) a '.' is appended to the
+ * full copy.
+ */
+void get_path(char *str,char *path)
+{
+    int pos;
+    int dot = -1;       /* index of the last '.' in the final component, if any */
+
+    strcpy(path, str);
+
+    for (pos = (int)strlen(path) - 1; pos >= 0; pos--) {
+        if (str[pos] == DIRDELIM)
+            break;
+        if (str[pos] == '.') {
+            dot = pos;
+            break;
+        }
+    }
+
+    if (dot >= 0)
+        path[dot + 1] = EOS;
+    else
+        strcat(path, ".");
+}
+
+/*
+ * alloc_aln()
+ *
+ * Allocates the per-alignment arrays for nseqs sequences, all with
+ * nseqs+1 slots: seqlen_array, seq_array (every entry NULL), names and
+ * titles (one buffer per sequence 1..nseqs), output_index, tmat
+ * (entries 1..nseqs set to 0.0) and seq_weight (100 for every sequence).
+ */
+void alloc_aln(sint nseqs)
+{
+    sint row, col;
+
+    seqlen_array = (sint *)ckalloc((nseqs + 1) * sizeof(sint));
+
+    seq_array = (char **)ckalloc((nseqs + 1) * sizeof(char *));
+    for (row = 0; row <= nseqs; row++)
+        seq_array[row] = NULL;
+
+    names = (char **)ckalloc((nseqs + 1) * sizeof(char *));
+    for (row = 1; row <= nseqs; row++)
+        names[row] = (char *)ckalloc((MAXNAMES + 1) * sizeof(char));
+
+    titles = (char **)ckalloc((nseqs + 1) * sizeof(char *));
+    for (row = 1; row <= nseqs; row++)
+        titles[row] = (char *)ckalloc((MAXTITLES + 1) * sizeof(char));
+
+    output_index = (sint *)ckalloc((nseqs + 1) * sizeof(sint));
+
+    tmat = (double **)ckalloc((nseqs + 1) * sizeof(double *));
+    for (row = 1; row <= nseqs; row++) {
+        tmat[row] = (double *)ckalloc((nseqs + 1) * sizeof(double));
+        for (col = 1; col <= nseqs; col++)
+            tmat[row][col] = 0.0;
+    }
+
+    seq_weight = (sint *)ckalloc((nseqs + 1) * sizeof(sint));
+    for (row = 1; row <= nseqs; row++)
+        seq_weight[row] = 100;
+}
+
+/*
+ * realloc_aln()
+ *
+ * Grows the per-alignment arrays to first_seq+nseqs+1 slots so that
+ * nseqs further sequences can be stored from index first_seq onwards.
+ * New seq_array entries are set to NULL, new names/titles buffers are
+ * allocated, new weights are set to 100, existing tmat rows are
+ * widened, new tmat rows are allocated, and the cells linking old and
+ * new sequences are set to 0.0.
+ */
+void realloc_aln(sint first_seq,sint nseqs)
+{
+    const sint end = first_seq + nseqs;     /* one past the last new sequence */
+    sint r, c;
+
+    seqlen_array = (sint *)ckrealloc(seqlen_array, (end + 1) * sizeof(sint));
+
+    seq_array = (char **)ckrealloc(seq_array, (end + 1) * sizeof(char *));
+    for (r = first_seq; r <= end; r++)
+        seq_array[r] = NULL;
+
+    names = (char **)ckrealloc(names, (end + 1) * sizeof(char *));
+    for (r = first_seq; r < end; r++)
+        names[r] = (char *)ckalloc((MAXNAMES + 1) * sizeof(char));
+
+    titles = (char **)ckrealloc(titles, (end + 1) * sizeof(char *));
+    for (r = first_seq; r < end; r++)
+        titles[r] = (char *)ckalloc((MAXTITLES + 1) * sizeof(char));
+
+    output_index = (sint *)ckrealloc(output_index, (end + 1) * sizeof(sint));
+
+    seq_weight = (sint *)ckrealloc(seq_weight, (end + 1) * sizeof(sint));
+    for (r = first_seq; r < end; r++)
+        seq_weight[r] = 100;
+
+    tmat = (double **)ckrealloc(tmat, (end + 1) * sizeof(double *));
+    for (r = 1; r < first_seq; r++)
+        tmat[r] = (double *)ckrealloc(tmat[r], (end + 1) * sizeof(double));
+    for (r = first_seq; r < end; r++)
+        tmat[r] = (double *)ckalloc((end + 1) * sizeof(double));
+
+    for (r = 1; r < first_seq; r++) {
+        for (c = first_seq; c < end; c++) {
+            tmat[r][c] = 0.0;
+            tmat[c][r] = 0.0;
+        }
+    }
+}
+
+/*
+ * free_aln()
+ *
+ * Releases the arrays set up by alloc_aln() for sequences 1..nseqs and
+ * leaves every global pointer holding the value returned by ckfree().
+ * Does nothing when nseqs is zero or negative.
+ */
+void free_aln(sint nseqs)
+{
+    sint k;
+
+    if (nseqs <= 0)
+        return;
+
+    seqlen_array = ckfree(seqlen_array);
+
+    for (k = 1; k <= nseqs; ++k)
+        seq_array[k] = ckfree(seq_array[k]);
+    seq_array = ckfree(seq_array);
+
+    for (k = 1; k <= nseqs; ++k)
+        names[k] = ckfree(names[k]);
+    names = ckfree(names);
+
+    for (k = 1; k <= nseqs; ++k)
+        titles[k] = ckfree(titles[k]);
+    titles = ckfree(titles);
+
+    output_index = ckfree(output_index);
+
+    seq_weight = ckfree(seq_weight);
+
+    for (k = 1; k <= nseqs; ++k)
+        tmat[k] = ckfree(tmat[k]);
+    tmat = ckfree(tmat);
+}
+
+/*
+ * alloc_seq()
+ *
+ * Allocates a buffer of length+2 characters for sequence seq_no and
+ * stores it in seq_array[seq_no].
+ */
+void alloc_seq(sint seq_no,sint length)
+{
+    char *residues = (char *)ckalloc((length + 2) * sizeof(char));
+
+    seq_array[seq_no] = residues;
+}
+
+/*
+ * realloc_seq()
+ *
+ * Resizes the buffer of sequence seq_no to length+2 characters.
+ *
+ * The result of realloc() is checked before it replaces the stored
+ * pointer: on failure fatal() is called, as the ck* allocators do, and
+ * the existing buffer is left in place.  realloc() is called directly
+ * rather than through ckrealloc() so that a sequence slot that is
+ * still NULL keeps working as a plain allocation.
+ */
+void realloc_seq(sint seq_no,sint length)
+{
+    char *grown = (char *)realloc(seq_array[seq_no], (length+2) * sizeof (char));
+
+    if (grown == NULL) {
+        fatal("Out of memory\n");
+        return;
+    }
+    seq_array[seq_no] = grown;
+}
+
+/*
+ * free_seq()
+ *
+ * Releases the buffer of sequence seq_no through ckfree() and stores
+ * the value ckfree() returns back into seq_array[seq_no].
+ */
+void free_seq(sint seq_no)
+{
+    char *released = ckfree(seq_array[seq_no]);
+
+    seq_array[seq_no] = released;
+}
+
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/pcma.git
More information about the debian-med-commit
mailing list