[med-svn] r129 - in trunk/packages/poa/trunk: . debian

Charles Plessy charles-guest at costa.debian.org
Tue Sep 26 04:38:18 UTC 2006


Author: charles-guest
Date: 2006-09-26 04:38:07 +0000 (Tue, 26 Sep 2006)
New Revision: 129

Added:
   trunk/packages/poa/trunk/align_score.h
   trunk/packages/poa/trunk/blosum80_trunc.mat
   trunk/packages/poa/trunk/make_pscores.pl
   trunk/packages/poa/trunk/msa_format.c
   trunk/packages/poa/trunk/msa_format.h
   trunk/packages/poa/trunk/multidom.pscore
Modified:
   trunk/packages/poa/trunk/Makefile
   trunk/packages/poa/trunk/README
   trunk/packages/poa/trunk/align_lpo.c
   trunk/packages/poa/trunk/align_lpo2.c
   trunk/packages/poa/trunk/align_lpo_po.c
   trunk/packages/poa/trunk/align_lpo_po2.c
   trunk/packages/poa/trunk/align_score.c
   trunk/packages/poa/trunk/buildup_lpo.c
   trunk/packages/poa/trunk/debian/changelog
   trunk/packages/poa/trunk/debian/rules
   trunk/packages/poa/trunk/lpo.c
   trunk/packages/poa/trunk/lpo.h
   trunk/packages/poa/trunk/lpo_format.c
   trunk/packages/poa/trunk/main.c
   trunk/packages/poa/trunk/seq_util.c
   trunk/packages/poa/trunk/seq_util.h
Log:
New upstream release ; adding a test rule

Modified: trunk/packages/poa/trunk/Makefile
===================================================================
--- trunk/packages/poa/trunk/Makefile	2006-09-26 04:35:01 UTC (rev 128)
+++ trunk/packages/poa/trunk/Makefile	2006-09-26 04:38:07 UTC (rev 129)
@@ -13,6 +13,7 @@
 	black_flag.o \
 	seq_util.o \
 	fasta_format.o \
+	msa_format.o \
 	align_lpo2.o \
 	align_lpo_po2.o \
 	buildup_lpo.o \
@@ -27,7 +28,7 @@
 
 CC = gcc
 #CFLAGS= -g -ansi-strict -W -Wall -DUSE_WEIGHTED_LINKS -DUSE_PROJECT_HEADER -I.
-CFLAGS= -g -DUSE_WEIGHTED_LINKS -DUSE_PROJECT_HEADER -I. -DTRUNCATE_GAP_LENGTH=8
+CFLAGS= -g -DUSE_WEIGHTED_LINKS -DUSE_PROJECT_HEADER -I.
 # -I$(HOME)/lib/include
 # -DREPORT_MAX_ALLOC
 
@@ -48,5 +49,3 @@
 what:
 	@echo poa: partial-order based sequence alignment program
 	@echo liblpo.a: partial-order alignment and utilities function library
-
-

Modified: trunk/packages/poa/trunk/README
===================================================================
--- trunk/packages/poa/trunk/README	2006-09-26 04:35:01 UTC (rev 128)
+++ trunk/packages/poa/trunk/README	2006-09-26 04:38:07 UTC (rev 129)
@@ -1,19 +1,25 @@
-POA installation notes Sept. 2001
+
+-- POA INSTALLATION NOTES --
+September 2001, updated March 2004.
+
 Chris Lee
 Dept. of Chemistry & Biochemistry
 UCLA
 
+
 I. COMPILATION
+
 To compile this program, simply type 'make poa'.
+This produces an executable for sequence alignment (poa) and also a
+linkable library liblpo.a.  The software has been compiled and tested
+on LINUX and Mac OS X.
 
-This produces an executable for sequence alignment (poa) and also a linkable 
-library liblpo.a.  The software has been compiled and tested on LINUX.
 
 II. RUNNING POA
 
-Poa has a variety of command line options.  Running poa without any arguments 
-will print a list of the possible command line arguments.  POA may be used to
-construct a PO-MSA, or to analyze a PO-MSA.   
+POA has a variety of command line options.  Running POA without any
+arguments will print a list of the possible command line arguments.
+POA may be used to construct a PO-MSA, or to analyze a PO-MSA.
 
 A.  Constructing a PO-MSA
 -------------------------
@@ -21,62 +27,116 @@
 1.  Required Input:
 
 i. An Alignment Score Matrix File:
-A score matrix file is required, because poa uses it to get the residue
-alphabet and indexing.  Even if poa is not being used to perform 
-multiple sequence alignment, this file must be provided. Any basic alignment 
-matrix which may be used with BLAST may be used here.  This file must be the 
-first command line argument without a flag in order to be interpreted by poa 
-as the score matrix file.  An example score matrix file, blosum80.mat, is 
-provided in this directory. 
 
-Note:  Poa is Case Sensitive.
-Poa can align amino acid sequences to nucleotide sequences.  In order to
-distinguish amino acid residues from nucleic acid residues, poa is case 
-sensitive.  Residues that are upper case are interpreted as amino acids
-while residues that are lower case are interpreted as nucleotides.  Poa
-can handle mixed score matrices containing scores for aligning amino acid 
-residues to nucleotide residues as long as the column and row labels of the 
-matrix are case sensitive.  See blosum80.mat file for an example of just
-such a mixed score matrix.
+A score matrix file is required, because POA uses it to get the
+residue alphabet and indexing.  Even if POA is not being used to
+perform multiple sequence alignment, this file must be provided.  Any
+basic alignment matrix which may be used with BLAST may be used here.
+This file must be the first command line argument without a flag in
+order to be interpreted by POA as the score matrix file.  Two example
+score matrix files, blosum80.mat and blosum80_trunc.mat, are provided
+in this directory.  They includes scores for matching nucleotides, as
+well as amino acids.  Header lines may be used to specify gap
+parameters, as in the examples:
 
-ii. A FASTA File:
-A FASTA file is required only if poa is being used to construct a 
+GAP-PENALTIES=A B C
+GAP-TRUNCATION-LENGTH=T
+GAP-DECAY-LENGTH=D
+
+means that the gap opening penalty is A; the gap extension penalty is
+B until the gap length reaches T; the gap extension penalty decreases
+linearly from B to C for gap lengths between T and T+D; and the gap
+extension penalty is C for all longer gaps.  An additional line,
+"GAP-PENALTIES-X=Ax Bx Cx", can be inserted after the GAP-PENALTIES
+line to specify the opening and extension penalties for gaps in the
+first sequence relative to the second sequence in an alignment, i.e.,
+for asymmetric gap scoring.  Use the -v flag to see what gap penalties
+POA is using for a given run.
+
+NOTE: POA is case-sensitive.
+In order to distinguish amino acid residues from nucleic acid
+residues, POA is case sensitive.  Residues that are uppercase are
+interpreted as amino acids, while residues that are lowercase are
+interpreted as nucleotides.  POA can handle mixed score matrices
+containing both amino acid and nucleotide scores, as long as the
+column and row labels of the matrix are case-sensitive.  The
+blosum80.mat file is an example.
+
+ii.  A FASTA file, or MSA Files in PO, CLUSTAL or PIR Format 
+A FASTA file is required only if POA is being used to construct a 
 new PO-MSA from a list of sequences, or to align a list of sequences to
 an already existing PO-MSA (see Analyzing a PO-MSA below).  This FASTA file 
-should contain sequences to be aligned by poa.  The command line argument
-to get poa to accept a FASTA file as input is '-read_fasta FILENAME'.  Poa 
-will interpret FILENAME as the FASTA sequence file.  An example file, 
-multidom.seq, is provided in this directory.  
+should contain sequences to be aligned by POA.  The command line argument
+to get POA to accept a FASTA file as input is '-read_fasta FILENAME'.  POA
+will interpret FILENAME as the FASTA sequence file.  An example file,
+multidom.seq, is provided in this directory.
 
-Poa is case sensitive (see note above).  All residues in protein sequences 
-in the FASTA file must be upper case to be interpreted as amino acids by poa, 
-while all residues in nucleotide sequences in the FASTA file must be lower 
-case to be interpreted as nucleotides by poa.  To switch the case of all of 
-the letters in the FASTA file to upper case, use the '-toupper' command 
-line argument.  To switch the case of all the letters in the FASTA file to 
-lower case, use the '-tolower' command line argument.
+POA is case-sensitive (see NOTE above).  All residues in the FASTA
+file must be uppercase to be interpreted as amino acids by POA, or
+lowercase to be interpreted as nucleotides.  To switch the case of all
+of the letters in the FASTA file to uppercase, use the '-toupper'
+command line argument.  To switch the case of all the letters in the
+FASTA file to lowercase, use the '-tolower' command line argument.
 
+POA will also read in a set of MSA files to be aligned (see below).
+
+
 2.  MSA Construction Options:
 
-i.  Aggressive Fusion:
-During the building up of a PO-MSA, if a node i with label 'a' is aligned
-to an align ring which already contains a node j with label 'a', poa simply
-adds the node to the align ring.  It is possible to force poa to do 
-aggressive fusion, so that when a node i with label 'a' is aligned to
-an align ring which already contains a node j with label 'a', node i is
-fused to node j.  The command line argument for accomplishing this is 
-'-fuse_all'.
+i.  Global vs. Local Alignment
 
+POA will build alignments using local or global alignment.  The
+default is set to local aligment.  To call global alignment, use the
+'-do_global' flag.
+
+ii.  Iterative vs. Progressive Alignment
+
+POA will build the alignment iteratively, aligning sequences and MSAs
+in the order they are provided.  It will also align sequences and MSAs
+in the order dictated by a guide tree built from a matrix of pairwise
+similarity scores.  To call progressive alignment, use the
+'-do_progressive' flag.
+
+To provide POA with a set of pairwise similarity scores, use the
+'-read_pairscores' flag followed by the name of the text file
+containing the list of pairwise similarity scores.  This file should
+be a tab-delimited file, where each row contains two sequence names
+followed by the pairwise similarity score.  The included file
+"multidom.pscore" shows an example.
+
+Example row in pairscore file:
+ABL1_HUMAN	MATK_HUMAN	260.0
+
+To quickly compute a set of pairwise similarity scores, run BLAST, and
+set the similarity scores to the set of BLAST bitscores.  A simple
+BLAST driver/parser is provided as "make_pscores.pl"; you may need to
+modify this script for your particular configuration.  If the
+'-do_progressive' flag is specified without a corresponding pairscore
+file, POA will compute pairwise similarity scores itself, by
+performing all pairwise alignments.  This is very slow, and we do not
+recommend it.
+
+iii.  Aggressive Fusion:
+
+By default, during the building up of a PO-MSA, if a node i with label
+'A' is aligned to a node j with label 'B' that belongs to an align
+ring containing a third node k with label 'A', POA simply adds node i
+to the j-k align ring.  It is possible to force POA to do aggressive
+fusion, so that node i is instead fused to node k.  Use the '-fuse_all'
+flag to accomplish this.
+
+
 3.  MSA Output Formats:
-Poa can output a PO-MSA in several formats simultaneously including CLUSTAL, 
-PIR, and PO.  The PO format is the best format since it contains all of the 
-information in the PO-MSA.  The other formats accurately represent the MSA, 
-but since they are RC-MSA formats, they may lose some of the information in the 
-full PO-MSA.
 
+POA can output a PO-MSA in several formats simultaneously, including
+CLUSTAL, PIR, and PO.  The PO format is the best format since it
+contains all of the information in the PO-MSA.  The other formats
+accurately represent the MSA, but since they are RC-MSA formats, they
+may lose some of the information in the full PO-MSA.
+
 i.  CLUSTAL format:
-This format is the standard CLUSTAL format.  The command line argument to get
-the MSA output in this format is '-clustal FILENAME'.
+This format is the standard CLUSTAL format.  The command line argument
+to get the MSA output in this format is '-clustal FILENAME'.
 
 ii.  PIR format:
 This format is the standard PIR format, which is like FASTA with a '.'
@@ -84,87 +144,112 @@
 output in this format is '-pir FILENAME'.
 
 iii.  PO format:
-This format is the standard PO format.  It is described below in the section
-PO format.  The command line argument to get the MSA output in this format is 
-'-po FILENAME'.
+This format is the standard PO format.  It is described below in the
+section PO format.  The command line argument to get the MSA output in
+this format is '-po FILENAME'.
 
-Example:  Constructing a MSA of Four Protein Sequences
-Running poa with the following statement will take the fasta formatted 
-sequences in the multidom.seq file, construct a PO-MSA using the scoring 
-matrix in the file blosum80.mat, and then output the PO-MSA in CLUSTAL format
-in the file multidom.aln.
+EXAMPLE: Constructing an MSA of Four Protein Sequences
 
-poa -clustal multidom.aln blosum80.mat multidom.seq
+Running POA with the following statement will take the FASTA-formatted
+sequences in the multidom.seq file, construct a PO-MSA using the
+scoring matrix in the file blosum80.mat, and then output the PO-MSA in
+CLUSTAL format to the file multidom.aln.
 
-The output should be identical to the results of figs. 6 & 7 in the paper.
+poa -read_fasta multidom.seq -clustal multidom.aln blosum80.mat
 
+
 4.  Other Output:
 
-i. Score Matrix
-Poa will also print to stdout the score matrix stored in the '.mat' file.  
-The command line argument to get poa to do this is '-printmatrix LETTERSET',
-where LETTERSET is a string of letters to be printed with the score matrix.
-For example, if the score matrix is designed for protein alignment the 
-letter set might be 'ARNDCQEGHILKMFPSTWYV'.
+i.  Score Matrix
 
-ii. Verbose Mode
-Poa will run in verbose mode, printing additional information generated  
-during the run to stdout.  The command line argument to get poa to do run in 
-verbose mode is '-v'.
+POA will also print to stdout the score matrix stored in the '.mat'
+file.  The command line argument to get POA to do this is
+'-printmatrix LETTERSET', where LETTERSET is a string of letters to be
+printed with the score matrix.  For example, if the score matrix is
+designed for protein alignment the letter set might be
+'ARNDCQEGHILKMFPSTWYV'.
 
+ii.  Verbose Mode
+
+POA will run in verbose mode, printing additional information
+generated during the run (such as the set of gap scores used) to
+stdout.  The command line argument for verbose mode is '-v'.
+
+
 B.  Analyzing a PO-MSA
 -----------------------
-Poa can also take a PO format file as input and rebuild the PO-MSA data
-structure.  Once this data structure has been rebuilt, it may be analyzed
-for features.  In 'liblpo.a', the linkable poa library, we have included the
-functions necessary to do heaviest bundling and thereby find consensus
-sequences in the PO-MSA (the details of the heaviest bundling algorithm 
-are described elsewhere).  Poa has been written so that users may create 
-their own functions for analyzing a PO-MSA.  We have not included in the 
-'liblpo.a' library the functions that we wrote to analyze PO-MSAs 
-constructed with ESTs and genome sequence to find snps and alternative 
-splice sites.  However, it is possible to design modular library functions 
-that will look for highly specific  biological features in any PO-MSA data 
-structure.
 
+POA can also take as input an MSA in PO, CLUSTAL or PIR file format
+and rebuild the PO-MSA data structure.  Once this data structure has
+been rebuilt, it may be analyzed for features.  In "liblpo.a", the
+linkable POA library, we have included the functions necessary to do
+heaviest bundling and thereby find consensus sequences in the PO-MSA
+(the details of the heaviest bundling algorithm are described
+elsewhere).  POA has been written so that users may create their own
+functions for analyzing a PO-MSA.  We have not included in the
+"liblpo.a" library the functions that we wrote to analyze PO-MSAs
+constructed with ESTs and genome sequence to find snps and alternative
+splice sites.  However, it is possible to design modular library
+functions that will look for highly specific biological features in
+any PO-MSA data structure.
+
 1.  Required Input:
 
 Before the PO-MSA data structure can be analyzed it must be built.  It can
-be built either from a PO file or from a FASTA file, or from both a PO file 
-and a FASTA file.
+be built iteratively or using a guide tree, or converted from another file
+type.  POA can align a set of FASTA-formatted sequences to each other or
+to an existing PO-MSA.  It can align two PO-MSAs.  It can also align an
+arbitrary set of PO-MSAs.
 
-Note:  POA Requires Either A PO File or a FASTA File
-If neither files are read in by POA it will terminate early, since it 
+Note:  POA Requires Either An MSA File or a FASTA File
+If neither type of file is read in by POA it will terminate early, since it 
 has not received any sequence data.
 
-i. A PO file:
-Poa will read in a PO formatted file.  The command line argument to get
-poa to read in a PO formatted file and rebuild the PO-MSA data structure
-is '-read_po FILENAME'.  
+i.  An MSA file in PO, CLUSTAL or PIR format:
 
-It is possible to filter the PO-MSA data structure as it is being rebuilt.  In 
-order to filter the PO-MSA in the PO file to include only a subset of sequences 
-use the command line argument '-subset FILENAME', where the file named FILENAME 
-contains the list of sequence names to be included in the new PO-MSA. In order 
-to filter the PO-MSA in the PO file to exclude a subset of sequences use the 
-command line argument '-remove FILENAME', where the file named FILENAME contains 
-the list of sequences to be excluded from the new PO-MSA.  The names of 
-sequences to be included or excluded should be in the format 'SOURCENAME= *"
-as they are in the PO file.  Lists of sequence source names can be created by 
-using the unix grep utility on the PO file.     
+POA will read in an MSA file in PO, CLUSTAL or PIR format.  The
+command line argument to get poa to read in an MSA file and rebuild
+the PO-MSA data structure is '-read_msa FILENAME'.  POA automatically
+determines whether the MSA file is in PO, CLUSTAL, or PIR format.  POA
+will read in a second MSA file when the '-read_msa2 FILENAME' flag is
+used.  POA will read in a set of MSA files using the '-read_msa_list
+FILENAME' flag.  The file should contain a list of names of MSA files.
 
-ii. A FASTA File:
-The FASTA file should contain sequences to be aligned by poa.  The command line 
-argument to get poa to accept a FASTA file as input is '-read_fasta FILENAME'.  
-Poa will interpret FILENAME as the FASTA sequence file.  An example file, 
-multidom.seq, is provided in this directory.  (See note above on case sensitivity).
+It is possible to filter the PO-MSA data structure as it is being
+rebuilt.  In order to filter the PO-MSA in the MSA file to include
+only a subset of sequences use the command line argument '-subset
+FILENAME', where the file named FILENAME contains the list of sequence
+names to be included in the new PO-MSA. In order to filter the PO-MSA
+in the MSA file to exclude a subset of sequences, use the command line
+argument '-remove FILENAME', where the file named FILENAME contains
+the list of sequences to be excluded from the new PO-MSA.  The names
+of sequences to be included or excluded should be in the format
+"SOURCENAME=*", as they are in a PO file.  Lists of sequence source
+names can be created by using the unix grep utility on the PO file.
+Each line in the list of sequences to be filtered should read,
+"SOURCENAME=" followed by the name of the sequence,
+e.g. "SOURCENAME=ABL1_HUMAN".  To filter the second PO-MSA read in
+using the '-read_msa2 FILENAME', use the '-subset2' and '-remove2'
+flags.
 
-Note:  POA Can Take Both A PO File And A FASTA File As Input
-If both the '-read_po FILENAME' argument and the '-read_fasta FILENAME' 
-argument are given to poa on the command line, then poa will first rebuild the 
-PO-MSA in the PO file, and then it will align the sequences in the FASTA file 
-to this PO-MSA.   
+ii.  A FASTA File:
 
+The FASTA file should contain sequences to be aligned by POA.  The
+command line argument to get POA to accept a FASTA file as input is
+'-read_fasta FILENAME'.  POA will interpret FILENAME as the FASTA
+sequence file.  An example file, "multidom.seq", is provided in this
+directory.  (See note above on case-sensitivity).
+
+NOTE: POA Can Take Both An MSA File And A FASTA File As Input
+If both the '-read_msa FILENAME' argument and the '-read_fasta FILENAME' 
+argument are given to POA on the command line, then POA will first rebuild the 
+PO-MSA in the MSA file, and then it will align the sequences in the FASTA file 
+to this PO-MSA.  Similarly, if both the '-read_msa_list FILENAME' flag
+and the '-read_fasta FILENAME' flag are given to POA, then POA will 
+rebuild all of the PO-MSAs and will align them to each other and to the 
+sequences in the FASTA file.   
+
+
 2.  Additional PO Utilities:
 
 i.  Consensus Generation Via Heaviest Bundling Algorithm:
@@ -173,7 +258,7 @@
 function adds the new consensus sequences to the PO-MSA by storing new
 consensus sequence indices on the in the PO-MSA nodes corresponding to 
 the consensus sequence paths.  The sequence source names for consensus 
-sequences generated by heaviest bundling is CONSENS'i' where 'i' is the
+sequences generated by heaviest bundling are CONSENS'i' where 'i' is the
 index of the bundle corresponding to the consensus sequence.
 
 The heaviest bundling algorithm can also take as input a bundling 
@@ -189,12 +274,13 @@
 in the SOURCEINFO line for each sequence the index of the bundle to which
 that sequence belongs is give.  Finally, using the command line argument 
 '-best' restricts the MSA output to the consensus sequences generated 
-by heaviest bundling.   
+by heaviest bundling (NB: this applies to PIR output only).
 
+
 III.  PO FILE FORMAT
 
 ****************************HEADER****************************************
-VERSION= ~Current version of poa~
+VERSION= ~Current version of POA,e.g. LPO.1.0~
 NAME=  ~Name of PO-MSA.  Defaults to name of 1st sequence in PO-MSA~
 TITLE=  ~Title of PO-MSA.  Defaults to title of 1st sequence in PO-MSA~
 LENGTH=  ~Number of nodes in PO-MSA~
@@ -232,4 +318,3 @@
 
 
 For more information, see http://www.bioinformatics.ucla.edu/poa.
-

Modified: trunk/packages/poa/trunk/align_lpo.c
===================================================================
--- trunk/packages/poa/trunk/align_lpo.c	2006-09-26 04:35:01 UTC (rev 128)
+++ trunk/packages/poa/trunk/align_lpo.c	2006-09-26 04:38:07 UTC (rev 129)
@@ -167,30 +167,14 @@
     insert_x_try,insert_x_score,insert_y_score,best_score= -999999;
   LPORowFreeList_T *free_list=NULL;
   LastRightList_T *last_right=NULL;
+
+  int max_gap_length;
+  LPOScore_T *gap_penalty_x, *gap_penalty_y;  
   
-  LPOScore_T *gap_penalty_x, *gap_penalty_y;
-  double gap_decrement;
-  int L_trunc = m->trunc_gap_length, L_decay = m->decay_gap_length;
+  max_gap_length = m->max_gap_length;
+  gap_penalty_x = m->gap_penalty_x;
+  gap_penalty_y = m->gap_penalty_y;
   
-  CALLOC (gap_penalty_x, L_trunc + L_decay + 1, LPOScore_T);
-  CALLOC (gap_penalty_y, L_trunc + L_decay + 1, LPOScore_T);
-
-  gap_penalty_x[0] = m->gap_penalty_set[0][0];  /* INITIALIZE GAP PENALTY LOOKUP */
-  for (i=1;i<L_trunc;i++)
-    gap_penalty_x[i] = m->gap_penalty_set[0][1];
-  gap_decrement = (m->gap_penalty_set[0][1] - m->gap_penalty_set[0][2]) / ((double)(L_decay + 1));
-  for (i=L_trunc;i<L_trunc+L_decay+1;i++)
-    gap_penalty_x[i] = m->gap_penalty_set[0][1] - (i-L_trunc+1) * gap_decrement;
-  gap_penalty_x[L_trunc + L_decay] = m->gap_penalty_set[0][2];  /* (REDUNDANT) */
-
-  gap_penalty_y[0] = m->gap_penalty_set[1][0];  /* DIFFERENT PENALTY FOR Y-GAP */
-  for (i=1;i<L_trunc;i++)
-    gap_penalty_y[i] = m->gap_penalty_set[1][1];
-  gap_decrement = (m->gap_penalty_set[1][1] - m->gap_penalty_set[1][2]) / ((double)(L_decay + 1));
-  for (i=L_trunc;i<L_trunc+L_decay+1;i++)
-    gap_penalty_y[i] = m->gap_penalty_set[1][1] - (i-L_trunc+1) * gap_decrement;
-  gap_penalty_y[L_trunc + L_decay] = m->gap_penalty_set[1][2];  /* (REDUNDANT) */
-
   CALLOC(score,len_x,LPOScore_T *); /* ALLOCATE MATRIX STORAGE: ROW POINTERS */
   CALLOC(move,len_x,LPOMove_T *);
   CALLOC(gap_length,len_x,LPOGapLength_T *);
@@ -237,8 +221,8 @@
 	  insert_x=i_x;
 	  insert_x_score=insert_x_try;
 	  new_gap_len=current_gap_length+1;
-	  if (new_gap_len>L_trunc+L_decay)  /* PREVENT OVERFLOW */
-	    new_gap_len=L_trunc+L_decay;
+	  if (new_gap_len>max_gap_length)  /* PREVENT OVERFLOW */
+	    new_gap_len=max_gap_length;
 	}
 
 	 /* FIND BEST [X,1] MOVE */
@@ -285,8 +269,8 @@
 	my_move[j].x=0;
 	my_move[j].y=1;
 	my_gap_length[j]=current_gap_length+1;
-	if (my_gap_length[j]>L_trunc+L_decay) /* PREVENT OVERFLOW */
-	  my_gap_length[j]=L_trunc+L_decay;
+	if (my_gap_length[j]>max_gap_length) /* PREVENT OVERFLOW */
+	  my_gap_length[j]=max_gap_length;
       }
       if (my_score[j]>best_score) { /* RECORD BEST MOVE */
 	best_score=my_score[j];
@@ -310,9 +294,6 @@
 			   x_to_y,y_to_x);
 
 
-  FREE (gap_penalty_x);
-  FREE (gap_penalty_y);
-
   free_row_list(nfree_list,free_list);
   FREE(move_base); /* DUMP ALLOCATED MATRIX */
   FREE(score);

Modified: trunk/packages/poa/trunk/align_lpo2.c
===================================================================
--- trunk/packages/poa/trunk/align_lpo2.c	2006-09-26 04:35:01 UTC (rev 128)
+++ trunk/packages/poa/trunk/align_lpo2.c	2006-09-26 04:38:07 UTC (rev 129)
@@ -168,11 +168,10 @@
   
   DPScore_T *curr_score = NULL, *prev_score = NULL, *init_col_score = NULL, *my_score, *swap;
   
+  int max_gap_length;
   LPOScore_T *gap_penalty_x, *gap_penalty_y;
-  double gap_decrement;
   int *next_gap_array, *next_perp_gap_array;
-  int L_trunc = m->trunc_gap_length, L_decay = m->decay_gap_length;
-    
+  
   LPOScore_T try_score, insert_x_score, insert_y_score, match_score;
   int insert_x_x, insert_x_gap;
   int insert_y_y, insert_y_gap;
@@ -180,35 +179,20 @@
   
   long n_edges = 0;
   
-  CALLOC (gap_penalty_x, L_trunc + L_decay + 2, LPOScore_T);
-  CALLOC (gap_penalty_y, L_trunc + L_decay + 2, LPOScore_T);
-  CALLOC (next_gap_array, L_trunc + L_decay + 2, int);
-  CALLOC (next_perp_gap_array, L_trunc + L_decay + 2, int);
-  
-  gap_penalty_x[0] = m->gap_penalty_set[0][0];  /* INITIALIZE GAP PENALTY LOOKUP */
-  for (i=1;i<L_trunc;i++)
-    gap_penalty_x[i] = m->gap_penalty_set[0][1];
-  gap_decrement = (m->gap_penalty_set[0][1] - m->gap_penalty_set[0][2]) / ((double)(L_decay + 1));
-  for (i=L_trunc;i<L_trunc+L_decay+1;i++)
-    gap_penalty_x[i] = m->gap_penalty_set[0][1] - (i-L_trunc+1) * gap_decrement;
-  gap_penalty_x[L_trunc + L_decay] = m->gap_penalty_set[0][2];  /* (REDUNDANT) */
-  gap_penalty_x[L_trunc + L_decay + 1] = 0;  /* INITIAL STATE FOR LOCAL ALIGNMENT */
+  max_gap_length = m->max_gap_length;
+  gap_penalty_x = m->gap_penalty_x;
+  gap_penalty_y = m->gap_penalty_y;
+  CALLOC (next_gap_array, max_gap_length + 2, int);
+  CALLOC (next_perp_gap_array, max_gap_length + 2, int);
 
-  gap_penalty_y[0] = m->gap_penalty_set[1][0];  /* DIFFERENT PENALTY FOR Y-GAP */
-  for (i=1;i<L_trunc;i++)
-    gap_penalty_y[i] = m->gap_penalty_set[1][1];
-  gap_decrement = (m->gap_penalty_set[1][1] - m->gap_penalty_set[1][2]) / ((double)(L_decay + 1));
-  for (i=L_trunc;i<L_trunc+L_decay+1;i++)
-    gap_penalty_y[i] = m->gap_penalty_set[1][1] - (i-L_trunc+1) * gap_decrement;
-  gap_penalty_y[L_trunc + L_decay] = m->gap_penalty_set[1][2];  /* (REDUNDANT) */
-  gap_penalty_y[L_trunc + L_decay + 1] = 0;  /* INITIAL STATE FOR LOCAL ALIGNMENT */
-  
-  for (i=0; i<L_trunc+L_decay+2; i++) {
-    /* GAP LENGTH EXTENSION RULE: */
-    /* 0->1, 1->2, 2->3, ..., M-1->M, M->M, M+1->M+1. */
-    next_gap_array[i] = (i < L_trunc+L_decay) ? i+1 : i;
-    next_perp_gap_array[i] = (DOUBLE_GAP_SCORING && i < L_trunc+L_decay+1) ? 0 : next_gap_array[i];
+  /* GAP LENGTH EXTENSION RULE: */
+  /* 0->1, 1->2, 2->3, ..., M-1->M, M->M, M+1->M+1. */
+  for (i=0; i<max_gap_length+1; i++) {
+    next_gap_array[i] = (i<max_gap_length) ? i+1 : i;
+    next_perp_gap_array[i] = (DOUBLE_GAP_SCORING ? 0 : next_gap_array[i]);
   }
+  next_gap_array[max_gap_length+1] = max_gap_length+1;
+  next_perp_gap_array[max_gap_length+1] = max_gap_length+1;
   
   get_seq_left_and_final (lposeq_x, &x_left, &is_final_node_x);
   
@@ -358,8 +342,6 @@
 			    x_to_y, y_to_x);
   
   
-  FREE (gap_penalty_x);
-  FREE (gap_penalty_y);
   FREE (next_gap_array);
   FREE (next_perp_gap_array);
   

Modified: trunk/packages/poa/trunk/align_lpo_po.c
===================================================================
--- trunk/packages/poa/trunk/align_lpo_po.c	2006-09-26 04:35:01 UTC (rev 128)
+++ trunk/packages/poa/trunk/align_lpo_po.c	2006-09-26 04:38:07 UTC (rev 129)
@@ -106,29 +106,13 @@
   LPOScore_T match_score,previous_score,
     insert_x_try,insert_x_score,insert_y_score,best_score= -999999;
 
-  LPOScore_T *gap_penalty_x, *gap_penalty_y;
-  double gap_decrement;
-  int L_trunc = m->trunc_gap_length, L_decay = m->decay_gap_length;
-    
-  CALLOC (gap_penalty_x, L_trunc + L_decay + 1, LPOScore_T);
-  CALLOC (gap_penalty_y, L_trunc + L_decay + 1, LPOScore_T);
-
-  gap_penalty_x[0] = m->gap_penalty_set[0][0];  /* INITIALIZE GAP PENALTY LOOKUP */
-  for (i=1;i<L_trunc;i++)
-    gap_penalty_x[i] = m->gap_penalty_set[0][1];
-  gap_decrement = (m->gap_penalty_set[0][1] - m->gap_penalty_set[0][2]) / ((double)(L_decay + 1));
-  for (i=L_trunc;i<L_trunc+L_decay+1;i++)
-    gap_penalty_x[i] = m->gap_penalty_set[0][1] - (i-L_trunc+1) * gap_decrement;
-  gap_penalty_x[L_trunc + L_decay] = m->gap_penalty_set[0][2];  /* (REDUNDANT) */
-
-  gap_penalty_y[0] = m->gap_penalty_set[1][0];  /* DIFFERENT PENALTY FOR Y-GAP */
-  for (i=1;i<L_trunc;i++)
-    gap_penalty_y[i] = m->gap_penalty_set[1][1];
-  gap_decrement = (m->gap_penalty_set[1][1] - m->gap_penalty_set[1][2]) / ((double)(L_decay + 1));
-  for (i=L_trunc;i<L_trunc+L_decay+1;i++)
-    gap_penalty_y[i] = m->gap_penalty_set[1][1] - (i-L_trunc+1) * gap_decrement;
-  gap_penalty_y[L_trunc + L_decay] = m->gap_penalty_set[1][2];  /* (REDUNDANT) */
-
+  int max_gap_length;
+  LPOScore_T *gap_penalty_x, *gap_penalty_y;  
+  
+  max_gap_length = m->max_gap_length;
+  gap_penalty_x = m->gap_penalty_x;
+  gap_penalty_y = m->gap_penalty_y;
+  
   CALLOC(score,len_x,LPOScore_T *); /* ALLOCATE MATRIX STORAGE: ROW POINTERS */
   CALLOC(move,len_x,LPOMove_T *);
   CALLOC(gap_length,len_x,LPOGapLength_T *);
@@ -167,8 +151,8 @@
 	  insert_x=i_x;
 	  insert_x_score=insert_x_try;
 	  new_gap_len=current_gap_length+1;
-	  if (new_gap_len>TRUNCATE_GAP_LENGTH) /* PREVENT OVERFLOW */
-	    new_gap_len=TRUNCATE_GAP_LENGTH;
+	  if (new_gap_len>max_gap_length) /* PREVENT OVERFLOW */
+	    new_gap_len=max_gap_length;
 	}
 
 	 /* FIND BEST [X,Y] MOVE */
@@ -239,8 +223,8 @@
 	my_move[j].x=0;
 	my_move[j].y=insert_y;
 	my_gap_length[j]=current_gap_length+1;
-	if (my_gap_length[j]>TRUNCATE_GAP_LENGTH) /* PREVENT OVERFLOW */
-	  my_gap_length[j]=TRUNCATE_GAP_LENGTH;
+	if (my_gap_length[j]>max_gap_length) /* PREVENT OVERFLOW */
+	  my_gap_length[j]=max_gap_length;
       }
       if (my_score[j]>best_score) { /* RECORD BEST MOVE */
 	best_score=my_score[j];
@@ -254,8 +238,6 @@
   trace_back_lpo_po_alignment(len_x,seq_x,len_y,seq_y,move,best_x,best_y,
 			      x_to_y,y_to_x);
   
-  FREE (gap_penalty_x);
-  FREE (gap_penalty_y);
 
   FREE(score_base); /* FREE MEMORY */
   FREE(move_base);

Modified: trunk/packages/poa/trunk/align_lpo_po2.c
===================================================================
--- trunk/packages/poa/trunk/align_lpo_po2.c	2006-09-26 04:35:01 UTC (rev 128)
+++ trunk/packages/poa/trunk/align_lpo_po2.c	2006-09-26 04:38:07 UTC (rev 129)
@@ -21,64 +21,87 @@
 }
 DPScore_T;
 
-/** is node 'i' the first node in any sequence in lposeq? */
-static int is_initial_node (int i, LPOSequence_T *lposeq)
+
+
+#define LPO_INITIAL_NODE 1
+#define LPO_FINAL_NODE 2
+
+static void get_lpo_stats (LPOSequence_T *lposeq,
+			   int *n_nodes_ptr, int *n_edges_ptr, int **node_type_ptr,
+			   int **refs_from_right_ptr, int *max_rows_alloced_ptr,
+			   LPOLetterLink_T ***left_links_ptr)
 {
-  LPOLetterSource_T *src = &((lposeq->letter[i]).source);
-  while (src != NULL && src->iseq >= 0) {
-    if (src->ipos == 0) {
-      return 1;
+  int i, j, rows_alloced = 0, max_rows_alloced = 0, n_edges = 0, len = lposeq->length;
+  LPOLetter_T *seq = lposeq->letter;
+  int *node_type;
+  int *refs_from_right, *tmp;
+  LPOLetterSource_T *src;
+  LPOLetterLink_T *lnk, **left_links;
+  
+  CALLOC (node_type, len, int);
+  CALLOC (refs_from_right, len, int);
+  CALLOC (tmp, len, int);
+  CALLOC (left_links, len, LPOLetterLink_T *);
+    
+  for (i=0; i<len; i++) {
+
+    /* NODES CONTAINING THE FIRST RESIDUE IN ANY SEQ ARE 'INITIAL'; */
+    /* DITTO, LAST RESIDUE IN ANY SEQ, 'FINAL'. */
+    for (src = &(seq[i].source); src != NULL && src->iseq >= 0; src = src->more) {
+      if (src->ipos == 0) {
+	node_type[i] = (node_type[i] | LPO_INITIAL_NODE);
+      }
+      if (src->ipos == (lposeq->source_seq[src->iseq]).length - 1) {
+	node_type[i] = (node_type[i] | LPO_FINAL_NODE);
+      }
     }
-    src = src->more;
+
+    /* COUNTING THE LEFT-LINKS BACK TO EACH NODE ALLOWS FOR EFFICIENT */
+    /* MEMORY MANAGEMENT OF 'SCORE' ROWS (in align_lpo_po). */
+    for (lnk = &(seq[i].left); lnk != NULL && lnk->ipos >= 0; lnk = lnk->more) {
+      refs_from_right[lnk->ipos]++;
+      n_edges++;
+    }
   }
-  return 0;
-}
 
-/** is node 'i' the last node in any sequence in lposeq? */
-static int is_final_node (int i, LPOSequence_T *lposeq)
-{
-  LPOLetterSource_T *src = &((lposeq->letter[i]).source);
-  while (src != NULL && src->iseq >= 0) {
-    if (src->ipos == (lposeq->source_seq[src->iseq]).length - 1) {
-      return 1;
+  /* ALL 'INITIAL' NODES (1st in some seq) MUST BE LEFT-LINKED TO -1. */
+  /* THIS ALLOWS FREE ALIGNMENT TO ANY 'BRANCH' IN GLOBAL ALIGNMENT. */
+  for (i=0; i<len; i++) {
+    if ((node_type[i] & LPO_INITIAL_NODE) && seq[i].left.ipos != -1) {
+      CALLOC (left_links[i], 1, LPOLetterLink_T);
+      left_links[i]->ipos = -1;
+      left_links[i]->score = 0;
+      left_links[i]->more = &seq[i].left;
     }
-    src = src->more;
+    else {
+      left_links[i] = &seq[i].left;
+    }
   }
-  return 0;
-}
-
-
-static void get_seq_left_and_final (LPOSequence_T *lposeq_x,
-				    LPOLetterLink_T ***x_left_ptr,
-				    int **is_final_node_ptr)
-{
-  int i, len_x = lposeq_x->length;
-  LPOLetter_T *seq_x = lposeq_x->letter;
-  int *is_final_node_x = NULL;
-  LPOLetterLink_T **x_left = NULL;
   
-  CALLOC (is_final_node_x, len_x, int);
-  
-  for (i=0; i<len_x; i++) {
-    is_final_node_x[i] = is_final_node (i, lposeq_x);
+  for (i=0; i<len; i++) {
+    tmp[i] = refs_from_right[i];
   }
   
-  CALLOC (x_left, len_x, LPOLetterLink_T *);
-  
-  for (i=0; i<len_x; i++) {
-    if (is_initial_node (i, lposeq_x) && seq_x[i].left.ipos != -1) {
-      CALLOC (x_left[i], 1, LPOLetterLink_T);
-      x_left[i]->ipos = -1;
-      x_left[i]->score = 0;
-      x_left[i]->more = &seq_x[i].left;
+  for (i=0; i<len; i++) {
+    rows_alloced++;
+    if (rows_alloced > max_rows_alloced) {
+      max_rows_alloced = rows_alloced;
     }
-    else {
-      x_left[i] = &seq_x[i].left;
+    for (lnk = &(seq[i].left); lnk != NULL && lnk->ipos >= 0; lnk = lnk->more) {
+      if ((--tmp[lnk->ipos]) == 0) {
+	rows_alloced--;
+      }
     }
   }
 
-  (*is_final_node_ptr) = is_final_node_x;
-  (*x_left_ptr) = x_left;
+  FREE (tmp);
+
+  (*n_nodes_ptr) = len;
+  (*n_edges_ptr) = n_edges;
+  (*node_type_ptr) = node_type;
+  (*refs_from_right_ptr) = refs_from_right;
+  (*max_rows_alloced_ptr) = max_rows_alloced;
+  (*left_links_ptr) = left_links;
 }
 
 
@@ -145,7 +168,8 @@
 }
 
 
-/** performs partial order alignment:
+/** (align_lpo_po:)
+    performs partial order alignment:
     lposeq_x and lposeq_y are partial orders;
     returns the alignment in x_to_y[] and y_to_x[], and also 
     returns the alignment score as the return value.
@@ -160,66 +184,73 @@
 			 (int, int, LPOLetter_T *, LPOLetter_T *, ResidueScoreMatrix_T *),
 			 int use_global_alignment)
 {
-  int len_x = lposeq_x->length;
-  int len_y = lposeq_y->length;
   LPOLetter_T *seq_x = lposeq_x->letter;
   LPOLetter_T *seq_y = lposeq_y->letter;
   
-  int i, j, xcount, ycount, prev_gap, next_gap;
+  int len_x, len_y;
+  int n_edges_x, n_edges_y;
+  int *node_type_x, *node_type_y;
+  int *refs_from_right_x, *refs_from_right_y;
+  int max_rows_alloced_x, max_rows_alloced_y, n_score_rows_alloced = 0;
+  
+  int i, j, xcount, ycount, prev_gap;
   int best_x = -1, best_y = -1;
-  LPOScore_T best_score = -999999;
-  int *is_final_node_x, *is_final_node_y;
+  LPOScore_T min_score = -999999, best_score = -999999;
+  int possible_end_square;
   LPOLetterLink_T **x_left = NULL, **y_left = NULL, *xl, *yl;
   DPMove_T **move = NULL, *my_move;
   
   DPScore_T *curr_score = NULL, *prev_score = NULL, *init_col_score = NULL, *my_score;
   DPScore_T **score_rows = NULL;
-  
+
+  int max_gap_length;
   LPOScore_T *gap_penalty_x, *gap_penalty_y;
-  double gap_decrement;
   int *next_gap_array, *next_perp_gap_array;
-  int L_trunc = m->trunc_gap_length, L_decay = m->decay_gap_length;
   
   LPOScore_T try_score, insert_x_score, insert_y_score, match_score;
   int insert_x_x, insert_x_gap;
   int insert_y_y, insert_y_gap;
   int match_x, match_y;
   
-  long n_edges = 0;
+  get_lpo_stats (lposeq_x, &len_x, &n_edges_x, &node_type_x, &refs_from_right_x, &max_rows_alloced_x, &x_left);
+  get_lpo_stats (lposeq_y, &len_y, &n_edges_y, &node_type_y, &refs_from_right_y, &max_rows_alloced_y, &y_left);
 
-  CALLOC (gap_penalty_x, L_trunc + L_decay + 2, LPOScore_T);
-  CALLOC (gap_penalty_y, L_trunc + L_decay + 2, LPOScore_T);
-  CALLOC (next_gap_array, L_trunc + L_decay + 2, int);
-  CALLOC (next_perp_gap_array, L_trunc + L_decay + 2, int);  
+  /*
+    fprintf (stdout, "sequence x:  %ld nodes, %ld edges, %ld rows at most --> %ld mem\n", len_x, n_edges_x, max_rows_alloced_x, max_rows_alloced_x * len_y);
+    fprintf (stdout, "sequence y:  %ld nodes, %ld edges, %ld rows at most --> %ld mem\n", len_y, n_edges_y, max_rows_alloced_y, max_rows_alloced_y * len_x);
+  */
+  
+  /* INITIALIZE GAP PENALTIES: */
+  max_gap_length = m->max_gap_length;
+  gap_penalty_x = m->gap_penalty_x;
+  gap_penalty_y = m->gap_penalty_y;
+  CALLOC (next_gap_array, max_gap_length + 2, int);
+  CALLOC (next_perp_gap_array, max_gap_length + 2, int);
 
-  gap_penalty_x[0] = m->gap_penalty_set[0][0];  /* INITIALIZE GAP PENALTY LOOKUP */
-  for (i=1;i<L_trunc;i++)
-    gap_penalty_x[i] = m->gap_penalty_set[0][1];
-  gap_decrement = (m->gap_penalty_set[0][1] - m->gap_penalty_set[0][2]) / ((double)(L_decay + 1));
-  for (i=L_trunc;i<L_trunc+L_decay+1;i++)
-    gap_penalty_x[i] = m->gap_penalty_set[0][1] - (i-L_trunc+1) * gap_decrement;
-  gap_penalty_x[L_trunc + L_decay] = m->gap_penalty_set[0][2];  /* (REDUNDANT) */
-  gap_penalty_x[L_trunc + L_decay + 1] = 0;  /* INITIAL STATE FOR LOCAL ALIGNMENT */
-
-  gap_penalty_y[0] = m->gap_penalty_set[1][0];  /* DIFFERENT PENALTY FOR Y-GAP */
-  for (i=1;i<L_trunc;i++)
-    gap_penalty_y[i] = m->gap_penalty_set[1][1];
-  gap_decrement = (m->gap_penalty_set[1][1] - m->gap_penalty_set[1][2]) / ((double)(L_decay + 1));
-  for (i=L_trunc;i<L_trunc+L_decay+1;i++)
-    gap_penalty_y[i] = m->gap_penalty_set[1][1] - (i-L_trunc+1) * gap_decrement;
-  gap_penalty_y[L_trunc + L_decay] = m->gap_penalty_set[1][2];  /* (REDUNDANT) */
-  gap_penalty_y[L_trunc + L_decay + 1] = 0;  /* INITIAL STATE FOR LOCAL ALIGNMENT */
-  
-  for (i=0; i<L_trunc+L_decay+2; i++) {
+  for (i=0; i<max_gap_length+1; i++) {
     /* GAP LENGTH EXTENSION RULE: */
-    /* 0->1, 1->2, 2->3, ..., M-1->M, M->M, M+1->M+1. */
-    next_gap_array[i] = (i < L_trunc+L_decay) ? i+1 : i;
-    next_perp_gap_array[i] = (DOUBLE_GAP_SCORING && i < L_trunc+L_decay+1) ? 0 : next_gap_array[i];
+    /* 0->1, 1->2, 2->3, ..., M-1->M; but M->M. */
+    next_gap_array[i] = (i<max_gap_length) ? i+1 : i;
+    /* PERPENDICULAR GAP (i.e. X FOR A GROWING Y-GAP) IS KEPT AT 0 IF DOUBLE-GAP-SCORING (old scoring) IS USED. */
+    next_perp_gap_array[i] = (DOUBLE_GAP_SCORING ? 0 : next_gap_array[i]);
   }
   
-  get_seq_left_and_final (lposeq_x, &x_left, &is_final_node_x);
-  get_seq_left_and_final (lposeq_y, &y_left, &is_final_node_y);
+  /* GAP LENGTH = M+1 IS USED FOR INITIAL STATE. */
+  /* THIS MUST BE TREATED DIFFERENTLY FOR GLOBAL v. LOCAL ALIGNMENT: */
+  if (0 == use_global_alignment) {   /* FREE EXTENSION OF INITIAL GAP (FOR LOCAL ALIGNMENT) */
+    gap_penalty_x[max_gap_length+1] = gap_penalty_y[max_gap_length+1] = 0;
+    next_gap_array[max_gap_length+1] = next_perp_gap_array[max_gap_length+1] = max_gap_length+1;
+  }
+  else {   /* TREAT INITIAL GAP LIKE ANY OTHER (FOR GLOBAL ALIGNMENT) */
+    gap_penalty_x[max_gap_length+1] = gap_penalty_x[0];
+    gap_penalty_y[max_gap_length+1] = gap_penalty_y[0];
+    next_gap_array[max_gap_length+1] = next_gap_array[0];
+    next_perp_gap_array[max_gap_length+1] = next_perp_gap_array[0];
+  }
   
+  
+  /* ALLOCATE MEMORY FOR 'MOVE' AND 'SCORE' MATRICES: */
+  
   CALLOC (move, len_y, DPMove_T *);
   for (i=0; i<len_y; i++) {
     CALLOC (move[i], len_x, DPMove_T);
@@ -230,25 +261,23 @@
   
   CALLOC (score_rows, len_y+1, DPScore_T *);
   score_rows = &(score_rows[1]);
-  for (i=-1; i<len_y; i++) {
-    CALLOC (score_rows[i], len_x+1, DPScore_T);
-    score_rows[i] = &(score_rows[i][1]);
-  }
+  CALLOC (score_rows[-1], len_x+1, DPScore_T);
+  score_rows[-1] = &(score_rows[-1][1]);
   curr_score = score_rows[-1];
+
+
+  /* FILL INITIAL ROW (-1). */
+  /* GAP LENGTH = M+1 IS USED FOR INITIAL STATE. */
   
-  /* FILL INITIAL ROW. */
-  /* GLOBAL ALIGNMENT: no free gaps */
-  /* LOCAL ALIGNMENT: free initial gap */
-  
   curr_score[-1].score = 0;
-  curr_score[-1].gap_x = curr_score[-1].gap_y =
-    (use_global_alignment) ? 0 : TRUNCATE_GAP_LENGTH+1;
+  curr_score[-1].gap_x = curr_score[-1].gap_y = max_gap_length+1;
   
   for (i=0; i<len_x; i++) {
+    curr_score[i].score = min_score;
     for (xcount = 1, xl = x_left[i]; xl != NULL; xcount++, xl = xl->more) {
       prev_gap = curr_score[xl->ipos].gap_x;
       try_score = curr_score[xl->ipos].score + xl->score - gap_penalty_x[prev_gap];
-      if (xcount == 1 || try_score > curr_score[i].score) {
+      if (try_score > curr_score[i].score) {
 	curr_score[i].score = try_score;
 	curr_score[i].gap_x = next_gap_array[prev_gap];
 	curr_score[i].gap_y = next_perp_gap_array[prev_gap];
@@ -256,14 +285,15 @@
     }
   }
   
-  /* FILL INITIAL COLUMN. */
+  /* FILL INITIAL COLUMN (-1). */
   
   init_col_score[-1] = curr_score[-1];
   for (i=0; i<len_y; i++) {
+    init_col_score[i].score = min_score;
     for (ycount = 1, yl = y_left[i]; yl != NULL; ycount++, yl = yl->more) {
       prev_gap = init_col_score[yl->ipos].gap_y;
       try_score = init_col_score[yl->ipos].score + yl->score - gap_penalty_y[prev_gap];
-      if (ycount == 1 || try_score > init_col_score[i].score) {
+      if (try_score > init_col_score[i].score) {
 	init_col_score[i].score = try_score;
 	init_col_score[i].gap_x = next_perp_gap_array[prev_gap];
 	init_col_score[i].gap_y = next_gap_array[prev_gap];
@@ -278,13 +308,28 @@
   /* OUTER LOOP (i-th position in LPO y): */
   for (i=0; i<len_y; i++) {
     
+    /* ALLOCATE MEMORY FOR 'SCORE' ROW i: */
+    CALLOC (score_rows[i], len_x+1, DPScore_T);
+    score_rows[i] = &(score_rows[i][1]);
+    n_score_rows_alloced++;
+        
     curr_score = score_rows[i];
-    
     curr_score[-1] = init_col_score[i];
           
     /* INNER LOOP (j-th position in LPO x): */
     for (j=0; j<len_x; j++) {
+
+      match_score = (use_global_alignment) ? min_score : 0;
+      match_x = match_y = 0;
       
+      insert_x_score = insert_y_score = min_score;
+      insert_x_x = insert_y_y = 0;
+      insert_x_gap = insert_y_gap = 0;
+      
+      /* THIS SQUARE CAN END THE ALIGNMENT IF WE'RE USING LOCAL ALIGNMENT, */
+      /* OR IF BOTH THE X- AND Y-NODES CONTAIN THE END OF A SEQUENCE. */
+      possible_end_square = ((0 == use_global_alignment) || ((node_type_x[j] & LPO_FINAL_NODE) && (node_type_y[i] & LPO_FINAL_NODE)));
+      
       /* LOOP OVER y-predecessors: */
       for (ycount = 1, yl = y_left[i]; yl != NULL; ycount++, yl = yl->more) {
 	
@@ -293,18 +338,18 @@
 	/* IMPROVE Y-INSERTION?: trace back to (i'=yl->ipos, j) */
 	prev_gap = prev_score[j].gap_y;
 	try_score = prev_score[j].score + yl->score - gap_penalty_y[prev_gap];
-	if (ycount == 1 || try_score > insert_y_score) {
+	if (try_score > insert_y_score) {
 	  insert_y_score = try_score;
 	  insert_y_y = ycount;
 	  insert_y_gap = prev_gap;
 	}
 	
-	/* LOOP OVER x-predecessors: */
+	/* LOOP OVER x-predecessors (INSIDE y-predecessor LOOP): */
 	for (xcount = 1, xl = x_left[j]; xl != NULL; xcount++, xl = xl->more) {
 	  
 	  /* IMPROVE XY-MATCH?: trace back to (i'=yl->ipos, j'=xl->ipos) */
 	  try_score = prev_score[xl->ipos].score + xl->score + yl->score;
-	  if ((xcount == 1 && ycount == 1) || try_score > match_score) {
+	  if (try_score > match_score) {
 	    match_score = try_score;
 	    match_x = xcount;
 	    match_y = ycount;
@@ -312,26 +357,27 @@
 	}
       }
       
-      /* LOOP OVER x-predecessors */
-      /* IMPROVE X-INSERTION?: trace back to (i, j'=xl->ipos) */
+      /* LOOP OVER x-predecessors (OUTSIDE y-predecessor LOOP): */
       for (xcount = 1, xl = x_left[j]; xl != NULL; xcount++, xl = xl->more) {
+
+	/* IMPROVE X-INSERTION?: trace back to (i, j'=xl->ipos) */
 	prev_gap = curr_score[xl->ipos].gap_x;
 	try_score = curr_score[xl->ipos].score + xl->score - gap_penalty_x[prev_gap];
-	if (xcount == 1 || try_score > insert_x_score) {
+	if (try_score > insert_x_score) {
 	  insert_x_score = try_score;
 	  insert_x_x = xcount;
 	  insert_x_gap = prev_gap;
 	}
       }
       
-      if (0 == use_global_alignment && match_score <= 0) {
-	match_score = 0;
-	match_x = match_y = 0;  /* FIRST ALIGNED PAIR */
+      /* USE CUSTOM OR DEFAULT SCORING FUNCTION: */
+      if (scoring_function != NULL) {
+	match_score += scoring_function (j, i, seq_x, seq_y, m);
       }
+      else {
+	match_score += m->score[seq_x[i].letter][seq_y[j].letter];
+      }
       
-      n_edges += (xcount-1)*(ycount-1);
-      match_score += scoring_function (j, i, seq_x, seq_y, m);
-      
       my_score = &curr_score[j];
       my_move = &move[i][j];
       
@@ -353,7 +399,6 @@
       }
       else {
 	/* Y-INSERTION */
-	next_gap = next_gap_array[insert_y_gap];
 	my_score->score = insert_y_score;
 	my_score->gap_x = next_perp_gap_array[insert_y_gap];
 	my_score->gap_y = next_gap_array[insert_y_gap];
@@ -361,9 +406,9 @@
 	my_move->y = insert_y_y;
       }
 
-      /* RECORD BEST START FOR TRACEBACK */
-      /* KEEPING ONLY FINAL-FINAL BESTS FOR GLOBAL ALIGNMENT */    
-      if (my_score->score >= best_score && (0 == use_global_alignment || (is_final_node_x[j] && is_final_node_y[i]))) {
+      /* RECORD BEST ALIGNMENT END FOR TRACEBACK: */
+      if (possible_end_square && my_score->score >= best_score) {
+	/* BREAK TIES BY CHOOSING MINIMUM (x,y): */
 	if (my_score->score > best_score || (j == best_x && i < best_y) || j < best_x) {
 	  best_score = my_score->score;
 	  best_x = j;
@@ -371,39 +416,54 @@
 	}
       }
     }
+
+    /* UPDATE # OF REFS TO 'SCORE' ROWS; FREE MEMORY WHEN POSSIBLE: */
+    for (yl = y_left[i]; yl != NULL; yl = yl->more) if ((j = yl->ipos) >= 0) {
+      if ((--refs_from_right_y[j]) == 0) {
+	score_rows[j] = &(score_rows[j][-1]);
+	FREE (score_rows[j]);
+	n_score_rows_alloced--;
+      }
+    }
+    if (refs_from_right_y[i] == 0) {
+      score_rows[i] = &(score_rows[i][-1]);
+      FREE (score_rows[i]);
+      n_score_rows_alloced--;
+    }
   }
   
   IF_GUARD(best_x>=len_x || best_y>=len_y,1.1,(ERRTXT,"Bounds exceeded!\nbest_x,best_y:%d,%d\tlen:%d,%d\n",best_x,best_y,len_x,len_y),CRASH);
   
-  /*
-    fprintf (stderr, "aligned (%d nodes, %ld edges) to (%d nodes)\n", len_x, n_edges, len_y);
-    fprintf (stderr, "best score %d @ (%d %d)\n", best_score, best_x, best_y);
-  */
-  
+  /**/
+    fprintf (stderr, "aligned (%d nodes, %ld edges) to (%d nodes, %ld edges): ", len_x, n_edges_x, len_y, n_edges_y);
+    fprintf (stderr, "best %s score = %d @ (%d %d)\n", (use_global_alignment ? "global" : "local"), best_score, best_x, best_y);
+    /**/
+    
   /* DYNAMIC PROGRAMING MATRIX COMPLETE, NOW TRACE BACK FROM best_x, best_y */
   trace_back_lpo_alignment (len_x, len_y, move, x_left, y_left,
 			    best_x, best_y,
 			    x_to_y, y_to_x);
 
 
-  FREE (gap_penalty_x);
-  FREE (gap_penalty_y);
+  /* CLEAN UP AND RETURN: */
+  
+  FREE (node_type_x);
+  FREE (node_type_y);
+  
+  FREE (refs_from_right_x);
+  FREE (refs_from_right_y);
+
   FREE (next_gap_array);
   FREE (next_perp_gap_array);
   
-  for (i=-1; i<len_y; i++) {
-    score_rows[i] = &(score_rows[i][-1]);
-    FREE (score_rows[i]);
-  }
+  score_rows[-1] = &(score_rows[-1][-1]);
+  FREE (score_rows[-1]);
   score_rows = &(score_rows[-1]);
   FREE (score_rows);
   
   init_col_score = &(init_col_score[-1]);
   FREE (init_col_score);
-  
-  FREE (is_final_node_x);
-  FREE (is_final_node_y);
-  
+    
   for (i=0; i<len_x; i++) {
     if (x_left[i] != &seq_x[i].left) {
       FREE (x_left[i]);

Modified: trunk/packages/poa/trunk/align_score.c
===================================================================
--- trunk/packages/poa/trunk/align_score.c	2006-09-26 04:35:01 UTC (rev 128)
+++ trunk/packages/poa/trunk/align_score.c	2006-09-26 04:38:07 UTC (rev 129)
@@ -17,10 +17,10 @@
 
 
 
-/* CATIE: YOU CAN PUT ANY SCORING METHOD YOU WANT INSIDE THIS
+/* YOU CAN PUT ANY SCORING METHOD YOU WANT INSIDE THIS
    FUNCTION. JUST REPLACE THE CONTENTS OF THE FUNCTION WITH
    YOUR SCORING METHOD */
-LPOScore_T caties_scoring_function(int i,
+LPOScore_T matrix_scoring_function(int i,
 				   int j,
 				   LPOLetter_T seq_x[],
 				   LPOLetter_T seq_y[],

Copied: trunk/packages/poa/trunk/align_score.h (from rev 128, trunk/packages/poa/branches/upstream/current/align_score.h)
===================================================================
--- trunk/packages/poa/trunk/align_score.h	                        (rev 0)
+++ trunk/packages/poa/trunk/align_score.h	2006-09-26 04:38:07 UTC (rev 129)
@@ -0,0 +1,15 @@
+#ifndef ALIGN_SCORE_HEADER_INCLUDED
+#define ALIGN_SCORE_HEADER_INCLUDED
+
+#include <default.h>
+#include <poa.h>
+#include <seq_util.h>
+
+/*********************************************************** align_score.c */
+LPOScore_T matrix_scoring_function(int i,
+				   int j,
+				   LPOLetter_T seq_x[],
+				   LPOLetter_T seq_y[],
+				   ResidueScoreMatrix_T *m);
+
+#endif

Copied: trunk/packages/poa/trunk/blosum80_trunc.mat (from rev 128, trunk/packages/poa/branches/upstream/current/blosum80_trunc.mat)
===================================================================
--- trunk/packages/poa/trunk/blosum80_trunc.mat	                        (rev 0)
+++ trunk/packages/poa/trunk/blosum80_trunc.mat	2006-09-26 04:38:07 UTC (rev 129)
@@ -0,0 +1,42 @@
+#  Blosum80
+#  Matrix made by matblas from blosum80.iij
+#  * column uses minimum score
+#  BLOSUM Clustered Scoring Matrix in 1/3 Bit Units
+#  Blocks Database = /data/blocks_5.0/blocks.dat
+#  Cluster Percentage: >= 80
+#  Entropy =   0.9868, Expected =  -0.7442
+GAP-TRUNCATION-LENGTH=10
+GAP-DECAY-LENGTH=5
+GAP-PENALTIES=12 6 0
+   A  R  N  D  C  Q  E  G  H  I  L  K  M  F  P  S  T  W  Y  V  B  Z  X  ?  a  g  t  c  u  ]  n
+A  7 -3 -3 -3 -1 -2 -2  0 -3 -3 -3 -1 -2 -4 -1  2  0 -5 -4 -1 -3 -2 -1 -9 -9 -9 -9 -9 -9 -9 -9
+R -3  9 -1 -3 -6  1 -1 -4  0 -5 -4  3 -3 -5 -3 -2 -2 -5 -4 -4 -2  0 -2 -9 -9 -9 -9 -9 -9 -9 -9
+N -3 -1  9  2 -5  0 -1 -1  1 -6 -6  0 -4 -6 -4  1  0 -7 -4 -5  5 -1 -2 -9 -9 -9 -9 -9 -9 -9 -9
+D -3 -3  2 10 -7 -1  2 -3 -2 -7 -7 -2 -6 -6 -3 -1 -2 -8 -6 -6  6  1 -3 -9 -9 -9 -9 -9 -9 -9 -9
+C -1 -6 -5 -7 13 -5 -7 -6 -7 -2 -3 -6 -3 -4 -6 -2 -2 -5 -5 -2 -6 -7 -4 -9 -9 -9 -9 -9 -9 -9 -9
+Q -2  1  0 -1 -5  9  3 -4  1 -5 -4  2 -1 -5 -3 -1 -1 -4 -3 -4 -1  5 -2 -9 -9 -9 -9 -9 -9 -9 -9
+E -2 -1 -1  2 -7  3  8 -4  0 -6 -6  1 -4 -6 -2 -1 -2 -6 -5 -4  1  6 -2 -9 -9 -9 -9 -9 -9 -9 -9
+G  0 -4 -1 -3 -6 -4 -4  9 -4 -7 -7 -3 -5 -6 -5 -1 -3 -6 -6 -6 -2 -4 -3 -9 -9 -9 -9 -9 -9 -9 -9
+H -3  0  1 -2 -7  1  0 -4 12 -6 -5 -1 -4 -2 -4 -2 -3 -4  3 -5 -1  0 -2 -9 -9 -9 -9 -9 -9 -9 -9
+I -3 -5 -6 -7 -2 -5 -6 -7 -6  7  2 -5  2 -1 -5 -4 -2 -5 -3  4 -6 -6 -2 -9 -9 -9 -9 -9 -9 -9 -9
+L -3 -4 -6 -7 -3 -4 -6 -7 -5  2  6 -4  3  0 -5 -4 -3 -4 -2  1 -7 -5 -2 -9 -9 -9 -9 -9 -9 -9 -9
+K -1  3  0 -2 -6  2  1 -3 -1 -5 -4  8 -3 -5 -2 -1 -1 -6 -4 -4 -1  1 -2 -9 -9 -9 -9 -9 -9 -9 -9
+M -2 -3 -4 -6 -3 -1 -4 -5 -4  2  3 -3  9  0 -4 -3 -1 -3 -3  1 -5 -3 -2 -9 -9 -9 -9 -9 -9 -9 -9
+F -4 -5 -6 -6 -4 -5 -6 -6 -2 -1  0 -5  0 10 -6 -4 -4  0  4 -2 -6 -6 -3 -9 -9 -9 -9 -9 -9 -9 -9
+P -1 -3 -4 -3 -6 -3 -2 -5 -4 -5 -5 -2 -4 -6 12 -2 -3 -7 -6 -4 -4 -2 -3 -9 -9 -9 -9 -9 -9 -9 -9
+S  2 -2  1 -1 -2 -1 -1 -1 -2 -4 -4 -1 -3 -4 -2  7  2 -6 -3 -3  0 -1 -1 -9 -9 -9 -9 -9 -9 -9 -9
+T  0 -2  0 -2 -2 -1 -2 -3 -3 -2 -3 -1 -1 -4 -3  2  8 -5 -3  0 -1 -2 -1 -9 -9 -9 -9 -9 -9 -9 -9
+W -5 -5 -7 -8 -5 -4 -6 -6 -4 -5 -4 -6 -3  0 -7 -6 -5 16  3 -5 -8 -5 -5 -9 -9 -9 -9 -9 -9 -9 -9
+Y -4 -4 -4 -6 -5 -3 -5 -6  3 -3 -2 -4 -3  4 -6 -3 -3  3 11 -3 -5 -4 -3 -9 -9 -9 -9 -9 -9 -9 -9
+V -1 -4 -5 -6 -2 -4 -4 -6 -5  4  1 -4  1 -2 -4 -3  0 -5 -3  7 -6 -4 -2 -9 -9 -9 -9 -9 -9 -9 -9
+B -3 -2  5  6 -6 -1  1 -2 -1 -6 -7 -1 -5 -6 -4  0 -1 -8 -5 -6  6  0 -3 -9 -9 -9 -9 -9 -9 -9 -9
+Z -2  0 -1  1 -7  5  6 -4  0 -6 -5  1 -3 -6 -2 -1 -2 -5 -4 -4  0  6 -1 -9 -9 -9 -9 -9 -9 -9 -9
+X -1 -2 -2 -3 -4 -2 -2 -3 -2 -2 -2 -2 -2 -3 -3 -1 -1 -5 -3 -2 -3 -1 -2 -9 -9 -9 -9 -9 -9 -9 -9
+? -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9
+a -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9  4 -2 -2 -2 -2 -9  0
+g -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -2  4 -2 -2 -2 -9  0
+t -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -2 -2  4 -2  4 -9  0
+c -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -2 -2 -2  4 -2 -9  0
+u -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -2 -2  4 -2  4 -9  0
+] -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9
+n -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9  0  0  0  0  0 -9  0

Modified: trunk/packages/poa/trunk/buildup_lpo.c
===================================================================
--- trunk/packages/poa/trunk/buildup_lpo.c	2006-09-26 04:35:01 UTC (rev 128)
+++ trunk/packages/poa/trunk/buildup_lpo.c	2006-09-26 04:38:07 UTC (rev 129)
@@ -6,7 +6,15 @@
 #include "lpo.h"
 
 
-
+/** if two align-rings are aligned to each other, make sure
+    that the (single) aligned residue pair consists of identical
+    residues, if possible.
+    :
+    ((a,-),(b,-),(c,d),(-,a),(-b)) ==>
+    ((a,a),(b,-),(c,-),(-,d),(-b)) OR ((a,-),(b,b),(c,-),(-,d)).
+    :
+    ((a,-),(c,d),(-,b)) ==> self.
+*/
 void fuse_ring_identities(int len_x,LPOLetter_T seq_x[],
 			  int len_y,LPOLetter_T seq_y[],
 			  LPOLetterRef_T al_x[],
@@ -16,17 +24,64 @@
   LOOP (i,len_y) {
     if (al_y[i]<0 || seq_x[al_y[i]].letter == seq_y[i].letter)
       continue; /* NOT ALIGNED, OR ALREADY IDENTICAL, SO SKIP */
-    for (j=seq_x[al_y[i]].align_ring;j!=al_y[i];j=seq_x[j].align_ring)
+    for (j=seq_x[al_y[i]].align_ring;j!=al_y[i];j=seq_x[j].align_ring) {
       if (seq_x[j].letter == seq_y[i].letter) { /* IDENTICAL! SO FUSE! */
 	al_x[al_y[i]]= INVALID_LETTER_POSITION; /* DISCONNECT FROM OLD */
 	al_y[i]=j; /* CONNECT TO NEW IDENTITY */
 	al_x[j]=i;
 	break; /* SEARCH YE NO FURTHER */
       }
+    }
   }
 }
 
+/** if two align-rings are aligned to each other, make sure
+    that as many identical-residue pairs are fused as possible.
+    :
+    ((a,-),(b,-),(c,d),(-,a),(-b)) ==>
+    ((a,a),(b,b),(c,-),(-,d)).
+    :
+    ((a,-),(c,d),(-,b)) ==> self.
 
+    NB:
+    THIS DOES NOT WORK with the current fuse_lpo function.
+    The fusion 
+*/
+void full_fuse_ring_identities (int len_x, LPOLetter_T *seq_x,
+				int len_y, LPOLetter_T *seq_y,
+				LPOLetterRef_T *al_x,
+				LPOLetterRef_T *al_y)
+{
+  int i, ip, j, jp;  /* i LABELS POS IN seq_x, j LABELS POS IN seq_y */
+  int ck;  /* WAS IDENTICAL-RESIDUE PAIR FOUND? */
+  for (i=0; i<len_x; i++) if ((j = al_x[i]) >= 0) {
+    al_x[i] = al_y[j] = INVALID_LETTER_POSITION;  /* DISCONNECT FROM OLD */
+    
+    /* i,j ARE AN ALIGNED PAIR.  WALK THROUGH RESPECTIVE RINGS: */
+    ip=i; jp=j; ck=0;
+    do /* while (ip!=i) */ {
+      do /* while (jp!=j) */ {
+	if (seq_x[ip].letter == seq_y[jp].letter) {  /* IDENTICAL! SO FUSE! */
+	  al_x[ip] = jp;
+	  al_y[jp] = ip;
+	  ck=1;
+	  jp=j;  /* EXIT TO OUTER LOOP... AT MOST ONE FUSED TO EACH POS IN seq_y. */
+	}
+	else {
+	  jp = seq_y[jp].align_ring;
+	}
+      }
+      while (jp!=j);
+      ip = seq_x[ip].align_ring;
+    }
+    while (ip!=i);
+    
+    if (ck==0) {  /* NO IDENTICAL-RESIDUE PAIR FOUND, SO RECONNECT ORIGINAL */
+      al_x[i] = j;
+      al_y[j] = i;
+    }
+  }
+}
 
 
 /** aligns the sequences in seq[] to the sequence or partial order in
@@ -37,7 +92,7 @@
 			   int nseq,LPOSequence_T seq[],
 			   ResidueScoreMatrix_T *score_matrix,
 			   int use_aggressive_fusion,
-			   int use_global_alignment)
+                           int use_global_alignment)
 {
   int i,max_alloc=0,total_alloc;
   LPOLetterRef_T *al1=NULL,*al2=NULL;
@@ -54,12 +109,12 @@
       fprintf(stderr,"max_alloc: %d bytes\n",max_alloc);
 #endif
       if (max_alloc>POA_MAX_ALLOC) {
-	WARN_MSG(TRAP,(ERRTXT,"Exceeded memory bound: %d\n Exiting!\n\n",max_alloc),"$Revision: 1.2.2.1 $");
+	WARN_MSG(TRAP,(ERRTXT,"Exceeded memory bound: %d\n Exiting!\n\n",max_alloc),"$Revision: 1.2.2.9 $");
 	break; /* JUST RETURN AND FINISH */
       }
     }
-    align_lpo(new_seq,&seq[i],
-	      score_matrix,&al1,&al2,use_global_alignment); /* ALIGN ONE MORE SEQ */
+    align_lpo_po (new_seq,&seq[i],
+		  score_matrix,&al1,&al2,NULL,use_global_alignment); /* ALIGN ONE MORE SEQ */
     if (use_aggressive_fusion) 
       fuse_ring_identities(new_seq->length,new_seq->letter,
 			   seq[i].length,seq[i].letter,al1,al2);
@@ -74,7 +129,7 @@
   return new_seq;
 }
 /**@memo example: aligning a set of sequences to a partial order: 
-      lpo_out=buildup_lpo(lpo_in,nseq,seq,&score_matrix,0);
+      lpo_out=buildup_lpo(lpo_in,nseq,seq,&score_matrix);
 */
 
 
@@ -154,7 +209,7 @@
 LPOSequence_T *buildup_clipped_lpo(LPOSequence_T *new_seq,
 				   int nseq,LPOSequence_T seq[],
 				   ResidueScoreMatrix_T *score_matrix,
-				   int use_global_alignment)
+                                   int use_global_alignment)
 {
   int i,ntemp,offset=0,nidentity,length_max=0,match_length=0;
   int total_alloc,max_alloc=0;
@@ -175,12 +230,12 @@
 	      new_seq->length,seq[i].length);
 #endif
       if (max_alloc>POA_MAX_ALLOC) {
-	WARN_MSG(TRAP,(ERRTXT,"Exceeded memory bound: %d\n Exiting!\n\n",max_alloc),"$Revision: 1.2.2.1 $");
+	WARN_MSG(TRAP,(ERRTXT,"Exceeded memory bound: %d\n Exiting!\n\n",max_alloc),"$Revision: 1.2.2.9 $");
 	break; /* JUST RETURN AND FINISH */
       }
     }
-    align_lpo(new_seq, &seq[i],
-	      score_matrix,&al1,&al2,use_global_alignment); /* ALIGN ONE MORE SEQ */
+    align_lpo_po (new_seq, &seq[i],
+		  score_matrix,&al1,&al2,NULL,use_global_alignment); /* ALIGN ONE MORE SEQ */
     ntemp=seq[i].length; /* SAVE letter[] BEFORE CLIPPING IT TO ALIGNED AREA*/
     temp=seq[i].letter;
     if ((nidentity=clip_unaligned_ends(seq+i,al2,/*THERE IS AN ALIGNED REGION*/
@@ -204,112 +259,229 @@
 }
 
 
-int find_seq_name(int nseq,LPOSequence_T seq[],char name[])
+/** which LPOSeq is called, or holds a sequence called, `name'? */
+int find_seq_name (int nseq, LPOSequence_T **seq, char name[])
 {
-  int i;
-  LOOP (i,nseq)
-    if (0==strcmp(seq[i].name,name))
+  int i,j;
+  for (i=0;i<nseq;i++) if (seq[i]) {
+    if (0==strcmp(seq[i]->name,name))
       return i;
+    for (j=0;j<seq[i]->nsource_seq;j++) {
+      if (0==strcmp(seq[i]->source_seq[j].name,name))
+	return i;
+    }
+  }
   return -1;
 }
 
 
 typedef struct {
   double score;
-  double bitscore;
   int i;
   int j;
-} SeqPairScore_T;
+}
+SeqPairScore_T;
 
 
-/* SORT IN ASCENDING ORDER BY score, THEN DESCENDING ORDER by bitscore */
-int seqpair_score_qsort_cmp(const void *void_a,const void *void_b)
+/* SORT IN DESCENDING ORDER BY score (SO HIGH SIMILARITY SCORES MERGE FIRST). */
+/* FOR TIES, USE ITERATIVE MERGE ORDER (1-2, then 1-3, then 1-4, etc.) */
+int seqpair_score_qsort_cmp (const void *void_a, const void *void_b)
 {
-  const SeqPairScore_T *a=(const SeqPairScore_T *)void_a,
-  *b=(const SeqPairScore_T *)void_b;
-  if (a->score < b->score)
+  const SeqPairScore_T *a = (const SeqPairScore_T *)void_a;
+  const SeqPairScore_T *b = (const SeqPairScore_T *)void_b;
+
+  if (a->score > b->score)
     return -1;
-  else if (a->score == b->score) {
-    if (a->bitscore > b->bitscore)
-      return -1;
-    else if (a->bitscore == b->bitscore)
-      return 0;
-  }
-  else
+  else if (a->score < b->score)
     return 1;
+  
+  if (a->i > b->i)
+    return 1;
+  else if (a->i < b->i)
+    return -1;
+  
+  if (a->j > b->j)
+    return 1;
+  else if (a->j < b->j)
+    return -1;
+  
+  return 0;
 }
 
 
-
-
-SeqPairScore_T *read_seqpair_scorefile(int nseq,LPOSequence_T seq[],
-			       FILE *ifile,int *p_nscore)
+SeqPairScore_T *read_seqpair_scorefile (int nseq, LPOSequence_T **seq,
+					ResidueScoreMatrix_T *score_matrix,
+					LPOScore_T (*scoring_function)
+					(int,int,LPOLetter_T [],LPOLetter_T [],
+					 ResidueScoreMatrix_T *),
+					int use_global_alignment,
+					int do_progressive, FILE *ifile, int *p_nscore)
 {
-  int i,j,nscore=0;
-  SeqPairScore_T *score=NULL;
-  double v,x;
+  int i,j,nscore=0,max_nscore=0;
+  int *adj_score = NULL;
+  SeqPairScore_T *score_list=NULL;
+  LPOLetterRef_T *al1=NULL,*al2=NULL;
+  double x, min_score=0.0;
   char name1[256],name2[256];
-
-  CALLOC(score,nseq*nseq/2,SeqPairScore_T);
-  while (fscanf(ifile," %s %s %lf %lf",name1,name2,
-		&v,&x)==4) { /*READ SCORE FILE*/
-    i=find_seq_name(nseq,seq,name1);      
-    j=find_seq_name(nseq,seq,name2);
-    if (i<0 || j<0) {
-      WARN_MSG(USERR,(ERRTXT,"invalid sequence pair, not found: %s,%s",name1,name2),"$Revision: 1.2.2.1 $");
-      FREE(score);
-      return NULL;
+  
+  CALLOC (adj_score, nseq, int);
+  
+  max_nscore = nseq*nseq;
+  CALLOC (score_list, max_nscore, SeqPairScore_T);
+    
+  if (ifile) { /* IF PAIR SCORE FILE (PROGRESSIVE ASSUMED) */
+    while (fscanf(ifile," %s %s %lf",name1,name2,&x)==3) {  /* READ SCORE FILE */
+      i=find_seq_name(nseq,seq,name1);
+      j=find_seq_name(nseq,seq,name2);
+      if (i<0 || j<0) {
+	WARN_MSG(USERR,(ERRTXT,"invalid sequence pair, not found: %s,%s",name1,name2),"$Revision: 1.2.2.9 $");
+	FREE (score_list);
+	FREE (adj_score);
+	return NULL;
+      }
+      
+      /* fprintf(stderr,"i=%d,j=%d,x=%.2f\n",i,j,x); */
+      fprintf(stderr,"Saving score from file %d (%s), %d (%s) : %.2f\n",i,name1,j,name2,x);
+      if (i<j) { int swap=i;i=j;j=swap; }  /* DON'T SAVE UPPER (DUPLICATE?) HALF OF THE MATRIX */
+      score_list[nscore].i = i;
+      score_list[nscore].j = j;
+      score_list[nscore].score = x;
+      if (x<min_score) min_score = x;
+      nscore++;
+      if (nscore==max_nscore) {
+	max_nscore *= 2;
+	REALLOC (score_list, max_nscore, SeqPairScore_T);
+      }
+      if (j==i-1) {
+	adj_score[i]=1;
+      }
     }
-
-    /*    fprintf(stderr,"i=%d,j=%d,x=%e\n",i,j,x);*/
-    if (i<j) { /* DON'T SAVE UPPER, DUPLICATE HALF OF THE MATRIX */
-      /*      fprintf(stderr,"Saving score %s,%s:%e\n",name1,name2,x);*/
-      score[nscore].score=x; /* SAVE THE SCORE INTO THE MATRIX */
-      score[nscore].bitscore=v;
-      score[nscore].i=i;
-      score[nscore].j=j;
+  }
+  else if (do_progressive) { /* IF PROGRESSIVE BUT NO PAIR SCORE FILE */
+    for (i=0;i<nseq;i++) for (j=0;j<i;j++) { /* SCORE IS BASED ON LOCAL ALIGNMENT */
+      x = align_lpo_po (seq[i],seq[j],
+			score_matrix,&al1,&al2,scoring_function,use_global_alignment); 
+      FREE(al1); /* DUMP TEMPORARY MAPPING ARRAYS */
+      FREE(al2);
+      fprintf(stderr,"Saving alignment score %d (%s), %d (%s) : %.2f\n",i,seq[i]->name,j,seq[j]->name,x);
+      score_list[nscore].i = i;
+      score_list[nscore].j = j;
+      score_list[nscore].score = x;
+      if (x<min_score) min_score = x;
       nscore++;
+      if (nscore==max_nscore) {
+	max_nscore *= 2;
+	REALLOC (score_list, max_nscore, SeqPairScore_T);
+      }
+      if (j==i-1) {
+	adj_score[i]=1;
+      }
     }
   }
-
-  /* NOW SORT THESE IN ASCENDING ORDER AND HAND BACK TO CALLER */
-  qsort(score,nscore,sizeof(SeqPairScore_T),seqpair_score_qsort_cmp);
+  else {  /* NOT DOING PROGRESSIVE ALIGNMENT */
+    /* USE DEFAULT (=0.0) PAIRSCORES, ENSURING ITERATIVE ALIGNMENT: */
+    fprintf(stderr,"Performing iterative alignment...\n");
+  }
+  
+  for (i=1;i<nseq;i++) {
+    if (0 == adj_score[i]) {
+      score_list[nscore].i = i;
+      score_list[nscore].j = i-1;
+      score_list[nscore].score = min_score - 1.0;
+      nscore++;
+      if (nscore==max_nscore) {
+	max_nscore *= 2;
+	REALLOC (score_list, max_nscore, SeqPairScore_T);
+      }
+    }
+  }
+  FREE (adj_score);
+  
+  /* NOW SORT LIST IN DESCENDING ORDER AND HAND BACK TO CALLER: */
+  qsort(score_list,nscore,sizeof(SeqPairScore_T),seqpair_score_qsort_cmp);
   if (p_nscore) /* RETURN LENGTH OF PAIR SCORE TABLE IF REQUESTED */
     *p_nscore=nscore;
-  return score;
+  return score_list;
 }
 
 
-LPOSequence_T *buildup_progressive_lpo(int nseq,LPOSequence_T seq[],
+LPOSequence_T *buildup_progressive_lpo(int nseq,LPOSequence_T **all_seqs,
 				       ResidueScoreMatrix_T *score_matrix,
 				       int use_aggressive_fusion,
-				       char score_file[],
+                                       int do_progressive,
+				       char score_file[], 
 				       LPOScore_T (*scoring_function)
 				       (int,int,LPOLetter_T [],LPOLetter_T [],
 					ResidueScoreMatrix_T *),
-				       int use_global_alignment)
+                                       int use_global_alignment,
+				       int preserve_sequence_order)
 {
-  int i,j,max_alloc=0,total_alloc;
-  LPOLetterRef_T *al1=NULL,*al2=NULL;
+  int i,j,k,max_alloc=0,total_alloc,min_counts=0;
   SeqPairScore_T *score=NULL;
-  FILE *ifile;
   LPOSequence_T *new_seq=NULL;
+  FILE *ifile=NULL;
   int *seq_cluster=NULL,cluster_i,cluster_j,nscore=0,iscore;
+  int *initial_nseq, *cluster_size, *seq_id_in_cluster;
+  int nseq_tot;
   
-  ifile=fopen(score_file,"r");
-  if (ifile) {
-    if ((score=read_seqpair_scorefile(nseq,seq,ifile,&nscore))==NULL)
+  
+  /* INITIALIZE ALL UNINITIALIZED SEQS: */
+  for (i=0;i<nseq;i++) {
+    if (all_seqs[i]->letter == NULL) {
+      initialize_seqs_as_lpo(1,all_seqs[i],score_matrix);
+    }
+    lpo_index_symbols(all_seqs[i],score_matrix); /* MAKE SURE LPO IS TRANSLATED */  
+  }
+
+  /* RETURN IF NOTHING TO ALIGN */
+  if (nseq<=0)
+    return NULL;
+  else if (nseq==1)
+    return all_seqs[0];
+  
+  
+  new_seq = all_seqs[0];
+    
+  CALLOC(seq_cluster,nseq,int);  /* MAPS SEQS (or CLUSTERS) TO CLUSTER THEY'RE IN */
+  CALLOC(seq_id_in_cluster,nseq,int);  /* INDEXES SEQS (or CLUSTERS) WITHIN EACH CLUSTER */
+  CALLOC(cluster_size,nseq,int);  /* COUNTS SEQS IN SAME CLUSTER (updated w/ merges) */
+  CALLOC(initial_nseq,nseq,int);  /* COUNTS SEQS INITIALLY IN EACH CLUSTER (not updated w/ merges) */
+  
+  for (i=nseq_tot=0;i<nseq;i++) {
+    seq_cluster[i] = i;  /* CREATE TRIVIAL MAPPING, EACH SEQ ITS OWN CLUSTER */
+    seq_id_in_cluster[i] = 0;
+    cluster_size[i] = all_seqs[i]->nsource_seq;
+    initial_nseq[i] = cluster_size[i];
+    nseq_tot += cluster_size[i];
+  }
+    
+  if (score_file) {
+    ifile=fopen(score_file,"r");
+    if (ifile==NULL) {
+      WARN_MSG(USERR,(ERRTXT,"Error reading pair score file %s.\nExiting",
+			score_file),"$Revision: 1.2.2.9 $");
       goto free_and_exit;
-    fclose(ifile);
+    }
   }
-  else
+  else {
+    ifile = NULL;
+  }
+  
+  
+  score = read_seqpair_scorefile(nseq,all_seqs,score_matrix,scoring_function,use_global_alignment,
+				 do_progressive,ifile,&nscore);
+  if (score==NULL) {
+    WARN_MSG(USERR,(ERRTXT,"Error generating pair scores (file %s).\nExiting",
+		    score_file ? score_file : "unspecified"),"$Revision: 1.2.2.9 $");
     goto free_and_exit;
-
-  CALLOC(seq_cluster,nseq,int); /* MAPS SEQS TO CLUSTER THEY'RE IN */
-  LOOP (i,nseq) /* CREATE TRIVIAL MAPPING, EACH SEQ ITS OWN CLUSTER */
-    seq_cluster[i]=i;
-
+  }
+  if (ifile)
+    fclose (ifile);
+  
   for (iscore=0;iscore<nscore;iscore++) {
+    
+    /* NB: NEW CLUSTER ID WILL BE MINIMUM OF INPUT IDs,
+       SO MASTER CLUSTER WILL ALWAYS BE CLUSTER 0. */
     if (seq_cluster[score[iscore].i] < seq_cluster[score[iscore].j]) {
       cluster_i=seq_cluster[score[iscore].i];
       cluster_j=seq_cluster[score[iscore].j];
@@ -321,56 +493,98 @@
     else /* CLUSTERS ALREADY FUSED, SO SKIP THIS PAIR */
       continue;
 
-    fprintf(stderr,"Fusing cluster %s --> %s... score %e,%lf\n",
-	    seq[cluster_j].name,seq[cluster_i].name,score[iscore].score,
-	    score[iscore].bitscore);
-    new_seq=seq+cluster_i; /* THIS WILL BECOME THE NEW MASTER CLUSTER */
-    if (seq[cluster_i].letter == NULL) /* NOT INITIALIZED AT ALL YET */
-      initialize_seqs_as_lpo(1,seq+cluster_i,score_matrix);
-    if (seq[cluster_j].letter == NULL) /* NOT INITIALIZED AT ALL YET */
-      initialize_seqs_as_lpo(1,seq+cluster_j,score_matrix);
-    total_alloc=new_seq->length*seq[cluster_j].length
-      + sizeof(LPOLetter_T)*new_seq->length;
+    fprintf(stderr,"Fusing cluster %d (%s, nseq=%d) --> %d (%s, nseq=%d)... score %.2f\n",
+	    cluster_j,all_seqs[cluster_j]->name,all_seqs[cluster_j]->nsource_seq,
+	    cluster_i,all_seqs[cluster_i]->name,all_seqs[cluster_i]->nsource_seq,
+	    score[iscore].score);
+    
+    new_seq = all_seqs[cluster_i];
+    total_alloc = new_seq->length * (sizeof(LPOLetter_T) + all_seqs[cluster_j]->length);
     if (total_alloc>max_alloc) { /* DP RECTANGLE ARRAY SIZE */
       max_alloc=total_alloc;
 #ifdef REPORT_MAX_ALLOC
       fprintf(stderr,"max_alloc: %d bytes\n",max_alloc);
 #endif
       if (max_alloc>POA_MAX_ALLOC) {
-	WARN_MSG(TRAP,(ERRTXT,"Exceeded memory bound: %d\n Exiting!\n\n",max_alloc),"$Revision: 1.2.2.1 $");
+	WARN_MSG(TRAP,(ERRTXT,"Exceeded memory bound: %d\n Exiting!\n\n",max_alloc),"$Revision: 1.2.2.9 $");
 	break; /* JUST RETURN AND FINISH */
       }
     }
-
+    
 #ifdef USE_LOCAL_NEUTRALITY_CORRECTION /* NO LONGER USED */
     if (score_matrix->nfreq>0) { /* CALCULATE BALANCED SCORING ON EACH PO */
       balance_matrix_score(new_seq->length,new_seq->letter,score_matrix);
-      balance_matrix_score(seq[cluster_j].length,seq[cluster_j].letter,
+      balance_matrix_score(all_seqs[cluster_j]->length,all_seqs[cluster_j]->letter,
 			   score_matrix);
     }
 #endif
 
-    align_lpo_po(new_seq, &seq[cluster_j],
-		 score_matrix,&al1,&al2,
-		 scoring_function,use_global_alignment); /* ALIGN ONE MORE SEQ */
-    if (use_aggressive_fusion) 
-      fuse_ring_identities(new_seq->length,new_seq->letter,
-			   seq[cluster_j].length,seq[cluster_j].letter,
-			   al1,al2);
-    fuse_lpo(new_seq,seq+cluster_j,al1,al2); /* BUILD COMPOSITE LPO */
+    buildup_pairwise_lpo(new_seq,all_seqs[cluster_j],score_matrix,
+			 use_aggressive_fusion,
+                         scoring_function,use_global_alignment);
 
-    free_lpo_letters(seq[cluster_j].length,seq[cluster_j].letter,TRUE);
-    seq[cluster_j].letter=NULL; /*MARK AS FREED. DON'T LEAVE DANGLING POINTER*/
-    FREE(al1); /* DUMP TEMPORARY MAPPING ARRAYS */
-    FREE(al2);
+    LOOP (i,nseq) {  /* APPEND ALL MEMBERS OF cluster_j TO cluster_i */
+      if (seq_cluster[i] == cluster_j) {
+	seq_cluster[i] = cluster_i;
+      	seq_id_in_cluster[i] += cluster_size[cluster_i];
+      }
+    }
+    cluster_size[cluster_i] += cluster_size[cluster_j];
+    cluster_size[cluster_j] = 0;
+  }
+  
+  if (preserve_sequence_order) {  /* PUT SEQUENCES WITHIN LPO BACK IN THEIR ORIGINAL ORDER: */
+    int *perm;
+    CALLOC (perm, nseq_tot, int);
 
-    LOOP (i,nseq) /* REINDEX ALL MEMBERS OF cluster_j TO JOIN cluster_i */
-      if (seq_cluster[i]==cluster_j)
-	seq_cluster[i]=cluster_i;
+    for (i=nseq_tot=0; i<nseq; i++) {
+      for (j=0; j<initial_nseq[i]; j++) {
+	perm[seq_id_in_cluster[i] + j] = (nseq_tot++);
+      }
+    }
+    for (i=0; i<nseq_tot; i++) printf ("%d ", perm[i]); printf ("\n");
+
+    reindex_lpo_source_seqs (new_seq, perm);
+    FREE (perm);
   }
+  
+  free_and_exit:
+  FREE (initial_nseq);
+  FREE (seq_cluster);
+  FREE (cluster_size);
+  FREE (seq_id_in_cluster);
+  FREE (score);
+  
+  return new_seq; /* RETURN THE FINAL MASTER CLUSTER... */
+}
 
- free_and_exit:
-  FREE(score);
-  FREE(seq_cluster);
-  return new_seq; /* RETURN THE FINAL MASTER CLUSTER */
+
+LPOSequence_T *buildup_pairwise_lpo(LPOSequence_T seq1[],LPOSequence_T seq2[],
+				    ResidueScoreMatrix_T *score_matrix,
+				    int use_aggressive_fusion,
+       				    LPOScore_T (*scoring_function)
+				    (int,int,LPOLetter_T [],LPOLetter_T [],
+				     ResidueScoreMatrix_T *),
+                                    int use_global_alignment)
+{
+  int min_counts1=0;
+  int min_counts2=0;
+  LPOLetterRef_T *al1=NULL,*al2=NULL;
+
+  lpo_index_symbols(seq1,score_matrix); /* MAKE SURE LPO IS TRANSLATED */  
+  lpo_index_symbols(seq2,score_matrix); /* MAKE SURE LPO IS TRANSLATED */
+  align_lpo_po (seq1, seq2, score_matrix, &al1, &al2,
+		scoring_function, use_global_alignment); /* ALIGN TWO POS */
+  if (use_aggressive_fusion) 
+     fuse_ring_identities(seq1->length,seq1->letter,
+			  seq2->length,seq2->letter,al1,al2);
+  fuse_lpo(seq1,seq2,al1,al2); /* BUILD COMPOSITE LPO */
+  
+  /* FREE LETTERS IN SECOND LPO */
+  free_lpo_letters(seq2->length,seq2->letter,TRUE);
+  seq2->letter=NULL; /*MARK AS FREED. DON'T LEAVE DANGLING POINTER*/
+  FREE(al1); /* DUMP TEMPORARY MAPPING ARRAYS */
+  FREE(al2);
+  return seq1; /* RETURN THE FINAL LPO */
 }
+

Modified: trunk/packages/poa/trunk/debian/changelog
===================================================================
--- trunk/packages/poa/trunk/debian/changelog	2006-09-26 04:35:01 UTC (rev 128)
+++ trunk/packages/poa/trunk/debian/changelog	2006-09-26 04:38:07 UTC (rev 129)
@@ -1,3 +1,10 @@
+poa (2.0-2) unstable; urgency=low
+
+  * New upstream version.
+  * Added a test rule.
+
+ -- Charles Plessy <charles-debian-nospam at plessy.org>  Tue, 26 Sep 2006 11:17:41 +0900
+
 poa (2.0-1) unstable; urgency=low
 
   * Initial release Closes: #378288

Modified: trunk/packages/poa/trunk/debian/rules
===================================================================
--- trunk/packages/poa/trunk/debian/rules	2006-09-26 04:35:01 UTC (rev 128)
+++ trunk/packages/poa/trunk/debian/rules	2006-09-26 04:38:07 UTC (rev 129)
@@ -30,6 +30,8 @@
 	-$(MAKE) clean
 	dh_clean debian/poa.1
 
+test:
+	./poa -read_fasta multidom.seq -clustal /dev/stdout -v blosum80.mat
 
 install: build
 	dh_testdir

Modified: trunk/packages/poa/trunk/lpo.c
===================================================================
--- trunk/packages/poa/trunk/lpo.c	2006-09-26 04:35:01 UTC (rev 128)
+++ trunk/packages/poa/trunk/lpo.c	2006-09-26 04:38:07 UTC (rev 129)
@@ -258,8 +258,53 @@
 }
 
 
+void reindex_lpo_source_seqs (LPOSequence_T *seq, int *perm)
+{
+  int i, j, len, nseq, *map, *invmap;
+  LPOSourceInfo_T tmp;
+  LPOLetterSource_T *src;
+  
+  len = seq->length;
+  nseq = seq->nsource_seq;
+  
+  CALLOC (map, len, int);
+  CALLOC (invmap, len, int);
+  
+  /* BUILD INITIAL MAP AND INVERSE MAP: */
+  for (i=0; i<nseq; i++) {
+    map[i] = invmap[i] = -1;
+  }
+  for (i=0; i<nseq; i++) {
+    map[i] = perm[i];
+    IF_GUARD (map[i]>=nseq || map[i]<0 || invmap[map[i]]!=-1, 1.1, (ERRTXT,"Bad argument! 'perm' must be a permutation of [0,%d]\n",nseq-1), CRASH);
+    invmap[map[i]] = i;
+  }
+  
+  /* RENUMBER SEQS IN 'source_info' ENTRIES FOR ALL LETTERS: */
+  for (i=0; i<len; i++) {
+    for (src=&(seq->letter[i].source); src!=NULL && src->ipos>=0; src=src->more) {
+      src->iseq = map[src->iseq];
+    }
+  }
 
+  /* SHUFFLE 'source_seq' ENTRIES, IN-PLACE, TO NEW ORDER: */
+  /* (THIS DESTROYS map AND invmap.) */
+  for (i=0; i<nseq; i++) if ((j = invmap[i]) != i) {
+    /* SWAP POSITIONS i AND j: */
+    tmp = seq->source_seq[i];
+    seq->source_seq[i] = seq->source_seq[j];
+    seq->source_seq[j] = tmp;
+    /* UPDATE map AND invmap: */
+    map[j] = map[i];
+    invmap[map[i]] = j;
+    map[i] = invmap[i] = i;
+  }
+  
+  FREE (map);
+  FREE (invmap);
+}
 
+
 void copy_lpo_letter(LPOLetter_T *new,LPOLetter_T *old,
 		     LPOLetterRef_T old_to_new[],
 		     int iseq_new[])

Modified: trunk/packages/poa/trunk/lpo.h
===================================================================
--- trunk/packages/poa/trunk/lpo.h	2006-09-26 04:35:01 UTC (rev 128)
+++ trunk/packages/poa/trunk/lpo.h	2006-09-26 04:38:07 UTC (rev 129)
@@ -13,6 +13,11 @@
 void initialize_seqs_as_lpo(int nseq, Sequence_T seq[],ResidueScoreMatrix_T *m);
 void lpo_index_symbols(LPOSequence_T *lpo,ResidueScoreMatrix_T *m);
 
+/** reindex source sequences in `seq' so that the sequence in position i
+    ends up in position perm[i].  `perm' must be a permutation of the
+    integers [0,seq->nsource_seq-1]. */
+void reindex_lpo_source_seqs (LPOSequence_T *seq, int *perm);
+
 int save_lpo_source(LPOSequence_T *seq,
 		     char name[],
 		     char title[],
@@ -90,15 +95,25 @@
 				  ResidueScoreMatrix_T *score_matrix,
 				   int use_global_alignment);
 
-LPOSequence_T *buildup_progressive_lpo(int nseq,LPOSequence_T seq[],
+LPOSequence_T *buildup_progressive_lpo(int nseq, LPOSequence_T **seqs,
 				       ResidueScoreMatrix_T *score_matrix,
 				       int use_aggressive_fusion,
-				       char score_file[],
+                                       int do_progressive,
+				       char score_file[], 
 				       LPOScore_T (*scoring_function)
 				       (int,int,LPOLetter_T [],LPOLetter_T [],
 					ResidueScoreMatrix_T *),
-				       int use_global_alignment);
+                                       int use_global_alignment,
+				       int preserve_sequence_order);
 				       
+LPOSequence_T *buildup_pairwise_lpo(LPOSequence_T seq1[],LPOSequence_T seq2[],
+				    ResidueScoreMatrix_T *score_matrix,
+				    int use_aggressive_fusion,
+				    LPOScore_T (*scoring_function)
+				    (int,int,LPOLetter_T [],LPOLetter_T [],
+				     ResidueScoreMatrix_T *),
+                                    int use_global_alignment);
+				    
 /**************************************************** lpo_format.c */
 void write_lpo(FILE *ifile,LPOSequence_T *seq,
 	       ResidueScoreMatrix_T *score_matrix);

Modified: trunk/packages/poa/trunk/lpo_format.c
===================================================================
--- trunk/packages/poa/trunk/lpo_format.c	2006-09-26 04:35:01 UTC (rev 128)
+++ trunk/packages/poa/trunk/lpo_format.c	2006-09-26 04:38:07 UTC (rev 129)
@@ -54,13 +54,15 @@
   int i,j,length,nsource_seq,istart,field_id,*pos_count=NULL,value;
   int weight,bundle_id,last_alloc=0;
   LPOSequence_T *seq=NULL;
-  char c,name[1024],title[4096],version[256];
+  char c,name[1024]="",title[4096]="",version[256]="";
   LPOLetterSource_T save_source={0,0,NULL};
 
   CALLOC(seq,1,LPOSequence_T);
   fscanf(ifile,"VERSION=%s",version);
-  if (fscanf(ifile," NAME=%[^\n] TITLE=%[^\n] LENGTH=%d SOURCECOUNT=%d",
-	     name,title,&length,&nsource_seq)!=4)
+  fscanf(ifile," NAME=%[^\n]",name);
+  fscanf(ifile," TITLE=%[^\n]",title);
+  if (fscanf(ifile," LENGTH=%d SOURCECOUNT=%d",
+	     &length,&nsource_seq)!=2)
     return NULL;
   STRNCPY(seq->name,name,SEQUENCE_NAME_MAX);
   seq->title=strdup(title);
@@ -146,7 +148,7 @@
   int nlink,*link_list=NULL,*match_pos=NULL,*ring_old=NULL;
   int *pos_compact=NULL,npos_compact=0,keep_this_letter,retention_mode;
   LPOSequence_T *seq=NULL;
-  char c,name[1024],title[4096],version[256];
+  char c,name[1024]="",title[4096]="",version[256]="";
   LPOLetterSource_T save_source={0,0,NULL},*source=NULL;
 
   if (remove_listed_sequences)
@@ -156,8 +158,10 @@
 
   CALLOC(seq,1,LPOSequence_T);
   fscanf(ifile,"VERSION=%s",version);
-  if (fscanf(ifile," NAME=%[^\n] TITLE=%[^\n] LENGTH=%d SOURCECOUNT=%d",
-	     name,title,&length,&nsource_seq)!=4)
+  fscanf(ifile," NAME=%[^\n]",name);
+  fscanf(ifile," TITLE=%[^\n]",title);
+  if (fscanf(ifile," LENGTH=%d SOURCECOUNT=%d",
+	     &length,&nsource_seq)!=2)
     return NULL;
   STRNCPY(seq->name,name,SEQUENCE_NAME_MAX);
   seq->title=strdup(title);
@@ -174,9 +178,13 @@
   }
 
   LOOPF(i,nsource_seq) { /* SAVE SOURCE INFO LIST */
-    if (fscanf(ifile," SOURCENAME=%[^\n] SOURCEINFO=%d %d %d %d %[^\n]",
-	       name,&length,&istart,&weight,&bundle_id,title)!=6)
+    if (fscanf(ifile," SOURCENAME=%[^\n] SOURCEINFO=%d %d %d %d",
+	       name,&length,&istart,&weight,&bundle_id)!=5)
       return NULL;
+    /* SKIP WHITESPACE BEFORE TITLE; ALLOW EMPTY TITLE. */
+    fscanf(ifile,"%*[ \t]");
+    if (fscanf(ifile,"%[^\n]",title)!=1)
+      title[0] = '\0';
     STRNCPY(seq->source_seq[i].name,name,SEQUENCE_NAME_MAX);
     seq->source_seq[i].length=length;
     seq->source_seq[i].istart=istart;
@@ -376,10 +384,6 @@
   }
   if (p_seq_pos)
     *p_seq_pos = seq_pos;
-  if (p_seq_pos)
-    *p_seq_pos = seq_pos;
-  if (p_seq_pos)
-    *p_seq_pos = seq_pos;
   return nring;
 }
 

Modified: trunk/packages/poa/trunk/main.c
===================================================================
--- trunk/packages/poa/trunk/main.c	2006-09-26 04:35:01 UTC (rev 128)
+++ trunk/packages/poa/trunk/main.c	2006-09-26 04:38:07 UTC (rev 129)
@@ -1,76 +1,80 @@
 
 
-#include <lpo.h>
+#include "lpo.h"
+#include "msa_format.h"
+#include "align_score.h"
 
-extern LPOScore_T caties_scoring_function(int i,
-				   int j,
-				   LPOLetter_T seq_x[],
-				   LPOLetter_T seq_y[],
-				   ResidueScoreMatrix_T *m)
-     ;  /* INCLUDE THIS HERE SO WE CAN PASS IT TO buildup_progressive_lpo() */
 
+static LPOSequence_T *read_partial_order_file (char *po_filename, char *subset_filename, int remove_listed_seqs, int keep_all_links, int do_switch_case, ResidueScoreMatrix_T *mat);
 
-
 int main(int argc,char *argv[])
 {
-  int i,ibundle=ALL_BUNDLES,nframe_seq=0,use_reverse_complement=0;
+  int i,j,ibundle=ALL_BUNDLES,nframe_seq=0,use_reverse_complement=0;
   int nseq=0,do_switch_case=dont_switch_case,do_analyze_bundles=0;
-  char score_file[256],seq_file[256],*comment=NULL,*al_name="test align";
+  int nseq_in_list=0,n_input_seqs=0,max_input_seqs=0;
+  char score_file[256],seq_file[256],po_list_entry_filename[256],*comment=NULL,*al_name="test align";
   ResidueScoreMatrix_T score_matrix; /* DEFAULT GAP PENALTIES*/
-  Sequence_T *seq=NULL,*lpo_out=NULL,*frame_seq=NULL,*dna_lpo=NULL,
-  *lpo_in=NULL;
-  FILE *seq_ifile=NULL,*errfile=stderr,*logfile=NULL,*lpo_file_out=NULL,
-    *subset_ifile=NULL;
+  LPOSequence_T *seq=NULL,*lpo_out=NULL,*frame_seq=NULL,*dna_lpo=NULL,*lpo_in=NULL;
+  LPOSequence_T **input_seqs=NULL;
+  FILE *errfile=stderr,*logfile=NULL,*lpo_file_out=NULL,*po_list_file=NULL,*seq_ifile=NULL;
   char *print_matrix_letters=NULL,*fasta_out=NULL,*po_out=NULL,*matrix_filename=NULL,
-  *seq_filename=NULL,*frame_dna_filename=NULL,*po_filename=NULL,
-  *hbmin=NULL,*numeric_data=NULL,*numeric_data_name="Nmiscall",*dna_to_aa=NULL,
-    *pair_score_file=NULL,*aafreq_file=NULL,*termval_file=NULL,
-    *bold_seq_name=NULL,*subset_file=NULL,*rm_subset_file=NULL;
+    *seq_filename=NULL,*frame_dna_filename=NULL,*po_filename=NULL,*po2_filename=NULL,
+    *po_list_filename=NULL, *hbmin=NULL,*numeric_data=NULL,*numeric_data_name="Nmiscall",
+    *dna_to_aa=NULL,*pair_score_file=NULL,*aafreq_file=NULL,*termval_file=NULL,
+    *bold_seq_name=NULL,*subset_file=NULL,*subset2_file=NULL,*rm_subset_file=NULL,
+    *rm_subset2_file=NULL;
   float bundling_threshold=0.9;
   int exit_code=0,count_sequence_errors=0,please_print_snps=0,
     report_consensus_seqs=0,report_major_allele=0,use_aggressive_fusion=0;
   int show_allele_evidence=0,please_collapse_lines=0,keep_all_links=0;
-  int remove_listed_seqs=0,please_report_similarity;
+  int remove_listed_seqs=0,remove_listed_seqs2=0,please_report_similarity;
+  int do_global=0, do_progressive=0, do_preserve_sequence_order=0;
   char *reference_seq_name="CONSENS%d",*clustal_out=NULL;
-  int use_global_alignment=0;
-  
 
   black_flag_init(argv[0],PROGRAM_VERSION);
 
   if (argc<2) {
-    fprintf(stderr,"\nUsage: %s [OPTIONS] matrixfile\n\n"
-"  OPTIONS\t\t\tFUNCTION\n"
-"  -------\t\t\t________\n"
+    fprintf(stderr,"\nUsage: %s [OPTIONS] MATRIXFILE\n"
+"Align a set of sequences or alignments using the scores in MATRIXFILE.\n"
+"Example: %s -read_fasta multidom.seq -clustal m.aln blosum80.mat\n\n"
 "INPUT:\n"
-"  -read_fasta FILENAME\t\tReads in FASTA sequence file.\n"
-"  -tolower\t\t\tForces FASTA sequences to lowercase\n"
-"\t\t\t\t(nucleotides in our matrix files)\n"
-"  -toupper\t\t\tForces FASTA sequences to UPPERCASE\n"
-"\t\t\t\t(amino acids in our matrix files)\n"
-"  -read_po FILENAME\t\tReads in PO file.\n"
-"  -subset FILENAME\t\tFilters PO-MSA to include list of seqs in file.\n"
-"  -remove FILENAME\t\tFilters PO-MSA to exclude list of seqs in file.\n"
-"\n"
+"  -read_fasta FILE       Read in FASTA sequence file.\n"
+"  -read_msa FILE         Read in MSA alignment file.\n"
+"  -read_msa2 FILE        Read in second MSA file. \n" 
+"  -subset FILE           Filter MSA to include list of seqs in file.\n"
+"  -subset2 FILE          Filter second MSA to include list of seqs in file.\n"
+"  -remove FILE           Filter MSA to exclude list of seqs in file.\n"
+"  -remove2 FILE          Filter second MSA to exclude list of seqs in file.\n"
+"  -read_msa_list FILE    Read an MSA from each filename listed in file.\n"
+"  -tolower               Force FASTA/MSA sequences to lowercase\n"
+"                           (nucleotides in our matrix files)\n"
+"  -toupper               Force FASTA/MSA sequences to UPPERCASE\n"
+"                           (amino acids in our matrix files)\n"
 "\nALIGNMENT:\n"
-"  -pair_score FILENAME\t\tReads tab-delimited file of sequence-sequence\n"
-"\t\t\t\tsimilarity scores for constructing a guide-tree\n"
-"\t\t\t\tand performing progressive alignment using\n"
-"\t\t\t\tPO-PO alignment steps.\n"
-"  -fuse_all\t\t\tFuses identical letters on align rings.\n"
+"  -do_global             Do global alignment.\n"
+"  -do_progressive        Perform progressive alignment using a guide tree\n"
+"                           built by neighbor joining from a set of\n"
+"                           sequence-sequence similarity scores.\n"
+"  -read_pairscores FILE  Read tab-delimited file of similarity scores.\n"
+"                           (If not provided, scores are constructed\n"
+"                           using pairwise sequence alignment.)\n"
+"  -fuse_all              Fuse identical letters on align rings.\n"
 "\nANALYSIS:\n"
-"  -hb\t\t\t\tPerforms heaviest bundling to generate consensi.\n"
-"  -hbmin VALUE\t\t\tBundles into heaviest bundle seqs with percent id >= value.\n"
+"  -hb                    Perform heaviest bundling to generate consensi.\n"
+"  -hbmin VALUE           Include in heaviest bundle sequences with\n"
+"                           percent ID (as a fraction) >= value.\n"
 "\nOUTPUT:\n"
-"  -best\t\t\t\tRestricts MSA output to heaviest bundles.\n"
-"  -pir FILENAME\t\t\tWrites out MSA in PIR format.\n"
-"  -clustal FILENAME\t\tWrites out MSA in CLUSTAL format.\n"
-"  -po FILENAME\t\t\tWrites out MSA in PO format.\n"
-"  -printmatrix LETTERSET\tPrints score matrix to stdout.\n"
-"  -v\t\t\t\tRuns in verbose mode.\n\n"
-"  Note:  Either the -read_fasta or -read_po argument must be used with poa,\n" 
-"         since a FASTA file or PO file must be read in by poa.\n\n"
-"For more information, see http://www.bioinformatics.ucla.edu/poa\n\n"
-	    ,argv[0]);
+"  -pir FILE              Write out MSA in PIR format.\n"
+"  -clustal FILE          Write out MSA in CLUSTAL format.\n"
+"  -po FILE               Write out MSA in PO format.\n"
+"  -preserve_seqorder     Write out MSA with sequences in their input order.\n"
+"  -printmatrix LETTERS   Print score matrix to stdout.\n"
+"  -best                  Restrict MSA output to heaviest bundles (PIR only).\n"
+"  -v                     Run in verbose mode (e.g. output gap penalties).\n\n"
+"  NOTE:  One of the -read_fasta, -read_msa, or -read_msa_list arguments\n" 
+"         must be used, since a sequence or alignment file is required.\n\n"
+"For more information, see http://www.bioinformatics.ucla.edu/poa.\n\n"
+	    ,argv[0],argv[0]);
     exit(-1);
   }
 
@@ -81,160 +85,237 @@
     ARGMATCH_VAL("-best",ibundle,0); /*RESTRICT FASTA OUTPUT TO HB */
     ARGMATCH_VAL("-hb",do_analyze_bundles,1);/*CALCULATE HEAVIEST BUNDLING*/
     ARGGET("-printmatrix",print_matrix_letters);
-    ARGGET("-read_po",po_filename); /* READ A PO FILE FOR ALIGNMENT/ANALYSIS*/
+    ARGGET("-read_msa",po_filename); /* READ A MSA FILE FOR ALIGNMENT/ANALYSIS*/
+    ARGGET("-read_msa2",po2_filename); /* READ A SECOND MSA FILE FOR ALIGNMENT/ANALYSIS*/  
+    ARGGET("-read_msa_list",po_list_filename); /* READ A LIST OF MSAs FOR ALIGNMENT/ANALYSIS */
     ARGGET("-pir",fasta_out); /* SAVE FASTA-PIR FORMAT ALIGNMENT FILE */
     ARGGET("-clustal",clustal_out); /* SAVE CLUSTAL FORMAT ALIGNMENT FILE */
     ARGGET("-po",po_out); /* SAVE PO FORMAT ALIGNMENT FILE */
+    ARGMATCH("-preserve_seqorder",do_preserve_sequence_order);  /* DO PRESERVE SEQUENCE ORDER */
     ARGGET("-hbmin",hbmin); /* SET THRESHOLD FOR BUNDLING */
     ARGMATCH("-fuse_all",use_aggressive_fusion);
-    ARGMATCH("-global",use_global_alignment);
-    ARGGET("-pair_score",pair_score_file); /* FILENAME TO READ PAIR SCORES*/
+    ARGMATCH("-do_global",do_global); /* DO GLOBAL */
+    ARGGET("-read_pairscores",pair_score_file); /* FILENAME TO READ PAIR SCORES*/
+    ARGMATCH("-do_progressive", do_progressive); /* DO PROGRESSIVE ALIGNMENT */
     ARGGET("-subset",subset_file); /* FILENAME TO READ SEQ SUBSET LIST*/
+    ARGGET("-subset2",subset2_file); /* FILENAME TO READ SEQ SUBSET LIST*/
     ARGGET("-remove",rm_subset_file); /* FILENAME TO READ SEQ REMOVAL LIST*/
+    ARGGET("-remove2",rm_subset2_file); /* FILENAME TO READ SEQ REMOVAL LIST*/
     ARGGET("-read_fasta",seq_filename); /* READ FASTA FILE FOR ALIGNMENT */
-    NEXTARG(matrix_filename); /* NON-FLAG ARG SHOULD BE MATRIX FILE */
-   
+    NEXTARG(matrix_filename); /* NON-FLAG ARG SHOULD BE MATRIX FILE */   
   }
 
-  if (rm_subset_file) { /* TREAT SUBSET FILE AS LIST OF SEQS TO REMOVE */
-    subset_file=rm_subset_file;
-    remove_listed_seqs=1;
+  
+  /** CHECK FOR CONFLICTING FLAGS **/
+  
+  if (po_list_filename && (po_filename || po2_filename)) {
+    WARN_MSG(USERR,(ERRTXT, "Error: The -read_po_list and -read_po flags cannot be used at the same time.\nExiting."), "$Revision: 1.2.2.9 $");
+    exit_code = 1;
+    goto free_memory_and_exit;
   }
-
+  
+  if (((subset_file || rm_subset_file) && !po_filename) || ((subset2_file || rm_subset2_file) && !po2_filename)) {
+    WARN_MSG(USERR,(ERRTXT, "Error: Each -subset/-remove flag must have a corresponding -read_po flag.\nExiting."),"$Revision: 1.2.2.9 $");
+    exit_code = 1;
+    goto free_memory_and_exit;
+  }
+  
+  if ((subset_file && rm_subset_file) || (subset2_file && rm_subset2_file)) {
+    WARN_MSG(USERR,(ERRTXT, "Error: The -subset and -remove flags cannot be used at the same time.\nExiting."),"$Revision: 1.2.2.9 $");
+    exit_code = 1;
+    goto free_memory_and_exit;
+  }
+  
+  if (rm_subset_file) {
+    subset_file = rm_subset_file;
+    remove_listed_seqs = 1;
+  }
+  if (rm_subset2_file) {
+    subset2_file = rm_subset2_file;
+    remove_listed_seqs2 = 1;
+  }
+  
   if (hbmin)
-    bundling_threshold=atof(hbmin);
+    bundling_threshold=atof(hbmin);  
 
   if (!matrix_filename ||
       read_score_matrix(matrix_filename,&score_matrix)<=0){/* READ MATRIX */
-    WARN_MSG(USERR,(ERRTXT,"Error reading matrix file %s.  Exiting",
-		    matrix_filename? matrix_filename: "(null)"),"$Revision: 1.2 $");
+    WARN_MSG(USERR,(ERRTXT,"Error reading matrix file %s.\nExiting",
+		    matrix_filename ? matrix_filename: "because none specified"),"$Revision: 1.2.2.9 $");
     exit_code=1; /* SIGNAL ERROR CONDITION */
     goto free_memory_and_exit;
   }
-
+  
   if (logfile) {
-    fprintf(logfile, "X-Gap Penalties: %d %d %d\n",
+    fprintf(logfile,"X-Gap Penalties (Open, Aff1, Aff2; LTrunc, LDecay): %d %d %d %d %d\n",
 	    score_matrix.gap_penalty_set[0][0],
 	    score_matrix.gap_penalty_set[0][1],
-	    score_matrix.gap_penalty_set[0][2]);
-    fprintf(logfile, "Y-Gap Penalties: %d %d %d\n",
+	    score_matrix.gap_penalty_set[0][2],
+	    score_matrix.trunc_gap_length,
+	    score_matrix.decay_gap_length);
+    fprintf(logfile,"X-Gap Penalties (0, 1, 2, ...): ");
+    for (i=0; i<=score_matrix.max_gap_length; i++) {
+      fprintf (logfile, "%d ", score_matrix.gap_penalty_x[i]);
+    }
+    fprintf(logfile,"... \n");
+    fprintf(logfile,"Y-Gap Penalties (Open, Aff1, Aff2; LTrunc, LDecay): %d %d %d %d %d\n",
 	    score_matrix.gap_penalty_set[1][0],
 	    score_matrix.gap_penalty_set[1][1],
-	    score_matrix.gap_penalty_set[1][2]);
+	    score_matrix.gap_penalty_set[1][2],
+	    score_matrix.trunc_gap_length,
+	    score_matrix.decay_gap_length);
+    fprintf(logfile,"Y-Gap Penalties (0, 1, 2, ...): ");
+    for (i=0; i<=score_matrix.max_gap_length; i++) {
+      fprintf (logfile, "%d ", score_matrix.gap_penalty_y[i]);
+    }
+    fprintf(logfile,"... \n");
   }
   
   if (print_matrix_letters) /* USER WANTS US TO PRINT A MATRIX */
     print_score_matrix(stdout,&score_matrix,print_matrix_letters
 		       /*"ARNDCQEGHILKMFPSTWYV"*/);
-
-  if (po_filename) { /* READ PARTIAL ORDER FILE */
-    if (seq_ifile=fopen(po_filename,"r")) {
-      if (subset_file) { /* LIST OF SEQS TO FILTER THE PO WITH */
-	subset_ifile=fopen(subset_file,"r");
-	if (!subset_ifile) {
-	  WARN_MSG(USERR,(ERRTXT,"Error reading subset file %s.  Exiting",
-			  subset_file),"$Revision: 1.2 $");
-	  exit_code=1; /* SIGNAL ERROR CONDITION */
-	  goto free_memory_and_exit;
-	}
-	lpo_in=read_lpo_select(seq_ifile,subset_ifile,keep_all_links,
-			       remove_listed_seqs);
-	fclose(subset_ifile);
-      }
-      else
-	lpo_in=read_lpo(seq_ifile); /* READ LPO NORMALLY W/O FILTERING */
-      fclose(seq_ifile);
+  
+  
+  /** READ INPUT FILES **/
+  
+  n_input_seqs = 0;
+  max_input_seqs = 10;
+  CALLOC (input_seqs, max_input_seqs, LPOSequence_T *);
+  
+  if (po_filename) {
+    lpo_in = read_partial_order_file (po_filename, subset_file, remove_listed_seqs, keep_all_links, do_switch_case, &score_matrix);
+    if (lpo_in == NULL) {
+      exit_code = 1;
+      goto free_memory_and_exit;
     }
-    if (!lpo_in) {
-      WARN_MSG(USERR,(ERRTXT,"Error reading PO file %s!!!\nExiting.",
-		      po_filename),"$Revision: 1.2 $");
-      exit_code=1; /* SIGNAL ERROR CONDITION */
+    fprintf(errfile,"...Read %d sequences from MSA file %s...\n",lpo_in->nsource_seq,po_filename);
+    input_seqs[n_input_seqs++] = lpo_in;
+    lpo_in = NULL;
+  }
+  
+  if (po2_filename) {
+    lpo_in = read_partial_order_file (po2_filename, subset2_file, remove_listed_seqs2, keep_all_links, do_switch_case, &score_matrix);
+    if (lpo_in == NULL) {
+      exit_code = 1;
       goto free_memory_and_exit;
     }
+    fprintf(errfile,"...Read %d sequences from second MSA file %s...\n",lpo_in->nsource_seq,po2_filename);
+    input_seqs[n_input_seqs++] = lpo_in;
+    lpo_in = NULL;
   }
-
-  if (seq_filename) { /* READ SEQUENCES TO ALIGN */
-    seq_ifile=fopen(seq_filename,"r"); /* READ SEQUENCE DATABASE */
-    if (seq_ifile) {
-      nseq=read_fasta(seq_ifile,&seq,do_switch_case,&comment);
-      fclose(seq_ifile);
+  
+  if (po_list_filename) {
+    po_list_file = fopen (po_list_filename, "r");
+    while (po_list_file && fscanf (po_list_file, " %s", po_list_entry_filename) == 1) {
+      lpo_in = read_partial_order_file (po_list_entry_filename, NULL, 0, 0, do_switch_case, &score_matrix);
+      if (lpo_in == NULL) {
+	exit_code = 1;
+	goto free_memory_and_exit;
+      }
+      fprintf(errfile,"...Read %d sequences from PO list entry %s...\n",lpo_in->nsource_seq,po_list_entry_filename);
+      nseq_in_list += lpo_in->nsource_seq;
+      input_seqs[n_input_seqs++] = lpo_in;
+      lpo_in = NULL;
+      if (n_input_seqs == max_input_seqs) {
+	max_input_seqs *= 2;
+	REALLOC (input_seqs, max_input_seqs, LPOSequence_T *);
+      }
     }
+    if (nseq_in_list==0) {
+      WARN_MSG(USERR,(ERRTXT,"Error reading PO list file %s.\nExiting",
+		      po_list_file),"$Revision: 1.2.2.9 $");
+      exit_code=1; /* SIGNAL ERROR CONDITION */
+      goto free_memory_and_exit;      
+    }
   }
   
-  if (nseq>0) { /* INITIALIZE THE SEQUENCES AND RUN THE ALIGNMENT */
-    fprintf(errfile,"...Read %d sequences from %s...\n",nseq,seq_filename);
-    /*initialize_seqs_as_lpo(nseq,seq,&score_matrix); */
-    if (lpo_in) { /* ADD TO OUR EXISTING ALIGNMENT */
-      lpo_out=buildup_lpo(lpo_in,nseq,seq,&score_matrix,
-			  use_aggressive_fusion,
-			  use_global_alignment);/*BUILD ALIGNMENT*/
-      lpo_in=NULL; /* DATA STRUCTURE NOW POINTED TO BY lpo_out, 
-		      SO DON'T FREE IT TWICE!! */
+  if (seq_filename) {
+    seq_ifile = fopen (seq_filename, "r");
+    if (seq_ifile == NULL) {
+      WARN_MSG(USERR,(ERRTXT,"Couldn't open sequence file %s.\nExiting",
+		      seq_filename),"$Revision: 1.2.2.9 $");
+      exit_code=1; /* SIGNAL ERROR CONDITION */
+      goto free_memory_and_exit;
     }
-    else if (pair_score_file) {
-      lpo_out=buildup_progressive_lpo(nseq,seq,&score_matrix,
-				      use_aggressive_fusion,
-				      pair_score_file,
-				      caties_scoring_function,
-				      use_global_alignment);
+    nseq = read_fasta (seq_ifile, &seq, do_switch_case, &comment);
+    fclose (seq_ifile);
+    if (nseq == 0) {
+      WARN_MSG(USERR,(ERRTXT,"Error reading sequence file %s.\nExiting",
+		      seq_filename),"$Revision: 1.2.2.9 $");
+      exit_code=1; /* SIGNAL ERROR CONDITION */
+      goto free_memory_and_exit;
     }
-    else /* OTHERWISE JUST BUILDUP ON TOP OF FIRST READ SEQUENCE */
-      lpo_out=buildup_lpo(seq,nseq-1,seq+1,&score_matrix,
-			  use_aggressive_fusion,
-			  use_global_alignment);
+    fprintf(errfile,"...Read %d sequences from sequence file %s...\n",nseq,seq_filename);
+    for (i=0; i<nseq; i++) {
+      input_seqs[n_input_seqs++] = &(seq[i]);
+      initialize_seqs_as_lpo(1,&(seq[i]),&score_matrix);
+      if (n_input_seqs == max_input_seqs) {
+	max_input_seqs *= 2;
+	REALLOC (input_seqs, max_input_seqs, LPOSequence_T *);
+      }
+    }
   }
-  else if (lpo_in) /* FILTERED LPO... CAN PRINT OUT AS LPO */
-    lpo_out=lpo_in;
-  else { /* HMM... NO DATA TO WORK WITH AT ALL.  MUST BE AN ERROR. COMPLAIN!*/
-    WARN_MSG(USERR,(ERRTXT,"Error reading sequence file %s and PO file %s. Exiting.", 
-            seq_filename? seq_filename: "because none specified",
-            po_filename? po_filename:  "because none specified"),"$Revision: 1.2 $");
+
+
+  /** BUILD AND ANALYZE OUTPUT PO-MSA **/
+
+  if (n_input_seqs == 0) { /* HMM.. NO DATA. */
+    WARN_MSG(USERR,(ERRTXT,"No input sequences were provided; use one of the -read_ flags.\nExiting."), "$Revision: 1.2.2.9 $");
     exit_code=1; /* SIGNAL ERROR CONDITION */
     goto free_memory_and_exit;
   }
+  else {
+    lpo_out = buildup_progressive_lpo (n_input_seqs, input_seqs, &score_matrix,
+				       use_aggressive_fusion, do_progressive, pair_score_file,
+				       matrix_scoring_function, do_global, do_preserve_sequence_order);
+  }
   
   if (comment) { /* SAVE THE COMMENT LINE AS TITLE OF OUR LPO */
     FREE(lpo_out->title);
     lpo_out->title=strdup(comment);
   }
-
-  if (do_analyze_bundles) /* DIVIDE INTO BUNDLES W/ CONSENSUS */
+  
+  /* DIVIDE INTO BUNDLES W/ CONSENSUS USING PERCENT ID */
+  if (do_analyze_bundles)
     generate_lpo_bundles(lpo_out,bundling_threshold);
 
   if (po_out) { /* WRITE FINAL PARTIAL ORDER ALIGNMENT TO OUTPUT */
     if (lpo_file_out=fopen(po_out, "w")) {
        write_lpo(lpo_file_out,lpo_out,&score_matrix);
        fclose(lpo_file_out);
+       fprintf(errfile,"...Wrote %d sequences to PO file %s...\n",lpo_out->nsource_seq,po_out);
     }
     else {
       WARN_MSG(USERR,(ERRTXT,"*** Could not save PO file %s.  Exiting.",
-	      po_out),"$Revision: 1.2 $");
+		      po_out),"$Revision: 1.2.2.9 $");
       exit_code=1; /* SIGNAL ERROR CONDITION */ 
     }
   }
 
-  if (fasta_out) { /* WRITE FINAL ALIGNMENT IN FASTA FORMAT */
-    if (seq_ifile=fopen(fasta_out,"w")) { /* FASTA ALIGNMENT*/
+  if (fasta_out) { /* WRITE FINAL ALIGNMENT IN FASTA-PIR FORMAT */
+    if (seq_ifile=fopen(fasta_out,"w")) { /* FASTA-PIR ALIGNMENT*/
       write_lpo_bundle_as_fasta(seq_ifile,lpo_out,score_matrix.nsymbol,
 				score_matrix.symbol,ibundle);
       fclose(seq_ifile);
+      fprintf(errfile,"...Wrote %d sequences to FASTA-PIR file %s...\n",lpo_out->nsource_seq,fasta_out);
     }
     else {
-      WARN_MSG(USERR,(ERRTXT,"*** Could not save FASTA file %s.  Exiting.",
-	      fasta_out),"$Revision: 1.2 $");
+      WARN_MSG(USERR,(ERRTXT,"*** Could not save FASTA-PIR file %s.  Exiting.",
+		      fasta_out),"$Revision: 1.2.2.9 $");
       exit_code=1; /* SIGNAL ERROR CONDITION */ 
    }
   }
 
-  if (clustal_out) { /* WRITE FINAL ALIGNMENT IN FASTA FORMAT */
-    if (seq_ifile=fopen(clustal_out,"w")) { /* FASTA ALIGNMENT*/
+  if (clustal_out) { /* WRITE FINAL ALIGNMENT IN CLUSTAL FORMAT */
+    if (seq_ifile=fopen(clustal_out,"w")) { /* CLUSTAL ALIGNMENT*/
       export_clustal_seqal(seq_ifile,lpo_out,score_matrix.nsymbol,
 			   score_matrix.symbol);
       fclose(seq_ifile);
+      fprintf(errfile,"...Wrote %d sequences to CLUSTAL file %s...\n",lpo_out->nsource_seq,clustal_out);
     }
     else {
       WARN_MSG(USERR,(ERRTXT,"*** Could not save CLUSTAL file %s.  Exiting.",
-	      fasta_out),"$Revision: 1.2 $");
+		      clustal_out),"$Revision: 1.2.2.9 $");
       exit_code=1; /* SIGNAL ERROR CONDITION */ 
    }
   }
@@ -242,14 +323,64 @@
 
 
  free_memory_and_exit: /* FREE ALL DYNAMICALLY ALLOCATED DATA!!!! */
+  
   if (dna_lpo)
     free_lpo_sequence(dna_lpo,TRUE);
-  if (lpo_in)
-    free_lpo_sequence(lpo_in,TRUE);
-  if (lpo_out != seq && lpo_out != lpo_in)
-    free_lpo_sequence(lpo_out,TRUE);
-  LOOPB (i,nseq) /* seq[] WAS ALLOCATED AS ONE ARRAY, SO FREE seq[0] LAST */
-    free_lpo_sequence(seq+i,i==0); /* FREE BLOCK AFTER ALL HOLDERS EMPTY*/
-  exit(exit_code);
+  
+  for (i=0;i<n_input_seqs;i++) {
+    for (j=0;j<nseq;j++) {
+      if (input_seqs[i]==&(seq[j]))
+	break;
+    }
+    free_lpo_sequence(input_seqs[i],(j==nseq));
+  }
+  FREE (input_seqs);
+  if (nseq>0) FREE (seq);
+  
+  exit (exit_code);
 }
 
+
+
+static LPOSequence_T *read_partial_order_file (char *po_filename, char *subset_filename, int remove_listed_seqs, int keep_all_links, int do_switch_case, ResidueScoreMatrix_T *mat)
+{
+  LPOSequence_T *lpo_in;
+  FILE *po_file=NULL, *subset_file=NULL;
+  
+  if (!po_filename)
+    return NULL;
+  
+  po_file = fopen (po_filename, "r");
+  if (!po_file) {
+    WARN_MSG (USERR, (ERRTXT,"Couldn't open MSA file %s.\nExiting.",po_filename), "$Revision: 1.2.2.9 $");
+    return NULL;
+  }
+  
+  if (subset_filename) {
+    subset_file = fopen (subset_filename, "r");
+    if (!subset_file) {
+      WARN_MSG (USERR, (ERRTXT,"Couldn't open subset file %s.\nExiting.",subset_filename), "$Revision: 1.2.2.9 $");
+      return NULL;
+    }
+  }
+  
+  if (subset_file) {
+    lpo_in = read_msa_select (po_file, UNKNOWN_MSA, subset_file, keep_all_links, remove_listed_seqs, do_switch_case, mat);
+    fclose (subset_file);
+    fclose (po_file);
+    if (lpo_in==NULL || lpo_in->nsource_seq == 0) {
+      WARN_MSG (USERR, (ERRTXT,"MSA file %s, filtered with subset file %s, couldn't be read or contains no sequences.\nExiting.", po_filename, subset_filename), "$Revision: 1.2.2.9 $");
+      return NULL;
+    }
+  }
+  else {
+    lpo_in = read_msa (po_file, UNKNOWN_MSA, do_switch_case, mat);
+    fclose (po_file);
+    if (lpo_in==NULL || lpo_in->nsource_seq == 0) {
+      WARN_MSG (USERR, (ERRTXT,"MSA file %s couldn't be read or contains no sequences.\nExiting.", po_filename), "$Revision: 1.2.2.9 $");
+      return NULL;
+    }
+  }
+
+  return lpo_in;
+}

Copied: trunk/packages/poa/trunk/make_pscores.pl (from rev 128, trunk/packages/poa/branches/upstream/current/make_pscores.pl)
===================================================================
--- trunk/packages/poa/trunk/make_pscores.pl	                        (rev 0)
+++ trunk/packages/poa/trunk/make_pscores.pl	2006-09-26 04:38:07 UTC (rev 129)
@@ -0,0 +1,48 @@
+#!/usr/bin/perl
+#
+# Usage: make_pscores.pl SEQFILE SCOREFILE
+#
+# Runs BLAST and writes output to a file "SEQFILE.out".
+# This file is parsed into lines of the form "seqname1 \t seqname2 \t bitscore"
+# (suitable for input to POA), which are written to SCOREFILE.
+#
+# NB: The bitscore increases with increasing sequence similarity.
+#
+
+$seq_file = $ARGV[0];
+$pscore_file = $ARGV[1];
+
+open(PSCORE_OUT, ">$pscore_file");
+system("./formatdb -i $seq_file -p T");
+system("./blastall -p blastp -d $seq_file -i $seq_file -M BLOSUM80 -o $seq_file.out");
+
+open(BLAST_OUT, "<$seq_file.out");
+while(<BLAST_OUT>){
+     @my_parse = split(/\s/, $_);
+     if ($my_parse[0] =~ /^Query=/){
+	 $seq_name1 = $my_parse[1];
+	 while(<BLAST_OUT>){         
+	    if ($_ =~ />/){
+		last;
+            }
+            if ($_ =~ /bits/){
+		while(<BLAST_OUT>){
+		    if ($_ =~ />/){
+			last;
+		    }
+                    @other_parse = split(/\s+/, $_);
+                    $seq_name2 = $other_parse[0]; 
+	            $bit_score = $other_parse[1];
+                    if ($seq_name2 ne ""){
+                       printf PSCORE_OUT "$seq_name1\t$seq_name2\t$bit_score.0\n";
+		   }
+                }
+               if ($_ =~ />/){
+		last;
+               } 
+            }
+	 }
+     }
+}
+close(BLAST_OUT);
+close(PSCORE_OUT);

Copied: trunk/packages/poa/trunk/msa_format.c (from rev 128, trunk/packages/poa/branches/upstream/current/msa_format.c)
===================================================================
--- trunk/packages/poa/trunk/msa_format.c	                        (rev 0)
+++ trunk/packages/poa/trunk/msa_format.c	2006-09-26 04:38:07 UTC (rev 129)
@@ -0,0 +1,619 @@
+
+
+/****************/
+/* msa_format.c */
+/****************/
+
+/* ---
+   Implementation of "msa_format.h":
+   
+   Functions for reading CLUSTAL- and FASTA-PIR-formatted files
+   into the LPOSequence_T data structure, and for determining file
+   type from the initial line(s) of a file.
+   --- */
+
+
+
+#include "msa_format.h"
+
+
+/** is `ch' an allowed residue? (a-z OR A-Z OR ? OR [ OR ]) */
+static int is_residue_char (char ch);
+
+/** is `ch' an allowed gap? (. OR -) */
+static int is_gap_char (char ch);
+
+/** could `ch' be the first character of a sequence name? (NOT # AND NOT * AND NOT whitespace) */
+static int is_name_first_char (char ch);
+
+
+/** decide which sequences in an RC-MSA alignment to keep based on a filter file `select_ifile'.
+    remove discarded sequences and reindex the remaining ones.
+    returns the NEW number of sequences. */
+static int filter_sequence_set (int n_seqs, FILE *select_ifile, int remove_listed_sequences,
+				char **seq_names, char **seq_titles, char **aln_mat, int *aln_lengths);
+
+
+/** remove sequence bad_seq_id from each letter containing it.
+    ASSUMES THAT NO LETTER CONTAINS _ONLY_ bad_seq_id... e.g. bad_seq_id IS A CONSENSUS.
+    either keep or don't keep links present only in the removed sequence. */ 
+static void strip_seq_from_lpo (LPOSequence_T *lposeq, int bad_seq_id, int keep_all_links);
+
+
+static LPOSequence_T *read_clustal (FILE *ifile, const char *first_line,
+				    FILE *select_ifile, int remove_listed_sequences,
+				    int do_switch_case, ResidueScoreMatrix_T *score_matrix);
+
+static LPOSequence_T *read_pir (FILE *ifile, const char *first_line,
+				FILE *select_ifile, int remove_listed_sequences,
+				int do_switch_case, ResidueScoreMatrix_T *score_matrix);
+
+
+
+/** Reads an MSA from a file.  If `format' is UNKNOWN_MSA, the file
+    format is determined from the first line(s) of the file.
+    Uses `select_ifile', if non-NULL, to filter the sequence set.
+    Uppercases or lowercases sequence characters according to `do_switch_case'.
+    Indexes LPO symbols using `score_matrix'.
+*/
+LPOSequence_T *read_msa_select (FILE *ifile, msa_file_format format,
+				FILE *select_ifile, int keep_all_links, int remove_listed_sequences,
+				int do_switch_case, ResidueScoreMatrix_T *score_matrix)
+{
+  static char line[512]="";
+
+  /* USE format TO FIX FILE FORMAT IF POSSIBLE. */
+  /* OTHERWISE, PO FILES START WITH 'VERSION=' (and this line is discarded),
+     FASTA-PIR FILES START WITH '>', AND CLUSTAL FILES WITH 'CLUSTAL' OR SIMPLY
+     WITH THE FIRST ALIGNMENT LINE.
+     LINES STARTING WITH whitespace OR '#' OR '*' ARE IGNORED. */
+  
+  while (format!=UNKNOWN_MSA || fgets (line, sizeof(line)-1, ifile)) {
+    
+    if (format==PIR_MSA || line[0] == '>') {
+      return read_pir (ifile, line,
+		       select_ifile, remove_listed_sequences,
+		       do_switch_case, score_matrix);
+    }
+    else if (format==PO_MSA || 0==strncmp(line,"VERSION=",8)) {
+      if (select_ifile != NULL) {
+	return read_lpo_select (ifile, select_ifile, keep_all_links, remove_listed_sequences);
+      }
+      else {
+	return read_lpo (ifile);
+      }
+    }
+    else if (format==CLUSTAL_MSA || 0==strncmp(line,"CLUSTAL",7)) {
+      return read_clustal (ifile, line,
+			   select_ifile, remove_listed_sequences,
+			   do_switch_case, score_matrix);
+    }
+    else if (line[0] == '#' || line[0] == '*' || line[0] == ' ' || line[0] == '\t' || line[0] == '\n' || line[0] == '\r') {
+      continue;
+    }
+    else {
+      WARN_MSG(USERR,(ERRTXT, "Unable to determine MSA file type... trying CLUSTAL.\n"),"$Revision: 1.1.2.3 $");
+      return read_clustal (ifile, line,
+			   select_ifile, remove_listed_sequences,
+			   do_switch_case, score_matrix);
+    }
+  }
+  
+  WARN_MSG(USERR,(ERRTXT, "No data in MSA file.\n"),"$Revision: 1.1.2.3 $");
+
+  return NULL;
+}
+
+/** Reads an MSA from a file (cf. read_msa_select).
+ */
+LPOSequence_T *read_msa (FILE *ifile, msa_file_format format,
+			 int do_switch_case, ResidueScoreMatrix_T *score_matrix)
+{
+  return read_msa_select (ifile, format,
+			  NULL, 0, 0,
+			  do_switch_case, score_matrix);
+}
+
+
+
+
+/** Reads a CLUSTAL-formatted alignment file.
+ */
+LPOSequence_T *read_clustal (FILE *fp, const char *first_line,
+			     FILE *select_ifile, int remove_listed_sequences,
+			     int do_switch_case, ResidueScoreMatrix_T *score_matrix)
+{
+  int i, j, n_seqs=0, curr_seq=0, expect_repeats=0, expect_header=1, line_num=0;
+  char ch;  
+  char **seq_names=NULL, **seq_titles=NULL, **aln_mat=NULL;
+  int *aln_lengths=NULL;
+  char line[512]="", name[512]="", aln[512]="";
+  LPOSequence_T *lposeq = NULL;
+  
+  while ((first_line!=NULL && line_num==0) || fgets (line, sizeof(line)-1, fp)) {
+    if (first_line!=NULL && line_num==0) {
+      strcpy (line, first_line);
+    }
+    line_num++;
+    
+    if (expect_header) {  /* LOOKING FOR 'CLUSTAL' HEADER LINE */
+      if (0 == strncmp(line,"CLUSTAL",7)) {  /* FOUND IT; STOP LOOKING */
+	expect_header=0;
+	continue;
+      }
+      else if (0 == is_name_first_char(line[0])) {  /* COMMENT LINE; KEEP LOOKING */
+	continue;
+      }
+      else {
+	expect_header=0;  /* FOUND SOMETHING ELSE; NO HEADER, SO CONTINUE */
+      }
+    }
+    
+    if (0 == is_name_first_char(line[0])) {  /* BLOCK SEPARATOR? */
+      curr_seq = 0;
+      if (n_seqs>0) {
+	expect_repeats=1;  /* YES; NOW EXPECT SAME SEQUENCES OVER AGAIN IN EACH BLOCK */
+      }
+      continue;
+    }
+    
+    if (sscanf (line, "%s %[^\n\r]", name, aln) < 2) {
+      WARN_MSG(USERR,(ERRTXT, "Error: Trouble reading CLUSTAL-formatted file near line %d: \n>>>\n%s<<<\nBailing out.\n",line_num,line),
+	       "$Revision: 1.1.2.3 $");
+      goto free_memory_and_exit;
+    }
+    
+    if (0 == expect_repeats) {  /* FIRST BLOCK STILL, SO MAKE ROOM FOR NEW SEQ */
+      n_seqs++;
+      
+      REALLOC (seq_names, n_seqs, char *);
+      seq_names[n_seqs-1] = strdup(name);
+      REALLOC (seq_titles, n_seqs, char *);
+      seq_titles[n_seqs-1] = strdup("");
+      
+      REALLOC (aln_mat, n_seqs, char *);
+      aln_mat[n_seqs-1] = strdup(" ");
+      REALLOC (aln_lengths, n_seqs, int);
+      aln_lengths[n_seqs-1] = 0;
+    }
+    else if (curr_seq>=n_seqs || strcmp(name,seq_names[curr_seq])) {  /* NAME SHOULD BE A REPEAT */
+      WARN_MSG(USERR,(ERRTXT, "Error: Trouble reading CLUSTAL-formatted file at line %d: \n>>>\n%s<<<\nSequence name (%s) does not match expected sequence name (%s).  Bailing out.\n",line_num,line,name,seq_names[curr_seq]),
+	       "$Revision: 1.1.2.3 $");
+      goto free_memory_and_exit;
+    }
+    
+    REALLOC (aln_mat[curr_seq], aln_lengths[curr_seq] + strlen(aln), char);
+    for (i=0; i<strlen(aln); i++) {
+      ch = aln[i];
+      if (is_residue_char(ch) || is_gap_char(ch)) {
+	aln_mat[curr_seq][aln_lengths[curr_seq]++] = aln[i];
+      }
+    }
+    curr_seq++;
+  }
+
+  n_seqs = filter_sequence_set (n_seqs, select_ifile, remove_listed_sequences, seq_names, seq_titles, aln_mat, aln_lengths);
+  
+  lposeq = lpo_from_aln_mat (n_seqs, seq_names, seq_titles, aln_mat, aln_lengths, do_switch_case, score_matrix);
+  
+ free_memory_and_exit:
+  for (i=0; i<n_seqs; i++) {
+    FREE (seq_names[i]);
+    FREE (seq_titles[i]);
+    FREE (aln_mat[i]);
+  }
+  FREE (seq_names);
+  FREE (seq_titles);
+  FREE (aln_mat);
+  FREE (aln_lengths);
+  
+  if (n_seqs>0) {
+    strcpy (lposeq->name, lposeq->source_seq[0].name);
+    FREE (lposeq->title);
+    lposeq->title = strdup (lposeq->source_seq[0].title);
+  }
+  
+  return lposeq;
+}
+
+/** Reads a FASTA-PIR-formatted alignment file.
+ */
+LPOSequence_T *read_pir (FILE *fp, const char *first_line,
+			 FILE *select_ifile, int remove_listed_sequences,
+			 int do_switch_case, ResidueScoreMatrix_T *score_matrix)
+{
+  int i, j, n_seqs=0, curr_seq=0, line_num=0;
+  char ch;  
+  char **seq_names=NULL, **seq_titles=NULL, **aln_mat=NULL;
+  int *aln_lengths=NULL, *keep_seq=NULL;
+  char line[512]="", name[512]="", title[512]="", aln[512]="";
+  LPOSequence_T *lposeq = NULL;
+  
+  while ((first_line!=NULL && line_num==0) || fgets (line, sizeof(line)-1, fp)) {
+    if (first_line!=NULL && line_num==0) {
+      strcpy (line, first_line);
+    }
+    line_num++;
+    
+    if (line[0] == '>') {  /* HEADER LINE FOR NEW SEQUENCE */
+      title[0] = '\0';
+      if (sscanf (line, ">%s %[^\n\r]", name, title) < 1) {
+	WARN_MSG(USERR,(ERRTXT, "Error: Trouble reading PIR-formatted file near line %d (no sequence name?):\n>>>\n%s<<<\nBailing out.\n",line_num,line),
+		 "$Revision: 1.1.2.3 $");
+	goto free_memory_and_exit;
+      }
+      
+      n_seqs++;
+      curr_seq=n_seqs-1;
+      REALLOC (seq_names, n_seqs, char *);
+      seq_names[n_seqs-1] = strdup(name);
+      REALLOC (seq_titles, n_seqs, char *);
+      seq_titles[n_seqs-1] = strdup(title);
+      REALLOC (aln_mat, n_seqs, char *);
+      aln_mat[n_seqs-1] = strdup(" ");
+      REALLOC (aln_lengths, n_seqs, int);
+      aln_lengths[n_seqs-1] = 0;
+    }
+    else if (line[0]=='#' || line[0]=='*') {  /* COMMENT LINE */
+      continue;
+    }
+    else {  /* ALIGNMENT ROW FOR CURRENT SEQUENCE */
+      sscanf (line, "%[^\n\r]", aln);
+      if (n_seqs==0) {
+	WARN_MSG(USERR,(ERRTXT, "Error: Trouble reading PIR-formatted file near line %d (no preceding '>seqname' line?):\n>>>\n%s<<<\nBailing out.\n",line_num,line),
+		 "$Revision: 1.1.2.3 $");
+	goto free_memory_and_exit;
+      }
+      
+      REALLOC (aln_mat[curr_seq], aln_lengths[curr_seq] + strlen(aln), char);
+      for (i=0; i<strlen(aln); i++) {
+	ch = aln[i];
+	if (is_residue_char(ch) || is_gap_char(ch)) {
+	  aln_mat[curr_seq][aln_lengths[curr_seq]++] = aln[i];
+	}
+      }
+    }
+  }
+
+  n_seqs = filter_sequence_set (n_seqs, select_ifile, remove_listed_sequences, seq_names, seq_titles, aln_mat, aln_lengths);  
+
+  lposeq = lpo_from_aln_mat (n_seqs, seq_names, seq_titles, aln_mat, aln_lengths, do_switch_case, score_matrix);
+  
+ free_memory_and_exit:
+  for (i=0; i<n_seqs; i++) {
+    FREE (seq_names[i]);
+    FREE (seq_titles[i]);
+    FREE (aln_mat[i]);
+  }
+  FREE (seq_names);
+  FREE (seq_titles);
+  FREE (aln_mat);
+  FREE (aln_lengths);
+  
+  if (n_seqs>0) {
+    strcpy (lposeq->name, lposeq->source_seq[0].name);
+    FREE (lposeq->title);
+    lposeq->title = strdup (lposeq->source_seq[0].title);
+  }
+  
+  return lposeq;
+}
+
+/** Creates an LPO from an RC-MSA alignment matrix.
+ */
+LPOSequence_T *lpo_from_aln_mat (int n_seqs, char **seq_names, char **seq_titles, char **aln_mat, int *aln_lengths, int do_switch_case, ResidueScoreMatrix_T *score_matrix)
+{
+  int i, j, k, len, col, res_id, letter_id;
+  char ch;
+  LPOSequence_T *curr_seq, *lposeq;
+  int **column_ids; /** which column contains each residue */
+  int **res_ids; /** which residue is in each column */
+  int *al_x, *al_y;
+  int max_aln_length = 0;
+  char *consens_row;
+
+  if (n_seqs==0)
+    return NULL;
+  
+  CALLOC (column_ids, n_seqs+1, int *);
+  CALLOC (res_ids, n_seqs+1, int *);
+  for (i=0; i<n_seqs; i++) {
+    if (aln_lengths[i] > max_aln_length) {
+      max_aln_length = aln_lengths[i];
+    }
+  }
+  for (i=0; i<n_seqs+1; i++) {
+    CALLOC (column_ids[i], max_aln_length, int);
+    CALLOC (res_ids[i], max_aln_length, int);
+  }
+
+  /* GET CONSENSUS ROW THAT ALIGNS TO SOME OTHER ROW IN EACH COLUMN */
+  /* CONSENSUS ROW IS _NECESSARY_ TO DEAL WITH THE POSSIBILITY THAT, e.g., UNORDERED RESIDUES
+     IN seqs 0 & 1 ARE ORDERED BY ALIGNMENT WITH seq 2. */
+  
+  CALLOC (consens_row, max_aln_length, char);
+  for (col=0; col<max_aln_length; col++) {
+    consens_row[col] = '-';
+    for (i=0; i<n_seqs; i++) {
+      if (col < aln_lengths[i] && is_residue_char(aln_mat[i][col])) {
+	consens_row[col] = aln_mat[i][col];
+	break;
+      }
+    }
+  }
+
+  /* MAKE SEQUENCE FROM CONSENSUS ROW */
+
+  CALLOC (lposeq, 1, LPOSequence_T);
+  strcpy (lposeq->name, "consens_row");
+  lposeq->title = strdup ("");
+  
+  CALLOC (lposeq->sequence, max_aln_length+1, char);
+  for (len=col=0; col<max_aln_length; col++) {
+    ch = consens_row[col];
+    if (do_switch_case == switch_case_to_lower) {
+      ch = tolower(ch);
+    }
+    else if (do_switch_case == switch_case_to_upper) {
+      ch = toupper(ch);
+    }
+    if (is_residue_char(ch)) {
+      lposeq->sequence[len] = ch;
+      column_ids[0][len] = col;
+      res_ids[0][col] = len;
+      len++;
+    }
+  }
+  lposeq->length = len;
+  lposeq->sequence[col] = '\0';
+  REALLOC (lposeq->sequence, len+1, char);
+    
+  initialize_seqs_as_lpo (1, lposeq, score_matrix);
+  
+  for (i=0; i<n_seqs; i++) {
+
+    CALLOC (curr_seq, 1, LPOSequence_T);
+    strcpy (curr_seq->name, seq_names[i]);
+    curr_seq->title = strdup (seq_titles[i]);
+
+    /* READ CHARACTERS FROM ALIGNMENT ROW INTO SEQUENCE: */
+    CALLOC (curr_seq->sequence, aln_lengths[i]+1, char);
+    for (len=col=0; col<aln_lengths[i]; col++) {
+      ch = aln_mat[i][col];
+      if (do_switch_case == switch_case_to_lower) {
+	ch = tolower(ch);
+      }
+      else if (do_switch_case == switch_case_to_upper) {
+	ch = toupper(ch);
+      }
+      if (is_residue_char(ch)) {
+	curr_seq->sequence[len] = ch;
+	column_ids[i+1][len] = col;
+	res_ids[i+1][col] = len;
+	len++;
+      }
+    }
+    curr_seq->length = len;
+    curr_seq->sequence[len] = '\0';
+    REALLOC (curr_seq->sequence, len+1, char);
+    
+    initialize_seqs_as_lpo (1, curr_seq, score_matrix);
+    
+    /* ALIGN THIS SEQUENCE TO EXISTING ALIGNMENT USING CONSENSUS ROW */
+
+    build_seq_to_po_index (lposeq);
+    
+    CALLOC (al_x, len, int);
+    CALLOC (al_y, lposeq->length, int);
+    
+    for (j=0; j<lposeq->length; j++) {
+      al_y[j] = INVALID_LETTER_POSITION;
+    }
+    
+    /* FOR EACH RESIDUE IN THIS SEQUENCE: */
+    for (j=0; j<len; j++) {
+      col = column_ids[i+1][j];
+      
+      /* GET AN OLD NODE IN CONSENSUS ROW ALIGNED TO THIS ONE: */
+      res_id = res_ids[0][col];
+      
+      /* STORE INFO IN al_x,al_y: */
+      letter_id = lposeq->source_seq[0].seq_to_po[res_id];
+      al_x[j] = letter_id;
+      al_y[letter_id] = j;
+    }
+        
+    /* DO THE FUSION-BASED BUILDUP */
+    
+    fuse_ring_identities (lposeq->length, lposeq->letter,
+			  curr_seq->length, curr_seq->letter,
+			  al_y, al_x);
+    
+    fuse_lpo (lposeq, curr_seq, al_y, al_x);
+    
+    free_lpo_sequence (curr_seq, 0);
+    FREE (al_x);
+    FREE (al_y);
+    FREE (curr_seq);
+  }
+  
+  /* STRIP OUT CONSENSUS SEQUENCE */
+  
+  strip_seq_from_lpo (lposeq, /*remove #*/ 0, /*0=don't keep all links*/ 0);
+  
+  for (i=0; i<n_seqs+1; i++) {
+    FREE (column_ids[i]);
+    FREE (res_ids[i]);
+  }
+  FREE (column_ids);
+  FREE (res_ids);
+  FREE (consens_row);
+  
+  return lposeq;
+}
+
+
+static int is_residue_char (char ch)
+{
+  if (ch>='a' && ch<='z') return 1;
+  if (ch>='A' && ch<='Z') return 1;
+  if (ch=='?' || ch=='[' || ch==']') return 1;
+  return 0;
+}
+
+static int is_gap_char (char ch)
+{
+  if (ch=='.' || ch=='-') return 1;
+  return 0;
+}
+
+static int is_name_first_char (char ch)
+{
+  if (ch=='\n' || ch=='\r' || ch==' ' || ch=='\t' || ch=='#' || ch=='*') return 0;
+  return 1;
+}
+
+static int filter_sequence_set (int n_seqs, FILE *select_ifile, int remove_listed_sequences,
+				char **seq_names, char **seq_titles, char **aln_mat, int *aln_lengths)
+{
+  int i, j;
+  int *keep_seq;
+  char name[512]="";
+  
+  /* DECIDE WHICH SEQUENCES TO KEEP: */
+  
+  CALLOC (keep_seq, n_seqs, int);
+  
+  for (i=0; i<n_seqs; i++) {
+    keep_seq[i] = ((!select_ifile || remove_listed_sequences) ? 1 : 0);
+  }
+  
+  while (select_ifile && fscanf (select_ifile, "SOURCENAME=%[^\r\n]%*[\r\n]", name) >= 1) {
+    for (i=0; i<n_seqs; i++) {
+      if (0 == strcmp(seq_names[i],name)) {
+	keep_seq[i] = (remove_listed_sequences ? 0 : 1);
+      }
+    }
+  }
+  
+  /* FREE MEM FOR DISCARDED SEQUENCES: */
+  for (i=0; i<n_seqs; i++) if (keep_seq[i]==0) {
+    FREE (seq_names[i]);
+    FREE (seq_titles[i]);
+    FREE (aln_mat[i]);
+  }
+
+  /* COMPACT SEQUENCE LIST (MOVE i<--j): */
+  for (i=j=0; j<n_seqs; i++, j++) {
+    while (j<n_seqs && keep_seq[j]==0)
+      j++;
+    if (j==n_seqs)
+      break;
+    if (j<n_seqs && i!=j) {
+      seq_names[i] = seq_names[j];
+      seq_titles[i] = seq_titles[j];
+      aln_mat[i] = aln_mat[j];
+      aln_lengths[i] = aln_lengths[j];
+    }
+  }
+  
+  FREE (keep_seq);
+  
+  return i;
+}
+
+
+
+void strip_seq_from_lpo (LPOSequence_T *lposeq, int bad_seq_id, int keep_all_links)
+{
+  int i, j, id_left, id_right, len, n_seqs;
+  LPOLetterLink_T *lnk, *tmp_lnk;
+  LPOLetterSource_T *src, *prev, *tmp_src;
+  LPOLetter_T *lett;
+  
+  n_seqs = lposeq->nsource_seq - 1;
+  
+  /* FREE MEM ASSOCIATED WITH BAD SEQ */
+  FREE (lposeq->source_seq[bad_seq_id].title);
+  FREE (lposeq->source_seq[bad_seq_id].sequence);
+  FREE (lposeq->source_seq[bad_seq_id].seq_to_po);
+  FREE (lposeq->source_seq[bad_seq_id].po_to_seq);
+
+  /* COMPACT SOURCE LIST */
+  for (i=0; i<n_seqs; i++) if (i>=bad_seq_id) {
+    lposeq->source_seq[i] = lposeq->source_seq[i+1];
+  }
+  /* This de-allocation causes a bug in conjunction with
+     later REBUFF calls: */
+  /* REALLOC (lposeq->source_seq, n_seqs, LPOSourceInfo_T); */
+  lposeq->nsource_seq = n_seqs;
+  
+  
+  /* RENUMBER SOURCES IN EACH LETTER */
+  for (i=0; i<lposeq->length; i++) {
+    lett = &(lposeq->letter[i]);
+    prev = NULL;
+    src = &(lett->source);
+    while (src != NULL && src->iseq >= 0) {
+      if (src->iseq == bad_seq_id) {
+	if (prev) {  /* NOT IN LIST HEAD, SO RELINK FROM PREV AND FREE */
+	  prev->more = src->more;
+	  FREE (src);
+	  src = prev->more;
+	}
+	else {  /* IN LIST HEAD, SO REASSIGN HEAD DATA, RELINK, FREE */
+	  if (src->more) {
+	    tmp_src = src->more;
+	    src->ipos = tmp_src->ipos;
+	    src->iseq = tmp_src->iseq;
+	    src->more = tmp_src->more;
+	    FREE (tmp_src);
+	  }
+	  else {
+	    src->ipos = -1;
+	    src->iseq = -1;
+	  }
+	}
+      }
+      else {
+	if (src->iseq>=bad_seq_id)
+	  src->iseq--;
+	prev = src;
+	src = src->more;
+      }
+    }
+  }
+  
+  if (0==keep_all_links) {
+
+    build_seq_to_po_index (lposeq);
+    
+    /* FREE ALL LINK INFO */
+    for (i=0; i<lposeq->length; i++) {
+      lett = &(lposeq->letter[i]);
+      for (lnk = &(lett->left); lnk->more != NULL; ) {
+	tmp_lnk = lnk->more;
+	lnk->more = tmp_lnk->more;
+	FREE (tmp_lnk);
+      }
+      lnk->ipos = -1;
+      for (lnk = &(lett->right); lnk->more != NULL; ) {
+	tmp_lnk = lnk->more;
+	lnk->more = tmp_lnk->more;
+	FREE (tmp_lnk);
+      }
+      lnk->ipos = -1;
+    }
+    
+    /* REBUILD LINK INFO FROM REMAINING SEQS */
+    for (i=0; i<n_seqs; i++) {
+      len = lposeq->source_seq[i].length;
+      for (j=0; j<len-1; j++) {
+	id_left = lposeq->source_seq[i].seq_to_po[j];
+	id_right = lposeq->source_seq[i].seq_to_po[j+1];
+	add_lpo_link (&(lposeq->letter[id_left].right), id_right);
+	add_lpo_link (&(lposeq->letter[id_right].left), id_left);
+      }
+    }
+  }
+}

Copied: trunk/packages/poa/trunk/msa_format.h (from rev 128, trunk/packages/poa/branches/upstream/current/msa_format.h)
===================================================================
--- trunk/packages/poa/trunk/msa_format.h	                        (rev 0)
+++ trunk/packages/poa/trunk/msa_format.h	2006-09-26 04:38:07 UTC (rev 129)
@@ -0,0 +1,52 @@
+
+
+/****************/
+/* msa_format.h */
+/****************/
+
+/* ---
+   Functions for reading CLUSTAL- and FASTA-PIR-formatted files
+   into the LPOSequence_T data structure, and for determining file
+   type from the initial line(s) of a file.
+   --- */
+
+
+#ifndef MSA_FORMAT_HEADER_INCLUDED
+#define MSA_FORMAT_HEADER_INCLUDED
+
+#include "default.h"
+#include "seq_util.h"
+#include "lpo.h"
+
+/** types of MSA files supported */
+typedef enum {
+  UNKNOWN_MSA, CLUSTAL_MSA, PIR_MSA, PO_MSA
+}
+msa_file_format;
+
+
+/** Reads an MSA from a file (cf. read_msa_select).
+ */
+LPOSequence_T *read_msa (FILE *ifile, msa_file_format format,
+			 int do_switch_case, ResidueScoreMatrix_T *score_matrix);
+
+
+/** Reads an MSA from a file.  If `format' is UNKNOWN_MSA, the file
+    format is determined from the first line(s) of the file.
+    Uses `select_ifile', if non-NULL, to filter the sequence set.
+    Uppercases or lowercases sequence characters according to `do_switch_case'.
+    Indexes LPO symbols using `score_matrix'.
+*/
+LPOSequence_T *read_msa_select (FILE *ifile, msa_file_format format,
+				FILE *select_ifile, int keep_all_links, int remove_listed_sequences,
+				int do_switch_case, ResidueScoreMatrix_T *score_matrix);
+
+
+/** Creates an LPO from an RC-MSA alignment matrix.
+ */
+LPOSequence_T *lpo_from_aln_mat (int n_seqs, char **seq_names, char **seq_titles, char **aln_mat, int *aln_lengths,
+				 int do_switch_case, ResidueScoreMatrix_T *score_matrix);
+
+
+
+#endif  /* MSA_FORMAT_HEADER_INCLUDED */

Copied: trunk/packages/poa/trunk/multidom.pscore (from rev 128, trunk/packages/poa/branches/upstream/current/multidom.pscore)
===================================================================
--- trunk/packages/poa/trunk/multidom.pscore	                        (rev 0)
+++ trunk/packages/poa/trunk/multidom.pscore	2006-09-26 04:38:07 UTC (rev 129)
@@ -0,0 +1,16 @@
+ABL1_HUMAN	ABL1_HUMAN	2475.0
+ABL1_HUMAN	MATK_HUMAN	260.0
+ABL1_HUMAN	GRB2_HUMAN	100.0
+ABL1_HUMAN	CRKL_HUMAN	63.0
+CRKL_HUMAN	CRKL_HUMAN	736.0
+CRKL_HUMAN	GRB2_HUMAN	70.0
+CRKL_HUMAN	ABL1_HUMAN	60.0
+CRKL_HUMAN	MATK_HUMAN	55.0
+GRB2_HUMAN	GRB2_HUMAN	551.0
+GRB2_HUMAN	ABL1_HUMAN	100.0
+GRB2_HUMAN	MATK_HUMAN	77.0
+GRB2_HUMAN	CRKL_HUMAN	70.0
+MATK_HUMAN	MATK_HUMAN	1215.0
+MATK_HUMAN	ABL1_HUMAN	279.0
+MATK_HUMAN	GRB2_HUMAN	69.0
+MATK_HUMAN	CRKL_HUMAN	55.0

Modified: trunk/packages/poa/trunk/seq_util.c
===================================================================
--- trunk/packages/poa/trunk/seq_util.c	2006-09-26 04:35:01 UTC (rev 128)
+++ trunk/packages/poa/trunk/seq_util.c	2006-09-26 04:38:07 UTC (rev 129)
@@ -94,7 +94,7 @@
   
   ifile=fopen(filename,"r");
   if (!ifile) {
-    WARN_MSG(USERR,(ERRTXT,"Can't open alignment matrix from %s\n",filename),"$Revision: 1.2.2.1 $");
+    WARN_MSG(USERR,(ERRTXT,"Can't open alignment matrix from %s\n",filename),"$Revision: 1.2.2.2 $");
     return -2; /* FAILED TO FIND FILE TO READ */
   }
 
@@ -165,6 +165,37 @@
   }
   fclose(ifile);
 
+  /* CONSTRUCT GAP PENALTY ARRAYS FROM GAP PARAMETERS: */
+  m->max_gap_length = m->trunc_gap_length + m->decay_gap_length;
+  CALLOC (m->gap_penalty_x, m->max_gap_length+2, LPOScore_T);
+  CALLOC (m->gap_penalty_y, m->max_gap_length+2, LPOScore_T);
+
+  /*** GAP OPENING PENALTY @ L=0->1 */
+  m->gap_penalty_x[0] = m->gap_penalty_set[0][0];
+  m->gap_penalty_y[0] = m->gap_penalty_set[1][0];
+  
+  /*** 1st AFFINE EXTENSION PENALTY (A1) @ L=1->2,2->3,...T-1->T */
+  for (i=1;i<m->trunc_gap_length;i++) {
+    m->gap_penalty_x[i] = m->gap_penalty_set[0][1];
+    m->gap_penalty_y[i] = m->gap_penalty_set[1][1];
+  }
+  
+  /*** DECAYING EXTENSION PENALTY (A1-->A2; skipped if D=0) @ L=T->T+1,...T+D-1->T+D */
+  for (i=0;i<m->decay_gap_length;i++) {
+    double dec_x = (m->gap_penalty_set[0][1] - m->gap_penalty_set[0][2]) / ((double)(m->decay_gap_length + 1));
+    double dec_y = (m->gap_penalty_set[1][1] - m->gap_penalty_set[1][2]) / ((double)(m->decay_gap_length + 1));
+    m->gap_penalty_x[i+m->trunc_gap_length] = m->gap_penalty_set[0][1] - (i+1) * dec_x;
+    m->gap_penalty_y[i+m->trunc_gap_length] = m->gap_penalty_set[1][1] - (i+1) * dec_y;
+  }
+  
+  /*** 2nd AFFINE EXTENSION PENALTY (A2) @ L>=T+D */
+  m->gap_penalty_x[m->max_gap_length] = m->gap_penalty_set[0][2];
+  m->gap_penalty_y[m->max_gap_length] = m->gap_penalty_set[1][2];
+  
+  m->gap_penalty_x[m->max_gap_length+1] = 0;  /* DON'T REMOVE THIS!... SPECIAL STATE USED IN align_lpo. */
+  m->gap_penalty_y[m->max_gap_length+1] = 0;  /* DON'T REMOVE THIS!... SPECIAL STATE USED IN align_lpo. */
+  
+  
   LOOPF (i,nsymb) {
     Score_matrix_row= m->score[i]; /* ROW TO USE FOR SORTING best_match */
     LOOP (j,nsymb)

Modified: trunk/packages/poa/trunk/seq_util.h
===================================================================
--- trunk/packages/poa/trunk/seq_util.h	2006-09-26 04:35:01 UTC (rev 128)
+++ trunk/packages/poa/trunk/seq_util.h	2006-09-26 04:38:07 UTC (rev 129)
@@ -35,9 +35,14 @@
   char symbol[MATRIX_SYMBOL_MAX];
   ResidueScore_T score[MATRIX_SYMBOL_MAX][MATRIX_SYMBOL_MAX];
   int best_match[MATRIX_SYMBOL_MAX][MATRIX_SYMBOL_MAX];
-  ResidueScore_T gap_penalty_set[2][3];
+  
+  ResidueScore_T gap_penalty_set[2][3]; 
   int trunc_gap_length;
   int decay_gap_length;
+  
+  ResidueScore_T *gap_penalty_x, *gap_penalty_y;
+  int max_gap_length;
+  
   int nfreq; /* STORE FREQUENCIES OF AMINO ACIDS FOR BALANCING MATRIX...*/
   char freq_symbol[MATRIX_SYMBOL_MAX];
   float freq[MATRIX_SYMBOL_MAX];




More information about the debian-med-commit mailing list