[med-svn] r91 - in trunk/packages: . poa poa/branches
poa/branches/upstream poa/branches/upstream/current
Charles Plessy
charles-guest at costa.debian.org
Wed Aug 23 13:45:43 UTC 2006
Author: charles-guest
Date: 2006-08-23 13:45:39 +0000 (Wed, 23 Aug 2006)
New Revision: 91
Added:
trunk/packages/poa/
trunk/packages/poa/branches/
trunk/packages/poa/branches/upstream/
trunk/packages/poa/branches/upstream/current/
trunk/packages/poa/branches/upstream/current/Makefile
trunk/packages/poa/branches/upstream/current/README
trunk/packages/poa/branches/upstream/current/align_lpo.c
trunk/packages/poa/branches/upstream/current/align_lpo2.c
trunk/packages/poa/branches/upstream/current/align_lpo_po.c
trunk/packages/poa/branches/upstream/current/align_lpo_po2.c
trunk/packages/poa/branches/upstream/current/align_score.c
trunk/packages/poa/branches/upstream/current/black_flag.c
trunk/packages/poa/branches/upstream/current/black_flag.h
trunk/packages/poa/branches/upstream/current/blosum80.mat
trunk/packages/poa/branches/upstream/current/buildup_lpo.c
trunk/packages/poa/branches/upstream/current/create_seq.c
trunk/packages/poa/branches/upstream/current/default.h
trunk/packages/poa/branches/upstream/current/fasta_format.c
trunk/packages/poa/branches/upstream/current/heaviest_bundle.c
trunk/packages/poa/branches/upstream/current/lpo.c
trunk/packages/poa/branches/upstream/current/lpo.h
trunk/packages/poa/branches/upstream/current/lpo_format.c
trunk/packages/poa/branches/upstream/current/main.c
trunk/packages/poa/branches/upstream/current/multidom.seq
trunk/packages/poa/branches/upstream/current/numeric_data.c
trunk/packages/poa/branches/upstream/current/poa.h
trunk/packages/poa/branches/upstream/current/project.h
trunk/packages/poa/branches/upstream/current/remove_bundle.c
trunk/packages/poa/branches/upstream/current/seq_util.c
trunk/packages/poa/branches/upstream/current/seq_util.h
trunk/packages/poa/branches/upstream/current/stringptr.c
trunk/packages/poa/tags/
Log:
[svn-inject] Installing original source of poa
Added: trunk/packages/poa/branches/upstream/current/Makefile
===================================================================
--- trunk/packages/poa/branches/upstream/current/Makefile 2006-08-07 14:01:18 UTC (rev 90)
+++ trunk/packages/poa/branches/upstream/current/Makefile 2006-08-23 13:45:39 UTC (rev 91)
@@ -0,0 +1,52 @@
+
+AR=ar rc
+
+TARGETS=poa liblpo.a poa_doc libbflag.a
+
+# align_score.c CAN BE USED TO ADD CUSTOMIZED SCORING FUNCTIONS
+OBJECTS= \
+ align_score.o \
+ main.o
+
+
+LIBOBJECTS= \
+ black_flag.o \
+ seq_util.o \
+ fasta_format.o \
+ align_lpo2.o \
+ align_lpo_po2.o \
+ buildup_lpo.o \
+ lpo.o \
+ heaviest_bundle.o \
+ lpo_format.o \
+ create_seq.o \
+ remove_bundle.o \
+ numeric_data.o \
+ stringptr.o
+
+
+CC = gcc
+#CFLAGS= -g -ansi-strict -W -Wall -DUSE_WEIGHTED_LINKS -DUSE_PROJECT_HEADER -I.
+CFLAGS= -g -DUSE_WEIGHTED_LINKS -DUSE_PROJECT_HEADER -I. -DTRUNCATE_GAP_LENGTH=8
+# -I$(HOME)/lib/include
+# -DREPORT_MAX_ALLOC
+
+clean:
+ rm -f $(OBJECTS) $(LIBOBJECTS) $(TARGETS)
+
+liblpo.a: $(LIBOBJECTS)
+ rm -f $@
+ $(AR) $@ $(LIBOBJECTS)
+ ranlib $@
+
+
+
+# NB: LIBRARY MUST FOLLOW OBJECTS OR LINK FAILS WITH UNRESOLVED REFERENCES!!
+poa: $(OBJECTS) liblpo.a
+ $(CC) -o $@ $(OBJECTS) -lm liblpo.a
+
+what:
+ @echo poa: partial-order based sequence alignment program
+ @echo liblpo.a: partial-order alignment and utilities function library
+
+
Added: trunk/packages/poa/branches/upstream/current/README
===================================================================
--- trunk/packages/poa/branches/upstream/current/README 2006-08-07 14:01:18 UTC (rev 90)
+++ trunk/packages/poa/branches/upstream/current/README 2006-08-23 13:45:39 UTC (rev 91)
@@ -0,0 +1,235 @@
+POA installation notes Sept. 2001
+Chris Lee
+Dept. of Chemistry & Biochemistry
+UCLA
+
+I. COMPILATION
+To compile this program, simply type 'make poa'.
+
+This produces an executable for sequence alignment (poa) and also a linkable
+library liblpo.a. The software has been compiled and tested on LINUX.
+
+II. RUNNING POA
+
+Poa has a variety of command line options. Running poa without any arguments
+will print a list of the possible command line arguments. POA may be used to
+construct a PO-MSA, or to analyze a PO-MSA.
+
+A. Constructing a PO-MSA
+-------------------------
+
+1. Required Input:
+
+i. An Alignment Score Matrix File:
+A score matrix file is required, because poa uses it to get the residue
+alphabet and indexing. Even if poa is not being used to perform
+multiple sequence alignment, this file must be provided. Any basic alignment
+matrix which may be used with BLAST may be used here. This file must be the
+first command line argument without a flag in order to be interpreted by poa
+as the score matrix file. An example score matrix file, blosum80.mat, is
+provided in this directory.
+
+Note: Poa is Case Sensitive.
+Poa can align amino acid sequences to nucleotide sequences. In order to
+distinguish amino acid residues from nucleic acid residues, poa is case
+sensitive. Residues that are upper case are interpreted as amino acids
+while residues that are lower case are interpreted as nucleotides. Poa
+can handle mixed score matrices containing scores for aligning amino acid
+residues to nucleotide residues as long as the column and row labels of the
+matrix are case sensitive. See blosum80.mat file for an example of just
+such a mixed score matrix.
+
+ii. A FASTA File:
+A FASTA file is required only if poa is being used to construct a
+new PO-MSA from a list of sequences, or to align a list of sequences to
+an already existing PO-MSA (see Analyzing a PO-MSA below). This FASTA file
+should contain sequences to be aligned by poa. The command line argument
+to get poa to accept a FASTA file as input is '-read_fasta FILENAME'. Poa
+will interpret FILENAME as the FASTA sequence file. An example file,
+multidom.seq, is provided in this directory.
+
+Poa is case sensitive (see note above). All residues in protein sequences
+in the FASTA file must be upper case to be interpreted as amino acids by poa,
+while all residues in nucleotide sequences in the FASTA file must be lower
+case to be interpreted as nucleotides by poa. To switch the case of all of
+the letters in the FASTA file to upper case, use the '-toupper' command
+line argument. To switch the case of all the letters in the FASTA file to
+lower case, use the '-tolower' command line argument.
+
+2. MSA Construction Options:
+
+i. Aggressive Fusion:
+During the building up of a PO-MSA, if a node i with label 'a' is aligned
+to an align ring which already contains a node j with label 'a', poa simply
+adds the node to the align ring. It is possible to force poa to do
+aggressive fusion, so that when a node i with label 'a' is aligned to
+an align ring which already contains a node j with label 'a', node i is
+fused to node j. The command line argument for accomplishing this is
+'-fuse_all'.
+
+3. MSA Output Formats:
+Poa can output a PO-MSA in several formats simultaneously including CLUSTAL,
+PIR, and PO. The PO format is the best format since it contains all of the
+information in the PO-MSA. The other formats accurately represent the MSA,
+but since they are RC-MSA formats, they may lose some of the information in the
+full PO-MSA.
+
+i. CLUSTAL format:
+This format is the standard CLUSTAL format. The command line argument to get
+the MSA output in this format is '-clustal FILENAME'.
+
+ii. PIR format:
+This format is the standard PIR format, which is like FASTA with a '.'
+character representing gaps. The command line argument to get the MSA
+output in this format is '-pir FILENAME'.
+
+iii. PO format:
+This format is the standard PO format. It is described below in the section
+PO format. The command line argument to get the MSA output in this format is
+'-po FILENAME'.
+
+Example: Constructing a MSA of Four Protein Sequences
+Running poa with the following statement will take the fasta formatted
+sequences in the multidom.seq file, construct a PO-MSA using the scoring
+matrix in the file blosum80.mat, and then output the PO-MSA in CLUSTAL format
+in the file multidom.aln.
+
+poa -clustal multidom.aln blosum80.mat multidom.seq
+
+The output should be identical to the results of figs. 6 & 7 in the paper.
+
+4. Other Output:
+
+i. Score Matrix
+Poa will also print to stdout the score matrix stored in the '.mat' file.
+The command line argument to get poa to do this is '-printmatrix LETTERSET',
+where LETTERSET is a string of letters to be printed with the score matrix.
+For example, if the score matrix is designed for protein alignment the
+letter set might be 'ARNDCQEGHILKMFPSTWYV'.
+
+ii. Verbose Mode
+Poa will run in verbose mode, printing additional information generated
+during the run to stdout. The command line argument to get poa to do run in
+verbose mode is '-v'.
+
+B. Analyzing a PO-MSA
+-----------------------
+Poa can also take a PO format file as input and rebuild the PO-MSA data
+structure. Once this data structure has been rebuilt, it may be analyzed
+for features. In 'liblpo.a', the linkable poa library, we have included the
+functions necessary to do heaviest bundling and thereby find consensus
+sequences in the PO-MSA (the details of the heaviest bundling algorithm
+are described elsewhere). Poa has been written so that users may create
+their own functions for analyzing a PO-MSA. We have not included in the
+'liblpo.a' library the functions that we wrote to analyze PO-MSAs
+constructed with ESTs and genome sequence to find snps and alternative
+splice sites. However, it is possible to design modular library functions
+that will look for highly specific biological features in any PO-MSA data
+structure.
+
+1. Required Input:
+
+Before the PO-MSA data structure can be analyzed it must be built. It can
+be built either from a PO file or from a FASTA file, or from both a PO file
+and a FASTA file.
+
+Note: POA Requires Either A PO File or a FASTA File
+If neither files are read in by POA it will terminate early, since it
+has not received any sequence data.
+
+i. A PO file:
+Poa will read in a PO formatted file. The command line argument to get
+poa to read in a PO formatted file and rebuild the PO-MSA data structure
+is '-read_po FILENAME'.
+
+It is possible to filter the PO-MSA data structure as it is being rebuilt. In
+order to filter the PO-MSA in the PO file to include only a subset of sequences
+use the command line argument '-subset FILENAME', where the file named FILENAME
+contains the list of sequence names to be included in the new PO-MSA. In order
+to filter the PO-MSA in the PO file to exclude a subset of sequences use the
+command line argument '-remove FILENAME', where the file named FILENAME contains
+the list of sequences to be excluded from the new PO-MSA. The names of
+sequences to be included or excluded should be in the format 'SOURCENAME= *"
+as they are in the PO file. Lists of sequence source names can be created by
+using the unix grep utility on the PO file.
+
+ii. A FASTA File:
+The FASTA file should contain sequences to be aligned by poa. The command line
+argument to get poa to accept a FASTA file as input is '-read_fasta FILENAME'.
+Poa will interpret FILENAME as the FASTA sequence file. An example file,
+multidom.seq, is provided in this directory. (See note above on case sensitivity).
+
+Note: POA Can Take Both A PO File And A FASTA File As Input
+If both the '-read_po FILENAME' argument and the '-read_fasta FILENAME'
+argument are given to poa on the command line, then poa will first rebuild the
+PO-MSA in the PO file, and then it will align the sequences in the FASTA file
+to this PO-MSA.
+
+2. Additional PO Utilities:
+
+i. Consensus Generation Via Heaviest Bundling Algorithm:
+The heaviest bundling algorithm finds consensus sequences in the
+PO-MSA. The command line argument for heaviest bundling is '-hb'. This
+function adds the new consensus sequences to the PO-MSA by storing new
+consensus sequence indices on the in the PO-MSA nodes corresponding to
+the consensus sequence paths. The sequence source names for consensus
+sequences generated by heaviest bundling is CONSENS'i' where 'i' is the
+index of the bundle corresponding to the consensus sequence.
+
+The heaviest bundling algorithm can also take as input a bundling
+threshold value. The command line argument for setting a bundling
+threshold value for heaviest bundling is '-hbmin VALUE'. This threshold
+is used during the process of associating sequences with bundles. If a
+sequence has a percentage of nodes shared with bundle 'i' greater than this
+threshold value, it is associated with bundle 'i'. Iterative heaviest
+bundling can also be affected by the bundling threshold. A detailed
+description of heaviest bundling and heaviest bundling thresholds is given
+elsewhere. The consensus sequences corresponding to bundles generated
+by heaviest bundling are listed in the sequence source list. Additionally,
+in the SOURCEINFO line for each sequence the index of the bundle to which
+that sequence belongs is give. Finally, using the command line argument
+'-best' restricts the MSA output to the consensus sequences generated
+by heaviest bundling.
+
+III. PO FILE FORMAT
+
+****************************HEADER****************************************
+VERSION= ~Current version of poa~
+NAME= ~Name of PO-MSA. Defaults to name of 1st sequence in PO-MSA~
+TITLE= ~Title of PO-MSA. Defaults to title of 1st sequence in PO-MSA~
+LENGTH= ~Number of nodes in PO-MSA~
+SOURCECOUNT= ~Number of sequences in PO-MSA~
+
+*********************SEQUENCE SOURCE LIST*********************************
+
+/* For each sequence in the PO-MSA: */
+SOURCENAME= ~Name of sequence taken from FASTA sequence header~
+SOURCEINFO= ~Number of nodes in sequence~
+ ~Index of first node containing sequence~
+ ~Sequence weight~
+ ~Index of bundle containing sequence~
+ ~Title of sequence taken from FASTA sequence header~
+
+/* Example: */
+SOURCENAME=GRB2_HUMAN
+SOURCEINFO=217 10 0 3 GROWTH FACTOR RECEPTOR-BOUND PROTEIN 2 (GRB2 ADAPTOR PROTEIN)(SH2)
+
+********************PO-MSA DATA STRUCTURE*********************************
+
+/* For each node in the PO-MSA: */
+~Residue label~:~'L' delimited index list of other nodes with edges into node~
+ ~'S' delimited index list of sequences stored in each node~
+ ~'A' index of next node in same align ring~
+ NB: align ring indices must form a cycle.
+ e.g. if two nodes 121 and 122 are aligned, then
+ the line for node 121 indicates "A122", and
+ the line for node 122 indicates "A121".
+
+/* Example: */
+F:L156L155L22S2S3S7A158
+
+********************END***************************************************
+
+
+For more information, see http://www.bioinformatics.ucla.edu/poa.
+
Added: trunk/packages/poa/branches/upstream/current/align_lpo.c
===================================================================
--- trunk/packages/poa/branches/upstream/current/align_lpo.c 2006-08-07 14:01:18 UTC (rev 90)
+++ trunk/packages/poa/branches/upstream/current/align_lpo.c 2006-08-23 13:45:39 UTC (rev 91)
@@ -0,0 +1,329 @@
+
+
+#include "default.h"
+#include "poa.h"
+#include "seq_util.h"
+#include "lpo.h"
+
+
+
+typedef struct {
+ unsigned char x:6;
+ unsigned char y:1;
+ unsigned char is_aligned:1;
+} LPOMove_T;
+
+
+typedef unsigned char LPOGapLength_T;
+
+
+
+
+
+void trace_back_lpo_alignment(int len_x,LPOLetter_T seq_x[],
+ int len_y,LPOLetter_T seq_y[],
+ LPOMove_T **move,
+ LPOLetterRef_T best_x,
+ LPOLetterRef_T best_y,
+ LPOLetterRef_T **x_to_y,
+ LPOLetterRef_T **y_to_x)
+{
+ int i;
+ LPOLetterRef_T new_x,*x_al=NULL,*y_al=NULL;
+ LPOLetterLink_T *left;
+
+
+ CALLOC(x_al,len_x,LPOLetterRef_T);
+ CALLOC(y_al,len_y,LPOLetterRef_T);
+ LOOP (i,len_x) x_al[i]= INVALID_LETTER_POSITION;
+ LOOP (i,len_y) y_al[i]= INVALID_LETTER_POSITION;
+
+ while (best_x>=0 && best_y>=0) {
+ if (move[best_x][best_y].is_aligned) {/* ALIGNED! MAP best_x <--> best_y */
+ x_al[best_x]=best_y;
+ y_al[best_y]=best_x;
+ }
+
+ if (0==move[best_x][best_y].x /* HIT START OF THE ALIGNMENT, SO QUIT */
+ && 0==move[best_x][best_y].y)
+ break;
+
+ if ((i=move[best_x][best_y].x)>0) { /* TRACE BACK ON X */
+ for (left= &seq_x[best_x].left;i-- >0;left=left->more) /* USE iTH MOVE*/
+ new_x = left->ipos;
+ }
+ else /* NO MOVE ON X */
+ new_x=best_x;
+
+ if (move[best_x][best_y].y>0) /* ASSUMING seq_y A LINEAR SEQUENCE*/
+ best_y=SEQ_Y_LEFT(best_y); /* TRACE BACK ON Y */
+ best_x=new_x;
+ }
+
+
+ if (x_to_y) /* HAND BACK ALIGNMENT RECIPROCAL MAPPINGS */
+ *x_to_y = x_al;
+ else
+ free(x_al);
+ if (y_to_x)
+ *y_to_x = y_al;
+ else
+ free(y_al);
+ return;
+}
+
+
+
+
+
+/* FOR CONTROLLING THE FREEING OF score[] ARRAYS */
+typedef struct {
+ LPOLetterRef_T last_right; /* LAST POSITION WHICH REFERENCES THIS POSITION*/
+ LPOLetterRef_T my_pos; /* INDEX OF THIS POSITION */
+} LastRightList_T;
+
+
+/* SORT IN ASCENDING ORDER BY last_right */
+int last_right_qsort_cmp(const void *void_a,const void *void_b)
+{
+ const LastRightList_T *a=(const LastRightList_T *)void_a,
+ *b=(const LastRightList_T *)void_b;
+ if (a->last_right < b->last_right)
+ return -1;
+ if (a->last_right == b->last_right)
+ return 0;
+ else
+ return 1;
+}
+
+
+
+
+LastRightList_T *last_right_list(int len,LPOLetter_T seq[])
+{
+ int i;
+ LastRightList_T *list=NULL;
+ LPOLetterLink_T *right;
+
+ CALLOC(list,len,LastRightList_T);
+ LOOP (i,len) {
+ list[i].last_right=list[i].my_pos=i; /* DEFAULT: NOTHING RIGHT OF THIS*/
+ for (right= &seq[i].right;right && right->ipos>=0;right=right->more)
+ if (right->ipos>list[i].last_right)
+ list[i].last_right=right->ipos;
+ }
+ qsort(list,len,sizeof(LastRightList_T),last_right_qsort_cmp);
+ return list;
+}
+
+
+
+
+
+typedef struct {
+ LPOScore_T *score;
+ LPOGapLength_T *gap_length;
+} LPORowFreeList_T;
+
+
+void free_row_list(int nfree_list,LPORowFreeList_T free_list[])
+{
+/* fprintf(stderr,"Maximum #rows allocated was %d\n\n",nfree_list);*/
+ while (nfree_list-- >0) { /* DUMP EVERYTHING STORED ON THE FREE LIST */
+ free(free_list[nfree_list].score);
+ free(free_list[nfree_list].gap_length);
+ }
+}
+
+
+
+
+
+/** performs partial order alignment:
+ seq_x[] may be a partial order;
+ seq_y[] is assumed to be a linear order (regular sequence);
+ returns the alignment in x_to_y[] and y_to_x, and also
+ returns the alignment score as the return value */
+LPOScore_T align_lpo (LPOSequence_T *lposeq_x,
+ LPOSequence_T *lposeq_y,
+ ResidueScoreMatrix_T *m,
+ LPOLetterRef_T **x_to_y,
+ LPOLetterRef_T **y_to_x,
+ int use_global_alignment)
+{
+ int len_x = lposeq_x->length;
+ int len_y = lposeq_y->length;
+ LPOLetter_T *seq_x = lposeq_x->letter;
+ LPOLetter_T *seq_y = lposeq_y->letter;
+
+ int i,j,j_left,best_x,best_y,nfree_list=0,ilast_right=0;
+ LPOScore_T **score=NULL,*my_score,*my_matrix;
+ LPOMove_T **move=NULL,*move_base=NULL,*my_move;
+ LPOGapLength_T **gap_length=NULL,*my_gap_length;
+ LPOLetterLink_T *left,*my_left;
+ int new_gap_len,insert_x,previous_x,previous_y,
+ i_x,new_score,new_x,new_y,current_gap_length;
+ LPOScore_T match_score,previous_score,
+ insert_x_try,insert_x_score,insert_y_score,best_score= -999999;
+ LPORowFreeList_T *free_list=NULL;
+ LastRightList_T *last_right=NULL;
+
+ LPOScore_T *gap_penalty_x, *gap_penalty_y;
+ double gap_decrement;
+ int L_trunc = m->trunc_gap_length, L_decay = m->decay_gap_length;
+
+ CALLOC (gap_penalty_x, L_trunc + L_decay + 1, LPOScore_T);
+ CALLOC (gap_penalty_y, L_trunc + L_decay + 1, LPOScore_T);
+
+ gap_penalty_x[0] = m->gap_penalty_set[0][0]; /* INITIALIZE GAP PENALTY LOOKUP */
+ for (i=1;i<L_trunc;i++)
+ gap_penalty_x[i] = m->gap_penalty_set[0][1];
+ gap_decrement = (m->gap_penalty_set[0][1] - m->gap_penalty_set[0][2]) / ((double)(L_decay + 1));
+ for (i=L_trunc;i<L_trunc+L_decay+1;i++)
+ gap_penalty_x[i] = m->gap_penalty_set[0][1] - (i-L_trunc+1) * gap_decrement;
+ gap_penalty_x[L_trunc + L_decay] = m->gap_penalty_set[0][2]; /* (REDUNDANT) */
+
+ gap_penalty_y[0] = m->gap_penalty_set[1][0]; /* DIFFERENT PENALTY FOR Y-GAP */
+ for (i=1;i<L_trunc;i++)
+ gap_penalty_y[i] = m->gap_penalty_set[1][1];
+ gap_decrement = (m->gap_penalty_set[1][1] - m->gap_penalty_set[1][2]) / ((double)(L_decay + 1));
+ for (i=L_trunc;i<L_trunc+L_decay+1;i++)
+ gap_penalty_y[i] = m->gap_penalty_set[1][1] - (i-L_trunc+1) * gap_decrement;
+ gap_penalty_y[L_trunc + L_decay] = m->gap_penalty_set[1][2]; /* (REDUNDANT) */
+
+ CALLOC(score,len_x,LPOScore_T *); /* ALLOCATE MATRIX STORAGE: ROW POINTERS */
+ CALLOC(move,len_x,LPOMove_T *);
+ CALLOC(gap_length,len_x,LPOGapLength_T *);
+ CALLOC(free_list,len_x,LPORowFreeList_T);
+ CALLOC(move_base,len_x*(len_y+1),LPOMove_T); /*ALLOCATE MATRIX RECTANGLE */
+ last_right=last_right_list(len_x,seq_x); /* GET SORTED LIST TO CONTROL FREE*/
+
+ LOOPF (i,len_x) {/* BUILD UP DP MATRIX, ROW BY ROW */
+ if (nfree_list>0) { /* TAKE THE NEW ROW FROM THE FREE LIST */
+ nfree_list--; /* MOVE BACK TO LAST FREE LIST ENTRY */
+ score[i]=free_list[nfree_list].score+1; /* LEAVE SPACE FOR [-1] ENTRY */
+ gap_length[i]=free_list[nfree_list].gap_length+1;
+ }
+ else { /* NEED TO ALLOCATE A NEW ROW */
+ CALLOC(score[i],len_y+1,LPOScore_T);
+ score[i]++; /* LEAVE SPACE FOR [-1] ENTRY */
+ CALLOC(gap_length[i],len_y+1,LPOGapLength_T);
+ gap_length[i]++; /* LEAVE SPACE FOR [-1] ENTRY */
+ }
+ move[i]=move_base+i*(len_y+1)+1; /* LEAVE SPACE FOR [-1] ENTRY */
+ my_move=move[i]; /* USED TO SPEED UP MATRIX ACCESS INSIDE INNER LOOP*/
+ my_score=score[i];
+ my_gap_length=gap_length[i];
+ score[i][-1]= -999999; /* UNACCEPTABLE SCORE ENSURES -1 NEVER CHOSEN*/
+ my_matrix=m->score[seq_x[i].letter];
+ if (seq_x[i].left.ipos>=0) /* AT LEAST ONE VALID POSITION TO THE LEFT */
+ my_left= &seq_x[i].left;
+ else /* THERE IS NO POSITION TO THE LEFT */
+ my_left=NULL;
+ LOOPF (j,len_y) {
+ j_left=SEQ_Y_LEFT(j); /* POSITION TO THE LEFT OF j */
+ previous_score=previous_x=previous_y=0;
+ i_x=1;
+ insert_x_score= -999999;
+ for (left=my_left;left;left=left->more) {
+ if (move[left->ipos][j].x>0) /* COULD BE [X,0] GAP CONTINUATION */
+ current_gap_length=gap_length[left->ipos][j];
+ else/*NOT AN EXTENSION OF A [X,0] GAP, SO TREAT AS START OF NEW GAP*/
+ current_gap_length=0;
+ insert_x_try=score[left->ipos][j] /* FIND BEST [X,0] MOVE */
+ + left->score /* INCLUDE WEIGHTING FROM THIS EDGE */
+ - gap_penalty_x[current_gap_length];
+ if (insert_x_try>insert_x_score) { /* IF BEST insert_x MOVE, SAVE*/
+ insert_x=i_x;
+ insert_x_score=insert_x_try;
+ new_gap_len=current_gap_length+1;
+ if (new_gap_len>L_trunc+L_decay) /* PREVENT OVERFLOW */
+ new_gap_len=L_trunc+L_decay;
+ }
+
+ /* FIND BEST [X,1] MOVE */
+ if (score[left->ipos][j_left]+left->score>previous_score) {
+ previous_score=score[left->ipos][j_left]+left->score;
+ previous_x=i_x;
+ previous_y=1; /* ASSUMING seq_y JUST LINEAR SEQUENCE */
+ }
+ i_x++; /* ADVANCE X MOVE INDEX */
+ } /* DONE SCANNING PREDECESSORS ON X */
+
+ match_score = previous_score /* TAKE BEST PREDECESSOR */
+ + my_matrix[seq_y[j].letter];
+
+ if (match_score>insert_x_score) { /* PREFER [X,1] MOVE */
+ new_score=match_score;
+ new_x=previous_x;
+ new_y=previous_y;
+ new_gap_len=0;
+ }
+ else { /* PREFER [X,0] MOVE */
+ new_score=insert_x_score;
+ new_x=insert_x;
+ new_y=0;
+ }
+
+ /* [0,1] MOVE */
+ if (my_move[j_left].y==1) /* COULD BE [0,1] GAP CONTINUATION */
+ current_gap_length=my_gap_length[j_left];
+ else /* NOT AN EXTENSION OF A [0,1] GAP, SO TREAT AS START OF NEW GAP*/
+ current_gap_length=0;
+ insert_y_score=my_score[j_left]-gap_penalty_y[current_gap_length];
+
+ if (insert_y_score<new_score) { /* [new_x,new_y] MOVE IS BEST */
+ my_score[j]=new_score;
+ my_move[j].x=new_x;
+ my_move[j].y=new_y;
+ my_gap_length[j]=new_gap_len;
+ if (new_gap_len==0)
+ my_move[j].is_aligned=1;
+ }
+ else { /* [0,1] MOVE IS BEST */
+ my_score[j]=insert_y_score;
+ my_move[j].x=0;
+ my_move[j].y=1;
+ my_gap_length[j]=current_gap_length+1;
+ if (my_gap_length[j]>L_trunc+L_decay) /* PREVENT OVERFLOW */
+ my_gap_length[j]=L_trunc+L_decay;
+ }
+ if (my_score[j]>best_score) { /* RECORD BEST MOVE */
+ best_score=my_score[j];
+ best_x=i;
+ best_y=j;
+ }
+ }
+ while (ilast_right<len_x && last_right[ilast_right].last_right<=i) {
+ free_list[nfree_list].gap_length= /* PUSH ROW ONTO FREE LIST FOR REUSE*/
+ gap_length[last_right[ilast_right].my_pos]-1;/*READJUST TO BASE ADDR!*/
+ free_list[nfree_list].score=score[last_right[ilast_right].my_pos]-1;
+ score[last_right[ilast_right].my_pos]=NULL; /*PREVENT DANGLING POINTER!*/
+ gap_length[last_right[ilast_right].my_pos]=NULL;
+ nfree_list++; /* INCREMENT FREE LIST SIZE */
+ ilast_right++; /* MOVE TO THE NEXT POTENTIAL ROW FOR FREEING */
+ }
+ } /* DYNAMIC PROGRAMING MATRIX COMPLETE, NOW TRACE BACK FROM best_x,best_y*/
+
+ IF_GUARD(best_x>=len_x || best_y>=len_y,1.1,(ERRTXT,"Bounds exceeded!\nbest_x,best_y:%d,%d\tlen:%d,%d\n",best_x,best_y,len_x,len_y),CRASH);
+ trace_back_lpo_alignment(len_x,seq_x,len_y,seq_y,move,best_x,best_y,
+ x_to_y,y_to_x);
+
+
+ FREE (gap_penalty_x);
+ FREE (gap_penalty_y);
+
+ free_row_list(nfree_list,free_list);
+ FREE(move_base); /* DUMP ALLOCATED MATRIX */
+ FREE(score);
+ FREE(move);
+ FREE(gap_length);
+ FREE(free_list);
+ FREE(last_right);
+ return best_score;
+}
+
+
+
+
+
Added: trunk/packages/poa/branches/upstream/current/align_lpo2.c
===================================================================
--- trunk/packages/poa/branches/upstream/current/align_lpo2.c 2006-08-07 14:01:18 UTC (rev 90)
+++ trunk/packages/poa/branches/upstream/current/align_lpo2.c 2006-08-23 13:45:39 UTC (rev 91)
@@ -0,0 +1,389 @@
+
+#include "default.h"
+#include "poa.h"
+#include "seq_util.h"
+#include "lpo.h"
+
+
+/** set nonzero for old scoring (gap-opening penalty for X-Y transition) */
+#define DOUBLE_GAP_SCORING (0)
+
+
+typedef struct {
+ unsigned char x:7;
+ unsigned char y:1;
+}
+DPMove_T;
+
+typedef struct {
+ LPOScore_T score;
+ short gap_x, gap_y;
+}
+DPScore_T;
+
+/** is node 'i' the first node in any sequence in lposeq? */
+static int is_initial_node (int i, LPOSequence_T *lposeq)
+{
+ LPOLetterSource_T *src = &((lposeq->letter[i]).source);
+ while (src != NULL && src->iseq >= 0) {
+ if (src->ipos == 0) {
+ return 1;
+ }
+ src = src->more;
+ }
+ return 0;
+}
+
+/** is node 'i' the last node in any sequence in lposeq? */
+static int is_final_node (int i, LPOSequence_T *lposeq)
+{
+ LPOLetterSource_T *src = &((lposeq->letter[i]).source);
+ while (src != NULL && src->iseq >= 0) {
+ if (src->ipos == (lposeq->source_seq[src->iseq]).length - 1) {
+ return 1;
+ }
+ src = src->more;
+ }
+ return 0;
+}
+
+
+static void get_seq_left_and_final (LPOSequence_T *lposeq_x,
+ LPOLetterLink_T ***x_left_ptr,
+ int **is_final_node_ptr)
+{
+ int i, len_x = lposeq_x->length;
+ LPOLetter_T *seq_x = lposeq_x->letter;
+ int *is_final_node_x = NULL;
+ LPOLetterLink_T **x_left = NULL;
+
+ CALLOC (is_final_node_x, len_x, int);
+
+ for (i=0; i<len_x; i++) {
+ is_final_node_x[i] = is_final_node (i, lposeq_x);
+ }
+
+ CALLOC (x_left, len_x, LPOLetterLink_T *);
+
+ for (i=0; i<len_x; i++) {
+ if (is_initial_node (i, lposeq_x) && seq_x[i].left.ipos != -1) {
+ CALLOC (x_left[i], 1, LPOLetterLink_T);
+ x_left[i]->ipos = -1;
+ x_left[i]->score = 0;
+ x_left[i]->more = &seq_x[i].left;
+ }
+ else {
+ x_left[i] = &seq_x[i].left;
+ }
+ }
+
+ (*is_final_node_ptr) = is_final_node_x;
+ (*x_left_ptr) = x_left;
+}
+
+
+static void trace_back_lpo_alignment (int len_x, int len_y,
+ DPMove_T **move,
+ LPOLetterLink_T **x_left,
+ LPOLetterRef_T best_x, LPOLetterRef_T best_y,
+ LPOLetterRef_T **x_to_y,
+ LPOLetterRef_T **y_to_x)
+{
+ int i, xmove, ymove;
+ LPOLetterRef_T *x_al = NULL, *y_al = NULL;
+ LPOLetterLink_T *left;
+
+ CALLOC (x_al, len_x, LPOLetterRef_T);
+ CALLOC (y_al, len_y, LPOLetterRef_T);
+ LOOP (i,len_x) x_al[i] = INVALID_LETTER_POSITION;
+ LOOP (i,len_y) y_al[i] = INVALID_LETTER_POSITION;
+
+ while (best_x >= 0 && best_y >= 0) {
+
+ xmove = move[best_y][best_x].x;
+ ymove = move[best_y][best_x].y;
+
+ if (xmove>0 && ymove>0) { /* ALIGNED! MAP best_x <--> best_y */
+ x_al[best_x]=best_y;
+ y_al[best_y]=best_x;
+ }
+
+ if (xmove == 0 && ymove == 0) { /* FIRST ALIGNED PAIR */
+ x_al[best_x]=best_y;
+ y_al[best_y]=best_x;
+ break; /* FOUND START OF ALIGNED REGION, SO WE'RE DONE */
+ }
+
+ if (xmove>0) { /* TRACE BACK ON X */
+ left = x_left[best_x];
+ while ((--xmove)>0) {
+ left = left->more;
+ }
+ best_x = left->ipos;
+ }
+
+ if (ymove>0) { /* TRACE BACK ON Y */
+ best_y--;
+ }
+ }
+
+ if (x_to_y) /* HAND BACK ALIGNMENT RECIPROCAL MAPPINGS */
+ *x_to_y = x_al;
+ else
+ free(x_al);
+ if (y_to_x)
+ *y_to_x = y_al;
+ else
+ free(y_al);
+
+ return;
+}
+
+
+/** performs partial order alignment:
+ lposeq_x may be a partial order;
+ lposeq_y is assumed to be a linear order (regular sequence);
+ returns the alignment in x_to_y[] and y_to_x[], and also
+ returns the alignment score as the return value.
+*/
+
+LPOScore_T align_lpo (LPOSequence_T *lposeq_x,
+ LPOSequence_T *lposeq_y,
+ ResidueScoreMatrix_T *m,
+ LPOLetterRef_T **x_to_y,
+ LPOLetterRef_T **y_to_x,
+ int use_global_alignment)
+{
+ int len_x = lposeq_x->length;
+ int len_y = lposeq_y->length;
+ LPOLetter_T *seq_x = lposeq_x->letter;
+ LPOLetter_T *seq_y = lposeq_y->letter;
+
+ int i, j, xcount, prev_gap, next_gap;
+ int best_x = -2, best_y = -2;
+ LPOScore_T best_score = -999999;
+ int *is_final_node_x;
+ LPOLetterLink_T **x_left = NULL, *xl;
+ DPMove_T **move = NULL, *my_move;
+
+ DPScore_T *curr_score = NULL, *prev_score = NULL, *init_col_score = NULL, *my_score, *swap;
+
+ LPOScore_T *gap_penalty_x, *gap_penalty_y;
+ double gap_decrement;
+ int *next_gap_array, *next_perp_gap_array;
+ int L_trunc = m->trunc_gap_length, L_decay = m->decay_gap_length;
+
+ LPOScore_T try_score, insert_x_score, insert_y_score, match_score;
+ int insert_x_x, insert_x_gap;
+ int insert_y_y, insert_y_gap;
+ int match_x, match_y;
+
+ long n_edges = 0;
+
+ CALLOC (gap_penalty_x, L_trunc + L_decay + 2, LPOScore_T);
+ CALLOC (gap_penalty_y, L_trunc + L_decay + 2, LPOScore_T);
+ CALLOC (next_gap_array, L_trunc + L_decay + 2, int);
+ CALLOC (next_perp_gap_array, L_trunc + L_decay + 2, int);
+
+ gap_penalty_x[0] = m->gap_penalty_set[0][0]; /* INITIALIZE GAP PENALTY LOOKUP */
+ for (i=1;i<L_trunc;i++)
+ gap_penalty_x[i] = m->gap_penalty_set[0][1];
+ gap_decrement = (m->gap_penalty_set[0][1] - m->gap_penalty_set[0][2]) / ((double)(L_decay + 1));
+ for (i=L_trunc;i<L_trunc+L_decay+1;i++)
+ gap_penalty_x[i] = m->gap_penalty_set[0][1] - (i-L_trunc+1) * gap_decrement;
+ gap_penalty_x[L_trunc + L_decay] = m->gap_penalty_set[0][2]; /* (REDUNDANT) */
+ gap_penalty_x[L_trunc + L_decay + 1] = 0; /* INITIAL STATE FOR LOCAL ALIGNMENT */
+
+ gap_penalty_y[0] = m->gap_penalty_set[1][0]; /* DIFFERENT PENALTY FOR Y-GAP */
+ for (i=1;i<L_trunc;i++)
+ gap_penalty_y[i] = m->gap_penalty_set[1][1];
+ gap_decrement = (m->gap_penalty_set[1][1] - m->gap_penalty_set[1][2]) / ((double)(L_decay + 1));
+ for (i=L_trunc;i<L_trunc+L_decay+1;i++)
+ gap_penalty_y[i] = m->gap_penalty_set[1][1] - (i-L_trunc+1) * gap_decrement;
+ gap_penalty_y[L_trunc + L_decay] = m->gap_penalty_set[1][2]; /* (REDUNDANT) */
+ gap_penalty_y[L_trunc + L_decay + 1] = 0; /* INITIAL STATE FOR LOCAL ALIGNMENT */
+
+ for (i=0; i<L_trunc+L_decay+2; i++) {
+ /* GAP LENGTH EXTENSION RULE: */
+ /* 0->1, 1->2, 2->3, ..., M-1->M, M->M, M+1->M+1. */
+ next_gap_array[i] = (i < L_trunc+L_decay) ? i+1 : i;
+ next_perp_gap_array[i] = (DOUBLE_GAP_SCORING && i < L_trunc+L_decay+1) ? 0 : next_gap_array[i];
+ }
+
+ get_seq_left_and_final (lposeq_x, &x_left, &is_final_node_x);
+
+ CALLOC (move, len_y, DPMove_T *);
+ for (i=0; i<len_y; i++) {
+ CALLOC (move[i], len_x, DPMove_T);
+ }
+
+ CALLOC (init_col_score, len_y+1, DPScore_T);
+ init_col_score = &(init_col_score[1]);
+
+ CALLOC (curr_score, len_x+1, DPScore_T);
+ curr_score = &(curr_score[1]);
+ CALLOC (prev_score, len_x+1, DPScore_T);
+ prev_score = &(prev_score[1]);
+
+ /* FILL INITIAL ROW. */
+ /* GLOBAL ALIGNMENT: no free gaps */
+ /* LOCAL ALIGNMENT: free initial gap */
+
+ curr_score[-1].score = 0;
+ curr_score[-1].gap_x = curr_score[-1].gap_y =
+ (use_global_alignment) ? 0 : TRUNCATE_GAP_LENGTH+1;
+
+ for (i=0; i<len_x; i++) {
+ for (xcount = 1, xl = x_left[i]; xl != NULL; xcount++, xl = xl->more) {
+ prev_gap = curr_score[xl->ipos].gap_x;
+ try_score = curr_score[xl->ipos].score + xl->score - gap_penalty_x[prev_gap];
+ if (xcount == 1 || try_score > curr_score[i].score) {
+ curr_score[i].score = try_score;
+ curr_score[i].gap_x = next_gap_array[prev_gap];
+ curr_score[i].gap_y = next_perp_gap_array[prev_gap];
+ }
+ }
+ }
+
+ /* FILL INITIAL COLUMN. */
+
+ init_col_score[-1] = curr_score[-1];
+ for (i=0; i<len_y; i++) {
+ prev_gap = init_col_score[i-1].gap_y;
+ init_col_score[i].score = init_col_score[i-1].score - gap_penalty_y[prev_gap];
+ init_col_score[i].gap_x = next_perp_gap_array[prev_gap];
+ init_col_score[i].gap_y = next_gap_array[prev_gap];
+ }
+
+
+ /** MAIN DYNAMIC PROGRAMMING LOOP **/
+
+
+ /* OUTER LOOP (i-th position in linear seq y): */
+ for (i=0; i<len_y; i++) {
+
+ swap = prev_score; prev_score = curr_score; curr_score = swap;
+
+ curr_score[-1] = init_col_score[i];
+
+ /* INNER LOOP (j-th position in LPO x): */
+ for (j=0; j<len_x; j++) {
+
+ /* ONLY POSSIBLE Y-INSERTION: trace back to (i-1, j). */
+ prev_gap = prev_score[j].gap_y;
+ insert_y_score = prev_score[j].score - gap_penalty_y[prev_gap];
+ insert_y_gap = prev_gap;
+
+ /* ONLY ONE Y-PREDECESSOR */
+ match_y = insert_y_y = 1;
+
+ /* LOOP OVER x-predecessors: */
+ for (xcount = 1, xl = x_left[j]; xl != NULL; xcount++, xl = xl->more) {
+
+ /* IMPROVE XY-MATCH?: trace back to (i-1, j'=xl->ipos) */
+ try_score = prev_score[xl->ipos].score + xl->score;
+ if (xcount == 1 || try_score > match_score) {
+ match_score = try_score;
+ match_x = xcount;
+ }
+
+ /* IMPROVE X-INSERTION?: trace back to (i, j'=xl->ipos) */
+ prev_gap = curr_score[xl->ipos].gap_x;
+ try_score = curr_score[xl->ipos].score + xl->score - gap_penalty_x[prev_gap];
+ if (xcount == 1 || try_score > insert_x_score) {
+ insert_x_score = try_score;
+ insert_x_x = xcount;
+ insert_x_gap = prev_gap;
+ }
+ }
+
+ if (0 == use_global_alignment && match_score <= 0) {
+ match_score = 0;
+ match_x = match_y = 0; /* FIRST ALIGNED PAIR */
+ }
+
+ n_edges += (xcount-1);
+ match_score += m->score[(int)seq_x[j].letter][(int)seq_y[i].letter];
+
+ my_score = &curr_score[j];
+ my_move = &move[i][j];
+
+ if (match_score > insert_y_score && match_score > insert_x_score) {
+ /* XY-MATCH */
+ my_score->score = match_score;
+ my_score->gap_x = 0;
+ my_score->gap_y = 0;
+ my_move->x = match_x;
+ my_move->y = match_y;
+ }
+ else if (insert_x_score > insert_y_score) {
+ /* X-INSERTION */
+ my_score->score = insert_x_score;
+ my_score->gap_x = next_gap_array[insert_x_gap];
+ my_score->gap_y = next_perp_gap_array[insert_x_gap];
+ my_move->x = insert_x_x;
+ my_move->y = 0;
+ }
+ else {
+ /* Y-INSERTION */
+ my_score->score = insert_y_score;
+ my_score->gap_x = next_perp_gap_array[insert_y_gap];
+ my_score->gap_y = next_gap_array[insert_y_gap];
+ my_move->x = 0;
+ my_move->y = insert_y_y;
+ }
+
+ /* RECORD BEST START FOR TRACEBACK */
+ /* KEEPING ONLY FINAL-FINAL BESTS FOR GLOBAL ALIGNMENT */
+ if (my_score->score >= best_score && (0 == use_global_alignment || (is_final_node_x[j] && i==len_y-1))) {
+ if (my_score->score > best_score || (j == best_x && i < best_y) || j < best_x) {
+ best_score = my_score->score;
+ best_x = j;
+ best_y = i;
+ }
+ }
+ }
+ }
+
+ IF_GUARD(best_x>=len_x || best_y>=len_y,1.1,(ERRTXT,"Bounds exceeded!\nbest_x,best_y:%d,%d\tlen:%d,%d\n",best_x,best_y,len_x,len_y),CRASH);
+
+ /*
+ fprintf (stderr, "aligned (%d nodes, %ld edges) to (%d nodes)\n", len_x, n_edges/len_y, len_y);
+ fprintf (stderr, "best score %d @ (%d %d)\n", best_score, best_x, best_y);
+ */
+
+ /* DYNAMIC PROGRAMING MATRIX COMPLETE, NOW TRACE BACK FROM best_x, best_y */
+ trace_back_lpo_alignment (len_x, len_y, move, x_left,
+ best_x, best_y,
+ x_to_y, y_to_x);
+
+
+ FREE (gap_penalty_x);
+ FREE (gap_penalty_y);
+ FREE (next_gap_array);
+ FREE (next_perp_gap_array);
+
+ prev_score = &(prev_score[-1]);
+ FREE (prev_score);
+ curr_score = &(curr_score[-1]);
+ FREE (curr_score);
+
+ init_col_score = &(init_col_score[-1]);
+ FREE (init_col_score);
+
+ FREE (is_final_node_x);
+
+ for (i=0; i<len_x; i++) {
+ if (x_left[i] != NULL && x_left[i] != &seq_x[i].left) {
+ FREE (x_left[i]);
+ }
+ }
+ FREE (x_left);
+
+ for (i=0; i<len_y; i++) {
+ FREE (move[i]);
+ }
+ FREE (move);
+
+ return best_score;
+}
Added: trunk/packages/poa/branches/upstream/current/align_lpo_po.c
===================================================================
--- trunk/packages/poa/branches/upstream/current/align_lpo_po.c 2006-08-07 14:01:18 UTC (rev 90)
+++ trunk/packages/poa/branches/upstream/current/align_lpo_po.c 2006-08-23 13:45:39 UTC (rev 91)
@@ -0,0 +1,272 @@
+
+
+
+#include "default.h"
+#include "poa.h"
+#include "seq_util.h"
+#include "lpo.h"
+
+
+
+
+
+typedef struct {
+ unsigned char x;
+ unsigned char y:7;
+ unsigned char is_aligned:1;
+} LPOMove_T;
+
+
+typedef unsigned char LPOGapLength_T;
+
+
+
+
+
+void trace_back_lpo_po_alignment(int len_x,LPOLetter_T seq_x[],
+ int len_y,LPOLetter_T seq_y[],
+ LPOMove_T **move,
+ LPOLetterRef_T best_x,
+ LPOLetterRef_T best_y,
+ LPOLetterRef_T **x_to_y,
+ LPOLetterRef_T **y_to_x)
+{
+ int i;
+ LPOLetterRef_T new_x,*x_al=NULL,*y_al=NULL;
+ LPOLetterLink_T *left;
+
+
+ CALLOC(x_al,len_x,LPOLetterRef_T);
+ CALLOC(y_al,len_y,LPOLetterRef_T);
+ LOOP (i,len_x) x_al[i]= INVALID_LETTER_POSITION;
+ LOOP (i,len_y) y_al[i]= INVALID_LETTER_POSITION;
+
+ while (best_x>=0 && best_y>=0) {
+ if (move[best_x][best_y].is_aligned) {/* ALIGNED! MAP best_x <--> best_y */
+ x_al[best_x]=best_y;
+ y_al[best_y]=best_x;
+ }
+
+ if (0==move[best_x][best_y].x /* HIT START OF THE ALIGNMENT, SO QUIT */
+ && 0==move[best_x][best_y].y)
+ break;
+
+ if ((i=move[best_x][best_y].x)>0) { /* TRACE BACK ON X */
+ for (left= &seq_x[best_x].left;--i >0;left=left->more); /* USE iTH MOVE*/
+ new_x = left->ipos;
+ }
+ else new_x=best_x;
+ if ((i=move[best_x][best_y].y)>0) { /* TRACE BACK ON Y */
+ for (left= &seq_y[best_y].left;--i >0;left=left->more); /* USE iTH MOVE*/
+ best_y = left->ipos;
+ }
+ best_x=new_x;
+ }
+
+
+ if (x_to_y) /* HAND BACK ALIGNMENT RECIPROCAL MAPPINGS */
+ *x_to_y = x_al;
+ else
+ free(x_al);
+ if (y_to_x)
+ *y_to_x = y_al;
+ else
+ free(y_al);
+ return;
+}
+
+
+
+/** performs partial order alignment:
+ seq_x[] may be a partial order;
+ seq_y[] may be a partial order;
+ returns the alignment in x_to_y[] and y_to_x, and also
+ returns the alignment score as the return value */
+LPOScore_T align_lpo_po (LPOSequence_T *lposeq_x,
+ LPOSequence_T *lposeq_y,
+ ResidueScoreMatrix_T *m,
+ LPOLetterRef_T **x_to_y,
+ LPOLetterRef_T **y_to_x,
+ LPOScore_T (*scoring_function)
+ (int,int,LPOLetter_T [],LPOLetter_T [],ResidueScoreMatrix_T *),
+ int use_global_alignment)
+{
+ int len_x = lposeq_x->length;
+ int len_y = lposeq_y->length;
+ LPOLetter_T *seq_x = lposeq_x->letter;
+ LPOLetter_T *seq_y = lposeq_y->letter;
+
+ int i,j,j_left,best_x,best_y,nfree_list=0,ilast_right=0;
+ LPOScore_T **score=NULL,*my_score,*my_matrix,*score_base=NULL;
+ LPOMove_T **move=NULL,*move_base=NULL,*my_move;
+ LPOGapLength_T **gap_length=NULL,*my_gap_length,*gap_length_base=NULL;
+ LPOLetterLink_T *left,*my_left,*y_left;
+ int new_gap_len,insert_x,previous_x,previous_y,
+ i_x,new_score,new_x,new_y,current_gap_length,i_y,insert_y;
+ LPOScore_T match_score,previous_score,
+ insert_x_try,insert_x_score,insert_y_score,best_score= -999999;
+
+ LPOScore_T *gap_penalty_x, *gap_penalty_y;
+ double gap_decrement;
+ int L_trunc = m->trunc_gap_length, L_decay = m->decay_gap_length;
+
+ CALLOC (gap_penalty_x, L_trunc + L_decay + 1, LPOScore_T);
+ CALLOC (gap_penalty_y, L_trunc + L_decay + 1, LPOScore_T);
+
+ gap_penalty_x[0] = m->gap_penalty_set[0][0]; /* INITIALIZE GAP PENALTY LOOKUP */
+ for (i=1;i<L_trunc;i++)
+ gap_penalty_x[i] = m->gap_penalty_set[0][1];
+ gap_decrement = (m->gap_penalty_set[0][1] - m->gap_penalty_set[0][2]) / ((double)(L_decay + 1));
+ for (i=L_trunc;i<L_trunc+L_decay+1;i++)
+ gap_penalty_x[i] = m->gap_penalty_set[0][1] - (i-L_trunc+1) * gap_decrement;
+ gap_penalty_x[L_trunc + L_decay] = m->gap_penalty_set[0][2]; /* (REDUNDANT) */
+
+ gap_penalty_y[0] = m->gap_penalty_set[1][0]; /* DIFFERENT PENALTY FOR Y-GAP */
+ for (i=1;i<L_trunc;i++)
+ gap_penalty_y[i] = m->gap_penalty_set[1][1];
+ gap_decrement = (m->gap_penalty_set[1][1] - m->gap_penalty_set[1][2]) / ((double)(L_decay + 1));
+ for (i=L_trunc;i<L_trunc+L_decay+1;i++)
+ gap_penalty_y[i] = m->gap_penalty_set[1][1] - (i-L_trunc+1) * gap_decrement;
+ gap_penalty_y[L_trunc + L_decay] = m->gap_penalty_set[1][2]; /* (REDUNDANT) */
+
+ CALLOC(score,len_x,LPOScore_T *); /* ALLOCATE MATRIX STORAGE: ROW POINTERS */
+ CALLOC(move,len_x,LPOMove_T *);
+ CALLOC(gap_length,len_x,LPOGapLength_T *);
+ CALLOC(score_base,len_x*(len_y+1),LPOScore_T); /*ALLOCATE MATRIX RECTANGLE */
+ CALLOC(move_base,len_x*(len_y+1),LPOMove_T); /*ALLOCATE MATRIX RECTANGLE */
+ CALLOC(gap_length_base,len_x*(len_y+1),LPOGapLength_T); /*ALLOCATE MATRIX RECTANGLE */
+
+
+ LOOPF (i,len_x) {/* BUILD UP DP MATRIX, ROW BY ROW */
+ score[i]=score_base+i*(len_y+1)+1; /* LEAVE SPACE FOR [-1] ENTRY */
+ move[i]=move_base+i*(len_y+1)+1; /* LEAVE SPACE FOR [-1] ENTRY */
+ gap_length[i]=gap_length_base+i*(len_y+1)+1; /*LEAVE SPACE FOR [-1] ENTRY*/
+ my_move=move[i]; /* USED TO SPEED UP MATRIX ACCESS INSIDE INNER LOOP*/
+ my_score=score[i];
+ my_gap_length=gap_length[i];
+ score[i][-1]= -999999; /* UNACCEPTABLE SCORE ENSURES -1 NEVER CHOSEN*/
+ /* my_matrix=m->score[seq_x[i].letter]; NOT USED */
+ if (seq_x[i].left.ipos>=0) /* AT LEAST ONE VALID POSITION TO THE LEFT */
+ my_left= &seq_x[i].left;
+ else /* THERE IS NO POSITION TO THE LEFT */
+ my_left=NULL;
+ LOOPF (j,len_y) {
+ j_left=SEQ_Y_LEFT(j); /* POSITION TO THE LEFT OF j */
+ previous_score=previous_x=previous_y=0;
+ i_x=1;
+ insert_x_score= -999999;
+ for (left=my_left;left;left=left->more) {
+ if (move[left->ipos][j].x>0) /* COULD BE [X,0] GAP CONTINUATION */
+ current_gap_length=gap_length[left->ipos][j];
+ else/*NOT AN EXTENSION OF A [X,0] GAP, SO TREAT AS START OF NEW GAP*/
+ current_gap_length=0;
+ insert_x_try=score[left->ipos][j] /* FIND BEST [X,0] MOVE */
+ + left->score /* INCLUDE WEIGHTING FROM THIS EDGE */
+ - gap_penalty_x[current_gap_length];
+ if (insert_x_try>insert_x_score) { /* IF BEST insert_x MOVE, SAVE*/
+ insert_x=i_x;
+ insert_x_score=insert_x_try;
+ new_gap_len=current_gap_length+1;
+ if (new_gap_len>TRUNCATE_GAP_LENGTH) /* PREVENT OVERFLOW */
+ new_gap_len=TRUNCATE_GAP_LENGTH;
+ }
+
+ /* FIND BEST [X,Y] MOVE */
+ if (seq_y[j].left.ipos>=0){/*AT LEAST ONE VALID POSITION TO THE LEFT*/
+ i_y=1;
+ for (y_left= &seq_y[j].left;y_left;y_left=y_left->more) {
+ if (score[left->ipos][y_left->ipos] + left->score + y_left->score
+ >previous_score) {
+ previous_score=score[left->ipos][y_left->ipos]
+ + left->score + y_left->score;
+ previous_x=i_x;
+ previous_y=i_y;
+ }
+ i_y++;
+ }
+ }
+ i_x++; /* ADVANCE X MOVE INDEX */
+ } /* DONE SCANNING PREDECESSORS ON X */
+
+ match_score = previous_score /* TAKE BEST PREDECESSOR */
+ + scoring_function(i,j,seq_x,seq_y,m);
+#ifdef USE_LOCAL_NEUTRALITY_CORRECTION /* NO LONGER USED */
+ if (seq_x[i].score<seq_y[j].score) /* USE STRONGEST NEUTRALITY ADJ'T*/
+ match_score += seq_x[i].score;
+ else
+ match_score += seq_y[j].score;
+#endif
+ if (match_score>insert_x_score) { /* PREFER [X,Y] MOVE */
+ new_score=match_score;
+ new_x=previous_x;
+ new_y=previous_y;
+ new_gap_len=0;
+ }
+ else { /* PREFER [X,0] MOVE */
+ new_score=insert_x_score;
+ new_x=insert_x;
+ new_y=0;
+ }
+
+ /* [0,Y] MOVE */
+ insert_y_score= -999999;
+ if (seq_y[j].left.ipos>=0){/*AT LEAST ONE VALID POSITION TO THE LEFT*/
+ i_y=1;
+ for (y_left= &seq_y[j].left;y_left;y_left=y_left->more) {
+ if (my_move[y_left->ipos].y>0) /* COULD BE [0,1] GAP CONTINUATION */
+ current_gap_length=my_gap_length[y_left->ipos];
+ else/*NOT AN EXTENSION OF A [0,1] GAP, SO TREAT AS START OF NEW GAP*/
+ current_gap_length=0;
+ if (insert_y_score <
+ my_score[y_left->ipos]-gap_penalty_y[current_gap_length]) {
+ insert_y_score=my_score[y_left->ipos]-gap_penalty_y[current_gap_length];
+ insert_y=i_y;
+ }
+ i_y++;
+ }
+ }
+
+ if (insert_y_score<new_score) { /* [new_x,new_y] MOVE IS BEST */
+ my_score[j]=new_score;
+ my_move[j].x=new_x;
+ my_move[j].y=new_y;
+ my_gap_length[j]=new_gap_len;
+ if (new_gap_len==0)
+ my_move[j].is_aligned=1;
+ }
+ else { /* [0,Y] MOVE IS BEST */
+ my_score[j]=insert_y_score;
+ my_move[j].x=0;
+ my_move[j].y=insert_y;
+ my_gap_length[j]=current_gap_length+1;
+ if (my_gap_length[j]>TRUNCATE_GAP_LENGTH) /* PREVENT OVERFLOW */
+ my_gap_length[j]=TRUNCATE_GAP_LENGTH;
+ }
+ if (my_score[j]>best_score) { /* RECORD BEST MOVE */
+ best_score=my_score[j];
+ best_x=i;
+ best_y=j;
+ }
+ }
+ } /* DYNAMIC PROGRAMING MATRIX COMPLETE, NOW TRACE BACK FROM best_x,best_y*/
+
+ IF_GUARD(best_x>=len_x || best_y>=len_y,1.1,(ERRTXT,"Bounds exceeded!\nbest_x,best_y:%d,%d\tlen:%d,%d\n",best_x,best_y,len_x,len_y),CRASH);
+ trace_back_lpo_po_alignment(len_x,seq_x,len_y,seq_y,move,best_x,best_y,
+ x_to_y,y_to_x);
+
+ FREE (gap_penalty_x);
+ FREE (gap_penalty_y);
+
+ FREE(score_base); /* FREE MEMORY */
+ FREE(move_base);
+ FREE(gap_length_base);
+ FREE(score);
+ FREE(move);
+ FREE(gap_length);
+ return best_score;
+}
+
+
+
+
+
Added: trunk/packages/poa/branches/upstream/current/align_lpo_po2.c
===================================================================
--- trunk/packages/poa/branches/upstream/current/align_lpo_po2.c 2006-08-07 14:01:18 UTC (rev 90)
+++ trunk/packages/poa/branches/upstream/current/align_lpo_po2.c 2006-08-23 13:45:39 UTC (rev 91)
@@ -0,0 +1,427 @@
+
+#include "default.h"
+#include "poa.h"
+#include "seq_util.h"
+#include "lpo.h"
+
+
+/** set nonzero for old scoring (gap-opening penalty for X-Y transition) */
+#define DOUBLE_GAP_SCORING (0)
+
+
+typedef struct {
+ unsigned char x;
+ unsigned char y;
+}
+DPMove_T;
+
+typedef struct {
+ LPOScore_T score;
+ short gap_x, gap_y;
+}
+DPScore_T;
+
+/** is node 'i' the first node in any sequence in lposeq? */
+static int is_initial_node (int i, LPOSequence_T *lposeq)
+{
+ LPOLetterSource_T *src = &((lposeq->letter[i]).source);
+ while (src != NULL && src->iseq >= 0) {
+ if (src->ipos == 0) {
+ return 1;
+ }
+ src = src->more;
+ }
+ return 0;
+}
+
+/** is node 'i' the last node in any sequence in lposeq? */
+static int is_final_node (int i, LPOSequence_T *lposeq)
+{
+ LPOLetterSource_T *src = &((lposeq->letter[i]).source);
+ while (src != NULL && src->iseq >= 0) {
+ if (src->ipos == (lposeq->source_seq[src->iseq]).length - 1) {
+ return 1;
+ }
+ src = src->more;
+ }
+ return 0;
+}
+
+
+static void get_seq_left_and_final (LPOSequence_T *lposeq_x,
+ LPOLetterLink_T ***x_left_ptr,
+ int **is_final_node_ptr)
+{
+ int i, len_x = lposeq_x->length;
+ LPOLetter_T *seq_x = lposeq_x->letter;
+ int *is_final_node_x = NULL;
+ LPOLetterLink_T **x_left = NULL;
+
+ CALLOC (is_final_node_x, len_x, int);
+
+ for (i=0; i<len_x; i++) {
+ is_final_node_x[i] = is_final_node (i, lposeq_x);
+ }
+
+ CALLOC (x_left, len_x, LPOLetterLink_T *);
+
+ for (i=0; i<len_x; i++) {
+ if (is_initial_node (i, lposeq_x) && seq_x[i].left.ipos != -1) {
+ CALLOC (x_left[i], 1, LPOLetterLink_T);
+ x_left[i]->ipos = -1;
+ x_left[i]->score = 0;
+ x_left[i]->more = &seq_x[i].left;
+ }
+ else {
+ x_left[i] = &seq_x[i].left;
+ }
+ }
+
+ (*is_final_node_ptr) = is_final_node_x;
+ (*x_left_ptr) = x_left;
+}
+
+
+static void trace_back_lpo_alignment (int len_x, int len_y,
+ DPMove_T **move,
+ LPOLetterLink_T **x_left,
+ LPOLetterLink_T **y_left,
+ LPOLetterRef_T best_x, LPOLetterRef_T best_y,
+ LPOLetterRef_T **x_to_y,
+ LPOLetterRef_T **y_to_x)
+{
+ int i, xmove, ymove;
+ LPOLetterRef_T *x_al = NULL, *y_al = NULL;
+ LPOLetterLink_T *left;
+
+ CALLOC (x_al, len_x, LPOLetterRef_T);
+ CALLOC (y_al, len_y, LPOLetterRef_T);
+ LOOP (i,len_x) x_al[i] = INVALID_LETTER_POSITION;
+ LOOP (i,len_y) y_al[i] = INVALID_LETTER_POSITION;
+
+ while (best_x >= 0 && best_y >= 0) {
+
+ xmove = move[best_y][best_x].x;
+ ymove = move[best_y][best_x].y;
+
+ if (xmove>0 && ymove>0) { /* ALIGNED! MAP best_x <--> best_y */
+ x_al[best_x]=best_y;
+ y_al[best_y]=best_x;
+ }
+
+ if (xmove == 0 && ymove == 0) { /* FIRST ALIGNED PAIR */
+ x_al[best_x]=best_y;
+ y_al[best_y]=best_x;
+ break; /* FOUND START OF ALIGNED REGION, SO WE'RE DONE */
+ }
+
+ if (xmove>0) { /* TRACE BACK ON X */
+ left = x_left[best_x];
+ while ((--xmove)>0) {
+ left = left->more;
+ }
+ best_x = left->ipos;
+ }
+
+ if (ymove>0) { /* TRACE BACK ON Y */
+ left = y_left[best_y];
+ while ((--ymove)>0) {
+ left = left->more;
+ }
+ best_y = left->ipos;
+ }
+ }
+
+ if (x_to_y) /* HAND BACK ALIGNMENT RECIPROCAL MAPPINGS */
+ *x_to_y = x_al;
+ else
+ free(x_al);
+ if (y_to_x)
+ *y_to_x = y_al;
+ else
+ free(y_al);
+
+ return;
+}
+
+
+/** performs partial order alignment:
+ lposeq_x and lposeq_y are partial orders;
+ returns the alignment in x_to_y[] and y_to_x[], and also
+ returns the alignment score as the return value.
+*/
+
+LPOScore_T align_lpo_po (LPOSequence_T *lposeq_x,
+ LPOSequence_T *lposeq_y,
+ ResidueScoreMatrix_T *m,
+ LPOLetterRef_T **x_to_y,
+ LPOLetterRef_T **y_to_x,
+ LPOScore_T (*scoring_function)
+ (int, int, LPOLetter_T *, LPOLetter_T *, ResidueScoreMatrix_T *),
+ int use_global_alignment)
+{
+ int len_x = lposeq_x->length;
+ int len_y = lposeq_y->length;
+ LPOLetter_T *seq_x = lposeq_x->letter;
+ LPOLetter_T *seq_y = lposeq_y->letter;
+
+ int i, j, xcount, ycount, prev_gap, next_gap;
+ int best_x = -1, best_y = -1;
+ LPOScore_T best_score = -999999;
+ int *is_final_node_x, *is_final_node_y;
+ LPOLetterLink_T **x_left = NULL, **y_left = NULL, *xl, *yl;
+ DPMove_T **move = NULL, *my_move;
+
+ DPScore_T *curr_score = NULL, *prev_score = NULL, *init_col_score = NULL, *my_score;
+ DPScore_T **score_rows = NULL;
+
+ LPOScore_T *gap_penalty_x, *gap_penalty_y;
+ double gap_decrement;
+ int *next_gap_array, *next_perp_gap_array;
+ int L_trunc = m->trunc_gap_length, L_decay = m->decay_gap_length;
+
+ LPOScore_T try_score, insert_x_score, insert_y_score, match_score;
+ int insert_x_x, insert_x_gap;
+ int insert_y_y, insert_y_gap;
+ int match_x, match_y;
+
+ long n_edges = 0;
+
+ CALLOC (gap_penalty_x, L_trunc + L_decay + 2, LPOScore_T);
+ CALLOC (gap_penalty_y, L_trunc + L_decay + 2, LPOScore_T);
+ CALLOC (next_gap_array, L_trunc + L_decay + 2, int);
+ CALLOC (next_perp_gap_array, L_trunc + L_decay + 2, int);
+
+ gap_penalty_x[0] = m->gap_penalty_set[0][0]; /* INITIALIZE GAP PENALTY LOOKUP */
+ for (i=1;i<L_trunc;i++)
+ gap_penalty_x[i] = m->gap_penalty_set[0][1];
+ gap_decrement = (m->gap_penalty_set[0][1] - m->gap_penalty_set[0][2]) / ((double)(L_decay + 1));
+ for (i=L_trunc;i<L_trunc+L_decay+1;i++)
+ gap_penalty_x[i] = m->gap_penalty_set[0][1] - (i-L_trunc+1) * gap_decrement;
+ gap_penalty_x[L_trunc + L_decay] = m->gap_penalty_set[0][2]; /* (REDUNDANT) */
+ gap_penalty_x[L_trunc + L_decay + 1] = 0; /* INITIAL STATE FOR LOCAL ALIGNMENT */
+
+ gap_penalty_y[0] = m->gap_penalty_set[1][0]; /* DIFFERENT PENALTY FOR Y-GAP */
+ for (i=1;i<L_trunc;i++)
+ gap_penalty_y[i] = m->gap_penalty_set[1][1];
+ gap_decrement = (m->gap_penalty_set[1][1] - m->gap_penalty_set[1][2]) / ((double)(L_decay + 1));
+ for (i=L_trunc;i<L_trunc+L_decay+1;i++)
+ gap_penalty_y[i] = m->gap_penalty_set[1][1] - (i-L_trunc+1) * gap_decrement;
+ gap_penalty_y[L_trunc + L_decay] = m->gap_penalty_set[1][2]; /* (REDUNDANT) */
+ gap_penalty_y[L_trunc + L_decay + 1] = 0; /* INITIAL STATE FOR LOCAL ALIGNMENT */
+
+ for (i=0; i<L_trunc+L_decay+2; i++) {
+ /* GAP LENGTH EXTENSION RULE: */
+ /* 0->1, 1->2, 2->3, ..., M-1->M, M->M, M+1->M+1. */
+ next_gap_array[i] = (i < L_trunc+L_decay) ? i+1 : i;
+ next_perp_gap_array[i] = (DOUBLE_GAP_SCORING && i < L_trunc+L_decay+1) ? 0 : next_gap_array[i];
+ }
+
+ get_seq_left_and_final (lposeq_x, &x_left, &is_final_node_x);
+ get_seq_left_and_final (lposeq_y, &y_left, &is_final_node_y);
+
+ CALLOC (move, len_y, DPMove_T *);
+ for (i=0; i<len_y; i++) {
+ CALLOC (move[i], len_x, DPMove_T);
+ }
+
+ CALLOC (init_col_score, len_y+1, DPScore_T);
+ init_col_score = &(init_col_score[1]);
+
+ CALLOC (score_rows, len_y+1, DPScore_T *);
+ score_rows = &(score_rows[1]);
+ for (i=-1; i<len_y; i++) {
+ CALLOC (score_rows[i], len_x+1, DPScore_T);
+ score_rows[i] = &(score_rows[i][1]);
+ }
+ curr_score = score_rows[-1];
+
+ /* FILL INITIAL ROW. */
+ /* GLOBAL ALIGNMENT: no free gaps */
+ /* LOCAL ALIGNMENT: free initial gap */
+
+ curr_score[-1].score = 0;
+ curr_score[-1].gap_x = curr_score[-1].gap_y =
+ (use_global_alignment) ? 0 : TRUNCATE_GAP_LENGTH+1;
+
+ for (i=0; i<len_x; i++) {
+ for (xcount = 1, xl = x_left[i]; xl != NULL; xcount++, xl = xl->more) {
+ prev_gap = curr_score[xl->ipos].gap_x;
+ try_score = curr_score[xl->ipos].score + xl->score - gap_penalty_x[prev_gap];
+ if (xcount == 1 || try_score > curr_score[i].score) {
+ curr_score[i].score = try_score;
+ curr_score[i].gap_x = next_gap_array[prev_gap];
+ curr_score[i].gap_y = next_perp_gap_array[prev_gap];
+ }
+ }
+ }
+
+ /* FILL INITIAL COLUMN. */
+
+ init_col_score[-1] = curr_score[-1];
+ for (i=0; i<len_y; i++) {
+ for (ycount = 1, yl = y_left[i]; yl != NULL; ycount++, yl = yl->more) {
+ prev_gap = init_col_score[yl->ipos].gap_y;
+ try_score = init_col_score[yl->ipos].score + yl->score - gap_penalty_y[prev_gap];
+ if (ycount == 1 || try_score > init_col_score[i].score) {
+ init_col_score[i].score = try_score;
+ init_col_score[i].gap_x = next_perp_gap_array[prev_gap];
+ init_col_score[i].gap_y = next_gap_array[prev_gap];
+ }
+ }
+ }
+
+
+ /** MAIN DYNAMIC PROGRAMMING LOOP **/
+
+
+ /* OUTER LOOP (i-th position in LPO y): */
+ for (i=0; i<len_y; i++) {
+
+ curr_score = score_rows[i];
+
+ curr_score[-1] = init_col_score[i];
+
+ /* INNER LOOP (j-th position in LPO x): */
+ for (j=0; j<len_x; j++) {
+
+ /* LOOP OVER y-predecessors: */
+ for (ycount = 1, yl = y_left[i]; yl != NULL; ycount++, yl = yl->more) {
+
+ prev_score = score_rows[yl->ipos];
+
+ /* IMPROVE Y-INSERTION?: trace back to (i'=yl->ipos, j) */
+ prev_gap = prev_score[j].gap_y;
+ try_score = prev_score[j].score + yl->score - gap_penalty_y[prev_gap];
+ if (ycount == 1 || try_score > insert_y_score) {
+ insert_y_score = try_score;
+ insert_y_y = ycount;
+ insert_y_gap = prev_gap;
+ }
+
+ /* LOOP OVER x-predecessors: */
+ for (xcount = 1, xl = x_left[j]; xl != NULL; xcount++, xl = xl->more) {
+
+ /* IMPROVE XY-MATCH?: trace back to (i'=yl->ipos, j'=xl->ipos) */
+ try_score = prev_score[xl->ipos].score + xl->score + yl->score;
+ if ((xcount == 1 && ycount == 1) || try_score > match_score) {
+ match_score = try_score;
+ match_x = xcount;
+ match_y = ycount;
+ }
+ }
+ }
+
+ /* LOOP OVER x-predecessors */
+ /* IMPROVE X-INSERTION?: trace back to (i, j'=xl->ipos) */
+ for (xcount = 1, xl = x_left[j]; xl != NULL; xcount++, xl = xl->more) {
+ prev_gap = curr_score[xl->ipos].gap_x;
+ try_score = curr_score[xl->ipos].score + xl->score - gap_penalty_x[prev_gap];
+ if (xcount == 1 || try_score > insert_x_score) {
+ insert_x_score = try_score;
+ insert_x_x = xcount;
+ insert_x_gap = prev_gap;
+ }
+ }
+
+ if (0 == use_global_alignment && match_score <= 0) {
+ match_score = 0;
+ match_x = match_y = 0; /* FIRST ALIGNED PAIR */
+ }
+
+ n_edges += (xcount-1)*(ycount-1);
+ match_score += scoring_function (j, i, seq_x, seq_y, m);
+
+ my_score = &curr_score[j];
+ my_move = &move[i][j];
+
+ if (match_score > insert_y_score && match_score > insert_x_score) {
+ /* XY-MATCH */
+ my_score->score = match_score;
+ my_score->gap_x = 0;
+ my_score->gap_y = 0;
+ my_move->x = match_x;
+ my_move->y = match_y;
+ }
+ else if (insert_x_score > insert_y_score) {
+ /* X-INSERTION */
+ my_score->score = insert_x_score;
+ my_score->gap_x = next_gap_array[insert_x_gap];
+ my_score->gap_y = next_perp_gap_array[insert_x_gap];
+ my_move->x = insert_x_x;
+ my_move->y = 0;
+ }
+ else {
+ /* Y-INSERTION */
+ next_gap = next_gap_array[insert_y_gap];
+ my_score->score = insert_y_score;
+ my_score->gap_x = next_perp_gap_array[insert_y_gap];
+ my_score->gap_y = next_gap_array[insert_y_gap];
+ my_move->x = 0;
+ my_move->y = insert_y_y;
+ }
+
+ /* RECORD BEST START FOR TRACEBACK */
+ /* KEEPING ONLY FINAL-FINAL BESTS FOR GLOBAL ALIGNMENT */
+ if (my_score->score >= best_score && (0 == use_global_alignment || (is_final_node_x[j] && is_final_node_y[i]))) {
+ if (my_score->score > best_score || (j == best_x && i < best_y) || j < best_x) {
+ best_score = my_score->score;
+ best_x = j;
+ best_y = i;
+ }
+ }
+ }
+ }
+
+ IF_GUARD(best_x>=len_x || best_y>=len_y,1.1,(ERRTXT,"Bounds exceeded!\nbest_x,best_y:%d,%d\tlen:%d,%d\n",best_x,best_y,len_x,len_y),CRASH);
+
+ /*
+ fprintf (stderr, "aligned (%d nodes, %ld edges) to (%d nodes)\n", len_x, n_edges, len_y);
+ fprintf (stderr, "best score %d @ (%d %d)\n", best_score, best_x, best_y);
+ */
+
+ /* DYNAMIC PROGRAMING MATRIX COMPLETE, NOW TRACE BACK FROM best_x, best_y */
+ trace_back_lpo_alignment (len_x, len_y, move, x_left, y_left,
+ best_x, best_y,
+ x_to_y, y_to_x);
+
+
+ FREE (gap_penalty_x);
+ FREE (gap_penalty_y);
+ FREE (next_gap_array);
+ FREE (next_perp_gap_array);
+
+ for (i=-1; i<len_y; i++) {
+ score_rows[i] = &(score_rows[i][-1]);
+ FREE (score_rows[i]);
+ }
+ score_rows = &(score_rows[-1]);
+ FREE (score_rows);
+
+ init_col_score = &(init_col_score[-1]);
+ FREE (init_col_score);
+
+ FREE (is_final_node_x);
+ FREE (is_final_node_y);
+
+ for (i=0; i<len_x; i++) {
+ if (x_left[i] != &seq_x[i].left) {
+ FREE (x_left[i]);
+ }
+ }
+ FREE (x_left);
+
+ for (i=0; i<len_y; i++) {
+ if (y_left[i] != &seq_y[i].left) {
+ FREE (y_left[i]);
+ }
+ }
+ FREE (y_left);
+
+ for (i=0; i<len_y; i++) {
+ FREE (move[i]);
+ }
+ FREE (move);
+
+ return best_score;
+}
Added: trunk/packages/poa/branches/upstream/current/align_score.c
===================================================================
--- trunk/packages/poa/branches/upstream/current/align_score.c 2006-08-07 14:01:18 UTC (rev 90)
+++ trunk/packages/poa/branches/upstream/current/align_score.c 2006-08-23 13:45:39 UTC (rev 91)
@@ -0,0 +1,31 @@
+
+
+
+
+#include "default.h"
+#include "poa.h"
+#include "seq_util.h"
+#include "lpo.h"
+
+
+
+typedef struct {
+ unsigned char x;
+ unsigned char y:7;
+ unsigned char is_aligned:1;
+} LPOMove_T;
+
+
+
+/* CATIE: YOU CAN PUT ANY SCORING METHOD YOU WANT INSIDE THIS
+ FUNCTION. JUST REPLACE THE CONTENTS OF THE FUNCTION WITH
+ YOUR SCORING METHOD */
+LPOScore_T caties_scoring_function(int i,
+ int j,
+ LPOLetter_T seq_x[],
+ LPOLetter_T seq_y[],
+ ResidueScoreMatrix_T *m)
+{
+ return m->score[seq_x[i].letter][seq_y[j].letter]; /*TRIVIAL SCORING FUNC:
+ JUST USE MATRIX VALUE*/
+}
Added: trunk/packages/poa/branches/upstream/current/black_flag.c
===================================================================
--- trunk/packages/poa/branches/upstream/current/black_flag.c 2006-08-07 14:01:18 UTC (rev 90)
+++ trunk/packages/poa/branches/upstream/current/black_flag.c 2006-08-23 13:45:39 UTC (rev 91)
@@ -0,0 +1,125 @@
+
+
+
+#include "default.h" /* ~~I */
+
+
+
+
+char ERRTXT[1024]
+ ="";
+char *Program_name="black_flag";
+char *Program_version="unknown";
+int Already_reported_crash=0;
+
+int black_flag(int bug_level,
+ char sourcefile[],
+ int sourceline,
+ char sourcefile_revision[])
+{
+ static int last_line= -1;
+ char *error_names[max_black_flag_type]
+ ={"CRASH","DIED","EXCEPTION","BAD_DATA","WARNING","DEBUG"};
+
+ switch (bug_level) {
+#ifndef DEBUG_USER_VERSION
+ case DEBUG_black_flag_type: /* IN DEV'T VERSION JUST CRASH!!! */
+#ifdef DEBUG_VERSION
+ case TRAP_black_flag_type: /* FOR DEBUG VERSION AND TRAP, CAUSE A CORE DUMP*/
+#endif
+#endif
+ case CRASH_black_flag_type: /* COULD INCLUDE MECHANISMS TO SEND EMAIL? */
+ /* print message at level 1, i.e. if we are printing anything at all */
+ PRINT_DEBUG(1,(DBOUT,"black_flag: %s %s:%s %s %s,%d\n%s\nend_black_flag\n",
+ error_names[bug_level],
+ Program_name,Program_version,
+ sourcefile,sourcefile_revision,sourceline,
+ ERRTXT[0]? ERRTXT:""));
+ if (Already_reported_crash)
+ return 1;
+ Already_reported_crash=1;
+ abort(); /* FORCE A CORE DUMP */
+
+
+ default: /* JUST PRINT THE ERROR */
+ PRINT_DEBUG(1,(DBOUT,"black_flag: %s %s:%s %s %s,%d\n%s\nend_black_flag\n",
+ error_names[bug_level],
+ Program_name,Program_version,
+ sourcefile,sourcefile_revision,sourceline,
+ ERRTXT[0]? ERRTXT:""));
+ break;
+
+ }
+
+ ERRTXT[0]='\0'; /* RESET THE ERROR TEXT */
+ last_line=sourceline;
+ return 1; /* SEND SIGNAL TO HANDLER CLAUSE TO DEAL WITH THIS ERROR */
+}
+
+
+
+
+
+
+
+
+void handle_crash(int sigcode)
+{
+ char *crash_mode;
+
+ if (Already_reported_crash)
+ exit(-1); /* IN ENDLESS LOOP REPORTING CRASH OVER & OVER? */
+
+ Already_reported_crash=1;
+ black_flag(CRASH_black_flag_type,"",0,"");
+ if ((crash_mode=getenv("HANDLE_CRASH")) && 0==strcmp(crash_mode,"NOCORE"))
+ exit(-1);
+ else
+ return; /*ENVIRONMENT SETTING ASKS US TO DUMP A CORE IMMEDIATELY */
+}
+
+
+
+
+
+int handle_crash_init(void (*crash_fun)())
+{
+#define HANDLE_CRASH_MAX 5
+ int i,signal_type[HANDLE_CRASH_MAX]
+ ={SIGSEGV,
+#ifdef SIGBUS /* LINUX DOESN'T HAVE BUS ERRORS? */
+ SIGBUS,
+#else
+ SIGSEGV, /* REUSE SEGV AS DUMMY ENTRY */
+#endif
+ SIGABRT,SIGFPE,SIGTRAP}; /*LIST OF SIGNALS TO HANDLE*/
+ if (!crash_fun) /* NO CRASH FUNCTION SUPPLIED, SO RESET TO DEFAULT */
+ crash_fun = SIG_DFL; /* RESET TO STANDARD CRASH BEHAVIOR */
+
+ LOOP (i,HANDLE_CRASH_MAX) /* SET THIS HANDLER FOR ALL OUR SIGNALS */
+ signal(signal_type[i],crash_fun);
+ return 0;
+}
+
+void black_flag_init_args(int narg,char *arg[],char progversion[])
+{
+ int i,len=0;
+ LOOP (i,narg)
+ len+=strlen(arg[i])+2;
+ CALLOC(Program_name,len,char);
+ LOOPF (i,narg) {
+ strcat(Program_name,arg[i]);
+ strcat(Program_name," ");
+ }
+ if (progversion)
+ Program_version=progversion;
+ handle_crash_init(handle_crash);
+}
+
+
+
+void black_flag_init(char progname[],char progversion[])
+{
+ black_flag_init_args(1,&progname,progversion);
+}
+
Added: trunk/packages/poa/branches/upstream/current/black_flag.h
===================================================================
--- trunk/packages/poa/branches/upstream/current/black_flag.h 2006-08-07 14:01:18 UTC (rev 90)
+++ trunk/packages/poa/branches/upstream/current/black_flag.h 2006-08-23 13:45:39 UTC (rev 91)
@@ -0,0 +1,245 @@
+
+
+
+#ifndef BLACK_FLAG_HEADER_INCLUDED
+#define BLACK_FLAG_HEADER_INCLUDED 1
+
+#include <signal.h>
+
+extern char ERRTXT[];
+#define DBOUT stderr
+
+enum {
+ CRASH_black_flag_type,
+ TRAP_black_flag_type,
+ COPE_black_flag_type,
+ USERR_black_flag_type,
+ WARN_black_flag_type,
+ DEBUG_black_flag_type,
+ max_black_flag_type
+ };
+
+
+#define NOERRMSG (ERRTXT,"")
+
+
+
+
+
+/********************************************************************
+ *
+ * IF_PARANOID:
+ * IF_PARANOID(CONDITION,REVISION,MESSAGE)
+ *
+ * error checking that will significantly slow execution or
+ * otherwise is desirable only in situations of extreme
+ * unction, e.g. during our debugging!!!
+ *
+ * The beauty of the IF_PARANOID idea is that you should sprinkle
+ * it liberally EVERYWHERE in your code without thought for
+ * whether it is necessary or might hurt performance,
+ * because it will NOT even be compiled into the program in
+ * the production version!!!!!!
+ *
+ * use IF_PARANOID checks EVERYWHERE you can think of
+ * definite error signals, even if you think "That shouldn't
+ * EVER happen".
+ *
+ * if the IF_PARANOID CONDITIONAL is TRUE, the program will abort();
+ *
+ *
+ *
+ * Also, our emacs custom highlighting system has been programmed
+ * to show IF_PARANOID and IF_DEBUG lines in a dim gray, so
+ * that these extensive error checks will not visually obscure
+ * the layout & organization of your algorithms.
+ *
+ * black_flag() is smart about printing both version information
+ * such as the vmake version name the executable was created by,
+ * and also the exact revision number of the file in which the
+ * error occured.
+ *
+ *
+ *-------------------------------------------------
+ *
+ * IF_DEBUG:
+ * IF_DEBUG(CONDITION,REVISION,MESSAGE)
+ *
+ * also not expected to be included in a final production
+ * release, but should not impact performance so significantly that
+ * it's unpleasant to test such a version in regular use patterns.
+ * Classic examples would be fairly pedantic checks at the entry
+ * and exit of all functions, testing for conditions "that shouldn't
+ * ever happen."
+ *
+ * if the IF_DEBUG CONDITIONAL is TRUE, the program will abort();
+ *
+ *
+ *-------------------------------------------------
+ *
+ * IF_GUARD:
+ * IF_GUARD(CONDITION,REVISION,MESSAGE,LEVEL)
+ *
+ * checks INCLUDED in final production versions, but taking
+ * advantage of the black_flag system to allow us to easily
+ * control what will be done in response to an error
+ * in any given executable, via compile-time flags:
+ *
+ * e.g.
+ * print error info on stderr, or to special log files,
+ *
+ * force a core dump,
+ *
+ * run dbx via an auto script to generate a stack frame,
+ * and mail the results to develop-support at mag.com,
+ * etc.
+ *
+ * IF_GUARD differs from IF_PARANOID and IF_DEBUG in that it
+ * requires an error_level argument, which must be one of
+ *
+ * CRASH ... the error is fatal, abort()
+ *
+ * TRAP ... the error is being trapped, but
+ * not handled. e.g. quiting from
+ * a function because some essential
+ * file was missing...
+ *
+ * COPE ... the error is being handled nicely.
+ * the handler code following IF_GUARD
+ * knows how to correct for the situation.
+ *
+ * USERR ... the user appears to have done something
+ * that makes no sense; they must be
+ * confused by our interface.
+ *
+ * WARN ... suspicious data or input, not
+ * definitely an error.
+ *
+ *
+ *
+ *-------------------------------------------------
+ *
+ * Examples:
+ *
+ * IF_PARANOID((iatom<0),4.6,(ERRTXT,"wacky iatom=%d",iatom));
+ *
+ * IF_DEBUG((iatom<0),4.6,(ERRTXT,"wacky iatom=%d",iatom));
+ *
+ * IF_GUARD((iatom<0),4.6,(ERRTXT,"wacky iatom=%d",iatom),CRASH);
+ *
+ * IF_GUARD((iatom<0),4.6,(ERRTXT,"wacky iatom=%d",iatom),COPE) {
+ * put some code to handle the error condition here;
+ * }
+ *
+ * USE THE 4.6 KEYWORD TO PUT IN VERSION NUMBERS AUTOMATICALLY.
+ *
+ *******************************************************************/
+
+
+#if defined(PARANOID_VERSION) /*???????????????????*/
+#ifndef DEBUG_VERSION
+#define DEBUG_VERSION
+#endif
+
+#define IF_PARANOID(CONDITION,REVISION,MESSAGE) \
+ if (CONDITION) {\
+ sprintf MESSAGE ;\
+ black_flag(DEBUG_black_flag_type,__FILE__,__LINE__,STRINGIFY(REVISION));\
+ }
+
+#else /*????????????????????????????????????????????*/
+#define IF_PARANOID(CONDITION,REVISION,MESSAGE)
+#endif /* !PARANOID_VERSION ??????????????????????*/
+
+
+
+
+
+
+
+
+
+
+#if defined(DEBUG_VERSION) /*??????????*/
+#define IF_DEBUG(CONDITION,REVISION,MESSAGE) \
+ if (CONDITION) {\
+ sprintf MESSAGE ;\
+ black_flag(DEBUG_black_flag_type,__FILE__,__LINE__,STRINGIFY(REVISION));\
+ }
+
+#else /*???????????????????????????????????????????????????????????*/
+#define IF_DEBUG(CONDITION,REVISION,MESSAGE)
+#endif /* !DEBUG_VERSION ????????????????????????????????????????*/
+
+
+
+
+
+
+
+
+/*********************************************************************
+ *
+ * IF_GUARD:
+ * the basic black_flag macro, for production version trapping
+ * and handling of errors.
+ *
+ * Essentially adds the flexibility of handling / recording
+ * errors however you like in black_flag(). Also, the programmer
+ * can provide, or omit, a clause following this macro that
+ * will only be executed if the CONDITION is true, allowing
+ * any kind of handling you wish.
+ *
+ ********************************************************************/
+
+
+#define IF_GUARD(CONDITION,REVISION,MESSAGE,LEVEL) \
+ if ((CONDITION) ? \
+ (sprintf MESSAGE,\
+ black_flag(CONCAT_MACRO(LEVEL,_black_flag_type),__FILE__,__LINE__,\
+ STRINGIFY(REVISION))) : 0)
+
+
+
+
+#define WARN_DEBUG(REVISION,MESSAGE,LEVEL) \
+(sprintf MESSAGE,\
+ black_flag(CONCAT_MACRO(LEVEL,_black_flag_type),__FILE__,__LINE__,\
+ STRINGIFY(REVISION)))
+
+
+#define WARN_MSG(LEVEL,MESSAGE,REVISION) \
+(sprintf MESSAGE,\
+ black_flag(CONCAT_MACRO(LEVEL,_black_flag_type),__FILE__,__LINE__,\
+ REVISION))
+
+
+
+/*********************************************************************
+ *
+ * OUT_OF_BOUNDS:
+ * checks if
+ * MIN <= INDEX < MAX
+ *
+ * e.g.
+ * OUT_OF_BOUNDS(ivar,0,nvar)
+ *
+ ********************************************************************/
+
+#define OUT_OF_BOUNDS(INDEX,MINIMUM_BOUND,MAXIMUM_BOUND) \
+((INDEX)<(MINIMUM_BOUND) || (INDEX)>=(MAXIMUM_BOUND))
+
+void handle_crash(int sigcode);
+int handle_crash_init(void (*crash_fun)());
+int black_flag(int bug_level,
+ char sourcefile[],
+ int sourceline,
+ char sourcefile_revision[]);
+
+char *Program_name;
+char *Program_version;
+
+void black_flag_init(char progname[],char progversion[]);
+void black_flag_init_args(int narg,char *arg[],char progversion[]);
+
+#endif
Added: trunk/packages/poa/branches/upstream/current/blosum80.mat
===================================================================
--- trunk/packages/poa/branches/upstream/current/blosum80.mat 2006-08-07 14:01:18 UTC (rev 90)
+++ trunk/packages/poa/branches/upstream/current/blosum80.mat 2006-08-23 13:45:39 UTC (rev 91)
@@ -0,0 +1,40 @@
+# Blosum80
+# Matrix made by matblas from blosum80.iij
+# * column uses minimum score
+# BLOSUM Clustered Scoring Matrix in 1/3 Bit Units
+# Blocks Database = /data/blocks_5.0/blocks.dat
+# Cluster Percentage: >= 80
+# Entropy = 0.9868, Expected = -0.7442
+GAP-PENALTIES=12 6 6
+ A R N D C Q E G H I L K M F P S T W Y V B Z X ? a g t c u ] n
+A 7 -3 -3 -3 -1 -2 -2 0 -3 -3 -3 -1 -2 -4 -1 2 0 -5 -4 -1 -3 -2 -1 -9 -9 -9 -9 -9 -9 -9 -9
+R -3 9 -1 -3 -6 1 -1 -4 0 -5 -4 3 -3 -5 -3 -2 -2 -5 -4 -4 -2 0 -2 -9 -9 -9 -9 -9 -9 -9 -9
+N -3 -1 9 2 -5 0 -1 -1 1 -6 -6 0 -4 -6 -4 1 0 -7 -4 -5 5 -1 -2 -9 -9 -9 -9 -9 -9 -9 -9
+D -3 -3 2 10 -7 -1 2 -3 -2 -7 -7 -2 -6 -6 -3 -1 -2 -8 -6 -6 6 1 -3 -9 -9 -9 -9 -9 -9 -9 -9
+C -1 -6 -5 -7 13 -5 -7 -6 -7 -2 -3 -6 -3 -4 -6 -2 -2 -5 -5 -2 -6 -7 -4 -9 -9 -9 -9 -9 -9 -9 -9
+Q -2 1 0 -1 -5 9 3 -4 1 -5 -4 2 -1 -5 -3 -1 -1 -4 -3 -4 -1 5 -2 -9 -9 -9 -9 -9 -9 -9 -9
+E -2 -1 -1 2 -7 3 8 -4 0 -6 -6 1 -4 -6 -2 -1 -2 -6 -5 -4 1 6 -2 -9 -9 -9 -9 -9 -9 -9 -9
+G 0 -4 -1 -3 -6 -4 -4 9 -4 -7 -7 -3 -5 -6 -5 -1 -3 -6 -6 -6 -2 -4 -3 -9 -9 -9 -9 -9 -9 -9 -9
+H -3 0 1 -2 -7 1 0 -4 12 -6 -5 -1 -4 -2 -4 -2 -3 -4 3 -5 -1 0 -2 -9 -9 -9 -9 -9 -9 -9 -9
+I -3 -5 -6 -7 -2 -5 -6 -7 -6 7 2 -5 2 -1 -5 -4 -2 -5 -3 4 -6 -6 -2 -9 -9 -9 -9 -9 -9 -9 -9
+L -3 -4 -6 -7 -3 -4 -6 -7 -5 2 6 -4 3 0 -5 -4 -3 -4 -2 1 -7 -5 -2 -9 -9 -9 -9 -9 -9 -9 -9
+K -1 3 0 -2 -6 2 1 -3 -1 -5 -4 8 -3 -5 -2 -1 -1 -6 -4 -4 -1 1 -2 -9 -9 -9 -9 -9 -9 -9 -9
+M -2 -3 -4 -6 -3 -1 -4 -5 -4 2 3 -3 9 0 -4 -3 -1 -3 -3 1 -5 -3 -2 -9 -9 -9 -9 -9 -9 -9 -9
+F -4 -5 -6 -6 -4 -5 -6 -6 -2 -1 0 -5 0 10 -6 -4 -4 0 4 -2 -6 -6 -3 -9 -9 -9 -9 -9 -9 -9 -9
+P -1 -3 -4 -3 -6 -3 -2 -5 -4 -5 -5 -2 -4 -6 12 -2 -3 -7 -6 -4 -4 -2 -3 -9 -9 -9 -9 -9 -9 -9 -9
+S 2 -2 1 -1 -2 -1 -1 -1 -2 -4 -4 -1 -3 -4 -2 7 2 -6 -3 -3 0 -1 -1 -9 -9 -9 -9 -9 -9 -9 -9
+T 0 -2 0 -2 -2 -1 -2 -3 -3 -2 -3 -1 -1 -4 -3 2 8 -5 -3 0 -1 -2 -1 -9 -9 -9 -9 -9 -9 -9 -9
+W -5 -5 -7 -8 -5 -4 -6 -6 -4 -5 -4 -6 -3 0 -7 -6 -5 16 3 -5 -8 -5 -5 -9 -9 -9 -9 -9 -9 -9 -9
+Y -4 -4 -4 -6 -5 -3 -5 -6 3 -3 -2 -4 -3 4 -6 -3 -3 3 11 -3 -5 -4 -3 -9 -9 -9 -9 -9 -9 -9 -9
+V -1 -4 -5 -6 -2 -4 -4 -6 -5 4 1 -4 1 -2 -4 -3 0 -5 -3 7 -6 -4 -2 -9 -9 -9 -9 -9 -9 -9 -9
+B -3 -2 5 6 -6 -1 1 -2 -1 -6 -7 -1 -5 -6 -4 0 -1 -8 -5 -6 6 0 -3 -9 -9 -9 -9 -9 -9 -9 -9
+Z -2 0 -1 1 -7 5 6 -4 0 -6 -5 1 -3 -6 -2 -1 -2 -5 -4 -4 0 6 -1 -9 -9 -9 -9 -9 -9 -9 -9
+X -1 -2 -2 -3 -4 -2 -2 -3 -2 -2 -2 -2 -2 -3 -3 -1 -1 -5 -3 -2 -3 -1 -2 -9 -9 -9 -9 -9 -9 -9 -9
+? -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9
+a -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 4 -2 -2 -2 -2 -9 0
+g -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -2 4 -2 -2 -2 -9 0
+t -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -2 -2 4 -2 4 -9 0
+c -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -2 -2 -2 4 -2 -9 0
+u -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -2 -2 4 -2 4 -9 0
+] -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9
+n -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 0 0 0 0 0 -9 0
Added: trunk/packages/poa/branches/upstream/current/buildup_lpo.c
===================================================================
--- trunk/packages/poa/branches/upstream/current/buildup_lpo.c 2006-08-07 14:01:18 UTC (rev 90)
+++ trunk/packages/poa/branches/upstream/current/buildup_lpo.c 2006-08-23 13:45:39 UTC (rev 91)
@@ -0,0 +1,376 @@
+
+
+#include "default.h"
+#include "poa.h"
+#include "seq_util.h"
+#include "lpo.h"
+
+
+
+void fuse_ring_identities(int len_x,LPOLetter_T seq_x[],
+ int len_y,LPOLetter_T seq_y[],
+ LPOLetterRef_T al_x[],
+ LPOLetterRef_T al_y[])
+{
+ int i,j;
+ LOOP (i,len_y) {
+ if (al_y[i]<0 || seq_x[al_y[i]].letter == seq_y[i].letter)
+ continue; /* NOT ALIGNED, OR ALREADY IDENTICAL, SO SKIP */
+ for (j=seq_x[al_y[i]].align_ring;j!=al_y[i];j=seq_x[j].align_ring)
+ if (seq_x[j].letter == seq_y[i].letter) { /* IDENTICAL! SO FUSE! */
+ al_x[al_y[i]]= INVALID_LETTER_POSITION; /* DISCONNECT FROM OLD */
+ al_y[i]=j; /* CONNECT TO NEW IDENTITY */
+ al_x[j]=i;
+ break; /* SEARCH YE NO FURTHER */
+ }
+ }
+}
+
+
+
+
+/** aligns the sequences in seq[] to the sequence or partial order in
+ new_seq; seq[] must be linear orders (regular sequences);
+ the alignment is built up by iterative partial order alignment,
+ and the resulting partial order is returned in new_seq */
+LPOSequence_T *buildup_lpo(LPOSequence_T *new_seq,
+ int nseq,LPOSequence_T seq[],
+ ResidueScoreMatrix_T *score_matrix,
+ int use_aggressive_fusion,
+ int use_global_alignment)
+{
+ int i,max_alloc=0,total_alloc;
+ LPOLetterRef_T *al1=NULL,*al2=NULL;
+
+ lpo_index_symbols(new_seq,score_matrix); /* MAKE SURE LPO IS TRANSLATED */
+ for (i=0;i<nseq;i++) { /* ALIGN ALL SEQUENCES TO my_lpo ONE BY ONE */
+ if (seq[i].letter == NULL) /* HMM. HASN'T BEEN INITIALIZED AT ALL YET */
+ initialize_seqs_as_lpo(1,seq+i,score_matrix);
+ total_alloc=new_seq->length*seq[i].length
+ + sizeof(LPOLetter_T)*new_seq->length;
+ if (total_alloc>max_alloc) { /* DP RECTANGLE ARRAY SIZE */
+ max_alloc=total_alloc;
+#ifdef REPORT_MAX_ALLOC
+ fprintf(stderr,"max_alloc: %d bytes\n",max_alloc);
+#endif
+ if (max_alloc>POA_MAX_ALLOC) {
+ WARN_MSG(TRAP,(ERRTXT,"Exceeded memory bound: %d\n Exiting!\n\n",max_alloc),"$Revision: 1.2.2.1 $");
+ break; /* JUST RETURN AND FINISH */
+ }
+ }
+ align_lpo(new_seq,&seq[i],
+ score_matrix,&al1,&al2,use_global_alignment); /* ALIGN ONE MORE SEQ */
+ if (use_aggressive_fusion)
+ fuse_ring_identities(new_seq->length,new_seq->letter,
+ seq[i].length,seq[i].letter,al1,al2);
+ fuse_lpo(new_seq,seq+i,al1,al2); /* BUILD COMPOSITE LPO */
+
+ free_lpo_letters(seq[i].length,seq[i].letter,TRUE);/*NO NEED TO KEEP*/
+ seq[i].letter=NULL; /* MARK AS FREED... DON'T LEAVE DANGLING POINTER! */
+ FREE(al1); /* DUMP TEMPORARY MAPPING ARRAYS */
+ FREE(al2);
+ }
+
+ return new_seq;
+}
+/**@memo example: aligning a set of sequences to a partial order:
+ lpo_out=buildup_lpo(lpo_in,nseq,seq,&score_matrix,0);
+*/
+
+
+
+/** CLIPS seq->letter[] TO JUST THE SEGMENT ALIGNED TO letter_x[] via al_x[]
+ DOES *NOT* FREE existing seq->letter[]; YOU MUST KEEP IT OR FREE IT YOURSELF*/
+int clip_unaligned_ends(LPOSequence_T *seq,
+ LPOLetterRef_T al[],
+ int len_x,LPOLetter_T letter_x[],
+ LPOLetterRef_T al_x[],int *offset,int *match_length)
+{
+ int i,j=0,start,end,new_length,allow_end_length=0,nidentity=0;
+ LPOLetter_T *temp=NULL;
+ CALLOC(temp,seq->length,LPOLetter_T); /* ALLOCATE NEW letter[] COPY */
+ for (start=0;start<seq->length;start++) /* FIND 1ST ALIGNED POS */
+ if (al[start]>=0)
+ break;
+
+ for (end=seq->length -1;end>=0;end--) /* FIND LAST ALIGNED POS */
+ if (al[end]>=0)
+ break;
+
+ for (i=start;i<=end;i++) /* COUNT IDENTITIES TO letter_x[] */
+ if (al[i]>=0 && seq->letter[i].letter==letter_x[al[i]].letter)
+ nidentity++;
+ if (match_length) /* RETURN THE MATCH LENGTH TO THE CALLER */
+ *match_length = end-start+1;
+
+ if (start>allow_end_length) /* ALLOW EXTRA RESIDUES ON EITHER END*/
+ start-=allow_end_length;
+ else /* KEEP IN BOUNDS */
+ start=0;
+ if (end+allow_end_length<seq->length)
+ end+=allow_end_length;
+ else /* KEEP IN BOUNDS */
+ end=seq->length-1;
+
+ LOOP (i,len_x) /* WE ARE SHIFTING al TO THE RIGHT BY start POSITIONS */
+ if (al_x[i]>=0) /* SO WE HAVE TO TRANSLATE al_x CORRESPONDINGLY */
+ al_x[i]-= start;
+
+ seq->length=end-start+1; /* NOW TRANSLATE left, right, align_ring, ring_id*/
+ memcpy(temp,seq->letter+start,sizeof(LPOLetter_T)*(seq->length));
+ LOOP (i,seq->length) { /* THIS *ONLY* WORKS FOR PURE LINEAR SEQUENCE!!! */
+ temp[i].left.ipos -= start; /*IF <0, BECOMES INVALID BY DEFINITION, OK*/
+ temp[i].right.ipos -= start;
+ if (temp[i].right.ipos>=seq->length) /* PAST THE NEW, CLIPPED END */
+ temp[i].right.ipos= INVALID_LETTER_POSITION;
+ temp[i].ring_id=temp[i].align_ring=i;
+ }
+
+ if (offset) /* RETURN THE OFFSET TO THE CALLER */
+ *offset = start;
+
+ seq->letter=temp; /* NEW START: FIRST ALIGNED POSITION */
+ return nidentity; /* NEW LENGTH: FROM 1ST TO LAST ALIGNED POS*/
+}
+
+
+
+
+
+void restore_lpo_size(LPOSequence_T *seq,int length,LPOLetter_T *letter)
+{
+
+ free_lpo_letters(seq->length,seq->letter,TRUE); /* DUMP CLIPPED VERSION*/
+ seq->length=length; /* RESTORE ORIGINAL length AND letter[] */
+ seq->letter=letter;
+}
+
+
+
+/** BUILDS UP ALIGNMENT, BUT CLIPS UNALIGNED ENDS OF EACH NEW SEQUENCE ADDED
+-------------------------------------------------------
+---------------------------------------------------------------------------
+*/
+LPOSequence_T *buildup_clipped_lpo(LPOSequence_T *new_seq,
+ int nseq,LPOSequence_T seq[],
+ ResidueScoreMatrix_T *score_matrix,
+ int use_global_alignment)
+{
+ int i,ntemp,offset=0,nidentity,length_max=0,match_length=0;
+ int total_alloc,max_alloc=0;
+ LPOLetterRef_T *al1=NULL,*al2=NULL;
+ LPOLetter_T *temp;
+ float identity_max=0.,f;
+
+ lpo_index_symbols(new_seq,score_matrix); /* MAKE SURE LPO IS TRANSLATED */
+ for (i=0;i<nseq;i++) { /* ALIGN ALL SEQUENCES TO new_seq ONE BY ONE */
+ if (seq[i].letter == NULL) /* HMM. HASN'T BEEN INITIALIZED AT ALL YET */
+ initialize_seqs_as_lpo(1,seq+i,score_matrix);
+ total_alloc=new_seq->length*seq[i].length
+ + sizeof(LPOLetter_T)*new_seq->length;
+ if (total_alloc>max_alloc) { /* DP RECTANGLE ARRAY SIZE */
+ max_alloc=total_alloc;
+#ifdef REPORT_MAX_ALLOC
+ fprintf(stderr,"max_alloc: %d bytes (%d x %d)\n",max_alloc,
+ new_seq->length,seq[i].length);
+#endif
+ if (max_alloc>POA_MAX_ALLOC) {
+ WARN_MSG(TRAP,(ERRTXT,"Exceeded memory bound: %d\n Exiting!\n\n",max_alloc),"$Revision: 1.2.2.1 $");
+ break; /* JUST RETURN AND FINISH */
+ }
+ }
+ align_lpo(new_seq, &seq[i],
+ score_matrix,&al1,&al2,use_global_alignment); /* ALIGN ONE MORE SEQ */
+ ntemp=seq[i].length; /* SAVE letter[] BEFORE CLIPPING IT TO ALIGNED AREA*/
+ temp=seq[i].letter;
+ if ((nidentity=clip_unaligned_ends(seq+i,al2,/*THERE IS AN ALIGNED REGION*/
+ new_seq->length,new_seq->letter,al1,&offset,
+ &match_length))>0) {
+ f=nidentity/(float)match_length; /* CALCULATE IDENTITY FRACTION */
+ if (0==i /*f>identity_max*/) { /* REPORT IDENTITY OF TOP HIT */
+ identity_max=nidentity;
+ length_max=match_length;
+ }
+ fuse_lpo(new_seq,seq+i,al1,al2+offset); /*ADD CLIPPED REGION TO LPO*/
+ }
+ restore_lpo_size(seq+i,ntemp,temp); /* REVERT FROM CLIPPED TO ORIGINAL*/
+ FREE(al1); /* DUMP TEMPORARY MAPPING ARRAYS FROM align_lpo() */
+ FREE(al2);
+ }
+
+ fprintf(stderr,"%s\tmaximum identity\t%3.1f%%\t%.0f/%d\n",new_seq->name,
+ 100*identity_max/length_max,identity_max,length_max);
+ return new_seq;
+}
+
+
+int find_seq_name(int nseq,LPOSequence_T seq[],char name[])
+{
+ int i;
+ LOOP (i,nseq)
+ if (0==strcmp(seq[i].name,name))
+ return i;
+ return -1;
+}
+
+
+typedef struct {
+ double score;
+ double bitscore;
+ int i;
+ int j;
+} SeqPairScore_T;
+
+
+/* SORT IN ASCENDING ORDER BY score, THEN DESCENDING ORDER by bitscore */
+int seqpair_score_qsort_cmp(const void *void_a,const void *void_b)
+{
+ const SeqPairScore_T *a=(const SeqPairScore_T *)void_a,
+ *b=(const SeqPairScore_T *)void_b;
+ if (a->score < b->score)
+ return -1;
+ else if (a->score == b->score) {
+ if (a->bitscore > b->bitscore)
+ return -1;
+ else if (a->bitscore == b->bitscore)
+ return 0;
+ }
+ else
+ return 1;
+}
+
+
+
+
+SeqPairScore_T *read_seqpair_scorefile(int nseq,LPOSequence_T seq[],
+ FILE *ifile,int *p_nscore)
+{
+ int i,j,nscore=0;
+ SeqPairScore_T *score=NULL;
+ double v,x;
+ char name1[256],name2[256];
+
+ CALLOC(score,nseq*nseq/2,SeqPairScore_T);
+ while (fscanf(ifile," %s %s %lf %lf",name1,name2,
+ &v,&x)==4) { /*READ SCORE FILE*/
+ i=find_seq_name(nseq,seq,name1);
+ j=find_seq_name(nseq,seq,name2);
+ if (i<0 || j<0) {
+ WARN_MSG(USERR,(ERRTXT,"invalid sequence pair, not found: %s,%s",name1,name2),"$Revision: 1.2.2.1 $");
+ FREE(score);
+ return NULL;
+ }
+
+ /* fprintf(stderr,"i=%d,j=%d,x=%e\n",i,j,x);*/
+ if (i<j) { /* DON'T SAVE UPPER, DUPLICATE HALF OF THE MATRIX */
+ /* fprintf(stderr,"Saving score %s,%s:%e\n",name1,name2,x);*/
+ score[nscore].score=x; /* SAVE THE SCORE INTO THE MATRIX */
+ score[nscore].bitscore=v;
+ score[nscore].i=i;
+ score[nscore].j=j;
+ nscore++;
+ }
+ }
+
+ /* NOW SORT THESE IN ASCENDING ORDER AND HAND BACK TO CALLER */
+ qsort(score,nscore,sizeof(SeqPairScore_T),seqpair_score_qsort_cmp);
+ if (p_nscore) /* RETURN LENGTH OF PAIR SCORE TABLE IF REQUESTED */
+ *p_nscore=nscore;
+ return score;
+}
+
+
+LPOSequence_T *buildup_progressive_lpo(int nseq,LPOSequence_T seq[],
+ ResidueScoreMatrix_T *score_matrix,
+ int use_aggressive_fusion,
+ char score_file[],
+ LPOScore_T (*scoring_function)
+ (int,int,LPOLetter_T [],LPOLetter_T [],
+ ResidueScoreMatrix_T *),
+ int use_global_alignment)
+{
+ int i,j,max_alloc=0,total_alloc;
+ LPOLetterRef_T *al1=NULL,*al2=NULL;
+ SeqPairScore_T *score=NULL;
+ FILE *ifile;
+ LPOSequence_T *new_seq=NULL;
+ int *seq_cluster=NULL,cluster_i,cluster_j,nscore=0,iscore;
+
+ ifile=fopen(score_file,"r");
+ if (ifile) {
+ if ((score=read_seqpair_scorefile(nseq,seq,ifile,&nscore))==NULL)
+ goto free_and_exit;
+ fclose(ifile);
+ }
+ else
+ goto free_and_exit;
+
+ CALLOC(seq_cluster,nseq,int); /* MAPS SEQS TO CLUSTER THEY'RE IN */
+ LOOP (i,nseq) /* CREATE TRIVIAL MAPPING, EACH SEQ ITS OWN CLUSTER */
+ seq_cluster[i]=i;
+
+ for (iscore=0;iscore<nscore;iscore++) {
+ if (seq_cluster[score[iscore].i] < seq_cluster[score[iscore].j]) {
+ cluster_i=seq_cluster[score[iscore].i];
+ cluster_j=seq_cluster[score[iscore].j];
+ }
+ else if (seq_cluster[score[iscore].j] < seq_cluster[score[iscore].i]) {
+ cluster_i=seq_cluster[score[iscore].j];
+ cluster_j=seq_cluster[score[iscore].i];
+ }
+ else /* CLUSTERS ALREADY FUSED, SO SKIP THIS PAIR */
+ continue;
+
+ fprintf(stderr,"Fusing cluster %s --> %s... score %e,%lf\n",
+ seq[cluster_j].name,seq[cluster_i].name,score[iscore].score,
+ score[iscore].bitscore);
+ new_seq=seq+cluster_i; /* THIS WILL BECOME THE NEW MASTER CLUSTER */
+ if (seq[cluster_i].letter == NULL) /* NOT INITIALIZED AT ALL YET */
+ initialize_seqs_as_lpo(1,seq+cluster_i,score_matrix);
+ if (seq[cluster_j].letter == NULL) /* NOT INITIALIZED AT ALL YET */
+ initialize_seqs_as_lpo(1,seq+cluster_j,score_matrix);
+ total_alloc=new_seq->length*seq[cluster_j].length
+ + sizeof(LPOLetter_T)*new_seq->length;
+ if (total_alloc>max_alloc) { /* DP RECTANGLE ARRAY SIZE */
+ max_alloc=total_alloc;
+#ifdef REPORT_MAX_ALLOC
+ fprintf(stderr,"max_alloc: %d bytes\n",max_alloc);
+#endif
+ if (max_alloc>POA_MAX_ALLOC) {
+ WARN_MSG(TRAP,(ERRTXT,"Exceeded memory bound: %d\n Exiting!\n\n",max_alloc),"$Revision: 1.2.2.1 $");
+ break; /* JUST RETURN AND FINISH */
+ }
+ }
+
+#ifdef USE_LOCAL_NEUTRALITY_CORRECTION /* NO LONGER USED */
+ if (score_matrix->nfreq>0) { /* CALCULATE BALANCED SCORING ON EACH PO */
+ balance_matrix_score(new_seq->length,new_seq->letter,score_matrix);
+ balance_matrix_score(seq[cluster_j].length,seq[cluster_j].letter,
+ score_matrix);
+ }
+#endif
+
+ align_lpo_po(new_seq, &seq[cluster_j],
+ score_matrix,&al1,&al2,
+ scoring_function,use_global_alignment); /* ALIGN ONE MORE SEQ */
+ if (use_aggressive_fusion)
+ fuse_ring_identities(new_seq->length,new_seq->letter,
+ seq[cluster_j].length,seq[cluster_j].letter,
+ al1,al2);
+ fuse_lpo(new_seq,seq+cluster_j,al1,al2); /* BUILD COMPOSITE LPO */
+
+ free_lpo_letters(seq[cluster_j].length,seq[cluster_j].letter,TRUE);
+ seq[cluster_j].letter=NULL; /*MARK AS FREED. DON'T LEAVE DANGLING POINTER*/
+ FREE(al1); /* DUMP TEMPORARY MAPPING ARRAYS */
+ FREE(al2);
+
+ LOOP (i,nseq) /* REINDEX ALL MEMBERS OF cluster_j TO JOIN cluster_i */
+ if (seq_cluster[i]==cluster_j)
+ seq_cluster[i]=cluster_i;
+ }
+
+ free_and_exit:
+ FREE(score);
+ FREE(seq_cluster);
+ return new_seq; /* RETURN THE FINAL MASTER CLUSTER */
+}
Added: trunk/packages/poa/branches/upstream/current/create_seq.c
===================================================================
--- trunk/packages/poa/branches/upstream/current/create_seq.c 2006-08-07 14:01:18 UTC (rev 90)
+++ trunk/packages/poa/branches/upstream/current/create_seq.c 2006-08-23 13:45:39 UTC (rev 91)
@@ -0,0 +1,53 @@
+
+
+#include "default.h"
+#include "seq_util.h"
+
+
+
+
+void save_sequence_fields(Sequence_T *seq,
+ char seq_name[],char seq_title[],int length)
+{
+ STRNCPY(seq->name,seq_name,SEQUENCE_NAME_MAX);
+ if (seq_title)
+ seq->title=strdup(seq_title);
+ else
+ seq->title=strdup("untitled");
+ seq->length=length; /* SAVE LENGTH */
+}
+
+
+
+int create_seq(int nseq,Sequence_T **p_seq,
+ char seq_name[],char seq_title[],char tmp_seq[],
+ int do_switch_case)
+{
+ int i,j;
+ Sequence_T *seq;
+
+ REBUFF(*p_seq,nseq,SEQUENCE_BUFFER_CHUNK,Sequence_T); /* ALLOCATE MEMORY*/
+ seq= (*p_seq)+nseq; /* SET POINTER TO NEWLY ALLOCATED ELEMENT */
+
+ for (i=j=0;tmp_seq[i];i++) /* ELIMINATE WHITE SPACE */
+ if (!isspace(tmp_seq[i]))
+ tmp_seq[j++]=tmp_seq[i];
+ tmp_seq[j]='\0'; /* TERMINATE COMPRESSED STRING*/
+ seq->sequence=strdup(tmp_seq); /* SAVE A DYNAMIC COPY */
+ save_sequence_fields(seq,seq_name,seq_title,j);
+
+ switch (do_switch_case) {
+ case switch_case_to_lower:
+ LOOP (i,seq->length)
+ seq->sequence[i]=tolower(tmp_seq[i]);
+ break;
+ case switch_case_to_upper:
+ LOOP (i,seq->length)
+ seq->sequence[i]=toupper(tmp_seq[i]);
+ break;
+ }
+
+ return 1;
+}
+
+
Added: trunk/packages/poa/branches/upstream/current/default.h
===================================================================
--- trunk/packages/poa/branches/upstream/current/default.h 2006-08-07 14:01:18 UTC (rev 90)
+++ trunk/packages/poa/branches/upstream/current/default.h 2006-08-23 13:45:39 UTC (rev 91)
@@ -0,0 +1,253 @@
+#ifndef DEFAULT_HEADER_INCLUDED
+#define DEFAULT_HEADER_INCLUDED 1
+
+#ifndef MODULE_NAME
+#define MODULE_NAME "main"
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <math.h>
+#include <string.h>
+#include <ctype.h>
+
+
+#include "black_flag.h"
+
+
+
+typedef void *voidptr; /* ~~e: should be moved out to generic typing header
+ --- */
+typedef int (*funptr)();
+
+#define LOOPB(i,size) for ((i)=(size);(i)-- >0;)
+#define LOOP(i,size) for ((i)=(size);(i)-- >0;)
+#define LOOPF(i,size) for ((i)=0;(i)<(size);(i)++)
+#define LOOP_FINISHED(i,size) ((i)<0 || (i)>=(size))
+
+
+/**@memo example usage of argument reading macros
+ FOR_ARGS(i,argc) {
+ ARGMATCH_VAL("-tolower",do_switch_case,switch_case_to_lower);
+ ARGMATCH("-seq_err_report",count_sequence_errors);
+ ARGGET("-printmatrix",print_matrix_letters);
+ NEXTARG(matrix_filename);
+ } */
+
+#define FOR_ARGS(INDEX,ARGC) for (INDEX=1;INDEX<ARGC;INDEX++)
+#define ARGMATCH(FLAGSTR,VAR) if (0==strcmp(argv[i],FLAGSTR)) (VAR)=1
+#define ARGMATCH_VAL(FLAGSTR,VAR,VAL) if (0==strcmp(argv[i],FLAGSTR)) (VAR)=(VAL)
+#define ARGGET(FLAGSTR,VAR) if (0==strcmp(argv[i],FLAGSTR)) {(VAR)=argv[++i];continue;}
+#define NEXTARG(VAR) if ('-'!=argv[i][0] && !(VAR)) {(VAR)=argv[i];continue;}
+
+
+
+
+#ifndef TRUE /* DEFINE true AND false SYMBOLS IN CASE NOT ALREADY THERE*/
+#define TRUE 1
+#endif
+
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+#if defined(__STDC__) || defined(__ANSI_CPP__) /*###################*/
+/* ~~a: ansi-dependent concatenation macro */
+#define CONCAT_MACRO0(a,b) a ## b
+#else
+#define CONCAT_MACRO0(a,b) a/**/b
+#endif /* !__STDC__ ##################################################*/
+
+#define CONCAT_MACRO(a,b) CONCAT_MACRO0(a,b)
+
+#if defined(__STDC__) || defined(__ANSI_CPP__) /*###################*/
+#define STRINGIFY(NAME) # NAME
+#else
+#define STRINGIFY(NAME) "NAME"
+#endif /* !__STDC__ ##################################################*/
+
+/* #define PRINT_DEBUG(MESSAGE) printf MESSAGE */
+
+
+/***********************************************************************
+ *
+ * STRNCPY: guaranteed overflow-protected, null-terminated strcpy
+ *
+ * NB: this macro forces termination at end position S1[LEN-1]
+ * which will cause non-intuitive side-effects ESPECIALLY
+ * if LEN is incorrect!!!! ie. STRNCPY will garbage your
+ * memory by sticking zeroes into it if you give it the wrong
+ * LEN.
+ *
+ * Make sure that the LEN you give it is correct!!!!!!!
+ *
+ **********************************************************************/
+
+#define STRNCPY(S1,S2,LEN) \
+ (strncpy(S1,S2,LEN),(((char *)(S1))[(LEN)-1]='\0'))
+
+
+
+
+
+
+
+
+/*******************************************************************
+ *
+ * STRINGPTR:
+ * holds string and count of last alloc'd size. Conveniently
+ * manages static and dynamic strings, allowing auto-resize
+ * without worrying about memory management.
+ *
+ ***********************************************************************/
+
+typedef struct { /* ~~d */
+ char *p;
+ int last_alloc;
+} stringptr; /* --- */
+
+
+#define STRINGPTR_BUFFER_CHUNK 4096
+#define STRINGPTR_EMPTY_INIT {NULL,0}
+
+char *stringptr_cat(stringptr *s1,const char s2[]);
+char *stringptr_cpy(stringptr *s1,const char s2[]);
+int stringptr_free(stringptr *s);
+
+
+
+
+
+
+#ifdef DEBUG
+#define PRINT_DEBUG(CODE,MESSAGE) {if (CODE<=2) fprintf MESSAGE ;}
+#else
+#define PRINT_DEBUG(CODE,MESSAGE) {if (CODE<=1) fprintf MESSAGE ;}
+#endif
+
+
+
+
+#define MEM0_REQUEST_BAD 0
+
+/* DEFAULT ACTION TO PERFORM IF malloc FAILS */
+#define MALLOC_FAILURE_ACTION abort()
+/* DEFAULT ACTION TO PERFORM IF realloc FAILS */
+#define REALLOC_FAILURE_ACTION abort()
+
+
+
+#define CALLOC(memptr,N,ATYPE) \
+/* printf("CALLOC(%s=%d,%s=%d,sizeof(%s)=%d), %s %d\n",STRINGIFY(memptr),(memptr), \
+ STRINGIFY(N),(N),STRINGIFY(ATYPE),sizeof(ATYPE),__FILE__,__LINE__);*/ \
+ if ((N)<=0) { \
+ if (MEM0_REQUEST_BAD) {\
+ fprintf(stderr,"%s, line %d: *** invalid memory request: %s[%d].\n",\
+ __FILE__,__LINE__,STRINGIFY(memptr),(N)); \
+ MALLOC_FAILURE_ACTION; \
+ }\
+ } \
+ else if (NULL == ((memptr)=(ATYPE *)calloc((size_t)(N),sizeof(ATYPE)))) { \
+ fprintf(stderr,"%s, line %d: *** out of memory \n",__FILE__,__LINE__); \
+ fprintf(stderr,"Unable to meet request: %s[%d]\n",STRINGIFY(memptr),(N)); \
+ fprintf(stderr,"requested %d x %d bytes \n",(N),sizeof(ATYPE)); \
+ MALLOC_FAILURE_ACTION; \
+ }
+
+
+
+
+
+
+
+
+
+/*******************************************************************
+ *
+ * FREE:
+ * dumps storage if a non-zero pointer, and resets pointer to
+ * NULL to indicate its data freed.
+ *
+ ******************************************************************/
+
+#define FREE(P) if (P) {free(P);(P)=NULL;}
+
+
+#define REALLOC(memptr,NUM,ATYPE) \
+/* printf("REALLOC(%s=%d,%s=%d,sizeof(%s)=%d), %s %d\n",STRINGIFY(memptr), \
+ (memptr),STRINGIFY(NUM),(NUM),STRINGIFY(ATYPE),sizeof(ATYPE),\
+ __FILE__,__LINE__);*/ \
+ if ((NUM)<=0) { \
+ if (MEM0_REQUEST_BAD) {\
+ fprintf(stderr,"%s, line %d: *** invalid memory request: %s[%d].\n",\
+ __FILE__,__LINE__,STRINGIFY(memptr),(NUM)); \
+ REALLOC_FAILURE_ACTION; \
+ }\
+ } \
+ else { \
+ voidptr temp_ptr=realloc((void *)(memptr),sizeof(ATYPE)*(size_t)(NUM));\
+ if (temp_ptr) \
+ (memptr)=(ATYPE *)temp_ptr;\
+ else { \
+ fprintf(stderr,"%s, line %d: *** out of memory \n",__FILE__,__LINE__); \
+ fprintf(stderr,"Unable to meet request: %s\n",STRINGIFY(memptr)); \
+ fprintf(stderr,"requested %d x %d bytes \n",(NUM),sizeof(ATYPE)); \
+ REALLOC_FAILURE_ACTION; \
+ } \
+ }
+
+
+/**********************************************************************
+ *
+ * REBUFF
+ * GUARANTEES ALLOCATION OF NULL-INITIALIZED BLOCK OF BUF ELEMENTS
+ * OF THE CORRECT DATA TYPE & SIZE, SUFFICIENT TO HOLD AT LEAST ONE
+ * ELEMENT ADDED TO THE CURRENT SIZE OF THE ARRAY
+ *
+ **********************************************************************/
+
+#define REBUFF(memptr,NUM,BUF,ATYPE) if ((NUM)<=0) { \
+ CALLOC(memptr,BUF,ATYPE) } \
+ else if (0 == (NUM)%(BUF)) { \
+ char *p_rebuff_size;\
+ REALLOC(memptr,(NUM)+(BUF),ATYPE); \
+ p_rebuff_size=(char *)(memptr)+(NUM)*sizeof(ATYPE); \
+ memset((voidptr)p_rebuff_size,0,(size_t)((BUF)*sizeof(ATYPE)));\
+ }
+
+
+
+
+
+
+
+
+
+/*******************************************************************
+ *
+ * GETMEM:
+ * allocates memory either via malloc or realloc, guaranteeing
+ * space to store at least one more data entry. Data is
+ * alloc'd in blocks of size BUF; the total # alloc'd is
+ * kept in LAST. The algorithm can go wrong if LAST
+ * is incorrect.
+ *
+ ******************************************************************/
+
+#define GETMEM(memptr,NUM,LAST,BUF,TYPE) \
+ if ((LAST)<=0) {\
+ CALLOC(memptr,((NUM)+1)-((NUM)+1)%(BUF)+(BUF),TYPE);\
+ (LAST)=((NUM)+1)-((NUM)+1)%(BUF)+(BUF);\
+ }\
+ else if (((NUM)+1)>(LAST)) { \
+ REALLOC(memptr,((NUM)+1)-((NUM)+1)%(BUF)+(BUF),TYPE);\
+ (LAST)=((NUM)+1)-((NUM)+1)%(BUF)+(BUF);\
+ }
+
+
+
+
+
+#endif
Added: trunk/packages/poa/branches/upstream/current/fasta_format.c
===================================================================
--- trunk/packages/poa/branches/upstream/current/fasta_format.c 2006-08-07 14:01:18 UTC (rev 90)
+++ trunk/packages/poa/branches/upstream/current/fasta_format.c 2006-08-23 13:45:39 UTC (rev 91)
@@ -0,0 +1,89 @@
+
+#include "default.h"
+#include "seq_util.h"
+
+
+
+/** reads FASTA formatted sequence file, and saves the sequences to
+ the array seq[]; any comment line preceded by a hash-mark will be saved
+ to comment */
+int read_fasta(FILE *seq_file,Sequence_T **seq,
+ int do_switch_case,char **comment)
+{
+ int c,nseq=0,length=0;
+ char seq_name[FASTA_NAME_MAX]="",
+ line[SEQ_LENGTH_MAX],seq_title[FASTA_NAME_MAX]="";
+ char *p;
+ stringptr tmp_seq=STRINGPTR_EMPTY_INIT;
+
+ /* read in sequences */
+ while (fgets(line,sizeof(line)-1,seq_file)) {
+ if ((p=strrchr(line,'\n'))) /* REMOVE NEWLINE FROM END OF LINE */
+ *p= '\0'; /* TRUNCATE THE STRING */
+ switch (line[0]) {
+ case '#': /* SEQUENCE COMMENT, SAVE IT */
+ if (comment) /* SAVE COMMENT FOR CALLER TO USE */
+ *comment = strdup(line+1);
+ break;
+
+ case '>': /* SEQUENCE HEADER LINE */
+ if (seq_name[0] && tmp_seq.p && tmp_seq.p[0]) { /* WE HAVE A SEQUENCE, SO SAVE IT! */
+ if (create_seq(nseq,seq,seq_name,seq_title,tmp_seq.p,do_switch_case))
+ nseq++;
+ }
+ seq_name[0]='\0';
+ if (sscanf(line+1,"%s %[^\n]", /* SKIP PAST > TO READ SEQ NAME*/
+ seq_name,seq_title)<2)
+ strcpy(seq_title,"untitled"); /* PROTECT AGAINST MISSING NAME */
+ if (tmp_seq.p)
+ tmp_seq.p[0]='\0'; /* RESET TO EMPTY SEQUENCE */
+ length=0;
+ break;
+
+ case '*': /* IGNORE LINES STARTING WITH *... DON'T TREAT AS SEQUENCE! */
+ break;
+
+ default: /* READ AS ACTUAL SEQUENCE DATA, ADD TO OUR SEQUENCE */
+ if (seq_name[0]) /* IF WE'RE CURRENTLY READING A SEQUENCE, SAVE IT */
+ stringptr_cat_pos(&tmp_seq,line,&length);
+ }
+
+ c=getc(seq_file); /* ?FIRST CHARACTER IS UNIGENE CLUSTER TERMINATOR? */
+ if (c==EOF)
+ break;
+ else {
+ ungetc(c,seq_file); /* PUT THE CHARACTER BACK */
+ if (c=='#' && nseq>0) /* UNIGENE CLUSTER TERMINATOR, SO DONE!*/
+ break;
+ }
+ }
+ if (seq_name[0] && tmp_seq.p && tmp_seq.p[0]) { /* WE HAVE A SEQUENCE, SO SAVE IT! */
+ if (create_seq(nseq,seq,seq_name,seq_title,tmp_seq.p,do_switch_case))
+ nseq++;
+ }
+ stringptr_free(&tmp_seq);
+ return nseq; /* TOTAL NUMBER OF SEQUENCES CREATED */
+}
+
+/**@memo example: reading FASTA format file:
+ seq_ifile=fopen(seq_filename,"r");
+ if (seq_ifile) {
+ nseq=read_fasta(seq_ifile,&seq,do_switch_case,&comment);
+ fclose(seq_ifile);
+ }
+*/
+
+
+
+/** writes a FASTA formatted file, saving the sequence given in seq[] */
+void write_fasta(FILE *ifile,char name[],char title[],char seq[])
+{
+ int j;
+
+ fprintf(ifile,">%s %s\n",name,title? title : "untitled");
+ for (j=0;j<strlen(seq);j+=60)
+ fprintf(ifile,"%.60s\n",seq+j);
+
+ return;
+}
+
Added: trunk/packages/poa/branches/upstream/current/heaviest_bundle.c
===================================================================
--- trunk/packages/poa/branches/upstream/current/heaviest_bundle.c 2006-08-07 14:01:18 UTC (rev 90)
+++ trunk/packages/poa/branches/upstream/current/heaviest_bundle.c 2006-08-23 13:45:39 UTC (rev 91)
@@ -0,0 +1,173 @@
+
+
+#include "default.h"
+#include "poa.h"
+#include "seq_util.h"
+#include "lpo.h"
+
+
+
+
+/** finds the heaviest traversal of the LPO seq[], using dynamic programming;
+ at each node the heaviest link is chosen to buildup traversals; finally,
+ the traversal with the heaviest overall link weight is returned as an
+ array of position indices. The length of the array is stored in
+ *p_best_len*/
+LPOLetterRef_T *heaviest_bundle(int len,LPOLetter_T seq[],
+ int nsource_seq,LPOSourceInfo_T source_seq[],
+ int *p_best_len)
+{
+ int i,j,best_right,iright,ibest= -1,best_len=0;
+ LPOLetterRef_T *best_path=NULL,*path=NULL;
+ LPOLetterLink_T *right;
+ LPOLetterSource_T *source;
+ LPOScore_T *score=NULL,best_score= -999999,right_score;
+ int *contains_pos=NULL,my_overlap,right_overlap;
+
+ CALLOC(path,len,LPOLetterRef_T); /* GET MEMORY FOR DYNAMIC PROGRAMMING */
+ CALLOC(score,len,LPOScore_T);
+ CALLOC(contains_pos,nsource_seq,int);
+ LOOP (i,nsource_seq) /* RESET TO NOT MATCH ANY POSITIONS */
+ contains_pos[i]= INVALID_LETTER_POSITION;
+
+ LOOPB (i,len) { /* FIND HEAVIEST PATH BY DYNAMIC PROGRAMMING */
+ source= &seq[i].source; /* MARK SEQUENCES CONTAINING THIS POSITION */
+ memset(contains_pos,0,nsource_seq*sizeof(int)); /* ERASE ARRAY */
+ do
+ if (source_seq[source->iseq].weight>0) /* EXCLUDE ZERO WEIGHT SEQS */
+ contains_pos[source->iseq]=source->ipos+1; /* right MUST BE ADJACENT*/
+ while (source=source->more); /* KEEP COUNTING TILL NO more */
+
+ right_score=right_overlap=0; /*DEFAULT MOVE: NOTHING TO THE RIGHT*/
+ best_right= INVALID_LETTER_POSITION;
+ for (right= &seq[i].right;right && right->ipos>=0;right=right->more) {
+ my_overlap=0; /* OVERLAP CALCULATION */
+ source= &seq[right->ipos].source;/*COUNT SEQS SHARED IN i AND right*/
+ do /* BIAS OVERLAP CALCULATION BY SEQUENCE WEIGHTING */
+ if (contains_pos[source->iseq]==source->ipos) /* YES, ADJACENT! */
+ my_overlap += source_seq[source->iseq].weight;
+ while (source=source->more); /* KEEP COUNTING TILL NO more */
+
+ if (my_overlap>right_overlap /* FIND BEST RIGHT MOVE: BEST OVERLAP */
+ || (my_overlap==right_overlap && score[right->ipos]>right_score)) {
+ right_overlap=my_overlap;
+ right_score=score[right->ipos];
+ best_right=right->ipos;
+ }
+ }
+
+ path[i]=best_right; /* SAVE THE BEST PATH FOUND */
+ score[i]=right_score+right_overlap; /* SAVE THE SCORE */
+ if (score[i]>best_score) { /* RECORD BEST SCORE IN WHOLE LPO */
+ ibest=i;
+ best_score=score[i];
+ }
+ }
+
+ CALLOC(best_path,len,LPOLetterRef_T); /* MEMORY FOR STORING BEST PATH */
+ for (;ibest>=0;ibest=path[ibest]) /* BACK TRACK THE BEST PATH */
+ best_path[best_len++]=ibest;
+
+ FREE(path); /* DUMP SCRATCH MEMORY */
+ FREE(score);
+ FREE(contains_pos);
+
+ if (p_best_len) /* RETURN best_path AND ITS LENGTH */
+ *p_best_len = best_len;
+ return best_path;
+}
+
+
+
+
+int assign_sequence_bundle_id(int path_length,LPOLetterRef_T path[],
+ LPOSequence_T *seq,int bundle_id,
+ float minimum_fraction)
+{
+ int i,*bundle_count=NULL,nseq_in_bundle=0;
+ LPOLetterSource_T *source;
+
+ CALLOC(bundle_count,seq->nsource_seq,int);
+ LOOP (i,path_length) /* COUNT #POSITIONS OF EACH SEQ ARE IN path */
+ for (source= &seq->letter[path[i]].source;source;source=source->more)
+ bundle_count[source->iseq]++;
+
+ LOOP (i,seq->nsource_seq) {/* FOR EACH SEQ OVER THRESHOLD, ASSIGN bundle_id*/
+/* printf("bundle %d:\t%s\t%d/%d %d",bundle_id,seq->source_seq[i].name,
+ bundle_count[i],seq->source_seq[i].length,seq->source_seq[i].weight);*/
+ if (seq->source_seq[i].bundle_id<0 /* NOT YET BUNDLED */
+ && seq->source_seq[i].length*minimum_fraction <= bundle_count[i]) {
+/* printf(" +++++++++++++++++");*/
+ seq->source_seq[i].bundle_id = bundle_id; /* ASSIGN TO THIS BUNDLE */
+ seq->source_seq[i].weight = 0; /* REMOVE FROM FUTURE heaviest_bundle */
+ nseq_in_bundle++;
+ }
+ /* printf("\n");*/
+ }
+
+ FREE(bundle_count);
+ return nseq_in_bundle; /* RETURN COUNT OF SEQUENCES IN BUNDLE */
+}
+
+
+
+
+/** assigns weights for bundling based upon /hb_weight arguments
+ in source_seq titles */
+
+void assign_hb_weights(int nsource_seq,LPOSourceInfo_T source_seq[])
+{
+ int i,weight;
+ char *p;
+ LOOP (i,nsource_seq) {
+ if (source_seq[i].title &&
+ (p=strstr(source_seq[i].title,"/hb_weight="))) {
+ weight=atoi(p+11);
+ if (weight!=0){ /* 0 COULD MEAN atoi FAILED TO PARSE ARG. IGNORE IT*/
+ source_seq[i].weight = weight;
+ fprintf(stderr,"assigned weight=%d to %s\n",source_seq[i].weight,source_seq[i].name);
+ }
+ else
+ WARN_MSG(USERR,(ERRTXT,"hb_weight zero or unreadable: %s\nIgnored",p),"$Revision: 1.2 $");
+ }
+ }
+}
+
+
+
+/** generates the complete set of heaviest_bundle traversals of the the LPO
+ seq, using iterative heaviest_bundle() and requiring that at least
+ minimum_fraction of the positions in a sequence match the heaviest
+ bundle path, for that sequence to be assigned to that bundle
+---------------------------------------------------------------
+------------------------------------------------------------*/
+void generate_lpo_bundles(LPOSequence_T *seq,float minimum_fraction)
+{
+ int nbundled=0,ibundle=0,path_length,iseq,count;
+ LPOLetterRef_T *path=NULL;
+ char name[256],title[1024];
+
+ /* assign_hb_weights(seq->nsource_seq,seq->source_seq); TURN THIS ON!!*/
+ while (nbundled < seq->nsource_seq) {/* PULL OUT BUNDLES ONE BY ONE */
+ path=heaviest_bundle(seq->length,seq->letter,/*GET NEXT HEAVIEST BUNDLE*/
+ seq->nsource_seq,seq->source_seq,&path_length);
+ if (!path || path_length<10) /* ??!? FAILED TO FIND A BUNDLE ??? */
+ goto premature_warning;
+ sprintf(name,"CONSENS%d",ibundle);
+ /* NEXT, MARK SEQUENCES THAT FIT THIS BUNDLE ADEQUATELY */
+ count=assign_sequence_bundle_id(path_length,path,seq,ibundle,
+ minimum_fraction);
+ sprintf(title,"consensus produced by heaviest_bundle, containing %d seqs",
+ count); /* DON'T INCLUDE CONSENSUS ITSELF IN THE COUNT! */
+ iseq=add_path_sequence(path_length,path,seq,name,title);/*BUILD CONSENSUS*/
+ seq->source_seq[iseq].bundle_id=ibundle++; /* INCREMENT BUNDLE ID */
+ nbundled+=count; /* KEEP TRACK OF TOTAL SEQUENCES BUNDLED */
+
+ if (count<1) {
+ premature_warning:
+ fprintf(stderr,"*** WARNING: bundling ended prematurely after %d bundles.\nNo sequences fit inside this last bundle.\nA total of %d sequences incuding consensus were bundled.\n\n",ibundle,nbundled);
+ break;
+ }
+ }
+}
+
Added: trunk/packages/poa/branches/upstream/current/lpo.c
===================================================================
--- trunk/packages/poa/branches/upstream/current/lpo.c 2006-08-07 14:01:18 UTC (rev 90)
+++ trunk/packages/poa/branches/upstream/current/lpo.c 2006-08-23 13:45:39 UTC (rev 91)
@@ -0,0 +1,743 @@
+
+
+#include "default.h"
+#include "poa.h"
+#include "seq_util.h"
+#include "lpo.h"
+
+
+/** INITIALIZES LINEARIZED PARTIAL ORDER DATA STRUCTURES FOR
+ A LINEAR SEQUENCE */
+void lpo_init(LPOSequence_T *seq)
+{
+ int i;
+
+ CALLOC(seq->letter,seq->length,LPOLetter_T);
+
+ LOOP (i,seq->length) {
+ seq->letter[i].left.ipos = SEQ_Y_LEFT(i); /* JUST A LINEAR SEQ */
+ seq->letter[i].right.ipos= SEQ_Y_RIGHT(i);
+ seq->letter[i].source.iseq=0; /* TRIVIAL SOURCE: POINT TO SELF */
+ seq->letter[i].source.ipos=i;
+ seq->letter[i].align_ring = seq->letter[i].ring_id=i; /* POINT AT SELF */
+ seq->letter[i].letter = seq->sequence[i]; /* COPY OUR AA LETTER */
+ }
+ seq->letter[seq->length -1].right.ipos= INVALID_LETTER_POSITION;
+ /* NB: letter[0].left.ipos IS INVALID_LETTER_POSITION THANKS TO SEQ_Y_LEFT()
+ ABOVE */
+ /* BECAUSE PO CAN CONTAIN MULTIPLE SEQUENCES, WE ALSO KEEP A SOURCE LIST.
+ FOR PURE LINEAR SEQUENCE, THE LIST IS JUST OUR STARTING SEQUENCE */
+ save_lpo_source(seq,seq->name,seq->title,seq->length,1,NO_BUNDLE,0,NULL);
+ return;
+}
+
+
+
+
+
+/**@memo initialize one or more regular sequences (linear orders) to LPO form.
+ This step is REQUIRED before running partial order alignment.
+ This routine processes each sequence with limit_residues() and
+ index_symbols(), then builds a linear LPO using lpo_init(). */
+
+void initialize_seqs_as_lpo(int nseq, Sequence_T seq[],ResidueScoreMatrix_T *m)
+{
+ int i;
+ LOOP (i,nseq) {/* EXCLUDE LETTERS THAT AREN'T IN MATRIX */
+ limit_residues(seq[i].sequence,m->symbol);
+ /* TRANSLATE FROM ASCII LETTERS TO COMPACTED NUMBERICAL INDEX*/
+ index_symbols(seq[i].length,seq[i].sequence,seq[i].sequence,
+ m->nsymbol,m->symbol);
+ lpo_init(seq+i); /* CREATE TRIVIAL, LINEAR SEQUENCE LPO */
+ }
+}
+
+
+/** translates letter symbols on lpo.letter[i] to indexes used in scoring
+ matrix*/
+void lpo_index_symbols(Sequence_T *lpo,ResidueScoreMatrix_T *m)
+{
+ int i;
+
+ if (lpo->letter == NULL) { /* HMM. HASN'T BEEN INITIALIZED AT ALL YET */
+ initialize_seqs_as_lpo(1,lpo,m);
+ return;
+ }
+ if (lpo->letter[0].letter < m->nsymbol)
+ return; /* LOOKS LIKE IT'S ALREADY TRANSLATED TO INDEXES */
+ LOOP (i,lpo->length) /* READ FROM FILE, MAY NOT BE TRANSLATED YET */
+ index_symbols(1,&lpo->letter[i].letter,&lpo->letter[i].letter,
+ m->nsymbol,m->symbol);
+}
+
+
+
+
+
+/** finds ipos for the designated sequence in the designated letter,
+ or INVALID_LETTER_POSITION if iseq is not found. */
+int find_letter_source(LPOLetter_T *letter,int iseq)
+{
+ LPOLetterSource_T *source;
+ for (source= &letter->source;source;source=source->more)/*SCAN SOURCES*/
+ if (source->iseq == iseq) /*MATCH! */
+ return source->ipos; /* RETURN ITS SEQUENCE POSITION */
+ return INVALID_LETTER_POSITION;
+}
+
+
+
+
+
+/**@memo finds links to iseq, starting from letter[ipos] and proceeding in the
+ specified direction up to max_step links. Optionally, the search
+ can be constrained to only match node iseq:match_ipos.
+ If iseq is found, the number
+ of links connecting its letter to letter[ipos] is reported. Otherwise
+ it returns max_step+1 to indicate iseq was not found within the given
+ distance. If p_ipos or p_letter_pos are non-NULL, it will return
+ the ipos of the position found in iseq, or the index of the found
+ letter[], respectively. */
+int find_sequence_link(int iseq,
+ int match_ipos,
+ int start_pos,
+ LPOLetter_T letter[],
+ int max_step,
+ int please_go_right,
+ int *p_ipos,
+ int *p_letter_pos)
+{
+ int nstep,ipos;
+ LPOLetterLink_T *link,*link0;
+
+ if (please_go_right) /* CHOOSE THE DESIRED DIRECTION */
+ link0= &letter[start_pos].right;
+ else
+ link0= &letter[start_pos].left;
+
+ for (link=link0;link && link->ipos>=0;link=link->more) /*SCAN ALL LINKS*/
+ if ((ipos=find_letter_source(letter+link->ipos,iseq))>=0
+ && (match_ipos<0 /* NO CONSTRAINT ON match_ipos */
+ || match_ipos==ipos)) { /* POSITION MATCHES */
+ if (p_ipos) /* HAND BACK THE SEQUENCE ipos TO CALLER */
+ *p_ipos = ipos;
+ if (p_letter_pos) /* HAND BACK THE letter[] INDEX TO CALLER */
+ *p_letter_pos = link->ipos;
+ return 1; /* FOUND SEQUENCE ONLY ONE LINK AWAY FROM HERE! */
+ }
+
+ if (max_step>1) { /* OK TO TRY ANOTHER LAYER OF RECURSION */
+ for (link=link0;link && link->ipos>=0;link=link->more) { /*SCAN ALL LINKS*/
+ nstep=find_sequence_link(iseq,match_ipos,link->ipos,letter,max_step-1,
+ please_go_right,p_ipos,p_letter_pos);
+ if (nstep<max_step) /* FOUND IT! SO RETURN LINK COUNT */
+ return nstep+1;
+ }
+ }
+
+ return max_step+1; /* INDICATE iseq NOT FOUND IN THE SPECIFIED RANGE */
+}
+
+
+
+/** constructs a mapping from [iseq,ipos] -> iletter and back. This is
+ saved as an index seq_to_po[ipos]==>iletter and
+ po_to_seq[iletter]==>ipos .
+ At the same time, it also constructs the actual sequence of the
+ individual source sequences from the data stored in the LPO. */
+void build_seq_to_po_index(LPOSequence_T *seq)
+{
+ int i,j;
+ LPOLetterSource_T *source;
+
+ LOOP (i,seq->nsource_seq) { /* DUMP EXISTING INDEX, ALLOC NEW*/
+ FREE(seq->source_seq[i].seq_to_po);
+ FREE(seq->source_seq[i].po_to_seq);
+ FREE(seq->source_seq[i].sequence);
+ CALLOC(seq->source_seq[i].seq_to_po,seq->source_seq[i].length,int);
+ CALLOC(seq->source_seq[i].po_to_seq,seq->length,int);
+ CALLOC(seq->source_seq[i].sequence,seq->source_seq[i].length+1,char);
+ LOOP (j,seq->length) /*DEFAULT: PO LETTERS DON'T MAP TO ANY LETTER IN SEQ*/
+ seq->source_seq[i].po_to_seq[j]= INVALID_LETTER_POSITION;
+ }
+
+ LOOP (i,seq->length) { /* MAP EVERY LETTER ONTO SOURCE INDEXES */
+ for (source= &seq->letter[i].source;source;source=source->more) {
+ seq->source_seq[source->iseq].seq_to_po[source->ipos]=i;/*INVERSE*/
+ seq->source_seq[source->iseq].po_to_seq[i]=source->ipos;/*MAPPING*/
+ seq->source_seq[source->iseq].sequence[source->ipos]=seq->letter[i].letter;
+ }
+ }
+}
+
+
+
+
+/** CREATES A NEW SOURCEINFO ENTRY ON seq->source_seq[], SAVING THE
+ FIELDS PASSED BY THE CALLER */
+int save_lpo_source(LPOSequence_T *seq,
+ char name[],
+ char title[],
+ int length,
+ int weight,
+ int bundle_id,
+ int ndata,
+ LPONumericData_T data[])
+{
+ int i,j;
+
+ REBUFF(seq->source_seq,seq->nsource_seq,SOURCE_SEQ_BUFFER_CHUNK,
+ LPOSourceInfo_T);
+ i=seq->nsource_seq;
+ seq->source_seq[i].title=strdup(title? title:"untitled");
+ seq->source_seq[i].length=length;
+ seq->source_seq[i].weight=weight; /* DEFAULT WEIGHTING */
+ seq->source_seq[i].bundle_id= bundle_id;
+ STRNCPY(seq->source_seq[i].name,name,SEQUENCE_NAME_MAX);
+
+ LOOPF(j,ndata) /* SAVE NUMERIC DATA FOR THIS SOURCE */
+ cp_numeric_data(seq->source_seq+i,data+j);
+
+ return seq->nsource_seq++; /* INCREMENT SOURCE SEQUENCE LIST COUNT */
+}
+
+
+
+/** SAVE source_seq[] ENTRIES FROM ONE LPO TO ANOTHER */
+int *save_lpo_source_list(LPOSequence_T *seq,
+ int nsource_seq,
+ LPOSourceInfo_T source_seq[])
+{
+ int i,*list=NULL;
+
+ CALLOC(list,nsource_seq,int);
+ LOOPF (i,nsource_seq)
+ list[i]=save_lpo_source(seq,source_seq[i].name,source_seq[i].title,
+ source_seq[i].length,source_seq[i].weight,
+ source_seq[i].bundle_id,
+ source_seq[i].ndata,source_seq[i].data);
+
+ return list; /*HAND BACK LIST OF NEW INDICES ASSIGNED TO THESE source_seq*/
+}
+
+
+
+/** ADDS A LINK TO ipos TO THE LINKED LIST STORED IN list,
+ ALLOCATING A NEW LPOLetterLink IF NEEDED */
+LPOLetterLink_T *add_lpo_link(LPOLetterLink_T *list,LPOLetterRef_T ipos)
+{
+ if (list->ipos <0) { /* FIRST ENTRY IS EMPTY, SO USE IT */
+ list->ipos = ipos;
+ return list;/*RETURNS PTR TO LINK IN WHICH ipos STORED */
+ }
+ do { /* SCAN LIST CHECKING IF ALREADY STORED */
+ if (list->ipos == ipos) /* ALREADY STORED, NO NEED TO STORE AGAIN */
+ return list;/*RETURNS PTR TO LINK IN WHICH ipos STORED */
+ } while (list->more? (list=list->more):0);
+
+ CALLOC(list->more,1,LPOLetterLink_T); /* ADD ENTRY TO LINKED LIST */
+ list->more->ipos=ipos; /* SAVE THE LETTER REFERENCE */
+ return list->more;/*RETURNS PTR TO LINK IN WHICH ipos STORED */
+}
+
+
+
+
+void add_lpo_sources(LPOLetterSource_T *new_s,LPOLetterSource_T *old_s,
+ int iseq_new[])/*TRANSLATION TO NEW source_seq[] INDEX*/
+{
+ for (;new_s->more;new_s=new_s->more);/* GO TO END*/
+ for (;old_s;old_s=old_s->more) {/*SAVE SOURCES*/
+ if (new_s->ipos>=0) { /* ALREADY A SOURCE HERE, SO CREATE NEW ENTRY */
+ CALLOC(new_s->more,1,LPOLetterSource_T);
+ new_s=new_s->more;
+ }
+ new_s->iseq=iseq_new[old_s->iseq]; /* SAVE SEQUENCE ID, POSITION */
+ new_s->ipos=old_s->ipos;
+ }
+}
+
+
+
+
+void copy_lpo_letter(LPOLetter_T *new,LPOLetter_T *old,
+ LPOLetterRef_T old_to_new[],
+ int iseq_new[])
+{
+ LPOLetterLink_T *link;
+ new->letter=old->letter; /* SAVE ITS SEQUENCE LETTER */
+ add_lpo_sources(&new->source,&old->source,iseq_new); /* SAVE SOURCES */
+ for (link= &old->left;link && link->ipos>=0;link=link->more) /*SAVE left*/
+ add_lpo_link(&new->left,old_to_new[link->ipos]);
+ for (link= &old->right;link && link->ipos>=0;link=link->more)/*SAVE right*/
+ add_lpo_link(&new->right,old_to_new[link->ipos]);
+ return;
+}
+
+
+
+ /** FUSES RING a AND RING b BY CROSSLINK OPERATION */
+void crosslink_rings(LPOLetterRef_T a,LPOLetterRef_T b,LPOLetter_T seq[])
+{
+ LPOLetterRef_T align_ring;
+
+ if (seq[a].ring_id==seq[b].ring_id) /* ALREADY ON SAME RING, DO NOTHING! */
+ return;
+ else if (seq[a].ring_id<seq[b].ring_id) { /* a<b SO RESET b.ring_id */
+ align_ring=b; /* TRAVERSE RING b */
+ do seq[align_ring].ring_id=seq[a].ring_id; /* ENFORCE a.ring_id == b.ring_id*/
+ while ((align_ring=seq[align_ring].align_ring) != b);
+ }
+ else { /* a>b SO RESET a.ring_id */
+ align_ring=a; /* TRAVERSE RING a */
+ do seq[align_ring].ring_id=seq[b].ring_id; /* ENFORCE a.ring_id == b.ring_id*/
+ while ((align_ring=seq[align_ring].align_ring) != a);
+ }
+
+ align_ring=seq[a].align_ring; /* JUST LIKE A SWAP */
+ seq[a].align_ring=seq[b].align_ring; /* CROSSLINK THE TWO RINGS */
+ seq[b].align_ring=align_ring;
+ return;
+}
+
+
+
+
+void copy_old_ring_to_new(LPOLetterRef_T start, /* START OF RING TO COPY*/
+ LPOLetter_T old_lpo[],
+ LPOLetter_T new_lpo[],
+ LPOLetterRef_T old_to_new[]) /* MAPPING */
+{
+ LPOLetterRef_T ipos,next_pos;
+ for (ipos=start;(next_pos=old_lpo[ipos].align_ring) != start;ipos=next_pos)
+ crosslink_rings(old_to_new[ipos],old_to_new[next_pos],new_lpo);
+}
+
+
+
+/* TEMPORARY: THESE CONSTANTS CONTROL SEGMENT FUSION
+ TESTING THE FOLLOWING NUMBERS ON DNA ASSEMBLY*/
+#define FISSION_BREAK 5 /* LENGTH OF MISMATCH THAT SPLITS INTO SEGMENTS */
+#define MINIMUM_FUSION 10 /* MINIMUM #IDENTITIES FOR SEGMENT TO FUSE */
+#define FUSION_PERCENT 0.8 /* MINIMUM OVERALL IDENTITY FOR SEGMENT TO FUSE*/
+
+void mark_fusion_segments(int len_x,LPOLetter_T seq_x[],
+ int len_y,LPOLetter_T seq_y[],
+ LPOLetterRef_T y_to_x[],
+ int fission_break,
+ int minimum_fusion_length,
+ float minimum_fusion_identity,
+ char do_fuse[])
+{
+ int i,i_x,i_y,mismatch_length=0,identity_count=0,fission_break_point= -1;
+
+ LOOP (i_y,len_y) {
+ if ((i_x=y_to_x[i_y])>=0 && seq_x[i_x].letter==seq_y[i_y].letter)
+ do_fuse[i_y]=1;
+ }
+#ifdef SOURCE_EXCLUDED
+ LOOPF (i_y,len_y) {
+ if ((i_x=y_to_x[i_y])<0 /* NOT ALIGNED AT ALL */
+ || seq_x[i_x].letter!=seq_y[i_y].letter /* MISMATCH */
+ || i_y==len_y-1) { /* END OF SEQ, MUST CHECK LAST SEGMENT! */
+ if (++mismatch_length>=fission_break /* BREAK POINT */
+ || i_y==len_y-1) { /* END OF SEQ, MUST CHECK LAST SEGMENT! */
+ if (identity_count>=minimum_fusion_length /* END OF A FUSION SEGMENT?*/
+ && minimum_fusion_identity
+ *(i_y-mismatch_length-fission_break_point)<= identity_count) {
+ /* YES, MARK PRECEEDING SEGMENT FOR FUSION*/
+ for (i=i_y-fission_break;i>fission_break_point;i--)/*MARK SEGMENT!*/
+ if ((i_x=y_to_x[i])>=0 && seq_x[i_x].letter==seq_y[i].letter)
+ do_fuse[i]=1; /* IDENTITY! MARK THIS POSITION TO BE FUSED */
+ }
+ fission_break_point=i_y; /* NO FUSION SEGMENT CAN EXTEND PAST HERE */
+ identity_count=0; /* RESET IDENTITY COUNTER FOR STARTING NEXT SEGMENT*/
+ }
+ }
+ else { /* PERFECT IDENTITY */
+ mismatch_length=0;
+ identity_count++;
+ }
+ }
+#endif
+}
+
+
+
+
+int reindex_lpo_fusion(int len_x,LPOLetter_T seq_x[],
+ int len_y,LPOLetter_T seq_y[],
+ LPOLetterRef_T x_to_y[],
+ LPOLetterRef_T y_to_x[],
+ LPOLetterRef_T new_x[],
+ LPOLetterRef_T new_y[],
+ int fission_break,
+ int minimum_fusion_length,
+ float minimum_fusion_identity)
+{
+ int new_len=0;
+ LPOLetterRef_T i_x,i_y,i_ring,end_of_ring= -1;
+ char *do_fuse=NULL;
+
+ CALLOC(do_fuse,len_y,char); /* MARK POSITIONS TO FUSE seq_y TO seq_x */
+ mark_fusion_segments(len_x,seq_x,len_y,seq_y,y_to_x,fission_break,
+ minimum_fusion_length,minimum_fusion_identity,do_fuse);
+
+ for (i_x=i_y=new_len=0;i_x<len_x;i_x++) { /* BUILD new_x, new_y */
+ for (i_ring=i_x;i_ring<len_x && seq_x[i_ring].ring_id==seq_x[i_x].ring_id;
+ i_ring++) /* SCAN X RING, AND SEE IF ANYTHING ALIGNS TO Y */
+ if (x_to_y[i_ring]>=0) { /* IF SO, INSERT Y NOW TO KEEP X RING TOGETHER*/
+ while (i_y<x_to_y[i_ring]) /* CONCATENATE LBEFORE ALIGNED LETTER */
+ new_y[i_y++]=new_len++; /* ADD TO new_lpo */
+ break;
+ }
+
+ if (x_to_y[i_x]>=0 /* ALIGNED, SO FIRST INSERT PRECEEDING FROM seq_y */
+ && i_y<len_y) {
+ for (i_ring=seq_y[i_y].align_ring;i_ring!=i_y; /* CIRCLE THE RING*/
+ i_ring=seq_y[i_ring].align_ring) /*FIND END OF i_y ALIGNMENT RING*/
+ if (i_ring>end_of_ring) /* FIND MAXIMUM INDEX ON THIS RING */
+ end_of_ring=i_ring; /* RING GUARANTEED TO BE ONE CONTIGUOUS BLOCK*/
+
+ if (do_fuse[i_y]) /* THIS POSITION MEETS OUR FUSION CRITERIA, SO FUSE*/
+ new_y[i_y++]=new_len; /* USE SAME INDEX AS WILL BE USED FOR i_x */
+ else /* NOT IDENTICAL, SO GIVE IT ITS OWN LETTER */
+ new_y[i_y++]=new_len++; /* ADD TO new_lpo */
+ }
+ new_x[i_x]=new_len++; /* ADD TO new_lpo */
+
+ while (i_y<=end_of_ring) /* CONCATENATE LETTERS ALIGNED TO i_y */
+ new_y[i_y++]=new_len++; /*KEEP ALIGNED LETTERS AS ONE CONTIGUOUS BLOCK!*/
+ }
+ while (i_y<len_y) { /* CONCATENATE TAIL OF seq_y ONTO new_lpo */
+ new_y[i_y++]=new_len++; /* ADD TO new_lpo */
+ }
+
+ FREE(do_fuse);
+ return new_len;
+}
+
+
+void remap_x_to_new(int nremap_x,/*TRANSLATE INDICES IN remap_x USING new_x*/
+ LPOLetterRef_T remap_x[],/* ARRAY OF INDICES TO TRANSLATE*/
+ int len_x,
+ LPOLetterRef_T new_x[]) /* MAPPING FROM OLD -> NEW */
+{
+ int i;
+
+ LOOP (i,nremap_x) {
+ if (remap_x[i]>=0 && remap_x[i]<len_x) /* KEEP IN BOUNDS */
+ remap_x[i]=new_x[remap_x[i]];
+ else /* BOOM!! OUT OF RANGE!! */
+ abort(); /* DELIBERATELY CRASH TO TRAP THE ERROR! */
+ }
+}
+
+
+
+LPOSequence_T *copy_fuse_lpo_remap(LPOSequence_T *holder_x, /*FUSES TWO LPOs*/
+ LPOSequence_T *holder_y, /*MAKES *NEW* HOLDER */
+ LPOLetterRef_T x_to_y[],
+ LPOLetterRef_T y_to_x[],
+ int nremap_x,
+ LPOLetterRef_T remap_x[])
+{
+ int i,new_len,*iseq_new=NULL,len_x,len_y;
+ LPOLetterRef_T i_x,i_y,*new_x=NULL,*new_y=NULL;
+ LPOLetter_T *new_lpo=NULL,*seq_x,*seq_y;
+ LPOSequence_T *new_seq=NULL;
+
+ len_x=holder_x->length; /* GET letter ARRAY FROM BOTH x AND y */
+ seq_x=holder_x->letter;
+ len_y=holder_y->length;
+ seq_y=holder_y->letter;
+
+ CALLOC(new_seq,1,LPOSequence_T); /* CREATE A NEW HOLDER */
+ CALLOC(new_x,len_x,LPOLetterRef_T);
+ CALLOC(new_y,len_y,LPOLetterRef_T);
+ new_len=reindex_lpo_fusion(len_x,seq_x,len_y,seq_y,x_to_y,y_to_x,new_x,new_y,
+ FISSION_BREAK,MINIMUM_FUSION,FUSION_PERCENT);
+ CALLOC(new_lpo,new_len,LPOLetter_T); /* ALLOCATE NEW LINEARIZED PO */
+ LOOP (i,new_len) { /* INITIALIZE ALL LINKS TO INVALID */
+ new_lpo[i].left.ipos=new_lpo[i].right.ipos=new_lpo[i].source.ipos
+ = INVALID_LETTER_POSITION;
+ new_lpo[i].align_ring=new_lpo[i].ring_id=i; /* POINT TO SELF */
+ }
+ new_seq->length=new_len; /* SAVE NEW LPO ARRAY IN NEW HOLDER */
+ new_seq->letter=new_lpo;
+
+ iseq_new=save_lpo_source_list(new_seq,holder_x->nsource_seq,/*COPY x SOURCE*/
+ holder_x->source_seq);
+ LOOP (i_x,len_x) /* COPY LETTER DATA TO CORRESPONDING LETTERS OF NEW LPO */
+ copy_lpo_letter(new_lpo+new_x[i_x],seq_x+i_x,new_x,iseq_new);
+ FREE(iseq_new);
+ iseq_new=save_lpo_source_list(new_seq,holder_y->nsource_seq,/*COPY y SOURCE*/
+ holder_y->source_seq);
+ LOOP (i_y,len_y)
+ copy_lpo_letter(new_lpo+new_y[i_y],seq_y+i_y,new_y,iseq_new);
+ FREE(iseq_new);
+
+ LOOP (i_x,len_x) /* COPY OLD ALIGNMENT RINGS TO THE NEW LPO */
+ copy_old_ring_to_new(i_x,seq_x,new_lpo,new_x);
+ LOOP (i_y,len_y)
+ copy_old_ring_to_new(i_y,seq_y,new_lpo,new_y);
+
+ LOOP (i_x,len_x) /* SAVE ALIGNMENT OF seq_x AND seq_y TO new_lpo.align_ring*/
+ if (x_to_y[i_x]>=0) /* seq_x[i_x] ALIGNED TO seq_y, SO SAVE! */
+ crosslink_rings(new_x[i_x],new_y[x_to_y[i_x]],new_lpo);
+
+ if (remap_x) /* CONVERT OLD INDEX TABLE TO NEW REFERENCE SYSTEM */
+ remap_x_to_new(nremap_x,remap_x,len_x,new_x);
+ FREE(new_x); /* DUMP SCRATCH MEMORY AND RETURN */
+ FREE(new_y);
+ return new_seq; /* HAND BACK THE NEW LPO CONTAINING THE FUSION */
+}
+
+
+/** fuses the two partial orders holder_x and holder_y, based upon the
+ letter_x <--> letter_y mapping specified by x_to_y and y_to_x (which
+ must be consistent!) A new LPO is created to store the result;
+ neither holder_x or holder_y are changed */
+LPOSequence_T *copy_fuse_lpo(LPOSequence_T *holder_x, /*WRAPPER: NO REMAPPING*/
+ LPOSequence_T *holder_y,
+ LPOLetterRef_T x_to_y[],
+ LPOLetterRef_T y_to_x[])
+{
+ return copy_fuse_lpo_remap(holder_x,holder_y,x_to_y,y_to_x,0,NULL);
+}
+
+
+
+
+LPOSequence_T *copy_lpo(LPOSequence_T *holder_x)
+{
+ int i;
+ LPOSequence_T dummy,*new_copy;
+ LPOLetterRef_T *x_to_y=NULL;
+
+ memset(&dummy,0,sizeof(dummy)); /* BLANK ALL FIELDS */
+ CALLOC(x_to_y,holder_x->length,LPOLetterRef_T);
+ LOOP (i,holder_x->length)
+ x_to_y[i]= INVALID_LETTER_POSITION;
+
+ new_copy=copy_fuse_lpo(holder_x,&dummy,x_to_y,x_to_y);
+ FREE(x_to_y);
+ return new_copy;
+}
+
+
+
+
+
+void translate_lpo(int old_len,LPOLetterRef_T old_to_new[],
+ LPOLetter_T seq[])
+{
+ int i,block_end;
+ LPOLetterLink_T *link;
+ block_end=old_len;
+ LOOPB (i,old_len) { /* TRANSLATE AND SHIFT THE WHOLE LPO */
+ seq[i].align_ring=old_to_new[seq[i].align_ring]; /* TRANSLATE INDICES*/
+ seq[i].ring_id=old_to_new[seq[i].ring_id];
+ for (link= &seq[i].left;link && link->ipos>=0;link=link->more)
+ link->ipos = old_to_new[link->ipos];
+ for (link= &seq[i].right;link && link->ipos>=0;link=link->more)
+ link->ipos = old_to_new[link->ipos];
+
+ if ((0==i || /* HIT SEQ START, SO COPY THE BLOCK NOW!*/
+ old_to_new[i] != old_to_new[i-1]+1) /*BLOCK BOUNDARY*/
+ && old_to_new[i] != i) { /* ACTUALLY REQUIRES A SHIFT */
+ memmove(seq+old_to_new[i],seq+i,(block_end-i)*sizeof(LPOLetter_T));
+ block_end=i; /* RESET TO POINT TO END OF NEXT BLOCK TO COPY */
+ } /* BLOCK NOW SHIFTED TO ITS NEW LOCATION */
+ }
+}
+
+
+
+LPOSequence_T *fuse_lpo_remap(LPOSequence_T *holder_x,
+ LPOSequence_T *holder_y,
+ LPOLetterRef_T x_to_y[],
+ LPOLetterRef_T y_to_x[],
+ int nremap_x,
+ LPOLetterRef_T remap_x[])
+{
+ int i,new_len,*iseq_new=NULL,len_x,len_y;
+ LPOLetterRef_T i_x,i_y,*new_x=NULL,*new_y=NULL;
+ LPOLetter_T *new_lpo=NULL,*seq_x,*seq_y;
+
+ len_x=holder_x->length; /* GET letter ARRAY FROM BOTH x AND y */
+ seq_x=holder_x->letter;
+ len_y=holder_y->length;
+ seq_y=holder_y->letter;
+
+ CALLOC(new_x,len_x,LPOLetterRef_T);
+ CALLOC(new_y,len_y,LPOLetterRef_T);
+ new_len=reindex_lpo_fusion(len_x,seq_x,len_y,seq_y,x_to_y,y_to_x,new_x,new_y,
+ FISSION_BREAK,MINIMUM_FUSION,FUSION_PERCENT);
+ REALLOC(seq_x,new_len,LPOLetter_T); /* EXPAND seq_x TO HOLD FUSED LPO */
+ translate_lpo(len_x,new_x,seq_x); /* SHIFT ALL THE LETTERS TO NEW LOCATIONS*/
+ new_lpo=seq_x; /* THE EXPANDED VERSION OF seq_x IS OUR NEW LPO */
+ holder_x->length=new_len; /* SAVE NEW LPO ARRAY IN NEW HOLDER */
+ holder_x->letter=new_lpo;
+
+ LOOP (i_y,len_y) { /* INITIALIZE ALL LETTERS FOR STORING seq_y TO BLANK */
+ if (y_to_x[i_y]>=0 && new_x[y_to_x[i_y]]==new_y[i_y])/*i_y FUSED TO i_x*/
+ continue; /* THIS LETTER IS ALREADY PART OF seq_x SO DON'T OVERWRITE!!*/
+ i=new_y[i_y]; /* TRANSLATE TO NEW INDEXING */
+ memset(new_lpo+i,0,sizeof(LPOLetter_T)); /* NULL INITIALIZE IT! */
+ new_lpo[i].left.ipos=new_lpo[i].right.ipos=new_lpo[i].source.ipos
+ = INVALID_LETTER_POSITION; /* RESET TO UNLINKED STATE */
+ new_lpo[i].align_ring=new_lpo[i].ring_id=i; /* POINT TO SELF */
+ }
+
+ iseq_new=save_lpo_source_list(holder_x,holder_y->nsource_seq,/*COPY y SRC*/
+ holder_y->source_seq);
+ LOOP (i_y,len_y) /* COPY LETTER DATA TO CORRESPONDING LETTERS OF NEW LPO */
+ copy_lpo_letter(new_lpo+new_y[i_y],seq_y+i_y,new_y,iseq_new);
+ FREE(iseq_new);
+
+ LOOP (i_y,len_y) /* COPY OLD ALIGNMENT RINGS TO THE NEW LPO */
+ copy_old_ring_to_new(i_y,seq_y,new_lpo,new_y);
+
+ LOOP (i_x,len_x) /* SAVE ALIGNMENT OF seq_x AND seq_y TO new_lpo.align_ring*/
+ if (x_to_y[i_x]>=0) /* seq_x[i_x] ALIGNED TO seq_y, SO SAVE! */
+ crosslink_rings(new_x[i_x],new_y[x_to_y[i_x]],new_lpo);
+
+ if (remap_x) /* CONVERT OLD INDEX TABLE TO NEW REFERENCE SYSTEM */
+ remap_x_to_new(nremap_x,remap_x,len_x,new_x);
+ FREE(new_x); /* DUMP SCRATCH MEMORY AND RETURN */
+ FREE(new_y);
+ return holder_x; /* HAND BACK x LPO CONTAINING THE FUSION */
+}
+
+
+/** fuses the two partial orders holder_x and holder_y, based upon the
+ letter_x <--> letter_y mapping specified by x_to_y and y_to_x (which
+ must be consistent!) The result is returned in holder_x */
+LPOSequence_T *fuse_lpo(LPOSequence_T *holder_x, /*WRAPPER: NO REMAPPING*/
+ LPOSequence_T *holder_y,
+ LPOLetterRef_T x_to_y[],
+ LPOLetterRef_T y_to_x[])
+{
+ return fuse_lpo_remap(holder_x,holder_y,x_to_y,y_to_x,0,NULL);
+}
+
+
+
+/** FREES the linked list link including all nodes beneath it; NB: link
+ itself is freed, so DO NOT pass a static LPOLetterLink */
+void free_lpo_link_list(LPOLetterLink_T *link)
+{
+ LPOLetterLink_T *next;
+ for (;link;link=next) { /* DUMP ALL THE LINKS */
+ next=link->more;
+ free(link);
+ }
+}
+
+/** FREES the linked list source including all nodes beneath it; NB: source
+ itself is freed, so DO NOT pass a static LPOLetterSource */
+void free_lpo_source_list(LPOLetterSource_T *source)
+{
+ LPOLetterSource_T *next;
+ for (;source;source=next) { /* DUMP ALL THE SOURCE ENTRIES */
+ next=source->more;
+ free(source);
+ }
+}
+
+
+/** FREES ALL DATA ASSOCIATED WITH letter[], AND OPTIONALLY letter ITSELF*/
+void free_lpo_letters(int nletter,LPOLetter_T *letter,int please_free_block)
+{
+ int i;
+ if (!letter) /* NOTHING TO FREE... */
+ return;
+ LOOP (i,nletter) { /* DUMP ALL LINKED LISTS */
+ if (letter[i].left.more)
+ free_lpo_link_list(letter[i].left.more);
+ if (letter[i].right.more)
+ free_lpo_link_list(letter[i].right.more);
+ if (letter[i].source.more)
+ free_lpo_source_list(letter[i].source.more);
+ }
+ if (please_free_block) /*DON'T ALWAYS WANT TO FREE... MIGHT BE IN AN ARRAY*/
+ free(letter);
+}
+
+
+
+
+void free_lpo_sourceinfo(int nsource_seq,LPOSourceInfo_T *source_seq,
+ int please_free_block)
+{
+ int i;
+ LOOP (i,nsource_seq) {
+ FREE(source_seq[i].title);
+ FREE(source_seq[i].sequence);
+ FREE(source_seq[i].seq_to_po);
+ FREE(source_seq[i].po_to_seq);
+ free_lpo_numeric_data(source_seq[i].ndata,source_seq[i].data,TRUE);
+ source_seq[i].data=NULL; /* DON'T LEAVE DANGLING POINTER! */
+ }
+ if (please_free_block)
+ free(source_seq);
+}
+
+
+
+
+/** FREES ALL DATA FROM seq, AND OPTIONALLY seq ITSELF */
+void free_lpo_sequence(LPOSequence_T *seq,int please_free_holder)
+{
+ int i;
+ if (!seq) /* NOTHING TO FREE... */
+ return;
+ free_lpo_letters(seq->length,seq->letter,TRUE);
+ seq->letter=NULL; /* MARK AS FREED... DON'T LEAVE DANGLING POINTER! */
+ FREE(seq->title);
+ FREE(seq->sequence);
+ if (seq->source_seq) {
+ free_lpo_sourceinfo(seq->nsource_seq,seq->source_seq,TRUE);
+ seq->source_seq=NULL; /* MARK AS FREED... DON'T LEAVE DANGLING POINTER! */
+ }
+ if (please_free_holder) /*DON'T ALWAYS WANT TO FREE... MIGHT BE IN AN ARRAY*/
+ free(seq);
+}
+
+/**@memo {\bfEXAMPLE}: dump the LPO dna_lpo, and all its associated data: \begin{verbatim}
+ if (dna_lpo)
+ free_lpo_sequence(dna_lpo,TRUE);
+\end{verbatim}
+*/
+
+
+/** creates a new sequence which follows path[] through the LPO seq,
+ and gives it the specified name and title */
+int add_path_sequence(int path_length,
+ LPOLetterRef_T path[],
+ LPOSequence_T *seq,
+ char name[],
+ char title[])
+{
+ int i,iseq_new;
+ LPOSourceInfo_T *new_seq;
+ LPOLetterSource_T save_source={0,0,NULL};
+
+ /* CREATE SOURCE ENTRY FOR THIS SEQUENCE */
+ iseq_new=save_lpo_source(seq,name,title,path_length,0,NO_BUNDLE,0,NULL);
+
+ LOOP (i,path_length) { /* ADD THIS AS SOURCE TO ALL POSITIONS IN path */
+ save_source.ipos=i;
+ add_lpo_sources(&seq->letter[path[i]].source,&save_source,&iseq_new);
+ } /* NB: THIS DOESN'T CHECK THAT path IS A VALID WALK THRU THE PARTIAL ORDER
+ MIGHT BE A GOOD IDEA TO CATCH POSSIBLE ERRORS IN path */
+ return iseq_new; /* RETURN INDEX OF NEWLY CREATED ENTRY */
+}
+
+/**@memo EXAMPLE: create a consensus sequence from a path:
+ iseq=add_path_sequence(path_length,path,seq,name,title);
+-------------------------------------------------------
+-------------------------------------------------
+*/
+
Added: trunk/packages/poa/branches/upstream/current/lpo.h
===================================================================
--- trunk/packages/poa/branches/upstream/current/lpo.h 2006-08-07 14:01:18 UTC (rev 90)
+++ trunk/packages/poa/branches/upstream/current/lpo.h 2006-08-23 13:45:39 UTC (rev 91)
@@ -0,0 +1,171 @@
+
+
+#ifndef LPO_HEADER_INCLUDED
+#define LPO_HEADER_INCLUDED
+
+#include <default.h>
+#include <poa.h>
+#include <seq_util.h>
+
+/*********************************************************** lpo.c */
+void lpo_init(LPOSequence_T *seq);
+
+void initialize_seqs_as_lpo(int nseq, Sequence_T seq[],ResidueScoreMatrix_T *m);
+void lpo_index_symbols(LPOSequence_T *lpo,ResidueScoreMatrix_T *m);
+
+int save_lpo_source(LPOSequence_T *seq,
+ char name[],
+ char title[],
+ int length,
+ int weight,
+ int bundle_id,
+ int ndata,
+ LPONumericData_T data[]);
+
+int *save_lpo_source_list(LPOSequence_T *seq,
+ int nsource_seq,
+ LPOSourceInfo_T source_seq[]);
+
+LPOLetterLink_T *add_lpo_link(LPOLetterLink_T *list,LPOLetterRef_T ipos);
+
+void add_lpo_sources(LPOLetterSource_T *new_s,LPOLetterSource_T *old_s,
+ int iseq_new[]);
+
+void crosslink_rings(LPOLetterRef_T a,LPOLetterRef_T b,LPOLetter_T seq[]);
+
+LPOSequence_T *copy_fuse_lpo(LPOSequence_T *holder_x,
+ LPOSequence_T *holder_y,
+ LPOLetterRef_T x_to_y[],
+ LPOLetterRef_T y_to_x[]);
+
+LPOSequence_T *copy_lpo(LPOSequence_T *holder_x);
+
+LPOSequence_T *fuse_lpo(LPOSequence_T *holder_x,
+ LPOSequence_T *holder_y,
+ LPOLetterRef_T x_to_y[],
+ LPOLetterRef_T y_to_x[]);
+
+void free_lpo_letters(int nletter,LPOLetter_T *letter,int please_free_block);
+
+void free_lpo_sequence(LPOSequence_T *seq,int please_free_holder);
+
+int add_path_sequence(int path_length,
+ LPOLetterRef_T path[],
+ LPOSequence_T *seq,
+ char name[],
+ char title[]);
+
+
+/************************************************** FROM align_lpo.c */
+int align_lpo(LPOSequence_T *lposeq_x,
+ LPOSequence_T *lposeq_y,
+ ResidueScoreMatrix_T *m,
+ LPOLetterRef_T **x_to_y,
+ LPOLetterRef_T **y_to_x,
+ int use_global_alignment);
+
+
+
+/************************************************** FROM align_lpo_po.c */
+LPOScore_T align_lpo_po(LPOSequence_T *lposeq_x,
+ LPOSequence_T *lposeq_y,
+ ResidueScoreMatrix_T *m,
+ LPOLetterRef_T **x_to_y,
+ LPOLetterRef_T **y_to_x,
+ LPOScore_T (*scoring_function)
+ (int,int,LPOLetter_T [],LPOLetter_T [],
+ ResidueScoreMatrix_T *),
+ int use_global_alignment);
+
+
+/************************************************** FROM buildup_lpo.c */
+LPOSequence_T *buildup_lpo(LPOSequence_T *new_seq,
+ int nseq,LPOSequence_T seq[],
+ ResidueScoreMatrix_T *score_matrix,
+ int use_aggressive_fusion,
+ int use_global_alignment);
+
+LPOSequence_T *buildup_clipped_lpo(LPOSequence_T *new_seq,
+ int nseq,LPOSequence_T seq[],
+ ResidueScoreMatrix_T *score_matrix,
+ int use_global_alignment);
+
+LPOSequence_T *buildup_progressive_lpo(int nseq,LPOSequence_T seq[],
+ ResidueScoreMatrix_T *score_matrix,
+ int use_aggressive_fusion,
+ char score_file[],
+ LPOScore_T (*scoring_function)
+ (int,int,LPOLetter_T [],LPOLetter_T [],
+ ResidueScoreMatrix_T *),
+ int use_global_alignment);
+
+/**************************************************** lpo_format.c */
+void write_lpo(FILE *ifile,LPOSequence_T *seq,
+ ResidueScoreMatrix_T *score_matrix);
+
+LPOSequence_T *read_lpo(FILE *ifile);
+LPOSequence_T *read_lpo_select(FILE *ifile,FILE *select_ifile,
+ int keep_all_links,int remove_listed_sequences);
+
+void write_lpo_as_fasta(FILE *ifile,LPOSequence_T *seq,
+ int nsymbol,char symbol[]);
+
+void write_lpo_bundle_as_fasta(FILE *ifile,LPOSequence_T *seq,
+ int nsymbol,char symbol[],int ibundle);
+void export_clustal_seqal(FILE *ifile,
+ LPOSequence_T *seq,
+ int nsymbol,char symbol[]);
+
+
+/****************************************************** heaviest_bundle.c */
+void generate_lpo_bundles(LPOSequence_T *seq,float minimum_fraction);
+
+
+/****************************************************** make_frame.c */
+LPOSequence_T *build_3_frames(char dna_seq[],char name[],char title[],
+ LPOScore_T frameshift_score,
+ ResidueScoreMatrix_T *m);
+LPOSequence_T *map_protein_to_dna(char dna_name[],
+ LPOSequence_T *lpo_dna,
+ int nseq,
+ Sequence_T seq[],
+ ResidueScoreMatrix_T *score_matrix,
+ int use_aggressive_fusion);
+
+
+/****************************************************** remove_bundle.c */
+int remove_bundle(LPOSequence_T *seq,int ibundle,int delete_all_others);
+
+
+
+/******************************************************* numeric_data.c */
+LPONumericData_T *new_numeric_data(LPOSourceInfo_T *source_seq,
+ char name[],
+ char title[],
+ double initial_value);
+
+LPONumericData_T *find_numeric_data(LPOSourceInfo_T *source_seq,
+ char name[]);
+
+void free_lpo_numeric_data(int ndata,LPONumericData_T *data,
+ int please_free_block);
+
+void new_numeric_data_sets(LPOSourceInfo_T *source_seq,
+ int nset,char *set_names[],
+ char source_name_fmt[],
+ char target_name_fmt[],
+ char title_fmt[]);
+void read_numeric_data(int nsource_seq,
+ LPOSourceInfo_T source_seq[],
+ FILE *ifile);
+LPONumericData_T *cp_numeric_data(LPOSourceInfo_T *source_seq,
+ LPONumericData_T *data);
+
+/******************************************************* balance_matrix.c */
+int read_aa_frequencies(char filename[],ResidueScoreMatrix_T *score_matrix)
+ ;
+void balance_matrix_score(int nletter,LPOLetter_T letter[],
+ ResidueScoreMatrix_T *score_matrix);
+
+
+#endif
Added: trunk/packages/poa/branches/upstream/current/lpo_format.c
===================================================================
--- trunk/packages/poa/branches/upstream/current/lpo_format.c 2006-08-07 14:01:18 UTC (rev 90)
+++ trunk/packages/poa/branches/upstream/current/lpo_format.c 2006-08-23 13:45:39 UTC (rev 91)
@@ -0,0 +1,528 @@
+
+
+#include "default.h"
+#include "poa.h"
+#include "seq_util.h"
+#include "lpo.h"
+
+
+/** writes the LPO in seq to the stream ifile; optionally a symbol table
+ may be given for translating the letters in the LPO to text */
+void write_lpo(FILE *ifile,LPOSequence_T *seq,
+ ResidueScoreMatrix_T *score_matrix)
+{
+ int i;
+ LPOLetterLink_T *link;
+ LPOLetterSource_T *source;
+
+ fprintf(ifile,"VERSION=LPO.0.1\n");
+ fprintf(ifile,"NAME=%s\nTITLE=%s\nLENGTH=%d\nSOURCECOUNT=%d\n",
+ seq->name,seq->title,seq->length,seq->nsource_seq);
+
+ LOOPF (i,seq->nsource_seq)
+ fprintf(ifile,"SOURCENAME=%s\nSOURCEINFO=%d %d %d %d %s\n",
+ seq->source_seq[i].name,seq->source_seq[i].length,
+ seq->source_seq[i].istart,seq->source_seq[i].weight,
+ seq->source_seq[i].bundle_id,seq->source_seq[i].title);
+
+ LOOPF (i,seq->length) {
+ fprintf(ifile,"%c:",
+ seq->letter[i].letter < score_matrix->nsymbol ?
+ score_matrix->symbol[seq->letter[i].letter]
+ : seq->letter[i].letter);
+ for (link= &seq->letter[i].left;link && link->ipos>=0;link=link->more)
+ fprintf(ifile,"L%d",link->ipos);
+ for (source= &seq->letter[i].source;source;source=source->more)
+ fprintf(ifile,"S%d",source->iseq); /* SOURCE ID */
+ if (seq->letter[i].align_ring!=i) /* ALIGNED TO SOMETHING ELSE */
+ fprintf(ifile,"A%d",seq->letter[i].align_ring);
+ fputc('\n',ifile);
+ }
+}
+/**@memo example: writing a PO file:
+ if (lpo_file_out)
+ write_lpo(lpo_file_out,lpo_out,score_matrix.symbol);
+*/
+
+
+
+
+/** reads an LPO from the stream ifile, dynamically allocates memory for
+it, and returns a pointer to the LPO */
+LPOSequence_T *read_lpo(FILE *ifile)
+{
+ int i,j,length,nsource_seq,istart,field_id,*pos_count=NULL,value;
+ int weight,bundle_id,last_alloc=0;
+ LPOSequence_T *seq=NULL;
+ char c,name[1024],title[4096],version[256];
+ LPOLetterSource_T save_source={0,0,NULL};
+
+ CALLOC(seq,1,LPOSequence_T);
+ fscanf(ifile,"VERSION=%s",version);
+ if (fscanf(ifile," NAME=%[^\n] TITLE=%[^\n] LENGTH=%d SOURCECOUNT=%d",
+ name,title,&length,&nsource_seq)!=4)
+ return NULL;
+ STRNCPY(seq->name,name,SEQUENCE_NAME_MAX);
+ seq->title=strdup(title);
+ seq->length=length;
+ seq->nsource_seq=nsource_seq;
+ CALLOC(seq->letter,length,LPOLetter_T);
+ GETMEM(seq->source_seq,nsource_seq,last_alloc,SOURCE_SEQ_BUFFER_CHUNK,LPOSourceInfo_T);
+ CALLOC(pos_count,nsource_seq,int);
+ LOOP (i,length) { /* INITIALIZE ALL LINKS TO INVALID */
+ seq->letter[i].align_ring=i; /* POINT TO SELF */
+ seq->letter[i].ring_id= INVALID_LETTER_POSITION; /* BLANK! */
+ seq->letter[i].left.ipos=seq->letter[i].right.ipos=
+ seq->letter[i].source.ipos= INVALID_LETTER_POSITION;
+ }
+
+ LOOPF(i,nsource_seq) { /* SAVE SOURCE INFO LIST */
+ if (fscanf(ifile," SOURCENAME=%[^\n] SOURCEINFO=%d %d %d %d",
+ name,&length,&istart,&weight,&bundle_id)!=5)
+ return NULL;
+ /* SKIP WHITESPACE BEFORE TITLE; ALLOW EMPTY TITLE. */
+ fscanf(ifile,"%*[ \t]");
+ if (fscanf(ifile,"%[^\n]",title)!=1)
+ title[0] = '\0';
+ STRNCPY(seq->source_seq[i].name,name,SEQUENCE_NAME_MAX);
+ seq->source_seq[i].length=length;
+ seq->source_seq[i].istart=istart;
+ seq->source_seq[i].weight=weight;
+ seq->source_seq[i].bundle_id=bundle_id;
+ seq->source_seq[i].title=strdup(title);
+ }
+
+ LOOPF (i,seq->length) { /* NOW READ THE ACTUAL PARTIAL ORDER */
+ if (fscanf(ifile," %c:",&c)!=1) /* READ SEQUENCE LETTER */
+ return NULL;
+ seq->letter[i].letter=c;
+ while ((field_id=getc(ifile))!=EOF && '\n'!=field_id) {/* READ FIELDS*/
+ if (1!=fscanf(ifile,"%d",&value))
+ return NULL;
+ switch (field_id) {
+ case 'L':
+ add_lpo_link(&seq->letter[i].left,value); /* ADD LEFT-RIGHT LINKS*/
+ add_lpo_link(&seq->letter[value].right,i);
+ break;
+ case 'S': /* SAVE THE SOURCE ID */
+ save_source.ipos=pos_count[value]++;
+ add_lpo_sources(&seq->letter[i].source,&save_source,&value);
+ break;
+ case 'A': /* SAVE THE ALIGN RING POINTER */
+ seq->letter[i].align_ring=value;
+ break;
+ }
+ }
+ }
+
+ LOOPF (i,seq->length) { /* SET ring_id TO MINIMUM VALUE ON EACH RING */
+ if (seq->letter[i].ring_id<0) {/* NEW RING, UPDATE IT! */
+ j=i; /* GO AROUND THE ENTIRE RING, SETTING ring_id TO i */
+ do seq->letter[j].ring_id=i; /* i IS MINIMUM VALUE ON THIS RING */
+ while ((j=seq->letter[j].align_ring)!=i);
+ }
+ }
+
+ FREE(pos_count);
+ return seq;
+}
+
+
+
+#define INVALID_LPO_LINK (-99)
+
+enum {
+ default_retention_mode,
+ default_no_retention_mode
+};
+
+/** reads an LPO from the stream ifile, dynamically allocates memory for
+it, and returns a pointer to the LPO */
+LPOSequence_T *read_lpo_select(FILE *ifile,FILE *select_file,
+ int keep_all_links,int remove_listed_sequences)
+{
+ int i,j,k,length,nsource_seq,istart,field_id,*pos_count=NULL,value;
+ int weight,bundle_id,last_alloc=0,*iseq_compact=NULL,*last_pos=NULL;
+ int nlink,*link_list=NULL,*match_pos=NULL,*ring_old=NULL;
+ int *pos_compact=NULL,npos_compact=0,keep_this_letter,retention_mode;
+ LPOSequence_T *seq=NULL;
+ char c,name[1024],title[4096],version[256];
+ LPOLetterSource_T save_source={0,0,NULL},*source=NULL;
+
+ if (remove_listed_sequences)
+ retention_mode=default_retention_mode;/*KEEP SEQS AS DFLT, SKIP IF LISTED*/
+ else /* SKIP SEQS UNLESS LISTED IN select_file */
+ retention_mode=default_no_retention_mode;
+
+ CALLOC(seq,1,LPOSequence_T);
+ fscanf(ifile,"VERSION=%s",version);
+ if (fscanf(ifile," NAME=%[^\n] TITLE=%[^\n] LENGTH=%d SOURCECOUNT=%d",
+ name,title,&length,&nsource_seq)!=4)
+ return NULL;
+ STRNCPY(seq->name,name,SEQUENCE_NAME_MAX);
+ seq->title=strdup(title);
+ seq->length=length;
+ seq->nsource_seq=nsource_seq;
+ CALLOC(seq->letter,length,LPOLetter_T);
+ GETMEM(seq->source_seq,nsource_seq,last_alloc,SOURCE_SEQ_BUFFER_CHUNK,LPOSourceInfo_T);
+ CALLOC(pos_count,nsource_seq,int);
+ LOOP (i,length) { /* INITIALIZE ALL LINKS TO INVALID */
+ seq->letter[i].align_ring=i; /* POINT TO SELF */
+ seq->letter[i].ring_id= INVALID_LETTER_POSITION; /* BLANK! */
+ seq->letter[i].left.ipos=seq->letter[i].right.ipos=
+ seq->letter[i].source.ipos= INVALID_LETTER_POSITION;
+ }
+
+ LOOPF(i,nsource_seq) { /* SAVE SOURCE INFO LIST */
+ if (fscanf(ifile," SOURCENAME=%[^\n] SOURCEINFO=%d %d %d %d %[^\n]",
+ name,&length,&istart,&weight,&bundle_id,title)!=6)
+ return NULL;
+ STRNCPY(seq->source_seq[i].name,name,SEQUENCE_NAME_MAX);
+ seq->source_seq[i].length=length;
+ seq->source_seq[i].istart=istart;
+ seq->source_seq[i].weight=weight;
+ seq->source_seq[i].bundle_id=bundle_id;
+ seq->source_seq[i].title=strdup(title);
+ }
+
+ CALLOC(iseq_compact,nsource_seq,int);
+ CALLOC(last_pos,nsource_seq,int);
+ CALLOC(match_pos,nsource_seq,int);
+ if (select_file) {
+ LOOP (i,nsource_seq) /* DEFAULT: MARKED AS INVALID */
+ iseq_compact[i]= -retention_mode;
+ while (fscanf(select_file,"SOURCENAME=%[^\n]\n",name)==1) {
+ LOOP (i,nsource_seq)
+ if (strcmp(seq->source_seq[i].name,name)==0) {
+ iseq_compact[i]= retention_mode-1;
+ break;
+ }
+ }
+ }
+
+ j=0;
+ LOOPF (i,nsource_seq) {
+ last_pos[i]= -1; /* DEFAULT: INVALID */
+ if (iseq_compact[i]>=0) {
+ iseq_compact[i]=j;
+ if (i>j)
+ memcpy(seq->source_seq+j,seq->source_seq+i,sizeof(LPOSourceInfo_T));
+ match_pos[j]= INVALID_LPO_LINK; /* DEFAULT: NO VALID LINK! */
+ j++;
+ }
+ else { /* EXCLUDE THIS SEQUENCE FROM THE FILTERED LPO */
+ iseq_compact[i]= INVALID_LETTER_POSITION;
+ if (seq->source_seq[i].title)
+ free(seq->source_seq[i].title);
+ }
+ }
+ seq->nsource_seq=nsource_seq=j; /* COMPACTED COUNT OF SEQUENCES TO KEEP*/
+
+ CALLOC(link_list,seq->length,int); /*TEMPORARY DATA FOR COMPACTION MAPPING */
+ CALLOC(pos_compact,seq->length,int);
+ CALLOC(ring_old,seq->length,int);
+ npos_compact=0;
+
+ LOOPF (i,seq->length) { /* NOW READ THE ACTUAL PARTIAL ORDER */
+ if (fscanf(ifile," %c:",&c)!=1) /* READ SEQUENCE LETTER */
+ return NULL;
+ seq->letter[npos_compact].letter=c;
+ nlink=0;
+ keep_this_letter=0; /*DEFAULT */
+ while ((field_id=getc(ifile))!=EOF && '\n'!=field_id) {/* READ FIELDS*/
+ if (1!=fscanf(ifile,"%d",&value))
+ return NULL;
+ switch (field_id) {
+ case 'L':
+ if (pos_compact[value]>=0) /*COULD BE VALID LINK: WAIT TO CHECK SRCs*/
+ link_list[nlink++]=pos_compact[value]; /* SAVE IT TEMPORARILY */
+ break;
+ case 'S': /* SAVE THE SOURCE ID */
+ if (iseq_compact[value]>=0) { /*KEEP THIS SOURCE, SO KEEP THIS LETTER*/
+ keep_this_letter=1;
+ value=iseq_compact[value]; /* TRANSLATE TO ITS COMPACTED INDEX*/
+ if (last_pos[value]>=0) { /* MAKE SURE WE HAVE LINK TO LAST POSITION */
+ LOOP (j,nlink) /* CHECK TO SEE IF LINK ALREADY SAVED */
+ if (link_list[j]==last_pos[value])
+ break;
+ if (LOOP_FINISHED(j,nlink)) { /* NO LINK??? ADD IT!!! */
+ link_list[nlink++]=last_pos[value];
+ }
+ }
+ last_pos[value]=npos_compact; /* THIS SEQ POS IS AT THIS NODE */
+ save_source.ipos=pos_count[value]++;/* COUNT LENGTH OF THIS SEQ */
+ add_lpo_sources(&seq->letter[npos_compact].source,
+ &save_source,&value);
+ seq->letter[npos_compact].ring_id= INVALID_LETTER_POSITION;
+ seq->letter[npos_compact].align_ring=i;
+ ring_old[i]=i; /* DEFAULT: SELF-RING OF ONE LETTER*/
+ }
+ break;
+ case 'A': /* SAVE THE ALIGN RING POINTER */
+ ring_old[i]=value; /* SAVE OLD ALIGN RING INDICES */
+ if (keep_this_letter) /* TEMP'Y: SAVE REVERSE MAPPING TO A-R INDICES*/
+ seq->letter[npos_compact].align_ring=i;
+ break;
+ }
+ }
+ if (keep_this_letter) {
+ for (source= &seq->letter[npos_compact].source;source;source=source->more)
+ match_pos[source->iseq]=source->ipos - 1; /*VALID LINK MUST MATCH m_p*/
+
+ LOOPF (j,nlink) { /*ADD LEFT-RIGHT LINKS*/
+ for (source= &seq->letter[link_list[j]].source;source;source=source->more)
+ if (keep_all_links /* KEEP LINKS EVEN IF NOT FROM SELECTED SEQS*/
+ || source->ipos == match_pos[source->iseq]) { /*VALID LINK! */
+ add_lpo_link(&seq->letter[npos_compact].left,link_list[j]);
+ add_lpo_link(&seq->letter[link_list[j]].right,npos_compact);
+ break; /* SAVED THIS LINK! */
+ }
+ }
+ for (source= &seq->letter[npos_compact].source;source;source=source->more)
+ match_pos[source->iseq]= INVALID_LPO_LINK;/*DFLT:NO VALID LINK*/
+
+ pos_compact[i]=npos_compact++;
+ }
+ else
+ pos_compact[i]= INVALID_LETTER_POSITION;
+ }
+ seq->length=npos_compact;
+
+ LOOPF (i,seq->length) { /* SET ring_id TO MINIMUM VALUE ON EACH RING */
+ if (seq->letter[i].ring_id<0) {/* NEW RING, UPDATE IT! */
+ j=seq->letter[i].align_ring; /* GO AROUND THE ENTIRE RING, SETTING ring_id TO i */
+ do {
+ /*printf("i=%d\tj=%d\tpos_compact[j]=%d\tring_old[j]=%d\n",i,j,pos_compact[j],ring_old[j]);*/
+ if (pos_compact[j]>=0) {
+ k=pos_compact[j];
+ seq->letter[k].ring_id=i; /* i IS MINIMUM VALUE ON THIS RING */
+ }
+ j=ring_old[j]; /* ADVANCE TO NEXT LETTER ON THE RING */
+ if (pos_compact[j]>=0) /* IF VALID, POINT IT BACK TO PREVIOUS LETTER*/
+ seq->letter[pos_compact[j]].align_ring=k;
+ }
+ while (pos_compact[j]!=i); /* STOP WHEN WE'VE COMPLETED THE RING, BACK TO START*/
+ }
+ }
+
+ FREE(pos_count);
+ FREE(pos_compact);
+ FREE(iseq_compact);
+ FREE(last_pos);
+ FREE(link_list);
+ FREE(ring_old);
+ FREE(match_pos);
+ return seq;
+}
+
+/*
+LPOSequence_T *read_lpo(FILE *ifile)
+{
+ return read_lpo_select(ifile,NULL);
+}
+*/
+
+
+
+
+#define FASTA_GAP_CHARACTER '.'
+int xlate_lpo_to_al(LPOSequence_T *seq,
+ int nsymbol,char symbol[],int ibundle,
+ char gap_character,
+ char ***p_seq_pos,char **p_p,char **p_include)
+{
+ int i,j,iring=0,nring=0,current_ring=0,iprint;
+ char **seq_pos=NULL,*p=NULL,*include_in_save=NULL;
+ LPOLetterSource_T *source;
+
+ LOOPF (i,seq->length) /* COUNT TOTAL #ALIGNMENT RINGS IN THE LPO */
+ if (seq->letter[i].ring_id != current_ring) { /* NEXT RING */
+ current_ring=seq->letter[i].ring_id;
+ nring++;
+ }
+ nring++; /* DON'T FORGET TO COUNT THE LAST RING!!! */
+
+ CALLOC(seq_pos,seq->nsource_seq,char *); /* ALLOCATE MAP ARRAY*/
+ CALLOC(p,seq->nsource_seq*nring,char);
+ LOOP (i,seq->nsource_seq) /* BUILD POINTER ARRAY INTO MAP ARRAY */
+ seq_pos[i]=p+i*nring;
+ memset(p,gap_character,seq->nsource_seq*nring);
+ /* DEFAULT IS NO SEQUENCE PRESENT AT THIS POSITION */
+
+ current_ring=0; /* RESET TO BEGINNING */
+ LOOPF (i,seq->length) { /* NOW MAP THE LPO TO A FLAT LINEAR ORDER */
+ if (seq->letter[i].ring_id != current_ring) { /* NEXT RING */
+ current_ring=seq->letter[i].ring_id;
+ iring++;
+ } /* MAP EACH SOURCE SEQ ONTO LINEAR ORDER INDEXED BY iring */
+ for (source= &seq->letter[i].source;source;source=source->more)
+ if (symbol && seq->letter[i].letter<nsymbol) /* TRANSLATE TO symbol */
+ seq_pos[source->iseq][iring]= symbol[seq->letter[i].letter];
+ else /* NO NEED TO TRANSLATE */
+ seq_pos[source->iseq][iring]= seq->letter[i].letter;
+ }
+
+ if (ibundle>=0) { /* ONLY SAVE SEQS THAT ARE IN THIS BUNDLE */
+ CALLOC(include_in_save,nring,char); /* BLANK FLAGS: WHAT RINGS TO SHOW*/
+ LOOP (iring,nring) { /* CHECK EACH RING TO SEE IF IT'S IN BUNDLE */
+ LOOP (i,seq->nsource_seq) {
+ if (seq_pos[i][iring]!=gap_character /* ALIGNED HERE! */
+ && seq->source_seq[i].bundle_id == ibundle) { /* PART OF BUNDLE!*/
+ include_in_save[iring]=1; /* SO INCLUDE THIS RING */
+ break;
+ }
+ }
+ }
+ }
+ if (p_seq_pos)
+ *p_seq_pos = seq_pos;
+ if (p_seq_pos)
+ *p_seq_pos = seq_pos;
+ if (p_seq_pos)
+ *p_seq_pos = seq_pos;
+ return nring;
+}
+
+
+/** writes the LPO in FASTA format, including all sequences in the
+ specified bundle */
+void write_lpo_bundle_as_fasta(FILE *ifile,LPOSequence_T *seq,
+ int nsymbol,char symbol[],int ibundle)
+{
+ int i,j,nring=0,iprint;
+ char **seq_pos=NULL,*p=NULL,*include_in_save=NULL;
+
+ nring=xlate_lpo_to_al(seq,nsymbol,symbol,ibundle, /* TRANSLATE TO */
+ FASTA_GAP_CHARACTER, /* RC-MSA FMT */
+ &seq_pos,&p,&include_in_save);
+ LOOPF (i,seq->nsource_seq) { /* NOW WRITE OUT FASTA FORMAT */
+ if (ibundle<0 /* PRINT ALL BUNDLES */
+ || seq->source_seq[i].bundle_id == ibundle) { /* OR JUST THIS BUNDLE*/
+ fprintf(ifile,">%s %s",seq->source_seq[i].name,seq->source_seq[i].title);
+ iprint=0;
+ LOOPF (j,nring) { /* WRITE OUT 60 CHARACTER SEQUENCE LINES */
+ if (NULL==include_in_save || include_in_save[j]) {
+ fprintf(ifile,"%s%c",iprint%60? "":"\n", seq_pos[i][j]);
+ iprint++; /* KEEP COUNT OF PRINTED CHARACTERS */
+ }
+ }
+ fputc('\n',ifile);
+ }
+ }
+
+ FREE(p); /* DUMP TEMPORARY MEMORY */
+ FREE(include_in_save);
+ FREE(seq_pos);
+}
+/**@memo example: writing FASTA format file:
+ if (seq_ifile=fopen(fasta_out,"w")) {
+ write_lpo_bundle_as_fasta(seq_ifile,lpo_out,
+ score_matrix.nsymbol,score_matrix.symbol,ibundle);
+ fclose(seq_ifile);
+ }
+*/
+
+
+/** writes the LPO in FASTA format, including all sequences in all bundles
+-------------------------------------------------------
+---------------------------------------------------------------------------
+*/
+void write_lpo_as_fasta(FILE *ifile,LPOSequence_T *seq,
+ int nsymbol,char symbol[])
+{ /* WRAPPER FUNCTION FOR SAVING ALL BUNDLES!! */
+ write_lpo_bundle_as_fasta(ifile,seq,nsymbol,symbol,ALL_BUNDLES);
+}
+
+
+
+
+/****************************************************************
+ *
+ * WRITE_SEQUENCES
+ *
+ * This function writes out sequences. Each line of sequnces
+ * will be written out in blocks with spacing in between
+ * each block.
+ *
+ ***************************************************************/
+
+int write_sequences(FILE *ifile,
+ LPOSequence_T *seq,
+ int indent,
+ int nblock, /* # OF BLOCKS */
+ int block_size, /* SIZE OF BLOCKS */
+ int block_spacing, /* SPACING BETWEEN BLOCKS */
+ int paragraph_spacing, /* SPACING BETWEEN PRAGRAPHS */
+ int names, /* BOOLEAN: PRINT NAMES AFTER 1ST PARAGRAPH? */
+ char gap_char,
+ int nsymbol,char symbol[])
+{
+ int i,ip,ial,iseq,iblock,nparagraph,remainder,ipos,ipos_local,broken;
+ int len,nring;
+ char **seq_pos=NULL,*p=NULL,*include_in_save=NULL;
+
+ nring=xlate_lpo_to_al(seq,nsymbol,symbol,ALL_BUNDLES, /* TRANSLATE TO */
+ gap_char, /* RC-MSA FMT */
+ &seq_pos,&p,&include_in_save);
+
+ nparagraph = nring/(nblock*block_size);/*# OF FULL PARAGRAPHS */
+ remainder = nring%(nblock*block_size);
+ if (remainder != 0)
+ nparagraph++;
+ LOOPF(ip,nparagraph){ /* FOR EACH PARAGRAPH */
+ LOOPF(iseq,seq->nsource_seq){ /* FOR EACH SEQUENCE */
+ if (ip == 0 || names){
+ if ((len=strlen(seq->source_seq[iseq].name))>indent-1){ /*MUST WE TRUNCATE NAME? */
+ LOOPF(i,indent-1) /* PRINT AS MUCH OF NAME AS POSSIBLE */
+ putc(seq->source_seq[iseq].name[i],ifile);
+ putc(' ',ifile); /* LEAVE A SPACE */
+ }
+ else{ /* PRINT WHOLE NAME */
+ fprintf(ifile,"%s",seq->source_seq[iseq].name); /* PRINT NAME */
+ LOOP(i,indent-len) /* INDENT LINE */
+ putc(' ',ifile);
+ }
+ }
+ else{
+ LOOP(i,indent) /* INDENT LINE */
+ putc(' ',ifile);
+ }
+ LOOPF(iblock,nblock){ /* FOR EACH BLOCK */
+ broken=0;
+ LOOPF(i,block_size){
+ ipos = i + (iblock*block_size) + (ip*nblock*block_size);
+ if (ipos>=nring){
+ broken=1;
+ break;
+ }
+ putc(seq_pos[iseq][ipos],ifile); /* APPROP SYMBOL FOR THIS POS*/
+ }
+ if (broken)
+ break;
+ LOOP(i,block_spacing) /* ADD SPACING BETWEEN BLOCKS */
+ putc(' ',ifile);
+ } /* END OF BLOCK LOOP */
+ putc('\n',ifile); /* NEW LINE AT END OF SEQUENCE LINE */
+ } /* END OF SEQ LOOP */
+ LOOP(i,paragraph_spacing) /* ADD SPACING BETWEEN PARAGRAPHS */
+ putc('\n',ifile);
+ } /* END OF PARAGRAPH LOOP */
+ done:
+ FREE(p); /* DUMP TEMPORARY MEMORY */
+ FREE(include_in_save);
+ FREE(seq_pos);
+ return 0;
+}
+
+
+
+void export_clustal_seqal(FILE *ifile,
+ LPOSequence_T *seq,
+ int nsymbol,char symbol[])
+{
+ fprintf(ifile,"CLUSTAL W (1.74) multiple sequence alignment\n\n\n");
+ /* WRITE OUT SEQUNENCES: INDENT 36, 1 BLOCK OF 50 CHARS 0 CHARS BLOCK
+ SPACING, 2 LINES PARAGRAPH SPACING, PRINT NAMES ON ALL LINES. */
+ write_sequences(ifile,seq,36,1,50,0,2,1,'-',nsymbol,symbol);
+}
+
Added: trunk/packages/poa/branches/upstream/current/main.c
===================================================================
--- trunk/packages/poa/branches/upstream/current/main.c 2006-08-07 14:01:18 UTC (rev 90)
+++ trunk/packages/poa/branches/upstream/current/main.c 2006-08-23 13:45:39 UTC (rev 91)
@@ -0,0 +1,255 @@
+
+
+#include <lpo.h>
+
+extern LPOScore_T caties_scoring_function(int i,
+ int j,
+ LPOLetter_T seq_x[],
+ LPOLetter_T seq_y[],
+ ResidueScoreMatrix_T *m)
+ ; /* INCLUDE THIS HERE SO WE CAN PASS IT TO buildup_progressive_lpo() */
+
+
+
+int main(int argc,char *argv[])
+{
+ int i,ibundle=ALL_BUNDLES,nframe_seq=0,use_reverse_complement=0;
+ int nseq=0,do_switch_case=dont_switch_case,do_analyze_bundles=0;
+ char score_file[256],seq_file[256],*comment=NULL,*al_name="test align";
+ ResidueScoreMatrix_T score_matrix; /* DEFAULT GAP PENALTIES*/
+ Sequence_T *seq=NULL,*lpo_out=NULL,*frame_seq=NULL,*dna_lpo=NULL,
+ *lpo_in=NULL;
+ FILE *seq_ifile=NULL,*errfile=stderr,*logfile=NULL,*lpo_file_out=NULL,
+ *subset_ifile=NULL;
+ char *print_matrix_letters=NULL,*fasta_out=NULL,*po_out=NULL,*matrix_filename=NULL,
+ *seq_filename=NULL,*frame_dna_filename=NULL,*po_filename=NULL,
+ *hbmin=NULL,*numeric_data=NULL,*numeric_data_name="Nmiscall",*dna_to_aa=NULL,
+ *pair_score_file=NULL,*aafreq_file=NULL,*termval_file=NULL,
+ *bold_seq_name=NULL,*subset_file=NULL,*rm_subset_file=NULL;
+ float bundling_threshold=0.9;
+ int exit_code=0,count_sequence_errors=0,please_print_snps=0,
+ report_consensus_seqs=0,report_major_allele=0,use_aggressive_fusion=0;
+ int show_allele_evidence=0,please_collapse_lines=0,keep_all_links=0;
+ int remove_listed_seqs=0,please_report_similarity;
+ char *reference_seq_name="CONSENS%d",*clustal_out=NULL;
+ int use_global_alignment=0;
+
+
+ black_flag_init(argv[0],PROGRAM_VERSION);
+
+ if (argc<2) {
+ fprintf(stderr,"\nUsage: %s [OPTIONS] matrixfile\n\n"
+" OPTIONS\t\t\tFUNCTION\n"
+" -------\t\t\t________\n"
+"INPUT:\n"
+" -read_fasta FILENAME\t\tReads in FASTA sequence file.\n"
+" -tolower\t\t\tForces FASTA sequences to lowercase\n"
+"\t\t\t\t(nucleotides in our matrix files)\n"
+" -toupper\t\t\tForces FASTA sequences to UPPERCASE\n"
+"\t\t\t\t(amino acids in our matrix files)\n"
+" -read_po FILENAME\t\tReads in PO file.\n"
+" -subset FILENAME\t\tFilters PO-MSA to include list of seqs in file.\n"
+" -remove FILENAME\t\tFilters PO-MSA to exclude list of seqs in file.\n"
+"\n"
+"\nALIGNMENT:\n"
+" -pair_score FILENAME\t\tReads tab-delimited file of sequence-sequence\n"
+"\t\t\t\tsimilarity scores for constructing a guide-tree\n"
+"\t\t\t\tand performing progressive alignment using\n"
+"\t\t\t\tPO-PO alignment steps.\n"
+" -fuse_all\t\t\tFuses identical letters on align rings.\n"
+"\nANALYSIS:\n"
+" -hb\t\t\t\tPerforms heaviest bundling to generate consensi.\n"
+" -hbmin VALUE\t\t\tBundles into heaviest bundle seqs with percent id >= value.\n"
+"\nOUTPUT:\n"
+" -best\t\t\t\tRestricts MSA output to heaviest bundles.\n"
+" -pir FILENAME\t\t\tWrites out MSA in PIR format.\n"
+" -clustal FILENAME\t\tWrites out MSA in CLUSTAL format.\n"
+" -po FILENAME\t\t\tWrites out MSA in PO format.\n"
+" -printmatrix LETTERSET\tPrints score matrix to stdout.\n"
+" -v\t\t\t\tRuns in verbose mode.\n\n"
+" Note: Either the -read_fasta or -read_po argument must be used with poa,\n"
+" since a FASTA file or PO file must be read in by poa.\n\n"
+"For more information, see http://www.bioinformatics.ucla.edu/poa\n\n"
+ ,argv[0]);
+ exit(-1);
+ }
+
+ FOR_ARGS(i,argc) { /* READ ALL THE ARGUMENTS */
+ ARGMATCH_VAL("-tolower",do_switch_case,switch_case_to_lower);
+ ARGMATCH_VAL("-toupper",do_switch_case,switch_case_to_upper);
+ ARGMATCH_VAL("-v",logfile,stdout);
+ ARGMATCH_VAL("-best",ibundle,0); /*RESTRICT FASTA OUTPUT TO HB */
+ ARGMATCH_VAL("-hb",do_analyze_bundles,1);/*CALCULATE HEAVIEST BUNDLING*/
+ ARGGET("-printmatrix",print_matrix_letters);
+ ARGGET("-read_po",po_filename); /* READ A PO FILE FOR ALIGNMENT/ANALYSIS*/
+ ARGGET("-pir",fasta_out); /* SAVE FASTA-PIR FORMAT ALIGNMENT FILE */
+ ARGGET("-clustal",clustal_out); /* SAVE CLUSTAL FORMAT ALIGNMENT FILE */
+ ARGGET("-po",po_out); /* SAVE PO FORMAT ALIGNMENT FILE */
+ ARGGET("-hbmin",hbmin); /* SET THRESHOLD FOR BUNDLING */
+ ARGMATCH("-fuse_all",use_aggressive_fusion);
+ ARGMATCH("-global",use_global_alignment);
+ ARGGET("-pair_score",pair_score_file); /* FILENAME TO READ PAIR SCORES*/
+ ARGGET("-subset",subset_file); /* FILENAME TO READ SEQ SUBSET LIST*/
+ ARGGET("-remove",rm_subset_file); /* FILENAME TO READ SEQ REMOVAL LIST*/
+ ARGGET("-read_fasta",seq_filename); /* READ FASTA FILE FOR ALIGNMENT */
+ NEXTARG(matrix_filename); /* NON-FLAG ARG SHOULD BE MATRIX FILE */
+
+ }
+
+ if (rm_subset_file) { /* TREAT SUBSET FILE AS LIST OF SEQS TO REMOVE */
+ subset_file=rm_subset_file;
+ remove_listed_seqs=1;
+ }
+
+ if (hbmin)
+ bundling_threshold=atof(hbmin);
+
+ if (!matrix_filename ||
+ read_score_matrix(matrix_filename,&score_matrix)<=0){/* READ MATRIX */
+ WARN_MSG(USERR,(ERRTXT,"Error reading matrix file %s. Exiting",
+ matrix_filename? matrix_filename: "(null)"),"$Revision: 1.2 $");
+ exit_code=1; /* SIGNAL ERROR CONDITION */
+ goto free_memory_and_exit;
+ }
+
+ if (logfile) {
+ fprintf(logfile, "X-Gap Penalties: %d %d %d\n",
+ score_matrix.gap_penalty_set[0][0],
+ score_matrix.gap_penalty_set[0][1],
+ score_matrix.gap_penalty_set[0][2]);
+ fprintf(logfile, "Y-Gap Penalties: %d %d %d\n",
+ score_matrix.gap_penalty_set[1][0],
+ score_matrix.gap_penalty_set[1][1],
+ score_matrix.gap_penalty_set[1][2]);
+ }
+
+ if (print_matrix_letters) /* USER WANTS US TO PRINT A MATRIX */
+ print_score_matrix(stdout,&score_matrix,print_matrix_letters
+ /*"ARNDCQEGHILKMFPSTWYV"*/);
+
+ if (po_filename) { /* READ PARTIAL ORDER FILE */
+ if (seq_ifile=fopen(po_filename,"r")) {
+ if (subset_file) { /* LIST OF SEQS TO FILTER THE PO WITH */
+ subset_ifile=fopen(subset_file,"r");
+ if (!subset_ifile) {
+ WARN_MSG(USERR,(ERRTXT,"Error reading subset file %s. Exiting",
+ subset_file),"$Revision: 1.2 $");
+ exit_code=1; /* SIGNAL ERROR CONDITION */
+ goto free_memory_and_exit;
+ }
+ lpo_in=read_lpo_select(seq_ifile,subset_ifile,keep_all_links,
+ remove_listed_seqs);
+ fclose(subset_ifile);
+ }
+ else
+ lpo_in=read_lpo(seq_ifile); /* READ LPO NORMALLY W/O FILTERING */
+ fclose(seq_ifile);
+ }
+ if (!lpo_in) {
+ WARN_MSG(USERR,(ERRTXT,"Error reading PO file %s!!!\nExiting.",
+ po_filename),"$Revision: 1.2 $");
+ exit_code=1; /* SIGNAL ERROR CONDITION */
+ goto free_memory_and_exit;
+ }
+ }
+
+ if (seq_filename) { /* READ SEQUENCES TO ALIGN */
+ seq_ifile=fopen(seq_filename,"r"); /* READ SEQUENCE DATABASE */
+ if (seq_ifile) {
+ nseq=read_fasta(seq_ifile,&seq,do_switch_case,&comment);
+ fclose(seq_ifile);
+ }
+ }
+
+ if (nseq>0) { /* INITIALIZE THE SEQUENCES AND RUN THE ALIGNMENT */
+ fprintf(errfile,"...Read %d sequences from %s...\n",nseq,seq_filename);
+ /*initialize_seqs_as_lpo(nseq,seq,&score_matrix); */
+ if (lpo_in) { /* ADD TO OUR EXISTING ALIGNMENT */
+ lpo_out=buildup_lpo(lpo_in,nseq,seq,&score_matrix,
+ use_aggressive_fusion,
+ use_global_alignment);/*BUILD ALIGNMENT*/
+ lpo_in=NULL; /* DATA STRUCTURE NOW POINTED TO BY lpo_out,
+ SO DON'T FREE IT TWICE!! */
+ }
+ else if (pair_score_file) {
+ lpo_out=buildup_progressive_lpo(nseq,seq,&score_matrix,
+ use_aggressive_fusion,
+ pair_score_file,
+ caties_scoring_function,
+ use_global_alignment);
+ }
+ else /* OTHERWISE JUST BUILDUP ON TOP OF FIRST READ SEQUENCE */
+ lpo_out=buildup_lpo(seq,nseq-1,seq+1,&score_matrix,
+ use_aggressive_fusion,
+ use_global_alignment);
+ }
+ else if (lpo_in) /* FILTERED LPO... CAN PRINT OUT AS LPO */
+ lpo_out=lpo_in;
+ else { /* HMM... NO DATA TO WORK WITH AT ALL. MUST BE AN ERROR. COMPLAIN!*/
+ WARN_MSG(USERR,(ERRTXT,"Error reading sequence file %s and PO file %s. Exiting.",
+ seq_filename? seq_filename: "because none specified",
+ po_filename? po_filename: "because none specified"),"$Revision: 1.2 $");
+ exit_code=1; /* SIGNAL ERROR CONDITION */
+ goto free_memory_and_exit;
+ }
+
+ if (comment) { /* SAVE THE COMMENT LINE AS TITLE OF OUR LPO */
+ FREE(lpo_out->title);
+ lpo_out->title=strdup(comment);
+ }
+
+ if (do_analyze_bundles) /* DIVIDE INTO BUNDLES W/ CONSENSUS */
+ generate_lpo_bundles(lpo_out,bundling_threshold);
+
+ if (po_out) { /* WRITE FINAL PARTIAL ORDER ALIGNMENT TO OUTPUT */
+ if (lpo_file_out=fopen(po_out, "w")) {
+ write_lpo(lpo_file_out,lpo_out,&score_matrix);
+ fclose(lpo_file_out);
+ }
+ else {
+ WARN_MSG(USERR,(ERRTXT,"*** Could not save PO file %s. Exiting.",
+ po_out),"$Revision: 1.2 $");
+ exit_code=1; /* SIGNAL ERROR CONDITION */
+ }
+ }
+
+ if (fasta_out) { /* WRITE FINAL ALIGNMENT IN FASTA FORMAT */
+ if (seq_ifile=fopen(fasta_out,"w")) { /* FASTA ALIGNMENT*/
+ write_lpo_bundle_as_fasta(seq_ifile,lpo_out,score_matrix.nsymbol,
+ score_matrix.symbol,ibundle);
+ fclose(seq_ifile);
+ }
+ else {
+ WARN_MSG(USERR,(ERRTXT,"*** Could not save FASTA file %s. Exiting.",
+ fasta_out),"$Revision: 1.2 $");
+ exit_code=1; /* SIGNAL ERROR CONDITION */
+ }
+ }
+
+ if (clustal_out) { /* WRITE FINAL ALIGNMENT IN FASTA FORMAT */
+ if (seq_ifile=fopen(clustal_out,"w")) { /* FASTA ALIGNMENT*/
+ export_clustal_seqal(seq_ifile,lpo_out,score_matrix.nsymbol,
+ score_matrix.symbol);
+ fclose(seq_ifile);
+ }
+ else {
+ WARN_MSG(USERR,(ERRTXT,"*** Could not save CLUSTAL file %s. Exiting.",
+ fasta_out),"$Revision: 1.2 $");
+ exit_code=1; /* SIGNAL ERROR CONDITION */
+ }
+ }
+
+
+
+ free_memory_and_exit: /* FREE ALL DYNAMICALLY ALLOCATED DATA!!!! */
+ if (dna_lpo)
+ free_lpo_sequence(dna_lpo,TRUE);
+ if (lpo_in)
+ free_lpo_sequence(lpo_in,TRUE);
+ if (lpo_out != seq && lpo_out != lpo_in)
+ free_lpo_sequence(lpo_out,TRUE);
+ LOOPB (i,nseq) /* seq[] WAS ALLOCATED AS ONE ARRAY, SO FREE seq[0] LAST */
+ free_lpo_sequence(seq+i,i==0); /* FREE BLOCK AFTER ALL HOLDERS EMPTY*/
+ exit(exit_code);
+}
+
Added: trunk/packages/poa/branches/upstream/current/multidom.seq
===================================================================
--- trunk/packages/poa/branches/upstream/current/multidom.seq 2006-08-07 14:01:18 UTC (rev 90)
+++ trunk/packages/poa/branches/upstream/current/multidom.seq 2006-08-23 13:45:39 UTC (rev 91)
@@ -0,0 +1,42 @@
+>ABL1_HUMAN PROTO-ONCOGENE TYROSINE-PROTEIN KINASE ABL (EC 2.7.1.112) (P150) (C-ABL).
+MLEICLKLVG CKSKKGLSSS SSCYLEEALQ RPVASDFEPQ GLSEAARWNS KENLLAGPSE
+NDPNLFVALY DFVASGDNTL SITKGEKLRV LGYNHNGEWC EAQTKNGQGW VPSNYITPVN
+SLEKHSWYHG PVSRNAAEYL LSSGINGSFL VRESESSPGQ RSISLRYEGR VYHYRINTAS
+DGKLYVSSES RFNTLAELVH HHSTVADGLI TTLHYPAPKR NKPTVYGVSP NYDKWEMERT
+DITMKHKLGG GQYGEVYEGV WKKYSLTVAV KTLKEDTMEV EEFLKEAAVM KEIKHPNLVQ
+LLGVCTREPP FYIITEFMTY GNLLDYLREC NRQEVNAVVL LYMATQISSA MEYLEKKNFI
+HRDLAARNCL VGENHLVKVA DFGLSRLMTG DTYTAHAGAK FPIKWTAPES LAYNKFSIKS
+DVWAFGVLLW EIATYGMSPY PGIDLSQVYE LLEKDYRMER PEGCPEKVYE LMRACWQWNP
+SDRPSFAEIH QAFETMFQES SISDEVEKEL GKQGVRGAVS TLLQAPELPT KTRTSRRAAE
+HRDTTDVPEM PHSKGQGESD PLDHEPAVSP LLPRKERGPP EGGLNEDERL LPKDKKTNLF
+SALIKKKKKT APTPPKRSSS FREMDGQPER RGAGEEEGRD ISNGALAFTP LDTADPAKSP
+KPSNGAGVPN GALRESGGSG FRSPHLWKKS STLTSSRLAT GEEEGGGSSS KRFLRSCSAS
+CVPHGAKDTE WRSVTLPRDL QSTGRQFDSS TFGGHKSEKP ALPRKRAGEN RSDQVTRGTV
+TPPPRLVKKN EEAADEVFKD IMESSPGSSP PNLTPKPLRR QVTVAPASGL PHKEEAEKGS
+ALGTPAAAEP VTPTSKAGSG APGGTSKGPA EESRVRRHKH SSESPGRDKG KLSRLKPAPP
+PPPAASAGKA GGKPSQSPSQ EAAGEAVLGA KTKATSLVDA VNSDAAKPSQ PGEGLKKPVL
+PATPKPQSAK PSGTPISPAP VPSTLPSASS ALAGDQPSST AFIPLISTRV SLRKTRQPPE
+RIASGAITKG VVLDSTEALC LAISRNSEQM ASHSAVLEAG KNLYTFCVSY VDSIQQMRNK
+FAFREAINKL ENNLRELQIC PATAGSGPAA TQDFSKLLSS VKEISDIVQR
+>CRKL_HUMAN CRK-LIKE PROTEIN.
+MSSARFDSSD RSAWYMGPVS RQEAQTRLQG QRHGMFLVRD SSTCPGDYVL SVSENSRVSH
+YIINSLPNRR FKIGDQEFDH LPALLEFYKI HYLDTTTLIE PAPRYPSPPM GSVSAPNLPT
+AEDNLEYVRT LYDFPGNDAE DLPFKKGEIL VIIEKPEEQW WSARNKDGRV GMIPVPYVEK
+LVRSSPHGKH GNRNSNSYGI PEPAHAYAQP QTTTPLPAVS GSPGAAITPL PSTQNGPVFA
+KAIQKRVPCA YDKTALALEV GDIVKVTRMN INGQWEGEVN GRKGLFPFTH VKIFDPQNPD
+ENE
+>GRB2_HUMAN GROWTH FACTOR RECEPTOR-BOUND PROTEIN 2 (GRB2 ADAPTOR PROTEIN)(SH2)
+MEAIAKYDFK ATADDELSFK RGDILKVLNE ECDQNWYKAE LNGKDGFIPK NYIEMKPHPW
+FFGKIPRAKA EEMLSKQRHD GAFLIRESES APGDFSLSVK FGNDVQHFKV LRDGAGKYFL
+WVVKFNSLNE LVDYHRSTSV SRNQQIFLRD IEQVPQQPTY VQALFDFDPQ EDGELGFRRG
+DFIHVMDNSD PNWWKGACHG QTGMFPRNYV TPVNRNV
+>MATK_HUMAN MEGAKARYOCYTE-ASSOCIATED TYROSINE-PROTEIN KINASE (EC 2.7.1.112) (TYROSINE-PROTEIN KINASE CTK) (PROTEIN KINASE HYL) (HEMATOPOIETIC CONSENSUS TYROSINE-LACKING KINASE).
+MAGRGSLVSW RAFHGCDSAE ELPRVSPRFL RAWHPPPVSA RMPTRRWAPG TQCITKCEHT
+RPKPGELAFR KGDVVTILEA CENKSWYRVK HHTSGQEGLL AAGALREREA LSADPKLSLM
+PWFHGKISGQ EAVQQLQPPE DGLFLVRESA RHPGDYVLCV SFGRDVIHYR VLHRDGHLTI
+DEAVFFCNLM DMVEHYSKDK GAICTKLVRP KRKHGTKSAE EELARAGWLL NLQHLTLGAQ
+IGEGEFGAVL QGEYLGQKVA VKNIKCDVTA QAFLDETAVM TKMQHENLVR LLGVILHQGL
+YIVMEHVSKG NLVNFLRTRG RALVNTAQLL QFSLHVAEGM EYLESKKLVH RDLAARNILV
+SEDLVAKVSD FGLAKAERKG LDSSRLPVKW TAPEALKHGK FTSKSDVWSF GVLLWEVFSY
+GRAPYPKMSL KEVSEAVEKG YRMEPPEGCP GPVHVLMSSC WEAEPARRPP FRKLAEKLAR
+ELRSAGAPAS VSGQDADGST SPRSQEP
Added: trunk/packages/poa/branches/upstream/current/numeric_data.c
===================================================================
--- trunk/packages/poa/branches/upstream/current/numeric_data.c 2006-08-07 14:01:18 UTC (rev 90)
+++ trunk/packages/poa/branches/upstream/current/numeric_data.c 2006-08-23 13:45:39 UTC (rev 91)
@@ -0,0 +1,145 @@
+
+
+#include <lpo.h>
+
+/**@memo create a new LPONumericData_T record for the designated source_seq.
+ Dynamically allocates data storage array equal in length to the length of
+ the source_seq. Initializes the array values to initial_value if non-zero.
+ Returns a pointer to the new LPONumericData_T and also adds it to the
+ source_seq->data[] list.
+*/
+LPONumericData_T *new_numeric_data(LPOSourceInfo_T *source_seq,
+ char name[],
+ char title[],
+ double initial_value)
+{
+ int i;
+ LPONumericData_T *data=NULL;
+ REBUFF(source_seq->data,source_seq->ndata,NUMDATA_BUFFER_CHUNK,LPONumericData_T);
+ data=source_seq->data + source_seq->ndata++;
+
+ STRNCPY(data->name,name,SEQUENCE_NAME_MAX); /* COPY NAME AND TITLE */
+ if (title)
+ data->title=strdup(title);
+
+ CALLOC(data->data,source_seq->length,double); /* ALLOCATE THE ARRAY */
+ if (initial_value) /* INITIALIZE VALUES IF DESIRED */
+ LOOP (i,source_seq->length)
+ data->data[i]=initial_value;
+ return data; /* RETURN POINTER TO THE NEW DATA HOLDER */
+}
+
+
+
+LPONumericData_T *cp_numeric_data(LPOSourceInfo_T *source_seq,
+ LPONumericData_T *data)
+{
+ int i;
+ LPONumericData_T *new_data;
+ new_data=new_numeric_data(source_seq,data->name,data->title,0);
+ LOOP (i,source_seq->length) /* COPY ALL THE VALUES */
+ new_data->data[i]=data->data[i];
+ return new_data;
+}
+
+
+
+/**@memo finds LPONumericData from the source_seq, matching the specified
+ name, or returns NULL if not found. */
+LPONumericData_T *find_numeric_data(LPOSourceInfo_T *source_seq,
+ char name[])
+{
+ int i;
+ LOOP (i,source_seq->ndata)
+ if (0==strcmp(source_seq->data[i].name,name)) /*FOUND IT. RETURN POINTER*/
+ return source_seq->data+i;
+ return NULL; /* NOT FOUND! */
+}
+
+
+
+/**@memo frees the set of numeric_data passed as arguments. If requested,
+ will also free the block of memory for the array of entries data[]. */
+void free_lpo_numeric_data(int ndata,LPONumericData_T *data,
+ int please_free_block)
+{
+ int i;
+ LOOP (i,ndata) { /* DUMP ASSOCIATED ARRAYS */
+ FREE(data[i].title);
+ FREE(data[i].data);
+ }
+ if (please_free_block) /* DUMP THE BLOCK ITSELF */
+ free(data);
+}
+
+
+
+
+
+/**@memo creates one or more new numeric_data for a given sequence,
+ based on the presence of corresponding named numeric_data. Specifically,
+ the list of set_names[] is processed one by one, creating a source_name
+ according to the source_name_fmt string, and finding a numeric_data
+ entry with that name. If it is not found, the routine calls exit(-1).
+ If it is found, a new set of numeric_data is created, with a name
+ created according to target_name_fmt, and titled according to
+ the title_fmt string and source_data->title. */
+void new_numeric_data_sets(LPOSourceInfo_T *source_seq,
+ int nset,char *set_names[],
+ char source_name_fmt[],
+ char target_name_fmt[],
+ char title_fmt[])
+{
+ int j;
+ LPONumericData_T *data;
+ char name[256],title[4096];
+
+ LOOPF (j,nset) {
+ sprintf(name,source_name_fmt,set_names[j]); /* GENERATE NAME TO MATCH*/
+ data=find_numeric_data(source_seq,name); /*FIND SOURCE DATA*/
+ if (!data) {
+ WARN_MSG(USERR,(ERRTXT,"*** could not find dataset %s for seq %s.\nExiting\n\n",
+ name,source_seq->name),"$Revision: 1.2 $");
+ exit(-1);
+ }
+ sprintf(name,target_name_fmt,set_names[j]); /* GENERATE NEW NAME */
+ sprintf(title,title_fmt,data->title);
+ new_numeric_data(source_seq,name,title,0.); /* CREATE NEW ARRAY */
+ }
+}
+
+
+
+
+/**@memo reads a stream of FASTA-formatted numeric data, and stores them in
+the corresponding set of source_seq, matching the sequences by name.
+Multiple numeric data entries can be read from a single stream. */
+void read_numeric_data(int nsource_seq,
+ LPOSourceInfo_T source_seq[],
+ FILE *ifile)
+{
+ int i,j;
+ char line[4096],seq_name[128],data_name[1024],title[2048];
+ LPONumericData_T *data;
+
+ while (fgets(line,sizeof(line),ifile)) {
+ title[0]='\0';
+ if (sscanf(line,">%s NUMERIC_DATA=%s %s",seq_name,data_name,title)>=2) {
+ LOOP (i,nsource_seq) /* FIND THE MATCHING SEQUENCE */
+ if (0==strcmp(seq_name,source_seq[i].name)) /* MATCH */
+ break;
+ if (LOOP_FINISHED(i,nsource_seq)) /* SEQ NOT FOUND!! */
+ WARN_MSG(USERR,(ERRTXT,"Error! NUMERIC_DATA %s, sequence %s does not exist. Skipping.\n\n",data_name,seq_name),"$Revision: 1.2 $");
+ else { /* FOUND THE SEQ, SAVE THE DATA */
+ if (data=find_numeric_data(source_seq+i,data_name)) /*REUSE EXISTING*/
+ WARN_MSG(WARN,(ERRTXT,"NUMERIC_DATA %s already exists on sequence %s. Overwriting.\n",data_name,seq_name),"$Revision: 1.2 $");
+ else /* CREATE A NEW DATA HOLDER */
+ data=new_numeric_data(source_seq+i,data_name,title,0.);
+ LOOPF (j,source_seq[i].length) /* READ IN THE VALUES */
+ fscanf(ifile," %lf",data->data+j);
+ }
+ }
+ }
+}
+
+
Added: trunk/packages/poa/branches/upstream/current/poa.h
===================================================================
--- trunk/packages/poa/branches/upstream/current/poa.h 2006-08-07 14:01:18 UTC (rev 90)
+++ trunk/packages/poa/branches/upstream/current/poa.h 2006-08-23 13:45:39 UTC (rev 91)
@@ -0,0 +1,252 @@
+
+
+#ifndef POA_HEADER_INCLUDED
+#define POA_HEADER_INCLUDED
+
+
+
+
+/** MAXIMUM GAP LENGTH TRACKED IN align_lpo;
+ LARGER THAN THIS WILL BE CAPPED
+ (DEFAULT VALUE) */
+#ifndef TRUNCATE_GAP_LENGTH
+#define TRUNCATE_GAP_LENGTH 16
+#endif
+
+/** LENGTH OVER WHICH GAP PENALTY DECAYS IN align_lpo
+ (DEFAULT VALUE) */
+#ifndef DECAY_GAP_LENGTH
+#define DECAY_GAP_LENGTH 0
+#endif
+
+#define REV_COMP_STRING "/rev_comp"
+
+
+/** THE NULL LETTER-REFERENCE */
+#define INVALID_LETTER_POSITION (-1)
+
+typedef int LPOLetterRef_T;
+
+
+typedef int LPOScore_T;
+
+ /** NEEDED FOR seq_util.h */
+typedef LPOScore_T ResidueScore_T;
+#define RESIDUE_SCORE_DEFINED
+
+/** linked list for storing source origin (sequence position)
+ from which this letter was derived */
+struct LPOLetterSource_S {
+ /** index of the sequence, referencing the source_seq[] array*/
+ int iseq;
+ /** index of the corresponding position in that sequence */
+ LPOLetterRef_T ipos;
+ /** next node in the linked list */
+ struct LPOLetterSource_S *more;
+} ;
+
+typedef struct LPOLetterSource_S LPOLetterSource_T;
+
+
+/** linked list for connecting an LPOLetter to either right
+ or left */
+struct LPOLetterLink_S {
+ /** ADJACENT LETTER LINKED TO THIS LETTER */
+ LPOLetterRef_T ipos;
+#ifdef USE_WEIGHTED_LINKS
+ /** transition cost for traversing this link */
+ LPOScore_T score;
+#endif
+ /** next node in the linked list */
+ struct LPOLetterLink_S *more;
+} ;
+
+typedef struct LPOLetterLink_S LPOLetterLink_T;
+
+
+/** the chunk size for allocating additional
+ letters in an LPOLetter_T array */
+#define LPO_LETTER_BUFFER_CHUNK 64
+
+
+/** Structure for storing individual LPO Letters*/
+struct LPOLetter_S {
+ /** ADJACENT LETTER(S) TO THE LEFT */
+ LPOLetterLink_T left;
+ /** ADJACENT LETTER(S) TO THE RIGHT */
+ LPOLetterLink_T right;
+ /** SOURCE SEQ POSITION(S) */
+ LPOLetterSource_T source;
+ /** CIRCULAR LIST OF ALIGNED POSITIONS */
+ LPOLetterRef_T align_ring;
+ /** MINIMUM INDEX OF ALL POSITIONS ON THE RING */
+ LPOLetterRef_T ring_id;
+ /** SCORE FOR BALANCING PARTIAL ORDER EFFECTS ON MATRIX NEUTRALITY */
+ float score;
+ /** THE ACTUAL RESIDUE CODE! */
+ char letter;
+} ;
+
+
+
+typedef struct LPOLetter_S LPOLetter_T;
+
+
+/** maximum length of a sequence name */
+#define SEQUENCE_NAME_MAX 32
+/** buffer chunk size for expanding a block of seq storage */
+#define SEQUENCE_BUFFER_CHUNK 8
+
+/** buffer chunk size for expanding a source_seq[] array */
+#define SOURCE_SEQ_BUFFER_CHUNK 16
+
+
+
+#define NUMDATA_BUFFER_CHUNK 4
+/** storage for quantitative data attached to a sequence */
+struct LPONumericData_S { /** */
+ char name[SEQUENCE_NAME_MAX]; /** */
+ char *title; /** */
+ double *data;
+};
+typedef struct LPONumericData_S LPONumericData_T;
+
+
+/** Structure for storing individual source sequence information,
+ stuff like name, title etc. */
+struct LPOSourceInfo_S { /** */
+ char name[SEQUENCE_NAME_MAX]; /** */
+ char *title; /** */
+ char *sequence; /** */
+ int *seq_to_po; /** */
+ int *po_to_seq; /** */
+ LPONumericData_T *data; /** */
+ int ndata; /** */
+ int length; /** */
+ int istart;
+ /** FOR PURPOSES OF HEAVIEST BUNDLE CALCULATION */
+ int weight;
+ /** WHAT BUNDLE IS THIS A MEMBER OF? */
+ int bundle_id;
+};
+
+typedef struct LPOSourceInfo_S LPOSourceInfo_T;
+
+/** the NULL bundle-reference */
+#define NO_BUNDLE (-1)
+
+/** bundle-reference meaning "include all bundles" */
+#define ALL_BUNDLES (-1)
+
+
+/** holder for an LPO sequence, its letters,
+ and associated information */
+struct LPOSequence_S {/** */
+ int length;/** */
+ LPOLetter_T *letter;/** */
+ char *title;/** */
+ char *sequence;/** */
+ char name[SEQUENCE_NAME_MAX];/** */
+ int nsource_seq;/** */
+ LPOSourceInfo_T *source_seq;
+};
+
+typedef struct LPOSequence_S LPOSequence_T;
+
+
+typedef LPOSequence_T Sequence_T;
+
+
+/**@memo GENERAL FORM IS seq_y[j].left.ipos */
+#define SEQ_Y_LEFT(j) (j-1)
+#define SEQ_Y_RIGHT(j) (j+1)
+
+
+
+/**@memo Data structure for analyzing sequence differences in MSA*/
+struct LPOLetterCount_S {
+ unsigned int is_error:2;
+ unsigned int meets_criteria:1;
+ unsigned int seq_count:29;
+};
+
+typedef struct LPOLetterCount_S LPOLetterCount_T;
+
+/** classification of sequence differences */
+enum {
+ no_error,
+ substitution_error,
+ insertion_error,
+ deletion_error,
+ max_error_states
+};
+
+
+
+
+
+
+
+/** DON'T ALLOCATE MORE THAN THIS TOTAL AMOUNT OF MEMORY
+---------------------------------------------------------------
+---------------------------------------------------------------
+*/
+#define POA_MAX_ALLOC 300000000
+
+
+#endif
+
+
+
+
+
+
+/**@name The lpo library*/
+/*@{*/
+/**@memo This set of web pages documents the functionality of the lpo
+function library. This is a set of C functions for reading, writing,
+creating, manipulating, and aligning partial order sequences. These
+functions divide into several groups:
+\begin{itemize}
+\item \URL[File utilities]{General.html#read_fasta}: reading and writing
+FASTA and po files
+\item \URL[lpo utilities]{General.html#add_lpo_link}: creating, fusing,
+freeing, manipulating lpo data
+\item \URL[alignment]{General.html#align_lpo}: aligning one or more linear
+sequences to an lpo
+\item analysis: analyzing lpo structure, e.g. to find consensus
+\end{itemize}
+*/
+
+/**@memo Click \URL[here]{../poa} for more information about partial
+order alignment.*/
+
+/*@}*/
+
+
+
+/**@name linking to the lpo library */
+/*@{*/
+/**@memo To use function from this library in your code, you must
+ do two things. First you must
+include <lpo.h> in your source files, to access the prototypes.
+ Second, when you compile, you must tell the compiler where the
+ lpo header and library files are located. {\bfNB it appears gcc
+loads libraries in reverse order of the command line arguments, so
+you have to specify your source files BEFORE the -llpo library argument
+on the command line, or the linker will give you unresolved reference
+errors}. e.g. \begin{verbatim}
+
+gcc -o myprog myfile.c -I~leec/lib/include -L~leec/lib -llpo
+\end{verbatim}
+*/
+
+/**@memo Click \URL[here]{../poa} for more information about partial
+order alignment.*/
+
+/*@}*/
+
+
+
+
+
Added: trunk/packages/poa/branches/upstream/current/project.h
===================================================================
--- trunk/packages/poa/branches/upstream/current/project.h 2006-08-07 14:01:18 UTC (rev 90)
+++ trunk/packages/poa/branches/upstream/current/project.h 2006-08-23 13:45:39 UTC (rev 91)
@@ -0,0 +1,11 @@
+
+#ifndef PROGRAM_NAME
+#define PROGRAM_NAME "poa"
+#endif
+#ifndef PROGRAM_VERSION
+#define PROGRAM_VERSION "v1.0.0"
+#endif
+
+
+#include "poa.h"
+
Added: trunk/packages/poa/branches/upstream/current/remove_bundle.c
===================================================================
--- trunk/packages/poa/branches/upstream/current/remove_bundle.c 2006-08-07 14:01:18 UTC (rev 90)
+++ trunk/packages/poa/branches/upstream/current/remove_bundle.c 2006-08-23 13:45:39 UTC (rev 91)
@@ -0,0 +1,158 @@
+
+
+
+#include "default.h"
+#include "poa.h"
+#include "seq_util.h"
+#include "lpo.h"
+
+
+
+
+int compact_links(LPOLetterLink_T *list,int old_to_new[])
+{
+ LPOLetterLink_T *link=NULL,*link_last=NULL,*next_link,*link_head=NULL;
+
+ CALLOC(link,1,LPOLetterLink_T);
+ memcpy(link,list,sizeof(LPOLetterLink_T));
+
+ for (;link && link->ipos>=0;link=next_link){
+ next_link=link->more;
+ if (old_to_new[link->ipos]<0) /* THIS POSITION NO LONGER EXISTS! */
+ free(link); /* DELETE THIS LINK ENTRY */
+ else { /* COPY THIS BACK TO PREVIOUS LINK ENTRY: COMPACT THE LIST*/
+ link->ipos = old_to_new[link->ipos]; /* REMAP TO NEW INDEX SYSTEM */
+ if (link_last) /* CONNECT TO PREVIOUS NODE IN LIST */
+ link_last->more=link;
+ else /* THIS IS THE NEW HEAD OF THE LIST */
+ link_head=link;
+ link_last=link;
+ }
+ }
+ if (link) /* AN EMPTY LINK I.E. link->ipos<0 ... JUNK IT */
+ free(link);
+ if (link_last) /* TERMINATE LAST NODE IN LIST */
+ link_last->more=NULL;
+ if (link_head) {
+ memcpy(list,link_head,sizeof(LPOLetterLink_T));
+ free(link_head);
+ return 1; /* COMPACTED LINK LIST IS NON-EMPTY */
+ }
+ else { /* NOTHING LEFT IN LIST, SO BLANK IT */
+ list->more=NULL;
+ list->ipos= INVALID_LETTER_POSITION;
+ return 0; /* COMPACTED LINK LIST IS EMPTY */
+ }
+}
+
+
+
+
+
+
+
+int compact_sources(LPOLetterSource_T *list,int ibundle_delete,
+ LPOSourceInfo_T source_seq[])
+{
+ LPOLetterSource_T *source=NULL,*source_last=NULL,*next_source,*source_head=NULL;
+
+ CALLOC(source,1,LPOLetterSource_T);
+ memcpy(source,list,sizeof(LPOLetterSource_T));
+
+ for (;source;source=next_source){
+ next_source=source->more;
+ if (source_seq[source->iseq].bundle_id == ibundle_delete)
+ free(source); /* DELETE THIS SOURCE ENTRY */
+ else { /* COPY THIS BACK TO PREVIOUS SOURCE ENTRY: COMPACT THE LIST*/
+ if (source_last) /* CONNECT TO PREVIOUS NODE IN LIST */
+ source_last->more=source;
+ else /* THIS IS THE NEW HEAD OF THE LIST */
+ source_head=source;
+ source_last=source;
+ }
+ }
+ if (source_last) /* TERMINATE LAST NODE IN LIST */
+ source_last->more=NULL;
+ if (source_head) {
+ memcpy(list,source_head,sizeof(LPOLetterSource_T));
+ free(source_head);
+ return 1; /* COMPACTED SOURCE LIST IS NON-EMPTY */
+ }
+ else { /* NOTHING LEFT IN LIST, SO BLANK IT */
+ list->more=NULL;
+ list->ipos= INVALID_LETTER_POSITION;
+ return 0; /* COMPACTED SOURCE LIST IS EMPTY */
+ }
+}
+
+
+
+
+
+
+void reindex_compact_rings(LPOSequence_T *seq)
+{
+ int i,iring= -1,ring_start= -1;
+ LOOPF (i,seq->length) { /* REMOVE ALL LINKS TO OLD, DELETED POSITIONS */
+ if (iring==seq->letter[i].ring_id) {
+ seq->letter[i].ring_id=ring_start; /* USE FIRST INDEX ON RING */
+ seq->letter[i].align_ring= i-1; /* LINK TO LETTER TO LEFT */
+ }
+ else { /* START OF A NEW RING */
+ if (ring_start>=0)
+ seq->letter[ring_start].align_ring = i-1;
+ iring=seq->letter[i].ring_id;
+ seq->letter[i].ring_id=seq->letter[i].align_ring=ring_start=i;
+ }
+ }
+ if (ring_start>=0)
+ seq->letter[ring_start].align_ring = i-1;
+}
+
+
+
+
+#define DELETE_THIS_BUNDLE (-2)
+
+int remove_bundle(LPOSequence_T *seq,int ibundle,int delete_all_others)
+{
+ int i,j=0,*old_to_new=NULL,new_length;
+
+ if (delete_all_others) { /* INSTEAD OF DELETING THIS BUNDLE, */
+ LOOP (i,seq->nsource_seq) /*MARK ALL OTHERS TO BE DELETED! */
+ if (seq->source_seq[i].bundle_id!=ibundle)
+ seq->source_seq[i].bundle_id = DELETE_THIS_BUNDLE;
+ ibundle=DELETE_THIS_BUNDLE;
+ }
+
+ CALLOC(old_to_new,seq->length,int); /* CREATE MAPPING ARRAY */
+ LOOPF (i,seq->length) {
+ if (compact_sources(&seq->letter[i].source,ibundle,seq->source_seq)){
+ if (i>j) /* COPY LETTER TO COMPACTED POSITION */
+ memcpy(seq->letter+j,seq->letter+i,sizeof(LPOLetter_T));
+ old_to_new[i]=j++; /* SAVE MAPPING FROM OLD TO NEW, COMPACTED POSITION*/
+ }
+ else {
+ free_lpo_letters(1,seq->letter+i,FALSE); /* DUMP DATA FOR THIS LETTER */
+ old_to_new[i]= INVALID_LETTER_POSITION;
+ }
+ }
+ new_length=j;
+ if (new_length<seq->length) /* ERASE UNUSED PORTIONS AFTER COMPACTED ARRAY*/
+ memset(seq->letter+new_length,0,(seq->length - new_length)*sizeof(LPOLetter_T));
+
+ LOOP (i,new_length) { /* REMOVE ALL LINKS TO OLD, DELETED POSITIONS */
+ compact_links(&seq->letter[i].left,old_to_new);
+ compact_links(&seq->letter[i].right,old_to_new);
+ }
+ seq->length=new_length;
+ reindex_compact_rings(seq);
+
+ FREE(old_to_new);
+ return new_length;
+}
+
+
+
+
+
Added: trunk/packages/poa/branches/upstream/current/seq_util.c
===================================================================
--- trunk/packages/poa/branches/upstream/current/seq_util.c 2006-08-07 14:01:18 UTC (rev 90)
+++ trunk/packages/poa/branches/upstream/current/seq_util.c 2006-08-23 13:45:39 UTC (rev 91)
@@ -0,0 +1,265 @@
+
+#include "default.h"
+#include "seq_util.h"
+
+
+
+/** randomizes seq[] by shuffling, and places the result in randseq[];
+ if randseq[] and seq[] are distinct, seq[] is left unchanged */
+void shuffle_seq(int len,
+ char seq[],
+ char randseq[])
+{
+ int i,j;
+ char c;
+
+ for (i=0;i<len;i++) {
+ j=rand()%len; /* CHOOSE RANDOM POSITION j */
+ c=seq[i]; /* SWAP POSITIONS i AND j */
+ randseq[i]=seq[j];
+ randseq[j]=c;
+ }
+
+ return;
+}
+
+
+
+
+
+
+/**@memo TRANSLATE FROM ASCII LETTERS TO COMPACTED NUMBERICAL INDEX:
+ index_symbols(seq[i].length,seq[i].sequence,seq[i].sequence,
+ m->nsymbol,m->symbol);
+*/
+/** converts characters in seq[] to the INDEX of the matching character in
+ symbols[], and returns the result in out[] */
+void index_symbols(int nseq,char seq[],char out[],
+ int nsymbs,char symbols[])
+{
+ int i,j,k;
+ LOOP (i,nseq) {
+ k=nsymbs-1; /* DEFAULT: UNMATCHABLE SYMBOL */
+ LOOP (j,nsymbs) { /* FIND MATCHING SYMBOL */
+ if (symbols[j]==seq[i]) { /* FOUND IT! */
+ k=j;
+ break;
+ }
+ }
+ out[i]=k; /* SAVE THE TRANSLATED CODE */
+ }
+ return;
+}
+
+
+
+
+
+
+int *Score_matrix_row=NULL;
+
+int best_match_qsort_cmp(const void *void_a,const void *void_b)
+{
+ int *a=(int *)void_a,*b=(int *)void_b;
+
+ if (Score_matrix_row[*a]>Score_matrix_row[*b])
+ return -1;
+ else if (Score_matrix_row[*a]<Score_matrix_row[*b])
+ return 1;
+ else /* EQUAL */
+ return 0;
+}
+
+
+
+
+#ifdef SOURCE_EXCLUDED
+char DNA_symbols[1024];
+float DNA_rescale_score;
+#endif
+
+/** reads an alignment scoring matrix in the pam format */
+int read_score_matrix(char filename[],ResidueScoreMatrix_T *m)
+{
+ int i,j,k,nsymb=0,found_symbol_line=0,isymb;
+ char line[1024],dna_codes[256];
+ FILE *ifile;
+
+ /* GAP PENALTY DEFAULTS */
+ m->gap_penalty_set[0][0]=m->gap_penalty_set[1][0]=12; /*SAVE PENALTIES*/
+ m->gap_penalty_set[0][1]=m->gap_penalty_set[1][1]=2;
+ m->gap_penalty_set[0][2]=m->gap_penalty_set[1][2]=0;
+ m->trunc_gap_length = TRUNCATE_GAP_LENGTH;
+ m->decay_gap_length = DECAY_GAP_LENGTH;
+
+ ifile=fopen(filename,"r");
+ if (!ifile) {
+ WARN_MSG(USERR,(ERRTXT,"Can't open alignment matrix from %s\n",filename),"$Revision: 1.2.2.1 $");
+ return -2; /* FAILED TO FIND FILE TO READ */
+ }
+
+ while (fgets(line,1023,ifile)) {
+ if ('#'==line[0] || '\n'==line[0]) /* SKIP COMMENT OR BLANK LINES */
+ continue;
+
+ else if (1==sscanf(line,"GAP-TRUNCATION-LENGTH=%d",&i)) {
+ m->trunc_gap_length = i;
+ }
+
+ else if (1==sscanf(line,"GAP-DECAY-LENGTH=%d",&i)) {
+ m->decay_gap_length = i;
+ }
+
+ else if (3==sscanf(line,"GAP-PENALTIES=%d %d %d",&i,&j,&k)) {
+ m->gap_penalty_set[0][0]=m->gap_penalty_set[1][0]=i; /*SAVE PENALTIES*/
+ m->gap_penalty_set[0][1]=m->gap_penalty_set[1][1]=j;
+ m->gap_penalty_set[0][2]=m->gap_penalty_set[1][2]=k;
+ }
+
+ else if (3==sscanf(line,"GAP-PENALTIES-X=%d %d %d",&i,&j,&k)) {
+ m->gap_penalty_set[1][0]=i; /*SAVE PENALTIES ONLY FOR X DIRECTION*/
+ m->gap_penalty_set[1][1]=j;
+ m->gap_penalty_set[1][2]=k;
+ }
+
+#ifdef SOURCE_EXCLUDED
+ else if (1==sscanf(line,"DNACODES=%99s",dna_codes)) { /* READ DNACODES*/
+ strcpy(DNA_symbols,dna_codes);/*SYMBOLS COUNTED AS DNA FOR AUTORECOG*/
+ }
+
+ else if (1==sscanf(line,"DNASCALE=%f",&DNA_rescale_score))
+ continue;
+#endif
+
+ else if (!found_symbol_line) { /* READ THIS LINE AS LIST OF SEQ SYMBOLS*/
+ for (i=0;'\0'!=line[i];i++)
+ if (!isspace(line[i])) /* IGNORE WHITESPACE */
+ m->symbol[nsymb++]=line[i]; /* SAVE TO LIST OF SYMBOLS */
+ found_symbol_line=1; /* SET FLAG SO WE NOW READ MATRIX SCORE VALUES */
+ }
+
+ else { /* READ SCORING MATRIX LINES */
+ found_symbol_line=0; /* DEFAULT: FAILED TO FIND MATCHING SYMBOL IN LIST*/
+ LOOP (isymb,nsymb) /* FIND MATCH TO THIS SYMBOL */
+ if (m->symbol[isymb]==line[0]) {
+ found_symbol_line=1; /* SIGNAL THAT WE SUCCESFULLY FOUND MATCH */
+ j=1; /* SKIP FIRST CHARACTER: OUR SEQUENCE SYMBOL */
+ LOOPF (i,nsymb) { /* READ ALL THE SCORE VALUES ON THIS LINE */
+ if (1==sscanf(line+j,"%d%n",&(m->score[isymb][i]),&k))
+ j+=k; /* ADVANCE THE READING POSITION */
+ else { /* MISSING SCORE DATA: ERROR! */
+ IF_GUARD(1,5.23,(ERRTXT,"Missing score value for pair %c:%c",
+ m->symbol[isymb],m->symbol[i]),TRAP)
+ ;
+ fclose(ifile); /* CLOSE OUR STREAM */
+ return -1;
+ }
+ }
+ break;
+ }
+ IF_GUARD(!found_symbol_line,1.5,(ERRTXT,"Missing or unknown sequence symbol: %c",line[0]),TRAP) { /* ERROR: AN INVALID SYMBOL, NOT IN LIST */
+ fclose(ifile); /* CLOSE OUR STREAM */
+ return -1;
+ }
+ }
+ }
+ fclose(ifile);
+
+ LOOPF (i,nsymb) {
+ Score_matrix_row= m->score[i]; /* ROW TO USE FOR SORTING best_match */
+ LOOP (j,nsymb)
+ m->best_match[i][j] = j;
+ qsort(m->best_match[i],nsymb,sizeof(m->best_match[0][0]),
+ best_match_qsort_cmp);
+#ifdef SOURCE_EXCLUDED
+ printf("%c SORT",m->symbol[i]); /* TEST: PRINT OUT SORTED TABLE */
+ LOOPF (j,nsymb)
+ printf("\t%c:%d",m->symbol[m->best_match[i][j]],
+ m->score[i][m->best_match[i][j]]);
+ printf("\n");
+#endif
+ }
+
+ m->symbol[nsymb]='\0'; /* TERMINATE THE SYMBOL STRING */
+ m->nsymbol=nsymb;
+ return nsymb;
+}
+
+
+
+/** prints a scoring matrix, only including those symbols in subset[] */
+void print_score_matrix(FILE *ifile,ResidueScoreMatrix_T *m,char subset[])
+{
+ int i,i_m,j,j_m,nsubset;
+
+ nsubset=strlen(subset);
+
+ printf(" ");
+ LOOPF (i,nsubset)
+ printf(" %c",subset[i]);
+ printf("\n");
+
+ LOOPF (i,nsubset) {
+ LOOP (i_m,m->nsymbol)
+ if (m->symbol[i_m]==subset[i])
+ break;
+ printf("%c",subset[i]);
+ LOOPF (j,nsubset) {
+ LOOP (j_m,m->nsymbol)
+ if (m->symbol[j_m]==subset[j])
+ break;
+ printf("%3d",m->score[i_m][j_m]);
+ }
+ printf("\n");
+ }
+ return;
+}
+
+
+
+/** restricts seq[] to the set of allowed characters given by symbol[];
+ other characters will be replaced by the default symbol[0] */
+int limit_residues(char seq[],char symbol[])
+{
+ int i,len,nreplace=0;
+
+ len=strlen(seq);
+ for (i=strspn(seq,symbol);i<len;i=i+1+strspn(seq+i+1,symbol)) {
+ seq[i]=symbol[0]; /* FORCE IT TO BE A VALID SYMBOL */
+ nreplace++; /* COUNT TOTAL REPLACEMENTS */
+ }
+ return nreplace;
+}
+
+
+
+
+
+/** RETURNS THE COMPLEMENTARY BASE *IN LOWER CASE* */
+char complementary_base(char base)
+{
+ switch (base) {
+ case 'A': case 'a': return 't';
+ case 'T': case 't': case 'U': case 'u': return 'a';
+ case 'G': case 'g': return 'c';
+ case 'C': case 'c': return 'g';
+ default: return base;
+ }
+}
+
+/** REVERSE COMPLEMENTS seq[] IN PLACE, AND RETURNS POINTER TO seq
+---------------------------------------------------------------
+-----------------------------------------------------------
+*/
+char *reverse_complement(char seq[])
+{
+ int i,j;
+ char c;
+ for ((i=0),(j=strlen(seq)-1);i<=j;i++,j--) {/* SWAP FROM ENDS TO CENTER*/
+ c=complementary_base(seq[i]); /* SWAP ENDS AND COMPLEMENT */
+ seq[i]=complementary_base(seq[j]);
+ seq[j]=c;
+ }
+ return seq;
+}
+
Added: trunk/packages/poa/branches/upstream/current/seq_util.h
===================================================================
--- trunk/packages/poa/branches/upstream/current/seq_util.h 2006-08-07 14:01:18 UTC (rev 90)
+++ trunk/packages/poa/branches/upstream/current/seq_util.h 2006-08-23 13:45:39 UTC (rev 91)
@@ -0,0 +1,72 @@
+
+#ifndef SEQ_UTIL_HEADER_INCLUDED
+#define SEQ_UTIL_HEADER_INCLUDED
+
+/* NB: YOU *MUST* TYPEDEF Sequence_T ELSEWHERE, IN ORDER FOR THE PROTOTYPES
+ BELOW TO WORK... */
+
+/* USE_PROJECT_HEADER IS A GENERAL WAY TO SNEAK IN CUSTOMIZATION
+ BEFORE PROCESSING DEFINITIONS IN THIS FILE */
+#ifdef USE_PROJECT_HEADER
+#include "project.h"
+#endif
+
+#ifndef RESIDUE_SCORE_DEFINED
+typedef int ResidueScore_T; /* DEFAULT: USE int FOR SCORING */
+#endif
+
+
+
+
+#define FASTA_NAME_MAX 4096 /*define the max length of the name */
+#define SEQ_LENGTH_MAX 32768 /* define the maximum length of each seq */
+
+enum {
+ dont_switch_case,
+ switch_case_to_lower,
+ switch_case_to_upper
+};
+
+
+
+#define MATRIX_SYMBOL_MAX 128
+typedef struct {
+ int nsymbol;
+ char symbol[MATRIX_SYMBOL_MAX];
+ ResidueScore_T score[MATRIX_SYMBOL_MAX][MATRIX_SYMBOL_MAX];
+ int best_match[MATRIX_SYMBOL_MAX][MATRIX_SYMBOL_MAX];
+ ResidueScore_T gap_penalty_set[2][3];
+ int trunc_gap_length;
+ int decay_gap_length;
+ int nfreq; /* STORE FREQUENCIES OF AMINO ACIDS FOR BALANCING MATRIX...*/
+ char freq_symbol[MATRIX_SYMBOL_MAX];
+ float freq[MATRIX_SYMBOL_MAX];
+} ResidueScoreMatrix_T;
+
+/****************************************************** seq_util.c */
+void shuffle_seq(int len,
+ char seq[],
+ char randseq[]);
+
+void index_symbols(int nseq,char seq[],char out[],
+ int nsymbs,char symbols[]);
+
+int read_score_matrix(char filename[],ResidueScoreMatrix_T *m);
+
+void print_score_matrix(FILE *ifile,ResidueScoreMatrix_T *m,char subset[]);
+
+int limit_residues(char seq[],char symbol[]);
+
+int create_seq(int nseq,Sequence_T **seq,char seq_name[],char seq_title[],char tmp_seq[],int do_switch_case);
+
+char *reverse_complement(char seq[]);
+
+
+
+/******************************************************* fasta_format.c */
+int read_fasta(FILE *seq_file,Sequence_T **seq,
+ int do_switch_case,char **comment);
+
+void write_fasta(FILE *ifile,char name[],char title[],char seq[]);
+
+#endif
Added: trunk/packages/poa/branches/upstream/current/stringptr.c
===================================================================
--- trunk/packages/poa/branches/upstream/current/stringptr.c 2006-08-07 14:01:18 UTC (rev 90)
+++ trunk/packages/poa/branches/upstream/current/stringptr.c 2006-08-23 13:45:39 UTC (rev 91)
@@ -0,0 +1,95 @@
+
+#include "default.h"
+
+
+/*******************************************************************
+ *
+ * STRINGPTR_CAT:
+ * holds string and count of last alloc'd size. Conveniently
+ * manages static and dynamic strings, allowing auto-resize
+ * without worrying about memory management.
+ *
+ * This function adds any string to the string ptr. It even
+ * insulates against possibility that S2 is a substring of S1
+ * by pre-copying it.
+ *
+ ******************************************************************/
+
+char *stringptr_cat_pos(stringptr *s1,const char s2[],int *pos) /* ~~g --- */
+{
+ int s2_len,total_len;
+ char *stringptr_cat_temp_p=NULL,*s2_temp=NULL;
+
+ if (s2 == NULL) /* it might be better to make this a debug cope */
+ return s1->p;
+
+ if (0 == s1->last_alloc) /* SAVE STATIC STRING */
+ stringptr_cat_temp_p = s1->p; /* SAVE OLD */
+
+ total_len=s2_len=strlen(s2)+1;
+ CALLOC(s2_temp,s2_len,char); /* ALLOCATE TEMP STORAGE */
+ memcpy(s2_temp,s2,s2_len); /* COPY THE STRING */
+
+ if (s1->p) { /* CALCULATE ADDITIONAL SPACE NEEDED FOR ORIGINAL STRING */
+ if (pos) /* USE THE CALLER-SUPPLIED STRING LENGTH */
+ total_len += *pos;
+ else /* OTHERWISE MEASURE IT */
+ total_len += strlen(s1->p);
+ }
+
+ GETMEM(s1->p,total_len,
+ s1->last_alloc,STRINGPTR_BUFFER_CHUNK,char);
+
+ if (stringptr_cat_temp_p) /* PUT OLD STATIC STRING BACK IN */
+ strcpy(s1->p,stringptr_cat_temp_p);
+
+ if (pos) { /* ATTACH NEW STRING AT CALLER-SUPPLIED END-POSITION */
+ strcpy(s1->p + *pos,s2_temp);
+ *pos = total_len-1; /* EXCLUDE TERMINATOR */
+ }
+ else /* OTHERWISE strcat AS USUAL */
+ strcat(s1->p,s2_temp);
+
+ FREE(s2_temp); /* FREE THE TEMP STORAGE */
+
+ return s1->p;
+}
+
+
+char *stringptr_cat(stringptr *s1,const char s2[]) /* ~~g --- */
+{
+ return stringptr_cat_pos(s1,s2,NULL);
+}
+
+
+char *stringptr_cpy(stringptr *s1,const char s2[]) /* ~~g --- */
+{
+ GETMEM(s1->p,strlen(s2)+1,s1->last_alloc,STRINGPTR_BUFFER_CHUNK,char);
+ strcpy(s1->p,s2);
+
+ return s1->p;
+}
+
+
+
+
+
+
+/**stringptr_free*************************************************
+ *
+ * stringptr_free:
+ * AUTHOR: tal
+ * Wed Jul 27 03:04:41 PDT 1994
+ *
+ * Frees stringptr type.
+ *
+ ***************************************************************/
+
+int stringptr_free(stringptr *s) /* ~~g --- */
+{
+ FREE(s->p);
+ s->last_alloc=0;
+ return 0;
+}
+
+
More information about the debian-med-commit
mailing list