[med-svn] r1652 - in trunk/packages/poa/trunk: . debian
tille at alioth.debian.org
tille at alioth.debian.org
Sat Mar 22 17:20:39 UTC 2008
Author: tille
Date: 2008-03-22 17:20:38 +0000 (Sat, 22 Mar 2008)
New Revision: 1652
Removed:
trunk/packages/poa/trunk/Makefile
trunk/packages/poa/trunk/README
trunk/packages/poa/trunk/align_lpo.c
trunk/packages/poa/trunk/align_lpo2.c
trunk/packages/poa/trunk/align_lpo_po.c
trunk/packages/poa/trunk/align_lpo_po2.c
trunk/packages/poa/trunk/align_score.c
trunk/packages/poa/trunk/align_score.h
trunk/packages/poa/trunk/black_flag.c
trunk/packages/poa/trunk/black_flag.h
trunk/packages/poa/trunk/blosum80.mat
trunk/packages/poa/trunk/blosum80_trunc.mat
trunk/packages/poa/trunk/buildup_lpo.c
trunk/packages/poa/trunk/create_seq.c
trunk/packages/poa/trunk/default.h
trunk/packages/poa/trunk/fasta_format.c
trunk/packages/poa/trunk/heaviest_bundle.c
trunk/packages/poa/trunk/lpo.c
trunk/packages/poa/trunk/lpo.h
trunk/packages/poa/trunk/lpo_format.c
trunk/packages/poa/trunk/main.c
trunk/packages/poa/trunk/make_pscores.pl
trunk/packages/poa/trunk/msa_format.c
trunk/packages/poa/trunk/msa_format.h
trunk/packages/poa/trunk/multidom.pscore
trunk/packages/poa/trunk/multidom.seq
trunk/packages/poa/trunk/numeric_data.c
trunk/packages/poa/trunk/poa.h
trunk/packages/poa/trunk/project.h
trunk/packages/poa/trunk/remove_bundle.c
trunk/packages/poa/trunk/seq_util.c
trunk/packages/poa/trunk/seq_util.h
trunk/packages/poa/trunk/stringptr.c
Modified:
trunk/packages/poa/trunk/debian/changelog
trunk/packages/poa/trunk/debian/control
Log:
Removed upstream source according group policy, added myself to uploaders
Deleted: trunk/packages/poa/trunk/Makefile
===================================================================
--- trunk/packages/poa/trunk/Makefile 2008-03-22 15:38:40 UTC (rev 1651)
+++ trunk/packages/poa/trunk/Makefile 2008-03-22 17:20:38 UTC (rev 1652)
@@ -1,51 +0,0 @@
-
-AR=ar rc
-
-TARGETS=poa liblpo.a poa_doc libbflag.a
-
-# align_score.c CAN BE USED TO ADD CUSTOMIZED SCORING FUNCTIONS
-OBJECTS= \
- align_score.o \
- main.o
-
-
-LIBOBJECTS= \
- black_flag.o \
- seq_util.o \
- fasta_format.o \
- msa_format.o \
- align_lpo2.o \
- align_lpo_po2.o \
- buildup_lpo.o \
- lpo.o \
- heaviest_bundle.o \
- lpo_format.o \
- create_seq.o \
- remove_bundle.o \
- numeric_data.o \
- stringptr.o
-
-
-CC = gcc
-#CFLAGS= -g -ansi-strict -W -Wall -DUSE_WEIGHTED_LINKS -DUSE_PROJECT_HEADER -I.
-CFLAGS= -g -DUSE_WEIGHTED_LINKS -DUSE_PROJECT_HEADER -I.
-# -I$(HOME)/lib/include
-# -DREPORT_MAX_ALLOC
-
-clean:
- rm -f $(OBJECTS) $(LIBOBJECTS) $(TARGETS)
-
-liblpo.a: $(LIBOBJECTS)
- rm -f $@
- $(AR) $@ $(LIBOBJECTS)
- ranlib $@
-
-
-
-# NB: LIBRARY MUST FOLLOW OBJECTS OR LINK FAILS WITH UNRESOLVED REFERENCES!!
-poa: $(OBJECTS) liblpo.a
- $(CC) -o $@ $(OBJECTS) -lm liblpo.a
-
-what:
- @echo poa: partial-order based sequence alignment program
- @echo liblpo.a: partial-order alignment and utilities function library
Deleted: trunk/packages/poa/trunk/README
===================================================================
--- trunk/packages/poa/trunk/README 2008-03-22 15:38:40 UTC (rev 1651)
+++ trunk/packages/poa/trunk/README 2008-03-22 17:20:38 UTC (rev 1652)
@@ -1,320 +0,0 @@
-
--- POA INSTALLATION NOTES --
-September 2001, updated March 2004.
-
-Chris Lee
-Dept. of Chemistry & Biochemistry
-UCLA
-
-
-I. COMPILATION
-
-To compile this program, simply type 'make poa'.
-This produces an executable for sequence alignment (poa) and also a
-linkable library liblpo.a. The software has been compiled and tested
-on LINUX and Mac OS X.
-
-
-II. RUNNING POA
-
-POA has a variety of command line options. Running POA without any
-arguments will print a list of the possible command line arguments.
-POA may be used to construct a PO-MSA, or to analyze a PO-MSA.
-
-A. Constructing a PO-MSA
--------------------------
-
-1. Required Input:
-
-i. An Alignment Score Matrix File:
-
-A score matrix file is required, because POA uses it to get the
-residue alphabet and indexing. Even if POA is not being used to
-perform multiple sequence alignment, this file must be provided. Any
-basic alignment matrix which may be used with BLAST may be used here.
-This file must be the first command line argument without a flag in
-order to be interpreted by POA as the score matrix file. Two example
-score matrix files, blosum80.mat and blosum80_trunc.mat, are provided
-in this directory. They includes scores for matching nucleotides, as
-well as amino acids. Header lines may be used to specify gap
-parameters, as in the examples:
-
-GAP-PENALTIES=A B C
-GAP-TRUNCATION-LENGTH=T
-GAP-DECAY-LENGTH=D
-
-means that the gap opening penalty is A; the gap extension penalty is
-B until the gap length reaches T; the gap extension penalty decreases
-linearly from B to C for gap lengths between T and T+D; and the gap
-extension penalty is C for all longer gaps. An additional line,
-"GAP-PENALTIES-X=Ax Bx Cx", can be inserted after the GAP-PENALTIES
-line to specify the opening and extension penalties for gaps in the
-first sequence relative to the second sequence in an alignment, i.e.,
-for asymmetric gap scoring. Use the -v flag to see what gap penalties
-POA is using for a given run.
-
-NOTE: POA is case-sensitive.
-In order to distinguish amino acid residues from nucleic acid
-residues, POA is case sensitive. Residues that are uppercase are
-interpreted as amino acids, while residues that are lowercase are
-interpreted as nucleotides. POA can handle mixed score matrices
-containing both amino acid and nucleotide scores, as long as the
-column and row labels of the matrix are case-sensitive. The
-blosum80.mat file is an example.
-
-ii. A FASTA file, or MSA Files in PO, CLUSTAL or PIR Format
-A FASTA file is required only if POA is being used to construct a
-new PO-MSA from a list of sequences, or to align a list of sequences to
-an already existing PO-MSA (see Analyzing a PO-MSA below). This FASTA file
-should contain sequences to be aligned by POA. The command line argument
-to get POA to accept a FASTA file as input is '-read_fasta FILENAME'. POA
-will interpret FILENAME as the FASTA sequence file. An example file,
-multidom.seq, is provided in this directory.
-
-POA is case-sensitive (see NOTE above). All residues in the FASTA
-file must be uppercase to be interpreted as amino acids by POA, or
-lowercase to be interpreted as nucleotides. To switch the case of all
-of the letters in the FASTA file to uppercase, use the '-toupper'
-command line argument. To switch the case of all the letters in the
-FASTA file to lowercase, use the '-tolower' command line argument.
-
-POA will also read in a set of MSA files to be aligned (see below).
-
-
-2. MSA Construction Options:
-
-i. Global vs. Local Alignment
-
-POA will build alignments using local or global alignment. The
-default is set to local aligment. To call global alignment, use the
-'-do_global' flag.
-
-ii. Iterative vs. Progressive Alignment
-
-POA will build the alignment iteratively, aligning sequences and MSAs
-in the order they are provided. It will also align sequences and MSAs
-in the order dictated by a guide tree built from a matrix of pairwise
-similarity scores. To call progressive alignment, use the
-'-do_progressive' flag.
-
-To provide POA with a set of pairwise similarity scores, use the
-'-read_pairscores' flag followed by the name of the text file
-containing the list of pairwise similarity scores. This file should
-be a tab-delimited file, where each row contains two sequence names
-followed by the pairwise similarity score. The included file
-"multidom.pscore" shows an example.
-
-Example row in pairscore file:
-ABL1_HUMAN MATK_HUMAN 260.0
-
-To quickly compute a set of pairwise similarity scores, run BLAST, and
-set the similarity scores to the set of BLAST bitscores. A simple
-BLAST driver/parser is provided as "make_pscores.pl"; you may need to
-modify this script for your particular configuration. If the
-'-do_progressive' flag is specified without a corresponding pairscore
-file, POA will compute pairwise similarity scores itself, by
-performing all pairwise alignments. This is very slow, and we do not
-recommend it.
-
-iii. Aggressive Fusion:
-
-By default, during the building up of a PO-MSA, if a node i with label
-'A' is aligned to a node j with label 'B' that belongs to an align
-ring containing a third node k with label 'A', POA simply adds node i
-to the j-k align ring. It is possible to force POA to do aggressive
-fusion, so that node i is instead fused to node k. Use the '-fuse_all'
-flag to accomplish this.
-
-
-3. MSA Output Formats:
-
-POA can output a PO-MSA in several formats simultaneously, including
-CLUSTAL, PIR, and PO. The PO format is the best format since it
-contains all of the information in the PO-MSA. The other formats
-accurately represent the MSA, but since they are RC-MSA formats, they
-may lose some of the information in the full PO-MSA.
-
-i. CLUSTAL format:
-This format is the standard CLUSTAL format. The command line argument
-to get the MSA output in this format is '-clustal FILENAME'.
-
-ii. PIR format:
-This format is the standard PIR format, which is like FASTA with a '.'
-character representing gaps. The command line argument to get the MSA
-output in this format is '-pir FILENAME'.
-
-iii. PO format:
-This format is the standard PO format. It is described below in the
-section PO format. The command line argument to get the MSA output in
-this format is '-po FILENAME'.
-
-EXAMPLE: Constructing an MSA of Four Protein Sequences
-
-Running POA with the following statement will take the FASTA-formatted
-sequences in the multidom.seq file, construct a PO-MSA using the
-scoring matrix in the file blosum80.mat, and then output the PO-MSA in
-CLUSTAL format to the file multidom.aln.
-
-poa -read_fasta multidom.seq -clustal multidom.aln blosum80.mat
-
-
-4. Other Output:
-
-i. Score Matrix
-
-POA will also print to stdout the score matrix stored in the '.mat'
-file. The command line argument to get POA to do this is
-'-printmatrix LETTERSET', where LETTERSET is a string of letters to be
-printed with the score matrix. For example, if the score matrix is
-designed for protein alignment the letter set might be
-'ARNDCQEGHILKMFPSTWYV'.
-
-ii. Verbose Mode
-
-POA will run in verbose mode, printing additional information
-generated during the run (such as the set of gap scores used) to
-stdout. The command line argument for verbose mode is '-v'.
-
-
-B. Analyzing a PO-MSA
------------------------
-
-POA can also take as input an MSA in PO, CLUSTAL or PIR file format
-and rebuild the PO-MSA data structure. Once this data structure has
-been rebuilt, it may be analyzed for features. In "liblpo.a", the
-linkable POA library, we have included the functions necessary to do
-heaviest bundling and thereby find consensus sequences in the PO-MSA
-(the details of the heaviest bundling algorithm are described
-elsewhere). POA has been written so that users may create their own
-functions for analyzing a PO-MSA. We have not included in the
-"liblpo.a" library the functions that we wrote to analyze PO-MSAs
-constructed with ESTs and genome sequence to find snps and alternative
-splice sites. However, it is possible to design modular library
-functions that will look for highly specific biological features in
-any PO-MSA data structure.
-
-1. Required Input:
-
-Before the PO-MSA data structure can be analyzed it must be built. It can
-be built iteratively or using a guide tree, or converted from another file
-type. POA can align a set of FASTA-formatted sequences to each other or
-to an existing PO-MSA. It can align two PO-MSAs. It can also align an
-arbitrary set of PO-MSAs.
-
-Note: POA Requires Either An MSA File or a FASTA File
-If neither type of file is read in by POA it will terminate early, since it
-has not received any sequence data.
-
-i. An MSA file in PO, CLUSTAL or PIR format:
-
-POA will read in an MSA file in PO, CLUSTAL or PIR format. The
-command line argument to get poa to read in an MSA file and rebuild
-the PO-MSA data structure is '-read_msa FILENAME'. POA automatically
-determines whether the MSA file is in PO, CLUSTAL, or PIR format. POA
-will read in a second MSA file when the '-read_msa2 FILENAME' flag is
-used. POA will read in a set of MSA files using the '-read_msa_list
-FILENAME' flag. The file should contain a list of names of MSA files.
-
-It is possible to filter the PO-MSA data structure as it is being
-rebuilt. In order to filter the PO-MSA in the MSA file to include
-only a subset of sequences use the command line argument '-subset
-FILENAME', where the file named FILENAME contains the list of sequence
-names to be included in the new PO-MSA. In order to filter the PO-MSA
-in the MSA file to exclude a subset of sequences, use the command line
-argument '-remove FILENAME', where the file named FILENAME contains
-the list of sequences to be excluded from the new PO-MSA. The names
-of sequences to be included or excluded should be in the format
-"SOURCENAME=*", as they are in a PO file. Lists of sequence source
-names can be created by using the unix grep utility on the PO file.
-Each line in the list of sequences to be filtered should read,
-"SOURCENAME=" followed by the name of the sequence,
-e.g. "SOURCENAME=ABL1_HUMAN". To filter the second PO-MSA read in
-using the '-read_msa2 FILENAME', use the '-subset2' and '-remove2'
-flags.
-
-ii. A FASTA File:
-
-The FASTA file should contain sequences to be aligned by POA. The
-command line argument to get POA to accept a FASTA file as input is
-'-read_fasta FILENAME'. POA will interpret FILENAME as the FASTA
-sequence file. An example file, "multidom.seq", is provided in this
-directory. (See note above on case-sensitivity).
-
-NOTE: POA Can Take Both An MSA File And A FASTA File As Input
-If both the '-read_msa FILENAME' argument and the '-read_fasta FILENAME'
-argument are given to POA on the command line, then POA will first rebuild the
-PO-MSA in the MSA file, and then it will align the sequences in the FASTA file
-to this PO-MSA. Similarly, if both the '-read_msa_list FILENAME' flag
-and the '-read_fasta FILENAME' flag are given to POA, then POA will
-rebuild all of the PO-MSAs and will align them to each other and to the
-sequences in the FASTA file.
-
-
-2. Additional PO Utilities:
-
-i. Consensus Generation Via Heaviest Bundling Algorithm:
-The heaviest bundling algorithm finds consensus sequences in the
-PO-MSA. The command line argument for heaviest bundling is '-hb'. This
-function adds the new consensus sequences to the PO-MSA by storing new
-consensus sequence indices on the in the PO-MSA nodes corresponding to
-the consensus sequence paths. The sequence source names for consensus
-sequences generated by heaviest bundling are CONSENS'i' where 'i' is the
-index of the bundle corresponding to the consensus sequence.
-
-The heaviest bundling algorithm can also take as input a bundling
-threshold value. The command line argument for setting a bundling
-threshold value for heaviest bundling is '-hbmin VALUE'. This threshold
-is used during the process of associating sequences with bundles. If a
-sequence has a percentage of nodes shared with bundle 'i' greater than this
-threshold value, it is associated with bundle 'i'. Iterative heaviest
-bundling can also be affected by the bundling threshold. A detailed
-description of heaviest bundling and heaviest bundling thresholds is given
-elsewhere. The consensus sequences corresponding to bundles generated
-by heaviest bundling are listed in the sequence source list. Additionally,
-in the SOURCEINFO line for each sequence the index of the bundle to which
-that sequence belongs is give. Finally, using the command line argument
-'-best' restricts the MSA output to the consensus sequences generated
-by heaviest bundling (NB: this applies to PIR output only).
-
-
-III. PO FILE FORMAT
-
-****************************HEADER****************************************
-VERSION= ~Current version of POA,e.g. LPO.1.0~
-NAME= ~Name of PO-MSA. Defaults to name of 1st sequence in PO-MSA~
-TITLE= ~Title of PO-MSA. Defaults to title of 1st sequence in PO-MSA~
-LENGTH= ~Number of nodes in PO-MSA~
-SOURCECOUNT= ~Number of sequences in PO-MSA~
-
-*********************SEQUENCE SOURCE LIST*********************************
-
-/* For each sequence in the PO-MSA: */
-SOURCENAME= ~Name of sequence taken from FASTA sequence header~
-SOURCEINFO= ~Number of nodes in sequence~
- ~Index of first node containing sequence~
- ~Sequence weight~
- ~Index of bundle containing sequence~
- ~Title of sequence taken from FASTA sequence header~
-
-/* Example: */
-SOURCENAME=GRB2_HUMAN
-SOURCEINFO=217 10 0 3 GROWTH FACTOR RECEPTOR-BOUND PROTEIN 2 (GRB2 ADAPTOR PROTEIN)(SH2)
-
-********************PO-MSA DATA STRUCTURE*********************************
-
-/* For each node in the PO-MSA: */
-~Residue label~:~'L' delimited index list of other nodes with edges into node~
- ~'S' delimited index list of sequences stored in each node~
- ~'A' index of next node in same align ring~
- NB: align ring indices must form a cycle.
- e.g. if two nodes 121 and 122 are aligned, then
- the line for node 121 indicates "A122", and
- the line for node 122 indicates "A121".
-
-/* Example: */
-F:L156L155L22S2S3S7A158
-
-********************END***************************************************
-
-
-For more information, see http://www.bioinformatics.ucla.edu/poa.
Deleted: trunk/packages/poa/trunk/align_lpo.c
===================================================================
--- trunk/packages/poa/trunk/align_lpo.c 2008-03-22 15:38:40 UTC (rev 1651)
+++ trunk/packages/poa/trunk/align_lpo.c 2008-03-22 17:20:38 UTC (rev 1652)
@@ -1,310 +0,0 @@
-
-
-#include "default.h"
-#include "poa.h"
-#include "seq_util.h"
-#include "lpo.h"
-
-
-
-typedef struct {
- unsigned char x:6;
- unsigned char y:1;
- unsigned char is_aligned:1;
-} LPOMove_T;
-
-
-typedef unsigned char LPOGapLength_T;
-
-
-
-
-
-void trace_back_lpo_alignment(int len_x,LPOLetter_T seq_x[],
- int len_y,LPOLetter_T seq_y[],
- LPOMove_T **move,
- LPOLetterRef_T best_x,
- LPOLetterRef_T best_y,
- LPOLetterRef_T **x_to_y,
- LPOLetterRef_T **y_to_x)
-{
- int i;
- LPOLetterRef_T new_x,*x_al=NULL,*y_al=NULL;
- LPOLetterLink_T *left;
-
-
- CALLOC(x_al,len_x,LPOLetterRef_T);
- CALLOC(y_al,len_y,LPOLetterRef_T);
- LOOP (i,len_x) x_al[i]= INVALID_LETTER_POSITION;
- LOOP (i,len_y) y_al[i]= INVALID_LETTER_POSITION;
-
- while (best_x>=0 && best_y>=0) {
- if (move[best_x][best_y].is_aligned) {/* ALIGNED! MAP best_x <--> best_y */
- x_al[best_x]=best_y;
- y_al[best_y]=best_x;
- }
-
- if (0==move[best_x][best_y].x /* HIT START OF THE ALIGNMENT, SO QUIT */
- && 0==move[best_x][best_y].y)
- break;
-
- if ((i=move[best_x][best_y].x)>0) { /* TRACE BACK ON X */
- for (left= &seq_x[best_x].left;i-- >0;left=left->more) /* USE iTH MOVE*/
- new_x = left->ipos;
- }
- else /* NO MOVE ON X */
- new_x=best_x;
-
- if (move[best_x][best_y].y>0) /* ASSUMING seq_y A LINEAR SEQUENCE*/
- best_y=SEQ_Y_LEFT(best_y); /* TRACE BACK ON Y */
- best_x=new_x;
- }
-
-
- if (x_to_y) /* HAND BACK ALIGNMENT RECIPROCAL MAPPINGS */
- *x_to_y = x_al;
- else
- free(x_al);
- if (y_to_x)
- *y_to_x = y_al;
- else
- free(y_al);
- return;
-}
-
-
-
-
-
-/* FOR CONTROLLING THE FREEING OF score[] ARRAYS */
-typedef struct {
- LPOLetterRef_T last_right; /* LAST POSITION WHICH REFERENCES THIS POSITION*/
- LPOLetterRef_T my_pos; /* INDEX OF THIS POSITION */
-} LastRightList_T;
-
-
-/* SORT IN ASCENDING ORDER BY last_right */
-int last_right_qsort_cmp(const void *void_a,const void *void_b)
-{
- const LastRightList_T *a=(const LastRightList_T *)void_a,
- *b=(const LastRightList_T *)void_b;
- if (a->last_right < b->last_right)
- return -1;
- if (a->last_right == b->last_right)
- return 0;
- else
- return 1;
-}
-
-
-
-
-LastRightList_T *last_right_list(int len,LPOLetter_T seq[])
-{
- int i;
- LastRightList_T *list=NULL;
- LPOLetterLink_T *right;
-
- CALLOC(list,len,LastRightList_T);
- LOOP (i,len) {
- list[i].last_right=list[i].my_pos=i; /* DEFAULT: NOTHING RIGHT OF THIS*/
- for (right= &seq[i].right;right && right->ipos>=0;right=right->more)
- if (right->ipos>list[i].last_right)
- list[i].last_right=right->ipos;
- }
- qsort(list,len,sizeof(LastRightList_T),last_right_qsort_cmp);
- return list;
-}
-
-
-
-
-
-typedef struct {
- LPOScore_T *score;
- LPOGapLength_T *gap_length;
-} LPORowFreeList_T;
-
-
-void free_row_list(int nfree_list,LPORowFreeList_T free_list[])
-{
-/* fprintf(stderr,"Maximum #rows allocated was %d\n\n",nfree_list);*/
- while (nfree_list-- >0) { /* DUMP EVERYTHING STORED ON THE FREE LIST */
- free(free_list[nfree_list].score);
- free(free_list[nfree_list].gap_length);
- }
-}
-
-
-
-
-
-/** performs partial order alignment:
- seq_x[] may be a partial order;
- seq_y[] is assumed to be a linear order (regular sequence);
- returns the alignment in x_to_y[] and y_to_x, and also
- returns the alignment score as the return value */
-LPOScore_T align_lpo (LPOSequence_T *lposeq_x,
- LPOSequence_T *lposeq_y,
- ResidueScoreMatrix_T *m,
- LPOLetterRef_T **x_to_y,
- LPOLetterRef_T **y_to_x,
- int use_global_alignment)
-{
- int len_x = lposeq_x->length;
- int len_y = lposeq_y->length;
- LPOLetter_T *seq_x = lposeq_x->letter;
- LPOLetter_T *seq_y = lposeq_y->letter;
-
- int i,j,j_left,best_x,best_y,nfree_list=0,ilast_right=0;
- LPOScore_T **score=NULL,*my_score,*my_matrix;
- LPOMove_T **move=NULL,*move_base=NULL,*my_move;
- LPOGapLength_T **gap_length=NULL,*my_gap_length;
- LPOLetterLink_T *left,*my_left;
- int new_gap_len,insert_x,previous_x,previous_y,
- i_x,new_score,new_x,new_y,current_gap_length;
- LPOScore_T match_score,previous_score,
- insert_x_try,insert_x_score,insert_y_score,best_score= -999999;
- LPORowFreeList_T *free_list=NULL;
- LastRightList_T *last_right=NULL;
-
- int max_gap_length;
- LPOScore_T *gap_penalty_x, *gap_penalty_y;
-
- max_gap_length = m->max_gap_length;
- gap_penalty_x = m->gap_penalty_x;
- gap_penalty_y = m->gap_penalty_y;
-
- CALLOC(score,len_x,LPOScore_T *); /* ALLOCATE MATRIX STORAGE: ROW POINTERS */
- CALLOC(move,len_x,LPOMove_T *);
- CALLOC(gap_length,len_x,LPOGapLength_T *);
- CALLOC(free_list,len_x,LPORowFreeList_T);
- CALLOC(move_base,len_x*(len_y+1),LPOMove_T); /*ALLOCATE MATRIX RECTANGLE */
- last_right=last_right_list(len_x,seq_x); /* GET SORTED LIST TO CONTROL FREE*/
-
- LOOPF (i,len_x) {/* BUILD UP DP MATRIX, ROW BY ROW */
- if (nfree_list>0) { /* TAKE THE NEW ROW FROM THE FREE LIST */
- nfree_list--; /* MOVE BACK TO LAST FREE LIST ENTRY */
- score[i]=free_list[nfree_list].score+1; /* LEAVE SPACE FOR [-1] ENTRY */
- gap_length[i]=free_list[nfree_list].gap_length+1;
- }
- else { /* NEED TO ALLOCATE A NEW ROW */
- CALLOC(score[i],len_y+1,LPOScore_T);
- score[i]++; /* LEAVE SPACE FOR [-1] ENTRY */
- CALLOC(gap_length[i],len_y+1,LPOGapLength_T);
- gap_length[i]++; /* LEAVE SPACE FOR [-1] ENTRY */
- }
- move[i]=move_base+i*(len_y+1)+1; /* LEAVE SPACE FOR [-1] ENTRY */
- my_move=move[i]; /* USED TO SPEED UP MATRIX ACCESS INSIDE INNER LOOP*/
- my_score=score[i];
- my_gap_length=gap_length[i];
- score[i][-1]= -999999; /* UNACCEPTABLE SCORE ENSURES -1 NEVER CHOSEN*/
- my_matrix=m->score[seq_x[i].letter];
- if (seq_x[i].left.ipos>=0) /* AT LEAST ONE VALID POSITION TO THE LEFT */
- my_left= &seq_x[i].left;
- else /* THERE IS NO POSITION TO THE LEFT */
- my_left=NULL;
- LOOPF (j,len_y) {
- j_left=SEQ_Y_LEFT(j); /* POSITION TO THE LEFT OF j */
- previous_score=previous_x=previous_y=0;
- i_x=1;
- insert_x_score= -999999;
- for (left=my_left;left;left=left->more) {
- if (move[left->ipos][j].x>0) /* COULD BE [X,0] GAP CONTINUATION */
- current_gap_length=gap_length[left->ipos][j];
- else/*NOT AN EXTENSION OF A [X,0] GAP, SO TREAT AS START OF NEW GAP*/
- current_gap_length=0;
- insert_x_try=score[left->ipos][j] /* FIND BEST [X,0] MOVE */
- + left->score /* INCLUDE WEIGHTING FROM THIS EDGE */
- - gap_penalty_x[current_gap_length];
- if (insert_x_try>insert_x_score) { /* IF BEST insert_x MOVE, SAVE*/
- insert_x=i_x;
- insert_x_score=insert_x_try;
- new_gap_len=current_gap_length+1;
- if (new_gap_len>max_gap_length) /* PREVENT OVERFLOW */
- new_gap_len=max_gap_length;
- }
-
- /* FIND BEST [X,1] MOVE */
- if (score[left->ipos][j_left]+left->score>previous_score) {
- previous_score=score[left->ipos][j_left]+left->score;
- previous_x=i_x;
- previous_y=1; /* ASSUMING seq_y JUST LINEAR SEQUENCE */
- }
- i_x++; /* ADVANCE X MOVE INDEX */
- } /* DONE SCANNING PREDECESSORS ON X */
-
- match_score = previous_score /* TAKE BEST PREDECESSOR */
- + my_matrix[seq_y[j].letter];
-
- if (match_score>insert_x_score) { /* PREFER [X,1] MOVE */
- new_score=match_score;
- new_x=previous_x;
- new_y=previous_y;
- new_gap_len=0;
- }
- else { /* PREFER [X,0] MOVE */
- new_score=insert_x_score;
- new_x=insert_x;
- new_y=0;
- }
-
- /* [0,1] MOVE */
- if (my_move[j_left].y==1) /* COULD BE [0,1] GAP CONTINUATION */
- current_gap_length=my_gap_length[j_left];
- else /* NOT AN EXTENSION OF A [0,1] GAP, SO TREAT AS START OF NEW GAP*/
- current_gap_length=0;
- insert_y_score=my_score[j_left]-gap_penalty_y[current_gap_length];
-
- if (insert_y_score<new_score) { /* [new_x,new_y] MOVE IS BEST */
- my_score[j]=new_score;
- my_move[j].x=new_x;
- my_move[j].y=new_y;
- my_gap_length[j]=new_gap_len;
- if (new_gap_len==0)
- my_move[j].is_aligned=1;
- }
- else { /* [0,1] MOVE IS BEST */
- my_score[j]=insert_y_score;
- my_move[j].x=0;
- my_move[j].y=1;
- my_gap_length[j]=current_gap_length+1;
- if (my_gap_length[j]>max_gap_length) /* PREVENT OVERFLOW */
- my_gap_length[j]=max_gap_length;
- }
- if (my_score[j]>best_score) { /* RECORD BEST MOVE */
- best_score=my_score[j];
- best_x=i;
- best_y=j;
- }
- }
- while (ilast_right<len_x && last_right[ilast_right].last_right<=i) {
- free_list[nfree_list].gap_length= /* PUSH ROW ONTO FREE LIST FOR REUSE*/
- gap_length[last_right[ilast_right].my_pos]-1;/*READJUST TO BASE ADDR!*/
- free_list[nfree_list].score=score[last_right[ilast_right].my_pos]-1;
- score[last_right[ilast_right].my_pos]=NULL; /*PREVENT DANGLING POINTER!*/
- gap_length[last_right[ilast_right].my_pos]=NULL;
- nfree_list++; /* INCREMENT FREE LIST SIZE */
- ilast_right++; /* MOVE TO THE NEXT POTENTIAL ROW FOR FREEING */
- }
- } /* DYNAMIC PROGRAMING MATRIX COMPLETE, NOW TRACE BACK FROM best_x,best_y*/
-
- IF_GUARD(best_x>=len_x || best_y>=len_y,1.1,(ERRTXT,"Bounds exceeded!\nbest_x,best_y:%d,%d\tlen:%d,%d\n",best_x,best_y,len_x,len_y),CRASH);
- trace_back_lpo_alignment(len_x,seq_x,len_y,seq_y,move,best_x,best_y,
- x_to_y,y_to_x);
-
-
- free_row_list(nfree_list,free_list);
- FREE(move_base); /* DUMP ALLOCATED MATRIX */
- FREE(score);
- FREE(move);
- FREE(gap_length);
- FREE(free_list);
- FREE(last_right);
- return best_score;
-}
-
-
-
-
-
Deleted: trunk/packages/poa/trunk/align_lpo2.c
===================================================================
--- trunk/packages/poa/trunk/align_lpo2.c 2008-03-22 15:38:40 UTC (rev 1651)
+++ trunk/packages/poa/trunk/align_lpo2.c 2008-03-22 17:20:38 UTC (rev 1652)
@@ -1,371 +0,0 @@
-
-#include "default.h"
-#include "poa.h"
-#include "seq_util.h"
-#include "lpo.h"
-
-
-/** set nonzero for old scoring (gap-opening penalty for X-Y transition) */
-#define DOUBLE_GAP_SCORING (0)
-
-
-typedef struct {
- unsigned char x:7;
- unsigned char y:1;
-}
-DPMove_T;
-
-typedef struct {
- LPOScore_T score;
- short gap_x, gap_y;
-}
-DPScore_T;
-
-/** is node 'i' the first node in any sequence in lposeq? */
-static int is_initial_node (int i, LPOSequence_T *lposeq)
-{
- LPOLetterSource_T *src = &((lposeq->letter[i]).source);
- while (src != NULL && src->iseq >= 0) {
- if (src->ipos == 0) {
- return 1;
- }
- src = src->more;
- }
- return 0;
-}
-
-/** is node 'i' the last node in any sequence in lposeq? */
-static int is_final_node (int i, LPOSequence_T *lposeq)
-{
- LPOLetterSource_T *src = &((lposeq->letter[i]).source);
- while (src != NULL && src->iseq >= 0) {
- if (src->ipos == (lposeq->source_seq[src->iseq]).length - 1) {
- return 1;
- }
- src = src->more;
- }
- return 0;
-}
-
-
-static void get_seq_left_and_final (LPOSequence_T *lposeq_x,
- LPOLetterLink_T ***x_left_ptr,
- int **is_final_node_ptr)
-{
- int i, len_x = lposeq_x->length;
- LPOLetter_T *seq_x = lposeq_x->letter;
- int *is_final_node_x = NULL;
- LPOLetterLink_T **x_left = NULL;
-
- CALLOC (is_final_node_x, len_x, int);
-
- for (i=0; i<len_x; i++) {
- is_final_node_x[i] = is_final_node (i, lposeq_x);
- }
-
- CALLOC (x_left, len_x, LPOLetterLink_T *);
-
- for (i=0; i<len_x; i++) {
- if (is_initial_node (i, lposeq_x) && seq_x[i].left.ipos != -1) {
- CALLOC (x_left[i], 1, LPOLetterLink_T);
- x_left[i]->ipos = -1;
- x_left[i]->score = 0;
- x_left[i]->more = &seq_x[i].left;
- }
- else {
- x_left[i] = &seq_x[i].left;
- }
- }
-
- (*is_final_node_ptr) = is_final_node_x;
- (*x_left_ptr) = x_left;
-}
-
-
-static void trace_back_lpo_alignment (int len_x, int len_y,
- DPMove_T **move,
- LPOLetterLink_T **x_left,
- LPOLetterRef_T best_x, LPOLetterRef_T best_y,
- LPOLetterRef_T **x_to_y,
- LPOLetterRef_T **y_to_x)
-{
- int i, xmove, ymove;
- LPOLetterRef_T *x_al = NULL, *y_al = NULL;
- LPOLetterLink_T *left;
-
- CALLOC (x_al, len_x, LPOLetterRef_T);
- CALLOC (y_al, len_y, LPOLetterRef_T);
- LOOP (i,len_x) x_al[i] = INVALID_LETTER_POSITION;
- LOOP (i,len_y) y_al[i] = INVALID_LETTER_POSITION;
-
- while (best_x >= 0 && best_y >= 0) {
-
- xmove = move[best_y][best_x].x;
- ymove = move[best_y][best_x].y;
-
- if (xmove>0 && ymove>0) { /* ALIGNED! MAP best_x <--> best_y */
- x_al[best_x]=best_y;
- y_al[best_y]=best_x;
- }
-
- if (xmove == 0 && ymove == 0) { /* FIRST ALIGNED PAIR */
- x_al[best_x]=best_y;
- y_al[best_y]=best_x;
- break; /* FOUND START OF ALIGNED REGION, SO WE'RE DONE */
- }
-
- if (xmove>0) { /* TRACE BACK ON X */
- left = x_left[best_x];
- while ((--xmove)>0) {
- left = left->more;
- }
- best_x = left->ipos;
- }
-
- if (ymove>0) { /* TRACE BACK ON Y */
- best_y--;
- }
- }
-
- if (x_to_y) /* HAND BACK ALIGNMENT RECIPROCAL MAPPINGS */
- *x_to_y = x_al;
- else
- free(x_al);
- if (y_to_x)
- *y_to_x = y_al;
- else
- free(y_al);
-
- return;
-}
-
-
-/** performs partial order alignment:
- lposeq_x may be a partial order;
- lposeq_y is assumed to be a linear order (regular sequence);
- returns the alignment in x_to_y[] and y_to_x[], and also
- returns the alignment score as the return value.
-*/
-
-LPOScore_T align_lpo (LPOSequence_T *lposeq_x,
- LPOSequence_T *lposeq_y,
- ResidueScoreMatrix_T *m,
- LPOLetterRef_T **x_to_y,
- LPOLetterRef_T **y_to_x,
- int use_global_alignment)
-{
- int len_x = lposeq_x->length;
- int len_y = lposeq_y->length;
- LPOLetter_T *seq_x = lposeq_x->letter;
- LPOLetter_T *seq_y = lposeq_y->letter;
-
- int i, j, xcount, prev_gap, next_gap;
- int best_x = -2, best_y = -2;
- LPOScore_T best_score = -999999;
- int *is_final_node_x;
- LPOLetterLink_T **x_left = NULL, *xl;
- DPMove_T **move = NULL, *my_move;
-
- DPScore_T *curr_score = NULL, *prev_score = NULL, *init_col_score = NULL, *my_score, *swap;
-
- int max_gap_length;
- LPOScore_T *gap_penalty_x, *gap_penalty_y;
- int *next_gap_array, *next_perp_gap_array;
-
- LPOScore_T try_score, insert_x_score, insert_y_score, match_score;
- int insert_x_x, insert_x_gap;
- int insert_y_y, insert_y_gap;
- int match_x, match_y;
-
- long n_edges = 0;
-
- max_gap_length = m->max_gap_length;
- gap_penalty_x = m->gap_penalty_x;
- gap_penalty_y = m->gap_penalty_y;
- CALLOC (next_gap_array, max_gap_length + 2, int);
- CALLOC (next_perp_gap_array, max_gap_length + 2, int);
-
- /* GAP LENGTH EXTENSION RULE: */
- /* 0->1, 1->2, 2->3, ..., M-1->M, M->M, M+1->M+1. */
- for (i=0; i<max_gap_length+1; i++) {
- next_gap_array[i] = (i<max_gap_length) ? i+1 : i;
- next_perp_gap_array[i] = (DOUBLE_GAP_SCORING ? 0 : next_gap_array[i]);
- }
- next_gap_array[max_gap_length+1] = max_gap_length+1;
- next_perp_gap_array[max_gap_length+1] = max_gap_length+1;
-
- get_seq_left_and_final (lposeq_x, &x_left, &is_final_node_x);
-
- CALLOC (move, len_y, DPMove_T *);
- for (i=0; i<len_y; i++) {
- CALLOC (move[i], len_x, DPMove_T);
- }
-
- CALLOC (init_col_score, len_y+1, DPScore_T);
- init_col_score = &(init_col_score[1]);
-
- CALLOC (curr_score, len_x+1, DPScore_T);
- curr_score = &(curr_score[1]);
- CALLOC (prev_score, len_x+1, DPScore_T);
- prev_score = &(prev_score[1]);
-
- /* FILL INITIAL ROW. */
- /* GLOBAL ALIGNMENT: no free gaps */
- /* LOCAL ALIGNMENT: free initial gap */
-
- curr_score[-1].score = 0;
- curr_score[-1].gap_x = curr_score[-1].gap_y =
- (use_global_alignment) ? 0 : TRUNCATE_GAP_LENGTH+1;
-
- for (i=0; i<len_x; i++) {
- for (xcount = 1, xl = x_left[i]; xl != NULL; xcount++, xl = xl->more) {
- prev_gap = curr_score[xl->ipos].gap_x;
- try_score = curr_score[xl->ipos].score + xl->score - gap_penalty_x[prev_gap];
- if (xcount == 1 || try_score > curr_score[i].score) {
- curr_score[i].score = try_score;
- curr_score[i].gap_x = next_gap_array[prev_gap];
- curr_score[i].gap_y = next_perp_gap_array[prev_gap];
- }
- }
- }
-
- /* FILL INITIAL COLUMN. */
-
- init_col_score[-1] = curr_score[-1];
- for (i=0; i<len_y; i++) {
- prev_gap = init_col_score[i-1].gap_y;
- init_col_score[i].score = init_col_score[i-1].score - gap_penalty_y[prev_gap];
- init_col_score[i].gap_x = next_perp_gap_array[prev_gap];
- init_col_score[i].gap_y = next_gap_array[prev_gap];
- }
-
-
- /** MAIN DYNAMIC PROGRAMMING LOOP **/
-
-
- /* OUTER LOOP (i-th position in linear seq y): */
- for (i=0; i<len_y; i++) {
-
- swap = prev_score; prev_score = curr_score; curr_score = swap;
-
- curr_score[-1] = init_col_score[i];
-
- /* INNER LOOP (j-th position in LPO x): */
- for (j=0; j<len_x; j++) {
-
- /* ONLY POSSIBLE Y-INSERTION: trace back to (i-1, j). */
- prev_gap = prev_score[j].gap_y;
- insert_y_score = prev_score[j].score - gap_penalty_y[prev_gap];
- insert_y_gap = prev_gap;
-
- /* ONLY ONE Y-PREDECESSOR */
- match_y = insert_y_y = 1;
-
- /* LOOP OVER x-predecessors: */
- for (xcount = 1, xl = x_left[j]; xl != NULL; xcount++, xl = xl->more) {
-
- /* IMPROVE XY-MATCH?: trace back to (i-1, j'=xl->ipos) */
- try_score = prev_score[xl->ipos].score + xl->score;
- if (xcount == 1 || try_score > match_score) {
- match_score = try_score;
- match_x = xcount;
- }
-
- /* IMPROVE X-INSERTION?: trace back to (i, j'=xl->ipos) */
- prev_gap = curr_score[xl->ipos].gap_x;
- try_score = curr_score[xl->ipos].score + xl->score - gap_penalty_x[prev_gap];
- if (xcount == 1 || try_score > insert_x_score) {
- insert_x_score = try_score;
- insert_x_x = xcount;
- insert_x_gap = prev_gap;
- }
- }
-
- if (0 == use_global_alignment && match_score <= 0) {
- match_score = 0;
- match_x = match_y = 0; /* FIRST ALIGNED PAIR */
- }
-
- n_edges += (xcount-1);
- match_score += m->score[(int)seq_x[j].letter][(int)seq_y[i].letter];
-
- my_score = &curr_score[j];
- my_move = &move[i][j];
-
- if (match_score > insert_y_score && match_score > insert_x_score) {
- /* XY-MATCH */
- my_score->score = match_score;
- my_score->gap_x = 0;
- my_score->gap_y = 0;
- my_move->x = match_x;
- my_move->y = match_y;
- }
- else if (insert_x_score > insert_y_score) {
- /* X-INSERTION */
- my_score->score = insert_x_score;
- my_score->gap_x = next_gap_array[insert_x_gap];
- my_score->gap_y = next_perp_gap_array[insert_x_gap];
- my_move->x = insert_x_x;
- my_move->y = 0;
- }
- else {
- /* Y-INSERTION */
- my_score->score = insert_y_score;
- my_score->gap_x = next_perp_gap_array[insert_y_gap];
- my_score->gap_y = next_gap_array[insert_y_gap];
- my_move->x = 0;
- my_move->y = insert_y_y;
- }
-
- /* RECORD BEST START FOR TRACEBACK */
- /* KEEPING ONLY FINAL-FINAL BESTS FOR GLOBAL ALIGNMENT */
- if (my_score->score >= best_score && (0 == use_global_alignment || (is_final_node_x[j] && i==len_y-1))) {
- if (my_score->score > best_score || (j == best_x && i < best_y) || j < best_x) {
- best_score = my_score->score;
- best_x = j;
- best_y = i;
- }
- }
- }
- }
-
- IF_GUARD(best_x>=len_x || best_y>=len_y,1.1,(ERRTXT,"Bounds exceeded!\nbest_x,best_y:%d,%d\tlen:%d,%d\n",best_x,best_y,len_x,len_y),CRASH);
-
- /*
- fprintf (stderr, "aligned (%d nodes, %ld edges) to (%d nodes)\n", len_x, n_edges/len_y, len_y);
- fprintf (stderr, "best score %d @ (%d %d)\n", best_score, best_x, best_y);
- */
-
- /* DYNAMIC PROGRAMING MATRIX COMPLETE, NOW TRACE BACK FROM best_x, best_y */
- trace_back_lpo_alignment (len_x, len_y, move, x_left,
- best_x, best_y,
- x_to_y, y_to_x);
-
-
- FREE (next_gap_array);
- FREE (next_perp_gap_array);
-
- prev_score = &(prev_score[-1]);
- FREE (prev_score);
- curr_score = &(curr_score[-1]);
- FREE (curr_score);
-
- init_col_score = &(init_col_score[-1]);
- FREE (init_col_score);
-
- FREE (is_final_node_x);
-
- for (i=0; i<len_x; i++) {
- if (x_left[i] != NULL && x_left[i] != &seq_x[i].left) {
- FREE (x_left[i]);
- }
- }
- FREE (x_left);
-
- for (i=0; i<len_y; i++) {
- FREE (move[i]);
- }
- FREE (move);
-
- return best_score;
-}
Deleted: trunk/packages/poa/trunk/align_lpo_po.c
===================================================================
--- trunk/packages/poa/trunk/align_lpo_po.c 2008-03-22 15:38:40 UTC (rev 1651)
+++ trunk/packages/poa/trunk/align_lpo_po.c 2008-03-22 17:20:38 UTC (rev 1652)
@@ -1,254 +0,0 @@
-
-
-
-#include "default.h"
-#include "poa.h"
-#include "seq_util.h"
-#include "lpo.h"
-
-
-
-
-
-typedef struct {
- unsigned char x;
- unsigned char y:7;
- unsigned char is_aligned:1;
-} LPOMove_T;
-
-
-typedef unsigned char LPOGapLength_T;
-
-
-
-
-
-void trace_back_lpo_po_alignment(int len_x,LPOLetter_T seq_x[],
- int len_y,LPOLetter_T seq_y[],
- LPOMove_T **move,
- LPOLetterRef_T best_x,
- LPOLetterRef_T best_y,
- LPOLetterRef_T **x_to_y,
- LPOLetterRef_T **y_to_x)
-{
- int i;
- LPOLetterRef_T new_x,*x_al=NULL,*y_al=NULL;
- LPOLetterLink_T *left;
-
-
- CALLOC(x_al,len_x,LPOLetterRef_T);
- CALLOC(y_al,len_y,LPOLetterRef_T);
- LOOP (i,len_x) x_al[i]= INVALID_LETTER_POSITION;
- LOOP (i,len_y) y_al[i]= INVALID_LETTER_POSITION;
-
- while (best_x>=0 && best_y>=0) {
- if (move[best_x][best_y].is_aligned) {/* ALIGNED! MAP best_x <--> best_y */
- x_al[best_x]=best_y;
- y_al[best_y]=best_x;
- }
-
- if (0==move[best_x][best_y].x /* HIT START OF THE ALIGNMENT, SO QUIT */
- && 0==move[best_x][best_y].y)
- break;
-
- if ((i=move[best_x][best_y].x)>0) { /* TRACE BACK ON X */
- for (left= &seq_x[best_x].left;--i >0;left=left->more); /* USE iTH MOVE*/
- new_x = left->ipos;
- }
- else new_x=best_x;
- if ((i=move[best_x][best_y].y)>0) { /* TRACE BACK ON Y */
- for (left= &seq_y[best_y].left;--i >0;left=left->more); /* USE iTH MOVE*/
- best_y = left->ipos;
- }
- best_x=new_x;
- }
-
-
- if (x_to_y) /* HAND BACK ALIGNMENT RECIPROCAL MAPPINGS */
- *x_to_y = x_al;
- else
- free(x_al);
- if (y_to_x)
- *y_to_x = y_al;
- else
- free(y_al);
- return;
-}
-
-
-
-/** performs partial order alignment:
- seq_x[] may be a partial order;
- seq_y[] may be a partial order;
- returns the alignment in x_to_y[] and y_to_x, and also
- returns the alignment score as the return value */
-LPOScore_T align_lpo_po (LPOSequence_T *lposeq_x,
- LPOSequence_T *lposeq_y,
- ResidueScoreMatrix_T *m,
- LPOLetterRef_T **x_to_y,
- LPOLetterRef_T **y_to_x,
- LPOScore_T (*scoring_function)
- (int,int,LPOLetter_T [],LPOLetter_T [],ResidueScoreMatrix_T *),
- int use_global_alignment)
-{
- int len_x = lposeq_x->length;
- int len_y = lposeq_y->length;
- LPOLetter_T *seq_x = lposeq_x->letter;
- LPOLetter_T *seq_y = lposeq_y->letter;
-
- int i,j,j_left,best_x,best_y,nfree_list=0,ilast_right=0;
- LPOScore_T **score=NULL,*my_score,*my_matrix,*score_base=NULL;
- LPOMove_T **move=NULL,*move_base=NULL,*my_move;
- LPOGapLength_T **gap_length=NULL,*my_gap_length,*gap_length_base=NULL;
- LPOLetterLink_T *left,*my_left,*y_left;
- int new_gap_len,insert_x,previous_x,previous_y,
- i_x,new_score,new_x,new_y,current_gap_length,i_y,insert_y;
- LPOScore_T match_score,previous_score,
- insert_x_try,insert_x_score,insert_y_score,best_score= -999999;
-
- int max_gap_length;
- LPOScore_T *gap_penalty_x, *gap_penalty_y;
-
- max_gap_length = m->max_gap_length;
- gap_penalty_x = m->gap_penalty_x;
- gap_penalty_y = m->gap_penalty_y;
-
- CALLOC(score,len_x,LPOScore_T *); /* ALLOCATE MATRIX STORAGE: ROW POINTERS */
- CALLOC(move,len_x,LPOMove_T *);
- CALLOC(gap_length,len_x,LPOGapLength_T *);
- CALLOC(score_base,len_x*(len_y+1),LPOScore_T); /*ALLOCATE MATRIX RECTANGLE */
- CALLOC(move_base,len_x*(len_y+1),LPOMove_T); /*ALLOCATE MATRIX RECTANGLE */
- CALLOC(gap_length_base,len_x*(len_y+1),LPOGapLength_T); /*ALLOCATE MATRIX RECTANGLE */
-
-
- LOOPF (i,len_x) {/* BUILD UP DP MATRIX, ROW BY ROW */
- score[i]=score_base+i*(len_y+1)+1; /* LEAVE SPACE FOR [-1] ENTRY */
- move[i]=move_base+i*(len_y+1)+1; /* LEAVE SPACE FOR [-1] ENTRY */
- gap_length[i]=gap_length_base+i*(len_y+1)+1; /*LEAVE SPACE FOR [-1] ENTRY*/
- my_move=move[i]; /* USED TO SPEED UP MATRIX ACCESS INSIDE INNER LOOP*/
- my_score=score[i];
- my_gap_length=gap_length[i];
- score[i][-1]= -999999; /* UNACCEPTABLE SCORE ENSURES -1 NEVER CHOSEN*/
- /* my_matrix=m->score[seq_x[i].letter]; NOT USED */
- if (seq_x[i].left.ipos>=0) /* AT LEAST ONE VALID POSITION TO THE LEFT */
- my_left= &seq_x[i].left;
- else /* THERE IS NO POSITION TO THE LEFT */
- my_left=NULL;
- LOOPF (j,len_y) {
- j_left=SEQ_Y_LEFT(j); /* POSITION TO THE LEFT OF j */
- previous_score=previous_x=previous_y=0;
- i_x=1;
- insert_x_score= -999999;
- for (left=my_left;left;left=left->more) {
- if (move[left->ipos][j].x>0) /* COULD BE [X,0] GAP CONTINUATION */
- current_gap_length=gap_length[left->ipos][j];
- else/*NOT AN EXTENSION OF A [X,0] GAP, SO TREAT AS START OF NEW GAP*/
- current_gap_length=0;
- insert_x_try=score[left->ipos][j] /* FIND BEST [X,0] MOVE */
- + left->score /* INCLUDE WEIGHTING FROM THIS EDGE */
- - gap_penalty_x[current_gap_length];
- if (insert_x_try>insert_x_score) { /* IF BEST insert_x MOVE, SAVE*/
- insert_x=i_x;
- insert_x_score=insert_x_try;
- new_gap_len=current_gap_length+1;
- if (new_gap_len>max_gap_length) /* PREVENT OVERFLOW */
- new_gap_len=max_gap_length;
- }
-
- /* FIND BEST [X,Y] MOVE */
- if (seq_y[j].left.ipos>=0){/*AT LEAST ONE VALID POSITION TO THE LEFT*/
- i_y=1;
- for (y_left= &seq_y[j].left;y_left;y_left=y_left->more) {
- if (score[left->ipos][y_left->ipos] + left->score + y_left->score
- >previous_score) {
- previous_score=score[left->ipos][y_left->ipos]
- + left->score + y_left->score;
- previous_x=i_x;
- previous_y=i_y;
- }
- i_y++;
- }
- }
- i_x++; /* ADVANCE X MOVE INDEX */
- } /* DONE SCANNING PREDECESSORS ON X */
-
- match_score = previous_score /* TAKE BEST PREDECESSOR */
- + scoring_function(i,j,seq_x,seq_y,m);
-#ifdef USE_LOCAL_NEUTRALITY_CORRECTION /* NO LONGER USED */
- if (seq_x[i].score<seq_y[j].score) /* USE STRONGEST NEUTRALITY ADJ'T*/
- match_score += seq_x[i].score;
- else
- match_score += seq_y[j].score;
-#endif
- if (match_score>insert_x_score) { /* PREFER [X,Y] MOVE */
- new_score=match_score;
- new_x=previous_x;
- new_y=previous_y;
- new_gap_len=0;
- }
- else { /* PREFER [X,0] MOVE */
- new_score=insert_x_score;
- new_x=insert_x;
- new_y=0;
- }
-
- /* [0,Y] MOVE */
- insert_y_score= -999999;
- if (seq_y[j].left.ipos>=0){/*AT LEAST ONE VALID POSITION TO THE LEFT*/
- i_y=1;
- for (y_left= &seq_y[j].left;y_left;y_left=y_left->more) {
- if (my_move[y_left->ipos].y>0) /* COULD BE [0,1] GAP CONTINUATION */
- current_gap_length=my_gap_length[y_left->ipos];
- else/*NOT AN EXTENSION OF A [0,1] GAP, SO TREAT AS START OF NEW GAP*/
- current_gap_length=0;
- if (insert_y_score <
- my_score[y_left->ipos]-gap_penalty_y[current_gap_length]) {
- insert_y_score=my_score[y_left->ipos]-gap_penalty_y[current_gap_length];
- insert_y=i_y;
- }
- i_y++;
- }
- }
-
- if (insert_y_score<new_score) { /* [new_x,new_y] MOVE IS BEST */
- my_score[j]=new_score;
- my_move[j].x=new_x;
- my_move[j].y=new_y;
- my_gap_length[j]=new_gap_len;
- if (new_gap_len==0)
- my_move[j].is_aligned=1;
- }
- else { /* [0,Y] MOVE IS BEST */
- my_score[j]=insert_y_score;
- my_move[j].x=0;
- my_move[j].y=insert_y;
- my_gap_length[j]=current_gap_length+1;
- if (my_gap_length[j]>max_gap_length) /* PREVENT OVERFLOW */
- my_gap_length[j]=max_gap_length;
- }
- if (my_score[j]>best_score) { /* RECORD BEST MOVE */
- best_score=my_score[j];
- best_x=i;
- best_y=j;
- }
- }
- } /* DYNAMIC PROGRAMING MATRIX COMPLETE, NOW TRACE BACK FROM best_x,best_y*/
-
- IF_GUARD(best_x>=len_x || best_y>=len_y,1.1,(ERRTXT,"Bounds exceeded!\nbest_x,best_y:%d,%d\tlen:%d,%d\n",best_x,best_y,len_x,len_y),CRASH);
- trace_back_lpo_po_alignment(len_x,seq_x,len_y,seq_y,move,best_x,best_y,
- x_to_y,y_to_x);
-
-
- FREE(score_base); /* FREE MEMORY */
- FREE(move_base);
- FREE(gap_length_base);
- FREE(score);
- FREE(move);
- FREE(gap_length);
- return best_score;
-}
-
-
-
-
-
Deleted: trunk/packages/poa/trunk/align_lpo_po2.c
===================================================================
--- trunk/packages/poa/trunk/align_lpo_po2.c 2008-03-22 15:38:40 UTC (rev 1651)
+++ trunk/packages/poa/trunk/align_lpo_po2.c 2008-03-22 17:20:38 UTC (rev 1652)
@@ -1,487 +0,0 @@
-
-#include "default.h"
-#include "poa.h"
-#include "seq_util.h"
-#include "lpo.h"
-
-
-/** set nonzero for old scoring (gap-opening penalty for X-Y transition) */
-#define DOUBLE_GAP_SCORING (0)
-
-
-typedef struct {
- unsigned char x;
- unsigned char y;
-}
-DPMove_T;
-
-typedef struct {
- LPOScore_T score;
- short gap_x, gap_y;
-}
-DPScore_T;
-
-
-
-#define LPO_INITIAL_NODE 1
-#define LPO_FINAL_NODE 2
-
-static void get_lpo_stats (LPOSequence_T *lposeq,
- int *n_nodes_ptr, int *n_edges_ptr, int **node_type_ptr,
- int **refs_from_right_ptr, int *max_rows_alloced_ptr,
- LPOLetterLink_T ***left_links_ptr)
-{
- int i, j, rows_alloced = 0, max_rows_alloced = 0, n_edges = 0, len = lposeq->length;
- LPOLetter_T *seq = lposeq->letter;
- int *node_type;
- int *refs_from_right, *tmp;
- LPOLetterSource_T *src;
- LPOLetterLink_T *lnk, **left_links;
-
- CALLOC (node_type, len, int);
- CALLOC (refs_from_right, len, int);
- CALLOC (tmp, len, int);
- CALLOC (left_links, len, LPOLetterLink_T *);
-
- for (i=0; i<len; i++) {
-
- /* NODES CONTAINING THE FIRST RESIDUE IN ANY SEQ ARE 'INITIAL'; */
- /* DITTO, LAST RESIDUE IN ANY SEQ, 'FINAL'. */
- for (src = &(seq[i].source); src != NULL && src->iseq >= 0; src = src->more) {
- if (src->ipos == 0) {
- node_type[i] = (node_type[i] | LPO_INITIAL_NODE);
- }
- if (src->ipos == (lposeq->source_seq[src->iseq]).length - 1) {
- node_type[i] = (node_type[i] | LPO_FINAL_NODE);
- }
- }
-
- /* COUNTING THE LEFT-LINKS BACK TO EACH NODE ALLOWS FOR EFFICIENT */
- /* MEMORY MANAGEMENT OF 'SCORE' ROWS (in align_lpo_po). */
- for (lnk = &(seq[i].left); lnk != NULL && lnk->ipos >= 0; lnk = lnk->more) {
- refs_from_right[lnk->ipos]++;
- n_edges++;
- }
- }
-
- /* ALL 'INITIAL' NODES (1st in some seq) MUST BE LEFT-LINKED TO -1. */
- /* THIS ALLOWS FREE ALIGNMENT TO ANY 'BRANCH' IN GLOBAL ALIGNMENT. */
- for (i=0; i<len; i++) {
- if ((node_type[i] & LPO_INITIAL_NODE) && seq[i].left.ipos != -1) {
- CALLOC (left_links[i], 1, LPOLetterLink_T);
- left_links[i]->ipos = -1;
- left_links[i]->score = 0;
- left_links[i]->more = &seq[i].left;
- }
- else {
- left_links[i] = &seq[i].left;
- }
- }
-
- for (i=0; i<len; i++) {
- tmp[i] = refs_from_right[i];
- }
-
- for (i=0; i<len; i++) {
- rows_alloced++;
- if (rows_alloced > max_rows_alloced) {
- max_rows_alloced = rows_alloced;
- }
- for (lnk = &(seq[i].left); lnk != NULL && lnk->ipos >= 0; lnk = lnk->more) {
- if ((--tmp[lnk->ipos]) == 0) {
- rows_alloced--;
- }
- }
- }
-
- FREE (tmp);
-
- (*n_nodes_ptr) = len;
- (*n_edges_ptr) = n_edges;
- (*node_type_ptr) = node_type;
- (*refs_from_right_ptr) = refs_from_right;
- (*max_rows_alloced_ptr) = max_rows_alloced;
- (*left_links_ptr) = left_links;
-}
-
-
-static void trace_back_lpo_alignment (int len_x, int len_y,
- DPMove_T **move,
- LPOLetterLink_T **x_left,
- LPOLetterLink_T **y_left,
- LPOLetterRef_T best_x, LPOLetterRef_T best_y,
- LPOLetterRef_T **x_to_y,
- LPOLetterRef_T **y_to_x)
-{
- int i, xmove, ymove;
- LPOLetterRef_T *x_al = NULL, *y_al = NULL;
- LPOLetterLink_T *left;
-
- CALLOC (x_al, len_x, LPOLetterRef_T);
- CALLOC (y_al, len_y, LPOLetterRef_T);
- LOOP (i,len_x) x_al[i] = INVALID_LETTER_POSITION;
- LOOP (i,len_y) y_al[i] = INVALID_LETTER_POSITION;
-
- while (best_x >= 0 && best_y >= 0) {
-
- xmove = move[best_y][best_x].x;
- ymove = move[best_y][best_x].y;
-
- if (xmove>0 && ymove>0) { /* ALIGNED! MAP best_x <--> best_y */
- x_al[best_x]=best_y;
- y_al[best_y]=best_x;
- }
-
- if (xmove == 0 && ymove == 0) { /* FIRST ALIGNED PAIR */
- x_al[best_x]=best_y;
- y_al[best_y]=best_x;
- break; /* FOUND START OF ALIGNED REGION, SO WE'RE DONE */
- }
-
- if (xmove>0) { /* TRACE BACK ON X */
- left = x_left[best_x];
- while ((--xmove)>0) {
- left = left->more;
- }
- best_x = left->ipos;
- }
-
- if (ymove>0) { /* TRACE BACK ON Y */
- left = y_left[best_y];
- while ((--ymove)>0) {
- left = left->more;
- }
- best_y = left->ipos;
- }
- }
-
- if (x_to_y) /* HAND BACK ALIGNMENT RECIPROCAL MAPPINGS */
- *x_to_y = x_al;
- else
- free(x_al);
- if (y_to_x)
- *y_to_x = y_al;
- else
- free(y_al);
-
- return;
-}
-
-
-/** (align_lpo_po:)
- performs partial order alignment:
- lposeq_x and lposeq_y are partial orders;
- returns the alignment in x_to_y[] and y_to_x[], and also
- returns the alignment score as the return value.
-*/
-
-LPOScore_T align_lpo_po (LPOSequence_T *lposeq_x,
- LPOSequence_T *lposeq_y,
- ResidueScoreMatrix_T *m,
- LPOLetterRef_T **x_to_y,
- LPOLetterRef_T **y_to_x,
- LPOScore_T (*scoring_function)
- (int, int, LPOLetter_T *, LPOLetter_T *, ResidueScoreMatrix_T *),
- int use_global_alignment)
-{
- LPOLetter_T *seq_x = lposeq_x->letter;
- LPOLetter_T *seq_y = lposeq_y->letter;
-
- int len_x, len_y;
- int n_edges_x, n_edges_y;
- int *node_type_x, *node_type_y;
- int *refs_from_right_x, *refs_from_right_y;
- int max_rows_alloced_x, max_rows_alloced_y, n_score_rows_alloced = 0;
-
- int i, j, xcount, ycount, prev_gap;
- int best_x = -1, best_y = -1;
- LPOScore_T min_score = -999999, best_score = -999999;
- int possible_end_square;
- LPOLetterLink_T **x_left = NULL, **y_left = NULL, *xl, *yl;
- DPMove_T **move = NULL, *my_move;
-
- DPScore_T *curr_score = NULL, *prev_score = NULL, *init_col_score = NULL, *my_score;
- DPScore_T **score_rows = NULL;
-
- int max_gap_length;
- LPOScore_T *gap_penalty_x, *gap_penalty_y;
- int *next_gap_array, *next_perp_gap_array;
-
- LPOScore_T try_score, insert_x_score, insert_y_score, match_score;
- int insert_x_x, insert_x_gap;
- int insert_y_y, insert_y_gap;
- int match_x, match_y;
-
- get_lpo_stats (lposeq_x, &len_x, &n_edges_x, &node_type_x, &refs_from_right_x, &max_rows_alloced_x, &x_left);
- get_lpo_stats (lposeq_y, &len_y, &n_edges_y, &node_type_y, &refs_from_right_y, &max_rows_alloced_y, &y_left);
-
- /*
- fprintf (stdout, "sequence x: %ld nodes, %ld edges, %ld rows at most --> %ld mem\n", len_x, n_edges_x, max_rows_alloced_x, max_rows_alloced_x * len_y);
- fprintf (stdout, "sequence y: %ld nodes, %ld edges, %ld rows at most --> %ld mem\n", len_y, n_edges_y, max_rows_alloced_y, max_rows_alloced_y * len_x);
- */
-
- /* INITIALIZE GAP PENALTIES: */
- max_gap_length = m->max_gap_length;
- gap_penalty_x = m->gap_penalty_x;
- gap_penalty_y = m->gap_penalty_y;
- CALLOC (next_gap_array, max_gap_length + 2, int);
- CALLOC (next_perp_gap_array, max_gap_length + 2, int);
-
- for (i=0; i<max_gap_length+1; i++) {
- /* GAP LENGTH EXTENSION RULE: */
- /* 0->1, 1->2, 2->3, ..., M-1->M; but M->M. */
- next_gap_array[i] = (i<max_gap_length) ? i+1 : i;
- /* PERPENDICULAR GAP (i.e. X FOR A GROWING Y-GAP) IS KEPT AT 0 IF DOUBLE-GAP-SCORING (old scoring) IS USED. */
- next_perp_gap_array[i] = (DOUBLE_GAP_SCORING ? 0 : next_gap_array[i]);
- }
-
- /* GAP LENGTH = M+1 IS USED FOR INITIAL STATE. */
- /* THIS MUST BE TREATED DIFFERENTLY FOR GLOBAL v. LOCAL ALIGNMENT: */
- if (0 == use_global_alignment) { /* FREE EXTENSION OF INITIAL GAP (FOR LOCAL ALIGNMENT) */
- gap_penalty_x[max_gap_length+1] = gap_penalty_y[max_gap_length+1] = 0;
- next_gap_array[max_gap_length+1] = next_perp_gap_array[max_gap_length+1] = max_gap_length+1;
- }
- else { /* TREAT INITIAL GAP LIKE ANY OTHER (FOR GLOBAL ALIGNMENT) */
- gap_penalty_x[max_gap_length+1] = gap_penalty_x[0];
- gap_penalty_y[max_gap_length+1] = gap_penalty_y[0];
- next_gap_array[max_gap_length+1] = next_gap_array[0];
- next_perp_gap_array[max_gap_length+1] = next_perp_gap_array[0];
- }
-
-
- /* ALLOCATE MEMORY FOR 'MOVE' AND 'SCORE' MATRICES: */
-
- CALLOC (move, len_y, DPMove_T *);
- for (i=0; i<len_y; i++) {
- CALLOC (move[i], len_x, DPMove_T);
- }
-
- CALLOC (init_col_score, len_y+1, DPScore_T);
- init_col_score = &(init_col_score[1]);
-
- CALLOC (score_rows, len_y+1, DPScore_T *);
- score_rows = &(score_rows[1]);
- CALLOC (score_rows[-1], len_x+1, DPScore_T);
- score_rows[-1] = &(score_rows[-1][1]);
- curr_score = score_rows[-1];
-
-
- /* FILL INITIAL ROW (-1). */
- /* GAP LENGTH = M+1 IS USED FOR INITIAL STATE. */
-
- curr_score[-1].score = 0;
- curr_score[-1].gap_x = curr_score[-1].gap_y = max_gap_length+1;
-
- for (i=0; i<len_x; i++) {
- curr_score[i].score = min_score;
- for (xcount = 1, xl = x_left[i]; xl != NULL; xcount++, xl = xl->more) {
- prev_gap = curr_score[xl->ipos].gap_x;
- try_score = curr_score[xl->ipos].score + xl->score - gap_penalty_x[prev_gap];
- if (try_score > curr_score[i].score) {
- curr_score[i].score = try_score;
- curr_score[i].gap_x = next_gap_array[prev_gap];
- curr_score[i].gap_y = next_perp_gap_array[prev_gap];
- }
- }
- }
-
- /* FILL INITIAL COLUMN (-1). */
-
- init_col_score[-1] = curr_score[-1];
- for (i=0; i<len_y; i++) {
- init_col_score[i].score = min_score;
- for (ycount = 1, yl = y_left[i]; yl != NULL; ycount++, yl = yl->more) {
- prev_gap = init_col_score[yl->ipos].gap_y;
- try_score = init_col_score[yl->ipos].score + yl->score - gap_penalty_y[prev_gap];
- if (try_score > init_col_score[i].score) {
- init_col_score[i].score = try_score;
- init_col_score[i].gap_x = next_perp_gap_array[prev_gap];
- init_col_score[i].gap_y = next_gap_array[prev_gap];
- }
- }
- }
-
-
- /** MAIN DYNAMIC PROGRAMMING LOOP **/
-
-
- /* OUTER LOOP (i-th position in LPO y): */
- for (i=0; i<len_y; i++) {
-
- /* ALLOCATE MEMORY FOR 'SCORE' ROW i: */
- CALLOC (score_rows[i], len_x+1, DPScore_T);
- score_rows[i] = &(score_rows[i][1]);
- n_score_rows_alloced++;
-
- curr_score = score_rows[i];
- curr_score[-1] = init_col_score[i];
-
- /* INNER LOOP (j-th position in LPO x): */
- for (j=0; j<len_x; j++) {
-
- match_score = (use_global_alignment) ? min_score : 0;
- match_x = match_y = 0;
-
- insert_x_score = insert_y_score = min_score;
- insert_x_x = insert_y_y = 0;
- insert_x_gap = insert_y_gap = 0;
-
- /* THIS SQUARE CAN END THE ALIGNMENT IF WE'RE USING LOCAL ALIGNMENT, */
- /* OR IF BOTH THE X- AND Y-NODES CONTAIN THE END OF A SEQUENCE. */
- possible_end_square = ((0 == use_global_alignment) || ((node_type_x[j] & LPO_FINAL_NODE) && (node_type_y[i] & LPO_FINAL_NODE)));
-
- /* LOOP OVER y-predecessors: */
- for (ycount = 1, yl = y_left[i]; yl != NULL; ycount++, yl = yl->more) {
-
- prev_score = score_rows[yl->ipos];
-
- /* IMPROVE Y-INSERTION?: trace back to (i'=yl->ipos, j) */
- prev_gap = prev_score[j].gap_y;
- try_score = prev_score[j].score + yl->score - gap_penalty_y[prev_gap];
- if (try_score > insert_y_score) {
- insert_y_score = try_score;
- insert_y_y = ycount;
- insert_y_gap = prev_gap;
- }
-
- /* LOOP OVER x-predecessors (INSIDE y-predecessor LOOP): */
- for (xcount = 1, xl = x_left[j]; xl != NULL; xcount++, xl = xl->more) {
-
- /* IMPROVE XY-MATCH?: trace back to (i'=yl->ipos, j'=xl->ipos) */
- try_score = prev_score[xl->ipos].score + xl->score + yl->score;
- if (try_score > match_score) {
- match_score = try_score;
- match_x = xcount;
- match_y = ycount;
- }
- }
- }
-
- /* LOOP OVER x-predecessors (OUTSIDE y-predecessor LOOP): */
- for (xcount = 1, xl = x_left[j]; xl != NULL; xcount++, xl = xl->more) {
-
- /* IMPROVE X-INSERTION?: trace back to (i, j'=xl->ipos) */
- prev_gap = curr_score[xl->ipos].gap_x;
- try_score = curr_score[xl->ipos].score + xl->score - gap_penalty_x[prev_gap];
- if (try_score > insert_x_score) {
- insert_x_score = try_score;
- insert_x_x = xcount;
- insert_x_gap = prev_gap;
- }
- }
-
- /* USE CUSTOM OR DEFAULT SCORING FUNCTION: */
- if (scoring_function != NULL) {
- match_score += scoring_function (j, i, seq_x, seq_y, m);
- }
- else {
- match_score += m->score[seq_x[i].letter][seq_y[j].letter];
- }
-
- my_score = &curr_score[j];
- my_move = &move[i][j];
-
- if (match_score > insert_y_score && match_score > insert_x_score) {
- /* XY-MATCH */
- my_score->score = match_score;
- my_score->gap_x = 0;
- my_score->gap_y = 0;
- my_move->x = match_x;
- my_move->y = match_y;
- }
- else if (insert_x_score > insert_y_score) {
- /* X-INSERTION */
- my_score->score = insert_x_score;
- my_score->gap_x = next_gap_array[insert_x_gap];
- my_score->gap_y = next_perp_gap_array[insert_x_gap];
- my_move->x = insert_x_x;
- my_move->y = 0;
- }
- else {
- /* Y-INSERTION */
- my_score->score = insert_y_score;
- my_score->gap_x = next_perp_gap_array[insert_y_gap];
- my_score->gap_y = next_gap_array[insert_y_gap];
- my_move->x = 0;
- my_move->y = insert_y_y;
- }
-
- /* RECORD BEST ALIGNMENT END FOR TRACEBACK: */
- if (possible_end_square && my_score->score >= best_score) {
- /* BREAK TIES BY CHOOSING MINIMUM (x,y): */
- if (my_score->score > best_score || (j == best_x && i < best_y) || j < best_x) {
- best_score = my_score->score;
- best_x = j;
- best_y = i;
- }
- }
- }
-
- /* UPDATE # OF REFS TO 'SCORE' ROWS; FREE MEMORY WHEN POSSIBLE: */
- for (yl = y_left[i]; yl != NULL; yl = yl->more) if ((j = yl->ipos) >= 0) {
- if ((--refs_from_right_y[j]) == 0) {
- score_rows[j] = &(score_rows[j][-1]);
- FREE (score_rows[j]);
- n_score_rows_alloced--;
- }
- }
- if (refs_from_right_y[i] == 0) {
- score_rows[i] = &(score_rows[i][-1]);
- FREE (score_rows[i]);
- n_score_rows_alloced--;
- }
- }
-
- IF_GUARD(best_x>=len_x || best_y>=len_y,1.1,(ERRTXT,"Bounds exceeded!\nbest_x,best_y:%d,%d\tlen:%d,%d\n",best_x,best_y,len_x,len_y),CRASH);
-
- /**/
- fprintf (stderr, "aligned (%d nodes, %ld edges) to (%d nodes, %ld edges): ", len_x, n_edges_x, len_y, n_edges_y);
- fprintf (stderr, "best %s score = %d @ (%d %d)\n", (use_global_alignment ? "global" : "local"), best_score, best_x, best_y);
- /**/
-
- /* DYNAMIC PROGRAMING MATRIX COMPLETE, NOW TRACE BACK FROM best_x, best_y */
- trace_back_lpo_alignment (len_x, len_y, move, x_left, y_left,
- best_x, best_y,
- x_to_y, y_to_x);
-
-
- /* CLEAN UP AND RETURN: */
-
- FREE (node_type_x);
- FREE (node_type_y);
-
- FREE (refs_from_right_x);
- FREE (refs_from_right_y);
-
- FREE (next_gap_array);
- FREE (next_perp_gap_array);
-
- score_rows[-1] = &(score_rows[-1][-1]);
- FREE (score_rows[-1]);
- score_rows = &(score_rows[-1]);
- FREE (score_rows);
-
- init_col_score = &(init_col_score[-1]);
- FREE (init_col_score);
-
- for (i=0; i<len_x; i++) {
- if (x_left[i] != &seq_x[i].left) {
- FREE (x_left[i]);
- }
- }
- FREE (x_left);
-
- for (i=0; i<len_y; i++) {
- if (y_left[i] != &seq_y[i].left) {
- FREE (y_left[i]);
- }
- }
- FREE (y_left);
-
- for (i=0; i<len_y; i++) {
- FREE (move[i]);
- }
- FREE (move);
-
- return best_score;
-}
Deleted: trunk/packages/poa/trunk/align_score.c
===================================================================
--- trunk/packages/poa/trunk/align_score.c 2008-03-22 15:38:40 UTC (rev 1651)
+++ trunk/packages/poa/trunk/align_score.c 2008-03-22 17:20:38 UTC (rev 1652)
@@ -1,31 +0,0 @@
-
-
-
-
-#include "default.h"
-#include "poa.h"
-#include "seq_util.h"
-#include "lpo.h"
-
-
-
-typedef struct {
- unsigned char x;
- unsigned char y:7;
- unsigned char is_aligned:1;
-} LPOMove_T;
-
-
-
-/* YOU CAN PUT ANY SCORING METHOD YOU WANT INSIDE THIS
- FUNCTION. JUST REPLACE THE CONTENTS OF THE FUNCTION WITH
- YOUR SCORING METHOD */
-LPOScore_T matrix_scoring_function(int i,
- int j,
- LPOLetter_T seq_x[],
- LPOLetter_T seq_y[],
- ResidueScoreMatrix_T *m)
-{
- return m->score[seq_x[i].letter][seq_y[j].letter]; /*TRIVIAL SCORING FUNC:
- JUST USE MATRIX VALUE*/
-}
Deleted: trunk/packages/poa/trunk/align_score.h
===================================================================
--- trunk/packages/poa/trunk/align_score.h 2008-03-22 15:38:40 UTC (rev 1651)
+++ trunk/packages/poa/trunk/align_score.h 2008-03-22 17:20:38 UTC (rev 1652)
@@ -1,15 +0,0 @@
-#ifndef ALIGN_SCORE_HEADER_INCLUDED
-#define ALIGN_SCORE_HEADER_INCLUDED
-
-#include <default.h>
-#include <poa.h>
-#include <seq_util.h>
-
-/*********************************************************** align_score.c */
-LPOScore_T matrix_scoring_function(int i,
- int j,
- LPOLetter_T seq_x[],
- LPOLetter_T seq_y[],
- ResidueScoreMatrix_T *m);
-
-#endif
Deleted: trunk/packages/poa/trunk/black_flag.c
===================================================================
--- trunk/packages/poa/trunk/black_flag.c 2008-03-22 15:38:40 UTC (rev 1651)
+++ trunk/packages/poa/trunk/black_flag.c 2008-03-22 17:20:38 UTC (rev 1652)
@@ -1,125 +0,0 @@
-
-
-
-#include "default.h" /* ~~I */
-
-
-
-
-char ERRTXT[1024]
- ="";
-char *Program_name="black_flag";
-char *Program_version="unknown";
-int Already_reported_crash=0;
-
-int black_flag(int bug_level,
- char sourcefile[],
- int sourceline,
- char sourcefile_revision[])
-{
- static int last_line= -1;
- char *error_names[max_black_flag_type]
- ={"CRASH","DIED","EXCEPTION","BAD_DATA","WARNING","DEBUG"};
-
- switch (bug_level) {
-#ifndef DEBUG_USER_VERSION
- case DEBUG_black_flag_type: /* IN DEV'T VERSION JUST CRASH!!! */
-#ifdef DEBUG_VERSION
- case TRAP_black_flag_type: /* FOR DEBUG VERSION AND TRAP, CAUSE A CORE DUMP*/
-#endif
-#endif
- case CRASH_black_flag_type: /* COULD INCLUDE MECHANISMS TO SEND EMAIL? */
- /* print message at level 1, i.e. if we are printing anything at all */
- PRINT_DEBUG(1,(DBOUT,"black_flag: %s %s:%s %s %s,%d\n%s\nend_black_flag\n",
- error_names[bug_level],
- Program_name,Program_version,
- sourcefile,sourcefile_revision,sourceline,
- ERRTXT[0]? ERRTXT:""));
- if (Already_reported_crash)
- return 1;
- Already_reported_crash=1;
- abort(); /* FORCE A CORE DUMP */
-
-
- default: /* JUST PRINT THE ERROR */
- PRINT_DEBUG(1,(DBOUT,"black_flag: %s %s:%s %s %s,%d\n%s\nend_black_flag\n",
- error_names[bug_level],
- Program_name,Program_version,
- sourcefile,sourcefile_revision,sourceline,
- ERRTXT[0]? ERRTXT:""));
- break;
-
- }
-
- ERRTXT[0]='\0'; /* RESET THE ERROR TEXT */
- last_line=sourceline;
- return 1; /* SEND SIGNAL TO HANDLER CLAUSE TO DEAL WITH THIS ERROR */
-}
-
-
-
-
-
-
-
-
-void handle_crash(int sigcode)
-{
- char *crash_mode;
-
- if (Already_reported_crash)
- exit(-1); /* IN ENDLESS LOOP REPORTING CRASH OVER & OVER? */
-
- Already_reported_crash=1;
- black_flag(CRASH_black_flag_type,"",0,"");
- if ((crash_mode=getenv("HANDLE_CRASH")) && 0==strcmp(crash_mode,"NOCORE"))
- exit(-1);
- else
- return; /*ENVIRONMENT SETTING ASKS US TO DUMP A CORE IMMEDIATELY */
-}
-
-
-
-
-
-int handle_crash_init(void (*crash_fun)())
-{
-#define HANDLE_CRASH_MAX 5
- int i,signal_type[HANDLE_CRASH_MAX]
- ={SIGSEGV,
-#ifdef SIGBUS /* LINUX DOESN'T HAVE BUS ERRORS? */
- SIGBUS,
-#else
- SIGSEGV, /* REUSE SEGV AS DUMMY ENTRY */
-#endif
- SIGABRT,SIGFPE,SIGTRAP}; /*LIST OF SIGNALS TO HANDLE*/
- if (!crash_fun) /* NO CRASH FUNCTION SUPPLIED, SO RESET TO DEFAULT */
- crash_fun = SIG_DFL; /* RESET TO STANDARD CRASH BEHAVIOR */
-
- LOOP (i,HANDLE_CRASH_MAX) /* SET THIS HANDLER FOR ALL OUR SIGNALS */
- signal(signal_type[i],crash_fun);
- return 0;
-}
-
-void black_flag_init_args(int narg,char *arg[],char progversion[])
-{
- int i,len=0;
- LOOP (i,narg)
- len+=strlen(arg[i])+2;
- CALLOC(Program_name,len,char);
- LOOPF (i,narg) {
- strcat(Program_name,arg[i]);
- strcat(Program_name," ");
- }
- if (progversion)
- Program_version=progversion;
- handle_crash_init(handle_crash);
-}
-
-
-
-void black_flag_init(char progname[],char progversion[])
-{
- black_flag_init_args(1,&progname,progversion);
-}
-
Deleted: trunk/packages/poa/trunk/black_flag.h
===================================================================
--- trunk/packages/poa/trunk/black_flag.h 2008-03-22 15:38:40 UTC (rev 1651)
+++ trunk/packages/poa/trunk/black_flag.h 2008-03-22 17:20:38 UTC (rev 1652)
@@ -1,245 +0,0 @@
-
-
-
-#ifndef BLACK_FLAG_HEADER_INCLUDED
-#define BLACK_FLAG_HEADER_INCLUDED 1
-
-#include <signal.h>
-
-extern char ERRTXT[];
-#define DBOUT stderr
-
-enum {
- CRASH_black_flag_type,
- TRAP_black_flag_type,
- COPE_black_flag_type,
- USERR_black_flag_type,
- WARN_black_flag_type,
- DEBUG_black_flag_type,
- max_black_flag_type
- };
-
-
-#define NOERRMSG (ERRTXT,"")
-
-
-
-
-
-/********************************************************************
- *
- * IF_PARANOID:
- * IF_PARANOID(CONDITION,REVISION,MESSAGE)
- *
- * error checking that will significantly slow execution or
- * otherwise is desirable only in situations of extreme
- * unction, e.g. during our debugging!!!
- *
- * The beauty of the IF_PARANOID idea is that you should sprinkle
- * it liberally EVERYWHERE in your code without thought for
- * whether it is necessary or might hurt performance,
- * because it will NOT even be compiled into the program in
- * the production version!!!!!!
- *
- * use IF_PARANOID checks EVERYWHERE you can think of
- * definite error signals, even if you think "That shouldn't
- * EVER happen".
- *
- * if the IF_PARANOID CONDITIONAL is TRUE, the program will abort();
- *
- *
- *
- * Also, our emacs custom highlighting system has been programmed
- * to show IF_PARANOID and IF_DEBUG lines in a dim gray, so
- * that these extensive error checks will not visually obscure
- * the layout & organization of your algorithms.
- *
- * black_flag() is smart about printing both version information
- * such as the vmake version name the executable was created by,
- * and also the exact revision number of the file in which the
- * error occured.
- *
- *
- *-------------------------------------------------
- *
- * IF_DEBUG:
- * IF_DEBUG(CONDITION,REVISION,MESSAGE)
- *
- * also not expected to be included in a final production
- * release, but should not impact performance so significantly that
- * it's unpleasant to test such a version in regular use patterns.
- * Classic examples would be fairly pedantic checks at the entry
- * and exit of all functions, testing for conditions "that shouldn't
- * ever happen."
- *
- * if the IF_DEBUG CONDITIONAL is TRUE, the program will abort();
- *
- *
- *-------------------------------------------------
- *
- * IF_GUARD:
- * IF_GUARD(CONDITION,REVISION,MESSAGE,LEVEL)
- *
- * checks INCLUDED in final production versions, but taking
- * advantage of the black_flag system to allow us to easily
- * control what will be done in response to an error
- * in any given executable, via compile-time flags:
- *
- * e.g.
- * print error info on stderr, or to special log files,
- *
- * force a core dump,
- *
- * run dbx via an auto script to generate a stack frame,
- * and mail the results to develop-support at mag.com,
- * etc.
- *
- * IF_GUARD differs from IF_PARANOID and IF_DEBUG in that it
- * requires an error_level argument, which must be one of
- *
- * CRASH ... the error is fatal, abort()
- *
- * TRAP ... the error is being trapped, but
- * not handled. e.g. quiting from
- * a function because some essential
- * file was missing...
- *
- * COPE ... the error is being handled nicely.
- * the handler code following IF_GUARD
- * knows how to correct for the situation.
- *
- * USERR ... the user appears to have done something
- * that makes no sense; they must be
- * confused by our interface.
- *
- * WARN ... suspicious data or input, not
- * definitely an error.
- *
- *
- *
- *-------------------------------------------------
- *
- * Examples:
- *
- * IF_PARANOID((iatom<0),4.6,(ERRTXT,"wacky iatom=%d",iatom));
- *
- * IF_DEBUG((iatom<0),4.6,(ERRTXT,"wacky iatom=%d",iatom));
- *
- * IF_GUARD((iatom<0),4.6,(ERRTXT,"wacky iatom=%d",iatom),CRASH);
- *
- * IF_GUARD((iatom<0),4.6,(ERRTXT,"wacky iatom=%d",iatom),COPE) {
- * put some code to handle the error condition here;
- * }
- *
- * USE THE 4.6 KEYWORD TO PUT IN VERSION NUMBERS AUTOMATICALLY.
- *
- *******************************************************************/
-
-
-#if defined(PARANOID_VERSION) /*???????????????????*/
-#ifndef DEBUG_VERSION
-#define DEBUG_VERSION
-#endif
-
-#define IF_PARANOID(CONDITION,REVISION,MESSAGE) \
- if (CONDITION) {\
- sprintf MESSAGE ;\
- black_flag(DEBUG_black_flag_type,__FILE__,__LINE__,STRINGIFY(REVISION));\
- }
-
-#else /*????????????????????????????????????????????*/
-#define IF_PARANOID(CONDITION,REVISION,MESSAGE)
-#endif /* !PARANOID_VERSION ??????????????????????*/
-
-
-
-
-
-
-
-
-
-
-#if defined(DEBUG_VERSION) /*??????????*/
-#define IF_DEBUG(CONDITION,REVISION,MESSAGE) \
- if (CONDITION) {\
- sprintf MESSAGE ;\
- black_flag(DEBUG_black_flag_type,__FILE__,__LINE__,STRINGIFY(REVISION));\
- }
-
-#else /*???????????????????????????????????????????????????????????*/
-#define IF_DEBUG(CONDITION,REVISION,MESSAGE)
-#endif /* !DEBUG_VERSION ????????????????????????????????????????*/
-
-
-
-
-
-
-
-
-/*********************************************************************
- *
- * IF_GUARD:
- * the basic black_flag macro, for production version trapping
- * and handling of errors.
- *
- * Essentially adds the flexibility of handling / recording
- * errors however you like in black_flag(). Also, the programmer
- * can provide, or omit, a clause following this macro that
- * will only be executed if the CONDITION is true, allowing
- * any kind of handling you wish.
- *
- ********************************************************************/
-
-
-#define IF_GUARD(CONDITION,REVISION,MESSAGE,LEVEL) \
- if ((CONDITION) ? \
- (sprintf MESSAGE,\
- black_flag(CONCAT_MACRO(LEVEL,_black_flag_type),__FILE__,__LINE__,\
- STRINGIFY(REVISION))) : 0)
-
-
-
-
-#define WARN_DEBUG(REVISION,MESSAGE,LEVEL) \
-(sprintf MESSAGE,\
- black_flag(CONCAT_MACRO(LEVEL,_black_flag_type),__FILE__,__LINE__,\
- STRINGIFY(REVISION)))
-
-
-#define WARN_MSG(LEVEL,MESSAGE,REVISION) \
-(sprintf MESSAGE,\
- black_flag(CONCAT_MACRO(LEVEL,_black_flag_type),__FILE__,__LINE__,\
- REVISION))
-
-
-
-/*********************************************************************
- *
- * OUT_OF_BOUNDS:
- * checks if
- * MIN <= INDEX < MAX
- *
- * e.g.
- * OUT_OF_BOUNDS(ivar,0,nvar)
- *
- ********************************************************************/
-
-#define OUT_OF_BOUNDS(INDEX,MINIMUM_BOUND,MAXIMUM_BOUND) \
-((INDEX)<(MINIMUM_BOUND) || (INDEX)>=(MAXIMUM_BOUND))
-
-void handle_crash(int sigcode);
-int handle_crash_init(void (*crash_fun)());
-int black_flag(int bug_level,
- char sourcefile[],
- int sourceline,
- char sourcefile_revision[]);
-
-char *Program_name;
-char *Program_version;
-
-void black_flag_init(char progname[],char progversion[]);
-void black_flag_init_args(int narg,char *arg[],char progversion[]);
-
-#endif
Deleted: trunk/packages/poa/trunk/blosum80.mat
===================================================================
--- trunk/packages/poa/trunk/blosum80.mat 2008-03-22 15:38:40 UTC (rev 1651)
+++ trunk/packages/poa/trunk/blosum80.mat 2008-03-22 17:20:38 UTC (rev 1652)
@@ -1,40 +0,0 @@
-# Blosum80
-# Matrix made by matblas from blosum80.iij
-# * column uses minimum score
-# BLOSUM Clustered Scoring Matrix in 1/3 Bit Units
-# Blocks Database = /data/blocks_5.0/blocks.dat
-# Cluster Percentage: >= 80
-# Entropy = 0.9868, Expected = -0.7442
-GAP-PENALTIES=12 6 6
- A R N D C Q E G H I L K M F P S T W Y V B Z X ? a g t c u ] n
-A 7 -3 -3 -3 -1 -2 -2 0 -3 -3 -3 -1 -2 -4 -1 2 0 -5 -4 -1 -3 -2 -1 -9 -9 -9 -9 -9 -9 -9 -9
-R -3 9 -1 -3 -6 1 -1 -4 0 -5 -4 3 -3 -5 -3 -2 -2 -5 -4 -4 -2 0 -2 -9 -9 -9 -9 -9 -9 -9 -9
-N -3 -1 9 2 -5 0 -1 -1 1 -6 -6 0 -4 -6 -4 1 0 -7 -4 -5 5 -1 -2 -9 -9 -9 -9 -9 -9 -9 -9
-D -3 -3 2 10 -7 -1 2 -3 -2 -7 -7 -2 -6 -6 -3 -1 -2 -8 -6 -6 6 1 -3 -9 -9 -9 -9 -9 -9 -9 -9
-C -1 -6 -5 -7 13 -5 -7 -6 -7 -2 -3 -6 -3 -4 -6 -2 -2 -5 -5 -2 -6 -7 -4 -9 -9 -9 -9 -9 -9 -9 -9
-Q -2 1 0 -1 -5 9 3 -4 1 -5 -4 2 -1 -5 -3 -1 -1 -4 -3 -4 -1 5 -2 -9 -9 -9 -9 -9 -9 -9 -9
-E -2 -1 -1 2 -7 3 8 -4 0 -6 -6 1 -4 -6 -2 -1 -2 -6 -5 -4 1 6 -2 -9 -9 -9 -9 -9 -9 -9 -9
-G 0 -4 -1 -3 -6 -4 -4 9 -4 -7 -7 -3 -5 -6 -5 -1 -3 -6 -6 -6 -2 -4 -3 -9 -9 -9 -9 -9 -9 -9 -9
-H -3 0 1 -2 -7 1 0 -4 12 -6 -5 -1 -4 -2 -4 -2 -3 -4 3 -5 -1 0 -2 -9 -9 -9 -9 -9 -9 -9 -9
-I -3 -5 -6 -7 -2 -5 -6 -7 -6 7 2 -5 2 -1 -5 -4 -2 -5 -3 4 -6 -6 -2 -9 -9 -9 -9 -9 -9 -9 -9
-L -3 -4 -6 -7 -3 -4 -6 -7 -5 2 6 -4 3 0 -5 -4 -3 -4 -2 1 -7 -5 -2 -9 -9 -9 -9 -9 -9 -9 -9
-K -1 3 0 -2 -6 2 1 -3 -1 -5 -4 8 -3 -5 -2 -1 -1 -6 -4 -4 -1 1 -2 -9 -9 -9 -9 -9 -9 -9 -9
-M -2 -3 -4 -6 -3 -1 -4 -5 -4 2 3 -3 9 0 -4 -3 -1 -3 -3 1 -5 -3 -2 -9 -9 -9 -9 -9 -9 -9 -9
-F -4 -5 -6 -6 -4 -5 -6 -6 -2 -1 0 -5 0 10 -6 -4 -4 0 4 -2 -6 -6 -3 -9 -9 -9 -9 -9 -9 -9 -9
-P -1 -3 -4 -3 -6 -3 -2 -5 -4 -5 -5 -2 -4 -6 12 -2 -3 -7 -6 -4 -4 -2 -3 -9 -9 -9 -9 -9 -9 -9 -9
-S 2 -2 1 -1 -2 -1 -1 -1 -2 -4 -4 -1 -3 -4 -2 7 2 -6 -3 -3 0 -1 -1 -9 -9 -9 -9 -9 -9 -9 -9
-T 0 -2 0 -2 -2 -1 -2 -3 -3 -2 -3 -1 -1 -4 -3 2 8 -5 -3 0 -1 -2 -1 -9 -9 -9 -9 -9 -9 -9 -9
-W -5 -5 -7 -8 -5 -4 -6 -6 -4 -5 -4 -6 -3 0 -7 -6 -5 16 3 -5 -8 -5 -5 -9 -9 -9 -9 -9 -9 -9 -9
-Y -4 -4 -4 -6 -5 -3 -5 -6 3 -3 -2 -4 -3 4 -6 -3 -3 3 11 -3 -5 -4 -3 -9 -9 -9 -9 -9 -9 -9 -9
-V -1 -4 -5 -6 -2 -4 -4 -6 -5 4 1 -4 1 -2 -4 -3 0 -5 -3 7 -6 -4 -2 -9 -9 -9 -9 -9 -9 -9 -9
-B -3 -2 5 6 -6 -1 1 -2 -1 -6 -7 -1 -5 -6 -4 0 -1 -8 -5 -6 6 0 -3 -9 -9 -9 -9 -9 -9 -9 -9
-Z -2 0 -1 1 -7 5 6 -4 0 -6 -5 1 -3 -6 -2 -1 -2 -5 -4 -4 0 6 -1 -9 -9 -9 -9 -9 -9 -9 -9
-X -1 -2 -2 -3 -4 -2 -2 -3 -2 -2 -2 -2 -2 -3 -3 -1 -1 -5 -3 -2 -3 -1 -2 -9 -9 -9 -9 -9 -9 -9 -9
-? -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9
-a -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 4 -2 -2 -2 -2 -9 0
-g -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -2 4 -2 -2 -2 -9 0
-t -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -2 -2 4 -2 4 -9 0
-c -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -2 -2 -2 4 -2 -9 0
-u -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -2 -2 4 -2 4 -9 0
-] -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9
-n -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 0 0 0 0 0 -9 0
Deleted: trunk/packages/poa/trunk/blosum80_trunc.mat
===================================================================
--- trunk/packages/poa/trunk/blosum80_trunc.mat 2008-03-22 15:38:40 UTC (rev 1651)
+++ trunk/packages/poa/trunk/blosum80_trunc.mat 2008-03-22 17:20:38 UTC (rev 1652)
@@ -1,42 +0,0 @@
-# Blosum80
-# Matrix made by matblas from blosum80.iij
-# * column uses minimum score
-# BLOSUM Clustered Scoring Matrix in 1/3 Bit Units
-# Blocks Database = /data/blocks_5.0/blocks.dat
-# Cluster Percentage: >= 80
-# Entropy = 0.9868, Expected = -0.7442
-GAP-TRUNCATION-LENGTH=10
-GAP-DECAY-LENGTH=5
-GAP-PENALTIES=12 6 0
- A R N D C Q E G H I L K M F P S T W Y V B Z X ? a g t c u ] n
-A 7 -3 -3 -3 -1 -2 -2 0 -3 -3 -3 -1 -2 -4 -1 2 0 -5 -4 -1 -3 -2 -1 -9 -9 -9 -9 -9 -9 -9 -9
-R -3 9 -1 -3 -6 1 -1 -4 0 -5 -4 3 -3 -5 -3 -2 -2 -5 -4 -4 -2 0 -2 -9 -9 -9 -9 -9 -9 -9 -9
-N -3 -1 9 2 -5 0 -1 -1 1 -6 -6 0 -4 -6 -4 1 0 -7 -4 -5 5 -1 -2 -9 -9 -9 -9 -9 -9 -9 -9
-D -3 -3 2 10 -7 -1 2 -3 -2 -7 -7 -2 -6 -6 -3 -1 -2 -8 -6 -6 6 1 -3 -9 -9 -9 -9 -9 -9 -9 -9
-C -1 -6 -5 -7 13 -5 -7 -6 -7 -2 -3 -6 -3 -4 -6 -2 -2 -5 -5 -2 -6 -7 -4 -9 -9 -9 -9 -9 -9 -9 -9
-Q -2 1 0 -1 -5 9 3 -4 1 -5 -4 2 -1 -5 -3 -1 -1 -4 -3 -4 -1 5 -2 -9 -9 -9 -9 -9 -9 -9 -9
-E -2 -1 -1 2 -7 3 8 -4 0 -6 -6 1 -4 -6 -2 -1 -2 -6 -5 -4 1 6 -2 -9 -9 -9 -9 -9 -9 -9 -9
-G 0 -4 -1 -3 -6 -4 -4 9 -4 -7 -7 -3 -5 -6 -5 -1 -3 -6 -6 -6 -2 -4 -3 -9 -9 -9 -9 -9 -9 -9 -9
-H -3 0 1 -2 -7 1 0 -4 12 -6 -5 -1 -4 -2 -4 -2 -3 -4 3 -5 -1 0 -2 -9 -9 -9 -9 -9 -9 -9 -9
-I -3 -5 -6 -7 -2 -5 -6 -7 -6 7 2 -5 2 -1 -5 -4 -2 -5 -3 4 -6 -6 -2 -9 -9 -9 -9 -9 -9 -9 -9
-L -3 -4 -6 -7 -3 -4 -6 -7 -5 2 6 -4 3 0 -5 -4 -3 -4 -2 1 -7 -5 -2 -9 -9 -9 -9 -9 -9 -9 -9
-K -1 3 0 -2 -6 2 1 -3 -1 -5 -4 8 -3 -5 -2 -1 -1 -6 -4 -4 -1 1 -2 -9 -9 -9 -9 -9 -9 -9 -9
-M -2 -3 -4 -6 -3 -1 -4 -5 -4 2 3 -3 9 0 -4 -3 -1 -3 -3 1 -5 -3 -2 -9 -9 -9 -9 -9 -9 -9 -9
-F -4 -5 -6 -6 -4 -5 -6 -6 -2 -1 0 -5 0 10 -6 -4 -4 0 4 -2 -6 -6 -3 -9 -9 -9 -9 -9 -9 -9 -9
-P -1 -3 -4 -3 -6 -3 -2 -5 -4 -5 -5 -2 -4 -6 12 -2 -3 -7 -6 -4 -4 -2 -3 -9 -9 -9 -9 -9 -9 -9 -9
-S 2 -2 1 -1 -2 -1 -1 -1 -2 -4 -4 -1 -3 -4 -2 7 2 -6 -3 -3 0 -1 -1 -9 -9 -9 -9 -9 -9 -9 -9
-T 0 -2 0 -2 -2 -1 -2 -3 -3 -2 -3 -1 -1 -4 -3 2 8 -5 -3 0 -1 -2 -1 -9 -9 -9 -9 -9 -9 -9 -9
-W -5 -5 -7 -8 -5 -4 -6 -6 -4 -5 -4 -6 -3 0 -7 -6 -5 16 3 -5 -8 -5 -5 -9 -9 -9 -9 -9 -9 -9 -9
-Y -4 -4 -4 -6 -5 -3 -5 -6 3 -3 -2 -4 -3 4 -6 -3 -3 3 11 -3 -5 -4 -3 -9 -9 -9 -9 -9 -9 -9 -9
-V -1 -4 -5 -6 -2 -4 -4 -6 -5 4 1 -4 1 -2 -4 -3 0 -5 -3 7 -6 -4 -2 -9 -9 -9 -9 -9 -9 -9 -9
-B -3 -2 5 6 -6 -1 1 -2 -1 -6 -7 -1 -5 -6 -4 0 -1 -8 -5 -6 6 0 -3 -9 -9 -9 -9 -9 -9 -9 -9
-Z -2 0 -1 1 -7 5 6 -4 0 -6 -5 1 -3 -6 -2 -1 -2 -5 -4 -4 0 6 -1 -9 -9 -9 -9 -9 -9 -9 -9
-X -1 -2 -2 -3 -4 -2 -2 -3 -2 -2 -2 -2 -2 -3 -3 -1 -1 -5 -3 -2 -3 -1 -2 -9 -9 -9 -9 -9 -9 -9 -9
-? -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9
-a -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 4 -2 -2 -2 -2 -9 0
-g -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -2 4 -2 -2 -2 -9 0
-t -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -2 -2 4 -2 4 -9 0
-c -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -2 -2 -2 4 -2 -9 0
-u -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -2 -2 4 -2 4 -9 0
-] -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9
-n -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 0 0 0 0 0 -9 0
Deleted: trunk/packages/poa/trunk/buildup_lpo.c
===================================================================
--- trunk/packages/poa/trunk/buildup_lpo.c 2008-03-22 15:38:40 UTC (rev 1651)
+++ trunk/packages/poa/trunk/buildup_lpo.c 2008-03-22 17:20:38 UTC (rev 1652)
@@ -1,590 +0,0 @@
-
-
-#include "default.h"
-#include "poa.h"
-#include "seq_util.h"
-#include "lpo.h"
-
-
-/** if two align-rings are aligned to each other, make sure
- that the (single) aligned residue pair consists of identical
- residues, if possible.
- :
- ((a,-),(b,-),(c,d),(-,a),(-b)) ==>
- ((a,a),(b,-),(c,-),(-,d),(-b)) OR ((a,-),(b,b),(c,-),(-,d)).
- :
- ((a,-),(c,d),(-,b)) ==> self.
-*/
-void fuse_ring_identities(int len_x,LPOLetter_T seq_x[],
- int len_y,LPOLetter_T seq_y[],
- LPOLetterRef_T al_x[],
- LPOLetterRef_T al_y[])
-{
- int i,j;
- LOOP (i,len_y) {
- if (al_y[i]<0 || seq_x[al_y[i]].letter == seq_y[i].letter)
- continue; /* NOT ALIGNED, OR ALREADY IDENTICAL, SO SKIP */
- for (j=seq_x[al_y[i]].align_ring;j!=al_y[i];j=seq_x[j].align_ring) {
- if (seq_x[j].letter == seq_y[i].letter) { /* IDENTICAL! SO FUSE! */
- al_x[al_y[i]]= INVALID_LETTER_POSITION; /* DISCONNECT FROM OLD */
- al_y[i]=j; /* CONNECT TO NEW IDENTITY */
- al_x[j]=i;
- break; /* SEARCH YE NO FURTHER */
- }
- }
- }
-}
-
-/** if two align-rings are aligned to each other, make sure
- that as many identical-residue pairs are fused as possible.
- :
- ((a,-),(b,-),(c,d),(-,a),(-b)) ==>
- ((a,a),(b,b),(c,-),(-,d)).
- :
- ((a,-),(c,d),(-,b)) ==> self.
-
- NB:
- THIS DOES NOT WORK with the current fuse_lpo function.
- The fusion
-*/
-void full_fuse_ring_identities (int len_x, LPOLetter_T *seq_x,
- int len_y, LPOLetter_T *seq_y,
- LPOLetterRef_T *al_x,
- LPOLetterRef_T *al_y)
-{
- int i, ip, j, jp; /* i LABELS POS IN seq_x, j LABELS POS IN seq_y */
- int ck; /* WAS IDENTICAL-RESIDUE PAIR FOUND? */
- for (i=0; i<len_x; i++) if ((j = al_x[i]) >= 0) {
- al_x[i] = al_y[j] = INVALID_LETTER_POSITION; /* DISCONNECT FROM OLD */
-
- /* i,j ARE AN ALIGNED PAIR. WALK THROUGH RESPECTIVE RINGS: */
- ip=i; jp=j; ck=0;
- do /* while (ip!=i) */ {
- do /* while (jp!=j) */ {
- if (seq_x[ip].letter == seq_y[jp].letter) { /* IDENTICAL! SO FUSE! */
- al_x[ip] = jp;
- al_y[jp] = ip;
- ck=1;
- jp=j; /* EXIT TO OUTER LOOP... AT MOST ONE FUSED TO EACH POS IN seq_y. */
- }
- else {
- jp = seq_y[jp].align_ring;
- }
- }
- while (jp!=j);
- ip = seq_x[ip].align_ring;
- }
- while (ip!=i);
-
- if (ck==0) { /* NO IDENTICAL-RESIDUE PAIR FOUND, SO RECONNECT ORIGINAL */
- al_x[i] = j;
- al_y[j] = i;
- }
- }
-}
-
-
-/** aligns the sequences in seq[] to the sequence or partial order in
- new_seq; seq[] must be linear orders (regular sequences);
- the alignment is built up by iterative partial order alignment,
- and the resulting partial order is returned in new_seq */
-LPOSequence_T *buildup_lpo(LPOSequence_T *new_seq,
- int nseq,LPOSequence_T seq[],
- ResidueScoreMatrix_T *score_matrix,
- int use_aggressive_fusion,
- int use_global_alignment)
-{
- int i,max_alloc=0,total_alloc;
- LPOLetterRef_T *al1=NULL,*al2=NULL;
-
- lpo_index_symbols(new_seq,score_matrix); /* MAKE SURE LPO IS TRANSLATED */
- for (i=0;i<nseq;i++) { /* ALIGN ALL SEQUENCES TO my_lpo ONE BY ONE */
- if (seq[i].letter == NULL) /* HMM. HASN'T BEEN INITIALIZED AT ALL YET */
- initialize_seqs_as_lpo(1,seq+i,score_matrix);
- total_alloc=new_seq->length*seq[i].length
- + sizeof(LPOLetter_T)*new_seq->length;
- if (total_alloc>max_alloc) { /* DP RECTANGLE ARRAY SIZE */
- max_alloc=total_alloc;
-#ifdef REPORT_MAX_ALLOC
- fprintf(stderr,"max_alloc: %d bytes\n",max_alloc);
-#endif
- if (max_alloc>POA_MAX_ALLOC) {
- WARN_MSG(TRAP,(ERRTXT,"Exceeded memory bound: %d\n Exiting!\n\n",max_alloc),"$Revision: 1.2.2.9 $");
- break; /* JUST RETURN AND FINISH */
- }
- }
- align_lpo_po (new_seq,&seq[i],
- score_matrix,&al1,&al2,NULL,use_global_alignment); /* ALIGN ONE MORE SEQ */
- if (use_aggressive_fusion)
- fuse_ring_identities(new_seq->length,new_seq->letter,
- seq[i].length,seq[i].letter,al1,al2);
- fuse_lpo(new_seq,seq+i,al1,al2); /* BUILD COMPOSITE LPO */
-
- free_lpo_letters(seq[i].length,seq[i].letter,TRUE);/*NO NEED TO KEEP*/
- seq[i].letter=NULL; /* MARK AS FREED... DON'T LEAVE DANGLING POINTER! */
- FREE(al1); /* DUMP TEMPORARY MAPPING ARRAYS */
- FREE(al2);
- }
-
- return new_seq;
-}
-/**@memo example: aligning a set of sequences to a partial order:
- lpo_out=buildup_lpo(lpo_in,nseq,seq,&score_matrix);
-*/
-
-
-
-/** CLIPS seq->letter[] TO JUST THE SEGMENT ALIGNED TO letter_x[] via al_x[]
- DOES *NOT* FREE existing seq->letter[]; YOU MUST KEEP IT OR FREE IT YOURSELF*/
-int clip_unaligned_ends(LPOSequence_T *seq,
- LPOLetterRef_T al[],
- int len_x,LPOLetter_T letter_x[],
- LPOLetterRef_T al_x[],int *offset,int *match_length)
-{
- int i,j=0,start,end,new_length,allow_end_length=0,nidentity=0;
- LPOLetter_T *temp=NULL;
- CALLOC(temp,seq->length,LPOLetter_T); /* ALLOCATE NEW letter[] COPY */
- for (start=0;start<seq->length;start++) /* FIND 1ST ALIGNED POS */
- if (al[start]>=0)
- break;
-
- for (end=seq->length -1;end>=0;end--) /* FIND LAST ALIGNED POS */
- if (al[end]>=0)
- break;
-
- for (i=start;i<=end;i++) /* COUNT IDENTITIES TO letter_x[] */
- if (al[i]>=0 && seq->letter[i].letter==letter_x[al[i]].letter)
- nidentity++;
- if (match_length) /* RETURN THE MATCH LENGTH TO THE CALLER */
- *match_length = end-start+1;
-
- if (start>allow_end_length) /* ALLOW EXTRA RESIDUES ON EITHER END*/
- start-=allow_end_length;
- else /* KEEP IN BOUNDS */
- start=0;
- if (end+allow_end_length<seq->length)
- end+=allow_end_length;
- else /* KEEP IN BOUNDS */
- end=seq->length-1;
-
- LOOP (i,len_x) /* WE ARE SHIFTING al TO THE RIGHT BY start POSITIONS */
- if (al_x[i]>=0) /* SO WE HAVE TO TRANSLATE al_x CORRESPONDINGLY */
- al_x[i]-= start;
-
- seq->length=end-start+1; /* NOW TRANSLATE left, right, align_ring, ring_id*/
- memcpy(temp,seq->letter+start,sizeof(LPOLetter_T)*(seq->length));
- LOOP (i,seq->length) { /* THIS *ONLY* WORKS FOR PURE LINEAR SEQUENCE!!! */
- temp[i].left.ipos -= start; /*IF <0, BECOMES INVALID BY DEFINITION, OK*/
- temp[i].right.ipos -= start;
- if (temp[i].right.ipos>=seq->length) /* PAST THE NEW, CLIPPED END */
- temp[i].right.ipos= INVALID_LETTER_POSITION;
- temp[i].ring_id=temp[i].align_ring=i;
- }
-
- if (offset) /* RETURN THE OFFSET TO THE CALLER */
- *offset = start;
-
- seq->letter=temp; /* NEW START: FIRST ALIGNED POSITION */
- return nidentity; /* NEW LENGTH: FROM 1ST TO LAST ALIGNED POS*/
-}
-
-
-
-
-
-void restore_lpo_size(LPOSequence_T *seq,int length,LPOLetter_T *letter)
-{
-
- free_lpo_letters(seq->length,seq->letter,TRUE); /* DUMP CLIPPED VERSION*/
- seq->length=length; /* RESTORE ORIGINAL length AND letter[] */
- seq->letter=letter;
-}
-
-
-
-/** BUILDS UP ALIGNMENT, BUT CLIPS UNALIGNED ENDS OF EACH NEW SEQUENCE ADDED
--------------------------------------------------------
----------------------------------------------------------------------------
-*/
-LPOSequence_T *buildup_clipped_lpo(LPOSequence_T *new_seq,
- int nseq,LPOSequence_T seq[],
- ResidueScoreMatrix_T *score_matrix,
- int use_global_alignment)
-{
- int i,ntemp,offset=0,nidentity,length_max=0,match_length=0;
- int total_alloc,max_alloc=0;
- LPOLetterRef_T *al1=NULL,*al2=NULL;
- LPOLetter_T *temp;
- float identity_max=0.,f;
-
- lpo_index_symbols(new_seq,score_matrix); /* MAKE SURE LPO IS TRANSLATED */
- for (i=0;i<nseq;i++) { /* ALIGN ALL SEQUENCES TO new_seq ONE BY ONE */
- if (seq[i].letter == NULL) /* HMM. HASN'T BEEN INITIALIZED AT ALL YET */
- initialize_seqs_as_lpo(1,seq+i,score_matrix);
- total_alloc=new_seq->length*seq[i].length
- + sizeof(LPOLetter_T)*new_seq->length;
- if (total_alloc>max_alloc) { /* DP RECTANGLE ARRAY SIZE */
- max_alloc=total_alloc;
-#ifdef REPORT_MAX_ALLOC
- fprintf(stderr,"max_alloc: %d bytes (%d x %d)\n",max_alloc,
- new_seq->length,seq[i].length);
-#endif
- if (max_alloc>POA_MAX_ALLOC) {
- WARN_MSG(TRAP,(ERRTXT,"Exceeded memory bound: %d\n Exiting!\n\n",max_alloc),"$Revision: 1.2.2.9 $");
- break; /* JUST RETURN AND FINISH */
- }
- }
- align_lpo_po (new_seq, &seq[i],
- score_matrix,&al1,&al2,NULL,use_global_alignment); /* ALIGN ONE MORE SEQ */
- ntemp=seq[i].length; /* SAVE letter[] BEFORE CLIPPING IT TO ALIGNED AREA*/
- temp=seq[i].letter;
- if ((nidentity=clip_unaligned_ends(seq+i,al2,/*THERE IS AN ALIGNED REGION*/
- new_seq->length,new_seq->letter,al1,&offset,
- &match_length))>0) {
- f=nidentity/(float)match_length; /* CALCULATE IDENTITY FRACTION */
- if (0==i /*f>identity_max*/) { /* REPORT IDENTITY OF TOP HIT */
- identity_max=nidentity;
- length_max=match_length;
- }
- fuse_lpo(new_seq,seq+i,al1,al2+offset); /*ADD CLIPPED REGION TO LPO*/
- }
- restore_lpo_size(seq+i,ntemp,temp); /* REVERT FROM CLIPPED TO ORIGINAL*/
- FREE(al1); /* DUMP TEMPORARY MAPPING ARRAYS FROM align_lpo() */
- FREE(al2);
- }
-
- fprintf(stderr,"%s\tmaximum identity\t%3.1f%%\t%.0f/%d\n",new_seq->name,
- 100*identity_max/length_max,identity_max,length_max);
- return new_seq;
-}
-
-
-/** which LPOSeq is called, or holds a sequence called, `name'? */
-int find_seq_name (int nseq, LPOSequence_T **seq, char name[])
-{
- int i,j;
- for (i=0;i<nseq;i++) if (seq[i]) {
- if (0==strcmp(seq[i]->name,name))
- return i;
- for (j=0;j<seq[i]->nsource_seq;j++) {
- if (0==strcmp(seq[i]->source_seq[j].name,name))
- return i;
- }
- }
- return -1;
-}
-
-
-typedef struct {
- double score;
- int i;
- int j;
-}
-SeqPairScore_T;
-
-
-/* SORT IN DESCENDING ORDER BY score (SO HIGH SIMILARITY SCORES MERGE FIRST). */
-/* FOR TIES, USE ITERATIVE MERGE ORDER (1-2, then 1-3, then 1-4, etc.) */
-int seqpair_score_qsort_cmp (const void *void_a, const void *void_b)
-{
- const SeqPairScore_T *a = (const SeqPairScore_T *)void_a;
- const SeqPairScore_T *b = (const SeqPairScore_T *)void_b;
-
- if (a->score > b->score)
- return -1;
- else if (a->score < b->score)
- return 1;
-
- if (a->i > b->i)
- return 1;
- else if (a->i < b->i)
- return -1;
-
- if (a->j > b->j)
- return 1;
- else if (a->j < b->j)
- return -1;
-
- return 0;
-}
-
-
-SeqPairScore_T *read_seqpair_scorefile (int nseq, LPOSequence_T **seq,
- ResidueScoreMatrix_T *score_matrix,
- LPOScore_T (*scoring_function)
- (int,int,LPOLetter_T [],LPOLetter_T [],
- ResidueScoreMatrix_T *),
- int use_global_alignment,
- int do_progressive, FILE *ifile, int *p_nscore)
-{
- int i,j,nscore=0,max_nscore=0;
- int *adj_score = NULL;
- SeqPairScore_T *score_list=NULL;
- LPOLetterRef_T *al1=NULL,*al2=NULL;
- double x, min_score=0.0;
- char name1[256],name2[256];
-
- CALLOC (adj_score, nseq, int);
-
- max_nscore = nseq*nseq;
- CALLOC (score_list, max_nscore, SeqPairScore_T);
-
- if (ifile) { /* IF PAIR SCORE FILE (PROGRESSIVE ASSUMED) */
- while (fscanf(ifile," %s %s %lf",name1,name2,&x)==3) { /* READ SCORE FILE */
- i=find_seq_name(nseq,seq,name1);
- j=find_seq_name(nseq,seq,name2);
- if (i<0 || j<0) {
- WARN_MSG(USERR,(ERRTXT,"invalid sequence pair, not found: %s,%s",name1,name2),"$Revision: 1.2.2.9 $");
- FREE (score_list);
- FREE (adj_score);
- return NULL;
- }
-
- /* fprintf(stderr,"i=%d,j=%d,x=%.2f\n",i,j,x); */
- fprintf(stderr,"Saving score from file %d (%s), %d (%s) : %.2f\n",i,name1,j,name2,x);
- if (i<j) { int swap=i;i=j;j=swap; } /* DON'T SAVE UPPER (DUPLICATE?) HALF OF THE MATRIX */
- score_list[nscore].i = i;
- score_list[nscore].j = j;
- score_list[nscore].score = x;
- if (x<min_score) min_score = x;
- nscore++;
- if (nscore==max_nscore) {
- max_nscore *= 2;
- REALLOC (score_list, max_nscore, SeqPairScore_T);
- }
- if (j==i-1) {
- adj_score[i]=1;
- }
- }
- }
- else if (do_progressive) { /* IF PROGRESSIVE BUT NO PAIR SCORE FILE */
- for (i=0;i<nseq;i++) for (j=0;j<i;j++) { /* SCORE IS BASED ON LOCAL ALIGNMENT */
- x = align_lpo_po (seq[i],seq[j],
- score_matrix,&al1,&al2,scoring_function,use_global_alignment);
- FREE(al1); /* DUMP TEMPORARY MAPPING ARRAYS */
- FREE(al2);
- fprintf(stderr,"Saving alignment score %d (%s), %d (%s) : %.2f\n",i,seq[i]->name,j,seq[j]->name,x);
- score_list[nscore].i = i;
- score_list[nscore].j = j;
- score_list[nscore].score = x;
- if (x<min_score) min_score = x;
- nscore++;
- if (nscore==max_nscore) {
- max_nscore *= 2;
- REALLOC (score_list, max_nscore, SeqPairScore_T);
- }
- if (j==i-1) {
- adj_score[i]=1;
- }
- }
- }
- else { /* NOT DOING PROGRESSIVE ALIGNMENT */
- /* USE DEFAULT (=0.0) PAIRSCORES, ENSURING ITERATIVE ALIGNMENT: */
- fprintf(stderr,"Performing iterative alignment...\n");
- }
-
- for (i=1;i<nseq;i++) {
- if (0 == adj_score[i]) {
- score_list[nscore].i = i;
- score_list[nscore].j = i-1;
- score_list[nscore].score = min_score - 1.0;
- nscore++;
- if (nscore==max_nscore) {
- max_nscore *= 2;
- REALLOC (score_list, max_nscore, SeqPairScore_T);
- }
- }
- }
- FREE (adj_score);
-
- /* NOW SORT LIST IN DESCENDING ORDER AND HAND BACK TO CALLER: */
- qsort(score_list,nscore,sizeof(SeqPairScore_T),seqpair_score_qsort_cmp);
- if (p_nscore) /* RETURN LENGTH OF PAIR SCORE TABLE IF REQUESTED */
- *p_nscore=nscore;
- return score_list;
-}
-
-
-LPOSequence_T *buildup_progressive_lpo(int nseq,LPOSequence_T **all_seqs,
- ResidueScoreMatrix_T *score_matrix,
- int use_aggressive_fusion,
- int do_progressive,
- char score_file[],
- LPOScore_T (*scoring_function)
- (int,int,LPOLetter_T [],LPOLetter_T [],
- ResidueScoreMatrix_T *),
- int use_global_alignment,
- int preserve_sequence_order)
-{
- int i,j,k,max_alloc=0,total_alloc,min_counts=0;
- SeqPairScore_T *score=NULL;
- LPOSequence_T *new_seq=NULL;
- FILE *ifile=NULL;
- int *seq_cluster=NULL,cluster_i,cluster_j,nscore=0,iscore;
- int *initial_nseq, *cluster_size, *seq_id_in_cluster;
- int nseq_tot;
-
-
- /* INITIALIZE ALL UNINITIALIZED SEQS: */
- for (i=0;i<nseq;i++) {
- if (all_seqs[i]->letter == NULL) {
- initialize_seqs_as_lpo(1,all_seqs[i],score_matrix);
- }
- lpo_index_symbols(all_seqs[i],score_matrix); /* MAKE SURE LPO IS TRANSLATED */
- }
-
- /* RETURN IF NOTHING TO ALIGN */
- if (nseq<=0)
- return NULL;
- else if (nseq==1)
- return all_seqs[0];
-
-
- new_seq = all_seqs[0];
-
- CALLOC(seq_cluster,nseq,int); /* MAPS SEQS (or CLUSTERS) TO CLUSTER THEY'RE IN */
- CALLOC(seq_id_in_cluster,nseq,int); /* INDEXES SEQS (or CLUSTERS) WITHIN EACH CLUSTER */
- CALLOC(cluster_size,nseq,int); /* COUNTS SEQS IN SAME CLUSTER (updated w/ merges) */
- CALLOC(initial_nseq,nseq,int); /* COUNTS SEQS INITIALLY IN EACH CLUSTER (not updated w/ merges) */
-
- for (i=nseq_tot=0;i<nseq;i++) {
- seq_cluster[i] = i; /* CREATE TRIVIAL MAPPING, EACH SEQ ITS OWN CLUSTER */
- seq_id_in_cluster[i] = 0;
- cluster_size[i] = all_seqs[i]->nsource_seq;
- initial_nseq[i] = cluster_size[i];
- nseq_tot += cluster_size[i];
- }
-
- if (score_file) {
- ifile=fopen(score_file,"r");
- if (ifile==NULL) {
- WARN_MSG(USERR,(ERRTXT,"Error reading pair score file %s.\nExiting",
- score_file),"$Revision: 1.2.2.9 $");
- goto free_and_exit;
- }
- }
- else {
- ifile = NULL;
- }
-
-
- score = read_seqpair_scorefile(nseq,all_seqs,score_matrix,scoring_function,use_global_alignment,
- do_progressive,ifile,&nscore);
- if (score==NULL) {
- WARN_MSG(USERR,(ERRTXT,"Error generating pair scores (file %s).\nExiting",
- score_file ? score_file : "unspecified"),"$Revision: 1.2.2.9 $");
- goto free_and_exit;
- }
- if (ifile)
- fclose (ifile);
-
- for (iscore=0;iscore<nscore;iscore++) {
-
- /* NB: NEW CLUSTER ID WILL BE MINIMUM OF INPUT IDs,
- SO MASTER CLUSTER WILL ALWAYS BE CLUSTER 0. */
- if (seq_cluster[score[iscore].i] < seq_cluster[score[iscore].j]) {
- cluster_i=seq_cluster[score[iscore].i];
- cluster_j=seq_cluster[score[iscore].j];
- }
- else if (seq_cluster[score[iscore].j] < seq_cluster[score[iscore].i]) {
- cluster_i=seq_cluster[score[iscore].j];
- cluster_j=seq_cluster[score[iscore].i];
- }
- else /* CLUSTERS ALREADY FUSED, SO SKIP THIS PAIR */
- continue;
-
- fprintf(stderr,"Fusing cluster %d (%s, nseq=%d) --> %d (%s, nseq=%d)... score %.2f\n",
- cluster_j,all_seqs[cluster_j]->name,all_seqs[cluster_j]->nsource_seq,
- cluster_i,all_seqs[cluster_i]->name,all_seqs[cluster_i]->nsource_seq,
- score[iscore].score);
-
- new_seq = all_seqs[cluster_i];
- total_alloc = new_seq->length * (sizeof(LPOLetter_T) + all_seqs[cluster_j]->length);
- if (total_alloc>max_alloc) { /* DP RECTANGLE ARRAY SIZE */
- max_alloc=total_alloc;
-#ifdef REPORT_MAX_ALLOC
- fprintf(stderr,"max_alloc: %d bytes\n",max_alloc);
-#endif
- if (max_alloc>POA_MAX_ALLOC) {
- WARN_MSG(TRAP,(ERRTXT,"Exceeded memory bound: %d\n Exiting!\n\n",max_alloc),"$Revision: 1.2.2.9 $");
- break; /* JUST RETURN AND FINISH */
- }
- }
-
-#ifdef USE_LOCAL_NEUTRALITY_CORRECTION /* NO LONGER USED */
- if (score_matrix->nfreq>0) { /* CALCULATE BALANCED SCORING ON EACH PO */
- balance_matrix_score(new_seq->length,new_seq->letter,score_matrix);
- balance_matrix_score(all_seqs[cluster_j]->length,all_seqs[cluster_j]->letter,
- score_matrix);
- }
-#endif
-
- buildup_pairwise_lpo(new_seq,all_seqs[cluster_j],score_matrix,
- use_aggressive_fusion,
- scoring_function,use_global_alignment);
-
- LOOP (i,nseq) { /* APPEND ALL MEMBERS OF cluster_j TO cluster_i */
- if (seq_cluster[i] == cluster_j) {
- seq_cluster[i] = cluster_i;
- seq_id_in_cluster[i] += cluster_size[cluster_i];
- }
- }
- cluster_size[cluster_i] += cluster_size[cluster_j];
- cluster_size[cluster_j] = 0;
- }
-
- if (preserve_sequence_order) { /* PUT SEQUENCES WITHIN LPO BACK IN THEIR ORIGINAL ORDER: */
- int *perm;
- CALLOC (perm, nseq_tot, int);
-
- for (i=nseq_tot=0; i<nseq; i++) {
- for (j=0; j<initial_nseq[i]; j++) {
- perm[seq_id_in_cluster[i] + j] = (nseq_tot++);
- }
- }
- for (i=0; i<nseq_tot; i++) printf ("%d ", perm[i]); printf ("\n");
-
- reindex_lpo_source_seqs (new_seq, perm);
- FREE (perm);
- }
-
- free_and_exit:
- FREE (initial_nseq);
- FREE (seq_cluster);
- FREE (cluster_size);
- FREE (seq_id_in_cluster);
- FREE (score);
-
- return new_seq; /* RETURN THE FINAL MASTER CLUSTER... */
-}
-
-
-LPOSequence_T *buildup_pairwise_lpo(LPOSequence_T seq1[],LPOSequence_T seq2[],
- ResidueScoreMatrix_T *score_matrix,
- int use_aggressive_fusion,
- LPOScore_T (*scoring_function)
- (int,int,LPOLetter_T [],LPOLetter_T [],
- ResidueScoreMatrix_T *),
- int use_global_alignment)
-{
- int min_counts1=0;
- int min_counts2=0;
- LPOLetterRef_T *al1=NULL,*al2=NULL;
-
- lpo_index_symbols(seq1,score_matrix); /* MAKE SURE LPO IS TRANSLATED */
- lpo_index_symbols(seq2,score_matrix); /* MAKE SURE LPO IS TRANSLATED */
- align_lpo_po (seq1, seq2, score_matrix, &al1, &al2,
- scoring_function, use_global_alignment); /* ALIGN TWO POS */
- if (use_aggressive_fusion)
- fuse_ring_identities(seq1->length,seq1->letter,
- seq2->length,seq2->letter,al1,al2);
- fuse_lpo(seq1,seq2,al1,al2); /* BUILD COMPOSITE LPO */
-
- /* FREE LETTERS IN SECOND LPO */
- free_lpo_letters(seq2->length,seq2->letter,TRUE);
- seq2->letter=NULL; /*MARK AS FREED. DON'T LEAVE DANGLING POINTER*/
- FREE(al1); /* DUMP TEMPORARY MAPPING ARRAYS */
- FREE(al2);
- return seq1; /* RETURN THE FINAL LPO */
-}
-
Deleted: trunk/packages/poa/trunk/create_seq.c
===================================================================
--- trunk/packages/poa/trunk/create_seq.c 2008-03-22 15:38:40 UTC (rev 1651)
+++ trunk/packages/poa/trunk/create_seq.c 2008-03-22 17:20:38 UTC (rev 1652)
@@ -1,53 +0,0 @@
-
-
-#include "default.h"
-#include "seq_util.h"
-
-
-
-
-void save_sequence_fields(Sequence_T *seq,
- char seq_name[],char seq_title[],int length)
-{
- STRNCPY(seq->name,seq_name,SEQUENCE_NAME_MAX);
- if (seq_title)
- seq->title=strdup(seq_title);
- else
- seq->title=strdup("untitled");
- seq->length=length; /* SAVE LENGTH */
-}
-
-
-
-int create_seq(int nseq,Sequence_T **p_seq,
- char seq_name[],char seq_title[],char tmp_seq[],
- int do_switch_case)
-{
- int i,j;
- Sequence_T *seq;
-
- REBUFF(*p_seq,nseq,SEQUENCE_BUFFER_CHUNK,Sequence_T); /* ALLOCATE MEMORY*/
- seq= (*p_seq)+nseq; /* SET POINTER TO NEWLY ALLOCATED ELEMENT */
-
- for (i=j=0;tmp_seq[i];i++) /* ELIMINATE WHITE SPACE */
- if (!isspace(tmp_seq[i]))
- tmp_seq[j++]=tmp_seq[i];
- tmp_seq[j]='\0'; /* TERMINATE COMPRESSED STRING*/
- seq->sequence=strdup(tmp_seq); /* SAVE A DYNAMIC COPY */
- save_sequence_fields(seq,seq_name,seq_title,j);
-
- switch (do_switch_case) {
- case switch_case_to_lower:
- LOOP (i,seq->length)
- seq->sequence[i]=tolower(tmp_seq[i]);
- break;
- case switch_case_to_upper:
- LOOP (i,seq->length)
- seq->sequence[i]=toupper(tmp_seq[i]);
- break;
- }
-
- return 1;
-}
-
-
Modified: trunk/packages/poa/trunk/debian/changelog
===================================================================
--- trunk/packages/poa/trunk/debian/changelog 2008-03-22 15:38:40 UTC (rev 1651)
+++ trunk/packages/poa/trunk/debian/changelog 2008-03-22 17:20:38 UTC (rev 1652)
@@ -1,4 +1,4 @@
-poa (2.0+20060928-2) UNRELEASED; urgency=low
+poa (2.0+20060928-2) unstable; urgency=low
[ Charles Plessy ]
* debian/control:
@@ -20,8 +20,11 @@
- statically build manpages (target poa.1 available)
- minor changes
- -- David Paleino <d.paleino at gmail.com> Wed, 06 Feb 2008 13:11:56 +0100
+ [ Andreas Tille ]
+ * Added myself to uploaders
+ -- Andreas Tille <tille at debian.org> Fri, 21 Mar 2008 23:41:56 +0100
+
poa (2.0+20060928-1) unstable; urgency=low
* New upstream version. THE PREVIOUS VERSION WAS NOT 2.0.
Modified: trunk/packages/poa/trunk/debian/control
===================================================================
--- trunk/packages/poa/trunk/debian/control 2008-03-22 15:38:40 UTC (rev 1651)
+++ trunk/packages/poa/trunk/debian/control 2008-03-22 17:20:38 UTC (rev 1652)
@@ -4,7 +4,8 @@
Maintainer: Debian-Med Packaging Team <debian-med-packaging at lists.alioth.debian.org>
DM-Upload-Allowed: yes
Uploaders: Charles Plessy <charles-debian-nospam at plessy.org>,
- David Paleino <d.paleino at gmail.com>
+ David Paleino <d.paleino at gmail.com>,
+ Andreas Tille <tille at debian.org>
Build-Depends: debhelper (>= 5)
Standards-Version: 3.7.3
Vcs-Browser: http://svn.debian.org/wsvn/debian-med/trunk/packages/poa/trunk/?rev=0&sc=0
Deleted: trunk/packages/poa/trunk/default.h
===================================================================
--- trunk/packages/poa/trunk/default.h 2008-03-22 15:38:40 UTC (rev 1651)
+++ trunk/packages/poa/trunk/default.h 2008-03-22 17:20:38 UTC (rev 1652)
@@ -1,253 +0,0 @@
-#ifndef DEFAULT_HEADER_INCLUDED
-#define DEFAULT_HEADER_INCLUDED 1
-
-#ifndef MODULE_NAME
-#define MODULE_NAME "main"
-#endif
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <stddef.h>
-#include <math.h>
-#include <string.h>
-#include <ctype.h>
-
-
-#include "black_flag.h"
-
-
-
-typedef void *voidptr; /* ~~e: should be moved out to generic typing header
- --- */
-typedef int (*funptr)();
-
-#define LOOPB(i,size) for ((i)=(size);(i)-- >0;)
-#define LOOP(i,size) for ((i)=(size);(i)-- >0;)
-#define LOOPF(i,size) for ((i)=0;(i)<(size);(i)++)
-#define LOOP_FINISHED(i,size) ((i)<0 || (i)>=(size))
-
-
-/**@memo example usage of argument reading macros
- FOR_ARGS(i,argc) {
- ARGMATCH_VAL("-tolower",do_switch_case,switch_case_to_lower);
- ARGMATCH("-seq_err_report",count_sequence_errors);
- ARGGET("-printmatrix",print_matrix_letters);
- NEXTARG(matrix_filename);
- } */
-
-#define FOR_ARGS(INDEX,ARGC) for (INDEX=1;INDEX<ARGC;INDEX++)
-#define ARGMATCH(FLAGSTR,VAR) if (0==strcmp(argv[i],FLAGSTR)) (VAR)=1
-#define ARGMATCH_VAL(FLAGSTR,VAR,VAL) if (0==strcmp(argv[i],FLAGSTR)) (VAR)=(VAL)
-#define ARGGET(FLAGSTR,VAR) if (0==strcmp(argv[i],FLAGSTR)) {(VAR)=argv[++i];continue;}
-#define NEXTARG(VAR) if ('-'!=argv[i][0] && !(VAR)) {(VAR)=argv[i];continue;}
-
-
-
-
-#ifndef TRUE /* DEFINE true AND false SYMBOLS IN CASE NOT ALREADY THERE*/
-#define TRUE 1
-#endif
-
-#ifndef FALSE
-#define FALSE 0
-#endif
-
-#if defined(__STDC__) || defined(__ANSI_CPP__) /*###################*/
-/* ~~a: ansi-dependent concatenation macro */
-#define CONCAT_MACRO0(a,b) a ## b
-#else
-#define CONCAT_MACRO0(a,b) a/**/b
-#endif /* !__STDC__ ##################################################*/
-
-#define CONCAT_MACRO(a,b) CONCAT_MACRO0(a,b)
-
-#if defined(__STDC__) || defined(__ANSI_CPP__) /*###################*/
-#define STRINGIFY(NAME) # NAME
-#else
-#define STRINGIFY(NAME) "NAME"
-#endif /* !__STDC__ ##################################################*/
-
-/* #define PRINT_DEBUG(MESSAGE) printf MESSAGE */
-
-
-/***********************************************************************
- *
- * STRNCPY: guaranteed overflow-protected, null-terminated strcpy
- *
- * NB: this macro forces termination at end position S1[LEN-1]
- * which will cause non-intuitive side-effects ESPECIALLY
- * if LEN is incorrect!!!! ie. STRNCPY will garbage your
- * memory by sticking zeroes into it if you give it the wrong
- * LEN.
- *
- * Make sure that the LEN you give it is correct!!!!!!!
- *
- **********************************************************************/
-
-#define STRNCPY(S1,S2,LEN) \
- (strncpy(S1,S2,LEN),(((char *)(S1))[(LEN)-1]='\0'))
-
-
-
-
-
-
-
-
-/*******************************************************************
- *
- * STRINGPTR:
- * holds string and count of last alloc'd size. Conveniently
- * manages static and dynamic strings, allowing auto-resize
- * without worrying about memory management.
- *
- ***********************************************************************/
-
-typedef struct { /* ~~d */
- char *p;
- int last_alloc;
-} stringptr; /* --- */
-
-
-#define STRINGPTR_BUFFER_CHUNK 4096
-#define STRINGPTR_EMPTY_INIT {NULL,0}
-
-char *stringptr_cat(stringptr *s1,const char s2[]);
-char *stringptr_cpy(stringptr *s1,const char s2[]);
-int stringptr_free(stringptr *s);
-
-
-
-
-
-
-#ifdef DEBUG
-#define PRINT_DEBUG(CODE,MESSAGE) {if (CODE<=2) fprintf MESSAGE ;}
-#else
-#define PRINT_DEBUG(CODE,MESSAGE) {if (CODE<=1) fprintf MESSAGE ;}
-#endif
-
-
-
-
-#define MEM0_REQUEST_BAD 0
-
-/* DEFAULT ACTION TO PERFORM IF malloc FAILS */
-#define MALLOC_FAILURE_ACTION abort()
-/* DEFAULT ACTION TO PERFORM IF realloc FAILS */
-#define REALLOC_FAILURE_ACTION abort()
-
-
-
-#define CALLOC(memptr,N,ATYPE) \
-/* printf("CALLOC(%s=%d,%s=%d,sizeof(%s)=%d), %s %d\n",STRINGIFY(memptr),(memptr), \
- STRINGIFY(N),(N),STRINGIFY(ATYPE),sizeof(ATYPE),__FILE__,__LINE__);*/ \
- if ((N)<=0) { \
- if (MEM0_REQUEST_BAD) {\
- fprintf(stderr,"%s, line %d: *** invalid memory request: %s[%d].\n",\
- __FILE__,__LINE__,STRINGIFY(memptr),(N)); \
- MALLOC_FAILURE_ACTION; \
- }\
- } \
- else if (NULL == ((memptr)=(ATYPE *)calloc((size_t)(N),sizeof(ATYPE)))) { \
- fprintf(stderr,"%s, line %d: *** out of memory \n",__FILE__,__LINE__); \
- fprintf(stderr,"Unable to meet request: %s[%d]\n",STRINGIFY(memptr),(N)); \
- fprintf(stderr,"requested %d x %d bytes \n",(N),sizeof(ATYPE)); \
- MALLOC_FAILURE_ACTION; \
- }
-
-
-
-
-
-
-
-
-
-/*******************************************************************
- *
- * FREE:
- * dumps storage if a non-zero pointer, and resets pointer to
- * NULL to indicate its data freed.
- *
- ******************************************************************/
-
-#define FREE(P) if (P) {free(P);(P)=NULL;}
-
-
-#define REALLOC(memptr,NUM,ATYPE) \
-/* printf("REALLOC(%s=%d,%s=%d,sizeof(%s)=%d), %s %d\n",STRINGIFY(memptr), \
- (memptr),STRINGIFY(NUM),(NUM),STRINGIFY(ATYPE),sizeof(ATYPE),\
- __FILE__,__LINE__);*/ \
- if ((NUM)<=0) { \
- if (MEM0_REQUEST_BAD) {\
- fprintf(stderr,"%s, line %d: *** invalid memory request: %s[%d].\n",\
- __FILE__,__LINE__,STRINGIFY(memptr),(NUM)); \
- REALLOC_FAILURE_ACTION; \
- }\
- } \
- else { \
- voidptr temp_ptr=realloc((void *)(memptr),sizeof(ATYPE)*(size_t)(NUM));\
- if (temp_ptr) \
- (memptr)=(ATYPE *)temp_ptr;\
- else { \
- fprintf(stderr,"%s, line %d: *** out of memory \n",__FILE__,__LINE__); \
- fprintf(stderr,"Unable to meet request: %s\n",STRINGIFY(memptr)); \
- fprintf(stderr,"requested %d x %d bytes \n",(NUM),sizeof(ATYPE)); \
- REALLOC_FAILURE_ACTION; \
- } \
- }
-
-
-/**********************************************************************
- *
- * REBUFF
- * GUARANTEES ALLOCATION OF NULL-INITIALIZED BLOCK OF BUF ELEMENTS
- * OF THE CORRECT DATA TYPE & SIZE, SUFFICIENT TO HOLD AT LEAST ONE
- * ELEMENT ADDED TO THE CURRENT SIZE OF THE ARRAY
- *
- **********************************************************************/
-
-#define REBUFF(memptr,NUM,BUF,ATYPE) if ((NUM)<=0) { \
- CALLOC(memptr,BUF,ATYPE) } \
- else if (0 == (NUM)%(BUF)) { \
- char *p_rebuff_size;\
- REALLOC(memptr,(NUM)+(BUF),ATYPE); \
- p_rebuff_size=(char *)(memptr)+(NUM)*sizeof(ATYPE); \
- memset((voidptr)p_rebuff_size,0,(size_t)((BUF)*sizeof(ATYPE)));\
- }
-
-
-
-
-
-
-
-
-
-/*******************************************************************
- *
- * GETMEM:
- * allocates memory either via malloc or realloc, guaranteeing
- * space to store at least one more data entry. Data is
- * alloc'd in blocks of size BUF; the total # alloc'd is
- * kept in LAST. The algorithm can go wrong if LAST
- * is incorrect.
- *
- ******************************************************************/
-
-#define GETMEM(memptr,NUM,LAST,BUF,TYPE) \
- if ((LAST)<=0) {\
- CALLOC(memptr,((NUM)+1)-((NUM)+1)%(BUF)+(BUF),TYPE);\
- (LAST)=((NUM)+1)-((NUM)+1)%(BUF)+(BUF);\
- }\
- else if (((NUM)+1)>(LAST)) { \
- REALLOC(memptr,((NUM)+1)-((NUM)+1)%(BUF)+(BUF),TYPE);\
- (LAST)=((NUM)+1)-((NUM)+1)%(BUF)+(BUF);\
- }
-
-
-
-
-
-#endif
Deleted: trunk/packages/poa/trunk/fasta_format.c
===================================================================
--- trunk/packages/poa/trunk/fasta_format.c 2008-03-22 15:38:40 UTC (rev 1651)
+++ trunk/packages/poa/trunk/fasta_format.c 2008-03-22 17:20:38 UTC (rev 1652)
@@ -1,89 +0,0 @@
-
-#include "default.h"
-#include "seq_util.h"
-
-
-
-/** reads FASTA formatted sequence file, and saves the sequences to
- the array seq[]; any comment line preceded by a hash-mark will be saved
- to comment */
-int read_fasta(FILE *seq_file,Sequence_T **seq,
- int do_switch_case,char **comment)
-{
- int c,nseq=0,length=0;
- char seq_name[FASTA_NAME_MAX]="",
- line[SEQ_LENGTH_MAX],seq_title[FASTA_NAME_MAX]="";
- char *p;
- stringptr tmp_seq=STRINGPTR_EMPTY_INIT;
-
- /* read in sequences */
- while (fgets(line,sizeof(line)-1,seq_file)) {
- if ((p=strrchr(line,'\n'))) /* REMOVE NEWLINE FROM END OF LINE */
- *p= '\0'; /* TRUNCATE THE STRING */
- switch (line[0]) {
- case '#': /* SEQUENCE COMMENT, SAVE IT */
- if (comment) /* SAVE COMMENT FOR CALLER TO USE */
- *comment = strdup(line+1);
- break;
-
- case '>': /* SEQUENCE HEADER LINE */
- if (seq_name[0] && tmp_seq.p && tmp_seq.p[0]) { /* WE HAVE A SEQUENCE, SO SAVE IT! */
- if (create_seq(nseq,seq,seq_name,seq_title,tmp_seq.p,do_switch_case))
- nseq++;
- }
- seq_name[0]='\0';
- if (sscanf(line+1,"%s %[^\n]", /* SKIP PAST > TO READ SEQ NAME*/
- seq_name,seq_title)<2)
- strcpy(seq_title,"untitled"); /* PROTECT AGAINST MISSING NAME */
- if (tmp_seq.p)
- tmp_seq.p[0]='\0'; /* RESET TO EMPTY SEQUENCE */
- length=0;
- break;
-
- case '*': /* IGNORE LINES STARTING WITH *... DON'T TREAT AS SEQUENCE! */
- break;
-
- default: /* READ AS ACTUAL SEQUENCE DATA, ADD TO OUR SEQUENCE */
- if (seq_name[0]) /* IF WE'RE CURRENTLY READING A SEQUENCE, SAVE IT */
- stringptr_cat_pos(&tmp_seq,line,&length);
- }
-
- c=getc(seq_file); /* ?FIRST CHARACTER IS UNIGENE CLUSTER TERMINATOR? */
- if (c==EOF)
- break;
- else {
- ungetc(c,seq_file); /* PUT THE CHARACTER BACK */
- if (c=='#' && nseq>0) /* UNIGENE CLUSTER TERMINATOR, SO DONE!*/
- break;
- }
- }
- if (seq_name[0] && tmp_seq.p && tmp_seq.p[0]) { /* WE HAVE A SEQUENCE, SO SAVE IT! */
- if (create_seq(nseq,seq,seq_name,seq_title,tmp_seq.p,do_switch_case))
- nseq++;
- }
- stringptr_free(&tmp_seq);
- return nseq; /* TOTAL NUMBER OF SEQUENCES CREATED */
-}
-
-/**@memo example: reading FASTA format file:
- seq_ifile=fopen(seq_filename,"r");
- if (seq_ifile) {
- nseq=read_fasta(seq_ifile,&seq,do_switch_case,&comment);
- fclose(seq_ifile);
- }
-*/
-
-
-
-/** writes a FASTA formatted file, saving the sequence given in seq[] */
-void write_fasta(FILE *ifile,char name[],char title[],char seq[])
-{
- int j;
-
- fprintf(ifile,">%s %s\n",name,title? title : "untitled");
- for (j=0;j<strlen(seq);j+=60)
- fprintf(ifile,"%.60s\n",seq+j);
-
- return;
-}
-
Deleted: trunk/packages/poa/trunk/heaviest_bundle.c
===================================================================
--- trunk/packages/poa/trunk/heaviest_bundle.c 2008-03-22 15:38:40 UTC (rev 1651)
+++ trunk/packages/poa/trunk/heaviest_bundle.c 2008-03-22 17:20:38 UTC (rev 1652)
@@ -1,173 +0,0 @@
-
-
-#include "default.h"
-#include "poa.h"
-#include "seq_util.h"
-#include "lpo.h"
-
-
-
-
-/** finds the heaviest traversal of the LPO seq[], using dynamic programming;
- at each node the heaviest link is chosen to buildup traversals; finally,
- the traversal with the heaviest overall link weight is returned as an
- array of position indices. The length of the array is stored in
- *p_best_len*/
-LPOLetterRef_T *heaviest_bundle(int len,LPOLetter_T seq[],
- int nsource_seq,LPOSourceInfo_T source_seq[],
- int *p_best_len)
-{
- int i,j,best_right,iright,ibest= -1,best_len=0;
- LPOLetterRef_T *best_path=NULL,*path=NULL;
- LPOLetterLink_T *right;
- LPOLetterSource_T *source;
- LPOScore_T *score=NULL,best_score= -999999,right_score;
- int *contains_pos=NULL,my_overlap,right_overlap;
-
- CALLOC(path,len,LPOLetterRef_T); /* GET MEMORY FOR DYNAMIC PROGRAMMING */
- CALLOC(score,len,LPOScore_T);
- CALLOC(contains_pos,nsource_seq,int);
- LOOP (i,nsource_seq) /* RESET TO NOT MATCH ANY POSITIONS */
- contains_pos[i]= INVALID_LETTER_POSITION;
-
- LOOPB (i,len) { /* FIND HEAVIEST PATH BY DYNAMIC PROGRAMMING */
- source= &seq[i].source; /* MARK SEQUENCES CONTAINING THIS POSITION */
- memset(contains_pos,0,nsource_seq*sizeof(int)); /* ERASE ARRAY */
- do
- if (source_seq[source->iseq].weight>0) /* EXCLUDE ZERO WEIGHT SEQS */
- contains_pos[source->iseq]=source->ipos+1; /* right MUST BE ADJACENT*/
- while (source=source->more); /* KEEP COUNTING TILL NO more */
-
- right_score=right_overlap=0; /*DEFAULT MOVE: NOTHING TO THE RIGHT*/
- best_right= INVALID_LETTER_POSITION;
- for (right= &seq[i].right;right && right->ipos>=0;right=right->more) {
- my_overlap=0; /* OVERLAP CALCULATION */
- source= &seq[right->ipos].source;/*COUNT SEQS SHARED IN i AND right*/
- do /* BIAS OVERLAP CALCULATION BY SEQUENCE WEIGHTING */
- if (contains_pos[source->iseq]==source->ipos) /* YES, ADJACENT! */
- my_overlap += source_seq[source->iseq].weight;
- while (source=source->more); /* KEEP COUNTING TILL NO more */
-
- if (my_overlap>right_overlap /* FIND BEST RIGHT MOVE: BEST OVERLAP */
- || (my_overlap==right_overlap && score[right->ipos]>right_score)) {
- right_overlap=my_overlap;
- right_score=score[right->ipos];
- best_right=right->ipos;
- }
- }
-
- path[i]=best_right; /* SAVE THE BEST PATH FOUND */
- score[i]=right_score+right_overlap; /* SAVE THE SCORE */
- if (score[i]>best_score) { /* RECORD BEST SCORE IN WHOLE LPO */
- ibest=i;
- best_score=score[i];
- }
- }
-
- CALLOC(best_path,len,LPOLetterRef_T); /* MEMORY FOR STORING BEST PATH */
- for (;ibest>=0;ibest=path[ibest]) /* BACK TRACK THE BEST PATH */
- best_path[best_len++]=ibest;
-
- FREE(path); /* DUMP SCRATCH MEMORY */
- FREE(score);
- FREE(contains_pos);
-
- if (p_best_len) /* RETURN best_path AND ITS LENGTH */
- *p_best_len = best_len;
- return best_path;
-}
-
-
-
-
-int assign_sequence_bundle_id(int path_length,LPOLetterRef_T path[],
- LPOSequence_T *seq,int bundle_id,
- float minimum_fraction)
-{
- int i,*bundle_count=NULL,nseq_in_bundle=0;
- LPOLetterSource_T *source;
-
- CALLOC(bundle_count,seq->nsource_seq,int);
- LOOP (i,path_length) /* COUNT #POSITIONS OF EACH SEQ ARE IN path */
- for (source= &seq->letter[path[i]].source;source;source=source->more)
- bundle_count[source->iseq]++;
-
- LOOP (i,seq->nsource_seq) {/* FOR EACH SEQ OVER THRESHOLD, ASSIGN bundle_id*/
-/* printf("bundle %d:\t%s\t%d/%d %d",bundle_id,seq->source_seq[i].name,
- bundle_count[i],seq->source_seq[i].length,seq->source_seq[i].weight);*/
- if (seq->source_seq[i].bundle_id<0 /* NOT YET BUNDLED */
- && seq->source_seq[i].length*minimum_fraction <= bundle_count[i]) {
-/* printf(" +++++++++++++++++");*/
- seq->source_seq[i].bundle_id = bundle_id; /* ASSIGN TO THIS BUNDLE */
- seq->source_seq[i].weight = 0; /* REMOVE FROM FUTURE heaviest_bundle */
- nseq_in_bundle++;
- }
- /* printf("\n");*/
- }
-
- FREE(bundle_count);
- return nseq_in_bundle; /* RETURN COUNT OF SEQUENCES IN BUNDLE */
-}
-
-
-
-
-/** assigns weights for bundling based upon /hb_weight arguments
- in source_seq titles */
-
-void assign_hb_weights(int nsource_seq,LPOSourceInfo_T source_seq[])
-{
- int i,weight;
- char *p;
- LOOP (i,nsource_seq) {
- if (source_seq[i].title &&
- (p=strstr(source_seq[i].title,"/hb_weight="))) {
- weight=atoi(p+11);
- if (weight!=0){ /* 0 COULD MEAN atoi FAILED TO PARSE ARG. IGNORE IT*/
- source_seq[i].weight = weight;
- fprintf(stderr,"assigned weight=%d to %s\n",source_seq[i].weight,source_seq[i].name);
- }
- else
- WARN_MSG(USERR,(ERRTXT,"hb_weight zero or unreadable: %s\nIgnored",p),"$Revision: 1.2 $");
- }
- }
-}
-
-
-
-/** generates the complete set of heaviest_bundle traversals of the the LPO
- seq, using iterative heaviest_bundle() and requiring that at least
- minimum_fraction of the positions in a sequence match the heaviest
- bundle path, for that sequence to be assigned to that bundle
----------------------------------------------------------------
-------------------------------------------------------------*/
-void generate_lpo_bundles(LPOSequence_T *seq,float minimum_fraction)
-{
- int nbundled=0,ibundle=0,path_length,iseq,count;
- LPOLetterRef_T *path=NULL;
- char name[256],title[1024];
-
- /* assign_hb_weights(seq->nsource_seq,seq->source_seq); TURN THIS ON!!*/
- while (nbundled < seq->nsource_seq) {/* PULL OUT BUNDLES ONE BY ONE */
- path=heaviest_bundle(seq->length,seq->letter,/*GET NEXT HEAVIEST BUNDLE*/
- seq->nsource_seq,seq->source_seq,&path_length);
- if (!path || path_length<10) /* ??!? FAILED TO FIND A BUNDLE ??? */
- goto premature_warning;
- sprintf(name,"CONSENS%d",ibundle);
- /* NEXT, MARK SEQUENCES THAT FIT THIS BUNDLE ADEQUATELY */
- count=assign_sequence_bundle_id(path_length,path,seq,ibundle,
- minimum_fraction);
- sprintf(title,"consensus produced by heaviest_bundle, containing %d seqs",
- count); /* DON'T INCLUDE CONSENSUS ITSELF IN THE COUNT! */
- iseq=add_path_sequence(path_length,path,seq,name,title);/*BUILD CONSENSUS*/
- seq->source_seq[iseq].bundle_id=ibundle++; /* INCREMENT BUNDLE ID */
- nbundled+=count; /* KEEP TRACK OF TOTAL SEQUENCES BUNDLED */
-
- if (count<1) {
- premature_warning:
- fprintf(stderr,"*** WARNING: bundling ended prematurely after %d bundles.\nNo sequences fit inside this last bundle.\nA total of %d sequences incuding consensus were bundled.\n\n",ibundle,nbundled);
- break;
- }
- }
-}
-
Deleted: trunk/packages/poa/trunk/lpo.c
===================================================================
--- trunk/packages/poa/trunk/lpo.c 2008-03-22 15:38:40 UTC (rev 1651)
+++ trunk/packages/poa/trunk/lpo.c 2008-03-22 17:20:38 UTC (rev 1652)
@@ -1,788 +0,0 @@
-
-
-#include "default.h"
-#include "poa.h"
-#include "seq_util.h"
-#include "lpo.h"
-
-
-/** INITIALIZES LINEARIZED PARTIAL ORDER DATA STRUCTURES FOR
- A LINEAR SEQUENCE */
-void lpo_init(LPOSequence_T *seq)
-{
- int i;
-
- CALLOC(seq->letter,seq->length,LPOLetter_T);
-
- LOOP (i,seq->length) {
- seq->letter[i].left.ipos = SEQ_Y_LEFT(i); /* JUST A LINEAR SEQ */
- seq->letter[i].right.ipos= SEQ_Y_RIGHT(i);
- seq->letter[i].source.iseq=0; /* TRIVIAL SOURCE: POINT TO SELF */
- seq->letter[i].source.ipos=i;
- seq->letter[i].align_ring = seq->letter[i].ring_id=i; /* POINT AT SELF */
- seq->letter[i].letter = seq->sequence[i]; /* COPY OUR AA LETTER */
- }
- seq->letter[seq->length -1].right.ipos= INVALID_LETTER_POSITION;
- /* NB: letter[0].left.ipos IS INVALID_LETTER_POSITION THANKS TO SEQ_Y_LEFT()
- ABOVE */
- /* BECAUSE PO CAN CONTAIN MULTIPLE SEQUENCES, WE ALSO KEEP A SOURCE LIST.
- FOR PURE LINEAR SEQUENCE, THE LIST IS JUST OUR STARTING SEQUENCE */
- save_lpo_source(seq,seq->name,seq->title,seq->length,1,NO_BUNDLE,0,NULL);
- return;
-}
-
-
-
-
-
-/**@memo initialize one or more regular sequences (linear orders) to LPO form.
- This step is REQUIRED before running partial order alignment.
- This routine processes each sequence with limit_residues() and
- index_symbols(), then builds a linear LPO using lpo_init(). */
-
-void initialize_seqs_as_lpo(int nseq, Sequence_T seq[],ResidueScoreMatrix_T *m)
-{
- int i;
- LOOP (i,nseq) {/* EXCLUDE LETTERS THAT AREN'T IN MATRIX */
- limit_residues(seq[i].sequence,m->symbol);
- /* TRANSLATE FROM ASCII LETTERS TO COMPACTED NUMBERICAL INDEX*/
- index_symbols(seq[i].length,seq[i].sequence,seq[i].sequence,
- m->nsymbol,m->symbol);
- lpo_init(seq+i); /* CREATE TRIVIAL, LINEAR SEQUENCE LPO */
- }
-}
-
-
-/** translates letter symbols on lpo.letter[i] to indexes used in scoring
- matrix*/
-void lpo_index_symbols(Sequence_T *lpo,ResidueScoreMatrix_T *m)
-{
- int i;
-
- if (lpo->letter == NULL) { /* HMM. HASN'T BEEN INITIALIZED AT ALL YET */
- initialize_seqs_as_lpo(1,lpo,m);
- return;
- }
- if (lpo->letter[0].letter < m->nsymbol)
- return; /* LOOKS LIKE IT'S ALREADY TRANSLATED TO INDEXES */
- LOOP (i,lpo->length) /* READ FROM FILE, MAY NOT BE TRANSLATED YET */
- index_symbols(1,&lpo->letter[i].letter,&lpo->letter[i].letter,
- m->nsymbol,m->symbol);
-}
-
-
-
-
-
-/** finds ipos for the designated sequence in the designated letter,
- or INVALID_LETTER_POSITION if iseq is not found. */
-int find_letter_source(LPOLetter_T *letter,int iseq)
-{
- LPOLetterSource_T *source;
- for (source= &letter->source;source;source=source->more)/*SCAN SOURCES*/
- if (source->iseq == iseq) /*MATCH! */
- return source->ipos; /* RETURN ITS SEQUENCE POSITION */
- return INVALID_LETTER_POSITION;
-}
-
-
-
-
-
-/**@memo finds links to iseq, starting from letter[ipos] and proceeding in the
- specified direction up to max_step links. Optionally, the search
- can be constrained to only match node iseq:match_ipos.
- If iseq is found, the number
- of links connecting its letter to letter[ipos] is reported. Otherwise
- it returns max_step+1 to indicate iseq was not found within the given
- distance. If p_ipos or p_letter_pos are non-NULL, it will return
- the ipos of the position found in iseq, or the index of the found
- letter[], respectively. */
-int find_sequence_link(int iseq,
- int match_ipos,
- int start_pos,
- LPOLetter_T letter[],
- int max_step,
- int please_go_right,
- int *p_ipos,
- int *p_letter_pos)
-{
- int nstep,ipos;
- LPOLetterLink_T *link,*link0;
-
- if (please_go_right) /* CHOOSE THE DESIRED DIRECTION */
- link0= &letter[start_pos].right;
- else
- link0= &letter[start_pos].left;
-
- for (link=link0;link && link->ipos>=0;link=link->more) /*SCAN ALL LINKS*/
- if ((ipos=find_letter_source(letter+link->ipos,iseq))>=0
- && (match_ipos<0 /* NO CONSTRAINT ON match_ipos */
- || match_ipos==ipos)) { /* POSITION MATCHES */
- if (p_ipos) /* HAND BACK THE SEQUENCE ipos TO CALLER */
- *p_ipos = ipos;
- if (p_letter_pos) /* HAND BACK THE letter[] INDEX TO CALLER */
- *p_letter_pos = link->ipos;
- return 1; /* FOUND SEQUENCE ONLY ONE LINK AWAY FROM HERE! */
- }
-
- if (max_step>1) { /* OK TO TRY ANOTHER LAYER OF RECURSION */
- for (link=link0;link && link->ipos>=0;link=link->more) { /*SCAN ALL LINKS*/
- nstep=find_sequence_link(iseq,match_ipos,link->ipos,letter,max_step-1,
- please_go_right,p_ipos,p_letter_pos);
- if (nstep<max_step) /* FOUND IT! SO RETURN LINK COUNT */
- return nstep+1;
- }
- }
-
- return max_step+1; /* INDICATE iseq NOT FOUND IN THE SPECIFIED RANGE */
-}
-
-
-
-/** constructs a mapping from [iseq,ipos] -> iletter and back. This is
- saved as an index seq_to_po[ipos]==>iletter and
- po_to_seq[iletter]==>ipos .
- At the same time, it also constructs the actual sequence of the
- individual source sequences from the data stored in the LPO. */
-void build_seq_to_po_index(LPOSequence_T *seq)
-{
- int i,j;
- LPOLetterSource_T *source;
-
- LOOP (i,seq->nsource_seq) { /* DUMP EXISTING INDEX, ALLOC NEW*/
- FREE(seq->source_seq[i].seq_to_po);
- FREE(seq->source_seq[i].po_to_seq);
- FREE(seq->source_seq[i].sequence);
- CALLOC(seq->source_seq[i].seq_to_po,seq->source_seq[i].length,int);
- CALLOC(seq->source_seq[i].po_to_seq,seq->length,int);
- CALLOC(seq->source_seq[i].sequence,seq->source_seq[i].length+1,char);
- LOOP (j,seq->length) /*DEFAULT: PO LETTERS DON'T MAP TO ANY LETTER IN SEQ*/
- seq->source_seq[i].po_to_seq[j]= INVALID_LETTER_POSITION;
- }
-
- LOOP (i,seq->length) { /* MAP EVERY LETTER ONTO SOURCE INDEXES */
- for (source= &seq->letter[i].source;source;source=source->more) {
- seq->source_seq[source->iseq].seq_to_po[source->ipos]=i;/*INVERSE*/
- seq->source_seq[source->iseq].po_to_seq[i]=source->ipos;/*MAPPING*/
- seq->source_seq[source->iseq].sequence[source->ipos]=seq->letter[i].letter;
- }
- }
-}
-
-
-
-
-/** CREATES A NEW SOURCEINFO ENTRY ON seq->source_seq[], SAVING THE
- FIELDS PASSED BY THE CALLER */
-int save_lpo_source(LPOSequence_T *seq,
- char name[],
- char title[],
- int length,
- int weight,
- int bundle_id,
- int ndata,
- LPONumericData_T data[])
-{
- int i,j;
-
- REBUFF(seq->source_seq,seq->nsource_seq,SOURCE_SEQ_BUFFER_CHUNK,
- LPOSourceInfo_T);
- i=seq->nsource_seq;
- seq->source_seq[i].title=strdup(title? title:"untitled");
- seq->source_seq[i].length=length;
- seq->source_seq[i].weight=weight; /* DEFAULT WEIGHTING */
- seq->source_seq[i].bundle_id= bundle_id;
- STRNCPY(seq->source_seq[i].name,name,SEQUENCE_NAME_MAX);
-
- LOOPF(j,ndata) /* SAVE NUMERIC DATA FOR THIS SOURCE */
- cp_numeric_data(seq->source_seq+i,data+j);
-
- return seq->nsource_seq++; /* INCREMENT SOURCE SEQUENCE LIST COUNT */
-}
-
-
-
-/** SAVE source_seq[] ENTRIES FROM ONE LPO TO ANOTHER */
-int *save_lpo_source_list(LPOSequence_T *seq,
- int nsource_seq,
- LPOSourceInfo_T source_seq[])
-{
- int i,*list=NULL;
-
- CALLOC(list,nsource_seq,int);
- LOOPF (i,nsource_seq)
- list[i]=save_lpo_source(seq,source_seq[i].name,source_seq[i].title,
- source_seq[i].length,source_seq[i].weight,
- source_seq[i].bundle_id,
- source_seq[i].ndata,source_seq[i].data);
-
- return list; /*HAND BACK LIST OF NEW INDICES ASSIGNED TO THESE source_seq*/
-}
-
-
-
-/** ADDS A LINK TO ipos TO THE LINKED LIST STORED IN list,
- ALLOCATING A NEW LPOLetterLink IF NEEDED */
-LPOLetterLink_T *add_lpo_link(LPOLetterLink_T *list,LPOLetterRef_T ipos)
-{
- if (list->ipos <0) { /* FIRST ENTRY IS EMPTY, SO USE IT */
- list->ipos = ipos;
- return list;/*RETURNS PTR TO LINK IN WHICH ipos STORED */
- }
- do { /* SCAN LIST CHECKING IF ALREADY STORED */
- if (list->ipos == ipos) /* ALREADY STORED, NO NEED TO STORE AGAIN */
- return list;/*RETURNS PTR TO LINK IN WHICH ipos STORED */
- } while (list->more? (list=list->more):0);
-
- CALLOC(list->more,1,LPOLetterLink_T); /* ADD ENTRY TO LINKED LIST */
- list->more->ipos=ipos; /* SAVE THE LETTER REFERENCE */
- return list->more;/*RETURNS PTR TO LINK IN WHICH ipos STORED */
-}
-
-
-
-
-void add_lpo_sources(LPOLetterSource_T *new_s,LPOLetterSource_T *old_s,
- int iseq_new[])/*TRANSLATION TO NEW source_seq[] INDEX*/
-{
- for (;new_s->more;new_s=new_s->more);/* GO TO END*/
- for (;old_s;old_s=old_s->more) {/*SAVE SOURCES*/
- if (new_s->ipos>=0) { /* ALREADY A SOURCE HERE, SO CREATE NEW ENTRY */
- CALLOC(new_s->more,1,LPOLetterSource_T);
- new_s=new_s->more;
- }
- new_s->iseq=iseq_new[old_s->iseq]; /* SAVE SEQUENCE ID, POSITION */
- new_s->ipos=old_s->ipos;
- }
-}
-
-
-void reindex_lpo_source_seqs (LPOSequence_T *seq, int *perm)
-{
- int i, j, len, nseq, *map, *invmap;
- LPOSourceInfo_T tmp;
- LPOLetterSource_T *src;
-
- len = seq->length;
- nseq = seq->nsource_seq;
-
- CALLOC (map, len, int);
- CALLOC (invmap, len, int);
-
- /* BUILD INITIAL MAP AND INVERSE MAP: */
- for (i=0; i<nseq; i++) {
- map[i] = invmap[i] = -1;
- }
- for (i=0; i<nseq; i++) {
- map[i] = perm[i];
- IF_GUARD (map[i]>=nseq || map[i]<0 || invmap[map[i]]!=-1, 1.1, (ERRTXT,"Bad argument! 'perm' must be a permutation of [0,%d]\n",nseq-1), CRASH);
- invmap[map[i]] = i;
- }
-
- /* RENUMBER SEQS IN 'source_info' ENTRIES FOR ALL LETTERS: */
- for (i=0; i<len; i++) {
- for (src=&(seq->letter[i].source); src!=NULL && src->ipos>=0; src=src->more) {
- src->iseq = map[src->iseq];
- }
- }
-
- /* SHUFFLE 'source_seq' ENTRIES, IN-PLACE, TO NEW ORDER: */
- /* (THIS DESTROYS map AND invmap.) */
- for (i=0; i<nseq; i++) if ((j = invmap[i]) != i) {
- /* SWAP POSITIONS i AND j: */
- tmp = seq->source_seq[i];
- seq->source_seq[i] = seq->source_seq[j];
- seq->source_seq[j] = tmp;
- /* UPDATE map AND invmap: */
- map[j] = map[i];
- invmap[map[i]] = j;
- map[i] = invmap[i] = i;
- }
-
- FREE (map);
- FREE (invmap);
-}
-
-
-void copy_lpo_letter(LPOLetter_T *new,LPOLetter_T *old,
- LPOLetterRef_T old_to_new[],
- int iseq_new[])
-{
- LPOLetterLink_T *link;
- new->letter=old->letter; /* SAVE ITS SEQUENCE LETTER */
- add_lpo_sources(&new->source,&old->source,iseq_new); /* SAVE SOURCES */
- for (link= &old->left;link && link->ipos>=0;link=link->more) /*SAVE left*/
- add_lpo_link(&new->left,old_to_new[link->ipos]);
- for (link= &old->right;link && link->ipos>=0;link=link->more)/*SAVE right*/
- add_lpo_link(&new->right,old_to_new[link->ipos]);
- return;
-}
-
-
-
- /** FUSES RING a AND RING b BY CROSSLINK OPERATION */
-void crosslink_rings(LPOLetterRef_T a,LPOLetterRef_T b,LPOLetter_T seq[])
-{
- LPOLetterRef_T align_ring;
-
- if (seq[a].ring_id==seq[b].ring_id) /* ALREADY ON SAME RING, DO NOTHING! */
- return;
- else if (seq[a].ring_id<seq[b].ring_id) { /* a<b SO RESET b.ring_id */
- align_ring=b; /* TRAVERSE RING b */
- do seq[align_ring].ring_id=seq[a].ring_id; /* ENFORCE a.ring_id == b.ring_id*/
- while ((align_ring=seq[align_ring].align_ring) != b);
- }
- else { /* a>b SO RESET a.ring_id */
- align_ring=a; /* TRAVERSE RING a */
- do seq[align_ring].ring_id=seq[b].ring_id; /* ENFORCE a.ring_id == b.ring_id*/
- while ((align_ring=seq[align_ring].align_ring) != a);
- }
-
- align_ring=seq[a].align_ring; /* JUST LIKE A SWAP */
- seq[a].align_ring=seq[b].align_ring; /* CROSSLINK THE TWO RINGS */
- seq[b].align_ring=align_ring;
- return;
-}
-
-
-
-
-void copy_old_ring_to_new(LPOLetterRef_T start, /* START OF RING TO COPY*/
- LPOLetter_T old_lpo[],
- LPOLetter_T new_lpo[],
- LPOLetterRef_T old_to_new[]) /* MAPPING */
-{
- LPOLetterRef_T ipos,next_pos;
- for (ipos=start;(next_pos=old_lpo[ipos].align_ring) != start;ipos=next_pos)
- crosslink_rings(old_to_new[ipos],old_to_new[next_pos],new_lpo);
-}
-
-
-
-/* TEMPORARY: THESE CONSTANTS CONTROL SEGMENT FUSION
- TESTING THE FOLLOWING NUMBERS ON DNA ASSEMBLY*/
-#define FISSION_BREAK 5 /* LENGTH OF MISMATCH THAT SPLITS INTO SEGMENTS */
-#define MINIMUM_FUSION 10 /* MINIMUM #IDENTITIES FOR SEGMENT TO FUSE */
-#define FUSION_PERCENT 0.8 /* MINIMUM OVERALL IDENTITY FOR SEGMENT TO FUSE*/
-
-void mark_fusion_segments(int len_x,LPOLetter_T seq_x[],
- int len_y,LPOLetter_T seq_y[],
- LPOLetterRef_T y_to_x[],
- int fission_break,
- int minimum_fusion_length,
- float minimum_fusion_identity,
- char do_fuse[])
-{
- int i,i_x,i_y,mismatch_length=0,identity_count=0,fission_break_point= -1;
-
- LOOP (i_y,len_y) {
- if ((i_x=y_to_x[i_y])>=0 && seq_x[i_x].letter==seq_y[i_y].letter)
- do_fuse[i_y]=1;
- }
-#ifdef SOURCE_EXCLUDED
- LOOPF (i_y,len_y) {
- if ((i_x=y_to_x[i_y])<0 /* NOT ALIGNED AT ALL */
- || seq_x[i_x].letter!=seq_y[i_y].letter /* MISMATCH */
- || i_y==len_y-1) { /* END OF SEQ, MUST CHECK LAST SEGMENT! */
- if (++mismatch_length>=fission_break /* BREAK POINT */
- || i_y==len_y-1) { /* END OF SEQ, MUST CHECK LAST SEGMENT! */
- if (identity_count>=minimum_fusion_length /* END OF A FUSION SEGMENT?*/
- && minimum_fusion_identity
- *(i_y-mismatch_length-fission_break_point)<= identity_count) {
- /* YES, MARK PRECEEDING SEGMENT FOR FUSION*/
- for (i=i_y-fission_break;i>fission_break_point;i--)/*MARK SEGMENT!*/
- if ((i_x=y_to_x[i])>=0 && seq_x[i_x].letter==seq_y[i].letter)
- do_fuse[i]=1; /* IDENTITY! MARK THIS POSITION TO BE FUSED */
- }
- fission_break_point=i_y; /* NO FUSION SEGMENT CAN EXTEND PAST HERE */
- identity_count=0; /* RESET IDENTITY COUNTER FOR STARTING NEXT SEGMENT*/
- }
- }
- else { /* PERFECT IDENTITY */
- mismatch_length=0;
- identity_count++;
- }
- }
-#endif
-}
-
-
-
-
-int reindex_lpo_fusion(int len_x,LPOLetter_T seq_x[],
- int len_y,LPOLetter_T seq_y[],
- LPOLetterRef_T x_to_y[],
- LPOLetterRef_T y_to_x[],
- LPOLetterRef_T new_x[],
- LPOLetterRef_T new_y[],
- int fission_break,
- int minimum_fusion_length,
- float minimum_fusion_identity)
-{
- int new_len=0;
- LPOLetterRef_T i_x,i_y,i_ring,end_of_ring= -1;
- char *do_fuse=NULL;
-
- CALLOC(do_fuse,len_y,char); /* MARK POSITIONS TO FUSE seq_y TO seq_x */
- mark_fusion_segments(len_x,seq_x,len_y,seq_y,y_to_x,fission_break,
- minimum_fusion_length,minimum_fusion_identity,do_fuse);
-
- for (i_x=i_y=new_len=0;i_x<len_x;i_x++) { /* BUILD new_x, new_y */
- for (i_ring=i_x;i_ring<len_x && seq_x[i_ring].ring_id==seq_x[i_x].ring_id;
- i_ring++) /* SCAN X RING, AND SEE IF ANYTHING ALIGNS TO Y */
- if (x_to_y[i_ring]>=0) { /* IF SO, INSERT Y NOW TO KEEP X RING TOGETHER*/
- while (i_y<x_to_y[i_ring]) /* CONCATENATE LBEFORE ALIGNED LETTER */
- new_y[i_y++]=new_len++; /* ADD TO new_lpo */
- break;
- }
-
- if (x_to_y[i_x]>=0 /* ALIGNED, SO FIRST INSERT PRECEEDING FROM seq_y */
- && i_y<len_y) {
- for (i_ring=seq_y[i_y].align_ring;i_ring!=i_y; /* CIRCLE THE RING*/
- i_ring=seq_y[i_ring].align_ring) /*FIND END OF i_y ALIGNMENT RING*/
- if (i_ring>end_of_ring) /* FIND MAXIMUM INDEX ON THIS RING */
- end_of_ring=i_ring; /* RING GUARANTEED TO BE ONE CONTIGUOUS BLOCK*/
-
- if (do_fuse[i_y]) /* THIS POSITION MEETS OUR FUSION CRITERIA, SO FUSE*/
- new_y[i_y++]=new_len; /* USE SAME INDEX AS WILL BE USED FOR i_x */
- else /* NOT IDENTICAL, SO GIVE IT ITS OWN LETTER */
- new_y[i_y++]=new_len++; /* ADD TO new_lpo */
- }
- new_x[i_x]=new_len++; /* ADD TO new_lpo */
-
- while (i_y<=end_of_ring) /* CONCATENATE LETTERS ALIGNED TO i_y */
- new_y[i_y++]=new_len++; /*KEEP ALIGNED LETTERS AS ONE CONTIGUOUS BLOCK!*/
- }
- while (i_y<len_y) { /* CONCATENATE TAIL OF seq_y ONTO new_lpo */
- new_y[i_y++]=new_len++; /* ADD TO new_lpo */
- }
-
- FREE(do_fuse);
- return new_len;
-}
-
-
-void remap_x_to_new(int nremap_x,/*TRANSLATE INDICES IN remap_x USING new_x*/
- LPOLetterRef_T remap_x[],/* ARRAY OF INDICES TO TRANSLATE*/
- int len_x,
- LPOLetterRef_T new_x[]) /* MAPPING FROM OLD -> NEW */
-{
- int i;
-
- LOOP (i,nremap_x) {
- if (remap_x[i]>=0 && remap_x[i]<len_x) /* KEEP IN BOUNDS */
- remap_x[i]=new_x[remap_x[i]];
- else /* BOOM!! OUT OF RANGE!! */
- abort(); /* DELIBERATELY CRASH TO TRAP THE ERROR! */
- }
-}
-
-
-
-LPOSequence_T *copy_fuse_lpo_remap(LPOSequence_T *holder_x, /*FUSES TWO LPOs*/
- LPOSequence_T *holder_y, /*MAKES *NEW* HOLDER */
- LPOLetterRef_T x_to_y[],
- LPOLetterRef_T y_to_x[],
- int nremap_x,
- LPOLetterRef_T remap_x[])
-{
- int i,new_len,*iseq_new=NULL,len_x,len_y;
- LPOLetterRef_T i_x,i_y,*new_x=NULL,*new_y=NULL;
- LPOLetter_T *new_lpo=NULL,*seq_x,*seq_y;
- LPOSequence_T *new_seq=NULL;
-
- len_x=holder_x->length; /* GET letter ARRAY FROM BOTH x AND y */
- seq_x=holder_x->letter;
- len_y=holder_y->length;
- seq_y=holder_y->letter;
-
- CALLOC(new_seq,1,LPOSequence_T); /* CREATE A NEW HOLDER */
- CALLOC(new_x,len_x,LPOLetterRef_T);
- CALLOC(new_y,len_y,LPOLetterRef_T);
- new_len=reindex_lpo_fusion(len_x,seq_x,len_y,seq_y,x_to_y,y_to_x,new_x,new_y,
- FISSION_BREAK,MINIMUM_FUSION,FUSION_PERCENT);
- CALLOC(new_lpo,new_len,LPOLetter_T); /* ALLOCATE NEW LINEARIZED PO */
- LOOP (i,new_len) { /* INITIALIZE ALL LINKS TO INVALID */
- new_lpo[i].left.ipos=new_lpo[i].right.ipos=new_lpo[i].source.ipos
- = INVALID_LETTER_POSITION;
- new_lpo[i].align_ring=new_lpo[i].ring_id=i; /* POINT TO SELF */
- }
- new_seq->length=new_len; /* SAVE NEW LPO ARRAY IN NEW HOLDER */
- new_seq->letter=new_lpo;
-
- iseq_new=save_lpo_source_list(new_seq,holder_x->nsource_seq,/*COPY x SOURCE*/
- holder_x->source_seq);
- LOOP (i_x,len_x) /* COPY LETTER DATA TO CORRESPONDING LETTERS OF NEW LPO */
- copy_lpo_letter(new_lpo+new_x[i_x],seq_x+i_x,new_x,iseq_new);
- FREE(iseq_new);
- iseq_new=save_lpo_source_list(new_seq,holder_y->nsource_seq,/*COPY y SOURCE*/
- holder_y->source_seq);
- LOOP (i_y,len_y)
- copy_lpo_letter(new_lpo+new_y[i_y],seq_y+i_y,new_y,iseq_new);
- FREE(iseq_new);
-
- LOOP (i_x,len_x) /* COPY OLD ALIGNMENT RINGS TO THE NEW LPO */
- copy_old_ring_to_new(i_x,seq_x,new_lpo,new_x);
- LOOP (i_y,len_y)
- copy_old_ring_to_new(i_y,seq_y,new_lpo,new_y);
-
- LOOP (i_x,len_x) /* SAVE ALIGNMENT OF seq_x AND seq_y TO new_lpo.align_ring*/
- if (x_to_y[i_x]>=0) /* seq_x[i_x] ALIGNED TO seq_y, SO SAVE! */
- crosslink_rings(new_x[i_x],new_y[x_to_y[i_x]],new_lpo);
-
- if (remap_x) /* CONVERT OLD INDEX TABLE TO NEW REFERENCE SYSTEM */
- remap_x_to_new(nremap_x,remap_x,len_x,new_x);
- FREE(new_x); /* DUMP SCRATCH MEMORY AND RETURN */
- FREE(new_y);
- return new_seq; /* HAND BACK THE NEW LPO CONTAINING THE FUSION */
-}
-
-
-/** fuses the two partial orders holder_x and holder_y, based upon the
- letter_x <--> letter_y mapping specified by x_to_y and y_to_x (which
- must be consistent!) A new LPO is created to store the result;
- neither holder_x or holder_y are changed */
-LPOSequence_T *copy_fuse_lpo(LPOSequence_T *holder_x, /*WRAPPER: NO REMAPPING*/
- LPOSequence_T *holder_y,
- LPOLetterRef_T x_to_y[],
- LPOLetterRef_T y_to_x[])
-{
- return copy_fuse_lpo_remap(holder_x,holder_y,x_to_y,y_to_x,0,NULL);
-}
-
-
-
-
-LPOSequence_T *copy_lpo(LPOSequence_T *holder_x)
-{
- int i;
- LPOSequence_T dummy,*new_copy;
- LPOLetterRef_T *x_to_y=NULL;
-
- memset(&dummy,0,sizeof(dummy)); /* BLANK ALL FIELDS */
- CALLOC(x_to_y,holder_x->length,LPOLetterRef_T);
- LOOP (i,holder_x->length)
- x_to_y[i]= INVALID_LETTER_POSITION;
-
- new_copy=copy_fuse_lpo(holder_x,&dummy,x_to_y,x_to_y);
- FREE(x_to_y);
- return new_copy;
-}
-
-
-
-
-
-void translate_lpo(int old_len,LPOLetterRef_T old_to_new[],
- LPOLetter_T seq[])
-{
- int i,block_end;
- LPOLetterLink_T *link;
- block_end=old_len;
- LOOPB (i,old_len) { /* TRANSLATE AND SHIFT THE WHOLE LPO */
- seq[i].align_ring=old_to_new[seq[i].align_ring]; /* TRANSLATE INDICES*/
- seq[i].ring_id=old_to_new[seq[i].ring_id];
- for (link= &seq[i].left;link && link->ipos>=0;link=link->more)
- link->ipos = old_to_new[link->ipos];
- for (link= &seq[i].right;link && link->ipos>=0;link=link->more)
- link->ipos = old_to_new[link->ipos];
-
- if ((0==i || /* HIT SEQ START, SO COPY THE BLOCK NOW!*/
- old_to_new[i] != old_to_new[i-1]+1) /*BLOCK BOUNDARY*/
- && old_to_new[i] != i) { /* ACTUALLY REQUIRES A SHIFT */
- memmove(seq+old_to_new[i],seq+i,(block_end-i)*sizeof(LPOLetter_T));
- block_end=i; /* RESET TO POINT TO END OF NEXT BLOCK TO COPY */
- } /* BLOCK NOW SHIFTED TO ITS NEW LOCATION */
- }
-}
-
-
-
-LPOSequence_T *fuse_lpo_remap(LPOSequence_T *holder_x,
- LPOSequence_T *holder_y,
- LPOLetterRef_T x_to_y[],
- LPOLetterRef_T y_to_x[],
- int nremap_x,
- LPOLetterRef_T remap_x[])
-{
- int i,new_len,*iseq_new=NULL,len_x,len_y;
- LPOLetterRef_T i_x,i_y,*new_x=NULL,*new_y=NULL;
- LPOLetter_T *new_lpo=NULL,*seq_x,*seq_y;
-
- len_x=holder_x->length; /* GET letter ARRAY FROM BOTH x AND y */
- seq_x=holder_x->letter;
- len_y=holder_y->length;
- seq_y=holder_y->letter;
-
- CALLOC(new_x,len_x,LPOLetterRef_T);
- CALLOC(new_y,len_y,LPOLetterRef_T);
- new_len=reindex_lpo_fusion(len_x,seq_x,len_y,seq_y,x_to_y,y_to_x,new_x,new_y,
- FISSION_BREAK,MINIMUM_FUSION,FUSION_PERCENT);
- REALLOC(seq_x,new_len,LPOLetter_T); /* EXPAND seq_x TO HOLD FUSED LPO */
- translate_lpo(len_x,new_x,seq_x); /* SHIFT ALL THE LETTERS TO NEW LOCATIONS*/
- new_lpo=seq_x; /* THE EXPANDED VERSION OF seq_x IS OUR NEW LPO */
- holder_x->length=new_len; /* SAVE NEW LPO ARRAY IN NEW HOLDER */
- holder_x->letter=new_lpo;
-
- LOOP (i_y,len_y) { /* INITIALIZE ALL LETTERS FOR STORING seq_y TO BLANK */
- if (y_to_x[i_y]>=0 && new_x[y_to_x[i_y]]==new_y[i_y])/*i_y FUSED TO i_x*/
- continue; /* THIS LETTER IS ALREADY PART OF seq_x SO DON'T OVERWRITE!!*/
- i=new_y[i_y]; /* TRANSLATE TO NEW INDEXING */
- memset(new_lpo+i,0,sizeof(LPOLetter_T)); /* NULL INITIALIZE IT! */
- new_lpo[i].left.ipos=new_lpo[i].right.ipos=new_lpo[i].source.ipos
- = INVALID_LETTER_POSITION; /* RESET TO UNLINKED STATE */
- new_lpo[i].align_ring=new_lpo[i].ring_id=i; /* POINT TO SELF */
- }
-
- iseq_new=save_lpo_source_list(holder_x,holder_y->nsource_seq,/*COPY y SRC*/
- holder_y->source_seq);
- LOOP (i_y,len_y) /* COPY LETTER DATA TO CORRESPONDING LETTERS OF NEW LPO */
- copy_lpo_letter(new_lpo+new_y[i_y],seq_y+i_y,new_y,iseq_new);
- FREE(iseq_new);
-
- LOOP (i_y,len_y) /* COPY OLD ALIGNMENT RINGS TO THE NEW LPO */
- copy_old_ring_to_new(i_y,seq_y,new_lpo,new_y);
-
- LOOP (i_x,len_x) /* SAVE ALIGNMENT OF seq_x AND seq_y TO new_lpo.align_ring*/
- if (x_to_y[i_x]>=0) /* seq_x[i_x] ALIGNED TO seq_y, SO SAVE! */
- crosslink_rings(new_x[i_x],new_y[x_to_y[i_x]],new_lpo);
-
- if (remap_x) /* CONVERT OLD INDEX TABLE TO NEW REFERENCE SYSTEM */
- remap_x_to_new(nremap_x,remap_x,len_x,new_x);
- FREE(new_x); /* DUMP SCRATCH MEMORY AND RETURN */
- FREE(new_y);
- return holder_x; /* HAND BACK x LPO CONTAINING THE FUSION */
-}
-
-
-/** fuses the two partial orders holder_x and holder_y, based upon the
- letter_x <--> letter_y mapping specified by x_to_y and y_to_x (which
- must be consistent!) The result is returned in holder_x */
-LPOSequence_T *fuse_lpo(LPOSequence_T *holder_x, /*WRAPPER: NO REMAPPING*/
- LPOSequence_T *holder_y,
- LPOLetterRef_T x_to_y[],
- LPOLetterRef_T y_to_x[])
-{
- return fuse_lpo_remap(holder_x,holder_y,x_to_y,y_to_x,0,NULL);
-}
-
-
-
-/** FREES the linked list link including all nodes beneath it; NB: link
- itself is freed, so DO NOT pass a static LPOLetterLink */
-void free_lpo_link_list(LPOLetterLink_T *link)
-{
- LPOLetterLink_T *next;
- for (;link;link=next) { /* DUMP ALL THE LINKS */
- next=link->more;
- free(link);
- }
-}
-
-/** FREES the linked list source including all nodes beneath it; NB: source
- itself is freed, so DO NOT pass a static LPOLetterSource */
-void free_lpo_source_list(LPOLetterSource_T *source)
-{
- LPOLetterSource_T *next;
- for (;source;source=next) { /* DUMP ALL THE SOURCE ENTRIES */
- next=source->more;
- free(source);
- }
-}
-
-
-/** FREES ALL DATA ASSOCIATED WITH letter[], AND OPTIONALLY letter ITSELF*/
-void free_lpo_letters(int nletter,LPOLetter_T *letter,int please_free_block)
-{
- int i;
- if (!letter) /* NOTHING TO FREE... */
- return;
- LOOP (i,nletter) { /* DUMP ALL LINKED LISTS */
- if (letter[i].left.more)
- free_lpo_link_list(letter[i].left.more);
- if (letter[i].right.more)
- free_lpo_link_list(letter[i].right.more);
- if (letter[i].source.more)
- free_lpo_source_list(letter[i].source.more);
- }
- if (please_free_block) /*DON'T ALWAYS WANT TO FREE... MIGHT BE IN AN ARRAY*/
- free(letter);
-}
-
-
-
-
-void free_lpo_sourceinfo(int nsource_seq,LPOSourceInfo_T *source_seq,
- int please_free_block)
-{
- int i;
- LOOP (i,nsource_seq) {
- FREE(source_seq[i].title);
- FREE(source_seq[i].sequence);
- FREE(source_seq[i].seq_to_po);
- FREE(source_seq[i].po_to_seq);
- free_lpo_numeric_data(source_seq[i].ndata,source_seq[i].data,TRUE);
- source_seq[i].data=NULL; /* DON'T LEAVE DANGLING POINTER! */
- }
- if (please_free_block)
- free(source_seq);
-}
-
-
-
-
-/** FREES ALL DATA FROM seq, AND OPTIONALLY seq ITSELF */
-void free_lpo_sequence(LPOSequence_T *seq,int please_free_holder)
-{
- int i;
- if (!seq) /* NOTHING TO FREE... */
- return;
- free_lpo_letters(seq->length,seq->letter,TRUE);
- seq->letter=NULL; /* MARK AS FREED... DON'T LEAVE DANGLING POINTER! */
- FREE(seq->title);
- FREE(seq->sequence);
- if (seq->source_seq) {
- free_lpo_sourceinfo(seq->nsource_seq,seq->source_seq,TRUE);
- seq->source_seq=NULL; /* MARK AS FREED... DON'T LEAVE DANGLING POINTER! */
- }
- if (please_free_holder) /*DON'T ALWAYS WANT TO FREE... MIGHT BE IN AN ARRAY*/
- free(seq);
-}
-
-/**@memo {\bfEXAMPLE}: dump the LPO dna_lpo, and all its associated data: \begin{verbatim}
- if (dna_lpo)
- free_lpo_sequence(dna_lpo,TRUE);
-\end{verbatim}
-*/
-
-
-/** creates a new sequence which follows path[] through the LPO seq,
- and gives it the specified name and title */
-int add_path_sequence(int path_length,
- LPOLetterRef_T path[],
- LPOSequence_T *seq,
- char name[],
- char title[])
-{
- int i,iseq_new;
- LPOSourceInfo_T *new_seq;
- LPOLetterSource_T save_source={0,0,NULL};
-
- /* CREATE SOURCE ENTRY FOR THIS SEQUENCE */
- iseq_new=save_lpo_source(seq,name,title,path_length,0,NO_BUNDLE,0,NULL);
-
- LOOP (i,path_length) { /* ADD THIS AS SOURCE TO ALL POSITIONS IN path */
- save_source.ipos=i;
- add_lpo_sources(&seq->letter[path[i]].source,&save_source,&iseq_new);
- } /* NB: THIS DOESN'T CHECK THAT path IS A VALID WALK THRU THE PARTIAL ORDER
- MIGHT BE A GOOD IDEA TO CATCH POSSIBLE ERRORS IN path */
- return iseq_new; /* RETURN INDEX OF NEWLY CREATED ENTRY */
-}
-
-/**@memo EXAMPLE: create a consensus sequence from a path:
- iseq=add_path_sequence(path_length,path,seq,name,title);
--------------------------------------------------------
--------------------------------------------------
-*/
-
Deleted: trunk/packages/poa/trunk/lpo.h
===================================================================
--- trunk/packages/poa/trunk/lpo.h 2008-03-22 15:38:40 UTC (rev 1651)
+++ trunk/packages/poa/trunk/lpo.h 2008-03-22 17:20:38 UTC (rev 1652)
@@ -1,186 +0,0 @@
-
-
-#ifndef LPO_HEADER_INCLUDED
-#define LPO_HEADER_INCLUDED
-
-#include <default.h>
-#include <poa.h>
-#include <seq_util.h>
-
-/*********************************************************** lpo.c */
-void lpo_init(LPOSequence_T *seq);
-
-void initialize_seqs_as_lpo(int nseq, Sequence_T seq[],ResidueScoreMatrix_T *m);
-void lpo_index_symbols(LPOSequence_T *lpo,ResidueScoreMatrix_T *m);
-
-/** reindex source sequences in `seq' so that the sequence in position i
- ends up in position perm[i]. `perm' must be a permutation of the
- integers [0,seq->nsource_seq-1]. */
-void reindex_lpo_source_seqs (LPOSequence_T *seq, int *perm);
-
-int save_lpo_source(LPOSequence_T *seq,
- char name[],
- char title[],
- int length,
- int weight,
- int bundle_id,
- int ndata,
- LPONumericData_T data[]);
-
-int *save_lpo_source_list(LPOSequence_T *seq,
- int nsource_seq,
- LPOSourceInfo_T source_seq[]);
-
-LPOLetterLink_T *add_lpo_link(LPOLetterLink_T *list,LPOLetterRef_T ipos);
-
-void add_lpo_sources(LPOLetterSource_T *new_s,LPOLetterSource_T *old_s,
- int iseq_new[]);
-
-void crosslink_rings(LPOLetterRef_T a,LPOLetterRef_T b,LPOLetter_T seq[]);
-
-LPOSequence_T *copy_fuse_lpo(LPOSequence_T *holder_x,
- LPOSequence_T *holder_y,
- LPOLetterRef_T x_to_y[],
- LPOLetterRef_T y_to_x[]);
-
-LPOSequence_T *copy_lpo(LPOSequence_T *holder_x);
-
-LPOSequence_T *fuse_lpo(LPOSequence_T *holder_x,
- LPOSequence_T *holder_y,
- LPOLetterRef_T x_to_y[],
- LPOLetterRef_T y_to_x[]);
-
-void free_lpo_letters(int nletter,LPOLetter_T *letter,int please_free_block);
-
-void free_lpo_sequence(LPOSequence_T *seq,int please_free_holder);
-
-int add_path_sequence(int path_length,
- LPOLetterRef_T path[],
- LPOSequence_T *seq,
- char name[],
- char title[]);
-
-
-/************************************************** FROM align_lpo.c */
-int align_lpo(LPOSequence_T *lposeq_x,
- LPOSequence_T *lposeq_y,
- ResidueScoreMatrix_T *m,
- LPOLetterRef_T **x_to_y,
- LPOLetterRef_T **y_to_x,
- int use_global_alignment);
-
-
-
-/************************************************** FROM align_lpo_po.c */
-LPOScore_T align_lpo_po(LPOSequence_T *lposeq_x,
- LPOSequence_T *lposeq_y,
- ResidueScoreMatrix_T *m,
- LPOLetterRef_T **x_to_y,
- LPOLetterRef_T **y_to_x,
- LPOScore_T (*scoring_function)
- (int,int,LPOLetter_T [],LPOLetter_T [],
- ResidueScoreMatrix_T *),
- int use_global_alignment);
-
-
-/************************************************** FROM buildup_lpo.c */
-LPOSequence_T *buildup_lpo(LPOSequence_T *new_seq,
- int nseq,LPOSequence_T seq[],
- ResidueScoreMatrix_T *score_matrix,
- int use_aggressive_fusion,
- int use_global_alignment);
-
-LPOSequence_T *buildup_clipped_lpo(LPOSequence_T *new_seq,
- int nseq,LPOSequence_T seq[],
- ResidueScoreMatrix_T *score_matrix,
- int use_global_alignment);
-
-LPOSequence_T *buildup_progressive_lpo(int nseq, LPOSequence_T **seqs,
- ResidueScoreMatrix_T *score_matrix,
- int use_aggressive_fusion,
- int do_progressive,
- char score_file[],
- LPOScore_T (*scoring_function)
- (int,int,LPOLetter_T [],LPOLetter_T [],
- ResidueScoreMatrix_T *),
- int use_global_alignment,
- int preserve_sequence_order);
-
-LPOSequence_T *buildup_pairwise_lpo(LPOSequence_T seq1[],LPOSequence_T seq2[],
- ResidueScoreMatrix_T *score_matrix,
- int use_aggressive_fusion,
- LPOScore_T (*scoring_function)
- (int,int,LPOLetter_T [],LPOLetter_T [],
- ResidueScoreMatrix_T *),
- int use_global_alignment);
-
-/**************************************************** lpo_format.c */
-void write_lpo(FILE *ifile,LPOSequence_T *seq,
- ResidueScoreMatrix_T *score_matrix);
-
-LPOSequence_T *read_lpo(FILE *ifile);
-LPOSequence_T *read_lpo_select(FILE *ifile,FILE *select_ifile,
- int keep_all_links,int remove_listed_sequences);
-
-void write_lpo_as_fasta(FILE *ifile,LPOSequence_T *seq,
- int nsymbol,char symbol[]);
-
-void write_lpo_bundle_as_fasta(FILE *ifile,LPOSequence_T *seq,
- int nsymbol,char symbol[],int ibundle);
-void export_clustal_seqal(FILE *ifile,
- LPOSequence_T *seq,
- int nsymbol,char symbol[]);
-
-
-/****************************************************** heaviest_bundle.c */
-void generate_lpo_bundles(LPOSequence_T *seq,float minimum_fraction);
-
-
-/****************************************************** make_frame.c */
-LPOSequence_T *build_3_frames(char dna_seq[],char name[],char title[],
- LPOScore_T frameshift_score,
- ResidueScoreMatrix_T *m);
-LPOSequence_T *map_protein_to_dna(char dna_name[],
- LPOSequence_T *lpo_dna,
- int nseq,
- Sequence_T seq[],
- ResidueScoreMatrix_T *score_matrix,
- int use_aggressive_fusion);
-
-
-/****************************************************** remove_bundle.c */
-int remove_bundle(LPOSequence_T *seq,int ibundle,int delete_all_others);
-
-
-
-/******************************************************* numeric_data.c */
-LPONumericData_T *new_numeric_data(LPOSourceInfo_T *source_seq,
- char name[],
- char title[],
- double initial_value);
-
-LPONumericData_T *find_numeric_data(LPOSourceInfo_T *source_seq,
- char name[]);
-
-void free_lpo_numeric_data(int ndata,LPONumericData_T *data,
- int please_free_block);
-
-void new_numeric_data_sets(LPOSourceInfo_T *source_seq,
- int nset,char *set_names[],
- char source_name_fmt[],
- char target_name_fmt[],
- char title_fmt[]);
-void read_numeric_data(int nsource_seq,
- LPOSourceInfo_T source_seq[],
- FILE *ifile);
-LPONumericData_T *cp_numeric_data(LPOSourceInfo_T *source_seq,
- LPONumericData_T *data);
-
-/******************************************************* balance_matrix.c */
-int read_aa_frequencies(char filename[],ResidueScoreMatrix_T *score_matrix)
- ;
-void balance_matrix_score(int nletter,LPOLetter_T letter[],
- ResidueScoreMatrix_T *score_matrix);
-
-
-#endif
Deleted: trunk/packages/poa/trunk/lpo_format.c
===================================================================
--- trunk/packages/poa/trunk/lpo_format.c 2008-03-22 15:38:40 UTC (rev 1651)
+++ trunk/packages/poa/trunk/lpo_format.c 2008-03-22 17:20:38 UTC (rev 1652)
@@ -1,532 +0,0 @@
-
-
-#include "default.h"
-#include "poa.h"
-#include "seq_util.h"
-#include "lpo.h"
-
-
-/** writes the LPO in seq to the stream ifile; optionally a symbol table
- may be given for translating the letters in the LPO to text */
-void write_lpo(FILE *ifile,LPOSequence_T *seq,
- ResidueScoreMatrix_T *score_matrix)
-{
- int i;
- LPOLetterLink_T *link;
- LPOLetterSource_T *source;
-
- fprintf(ifile,"VERSION=LPO.0.1\n");
- fprintf(ifile,"NAME=%s\nTITLE=%s\nLENGTH=%d\nSOURCECOUNT=%d\n",
- seq->name,seq->title,seq->length,seq->nsource_seq);
-
- LOOPF (i,seq->nsource_seq)
- fprintf(ifile,"SOURCENAME=%s\nSOURCEINFO=%d %d %d %d %s\n",
- seq->source_seq[i].name,seq->source_seq[i].length,
- seq->source_seq[i].istart,seq->source_seq[i].weight,
- seq->source_seq[i].bundle_id,seq->source_seq[i].title);
-
- LOOPF (i,seq->length) {
- fprintf(ifile,"%c:",
- seq->letter[i].letter < score_matrix->nsymbol ?
- score_matrix->symbol[seq->letter[i].letter]
- : seq->letter[i].letter);
- for (link= &seq->letter[i].left;link && link->ipos>=0;link=link->more)
- fprintf(ifile,"L%d",link->ipos);
- for (source= &seq->letter[i].source;source;source=source->more)
- fprintf(ifile,"S%d",source->iseq); /* SOURCE ID */
- if (seq->letter[i].align_ring!=i) /* ALIGNED TO SOMETHING ELSE */
- fprintf(ifile,"A%d",seq->letter[i].align_ring);
- fputc('\n',ifile);
- }
-}
-/**@memo example: writing a PO file:
- if (lpo_file_out)
- write_lpo(lpo_file_out,lpo_out,score_matrix.symbol);
-*/
-
-
-
-
-/** reads an LPO from the stream ifile, dynamically allocates memory for
-it, and returns a pointer to the LPO */
-LPOSequence_T *read_lpo(FILE *ifile)
-{
- int i,j,length,nsource_seq,istart,field_id,*pos_count=NULL,value;
- int weight,bundle_id,last_alloc=0;
- LPOSequence_T *seq=NULL;
- char c,name[1024]="",title[4096]="",version[256]="";
- LPOLetterSource_T save_source={0,0,NULL};
-
- CALLOC(seq,1,LPOSequence_T);
- fscanf(ifile,"VERSION=%s",version);
- fscanf(ifile," NAME=%[^\n]",name);
- fscanf(ifile," TITLE=%[^\n]",title);
- if (fscanf(ifile," LENGTH=%d SOURCECOUNT=%d",
- &length,&nsource_seq)!=2)
- return NULL;
- STRNCPY(seq->name,name,SEQUENCE_NAME_MAX);
- seq->title=strdup(title);
- seq->length=length;
- seq->nsource_seq=nsource_seq;
- CALLOC(seq->letter,length,LPOLetter_T);
- GETMEM(seq->source_seq,nsource_seq,last_alloc,SOURCE_SEQ_BUFFER_CHUNK,LPOSourceInfo_T);
- CALLOC(pos_count,nsource_seq,int);
- LOOP (i,length) { /* INITIALIZE ALL LINKS TO INVALID */
- seq->letter[i].align_ring=i; /* POINT TO SELF */
- seq->letter[i].ring_id= INVALID_LETTER_POSITION; /* BLANK! */
- seq->letter[i].left.ipos=seq->letter[i].right.ipos=
- seq->letter[i].source.ipos= INVALID_LETTER_POSITION;
- }
-
- LOOPF(i,nsource_seq) { /* SAVE SOURCE INFO LIST */
- if (fscanf(ifile," SOURCENAME=%[^\n] SOURCEINFO=%d %d %d %d",
- name,&length,&istart,&weight,&bundle_id)!=5)
- return NULL;
- /* SKIP WHITESPACE BEFORE TITLE; ALLOW EMPTY TITLE. */
- fscanf(ifile,"%*[ \t]");
- if (fscanf(ifile,"%[^\n]",title)!=1)
- title[0] = '\0';
- STRNCPY(seq->source_seq[i].name,name,SEQUENCE_NAME_MAX);
- seq->source_seq[i].length=length;
- seq->source_seq[i].istart=istart;
- seq->source_seq[i].weight=weight;
- seq->source_seq[i].bundle_id=bundle_id;
- seq->source_seq[i].title=strdup(title);
- }
-
- LOOPF (i,seq->length) { /* NOW READ THE ACTUAL PARTIAL ORDER */
- if (fscanf(ifile," %c:",&c)!=1) /* READ SEQUENCE LETTER */
- return NULL;
- seq->letter[i].letter=c;
- while ((field_id=getc(ifile))!=EOF && '\n'!=field_id) {/* READ FIELDS*/
- if (1!=fscanf(ifile,"%d",&value))
- return NULL;
- switch (field_id) {
- case 'L':
- add_lpo_link(&seq->letter[i].left,value); /* ADD LEFT-RIGHT LINKS*/
- add_lpo_link(&seq->letter[value].right,i);
- break;
- case 'S': /* SAVE THE SOURCE ID */
- save_source.ipos=pos_count[value]++;
- add_lpo_sources(&seq->letter[i].source,&save_source,&value);
- break;
- case 'A': /* SAVE THE ALIGN RING POINTER */
- seq->letter[i].align_ring=value;
- break;
- }
- }
- }
-
- LOOPF (i,seq->length) { /* SET ring_id TO MINIMUM VALUE ON EACH RING */
- if (seq->letter[i].ring_id<0) {/* NEW RING, UPDATE IT! */
- j=i; /* GO AROUND THE ENTIRE RING, SETTING ring_id TO i */
- do seq->letter[j].ring_id=i; /* i IS MINIMUM VALUE ON THIS RING */
- while ((j=seq->letter[j].align_ring)!=i);
- }
- }
-
- FREE(pos_count);
- return seq;
-}
-
-
-
-#define INVALID_LPO_LINK (-99)
-
-enum {
- default_retention_mode,
- default_no_retention_mode
-};
-
-/** reads an LPO from the stream ifile, dynamically allocates memory for
-it, and returns a pointer to the LPO */
-LPOSequence_T *read_lpo_select(FILE *ifile,FILE *select_file,
- int keep_all_links,int remove_listed_sequences)
-{
- int i,j,k,length,nsource_seq,istart,field_id,*pos_count=NULL,value;
- int weight,bundle_id,last_alloc=0,*iseq_compact=NULL,*last_pos=NULL;
- int nlink,*link_list=NULL,*match_pos=NULL,*ring_old=NULL;
- int *pos_compact=NULL,npos_compact=0,keep_this_letter,retention_mode;
- LPOSequence_T *seq=NULL;
- char c,name[1024]="",title[4096]="",version[256]="";
- LPOLetterSource_T save_source={0,0,NULL},*source=NULL;
-
- if (remove_listed_sequences)
- retention_mode=default_retention_mode;/*KEEP SEQS AS DFLT, SKIP IF LISTED*/
- else /* SKIP SEQS UNLESS LISTED IN select_file */
- retention_mode=default_no_retention_mode;
-
- CALLOC(seq,1,LPOSequence_T);
- fscanf(ifile,"VERSION=%s",version);
- fscanf(ifile," NAME=%[^\n]",name);
- fscanf(ifile," TITLE=%[^\n]",title);
- if (fscanf(ifile," LENGTH=%d SOURCECOUNT=%d",
- &length,&nsource_seq)!=2)
- return NULL;
- STRNCPY(seq->name,name,SEQUENCE_NAME_MAX);
- seq->title=strdup(title);
- seq->length=length;
- seq->nsource_seq=nsource_seq;
- CALLOC(seq->letter,length,LPOLetter_T);
- GETMEM(seq->source_seq,nsource_seq,last_alloc,SOURCE_SEQ_BUFFER_CHUNK,LPOSourceInfo_T);
- CALLOC(pos_count,nsource_seq,int);
- LOOP (i,length) { /* INITIALIZE ALL LINKS TO INVALID */
- seq->letter[i].align_ring=i; /* POINT TO SELF */
- seq->letter[i].ring_id= INVALID_LETTER_POSITION; /* BLANK! */
- seq->letter[i].left.ipos=seq->letter[i].right.ipos=
- seq->letter[i].source.ipos= INVALID_LETTER_POSITION;
- }
-
- LOOPF(i,nsource_seq) { /* SAVE SOURCE INFO LIST */
- if (fscanf(ifile," SOURCENAME=%[^\n] SOURCEINFO=%d %d %d %d",
- name,&length,&istart,&weight,&bundle_id)!=5)
- return NULL;
- /* SKIP WHITESPACE BEFORE TITLE; ALLOW EMPTY TITLE. */
- fscanf(ifile,"%*[ \t]");
- if (fscanf(ifile,"%[^\n]",title)!=1)
- title[0] = '\0';
- STRNCPY(seq->source_seq[i].name,name,SEQUENCE_NAME_MAX);
- seq->source_seq[i].length=length;
- seq->source_seq[i].istart=istart;
- seq->source_seq[i].weight=weight;
- seq->source_seq[i].bundle_id=bundle_id;
- seq->source_seq[i].title=strdup(title);
- }
-
- CALLOC(iseq_compact,nsource_seq,int);
- CALLOC(last_pos,nsource_seq,int);
- CALLOC(match_pos,nsource_seq,int);
- if (select_file) {
- LOOP (i,nsource_seq) /* DEFAULT: MARKED AS INVALID */
- iseq_compact[i]= -retention_mode;
- while (fscanf(select_file,"SOURCENAME=%[^\n]\n",name)==1) {
- LOOP (i,nsource_seq)
- if (strcmp(seq->source_seq[i].name,name)==0) {
- iseq_compact[i]= retention_mode-1;
- break;
- }
- }
- }
-
- j=0;
- LOOPF (i,nsource_seq) {
- last_pos[i]= -1; /* DEFAULT: INVALID */
- if (iseq_compact[i]>=0) {
- iseq_compact[i]=j;
- if (i>j)
- memcpy(seq->source_seq+j,seq->source_seq+i,sizeof(LPOSourceInfo_T));
- match_pos[j]= INVALID_LPO_LINK; /* DEFAULT: NO VALID LINK! */
- j++;
- }
- else { /* EXCLUDE THIS SEQUENCE FROM THE FILTERED LPO */
- iseq_compact[i]= INVALID_LETTER_POSITION;
- if (seq->source_seq[i].title)
- free(seq->source_seq[i].title);
- }
- }
- seq->nsource_seq=nsource_seq=j; /* COMPACTED COUNT OF SEQUENCES TO KEEP*/
-
- CALLOC(link_list,seq->length,int); /*TEMPORARY DATA FOR COMPACTION MAPPING */
- CALLOC(pos_compact,seq->length,int);
- CALLOC(ring_old,seq->length,int);
- npos_compact=0;
-
- LOOPF (i,seq->length) { /* NOW READ THE ACTUAL PARTIAL ORDER */
- if (fscanf(ifile," %c:",&c)!=1) /* READ SEQUENCE LETTER */
- return NULL;
- seq->letter[npos_compact].letter=c;
- nlink=0;
- keep_this_letter=0; /*DEFAULT */
- while ((field_id=getc(ifile))!=EOF && '\n'!=field_id) {/* READ FIELDS*/
- if (1!=fscanf(ifile,"%d",&value))
- return NULL;
- switch (field_id) {
- case 'L':
- if (pos_compact[value]>=0) /*COULD BE VALID LINK: WAIT TO CHECK SRCs*/
- link_list[nlink++]=pos_compact[value]; /* SAVE IT TEMPORARILY */
- break;
- case 'S': /* SAVE THE SOURCE ID */
- if (iseq_compact[value]>=0) { /*KEEP THIS SOURCE, SO KEEP THIS LETTER*/
- keep_this_letter=1;
- value=iseq_compact[value]; /* TRANSLATE TO ITS COMPACTED INDEX*/
- if (last_pos[value]>=0) { /* MAKE SURE WE HAVE LINK TO LAST POSITION */
- LOOP (j,nlink) /* CHECK TO SEE IF LINK ALREADY SAVED */
- if (link_list[j]==last_pos[value])
- break;
- if (LOOP_FINISHED(j,nlink)) { /* NO LINK??? ADD IT!!! */
- link_list[nlink++]=last_pos[value];
- }
- }
- last_pos[value]=npos_compact; /* THIS SEQ POS IS AT THIS NODE */
- save_source.ipos=pos_count[value]++;/* COUNT LENGTH OF THIS SEQ */
- add_lpo_sources(&seq->letter[npos_compact].source,
- &save_source,&value);
- seq->letter[npos_compact].ring_id= INVALID_LETTER_POSITION;
- seq->letter[npos_compact].align_ring=i;
- ring_old[i]=i; /* DEFAULT: SELF-RING OF ONE LETTER*/
- }
- break;
- case 'A': /* SAVE THE ALIGN RING POINTER */
- ring_old[i]=value; /* SAVE OLD ALIGN RING INDICES */
- if (keep_this_letter) /* TEMP'Y: SAVE REVERSE MAPPING TO A-R INDICES*/
- seq->letter[npos_compact].align_ring=i;
- break;
- }
- }
- if (keep_this_letter) {
- for (source= &seq->letter[npos_compact].source;source;source=source->more)
- match_pos[source->iseq]=source->ipos - 1; /*VALID LINK MUST MATCH m_p*/
-
- LOOPF (j,nlink) { /*ADD LEFT-RIGHT LINKS*/
- for (source= &seq->letter[link_list[j]].source;source;source=source->more)
- if (keep_all_links /* KEEP LINKS EVEN IF NOT FROM SELECTED SEQS*/
- || source->ipos == match_pos[source->iseq]) { /*VALID LINK! */
- add_lpo_link(&seq->letter[npos_compact].left,link_list[j]);
- add_lpo_link(&seq->letter[link_list[j]].right,npos_compact);
- break; /* SAVED THIS LINK! */
- }
- }
- for (source= &seq->letter[npos_compact].source;source;source=source->more)
- match_pos[source->iseq]= INVALID_LPO_LINK;/*DFLT:NO VALID LINK*/
-
- pos_compact[i]=npos_compact++;
- }
- else
- pos_compact[i]= INVALID_LETTER_POSITION;
- }
- seq->length=npos_compact;
-
- LOOPF (i,seq->length) { /* SET ring_id TO MINIMUM VALUE ON EACH RING */
- if (seq->letter[i].ring_id<0) {/* NEW RING, UPDATE IT! */
- j=seq->letter[i].align_ring; /* GO AROUND THE ENTIRE RING, SETTING ring_id TO i */
- do {
- /*printf("i=%d\tj=%d\tpos_compact[j]=%d\tring_old[j]=%d\n",i,j,pos_compact[j],ring_old[j]);*/
- if (pos_compact[j]>=0) {
- k=pos_compact[j];
- seq->letter[k].ring_id=i; /* i IS MINIMUM VALUE ON THIS RING */
- }
- j=ring_old[j]; /* ADVANCE TO NEXT LETTER ON THE RING */
- if (pos_compact[j]>=0) /* IF VALID, POINT IT BACK TO PREVIOUS LETTER*/
- seq->letter[pos_compact[j]].align_ring=k;
- }
- while (pos_compact[j]!=i); /* STOP WHEN WE'VE COMPLETED THE RING, BACK TO START*/
- }
- }
-
- FREE(pos_count);
- FREE(pos_compact);
- FREE(iseq_compact);
- FREE(last_pos);
- FREE(link_list);
- FREE(ring_old);
- FREE(match_pos);
- return seq;
-}
-
-/*
-LPOSequence_T *read_lpo(FILE *ifile)
-{
- return read_lpo_select(ifile,NULL);
-}
-*/
-
-
-
-
-#define FASTA_GAP_CHARACTER '.'
-int xlate_lpo_to_al(LPOSequence_T *seq,
- int nsymbol,char symbol[],int ibundle,
- char gap_character,
- char ***p_seq_pos,char **p_p,char **p_include)
-{
- int i,j,iring=0,nring=0,current_ring=0,iprint;
- char **seq_pos=NULL,*p=NULL,*include_in_save=NULL;
- LPOLetterSource_T *source;
-
- LOOPF (i,seq->length) /* COUNT TOTAL #ALIGNMENT RINGS IN THE LPO */
- if (seq->letter[i].ring_id != current_ring) { /* NEXT RING */
- current_ring=seq->letter[i].ring_id;
- nring++;
- }
- nring++; /* DON'T FORGET TO COUNT THE LAST RING!!! */
-
- CALLOC(seq_pos,seq->nsource_seq,char *); /* ALLOCATE MAP ARRAY*/
- CALLOC(p,seq->nsource_seq*nring,char);
- LOOP (i,seq->nsource_seq) /* BUILD POINTER ARRAY INTO MAP ARRAY */
- seq_pos[i]=p+i*nring;
- memset(p,gap_character,seq->nsource_seq*nring);
- /* DEFAULT IS NO SEQUENCE PRESENT AT THIS POSITION */
-
- current_ring=0; /* RESET TO BEGINNING */
- LOOPF (i,seq->length) { /* NOW MAP THE LPO TO A FLAT LINEAR ORDER */
- if (seq->letter[i].ring_id != current_ring) { /* NEXT RING */
- current_ring=seq->letter[i].ring_id;
- iring++;
- } /* MAP EACH SOURCE SEQ ONTO LINEAR ORDER INDEXED BY iring */
- for (source= &seq->letter[i].source;source;source=source->more)
- if (symbol && seq->letter[i].letter<nsymbol) /* TRANSLATE TO symbol */
- seq_pos[source->iseq][iring]= symbol[seq->letter[i].letter];
- else /* NO NEED TO TRANSLATE */
- seq_pos[source->iseq][iring]= seq->letter[i].letter;
- }
-
- if (ibundle>=0) { /* ONLY SAVE SEQS THAT ARE IN THIS BUNDLE */
- CALLOC(include_in_save,nring,char); /* BLANK FLAGS: WHAT RINGS TO SHOW*/
- LOOP (iring,nring) { /* CHECK EACH RING TO SEE IF IT'S IN BUNDLE */
- LOOP (i,seq->nsource_seq) {
- if (seq_pos[i][iring]!=gap_character /* ALIGNED HERE! */
- && seq->source_seq[i].bundle_id == ibundle) { /* PART OF BUNDLE!*/
- include_in_save[iring]=1; /* SO INCLUDE THIS RING */
- break;
- }
- }
- }
- }
- if (p_seq_pos)
- *p_seq_pos = seq_pos;
- return nring;
-}
-
-
-/** writes the LPO in FASTA format, including all sequences in the
- specified bundle */
-void write_lpo_bundle_as_fasta(FILE *ifile,LPOSequence_T *seq,
- int nsymbol,char symbol[],int ibundle)
-{
- int i,j,nring=0,iprint;
- char **seq_pos=NULL,*p=NULL,*include_in_save=NULL;
-
- nring=xlate_lpo_to_al(seq,nsymbol,symbol,ibundle, /* TRANSLATE TO */
- FASTA_GAP_CHARACTER, /* RC-MSA FMT */
- &seq_pos,&p,&include_in_save);
- LOOPF (i,seq->nsource_seq) { /* NOW WRITE OUT FASTA FORMAT */
- if (ibundle<0 /* PRINT ALL BUNDLES */
- || seq->source_seq[i].bundle_id == ibundle) { /* OR JUST THIS BUNDLE*/
- fprintf(ifile,">%s %s",seq->source_seq[i].name,seq->source_seq[i].title);
- iprint=0;
- LOOPF (j,nring) { /* WRITE OUT 60 CHARACTER SEQUENCE LINES */
- if (NULL==include_in_save || include_in_save[j]) {
- fprintf(ifile,"%s%c",iprint%60? "":"\n", seq_pos[i][j]);
- iprint++; /* KEEP COUNT OF PRINTED CHARACTERS */
- }
- }
- fputc('\n',ifile);
- }
- }
-
- FREE(p); /* DUMP TEMPORARY MEMORY */
- FREE(include_in_save);
- FREE(seq_pos);
-}
-/**@memo example: writing FASTA format file:
- if (seq_ifile=fopen(fasta_out,"w")) {
- write_lpo_bundle_as_fasta(seq_ifile,lpo_out,
- score_matrix.nsymbol,score_matrix.symbol,ibundle);
- fclose(seq_ifile);
- }
-*/
-
-
-/** writes the LPO in FASTA format, including all sequences in all bundles
--------------------------------------------------------
----------------------------------------------------------------------------
-*/
-void write_lpo_as_fasta(FILE *ifile,LPOSequence_T *seq,
- int nsymbol,char symbol[])
-{ /* WRAPPER FUNCTION FOR SAVING ALL BUNDLES!! */
- write_lpo_bundle_as_fasta(ifile,seq,nsymbol,symbol,ALL_BUNDLES);
-}
-
-
-
-
-/****************************************************************
- *
- * WRITE_SEQUENCES
- *
- * This function writes out sequences. Each line of sequnces
- * will be written out in blocks with spacing in between
- * each block.
- *
- ***************************************************************/
-
-int write_sequences(FILE *ifile,
- LPOSequence_T *seq,
- int indent,
- int nblock, /* # OF BLOCKS */
- int block_size, /* SIZE OF BLOCKS */
- int block_spacing, /* SPACING BETWEEN BLOCKS */
- int paragraph_spacing, /* SPACING BETWEEN PRAGRAPHS */
- int names, /* BOOLEAN: PRINT NAMES AFTER 1ST PARAGRAPH? */
- char gap_char,
- int nsymbol,char symbol[])
-{
- int i,ip,ial,iseq,iblock,nparagraph,remainder,ipos,ipos_local,broken;
- int len,nring;
- char **seq_pos=NULL,*p=NULL,*include_in_save=NULL;
-
- nring=xlate_lpo_to_al(seq,nsymbol,symbol,ALL_BUNDLES, /* TRANSLATE TO */
- gap_char, /* RC-MSA FMT */
- &seq_pos,&p,&include_in_save);
-
- nparagraph = nring/(nblock*block_size);/*# OF FULL PARAGRAPHS */
- remainder = nring%(nblock*block_size);
- if (remainder != 0)
- nparagraph++;
- LOOPF(ip,nparagraph){ /* FOR EACH PARAGRAPH */
- LOOPF(iseq,seq->nsource_seq){ /* FOR EACH SEQUENCE */
- if (ip == 0 || names){
- if ((len=strlen(seq->source_seq[iseq].name))>indent-1){ /*MUST WE TRUNCATE NAME? */
- LOOPF(i,indent-1) /* PRINT AS MUCH OF NAME AS POSSIBLE */
- putc(seq->source_seq[iseq].name[i],ifile);
- putc(' ',ifile); /* LEAVE A SPACE */
- }
- else{ /* PRINT WHOLE NAME */
- fprintf(ifile,"%s",seq->source_seq[iseq].name); /* PRINT NAME */
- LOOP(i,indent-len) /* INDENT LINE */
- putc(' ',ifile);
- }
- }
- else{
- LOOP(i,indent) /* INDENT LINE */
- putc(' ',ifile);
- }
- LOOPF(iblock,nblock){ /* FOR EACH BLOCK */
- broken=0;
- LOOPF(i,block_size){
- ipos = i + (iblock*block_size) + (ip*nblock*block_size);
- if (ipos>=nring){
- broken=1;
- break;
- }
- putc(seq_pos[iseq][ipos],ifile); /* APPROP SYMBOL FOR THIS POS*/
- }
- if (broken)
- break;
- LOOP(i,block_spacing) /* ADD SPACING BETWEEN BLOCKS */
- putc(' ',ifile);
- } /* END OF BLOCK LOOP */
- putc('\n',ifile); /* NEW LINE AT END OF SEQUENCE LINE */
- } /* END OF SEQ LOOP */
- LOOP(i,paragraph_spacing) /* ADD SPACING BETWEEN PARAGRAPHS */
- putc('\n',ifile);
- } /* END OF PARAGRAPH LOOP */
- done:
- FREE(p); /* DUMP TEMPORARY MEMORY */
- FREE(include_in_save);
- FREE(seq_pos);
- return 0;
-}
-
-
-
-void export_clustal_seqal(FILE *ifile,
- LPOSequence_T *seq,
- int nsymbol,char symbol[])
-{
- fprintf(ifile,"CLUSTAL W (1.74) multiple sequence alignment\n\n\n");
- /* WRITE OUT SEQUNENCES: INDENT 36, 1 BLOCK OF 50 CHARS 0 CHARS BLOCK
- SPACING, 2 LINES PARAGRAPH SPACING, PRINT NAMES ON ALL LINES. */
- write_sequences(ifile,seq,36,1,50,0,2,1,'-',nsymbol,symbol);
-}
-
Deleted: trunk/packages/poa/trunk/main.c
===================================================================
--- trunk/packages/poa/trunk/main.c 2008-03-22 15:38:40 UTC (rev 1651)
+++ trunk/packages/poa/trunk/main.c 2008-03-22 17:20:38 UTC (rev 1652)
@@ -1,386 +0,0 @@
-
-
-#include "lpo.h"
-#include "msa_format.h"
-#include "align_score.h"
-
-
-static LPOSequence_T *read_partial_order_file (char *po_filename, char *subset_filename, int remove_listed_seqs, int keep_all_links, int do_switch_case, ResidueScoreMatrix_T *mat);
-
-int main(int argc,char *argv[])
-{
- int i,j,ibundle=ALL_BUNDLES,nframe_seq=0,use_reverse_complement=0;
- int nseq=0,do_switch_case=dont_switch_case,do_analyze_bundles=0;
- int nseq_in_list=0,n_input_seqs=0,max_input_seqs=0;
- char score_file[256],seq_file[256],po_list_entry_filename[256],*comment=NULL,*al_name="test align";
- ResidueScoreMatrix_T score_matrix; /* DEFAULT GAP PENALTIES*/
- LPOSequence_T *seq=NULL,*lpo_out=NULL,*frame_seq=NULL,*dna_lpo=NULL,*lpo_in=NULL;
- LPOSequence_T **input_seqs=NULL;
- FILE *errfile=stderr,*logfile=NULL,*lpo_file_out=NULL,*po_list_file=NULL,*seq_ifile=NULL;
- char *print_matrix_letters=NULL,*fasta_out=NULL,*po_out=NULL,*matrix_filename=NULL,
- *seq_filename=NULL,*frame_dna_filename=NULL,*po_filename=NULL,*po2_filename=NULL,
- *po_list_filename=NULL, *hbmin=NULL,*numeric_data=NULL,*numeric_data_name="Nmiscall",
- *dna_to_aa=NULL,*pair_score_file=NULL,*aafreq_file=NULL,*termval_file=NULL,
- *bold_seq_name=NULL,*subset_file=NULL,*subset2_file=NULL,*rm_subset_file=NULL,
- *rm_subset2_file=NULL;
- float bundling_threshold=0.9;
- int exit_code=0,count_sequence_errors=0,please_print_snps=0,
- report_consensus_seqs=0,report_major_allele=0,use_aggressive_fusion=0;
- int show_allele_evidence=0,please_collapse_lines=0,keep_all_links=0;
- int remove_listed_seqs=0,remove_listed_seqs2=0,please_report_similarity;
- int do_global=0, do_progressive=0, do_preserve_sequence_order=0;
- char *reference_seq_name="CONSENS%d",*clustal_out=NULL;
-
- black_flag_init(argv[0],PROGRAM_VERSION);
-
- if (argc<2) {
- fprintf(stderr,"\nUsage: %s [OPTIONS] MATRIXFILE\n"
-"Align a set of sequences or alignments using the scores in MATRIXFILE.\n"
-"Example: %s -read_fasta multidom.seq -clustal m.aln blosum80.mat\n\n"
-"INPUT:\n"
-" -read_fasta FILE Read in FASTA sequence file.\n"
-" -read_msa FILE Read in MSA alignment file.\n"
-" -read_msa2 FILE Read in second MSA file. \n"
-" -subset FILE Filter MSA to include list of seqs in file.\n"
-" -subset2 FILE Filter second MSA to include list of seqs in file.\n"
-" -remove FILE Filter MSA to exclude list of seqs in file.\n"
-" -remove2 FILE Filter second MSA to exclude list of seqs in file.\n"
-" -read_msa_list FILE Read an MSA from each filename listed in file.\n"
-" -tolower Force FASTA/MSA sequences to lowercase\n"
-" (nucleotides in our matrix files)\n"
-" -toupper Force FASTA/MSA sequences to UPPERCASE\n"
-" (amino acids in our matrix files)\n"
-"\nALIGNMENT:\n"
-" -do_global Do global alignment.\n"
-" -do_progressive Perform progressive alignment using a guide tree\n"
-" built by neighbor joining from a set of\n"
-" sequence-sequence similarity scores.\n"
-" -read_pairscores FILE Read tab-delimited file of similarity scores.\n"
-" (If not provided, scores are constructed\n"
-" using pairwise sequence alignment.)\n"
-" -fuse_all Fuse identical letters on align rings.\n"
-"\nANALYSIS:\n"
-" -hb Perform heaviest bundling to generate consensi.\n"
-" -hbmin VALUE Include in heaviest bundle sequences with\n"
-" percent ID (as a fraction) >= value.\n"
-"\nOUTPUT:\n"
-" -pir FILE Write out MSA in PIR format.\n"
-" -clustal FILE Write out MSA in CLUSTAL format.\n"
-" -po FILE Write out MSA in PO format.\n"
-" -preserve_seqorder Write out MSA with sequences in their input order.\n"
-" -printmatrix LETTERS Print score matrix to stdout.\n"
-" -best Restrict MSA output to heaviest bundles (PIR only).\n"
-" -v Run in verbose mode (e.g. output gap penalties).\n\n"
-" NOTE: One of the -read_fasta, -read_msa, or -read_msa_list arguments\n"
-" must be used, since a sequence or alignment file is required.\n\n"
-"For more information, see http://www.bioinformatics.ucla.edu/poa.\n\n"
- ,argv[0],argv[0]);
- exit(-1);
- }
-
- FOR_ARGS(i,argc) { /* READ ALL THE ARGUMENTS */
- ARGMATCH_VAL("-tolower",do_switch_case,switch_case_to_lower);
- ARGMATCH_VAL("-toupper",do_switch_case,switch_case_to_upper);
- ARGMATCH_VAL("-v",logfile,stdout);
- ARGMATCH_VAL("-best",ibundle,0); /*RESTRICT FASTA OUTPUT TO HB */
- ARGMATCH_VAL("-hb",do_analyze_bundles,1);/*CALCULATE HEAVIEST BUNDLING*/
- ARGGET("-printmatrix",print_matrix_letters);
- ARGGET("-read_msa",po_filename); /* READ A MSA FILE FOR ALIGNMENT/ANALYSIS*/
- ARGGET("-read_msa2",po2_filename); /* READ A SECOND MSA FILE FOR ALIGNMENT/ANALYSIS*/
- ARGGET("-read_msa_list",po_list_filename); /* READ A LIST OF MSAs FOR ALIGNMENT/ANALYSIS */
- ARGGET("-pir",fasta_out); /* SAVE FASTA-PIR FORMAT ALIGNMENT FILE */
- ARGGET("-clustal",clustal_out); /* SAVE CLUSTAL FORMAT ALIGNMENT FILE */
- ARGGET("-po",po_out); /* SAVE PO FORMAT ALIGNMENT FILE */
- ARGMATCH("-preserve_seqorder",do_preserve_sequence_order); /* DO PRESERVE SEQUENCE ORDER */
- ARGGET("-hbmin",hbmin); /* SET THRESHOLD FOR BUNDLING */
- ARGMATCH("-fuse_all",use_aggressive_fusion);
- ARGMATCH("-do_global",do_global); /* DO GLOBAL */
- ARGGET("-read_pairscores",pair_score_file); /* FILENAME TO READ PAIR SCORES*/
- ARGMATCH("-do_progressive", do_progressive); /* DO PROGRESSIVE ALIGNMENT */
- ARGGET("-subset",subset_file); /* FILENAME TO READ SEQ SUBSET LIST*/
- ARGGET("-subset2",subset2_file); /* FILENAME TO READ SEQ SUBSET LIST*/
- ARGGET("-remove",rm_subset_file); /* FILENAME TO READ SEQ REMOVAL LIST*/
- ARGGET("-remove2",rm_subset2_file); /* FILENAME TO READ SEQ REMOVAL LIST*/
- ARGGET("-read_fasta",seq_filename); /* READ FASTA FILE FOR ALIGNMENT */
- NEXTARG(matrix_filename); /* NON-FLAG ARG SHOULD BE MATRIX FILE */
- }
-
-
- /** CHECK FOR CONFLICTING FLAGS **/
-
- if (po_list_filename && (po_filename || po2_filename)) {
- WARN_MSG(USERR,(ERRTXT, "Error: The -read_po_list and -read_po flags cannot be used at the same time.\nExiting."), "$Revision: 1.2.2.9 $");
- exit_code = 1;
- goto free_memory_and_exit;
- }
-
- if (((subset_file || rm_subset_file) && !po_filename) || ((subset2_file || rm_subset2_file) && !po2_filename)) {
- WARN_MSG(USERR,(ERRTXT, "Error: Each -subset/-remove flag must have a corresponding -read_po flag.\nExiting."),"$Revision: 1.2.2.9 $");
- exit_code = 1;
- goto free_memory_and_exit;
- }
-
- if ((subset_file && rm_subset_file) || (subset2_file && rm_subset2_file)) {
- WARN_MSG(USERR,(ERRTXT, "Error: The -subset and -remove flags cannot be used at the same time.\nExiting."),"$Revision: 1.2.2.9 $");
- exit_code = 1;
- goto free_memory_and_exit;
- }
-
- if (rm_subset_file) {
- subset_file = rm_subset_file;
- remove_listed_seqs = 1;
- }
- if (rm_subset2_file) {
- subset2_file = rm_subset2_file;
- remove_listed_seqs2 = 1;
- }
-
- if (hbmin)
- bundling_threshold=atof(hbmin);
-
- if (!matrix_filename ||
- read_score_matrix(matrix_filename,&score_matrix)<=0){/* READ MATRIX */
- WARN_MSG(USERR,(ERRTXT,"Error reading matrix file %s.\nExiting",
- matrix_filename ? matrix_filename: "because none specified"),"$Revision: 1.2.2.9 $");
- exit_code=1; /* SIGNAL ERROR CONDITION */
- goto free_memory_and_exit;
- }
-
- if (logfile) {
- fprintf(logfile,"X-Gap Penalties (Open, Aff1, Aff2; LTrunc, LDecay): %d %d %d %d %d\n",
- score_matrix.gap_penalty_set[0][0],
- score_matrix.gap_penalty_set[0][1],
- score_matrix.gap_penalty_set[0][2],
- score_matrix.trunc_gap_length,
- score_matrix.decay_gap_length);
- fprintf(logfile,"X-Gap Penalties (0, 1, 2, ...): ");
- for (i=0; i<=score_matrix.max_gap_length; i++) {
- fprintf (logfile, "%d ", score_matrix.gap_penalty_x[i]);
- }
- fprintf(logfile,"... \n");
- fprintf(logfile,"Y-Gap Penalties (Open, Aff1, Aff2; LTrunc, LDecay): %d %d %d %d %d\n",
- score_matrix.gap_penalty_set[1][0],
- score_matrix.gap_penalty_set[1][1],
- score_matrix.gap_penalty_set[1][2],
- score_matrix.trunc_gap_length,
- score_matrix.decay_gap_length);
- fprintf(logfile,"Y-Gap Penalties (0, 1, 2, ...): ");
- for (i=0; i<=score_matrix.max_gap_length; i++) {
- fprintf (logfile, "%d ", score_matrix.gap_penalty_y[i]);
- }
- fprintf(logfile,"... \n");
- }
-
- if (print_matrix_letters) /* USER WANTS US TO PRINT A MATRIX */
- print_score_matrix(stdout,&score_matrix,print_matrix_letters
- /*"ARNDCQEGHILKMFPSTWYV"*/);
-
-
- /** READ INPUT FILES **/
-
- n_input_seqs = 0;
- max_input_seqs = 10;
- CALLOC (input_seqs, max_input_seqs, LPOSequence_T *);
-
- if (po_filename) {
- lpo_in = read_partial_order_file (po_filename, subset_file, remove_listed_seqs, keep_all_links, do_switch_case, &score_matrix);
- if (lpo_in == NULL) {
- exit_code = 1;
- goto free_memory_and_exit;
- }
- fprintf(errfile,"...Read %d sequences from MSA file %s...\n",lpo_in->nsource_seq,po_filename);
- input_seqs[n_input_seqs++] = lpo_in;
- lpo_in = NULL;
- }
-
- if (po2_filename) {
- lpo_in = read_partial_order_file (po2_filename, subset2_file, remove_listed_seqs2, keep_all_links, do_switch_case, &score_matrix);
- if (lpo_in == NULL) {
- exit_code = 1;
- goto free_memory_and_exit;
- }
- fprintf(errfile,"...Read %d sequences from second MSA file %s...\n",lpo_in->nsource_seq,po2_filename);
- input_seqs[n_input_seqs++] = lpo_in;
- lpo_in = NULL;
- }
-
- if (po_list_filename) {
- po_list_file = fopen (po_list_filename, "r");
- while (po_list_file && fscanf (po_list_file, " %s", po_list_entry_filename) == 1) {
- lpo_in = read_partial_order_file (po_list_entry_filename, NULL, 0, 0, do_switch_case, &score_matrix);
- if (lpo_in == NULL) {
- exit_code = 1;
- goto free_memory_and_exit;
- }
- fprintf(errfile,"...Read %d sequences from PO list entry %s...\n",lpo_in->nsource_seq,po_list_entry_filename);
- nseq_in_list += lpo_in->nsource_seq;
- input_seqs[n_input_seqs++] = lpo_in;
- lpo_in = NULL;
- if (n_input_seqs == max_input_seqs) {
- max_input_seqs *= 2;
- REALLOC (input_seqs, max_input_seqs, LPOSequence_T *);
- }
- }
- if (nseq_in_list==0) {
- WARN_MSG(USERR,(ERRTXT,"Error reading PO list file %s.\nExiting",
- po_list_file),"$Revision: 1.2.2.9 $");
- exit_code=1; /* SIGNAL ERROR CONDITION */
- goto free_memory_and_exit;
- }
- }
-
- if (seq_filename) {
- seq_ifile = fopen (seq_filename, "r");
- if (seq_ifile == NULL) {
- WARN_MSG(USERR,(ERRTXT,"Couldn't open sequence file %s.\nExiting",
- seq_filename),"$Revision: 1.2.2.9 $");
- exit_code=1; /* SIGNAL ERROR CONDITION */
- goto free_memory_and_exit;
- }
- nseq = read_fasta (seq_ifile, &seq, do_switch_case, &comment);
- fclose (seq_ifile);
- if (nseq == 0) {
- WARN_MSG(USERR,(ERRTXT,"Error reading sequence file %s.\nExiting",
- seq_filename),"$Revision: 1.2.2.9 $");
- exit_code=1; /* SIGNAL ERROR CONDITION */
- goto free_memory_and_exit;
- }
- fprintf(errfile,"...Read %d sequences from sequence file %s...\n",nseq,seq_filename);
- for (i=0; i<nseq; i++) {
- input_seqs[n_input_seqs++] = &(seq[i]);
- initialize_seqs_as_lpo(1,&(seq[i]),&score_matrix);
- if (n_input_seqs == max_input_seqs) {
- max_input_seqs *= 2;
- REALLOC (input_seqs, max_input_seqs, LPOSequence_T *);
- }
- }
- }
-
-
- /** BUILD AND ANALYZE OUTPUT PO-MSA **/
-
- if (n_input_seqs == 0) { /* HMM.. NO DATA. */
- WARN_MSG(USERR,(ERRTXT,"No input sequences were provided; use one of the -read_ flags.\nExiting."), "$Revision: 1.2.2.9 $");
- exit_code=1; /* SIGNAL ERROR CONDITION */
- goto free_memory_and_exit;
- }
- else {
- lpo_out = buildup_progressive_lpo (n_input_seqs, input_seqs, &score_matrix,
- use_aggressive_fusion, do_progressive, pair_score_file,
- matrix_scoring_function, do_global, do_preserve_sequence_order);
- }
-
- if (comment) { /* SAVE THE COMMENT LINE AS TITLE OF OUR LPO */
- FREE(lpo_out->title);
- lpo_out->title=strdup(comment);
- }
-
- /* DIVIDE INTO BUNDLES W/ CONSENSUS USING PERCENT ID */
- if (do_analyze_bundles)
- generate_lpo_bundles(lpo_out,bundling_threshold);
-
- if (po_out) { /* WRITE FINAL PARTIAL ORDER ALIGNMENT TO OUTPUT */
- if (lpo_file_out=fopen(po_out, "w")) {
- write_lpo(lpo_file_out,lpo_out,&score_matrix);
- fclose(lpo_file_out);
- fprintf(errfile,"...Wrote %d sequences to PO file %s...\n",lpo_out->nsource_seq,po_out);
- }
- else {
- WARN_MSG(USERR,(ERRTXT,"*** Could not save PO file %s. Exiting.",
- po_out),"$Revision: 1.2.2.9 $");
- exit_code=1; /* SIGNAL ERROR CONDITION */
- }
- }
-
- if (fasta_out) { /* WRITE FINAL ALIGNMENT IN FASTA-PIR FORMAT */
- if (seq_ifile=fopen(fasta_out,"w")) { /* FASTA-PIR ALIGNMENT*/
- write_lpo_bundle_as_fasta(seq_ifile,lpo_out,score_matrix.nsymbol,
- score_matrix.symbol,ibundle);
- fclose(seq_ifile);
- fprintf(errfile,"...Wrote %d sequences to FASTA-PIR file %s...\n",lpo_out->nsource_seq,fasta_out);
- }
- else {
- WARN_MSG(USERR,(ERRTXT,"*** Could not save FASTA-PIR file %s. Exiting.",
- fasta_out),"$Revision: 1.2.2.9 $");
- exit_code=1; /* SIGNAL ERROR CONDITION */
- }
- }
-
- if (clustal_out) { /* WRITE FINAL ALIGNMENT IN CLUSTAL FORMAT */
- if (seq_ifile=fopen(clustal_out,"w")) { /* CLUSTAL ALIGNMENT*/
- export_clustal_seqal(seq_ifile,lpo_out,score_matrix.nsymbol,
- score_matrix.symbol);
- fclose(seq_ifile);
- fprintf(errfile,"...Wrote %d sequences to CLUSTAL file %s...\n",lpo_out->nsource_seq,clustal_out);
- }
- else {
- WARN_MSG(USERR,(ERRTXT,"*** Could not save CLUSTAL file %s. Exiting.",
- clustal_out),"$Revision: 1.2.2.9 $");
- exit_code=1; /* SIGNAL ERROR CONDITION */
- }
- }
-
-
-
- free_memory_and_exit: /* FREE ALL DYNAMICALLY ALLOCATED DATA!!!! */
-
- if (dna_lpo)
- free_lpo_sequence(dna_lpo,TRUE);
-
- for (i=0;i<n_input_seqs;i++) {
- for (j=0;j<nseq;j++) {
- if (input_seqs[i]==&(seq[j]))
- break;
- }
- free_lpo_sequence(input_seqs[i],(j==nseq));
- }
- FREE (input_seqs);
- if (nseq>0) FREE (seq);
-
- exit (exit_code);
-}
-
-
-
-static LPOSequence_T *read_partial_order_file (char *po_filename, char *subset_filename, int remove_listed_seqs, int keep_all_links, int do_switch_case, ResidueScoreMatrix_T *mat)
-{
- LPOSequence_T *lpo_in;
- FILE *po_file=NULL, *subset_file=NULL;
-
- if (!po_filename)
- return NULL;
-
- po_file = fopen (po_filename, "r");
- if (!po_file) {
- WARN_MSG (USERR, (ERRTXT,"Couldn't open MSA file %s.\nExiting.",po_filename), "$Revision: 1.2.2.9 $");
- return NULL;
- }
-
- if (subset_filename) {
- subset_file = fopen (subset_filename, "r");
- if (!subset_file) {
- WARN_MSG (USERR, (ERRTXT,"Couldn't open subset file %s.\nExiting.",subset_filename), "$Revision: 1.2.2.9 $");
- return NULL;
- }
- }
-
- if (subset_file) {
- lpo_in = read_msa_select (po_file, UNKNOWN_MSA, subset_file, keep_all_links, remove_listed_seqs, do_switch_case, mat);
- fclose (subset_file);
- fclose (po_file);
- if (lpo_in==NULL || lpo_in->nsource_seq == 0) {
- WARN_MSG (USERR, (ERRTXT,"MSA file %s, filtered with subset file %s, couldn't be read or contains no sequences.\nExiting.", po_filename, subset_filename), "$Revision: 1.2.2.9 $");
- return NULL;
- }
- }
- else {
- lpo_in = read_msa (po_file, UNKNOWN_MSA, do_switch_case, mat);
- fclose (po_file);
- if (lpo_in==NULL || lpo_in->nsource_seq == 0) {
- WARN_MSG (USERR, (ERRTXT,"MSA file %s couldn't be read or contains no sequences.\nExiting.", po_filename), "$Revision: 1.2.2.9 $");
- return NULL;
- }
- }
-
- return lpo_in;
-}
Deleted: trunk/packages/poa/trunk/make_pscores.pl
===================================================================
--- trunk/packages/poa/trunk/make_pscores.pl 2008-03-22 15:38:40 UTC (rev 1651)
+++ trunk/packages/poa/trunk/make_pscores.pl 2008-03-22 17:20:38 UTC (rev 1652)
@@ -1,48 +0,0 @@
-#!/usr/bin/perl
-#
-# Usage: make_pscores.pl SEQFILE SCOREFILE
-#
-# Runs BLAST and writes output to a file "SEQFILE.out".
-# This file is parsed into lines of the form "seqname1 \t seqname2 \t bitscore"
-# (suitable for input to POA), which are written to SCOREFILE.
-#
-# NB: The bitscore increases with increasing sequence similarity.
-#
-
-$seq_file = $ARGV[0];
-$pscore_file = $ARGV[1];
-
-open(PSCORE_OUT, ">$pscore_file");
-system("./formatdb -i $seq_file -p T");
-system("./blastall -p blastp -d $seq_file -i $seq_file -M BLOSUM80 -o $seq_file.out");
-
-open(BLAST_OUT, "<$seq_file.out");
-while(<BLAST_OUT>){
- @my_parse = split(/\s/, $_);
- if ($my_parse[0] =~ /^Query=/){
- $seq_name1 = $my_parse[1];
- while(<BLAST_OUT>){
- if ($_ =~ />/){
- last;
- }
- if ($_ =~ /bits/){
- while(<BLAST_OUT>){
- if ($_ =~ />/){
- last;
- }
- @other_parse = split(/\s+/, $_);
- $seq_name2 = $other_parse[0];
- $bit_score = $other_parse[1];
- if ($seq_name2 ne ""){
- printf PSCORE_OUT "$seq_name1\t$seq_name2\t$bit_score.0\n";
- }
- }
- if ($_ =~ />/){
- last;
- }
- }
- }
- }
-}
-close(BLAST_OUT);
-close(PSCORE_OUT);
Deleted: trunk/packages/poa/trunk/msa_format.c
===================================================================
--- trunk/packages/poa/trunk/msa_format.c 2008-03-22 15:38:40 UTC (rev 1651)
+++ trunk/packages/poa/trunk/msa_format.c 2008-03-22 17:20:38 UTC (rev 1652)
@@ -1,619 +0,0 @@
-
-
-/****************/
-/* msa_format.c */
-/****************/
-
-/* ---
- Implementation of "msa_format.h":
-
- Functions for reading CLUSTAL- and FASTA-PIR-formatted files
- into the LPOSequence_T data structure, and for determining file
- type from the initial line(s) of a file.
- --- */
-
-
-
-#include "msa_format.h"
-
-
-/** is `ch' an allowed residue? (a-z OR A-Z OR ? OR [ OR ]) */
-static int is_residue_char (char ch);
-
-/** is `ch' an allowed gap? (. OR -) */
-static int is_gap_char (char ch);
-
-/** could `ch' be the first character of a sequence name? (NOT # AND NOT * AND NOT whitespace) */
-static int is_name_first_char (char ch);
-
-
-/** decide which sequences in an RC-MSA alignment to keep based on a filter file `select_ifile'.
- remove discarded sequences and reindex the remaining ones.
- returns the NEW number of sequences. */
-static int filter_sequence_set (int n_seqs, FILE *select_ifile, int remove_listed_sequences,
- char **seq_names, char **seq_titles, char **aln_mat, int *aln_lengths);
-
-
-/** remove sequence bad_seq_id from each letter containing it.
- ASSUMES THAT NO LETTER CONTAINS _ONLY_ bad_seq_id... e.g. bad_seq_id IS A CONSENSUS.
- either keep or don't keep links present only in the removed sequence. */
-static void strip_seq_from_lpo (LPOSequence_T *lposeq, int bad_seq_id, int keep_all_links);
-
-
-static LPOSequence_T *read_clustal (FILE *ifile, const char *first_line,
- FILE *select_ifile, int remove_listed_sequences,
- int do_switch_case, ResidueScoreMatrix_T *score_matrix);
-
-static LPOSequence_T *read_pir (FILE *ifile, const char *first_line,
- FILE *select_ifile, int remove_listed_sequences,
- int do_switch_case, ResidueScoreMatrix_T *score_matrix);
-
-
-
-/** Reads an MSA from a file. If `format' is UNKNOWN_MSA, the file
- format is determined from the first line(s) of the file.
- Uses `select_ifile', if non-NULL, to filter the sequence set.
- Uppercases or lowercases sequence characters according to `do_switch_case'.
- Indexes LPO symbols using `score_matrix'.
-*/
-LPOSequence_T *read_msa_select (FILE *ifile, msa_file_format format,
- FILE *select_ifile, int keep_all_links, int remove_listed_sequences,
- int do_switch_case, ResidueScoreMatrix_T *score_matrix)
-{
- static char line[512]="";
-
- /* USE format TO FIX FILE FORMAT IF POSSIBLE. */
- /* OTHERWISE, PO FILES START WITH 'VERSION=' (and this line is discarded),
- FASTA-PIR FILES START WITH '>', AND CLUSTAL FILES WITH 'CLUSTAL' OR SIMPLY
- WITH THE FIRST ALIGNMENT LINE.
- LINES STARTING WITH whitespace OR '#' OR '*' ARE IGNORED. */
-
- while (format!=UNKNOWN_MSA || fgets (line, sizeof(line)-1, ifile)) {
-
- if (format==PIR_MSA || line[0] == '>') {
- return read_pir (ifile, line,
- select_ifile, remove_listed_sequences,
- do_switch_case, score_matrix);
- }
- else if (format==PO_MSA || 0==strncmp(line,"VERSION=",8)) {
- if (select_ifile != NULL) {
- return read_lpo_select (ifile, select_ifile, keep_all_links, remove_listed_sequences);
- }
- else {
- return read_lpo (ifile);
- }
- }
- else if (format==CLUSTAL_MSA || 0==strncmp(line,"CLUSTAL",7)) {
- return read_clustal (ifile, line,
- select_ifile, remove_listed_sequences,
- do_switch_case, score_matrix);
- }
- else if (line[0] == '#' || line[0] == '*' || line[0] == ' ' || line[0] == '\t' || line[0] == '\n' || line[0] == '\r') {
- continue;
- }
- else {
- WARN_MSG(USERR,(ERRTXT, "Unable to determine MSA file type... trying CLUSTAL.\n"),"$Revision: 1.1.2.3 $");
- return read_clustal (ifile, line,
- select_ifile, remove_listed_sequences,
- do_switch_case, score_matrix);
- }
- }
-
- WARN_MSG(USERR,(ERRTXT, "No data in MSA file.\n"),"$Revision: 1.1.2.3 $");
-
- return NULL;
-}
-
-/** Reads an MSA from a file (cf. read_msa_select).
- */
-LPOSequence_T *read_msa (FILE *ifile, msa_file_format format,
- int do_switch_case, ResidueScoreMatrix_T *score_matrix)
-{
- return read_msa_select (ifile, format,
- NULL, 0, 0,
- do_switch_case, score_matrix);
-}
-
-
-
-
-/** Reads a CLUSTAL-formatted alignment file.
- */
-LPOSequence_T *read_clustal (FILE *fp, const char *first_line,
- FILE *select_ifile, int remove_listed_sequences,
- int do_switch_case, ResidueScoreMatrix_T *score_matrix)
-{
- int i, j, n_seqs=0, curr_seq=0, expect_repeats=0, expect_header=1, line_num=0;
- char ch;
- char **seq_names=NULL, **seq_titles=NULL, **aln_mat=NULL;
- int *aln_lengths=NULL;
- char line[512]="", name[512]="", aln[512]="";
- LPOSequence_T *lposeq = NULL;
-
- while ((first_line!=NULL && line_num==0) || fgets (line, sizeof(line)-1, fp)) {
- if (first_line!=NULL && line_num==0) {
- strcpy (line, first_line);
- }
- line_num++;
-
- if (expect_header) { /* LOOKING FOR 'CLUSTAL' HEADER LINE */
- if (0 == strncmp(line,"CLUSTAL",7)) { /* FOUND IT; STOP LOOKING */
- expect_header=0;
- continue;
- }
- else if (0 == is_name_first_char(line[0])) { /* COMMENT LINE; KEEP LOOKING */
- continue;
- }
- else {
- expect_header=0; /* FOUND SOMETHING ELSE; NO HEADER, SO CONTINUE */
- }
- }
-
- if (0 == is_name_first_char(line[0])) { /* BLOCK SEPARATOR? */
- curr_seq = 0;
- if (n_seqs>0) {
- expect_repeats=1; /* YES; NOW EXPECT SAME SEQUENCES OVER AGAIN IN EACH BLOCK */
- }
- continue;
- }
-
- if (sscanf (line, "%s %[^\n\r]", name, aln) < 2) {
- WARN_MSG(USERR,(ERRTXT, "Error: Trouble reading CLUSTAL-formatted file near line %d: \n>>>\n%s<<<\nBailing out.\n",line_num,line),
- "$Revision: 1.1.2.3 $");
- goto free_memory_and_exit;
- }
-
- if (0 == expect_repeats) { /* FIRST BLOCK STILL, SO MAKE ROOM FOR NEW SEQ */
- n_seqs++;
-
- REALLOC (seq_names, n_seqs, char *);
- seq_names[n_seqs-1] = strdup(name);
- REALLOC (seq_titles, n_seqs, char *);
- seq_titles[n_seqs-1] = strdup("");
-
- REALLOC (aln_mat, n_seqs, char *);
- aln_mat[n_seqs-1] = strdup(" ");
- REALLOC (aln_lengths, n_seqs, int);
- aln_lengths[n_seqs-1] = 0;
- }
- else if (curr_seq>=n_seqs || strcmp(name,seq_names[curr_seq])) { /* NAME SHOULD BE A REPEAT */
- WARN_MSG(USERR,(ERRTXT, "Error: Trouble reading CLUSTAL-formatted file at line %d: \n>>>\n%s<<<\nSequence name (%s) does not match expected sequence name (%s). Bailing out.\n",line_num,line,name,seq_names[curr_seq]),
- "$Revision: 1.1.2.3 $");
- goto free_memory_and_exit;
- }
-
- REALLOC (aln_mat[curr_seq], aln_lengths[curr_seq] + strlen(aln), char);
- for (i=0; i<strlen(aln); i++) {
- ch = aln[i];
- if (is_residue_char(ch) || is_gap_char(ch)) {
- aln_mat[curr_seq][aln_lengths[curr_seq]++] = aln[i];
- }
- }
- curr_seq++;
- }
-
- n_seqs = filter_sequence_set (n_seqs, select_ifile, remove_listed_sequences, seq_names, seq_titles, aln_mat, aln_lengths);
-
- lposeq = lpo_from_aln_mat (n_seqs, seq_names, seq_titles, aln_mat, aln_lengths, do_switch_case, score_matrix);
-
- free_memory_and_exit:
- for (i=0; i<n_seqs; i++) {
- FREE (seq_names[i]);
- FREE (seq_titles[i]);
- FREE (aln_mat[i]);
- }
- FREE (seq_names);
- FREE (seq_titles);
- FREE (aln_mat);
- FREE (aln_lengths);
-
- if (n_seqs>0) {
- strcpy (lposeq->name, lposeq->source_seq[0].name);
- FREE (lposeq->title);
- lposeq->title = strdup (lposeq->source_seq[0].title);
- }
-
- return lposeq;
-}
-
-/** Reads a FASTA-PIR-formatted alignment file.
- */
-LPOSequence_T *read_pir (FILE *fp, const char *first_line,
- FILE *select_ifile, int remove_listed_sequences,
- int do_switch_case, ResidueScoreMatrix_T *score_matrix)
-{
- int i, j, n_seqs=0, curr_seq=0, line_num=0;
- char ch;
- char **seq_names=NULL, **seq_titles=NULL, **aln_mat=NULL;
- int *aln_lengths=NULL, *keep_seq=NULL;
- char line[512]="", name[512]="", title[512]="", aln[512]="";
- LPOSequence_T *lposeq = NULL;
-
- while ((first_line!=NULL && line_num==0) || fgets (line, sizeof(line)-1, fp)) {
- if (first_line!=NULL && line_num==0) {
- strcpy (line, first_line);
- }
- line_num++;
-
- if (line[0] == '>') { /* HEADER LINE FOR NEW SEQUENCE */
- title[0] = '\0';
- if (sscanf (line, ">%s %[^\n\r]", name, title) < 1) {
- WARN_MSG(USERR,(ERRTXT, "Error: Trouble reading PIR-formatted file near line %d (no sequence name?):\n>>>\n%s<<<\nBailing out.\n",line_num,line),
- "$Revision: 1.1.2.3 $");
- goto free_memory_and_exit;
- }
-
- n_seqs++;
- curr_seq=n_seqs-1;
- REALLOC (seq_names, n_seqs, char *);
- seq_names[n_seqs-1] = strdup(name);
- REALLOC (seq_titles, n_seqs, char *);
- seq_titles[n_seqs-1] = strdup(title);
- REALLOC (aln_mat, n_seqs, char *);
- aln_mat[n_seqs-1] = strdup(" ");
- REALLOC (aln_lengths, n_seqs, int);
- aln_lengths[n_seqs-1] = 0;
- }
- else if (line[0]=='#' || line[0]=='*') { /* COMMENT LINE */
- continue;
- }
- else { /* ALIGNMENT ROW FOR CURRENT SEQUENCE */
- sscanf (line, "%[^\n\r]", aln);
- if (n_seqs==0) {
- WARN_MSG(USERR,(ERRTXT, "Error: Trouble reading PIR-formatted file near line %d (no preceding '>seqname' line?):\n>>>\n%s<<<\nBailing out.\n",line_num,line),
- "$Revision: 1.1.2.3 $");
- goto free_memory_and_exit;
- }
-
- REALLOC (aln_mat[curr_seq], aln_lengths[curr_seq] + strlen(aln), char);
- for (i=0; i<strlen(aln); i++) {
- ch = aln[i];
- if (is_residue_char(ch) || is_gap_char(ch)) {
- aln_mat[curr_seq][aln_lengths[curr_seq]++] = aln[i];
- }
- }
- }
- }
-
- n_seqs = filter_sequence_set (n_seqs, select_ifile, remove_listed_sequences, seq_names, seq_titles, aln_mat, aln_lengths);
-
- lposeq = lpo_from_aln_mat (n_seqs, seq_names, seq_titles, aln_mat, aln_lengths, do_switch_case, score_matrix);
-
- free_memory_and_exit:
- for (i=0; i<n_seqs; i++) {
- FREE (seq_names[i]);
- FREE (seq_titles[i]);
- FREE (aln_mat[i]);
- }
- FREE (seq_names);
- FREE (seq_titles);
- FREE (aln_mat);
- FREE (aln_lengths);
-
- if (n_seqs>0) {
- strcpy (lposeq->name, lposeq->source_seq[0].name);
- FREE (lposeq->title);
- lposeq->title = strdup (lposeq->source_seq[0].title);
- }
-
- return lposeq;
-}
-
-/** Creates an LPO from an RC-MSA alignment matrix.
- */
-LPOSequence_T *lpo_from_aln_mat (int n_seqs, char **seq_names, char **seq_titles, char **aln_mat, int *aln_lengths, int do_switch_case, ResidueScoreMatrix_T *score_matrix)
-{
- int i, j, k, len, col, res_id, letter_id;
- char ch;
- LPOSequence_T *curr_seq, *lposeq;
- int **column_ids; /** which column contains each residue */
- int **res_ids; /** which residue is in each column */
- int *al_x, *al_y;
- int max_aln_length = 0;
- char *consens_row;
-
- if (n_seqs==0)
- return NULL;
-
- CALLOC (column_ids, n_seqs+1, int *);
- CALLOC (res_ids, n_seqs+1, int *);
- for (i=0; i<n_seqs; i++) {
- if (aln_lengths[i] > max_aln_length) {
- max_aln_length = aln_lengths[i];
- }
- }
- for (i=0; i<n_seqs+1; i++) {
- CALLOC (column_ids[i], max_aln_length, int);
- CALLOC (res_ids[i], max_aln_length, int);
- }
-
- /* GET CONSENSUS ROW THAT ALIGNS TO SOME OTHER ROW IN EACH COLUMN */
- /* CONSENSUS ROW IS _NECESSARY_ TO DEAL WITH THE POSSIBILITY THAT, e.g., UNORDERED RESIDUES
- IN seqs 0 & 1 ARE ORDERED BY ALIGNMENT WITH seq 2. */
-
- CALLOC (consens_row, max_aln_length, char);
- for (col=0; col<max_aln_length; col++) {
- consens_row[col] = '-';
- for (i=0; i<n_seqs; i++) {
- if (col < aln_lengths[i] && is_residue_char(aln_mat[i][col])) {
- consens_row[col] = aln_mat[i][col];
- break;
- }
- }
- }
-
- /* MAKE SEQUENCE FROM CONSENSUS ROW */
-
- CALLOC (lposeq, 1, LPOSequence_T);
- strcpy (lposeq->name, "consens_row");
- lposeq->title = strdup ("");
-
- CALLOC (lposeq->sequence, max_aln_length+1, char);
- for (len=col=0; col<max_aln_length; col++) {
- ch = consens_row[col];
- if (do_switch_case == switch_case_to_lower) {
- ch = tolower(ch);
- }
- else if (do_switch_case == switch_case_to_upper) {
- ch = toupper(ch);
- }
- if (is_residue_char(ch)) {
- lposeq->sequence[len] = ch;
- column_ids[0][len] = col;
- res_ids[0][col] = len;
- len++;
- }
- }
- lposeq->length = len;
- lposeq->sequence[col] = '\0';
- REALLOC (lposeq->sequence, len+1, char);
-
- initialize_seqs_as_lpo (1, lposeq, score_matrix);
-
- for (i=0; i<n_seqs; i++) {
-
- CALLOC (curr_seq, 1, LPOSequence_T);
- strcpy (curr_seq->name, seq_names[i]);
- curr_seq->title = strdup (seq_titles[i]);
-
- /* READ CHARACTERS FROM ALIGNMENT ROW INTO SEQUENCE: */
- CALLOC (curr_seq->sequence, aln_lengths[i]+1, char);
- for (len=col=0; col<aln_lengths[i]; col++) {
- ch = aln_mat[i][col];
- if (do_switch_case == switch_case_to_lower) {
- ch = tolower(ch);
- }
- else if (do_switch_case == switch_case_to_upper) {
- ch = toupper(ch);
- }
- if (is_residue_char(ch)) {
- curr_seq->sequence[len] = ch;
- column_ids[i+1][len] = col;
- res_ids[i+1][col] = len;
- len++;
- }
- }
- curr_seq->length = len;
- curr_seq->sequence[len] = '\0';
- REALLOC (curr_seq->sequence, len+1, char);
-
- initialize_seqs_as_lpo (1, curr_seq, score_matrix);
-
- /* ALIGN THIS SEQUENCE TO EXISTING ALIGNMENT USING CONSENSUS ROW */
-
- build_seq_to_po_index (lposeq);
-
- CALLOC (al_x, len, int);
- CALLOC (al_y, lposeq->length, int);
-
- for (j=0; j<lposeq->length; j++) {
- al_y[j] = INVALID_LETTER_POSITION;
- }
-
- /* FOR EACH RESIDUE IN THIS SEQUENCE: */
- for (j=0; j<len; j++) {
- col = column_ids[i+1][j];
-
- /* GET AN OLD NODE IN CONSENSUS ROW ALIGNED TO THIS ONE: */
- res_id = res_ids[0][col];
-
- /* STORE INFO IN al_x,al_y: */
- letter_id = lposeq->source_seq[0].seq_to_po[res_id];
- al_x[j] = letter_id;
- al_y[letter_id] = j;
- }
-
- /* DO THE FUSION-BASED BUILDUP */
-
- fuse_ring_identities (lposeq->length, lposeq->letter,
- curr_seq->length, curr_seq->letter,
- al_y, al_x);
-
- fuse_lpo (lposeq, curr_seq, al_y, al_x);
-
- free_lpo_sequence (curr_seq, 0);
- FREE (al_x);
- FREE (al_y);
- FREE (curr_seq);
- }
-
- /* STRIP OUT CONSENSUS SEQUENCE */
-
- strip_seq_from_lpo (lposeq, /*remove #*/ 0, /*0=don't keep all links*/ 0);
-
- for (i=0; i<n_seqs+1; i++) {
- FREE (column_ids[i]);
- FREE (res_ids[i]);
- }
- FREE (column_ids);
- FREE (res_ids);
- FREE (consens_row);
-
- return lposeq;
-}
-
-
-static int is_residue_char (char ch)
-{
- if (ch>='a' && ch<='z') return 1;
- if (ch>='A' && ch<='Z') return 1;
- if (ch=='?' || ch=='[' || ch==']') return 1;
- return 0;
-}
-
-static int is_gap_char (char ch)
-{
- if (ch=='.' || ch=='-') return 1;
- return 0;
-}
-
-static int is_name_first_char (char ch)
-{
- if (ch=='\n' || ch=='\r' || ch==' ' || ch=='\t' || ch=='#' || ch=='*') return 0;
- return 1;
-}
-
-static int filter_sequence_set (int n_seqs, FILE *select_ifile, int remove_listed_sequences,
- char **seq_names, char **seq_titles, char **aln_mat, int *aln_lengths)
-{
- int i, j;
- int *keep_seq;
- char name[512]="";
-
- /* DECIDE WHICH SEQUENCES TO KEEP: */
-
- CALLOC (keep_seq, n_seqs, int);
-
- for (i=0; i<n_seqs; i++) {
- keep_seq[i] = ((!select_ifile || remove_listed_sequences) ? 1 : 0);
- }
-
- while (select_ifile && fscanf (select_ifile, "SOURCENAME=%[^\r\n]%*[\r\n]", name) >= 1) {
- for (i=0; i<n_seqs; i++) {
- if (0 == strcmp(seq_names[i],name)) {
- keep_seq[i] = (remove_listed_sequences ? 0 : 1);
- }
- }
- }
-
- /* FREE MEM FOR DISCARDED SEQUENCES: */
- for (i=0; i<n_seqs; i++) if (keep_seq[i]==0) {
- FREE (seq_names[i]);
- FREE (seq_titles[i]);
- FREE (aln_mat[i]);
- }
-
- /* COMPACT SEQUENCE LIST (MOVE i<--j): */
- for (i=j=0; j<n_seqs; i++, j++) {
- while (j<n_seqs && keep_seq[j]==0)
- j++;
- if (j==n_seqs)
- break;
- if (j<n_seqs && i!=j) {
- seq_names[i] = seq_names[j];
- seq_titles[i] = seq_titles[j];
- aln_mat[i] = aln_mat[j];
- aln_lengths[i] = aln_lengths[j];
- }
- }
-
- FREE (keep_seq);
-
- return i;
-}
-
-
-
-void strip_seq_from_lpo (LPOSequence_T *lposeq, int bad_seq_id, int keep_all_links)
-{
- int i, j, id_left, id_right, len, n_seqs;
- LPOLetterLink_T *lnk, *tmp_lnk;
- LPOLetterSource_T *src, *prev, *tmp_src;
- LPOLetter_T *lett;
-
- n_seqs = lposeq->nsource_seq - 1;
-
- /* FREE MEM ASSOCIATED WITH BAD SEQ */
- FREE (lposeq->source_seq[bad_seq_id].title);
- FREE (lposeq->source_seq[bad_seq_id].sequence);
- FREE (lposeq->source_seq[bad_seq_id].seq_to_po);
- FREE (lposeq->source_seq[bad_seq_id].po_to_seq);
-
- /* COMPACT SOURCE LIST */
- for (i=0; i<n_seqs; i++) if (i>=bad_seq_id) {
- lposeq->source_seq[i] = lposeq->source_seq[i+1];
- }
- /* This de-allocation causes a bug in conjunction with
- later REBUFF calls: */
- /* REALLOC (lposeq->source_seq, n_seqs, LPOSourceInfo_T); */
- lposeq->nsource_seq = n_seqs;
-
-
- /* RENUMBER SOURCES IN EACH LETTER */
- for (i=0; i<lposeq->length; i++) {
- lett = &(lposeq->letter[i]);
- prev = NULL;
- src = &(lett->source);
- while (src != NULL && src->iseq >= 0) {
- if (src->iseq == bad_seq_id) {
- if (prev) { /* NOT IN LIST HEAD, SO RELINK FROM PREV AND FREE */
- prev->more = src->more;
- FREE (src);
- src = prev->more;
- }
- else { /* IN LIST HEAD, SO REASSIGN HEAD DATA, RELINK, FREE */
- if (src->more) {
- tmp_src = src->more;
- src->ipos = tmp_src->ipos;
- src->iseq = tmp_src->iseq;
- src->more = tmp_src->more;
- FREE (tmp_src);
- }
- else {
- src->ipos = -1;
- src->iseq = -1;
- }
- }
- }
- else {
- if (src->iseq>=bad_seq_id)
- src->iseq--;
- prev = src;
- src = src->more;
- }
- }
- }
-
- if (0==keep_all_links) {
-
- build_seq_to_po_index (lposeq);
-
- /* FREE ALL LINK INFO */
- for (i=0; i<lposeq->length; i++) {
- lett = &(lposeq->letter[i]);
- for (lnk = &(lett->left); lnk->more != NULL; ) {
- tmp_lnk = lnk->more;
- lnk->more = tmp_lnk->more;
- FREE (tmp_lnk);
- }
- lnk->ipos = -1;
- for (lnk = &(lett->right); lnk->more != NULL; ) {
- tmp_lnk = lnk->more;
- lnk->more = tmp_lnk->more;
- FREE (tmp_lnk);
- }
- lnk->ipos = -1;
- }
-
- /* REBUILD LINK INFO FROM REMAINING SEQS */
- for (i=0; i<n_seqs; i++) {
- len = lposeq->source_seq[i].length;
- for (j=0; j<len-1; j++) {
- id_left = lposeq->source_seq[i].seq_to_po[j];
- id_right = lposeq->source_seq[i].seq_to_po[j+1];
- add_lpo_link (&(lposeq->letter[id_left].right), id_right);
- add_lpo_link (&(lposeq->letter[id_right].left), id_left);
- }
- }
- }
-}
Deleted: trunk/packages/poa/trunk/msa_format.h
===================================================================
--- trunk/packages/poa/trunk/msa_format.h 2008-03-22 15:38:40 UTC (rev 1651)
+++ trunk/packages/poa/trunk/msa_format.h 2008-03-22 17:20:38 UTC (rev 1652)
@@ -1,52 +0,0 @@
-
-
-/****************/
-/* msa_format.h */
-/****************/
-
-/* ---
- Functions for reading CLUSTAL- and FASTA-PIR-formatted files
- into the LPOSequence_T data structure, and for determining file
- type from the initial line(s) of a file.
- --- */
-
-
-#ifndef MSA_FORMAT_HEADER_INCLUDED
-#define MSA_FORMAT_HEADER_INCLUDED
-
-#include "default.h"
-#include "seq_util.h"
-#include "lpo.h"
-
-/** types of MSA files supported */
-typedef enum {
- UNKNOWN_MSA, CLUSTAL_MSA, PIR_MSA, PO_MSA
-}
-msa_file_format;
-
-
-/** Reads an MSA from a file (cf. read_msa_select).
- */
-LPOSequence_T *read_msa (FILE *ifile, msa_file_format format,
- int do_switch_case, ResidueScoreMatrix_T *score_matrix);
-
-
-/** Reads an MSA from a file. If `format' is UNKNOWN_MSA, the file
- format is determined from the first line(s) of the file.
- Uses `select_ifile', if non-NULL, to filter the sequence set.
- Uppercases or lowercases sequence characters according to `do_switch_case'.
- Indexes LPO symbols using `score_matrix'.
-*/
-LPOSequence_T *read_msa_select (FILE *ifile, msa_file_format format,
- FILE *select_ifile, int keep_all_links, int remove_listed_sequences,
- int do_switch_case, ResidueScoreMatrix_T *score_matrix);
-
-
-/** Creates an LPO from an RC-MSA alignment matrix.
- */
-LPOSequence_T *lpo_from_aln_mat (int n_seqs, char **seq_names, char **seq_titles, char **aln_mat, int *aln_lengths,
- int do_switch_case, ResidueScoreMatrix_T *score_matrix);
-
-
-
-#endif /* MSA_FORMAT_HEADER_INCLUDED */
Deleted: trunk/packages/poa/trunk/multidom.pscore
===================================================================
--- trunk/packages/poa/trunk/multidom.pscore 2008-03-22 15:38:40 UTC (rev 1651)
+++ trunk/packages/poa/trunk/multidom.pscore 2008-03-22 17:20:38 UTC (rev 1652)
@@ -1,16 +0,0 @@
-ABL1_HUMAN ABL1_HUMAN 2475.0
-ABL1_HUMAN MATK_HUMAN 260.0
-ABL1_HUMAN GRB2_HUMAN 100.0
-ABL1_HUMAN CRKL_HUMAN 63.0
-CRKL_HUMAN CRKL_HUMAN 736.0
-CRKL_HUMAN GRB2_HUMAN 70.0
-CRKL_HUMAN ABL1_HUMAN 60.0
-CRKL_HUMAN MATK_HUMAN 55.0
-GRB2_HUMAN GRB2_HUMAN 551.0
-GRB2_HUMAN ABL1_HUMAN 100.0
-GRB2_HUMAN MATK_HUMAN 77.0
-GRB2_HUMAN CRKL_HUMAN 70.0
-MATK_HUMAN MATK_HUMAN 1215.0
-MATK_HUMAN ABL1_HUMAN 279.0
-MATK_HUMAN GRB2_HUMAN 69.0
-MATK_HUMAN CRKL_HUMAN 55.0
Deleted: trunk/packages/poa/trunk/multidom.seq
===================================================================
--- trunk/packages/poa/trunk/multidom.seq 2008-03-22 15:38:40 UTC (rev 1651)
+++ trunk/packages/poa/trunk/multidom.seq 2008-03-22 17:20:38 UTC (rev 1652)
@@ -1,42 +0,0 @@
->ABL1_HUMAN PROTO-ONCOGENE TYROSINE-PROTEIN KINASE ABL (EC 2.7.1.112) (P150) (C-ABL).
-MLEICLKLVG CKSKKGLSSS SSCYLEEALQ RPVASDFEPQ GLSEAARWNS KENLLAGPSE
-NDPNLFVALY DFVASGDNTL SITKGEKLRV LGYNHNGEWC EAQTKNGQGW VPSNYITPVN
-SLEKHSWYHG PVSRNAAEYL LSSGINGSFL VRESESSPGQ RSISLRYEGR VYHYRINTAS
-DGKLYVSSES RFNTLAELVH HHSTVADGLI TTLHYPAPKR NKPTVYGVSP NYDKWEMERT
-DITMKHKLGG GQYGEVYEGV WKKYSLTVAV KTLKEDTMEV EEFLKEAAVM KEIKHPNLVQ
-LLGVCTREPP FYIITEFMTY GNLLDYLREC NRQEVNAVVL LYMATQISSA MEYLEKKNFI
-HRDLAARNCL VGENHLVKVA DFGLSRLMTG DTYTAHAGAK FPIKWTAPES LAYNKFSIKS
-DVWAFGVLLW EIATYGMSPY PGIDLSQVYE LLEKDYRMER PEGCPEKVYE LMRACWQWNP
-SDRPSFAEIH QAFETMFQES SISDEVEKEL GKQGVRGAVS TLLQAPELPT KTRTSRRAAE
-HRDTTDVPEM PHSKGQGESD PLDHEPAVSP LLPRKERGPP EGGLNEDERL LPKDKKTNLF
-SALIKKKKKT APTPPKRSSS FREMDGQPER RGAGEEEGRD ISNGALAFTP LDTADPAKSP
-KPSNGAGVPN GALRESGGSG FRSPHLWKKS STLTSSRLAT GEEEGGGSSS KRFLRSCSAS
-CVPHGAKDTE WRSVTLPRDL QSTGRQFDSS TFGGHKSEKP ALPRKRAGEN RSDQVTRGTV
-TPPPRLVKKN EEAADEVFKD IMESSPGSSP PNLTPKPLRR QVTVAPASGL PHKEEAEKGS
-ALGTPAAAEP VTPTSKAGSG APGGTSKGPA EESRVRRHKH SSESPGRDKG KLSRLKPAPP
-PPPAASAGKA GGKPSQSPSQ EAAGEAVLGA KTKATSLVDA VNSDAAKPSQ PGEGLKKPVL
-PATPKPQSAK PSGTPISPAP VPSTLPSASS ALAGDQPSST AFIPLISTRV SLRKTRQPPE
-RIASGAITKG VVLDSTEALC LAISRNSEQM ASHSAVLEAG KNLYTFCVSY VDSIQQMRNK
-FAFREAINKL ENNLRELQIC PATAGSGPAA TQDFSKLLSS VKEISDIVQR
->CRKL_HUMAN CRK-LIKE PROTEIN.
-MSSARFDSSD RSAWYMGPVS RQEAQTRLQG QRHGMFLVRD SSTCPGDYVL SVSENSRVSH
-YIINSLPNRR FKIGDQEFDH LPALLEFYKI HYLDTTTLIE PAPRYPSPPM GSVSAPNLPT
-AEDNLEYVRT LYDFPGNDAE DLPFKKGEIL VIIEKPEEQW WSARNKDGRV GMIPVPYVEK
-LVRSSPHGKH GNRNSNSYGI PEPAHAYAQP QTTTPLPAVS GSPGAAITPL PSTQNGPVFA
-KAIQKRVPCA YDKTALALEV GDIVKVTRMN INGQWEGEVN GRKGLFPFTH VKIFDPQNPD
-ENE
->GRB2_HUMAN GROWTH FACTOR RECEPTOR-BOUND PROTEIN 2 (GRB2 ADAPTOR PROTEIN)(SH2)
-MEAIAKYDFK ATADDELSFK RGDILKVLNE ECDQNWYKAE LNGKDGFIPK NYIEMKPHPW
-FFGKIPRAKA EEMLSKQRHD GAFLIRESES APGDFSLSVK FGNDVQHFKV LRDGAGKYFL
-WVVKFNSLNE LVDYHRSTSV SRNQQIFLRD IEQVPQQPTY VQALFDFDPQ EDGELGFRRG
-DFIHVMDNSD PNWWKGACHG QTGMFPRNYV TPVNRNV
->MATK_HUMAN MEGAKARYOCYTE-ASSOCIATED TYROSINE-PROTEIN KINASE (EC 2.7.1.112) (TYROSINE-PROTEIN KINASE CTK) (PROTEIN KINASE HYL) (HEMATOPOIETIC CONSENSUS TYROSINE-LACKING KINASE).
-MAGRGSLVSW RAFHGCDSAE ELPRVSPRFL RAWHPPPVSA RMPTRRWAPG TQCITKCEHT
-RPKPGELAFR KGDVVTILEA CENKSWYRVK HHTSGQEGLL AAGALREREA LSADPKLSLM
-PWFHGKISGQ EAVQQLQPPE DGLFLVRESA RHPGDYVLCV SFGRDVIHYR VLHRDGHLTI
-DEAVFFCNLM DMVEHYSKDK GAICTKLVRP KRKHGTKSAE EELARAGWLL NLQHLTLGAQ
-IGEGEFGAVL QGEYLGQKVA VKNIKCDVTA QAFLDETAVM TKMQHENLVR LLGVILHQGL
-YIVMEHVSKG NLVNFLRTRG RALVNTAQLL QFSLHVAEGM EYLESKKLVH RDLAARNILV
-SEDLVAKVSD FGLAKAERKG LDSSRLPVKW TAPEALKHGK FTSKSDVWSF GVLLWEVFSY
-GRAPYPKMSL KEVSEAVEKG YRMEPPEGCP GPVHVLMSSC WEAEPARRPP FRKLAEKLAR
-ELRSAGAPAS VSGQDADGST SPRSQEP
Deleted: trunk/packages/poa/trunk/numeric_data.c
===================================================================
--- trunk/packages/poa/trunk/numeric_data.c 2008-03-22 15:38:40 UTC (rev 1651)
+++ trunk/packages/poa/trunk/numeric_data.c 2008-03-22 17:20:38 UTC (rev 1652)
@@ -1,145 +0,0 @@
-
-
-#include <lpo.h>
-
-/**@memo create a new LPONumericData_T record for the designated source_seq.
- Dynamically allocates data storage array equal in length to the length of
- the source_seq. Initializes the array values to initial_value if non-zero.
- Returns a pointer to the new LPONumericData_T and also adds it to the
- source_seq->data[] list.
-*/
-LPONumericData_T *new_numeric_data(LPOSourceInfo_T *source_seq,
- char name[],
- char title[],
- double initial_value)
-{
- int i;
- LPONumericData_T *data=NULL;
- REBUFF(source_seq->data,source_seq->ndata,NUMDATA_BUFFER_CHUNK,LPONumericData_T);
- data=source_seq->data + source_seq->ndata++;
-
- STRNCPY(data->name,name,SEQUENCE_NAME_MAX); /* COPY NAME AND TITLE */
- if (title)
- data->title=strdup(title);
-
- CALLOC(data->data,source_seq->length,double); /* ALLOCATE THE ARRAY */
- if (initial_value) /* INITIALIZE VALUES IF DESIRED */
- LOOP (i,source_seq->length)
- data->data[i]=initial_value;
- return data; /* RETURN POINTER TO THE NEW DATA HOLDER */
-}
-
-
-
-LPONumericData_T *cp_numeric_data(LPOSourceInfo_T *source_seq,
- LPONumericData_T *data)
-{
- int i;
- LPONumericData_T *new_data;
- new_data=new_numeric_data(source_seq,data->name,data->title,0);
- LOOP (i,source_seq->length) /* COPY ALL THE VALUES */
- new_data->data[i]=data->data[i];
- return new_data;
-}
-
-
-
-/**@memo finds LPONumericData from the source_seq, matching the specified
- name, or returns NULL if not found. */
-LPONumericData_T *find_numeric_data(LPOSourceInfo_T *source_seq,
- char name[])
-{
- int i;
- LOOP (i,source_seq->ndata)
- if (0==strcmp(source_seq->data[i].name,name)) /*FOUND IT. RETURN POINTER*/
- return source_seq->data+i;
- return NULL; /* NOT FOUND! */
-}
-
-
-
-/**@memo frees the set of numeric_data passed as arguments. If requested,
- will also free the block of memory for the array of entries data[]. */
-void free_lpo_numeric_data(int ndata,LPONumericData_T *data,
- int please_free_block)
-{
- int i;
- LOOP (i,ndata) { /* DUMP ASSOCIATED ARRAYS */
- FREE(data[i].title);
- FREE(data[i].data);
- }
- if (please_free_block) /* DUMP THE BLOCK ITSELF */
- free(data);
-}
-
-
-
-
-
-/**@memo creates one or more new numeric_data for a given sequence,
- based on the presence of corresponding named numeric_data. Specifically,
- the list of set_names[] is processed one by one, creating a source_name
- according to the source_name_fmt string, and finding a numeric_data
- entry with that name. If it is not found, the routine calls exit(-1).
- If it is found, a new set of numeric_data is created, with a name
- created according to target_name_fmt, and titled according to
- the title_fmt string and source_data->title. */
-void new_numeric_data_sets(LPOSourceInfo_T *source_seq,
- int nset,char *set_names[],
- char source_name_fmt[],
- char target_name_fmt[],
- char title_fmt[])
-{
- int j;
- LPONumericData_T *data;
- char name[256],title[4096];
-
- LOOPF (j,nset) {
- sprintf(name,source_name_fmt,set_names[j]); /* GENERATE NAME TO MATCH*/
- data=find_numeric_data(source_seq,name); /*FIND SOURCE DATA*/
- if (!data) {
- WARN_MSG(USERR,(ERRTXT,"*** could not find dataset %s for seq %s.\nExiting\n\n",
- name,source_seq->name),"$Revision: 1.2 $");
- exit(-1);
- }
- sprintf(name,target_name_fmt,set_names[j]); /* GENERATE NEW NAME */
- sprintf(title,title_fmt,data->title);
- new_numeric_data(source_seq,name,title,0.); /* CREATE NEW ARRAY */
- }
-}
-
-
-
-
-/**@memo reads a stream of FASTA-formatted numeric data, and stores them in
-the corresponding set of source_seq, matching the sequences by name.
-Multiple numeric data entries can be read from a single stream. */
-void read_numeric_data(int nsource_seq,
- LPOSourceInfo_T source_seq[],
- FILE *ifile)
-{
- int i,j;
- char line[4096],seq_name[128],data_name[1024],title[2048];
- LPONumericData_T *data;
-
- while (fgets(line,sizeof(line),ifile)) {
- title[0]='\0';
- if (sscanf(line,">%s NUMERIC_DATA=%s %s",seq_name,data_name,title)>=2) {
- LOOP (i,nsource_seq) /* FIND THE MATCHING SEQUENCE */
- if (0==strcmp(seq_name,source_seq[i].name)) /* MATCH */
- break;
- if (LOOP_FINISHED(i,nsource_seq)) /* SEQ NOT FOUND!! */
- WARN_MSG(USERR,(ERRTXT,"Error! NUMERIC_DATA %s, sequence %s does not exist. Skipping.\n\n",data_name,seq_name),"$Revision: 1.2 $");
- else { /* FOUND THE SEQ, SAVE THE DATA */
- if (data=find_numeric_data(source_seq+i,data_name)) /*REUSE EXISTING*/
- WARN_MSG(WARN,(ERRTXT,"NUMERIC_DATA %s already exists on sequence %s. Overwriting.\n",data_name,seq_name),"$Revision: 1.2 $");
- else /* CREATE A NEW DATA HOLDER */
- data=new_numeric_data(source_seq+i,data_name,title,0.);
- LOOPF (j,source_seq[i].length) /* READ IN THE VALUES */
- fscanf(ifile," %lf",data->data+j);
- }
- }
- }
-}
-
-
Deleted: trunk/packages/poa/trunk/poa.h
===================================================================
--- trunk/packages/poa/trunk/poa.h 2008-03-22 15:38:40 UTC (rev 1651)
+++ trunk/packages/poa/trunk/poa.h 2008-03-22 17:20:38 UTC (rev 1652)
@@ -1,252 +0,0 @@
-
-
-#ifndef POA_HEADER_INCLUDED
-#define POA_HEADER_INCLUDED
-
-
-
-
-/** MAXIMUM GAP LENGTH TRACKED IN align_lpo;
- LARGER THAN THIS WILL BE CAPPED
- (DEFAULT VALUE) */
-#ifndef TRUNCATE_GAP_LENGTH
-#define TRUNCATE_GAP_LENGTH 16
-#endif
-
-/** LENGTH OVER WHICH GAP PENALTY DECAYS IN align_lpo
- (DEFAULT VALUE) */
-#ifndef DECAY_GAP_LENGTH
-#define DECAY_GAP_LENGTH 0
-#endif
-
-#define REV_COMP_STRING "/rev_comp"
-
-
-/** THE NULL LETTER-REFERENCE */
-#define INVALID_LETTER_POSITION (-1)
-
-typedef int LPOLetterRef_T;
-
-
-typedef int LPOScore_T;
-
- /** NEEDED FOR seq_util.h */
-typedef LPOScore_T ResidueScore_T;
-#define RESIDUE_SCORE_DEFINED
-
-/** linked list for storing source origin (sequence position)
- from which this letter was derived */
-struct LPOLetterSource_S {
- /** index of the sequence, referencing the source_seq[] array*/
- int iseq;
- /** index of the corresponding position in that sequence */
- LPOLetterRef_T ipos;
- /** next node in the linked list */
- struct LPOLetterSource_S *more;
-} ;
-
-typedef struct LPOLetterSource_S LPOLetterSource_T;
-
-
-/** linked list for connecting an LPOLetter to either right
- or left */
-struct LPOLetterLink_S {
- /** ADJACENT LETTER LINKED TO THIS LETTER */
- LPOLetterRef_T ipos;
-#ifdef USE_WEIGHTED_LINKS
- /** transition cost for traversing this link */
- LPOScore_T score;
-#endif
- /** next node in the linked list */
- struct LPOLetterLink_S *more;
-} ;
-
-typedef struct LPOLetterLink_S LPOLetterLink_T;
-
-
-/** the chunk size for allocating additional
- letters in an LPOLetter_T array */
-#define LPO_LETTER_BUFFER_CHUNK 64
-
-
-/** Structure for storing individual LPO Letters*/
-struct LPOLetter_S {
- /** ADJACENT LETTER(S) TO THE LEFT */
- LPOLetterLink_T left;
- /** ADJACENT LETTER(S) TO THE RIGHT */
- LPOLetterLink_T right;
- /** SOURCE SEQ POSITION(S) */
- LPOLetterSource_T source;
- /** CIRCULAR LIST OF ALIGNED POSITIONS */
- LPOLetterRef_T align_ring;
- /** MINIMUM INDEX OF ALL POSITIONS ON THE RING */
- LPOLetterRef_T ring_id;
- /** SCORE FOR BALANCING PARTIAL ORDER EFFECTS ON MATRIX NEUTRALITY */
- float score;
- /** THE ACTUAL RESIDUE CODE! */
- char letter;
-} ;
-
-
-
-typedef struct LPOLetter_S LPOLetter_T;
-
-
-/** maximum length of a sequence name */
-#define SEQUENCE_NAME_MAX 32
-/** buffer chunk size for expanding a block of seq storage */
-#define SEQUENCE_BUFFER_CHUNK 8
-
-/** buffer chunk size for expanding a source_seq[] array */
-#define SOURCE_SEQ_BUFFER_CHUNK 16
-
-
-
-#define NUMDATA_BUFFER_CHUNK 4
-/** storage for quantitative data attached to a sequence */
-struct LPONumericData_S { /** */
- char name[SEQUENCE_NAME_MAX]; /** */
- char *title; /** */
- double *data;
-};
-typedef struct LPONumericData_S LPONumericData_T;
-
-
-/** Structure for storing individual source sequence information,
- stuff like name, title etc. */
-struct LPOSourceInfo_S { /** */
- char name[SEQUENCE_NAME_MAX]; /** */
- char *title; /** */
- char *sequence; /** */
- int *seq_to_po; /** */
- int *po_to_seq; /** */
- LPONumericData_T *data; /** */
- int ndata; /** */
- int length; /** */
- int istart;
- /** FOR PURPOSES OF HEAVIEST BUNDLE CALCULATION */
- int weight;
- /** WHAT BUNDLE IS THIS A MEMBER OF? */
- int bundle_id;
-};
-
-typedef struct LPOSourceInfo_S LPOSourceInfo_T;
-
-/** the NULL bundle-reference */
-#define NO_BUNDLE (-1)
-
-/** bundle-reference meaning "include all bundles" */
-#define ALL_BUNDLES (-1)
-
-
-/** holder for an LPO sequence, its letters,
- and associated information */
-struct LPOSequence_S {/** */
- int length;/** */
- LPOLetter_T *letter;/** */
- char *title;/** */
- char *sequence;/** */
- char name[SEQUENCE_NAME_MAX];/** */
- int nsource_seq;/** */
- LPOSourceInfo_T *source_seq;
-};
-
-typedef struct LPOSequence_S LPOSequence_T;
-
-
-typedef LPOSequence_T Sequence_T;
-
-
-/**@memo GENERAL FORM IS seq_y[j].left.ipos */
-#define SEQ_Y_LEFT(j) (j-1)
-#define SEQ_Y_RIGHT(j) (j+1)
-
-
-
-/**@memo Data structure for analyzing sequence differences in MSA*/
-struct LPOLetterCount_S {
- unsigned int is_error:2;
- unsigned int meets_criteria:1;
- unsigned int seq_count:29;
-};
-
-typedef struct LPOLetterCount_S LPOLetterCount_T;
-
-/** classification of sequence differences */
-enum {
- no_error,
- substitution_error,
- insertion_error,
- deletion_error,
- max_error_states
-};
-
-
-
-
-
-
-
-/** DON'T ALLOCATE MORE THAN THIS TOTAL AMOUNT OF MEMORY
----------------------------------------------------------------
----------------------------------------------------------------
-*/
-#define POA_MAX_ALLOC 300000000
-
-
-#endif
-
-
-
-
-
-
-/**@name The lpo library*/
-/*@{*/
-/**@memo This set of web pages documents the functionality of the lpo
-function library. This is a set of C functions for reading, writing,
-creating, manipulating, and aligning partial order sequences. These
-functions divide into several groups:
-\begin{itemize}
-\item \URL[File utilities]{General.html#read_fasta}: reading and writing
-FASTA and po files
-\item \URL[lpo utilities]{General.html#add_lpo_link}: creating, fusing,
-freeing, manipulating lpo data
-\item \URL[alignment]{General.html#align_lpo}: aligning one or more linear
-sequences to an lpo
-\item analysis: analyzing lpo structure, e.g. to find consensus
-\end{itemize}
-*/
-
-/**@memo Click \URL[here]{../poa} for more information about partial
-order alignment.*/
-
-/*@}*/
-
-
-
-/**@name linking to the lpo library */
-/*@{*/
-/**@memo To use function from this library in your code, you must
- do two things. First you must
-include <lpo.h> in your source files, to access the prototypes.
- Second, when you compile, you must tell the compiler where the
- lpo header and library files are located. {\bfNB it appears gcc
-loads libraries in reverse order of the command line arguments, so
-you have to specify your source files BEFORE the -llpo library argument
-on the command line, or the linker will give you unresolved reference
-errors}. e.g. \begin{verbatim}
-
-gcc -o myprog myfile.c -I~leec/lib/include -L~leec/lib -llpo
-\end{verbatim}
-*/
-
-/**@memo Click \URL[here]{../poa} for more information about partial
-order alignment.*/
-
-/*@}*/
-
-
-
-
-
Deleted: trunk/packages/poa/trunk/project.h
===================================================================
--- trunk/packages/poa/trunk/project.h 2008-03-22 15:38:40 UTC (rev 1651)
+++ trunk/packages/poa/trunk/project.h 2008-03-22 17:20:38 UTC (rev 1652)
@@ -1,11 +0,0 @@
-
-#ifndef PROGRAM_NAME
-#define PROGRAM_NAME "poa"
-#endif
-#ifndef PROGRAM_VERSION
-#define PROGRAM_VERSION "v1.0.0"
-#endif
-
-
-#include "poa.h"
-
Deleted: trunk/packages/poa/trunk/remove_bundle.c
===================================================================
--- trunk/packages/poa/trunk/remove_bundle.c 2008-03-22 15:38:40 UTC (rev 1651)
+++ trunk/packages/poa/trunk/remove_bundle.c 2008-03-22 17:20:38 UTC (rev 1652)
@@ -1,158 +0,0 @@
-
-
-
-#include "default.h"
-#include "poa.h"
-#include "seq_util.h"
-#include "lpo.h"
-
-
-
-
-int compact_links(LPOLetterLink_T *list,int old_to_new[])
-{
- LPOLetterLink_T *link=NULL,*link_last=NULL,*next_link,*link_head=NULL;
-
- CALLOC(link,1,LPOLetterLink_T);
- memcpy(link,list,sizeof(LPOLetterLink_T));
-
- for (;link && link->ipos>=0;link=next_link){
- next_link=link->more;
- if (old_to_new[link->ipos]<0) /* THIS POSITION NO LONGER EXISTS! */
- free(link); /* DELETE THIS LINK ENTRY */
- else { /* COPY THIS BACK TO PREVIOUS LINK ENTRY: COMPACT THE LIST*/
- link->ipos = old_to_new[link->ipos]; /* REMAP TO NEW INDEX SYSTEM */
- if (link_last) /* CONNECT TO PREVIOUS NODE IN LIST */
- link_last->more=link;
- else /* THIS IS THE NEW HEAD OF THE LIST */
- link_head=link;
- link_last=link;
- }
- }
- if (link) /* AN EMPTY LINK I.E. link->ipos<0 ... JUNK IT */
- free(link);
- if (link_last) /* TERMINATE LAST NODE IN LIST */
- link_last->more=NULL;
- if (link_head) {
- memcpy(list,link_head,sizeof(LPOLetterLink_T));
- free(link_head);
- return 1; /* COMPACTED LINK LIST IS NON-EMPTY */
- }
- else { /* NOTHING LEFT IN LIST, SO BLANK IT */
- list->more=NULL;
- list->ipos= INVALID_LETTER_POSITION;
- return 0; /* COMPACTED LINK LIST IS EMPTY */
- }
-}
-
-
-
-
-
-
-
-int compact_sources(LPOLetterSource_T *list,int ibundle_delete,
- LPOSourceInfo_T source_seq[])
-{
- LPOLetterSource_T *source=NULL,*source_last=NULL,*next_source,*source_head=NULL;
-
- CALLOC(source,1,LPOLetterSource_T);
- memcpy(source,list,sizeof(LPOLetterSource_T));
-
- for (;source;source=next_source){
- next_source=source->more;
- if (source_seq[source->iseq].bundle_id == ibundle_delete)
- free(source); /* DELETE THIS SOURCE ENTRY */
- else { /* COPY THIS BACK TO PREVIOUS SOURCE ENTRY: COMPACT THE LIST*/
- if (source_last) /* CONNECT TO PREVIOUS NODE IN LIST */
- source_last->more=source;
- else /* THIS IS THE NEW HEAD OF THE LIST */
- source_head=source;
- source_last=source;
- }
- }
- if (source_last) /* TERMINATE LAST NODE IN LIST */
- source_last->more=NULL;
- if (source_head) {
- memcpy(list,source_head,sizeof(LPOLetterSource_T));
- free(source_head);
- return 1; /* COMPACTED SOURCE LIST IS NON-EMPTY */
- }
- else { /* NOTHING LEFT IN LIST, SO BLANK IT */
- list->more=NULL;
- list->ipos= INVALID_LETTER_POSITION;
- return 0; /* COMPACTED SOURCE LIST IS EMPTY */
- }
-}
-
-
-
-
-
-
-void reindex_compact_rings(LPOSequence_T *seq)
-{
- int i,iring= -1,ring_start= -1;
- LOOPF (i,seq->length) { /* REMOVE ALL LINKS TO OLD, DELETED POSITIONS */
- if (iring==seq->letter[i].ring_id) {
- seq->letter[i].ring_id=ring_start; /* USE FIRST INDEX ON RING */
- seq->letter[i].align_ring= i-1; /* LINK TO LETTER TO LEFT */
- }
- else { /* START OF A NEW RING */
- if (ring_start>=0)
- seq->letter[ring_start].align_ring = i-1;
- iring=seq->letter[i].ring_id;
- seq->letter[i].ring_id=seq->letter[i].align_ring=ring_start=i;
- }
- }
- if (ring_start>=0)
- seq->letter[ring_start].align_ring = i-1;
-}
-
-
-
-
-#define DELETE_THIS_BUNDLE (-2)
-
-int remove_bundle(LPOSequence_T *seq,int ibundle,int delete_all_others)
-{
- int i,j=0,*old_to_new=NULL,new_length;
-
- if (delete_all_others) { /* INSTEAD OF DELETING THIS BUNDLE, */
- LOOP (i,seq->nsource_seq) /*MARK ALL OTHERS TO BE DELETED! */
- if (seq->source_seq[i].bundle_id!=ibundle)
- seq->source_seq[i].bundle_id = DELETE_THIS_BUNDLE;
- ibundle=DELETE_THIS_BUNDLE;
- }
-
- CALLOC(old_to_new,seq->length,int); /* CREATE MAPPING ARRAY */
- LOOPF (i,seq->length) {
- if (compact_sources(&seq->letter[i].source,ibundle,seq->source_seq)){
- if (i>j) /* COPY LETTER TO COMPACTED POSITION */
- memcpy(seq->letter+j,seq->letter+i,sizeof(LPOLetter_T));
- old_to_new[i]=j++; /* SAVE MAPPING FROM OLD TO NEW, COMPACTED POSITION*/
- }
- else {
- free_lpo_letters(1,seq->letter+i,FALSE); /* DUMP DATA FOR THIS LETTER */
- old_to_new[i]= INVALID_LETTER_POSITION;
- }
- }
- new_length=j;
- if (new_length<seq->length) /* ERASE UNUSED PORTIONS AFTER COMPACTED ARRAY*/
- memset(seq->letter+new_length,0,(seq->length - new_length)*sizeof(LPOLetter_T));
-
- LOOP (i,new_length) { /* REMOVE ALL LINKS TO OLD, DELETED POSITIONS */
- compact_links(&seq->letter[i].left,old_to_new);
- compact_links(&seq->letter[i].right,old_to_new);
- }
- seq->length=new_length;
- reindex_compact_rings(seq);
-
- FREE(old_to_new);
- return new_length;
-}
-
-
-
-
-
Deleted: trunk/packages/poa/trunk/seq_util.c
===================================================================
--- trunk/packages/poa/trunk/seq_util.c 2008-03-22 15:38:40 UTC (rev 1651)
+++ trunk/packages/poa/trunk/seq_util.c 2008-03-22 17:20:38 UTC (rev 1652)
@@ -1,296 +0,0 @@
-
-#include "default.h"
-#include "seq_util.h"
-
-
-
-/** randomizes seq[] by shuffling, and places the result in randseq[];
- if randseq[] and seq[] are distinct, seq[] is left unchanged */
-void shuffle_seq(int len,
- char seq[],
- char randseq[])
-{
- int i,j;
- char c;
-
- for (i=0;i<len;i++) {
- j=rand()%len; /* CHOOSE RANDOM POSITION j */
- c=seq[i]; /* SWAP POSITIONS i AND j */
- randseq[i]=seq[j];
- randseq[j]=c;
- }
-
- return;
-}
-
-
-
-
-
-
-/**@memo TRANSLATE FROM ASCII LETTERS TO COMPACTED NUMBERICAL INDEX:
- index_symbols(seq[i].length,seq[i].sequence,seq[i].sequence,
- m->nsymbol,m->symbol);
-*/
-/** converts characters in seq[] to the INDEX of the matching character in
- symbols[], and returns the result in out[] */
-void index_symbols(int nseq,char seq[],char out[],
- int nsymbs,char symbols[])
-{
- int i,j,k;
- LOOP (i,nseq) {
- k=nsymbs-1; /* DEFAULT: UNMATCHABLE SYMBOL */
- LOOP (j,nsymbs) { /* FIND MATCHING SYMBOL */
- if (symbols[j]==seq[i]) { /* FOUND IT! */
- k=j;
- break;
- }
- }
- out[i]=k; /* SAVE THE TRANSLATED CODE */
- }
- return;
-}
-
-
-
-
-
-
-int *Score_matrix_row=NULL;
-
-int best_match_qsort_cmp(const void *void_a,const void *void_b)
-{
- int *a=(int *)void_a,*b=(int *)void_b;
-
- if (Score_matrix_row[*a]>Score_matrix_row[*b])
- return -1;
- else if (Score_matrix_row[*a]<Score_matrix_row[*b])
- return 1;
- else /* EQUAL */
- return 0;
-}
-
-
-
-
-#ifdef SOURCE_EXCLUDED
-char DNA_symbols[1024];
-float DNA_rescale_score;
-#endif
-
-/** reads an alignment scoring matrix in the pam format */
-int read_score_matrix(char filename[],ResidueScoreMatrix_T *m)
-{
- int i,j,k,nsymb=0,found_symbol_line=0,isymb;
- char line[1024],dna_codes[256];
- FILE *ifile;
-
- /* GAP PENALTY DEFAULTS */
- m->gap_penalty_set[0][0]=m->gap_penalty_set[1][0]=12; /*SAVE PENALTIES*/
- m->gap_penalty_set[0][1]=m->gap_penalty_set[1][1]=2;
- m->gap_penalty_set[0][2]=m->gap_penalty_set[1][2]=0;
- m->trunc_gap_length = TRUNCATE_GAP_LENGTH;
- m->decay_gap_length = DECAY_GAP_LENGTH;
-
- ifile=fopen(filename,"r");
- if (!ifile) {
- WARN_MSG(USERR,(ERRTXT,"Can't open alignment matrix from %s\n",filename),"$Revision: 1.2.2.2 $");
- return -2; /* FAILED TO FIND FILE TO READ */
- }
-
- while (fgets(line,1023,ifile)) {
- if ('#'==line[0] || '\n'==line[0]) /* SKIP COMMENT OR BLANK LINES */
- continue;
-
- else if (1==sscanf(line,"GAP-TRUNCATION-LENGTH=%d",&i)) {
- m->trunc_gap_length = i;
- }
-
- else if (1==sscanf(line,"GAP-DECAY-LENGTH=%d",&i)) {
- m->decay_gap_length = i;
- }
-
- else if (3==sscanf(line,"GAP-PENALTIES=%d %d %d",&i,&j,&k)) {
- m->gap_penalty_set[0][0]=m->gap_penalty_set[1][0]=i; /*SAVE PENALTIES*/
- m->gap_penalty_set[0][1]=m->gap_penalty_set[1][1]=j;
- m->gap_penalty_set[0][2]=m->gap_penalty_set[1][2]=k;
- }
-
- else if (3==sscanf(line,"GAP-PENALTIES-X=%d %d %d",&i,&j,&k)) {
- m->gap_penalty_set[1][0]=i; /*SAVE PENALTIES ONLY FOR X DIRECTION*/
- m->gap_penalty_set[1][1]=j;
- m->gap_penalty_set[1][2]=k;
- }
-
-#ifdef SOURCE_EXCLUDED
- else if (1==sscanf(line,"DNACODES=%99s",dna_codes)) { /* READ DNACODES*/
- strcpy(DNA_symbols,dna_codes);/*SYMBOLS COUNTED AS DNA FOR AUTORECOG*/
- }
-
- else if (1==sscanf(line,"DNASCALE=%f",&DNA_rescale_score))
- continue;
-#endif
-
- else if (!found_symbol_line) { /* READ THIS LINE AS LIST OF SEQ SYMBOLS*/
- for (i=0;'\0'!=line[i];i++)
- if (!isspace(line[i])) /* IGNORE WHITESPACE */
- m->symbol[nsymb++]=line[i]; /* SAVE TO LIST OF SYMBOLS */
- found_symbol_line=1; /* SET FLAG SO WE NOW READ MATRIX SCORE VALUES */
- }
-
- else { /* READ SCORING MATRIX LINES */
- found_symbol_line=0; /* DEFAULT: FAILED TO FIND MATCHING SYMBOL IN LIST*/
- LOOP (isymb,nsymb) /* FIND MATCH TO THIS SYMBOL */
- if (m->symbol[isymb]==line[0]) {
- found_symbol_line=1; /* SIGNAL THAT WE SUCCESFULLY FOUND MATCH */
- j=1; /* SKIP FIRST CHARACTER: OUR SEQUENCE SYMBOL */
- LOOPF (i,nsymb) { /* READ ALL THE SCORE VALUES ON THIS LINE */
- if (1==sscanf(line+j,"%d%n",&(m->score[isymb][i]),&k))
- j+=k; /* ADVANCE THE READING POSITION */
- else { /* MISSING SCORE DATA: ERROR! */
- IF_GUARD(1,5.23,(ERRTXT,"Missing score value for pair %c:%c",
- m->symbol[isymb],m->symbol[i]),TRAP)
- ;
- fclose(ifile); /* CLOSE OUR STREAM */
- return -1;
- }
- }
- break;
- }
- IF_GUARD(!found_symbol_line,1.5,(ERRTXT,"Missing or unknown sequence symbol: %c",line[0]),TRAP) { /* ERROR: AN INVALID SYMBOL, NOT IN LIST */
- fclose(ifile); /* CLOSE OUR STREAM */
- return -1;
- }
- }
- }
- fclose(ifile);
-
- /* CONSTRUCT GAP PENALTY ARRAYS FROM GAP PARAMETERS: */
- m->max_gap_length = m->trunc_gap_length + m->decay_gap_length;
- CALLOC (m->gap_penalty_x, m->max_gap_length+2, LPOScore_T);
- CALLOC (m->gap_penalty_y, m->max_gap_length+2, LPOScore_T);
-
- /*** GAP OPENING PENALTY @ L=0->1 */
- m->gap_penalty_x[0] = m->gap_penalty_set[0][0];
- m->gap_penalty_y[0] = m->gap_penalty_set[1][0];
-
- /*** 1st AFFINE EXTENSION PENALTY (A1) @ L=1->2,2->3,...T-1->T */
- for (i=1;i<m->trunc_gap_length;i++) {
- m->gap_penalty_x[i] = m->gap_penalty_set[0][1];
- m->gap_penalty_y[i] = m->gap_penalty_set[1][1];
- }
-
- /*** DECAYING EXTENSION PENALTY (A1-->A2; skipped if D=0) @ L=T->T+1,...T+D-1->T+D */
- for (i=0;i<m->decay_gap_length;i++) {
- double dec_x = (m->gap_penalty_set[0][1] - m->gap_penalty_set[0][2]) / ((double)(m->decay_gap_length + 1));
- double dec_y = (m->gap_penalty_set[1][1] - m->gap_penalty_set[1][2]) / ((double)(m->decay_gap_length + 1));
- m->gap_penalty_x[i+m->trunc_gap_length] = m->gap_penalty_set[0][1] - (i+1) * dec_x;
- m->gap_penalty_y[i+m->trunc_gap_length] = m->gap_penalty_set[1][1] - (i+1) * dec_y;
- }
-
- /*** 2nd AFFINE EXTENSION PENALTY (A2) @ L>=T+D */
- m->gap_penalty_x[m->max_gap_length] = m->gap_penalty_set[0][2];
- m->gap_penalty_y[m->max_gap_length] = m->gap_penalty_set[1][2];
-
- m->gap_penalty_x[m->max_gap_length+1] = 0; /* DON'T REMOVE THIS!... SPECIAL STATE USED IN align_lpo. */
- m->gap_penalty_y[m->max_gap_length+1] = 0; /* DON'T REMOVE THIS!... SPECIAL STATE USED IN align_lpo. */
-
-
- LOOPF (i,nsymb) {
- Score_matrix_row= m->score[i]; /* ROW TO USE FOR SORTING best_match */
- LOOP (j,nsymb)
- m->best_match[i][j] = j;
- qsort(m->best_match[i],nsymb,sizeof(m->best_match[0][0]),
- best_match_qsort_cmp);
-#ifdef SOURCE_EXCLUDED
- printf("%c SORT",m->symbol[i]); /* TEST: PRINT OUT SORTED TABLE */
- LOOPF (j,nsymb)
- printf("\t%c:%d",m->symbol[m->best_match[i][j]],
- m->score[i][m->best_match[i][j]]);
- printf("\n");
-#endif
- }
-
- m->symbol[nsymb]='\0'; /* TERMINATE THE SYMBOL STRING */
- m->nsymbol=nsymb;
- return nsymb;
-}
-
-
-
-/** prints a scoring matrix, only including those symbols in subset[] */
-void print_score_matrix(FILE *ifile,ResidueScoreMatrix_T *m,char subset[])
-{
- int i,i_m,j,j_m,nsubset;
-
- nsubset=strlen(subset);
-
- printf(" ");
- LOOPF (i,nsubset)
- printf(" %c",subset[i]);
- printf("\n");
-
- LOOPF (i,nsubset) {
- LOOP (i_m,m->nsymbol)
- if (m->symbol[i_m]==subset[i])
- break;
- printf("%c",subset[i]);
- LOOPF (j,nsubset) {
- LOOP (j_m,m->nsymbol)
- if (m->symbol[j_m]==subset[j])
- break;
- printf("%3d",m->score[i_m][j_m]);
- }
- printf("\n");
- }
- return;
-}
-
-
-
-/** restricts seq[] to the set of allowed characters given by symbol[];
- other characters will be replaced by the default symbol[0] */
-int limit_residues(char seq[],char symbol[])
-{
- int i,len,nreplace=0;
-
- len=strlen(seq);
- for (i=strspn(seq,symbol);i<len;i=i+1+strspn(seq+i+1,symbol)) {
- seq[i]=symbol[0]; /* FORCE IT TO BE A VALID SYMBOL */
- nreplace++; /* COUNT TOTAL REPLACEMENTS */
- }
- return nreplace;
-}
-
-
-
-
-
-/** RETURNS THE COMPLEMENTARY BASE *IN LOWER CASE* */
-char complementary_base(char base)
-{
- switch (base) {
- case 'A': case 'a': return 't';
- case 'T': case 't': case 'U': case 'u': return 'a';
- case 'G': case 'g': return 'c';
- case 'C': case 'c': return 'g';
- default: return base;
- }
-}
-
-/** REVERSE COMPLEMENTS seq[] IN PLACE, AND RETURNS POINTER TO seq
----------------------------------------------------------------
------------------------------------------------------------
-*/
-char *reverse_complement(char seq[])
-{
- int i,j;
- char c;
- for ((i=0),(j=strlen(seq)-1);i<=j;i++,j--) {/* SWAP FROM ENDS TO CENTER*/
- c=complementary_base(seq[i]); /* SWAP ENDS AND COMPLEMENT */
- seq[i]=complementary_base(seq[j]);
- seq[j]=c;
- }
- return seq;
-}
-
Deleted: trunk/packages/poa/trunk/seq_util.h
===================================================================
--- trunk/packages/poa/trunk/seq_util.h 2008-03-22 15:38:40 UTC (rev 1651)
+++ trunk/packages/poa/trunk/seq_util.h 2008-03-22 17:20:38 UTC (rev 1652)
@@ -1,77 +0,0 @@
-
-#ifndef SEQ_UTIL_HEADER_INCLUDED
-#define SEQ_UTIL_HEADER_INCLUDED
-
-/* NB: YOU *MUST* TYPEDEF Sequence_T ELSEWHERE, IN ORDER FOR THE PROTOTYPES
- BELOW TO WORK... */
-
-/* USE_PROJECT_HEADER IS A GENERAL WAY TO SNEAK IN CUSTOMIZATION
- BEFORE PROCESSING DEFINITIONS IN THIS FILE */
-#ifdef USE_PROJECT_HEADER
-#include "project.h"
-#endif
-
-#ifndef RESIDUE_SCORE_DEFINED
-typedef int ResidueScore_T; /* DEFAULT: USE int FOR SCORING */
-#endif
-
-
-
-
-#define FASTA_NAME_MAX 4096 /*define the max length of the name */
-#define SEQ_LENGTH_MAX 32768 /* define the maximum length of each seq */
-
-enum {
- dont_switch_case,
- switch_case_to_lower,
- switch_case_to_upper
-};
-
-
-
-#define MATRIX_SYMBOL_MAX 128
-typedef struct {
- int nsymbol;
- char symbol[MATRIX_SYMBOL_MAX];
- ResidueScore_T score[MATRIX_SYMBOL_MAX][MATRIX_SYMBOL_MAX];
- int best_match[MATRIX_SYMBOL_MAX][MATRIX_SYMBOL_MAX];
-
- ResidueScore_T gap_penalty_set[2][3];
- int trunc_gap_length;
- int decay_gap_length;
-
- ResidueScore_T *gap_penalty_x, *gap_penalty_y;
- int max_gap_length;
-
- int nfreq; /* STORE FREQUENCIES OF AMINO ACIDS FOR BALANCING MATRIX...*/
- char freq_symbol[MATRIX_SYMBOL_MAX];
- float freq[MATRIX_SYMBOL_MAX];
-} ResidueScoreMatrix_T;
-
-/****************************************************** seq_util.c */
-void shuffle_seq(int len,
- char seq[],
- char randseq[]);
-
-void index_symbols(int nseq,char seq[],char out[],
- int nsymbs,char symbols[]);
-
-int read_score_matrix(char filename[],ResidueScoreMatrix_T *m);
-
-void print_score_matrix(FILE *ifile,ResidueScoreMatrix_T *m,char subset[]);
-
-int limit_residues(char seq[],char symbol[]);
-
-int create_seq(int nseq,Sequence_T **seq,char seq_name[],char seq_title[],char tmp_seq[],int do_switch_case);
-
-char *reverse_complement(char seq[]);
-
-
-
-/******************************************************* fasta_format.c */
-int read_fasta(FILE *seq_file,Sequence_T **seq,
- int do_switch_case,char **comment);
-
-void write_fasta(FILE *ifile,char name[],char title[],char seq[]);
-
-#endif
Deleted: trunk/packages/poa/trunk/stringptr.c
===================================================================
--- trunk/packages/poa/trunk/stringptr.c 2008-03-22 15:38:40 UTC (rev 1651)
+++ trunk/packages/poa/trunk/stringptr.c 2008-03-22 17:20:38 UTC (rev 1652)
@@ -1,95 +0,0 @@
-
-#include "default.h"
-
-
-/*******************************************************************
- *
- * STRINGPTR_CAT:
- * holds string and count of last alloc'd size. Conveniently
- * manages static and dynamic strings, allowing auto-resize
- * without worrying about memory management.
- *
- * This function adds any string to the string ptr. It even
- * insulates against possibility that S2 is a substring of S1
- * by pre-copying it.
- *
- ******************************************************************/
-
-char *stringptr_cat_pos(stringptr *s1,const char s2[],int *pos) /* ~~g --- */
-{
- int s2_len,total_len;
- char *stringptr_cat_temp_p=NULL,*s2_temp=NULL;
-
- if (s2 == NULL) /* it might be better to make this a debug cope */
- return s1->p;
-
- if (0 == s1->last_alloc) /* SAVE STATIC STRING */
- stringptr_cat_temp_p = s1->p; /* SAVE OLD */
-
- total_len=s2_len=strlen(s2)+1;
- CALLOC(s2_temp,s2_len,char); /* ALLOCATE TEMP STORAGE */
- memcpy(s2_temp,s2,s2_len); /* COPY THE STRING */
-
- if (s1->p) { /* CALCULATE ADDITIONAL SPACE NEEDED FOR ORIGINAL STRING */
- if (pos) /* USE THE CALLER-SUPPLIED STRING LENGTH */
- total_len += *pos;
- else /* OTHERWISE MEASURE IT */
- total_len += strlen(s1->p);
- }
-
- GETMEM(s1->p,total_len,
- s1->last_alloc,STRINGPTR_BUFFER_CHUNK,char);
-
- if (stringptr_cat_temp_p) /* PUT OLD STATIC STRING BACK IN */
- strcpy(s1->p,stringptr_cat_temp_p);
-
- if (pos) { /* ATTACH NEW STRING AT CALLER-SUPPLIED END-POSITION */
- strcpy(s1->p + *pos,s2_temp);
- *pos = total_len-1; /* EXCLUDE TERMINATOR */
- }
- else /* OTHERWISE strcat AS USUAL */
- strcat(s1->p,s2_temp);
-
- FREE(s2_temp); /* FREE THE TEMP STORAGE */
-
- return s1->p;
-}
-
-
-char *stringptr_cat(stringptr *s1,const char s2[]) /* ~~g --- */
-{
- return stringptr_cat_pos(s1,s2,NULL);
-}
-
-
-char *stringptr_cpy(stringptr *s1,const char s2[]) /* ~~g --- */
-{
- GETMEM(s1->p,strlen(s2)+1,s1->last_alloc,STRINGPTR_BUFFER_CHUNK,char);
- strcpy(s1->p,s2);
-
- return s1->p;
-}
-
-
-
-
-
-
-/**stringptr_free*************************************************
- *
- * stringptr_free:
- * AUTHOR: tal
- * Wed Jul 27 03:04:41 PDT 1994
- *
- * Frees stringptr type.
- *
- ***************************************************************/
-
-int stringptr_free(stringptr *s) /* ~~g --- */
-{
- FREE(s->p);
- s->last_alloc=0;
- return 0;
-}
-
-
More information about the debian-med-commit
mailing list