[med-svn] [codonw] 01/03: Imported Upstream version 1.4.4
Sascha Steinbiss
sascha-guest at moszumanska.debian.org
Fri Nov 20 23:17:19 UTC 2015
This is an automated email from the git hooks/post-receive script.
sascha-guest pushed a commit to branch master
in repository codonw.
commit cd6e300044cf9dea91487a89fa282ec0d23f4f1e
Author: Sascha Steinbiss <sascha at steinbiss.name>
Date: Fri Nov 20 21:18:43 2015 +0000
Imported Upstream version 1.4.4
---
Makefile | 65 ++
Makefile.orig | 65 ++
README_coa.txt | 156 +++
README_indices.txt | 141 +++
READ_coa.txt | 167 ++++
Readme.txt | 115 +++
Recoding.txt | 80 ++
Tutorial.txt | 350 +++++++
codonW.h | 683 +++++++++++++
codonW.hlp | 502 ++++++++++
codonWinstall | 271 +++++
codon_us.c | 2159 +++++++++++++++++++++++++++++++++++++++
codons.c | 1149 +++++++++++++++++++++
commline.c | 755 ++++++++++++++
coresp.c | 1673 +++++++++++++++++++++++++++++++
indices.txt | 139 +++
input.dat | 2835 ++++++++++++++++++++++++++++++++++++++++++++++++++++
menu.c | 1302 ++++++++++++++++++++++++
open_fil.c | 236 +++++
tester.c | 239 +++++
20 files changed, 13082 insertions(+)
diff --git a/Makefile b/Makefile
new file mode 100755
index 0000000..936befc
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,65 @@
+override cflags = $(CFLAGS) -g
+
+objects = codon_us.o codons.o open_fil.o commline.o menu.o tester.o coresp.o
+linked = rscu cu aau raau tidy reader cutab cutot transl bases base3s dinuc cai fop gc3s gc cbi enc
+
+CC=cc
+CFLAGS= -O -DBSD
+LN=ln -f
+
+
+all: codonw links
+
+codonw: $(objects)
+ $(CC) $(CFLAGS) $(objects) -o codonw -lm
+
+clean:
+ \rm -f $(objects)
+
+cleanall:
+ \rm -f $(objects) codonw Makefile $(linked)
+
+realclean:
+ \rm -f $(objects) codonw Makefile $(linked)
+
+codon_us.o: codon_us.c codonW.h
+ $(CC) -c $(CFLAGS) codon_us.c
+
+menu.o: menu.c codonW.h
+ $(CC) -c $(CFLAGS) menu.c
+
+codons.o: codons.c codonW.h
+ $(CC) -c $(CFLAGS) codons.c
+
+coresp.o: coresp.c codonW.h
+ $(CC) -c $(CFLAGS) coresp.c
+
+open_fil.o: open_fil.c codonW.h
+ $(CC) -c $(CFLAGS) open_fil.c
+
+commline.o: commline.c codonW.h
+ $(CC) -c $(CFLAGS) commline.c
+
+tester.o: tester.c codonW.h
+ $(CC) -c $(CFLAGS) tester.c
+
+links: codonw
+ $(LN) codonw rscu
+ $(LN) codonw cu
+ $(LN) codonw aau
+ $(LN) codonw raau
+ $(LN) codonw tidy
+ $(LN) codonw reader
+ $(LN) codonw cutab
+ $(LN) codonw cutot
+ $(LN) codonw transl
+ $(LN) codonw bases
+ $(LN) codonw base3s
+ $(LN) codonw dinuc
+ $(LN) codonw cai
+ $(LN) codonw fop
+ $(LN) codonw gc3s
+ $(LN) codonw gc
+ $(LN) codonw cbi
+ $(LN) codonw enc
+
diff --git a/Makefile.orig b/Makefile.orig
new file mode 100755
index 0000000..936befc
--- /dev/null
+++ b/Makefile.orig
@@ -0,0 +1,65 @@
+override cflags = $(CFLAGS) -g
+
+objects = codon_us.o codons.o open_fil.o commline.o menu.o tester.o coresp.o
+linked = rscu cu aau raau tidy reader cutab cutot transl bases base3s dinuc cai fop gc3s gc cbi enc
+
+CC=cc
+CFLAGS= -O -DBSD
+LN=ln -f
+
+
+all: codonw links
+
+codonw: $(objects)
+ $(CC) $(CFLAGS) $(objects) -o codonw -lm
+
+clean:
+ \rm -f $(objects)
+
+cleanall:
+ \rm -f $(objects) codonw Makefile $(linked)
+
+realclean:
+ \rm -f $(objects) codonw Makefile $(linked)
+
+codon_us.o: codon_us.c codonW.h
+ $(CC) -c $(CFLAGS) codon_us.c
+
+menu.o: menu.c codonW.h
+ $(CC) -c $(CFLAGS) menu.c
+
+codons.o: codons.c codonW.h
+ $(CC) -c $(CFLAGS) codons.c
+
+coresp.o: coresp.c codonW.h
+ $(CC) -c $(CFLAGS) coresp.c
+
+open_fil.o: open_fil.c codonW.h
+ $(CC) -c $(CFLAGS) open_fil.c
+
+commline.o: commline.c codonW.h
+ $(CC) -c $(CFLAGS) commline.c
+
+tester.o: tester.c codonW.h
+ $(CC) -c $(CFLAGS) tester.c
+
+links: codonw
+ $(LN) codonw rscu
+ $(LN) codonw cu
+ $(LN) codonw aau
+ $(LN) codonw raau
+ $(LN) codonw tidy
+ $(LN) codonw reader
+ $(LN) codonw cutab
+ $(LN) codonw cutot
+ $(LN) codonw transl
+ $(LN) codonw bases
+ $(LN) codonw base3s
+ $(LN) codonw dinuc
+ $(LN) codonw cai
+ $(LN) codonw fop
+ $(LN) codonw gc3s
+ $(LN) codonw gc
+ $(LN) codonw cbi
+ $(LN) codonw enc
+
diff --git a/README_coa.txt b/README_coa.txt
new file mode 100755
index 0000000..eb6410e
--- /dev/null
+++ b/README_coa.txt
@@ -0,0 +1,156 @@
+
+
+README.coa
+
+The permanent result files from a COA created by CodonW have the extension
+".coa"; for a description of their contents see Table 1.
+
+Short description of output files created by correspondence analysis in
+CodonW.
+
+summary.coa
+This file contains a summary of all the information generated by
+correspondence analysis, including all the data written to files listed
+below, except for the output written to cusort.coa.
+
+eigen.coa
+Each axis generated in the correspondence analysis is represented by a row
+of information. Each row consists of four columns, (1) the number of the
+axis, (2) the axis eigenvalue, (3) the relative inertia of the axis, (4) the
+sum of the relative inertia.
+
+amino.coa or codon.coa
+Each codon or amino acid included in the correspondence analysis is
+represented by a row. The first column is description of the variable, the
+subsequent columns contain the coordinate of the codon or amino acid on the
+axes, the number of axes is user definable.
+
+genes.coa
+Each row represents one gene, the first column contains a unique description
+for each gene, and subsequent columns contain the coordinates for each of
+the recorded axis. If additional genes are added to the correspondence
+analysis (advanced correspondence analysis option), the coordinates of these
+genes are appended to this file.
+
+cusort.coa
+Contains the codon usage of each gene, sorted by the gene's coordinate on
+the principal axis; this information is used to generate the table in hilo.coa.
+
+hilo.coa
+This file records a two-way chi-squared contingency test between two subsets
+(as defined by the "advanced correspondence analysis options") of genes
+positioned at the extremes of axis 1 (cusort.coa).
+
+cai.coa
+Contains the relative usage of each codon within each synonym family, the
+most frequent codon assigned the value one and all other codons are
+expressed relative to this. This file can be used to calculate species
+specific CAI values.
+
+fop.coa and cbi.coa
+Contains a list of the optimal codons and non-optimal codons as identified
+in the file "hilo.coa". The format of this file can be utilised by CodonW to
+calculate Fop and CBI using a specific choice of optimal codons.
+
+inertia.coa
+This file is only generated if the exhaustive output option is selected
+under the advanced correspondence analysis menu. It contains four tables of
+information, the first two report the absolute contribution of each gene and
+codon (or amino acid) to the inertia explained by each axis. The second two
+tables report the fraction of variation in each gene and codon (or amino
+acid) explained by each axis.
+
+codon.coa and hilo.coa are not generated during the correspondence analysis
+of amino acids.
+
+
+Detailed explanation of file contents
+
+
+summary.coa
+========================================
+Correspondence analysis generates a large volume of data; CodonW writes the
+essential data necessary to interpret the correspondence analysis to the
+file "summary.coa".
+
+genes.coa codons.coa amino.coa
+========================================
+The most complex analysis that CodonW performs is correspondence analysis
+(COA). COA creates a series of orthogonal axes to identify trends that
+explain the data variation, with each subsequent axis explaining a
+decreasing amount of the variation. COA positions each gene and codon (or
+amino acid) on these axes. An important property is that the ordination of
+the rows (genes) and columns (codons or amino acids) are superimposable.
+
+
+eigen.coa
+========================================
+The eigenvalues of the principal trends, as well as the more accessible
+fraction (with the cumulative total) of the total data inertia that each
+axis explains, are recorded to summary.coa and eigen.coa.
+
+
+cusort.coa
+========================================
+To simplify analysis of codon usage, CodonW assumes that the principal trend
+is correlated with gene expression. It uses this assumption to identify
+putative optimal codons. Though the adage GIGO ("garbage in, garbage out")
+must be stressed: it is the researcher's responsibility to establish that the
+principal trend is correlated with gene expression (see tutorial for some
+examples of how to do this).
+
+To identify the putative optimal codons, the genes are sorted according to
+their position on the principal axis; the sorted codon usage of these genes is
+written to the file "cusort.coa". Then a number of genes, decided by the
+advanced correspondence analysis menu option "number of genes used to
+identify optimal codons", are read from the start and end of this file (i.e.
+equivalent to the extremes of the principal axis); the codon usage of each set
+of genes is totalled. The set of genes with the lower Nc (more highly
+biased) is putatively
+identified as the more highly expressed.
+
+hilo.coa
+========================================
+Optimal codons are defined as those codons that occur significantly more
+often in highly expressed genes relative to their frequency in lowly
+expressed genes. Significance is assessed by a two-way chi square
+contingency test with the criterion of p < 0.01. The advantage of using a
+test of significance to identify optimal codons is that variation in codon
+usage between highly and lowly expressed genes, that is due to random noise
+is suppressed, but a disadvantage is that the test is dependent on sample
+size.
+
+After CodonW does a two-way chi-squared test on the genes taken from the
+extremes of axis 1, their codon usage and RSCU is output as a table to
+"summary.coa" and "hilo.coa". Those codons which have been putatively
+identified as optimal (p < 0.01) are indicated with an asterisk (*). Though
+not considered optimal by CodonW, codons that occur more frequently in the
+highly expressed dataset at 0.01 < p < 0.05 are indicated with an at sign
+(@).
+
+
+fop.coa cbi.coa cai.coa
+========================================
+CodonW measures the degree to which the codon usage of a gene has adapted
+towards the usage of optimal codons. It does this by calculating these
+indices: the frequency of optimal codons (Fop), codon bias index (CBI), and
+codon adaptation index (CAI). To calculate these indices, information about
+codon usage in the species being analysed is needed. The indices Fop and CBI
+use the optimal codons for the species. The index CAI uses codon adaptation
+values.
+For some species this information is known, and for these the optimal codons
+and codon adaptiveness values are built into CodonW (see the "Change
+Defaults" menu). For other species these indices cannot be calculated unless
+the additional information is known. During calculation of these indices the
+user is prompted for input files.
+During a COA CodonW generates the output files "cai.coa", "fop.coa" and
+"cbi.coa". These files can be used as input files for their respective
+indices (they are already in the correct format).
+Again it must be stressed that CodonW must make a number of assumptions to
+generate these files. These are: that the major trend in the codon usage is
+correlated with expression level; that the dataset contains highly expressed
+genes; that the genes used to identify optimal codons were highly
+expressed. If these assumptions are valid then the files "cbi.coa",
+"cai.coa" and "fop.coa" can be used to calculate the indices CBI, CAI and
+Fop respectively.
+
diff --git a/README_indices.txt b/README_indices.txt
new file mode 100755
index 0000000..ab79a2a
--- /dev/null
+++ b/README_indices.txt
@@ -0,0 +1,141 @@
+Codon usage indices
+
+This document describes the indices calculated by CodonW. By default only
+the G+C content of the sequence is reported, the others being dependent on
+the genetic code selected. More than one index may be calculated at the same
+time.
+
+Codon Adaptation Index (CAI) (Sharp and Li 1987).
+CAI is a measurement of the relative adaptiveness of the codon usage of a
+gene towards the codon usage of highly expressed genes. The relative
+adaptiveness (w) of each codon is the ratio of the usage of each codon, to
+that of the most abundant codon for the same amino acid. The relative
+adaptiveness of codons, albeit for a limited choice of species, can be
+selected from Menu 3. The user can also input a personal choice of values.
+The CAI index is defined as the geometric mean of these relative
+adaptiveness values. Non-synonymous codons and termination codons (dependent
+on genetic code) are excluded.
+
+To prevent a codon absent from the reference set but present in other genes
+from having a relative adaptiveness value of zero, which would cause CAI to
+evaluate to zero for any genes which used that codon, it was suggested that
+absent codons should be assigned a frequency of 0.5 when estimating w (Sharp
+and Li 1987). An alternative suggestion was that w should be adjusted to
+0.01 where otherwise it would be less than this value (Bulmer 1988). CodonW
+does not adjust the w value if a non-zero input value is found; zero values
+are assigned a value of 0.01.
+
+Frequency of Optimal codons (Fop) (Ikemura 1981).
+This index is the ratio of optimal codons to synonymous codons (genetic
+code dependent). Optimal codons for several species are in-built and can be
+selected using Menu 3. By default, the optimal codons of E. coli are
+assumed. The user may also enter a personal choice of optimal codons. If
+rare synonymous codons have been identified, there is a choice of
+calculating the original Fop index or a modified Fop index. Fop values for
+the original index are always between 0 (where no optimal codons are used)
+and 1 (where only optimal codons are used). When calculating the modified
+Fop index, negative values are adjusted to zero.
+
+Codon Bias Index (CBI) (Bennetzen and Hall 1982).
+Codon bias index is another measure of directional codon bias, it measures
+the extent to which a gene uses a subset of optimal codons. CBI is similar
+to Fop as used by Ikemura, with expected usage used as a scaling factor. In a
+gene with extreme codon bias, CBI will equal 1.0, in a gene with random
+codon usage CBI will equal 0.0. Note that it is possible for the number of
+optimal codons to be less than expected by random chance. This results in a
+negative value for CBI.
+
+The effective number of codons (NC) (Wright 1990).
+This index is a simple measure of overall codon bias and is analogous to the
+effective number of alleles measure used in population genetics. Knowledge
+of the optimal codons or a reference set of highly expressed genes is
+unnecessary. Initially the homozygosity for each amino acid is estimated
+from the squared codon frequencies (see Equation 5).
+
+
+If amino acids are rare or missing, adjustments must be made. When
+there are no amino acids in a synonymous family, Nc is not calculated
+as the gene is either too short or has extremely skewed amino acid
+usage (Wright 1990). An exception to this is made for genetic codes
+where isoleucine is the only 3-fold synonymous amino acid, and is not
+used in the protein gene. The reported value of Nc is always between 20
+(when only one codon is effectively used for each amino acid) and 61
+(when codons are used randomly). If the calculated Nc is greater than
+61 (because codon usage is more evenly distributed than expected), it
+is adjusted to 61.
+
+G+C content of the gene.
+The frequency of nucleotides that are guanine or cytosine.
+
+G+C content 3rd position of synonymous codons (GC3s).
+This is the fraction of codons that are synonymous at the third codon
+position and which have either a guanine or cytosine at that third codon
+position.
+
+Silent base compositions.
+Selection of this option calculates four separate indices, i.e. G3s, C3s,
+A3s & T3s. Although correlated with GC3s, this index is not directly
+comparable. It quantifies the usage of each base at synonymous third codon
+positions. When calculating GC3s each synonymous amino acid has at least one
+synonym with G or C in the third position. Two or three fold synonymous
+amino acids do not have an equal choice between bases in the synonymous
+third position. The index A3s is the frequency that codons have an A at their
+synonymous third position, relative to the amino acids that could have a
+synonym with A in the synonymous third codon position. The codon usage
+analysis of Caenorhabditis elegans identified a trend correlated with the
+frequency of G3s. Though it was not clear whether it reflected variation in
+base composition (or mutational biases) among regions of the C. elegans
+genome, or another factor (Stenico et al. 1994).
+
+Length silent sites (Lsil).
+Frequency of synonymous codons.
+
+Length amino acids (Laa).
+Equivalent to the number of translatable codons.
+
+Hydropathicity of protein.
+The general average hydropathicity or (GRAVY) score, for the hypothetical
+translated gene product. It is calculated as the arithmetic mean of the sum
+of the hydropathic indices of each amino acid (Kyte and Doolittle 1982).
+This index has been used to quantify the major COA trends in the amino acid
+usage of E. coli genes (Lobry and Gautier 1994).
+
+Aromaticity score
+The frequency of aromatic amino acids (Phe, Tyr, Trp) in the hypothetical
+translated gene product. The hydropathicity and aromaticity protein scores
+are indices of amino acid usage. The strongest trend in the variation in the
+amino acid composition of E. coli genes is correlated with protein
+hydropathicity, the second trend is correlated with gene expression, while
+the third is correlated with aromaticity (Lobry and Gautier 1994). The
+variation in amino acid composition can have applications for the analysis
+of codon usage. If total codon usage is analysed, a component of the
+variation will be due to differences in the amino acid composition of genes.
+
+
+
+Bennetzen, J. L., and B. D. Hall, (1982). Codon selection in yeast. Journal
+of Biological Chemistry 257: 3026-3031.
+Bulmer, M., (1988). Are codon usage patterns in unicellular organisms
+determined by selection-mutation balance? Journal of Evolutionary
+Biology 1: 15-26.
+Ikemura, T., (1981). Correlation between the abundance of Escherichia coli
+transfer RNAs and the occurrence of the respective codons in its
+protein genes: a proposal for a synonymous codon choice that is
+optimal for the E. coli system. Journal of Molecular Biology 151: 389-
+409.
+Kyte, J., and R. Doolittle, (1982). A simple method for displaying the
+hydropathic character of a protein. Journal of Molecular Biology 157:
+105-132.
+Lobry, J. R., and C. Gautier, (1994). Hydrophobicity, expressivity and
+aromaticity are the major trends of amino acid usage in 999
+Escherichia coli chromosome encoded genes. Nucleic Acids Research 22:
+3174-3180.
+Sharp, P. M., and W. H. Li, (1987). The codon adaptation index - a measure of
+directional synonymous codon usage bias, and its potential
+applications. Nucleic Acids Research 15: 1281-1295.
+Stenico, M., A. T. Lloyd and P. M. Sharp, (1994). Codon usage in
+Caenorhabditis elegans: delineation of translational selection and
+mutational biases. Nucleic Acids Research 22: 2437-2446.
+Wright, F., (1990). The effective number of codons used in a gene. Gene 87:
+23-29.
+
diff --git a/READ_coa.txt b/READ_coa.txt
new file mode 100755
index 0000000..8552d9d
--- /dev/null
+++ b/READ_coa.txt
@@ -0,0 +1,167 @@
+
+========================================
+
+CodonW was written by John Peden in the laboratory
+of Paul Sharp at the University of Nottingham. It is distributed under the
+terms of the GNU public license, see the file License included with the
+distribution.
+
+========================================
+
+README.coa
+
+The permanent result files from a COA created by CodonW have the extension
+".coa"; for a description of their contents see Table 1.
+
+Short description of output files created by correspondence analysis in
+CodonW.
+
+summary.coa
+This file contains a summary of all the information generated by
+correspondence analysis, including all the data written to files listed
+below, except for the output written to cusort.coa.
+
+eigen.coa
+Each axis generated in the correspondence analysis is represented by a row
+of information. Each row consists of four columns, (1) the number of the
+axis, (2) the axis eigenvalue, (3) the relative inertia of the axis, (4) the
+sum of the relative inertia.
+
+amino.coa or codon.coa
+Each codon or amino acid included in the correspondence analysis is
+represented by a row. The first column is description of the variable, the
+subsequent columns contain the coordinate of the codon or amino acid on the
+axes, the number of axes is user definable.
+
+genes.coa
+Each row represents one gene, the first column contains a unique description
+for each gene, and subsequent columns contain the coordinates for each of
+the recorded axis. If additional genes are added to the correspondence
+analysis (advanced correspondence analysis option), the coordinates of these
+genes are appended to this file.
+
+cusort.coa
+Contains the codon usage of each gene, sorted by the gene's coordinate on
+the principal axis; this information is used to generate the table in hilo.coa.
+
+hilo.coa
+This file records a two-way chi-squared contingency test between two subsets
+(as defined by the "advanced correspondence analysis options") of genes
+positioned at the extremes of axis 1 (cusort.coa).
+
+cai.coa
+Contains the relative usage of each codon within each synonym family, the
+most frequent codon assigned the value one and all other codons are
+expressed relative to this. This file can be used to calculate species
+specific CAI values.
+
+fop.coa and cbi.coa
+Contains a list of the optimal codons and non-optimal codons as identified
+in the file "hilo.coa". The format of this file can be utilised by CodonW to
+calculate Fop and CBI using a specific choice of optimal codons.
+
+inertia.coa
+This file is only generated if the exhaustive output option is selected
+under the advanced correspondence analysis menu. It contains four tables of
+information, the first two report the absolute contribution of each gene and
+codon (or amino acid) to the inertia explained by each axis. The second two
+tables report the fraction of variation in each gene and codon (or amino
+acid) explained by each axis.
+
+codon.coa and hilo.coa are not generated during the correspondence analysis
+of amino acids.
+
+
+Detailed explanation of file contents
+
+
+summary.coa
+========================================
+Correspondence analysis generates a large volume of data; CodonW writes the
+essential data necessary to interpret the correspondence analysis to the
+file "summary.coa".
+
+genes.coa codons.coa amino.coa
+========================================
+The most complex analysis that CodonW performs is correspondence analysis
+(COA). COA creates a series of orthogonal axes to identify trends that
+explain the data variation, with each subsequent axis explaining a
+decreasing amount of the variation. COA positions each gene and codon (or
+amino acid) on these axes. An important property is that the ordination of
+the rows (genes) and columns (codons or amino acids) are superimposable.
+
+
+eigen.coa
+========================================
+The eigenvalues of the principal trends, as well as the more accessible
+fraction (with the cumulative total) of the total data inertia that each
+axis explains, are recorded to summary.coa and eigen.coa.
+
+
+cusort.coa
+========================================
+To simplify analysis of codon usage, CodonW assumes that the principal trend
+is correlated with gene expression. It uses this assumption to identify
+putative optimal codons. Though the adage GIGO ("garbage in, garbage out")
+must be stressed: it is the researcher's responsibility to establish that the
+principal trend is correlated with gene expression (see tutorial for some
+examples of how to do this).
+
+To identify the putative optimal codons, the genes are sorted according to
+their position on the principal axis; the sorted codon usage of these genes is
+written to the file "cusort.coa". Then a number of genes, decided by the
+advanced correspondence analysis menu option "number of genes used to
+identify optimal codons", are read from the start and end of this file (i.e.
+equivalent to the extremes of the principal axis); the codon usage of each set
+of genes is totalled. The set of genes with the lower Nc (more highly
+biased) is putatively
+identified as the more highly expressed.
+
+hilo.coa
+========================================
+Optimal codons are defined as those codons that occur significantly more
+often in highly expressed genes relative to their frequency in lowly
+expressed genes. Significance is assessed by a two-way chi square
+contingency test with the criterion of p < 0.01. The advantage of using a
+test of significance to identify optimal codons is that variation in codon
+usage between highly and lowly expressed genes, that is due to random noise
+is suppressed, but a disadvantage is that the test is dependent on sample
+size.
+
+After CodonW does a two-way chi-squared test on the genes taken from the
+extremes of axis 1, their codon usage and RSCU is output as a table to
+"summary.coa" and "hilo.coa". Those codons which have been putatively
+identified as optimal (p < 0.01) are indicated with an asterisk (*). Though
+not considered optimal by CodonW, codons that occur more frequently in the
+highly expressed dataset at 0.01 < p < 0.05 are indicated with an at sign
+(@).
+
+
+fop.coa cbi.coa cai.coa
+========================================
+CodonW measures the degree to which the codon usage of a gene has adapted
+towards the usage of optimal codons. It does this by calculating these
+indices: the frequency of optimal codons (Fop), codon bias index (CBI), and
+codon adaptation index (CAI). To calculate these indices, information about
+codon usage in the species being analysed is needed. The indices Fop and CBI
+use the optimal codons for the species. The index CAI uses codon adaptation
+values.
+For some species this information is known, and for these the optimal codons
+and codon adaptiveness values are built into CodonW (see the "Change
+Defaults" menu). For other species these indices cannot be calculated unless
+the additional information is known. During calculation of these indices the
+user is prompted for input files.
+During a COA CodonW generates the output files "cai.coa", "fop.coa" and
+"cbi.coa". These files can be used as input files for their respective
+indices (they are already in the correct format).
+Again it must be stressed that CodonW must make a number of assumptions to
+generate these files. These are: that the major trend in the codon usage is
+correlated with expression level; that the dataset contains highly expressed
+genes; that the genes used to identify optimal codons were highly
+expressed. If these assumptions are valid then the files "cbi.coa",
+"cai.coa" and "fop.coa" can be used to calculate the indices CBI, CAI and
+Fop respectively.
+
+
+For the most up to date version see http://codonw.sourceforge.net
+
diff --git a/Readme.txt b/Readme.txt
new file mode 100755
index 0000000..9dfba9b
--- /dev/null
+++ b/Readme.txt
@@ -0,0 +1,115 @@
+
+
+CodonW is a package for codon usage analysis. It was designed to simplify
+Multivariate Analysis (MVA) of codon usage. The MVA method employed in
+CodonW is correspondence analysis (COA) (the most popular MVA method for
+codon usage analysis). CodonW can generate a COA for codon usage,
+relative synonymous codon usage or amino acid usage. Additional analyses
+of codon usage include investigation of optimal codons, codon and
+dinucleotide bias, and/or base composition.
+
+CodonW also has the capacity to analyse sequences encoded by genetic
+codes other than the universal code.
+
+Why call it codonW?
+
+Well first you must realise that "clustal" (a very popular multiple
+alignment program by Des Higgins) was originally written in Paul's lab in
+Trinity College Dublin. Clustal has since been rewritten from FORTRAN into
+C and undergone several name changes: clustal -> clustalv -> clustalw ->
+clustalx. There was also a program called "codons" written in FORTRAN by
+Andrew Lloyd (a post-doc in Paul's lab), this was the original inspiration
+for codonW. An early version of codonW, written in C, was called codonv.
+When the code was enhanced to include multivariate analysis, what better
+name than codonW.
+
+
+CodonW version 1.3 June 1997
+=================
+
+The source code for CodonW can be obtained from
+ftp://molbiol.ox.ac.uk/cu/codonW.tar.Z. Binaries for a number of platforms
+are also available at this site see ftp://molbiol.ox.ac.uk/cu.
+
+
+To Install and Build on UNIX Platforms
+=================
+
+Get the source code from ftp://molbiol.ox.ac.uk/cu/codonW.tar.Z Change
+directory to the directory where you intend to install CodonW.
+
+uncompress codonW.tar.Z
+tar -xvf codonW.tar
+cd codonw
+./codonWinstall all (this writes a makefile and then builds codonw)
+
+This will ask a few questions regarding 'make' and 'cc' and then configure
+the installation and compile the programs. If you don't understand the
+questions, just accept the default by pressing the return key and the
+installation should be OK using the defaults. The install script also
+creates a number of links to the compiled executable codonW. These links
+allow codonW to emulate other useful codon usage analysis and sequence
+manipulation software by bypassing the menu interface (for more information
+see README.links). Alternatively you can just elect to only build the main
+program, and not install the linked programs.
+
+./codonWinstall codonw (compile only the executable codonw)
+
+Once you have successfully built codonw, try these commands to get you
+started.
+
+./codonw -help (for commandline summary)
+./codonw (menu interface)
+
+There is also a short tutorial.
+
+
+For the most recent documentation on codonW see
+http://www.molbiol.ox.ac.uk/cu/
+
+
+To Set the Codonw Help Environment:
+=================
+
+CodonW has an in-built help system, the help file is called codonW.hlp and
+should be located in the same directory as the executable codonw.
+Alternatively the help file can be pointed to by the environment variable
+CODONW_H; if you are using a C shell you
+can add something similar to this to your .login script.
+
+setenv CODONW_H file_path
+
+Where file_path is the fully defined path name for codonW.hlp.
+
+Additional Files:
+=================
+
+README.indices - explanation about the various codon usage indices that
+codonW calculates.
+
+README.coa- explanation about the output files from the correspondence
+analysis.
+
+README.links- explanation about the auxiliary programmes created during
+the making of codonw.
+
+Tutorial- A quick tutorial on the analysis of codon usage of the open
+reading frames from Saccharomyces cerevisiae chromosome III.
+
+input.dat- An input file containing 167 open reading frames from
+Saccharomyces cerevisiae chromosome III. (see Tutorial).
+
+Recoding - A quick explanation about how amino acids and codons are
+represented internally within codonW.
+
+
+Bugs
+
+This is a beta version of codonW, therefore there may be bugs within the
+code. If you do find or notice anything strange please e-mail bug
+reports/complaints/suggestions to johnp at molbiol.ox.ac.uk. Remember to
+include an example of the input file (and output files) and the options
+selected that generated the error, don't forget to tell me the make of
+computer and operating system it was running under.
+
+
diff --git a/Recoding.txt b/Recoding.txt
new file mode 100755
index 0000000..f141bd8
--- /dev/null
+++ b/Recoding.txt
@@ -0,0 +1,80 @@
+Data Recoding
+To aid computation codonW converts sequence information
+automatically from its original text format into a numerical format.
+This is normally transparent to the user. To add additional genetic
+codes or a personal choice of codon values for calculating the Fop,
+CAI or CBI indices, some understanding of the schema used to convert
+the sequences to numerical strings is advisable.
+
+When calculating the indices Fop, CBI, or CAI which are measures of
+codon bias in relation to the codon usage of a set of optimal genes,
+there is an option of using a personal choice of these values. These
+are read from file, there must be one value for each codon (64 in
+total) and they must be found in the file in a set sequence (i.e.
+the numerical order of the codons, TTT, TCT ... GAG, GGG). This is
+also the order in which codon and amino acid results are recorded to
+file.
+
+Internally CodonW recodes all nucleotides, codons and amino acids.
+Nucleotides are recoded as T/U=1, C=2, A=3, G=4. The 20 standard
+amino acids and the termination codons are recoded as integer values
+in the range 1 to 21, note that stop codons are assigned the amino
+acid value 11 (see Table 2). The decision about whether a codon is
+synonymous, or how many members are in a particular amino acid
+synonymous family are taken at run time and are dependent on the
+genetic code chosen.
+
+Each codon is recoded into an integer value in the range 1 to 64,
+see Table 1. The formula used to recode the codons is:
+
+Equation 1
+
+code=((P1-1)*16)+P2+((P3-1)*4) 1<= code <= 64
+
+Where each of the three codon positions is represented by P1, P2 and
+P3. Using this recoding convention, the codon ATG has the value 45.
+
+code=((3-1)*16)+1+((4-1)*4)=45
+
+Unrecognised or non-translatable bases, codons or amino acids are
+all assigned the value zero.
+
+
+
+
+Table 1 Numerical values used for recoding codons
+
+Code Codon AA Code Codon AA Code Codon AA Code Codon AA
+1 UUU Phe 2 UCU Ser 3 UAU Tyr 4 UGU Cys
+5 UUC 6 UCC 7 UAC 8 UGC
+9 UUA Leu 10 UCA 11 UAA STOP 12 UGA STOP
+13 UUG 14 UCG 15 UAG 16 UGG Trp
+17 CUU 18 CCU Pro 19 CAU His 20 CGU Arg
+21 CUC 22 CCC 23 CAC 24 CGC
+25 CUA 26 CCA 27 CAA Gln 28 CGA
+29 CUG 30 CCG 31 CAG 32 CGG
+33 AUU Ile 34 ACU Thr 35 AAU Asn 36 AGU Ser
+37 AUC 38 ACC 39 AAC 40 AGC
+41 AUA 42 ACA 43 AAA Lys 44 AGA Arg
+45 AUG Met 46 ACG 47 AAG 48 AGG
+49 GUU Val 50 GCU Ala 51 GAU Asp 52 GGU Gly
+53 GUC 54 GCC 55 GAC 56 GGC
+57 GUA 58 GCA 59 GAA Glu 60 GGA
+61 GUG 62 GCG 63 GAG 64 GGG
+
+
+
+Table 2 Numerical values used to recode amino acids.
+Code AA One letter code Code AA One letter code
+1 Phe F 2 Leu L
+3 Ile I 4 Met M
+5 Val V 6 Ser S
+7 Pro P 8 Thr T
+9 Ala A 10 Tyr Y
+11 Stop * 12 His H
+13 Gln Q 14 Asn N
+15 Lys K 16 Asp D
+17 Glu E 18 Cys C
+19 Trp W 20 Arg R
+21 Gly G
+
diff --git a/Tutorial.txt b/Tutorial.txt
new file mode 100755
index 0000000..6f199d7
--- /dev/null
+++ b/Tutorial.txt
@@ -0,0 +1,350 @@
+Tutorial
+
+Codon usage analysis
+
+Included with this distribution of codonW should be a test dataset of
+sequences (input.dat). We will use this set of sequences as a typical example
+of a codon usage analysis. This test dataset is derived from the open
+reading frames (ORFs) of Saccharomyces cerevisiae chromosome III as
+annotated in the EMBL feature table for the sequence entry SCCHRIII
+(accession number X59720). In the current EMBL (Release 51 June 1997) the
+number of annotated ORFs was 172. The file input.dat contains 111 of these
+ORFs. The rationale for why some ORFs were removed is explained below.
+
+The commandline syntax of codonW will be used in this tutorial, all options
+selected from the commandline are also selectable using the menu system. For
+more information please read the command line help (codonw -help) or just
+type "codonw" and use the menu specific online.
+
+Build your dataset of genes carefully.
+Always remember that as in any analysis, but particularly with codon usage,
+GIGO (garbage in, garbage out). Examine as many sources of information about
+the data as possible, particularly the original publication and sequence
+annotations. It is important that the sequences are a representative sample.
+Five ORFs were removed from the dataset because they were annotated (and
+had sequence identity) with genes within the previously identified
+transposable elements Ty2 and Ty5. These ORFs were annotated at positions
+1537-2127, 2118-2558, 2816-3742, 84714-86030, 84714-90384. The codon usage
+of transposable element genes differs from that of chromosomal genes.
+
+Further checks of sequence annotation were carried out, those sequences which
+had not been assigned gene names or SwissProt accession numbers were
+removed. The SwissProt annotation was also checked, genes described as
+hypothetical but which did not have any sequence identity with other
+proteins were removed.
+Check basic sequence integrity
+Sequences should be checked to confirm that they match some basic gene
+characteristics. Each sequence might reasonably be expected to have an
+initiation codon and a translation termination codon, and no internal stop
+codons. Those sequences that do not match these characteristics, or
+sequences that have partial codons or untranslatable codons are flagged by
+codonw with warning messages.
+
+To make a first pass of the input data to check for simple sequence
+problems:
+codonw input.dat -nomenu
+
+By default codonw will report the codon usage of each gene to the file
+input.blk. As there are no problems with this dataset there should be no
+warning messages. However analysis of a previous version of this dataset
+based on EMBL Release 50 where SCCHRIII had 230 annotated ORFs, generated
+these typical warning messages.
+
+Warning: Sequence 178 "SCCHRIII.PE178______" does not begin with a
+recognised start codon
+Warning: Sequence 178 "SCCHRIII.PE178______" is not terminated by a stop
+codon
+Warning: Sequence 202 "SCCHRIII.PE202______" does not begin with a
+recognised start codon
+Warning: Sequence 202 "SCCHRIII.PE202______" has 1 internal stop codon(s)
+Warning: Sequence 202 "SCCHRIII.PE202______" is not terminated by a stop
+codon
+
+Each sequence is labelled by its numerical occurrence in the input file
+(i.e. these are the 178th and 202nd sequences in the input file) and its
+sequence header line.
+
+Sequences that generate warning messages should be examined closely to
+ascertain why. Some sequences may be annotated as partial sequences and
+therefore the absence of a start or stop codon or the presence of a 3'
+partial codon is to be expected. Note the presence of a 5' partial codon
+would cause a frame shift, it is ESSENTIAL that 5' partial codons are
+removed. Unless the frame shift that they produce, results in a (incorrect)
+reading frame that contains internal stop codons, codonw cannot detect this
+problem. The codon usage of a frame shifted gene sequence could adversely
+affect the correspondence analysis (COA) (though such genes are often
+recognisable as being outliers on the COA plots).
+
+If a sequence warning is due to incorrect annotation this should be
+corrected manually. Sequences that produce warnings that cannot be explained
+or justified (e.g. a gene with internal stop codon) should be excluded.
+These warnings are informational only and do not exclude sequences from the
+analysis.
+
+Codon usage indices
+Once the initial quality checks have been made for the data we can then
+proceed with the codon usage analysis (strictly speaking we can generate COA
+and codon usage indices tasks at the same time). Some of the indices of
+codon usage bias that CodonW calculates (i.e. Fop, CAI and CBI) use
+information about a preferred set of codons for highly expressed genes. This
+information is species specific and does not apply to all species (most
+eukaryotes and many prokaryotes appear to display no codon preference in
+highly expressed genes). Therefore care must be taken that the appropriate
+set of optimal codons is used. For most species the optimal codons are not
+known and therefore the indices should not be calculated at this stage.
+However this information is known for Saccharomyces cerevisiae, so we can
+immediately calculate these indices of codon usage. Later we will see how
+codonW identifies optimal codons and can generate this information for your
+species.
+
+The default optimal codons and codon adaptation values are those of E. coli.
+To select an alternative choice we use the c_type (for CAI values ) and
+f_type (for FOP/CBI) commandline arguments. These switches require an
+integer value, this value is the same as the option number if we were
+using the menu system to change the codon information.
+
+Example "-c_type 2" is equivalent to
+Choose "Main Menu"
+Choose "Changes Defaults Menu"
+Choose "Change the CAI values"
+Choose "(2) Saccharomyces cerevisiae"
+
+Example "-f_type 4" is equivalent to
+Choose "Main Menu"
+Choose "Changes Defaults Menu"
+Choose "Change the Fop/CBI values"
+Choose "(4) Saccharomyces cerevisiae"
+
+
+Therefore to select all the codon usage indices calculated by codonw and to
+use the optimal codons of Saccharomyces cerevisiae type:
+
+codonw input.dat -all_indices -c_type 2 -f_type 4 -nomenu
+
+See below for the output of this command
+The commandline flag -nomenu bypasses the menu system, the -all_indices
+indicates to codonw that you wish to calculate all the codon and amino acid
+usage indices. These indices are T3s, C3s, A3s, G3s, CAI, CBI, Fop, Nc, GC3s,
+GC, L_sym, L_aa, Gravy and Aromaticity. For a fuller explanation of what
+these indices are see Readme.indices. These indices can also be used to
+check whether there are any identical or almost identical sequences in the
+input file. If we sort the result file "input.out" it is much easier to
+identify the sequences which are similar.
+
+sort -k 2n input.out (unix for "sort using the second
+numerical field")
+
+The sorted output reveals the presence of two pairs of identical sequences
+(Mating type proteins)
+ALPHA2____________63 0.3636 0.2273 0.4939 0.2177 0.109
+MATALPHA2_________63 0.3636 0.2273 0.4939 0.2177 0.109
+and
+ALPHA1____________52 0.4361 0.2180 0.4228 0.2589 0.112
+MATALPHA1_________52 0.4361 0.2180 0.4228 0.2589 0.112
+
+Sequences which appear to be multiple copies of the same gene are normally
+removed from our codon usage datasets, even if the sequences are not
+identical but where the differences c codon usage bias as observed, lower values indicate stronger bias. A
+useful feature of ENc is that the effect GC biases have on the index can
+be estimated. This allows the comparison of GC3s and ENc against the
+theoretical values if codon bias was simply caused due to GC mutational
+bias. A plot of ENc vs. GC3s can be seen at
+http://www.molbiol.ox.ac.uk/cu/EncVsGC3s.gif. Although the majority of genes
+in this plot have a degree of codon bias that can be explained in terms of
+GC mutation, there is a cluster of genes (six genes with ENc <40) which have much
+stronger codon bias than can be simply explained in terms of mutational biases.
+These genes are good candidates as genes whose codon usage has been
+determined by natural selection, probably selection for translational
+efficiency.
+
+Correspondence Analysis (COA)
+We are now ready to generate a correspondence analysis of the codon usage of
+SCCHRIII genes. We have a choice about how much information is generated. In
+this example we will use the default values.
+
+codonw input.dat -coa_cu -nomenu -silent (-silent stops all
+prompting)
+
+This generates a COA of codon usage. The summary file is "summary.coa" and
+contains most of the data generated by the COA. One of the first sections is
+the "Explanation of the variation by axis" also stored in eigen.coa.
+
+The total inertia of the data was 0.263176
+Num. Eigenval. R.Iner. R.Sum |Num. Eigenval. R.Iner. R.Sum |
+01 +4.5755E-02 +0.1739 +0.1739 |02 +3.2372E-02 +0.1230 +0.2969 |
+03 +1.8405E-02 +0.0699 +0.3668 |04 +1.2499E-02 +0.0475 +0.4143 |
+
+The relative inertia explained by the first axis is 17.4%, the 2nd axis
+explains 12.3%, the 3rd 7.0%, etc. (17.4% is not remarkably high for
+relative inertia explained by the first axis, but as there are ORFs included
+which are described as hypothetical there may be random noise present in the
+data if they are not real).
+
+The next two sections report position of each gene and codon on the trends.
+
+label Axis1 Axis2 Axis3 Axis4
+1_YCG9_Probable_____ 0.00904 0.13153 0.34028 -0.05372
+2_YCG8________573_re 0.07429 -0.24652 -0.05502 -0.39837
+3_ALPHA2________633_ 0.30675 0.04259 -0.22864 -0.03878
+4_ALPHA1________528_ 0.16444 0.00399 -0.02000 0.00937
+5_CHA1_________1083_ -0.00322 0.10387 0.07137 0.11896
+
+This information is best viewed graphically, an example of the location of
+the genes on the two principal axes can be seen here
+http://www.molbiol.ox.ac.uk/cu/axes.gif.
+
+Automatic Identification of Putative Optimal Codons
+Codonw automatically tries to identify the optimal codons in your data, or
+more precisely identify the codons which contribute to the major trend (if
+the main trend is selection for translational optimality these should be the
+optimal codons). It does this by comparing the codon usage of groups of
+genes taken from each extreme of the principal trend (axis 1). It identifies
+the set of genes with the highest bias (using the effective number of codons
+index) and tests for significant differences in codon usage between
+the higher and lower bias sets with a two way Chi-squared contingency test. The
+putative optimal codons are listed in summary.coa and hilo.coa. It is the
+responsibility of the user to confirm that the major codon usage trend is
+selection for translational optimality, and not due to some other mutational
+pressure (see GC variation). The number of genes included in the two groups
+can be selected using the command line switch ( -coa_num ) as an absolute
+number of genes, or a percentage of the total genes in the dataset (by
+default 5%).
+
+The analysis of this dataset identified 19 codons that appeared to be
+optimal. 18 of these agree with optimal codons identified previously using a
+larger dataset of 575 genes [Sharp, 1991 #46]. The codon identified in
+this analysis as being optimal but not in the previous analysis, was GCC;
+this codon has been previously suggested as being an optimal codon in S.
+cerevisiae [Bennetzen, 1982 #92]. The U ending codons, AUU, GUU and UGU,
+which have been previously identified as optimal [Sharp, 1991 #46], were
+not identified here at p<0.01; although UGU was identified as potentially
+optimal with a p<0.02. The main reason that the U ending codons were not
+identified from this dataset was their much higher usage in the lower biased
+dataset.
+
+
+Caveats
+1) The codons identified by codonw, as being optimal will be dependent on
+the strength of the trend and the size of the datasets.
+2) The composition of the genes from chromosome III is quite different from
+the 575-gene dataset used by Sharp and Cowe. Only one of the 30 genes they
+considered to be highly expressed, and none of the genes they considered
+lowly expressed are present in this dataset. The reader is reminded that
+there are approximately 15,000 yeast genes, so just a little over 1% are
+located on chromosome III.
+
+Codonw generated personal choice of codons
+On the assumption that the principal trend identified by codonw is selection
+for translational optimality, and that the genes assigned to the highly biased
+codon usage group are highly expressed, codonw outputs files with the
+"optimal codons" and "CAI adaptation fitness values". These files are
+fop.coa, cbi.coa and cai.coa, their filenames are related to the index they
+have been formatted for. These files can be used to calculate the indices
+in species where the preferred codon usage has not been hardwired into
+codonW.
+
+codonw input.dat -fop_file fop.coa
+codonw input.dat -cai_file cai.coa -cbi_file cbi.coa
+
+Caveats
+1) The original CAI paper calculated fitness values from experimentally
+determined highly expressed genes. The fitness values that are internal
+to codonW were derived from these criteria. CAI indices calculated using
+fitness values derived from genes identified solely by COA, as being
+highly expressed should not be regarded as true CAI values.
+2) The optimal codons stored in the files cbi.coa and fop.coa were
+identified by codonw using a statistical test of significance, this test
+is dependent on sample size.
+3) The size of the sample taken from the extremes of the axis will affect
+the identified optimal codons.
+4) The principal trend in the variation of codon usage may not be
+translation optimality.
+
+When we calculate the indexes CAI, CBI and Fop using the "codonw" generated
+optimal codons and fitness values based on this small dataset, they, as we would
+expect, differ from when these indices are calculated using the codonw
+internal codon usage information for S. cerevisiae. The internal values are
+more accurate because the datasets used to generate them were larger, and
+contained experimentally verified gene sequences.
+
+Although the two sets of indices differ, they remain highly correlated, all
+three indices have correlation coefficients greater than 0.96. Therefore if
+comparisons between the index values are internally consistent (i.e. they
+were both calculated using the same optimal codon information) relative
+comparisons of codon usage and bias can be made. Based on a dataset of 111
+genes we have been able to identify optimal codons, which give us some
+insight into the codon usage of S. cerevisiae.
+
+
+Axis2 is highly correlated with GC3s content
+Alternative datasets could have been chosen that would present a much
+simpler analysis of codon usage (i.e. where the optimal codons identified
+better matched those previously published). This dataset was specifically
+chosen as the codon usage variation for genes from this chromosome is known
+to have a second trend, GC3s varies with chromosomal location in a
+systematic fashion [Sharp, 1993 #39]. When we examine correlation
+coefficients between the first 4 axes the correlation coefficient between
+axis2 and GC3s is highly significant (r=0.89). Interestingly the bias is most
+strong among the U ending codons it is possible that the presence of this
+trend contributed to why the three U ending codons were not identified here
+as optimal codons. This trend is quite strong accounting for 12.3% of the
+relative inertia of the data, the principal trend (apparently selection for
+translation optimality) accounted for 17.4%. We therefore see how it is
+possible that the strongest influence on the choice of codon usage might not
+be translation optimality but mutation biases.
+
+
+
+
+Typical output from codonw -all_indices -nomenu
+======================= Output ======================================
+
+Genetic code is currently set to Universal Genetic code TGA=* TAA=* TAG=*
+
+ Welcome to CodonW 1.3 for Help type h
+
+Using Saccharomyces cerevisiae (Sharp and Cowe (1991) Yeast 7:657-678)
+w values to calculate CAI
+Using Saccharomyces cerevisiae (Sharp and Cowe (1991) Yeast 7:657-678)
+optimal codons to calculate CBI
+Using Saccharomyces cerevisiae (Sharp and Cowe (1991) Yeast 7:657-678)
+optimal codons to calculate Fop
+..................................................................
+
+ Number of sequences: 111
+
+Files used:
+ Input file was input.dat
+ Output file was input.out (codon usage indices, e.g. gc3s)
+ Output file was input.blk (bulk output e.g. raw codon usage)
+
+ CodonW has finished
+ ======================================================
+Tabulation of total codon usage
+
+Phe UUU 1483 1.14 Ser UCU 1094 1.47 Tyr UAU 1000 1.12 Cys UGU 434 1.18
+ UUC 1117 0.86 UCC 773 1.04 UAC 789 0.88 UGC 303 0.82
+Leu UUA 1349 1.55 UCA 882 1.19 TER UAA 47 1.27 TER UGA 36 0.97
+ UUG 1549 1.78 UCG 487 0.66 UAG 28 0.76 Trp UGG 665 1.00
+
+ CUU 698 0.80 Pro CCU 747 1.27 His CAU 677 1.15 Arg CGU 328 0.86
+ CUC 364 0.42 CCC 415 0.71 CAC 499 0.85 CGC 171 0.45
+ CUA 671 0.77 CCA 911 1.55 Gln CAA 1388 1.35 CGA 151 0.39
+ CUG 604 0.69 CCG 281 0.48 CAG 668 0.65 CGG 103 0.27
+
+Ile AUU 1612 1.35 Thr ACU 1052 1.38 Asn AAU 1778 1.17 Ser AGU 717 0.97
+ AUC 1018 0.85 ACC 660 0.87 AAC 1262 0.83 AGC 500 0.67
+ AUA 943 0.79 ACA 883 1.16 Lys AAA 2118 1.13 Arg AGA 1038 2.71
+Met AUG 1156 1.00 ACG 444 0.58 AAG 1645 0.87 AGG 504 1.32
+
+Val GUU 1184 1.49 Ala GCU 1055 1.40 Asp GAU 1905 1.25 Gly GGU 1284 1.87
+ GUC 674 0.85 GCC 765 1.01 GAC 1145 0.75 GGC 552 0.80
+ GUA 622 0.78 GCA 836 1.11 Glu GAA 2371 1.41 GGA 557 0.81
+ GUG 690 0.87 GCG 368 0.49 GAG 995 0.59 GGG 355 0.52
+
+53400 codons (used Universal Genetic code)
+
+======================================================
+
+
+ [...]
\ No newline at end of file
diff --git a/codonW.h b/codonW.h
new file mode 100755
index 0000000..4faf563
--- /dev/null
+++ b/codonW.h
@@ -0,0 +1,683 @@
+/**************************************************************************/
+/* CodonW codon usage analysis package */
+/* Copyright (C) 2005 John F. Peden */
+/* This program is free software; you can redistribute */
+/* it and/or modify it under the terms of the GNU General Public License */
+/* as published by the Free Software Foundation; version 2 of the */
+/* License, */
+/* */
+/* This program is distributed in the hope that it will be useful, but */
+/* WITHOUT ANY WARRANTY; without even the implied warranty of */
+/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
+/* GNU General Public License for more details. */
+/* You should have received a copy of the GNU General Public License along*/
+/* with this program; if not, write to the Free Software Foundation, Inc.,*/
+/* 675 Mass Ave, Cambridge, MA 02139, USA. */
+/* */
+/* */
+/* The author can be contacted by email (jfp#hanson-codonw at yahoo.com Anti-*/
+/* Spam please change the # in my email to an _) */
+/* */
+/* For the latest version and information see */
+/* http://codonw.sourceforge.net */
+/**************************************************************************/
+
+
+#define ARB_UNIT 100 /* unit from which the two sizes below are derived */
+#define MAX_GENE (ARB_UNIT*3) /* 300; per the original note, sizes the array seq that holds read-in sequence data */
+#define LINE_LENGTH (ARB_UNIT+100) /* 200; NOTE(review): presumably an input line buffer size - confirm against its users */
+#define GARG_EXACT 0x800 /* single-bit flag used in function gargs */
+#define GARG_NEXT 0x1000 /* single-bit flag used in function gargs */
+#define GARG_THERE 0x2000 /* single-bit flag used in function gargs */
+#define GARG_SUBSQ 0x4000 /* single-bit flag used in function gargs */
+#define MAX_ARGS 100 /* used in function gargs */
+/* debugging code: both macros call printf directly and already end in ';' */
+#define debug_ printf("Got to %i\n",debugger++); /* needs a variable named debugger in scope; prints it, then increments it */
+#define debug(x) printf( #x " = %d", x); /* prints the text of x and its value via %d, with no trailing newline */
+/* Define the macro pause: print a prompt on stderr, then block until the
+ * user presses return. The typed line is read into pm->junk and is not
+ * examined here. The read uses fgets() bounded by sizeof(pm->junk); gets()
+ * has no length limit and was removed from the C library in C11. Unlike
+ * gets(), fgets() leaves the trailing newline in the buffer. A MENU_STRUCT
+ * pointer named pm must be in scope wherever the macro is used. */
+#define pause {fprintf(stderr,"\nPress return or enter to continue -> ");(void)fgets(pm->junk,sizeof(pm->junk),stdin);}
+#define MAX_FILENAME_LEN 90 /* size of the filename buffers in COA_STRUCT and MENU_STRUCT */
+
+/* define the structures used within codonW */
+typedef struct { /* One genetic code; the predefined codes are the entries of cu[]. */
+ char *des; /* descriptive name of the code */
+ char *typ; /* short summary string; in cu[] it lists codon assignments such as "TGA=W" */
+ int ca[65]; /* amino acid code for each codon code 1..64; element 0 is 0 in every cu[] entry */
+} GENETIC_CODE_STRUCT; /* genetic code information */
+
+typedef struct { /* Name tables for amino acids and codons, indexed by numeric
+ * code. In the amino_acids instance index 0 holds the
+ * placeholder names "X", "UNK" and "BAD". */
+ char *aa1[22]; /* 1 letter AA code */
+ char *aa3[22]; /* 3 letter AA code */
+ char *cod[65]; /* 3 letter name of codons */
+} AMINO_STRUCT;
+
+typedef struct { /* Per-amino-acid properties; 22 slots, the same size as the
+ * aa1/aa3 name tables in AMINO_STRUCT. */
+ float hydro[22]; /* hydropathicity values */
+ int aromo[22]; /* aromaticity values */
+} AMINO_PROP_STRUCT;
+
+typedef struct { /* One set of optimal-codon flags; this type is used for both
+ * the pfop and pcbi pointers and for the fop[] table. */
+ char *des; /* store a description */
+ char *ref; /* a reference */
+ char fop_cod[65]; /* the optimal codons: one small integer per codon code, element 0 is 0 in fop[] */
+} FOP_STRUCT;
+
+typedef struct { /* One set of CAI w values; the predefined sets are the
+ * entries of cai[]. */
+ char *des; /* store a description */
+ char *ref; /* a reference */
+ float cai_val[65]; /* the CAI w values, one per codon code; element 0 is 0.0 in cai[] */
+} CAI_STRUCT;
+
+typedef struct { /* Settings and running totals for the correspondence
+ * analysis (COA); defaults are given by the coa instance. */
+char level; /* either expert or standard*/
+int axis; /* how many axis to generate*/
+int rows; /* how many genes in dataset*/
+int colm; /* how many columns in data */
+int fop_gene; /* No of genes to use to ident opt codon; a negative value means a percentage */
+char add_row[MAX_FILENAME_LEN]; /* file with supp sequences */
+float inertia; /* total data inertia */
+char codons[65]; /* codon to be analysed */
+char amino [22]; /* amino acids to be COA'ed */
+} COA_STRUCT;
+
+typedef struct { /* Program-wide option flags, filenames, scratch buffers and
+ * open streams; reached through the global pointer pm. */
+ char prog; /* used to ident which prog */
+ char bulk; /* used to ident blk output */
+ char verbose; /* don't overwrite files */
+ char totals; /* concatenate genes ? */
+ char menu; /* show a menu ? */
+ char warn; /* show sequence warning */
+
+ char codonW; /* am I codonW */
+ char fop; /* calc index fop */
+ char cai; /* calc index CAI */
+ char cbi; /* calc index CBI */
+ char bases; /* calc base composition */
+ char gc3s; /* calc gc at sil.3rd base */
+ char gc; /* calc gc */
+ char enc; /* calc enc */
+ char sil_base; /* calc silent base compo */
+ char L_sym; /* No of synonymous codons */
+ char L_aa; /* No of amino acids */
+ char hyd; /* calc hydropathicity */
+ char aro; /* calc aromaticity */
+
+ char seperator; /* column separator */
+ char coa; /* calculate a COA or not ? */
+
+ char code; /* which genetic code */
+ char f_type; /* which predefined fop val */
+ char c_type; /* which predefined CAI val */
+
+ char seq_type; /* DNA or Protein or CU */
+ char seq_format; /* Human or machine readable*/
+ char curr_infilename [MAX_FILENAME_LEN]; /* input filename */
+ char curr_outfilename[MAX_FILENAME_LEN]; /* .out filename */
+ char curr_tidyoutname[MAX_FILENAME_LEN]; /* .blk filename */
+ char fop_filen[MAX_FILENAME_LEN]; /* user fop filename */
+ char cai_filen[MAX_FILENAME_LEN]; /* user CAI filename */
+ char cbi_filen[MAX_FILENAME_LEN]; /* user CBI filename */
+ char curr_logfilename[MAX_FILENAME_LEN]; /* used for logging errors */
+
+ char junk [BUFSIZ+1]; /* used to store char info; also the buffer the pause macro reads into */
+ char messages [300]; /* used to construct messages */
+ char analysis_run; /* has CodonW actually run */
+
+ int term_length; /* how many lines are there */
+ /* file pointers */
+ FILE *inputfile; /* input file */
+ FILE *outputfile; /* .out file */
+ FILE *tidyoutfile; /* .blk file */
+ FILE *cuout; /* codon usage output */
+ FILE *fopfile; /* fop input values */
+ FILE *caifile; /* cai input values */
+ FILE *cbifile; /* cbi input values */
+ FILE *logfile; /* log file stream */
+ FILE *my_err; /* pointer for err stream */
+
+ FILE *fcoa_in; /* NOTE(review): presumably the COA input stream - confirm */
+ FILE *fcoa_out; /* NOTE(review): presumably the COA output stream - confirm */
+} MENU_STRUCT ;
+
+
+#ifndef DECOSF /* DEBUG is defined on every build that does not define DECOSF */
+#define DEBUG /* include debug code */
+#endif
+
+#ifndef TRUE
+#define TRUE 1 /* for dumb compilers */
+#endif
+
+#ifndef FALSE
+#define FALSE 0 /* for dumb compilers */
+#endif
+
+
+/* these handle how to delete files, and blank the screen
+ * deletefile(x) - delete the file named x (_unlink or remove)
+ * clearscr(x) - either print x newlines or run the shell's clear command;
+ * the system() variants ignore x and already end in ';'
+ */
+#if defined _WINDOWS || defined _WIN32
+# define deletefile(x) _unlink(x)
+# define clearscr(x) {int n; for(n=0; n<x ;n++) printf("\n");}
+#elif defined _DOS
+# define deletefile(x) _unlink(x)
+# define clearscr(x) system("cls");
+#else
+# define deletefile(x) remove(x)
+#if defined DEBUG /* DEBUG is set above unless DECOSF is defined, so this branch prints newlines */
+# define clearscr(x) {int n; for(n=0; n<x ;n++) printf("\n");}
+#else
+# define clearscr(x) system("clear");
+#endif
+#endif
+
+#ifdef ORIG_DEFS /* declare only once: these definitions are emitted only where
+ * ORIG_DEFS is defined before this header is included */
+char Revision[] = "1.4.4"; /* version */
+char Update[] = "$Date: 2005/05/11 21:43:49 $";/* date */
+char Author[] = "$Author: johnfpeden $"; /* author */
+char title[100]; /* sequence description */
+char long_seq; /* length of seq title */
+char last_base;
+long int ncod[65]; /* NOTE(review): presumably per-codon counts indexed by codon code - confirm */
+long int naa[23]; /* NOTE(review): 23 slots although the amino acid name tables have 22 - confirm intent */
+long int din[3][16]; /* NOTE(review): presumably dinucleotide counts - confirm */
+long int codon_tot;
+long int master_ic;
+long int fl_pos_start;
+long int fl_pos_curr;
+long int GC_TOT;
+long int AT_TOT;
+long int AA_TOT;
+long int IUBC_TOT;
+long int GAP_TOT;
+long int num_sequence;
+long int num_seq_int_stop;
+long int non_std_char;
+long int tot;
+int last_aa = 0;
+int reg = 1;
+int valid_stops;
+int valid_start;
+int fram;
+int *da;
+int *ds;
+
+AMINO_STRUCT *paa; /* pointer to structs */
+GENETIC_CODE_STRUCT *pcu;
+FOP_STRUCT *pfop;
+FOP_STRUCT *pcbi; /* same struct type as pfop */
+CAI_STRUCT *pcai;
+MENU_STRUCT *pm; /* the pause macro reads into pm->junk */
+COA_STRUCT *pcoa;
+AMINO_PROP_STRUCT *pap;
+
+
+ /* declare default values for the COA settings; the trailing members
+ * codons[] and amino[] have no initializer here and so start zeroed */
+COA_STRUCT coa={
+'n', /* level */
+4, /* axis */
+0, /* rows or genes */
+64, /* colms */
+-5, /* fop_gene (if number is negative implies a percentage)*/
+"", /* add_row */
+(float) 0.00 /* inertia */
+};
+
+int NumGeneticCodes=8; /* used in menu.c */
+ /* No. of predefined codes; matches the 8 entries of cu[] below */
+
+ /* define genetic codes
+ * Each entry is: description, summary of assignments, then ca[0..64].
+ * ca[0] is 0; ca[1..64] follow the codon order of amino_acids.cod
+ * (UUU, UCU, UAU, UGU, UUC, ...) and hold amino acid codes as named
+ * in amino_acids.aa1/aa3 (e.g. 11 is "*" / "TER").
+ * The entries rely on brace elision: no inner braces are used. */
+GENETIC_CODE_STRUCT cu[] = {
+ "Universal Genetic code",
+ "TGA=* TAA=* TAG=*",
+ 0,
+ 1, 6, 10, 18, 1, 6, 10, 18, 2, 6, 11, 11, 2, 6, 11, 19,
+ 2, 7, 12, 20, 2, 7, 12, 20, 2, 7, 13, 20, 2, 7, 13, 20,
+ 3, 8, 14, 6, 3, 8, 14, 6, 3, 8, 15, 20, 4, 8, 15, 20,
+ 5, 9, 16, 21, 5, 9, 16, 21, 5, 9, 17, 21, 5, 9, 17, 21,
+ "Vertebrate Mitochondrial code",
+ "AGR=* ATA=M TGA=W",
+ 0,
+ 1, 6, 10, 18, 1, 6, 10, 18, 2, 6, 11, 19, 2, 6, 11, 19,
+ 2, 7, 12, 20, 2, 7, 12, 20, 2, 7, 13, 20, 2, 7, 13, 20,
+ 3, 8, 14, 6, 3, 8, 14, 6, 4, 8, 15, 11, 4, 8, 15, 11,
+ 5, 9, 16, 21, 5, 9, 16, 21, 5, 9, 17, 21, 5, 9, 17, 21,
+ "Yeast Mitochondrial code",
+ "CTN=* ATA=M TGA=W", /* NOTE(review): the table below gives CUN (codes 17,21,25,29) the value 8 = Thr, not the stop code 11 - confirm the "CTN=*" text */
+ 0,
+ 1, 6, 10, 18, 1, 6, 10, 18, 2, 6, 11, 19, 2, 6, 11, 19,
+ 8, 7, 12, 20, 8, 7, 12, 20, 8, 7, 13, 20, 8, 7, 13, 20,
+ 3, 8, 14, 6, 3, 8, 14, 6, 4, 8, 15, 20, 4, 8, 15, 20,
+ 5, 9, 16, 21, 5, 9, 16, 21, 5, 9, 17, 21, 5, 9, 17, 21,
+ "Filamentous fungi Mitochondrial code",
+ "TGA=W",
+ 0,
+ 1, 6, 10, 18, 1, 6, 10, 18, 2, 6, 11, 19, 2, 6, 11, 19,
+ 2, 7, 12, 20, 2, 7, 12, 20, 2, 7, 13, 20, 2, 7, 13, 20,
+ 3, 8, 14, 6, 3, 8, 14, 6, 3, 8, 15, 20, 4, 8, 15, 20,
+ 5, 9, 16, 21, 5, 9, 16, 21, 5, 9, 17, 21, 5, 9, 17, 21,
+ "Insects and Plathyhelminthes Mitochondrial code",
+ "ATA=M TGA=W AGR=S",
+ 0,
+ 1, 6, 10, 18, 1, 6, 10, 18, 2, 6, 11, 19, 2, 6, 11, 19,
+ 2, 7, 12, 20, 2, 7, 12, 20, 2, 7, 13, 20, 2, 7, 13, 20,
+ 3, 8, 14, 6, 3, 8, 14, 6, 4, 8, 15, 6, 4, 8, 15, 6,
+ 5, 9, 16, 21, 5, 9, 16, 21, 5, 9, 17, 21, 5, 9, 17, 21,
+ "Nuclear code of Cilitia",
+ "UAA=Q=Gln UAG=Q",
+ 0,
+ 1, 6, 10, 18, 1, 6, 10, 18, 2, 6, 13, 11, 2, 6, 13, 19,
+ 2, 7, 12, 20, 2, 7, 12, 20, 2, 7, 13, 20, 2, 7, 13, 20,
+ 3, 8, 14, 6, 3, 8, 14, 6, 3, 8, 15, 20, 4, 8, 15, 20,
+ 5, 9, 16, 21, 5, 9, 16, 21, 5, 9, 17, 21, 5, 9, 17, 21,
+ "Nuclear code of Euplotes",
+ "UGA=C",
+ 0,
+ 1, 6, 10, 18, 1, 6, 10, 18, 2, 6, 11, 18, 2, 6, 11, 19,
+ 2, 7, 12, 20, 2, 7, 12, 20, 2, 7, 13, 20, 2, 7, 13, 20,
+ 3, 8, 14, 6, 3, 8, 14, 6, 3, 8, 15, 20, 4, 8, 15, 20,
+ 5, 9, 16, 21, 5, 9, 16, 21, 5, 9, 17, 21, 5, 9, 17, 21,
+ "Mitochondrial code of Echinoderms",
+ "UGA=W AGR=S AAA=N",
+ 0,
+ 1, 6, 10, 18, 1, 6, 10, 18, 2, 6, 11, 19, 2, 6, 11, 19,
+ 2, 7, 12, 20, 2, 7, 12, 20, 2, 7, 13, 20, 2, 7, 13, 20,
+ 3, 8, 14, 6, 3, 8, 14, 6, 3, 8, 14, 6, 4, 8, 15, 6,
+ 5, 9, 16, 21, 5, 9, 16, 21, 5, 9, 17, 21, 5, 9, 17, 21
+};
+ /* define amino acid info: the initializers fill aa1[0..21], then
+ * aa3[0..21], then cod[0..64] in that order (no inner braces);
+ * index 0 of each table is a placeholder name */
+AMINO_STRUCT amino_acids ={
+ "X", /* aa1[0] */
+ "F","L","I","M","V", /* aa1[1..5] */
+ "S","P","T","A","Y", /* aa1[6..10] */
+ "*","H","Q","N","K", /* aa1[11..15]; 11 is the stop symbol */
+ "D","E","C","W","R","G", /* aa1[16..21] */
+ "UNK", /* aa3[0] */
+ "Phe","Leu","Ile","Met","Val",
+ "Ser","Pro","Thr","Ala","Tyr",
+ "TER","His","Gln","Asn","Lys",
+ "Asp","Glu","Cys","Trp","Arg","Gly",
+ "BAD", /* cod[0] */
+ "UUU","UCU","UAU","UGU", /* cod[1..4]: the second base changes fastest */
+ "UUC","UCC","UAC","UGC",
+ "UUA","UCA","UAA","UGA",
+ "UUG","UCG","UAG","UGG",
+ "CUU","CCU","CAU","CGU",
+ "CUC","CCC","CAC","CGC",
+ "CUA","CCA","CAA","CGA",
+ "CUG","CCG","CAG","CGG",
+ "AUU","ACU","AAU","AGU",
+ "AUC","ACC","AAC","AGC",
+ "AUA","ACA","AAA","AGA",
+ "AUG","ACG","AAG","AGG",
+ "GUU","GCU","GAU","GGU",
+ "GUC","GCC","GAC","GGC",
+ "GUA","GCA","GAA","GGA",
+ "GUG","GCG","GAG","GGG" /* cod[61..64] */
+};
+
+int NumFopSpecies=8; /* again used in menu.c; matches the 8 entries of fop[] below */
+ /* predefined fop info
+ * Each entry is: species description, reference, then fop_cod[0..64].
+ * fop_cod[0] is 0 and fop_cod[1..64] hold 1, 2 or 3 per codon code.
+ * NOTE(review): presumably 3 marks an optimal codon, 2 a non-optimal
+ * codon and 1 a rare codon - confirm against the Fop/CBI code.
+ * The entries rely on brace elision: no inner braces are used. */
+FOP_STRUCT fop[] = {
+ "Escherichia coli",
+ "Ikemura (1985) Mol. Biol. Evol. 2:13-34 (updated by INCBI 1991)",
+0,2,3,2,2,3,3,3,3,2,2,2,2,2,2,2,2,
+ 2,2,2,3,2,2,3,3,2,2,2,2,3,3,3,2,
+ 2,3,2,2,3,3,3,3,2,2,3,2,2,2,2,2,
+ 3,3,2,3,2,2,3,3,2,2,3,2,2,3,2,2,
+ "Bacillus subtilis ",
+ "Sharp et al (1990) Genetics & Biotech of Bacilli vol3 pp89-98",
+0,2,3,2,2,3,1,3,2,2,2,2,2,2,1,2,2,
+ 3,3,2,3,2,1,2,3,2,3,3,1,2,2,2,1,
+ 2,3,2,2,3,1,3,2,1,2,3,2,2,2,2,1,
+ 3,3,2,3,2,1,3,2,3,2,3,2,2,2,2,1,
+ "Dictyostelium discoideum ",
+ "Sharp and Devine (1989) Nucl. Acids Res 17:5029-5039)",
+0,2,2,2,2,3,2,3,2,2,2,2,2,2,2,2,2,
+ 2,2,2,3,3,2,3,2,2,3,3,2,2,2,2,2,
+ 2,2,2,2,3,3,3,2,2,2,2,2,2,2,3,2,
+ 2,2,2,3,3,3,2,2,2,2,3,2,2,2,2,2,
+ "Aspergillus nidulans ",
+ "Lloyd and Sharp (1991) Mol. Gen. Genet 230: 288-294",
+0,2,2,2,2,3,3,3,2,2,2,2,2,2,2,2,2,
+ 2,2,2,3,3,3,3,3,2,2,2,2,2,2,3,2,
+ 2,2,2,2,3,3,3,2,2,2,2,2,2,2,3,2,
+ 2,3,2,3,3,3,3,2,2,2,2,2,2,2,3,2,
+ "Saccharomyces cerevisiae ",
+ "Sharp and Cowe (1991) Yeast 7:657-678",
+0,2,3,2,3,3,3,3,2,2,2,2,2,3,2,2,2,
+ 2,2,2,2,2,2,3,2,2,3,3,2,2,2,2,2,
+ 3,3,2,2,3,3,3,2,2,2,2,3,2,2,3,2,
+ 3,3,2,3,3,2,3,2,2,2,3,2,2,2,2,2,
+ "Drosophila melanogaster",
+ "Shields et al. (1988) Mol Biol Evol 5: 704-716",
+0,2,2,2,2,3,3,3,3,2,2,2,2,2,2,2,2,
+ 2,2,2,3,2,3,3,3,2,2,2,2,3,2,3,2,
+ 2,2,2,2,3,3,3,2,2,2,2,2,2,2,3,2,
+ 2,2,2,2,3,3,3,3,2,2,2,2,3,2,3,2,
+ "Caenorhabditis elegans",
+ "Stenico, Lloyd and Sharp Nuc. Acids Res. 22: 2437-2446(1994)",
+0,2,2,2,2,3,3,3,3,2,2,2,2,2,2,2,2,
+ 3,2,2,3,3,2,3,3,2,3,2,2,2,2,2,2,
+ 2,2,2,2,3,3,3,2,2,2,2,2,2,2,3,2,
+ 2,3,2,2,3,3,3,2,2,2,2,3,2,2,3,2,
+ "Neurospora crassa",
+ "Lloyd and Sharp (1993)",
+0,2,3,2,2,3,3,3,3,2,2,2,2,2,2,2,2,
+ 2,2,2,3,3,3,3,3,2,2,2,2,2,2,3,2,
+ 2,3,2,2,3,3,3,2,2,2,2,2,2,2,3,2,
+ 2,2,2,3,3,3,3,3,2,2,2,2,2,2,3,2
+};
+
+int NumCaiSpecies=3; /* number of entries in cai[] below; used in menu.c */
+CAI_STRUCT cai[]= { /* array of cai structs: name, reference string, then 65 floats per species */
+ "Escherichia coli", /* entry 0: species name */
+ "No reference", /* reference string */
+ 0.000F, /* leading value, followed by 64 values in 8 rows of 8 */
+ 0.296F,1.000F,0.239F,0.500F,1.000F,0.744F,1.000F,1.000F, /* all values lie in 0..1; presumably per-codon relative adaptiveness (w) - confirm */
+ 0.020F,0.077F,0.000F,0.000F,0.020F,0.017F,0.000F,1.000F, /* presumably ordered like the codon strings in amino_acids - confirm */
+ 0.042F,0.070F,0.291F,1.000F,0.037F,0.012F,1.000F,0.356F,
+ 0.007F,0.135F,0.124F,0.004F,1.000F,1.000F,1.000F,0.004F,
+ 0.185F,0.965F,0.051F,0.085F,1.000F,1.000F,1.000F,0.410F,
+ 0.003F,0.076F,1.000F,0.004F,1.000F,0.099F,0.253F,0.002F,
+ 1.000F,1.000F,0.434F,1.000F,0.066F,0.122F,1.000F,0.724F,
+ 0.495F,0.586F,1.000F,0.010F,0.221F,0.424F,0.259F,0.019F,
+ "Bacillus subtilis", /* entry 1 */
+ "No reference",
+ 0.00F,
+ 0.571F,1.000F,0.500F,1.000F,1.000F,0.021F,1.000F,1.000F,
+ 1.000F,0.458F,0.000F,0.000F,0.036F,0.021F,0.000F,1.000F,
+ 0.857F,1.000F,1.000F,1.000F,0.143F,0.071F,0.083F,0.609F,
+ 0.500F,0.714F,1.000F,0.022F,0.071F,0.143F,0.214F,0.043F,
+ 0.500F,1.000F,0.417F,0.125F,1.000F,0.033F,1.000F,0.208F,
+ 0.071F,0.867F,1.000F,0.435F,1.000F,0.200F,0.097F,0.022F,
+ 1.000F,1.000F,0.417F,0.955F,0.188F,0.025F,1.000F,0.773F,
+ 0.750F,0.275F,1.000F,1.000F,0.438F,0.125F,0.412F,0.045F,
+ "Saccharomyces cerevisiae", /* entry 2 (last; NumCaiSpecies is 3) */
+ "Sharp and Cowe (1991) Yeast 7:657-678",
+ 0.00F,
+ 0.113F,1.000F,0.071F,1.000F,1.000F,0.693F,1.000F,0.077F,
+ 0.117F,0.036F,0.000F,0.000F,1.000F,0.005F,0.000F,1.000F,
+ 0.006F,0.047F,0.245F,0.137F,0.003F,0.009F,1.000F,0.002F,
+ 0.039F,1.000F,1.000F,0.002F,0.003F,0.002F,0.007F,0.002F,
+ 0.823F,0.921F,0.053F,0.021F,1.000F,1.000F,1.000F,0.031F,
+ 0.003F,0.012F,0.135F,1.000F,1.000F,0.006F,1.000F,0.003F,
+ 1.000F,1.000F,0.554F,1.000F,0.831F,0.316F,1.000F,0.020F,
+ 0.002F,0.015F,1.000F,0.002F,0.018F,0.001F,0.016F,0.004F
+};
+
+
+AMINO_PROP_STRUCT amino_prop={ /* amino acid properties: 22 hydropathicity floats, then 22 aromaticity flags */
+ 0.00F, /* entry 0 of the hydropathicity values */
+ 2.80F,3.80F,4.50F,1.90F,4.20F, /* hydropathicity values, entries 1-5 */
+ -0.8F,-1.6F,-0.7F,1.80F,-1.3F, /* entries 6-10 */
+ 1.00F,-3.2F,-3.5F,-3.5F,-3.9F, /* entries 11-15; NOTE(review): if ordered like amino_acids, the leading 1.00F is the "*" slot - confirm */
+ -3.5F,-3.5F,2.50F,-0.9F,-4.5F, /* entries 16-20 */
+ -0.4F, /* entry 21 */
+ 0, /* entry 0 of the aromaticity flags */
+ 1,0,0,0,0, /* am i aromatic ? presumably 1 = yes */
+ 0,0,0,0,1,
+ 0,0,0,0,0,
+ 0,0,0,1,0,0 /* flags are set at entries 1, 10 and 19 (F, Y, W in amino_acids order) */
+};
+
+
+MENU_STRUCT Z_menu={ /* default value for every run-time option, given positionally in MENU_STRUCT member order */
+ FALSE, /* prog */
+ 'X', /* placeholder; per the original note this default is set in proc_commline to CU */
+ TRUE , /*verbose */
+ FALSE, /*totals */
+ TRUE, /*menu interface */
+ TRUE, /*warnings about sequence data are to be displayed */
+ FALSE, /*codons */
+ FALSE, /*fop */
+ FALSE, /*cai */
+ FALSE, /*cbi */
+ FALSE, /*bases */
+ FALSE, /*gc3s */
+ FALSE, /*gc */
+ FALSE, /*enc */
+ FALSE, /* silent base */
+ FALSE, /* Length silent codons */
+ FALSE, /* length in codons */
+ FALSE, /* hydrophobicity */
+ FALSE, /* aromaticity */
+
+ ' ', /* default separator (a space) - NOTE(review): codonW.hlp, menu 3 option 1, says the default is a comma */
+
+ FALSE, /* coa */
+
+ 0, /* genetic code */
+ 0, /* type of fop_species; if this indexes fop[], 0 is "Escherichia coli" - confirm */
+ 0, /* type of cai_species; if this indexes cai[], 0 is "Escherichia coli" - confirm */
+
+ FALSE, /* sequence type */
+ 'H', /* Sequence format; 'H' presumably = human readable - confirm */
+ "", /* current input file name */
+ "", /* current output file name */
+ "", /* current tidy outfile name */
+ "", /* current fop input file name */
+ "", /* current cai input file name */
+ "", /* current cbi input file name (comment originally read "sbi"; cf. the cbifile pointer below) */
+ "", /* log all stderr output to a file */
+ "", /* Null the string junk */
+ "", /* Null the string messages */
+
+ FALSE, /* was analysis run */
+ 24, /* current number of lines (height of screen) */
+
+ NULL, /* Null pointer input file */
+ NULL, /* Null pointer outputfile */
+ NULL, /* Null pointer tidyout file */
+ NULL, /* Null codon usage file */
+ NULL, /* Null pointer fopfile */
+ NULL, /* Null pointer caifile */
+ NULL, /* Null pointer cbifile */
+ NULL, /* Null pointer logfile */
+ NULL, /* assign NULL pointer to my_err */
+ NULL, /* Null pointer fcoa_in */
+ NULL /* Null pointer fcoa_out */
+};
+
+
+#else /* already been defined, so only declare the same objects as externals */
+
+extern AMINO_STRUCT *paa; /* pointers to the project structs; what each points at is set elsewhere */
+extern GENETIC_CODE_STRUCT *pcu;
+extern FOP_STRUCT *pfop;
+extern FOP_STRUCT *pcbi; /* same struct type as pfop */
+extern CAI_STRUCT *pcai;
+extern MENU_STRUCT *pm;
+extern COA_STRUCT *pcoa;
+extern AMINO_PROP_STRUCT *pap;
+
+#if defined (_WINDOWS) || defined (_DOS)
+ extern CAI_STRUCT /*_near*/ cai[]; /* some MS compilers */
+ extern GENETIC_CODE_STRUCT /*_near*/ cu[]; /* want these to be */
+ extern FOP_STRUCT /*_near*/ fop[]; /* declared as _near */
+#else /* with _near commented out, both branches declare the same three arrays */
+ extern CAI_STRUCT cai[];
+ extern GENETIC_CODE_STRUCT cu[];
+ extern FOP_STRUCT fop[];
+#endif
+ extern COA_STRUCT coa;
+ extern AMINO_STRUCT amino_acids;
+ extern AMINO_PROP_STRUCT amino_prop;
+ extern MENU_STRUCT Z_menu;
+
+ extern char Revision[]; /* version string */
+ extern char Update[];
+ extern char Author[];
+ extern char title[100];
+ extern char long_seq;
+ extern char last_base;
+
+ extern long int ncod[65]; /* 65 slots, the same length as the codon string list in amino_acids */
+ extern long int naa[23];
+ extern long int din[3][16]; /* presumably dinucleotide counts: 3 codon positions x 16 dinucleotides - confirm */
+ extern long int codon_tot;
+ extern long int master_ic;
+ extern long int fl_pos_start;
+ extern long int fl_pos_curr;
+ extern long int GC_TOT;
+ extern long int AT_TOT;
+ extern long int AA_TOT;
+ extern long int IUBC_TOT;
+ extern long int GAP_TOT;
+ extern long int num_sequence;
+ extern long int num_seq_int_stop;
+ extern long int non_std_char;
+ extern long int tot;
+ extern int last_aa;
+ extern int reg;
+ extern int valid_stops;
+ extern int valid_start;
+ extern int fram;
+ extern int *da;
+ extern int *ds;
+ extern int NumGeneticCodes;
+ extern int NumFopSpecies; /* defined as 8 in the defining branch above */
+ extern int NumCaiSpecies; /* defined as 3 in the defining branch above */
+#endif /* closes the define-or-declare conditional whose #if lies above this section */
+
+/****************** Function type declarations *****************************/
+
+FILE *open_file ( char *info, char *default_name, char *mode, /* returns a stdio stream pointer */
+ int verbose ); /* mode is a string, presumably an fopen()-style mode - confirm in open_fil.c */
+
+int* how_synon ( void ); /* returns int*; ownership of the pointed-to storage is not visible here */
+int* how_synon_aa ( void ); /* same signature as how_synon */
+
+
+
+int codon_usage_tot( char *seq, long int how_many);
+int ident_codon ( char *codon );
+int codon_usage_out( FILE *fblkout, long int *ncod,int last_aa, /* ncod/naa parameters share their names with the globals ncod[65] and naa[23] */
+ int valid_stops, char *info);
+int rscu_usage_out ( FILE *fblkout, long int *ncod,long int *naa);
+int raau_usage_out ( FILE *fblkout, long int *naa );
+int aa_usage_out ( FILE *fblkout, long int *naa );
+int cai_out ( FILE *foutput, long int *ncod);
+int cbi_out ( FILE * foutput, long int *ncod, long int *naa );
+int fop_out ( FILE *foutput, long int *ncod);
+int hydro_out ( FILE *foutput, long int *naa );
+int aromo_out ( FILE *foutput, long int *naa );
+int toutput ( FILE *fblkout, char *seq );
+int output_long ( FILE *fblkout, char *seq );
+int cutab_out ( FILE *fblkout, long *ncod, long *naa); /* long is the same type as the long int used elsewhere */
+int dinuc_out ( FILE *fblkout, char *title );
+int fileclose ( FILE **file_pointer ); /* takes the address of a FILE pointer, so the callee can change the caller's pointer */
+int clean_up ( long int *ncod,long int *naa );
+int initilize_point( char code , char fop_type, char cai_type ); /* sic: the spelling is part of the declared name */
+int initilize_coa ( char code ); /* sic */
+int proc_comm_line ( int *argc, char ***arg_list); /* argument count and list are passed by address, so they may be modified */
+int my_exit ( int exit_value, char *message );
+int printinfo ( void);
+
+int dinuc_count ( char *seq , long int tot );
+int tidy ( FILE *finput , FILE *foutput , FILE *fblkout,
+ FILE *fcoaout ) ;
+int chelp ( char *help );
+
+long int codon_error( int last_aa, int valid_stops, char *title,
+ char error_level);
+
+float enc_out ( FILE *foutput, long int *ncod, long int *naa);
+double inertot ( void);
+
+char* get_aa ( int one_or_3_letter , char* the_dna_word);
+char* garg ( int argc, char *argv[], const char *targ, int mode);
+char coa_raw_out ( FILE *fcoaout, long *ncod, long *naa, char *title);
+char WasHelpCalled ( char * input);
+
+void sorted_by_axis1( double *ax1, int *sortax1, int lig);
+void highlow ( long int *low , long int *high ,FILE *summ );
+void menu_1 ( void); /* menu_1 .. menu_8: presumably the handlers for Menus 1-8 described in codonW.hlp - confirm in menu.c */
+void menu_2 ( void);
+void menu_3 ( void);
+void menu_4 ( void);
+void menu_5 ( void);
+void menu_6 ( void);
+void menu_7 ( void);
+void menu_8 ( void);
+void menu_coa ( void);
+void welcome ( void);
+void menu_initial ( void);
+
+void asummary ( void);
+void tester ( void);
+void vecalloc ( double **vec, int n); /* vec is passed by address, so the callee can set the caller's pointer */
+void writevec ( double *v1, FILE *fic);
+void lecmat ( double **tab, char *nfic);
+void freetab ( double **tab); /* presumably releases what taballoc allocates - confirm in coresp.c */
+void freevec ( double *vec); /* presumably releases what vecalloc allocates - confirm in coresp.c */
+void taballoc ( double ***tab, int l1, int c1); /* tab is passed by address, like vec in vecalloc */
+void lecvec ( double *v1, char *nfic);
+void ecrmat ( double **tab, char *nfic);
+void ecrvec ( double *v1, char *nfic);
+void scalmat ( double **tab, double r);
+void scalvec ( double *v1, double r);
+void sqrvec ( double *v1);
+void prodmatAAtB ( double **a, double **b);
+void prodmatABC ( double **a, double **b, double **c);
+void prodmatAtAB ( double **a, double **b);
+void ecrmatred ( double **tab, int c1, char *nfic);
+void readvec ( double *v1, FILE *fic);
+void lecvalpro ( double *v1, char *nfic);
+void writescal ( double r, FILE *fic);
+void editvalpro ( FILE *ficlist, double *vp, int n, double s);
+void DiagoRC ( FILE *summary);
+void gc_out ( FILE *foutput, FILE *fblkout, int which);
+void base_sil_us_out( FILE *foutput, long int *ncod,long int *naa);
+void bintext ( char *nfice , char *nfics);
+void select_coa ( char choice);
+void textbin ( char *filein , char *fileout);
+void colmout ( char *nfice, char *nfics,AMINO_STRUCT *paa, /* the paramater named paa has the same name as the global pointer paa */
+ FILE *summary);
+void output ( char *seq , FILE *foutput , FILE* fblkout ,
+ FILE *fcoaout);
+void rowout ( char *nfice, char *nfics, char *ncout, FILE *summary);
+void PrepAFC ( char *nfic);
+void inertialig ( char *inertia_out, char *filen, FILE *summary);
+void inertiacol ( char *inertia_out, FILE *summary);
+void selectcol ( char *nfic , double *col, int numcol);
+void gen_cusort_fop ( int *sortax1, int lig , FILE *fnam ,FILE *summ );
+void dot ( int y , long int period );
+void DiagoComp ( int n0, double **w, double *d, int *rang);
+void suprow ( int num_seq,char *nficvp,char *nfictasup,
+ char *nficlisup,char *option, FILE *summary);
+void main_menu ( int c );
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/codonW.hlp b/codonW.hlp
new file mode 100755
index 0000000..3bb3fe2
--- /dev/null
+++ b/codonW.hlp
@@ -0,0 +1,502 @@
+#main_menu#
+
+CodonW is a package for codon usage analysis. It was designed to
+simplify Multivariate Analysis (MVA) of codon usage. The MVA method
+employed in CodonW is correspondence analysis (COA), the most widely
+used codon usage MVA method. COA can be performed on codon usage,
+relative synonymous codon usage or amino acid usage. Integrated into
+CodonW is the ability to work with genetic codes other than the
+universal code. Other indices of codon usage and codon bias,
+dinucleotide bias and mutation bias are also analysed by CodonW.
+
+Modes of use:
+a) There are an extensive number of command line options available if
+your platform supports command line parameters. For more information
+type
+
+codonw -help
+
+b) Maximum functionality is obtained by running CodonW using the
+interactive menus. Each menu has its own online help.
+
+c) CodonW also emulates a large number of useful utility programs used
+in our labs to aid the analysis of codon usage. If the first argument
+to the CodonW program is one of a recognised list of programs (rscu, cu,
+aau, raau, tidy, reader, cutab, cutot, transl, bases, base3s, dinuc,
+cai, fop, gc3s or enc), CodonW assumes that you want to accomplish or
+calculate one of these simpler tasks/indices and bypasses the menu
+system. For a fuller description of what these pseudo programs
+calculate, see the README file.
+
+To Run CodonW:
+
+a) You must load a file containing all your sequences in fasta/Pearson
+format, either from the command line or using menu 1.
+
+b) You may change many of the default values using menu 3.
+
+c) Select which codon usage indices to measure (menu 4). Choose the type
+of correspondence analysis, if any (menu 5). Other data analysis options
+may also be selected using menu 8.
+
+d) Return to the first (main) menu and type R to run an analysis.
+
+Output files from the correspondence analysis have the extension .coa.
+See summary.coa for an overall explanation of what is being generated by
+the analysis.
+
+Other output will be stored in the files that you choose using menu 1 or
+as specified on the command line. Depending on the options chosen there
+will either be one or two result files; usually they will have the
+extensions .out and .blk.
+
+//
+#open_file_query#
+
+Open file dialog.
+
+You have been requested to choose a file for the analysis. If the
+request is for an input filename, this file must contain all your
+sequences that you wish to analyse in a sequential fasta formatted file.
+That is, all sequences should be in one file and individual sequences
+separated by a single header line that starts with an angle bracket
+character ">".
+
+If you use GCG, the output from the program tofasta is acceptable.
+
+If prompted for either the "bulk" or "output" file names, these
+filenames will be used to record the results of the analysis. These
+files will be opened for writing which may destroy the content of the
+files, should the files already exist. So if a file already exists with
+the name you have chosen, you will be asked whether you wish to
+overwrite the file, append the results to the file, or choose a new
+filename (that is, unless you have chosen the option to overwrite files
+silently).
+
+//
+#File_not_found#
+
+File not found
+
+The name of the input file that you have chosen does not exist in the
+current working directory. Either choose a new filename or give the
+fully qualified filename (e.g. e:\codon\cu\input.dat).
+
+Depending on the system that you are using, the names of all files in
+the current working directory may or may not be displayed when a file
+cannot be located.
+
+//
+#file_exists#
+
+File exists
+
+If the filename that you have chosen as the output file exists, it will
+be deleted if opened for writing. You now have the choice of whether or
+not to overwrite this file (thus deleting the original). If you choose
+not to overwrite you have the further choice of either appending the
+results to the file you originally chose or selecting a new filename.
+
+(Note: If you select overwrite silently from the defaults menu you will
+not be prompted if a file of the same name already exists; it will be
+overwritten.)
+
+//
+#file_append#
+
+File Append
+
+You decided not to overwrite the file. You can either append the results
+to this file or choose a new filename.
+
+//
+#menu_2#
+
+Menu 2 Purifying sequences menu
+
+This menu was originally used to eliminate sequences from data that had
+high sequence identity to other sequences in the dataset and thus might
+bias the output results.
+
+This functionality is not currently portable and is not being made
+available at present. Try using the NCBI program nrdb or the EGCG9
+program clean_up to remove identical or almost identical sequences.
+
+//
+#menu_3#
+
+Menu 3 Defaults menu
+
+To improve flexibility, many of the default values used internally by
+CodonW (defined in the header file codonW.h) can be altered at runtime
+using this menu. Ten options can be customised.
+
+Option (1) Change ASCII delimiter in output. The default ASCII delimiter
+used to separate information in machine readable output files is a
+comma. The delimiter can be changed via this option to either the tab or
+space character.
+
+Option (2) Run silently. This option can be used when running from a
+script file or as a batch job. If TRUE, it suppresses warnings about
+overwriting files, the prompting for a personal choice of Fop, CBI or
+CAI values (although these can still be given via command line
+arguments) and the pause after each page of error or warning messages
+has been displayed.
+
+Option (3) Log warnings/information to a file. The default value for
+this option is set as FALSE, in which case all warning or error messages
+generated by CodonW are written to the screen via the standard error
+stream. When TRUE, the errors are redirected to a log file:- you will be
+prompted for the filename for this log file. This option is useful if
+there are a large number of sequences in the input file or there are
+many warning messages.
+
+Option (4) Number of lines on screen. This is used to set the screen
+length, which is used during screen refreshing and the pagination of
+error messages.
+
+Option (5) Change the genetic code. By default, CodonW assumes the
+universal genetic code when translating and processing codons. This
+option allows alternative genetic codes to be selected.
+
+Option (6) Change the Fop/CBI values. To calculate either the CBI or Fop
+indices, a set of optimal codons is required; by default the optimal
+codons of E. coli are assumed. This option displays a submenu which
+lists eight species where optimal codons have been identified. When
+calculating the Fop/CBI of genes from these species the appropriate set
+of codons should be selected. Personal selections of optimal codons can
+be input at runtime.
+
+Option (7) Change the CAI values. To calculate the codon adaptation
+index it is necessary to assign fitness values to each codon; by default
+the fitness values of E. coli codons are assumed. However, these values
+are very species-specific and so using E. coli fitness values to
+calculate CAI values for other species is nonsensical. Before assigning
+fitness values to a codon a set of genes which have been experimentally
+verified to be highly expressed must be identified. Such sets have been
+created for relatively few species. This menu lists the species where a
+reference set of highly expressed genes is known, and fitness values
+assigned. Personal selections of fitness values can be input at runtime
+if calculating CAI.
+
+Option (8) Toggle human or machine-readable output. The default format
+for most CodonW output files is human readable. Machine-readable output
+is fixed width numerical data separated by an ASCII delimiter. This
+format is readily imported into a wide range of statistical and
+graphical analysis programs but not easily read by eye. Human readable
+output is more verbose but easier to read. The output formats for codon
+usage, tabulation of codon usage, relative synonymous codon usage and
+base compositions are the most radically affected by this option.
+
+Option (9) Toggle output for each or all genes. By default, CodonW
+processes each gene individually. When the option "all genes" is
+selected, sequences are concatenated and processed as a single sequence.
+This option can be used to calculate total codon or amino acid usage,
+the average G+C content, Fop, etc.
+
+Option (10) Correspondence analysis defaults. This option allows access
+to the "advanced correspondence analysis" menu. This menu is normally
+accessed as a submenu of "Correspondence analysis" (Menu 5), but is
+included here so that all runtime options are accessible via the "Change
+default values" menu.
+
+//
+#menu_4#
+
+Menu 4 Codon Usage Indices
+
+This menu is used to choose the indices calculated by CodonW; by default
+only the G+C content of the sequence is selected. The calculation of
+these indices (except G+C content) is dependent on the genetic code
+selected under Menu 3. More than one index may be calculated at once.
+
+ Option (1) Codon Adaptation Index (CAI). CAI measures the relative
+adaptation of a gene to the codon usage of highly expressed genes. The
+relative adaptiveness (w) of a codon is the ratio of the usage of that
+codon to that of the most abundant codon for the same amino acid. The
+relative adaptiveness of codons (albeit for a limited choice of species)
+can be selected from Menu 3.
+
+Option (2) Frequency of Optimal codons (Fop). This index is the ratio of
+optimal codons to synonymous codons (genetic code dependent). Optimal
+codons for several species are in-built and can be selected using Menu
+3. By default, the optimal codons of E. coli are assumed. The user may
+also enter a personal choice of optimal codons. If rare synonymous
+codons have been identified, there is a choice of calculating the
+original Fop index or a modified Fop index. Fop values for the original
+index are always between 0 (where no optimal codons are used) and 1
+(where only optimal codons are used). When calculating the modified Fop
+index, any negative values are adjusted to zero.
+
+Option (3) Codon Bias Index (CBI). The codon bias index is a measure of
+directional codon bias. It measures the extent to which a gene uses a
+subset of optimal codons.
+
+Option (4) The effective number of codons (NC). This index is a simple
+measure of overall codon bias and is analogous to the effective number
+of alleles measure used in population genetics. Knowledge of the optimal
+codons or a reference set of highly expressed genes is unnecessary when
+calculating this index.
+
+Option (5) G+C content of the gene. This is calculated as the frequency
+of nucleotides that are guanine or cytosine.
+
+Option (6) G+C content 3rd position of synonymous codons (GC3s). This is
+the fraction of codons, synonymous at the third codon position, which
+have either a guanine or cytosine at that third codon position.
+
+Option (7) Silent base composition. Selection of this option calculates
+four separate indices, i.e. G3s, C3s, A3s & T3s. Although correlated
+with GC3s, this index is not directly comparable with it. It quantifies
+the usage of each base at synonymous third codon positions.
+
+Option (8) Length silent sites (Lsil). This is the frequency of
+synonymous codons within each gene.
+
+Option (9) Length amino acids (Laa). This is the number of translatable
+codons.
+
+Option (10) Hydropathicity of protein. This is the general average
+hydropathicity (GRAVY) score for the hypothetical translated gene
+product. It is the arithmetic mean of the hydropathic indices of the
+amino acids in that product.
+
+Option (11) Aromaticity score of protein. This is the frequency of
+aromatic amino acids (Phe, Tyr, Trp) in the hypothetical translated gene
+product.
+
+The hydropathicity and aromaticity protein scores are indices of amino
+acid usage. The strongest trend in the variation in the amino acid
+composition of E. coli genes is correlated with protein hydropathicity,
+the second strongest trend is correlated with gene expression, while the
+third is correlated with aromaticity.
+//
+#menu_5_coa#
+
+Menu 5 Correspondence analysis
+
+In many unicellular organisms, protein coding genes have non-random
+usage of synonymous codons (see Andersson and Kurland (1990) and Sharp
+et al. (1993) for reviews). Correspondence analysis uses contingency
+tables (counts of the joint occurrences of rows and columns of a table).
+Therefore, the sequence data must be transformed into a contingency
+table. The frequency of each codon (or amino acid) is tabulated for each
+gene. This is then converted into a Euclidean measurement of the
+distance between the rows or columns. CodonW calculates a scaled
+distance measurement as recommended by Grantham and co-workers (Grantham
+et al 1981).
+
+Analysis of a large number of distances would ordinarily be very time
+consuming. Correspondence analysis provides a simple visualisation of
+these distances by projecting the points from their original
+multidimensional space onto lower dimensions, with genes with similar
+distances plotted as neighbours. In addition to calculating the
+coordinates for the projection of these points, correspondence analysis
+(as implemented in CodonW) also calculates the total inertia of the
+data, together with the eigenvalue and relative variation explained by
+each axis. CodonW can also quantify the absolute and relative
+contribution of each gene, codon or amino acid on each identified trend.
+To limit variation due to stochastic noise, it is recommended that short
+genes (less than 50 codons) be excluded from a correspondence analysis.
+
+The correspondence analysis menu (Menu 5) has four options, the default
+option being not to generate a correspondence analysis, i.e. Do not
+perform a COA.
+
+Option (1) Correspondence analysis of codon usage. This generates a
+correspondence analysis on the total codon usage. By default, this is on
+synonymous codons, although the advanced menu may be used to adjust
+which codons are included/excluded. If analysing synonymous codon usage,
+the analysis has 58 degrees of freedom.
+
+Option (2) Correspondence analysis of RSCU. This generates a
+correspondence analysis of relative synonymous codon usage (RSCU). RSCU
+is calculated as the ratio of the observed frequency of a codon to the
+frequency expected under unbiased codon usage within a synonymous codon
+group. Correspondence analysis of RSCU is useful because variation
+caused by unequal usage of amino acids is removed; however the number
+of degrees of freedom is reduced to 40.
+
+Option (3) Correspondence analysis of Amino Acid usage. This generates a
+correspondence analysis of amino acid composition, with 19 degrees of
+freedom.
+
+Option (4) Do not perform a correspondence analysis. This is the default
+option.
+//
+#menu_6#
+
+Menu 6 Basic Stats
+
+This menu was originally designed to calculate some basic statistics on
+the output from the various codon usage indices.
+
+This functionality is not currently portable and is not being made
+available at present.
+
+
+//
+#menu_7#
+
+Menu 7 Relaxation (almost)
+
+This menu was designed to help teach the genetic code(s). It asks
+various random questions about codon translation and codon usage. The
+genetic code used as the basis for the correct answers can be changed
+under the default menu (Menu 3).
+
+//
+
+#fun#
+
+Teach yourself the genetic codes and codon usage.
+
+To exit type "quit" or "exit" (without the quotation marks).
+
+If you don't know the answer to the question, you can type "?" (without
+the quotation marks).
+You will then be prompted with the correct answer. Beware:- you will be
+penalised for incorrect answers :).
+
+The questions are:
+What is the three-letter name?
+(You must convert the one-letter code given to the three-letter code.)
+
+How synonymous is Amino Acid?
+(How many synonyms are there for this amino acid?)
+
+Name the Amino Acid?
+(Which amino acid is coded by this codon?)
+
+//
+
+#menu_8_blk#
+
+Menu 8 Bulk output options in CodonW
+
+Non-correspondence analysis output from CodonW which cannot easily be
+summarised as a single index is bulk output. Under this menu there are
+10 options. Multiple options cannot be selected simultaneously. Each
+time this menu is selected you will be prompted for an alternative
+output filename.
+
+Option (1) Fasta format output of DNA sequence. The input sequences are
+reformatted and written to a file in a Fasta /Pearson-like format.
+
+Option (2) Reader format output of DNA sequence. This format is derived
+from the fasta format, except that the sequence is written as codons
+with three bases separated by a space, and the size of the sequence is
+recorded at column 70.
+
+Option (3) Translate input file to amino acids. This translates DNA to
+amino acids using the selected genetic code. The amino acids are written
+in a Fasta/Pearson compatible format.
+
+Option (4) Codon Usage. This is the default option. The frequency of
+each codon is written to a file in four rows with 16 columns per row.
+The codons are written in sequential numerical order, left to right.
+
+Option (5) Amino acid usage. The frequency of each amino acid,
+untranslatable codons and stop codons are recorded, one row per gene and
+23 columns per row. The first column contains a unique gene description,
+the second column records number of untranslatable codons, the third and
+subsequent columns summarize the amino acid and termination codon usage.
+
+Option (6) Relative Synonymous Codon Usage (RSCU). Relative synonymous
+codon usage is calculated as the ratio of the observed frequency of a
+codon to the frequency expected if codon usage were random.
+
+Option (7) Relative Amino acid usage (RAAU). Relative Amino acid usage
+is the frequency of the amino acid relative to the total amino acid
+usage.
+
+Option (8) Dinucleotide frequencies. The frequency of the 16
+dinucleotides is calculated in each of the three possible codon
+positions. The data are recorded with one row per position and 16
+columns per row.
+
+Option (9) Base composition analysis. This option records the frequency
+of nucleotides in each codon position. It also reports GC, GC3s and GCns
+(GC content excluding synonymous third position codons).
+
+Option (10) No output written to file. This option is useful when
+working with large datasets and disk storage or disk access is a
+limiting factor. This option suppresses all the output to the bulk
+output file.
+//
+
+#menu_coa#
+
+Advanced Correspondence Analysis menu.
+
+This menu allows much greater control over the correspondence analysis.
+
+ Option (1) Unselect or select. This menu changes slightly depending on
+whether correspondence analysis is of amino acid or codon usage. It
+simplifies the selection of the codons/amino acids that are to be
+included in the COA. This allows the user to override the default
+selection, which, if the COA is of codon usage, is the exclusion of
+non-synonymous codons and termination codons.
+
+Option (2) Change the number of axes. The number of axes generated by a
+correspondence analysis is N-1, where N is either the number of genes or
+columns (whichever is the lesser in value). However, the default is to
+generate information about the first four axes (or trends). This option
+allows the user to record coordinates on any number of axes, up to the
+maximum generated by the analysis.
+
+Each axis generated by correspondence analysis is represented by a
+multidimensional vector. The position of a gene on any axis is the
+product of that gene's codon usage and the axis vector. As the vector is
+itself a product of the codon usage, the vectors can be affected by
+unusual codon usage. An analysis of nuclear and plasmid genes would be
+difficult, as the codon usage of each would perturb the other. Each
+dataset could be analysed individually but as the vectors for the axes
+would be different, it would be difficult to make direct comparisons
+between the analyses. To overcome this problem it is necessary to
+generate the COA vectors using one dataset and then to apply the same
+vectors to another. Thus direct comparison between the ordination of
+genes is possible. In CodonW, this is possible by using the following
+option (Option 3).
+
+Option (3) Add additional genes after correspondence analysis. The user
+is prompted for the file containing the additional sequences, to which
+the vectors are to be applied. The vectors are calculated, as normal,
+using the genes contained in the standard input file (Menu 1). The
+coordinates and any additional information about these original genes
+are recorded as normal. Next the additional genes are read in and the
+original vectors applied to them. The ordinations of these additional
+genes are then appended to the COA output files (for an explanation
+about the COA output files see below).
+
+Option (4) Toggle level of correspondence analysis output. By default
+this option is set to "normal" but can be toggled to "exhaustive". If
+the exhaustive output option is selected, then in addition to the
+standard information about gene and codon/amino acid ordination,
+additional information about inertia of the rows and columns is
+generated. This additional information includes the absolute
+contribution of the inertia of each row or column to each of the
+recorded axes, and the fraction of the variation within each row or
+column explained by each axis.
+
+Option (5) Change number of genes used to identify optimal codons.
+Correspondence analysis of either RSCU or codon usage where the major
+trend correlates with gene expression can be used to identify optimal
+codons. This is achieved by comparing the codon usage of the genes that
+lie at the extremes of the principal trend (axis 1). By default this is
+the top and bottom 10% of genes (as defined by axis 1 ordination). Using
+this option this can be set to a percentage between 1% and 50%, or to an
+absolute number of genes.
+//
+
+#select#
+
+Codon or Amino acid selection
+
+The codons or amino acids that will NOT be analysed in this
+correspondence analysis are surrounded by curly brackets. The choice of
+which codons/amino acids are to be excluded can be changed. Simply
+give the number associated with each codon/amino acid for which you want
+to change the status.
+
+//
diff --git a/codonWinstall b/codonWinstall
new file mode 100755
index 0000000..dd4bc5b
--- /dev/null
+++ b/codonWinstall
@@ -0,0 +1,271 @@
+#!/bin/sh
+# Adapted from the SRS5 srsinstall script
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# work out which action was requested on the command line
+#
+
+ERR='codonWinstall: Stopping due to Error'
+
+# No argument at all means a complete installation; otherwise the first
+# argument selects the action and anything unknown leads to the usage text.
+if [ "$#" = 0 ]; then
+    option="all"
+else
+    case "$1" in
+        all|clean|codonw|links|cleanall)
+            option="$1"
+            ;;
+        realclean)
+            option="cleanall"          # realclean is treated as cleanall
+            ;;
+        *)
+            option="usage"
+            echo "unknown option '$1'"
+            ;;
+    esac
+fi
+
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# show the usage text and give up when the option was not recognised
+#
+
+if [ "$option" = 'usage' ]; then
+    cat << END
+
+ Usage: ./codonWinstall option
+
+ Options:
+
+ all does a complete installation
+ codonw compile codonw only .. no linked programmes
+ links generate links to pseudo programmes
+ clean removes all object files
+ cleanall removes all the object files, codonW, linked files and Makefile
+ realclean removes all the object files, codonW, linked files and Makefile
+
+END
+    exit 1
+fi
+
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+if [ "$option" = 'all' ]; then
+    echo "... starting installation of codonW"
+fi
+
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# put correct "make" and "cc" commands
+#
+
+OS=`uname`
+# a SunOS whose release number starts with 5 or 6 is called 'Solaris' below
+if [ "$OS" = "SunOS" ]; then
+ case "`uname -r`" in
+ [56]*) OS='Solaris' ;;
+ esac
+fi
+
+# ECHON is the command used to print a prompt without a trailing newline.
+# some echo commands do not support -n; generally /usr/bin/echo doesn't but
+# /usr/ucb/echo does, so one can hardwire to be independent of users path.
+if [ -r /usr/ucb/echo ]; then
+ ECHON="/usr/ucb/echo -n"
+else
+ ECHON="echo -n"
+fi
+
+# OSF/1 v4.0 /usr/ucb is symlink to /usr/bin, but /bin/sh has builtin -n
+if [ "$OS" = "OSF1" ]; then
+ case "`uname -r`" in
+ V[4]*) ECHON="echo -n"
+ # or can
+ #CMD_ENV=bsd ; export CMD_ENV
+ ;;
+ esac
+fi
+
+# we did our best, but now let's test: if $ECHON prints any byte at all,
+ECHONT="`$ECHON | wc -c`"   # -n is not honoured; use plain echo and end
+if [ $ECHONT != 0 ]; then   # each prompt with "\c" (ECHONEND) instead.
+ ECHONEND="\c"              # NOTE(review): $ECHONT is left unquoted, so
+ ECHON=echo                 # blanks around the count are split off --
+else                        # confirm before quoting it
+ ECHONEND=""
+fi
+
+
+if [ ! -f "Makefile" ]; then
+
+    $ECHON "enter the make command [make]: $ECHONEND"
+    read makeCom
+    if [ "$makeCom" = "" ]; then makeCom='make' ; fi
+
+    # for OSF1 need to know if it is osf1 make or gnu make
+    if [ "$OS" = "OSF1" ]; then
+        $ECHON "is this OSF1 make [y]: $ECHONEND"
+        read OSFmake
+        if [ "$OSFmake" = "" ]; then OSFmake='y' ; fi
+    fi
+
+    if [ "$OS" = "SunOS" -o "$OS" = "Solaris" ]; then
+        ccComDef='gcc'
+    else
+        ccComDef='cc'
+    fi
+
+    $ECHON "enter the cc command [${ccComDef}]: $ECHONEND"
+    read ccCom
+
+    if [ "$ccCom" = "" ]; then ccCom="$ccComDef" ; fi
+
+    echo 'choose between optimised code, or code for debugging'
+    $ECHON "optimised code [y]: $ECHONEND"
+    read optimCom
+    if [ "$optimCom" = "" ]; then optimCom="y" ; fi
+
+
+    $ECHON "enter the link command [ln]: $ECHONEND"
+    read ccLn
+
+    if [ "$ccLn" = "" ]; then ccLn="ln -f" ; fi
+
+
+    $ECHON "Do you want hard or soft links [hard]: $ECHONEND"
+    read ccLnflag
+
+    if [ "$ccLnflag" = "" ]; then ccLnflag="hard" ; fi
+
+    # Every recipe line of a Makefile has to start with a TAB.  The TAB is
+    # kept in a variable so that it cannot be lost when white space in this
+    # script is rewritten by an editor or a mailer.
+    TAB=`printf '\t'`
+
+
+
+    echo "...creating makefile for '$OS'"
+
+    #better rename the old makefiles
+    if [ -f "Makefile" ]; then \mv Makefile Makefile.pre ; fi
+    if [ -f "makefile" ]; then \mv makefile makefile.pre ; fi
+    touch Makefile
+
+    # Add the logical parts of the make file.  Text of the form $(NAME) is
+    # a make variable: it is quoted or escaped so the shell leaves it alone.
+    if [ "$OS" = "OSF1" ]; then
+        echo 'override cflags = $(CFLAGS) -g' >> Makefile
+    fi
+
+    if [ "$optimCom" = "y" ]; then
+        cflags="-O"
+    else
+        cflags="-g -DDEBUG"
+    fi
+
+    if [ "$ccLnflag" = "hard" ]; then
+        lncmd=$ccLn
+    else
+        lncmd="$ccLn -s"
+    fi
+
+    link_prog="rscu cu aau raau tidy reader cutab cutot transl bases base3s dinuc cai fop gc3s gc cbi enc"
+
+cat <<EOF >> Makefile
+
+objects = codon_us.o codons.o open_fil.o commline.o menu.o tester.o coresp.o
+linked = $link_prog
+
+CC=$ccCom
+CFLAGS= $cflags -DBSD
+LN=$lncmd
+
+
+all: codonw links
+
+codonw: \$(objects)
+${TAB}\$(CC) \$(CFLAGS) \$(objects) -o codonw -lm
+
+clean:
+${TAB}\rm -f \$(objects)
+
+cleanall:
+${TAB}\rm -f \$(objects) codonw Makefile \$(linked)
+
+realclean:
+${TAB}\rm -f \$(objects) codonw Makefile \$(linked)
+
+codon_us.o: codon_us.c codonW.h
+${TAB}\$(CC) -c \$(CFLAGS) codon_us.c
+
+menu.o: menu.c codonW.h
+${TAB}\$(CC) -c \$(CFLAGS) menu.c
+
+codons.o: codons.c codonW.h
+${TAB}\$(CC) -c \$(CFLAGS) codons.c
+
+coresp.o: coresp.c codonW.h
+${TAB}\$(CC) -c \$(CFLAGS) coresp.c
+
+open_fil.o: open_fil.c codonW.h
+${TAB}\$(CC) -c \$(CFLAGS) open_fil.c
+
+commline.o: commline.c codonW.h
+${TAB}\$(CC) -c \$(CFLAGS) commline.c
+
+tester.o: tester.c codonW.h
+${TAB}\$(CC) -c \$(CFLAGS) tester.c
+
+links: codonw
+EOF
+
+    # one link rule per auxiliary program name
+    for file in $link_prog
+    do
+        echo "${TAB}\$(LN) codonw $file" >> Makefile
+    done
+
+    echo >> Makefile
+
+fi
+
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# Run make for the requested action.  The make command entered while the
+# Makefile was generated is used when there is one; a failing make stops
+MAKE_CMD="${makeCom:-make}"     # the script with the message held in ERR.
+if [ "$option" = 'clean' ]; then
+    echo '...cleaning the old object files '
+    echo "...$MAKE_CMD clean"
+    $MAKE_CMD clean || { echo "$ERR" ; exit 1 ; }
+fi
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+if [ "$option" = 'cleanall' -o "$option" = 'realclean' ]; then
+    echo '...cleaning the old object files, linked files and executables'
+    echo "...$MAKE_CMD realclean "
+    $MAKE_CMD realclean || { echo "$ERR" ; exit 1 ; }
+fi
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+if [ "$option" = 'all' ]; then
+    echo '...Starting to make codonW, with auxillary programs '
+    echo "...$MAKE_CMD all"
+    $MAKE_CMD all || { echo "$ERR" ; exit 1 ; }
+fi
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+if [ "$option" = 'codonw' ]; then
+    echo '...checking codonW is up to date'
+    echo "...$MAKE_CMD codonw"
+    $MAKE_CMD codonw || { echo "$ERR" ; exit 1 ; }
+fi
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+if [ "$option" = 'links' ]; then
+    echo '... Linking auxilliary programs to '
+    echo "...$MAKE_CMD links"
+    $MAKE_CMD links || { echo "$ERR" ; exit 1 ; }
+fi
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+
+
+
diff --git a/codon_us.c b/codon_us.c
new file mode 100755
index 0000000..2328031
--- /dev/null
+++ b/codon_us.c
@@ -0,0 +1,2159 @@
+/**************************************************************************/
+/* CodonW codon usage analysis package */
+/* Copyright (C) 2005 John F. Peden */
+/* This program is free software; you can redistribute */
+/* it and/or modify it under the terms of the GNU General Public License */
+/* as published by the Free Software Foundation; version 2 of the */
+/* License, */
+/* */
+/* This program is distributed in the hope that it will be useful, but */
+/* WITHOUT ANY WARRANTY; without even the implied warranty of */
+/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
+/* GNU General Public License for more details. */
+/* You should have received a copy of the GNU General Public License along*/
+/* with this program; if not, write to the Free Software Foundation, Inc.,*/
+/* 675 Mass Ave, Cambridge, MA 02139, USA. */
+/* */
+/* */
+/* The author can be contacted by email (jfp#hanson-codonw at yahoo.com Anti-*/
+/* Spam please change the # in my email to an _) */
+/* */
+/* For the latest version and information see */
+/* http://codonw.sourceforge.net */
+/**************************************************************************/
+/* */
+/* ----------------------- codon_us.C ------------------------ */
+/* This file contains most of the codon usage analysis subroutines */
+/* except for the COA analysis */
+/* Internal subroutines and functions */
+/* initilize_point assigns genetic code dependent parameters to structs*/
+/* initilize_coa decides which cod/AA to include in a COA by default */
+/* codon_usage_tot Counts codon and amino acid usage */
+/* ident_codon Converts codon into a numerical value in range 1-64 */
+/* codon_usage_out Write out Codon Usage to file */
+/* codon_error Called after all codons read, checks data was OK */
+/* rscu_usage_out Write out RSCU */
+/* raau_usage_out Write out normalised amino acid usage */
+/* aa_usage_out Write out amino acid usage */
+/* how_synon Calculates how synonymous each codon is */
+/* how_synon_aa Calculates how synonymous each AA is */
+/* clean_up Re-zeros various internal counters and arrays */
+/* base_sil_us_out Write out base composition at silent sites */
+/* cai_out Write out CAI usage */
+/* cbi_out Write out codon bias index */
+/* fop_out Write out Frequency of Optimal codons */
+/* enc_out Write out Effective Number of codons */
+/* gc_out Writes various analyses of base usage */
+/* dot(,X) prints a period every X times it is called */
+/* get_aa converts a three base codon into a 1 or 3 letter AA */
+/* cutab_out Write a nice tabulation of the RSCU+CU+AA */
+/* dinuc_count Count the dinucleotide usage */
+/* dinuc_out Write out dinucleotide usage */
+/* coa_raw_out Write out raw codon usage for use by COA analysis */
+/* sorted_by_axis1 Sorts genes according to their axis one position */
+/* gen_cusort_fop COA specific, write out cu of genes by axis1 posit. */
+/* highlow Used sorted cu to calculate high_low chi sq. contin */
+/* hydro_out Write out Protein hydropathicity */
+/* aromo_out Write out Protein aromaticity */
+/* */
+/* */
+/* External subroutines to codon_us.c */
+/* my_exit Controls exit from CodonW closes any open files */
+/* tidy reads the input data */
+/* output called from tidy to decide what to do with the data */
+/* toutput handles the reformatting and translation of seqs */
+/* output_long if sequence is very long then process what we know */
+/* and write sequence to disk in fragments */
+/* open_file Open files, checks for existing files */
+/* fileclose Closes files and returns a NULL pointer or exits */
+/* */
+/**************************************************************************/
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <math.h>
+#include <limits.h>
+#include "codonW.h"
+/********************* Initilize Pointers**********************************/
+/* Various pointers to structures are assigned here dependent on the */
+/* genetic code chosen. */
+/* paa points to a struct containing Amino Acid names */
+/* pap points to amino acid properties */
+/* pcai points to Adaptation values used to calc CAI */
+/* pfop points to a struct describing optimal codons */
+/* pcbi points to the same structure as pfop */
+/* pcu points to data which has the translation of codons */
+/* ds is a struct describing how synonymous a codon is */
+/* da is a struct describing the size of each AA family */
+/* pcoa points to a struct that describes columns to be */
+/* included/excluded from any COA analysis */
+/**************************************************************************/
+int initilize_point(char code, char fop_species, char cai_species)
+{
+    /* tables that are the same whatever genetic code is in use */
+    paa  = &amino_acids;
+    pap  = &amino_prop;
+    pcoa = &coa;
+    /* species dependent tables; CBI shares the optimal codon table of Fop */
+    pcai = cai + cai_species;
+    pfop = fop + fop_species;
+    pcbi = pfop;
+    /* pcu has to be set before the synonymy tables are derived from it */
+    pcu = cu + code;
+    ds  = how_synon();
+    da  = how_synon_aa();
+    putchar('\n');
+    if (pm->codonW) printf("Genetic code is currently set to %s %s\n\n", pcu->des, pcu->typ);
+    return 1;
+}
+/*******************How Synonymous is this codon *************************/
+/* This function discovers at run time how synonymous a codon is by check-*/
+/* ing all other codons to see if they encode the same AA */
+/* This saves a lot of time when new genetic codes are added */
+/**************************************************************************/
+int *how_synon(void)
+{
+    static int dds[65];
+    int codon, other, same;
+
+    dds[0] = 0;                     /* slot 0 is never counted            */
+    for (codon = 1; codon < 65; codon++) {
+        for (same = 0, other = 1; other < 65; other++)
+            same += (pcu->ca[codon] == pcu->ca[other]);
+        dds[codon] = same;          /* codons encoding the same AA, incl. */
+    }                               /* the codon itself                   */
+
+    return dds;                     /* static table, refilled every call  */
+}
+/*******************How Synonymous is this AA *************************/
+/* This function discovers at run time how synonymous an amino acid is by */
+/* checking all codons to see if they encode this same AA */
+/* This saves a lot of time when new genetic codes are added */
+/**************************************************************************/
+int *how_synon_aa(void)
+{
+    static int dda[22];
+    int idx = 0;
+
+    while (idx < 22)                /* wipe the counts of an earlier call */
+        dda[idx++] = 0;
+
+    for (idx = 1; idx < 65; idx++)  /* one tally per codon, by encoded AA */
+        ++dda[pcu->ca[idx]];
+    return dda;                     /* static table, refilled every call  */
+}
+/********************* Initialise COA *********************************/
+/* Decides which codons or amino acids are to be included in a COA if only*/
+/* the default choice is used. For an amino acid COA, only stops are excl */
+/* but for a codon usage COA stop codons and non-synonymous codons are */
+/* excluded */
+/* pcoa points to a struct that describes columns to be */
+/* included/excluded from any COA analysis */
+/* structure contains AA and Codon information */
+/**************************************************************************/
+int initilize_coa(char code)
+{
+    static char initilized;         /* TRUE once the defaults are built   */
+    static char oldcode;            /* genetic code they were built for   */
+    int i;
+
+    /* if called a second time return unless the genetic code has changed */
+    if (initilized && (oldcode == code)) return 1;
+
+    for (i = 0; i < 22; i++)        /* for each amino acid                */
+        if (i == 11 || i == 0)      /* stop codons have the value 11      */
+            pcoa->amino[i] = FALSE; /* see RECODING file for more details */
+        else
+            pcoa->amino[i] = TRUE;
+
+    for (i = 0; i < 65; i++)        /* for each codon                     */
+        if (*(ds + i) == 1 || pcu->ca[i] == 11 || i == 0)
+            pcoa->codons[i] = FALSE;    /* no synonyms, a stop, or slot 0 */
+        else
+            pcoa->codons[i] = TRUE;
+    /* remember the code: without this the test above compares against 0  */
+    oldcode    = code;              /* for ever and misses real changes   */
+    initilized = TRUE;              /* we have been called ...            */
+    return 1;
+}
+/****************** Codon Usage Counting *****************************/
+/* Counts the frequency of usage of each codon and amino acid this data */
+/* is used throughout CodonW */
+/* pcu->ca contains codon to amino acid translations for the current code */
+/* and is assigned in initialise point */
+/**************************************************************************/
+int codon_usage_tot(char *seq, long int how_many)
+{
+    char codon[4] = "";             /* stays terminated: only 3 are copied */
+    int  icode = 0;                 /* result when no whole codon is read  */
+    long int i;                     /* same width as how_many              */
+
+    for (i = 0; i < how_many - 2; i += 3) {
+        strncpy(codon, (seq + i), 3);
+        icode = ident_codon(codon); /* 0 = not translatable, else 1-64     */
+        ncod[icode]++;              /*increment the codon count            */
+        naa[pcu->ca[icode]]++;      /*increment the AA count               */
+        codon_tot++;                /*increment the codon total            */
+    }
+
+    if (how_many % 3) {             /*if last codon was partial            */
+        icode = 0;                  /*set icode to zero and                */
+        ncod[0]++;                  /*increment untranslated               */
+    }                               /*codons                               */
+    return icode;                   /*return the last codon                */
+}
+
+/****************** Ident codon *****************************/
+/* Converts each codon into a numerical array (codon) and converts this */
+/* array into a numerical value in the range 0-64, zero is reserved for */
+/* codons that contain at least one unrecognised base */
+/* */
+/**************************************************************************/
+int ident_codon(char *codon)
+{
+    int pos;
+
+    /* Each of the three bases is replaced, in the caller's buffer, by a  */
+    /* digit:                                                             */
+    /*      T or U = 1,  C = 2,  A = 3,  G = 4,  anything else = 0        */
+    /* Upper and lower case letters are treated alike.                    */
+    /* A NUL among the three characters ends the scan at once and the     */
+    /* codon is reported as untranslatable; characters already converted  */
+    /* stay converted.                                                    */
+
+    for (pos = 0; pos < 3; pos++) {
+        const char base = codon[pos];
+
+        if (base == '\0')
+            return 0;
+
+        if (base == 'T' || base == 't' || base == 'U' || base == 'u')
+            codon[pos] = (char) 1;
+        else if (base == 'C' || base == 'c')
+            codon[pos] = (char) 2;
+        else if (base == 'A' || base == 'a')
+            codon[pos] = (char) 3;
+        else if (base == 'G' || base == 'g')
+            codon[pos] = (char) 4;
+        else
+            codon[pos] = (char) 0;
+    }
+
+    /* one unrecognised base is enough to make the codon untranslatable   */
+    if (codon[0] == 0 || codon[1] == 0 || codon[2] == 0)
+        return 0;
+
+    /* Codon number in the range 1-64:                                    */
+    /*      (first - 1) * 16 + (third - 1) * 4 + second                   */
+    /* so TTT is 1, TCT is 2, TTC is 5 and GGG is 64                      */
+
+    return (codon[0] - 1) * 16 + codon[1] + (codon[2] - 1) * 4;
+}
+
+/****************** Codon error *****************************/
+/* Basic checks on the codon counts held in ncod[]. error_level selects   */
+/* the check: 1 = start codon and internal stops, 2 = partial or non      */
+/* translatable codons and a missing stop codon, 3 = Nc not calculated    */
+/* (x = size of the codon family, y = number found), 4 = silent.          */
+/**************************************************************************/
+long int codon_error(int x, int y, char *ttitle, char error_level)
+{
+ long int ns = 0; /* number of stops */
+ long int loc_cod_tot = 0; /* sum of ncod[1..64], the return value */
+ static int error_lines = 0; /* messages written since the last pause */
+ int i;
+
+ for (i = 1, ns = 0; i < 65; i++) {
+ loc_cod_tot += ncod[i];
+ if (pcu->ca[i] == 11)
+ ns += ncod[i]; /*count stop codons */
+ }
+
+ switch (error_level) {
+ case 1: /*internal stop codons */
+ ns = ns - valid_stops;
+ /* a stop was a valid_stop if it was the last codon of a sequence */
+
+ if ( ! valid_start && pm->warn ) {
+ dot(0,10);
+ fprintf(pm->my_err, "\nWarning: Sequence %3li \"%-20.20s\" does "
+ "not begin with a recognised start codon\n"
+ ,num_sequence,ttitle);
+ error_lines++;
+ }
+
+ if (ns && pm->warn ) {
+ dot(0,10);
+ if (pm->totals && pm->warn) /* one summary line when totalling */
+ fprintf(pm->my_err,"\nWarning: some sequences had internal stop"
+ " codons (found %li such codons)\n", ns);
+ else
+ fprintf(pm->my_err, "\nWarning: Sequence %3li \"%-20.20s\" has "
+ "%li internal stop codon(s)\n", num_sequence, ttitle, ns);
+ num_seq_int_stop++;
+ error_lines++;
+ }
+ break;
+ case 2: /* here x is used as a codon number: pcu->ca[x] is its AA */
+ dot(0,10);
+ if (ncod[0] == 1 && pcu->ca[x] != 11 && pm->warn){ /* last codon was partial */
+ fprintf(pm->my_err,
+ "\nWarning: Sequence %3li \"%-20.20s\" last codon was partial\n"
+ ,num_sequence, ttitle);
+ error_lines++;
+ }else {
+ if (ncod[0] && pm->warn){ /* non translatable codons */
+ if (pm->totals)
+ fprintf(pm->my_err,
+ "\nWarning: some sequences had non translatable"
+ " codons (found %li such codons)\n", ncod[0]);
+ else
+ fprintf(pm->my_err,
+ "\nWarning: sequence %3li \"%-20.20s\" has %li non translatable"
+ " codon(s)\n", num_sequence, ttitle, ncod[0]);
+ error_lines++;
+ }
+ if (pcu->ca[x] != 11 && pm->warn ) { /* codon x is not a stop */
+ if (!pm->totals){
+ fprintf(pm->my_err,
+ "\nWarning: Sequence %3li \"%-20.20s\" is not terminated by"
+ " a stop codon\n", num_sequence, ttitle);
+ error_lines++;
+ }
+ }
+ }
+ break;
+ case 3: /* written whether or not pm->warn is set */
+ /* Nc error routines see codon_us */
+ dot(0,10); /* dot resetting internal counter */
+ if (x==3) x=4; /* if x=3 there are no 3 or 4 fold AA */
+ fprintf(pm->my_err,
+ "\nSequence %li \"%-20.20s\" contains ",num_sequence, ttitle);
+ (y) ? fprintf(pm->my_err, "only %i ", (int) y) :
+ fprintf(pm->my_err, "no ");
+ fprintf(pm->my_err, "amino acids with %i synonymous codons\n", x);
+ fprintf(pm->my_err, "\t--Nc was not calculated \n");
+ error_lines+=2;
+ break;
+ case 4: /* run silent */
+ break;
+ default:
+ my_exit(99,"Programme error in codon_error\n");
+ }
+ if ((((error_lines + 2) * 2) > pm->term_length) && pm->verbose
+ && pm->my_err == stderr ) {
+ error_lines = 0; /* count lines of errors */
+ dot(0,10);
+ pause; /* presumably a macro from codonW.h that waits for the user -- confirm */
+ }
+ return loc_cod_tot; /* Number of codons counted */
+}
+
+/****************** Codon Usage Out *****************************/
+/* Writes codon usage output to file. Note this subroutine is only called */
+/* when machine readable output is selected, otherwise cutab_out is used */
+/**************************************************************************/
+int codon_usage_out(FILE * fblkout, long int *nncod, int last_aa,
+                    int vvalid_stops, char *ttitle)
+{
+    long int ccodon_tot = 0;
+    int x;
+    char sp=pm->seperator;
+
+    /* error level 4 is silent: the call is made for the codon total only */
+    ccodon_tot = codon_error(last_aa, vvalid_stops, "" , (char) 4);
+
+    /*example of output                                                   */
+    /*0,0,0,0,3,2,2,0,0,0,0,0,0,3,0,0,                                    */
+    /*0,0,0,4,3,4,1,7,0,0,0,0,3,1,3,1,Codons=100                          */
+    /*0,0,0,0,10,6,3,0,0,0,0,0,1,1,12,0,Universal Genetic code            */
+    /*0,0,0,3,7,5,7,9,0,1,1,1,8,4,5,0,MLSPCOPER.PE1                       */
+
+    for (x = 1; x < 65; x++) {
+        /* the counts are long int, so the conversion has to be %li       */
+        fprintf(fblkout, "%li%c",nncod[x],sp);
+        switch (x) {                /* sixteen counts per output line     */
+        case 16:
+            fprintf(fblkout, "\n");
+            break;
+        case 32:                    /* line 2 ends with the codon total   */
+            fprintf(fblkout, "Codons=%ld\n",ccodon_tot);
+            break;
+        case 48:                    /* line 3 ends with the genetic code  */
+            fprintf(fblkout, "%.30s\n", pcu->des);
+            break;
+        case 64:                    /* line 4 ends with the title         */
+            fprintf(fblkout, "%.20s\n",ttitle);
+            break;
+        default:
+            break;
+        }
+    }
+    return 1;
+}
+/****************** RSCU Usage out *****************************/
+/* Writes Relative synonymous codon usage output to file. Note this subrou*/
+/* tine is only called if machine readable output is selected */
+/* If human readable format was selected then what the user really wanted */
+/* was cutab so this is automatically selected in codons.c */
+/* RSCU values are genetic codon dependent */
+/**************************************************************************/
+int rscu_usage_out(FILE * fblkout, long *nncod, long *nnaa)
+{
+    const char delim = pm->seperator;
+    int codon;
+
+    /* ds[codon] is the number of codons that encode the same amino acid  */
+    /* as this codon; the value written is count / AA total * ds[codon]   */
+    for (codon = 1; codon <= 64; codon++) {
+        long aa_total = nnaa[pcu->ca[codon]];
+        if (aa_total == 0)
+            fprintf(fblkout, "0.000%c", delim);
+        else
+            fprintf(fblkout, "%5.3f%c",
+                    ((float) nncod[codon] / (float) aa_total)
+                    * ((float) ds[codon]), delim);
+
+        if (codon == 64)                    /* title ends the record      */
+            fprintf(fblkout, "%-20.20s", title);
+        if (codon % 16 == 0)                /* sixteen values per line    */
+            fprintf(fblkout, "\n");
+    }
+    return 1;
+}
+/****************** RAAU output *****************************/
+/* Writes Relative amino acid usage output to file. Amino Acid usage is */
+/* normalised for gene length */
+/**************************************************************************/
+int raau_usage_out(FILE * fblkout, long *nnaa)
+{
+    long int aa_tot = 0;            /* amino acids counted, stops left out */
+    static char first_line = TRUE;  /* header still has to be written      */
+    int i,x;
+    char sp;
+
+    if (pm->seq_format=='M')        /* if machine readable                 */
+        sp = pm->seperator;
+    else
+        sp = '\t';
+
+    if (first_line) {               /* if true write a header              */
+        if ( pm->seq_format=='M')
+            fprintf(fblkout, "%s", "Gene_name");
+        else
+            fprintf(fblkout, "%-20.20s", "Gene name");
+
+        for (i = 0; i < 22; i++)
+            if ( pm->seq_format=='M')
+                fprintf(fblkout, "%c%s", sp,paa->aa3[i]);/* three letter AA names*/
+            else
+                fprintf(fblkout, "%c %-6.6s", sp,paa->aa3[i]);
+        fprintf(fblkout, "\n");
+        first_line = FALSE;
+    }
+    for (i = 1; i < 22; i++)
+        if (i != 11)
+            aa_tot += nnaa[i];      /* total No. of AAs                    */
+
+    if ( pm->seq_format=='M')
+        fprintf(fblkout, "%.30s", title);
+    else
+        fprintf(fblkout, "%-20.20s", title); /* don't waste spaces */
+
+    for (x = 0; x < 22; x++) {
+        if (x == 11) {
+            fprintf(fblkout, "%c0.0000",sp); /* report 0 for stops */
+        } else if (aa_tot) {        /* usage as a fraction of all AAs      */
+            if ( pm->seq_format=='M')
+                fprintf(fblkout, "%c%.4f",sp, (double) nnaa[x] / (double) aa_tot);
+            else
+                fprintf(fblkout, "%c%7.4f",sp, (double) nnaa[x] / (double) aa_tot);
+        } else {                    /*What no AminoAcids!!!!               */
+            if ( pm->seq_format=='M')
+                fprintf(fblkout, "%c%c",sp,sp); /* NOTE(review): two separators per field -- confirm */
+            else
+                fprintf(fblkout, "%c ***** ",sp);
+        }
+    }
+    /* "\n" contains no conversion, so it is passed no further argument    */
+    fprintf(fblkout, "\n");
+    return 1;
+}
+/****************** AA usage output *****************************/
+/* Writes amino acid usage output to file. */
+/**************************************************************************/
+int aa_usage_out(FILE * fblkout, long *nnaa)
+{
+    static char first_line = TRUE;  /* header not yet written             */
+    const int machine = (pm->seq_format == 'M');
+    const char delim = pm->seperator;
+    int aa;
+
+    if (first_line) {               /* column titles, first call only     */
+        if (machine)
+            fprintf(fblkout, "%s", "Gene_name");
+        else
+            fprintf(fblkout, "%-20.20s ", "Gene name");
+        for (aa = 0; aa < 22; aa++) {
+            if (machine)
+                fprintf(fblkout, "%c%s", delim, paa->aa3[aa]);
+            else
+                fprintf(fblkout, "%-5.5s", paa->aa3[aa]);
+        }
+        fprintf(fblkout, "\n");
+        first_line = FALSE;
+    }
+
+    if (machine) fprintf(fblkout, "%.20s", title);
+    else         fprintf(fblkout, "%-20.20s ", title);
+    for (aa = 0; aa < 22; aa++) {   /* one count per amino acid column    */
+        if (machine) fprintf(fblkout, "%c%li", delim, nnaa[aa]);
+        else         fprintf(fblkout, "%-5li", nnaa[aa]);
+    }
+    fprintf(fblkout, "\n");
+    return 1;
+}
+/****************** Base Silent output *******************************/
+/* Calculates and writes the base composition at silent sites, normalised */
+/* by how often each base could have been used at that silent site without*/
+/* changing the amino acid composition of the protein. It is inspired by  */
+/* GC3s but is much more complicated to calculate as not every AA has the */
+/* option to use any base at the third position                           */
+/* All synonymous AA can select between a G or C though */
+/**************************************************************************/
+void base_sil_us_out(FILE * foutput, long *nncod, long *nnaa)
+{
+ int id,i,x,y,z;
+ long bases_s[4]; /* synonymous GCAT bases */
+ /* both arrays are indexed by z - 1, z being the digit of the third base */
+ long cb[4]; /* codons that could have been GCAT */
+ int done[4];
+ char sp= (char) (pm->seq_format=='H')? (char) '\t': (char) pm->seperator;
+
+ for (x = 0; x < 4; x++) {
+ cb[x] = 0;
+ bases_s[x] = 0;
+ } /* blank the arrays */
+
+ for (x = 1; x < 5; x++)
+ for (y = 1; y < 5; y++)
+ for (z = 1; z < 5; z++) { /* look at all 64 codons */
+ id = (x - 1) * 16 + y + (z - 1) * 4;
+
+ if (*(ds + id) == 1 || pcu->ca[id] == 11)
+ continue; /* if no synon skip to next codon */
+ bases_s[z - 1] += nncod[id]; /* count No. codon ending in base X */
+ }
+
+ for (i = 1; i < 22; i++) {
+ for (x = 0; x < 4; x++) /* don't want to count bases in 6 fold */
+ done[x] = FALSE; /* sites twice do we so we remember */
+
+ if (i == 11 || *(da + i) == 1)
+ continue; /* if stop codon skip, or AA not synony */
+
+ for (x = 1; x < 5; x++) /* else add aa to could have ended count */
+ for (y = 1; y < 5; y++)
+ for (z = 1; z < 5; z++) {
+ id = (x - 1) * 16 + y + (z - 1) * 4;
+ /* assign codon values in range 1-64 */
+ if (pcu->ca[id] == i && done[z - 1] == FALSE) {
+ /* encode AA i which we know to be synon so add could_be_x ending*/
+ /* by the Number of that amino acid */
+ cb[z - 1] += nnaa[i];
+ done[z - 1] = TRUE; /* don't look for any more or we might */
+ /* process leu+arg+ser twice */
+ }
+ }
+ }
+
+ /* Now the easy bit ... just output the results to file */
+ for (i = 0; i < 4; i++) { /* four values, each followed by sp */
+ if (cb[i] > 0)
+ fprintf(foutput, "%6.4f%c", (double) bases_s[i]/(double)cb[i], sp);
+ else
+ fprintf(foutput, "0.0000%c",sp);
+ }
+ return;
+}
+/****************** Clean up *******************************/
+/* Called after each sequence has been completely read from disk */
+/* It re-zeros all the main counters, but is not called when concatenating*/
+/* sequences together */
+/**************************************************************************/
+int clean_up(long int *nncod, long int *nnaa)
+{
+ int x;
+ int i;
+ /* zero the codon and amino acid counts handed in by the caller */
+ for (x = 0; x < 65; x++)
+ nncod[x] = 0;
+ for (x = 0; x < 23; x++) /* NOTE(review): 23 slots here, other loops use 22 -- confirm size */
+ nnaa[x] = 0;
+ /* dinucleotide count remembers the */
+ dinuc_count(" ", 1); /* last_base from the last fragment */
+ /* this causes the last base to be "" */
+ for (x = 0; x < 3; x++)
+ for (i = 0; i < 16; i++)
+ din[x][i] = 0;
+ /* NOTE(review): second identical call, and tot is zeroed twice below */
+ dinuc_count(" ", 1);
+ master_ic = tot =
+ non_std_char = AT_TOT = GC_TOT = AA_TOT = GAP_TOT = IUBC_TOT = 0;
+ long_seq = FALSE;
+ valid_stops = valid_start = codon_tot = tot = fram = 0;
+ return 1; /* always 1 */
+}
+/*****************Codon Adaptation Index output *************************/
+/* Codon Adaptation Index (CAI) (Sharp and Li 1987). CAI is a measurement */
+/* of the relative adaptiveness of the codon usage of a gene towards the */
+/* codon usage of highly expressed genes. The relative adaptiveness (w) of*/
+/* each codon is the ratio of the usage of each codon, to that of the most*/
+/* abundant codon for the same amino acid. The relative adaptiveness of */
+/* codons for albeit a limited choice of species, can be selected from the*/
+/* Menu. The user can also input a personal choice of values. The CAI */
+/* index is defined as the geometric mean of these relative adaptiveness */
+/* values. Non-synonymous codons and termination codons (genetic code */
+/* dependent) are excluded. To aid computation, the CAI is calculated as */
+/* using a natural log summation, To prevent a codon having a relative */
+/* adaptiveness value of zero, which could result in a CAI of zero; */
+/* these codons have fitness of zero (<.0001) are adjusted to 0.01 */
+/**************************************************************************/
+int cai_out(FILE * foutput, long int *nncod)
+{
+    long int totaa = 0;             /* codons that contribute to the CAI  */
+    double sigma;                   /* sum of count * ln(w), then the CAI */
+    float ftemp;
+    int x;
+    char sp= (char) (pm->seq_format=='H')?
+        (char) '\t':
+        (char) pm->seperator;
+    static char cai_ttt = FALSE;
+    static char description[61];
+    static char reference[61];
+    char reply[32];                 /* answer to the y/n question         */
+    static CAI_STRUCT user_cai;
+
+
+    if (!cai_ttt ) {                /* have we been called already        */
+        user_cai.des = description; /* assign an array to a pointer       */
+        user_cai.ref = reference;   /* as above                           */
+
+        if ( pm->caifile==NULL && pm->verbose==TRUE
+             && pm->menu==TRUE && (pcai == cai )){
+            /* this is false                                              */
+            /* if personal caifile is on commandline or                   */
+            /* in non-interactive mode or -silent option                  */
+            /* or cai values are not the default values                   */
+
+            printf("\nDo you wish to input a personal choice of CAI"
+                   " values (y/n) [n] ");
+            if (!fgets(reply, sizeof reply, stdin)) reply[0] = '\0';
+            else if (!strchr(reply, '\n'))  /* bounded read; drop the     */
+                while ((x = getchar()) != '\n' && x != EOF) ; /* overflow */
+            /* This allows a user defined choice of CAI values to be selected */
+            if ('Y' == toupper((unsigned char) reply[0])) {
+                /* tell the user a little about what we are looking for */
+                printf("\nInput file must contain 64 CAI values\n"
+                       "ranging from 0.00 to 1.00\n"
+                       "values must be separated by spaces\n");
+                /* open the CAI adaptiveness values file */
+                if (!(pm->caifile = open_file("file with CAI values"
+                    ,"cai.coa", "r", 0))) my_exit(6,"cai_out");
+
+            }
+        }                           /* matched if pm->caifile=            */
+        if (pm->caifile){
+            rewind (pm->caifile);   /* unlikely unless fopfile = caifile  */
+            x = 0;
+            strcpy(user_cai.des,"User supplied CAI adaptation values ");
+            strcpy(user_cai.ref,"No reference");
+            user_cai.cai_val[x++] = (float) 0.0;
+            while (fscanf(pm->caifile, "%f ", &ftemp) == 1) {
+                /* if any bad CAI values are read EXIT*/
+                if (ftemp < 0 || ftemp > 1.0) {
+                    printf("\nError CAI %f value out of range\nEXITING",ftemp);
+                    my_exit(99,"cai_out");
+                }
+                if (x < 65) user_cai.cai_val[x] = ftemp; /* slots 1 to 64 */
+                x++;                /* extra values are counted, not kept */
+            }                       /* stops at EOF or a non-number       */
+            if (x != 65) {          /* wrong number of codons             */
+                fprintf(pm->my_err, "\nError in CAI file, found %i values"
+                        " expected 64 values EXITING\n", x - 1);
+                my_exit(99,"cai_out");
+            }
+            pcai = &user_cai;       /* assigns pointer to user CAI values */
+        }                           /* matches if( pm->caifile...         */
+
+
+        printf ("Using %s (%s) w values to calculate "
+                "CAI \n",pcai->des,pcai->ref);
+        cai_ttt = TRUE;             /*stops this "if" from being entered  */
+
+    }                               /* matches if (!cai_ttt )             */
+
+    for (x = 1, sigma = 0; x < 65; x++) {
+        if (pcu->ca[x] == 11 || *(ds + x) == 1) continue;
+        if (pcai->cai_val[x] < 0.0001)/* if value is effectively zero     */
+            pcai->cai_val[x] = (float) 0.01;    /* make it .01            */
+        sigma += (double) *(nncod + x) * log((double) pcai->cai_val[x]);
+        totaa += *(nncod + x);
+    }
+
+    if (totaa) {                    /* no codons counted: report 0        */
+        sigma = sigma / (double) totaa;
+        sigma = exp(sigma);         /* geometric mean of the w values     */
+    } else
+        sigma = 0;
+
+    fprintf(foutput, "%5.3f%c", sigma,sp);
+    return 1;
+}
+/*****************Codon Bias Index output **************************/
+/* Codon bias index is a measure of directional codon bias, it measures */
+/* the extent to which a gene uses a subset of optimal codons. */
+/* CBI = ( Nopt-Nran)/(Ntot-Nran) Where Nopt = number of optimal codons; */
+/* Ntot = number of synonymous codons; Nran = expected number of optimal */
+/* codons if codons were assigned randomly. CBI is similar to Fop as used */
+/* by Ikemura, with Nran used as a scaling factor. In a gene with extreme */
+/* codon bias, CBI will equal 1.0, in a gene with random codon usage CBI */
+/* will equal 0.0. Note that it is possible for Nopt to be less than Nran.*/
+/* This results in a negative value for CBI. */
+/* ( Bennetzen and Hall 1982 ) */
+/**************************************************************************/
+/* cbi_out: write the Codon Bias Index of one gene to foutput.             */
+/*   foutput  stream that receives "value<separator>"                      */
+/*   nncod    codon counts, read at indices 1..64                          */
+/*   nnaa     amino acid counts, indexed by the values held in pcu->ca     */
+/* Always returns 1. On the first call the set of optimal codons is chosen */
+/* (optionally read from pm->cbifile) and cached in static storage.        */
+int cbi_out(FILE * foutput, long int *nncod, long int *nnaa )
+{
+    long int tot_cod = 0;              /* codons of amino acids that have   */
+                                       /* at least one optimal codon        */
+    long int opt     = 0;              /* of which, optimal codons          */
+    float    exp_cod = (float) 0.0;    /* expected optimal codons (Nran)    */
+    float    fcbi;
+    int      c, x;
+    char     sp = (pm->seq_format=='H')? (char) '\t': (char) pm->seperator;
+
+    static char description[61];
+    static char reference[61];
+    static char first_call_cbi = TRUE;
+    static char has_opt_info[22];
+    static FOP_STRUCT user_cbi;
+
+    if (first_call_cbi) {              /* one-off selection of CBI values   */
+
+        user_cbi.des = description;    /* give the user set its own storage */
+        user_cbi.ref = reference;
+
+        /* Prompt only when no CBI file was given, we are verbose and in    */
+        /* menu mode, and pcbi still points at the built-in fop table.      */
+        if ( pm->cbifile == NULL && pm->verbose==TRUE
+             && pm->menu==TRUE && ( pcbi == fop )){
+
+            printf("\nDo you wish to input a personal choice of CBI"
+                   " values (y/n) [n] ");
+
+            /* bounded read; only the first character is inspected, so the  */
+            /* newline that fgets keeps is harmless                         */
+            if (fgets(pm->junk, BUFSIZ, stdin) == NULL)
+                pm->junk[0] = '\0';
+
+            if ('Y' == (char) toupper( (int) pm->junk[0])) {
+
+                printf("\nInput file must contain 64 CBI values\n"
+                       " 1= rare codon\n 2= common codon\n 3= optimal codon\n");
+
+                if (!(pm->cbifile = open_file("file with CBI values"
+                                              ,"cbi.coa", "r", 0)))
+                    my_exit(6,"cbi_out");
+            }
+        }
+
+        if ( pm->cbifile ){
+            rewind (pm->cbifile);      /* fopfile can be the same as cbifile*/
+            strcpy(user_cbi.des,"User supplied choice");
+            strcpy(user_cbi.ref,"No reference");
+            user_cbi.fop_cod[0] = 0;
+            x = 1;
+
+            /* Every character '0'..'3' counts as one value. Counting goes  */
+            /* on a little past 64 so that an over-long file is reported,   */
+            /* but nothing is stored beyond index 64, the highest index     */
+            /* this function ever reads.                                    */
+            while ((c = fgetc(pm->cbifile)) != EOF && x <= 66) {
+                if (c >= '0' && c <= '3') {
+                    if (x < 65)
+                        user_cbi.fop_cod[x] = (char) (c - '0');
+                    x++;
+                }
+            }
+
+            if (x != 65) {             /* wrong number of codons            */
+                sprintf(pm->messages, "\nError in CBI file %i digits found, "
+                        "expected 64 EXITING\n", x - 1);
+                my_exit(99,pm->messages);
+            }
+            pcbi = (&user_cbi);
+        }
+
+        printf ("Using %s (%s) \noptimal codons to calculate "
+                "CBI\n",pcbi->des,pcbi->ref);
+
+        /* has_opt_info[aa] != 0 when amino acid aa has an optimal codon    */
+        for (x = 0; x < 22; x++) has_opt_info[x]=0;
+
+        for (x = 1; x < 65; x++) {
+            if (pcu->ca[x] == 11 || *(ds + x) == 1)
+                continue;              /* skip stops and non-synonymous     */
+            if (pcbi->fop_cod[x] == 3 )
+                has_opt_info[pcu->ca[x]]++;
+        }
+
+        first_call_cbi = FALSE;        /* this won't be done again          */
+    }
+
+    for (x = 1; x < 65; x++) {
+        if (! has_opt_info[pcu->ca[x]]) continue;
+        switch ((int) pcbi->fop_cod[x]) {
+        case 3:
+            opt     += nncod[x];
+            tot_cod += nncod[x];
+            exp_cod += (float) nnaa[pcu->ca[x]]/ (float) da[pcu->ca[x]];
+            break;
+        case 2:
+        case 1:
+            tot_cod += *(nncod + x);
+            break;
+        default:
+            /* fop_cod[] holds small integers, so print with %i not %f      */
+            sprintf(pm->messages, " Serious error in CBI information found"
+                    " an illegal CBI value of %i for codon %i"
+                    " permissible values are \n 1 for non-optimal"
+                    " codons\n 2 for common codons\n"
+                    " 3 for optimal codons\n" " EXITING ",
+                    (int) pcbi->fop_cod[x], x);
+
+            my_exit(99,pm->messages);
+            break;
+        }
+    }
+
+    if( tot_cod - exp_cod)             /* avoid dividing by zero            */
+        fcbi= (opt - exp_cod) / (tot_cod - exp_cod);
+    else
+        fcbi= (float) 0.0;
+
+    fprintf(foutput, "%5.3f%c", fcbi,sp);
+
+    return 1;
+}
+
+/****************** Frequency of OPtimal codons output ********************/
+/* Frequency of Optimal codons (Fop) (Ikemura 1981). This index, is ratio */
+/* of optimal codons to synonymous codons (genetic code dependent). Optimal*/
+/* codons for several species are in-built and can be selected using Menu 3*/
+/* By default, the optimal codons of E. coli are assumed. The user may also*/
+/* enter a personal choice of optimal codons. If rare synonymous codons */
+/* have been identified, there is a choice of calculating the original Fop */
+/* index or a modified index. Fop values for the original index are always */
+/* between 0 (where no optimal codons are used) and 1 (where only optimal */
+/* codons are used). When calculating the modified Fop index, any negative */
+/* values are adjusted to zero. */
+/***************************************************************************/
+/* fop_out: write the Frequency of Optimal codons of one gene to foutput.  */
+/*   foutput  stream that receives "value<separator>"                      */
+/*   nncod    codon counts, read at indices 1..64                          */
+/* Always returns 1. On the first call the set of optimal codons is chosen */
+/* (optionally read from pm->fopfile) and cached in static storage.        */
+int fop_out(FILE * foutput, long int *nncod)
+{
+    long int nonopt = 0;               /* codons flagged 1                  */
+    long int std    = 0;               /* codons flagged 2                  */
+    long int opt    = 0;               /* codons flagged 3                  */
+    long int total;
+    float    ffop;
+    int      c, x;
+    char     sp= (pm->seq_format=='H')? (char) '\t': (char) pm->seperator;
+
+    static char first_call = TRUE;
+    static char description[61];
+    static char reference[61];
+    static char asked_about_fop = FALSE;
+    static char factor_in_rare = FALSE;
+    static char has_opt_info[22];
+    static FOP_STRUCT user_fop;
+
+    if (first_call) {                  /* one-off selection of Fop values   */
+        user_fop.des = description;
+        user_fop.ref = reference;
+
+        /* Prompt only when no Fop file was given, we are verbose and in    */
+        /* menu mode, and pfop still points at the built-in fop table.      */
+        if ( pm->fopfile == NULL && pm->verbose==TRUE
+             && pm->menu == TRUE && (pfop == fop )) {
+
+            printf("\nDo you wish to input a personal choice of Fop"
+                   " values (y/n) [n] ");
+
+            /* bounded read; only the first character is inspected          */
+            if (fgets(pm->junk, BUFSIZ, stdin) == NULL)
+                pm->junk[0] = '\0';
+
+            if ('Y' == (char) toupper( (int) pm->junk[0])) {
+                printf("\nInput file must contain 64 Fop values\n"
+                       " 1= rare codon\n 2= common codon\n 3= optimal codon\n");
+
+                if (!(pm->fopfile = open_file("file with Fop values"
+                                  ,"fop.coa", "r", 0))) my_exit(6,"fop_out");
+            }
+        }
+
+        if ( pm->fopfile ) {
+            rewind (pm->fopfile);      /* possible for fopfile = cbifile    */
+            strcpy(user_fop.des,"User supplied choice");
+            strcpy(user_fop.ref,"No reference");
+            user_fop.fop_cod[0] = 0;
+            x = 1;
+
+            /* Every character '0'..'3' counts as one value. Counting goes  */
+            /* on a little past 64 so that an over-long file is reported,   */
+            /* but nothing is stored beyond index 64, the highest index     */
+            /* this function ever reads.                                    */
+            while ((c = fgetc(pm->fopfile)) != EOF && x <= 66) {
+                if (c >= '0' && c <= '3') {
+                    if (x < 65)
+                        user_fop.fop_cod[x] = (char) (c - '0');
+                    x++;
+                }
+            }
+
+            if (x != 65) {             /* wrong number of codons            */
+                sprintf(pm->messages, "\nError in Fop file %i values found, "
+                        "expected 64 EXITING\n", x - 1);
+                my_exit(99,pm->messages);
+            }
+            pfop = &user_fop;          /* use the user's values from now on */
+        }
+
+        printf ("Using %s (%s)\noptimal codons to calculate "
+                "Fop\n",pfop->des,pfop->ref);
+
+        /* has_opt_info[aa] != 0 when amino acid aa takes part in Fop       */
+        for (x = 0; x < 22; x++) has_opt_info[x]=0;
+
+        for (x = 1; x < 65; x++) {
+            if (pcu->ca[x] == 11 || *(ds + x) == 1)
+                continue;              /* skip stops and non-synonymous     */
+            if (pfop->fop_cod[x] == 3 )
+                has_opt_info[pcu->ca[x]]++;
+
+            if (pfop->fop_cod[x] == 1 ){
+                /* first rare codon seen: ask (once) about the modified Fop */
+                if (!asked_about_fop && pm->verbose) {
+                    printf("\nIn the set of optimal codons you have selected,\n"
+                           "non-optimal codons have been identified\nThey can be "
+                           "used in the calculation of a modified Fop, "
+                           "(Fop=(opt-rare)/total)\n else the original formulae "
+                           "will be used (Fop=opt/total)\n\n\t\tDo you wish "
+                           "calculate a modified fop (y/n) [n] ");
+                    if (fgets(pm->junk, BUFSIZ, stdin) == NULL)
+                        pm->junk[0] = '\0';
+                    if ( 'Y' == (char) toupper( (int)pm->junk[0]))
+                        factor_in_rare = TRUE;
+                    asked_about_fop = TRUE;
+                }
+
+                if ( factor_in_rare == TRUE )
+                    has_opt_info[pcu->ca[x]]++;
+            }
+        }
+        first_call = FALSE;
+    }                                  /* matches if (first_call)           */
+
+    for (x = 1; x < 65; x++) {
+        if (!has_opt_info[pcu->ca[x]] )
+            continue;
+
+        switch ((int) pfop->fop_cod[x]) {
+        case 3:
+            opt += *(nncod + x);
+            break;
+        case 2:
+            std += *(nncod + x);
+            break;
+        case 1:
+            nonopt += *(nncod + x);
+            break;
+        default:
+            /* fop_cod[] and x are integers and the counters are long:      */
+            /* use %i and %ld (a bare %l is not a valid conversion)         */
+            sprintf(pm->messages, " Serious error in fop information found"
+                    " an illegal fop value of %i for codon %i"
+                    " permissible values are \n 1 for non-optimal"
+                    " codons\n 2 for common codons\n"
+                    " 3 for optimal codons\n" " EXITING ",
+                    (int) pfop->fop_cod[x], x);
+            printf ("opt %ld, std %ld, nonopt %ld\n",opt,std,nonopt);
+            my_exit(99,pm->messages);
+            break;
+        }
+    }
+
+    total = opt + nonopt + std;
+
+    if (total == 0)                    /* nothing counted: report zero      */
+        ffop = 0.0;
+    else if (factor_in_rare)           /* modified Fop                      */
+        ffop = (float) (opt - nonopt) / (float) total;
+    else                               /* original Fop                      */
+        ffop = (float) opt / (float) total;
+
+    fprintf(foutput, "%5.3f%c", ffop,sp);
+
+    return 1;
+}
+
+/*************** Effective Number of Codons output *********************/
+/* The effective number of codons (NC) (Wright 1990). This index is a */
+/* simple measure of overall codon bias and is analogous to the effective */
+/* number of alleles measure used in population genetics. Knowledge of the*/
+/* optimal codons or a reference set of highly expressed genes is not */
+/* needed when calculating this index. Initially the homozygosity for each*/
+/* amino acid is estimated from the squared codon frequencies. */
+/**************************************************************************/
+/* enc_out: compute the effective number of codons for one gene, print it  */
+/* to foutput followed by the separator, and return the computed value.    */
+/* Arrays are indexed by the degeneracy (number of synonymous codons) of an */
+/* amino acid; room is left for families of up to 8 codons.                */
+float enc_out(FILE * foutput, long int *nncod, long int *nnaa) {
+    int    n_scored[9];                /* amino acids with a usable F       */
+    int    n_family[9];                /* amino acids in each family        */
+    double f_sum[9];                   /* sum of F over scored amino acids  */
+    double f_avg = 0, f_aa = 0, sq = 0, sq_sum = 0;
+    float  enc_tot = 0.0F;
+    int    failed = FALSE;
+    int    aa, cod, deg;
+    char   sp;
+
+    if (pm->seq_format=='H')
+        sp = (char) '\t';
+    else
+        sp = (char) pm->seperator;
+
+    for (deg = 0; deg < 9; deg++) {
+        n_family[deg] = 0;
+        f_sum[deg]    = 0.0;
+        n_scored[deg] = 0;
+    }
+
+    for (aa = 1; aa < 22; aa++) {
+        if (aa == 11)                  /* stop codons take no part          */
+            continue;
+
+        if (nnaa[aa] > 1) {
+            /* scan all 64 codons and pick out those that encode aa, so no  */
+            /* knowledge of the genetic code is wired in here               */
+            sq_sum = 0;
+            for (cod = 1; cod < 65; cod++) {
+                if (pcu->ca[cod] != aa)
+                    continue;
+
+                sq = (nncod[cod] == 0)
+                         ? 0.0
+                         : pow(((double) nncod[cod] / (double) nnaa[aa]),
+                               (double) 2);
+                sq_sum += sq;
+            }
+            /* homozygosity of this amino acid                              */
+            f_aa = (((double) nnaa[aa] * sq_sum) - 1.0) /
+                   (double) (nnaa[aa] - 1.0);
+        } else {
+            f_aa = 0;                  /* seen at most once: no estimate    */
+        }
+
+        if (f_aa > 0.0000001) {
+            f_sum[da[aa]] += f_aa;
+            n_scored[da[aa]]++;
+        }
+        n_family[da[aa]]++;            /* counted even when aa is absent    */
+    }
+
+    enc_tot = (float) n_family[1];     /* non-degenerate amino acids        */
+
+    for (deg = 2; deg <= 8; deg++) {
+        if (!n_family[deg])
+            continue;                  /* no amino acids of this degeneracy */
+
+        if (n_scored[deg] && f_sum[deg] > 0) {
+            f_avg = f_sum[deg] / n_scored[deg];
+        } else if (deg == 3 && n_scored[2] && n_scored[4]
+                   && n_family[deg] == 1) {
+            /* special case: a lone 3-fold family borrows the mean of the   */
+            /* 2-fold and 4-fold averages                                   */
+            f_avg = (f_sum[2] / n_scored[2] + f_sum[4] / n_scored[4]) * 0.5;
+        } else {
+            codon_error( deg, n_scored[deg], title, 3 );
+            failed = TRUE;             /* cannot be estimated for this gene */
+            break;
+        }
+        enc_tot += (float) n_family[deg] / (float) f_avg;
+    }
+
+    if (failed)
+        fprintf(foutput, "*****%c",sp);
+    else if (enc_tot <= 61)
+        fprintf(foutput, "%5.2f%c", enc_tot,sp);
+    else                               /* printed value is capped at 61     */
+        fprintf(foutput, "61.00%c",sp);
+
+    return enc_tot;
+}
+
+/******************* G+C output *******************************/
+/* This function is a real work horse, initially it counts base composit */
+/* ion in all frames, length of gene, num synonymous codons, number of */
+/* non synonymous codons. Then dependent on the value for which used in */
+/* switch statement. We return various analyses of this data */
+/* if which ==1 then the output is very detailed, base by base etc. */
+/* if which ==2 then the output is for GC content only */
+/* if which ==3 then the output is for GC3s (GC at synonymous 3rd posit) */
+/* if which ==4 then the output is for L_sym */
+/* if which ==5 then the output is for L_aa */
+/* The output from this subroutine is in a tabular format if human read- */
+/* able output is selected, and in columns if machine readable. Also the */
+/* number of values reported changes as it is assumed the user has access*/
+/* to a spreadsheet type programme if they are requesting tabular output */
+/*************************************************************************/
+void gc_out(FILE * foutput, FILE * fblkout, int which){
+ /* which==1 writes the detailed report to fblkout; which 2..5 write one value to foutput */
+ long int id; /* codon number built from x, y and z */
+ long int bases[5]; /* 3rd position base counts over synonymous codons; index 1=T 2=C 3=A 4=G */
+ long int base_tot[5]; /* base counts over all three positions */
+ long int base_1[5]; /* base counts at codon position 1 */
+ long int base_2[5]; /* base counts at codon position 2 */
+ long int base_3[5]; /* base counts at codon position 3 */
+ long int tot_s = 0; /* number of synonymous codons */
+ long int totalaa = 0; /* number of non-stop codons */
+ static char header = FALSE; /* TRUE once the machine readable header has been written */
+ int x,y,z;
+ char sp= (pm->seq_format=='H')?
+ (char) '\t':
+ (char) pm->seperator;
+
+ typedef double lf;
+
+ for (x = 0; x < 5; x++) {
+ bases[x] = 0; /* initialise array values to zero */
+ base_tot[x] = 0;
+ base_1[x] = 0;
+ base_2[x] = 0;
+ base_3[x] = 0;
+ }
+
+ for (x = 1; x < 5; x++)
+ for (y = 1; y < 5; y++)
+ for (z = 1; z < 5; z++) { /* look at all 64 codons */
+ id = (x - 1) * 16 + y + (z - 1) * 4;
+
+ if (pcu->ca[id] == 11)
+ continue; /* skip if a stop codon */
+ base_tot[x] += ncod[id]; /* codon id has base x at position 1, */
+ base_1[x] += ncod[id]; /* base y at position 2 and base z at */
+ base_tot[y] += ncod[id]; /* position 3, so each of those base */
+ base_2[y] += ncod[id]; /* counts grows by the codon's count. */
+ base_tot[z] += ncod[id]; /* Only the 64 numbered codons are */
+ base_3[z] += ncod[id]; /* visited here, so anything counted */
+ /* elsewhere is left out of the totals */
+ totalaa += ncod[id];
+
+ if (*(ds + id) == 1)
+ continue; /* if not synon skip codon */
+
+ bases[z] += ncod[id]; /* count no of codons ending in Z */
+
+ tot_s += ncod[id]; /* count tot no of silent codons */
+
+ }
+
+ /* both totals are used as divisors below, so give up if either is zero */
+ if (!tot_s || !totalaa) {
+ fprintf(pm->my_err, "Warning %.20s appear to be too short\n", title);
+ fprintf(pm->my_err, "No output was written to file \n");
+ return;
+ }
+ switch ((int) which) {
+ case 1: /* exhaustive output for analysis */
+ if (pm->seq_format == 'M') { /* machine readable format */
+ if (!header) { /* print the column names once only */
+ fprintf(fblkout,
+ "Gene_description%cLen_aa%cLen_sym%cGC%cGC3s%cGCn3s%cGC1%cGC2"
+ "%cGC3%cT1%cT2%cT3%cC1%cC2%cC3%cA1%cA2%cA3%cG1%cG2%cG3\n"
+ ,sp,sp,sp,sp,sp,sp,sp,sp,sp,sp,sp,sp,sp,sp,sp,sp,sp,sp,sp,sp,sp);
+ header = TRUE;
+ }
+ /* now print the information, in the same order as the header */
+ fprintf(fblkout, "%-.20s%c", title,sp);
+ fprintf(fblkout,
+ "%ld%c%ld%c%5.3f%c%5.3f%c%5.3f%c%5.3f%c%5.3f%c%5.3f%c"
+ "%5.3f%c%5.3f%c%5.3f%c%5.3f%c%5.3f%c%5.3f%c%5.3f%c"
+ "%5.3f%c%5.3f%c%5.3f%c%5.3f%c%5.3f\n",
+ totalaa,sp,
+ tot_s,sp,
+ (lf) (base_tot[2] + base_tot[4]) / (lf) (totalaa * 3),sp,
+ (lf) (bases[2] + bases[4]) / (lf) tot_s,sp,
+ (lf) (base_tot[2] + base_tot[4] - bases[2] - bases[4])
+ / (lf) (totalaa * 3 - tot_s),sp,
+ (lf) (base_1[2] + base_1[4]) / (lf) (totalaa),sp,
+ (lf) (base_2[2] + base_2[4]) / (lf) (totalaa),sp,
+ (lf) (base_3[2] + base_3[4]) / (lf) (totalaa),sp,
+ (lf) base_1[1] / (lf) totalaa,sp,
+ (lf) base_2[1] / (lf) totalaa,sp,
+ (lf) base_3[1] / (lf) totalaa,sp,
+ (lf) base_1[2] / (lf) totalaa,sp,
+ (lf) base_2[2] / (lf) totalaa,sp,
+ (lf) base_3[2] / (lf) totalaa,sp,
+ (lf) base_1[3] / (lf) totalaa,sp,
+ (lf) base_2[3] / (lf) totalaa,sp,
+ (lf) base_3[3] / (lf) totalaa,sp,
+ (lf) base_1[4] / (lf) totalaa,sp,
+ (lf) base_2[4] / (lf) totalaa,sp,
+ (lf) base_3[4] / (lf) totalaa);
+ } else { /* must be human formatted output then*/
+ fprintf(fblkout, /* tabulated output */
+ "Gene Name: %-69.69s\nLength : %-ld aa"
+ " \tNon_synonymous/synonymous codons (%3ld/%5ld)\n"
+ " GC=%5.3f\tGC3s=%5.3f\tGC_not_GC3s=%5.3f\n"
+ "base\t1\t2\t3\ttotal\t\t1\t2\t3 \ttotal\n"
+ " T\t%5.3f\t%5.3f\t%5.3f\t%5.3f\t"
+ "W\t%5.3f\t%5.3f\t%5.3f\t%5.3f\n"
+ " C\t%5.3f\t%5.3f\t%5.3f\t%5.3f\t"
+ "S\t%5.3f\t%5.3f\t%5.3f\t%5.3f\n"
+ " A\t%5.3f\t%5.3f\t%5.3f\t%5.3f\t"
+ "R\t%5.3f\t%5.3f\t%5.3f\t%5.3f\n"
+ " G\t%5.3f\t%5.3f\t%5.3f\t%5.3f\t"
+ "Y\t%5.3f\t%5.3f\t%5.3f\t%5.3f\n\n",
+ title,
+ totalaa,
+ totalaa - tot_s,
+ tot_s,
+ (lf) (base_tot[2] + base_tot[4]) / (lf) (totalaa * 3),
+ (lf) (bases[2] + bases[4]) / (lf) tot_s,
+ (lf) (base_tot[2] + base_tot[4] - bases[2] - bases[4])
+ / (lf) (totalaa * 3 - tot_s),
+ (lf) base_1[1] / (lf) totalaa, (lf) base_2[1] / (lf) totalaa,
+ (lf) base_3[1] / (lf) totalaa,
+ (lf) base_tot[1] / (lf) (totalaa * 3),
+ (lf) (base_1[1] + base_1[3]) / (lf) totalaa,
+ (lf) (base_2[1] + base_2[3]) / (lf) totalaa,
+ (lf) (base_3[1] + base_3[3]) / (lf) totalaa,
+ (lf) (base_tot[1] + base_tot[3]) / (lf) (totalaa * 3),
+ (lf) base_1[2] / (lf) totalaa, (lf) base_2[2] / (lf) totalaa,
+ (lf) base_3[2] / (lf) totalaa,
+ (lf) base_tot[2] / (lf) (totalaa * 3),
+ (lf) (base_1[2] + base_1[4]) / (lf) totalaa,
+ (lf) (base_2[2] + base_2[4]) / (lf) totalaa,
+ (lf) (base_3[2] + base_3[4]) / (lf) totalaa,
+ (lf) (base_tot[2] + base_tot[4]) / (lf) (totalaa * 3),
+ (lf) base_1[3] / (lf) totalaa, (lf) base_2[3] / (lf) totalaa,
+ (lf) base_3[3] / (lf) totalaa,
+ (lf) base_tot[3] / (lf) (totalaa * 3),
+ (lf) (base_1[3] + base_1[4]) / (lf) totalaa,
+ (lf) (base_2[3] + base_2[4]) / (lf) totalaa,
+ (lf) (base_3[3] + base_3[4]) / (lf) totalaa,
+ (lf) (base_tot[3] + base_tot[4]) / (lf) (totalaa * 3),
+ (lf) base_1[4] / (lf) totalaa, (lf) base_2[4] / (lf) totalaa,
+ (lf) base_3[4] / (lf) totalaa,
+ (lf) base_tot[4] / (lf) (totalaa * 3),
+ (lf) (base_1[1] + base_1[2]) / (lf) totalaa,
+ (lf) (base_2[1] + base_2[2]) / (lf) totalaa,
+ (lf) (base_3[1] + base_3[2]) / (lf) totalaa,
+ (lf) (base_tot[1] + base_tot[2]) / (lf) (totalaa * 3));
+ /* the argument order above must follow the format string row by row */
+ }
+ break;
+ case 2: /* overall GC content, all three positions */
+ fprintf(foutput, "%5.3f%c", (lf) ((base_tot[2] + base_tot[4]) / (lf)
+ (totalaa * 3)),sp);
+ break;
+ case 3: /* GC3s */
+ fprintf(foutput, "%5.3f%c", (lf) (bases[2] + bases[4]) /
+ (lf) tot_s,sp);
+ break;
+ case 4: /* Number of synonymous codons */
+ fprintf(foutput, "%3li%c", tot_s,sp);
+ break;
+ case 5: /* Total length in translatable AA */
+ fprintf(foutput, "%3li%c", totalaa,sp);
+ break;
+
+#ifdef DEBUG
+ default:
+ fprintf(stderr, " Programming error in GC_out which (%i) is out of "
+ "valid range\n"
+ ,(int) which);
+ my_exit(99, "gc out");
+ break;
+#endif
+ }
+ return;
+}
+
+/******************** DOT ******************************************/
+/* Indicates the progress of a search */
+/**************************************************************************/
+
+/* dot: progress indicator written to stderr.                              */
+/*   y       pass 0 to restart the line-width counter                      */
+/*   period  a '.' is printed on every period-th call; a period of 0       */
+/*           prints nothing (it used to cause a modulo by zero)            */
+/* The call counter and the width counter are static, so they are shared   */
+/* by every caller.                                                        */
+void dot(int y, long int period)
+{
+    static long int calls;             /* number of calls so far            */
+    static char     dots_on_line = 0;  /* dots printed on the current line  */
+
+    if (!y) dots_on_line = 0;          /* re-zero the width counter         */
+
+    ++calls;
+    if (period != 0 && calls % period == 0) {
+        fprintf(stderr,".");
+        dots_on_line++;
+    }
+    if (dots_on_line == 50) {          /* every 50 dots wrap the line       */
+        fprintf(stderr,"\n");
+        dots_on_line = 0;
+    }
+    return;
+}
+/********************** get_aa *****************************************/
+/* get_aa takes a codon as a three character string and returns the        */
+/* paa->aa1 entry (which==1) or the paa->aa3 entry (otherwise) of the      */
+/* amino acid that the codon encodes                                       */
+/* pcu->ca converts the codon number into amino acid number */
+/* paa->aa1 converts amino acid code into letters */
+/***************************************************************************/
+
+/* get_aa: look up the amino acid name for a codon string.                 */
+/*   which   1 selects the paa->aa1 table, any other value paa->aa3        */
+/*   codon   NUL terminated string; must be exactly three characters       */
+/* Returns paa->aa1[0] when codon is NULL or not three characters long.    */
+char *get_aa(int which, char *codon)
+{
+    int aa;
+
+    /* anything that is not exactly three characters cannot be a codon     */
+    if (codon == NULL || strlen(codon) != 3)
+        return paa->aa1[0];
+
+    aa = pcu->ca[ident_codon(codon)];  /* codon number -> amino acid number */
+
+    return (which == 1) ? paa->aa1[aa] : paa->aa3[aa];
+}
+/********************** cutab_out ***********************************/
+/* Generates a formatted table of codon, RSCU and amino acid usage */
+/* ds points to an array[64] of synonymous values */
+/* it reveals how many synonyms there are for each aa */
+/**************************************************************************/
+/* cutab_out: print a table of all 64 codons with their count and RSCU to  */
+/* fblkout, four codons per row, followed by a one line summary.           */
+/* An amino acid name is printed only when it differs from the one printed */
+/* in the same column on the previous row. Always returns 1.               */
+int cutab_out(FILE * fblkout, long *nncod, long *nnaa)
+{
+    int   prev_aa[4];                  /* amino acid last shown per column  */
+    int   cod, col, aa;
+    int   machine;
+    float rscu;
+    char  sp;
+
+    machine = (pm->seq_format=='M');
+    if (machine)
+        sp = pm->seperator;
+    else
+        sp = '\t';
+
+    for (col = 0; col < 4; col++)
+        prev_aa[col] = 0;
+
+    codon_tot = codon_error(1, 1, "", (char) 4);      /* dummy              */
+
+    for (cod = 1; cod < 65; cod++) {
+        col = cod % 4;
+        aa  = pcu->ca[cod];
+
+        /* amino acid name (or padding) followed by the codon              */
+        if (prev_aa[col] != aa) {
+            if (machine)
+                fprintf(fblkout, "%s%c%s%c", paa->aa3[pcu->ca[cod]], sp,
+                        paa->cod[cod], sp);
+            else
+                fprintf(fblkout, "%s %s" , paa->aa3[pcu->ca[cod]],
+                        paa->cod[cod]);
+        } else {
+            if (machine)
+                fprintf(fblkout, "%c%s%c", sp, paa->cod[cod], sp);
+            else
+                fprintf(fblkout, " %s", paa->cod[cod]);
+        }
+
+/* Sample of output *******************************************************/
+/*Phe UUU 0 0.00 Ser UCU 1 0.24 Tyr UAU 1 0.11 Cys UGU 1 0.67 */
+/* UUC 22 2.00 UCC 10 2.40 UAC 17 1.89 UGC 2 1.33 */
+/*Leu UUA 0 0.00 UCA 1 0.24 TER UAA 0 0.00 TER UGA 1 3.00 */
+/* UUG 1 0.12 UCG 6 1.44 UAG 0 0.00 Trp UGG 4 1.00 */
+/**************************************************************************/
+
+        /* RSCU = (codon count / amino acid count) * number of synonyms;   */
+        /* reported as zero for an unused codon                            */
+        rscu = 0;
+        if (nncod[cod])
+            rscu = ((float) nncod[cod] / (float) nnaa[pcu->ca[cod]])
+                   * (float) (*(ds + cod));
+
+        if (machine)
+            fprintf(fblkout, "%i%c%.2f%c", (int) nncod[cod], sp, rscu, sp);
+        else
+            fprintf(fblkout, "%5i%5.2f ", (int) nncod[cod], rscu);
+
+        prev_aa[col] = pcu->ca[cod];
+
+        if (!(cod % 4))                /* end of a row of four codons       */
+            fprintf(fblkout, "\n");
+        if (!(cod % 16))               /* blank line after every four rows  */
+            fprintf(fblkout, "\n");
+    }
+
+    fprintf(fblkout, "%li codons in %16.16s (used %22.22s)\n\n",
+            (long int) codon_tot, title, pcu->des);
+    return 1;
+}
+/******************** Dinuc_count *************************************/
+/* Count the frequency of all 16 dinucleotides in all three possible */
+/* reading frames. This one of the few functions that does not use the */
+/* codon and amino acid usage arrays ncod and naa to measure the parameter*/
+/* rather they use the raw sequence data */
+/**************************************************************************/
+/* dinuc_count: add the dinucleotides found in seq[0..ttot-1] to din[][].  */
+/* Bases are coded 1=T/U 2=C 3=A 4=G and 0 for anything else. The code of  */
+/* the most recent base is static, so a dinucleotide may span two calls.   */
+/* Always returns 1.                                                       */
+int dinuc_count(char *seq, long int ttot)
+{
+    static char cur = 0;               /* code of the base just read        */
+    int pos;
+
+    for (pos = 0; pos < ttot; pos++) {
+        const char ch = seq[pos];
+
+        last_base = cur;               /* previous base becomes the first   */
+                                       /* half of the pair                  */
+        if (ch == 't' || ch == 'T' || ch == 'u' || ch == 'U')
+            cur = 1;
+        else if (ch == 'c' || ch == 'C')
+            cur = 2;
+        else if (ch == 'a' || ch == 'A')
+            cur = 3;
+        else if (ch == 'g' || ch == 'G')
+            cur = 4;
+        else
+            cur = 0;
+
+        /* count only when both halves are standard bases; the frame only  */
+        /* advances when a pair has been counted                           */
+        if (cur && last_base) {
+            din[fram][((last_base - 1) * 4 + cur) - 1]++;
+            if (++fram == 3)
+                fram = 0;
+        }
+    }
+    return 1;
+}
+/***************** Dinuc_out ************************************/
+/* Outputs the frequency of dinucleotides, either in four rows per seq */
+/* if the output is meant to be in a human readable form, each row repre- */
+/* senting a reading frame. The fourth row is the total of the all the */
+/* reading frames. Machine readable format writes all the data into a */
+/* single row */
+/**************************************************************************/
+/* dinuc_out: write the dinucleotide frequencies held in din[][] to         */
+/* fblkout, labelled with ttitle.                                          */
+/* Human readable format ('H'): one row per reading frame plus an "all"    */
+/* row, space separated. Otherwise everything goes on a single row using   */
+/* pm->seperator. A header row is written on the first call only.          */
+/* Always returns 1.                                                       */
+int dinuc_out(FILE * fblkout, char *ttitle)
+{
+    static char called = FALSE;
+    static const char *frame_name[4] = {"1:2", "2:3", "3:1", "all"};
+    char bases[5] = {'T', 'C', 'A', 'G'};
+    char sp = pm->seperator;
+    long dinuc_tot[4];                 /* [0..2] per frame, [3] all frames  */
+    int i,x,y;
+
+    for ( x=0 ; x<4 ; x ++) dinuc_tot[x]=0;
+
+    for ( x=0 ; x<3 ; x++ )
+        for ( i=0 ; i<16 ; i++ ){
+            dinuc_tot[x]+=din[x][i];   /* dinuc usage in each frame         */
+            dinuc_tot[3]+=din[x][i];   /* and total dinuc usage             */
+        }
+
+    if (pm->seq_format=='H' ) sp = ' ';
+
+    if (!called) {                     /* write the header row once         */
+        called = TRUE;
+
+        if (pm->seq_format=='H' ) {
+            fprintf(fblkout,"%-13.13s%cframe%c","title", sp,sp);
+            /* "%4c" pads the separator to width 4; a precision is not     */
+            /* defined for %c, so none is given                            */
+            for (x = 0; x < 4; x++)
+                for (i = 0; i < 4; i++)
+                    fprintf(fblkout,"%c%c%4c",bases[x],bases[i],sp);
+        }else{
+            fprintf(fblkout, "%s","title");
+            for (y = 0; y < 4; y ++){
+                fprintf(fblkout, "%c%s",sp,"frame");
+                for (x = 0; x < 4; x++)
+                    for (i = 0; i < 4; i++)
+                        fprintf(fblkout,"%c%c%c",sp, bases[x],bases[i]);
+            }
+        }
+        fprintf(fblkout, "\n");
+    }
+
+/*Sample output truncated **********************************************/
+/*title frame TT TC TA TG CT CC CA CG AT */
+/*MLSPCOPER.PE1__ 1:2 0.024 0.041 0.016 0.008 0.049 0.041 0.033 0.098 ... */
+/*MLSPCOPER.PE1__ 2:3 0.000 0.195 0.000 0.098 0.000 0.138 0.008 0.073 ... */
+/*MLSPCOPER.PE1__ 3:1 0.008 0.016 0.000 0.033 0.033 0.107 0.172 0.262 ... */
+/*MLSPCOPER.PE1__ all 0.011 0.084 0.005 0.046 0.027 0.095 0.071 0.144 ... */
+/*MLSPCOPER.PE2__ 1:2 0.026 0.026 0.009 0.009 0.053 0.035 0.053 0.061 ... */
+/**************************************************************************/
+    for (x = 0; x < 4; x++) {
+        /* the title starts every row in human format, but only the first  */
+        /* block in machine format                                         */
+        if ( pm->seq_format == 'H' || x == 0 )
+            fprintf(fblkout, (pm->seq_format=='H') ?
+                    "%-15.15s%c":"%-.15s%c", ttitle, sp);
+
+        fprintf(fblkout, "%s%c", frame_name[x], sp);
+
+        for (i = 0; i < 16; i++) {
+            if ( !dinuc_tot[x] )       /* nothing counted: avoid 0/0        */
+                fprintf(fblkout,"%5.3f%c", 0.00, sp);
+            else if ( x == 3 )         /* all frames pooled                 */
+                fprintf(fblkout,"%5.3f%c",
+                        (float)(din[0][i]+din[1][i]+din[2][i])/
+                        (float)dinuc_tot[x], sp);
+            else
+                fprintf(fblkout, "%5.3f%c",
+                        (float) din[x][i]/(float)dinuc_tot[x], sp);
+        }
+
+        if ( pm->seq_format == 'H' || x == 3)
+            fprintf(fblkout, "\n");
+    }
+    return 1;
+}
+/************* Coa_raw_out *************************************/
+/* Write out codon usage in a format compatible with the format required */
+/* by text2bin, i.e. part of the COA analysis suite of subroutines */
+/* rather than storing this data in memory, we first write raw codon usage*/
+/* to disk, and then read it in as necessary, the file handle for this */
+/* data is passed via the fcoaout pointer. By default it writes to the */
+/* files coa_raw and coa1_raw */
+/**************************************************************************/
+/* coa_raw_out: append one line of raw usage data for a gene to fcoaout.   */
+/*   nncod   codon counts, written for pm->coa 'c' or 'r' (indices 1..64)  */
+/*   nnaa    amino acid counts, written for pm->coa 'a' (indices 1..21)    */
+/*   ttitle  gene name; whitespace in it is replaced by '_' IN PLACE       */
+/* The line starts with "<running number>_<first 20 chars of name>".       */
+/* pm->junk is used as scratch space. Always returns 1.                    */
+char coa_raw_out(FILE * fcoaout, long *nncod, long *nnaa, char *ttitle)
+{
+    static int count = 0;              /* genes written so far              */
+    int i;
+    int len = (int) strlen(ttitle);
+
+    /* a name containing whitespace would be read back as several fields;  */
+    /* cast via unsigned char because isspace() is undefined for negative  */
+    /* values                                                              */
+    for (i = 0; i < len; i++)
+        if (isspace( (unsigned char) ttitle[i])) ttitle[i] = '_';
+
+    /* strncpy does not terminate the copy when the name has 20 or more    */
+    /* characters, so terminate it explicitly                              */
+    strncpy(pm->junk, ttitle, 20);
+    pm->junk[20] = '\0';
+    fprintf(fcoaout, "%i_%s ", ++count, pm->junk);
+
+    switch (pm->coa) {
+    case 'c':
+    case 'r':                          /* if rscu or codon usage            */
+        for (i = 1; i < 65; i++)
+            fprintf(fcoaout, "%i\t", (int) nncod[i]);
+        fprintf(fcoaout, "\n");
+        break;
+    case 'a':                          /* if amino acid usage               */
+        for (i = 1; i < 22; i++)
+            fprintf(fcoaout, "%i\t", (int) nnaa[i]);
+        fprintf(fcoaout, "\n");
+        break;
+#ifdef DEBUG /* Debugging code */
+    default:
+        fprintf(pm->my_err, " Error in coa_out_raw\n");
+#endif
+    }
+    return 1;
+}
+/********** sorted_by_axis1 *******************************************/
+/* COA specific routine, after the position of the genes on the first axis*/
+/* has been computed the genes are sorted according to there ordination */
+/* this allows us to identify gene positioned at either end of the first */
+/* trend. Then the codon usage of these genes is used to determine the CU */
+/* of these two groups. This information is used to identify optimal codon*/
+/* calculated putative CAI adaptive values and for the Chi squared con- */
+/* tingency test, used to identify the optimal and non-optimal codons */
+/* The position of each gene on axis 1 is passed via the ax1 pointer */
+/* The integer rank of each sequence is stored in sortax1 */
+/* The number of genes is passed by the integer value lig */
+/**************************************************************************/
+/* sorted_by_axis1: rank genes 1..lig by ascending ax1[] value.            */
+/* On return sortax1[r] holds the number of the gene with the r-th lowest  */
+/* value; when values are equal the lower gene number comes first.         */
+/* All arrays are used from index 1.                                       */
+void sorted_by_axis1(double *ax1, int *sortax1, int lig)
+{
+    int    *done;                      /* done[g] set once g has been ranked*/
+    int     rank, gene, best;
+    double  lowest;
+
+    if ((done = (int *) calloc(lig + 1, sizeof(int))) == NULL)
+        my_exit(3, "sorted by axis 1");
+
+    for (gene = 1; gene <= lig; gene++)
+        done[gene] = FALSE;
+
+    /* selection sort: each pass picks the smallest value not yet ranked   */
+    for (rank = 1; rank <= lig; rank++) {
+        best   = 0;                    /* 0 = no candidate found yet        */
+        lowest = 0.0;
+
+        for (gene = 1; gene <= lig; gene++) {
+            if (done[gene])
+                continue;
+            /* the first unranked gene is always taken as the starting     */
+            /* candidate; later ones must be strictly lower to replace it  */
+            if (best == 0 || ax1[gene] < lowest) {
+                lowest = ax1[gene];
+                best   = gene;
+            }
+        }
+        sortax1[rank] = best;
+        done[best]    = TRUE;
+    }
+    free(done);
+}
+/*********** gen_cusort_fop ******************************/
+/* COA specific routine, takes the sorted array of axis 1 positions from */
+/* sort_by_axis1 and passed via the sortax1 pointer. The array contains */
+/* the genes in order of occurrence in the original input file, but the */
+/* ranked order of each gene is recorded as the array value */
+/* This allows us to identify genes position at either end of the main */
+/* trend. Then the codon usage of these genes is used to write out a file */
+/* with the genes in a axis1 position order */
+/* the codon usage of the two groups at either end of the principle axis */
+/* are also counted. This information is then passed to highlow() */
+/* The position of each gene on axis 1 is passed via the ax1 pointer */
+/* The integer rank of each sequence is stored in sortax1 */
+/* The number of genes is passed by the interger value lig */
+/**************************************************************************/
+/* gen_cusort_fop: write the codon usage of every gene to cusort.coa in    */
+/* the order given by sortax1[], pool the codon usage of the genes at each */
+/* end of that order, and pass the two pools to highlow().                 */
+/*   sortax1   sortax1[i] is the line number in fnam of the i-th gene      */
+/*   lig       number of genes                                             */
+/*   fnam      raw codon usage file: a name then 64 counts per line        */
+/*   ssummary  stream that receives the explanatory text                   */
+/* pcoa->fop_gene sets the size of each end group: a negative value is a   */
+/* percentage of lig, a positive value an absolute number of genes.        */
+void gen_cusort_fop(int *sortax1, int lig, FILE * fnam, FILE *ssummary)
+{
+    long int *low, *high;              /* pooled usage of the two groups    */
+    int   min, max;                    /* genes 1..min and max+1..lig pool  */
+    int   stops;
+    int   i, j;
+    float v2;
+    FILE *fcusort = NULL;
+
+    if ((fcusort = open_file("", "cusort.coa", "w", FALSE)) == NULL)
+        my_exit(1, "gen_cusort_fop");
+
+    /* calloc returns zeroed storage, so no separate clearing is needed    */
+    if ((low = (long int *) calloc(65, sizeof(long int))) == NULL)
+        my_exit(3, "low gen_cusort_fop");
+    if ((high = (long int *) calloc(65, sizeof(long int))) == NULL)
+        my_exit(3, "high gen_cusort_fop");
+
+    if (pcoa->fop_gene < 0) {          /* the number is a percentage        */
+        min = (int) ((float) lig * ((float) pcoa->fop_gene * -0.01));
+        max = lig - min;
+    } else {                           /* the value is an absolute number   */
+        min = pcoa->fop_gene;
+        max = lig - pcoa->fop_gene;
+    }
+
+    if (min <= 0) {                    /* percentage too low                */
+        min = 1;
+        fprintf(pm->my_err, "Problems with the number genes used for"
+                " fop adjusting to 1 gene\n");
+    }
+    if (max <= 0) {                    /* fop_gene set too high             */
+        max = 1;
+        fprintf(pm->my_err, "Problems with the number genes used for"
+                " fop adjusting to one gene\n");
+    }
+
+    /* write explanation about what we are doing to the summary            */
+    fprintf(ssummary, "\ncusort.coa (not shown here) contains CU of "
+            "genes sorted by their\n"
+            "ordination on the principle axis or factor\n"
+            "Genes used to calculate fop were 1 to %i and %i to %i\n"
+            "these gene numbers REFER ONLY to the file cusort.coa\n"
+            ,min, max + 1, pcoa->rows);
+
+    for (i = 1; i <= lig; i++) {       /* for each gene, in sorted order    */
+        rewind(fnam);
+        clean_up(ncod, naa);           /* blank the codon usage arrays      */
+
+        /* skip the lines that precede line number sortax1[i]              */
+        for (j = 1; j < sortax1[i]; j++)
+            if (fgets(pm->junk, BUFSIZ, fnam) == NULL)
+                break;                 /* file shorter than expected        */
+
+        fscanf(fnam, "%s", pm->junk);  /* the name of this gene             */
+
+        for (j = 1; j < 65; j++) {     /* the 64 codon counts               */
+            /* the last count also swallows the end of the line; a value   */
+            /* that cannot be read is taken as zero rather than left stale */
+            if (j < 64) {
+                if (fscanf(fnam, "%f", &v2) != 1)
+                    v2 = 0.0F;
+            } else {
+                if (fscanf(fnam, "%f\n", &v2) != 1)
+                    v2 = 0.0F;
+            }
+
+            ncod[j] = (long int) v2;
+            if (min >= i)              /* gene belongs to the first group   */
+                low[j] += (long int) v2;
+            if (max < i)               /* gene belongs to the last group    */
+                high[j] += (long int) v2;
+        }
+
+        /* codon_usage_out would complain about internal stops that were   */
+        /* already reported on the first pass, so every stop codon is      */
+        /* declared to it as a valid stop                                  */
+        for (j = 1, stops = 0; j < 65; j++)
+            if (pcu->ca[j] == 11)
+                stops += (int) ncod[j];
+        dot( 1 , 10 );
+        codon_usage_out(fcusort, ncod, 11, stops, pm->junk);
+    }
+    fileclose(&fcusort);
+    highlow(low, high, ssummary);      /* analyse the two pooled groups     */
+
+    free(low);
+    free(high);
+}
+
+/************ highlow ********************************************/
+/* The codon usage of the two groups on either end of the axis is assigned*/
+/* to low and high ... perhaps these would be better called left and right*/
+/* as when they are passed to this function it is not known which group is*/
+/* lowly or highly biased. This is decided within highlow, by calculating */
+/* the enc (a measure of bias) for each group and assigning the group with*/
+/* the lowest enc as the higher biased genes. This works if the trend */
+/* represented by axis1 is truly selection for optimal translation */
+/* IT'S THE USER'S RESPONSIBILITY TO ASCERTAIN IF THIS IS VALID */
+/* This information is used to identify optimal codons, as well as */
+/* calculate putative CAI adaptive values and for the Chi squared con- */
+/* tingency test, used to identify the optimal and non-optimal codons */
+/**************************************************************************/
+
+void highlow(long int *low, long int *high, FILE * ssummary)
+{
+    /* low, high: codon counts (index 1..64) of the genes at the two ends  */
+    /* of the axis; ssummary: the open summary file, it gets every table   */
+    int *last_row, icode, outer,i,j,x ;
+    long int *aa_low, *aa_high, *left, *right, *left_aa, *right_aa;
+    long int *highest_x;
+    long int right_tot = 0, left_tot = 0;
+
+    float enc_low, enc_high;
+    float a, b, c, d, e, f, g, h, total, hr, br, *x2;
+    float w;
+    char *flag, sp;
+
+    FILE *fcai=NULL,*fhilo = NULL, *ffop = NULL;
+    FILE *fcbi=NULL;
+
+    /* open hilo.coa and fop.coa, then calloc the required work arrays */
+    if ((fhilo = open_file("", "hilo.coa", "w", FALSE)) == NULL)
+        my_exit(1, "hilo.coa");
+    if ((ffop = open_file("", "fop.coa", "w", FALSE)) == NULL)
+        my_exit(1, "fop.coa");
+    if ((aa_low = (long int *) calloc(22, sizeof(long int))) == NULL)
+        my_exit(3, "aa_low");
+    if ((aa_high = (long int *) calloc(22, sizeof(long int))) == NULL)
+        my_exit(3, "aa_high");
+    if ((highest_x = (long int *) calloc(22, sizeof(long int))) == NULL)
+        my_exit(3, "highest_x");
+    if ((x2 = (float *) calloc(65, sizeof(float))) == NULL)
+        my_exit(3, "x2");
+    if ((flag = (char *) calloc(65, sizeof(char))) == NULL)
+        my_exit(3, "flag");
+    if ((last_row = (int *) calloc(65, sizeof(int))) == NULL)
+        my_exit(3, "last_row");
+
+    /* column separator: the user's own for 'M' format, otherwise a tab */
+    if (pm->seq_format=='M')
+        sp = pm->seperator;
+    else
+        sp = '\t';
+
+    /* initialize the various arrays (calloc has zeroed them already, the  */
+    /* explicit pass just does not rely on all-bits-zero being 0.0)        */
+
+    for (x = 0; x < 22; x++){
+        highest_x[x]=0;
+        aa_low   [x]=0;
+        aa_high  [x]=0;
+    }
+    for (x = 0; x <65 ; x++) {
+        x2      [x]= (float) 0.0;
+        flag    [x]=0;
+        last_row[x]=0;
+    }
+
+
+    /*count the amino acid usage for the two datasets, initially we only */
+    /*have the codon usage of the two groups                             */
+    for (i = 1; i < 65; i++) {
+        aa_low[pcu->ca[i]] += low[i];
+        aa_high[pcu->ca[i]] += high[i];
+        flag[i] = ' ';              /*flag is used to identify opt codons */
+    }
+
+    enc_low = enc_out(fhilo, low, aa_low);      /*calc enc for each of */
+    enc_high = enc_out(fhilo, high, aa_high);   /*datasets             */
+    fprintf(fhilo, "\n");
+
+    fprintf(ssummary, "\nenc_left %f enc_right %f\n", enc_low, enc_high);
+
+    /* decide which group is the more biased (the one with the lower enc) */
+    /* left and right refer to the columns of the outputted hilo table.   */
+    /* This is settled once, ahead of the loop, so the four pointers are  */
+    /* always set no matter which codons the loop goes on to skip         */
+    if (enc_low < enc_high) {
+        left = low;
+        right = high;
+        left_aa = aa_low;
+        right_aa = aa_high;
+    } else {
+        left = high;
+        right = low;
+        left_aa = aa_high;
+        right_aa = aa_low;
+    }
+
+    for (i = 1; i < 65; i++) {
+        if (*(ds + i) == 1 || pcu->ca[i] == 11)   /*skip stop and nonsynon*/
+            continue;
+        a = (float) left[i];               /* this codon, left group      */
+        b = (float) right[i];              /* this codon, right group     */
+        g = (float) left_aa[pcu->ca[i]];   /* its amino acid, left group  */
+        h = (float) right_aa[pcu->ca[i]];  /* its amino acid, right group */
+        /* calculate the chi squared contingency value (2 x 2 table) */
+        c = g - a;                 /* synonyms of this codon, left group  */
+        d = h - b;                 /* synonyms of this codon, right group */
+        e = a + b;
+        f = c + d;
+        total = a + b + c + d;
+        if (e * f * h * g)
+            x2[i] = ((a * d - c * b) * (a * d - c * b)) * total / (e * f * g * h);
+        else
+            x2[i] = (float) -99.0;          /*if 0 assign nonsense value*/
+
+        if (g * h) {
+            hr = a / g;
+            br = b / h;
+            if (hr > br && x2[i] > 6.635)        /* if significant at p<0.01 */
+                flag[i] = '*';
+            else if (hr > br && x2[i] > 3.841)   /* if significant at p<0.05 */
+                flag[i] = '@';
+        }
+    }
+    fprintf(ssummary, "Chi squared contingency test of genes from both\n"
+            "extremes of axis 1\n");
+/* this creates the hi-low codon usage table */
+/* Sample output truncated (***********************************************/
+/*Asp GAU 0.10 ( 10) 1.68 ( 53) Gly GGU 0.21 ( 12) 0.85 ( 11) */
+/* GAC* 1.90 (184) 0.32 ( 10) GGC* 3.13 (176) 2.00 ( 26) */
+/*Glu GAA 0.00 ( 0) 1.34 ( 55) GGA 0.05 ( 3) 0.69 ( 9) */
+/* GAG* 2.00 (255) 0.66 ( 27) GGG 0.60 ( 34) 0.46 ( 6) */
+/* */
+/* */
+/* Number of codons in high bias dataset 2825 */
+/* Number of codons in low bias dataset 1194 */
+/*Note: high bias was assigned to the dataset with the lower average Nc */
+/*NO Chi could be calculated for UGU */
+/*Codon UUC (Phe) chi value was 70.175 */
+/*Codon UCC (Ser) chi value was 48.030 */
+/*Codon UAC (Tyr) chi value was 86.069 */
+/**************************************************************************/
+
+    for (outer = 1; outer <= 3; outer += 2) {
+        for (x = 1; x < 5; x++) {
+            for (j = 1; j < 5; j++) {
+                icode = ((x - 1) * 16) + ((j - 1) * 4) + outer;
+                /* one output row: codons icode and icode + 1 side by side */
+
+                for (i = icode; i <= icode + 1; i++) {   /*loop twice */
+                    /* name the amino acid unless the previous entry in this column had it */
+                    if (last_row[i % 2] != pcu->ca[i]) {
+                        fprintf(fhilo, "%s%c%s%c%c", paa->aa3[pcu->ca[i]],
+                                sp, paa->cod[i], flag[i], sp);
+                        fprintf(ssummary, "%s%c%s%c%c", paa->aa3[pcu->ca[i]],
+                                sp, paa->cod[i], flag[i], sp);
+                    } else {
+                        fprintf(fhilo, "%c%s%c%c", sp, paa->cod[i], flag[i], sp);
+                        fprintf(ssummary, " %c%s%c%c",sp,paa->cod[i],flag[i],sp);
+                    }
+                    /* write out Codon usage, RSCU and significance for both data */
+                    fprintf(fhilo, "%4.2f (%3i) %4.2f (%3i)%c",
+                            (left[i]) ?
+                            ((float) left[i] / (float) left_aa[pcu->ca[i]])
+                            * (float) (*(ds + i))
+                            : 0.0,
+                            (int) left[i],
+                            (right[i]) ?
+                            ((float) right[i] / (float) right_aa[pcu->ca[i]])
+                            * (float) (*(ds + i))
+                            : 0.0,
+                            (int) right[i],sp);          /* end of fprintf */
+                    fprintf(ssummary, "%4.2f (%3i) %4.2f (%3i)%c",
+                            (left[i]) ?
+                            ((float) left[i] / (float) left_aa[pcu->ca[i]])
+                            * (float) (*(ds + i))
+                            : 0.0,
+                            (int) left[i],
+                            (right[i]) ?
+                            ((float) right[i] / (float) right_aa[pcu->ca[i]])
+                            * (float) (*(ds + i))
+                            : 0.0,
+                            (int) right[i],sp);          /* end of fprintf */
+                    last_row[i % 2] = pcu->ca[i];    /* remember the last row */
+                }
+                fprintf(fhilo, "\n");
+                fprintf(ssummary, "\n");
+            }
+            fprintf(ssummary, "\n");
+            fprintf(fhilo, "\n");
+        }
+        fprintf(ssummary, "\n");
+        fprintf(fhilo, "\n");
+    }
+
+    for (i = 1; i < 65; i++) {             /* count both datasets */
+        right_tot += right[i];
+        left_tot += left[i];
+    }
+
+
+    fprintf(fhilo,
+            "\tNumber of codons in high bias dataset %li\n", left_tot);
+    fprintf(fhilo,
+            "\tNumber of codons in low bias dataset %li\n", right_tot);
+    fprintf(fhilo,
+            "Note: high bias was assigned to the dataset with the lower"
+            " average Nc\n");
+
+    fprintf(ssummary,
+            "\tNumber of codons in high bias dataset %li\n", left_tot);
+    fprintf(ssummary,
+            "\tNumber of codons in low bias dataset %li\n", right_tot);
+    fprintf(ssummary,
+            "Note high bias was assigned to the genes with the lower"
+            " overall Nc\n");
+
+    /* now printout the Chi Squared values for each significant comparison */
+    for (i = 1; i < 65; i++) {
+        if (flag[i] == '*' || flag[i] == '@') {
+            fprintf(fhilo, "Codon %s (%s) chi value was %.3f\n", paa->cod[i],
+                    paa->aa3[pcu->ca[i]], x2[i]);
+            fprintf(ssummary, "Codon %s (%s) chi value was %.3f\n", paa->cod[i],
+                    paa->aa3[pcu->ca[i]], x2[i]);
+        }
+        if (x2[i] == -99)      /* there were no codons in one of the groups*/
+            fprintf(fhilo, "NO Chi could be calculated for %s\n", paa->cod[i]);
+    }
+    fprintf(fhilo, "\n");
+    fprintf(ssummary, "\n");
+
+    /* now write out the optimal codons as PUTATIVELY identified by codonW */
+    fprintf(ssummary, "These are the PUTATIVE optimal codons\n"
+            "This is the format required for Menu 4 option 2 (Fop) "
+            "and option 3 (CBI)\n"
+            "This data is also duplicated in the files \"fop.coa\" "
+            "and \"cbi.coa\"\n"
+            "The format of these files is that required for input "
+            "as a personal choice\n"
+            "of optimal codons for these indexes\n");
+
+    for (i = 1; i < 65; i++) {
+        if( left[i] > highest_x[pcu->ca[i]])    /* used for calculating CAI */
+            highest_x[pcu->ca[i]]=left[i];
+
+        if (*(ds + i) == 1 || pcu->ca[i] == 11) {
+            fprintf(ffop, "2");
+            fprintf(ssummary, "2");
+        } else if (flag[i] == '*') {
+            fprintf(ffop, "3");
+            fprintf(ssummary, "3");
+        } else if (((left[i]) ?
+                    ((float) left[i] / (float) left_aa[pcu->ca[i]])
+                    * (float) (*(ds + i))
+                    : 0.0) < 0.1) {             /* if RSCU <0.1 its rare */
+            fprintf(ffop, "1");
+            fprintf(ssummary, "1");
+        } else {
+            fprintf(ffop, "2");
+            fprintf(ssummary, "2");
+        }
+
+        if (!(i % 16)) {                        /* handle line wrapping */
+            fprintf(ffop, "\n");
+            fprintf(ssummary, "\n");
+        } else {
+            fprintf(ffop, ",");
+            fprintf(ssummary, ",");
+        }
+    }
+    fileclose(&ffop);                           /* close the Fop file */
+
+    if ((fcbi = open_file("", "cbi.coa", "w", FALSE)) == NULL)
+        my_exit(1, "cbi.coa");                  /* open cbi.coa */
+
+    for (i = 1; i < 65; i++) {                  /* write values 2 cbi.coa*/
+
+        if (flag[i] == '*')                     /* Only report optimal codons */
+            fprintf(fcbi, "3");
+        else
+            fprintf(fcbi, "2");                 /* ignore non optimal codons */
+
+        if (!(i % 16))
+            fprintf(fcbi, "\n");
+        else
+            fprintf(fcbi, ",");
+
+    }
+
+    fileclose(&fcbi);
+
+    fprintf(ssummary, "\n\n");
+
+    /* now calculate and write out CAI adaptiveness values */
+    fprintf(ssummary, "These are PUTATIVE CAI adaptiveness values "
+            "identified by this programme\n"
+            "This data is also duplicated in the file \"cai.coa\"\n"
+            "The format of this file is compatible with the format\n"
+            "of the file used to input a personal selection of CAI values\n"
+            "That is, the format required for Menu 4 option 1\n"
+            "cai.coa\tinput file to be used for CAI calculations\n"
+            "\n\nCod AA Xi\tWi\t\tCod AA Xi\tWi\n");
+
+
+    if ((fcai = open_file("", "cai.coa", "w", FALSE)) == NULL)
+        my_exit(1, "cai.coa");
+
+    for (i = 1, x = TRUE ; i < 65 && x ; i++) {
+
+        /* if a stop or a non-synonymous codon w = 1 */
+        if (*(ds + i) == 1 || pcu->ca[i] == 11) {
+            fprintf(fcai, "1.0000000 \n");
+            fprintf(ssummary,"%s %s %6.1f %9.7f\t",
+                    paa->cod[i],
+                    paa->aa3[pcu->ca[i]],
+                    (float) left[i], 1.0000000);
+        } else if ( highest_x[pcu->ca[i]] ) {
+
+            /* if a codon is absent then adjust its frequency to 0.5 */
+            if ( left[i] )
+                w= (float) left[i]/ (float) highest_x[pcu->ca[i]];
+            else
+                w= (float) 0.5 / (float) highest_x[pcu->ca[i]];
+            fprintf(fcai, "%9.7f \n", w);       /* output CAI W */
+            fprintf(ssummary,"%s %s %6.1f %9.7f\t",
+                    paa->cod[i], paa->aa3[pcu->ca[i]],
+                    (left[i]) ? (float) left[i]:0.5 , w);
+        /* either strange amino acid composition or data sets were too small */
+        } else {
+            fprintf(pm->my_err,
+                    "WARNING An attempt to calculate CAI relative "
+                    "adaptivnesss FAILED\n no %s amino acids found"
+                    " in the high bias dataset \n",paa->aa3[pcu->ca[i]]);
+            fprintf(ssummary,
+                    "\nWARNING An attempt to calculate CAI relative adaptiveness "
+                    "FAILED\n no %s amino acids found in the high bias dataset \n",
+                    paa->aa3[pcu->ca[i]]);
+            x=FALSE;                            /* stops the loop */
+        }
+        if( !(i%2)) fprintf (ssummary , "\n");
+    }              /* matches for (i = 1, x = TRUE ; i < 65 && x ; i++) */
+
+    fileclose(&fcai);                           /* close files */
+    fileclose(&fhilo);
+    free(aa_low);                               /* free memory */
+    free(aa_high);
+    free(highest_x);
+    free(x2);
+    free(flag);
+    free(last_row);
+    return;
+}
+/********************* hydro_out **********************************/
+/* The general average hydropathicity or (GRAVY) score, for the hypothet- */
+/* ical translated gene product. It is calculated as the arithmetic mean */
+/* of the sum of the hydropathic indices of each amino acid. This index */
+/* was used to quantify the major COA trends in the amino acid usage of */
+/* E. coli genes (Lobry, 1994). */
+/* Calculates and outputs total protein hydropathicity based on the Kyte */
+/* and Doolittle Index of hydropathicity (1982) */
+/* nnaa Array with frequency of amino acids */
+/* paa points to a struct containing Amino Acid values */
+/* pap->hydro Pointer to hydropathicity values for each AA */
+/**************************************************************************/
+int hydro_out(FILE * foutput, long int *nnaa)
+{
+    long int a2_tot = 0;          /* amino acids counted, slot 11 left out */
+    float hydro = (float) 0.0;    /* the score being accumulated           */
+    int i;
+    /* a tab for human readable ('H') output, else the chosen separator */
+    char sp = (pm->seq_format == 'H') ? (char) '\t' : (char) pm->seperator;
+
+    for (i = 1; i < 22; i++)      /* slot 11 is skipped in both loops; in  */
+        if (i != 11)              /* highlow() ca[] == 11 marks the stops  */
+            a2_tot += nnaa[i];
+    if (!a2_tot) {                /* no amino acids: warn, write no score  */
+        fprintf(pm->my_err, "Warning %.20s appear to be too short\n", title);
+        fprintf(pm->my_err, "No output was written to file \n");
+        return 1;                 /* NB every path returns 1               */
+    }
+
+    /* weight each pap->hydro value by that amino acid's share of the total */
+    for (i = 1; i < 22; i++)
+        if (i != 11)
+            hydro += ((float) nnaa[i] / (float) a2_tot) * (float) pap->hydro[i];
+    fprintf(foutput, "%8.6f%c", hydro, sp);   /* one column plus separator */
+    return 1;
+}
+/**************** Aromo_out ***********************************************/
+/* Aromaticity score of protein. This is the frequency of aromatic amino */
+/* acids (Phe, Tyr, Trp) in the hypothetical translated gene product */
+/* nnaa Array with frequency of amino acids */
+/* paa points to a struct containing Amino Acid values */
+/* pap->aromo Pointer to aromaticity values for each AA */
+/**************************************************************************/
+int aromo_out(FILE * foutput, long int *nnaa)
+{
+    long int a1_tot = 0;          /* amino acids counted, slot 11 left out */
+    float aromo = (float) 0.0;    /* the score being accumulated           */
+    int i;
+    /* a tab for human readable ('H') output, else the chosen separator */
+    char sp = (pm->seq_format == 'H') ? (char) '\t' : (char) pm->seperator;
+
+    for (i = 1; i < 22; i++)      /* slot 11 is skipped in both loops; in  */
+        if (i != 11)              /* highlow() ca[] == 11 marks the stops  */
+            a1_tot += nnaa[i];
+    if (!a1_tot) {                /* no amino acids: warn, write no score  */
+        fprintf(pm->my_err, "Warning %.20s appear to be too short\n", title);
+        fprintf(pm->my_err, "No output was written to file \n");
+        return 1;                 /* NB every path returns 1               */
+    }
+
+    /* weight each pap->aromo value by that amino acid's share of the total */
+    for (i = 1; i < 22; i++)
+        if (i != 11)
+            aromo += ((float) nnaa[i] / (float) a1_tot) * (float) pap->aromo[i];
+    fprintf(foutput, "%8.6f%c", aromo, sp);   /* one column plus separator */
+    return 1;
+}
+
+
diff --git a/codons.c b/codons.c
new file mode 100755
index 0000000..309a4db
--- /dev/null
+++ b/codons.c
@@ -0,0 +1,1149 @@
+/**************************************************************************/
+/* CodonW codon usage analysis package */
+/* Copyright (C) 2005 John F. Peden */
+/* This program is free software; you can redistribute */
+/* it and/or modify it under the terms of the GNU General Public License */
+/* as published by the Free Software Foundation; version 2 of the */
+/* License, */
+/* */
+/* This program is distributed in the hope that it will be useful, but */
+/* WITHOUT ANY WARRANTY; without even the implied warranty of */
+/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
+/* GNU General Public License for more details. */
+/* You should have received a copy of the GNU General Public License along*/
+/* with this program; if not, write to the Free Software Foundation, Inc.,*/
+/* 675 Mass Ave, Cambridge, MA 02139, USA. */
+/* */
+/* */
+/* The author can be contacted by email (jfp#hanson-codonw at yahoo.com Anti-*/
+/* Spam please change the # in my email to an _) */
+/* */
+/* For the latest version and information see */
+/* http://codonw.sourceforge.net */
+/**************************************************************************/
+/* */
+/* ----------------------- Codons.C ------------------------ */
+/* This file contains main() function and drives CodonW. */
+/* */
+/* External subroutines and functions */
+/* clearscr screen clearing Macro defined in CodonW.h */
+/* proc_comm_line process command line arguments */
+/* initilize_point assigns genetic code dependent parameters to structs*/
+/* initilize_coa selects the default codons to exclude from the */
+/* Correspondence Analysis */
+/* main_menu The interactive menu system */
+/* clean_up Re-zeros various internal counters and arrays */
+/* open_file Open files, checks for existing files */
+/* fileclose Closes files and returns a NULL pointer or exits */
+/* textbin Converts codon usage to binary data file */
+/* dot(,X) prints a period every X times it is called */
+/* PrepAFC Prepare for the COA */
+/* DiagoRC This routine generates the COA */
+/* colmout write the output from COA to file */
+/* rowout save as above except records the gene information */
+/* inertialig analyse row inertia and records the results to file */
+/* inertiacol analyse column inertia and record the results */
+/* suprow add supplementary genes into COA */
+/* get_aa converts a three base codon into a 1 or 3 letter AA */
+/* codon_error Called after all codons read, checks data was OK */
+/* rscu_usage_out Write out RSCU */
+/* codon_usage_out Write out Codon Usage */
+/* raau_usage_out Write out normalised amino acid usage */
+/* dinuc_count Count the dinucleotide usage */
+/* dinuc_out Write out dinucleotide usage */
+/* aa_usage_out Write out amino acid usage */
+/* gc_out Writes various analyses of base usage */
+/* cutab_out Write a nice tabulation of the RSCU+CU+AA */
+/* base_sil_us_out Write out base composition at silent sites */
+/* cai_out Write out CAI usage */
+/* cbi_out Write out codon bias index */
+/* fop_out Write out Frequency of Optimal codons */
+/* enc_out Write out Effective Number of codons */
+/* hydro_out Write out Protein hydropathicity */
+/* aromo_out Write out Protein aromaticity */
+/* coa_raw_out Write out raw codon usage for use by COA analysis */
+/* */
+/* */
+/* Internal subroutines to Codon.c */
+/* my_exit Controls exit from CodonW closes any open files */
+/* tidy reads the input data */
+/* output called from tidy to decide what to do with the data */
+/* toutput handles the reformatting and translation of seqs */
+/* output_long if sequence is very long then process what we know */
+/* and write sequence to disk in fragments */
+/* file_close Closes open files */
+/* c_help Generates help information */
+/* WasHelpCalled Checks strings to see if help was requested */
+/* */
+/**************************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <string.h>
+#include <errno.h>
+#include <ctype.h>
+
+#define ORIG_DEFS
+ /* used to decide whether declarations are external or not */
+ /* Master Header file */
+#include "codonW.h"
+#undef ORIG_DEFS
+
+
+#if defined(__MWERKS__)
+#include <console.h>
+#endif
+
+/************************** MAIN **************************************/
+/* The main function processes commandline arguments to decide whether */
+/* CodonW is running in an interactive mode, if so then the menu is called*/
+/* CodonW also had the less documented feature of imitating other useful */
+/* codon usage and sequence manipulation program. If the program is */
+/* called by a recognised name (see proc_comm_line for a list) such as */
+/* rscu then pm->codons is false and it only performs the required tasks */
+/* bypassing the menu system. */
+/* Main then calls tidy() to read in the data files, and count codon usage*/
+/* depending on the requested output options toutput calls various subrou */
+/* tines. If COA has been requested it also calls these subroutines and */
+/* recording useful information to summary.coa. */
+/**************************************************************************/
+
+int main(int argc, char *argv[])
+{
+    FILE *finput = NULL, *foutput = NULL, *fblkout = NULL;
+    FILE *fcoaout = NULL;            /* local alias of pm->fcoa_out        */
+    FILE *fsummary= NULL;            /* summary.coa, opened on first COA   */
+    int num_seq = 0;                 /* genes read from the add_row file   */
+
+    num_sequence = 0;
+    num_seq_int_stop = 0;
+    valid_stops = 0;
+    last_aa = 0;
+
+#if defined(__MWERKS__)              /* Macintosh code-warrior             */
+    argc=ccommand(&argv);
+#endif
+
+    pm = &Z_menu;
+    pm->totals = FALSE;
+    pm->my_err = stderr;
+
+    /* set up the code dependent tables first, then read the command line */
+    initilize_point(pm->code, pm->f_type, pm->c_type);
+    initilize_coa(pm->code);
+
+    proc_comm_line(&argc, &argv);
+
+    /* each pass of the loop below is one complete analysis run; it only  */
+    /* repeats when running interactively as CodonW with the menu enabled */
+    /******************** main loop ****************************/
+
+    do {
+        if (pm->codonW) {
+            /* If the program chosen is codons */
+            printf(" Welcome to CodonW %.*s for Help type h\n\n",
+                   (int) strlen(Revision) - 11, Revision +10 );
+            /* Now Run the main menu interface */
+            if (pm->menu) main_menu(0);
+        }
+
+        /* if users select human readable output they want nice tables */
+        if (pm->bulk == 'C' && pm->seq_format == 'H') pm->bulk = 'O';
+        if (pm->bulk == 'S' && pm->seq_format == 'H') pm->bulk = 'O';
+
+        pm->analysis_run = TRUE;     /* codons has started an analysis this*/
+                                     /* parameter is checked by my_exit    */
+
+        if (pm->inputfile != NULL)   /* rewind various input files in case */
+            rewind(pm->inputfile);   /* this is a second analysis run      */
+        if (pm->fopfile != NULL)
+            rewind(pm->fopfile);
+        if (pm->cbifile != NULL)
+            rewind(pm->cbifile);
+        if (pm->caifile != NULL)
+            rewind(pm->caifile);
+
+        /* num_sequence      number of sequences read          */
+        /* num_seq_int_stop  number with internal stop codons  */
+        /* valid_stops       No.terminated with a stop codon   */
+        /* tot               total number of codons read       */
+
+        num_sequence = num_seq_int_stop = valid_stops = tot = 0;
+
+        clean_up(ncod, naa);         /*re-zero count of amino and codons   */
+        finput = pm->inputfile;
+        foutput = pm->outputfile;
+        fblkout = pm->tidyoutfile;
+
+        fileclose(&pm->fcoa_out);    /* drop any coa_raw left from last run */
+        if (pm->coa)
+            if ((pm->fcoa_out = open_file("", "coa_raw", "w", FALSE)) == NULL)
+                my_exit(1, "coa_raw");   /*controlled exit from CodonW */
+        fcoaout = pm->fcoa_out;
+
+        /* Tidy                                                            */
+        /* reads input data, returns the number of sequences read in       */
+        /* num_sequence is global so I don't really have to assign it here */
+        num_sequence = tidy(finput, foutput, fblkout, fcoaout);
+
+        fprintf(pm->my_err,"\n\n\t\tNumber of sequences: %i\n",
+                num_sequence);
+
+        /* num_seq_int_stop value is calculated in codon_usage_out */
+        if (num_seq_int_stop > 0 && pm->warn ) {
+            if (pm->totals && (num_seq_int_stop >= valid_stops ))
+                fprintf(pm->my_err, "\tWARNING\t At least one sequence in your"
+                        " input file has\ninternal stop codons (found %i"
+                        " internal stops) \tWARNING\n",num_seq_int_stop);
+            else
+                fprintf(pm->my_err, "\tWARNING\t %i sequences had internal "
+                        "stop codons \tWARNING\n",num_seq_int_stop);
+        }
+        /* don't wait for a pause if no_menu has been set */
+        if ( pm->codonW && pm->menu ) pause;
+
+        if ( pm->coa && pm->totals)      /* idiots error catch */
+            my_exit(99,"A COA analysis of concatenated sequences is nonsensical\n"
+                    "I have completed any other requests but not the COA");
+
+        /* if COA has been requested then open summary.coa and start the analysis */
+        if (pm->coa) {
+            if (fsummary == NULL)
+                if ((fsummary = open_file("", "summary.coa", "w", FALSE)) == NULL)
+                    my_exit(1, "summary.coa");
+            /* set the number of genes in the analysis to the number read by tidy */
+            pcoa->rows = num_sequence;
+            /* coa_raw is complete: close it through pm->fcoa_out (fcoaout is  */
+            /* only an alias) so no stale copy of the handle is left in pm     */
+            fileclose(&pm->fcoa_out);
+            fcoaout = NULL;
+            textbin("coa_raw", "cbrawin");   /* raw text -> binary for the COA */
+            printf("Generating correspondence analysis\n");
+            dot(0,10);
+
+
+            fprintf(fsummary, "\t\tSummary of Correspondence Analysis \n\n"
+                    "The input file was %s it contained %i genes\n"
+                    "The number of axes generated was %i\n"
+                    "A COA was requested of %s%s usage\n\n\n"
+                    "Most of the output presented in this file "
+                    "has also been written to separate files\n"
+                    "genes.coa\tThe position of the genes on the "
+                    "first %i axis\n"
+                    "%s.coa\tThe position of the %i %s on the %i "
+                    "principle axes\n\n\n",
+                    pm->curr_infilename,
+                    pcoa->rows,
+                    ((pcoa->rows<pcoa->colm)?pcoa->rows:pcoa->colm)-1,
+                    (pm->coa == 'r') ?"relative synonymous ":"",
+                    (pm->coa == 'a') ?"amino acid" : "codon",
+                    pcoa->axis,
+                    (pm->coa == 'a') ?"amino" : "codon",
+                    pcoa->colm,
+                    (pm->coa == 'a') ?"amino acids":"codons",
+                    pcoa->axis);
+            /* allocate memory for the rows and columns, scale both, and write */
+            /* out the resulting matrix to the file cbrawin                    */
+
+            PrepAFC("cbrawin");
+
+            /* Now do the analysis, calculate the data inertia and all the vectors */
+
+            DiagoRC(fsummary);
+
+            /* colmout records the position of the columns on each of the axes */
+
+            if (pm->coa == 'a')
+                colmout("cbfcco", "amino.coa", paa, fsummary);
+            else
+                colmout("cbfcco", "codon.coa", paa, fsummary);
+
+            /* rowout records the position of the genes on each of the axes */
+
+            rowout("cbfcli", "genes.coa", "coa_raw", fsummary);
+
+            /* pcoa->level == e for exhaustive analysis of inertia */
+            if (pcoa->level == 'e') {
+
+                fprintf(fsummary, "\n\n\nYou requested detailed output from the COA"
+                        "\n\nThe absolute and relative inertia "
+                        "of each gene and %s (see also inertia.coa)\n",
+                        (pm->coa == 'a') ? "amino acids" : "codons");
+                /* inertialig must precede inertiacol, records inertia of genes */
+                /* to file; it opens the raw codon usage file and loads the data */
+                inertialig("inertia.coa", "coa_raw" ,fsummary);
+                /* uses the preloaded raw codon usage to calculate inertia and  */
+                /* other data such as the contribution of each column to each   */
+                /* factor, the extent each column is explained by each factor   */
+                /* and what the residual variation is                           */
+                inertiacol("inertia.coa", fsummary);
+            }
+
+            /* if pcoa->add_row is a real string, it is the name of a file     */
+            /* containing additional sequence data that was excluded from the  */
+            /* COA but is factored in using the original COA vectors; all      */
+            /* other calculation can then proceed as with the original data    */
+            if (strlen(pcoa->add_row)) {
+                if ((finput = open_file("", pcoa->add_row, "r", FALSE))
+                    == NULL) my_exit(6, "add_row");
+                if ((foutput = tmpfile()) == NULL)
+                    my_exit(1, "temp file foutput");
+                if ((fblkout = tmpfile()) == NULL)
+                    my_exit(1, "temp file fblkout");
+
+                if ((fcoaout = open_file("", "coa1_raw", "w", FALSE)) == NULL)
+                    my_exit(1, "coa1_raw");
+
+                clean_up(ncod, naa);
+                num_sequence =num_seq_int_stop=valid_stops=tot = 0;
+                /* load the additional data file and process as normal, but    */
+                /* don't calculate any indices or write the data to the normal */
+                /* output files, rather write them to tmp files which will be  */
+                /* deleted at end of program execution                         */
+                num_seq = tidy(finput, foutput, fblkout, fcoaout);
+
+                /* close the files now we are finished */
+                fileclose(&fcoaout);
+                fileclose(&foutput);
+                fileclose(&fblkout);
+                fileclose(&finput);
+
+                /* convert to binary; note this uses coa1_raw, not coa_raw */
+                textbin("coa1_raw", "cb1raw");
+                /* now call the routine suprow and add these additional genes, */
+                /* we will process this data for inertia and append the gene   */
+                /* and col. coordinates to the original genes.coa and codon.coa */
+                suprow(num_seq, "cbfcvp", "cb1raw", "genes.coa", "coa1_raw", fsummary);
+
+                /* every file opened for the supplementary rows (finput,       */
+                /* foutput, fblkout, fcoaout) was closed straight after tidy() */
+                /* above, so no handle from this branch is still open here and */
+                /* nothing further needs closing; the temporary files go away  */
+                /* on their own (see tmpfile)                                  */
+            }
+        }
+        printf("\n");
+    } while (pm->codonW && pm->menu );   /* OK now we loop back to main_menu */
+    /* though only if we are in interactive mode and running as CodonW */
+    my_exit(0,"");                       /* last call to my_exit */
+    return 0;                            /* dummy return to keep pedantic but */
+                                         /* brain dead compilers happy */
+}
+
+/********************** END of MAIN() **********************************/
+
+
+/********************** Subroutines **********************************/
+/* Tidy */
+/* reads input data from a sequence file containing fasta like formatted */
+/* sequence discards numbers, but keeps other characters */
+/* Each sequence must begin with a title line, which starts with > or ; */
+/* any following descriptive lines must begin with ; or >.Sequence start */
+/* is the first alphabetic character on the line following the headers */
+/* There is no limit to sequence length or number of sequences but */
+/* input lines should be less than 200 char in width */
+/**************************************************************************/
+
+int tidy(FILE * finput, FILE * foutput, FILE * fblkout, FILE * fcoaout)
+{
+ char seq[MAX_GENE + LINE_LENGTH + 1];
+ char in[LINE_LENGTH + 1];
+ int first_line = TRUE, ic = 0;
+ int ii = 0;
+ int i,x;
+ long ic_orig = 0;
+/* while still able to read data from the input file keep reading */
+ while ((fgets(in, LINE_LENGTH, finput) != NULL)) {
+
+/* idiot error check to see if the file looks like fasta or PIR format */
+ if (!num_sequence && in[0] != ';' && in[0] != '>') {
+ fprintf(stderr, "\n Error input file not in a recognised format \n"
+ " you must convert it into FASTA/Pearson format"
+ " EXITING\n");
+ my_exit(99, "input file not in a recognised format:tidy");
+ }
+
+ if (in[0] == ';' || in[0] == '>') { /* if true them this is a header */
+ if (first_line) { /* if true this is the first header*/
+
+ first_line = FALSE; /* will only be reset when reread */
+ /* the next sequence */
+ if (num_sequence) { /* wait till we have read the first*/
+ /* before writing to disk */
+/* now if we are concatenating sequence data we need will handle it thus */
+ if (pm->totals) {
+
+/* first if translating or reformatting the input file flush the read */
+/* data to the disk */
+
+ if (strchr("RNT",(int)pm->bulk)!=NULL) output_long(fblkout, seq);
+ if (tot) {
+ /* if something we have sequence read in, then we need to process this */
+ /* check whether the last codon of the sequence was was a stop */
+ last_aa = codon_usage_tot(seq, tot);
+ if (pcu->ca[last_aa] == 11) valid_stops++;
+ }
+/* rather re-setting everything to zero, we will just blank the array seq */
+ tot = 0;
+ } else {
+/* else matches if tot; if sequences are not being concatenated we call */
+/* output to decide what to do with all the read data */
+/* then we blank all the data from memory and start again */
+ output(seq, foutput, fblkout, fcoaout);
+ clean_up(ncod, naa);
+ }
+ } /* matches if(num_sequence) */
+
+/* If we get here we have read a header line, this then needs to be proc'ed*/
+/* first the header is tested to see does it contain spaces the string is */
+/* converted from the first non space character to the title array */
+
+ for (ii = 1; isspace( (int) in[ii]) && ii < (int) strlen(in); ii++)
+ ;
+ strncpy(title, in + ii, 99);
+
+/* Titles are cleaned up by removing newline characters and the delimiting */
+/* character p,->seperater and also null terminating the title string */
+
+ for (i = 0; i < (int) strlen(title); i++) {
+ if (title[i] == '\n')
+ title[i] = '\0'; /* chops new line off */
+ else if (title[i] == pm->seperator )
+ title[i] = '_'; /* removes the separator if present */
+ else if (i == (int) (strlen(title) - 1))
+ title[i] = '\0'; /* if we have reached end of title */
+ }
+
+/* if we are reformatting the data, we print a friendly dot just in-case */
+if (strchr("RNT", (int)pm->bulk) ==NULL || pm->totals)
+ dot((int) num_sequence, 5);
+/* we have now finished processing our first header line and are reading */
+/* our sequence data */
+num_sequence++;
+ } /* matches if first line */
+ continue; /* read another line ie. jump to while()*/
+ } /* if (in[0] == ';' || in[0] == '>') */
+ else{ /* this must be a line containing seq */
+ first_line = TRUE; /* so reset the first_line variable */
+ }
+
+/* at this point we have read in the header lines and have been or about to*/
+/* process the input data, now we test how much we have read into the array*/
+/* seq, tot is equivalent to the last element in the array */
+/* if tot is greater than or equal to MAX_GENE then the array is quite full*/
+/* luckily we made the array seq to be MAX_GENE plus LINE_LENGTH +1 */
+
+ if (tot >= MAX_GENE) { /* sequence is larger than seq */
+ master_ic += MAX_GENE; /* now remember how many bases we are */
+ ic_orig = tot; /* going to write to disk */
+ /* and what size the array was to start */
+
+ if (strchr("RNT", (int) pm->bulk) != NULL)
+ output_long(fblkout, seq);/* flush to disk and then continue */
+ else if (pm->bulk == 'D')
+ dinuc_count(seq, tot); /* then we had better count the dinucs */
+
+/* Debugging code in-case we are asking for something that we can't handle */
+#ifdef DEBUG
+ else if (strchr("OCASLDBX", (int) pm->bulk) != NULL) ; /* dummy */
+ else if (pm->bulk)
+ fprintf(stderr, "ERROR-22 %c pm->bulk undefined\n", pm->bulk);
+
+ if (pm->cai || pm->fop || pm->cbi || pm->enc || pm->gc ||
+ pm->gc3s || pm->sil_base || pm->bulk ||
+ pm->coa);
+ else
+ fprintf(stderr, "Programming error");
+#endif
+
+
+/* Now count first MAX_GENE bases, luckily MAX_GENE is always a multiple of*/
+/* 3, we count the bases and amino acids in codon_usage_tot */
+
+ last_aa = codon_usage_tot(seq, MAX_GENE);
+
+/* now we move all unprocessed/written/counted bases to the front of seq */
+
+ for (i = MAX_GENE, x = 0; i < ic_orig; i++, x++)
+ seq[x] = seq[i]; /* i is pointing near the end of array */
+ tot = x; /* x the front of the array */
+ } /* Matches if (tot >= MAX_GENE) */
+
+ ic = 0; /* first base of the input file */
+ while (in[ic] != '\0') { /* scan input line till we see a Null */
+ if (isalpha((int)in[ic])) ; /* do nothing if a alpha */
+ else if (pm->bulk == 'R' && in[ic] == '-'); /* do nothing */
+ else if (in[ic] == '*' || in[ic] == '.') ; /* do nothing */
+
+ else {
+ ic++; /* is not one above skip to next letter */
+ continue;
+ } /* while( in[ic] != '\0') */
+
+
+ in[ic] = (char)toupper((int)in[ic]);/* converts2capitals */
+ if (strrchr("CG", (int) in[ic]) != NULL)
+ GC_TOT++; /* is it a G or C */
+ else if (strrchr("ATU", (int) in[ic]) != NULL)
+ AT_TOT++; /* is it an A or T */
+ else if ( in[ic] == '-' )
+ GAP_TOT++; /* is it a gap character */
+ else
+ non_std_char++; /* then it isn't a standard base */
+
+ if (strrchr("ABCDEFGHIKLMNPQRSTVWYZX"
+ ,(int) in[ic]) != NULL)
+ AA_TOT++; /* it might be an amino acid */
+ if (strrchr("MRWSYKVHDBXN" , (int) in[ic]) != NULL)
+ IUBC_TOT++; /* it might be a IUBC code */
+
+ seq[tot] = in[ic]; /* move base into seq array */
+ seq[tot + 1] = '\0'; /* make sure array is null term'ed */
+
+
+ /* now we test that the first codon is a valid start codon */
+
+ if ( tot == 0 && master_ic == 0 ) {
+
+ in[1] = (char)toupper((int)in[1]); /* Uppercase the first codon */
+ in[2] = (char)toupper((int)in[2]);
+
+ if ( in[1] == 'T' && (in[0] == 'A' || in[2] == 'G' ))
+ valid_start=TRUE; /* Yeup it could be a start codon */
+ else
+ valid_start=FALSE; /* Nope it doesn't seem to be one */
+ }
+ ic++; /* total No. of sequence bases read */
+ tot++; /* total currently stored in memory */
+ }
+ } /* reached end of input file */
+
+/* Idiot error catch, this file is empty, at least it looks empty to codonW*/
+
+ if ( !num_sequence ) my_exit(99,"The input file was empty");
+
+/* better make sure to write anything left in seq to disk before returning */
+
+ output(seq, foutput, fblkout, fcoaout);
+ return (int) num_sequence;
+}
+
+/************************ TOUTPUT **********************************/
+/* toutput */
+/* */
+/* This subroutine is very similar to output_long, basically it reformats */
+/* or translates sequences less than MAX_GENE in length as a single read */
+/* It writes in reader format "ACG ATT ATC" i.e writes the sequence in */
+/* codons. Because it works with output_long it needs to know whether */
+/* the sequence being written to disk is a fragment or a complete gene */
+/**************************************************************************/
+int toutput(FILE * fblkout, char *seq) {
+    /* Flush the tot bases held in seq to fblkout in the layout chosen by   */
+    /* pm->bulk ('T' fasta, 'R' reader, 'N' translation), write or patch    */
+    /* the header line, then log a DNA/protein summary on pm->my_err.       */
+    /* Always returns 1.                                                    */
+    long int ic = 0;                 /* index of the next base to write     */
+    int space = 3;                   /* reader format: bases since last gap */
+    char codon[4];
+    int i, x;
+
+    if (long_seq == FALSE) {         /* then this must be a complete gene   */
+        switch (pm->bulk) {
+        case 'T':                    /* tidy or fasta formatted header      */
+            fprintf(fblkout, ">%-20.20s%6li\n",
+                    title, (long int) tot + master_ic);
+            break;
+        case 'R':                    /* reader header .. don't ask          */
+            fprintf(fblkout, ">%6li %-70.70s\n",
+                    (long int) tot + master_ic, title);
+            break;
+        case 'N':                    /* Conceptually translated DNA header  */
+            fprintf(fblkout, ">%-20.20s%6li\n",
+                    title, (long int) ((tot + master_ic) / 3));
+            break;
+        default:                     /* whoops                              */
+            printf("\nProgramming error type A2 check code \n");
+            my_exit(99, "toutput");
+            break;
+        }
+    } else {
+        /* long_seq is true: earlier chunks of this sequence are already on */
+        /* disk under a dummy header written before its length was known.   */
+        /* Seek back to that header (fl_pos_start), overwrite it with the   */
+        /* real length, then return to the current write position.          */
+        fl_pos_curr = ftell(fblkout);   /* record where we are at present   */
+        fseek(fblkout, fl_pos_start, SEEK_SET);    /* back to header line   */
+        switch (pm->bulk) {
+        case 'T':                    /* Now update the info                 */
+            fprintf(fblkout, ">%-20.20s%6li",
+                    title, (long int) tot + master_ic);
+            break;
+        case 'R':
+            fprintf(fblkout, ">%6li %-70.70s",
+                    (long int) tot + master_ic, title);
+            break;
+        case 'N':
+            fprintf(fblkout, ">%-20.20s%6li", title,
+                    (long int) ((tot + master_ic) / 3));
+            break;
+        default:
+            printf("\nProgramming error type A3 check code \n");
+            my_exit(99, "output");
+        }
+        fseek(fblkout, fl_pos_curr, SEEK_SET);     /* resume where we were  */
+    }
+
+    while (ic < tot) {               /* keep writing till the array is empty */
+        switch (pm->bulk) {
+        case 'T':
+            fprintf(fblkout, "%c", seq[ic++]);
+            reg++;
+            break;
+        case 'R':
+            if (space == 3) {        /* reader format: a space before each   */
+                fprintf(fblkout, " ");          /* group of three bases      */
+                space = 0;
+            } else {                 /* inside a codon so just print the base */
+                fprintf(fblkout, "%c", seq[ic++]);
+                space++;
+                reg++;
+            }
+            break;
+        case 'N':
+            for (i = (int) ic, x = 0; i < (int) ic + 3 && i < tot; i++, x++)
+                codon[x] = *(seq + i);  /* copy up to three bases            */
+            codon[x] = '\0';         /* null terminate the codon array       */
+            ic += 3;                 /* remember that we have read 3 bases   */
+            /* get_aa(1, codon): 1 asks for the one letter amino acid code   */
+            fprintf(fblkout, "%c", *get_aa(1, codon));
+            reg++;
+            break;
+        }
+        if (!(reg % 61)) {           /* 60 characters written on this line   */
+            reg = 1;
+            fprintf(fblkout, "\n");
+        }
+    }
+
+    if (reg != 1) {                  /* reached the end of sequence so we    */
+        fprintf(fblkout, "\n");      /* print a \n char unless we just did   */
+        reg = 1;                     /* reset number of bases printed        */
+    }
+
+    /* Now that this sequence is on disk, report a few diagnostics about   */
+    /* the characters it used.                                             */
+    /* printf arguments are cast to match their conversion specifiers, and */
+    /* the literal percent sign after GC is escaped as %% (a bare "% ="    */
+    /* is not a valid conversion).                                         */
+    if (AT_TOT + GC_TOT > AA_TOT*0.5) {  /* Assume its DNA then             */
+        fprintf(pm->my_err, "%3li>\t%6li %-40.40s\tDNA\tGC%%"
+                " =%5.3f\n"          /* with G+C content and length of gene  */
+                ,(long int) num_sequence
+                ,(long int) tot + master_ic, title
+                ,(float) GC_TOT / (GC_TOT + AT_TOT));
+
+        if (non_std_char - IUBC_TOT && pm->warn )  /* any non IUBC characters */
+            fprintf(pm->my_err, "\t\t WARNING %ld non IUBC standard characters "
+                    "in sequence %li\n"
+                    ,(long int) (non_std_char - IUBC_TOT)
+                    ,(long int) num_sequence);
+    } else {                         /* if not DNA then it must be a protein */
+        fprintf(pm->my_err, "\t%3li>\t%6li %-40.40s\tPROTEIN\n"
+                ,(long int) num_sequence
+                ,(long int) tot + master_ic
+                ,title);
+        if ( (tot+master_ic)-AA_TOT && pm->warn)   /* non IUBC AA chars      */
+            fprintf(pm->my_err, "\t\t WARNING %ld non "
+                    "standard AA characters "
+                    "in sequence %li\n"
+                    ,(long int) non_std_char
+                    ,(long int) num_sequence);
+    }
+    return 1;                        /* return to calling function           */
+}
+
+
+/************************* output_long **********************************/
+/* called to write a block of a sequence that has exceeded the MAX_GENE */
+/* limit. If this is the first time it has been called for this sequence */
+/* (ie. long_seq is false) it writes a dummy header line which is updated */
+/* by toutput when the last fragment of the sequence is written to disk */
+/**************************************************************************/
+
+int output_long(FILE * fblkout, char *seq)
+{
+    /* Writes up to MAX_GENE leading characters of seq to fblkout in the    */
+    /* format selected by pm->bulk; returns 1.                              */
+    long int pos = 0;                /* next character of seq to emit        */
+    char since_gap = 3;              /* reader format: bases since the space */
+    char triplet[4];
+    int src, len;
+
+    if (long_seq == FALSE) {
+        /* First chunk of this sequence: remember where the header starts   */
+        /* so toutput can patch in the length, then emit a placeholder.     */
+        fl_pos_start = ftell(fblkout);
+        if (pm->bulk != 'R')
+            fprintf(fblkout, ">%-20.20s%9s\n", title, " ");
+        else
+            fprintf(fblkout, ">%6s %-72.72s\n", " ", title);
+        long_seq = TRUE;
+    }
+
+    /* the per-format layout below mirrors the one used by toutput */
+    for (; pos < MAX_GENE && pos < tot; ) {
+        if (pm->bulk == 'T') {               /* plain fasta: one base        */
+            fputc(seq[pos++], fblkout);
+            reg++;
+        } else if (pm->bulk == 'R') {        /* reader: space, then 3 bases  */
+            if (since_gap != 3) {
+                fputc(seq[pos++], fblkout);
+                since_gap++;
+                reg++;
+            } else {
+                fputc(' ', fblkout);
+                since_gap = 0;
+            }
+        } else if (pm->bulk == 'N') {        /* translate the next codon     */
+            len = 0;
+            for (src = (int) pos; src < (int) pos + 3 && src < tot; src++)
+                triplet[len++] = seq[src];
+            triplet[len] = '\0';
+            fputc(*get_aa(1, triplet), fblkout);
+            pos += 3;
+            reg++;
+        } else {
+            printf("\nProgramming error type A1 check code \n");
+            my_exit(99, "output_long");
+        }
+
+        if (reg % 61 == 0) {                 /* line is full: wrap it        */
+            reg = 1;
+            fputc('\n', fblkout);
+        }
+    }
+    return 1;                                /* return to tidy               */
+}
+
+
+/************************* output **********************************/
+/* Called after subroutine tidy has read the sequence into memory */
+/* or more accurately counted the codon and amino acid usage. This sub- */
+/* routine, via a switch checks which parameters and indices have been */
+/* requested and write these to file, it handles all output except for COA*/
+/**************************************************************************/
+
+
+void output(char *seq, FILE * foutput, FILE * fblkout, FILE * fcoaout)
+{
+    /* Finishes the current sequence: counts the codons still in seq, runs  */
+    /* the codon sanity checks, writes the bulk output selected by pm->bulk */
+    /* to fblkout, and one row of the requested indices to foutput.         */
+    char sp;                         /* column delimiter for foutput        */
+
+    /* set the column delimiter to something shorter than pm->seperator */
+    sp = (char) (pm->seq_format=='H')? (char) '\t': (char) pm->seperator;
+
+    if (tot) {                       /* still data in array seq..           */
+        last_aa = codon_usage_tot(seq, tot);
+        if (pcu->ca[last_aa] == 11)
+            valid_stops++;           /* check the last codon was a stop     */
+    }
+
+    /* codon_error, if 4th parameter is 1, then checks for valid start and */
+    /* internal stop codon, if 4th parameter is 2, checks that the last    */
+    /* codon is a stop or was partial, and for non-translatable codons     */
+    codon_error(last_aa, valid_stops, title, (char) 1);
+    codon_error(last_aa, valid_stops, title, (char) 2);
+
+    /* if we are concatenating sequences then change the title to avger_of */
+    if(pm->totals)
+        (pm->seq_format=='M')?
+            strcpy(title, "Average_of_genes"):
+            strcpy(title, "Average of genes");
+
+    /* strchr() also matches the terminating NUL of its search string, so  */
+    /* an unset (zero) pm->bulk must be excluded before testing membership */
+    if (pm->bulk != '\0' && strchr("RNT", (int) pm->bulk) != NULL) {
+        /* better write the remaining sequence in seq to disk */
+        toutput(fblkout, seq);
+    } else if (pm->bulk != '\0' && strchr("OCASDLDBX", (int) pm->bulk) != NULL) {
+        /* Each of these writers targets the bulk output file; exactly one */
+        /* is selected per sequence by the value of pm->bulk               */
+        switch ((int) pm->bulk) {
+        case 'S':
+            rscu_usage_out(fblkout, ncod, naa);
+            break;
+        case 'C':
+            codon_usage_out(fblkout, ncod, last_aa, valid_stops, title);
+            break;
+        case 'L':
+            raau_usage_out(fblkout, naa);
+            break;
+        case 'D':
+            dinuc_count(seq, tot);
+            dinuc_out(fblkout, title);
+            break;
+        case 'A':
+            aa_usage_out(fblkout, naa);
+            break;
+        case 'B':
+            gc_out(foutput, fblkout, 1);
+            break;
+        case 'O':
+            cutab_out(fblkout, ncod, naa);
+            break;
+        case 'X':
+            /* X is no bulk output written to file */
+            break;
+        default:
+            fprintf(stderr, "ERROR-23 %s bulk undefined\n", pm->prog);
+            my_exit(99, "output");
+            break;
+        }
+    } else if (pm->bulk) {           /* just a programming error catch      */
+        fprintf(stderr, "ERROR-24 %s -prog undefined\n", pm->prog);
+        my_exit(99, "output");
+    }
+
+    /* if an index has been requested then this is true */
+    if (pm->sil_base || pm->cai || pm->fop || pm->enc || pm->gc3s ||
+        pm->gc || pm->cbi || pm->L_sym || pm->L_aa || pm->coa ||
+        pm->hyd|| pm->aro) {
+        /* if this is the first sequence then write a header line */
+
+        if (num_sequence == 1 || pm->totals) {
+
+            fprintf(foutput, (pm->seq_format == 'H')?
+                    "%-25.25s%c":"%-.25s%c"
+                    ,"title",sp);
+            if (pm->sil_base)
+                fprintf(foutput, "%s%c%s%c%s%c%s%c", "T3s",sp,"C3s",sp,"A3s",sp,
+                        "G3s",sp);
+            if (pm->cai)
+                fprintf(foutput, "%s%c", "CAI",sp);
+            if (pm->cbi)
+                fprintf(foutput, "%s%c", "CBI",sp);
+            if (pm->fop)
+                fprintf(foutput, "%s%c", "Fop",sp);
+            if (pm->enc)
+                fprintf(foutput, "%s%c", "Nc",sp);
+            if (pm->gc3s)
+                fprintf(foutput, "%s%c", "GC3s" ,sp);
+            if (pm->gc)
+                fprintf(foutput, "%s%c", "GC" ,sp);
+            if (pm->L_sym)
+                fprintf(foutput, "%s%c", "L_sym",sp);
+            if (pm->L_aa)
+                fprintf(foutput, "%s%c", "L_aa" ,sp);
+            if (pm->hyd)
+                fprintf(foutput, "%s%c", "Gravy",sp);
+            if (pm->aro)
+                fprintf(foutput, "%s%c", "Aromo",sp);
+
+            fprintf(foutput, "\n");
+        }
+
+        /* if output format is human readable print the fixed width sequence */
+        /* name, else print only the name of the sequence                    */
+        fprintf(foutput, (pm->seq_format == 'H')?
+                "%-25.25s%c":"%-.25s%c"
+                ,title,sp);
+
+        /*Need to use if statements as we allow more than one index to be calc*/
+        /* per sequence read in                                               */
+        if (pm->sil_base)
+            base_sil_us_out(foutput, ncod, naa);
+        if (pm->cai)
+            cai_out(foutput, ncod);
+        if (pm->cbi)
+            cbi_out(foutput, ncod, naa);
+        if (pm->fop)
+            fop_out(foutput, ncod);
+        if (pm->enc)
+            enc_out(foutput, ncod, naa);
+        if (pm->gc3s)
+            gc_out(foutput, fblkout, 3);
+        if (pm->gc)
+            gc_out(foutput, fblkout, 2);
+        if (pm->L_sym)
+            gc_out(foutput, fblkout, 4);
+        if (pm->L_aa)
+            gc_out(foutput, fblkout, 5);
+        if (pm->hyd)
+            hydro_out(foutput, naa);
+        if (pm->aro)
+            aromo_out(foutput, naa);
+        if (pm->coa)
+            coa_raw_out(fcoaout, ncod, naa, title);
+
+        fprintf(foutput, "\n");
+
+    }
+    return;
+}
+
+/************************* my_exit **********************************/
+/* Called to clean up open files and generate an intelligent exit message */
+/* Also warns if no analysis has been run, the user did not select R from */
+/* the main menu. If COA was selected then it reminds the user to look */
+/* at the file summary.coa, and deletes any stray binary files */
+/**************************************************************************/
+
+int my_exit(int error_num, char *message)
+{
+    /* Closes every file handle held in pm, removes leftover scratch files, */
+    /* prints a summary or warning where appropriate, reports error_num     */
+    /* with message, and terminates the process via exit().                 */
+    int shared_handle;               /* outputfile and tidyoutfile alias?   */
+    fileclose(&pm->inputfile);
+
+    /* When masquerading as another program both outputfile and tidyoutfile */
+    /* hold the same handle, which must only be closed once.                */
+    shared_handle = (pm->outputfile == pm->tidyoutfile);
+    fileclose(&pm->outputfile);
+    if (!shared_handle)
+        fileclose(&pm->tidyoutfile);
+
+    fileclose(&pm->cuout);
+    fileclose(&pm->fopfile);
+    fileclose(&pm->cbifile);
+    fileclose(&pm->caifile);
+    fileclose(&pm->logfile);
+    fileclose(&pm->fcoa_in);
+    fileclose(&pm->fcoa_out);
+
+    /* Probe for each scratch file by opening it for reading; any that     */
+    /* exists is closed again and deleted.                                  */
+    pm->inputfile = fopen("cbrawin", "r");
+    if (pm->inputfile != NULL) {
+        fclose(pm->inputfile);
+        deletefile("cbrawin");
+    }
+    pm->inputfile = fopen("cbfcco", "r");
+    if (pm->inputfile != NULL) {
+        fclose(pm->inputfile);
+        deletefile("cbfcco");
+    }
+    pm->inputfile = fopen("cbfcli", "r");
+    if (pm->inputfile != NULL) {
+        fclose(pm->inputfile);
+        deletefile("cbfcli");
+    }
+    pm->inputfile = fopen("cbfcpc", "r");
+    if (pm->inputfile != NULL) {
+        fclose(pm->inputfile);
+        deletefile("cbfcpc");
+    }
+    pm->inputfile = fopen("cbfcpl", "r");
+    if (pm->inputfile != NULL) {
+        fclose(pm->inputfile);
+        deletefile("cbfcpl");
+    }
+    pm->inputfile = fopen("cbfcta", "r");
+    if (pm->inputfile != NULL) {
+        fclose(pm->inputfile);
+        deletefile("cbfcta");
+    }
+    pm->inputfile = fopen("cbfcvp", "r");
+    if (pm->inputfile != NULL) {
+        fclose(pm->inputfile);
+        deletefile("cbfcvp");
+    }
+    pm->inputfile = fopen("cb1rawin", "r");
+    if (pm->inputfile != NULL) {
+        fclose(pm->inputfile);
+        deletefile("cb1rawin");
+    }
+
+    if (error_num == 0 || error_num == 2) {
+        if (!pm->analysis_run) {
+            if (pm->codonW)
+                fprintf(stderr, " \n\n WARNING You are exiting before codonW has generated any results\n"
+                        " Select 'r' from the main menu to run\n");
+        } else {
+            fprintf(stderr, "Files used:\n");
+            if (pm->curr_infilename[0] != '\0')
+                fprintf(pm->my_err, " Input file was\t %s \n",
+                        pm->curr_infilename);
+            if (pm->curr_outfilename[0] != '\0')
+                fprintf(pm->my_err, " Output file was\t %s %s",
+                        pm->curr_outfilename,
+                        (pm->codonW) ? " (codon usage indices, e.g. gc3s)\n":"\n");
+            if (pm->curr_tidyoutname[0] != '\0')
+                fprintf(pm->my_err, " Output file was\t %s %s",
+                        pm->curr_tidyoutname,
+                        (pm->codonW) ? " (bulk output e.g. raw codon usage)\n":"\n");
+            if (pm->coa)
+                fprintf(pm->my_err, " For more information about the COrrespondence "
+                        "Analysis see summary.coa\n");
+        }
+    }
+
+    if (pm->codonW)
+        printf("\n CodonW has finished\n");
+    /* Report the reason on stdout; the exit status chosen below is 0 for  */
+    /* codes 0, 2 and 99 and 1 for every other code.                       */
+    switch (error_num) {
+    case 0:                          /* silent exit                         */
+        break;
+    case 1:
+        printf("failed to open file for output <%s>\n", message);
+        break;
+    case 2:
+        printf("user requested exit <%s>\n", message);
+        break;
+    case 3:
+        printf("failed to allocate memory <%s>\n", message);
+        break;
+    case 4:
+        printf("Write to disk failed ! <%s>\n", message);
+        break;
+    case 5:
+        printf("Read from disk failed! <%s>\n", message );
+        break;
+    case 6:
+        printf("failed to open file for reading <%s>\n", message);
+        break;
+    case 7:
+        printf("failed to close file <%s>\n", message);
+        break;
+    case 99:
+        printf(" Controlled exit <%s>\n",message);
+        break;
+    default:
+        printf("for unknown reason\n");
+    }
+    exit((error_num == 0 || error_num == 2 || error_num == 99) ? 0 : 1);
+    return 0;
+}
+
+/************************** file_close **********************************/
+/* Fileclose function checks whether the filepointer is open, if so it */
+/* attempts to close the open file handle and assigns a null pointer */
+/* to that handle */
+/**************************************************************************/
+
+int fileclose(FILE ** file_pointer)
+{   /* Close *file_pointer if it is open and reset it to NULL; returns 1   */
+    if (*file_pointer == NULL)       /* already closed, nothing to do       */
+        return 1;
+    if (fclose(*file_pointer) == EOF) {  /* close failed: report and abort  */
+        fprintf(stderr,"Failed to close file %i \n",errno);
+        perror ("Unexpected condition in fileclose");
+        exit(7);
+    }
+    *file_pointer = NULL;            /* caller's handle now reads as closed */
+    return 1;
+}
+
+/************************** Chelp **************************************/
+/* Chelp opens and scans the help file and prints the text associated with */
+/* that help keyword. Help keywords are surrounded by hashes, starting in the */
+/* first column of the ASCII help file and are terminated by // */
+/**************************************************************************/
+
+int chelp ( char *help_keyword )
+{
+    /* Prints the help text stored under #help_keyword# in the help file   */
+    /* to stderr, pausing every screenful.  Returns 1 when the entry and   */
+    /* its terminating // were found, 0 otherwise.                         */
+    char helplib [MAX_FILENAME_LEN]="";
+    char *p=NULL, inhelp=FALSE;
+    char QueryString[120];           /* limit for help phrase is 120 chars */
+    char HelpMessage[121];
+    int line_counter=2;              /* assume 2 blank lines to start with */
+    FILE *hfp=NULL;
+
+    /* Locate the help file: the CODONW_H environment variable wins,       */
+    /* otherwise look for codonW.hlp in the current directory.  The copy   */
+    /* is bounded so an over-long CODONW_H cannot overrun helplib.         */
+    p=getenv( "CODONW_H" );
+    if ( p != NULL ) {
+        strncpy ( helplib , p , MAX_FILENAME_LEN - 1 );
+        helplib[MAX_FILENAME_LEN - 1] = '\0';
+    } else {
+        strcpy ( helplib , "codonW.hlp");
+    }
+
+    hfp=open_file("",helplib, "r", FALSE);
+
+    /* if we can't open the help file then explain what we were trying to do */
+    if ( hfp == NULL ) {
+        fprintf ( stderr ,
+                  "Could not open help file codonw.hlp\n"
+                  "Expected to find this file in %s\n"
+                  "This can be overridden by setting the "
+                  "environmental variable\n"
+                  "CODONW_H to the help file location\n",
+                  helplib);
+        pause;                       /* make sure they Ack. the error mesg */
+        return 0;                    /* abort                              */
+    }
+
+    /* Assemble the search key #keyword#; the keyword is truncated so that */
+    /* both hashes and the terminator always fit in QueryString.           */
+    strcpy (QueryString , "#");
+    strncat(QueryString , help_keyword , sizeof(QueryString) - 3 );
+    strcat (QueryString , "#");
+    fprintf(stderr,"\n\n");
+
+    /* now scan the help file looking for this keyword */
+    while ( fgets ( HelpMessage, 120, hfp ) ) {
+        if ( strstr (HelpMessage,QueryString) != NULL )
+            inhelp=TRUE;             /* we found it                        */
+
+        else if ( inhelp && strstr ( HelpMessage , "//") ) { /* found the end*/
+            fileclose(&hfp );
+            if ( line_counter )pause;
+            return 1;
+        }
+        /* inhelp is true: we are inside the entry, so echo this line */
+        else if ( inhelp ) {
+            if ( strchr(HelpMessage,'\n') )
+                fprintf ( stderr, "%s",HelpMessage );
+            else                     /* make sure there are line feeds     */
+                fprintf ( stderr, "%s\n",HelpMessage );
+            /* count the lines printed and pause once a screenful, as      */
+            /* defined by pm->term_length, has been shown                  */
+            if (line_counter++ >= pm->term_length-3 && line_counter ) {
+                line_counter=0;
+                pause;
+                fprintf(stderr, "%s",HelpMessage);
+            }
+        }
+    }
+
+    /* Reaching here means the file ended before the entry was completed.  */
+    /* HelpMessage is an array and can never be NULL, so only inhelp tells */
+    /* "keyword never seen" apart from "entry not terminated by //".       */
+    fileclose(&hfp);                 /* do not leak the help file handle   */
+    if ( inhelp == FALSE ){
+        fprintf ( stderr ," Error in help file, %s not found ", QueryString);
+        pause;
+    }
+    else {
+        fprintf (stderr , "Premature end of help file ... \n");
+        pause;
+    }
+    return 0;                        /* failed for some reason             */
+}
+
+
+/******************** WasHelpCalled ***********************************/
+/* Checks the string input to see if the user asked for help */
+/**************************************************************************/
+
+char WasHelpCalled ( char * input ) {
+    /* TRUE when input is a lone 'h'/'H' or exactly "help" / "HELP",       */
+    /* FALSE for anything else                                             */
+    size_t length = strlen ( input );
+
+    if ( length == 1 )
+        return (char) ( ( (char)toupper((int)input[0]) == 'H' ) ? TRUE : FALSE );
+    if ( strcmp ( input , "help") == 0 || strcmp ( input , "HELP") == 0 )
+        return (char) TRUE;
+
+    return (char) FALSE;
+}
diff --git a/commline.c b/commline.c
new file mode 100755
index 0000000..53c3f0d
--- /dev/null
+++ b/commline.c
@@ -0,0 +1,755 @@
+/**************************************************************************/
+/* CodonW codon usage analysis package */
+/* Copyright (C) 2005 John F. Peden */
+/* This program is free software; you can redistribute */
+/* it and/or modify it under the terms of the GNU General Public License */
+/* as published by the Free Software Foundation; version 2 of the */
+/* License, */
+/* */
+/* This program is distributed in the hope that it will be useful, but */
+/* WITHOUT ANY WARRANTY; without even the implied warranty of */
+/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
+/* GNU General Public License for more details. */
+/* You should have received a copy of the GNU General Public License along*/
+/* with this program; if not, write to the Free Software Foundation, Inc.,*/
+/* 675 Mass Ave, Cambridge, MA 02139, USA. */
+/* */
+/* */
+/* The author can be contacted by email (jfp#hanson-codonw at yahoo.com Anti-*/
+/* Spam please change the # in my email to an _) */
+/* */
+/* For the latest version and information see */
+/* http://codonw.sourceforge.net */
+/**************************************************************************/
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <ctype.h>
+#include "codonW.h"
+
+/************** process_command_line *************************************/
+/* The command line is passed to this function for processing. The name of*/
+/* the programme is read, and based on this, CodonW will emulate several */
+/* useful codon usage analysis programmes routinely used in our laboratory*/
+/* all other command line arguments are read. Unrecognised arguments are */
+/* reported to the user, arguments not preceded by a dash are assumed to */
+/* be filenames. The input, output and bulk output files to be precise */
+/**************************************************************************/
+int proc_comm_line( int *pargc , char ***pargv)
+{
+ char *p;
+ char c;
+ int n;
+ char prog_name[64];
+ char root[MAX_FILENAME_LEN];
+
+/* decide how to process argc[0] which will be the name of the programme */
+#if defined (_WINDOWS) || defined (_DOS) || defined ( WIN32 )
+ if ( (p = strrchr(**pargv, '\\')) != NULL )
+ strncpy(prog_name, p+1 , 63);
+ else
+ strncpy(prog_name, **pargv, 10);
+#elif defined (BSD) || defined(SYSV) || defined (UNIX) || defined (LINUX) /* Must be unix */
+ if ( (p = strrchr(**pargv, '/')) != NULL )
+ strncpy(prog_name, p+1, 63);
+ else
+ strncpy(prog_name, **pargv, 10);
+#elif defined(VMS) || defined (OPENVMS) /* maybe VMS or OPENVMS */
+ if ( (p = strrchr(**pargv, ']')) != NULL )
+ strncpy(prog_name, p+1, 63);
+ else
+ strncpy(prog_name, **pargv, 10);
+#else
+ printf("UNRECOGNISED SYSTEM type won't be able to impersonate other programmes\n");
+ strcpy(prog_name, "codon"); /* OK I give up */
+#endif
+
+ if ( (p = strrchr(prog_name, '.')) != NULL ) /* remove file extension */
+ *p = '\0'; /* tidy.exe -> tidy */
+
+ /* first call to garg initialises the function with the command line*/
+ /* parameters and the number of arguments, subsequent calls strip */
+ /* these off one by one */
+
+ /* has the user asked for help ???????????? */
+ if ((p = garg(*pargc, *pargv, "-h", GARG_EXACT)) ||
+ (p = garg(0, NULL, "-help", GARG_EXACT))){
+ printf(
+ "codonW [inputfile] [outputfile] [bulkoutfile] [options]\n"
+ "General options and defaults:\n"
+ " -h(elp)\tThis help message\n"
+ " -nomenu\tPrevent the menu interface being displayed\n"
+ " -nowarn\tPrevent warnings about sequences being displayed\n"
+ " -silent\tOverwrite files silently\n"
+ " -totals\tConcatenate all genes in inputfile\n"
+ " -machine\tMachine readable output\n"
+ " -human\t\tHuman readable output\n"
+ " -code N\tGenetic code as defined under menu 3 option 5\n"
+ " -f_type N\tFop/CBI codons as defined by menu 3 option 6\n"
+ " -c_type N\tCai fitness values as defined by menu 3 option 7\n"
+ " -t (char)\tColumn separator to be used in output files "
+ "(comma,tab,space)\n"
+ "\nCodon usage indices and Amino acid indices \n"
+ " -cai\t\tcalculate Codon Adaptation Index (CAI)\n"
+ " -fop\t\tcalculate Frequency of OPtimal codons index (FOP)\n"
+ " -cbi\t\tcalculate Codon Bias Index (CBI)\n"
+ " -enc\t\tEffective Number of Codons (ENc)\n"
+ " -gc\t\tG+C content of gene (all 3 codon positions)\n"
+ " -gcs3\t\tGC of synonymous codons 3rd positions\n"
+ " -sil_base\tBase composition at synonymous third codon "
+ "positions\n"
+
+ );
+ pause;
+ printf(
+ " -L_sym\t\tNumber of synonymous codons\n"
+ " -L_aa\t\tTotal number of synonymous and non-synonymous codons\n"
+ " -all_indices\t\tAll the above indices\n"
+ " -aro\t\tCalculate aromaticity of protein\n"
+ " -hyd\t\tCalculate hydropathicity of protein\n"
+ " -cai_file {file}\tUser input file of CAI values\n"
+ " -cbi_file {file}\tUser input file of CBI values\n"
+ " -fop_file {file}\tUser input file of Fop values\n"
+ "\nCorrespondence analysis (COA) options \n"
+ " -coa_cu \tCOA of codon usage frequencies\n"
+ " -coa_rscu\tCOA of Relative Synonymous Codon Usage\n"
+ " -coa_aa\tCOA of amino acid usage frequencies\n"
+ " -coa_expert\tGenerate detailed(expert) statistics on COA\n"
+ " -coa_axes N\tSelect number of axis to record\n"
+ " -coa_num N\tSelect number of genes to use to identify "
+ "optimal codons\n"
+ "\t\tvalues can be whole numbers or a percentage (5 or 10%%)\n"
+ "\nBulk output options | only one can be selected per analysis\n"
+ " -aau\t\tAmino Acid Usage (AAU)\n"
+ " -raau\t\tRelative Amino Acid Usage (RAAU)\n"
+ " -cu\t\tCodon Usage (CU) (default)\n"
+ );
+ pause;
+ printf(
+ " -cutab\t\tTabulation of codon usage\n"
+ " -cutot\t\tTabulation of dataset's codon usage\n"
+ " -rscu\t\tRelative Synonymous Codon Usage (RSCU)\n"
+ " -fasta\t\tfasta format\n"
+ " -tidy\t\tfasta format\n"
+ " -reader\tReader format (codons are separated by spaces)\n"
+ " -transl\tConceptual translation of DNA to amino acid\n"
+ " -base\t\tDetailed report of codon G+C composition\n"
+ " -dinuc\t\tDinucleotide usage of the three codon pos.\n"
+ " -noblk\t\tNo bulk output to be written to file\n"
+ "\nWhere {file} represents an input filename, and N an integer"
+ " value"
+ );
+ pause;
+ my_exit(99,""); /* after writing out help quit */
+ }
+
+
+/* These parameters are normally set in menu3 ie. the defaults menu */
+/* for a explanation of the various GARG_FLAGS see gargs */
+
+/* -silent stops warnings about file about to be overwritten */
+ if (garg(0, NULL, "-silent", GARG_THERE))
+ pm->verbose = FALSE;
+
+
+/* -total causes sequences to be concatenated and treated as one sequence */
+ if ( garg(0, NULL, "-total" , GARG_THERE))
+ pm->totals = TRUE;
+
+/* -machine or -human determines for whom the output should be formatted */
+ if (p = garg(0, NULL, "-human", GARG_THERE))
+ pm->seq_format = 'H';
+ if (p = garg(0, NULL, "-mach", GARG_THERE))
+ pm->seq_format = 'M';
+
+/* -code determines the genetic code */
+ if (p = garg(0, NULL, "-code", GARG_NEXT | GARG_EXACT)) {
+ strcpy(pm->junk, p);
+ n=0;
+ while ( isdigit( (int) pm->junk[n]) && pm->junk[n] != '\0')
+ n++;
+ if ( n != (int)strlen(pm->junk)
+ || atoi(pm->junk) < 0 || atoi(pm->junk) > NumGeneticCodes ) {
+ printf( "FATAL: The value for genetic code %s is invalid\n",
+ pm->junk);
+ my_exit(99,"Fatal error in genetic code value");
+ } else {
+ pm->code = (char) atoi(p); /* define genetic code */
+ initilize_point(pm->code, pm->f_type, pm->c_type);
+ }
+ }
+
+/* -f_type selects which of the predefined fop values to use */
+/* NB. The fop is selected with the integer value corresponding to the menu*/
+/* choice under the defaults menu. It must be in the range 1-NumFopSpecies */
+
+
+ if (p = garg(0, NULL, "-f_type", GARG_NEXT | GARG_EXACT)) {
+ strcpy(pm->junk, p);
+ n = 0;
+ while ( isdigit( (int) pm->junk[n]) && pm->junk[n] != '\0')
+ n++;
+ if ( n != (int)strlen(pm->junk) || atoi(pm->junk) < 0 ||
+ atoi(pm->junk) >= NumFopSpecies ) {
+ printf("FATAL: The value for fop_type %s is not valid\n",
+ pm->junk);
+ my_exit(99,"Fatal error in Fop value");
+ } else {
+ pm->f_type = (char) atoi(p); /* define organism type for Fop */
+ initilize_point(pm->code, pm->f_type, pm->c_type);
+ }
+ }
+
+/* -d_type selects which of the predefined CAI values to use */
+/* NB. The CAI is selected with the integer value corresponding to the menu*/
+/* choice under the defaults menu. It must be in the range 1-NumCAISpecies */
+ if (p = garg(0, NULL, "-c_type", GARG_NEXT | GARG_EXACT)) {
+ strcpy(pm->junk,p);
+ n = 0;
+ while ( isdigit( (int) pm->junk[n]) && pm->junk[n] != '\0')
+ n++;
+ if ( n != (int)strlen(pm->junk) || atoi(pm->junk) < 0 ||
+ atoi(pm->junk) >= NumCaiSpecies) {
+ printf("FATAL: The value for cai_type %s is not valid\n",
+ pm->junk);
+ my_exit(99,"Fatal error in CAI type value");
+
+ } else {
+ pm->c_type = (char) atoi(p); /* define organism type for CAI */
+ initilize_point(pm->code, pm->f_type, pm->c_type);
+ }
+ }
+
+
+/* Command line arguments for the indices menu (4) */
+/* The presence of any of these flags, cause the relevant indices to be */
+/* calculated */
+/* Indices are CAI, FOP, CBI, Nc, GC, GC3s, Lsyn, Laa, silent_base */
+/* composition, hydropathicity, aromaticity */
+ if (p = garg(0, NULL, "-cai" , GARG_EXACT))
+ pm->cai = TRUE;
+ if (p = garg(0, NULL, "-fop" , GARG_EXACT))
+ pm->fop = TRUE;
+ if (p = garg(0, NULL, "-cbi" , GARG_EXACT))
+ pm->cbi = TRUE;
+ if (p = garg(0, NULL, "-enc" , GARG_EXACT))
+ pm->enc = TRUE;
+ if (p = garg(0, NULL, "-gc" , GARG_EXACT))
+ pm->gc = TRUE;
+ if (p = garg(0, NULL, "-gc3s" , GARG_EXACT))
+ pm->gc3s = TRUE;
+ if (p = garg(0, NULL, "-sil_base" , GARG_EXACT))
+ pm->sil_base = TRUE;
+ if (p = garg(0, NULL, "-L_sym" , GARG_EXACT))
+ pm->L_sym = TRUE;
+ if (p = garg(0, NULL, "-L_aa" , GARG_EXACT))
+ pm->L_aa = TRUE;
+ if (p = garg(0, NULL, "-hyd" , GARG_EXACT))
+ pm->hyd = TRUE;
+ if (p = garg(0, NULL, "-aro" , GARG_EXACT))
+ pm->aro = TRUE;
+/* Turns on all the above indices */
+ if (p = garg(0, NULL, "-all_indices" , GARG_EXACT)){
+ pm->cai = TRUE;
+ pm->fop = TRUE;
+ pm->cbi = TRUE;
+ pm->enc = TRUE;
+ pm->gc = TRUE;
+ pm->gc3s = TRUE;
+ pm->sil_base = TRUE;
+ pm->L_sym = TRUE;
+ pm->L_aa = TRUE;
+ pm->hyd = TRUE;
+ pm->aro = TRUE;
+ }
+
+/* This section in used to input the filenames for personal choices of Fop */
+/* CBI or CAI values. The name is tested to make sure the file is readable */
+/* the pointer to the file is then assign to the relevant pointer in the */
+/* struct Z_menu and then processed properly in codon_us.c */
+
+/* Fop */
+ if (p = garg(0, NULL, "-fop_file", GARG_NEXT | GARG_EXACT)) {
+ if ( (pm->fopfile = open_file( "", p, "r", FALSE)) == NULL ) {
+ printf("Could not open Fop file - %s\n", p);
+ my_exit(1,"commline open fop file");
+ } else
+ strncpy(pm->fop_filen, pm->junk, MAX_FILENAME_LEN - 1);
+ /* idiot catch, if you load personal fop values you want to calculate fop */
+ pm->fop=TRUE;
+ }
+
+/* CAI */
+ if (p = garg(0, NULL, "-cai_file", GARG_NEXT | GARG_EXACT)) {
+ if ( (pm->caifile = open_file( "", p, "r", FALSE)) == NULL ) {
+ printf("Could not open CAI file - %s\n", p);
+ my_exit(1,"commline failed error");
+ } else
+ strncpy(pm->cai_filen, pm->junk, MAX_FILENAME_LEN - 1);
+ pm->cai=TRUE; /* idiot catch */
+ }
+/* CBI */
+ if (p = garg(0, NULL, "-cbi_file", GARG_NEXT | GARG_EXACT)) {
+ if ( (pm->cbifile = open_file( "", p, "r", FALSE)) == NULL ) {
+ printf("Could not open CBI file - %s\n", p);
+ my_exit(1,"Commline failed to open file");
+ } else
+ strncpy(pm->cbi_filen, pm->junk, MAX_FILENAME_LEN - 1);
+ pm->cbi = TRUE; /* idiot catch */
+ }
+
+
+/* This section changes the default correspondence analysis choices that */
+/* are normally set in menu 5. */
+/* Note only one of -coa_cu -coa_rscu -coa_aa can be chosen */
+ if (p = garg(0, NULL, "-coa_cu" , GARG_EXACT))
+ pm->coa = 'c';
+ if (p = garg(0, NULL, "-coa_rscu" , GARG_EXACT))
+ pm->coa = 'r';
+ if (p = garg(0, NULL, "-coa_aa" , GARG_EXACT))
+ pm->coa = 'a';
+ if (p = garg(0, NULL, "-coa_expert", GARG_EXACT)) /* detailed inertia */
+ (coa.level='e'); /* analysis */
+
+
+/* These are options selectable under the advanced COA menu */
+/* This first option -coa_axes changes the number of axis recorded to file */
+ if (p = garg(0, NULL, "-coa_axes", GARG_NEXT | GARG_EXACT)){
+ if ( isdigit( (int) *p) ){
+ n = (char)atoi(p);
+ /* just check that correspondence analysis has been selected */
+ if ( pm->coa == 'a' && (n > 20 || n<0) || ( n<0 || n>59 )) {
+ fprintf(pm->my_err,"Value %d is out of range for Number COA Axis "
+ "adjusting to max value\n",n);
+ if ( pm->coa == 'a' ) pcoa->axis = 20; else pcoa->axis = 59;
+ }else{
+ pcoa->axis = (char) n;
+ }
+ }
+ }
+
+/* Select the size of dataset to use to identify optimal codons */
+ if (p = garg(0, NULL, "-coa_num", GARG_NEXT|GARG_EXACT)) {
+ strcpy (pm->junk,p) ;
+ if( (p=strchr ( pm->junk,'%')) != NULL) {
+ p='\0';
+ pcoa->fop_gene=atoi(pm->junk)*-1;
+ }else {
+ pcoa->fop_gene=atoi(pm->junk);
+ }
+ }
+
+
+/* These option are mutually exclusive and are normally selected using the */
+/* the bulk output menu (menu 8) */
+
+ if ( p = garg(0, NULL, "-raau", GARG_EXACT))
+ pm->bulk = 'L';
+ if ( p = garg(0, NULL, "-cu" , GARG_EXACT))
+ pm->bulk = 'C';
+ if ( p = garg(0, NULL, "-cutab", GARG_THERE))
+ pm->bulk = 'O';
+ if ( p = garg(0, NULL, "-cutot", GARG_THERE)){
+ pm->bulk = 'C';
+ pm->totals =TRUE;
+ }
+ if ( p = garg(0, NULL, "-reader", GARG_EXACT))
+ pm->bulk = 'R';
+ if ( p = garg(0, NULL, "-rscu", GARG_EXACT))
+ pm->bulk = 'S';
+ if ( p = garg(0, NULL, "-tidy", GARG_EXACT))
+ pm->bulk = 'T';
+ if ( p = garg(0, NULL, "-fasta", GARG_EXACT))
+ pm->bulk = 'T';
+ if ( p = garg(0, NULL, "-aau", GARG_EXACT))
+ pm->bulk = 'A';
+ if ( p = garg(0, NULL, "-transl", GARG_THERE))
+ pm->bulk = 'N';
+ if ( p = garg(0, NULL, "-base", GARG_THERE))
+ pm->bulk = 'B';
+ if (p = garg(0, NULL, "-dinuc", GARG_THERE))
+ pm->bulk = 'D';
+ if (p = garg(0, NULL, "-noblk", GARG_EXACT))
+ pm->bulk = 'X';
+
+/* -t is used to change the column separator used in the output files */
+/* at present it must be a space, tab or comma */
+/* Must occur after -transl or it misreads transl as a seperator */
+ if (p = garg(0, NULL, "-t" , GARG_NEXT | GARG_SUBSQ)) {
+ strcpy(pm->junk, p);
+ n = 0;
+ do {
+ c = pm->junk[n++];
+ } while ( strchr("'\"\0", (int) c) != NULL );
+ if ( strchr ("\t, ", (int) c) == NULL ) {
+ printf( "WARNING: The chosen separator %s is unsuitable use"
+ "comma, tab or space\n", pm->junk);
+ } else {
+ pm->seperator = c;
+ }
+ }
+
+
+/* These options are commandline specific, ie. they do not have an */
+/* menu option */
+
+/* prevents the menu system from being displayed, everything is */
+/* assumed to have been given on the command line */
+
+ if (p = garg(0, NULL, "-nomenu", GARG_EXACT))
+ pm->menu = FALSE;
+
+/* prevents warnings about possible problems with the sequence data */
+/* being displayed, i.e. partial codons, stop codons, start codons */
+
+ if (p = garg(0, NULL, "-nowarn", GARG_THERE))
+ pm->warn = FALSE;
+
+
+
+/* This section tries to identify the name used to call CodonW; if that */
+/* name matches one of those tested for, certain command-line options */
+/* are assumed and the programme becomes much less interactive */
+
+/* First step is to convert programme name to lower case */
+ for ( n=0; *(prog_name + n) != '\0'; n++)
+ *(prog_name + n) = (char) tolower( (int) *(prog_name + n));
+
+
+/* special options designed to unify code used by several auxiliary */
+/* programmes. In essence CodonW will impersonate other commonly used */
+/* codon usage analysis programmes if called using a special name */
+
+ if ( !strcmp(prog_name, "raau" ) )
+ pm->bulk = 'L';
+ else if ( !strcmp(prog_name, "cu" ) )
+ pm->bulk = 'C';
+ else if ( !strcmp(prog_name, "cutab" ) )
+ pm->bulk = 'O';
+ else if ( !strcmp(prog_name, "reader") )
+ pm->bulk = 'R';
+ else if ( !strcmp(prog_name, "rscu" ) )
+ pm->bulk = 'S';
+ else if ( !strcmp(prog_name, "tidy" ) )
+ pm->bulk = 'T';
+ else if ( !strcmp(prog_name, "aau" ) )
+ pm->bulk = 'A';
+ else if ( !strcmp(prog_name, "dinuc" ) )
+ pm->bulk = 'D';
+ else if ( !strcmp(prog_name, "transl") )
+ pm->bulk = 'N';
+ else if ( !strcmp(prog_name, "bases" ) )
+ pm->bulk = 'B';
+ else if ( !strcmp(prog_name, "base3s") ) {
+ pm->prog = 's' ;
+ pm->menu = FALSE;
+ pm->sil_base = TRUE;
+ } else if ( !strcmp(prog_name, "cai" ) ) {
+ pm->prog = 'c';
+ pm->menu = FALSE;
+ pm->cai = TRUE;
+ } else if ( !strcmp(prog_name, "fop" ) ) {
+ pm->prog = 'f';
+ pm->menu = FALSE;
+ pm->fop = TRUE;
+ } else if ( !strcmp(prog_name, "gc3s" ) ) {
+ pm->prog = '3';
+ pm->menu = FALSE;
+ pm->gc3s = TRUE;
+ } else if ( !strcmp(prog_name, "gc" ) ) {
+ pm->prog = 'g';
+ pm->menu = FALSE;
+ pm->gc = TRUE;
+ } else if ( !strcmp(prog_name, "enc" ) ) {
+ pm->prog = 'e';
+ pm->menu = FALSE;
+ pm->enc = TRUE;
+ } else if ( !strcmp(prog_name, "cbi" ) ) {
+ pm->prog = 'i';
+ pm->menu = FALSE;
+ pm->cbi = TRUE;
+ } else if ( !strcmp(prog_name, "cutot" ) ) {
+ pm->bulk = 'C';
+ pm->menu = FALSE;
+ pm->totals =TRUE;
+ } else {
+ pm->codonW=TRUE; /* if argc[0] is not recognised assume codons*/
+ /* if blk output is still X then assume cu */
+ if (pm->bulk=='X') pm->bulk='C';
+ }
+
+
+ if (!pm->codonW ) { /* we appear to be impersonating another prog*/
+ /* now we switch to the correct greeting */
+ if (pm->bulk && pm->bulk!='X'){
+ pm->seperator='\000'; /* stop chars being converted by tidy*/
+ switch (pm->bulk) {
+ case 'R':
+ printf("\t\t\tREADER Formatting Program\n");
+ break;
+ case 'T':
+ printf("\t\t\tTIDY Formatting Program\n");
+ break;
+ case 'S':
+ printf("\tRelative Synonymous Codon Usage\n");
+ break;
+ case 'B':
+ printf("\t\t\tBase composition calculation\n");
+ break;
+ case 'C':
+ (pm->totals)?
+ printf("\t\t\tTotal Codon Usage Tabulation\n"):
+ printf("\t\t\tCodon Usage\n");
+ break;
+ case 'L':
+ printf("\tRelative Amino Acid Usage Calculating \n");
+ break;
+ case 'D':
+ printf("\t\t\tDi-Nucleotide frequencies Program\n");
+ break;
+ case 'A':
+ printf("\t\t\tAmino Acid Usage Calculating Program\n");
+ break;
+ case 'N':
+ printf("\t\tDNA 2 AA translating Program\n");
+ break;
+ case 'O':
+ printf("\tCodon usage tabulation Program\n");
+ break;
+ case 'G':
+ printf("\tTotal Codon usage tabulation\n");
+ break;
+ default:
+ {
+ fprintf(stderr, "Sorry:- could not recognise BULK option"
+ " -%c (Use -h for help)", pm->bulk);
+ my_exit(99,"bad option commandline");
+ }
+ }
+ }
+ else if (pm->prog)
+ switch (pm->prog) {
+ case 's':
+ printf("\t\t\tSilent base G+C+A+T Calculating Program\n");
+ break;
+ case 'e':
+ printf("\t\tNc Calculating Program\n");
+ break;
+ case 'f':
+ printf("\t\tFop Calculating Program\n");
+ break;
+ case 'c':
+ printf("\t\t\tCAI Calculating Program\n");
+ break;
+ case '3':
+ printf("\t\t\tGC3s Calculating Program\n");
+ break;
+ case 'g':
+ printf("\t\t\tG+C Calculating Program\n");
+ break;
+ case 'i':
+ printf("\t\t\tCodon Bias Index Calculating Program\n");
+ break;
+ default:
+ {
+ fprintf(stderr, "Sorry:- could not recognise ");
+ fprintf(stderr, "argument -%c (Use -h for help)",
+ pm->prog);
+ my_exit(99,"commline");
+ } /* matches default */
+ }
+ else
+ fprintf( stderr, "unknown error type in commline.c" );
+ } /* matched if (!pm->codonW) */
+
+
+/* By this point we should have processed all the command line arguments */
+/* so now we test for any remaining, these are unrecognised */
+
+ while (p = garg(0, NULL, "-", GARG_THERE))
+ if ( pm->menu )
+ printf("Unrecognised argument %s\n", p);
+ else {
+ /* if we are running without a menu then abort this run */
+ sprintf ( pm->junk,"Unrecognised argument %s", p);
+ my_exit ( 99 , pm->junk);
+ }
+
+/* Anything remaining should be file names */
+/* The first name should be the input file name */
+
+ if ( p = garg(0, NULL, "", GARG_THERE)) {
+ if ( (pm->inputfile = open_file( "", p, "r", FALSE)) == NULL ) {
+ printf("Could not open input file - %s\n", p );
+ my_exit(1,"failed to open file in proc_commline");
+ } else
+ strncpy(pm->curr_infilename, pm->junk, MAX_FILENAME_LEN - 1);
+ }
+/* The second should be the output filename */
+ if ( p = garg(0, NULL, "", GARG_THERE)) {
+ if ( (pm->outputfile = open_file( "", p, "w",
+ (int) pm->verbose)) == NULL ) {
+ printf("Could not open output file - %s\n", p );
+ my_exit(1,"commline out file");
+ } else
+ strncpy(pm->curr_outfilename, pm->junk, MAX_FILENAME_LEN - 1);
+ }
+
+/* The third which only occurs if the programme is running as CodonW */
+
+ if ( pm->codonW && (p = garg(0, NULL, "", GARG_THERE)) ) {
+ if ( (pm->tidyoutfile = open_file( "", p, "w",
+ (int) pm->verbose)) == NULL ) {
+ printf("Could not open blkoutput file - %s\n", p );
+ my_exit(1,"commline blk outfile");
+ } else
+ strncpy(pm->curr_tidyoutname, pm->junk, MAX_FILENAME_LEN - 1);
+ }
+
+/* Now check the command line is empty ... it should be at this point */
+ while (p = garg(0, NULL, "", GARG_THERE))
+ printf("This command line parameter was not recognised %s\n", p);
+
+/* IF no file name was found on the command line and the programme is */
+/* impersonating another programme or we decided not to use the menu */
+/* we need to load an input file name */
+
+ if ( (!pm->codonW || !pm->menu) && !pm->inputfile ){
+ if ( (pm->inputfile = open_file( "input filename", "input.dat",
+ "r", FALSE)) == NULL ) {
+ printf("Could not open input file - %s\n", p );
+ my_exit(1,"commline inputfile");
+ }
+ strncpy(pm->curr_infilename, pm->junk, MAX_FILENAME_LEN - 1);
+ }
+
+/* If we have an input filename but no output then we must prompt for the */
+/* output filename */
+
+ if ( pm->inputfile && !pm->outputfile ) {
+ /* If we are trying to impersonate another programme use this method*/
+ /* but make sure that we know what this other programme is called */
+ if ( !pm->codonW && strlen (prog_name) ){
+ strcpy(pm->curr_outfilename, prog_name);
+ strcat(pm->curr_outfilename, ".def");
+ } else {
+
+ /* Use the input filename as a root filename */
+ strncpy(root, pm->curr_infilename, MAX_FILENAME_LEN - 5);
+ for (n = (int) strlen(root); n && root[n]!='.' ; --n);
+ if ( n ) root[n] = '\0'; /* find root of filename */
+
+ strcpy(pm->curr_outfilename, root);
+ strcat(pm->curr_outfilename, ".out");
+ } /* matchs else */
+
+ /* now we know the suggested name for the output file lets open it */
+ if ( pm->verbose ) {
+ if ( (pm->outputfile = open_file( "indices output filename",
+ pm->curr_outfilename, "w",(int) pm->verbose)) == NULL )
+ my_exit(1,"commline");
+ strncpy(pm->curr_outfilename, pm->junk, MAX_FILENAME_LEN - 1);
+ }else{
+ if ( (pm->outputfile = open_file( "",
+ pm->curr_outfilename, "w",(int) pm->verbose)) == NULL )
+ my_exit(1,"commline");
+ strncpy(pm->curr_outfilename, pm->junk, MAX_FILENAME_LEN - 1);
+ }
+
+ } /* match if ( pm->inputfile */
+
+
+
+ /* we had a commandline inputfile name and output filename but none */
+ /* for bulkoutput .. we prompt to save having to use menu 1 */
+ if ( pm->inputfile && ! pm->tidyoutfile ){
+ if ( pm->codonW ) {
+ /* Use the input filename as a root filename */
+ strncpy(root, pm->curr_infilename , MAX_FILENAME_LEN - 5);
+
+ for (n = (int) strlen(root); n && root[n]!='.' ; --n);
+ if ( n ) root[n] = '\0'; /* find root of filename */
+
+ strcpy(pm->curr_tidyoutname, root);
+ strcat(pm->curr_tidyoutname, ".blk");
+
+ /* now we know the suggested name for the output file lets open it */
+ if( pm->verbose) {
+ if ( (pm->tidyoutfile = open_file( "bulk output filename",
+ pm->curr_tidyoutname, "w",(int) pm->verbose)) == NULL )
+ my_exit(1,"commline");
+ strncpy(pm->curr_tidyoutname, pm->junk, MAX_FILENAME_LEN - 1);
+ }else{
+ if ( (pm->tidyoutfile = open_file( "",
+ pm->curr_tidyoutname, "w",(int) pm->verbose)) == NULL )
+ my_exit(1,"commline");
+ strncpy(pm->curr_tidyoutname, pm->junk, MAX_FILENAME_LEN - 1);
+ }
+ }else{
+ /* only use one output file when impersonating other programmes */
+ /* just in case we make blkout and output the same file */
+ pm->tidyoutfile = pm->outputfile;
+ }
+ }
+return 1;
+}
+/****************** Garg ***********************************************/
+/* Strips arguments off the command line and hands them back to the */
+/* caller one at a time. */
+/* */
+/* When argv is non-NULL the stored command line is refreshed from */
+/* argv[1..argc-1] (at most MAX_ARGS entries are kept, extra arguments */
+/* are ignored); if there are no arguments NULL is returned at once. */
+/* When argv is NULL the previously stored command line is searched. */
+/* Every argument that is returned (or used as a flag) is marked as */
+/* consumed and is never matched again until the next refresh. */
+/* */
+/* targ is compared against each unconsumed argument: */
+/* GARG_EXACT the argument must match targ exactly */
+/* otherwise only the first strlen(targ) characters are compared, */
+/* so targ "" matches any unconsumed argument */
+/* mode then selects what is returned (tested in this order): */
+/* GARG_THERE the matching argument itself */
+/* GARG_SUBSQ the text immediately after targ inside the argument */
+/* GARG_NEXT the argument following the match (also consumed); */
+/* if the match is the last argument the match itself */
+/* else the matching argument; NULL when nothing matches */
+/* */
+/* This subroutine was developed as a collaboration with Colin McFarlane */
+/***************************************************************************/
+char *garg(int argc, char *argv[], const char *targ, int mode)
+{
+    static char *argw[MAX_ARGS];        /* stored copy of argv[1..]           */
+    static int   done[MAX_ARGS];        /* non-zero once argw[i] is consumed  */
+    static int   argn;                  /* number of stored arguments         */
+
+    int    arg;
+    size_t nc;
+
+    if (argv) {
+        argn = 0;                       /* a refresh discards the old line    */
+        if (--argc < 1)
+            return NULL;
+        /* never write past the end of the static tables */
+        for (argn = 0; argn < argc && argn < MAX_ARGS; argn++) {
+            argw[argn] = argv[argn + 1];
+            done[argn] = 0;
+        }
+    }
+
+    /* BUFSIZ is used as an "unlimited" length so strncmp compares the     */
+    /* whole string when an exact match is requested                       */
+    nc = (mode & GARG_EXACT) ? (size_t) BUFSIZ : strlen(targ);
+
+    for (arg = 0; arg < argn; arg++) {
+        if (done[arg] || strncmp(targ, argw[arg], nc) != 0)
+            continue;
+
+        done[arg] = 1;
+        if (mode & GARG_THERE)
+            return argw[arg];
+        if (mode & GARG_SUBSQ)
+            return &argw[arg][nc];
+        if (mode & GARG_NEXT) {
+            if (arg + 1 < argn)         /* step to the value, if there is one */
+                arg++;
+            done[arg] = 1;
+            return argw[arg];
+        }
+        return argw[arg];
+    }
+    return NULL;
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/coresp.c b/coresp.c
new file mode 100755
index 0000000..82d7cbd
--- /dev/null
+++ b/coresp.c
@@ -0,0 +1,1673 @@
+/**************************************************************************/
+/* CodonW codon usage analysis package */
+/* Copyright (C) 2005 John F. Peden */
+/* This program is free software; you can redistribute */
+/* it and/or modify it under the terms of the GNU General Public License */
+/* as published by the Free Software Foundation; version 2 of the */
+/* License, */
+/* */
+/* This program is distributed in the hope that it will be useful, but */
+/* WITHOUT ANY WARRANTY; without even the implied warranty of */
+/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
+/* GNU General Public License for more details. */
+/* You should have received a copy of the GNU General Public License along*/
+/* with this program; if not, write to the Free Software Foundation, Inc.,*/
+/* 675 Mass Ave, Cambridge, MA 02139, USA. */
+/* */
+/* */
+/* The author can be contacted by email (jfp#hanson-codonw at yahoo.com Anti-*/
+/* Spam please change the # in my email to an _) */
+/* */
+/* For the latest version and information see */
+/* http://codonw.sourceforge.net */
+/**************************************************************************/
+/* This file contains source code for */
+/* the core functions involved in correspondence */
+/* analysis, this code was originally written */
+/* by Jean Thioulouse */
+/* ADE software: multivariate analysis and graphical */
+/* display of environmental data */
+/* IN Guariso,G and Rizzoli, A (eds), */
+/* Software per l'Ambiente. Patron editor, Bolonia, pp.57-62. */
+/* */
+/* and is used with kind permission */
+/* */
+/* It has however been extensively modified to integrate it */
+/* as seamlessly as practical into CodonW and as such can no */
+/* longer be considered as a stand alone package */
+/* */
+/* Originally written as a general Multivariate analysis (MVA) */
+/* package, it is now hardwired specifically for codon or amino */
+/* acid usage analysis */
+/* */
+/* All unnecessary functions have been removed */
+/* Originally each data file had an associated resource file */
+/* which described required parameters */
+/* The need for these files has been removed */
+/* */
+/**************************************************************************/
+/* Functions */
+/* textbin converts codon usage to binary data file */
+/* */
+/**************************************************************************/
+
+
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+#include "codonW.h"
+
+
+/*************** textbin *****************************************/
+/* Converts the text usage table in fileread into the binary file fileout. */
+/* */
+/* pcoa->amino[1..21] (when pm->coa is 'a') or pcoa->codons[1..64] */
+/* (otherwise) flag which columns take part in the analysis; the number of */
+/* flagged columns is stored in pcoa->colm. */
+/* Each of the pcoa->rows input rows is a label token followed by 21 */
+/* (amino acid) or 64 (codon) integer counts. Only the flagged columns */
+/* are kept and written, as floats, with writevec. */
+/* pm->coa == 'a' amino acid counts are written as read */
+/* pm->coa == 'c' codon counts are written as read */
+/* pm->coa == 'r' each codon count is divided by the count of its amino */
+/* acid and multiplied by ds[codon] (presumably the size */
+/* of the synonymous family, i.e. RSCU - confirm) */
+/* A row that cannot be read completely is a fatal error (my_exit 5). */
+/**************************************************************************/
+
+void textbin(char *fileread, char *fileout)
+{
+    double *vlec;
+    int     v2;
+    int     i, j, x;
+
+    pcoa->colm = 0;
+
+    if (pm->coa == 'a') {
+        for (i = 1; i < 22; i++)
+            if (pcoa->amino[i]) pcoa->colm++;   /* number of colms in analysis */
+    } else {
+        for (i = 1; i < 65; i++)
+            if (pcoa->codons[i]) pcoa->colm++;  /* number of colms in analysis */
+    }
+
+    vecalloc(&vlec, pcoa->colm);                /* vlec[1..colm], size in [0]  */
+
+    /* open the text input and the binary output */
+    if ((pm->fcoa_in = open_file("", fileread, "r", FALSE)) == NULL) {
+        fprintf(pm->my_err, "(txt2bin)");
+        my_exit(1, "txt2bin");
+    }
+    if ((pm->fcoa_out = open_file("", fileout, "wb", FALSE)) == NULL) {
+        fprintf(pm->my_err, "(txt2bin)");
+        my_exit(6, "fileout");
+    }
+
+    for (i = 1; i <= pcoa->rows; i++) {         /* pcoa->rows is the No of genes */
+        /* the row label is read and discarded */
+        if (fscanf(pm->fcoa_in, "%s", pm->junk) != 1)
+            my_exit(5, "txt2bin label");
+
+        /* read the data from coa_raw into the array vlec */
+        switch (pm->coa) {
+        case 'a':
+            /* x counts the columns kept, so an excluded last column is   */
+            /* handled exactly like any other excluded column             */
+            for (j = 1, x = 0; j <= 21; j++) {
+                if (fscanf(pm->fcoa_in, "%i", &v2) != 1)
+                    my_exit(5, "txt2bin amino");
+                if (pcoa->amino[j])
+                    vlec[++x] = (double) v2;
+            }
+            if (x != pcoa->colm) my_exit(99, "Fatal Error in txt2bin");
+            break;
+        case 'c':
+            for (j = 1, x = 0; j <= 64; j++) {
+                if (fscanf(pm->fcoa_in, "%i", &v2) != 1)
+                    my_exit(5, "txt2bin codon");
+                if (pcoa->codons[j])
+                    vlec[++x] = (double) v2;
+            }
+            if (x != pcoa->colm) my_exit(99, "Fatal Error in txt2bin");
+            break;
+        case 'r':
+            clean_up(ncod, naa);
+            for (j = 1; j <= 64; j++) {
+                if (fscanf(pm->fcoa_in, "%i", &v2) != 1)
+                    my_exit(5, "txt2bin rscu");
+                naa[pcu->ca[j]] += v2;          /* count amino acids */
+                ncod[j] = v2;                   /* count codons      */
+            }
+
+            for (j = 1, x = 0; j <= 64; j++) {
+                if (pcoa->codons[j]) {
+                    ++x;
+                    /* an unused amino acid gives 0 rather than a division by zero */
+                    vlec[x] = (double) ((naa[pcu->ca[j]]) ?
+                        (float) ncod[j]/naa[pcu->ca[j]]*(float)( *(ds+j) ) :
+                        0.00);
+                }
+            }
+            break;
+
+#ifdef DEBUG
+        default:
+            fprintf(pm->my_err,"error in textbin %c unknown \n",pm->coa );
+            break;
+#endif
+        } /* end switch */
+        writevec(vlec, pm->fcoa_out);
+    }
+    /* close files and release memory and return */
+    fileclose(&pm->fcoa_in);
+    fileclose(&pm->fcoa_out);
+    free(vlec);
+}
+
+/*************** colmout *****************************************/
+/* Writes the position of every analysed column (amino acid or codon) on */
+/* each recorded axis as text. */
+/* nfice name of the binary file holding the column coordinates */
+/* nfics name of the text file to create */
+/* ppaa supplies the labels: aa3[] for amino acids, cod[] for codons */
+/* summary already open summary file; receives the same table, padded */
+/* The number of axes comes from pcoa->axis, the number of columns from */
+/* pcoa->colm and the field separator from pm->seperator. Only columns */
+/* flagged in pcoa->amino / pcoa->codons are labelled, in index order. */
+/* NOTE(review): readvec is defined elsewhere; it is assumed to load one */
+/* record of pcoa->axis values into elements 1..axis - confirm. */
+/**************************************************************************/
+void colmout(char *nfice, char *nfics,AMINO_STRUCT *ppaa, FILE *summary)
+{
+    char    delim   = pm->seperator;
+    int     n_cols  = pcoa->colm;       /* columns that were analysed       */
+    int     n_axes  = pcoa->axis;       /* axes requested by the user       */
+    double *coord;
+    FILE   *bin_in  = NULL;
+    FILE   *txt_out = NULL;
+    float   val;
+    int     written, idx, ax;
+
+    vecalloc(&coord, n_axes);
+
+    if ((bin_in = open_file("", nfice, "rb", FALSE)) == NULL)
+        my_exit(6, "nfice2");
+    if ((txt_out = open_file("", nfics, "w", FALSE)) == NULL)
+        my_exit(1, "nfics2");
+
+    fprintf(summary,"\n\nThe position of each %s by axis \n"
+                    "also see %s for seperate output\n",
+                    (pm->coa=='a')? "amino acid":"codon",nfics);
+
+    /* header line: "label" followed by one title per axis */
+    fprintf(txt_out, "%s","label");
+    fprintf(summary, "%-20.20s","label");
+    for (ax = 1; ax <= n_axes; ax++) {
+        fprintf(txt_out, "%c%s%d",delim,"Axis",ax);
+        fprintf(summary, "%c%9s%d",delim, "Axis",ax);
+    }
+    fprintf(txt_out, "\n");
+    fprintf(summary, "\n");
+
+    idx = 0;
+    for (written = 1; written <= n_cols; written++) {
+        /* advance to the next column that took part in the analysis */
+        if (pm->coa == 'a') {
+            do {
+                ++idx;
+            } while (!pcoa->amino[idx]);
+            fprintf(txt_out, "%s%c",ppaa->aa3[idx],delim );
+            fprintf(summary, "%-20.20s%c",ppaa->aa3[idx],delim );
+        } else {
+            do {
+                ++idx;
+            } while (!pcoa->codons[idx]);
+            fprintf(txt_out, "%s%c",ppaa->cod[idx],delim);
+            fprintf(summary, "%-20.20s%c",ppaa->cod[idx],delim);
+        }
+
+        readvec(coord, bin_in);
+
+        /* all axes but the last are followed by the separator ... */
+        for (ax = 1; ax < n_axes; ax++) {
+            val = (float) coord[ax];
+            fprintf(txt_out, "%f%c", val,delim);
+            fprintf(summary, "%10.5f%c", val,delim);
+        }
+        /* ... and the last one ends the line */
+        val = (float) coord[n_axes];
+        fprintf(txt_out, "%f\n", val);
+        fprintf(summary, "%10.5f\n", val);
+    }
+
+    fileclose(&txt_out);
+    fileclose(&bin_in);
+    free(coord);
+}
+/*************** rowout *****************************************/
+/* Writes the position of every gene on each of the pcoa->axis recorded */
+/* axes as text. */
+/* nfice name of the binary file holding the gene coordinates */
+/* nfics name of the text file to create */
+/* ncout name of a text file with one line per gene; the first */
+/* whitespace-free token (at most 35 characters) is the label */
+/* summary already open summary file; receives the same table, padded */
+/* The coordinate of every gene on axis 1 is kept in ax1[]; unless the */
+/* analysis is on amino acids the genes are then ordered with */
+/* sorted_by_axis1 and passed to gen_cusort_fop (both defined elsewhere). */
+/* NOTE(review): readvec is defined elsewhere; it is assumed to load one */
+/* record of pcoa->axis values into elements 1..axis - confirm. */
+/**************************************************************************/
+void rowout(char *nfice, char *nfics, char *ncout, FILE *summary)
+{
+    double *vlec, *ax1;
+    int     col, lig, *sortax1;
+    FILE   *fice = NULL, *fics = NULL, *fnam = NULL;
+    float   v2;
+    int     i, j;
+    char    sp = pm->seperator;
+
+    lig = pcoa->rows;                       /* number of genes */
+    col = pcoa->axis;                       /* number of axes  */
+
+    vecalloc(&vlec, col);
+    vecalloc(&ax1 , lig);
+    if ((sortax1 = (int *) calloc(lig+1, sizeof(int))) == NULL)
+        my_exit(3,"sortax1");
+
+    if ((fice = open_file("", nfice, "rb", FALSE)) == NULL) my_exit(6,"nfice3");
+    if ((fics = open_file("", nfics, "w", FALSE)) == NULL) my_exit(1,"nfics3");
+    if ((fnam = open_file("", ncout, "r", FALSE)) == NULL) my_exit(6,"ncout3");
+
+    fprintf(summary,"\n\nThe position of each gene by axis \n"
+                    "(see also %s)\n",nfics);
+
+    fprintf(fics , "%s%c","label",sp);
+    fprintf(summary, "%-20.20s%c","label",sp);
+
+    for (j = 1; j <= col; j++) {
+        fprintf(fics , "%s%d%c","Axis",j,sp);
+        fprintf(summary, "%9s%d%c", "Axis",j,sp);
+    }
+    fprintf(fics , "\n");
+    fprintf(summary, "\n");
+
+    for (i = 1; i <= lig; i++) {
+
+        /* a missing line gives an empty label instead of repeating the   */
+        /* previous gene's name                                           */
+        if (fgets(pm->junk, BUFSIZ, fnam) == NULL)
+            pm->junk[0] = '\0';
+        pm->junk[35] = '\0';
+        /* cut the label at its first white space character; the cast     */
+        /* keeps isspace defined for characters with the top bit set      */
+        for (j = 35; j >= 0; j--)
+            if (isspace((unsigned char) pm->junk[j])) pm->junk[j] = '\0';
+
+        fprintf(fics , "%s%c",pm->junk,sp);
+        fprintf(summary, "%-20.20s%c",pm->junk,sp);
+
+        readvec(vlec, fice);
+
+        /* remember the first factor for the later sort; this must not    */
+        /* depend on the printing loop, which is empty when col == 1      */
+        if (col >= 1) ax1[i] = vlec[1];
+
+        for (j = 1; j < col; j++) {
+            v2 = (float) vlec[j];
+            fprintf(fics , "%f%c", v2,sp);
+            fprintf(summary, "%10.5f%c", v2,sp);
+        }
+        v2 = (float) vlec[col];
+        fprintf(fics , "%f\n", v2);
+        fprintf(summary , "%10.5f\n", v2);
+    }
+
+    if (pm->coa != 'a') {
+        sorted_by_axis1 ( ax1, sortax1, lig);
+        gen_cusort_fop ( sortax1, lig, fnam, summary );
+    }
+    fileclose(&fics);
+    fileclose(&fice);
+    fileclose(&fnam);
+    free(ax1);
+    free(sortax1);
+    free(vlec);
+}
+
+/************** vecalloc *****************************************/
+/* Allocates a zero-filled vector with usable elements 1..n and stores it */
+/* in *vec. Element 0 holds n, which is how the other vector routines */
+/* learn the length. An allocation failure is reported through */
+/* my_exit(3,...). */
+/**************************************************************************/
+void vecalloc (double **vec, int n)
+{
+    double *block = (double *) calloc(n+1, sizeof(double));
+
+    *vec = block;
+    if (block == NULL) {
+        my_exit(3,"vecalloc");
+        return;
+    }
+    block[0] = n;                       /* length lives in element 0 */
+}
+
+/************** writevec *****************************************/
+/* Appends elements 1..v1[0] of the vector v1 to the open binary file fic, */
+/* each converted to float. A failed write ends in my_exit(4,...). */
+/* NOTE(review): the element size is the literal 4, i.e. it relies on */
+/* sizeof(float) being 4 on the target platform. */
+/**************************************************************************/
+void writevec(double *v1, FILE *fic)
+{
+    int   count = (int) v1[0];          /* Num of values held in v1 */
+    int   k     = 1;
+    float cell;
+
+    while (k <= count) {
+        cell = (float) v1[k];
+        if ( fwrite((const char *)&cell, 4, 1, fic) != 1)
+            my_exit(4,"writevec");
+        k++;
+    }
+}
+
+/************** PrepAFC *****************************************/
+/* Prepares the contingency table stored in the binary file nfic for */
+/* correspondence analysis: */
+/* 1 row totals, column totals and the grand total n are accumulated */
+/* 2 totals and table are scaled by 1/n; the row weights are written to */
+/* "cbfcpl" and the column weights to "cbfcpc" */
+/* 3 every cell whose row and column weight are both non-zero becomes */
+/* cell / row weight / column weight - 1; the table goes to "cbfcta" */
+/* 4 the total inertia returned by inertot() is stored in pcoa->inertia */
+/* Table size is pcoa->rows by pcoa->colm. */
+/**************************************************************************/
+
+void PrepAFC(char *nfic)
+{
+    char     fname[17];
+    int      r, c;
+    double **table;
+    double  *row_w, *col_w;
+    double   row_sum, col_weight, cell, total, scale;
+
+    vecalloc(&row_w, pcoa->rows);
+    vecalloc(&col_w, pcoa->colm);
+    taballoc(&table, pcoa->rows, pcoa->colm);
+
+    lecmat(table, nfic);
+
+    /* marginal totals; col_w starts zero-filled from vecalloc */
+    total = 0;
+    for (r = 1; r <= pcoa->rows; r++) {
+        row_sum = 0.0;
+        for (c = 1; c <= pcoa->colm; c++) {
+            cell     = table[r][c];
+            row_sum  = row_sum + cell;
+            col_w[c] = col_w[c] + cell;
+        }
+        total    = total + row_sum;
+        row_w[r] = row_sum;
+    }
+
+    /* scale the vectors, and matrix */
+    scale = 1.0/total;
+    scalvec(row_w, scale);
+    scalvec(col_w, scale);
+    scalmat(table, scale);
+
+    strcpy(fname,"cbfcpl");
+    ecrvec(row_w, fname);
+    strcpy(fname,"cbfcpc");
+    ecrvec(col_w, fname);
+
+    /* cells in an empty row or column are left untouched */
+    for (r = 1; r <= pcoa->rows; r++) {
+        row_sum = row_w[r];
+        if (row_sum == 0.0)
+            continue;
+        for (c = 1; c <= pcoa->colm; c++) {
+            col_weight = col_w[c];
+            if (col_weight != 0)
+                table[r][c] = table[r][c] / row_sum / col_weight - 1;
+        }
+    }
+    strcpy(fname,"cbfcta");
+    ecrmat(table, fname);
+
+    freetab(table);
+    freevec(row_w);
+    freevec(col_w);
+    pcoa->inertia = (float) inertot ();
+}
+
+/************** inertot ********************************************/
+/* Returns the total inertia of the data: the sum over every cell of */
+/* cell * cell * row weight * column weight */
+/* The table is read from "cbfcta", the row weights from "cbfcpl" and the */
+/* column weights from "cbfcpc"; sizes are pcoa->rows by pcoa->colm. */
+/***************************************************************************/
+
+double inertot ( void )
+{
+    double **cells;
+    double  *row_w, *col_w;
+    double   weight, value;
+    double   total = 0;
+    int      r, c;
+
+    taballoc (&cells, pcoa->rows, pcoa->colm);
+    vecalloc (&col_w, pcoa->colm);
+    vecalloc (&row_w, pcoa->rows);
+
+    lecmat (cells,"cbfcta");
+    lecvec(row_w, "cbfcpl");
+    lecvec(col_w, "cbfcpc");
+
+    for (r = 1; r <= pcoa->rows; r++) {
+        weight = row_w[r];
+        for (c = 1; c <= pcoa->colm; c++) {
+            value = cells[r][c];
+            total = total + value * value * weight * col_w[c];
+        }
+    }
+
+    freetab(cells);
+    freevec(row_w);
+    freevec(col_w);
+
+    return total;
+}
+
+/************** lecmat *****************************************/
+/* Fills the table tab from the binary file named nfic. The table size is */
+/* taken from the table itself (rows in tab[0][0], columns in tab[1][0]); */
+/* values are stored row by row as floats. A file that cannot be opened */
+/* ends in my_exit(1,...), a short read in my_exit(5,...). */
+/* NOTE(review): the element size is the literal 4, i.e. it relies on */
+/* sizeof(float) being 4 on the target platform. */
+/**************************************************************************/
+void lecmat (double **tab, char *nfic)
+{
+    int    n_rows = (int) tab[0][0];
+    int    n_cols = (int) tab[1][0];
+    int    r, c;
+    float  cell;
+    FILE  *in = NULL;
+
+    in = open_file("",nfic,"rb",FALSE);
+    if (in == NULL)
+        my_exit(1,"lecmat");
+
+    for (r = 1; r <= n_rows; r++) {
+        double *row = tab[r];
+
+        for (c = 1; c <= n_cols; c++) {
+            if ( fread((char *)&cell, 4, 1, in) != 1) {
+                fprintf(pm->my_err,"Error: can't read matrix (lecmat)");
+                my_exit(5,"lecmat");
+            }
+            row[c] = cell;
+        }
+    }
+    fileclose(&in);
+}
+
+/************** freetab *****************************************/
+/* Releases a table built by taballoc: rows 0..n, where n is the row */
+/* count kept in tab[0][0], and then the array of row pointers itself. */
+/**************************************************************************/
+void freetab (double **tab)
+{
+    int last = (int) tab[0][0];         /* number of rows in table */
+    int row  = 0;
+
+    while (row <= last) {
+        free(tab[row]);
+        row++;
+    }
+    free(tab);
+}
+
+/************** freevec *****************************************/
+/* Releases a vector obtained from vecalloc; a thin wrapper around free. */
+/**************************************************************************/
+void freevec (double *vec)
+{
+    free(vec);
+}
+
+/************** taballoc *****************************************/
+/* Allocates a zero-filled table with usable cells [1..l1][1..c1] and */
+/* stores it in *tab. Column 0 is book-keeping: tab[0][0] holds the row */
+/* count l1 and tab[1][0] the column count c1, which is where the other */
+/* table routines look them up. */
+/* Rows 0 and 1 are therefore always allocated, even when l1 is less than */
+/* 1, so that storing c1 never writes outside the table. (freetab frees */
+/* rows 0..tab[0][0], so for such an empty table row 1 is not reclaimed.) */
+/* Allocation failure is reported through my_exit(3,...), which is */
+/* expected not to return; if it does, *tab must not be used. */
+/**************************************************************************/
+void taballoc (double ***tab, int l1, int c1)
+{
+    int      last_row = (l1 < 1) ? 1 : l1;  /* row 1 must exist to hold c1 */
+    int      i;
+    double **rows;
+
+    rows = (double **) calloc(last_row+1, sizeof(double *));
+    *tab = rows;
+    if (rows == NULL) {
+        fprintf(pm->my_err,"(taballoc)");
+        my_exit(3,"taballoc2");
+        return;
+    }
+
+    for (i = 0; i <= last_row; i++) {
+        rows[i] = (double *) calloc(c1+1, sizeof(double));
+        if (rows[i] == NULL) {
+            fprintf(pm->my_err,"(taballoc)");
+            my_exit(3,"taballoc");
+            return;
+        }
+    }
+
+    rows[0][0] = l1;
+    rows[1][0] = c1;
+}
+
+/************** lecvec *****************************************/
+/* Fills elements 1..v1[0] of the vector v1 from the binary file named */
+/* nfic, where the values are stored as floats. A file that cannot be */
+/* opened ends in my_exit(6,...), a short read in my_exit(5,...). */
+/* NOTE(review): the element size is the literal 4, i.e. it relies on */
+/* sizeof(float) being 4 on the target platform. */
+/**************************************************************************/
+void lecvec (double *v1, char *nfic)
+{
+    FILE  *in = NULL;
+    float  cell;
+    int    count, k;
+
+    in = open_file("",nfic,"rb",FALSE);
+    if (in == NULL)
+        my_exit(6,"lecvec");
+
+    count = (int) v1[0];
+    k = 1;
+    while (k <= count) {
+        if ( fread((char *)&cell, 4, 1, in) != 1) {
+            fprintf(pm->my_err,"(lecvec)");
+            my_exit(5,"lecvec");
+        }
+        v1[k] = cell;
+        k++;
+    }
+    fileclose(&in);
+}
+
+/************** ecrmat ******************************************/
+/* Writes cells [1..rows][1..columns] of the table tab, row by row and */
+/* converted to float, to the binary file named nfic. The size is read */
+/* from tab[0][0] (rows) and tab[1][0] (columns). A file that cannot be */
+/* opened ends in my_exit(1,...), a failed write in my_exit(4,...). */
+/* NOTE(review): the element size is the literal 4, i.e. it relies on */
+/* sizeof(float) being 4 on the target platform. */
+/**************************************************************************/
+void ecrmat (double **tab, char *nfic)
+{
+    int    n_rows = (int)tab[0][0];
+    int    n_cols = (int)tab[1][0];
+    int    r, c;
+    float  cell;
+    FILE  *out = NULL;
+
+    out = open_file("",nfic,"wb",FALSE);
+    if (out == NULL)
+        my_exit(1,"ecrmat");
+
+    for (r = 1; r <= n_rows; r++) {
+        double *row = tab[r];
+
+        for (c = 1; c <= n_cols; c++) {
+            cell = (float)row[c];
+            if ( fwrite((const char *)&cell, 4, 1, out) != 1) {
+                fprintf(pm->my_err,"(ecrmat)");
+                my_exit(4,"ecrmat");
+            }
+        }
+    }
+
+    fileclose(&out);
+}
+/************** ecrvec ******************************************/
+/* Writes elements 1..v1[0] of the vector v1, converted to float, to the */
+/* binary file named nfic. A file that cannot be opened ends in */
+/* my_exit(1,...), a failed write in my_exit(4,...). */
+/* NOTE(review): the element size is the literal 4, i.e. it relies on */
+/* sizeof(float) being 4 on the target platform. */
+/**************************************************************************/
+void ecrvec (double *v1, char *nfic)
+{
+    int    count = (int)v1[0];
+    int    k     = 1;
+    float  cell;
+    FILE  *out   = NULL;
+
+    out = open_file("",nfic,"wb",FALSE);
+    if (out == NULL)
+        my_exit(1,"ecrvec");
+
+    while (k <= count) {
+        cell = (float)v1[k];
+        if ( fwrite((const char *)&cell, 4, 1, out) != 1) {
+            fprintf(pm->my_err,"(ecrvec)");
+            my_exit(4,"ecrvec");
+        }
+        k++;
+    }
+
+    fileclose(&out);
+}
+
+/************** scalmat ******************************************/
+/* Multiplies every cell [1..rows][1..columns] of the table tab by r, in */
+/* place. The size is read from tab[0][0] (rows) and tab[1][0] (columns); */
+/* column 0, which holds that book-keeping, is left untouched. */
+/**************************************************************************/
+void scalmat (double **tab, double r)
+{
+    int n_rows = (int) tab[0][0];
+    int n_cols = (int) tab[1][0];
+    int row, col;
+
+    for (row = 1; row <= n_rows; row++) {
+        double *cells = tab[row];
+
+        for (col = 1; col <= n_cols; col++)
+            cells[col] = cells[col] * r;
+    }
+}
+
+/************** scalvec ******************************************/
+/* Multiplies elements 1..v1[0] of the vector v1 by r, in place. Element */
+/* 0, which holds the length, is left untouched. */
+/**************************************************************************/
+void scalvec (double *v1, double r)
+{
+    int count = (int) v1[0];
+    int k     = 1;
+
+    while (k <= count) {
+        v1[k] = v1[k] * r;
+        k++;
+    }
+}
+
+/************** DiagoRC      ******************************************/
+/* Generates the correspondence analysis factors.                        */
+/*                                                                       */
+/* Reads the row weights ("cbfcpl"), column weights ("cbfcpc") and the   */
+/* table ("cbfcta"), scales the table by the square roots of the         */
+/* weights, diagonalises the smaller of the two cross-product matrices   */
+/* and writes:                                                           */
+/*   "cbfcvp"  eigenvalue and eigenvalue/total inertia, one pair per axis*/
+/*   "cbfcli"  row coordinates on the first f1 axes                      */
+/*   "cbfcco"  column coordinates on the first f1 axes                   */
+/* f1 is pcoa->axis, or 2 (1 when only one axis exists) if that is zero. */
+/* The total inertia and the eigenvalue table go to *summary.            */
+/**************************************************************************/
+void DiagoRC ( FILE *summary)
+{
+    const int nlig  = pcoa->rows;
+    const int ncol  = pcoa->colm;
+    const int wide  = (nlig < ncol);        /* fewer rows than columns   */
+    const int lcmin = wide ? nlig : ncol;   /* order of the matrix that  */
+                                            /* gets diagonalised         */
+    int rang, f1, i, j, k;
+    double **w, **ctab, **auxi, **vp1, **vp2;
+    double *poili, *poico, *l;
+    double acc, cell, sq, rowwt, inertotal;
+
+    taballoc(&w, nlig, ncol);
+    taballoc(&ctab, lcmin, lcmin);
+    taballoc(&auxi, lcmin, 2);
+    vecalloc(&poili, nlig);
+    vecalloc(&poico, ncol);
+    vecalloc(&l, lcmin);
+
+    lecvec(poili, "cbfcpl");
+    sqrvec(poili);
+    lecvec(poico, "cbfcpc");
+    sqrvec(poico);
+    lecmat(w, "cbfcta");
+
+    /* scale every cell by sqrt(row weight)*sqrt(column weight) and sum  */
+    /* the squares of the scaled cells                                   */
+    inertotal = 0;
+    for (i = 1; i <= nlig; i++) {
+        rowwt = poili[i];
+        for (j = 1; j <= ncol; j++) {
+            cell = w[i][j] * rowwt * poico[j];
+            w[i][j] = cell;
+            sq = cell * cell;
+            inertotal = inertotal + sq;
+        }
+    }
+
+    fprintf(summary,"The total inertia of the data was %f\n",inertotal);
+    fprintf(summary, "\nExplanation of the variation by axis "
+                     "(see also eigen.coa)\n");
+
+    /* prodmatAAtB / prodmatAtAB build the cross-product of the scaled   */
+    /* table, DiagoComp diagonalises it in ctab, editvalpro reports the  */
+    /* eigenvalues                                                       */
+    dot(1,10);
+    if (wide)
+        prodmatAAtB(w, ctab);
+    else
+        prodmatAtAB(w, ctab);
+
+    DiagoComp(lcmin, ctab, l, &rang);
+    f1 = pcoa->axis;
+    editvalpro(summary, l, lcmin, inertotal);
+    for (j = 1; j <= lcmin; j++) {
+        auxi[j][1] = l[j];
+        auxi[j][2] = l[j]/inertotal;
+    }
+    sqrvec(l);
+
+    if (f1 == 0)
+        f1 = (lcmin == 1) ? 1 : 2;
+
+    /* output the eigenvalues and relative inertia values */
+    ecrmat(auxi, "cbfcvp");
+
+    /* Calculate the factorial coordinates */
+    if (wide) {
+        taballoc(&vp2, ncol, f1);
+        for (j = 1; j <= ncol; j++) {
+            for (k = 1; k <= f1; k++) {
+                acc = 0;
+                for (i = 1; i <= nlig; i++)
+                    acc = acc + w[i][j] * ctab[i][k];
+                vp2[j][k] = acc;
+            }
+        }
+        for (i = 1; i <= ncol; i++) {
+            if (poico[i] == 0)
+                continue;
+            for (j = 1; j <= f1; j++)
+                vp2[i][j] = vp2[i][j] / poico[i];
+        }
+        for (i = 1; i <= nlig; i++) {
+            if (poili[i] == 0)
+                continue;
+            for (j = 1; j <= nlig; j++)
+                ctab[i][j] = ctab[i][j] * l[j] / poili[i];
+        }
+        ecrmatred(ctab, f1, "cbfcli");
+        ecrmatred(vp2, f1, "cbfcco");
+        freetab(vp2);
+    } else {
+        taballoc(&vp1, ncol, f1);
+        taballoc(&vp2, nlig, f1);
+        for (i = 1; i <= ncol; i++) {
+            for (j = 1; j <= f1; j++)
+                vp1[i][j] = ctab[i][j];
+        }
+        prodmatABC(w, vp1, vp2);
+        for (i = 1; i <= nlig; i++) {
+            if (poili[i] == 0.0)
+                continue;
+            for (j = 1; j <= f1; j++)
+                vp2[i][j] = vp2[i][j] / poili[i];
+        }
+        for (i = 1; i <= ncol; i++) {
+            if (poico[i] == 0)
+                continue;
+            for (j = 1; j <= rang; j++)
+                ctab[i][j] = ctab[i][j] * l[j] / poico[i];
+        }
+        ecrmat(vp2, "cbfcli");
+        ecrmatred(ctab, f1, "cbfcco");
+        freetab(vp1);
+        freetab(vp2);
+    }
+
+    /* free memory */
+    freetab(w);
+    freetab(ctab);
+    freetab(auxi);
+    freevec(poili);
+    freevec(poico);
+    freevec(l);
+
+}                               /* End of DiagoRC */
+
+/************** sqrvec       ******************************************/
+/* Replace, in place, each element v1[1..count] by its square root,      */
+/* where count is read from v1[0]. A negative element is reported on     */
+/* pm->my_err and my_exit(99) is called.                                 */
+/**************************************************************************/
+void sqrvec (double *v1)
+{
+    const int count = (int) v1[0];
+    int idx;
+
+    for (idx = 1; idx <= count; idx++) {
+        if (v1[idx] < 0.0) {
+            fprintf(pm->my_err,"Error: Square root of negative number (sqrvec)");
+            my_exit(99,"sqrvec");
+        }
+        v1[idx] = sqrt(v1[idx]);
+    }
+}
+
+/************** prodmatAAtB  ***************************************/
+/* Computes b = a * transpose(a).                                        */
+/* a has a[0][0] rows and a[1][0] columns; b receives a symmetric        */
+/* rows x rows result. Only the upper triangle is computed, each value   */
+/* being mirrored into the lower triangle. dot() is called once per row. */
+/**************************************************************************/
+void prodmatAAtB (double **a, double **b)
+{
+    const int nrows = (int) a[0][0];
+    const int ncols = (int) a[1][0];
+    int r1, r2, c;
+    double acc;
+
+    for (r1 = 1; r1 <= nrows; r1++) {
+        dot ( 1 , 10 );
+        for (r2 = r1; r2 <= nrows; r2++) {
+            acc = 0;
+            for (c = 1; c <= ncols; c++)
+                acc = acc + a[r1][c] * a[r2][c];
+            b[r1][r2] = acc;
+            b[r2][r1] = acc;
+        }
+    }
+}
+
+/************** prodmatABC   ***************************************/
+/* Computes c = a * b.                                                   */
+/* a has a[0][0] rows and a[1][0] columns; the number of columns of b is */
+/* read from b[1][0]. c must already be large enough for the result.     */
+/* dot() is called once per row of a.                                    */
+/**************************************************************************/
+void prodmatABC (double **a, double **b, double **c)
+{
+    const int nrows  = (int) a[0][0];
+    const int ninner = (int) a[1][0];
+    const int ncols  = (int) b[1][0];
+    int row, col, mid;
+    double acc;
+
+    for (row = 1; row <= nrows; row++) {
+        dot(1,10);
+        for (col = 1; col <= ncols; col++) {
+            acc = 0;
+            for (mid = 1; mid <= ninner; mid++)
+                acc = acc + a[row][mid] * b[mid][col];
+            c[row][col] = acc;
+        }
+    }
+}
+
+/************** prodmatAtAB  ***************************************/
+/* Computes b = transpose(a) * a.                                        */
+/* a has a[0][0] rows and a[1][0] columns; b receives a symmetric        */
+/* columns x columns result. Only the upper triangle is computed, each   */
+/* value being mirrored into the lower triangle. dot() is called once    */
+/* per column.                                                           */
+/**************************************************************************/
+void prodmatAtAB (double **a, double **b)
+{
+    const int nrows = (int) a[0][0];
+    const int ncols = (int) a[1][0];
+    int c1, c2, r;
+    double acc;
+
+    for (c1 = 1; c1 <= ncols; c1++) {
+        dot(1,100);
+        for (c2 = c1; c2 <= ncols; c2++) {
+            acc = 0;
+            for (r = 1; r <= nrows; r++)
+                acc = acc + a[r][c2] * a[r][c1];
+            b[c1][c2] = acc;
+            b[c2][c1] = acc;
+        }
+    }
+}
+
+/************** editvalpro   ***************************************/
+/* Reports the eigenvalues vp[1..n], their share of the total inertia s  */
+/* and the running sum of those shares.                                  */
+/*                                                                       */
+/*   ficlist  receives a two-column table (odd axis left, even right)    */
+/*   eigen.coa receives one separator-delimited line per axis            */
+/*                                                                       */
+/* Side effects: negative eigenvalues are reported on ficlist and then   */
+/* set to 0.0 in the caller's vector. At most the first 40 axes are      */
+/* printed. my_exit(1) is called if eigen.coa cannot be opened.          */
+/**************************************************************************/
+void editvalpro (FILE *ficlist, double *vp, int n, double s)
+{
+    double cumul = 0.0;          /* sum of the eigenvalues printed so far */
+    int i, n1;
+    float v2, v3, v4;
+    FILE *eigen = NULL;
+    char sp = pm->seperator;
+
+    if ( (eigen = open_file("","eigen.coa","w",FALSE)) == NULL )
+        my_exit(1,"editvalpro");
+
+    for (i = 1; i <= n; i++) {
+        if (vp[i] < 0.0) {
+            v2 = (float) vp[i];
+            fprintf(ficlist, "Eigenvalue number %d is negative : %+.4E\n", i, v2);
+            vp[i] = 0.0;
+        }
+    }
+
+    n1 = (n > 40) ? 40 : n;
+    fprintf(ficlist, "Num. Eigenval. R.Iner. R.Sum "
+                     "|Num. Eigenval. R.Iner. R.Sum |");
+    fprintf(ficlist, "\n");
+
+    /* One output row per pair of axes. A single accumulator is used for */
+    /* both columns, so no value is ever read before it has been set     */
+    /* (the previous two-variable scheme read an unassigned sum when the */
+    /* last row held only one axis).                                     */
+    for (i = 1; i <= n1; i = i + 2) {
+        cumul = cumul + vp[i];
+        v2 = (float) vp[i];
+        v3 = (float) vp[i]/(float)s;
+        v4 = (float) cumul/(float)s;
+        fprintf(ficlist, "%.2d %+.4E %+.4f %+.4f ", i, v2, v3, v4);
+        fprintf(eigen ,"%.2d%c%.4E%c%.4f%c%.4f\n",i,sp,v2,sp,v3,sp,v4);
+
+        if (i < n1) {
+            cumul = cumul + vp[i+1];
+            v2 = (float) vp[i+1];
+            v3 = (float) vp[i+1]/(float)s;
+            v4 = (float) cumul/(float)s;
+            fprintf(ficlist, " |%.2d %+.4E %+.4f %+.4f |", i+1, v2, v3, v4);
+            fprintf(eigen ,"%.2d%c%.4E%c%.4f%c%.4f\n",i+1,sp,v2,sp,v3,sp,v4);
+        }
+        fprintf(ficlist, "\n");
+    }
+    fprintf(ficlist, "\n");
+    fileclose(&eigen);
+}
+
+/************** ecrmatred    *****************************************/
+/* Writes the first c1 columns of every row of **tab to the binary file  */
+/* *nfic. The row count is read from tab[0][0]; each cell is narrowed to */
+/* float and written as one 4-byte record. my_exit() is called if the    */
+/* file cannot be opened (code 1) or a write fails (code 4).             */
+/**************************************************************************/
+void ecrmatred (double **tab, int c1, char *nfic)
+{
+    FILE *out = NULL;
+    float cell;
+    int row, col;
+    const int nrows = (int) tab[0][0];
+
+    out = open_file("", nfic, "wb", FALSE);
+    if (out == NULL)
+        my_exit(1, "ecrmatred");
+
+    for (row = 1; row <= nrows; row++) {
+        for (col = 1; col <= c1; col++) {
+            cell = (float) tab[row][col];
+            if (fwrite(&cell, 4, 1, out) != 1) {
+                fprintf(pm->my_err, "(ecrmatred)");
+                my_exit(4, "ecrmatred");
+            }
+        }
+    }
+
+    fileclose(&out);
+}
+
+/************** readvec      ***************************************/
+/* Fills v1[1..count] from the already open stream fic, where count is   */
+/* read from v1[0]. Each element is read as one 4-byte float record and  */
+/* widened to double. A short read is reported on pm->my_err and         */
+/* my_exit(5) is called. The stream is left open.                        */
+/**************************************************************************/
+void readvec (double *v1, FILE *fic)
+{
+    const int count = (int) v1[0];
+    float item;
+    int idx;
+
+    for (idx = 1; idx <= count; idx++) {
+        if (fread(&item, 4, 1, fic) != 1) {
+            fprintf(pm->my_err, "(readvec)");
+            my_exit(5, "readvec");
+        }
+        v1[idx] = item;
+    }
+}
+
+/************** DiagoComp ***************************************/
+/* Diagonalisation of the n0 x n0 matrix w (1-based indexing) */
+/* */
+/* n0 order of the matrix */
+/* w input matrix; overwritten by the routine. NOTE(review): on */
+/* return its columns are presumably the eigenvectors, */
+/* reordered in step with d - confirm against the callers. */
+/* d receives d[1..n0], sorted into descending order; values */
+/* smaller than 1e-5 times d[1] are set to zero */
+/* rang receives the number of non-zero values left in d */
+/* */
+/* Only w[i][l] with l <= i is read in the first pass, so the input is */
+/* assumed symmetric - TODO confirm. The structure resembles the classic */
+/* tridiagonal reduction followed by an iterative QL-type sweep (cf. */
+/* EISPACK tred2/tql2) - NOTE(review): not verified line by line. */
+/* */
+/* Source: T. FOUCART Analyse factorielle de tableaux multiples, */
+/* Masson, Paris 1984,185p., p. 62. D'apres VPROP et TRIDI, */
+/* de LEBART et coll. */
+/* Lots of nasty goto jumps ... ported from Fortran */
+/*************************************************************************/
+void DiagoComp (int n0, double **w, double *d, int *rang)
+{
+ double *s;
+ double a, b, c, x, xp, q, bp, ab, ep, h, t, u , v;
+ double dble;
+ int ni, i, i2, j, k, jk, ijk, ij, l, ix, m, m1, isnou;
+
+ /* s is a work vector of n0 elements, released on both exit paths */
+ vecalloc(&s, n0);
+ /* a: relative tolerance used in the convergence test below */
+ a = 0.000000001;
+ /* ni: maximum number of iterations allowed for each value of k */
+ ni = 100;
+ /* trivial 1 x 1 case */
+ if (n0 == 1) {
+ d[1] = w[1][1];
+ w[1][1] = 1.0;
+ *rang = 1;
+ freevec (s);
+ return;
+ }
+
+ /* ---- pass 1: reduce the rows from the last (i = n0) down to i = 2 ---- */
+ for (i2=2;i2<=n0;i2++) {
+
+ b=0.0;
+ c=0.0;
+ i=n0-i2+2;
+ k=i-1;
+ if (k < 2) goto Et1;
+ /* c = sum of |w[i][1..k]|, used to scale the row */
+ for (l=1;l<=k;l++) {
+ c = c + fabs((double) w[i][l]);
+ }
+ if (c != 0.0) goto Et2;
+
+ /* nothing to reduce in this row: keep the sub-diagonal element as is */
+Et1: s[i] = w[i][k];
+ goto Etc;
+
+ /* scale the row by c and accumulate b = sum of the squared scaled terms */
+Et2: for (l=1;l<=k;l++) {
+ x = w[i][l] / c;
+ w[i][l] = x;
+ b = b + x * x;
+ }
+ xp = w[i][k];
+ ix = 1;
+ if (xp < 0.0) ix = -1;
+
+/* q = -sqrt(b) * ix; */
+ dble = b;
+ dble = -sqrt(dble);
+ q = dble * ix;
+
+ s[i] = c * q;
+ b = b - xp * q;
+ w[i][k] = xp - q;
+ xp = 0;
+ for (m=1;m<=k;m++) {
+ w[m][i] = w[i][m] / b / c;
+ q = 0;
+ for (l=1;l<=m;l++) {
+ q = q + w[m][l] * w[i][l];
+ }
+ m1 = m + 1;
+ if (k < m1) goto Et3;
+ for (l=m1;l<=k;l++) {
+ q = q + w[l][m] * w[i][l];
+ }
+
+Et3: s[m] = q / b;
+ xp = xp + s[m] * w[i][m];
+ }
+ bp = xp * 0.5 / b;
+ /* update the leading k x k lower triangle of w */
+ for (m=1;m<=k;m++) {
+ xp = w[i][m];
+ q = s[m] - bp * xp;
+ s[m] = q;
+ for (l=1;l<=m;l++) {
+ w[m][l] = w[m][l] - xp * s[l] - q * w[i][l];
+ }
+ }
+ /* undo the scaling of row i */
+ for (l=1;l<=k;l++) {
+ w[i][l] = c * w[i][l];
+ }
+
+ /* d[i] temporarily holds b; it is 0.0 when the row was skipped via Et1 */
+Etc: d[i] = b;
+ } /* for (i2=2;i2<=n0;i2++) */
+
+ s[1] = 0.0;
+ d[1] = 0.0;
+
+ /* ---- pass 2: rebuild w from the values stored during pass 1; ---- */
+ /* ---- d[i] becomes the diagonal element w[i][i] ---- */
+ for (i=1;i<=n0;i++) {
+ dot(1,100);
+ k = i - 1;
+ /* d[i] == 0.0 marks a row for which pass 1 stored no transformation */
+ if (d[i] == 0.0) goto Et4;
+ for (m=1;m<=k;m++) {
+ q = 0.0;
+ for (l=1;l<=k;l++) {
+ q = q + w[i][l] * w[l][m];
+ }
+ for (l=1;l<=k;l++) {
+ w[l][m] = w[l][m] - q * w[l][i];
+ }
+ }
+
+Et4: d[i] = w[i][i];
+ w[i][i] = 1.0;
+ if (k < 1) goto Et5;
+ /* clear row i and column i to the left of / above the diagonal */
+ for (m=1;m<=k;m++) {
+ w[i][m] = 0.0;
+ w[m][i] = 0.0;
+ }
+
+Et5:;
+ }
+
+ /* shift the off-diagonal work vector down by one position */
+ for (i=2;i<=n0;i++) {
+ s[i-1] = s[i];
+ }
+ s[n0] = 0.0;
+ /* ---- pass 3: iterate on d (diagonal) and s (off-diagonal) ---- */
+ for (k=1;k<=n0;k++) {
+ /* m counts the iterations spent on this k */
+ m = 0;
+
+ /* find the first j >= k whose off-diagonal s[j] is negligible */
+ /* compared with its neighbouring diagonal values (or j == n0) */
+Et6: for (j=k;j<=n0;j++) {
+ dot(1,100);
+ if (j == n0) goto Et7;
+ ab = fabs((double) s[j]);
+ ep = a * (fabs((double) d[j]) + fabs((double) d[j+1]));
+ if (ab < ep) goto Et7;
+ }
+
+Et7: isnou = 1;
+ h = d[k];
+ /* j == k: d[k] has converged, move on to the next k */
+ if (j == k) goto Eta;
+ if (m < ni) goto Etd;
+
+ /* iteration limit ni reached without convergence */
+ fprintf(pm->my_err,"Error: can't compute matrix eigenvalues");
+ my_exit(99,"corresp");
+
+Etd: m = m + 1;
+ q = (d[k+1]-h) * 0.5 / s[k];
+
+/* t = sqrt(q * q + 1.0); */
+ dble = q * q + 1.0;
+ dble = sqrt(dble);
+ t = dble;
+
+ if (q < 0.0) isnou = -1;
+ q = d[j] - h + s[k] / (q + t * isnou);
+ u = 1.0;
+ v = 1.0;
+ h = 0.0;
+ jk = j-k;
+ /* sweep i from j-1 down to k, applying one plane rotation (u, v) per */
+ /* step to d, s and to columns i and i+1 of w */
+ for (ijk=1;ijk<=jk;ijk++) {
+ dot(1,100);
+ i = j - ijk;
+ xp = u * s[i];
+ b = v * s[i];
+ /* the two branches compute the same rotation, dividing by whichever */
+ /* of xp and q is larger in magnitude */
+ if (fabs((double) xp) < fabs((double) q)) goto Et8;
+ u = xp / q;
+
+/* t = sqrt(u * u + 1); */
+ dble = u * u + 1.0;
+ dble = sqrt(dble);
+ t = dble;
+
+ s[i+1] = q * t;
+ v = 1 / t;
+ u = u * v;
+ goto Et9;
+
+Et8: v = q / xp;
+
+/* t = sqrt(1 + v * v); */
+ dble = 1.0 + v * v;
+ dble = sqrt(dble);
+ t = dble;
+
+ s[i+1] = t * xp;
+ u = 1 / t;
+ v = v * u;
+
+Et9:
+ q = d[i+1] - h;
+ t = (d[i] - q) * u + 2.0 * v * b;
+ h = u * t;
+ d[i+1] = q + h;
+ q = v * t - b;
+ /* apply the same rotation to columns i and i+1 of w */
+ for (l=1;l<=n0;l++) {
+ xp = w[l][i+1];
+ w[l][i+1] = u * w[l][i] + v * xp;
+ w[l][i] = v * w[l][i] - u * xp;
+ }
+ }
+ d[k] = d[k] - h;
+ s[k] = q;
+ s[j] = 0.0;
+ /* test convergence again for the same k */
+ goto Et6;
+
+Eta:;
+ } /* for (k=1;k<=n0;k++) */
+
+ /* ---- pass 4: selection sort of d into descending order, swapping ---- */
+ /* ---- the matching columns of w; on ties the later index is chosen ---- */
+ for (ij=2;ij<=n0;ij++) {
+ dot(1,300);
+ i = ij - 1;
+ l = i;
+ h = d[i];
+ for (m=ij;m<=n0;m++) {
+ if (d[m] >= h) {
+ l = m;
+ h = d[m];
+ }
+ }
+ if (l == i) {
+ goto Etb;
+ } else {
+ d[l] = d[i];
+ d[i] = h;
+ }
+ for (m=1;m<=n0;m++) {
+ h = w[m][i];
+ w[m][i] = w[m][l];
+ w[m][l] = h;
+ }
+
+Etb:;
+ } /* for (ij=2;ij<=n0;ij++) */
+
+ /* ---- rank: zero every value below 1e-5 of the largest, count the rest. ---- */
+ /* NOTE(review): the ratio divides by d[1]; if d[1] is 0.0 the test */
+ /* compares against NaN/inf - confirm this cannot occur. */
+ *rang = 0;
+ for (i=1;i<=n0;i++) {
+ if (d[i] / d[1] < 0.00001) d[i] = 0.0;
+ if (d[i] != 0.0) *rang = *rang + 1;
+ }
+ freevec(s);
+} /* DiagoComp */
+
+/************** inertialig   ***************************************/
+/* Called when advanced correspondence analysis option has been selected */
+/* This analyses and reports the absolute and relative contributions of  */
+/* each gene (row) to the inertia of the recorded factors.               */
+/*                                                                       */
+/*   inertia_out  name of the separator-delimited output file (created)  */
+/*   ncout        name of a text file read twice, one line per gene;     */
+/*                the text before the first whitespace is the gene label */
+/*   summary      open stream that receives the formatted tables         */
+/*                                                                       */
+/* Inputs are the binary work files "cbfcpl", "cbfcpc", "cbfcli",        */
+/* "cbfcvp" and "cbfcta"; dimensions, axis count and total inertia come  */
+/* from pcoa. All contributions are printed in units of 1/10000.         */
+/*************************************************************************/
+
+/* Read the next line of fnam into pm->junk and cut it at the first      */
+/* whitespace character found in its first 36 bytes.                     */
+static void inertialig_read_label (FILE *fnam)
+{
+    int j;
+
+    /* At end of file or on a read error use an empty label instead of   */
+    /* whatever an earlier read left behind in the buffer.               */
+    if (fgets(pm->junk, BUFSIZ, fnam) == NULL)
+        pm->junk[0] = '\0';
+    pm->junk[35] = '\0';
+    for (j = 35; j >= 0; j--) {
+        /* isspace() requires an unsigned char value (or EOF)            */
+        if ( isspace((unsigned char) pm->junk[j]) )
+            pm->junk[j] = '\0';
+    }
+}
+
+void inertialig( char *inertia_out, char *ncout, FILE *summary)
+{
+    int i, j, k, f1, l1, c1, lcmin;
+    double **cooli, **w;
+    double *vtab, *conli, *poili, *poico;
+    double l0, inertotal, a1, a2, m2, m3, s1;
+    double temp1 = 0, temp2 = 0;
+    FILE *inert_out = NULL, *fnam = NULL;
+
+    l1 = pcoa->rows;
+    c1 = pcoa->colm;
+    f1 = pcoa->axis;
+    inertotal = pcoa->inertia;
+
+    if ( (inert_out = open_file( "",inertia_out,"w",FALSE)) == NULL)
+        my_exit(1,"inertia out");
+
+    lcmin = (l1 < c1) ? l1 : c1;
+    taballoc(&w, l1, c1);
+    vecalloc(&poili, l1);
+    vecalloc(&poico, c1);
+    taballoc(&cooli, l1, f1);
+    vecalloc(&conli, l1);
+    vecalloc(&vtab, lcmin);
+
+    lecvec(poili, "cbfcpl");
+    sqrvec(poili);
+    lecvec(poico, "cbfcpc");
+    sqrvec(poico);
+    lecmat(cooli, "cbfcli");
+    selectcol("cbfcvp", vtab, 2);
+    lecmat(w, "cbfcta");
+
+    fprintf(summary, "\n\nNumber of rows: %d, columns: %d\n", l1, c1);
+    fprintf(summary, "Total inertia: %8.6G - Number of axes: %d\n\n",
+            inertotal, f1);
+    fprintf(summary, "Contributions of each gene to the recorded factors "
+                     "A.K.A axes\n");
+
+    /* calculate the contribution of each row to the total inertia.      */
+    /* NOTE(review): conli is accumulated into without being cleared     */
+    /* here, so vecalloc() is assumed to return zeroed storage - confirm */
+    for (i = 1; i <= l1; i++) {
+        a1 = poili[i];
+        for (j = 1; j <= c1; j++) {
+            s1 = w[i][j] * a1 * poico[j];
+            s1 = s1 * s1;
+            conli[i] = conli[i] + s1;
+        }
+    }
+
+    /* scale the vector by 1/inertia total */
+    scalvec(conli, 1.0/inertotal);
+
+    if ( (fnam = open_file("",ncout, "r",FALSE)) == NULL)
+        my_exit(6,"inertialgn");
+
+    /* ---------------- absolute contributions ---------------- */
+    fprintf(summary, "Row inertia\n");
+    fprintf(summary, "All contributions are in 1/10000\n\n");
+    fprintf(summary, "----------Absolute contributions----------\n");
+    fprintf(summary, "Short_Gene_Name|Num |");
+    for (k = 1; k <= f1; k++) {
+        fprintf(summary, "Fac%2d|", k);
+    }
+    fprintf(summary , "\n");
+    fprintf(inert_out, "\n");
+
+    for (i = 1; i <= l1; i++) {
+        inertialig_read_label(fnam);
+
+        fprintf(inert_out ,"%-.15s%c",pm->junk,pm->seperator);
+        fprintf(summary, "%-15.15s",pm->junk);
+
+        fprintf(summary ,"|%5d|", i);
+        fprintf(inert_out,"%d%c", i,pm->seperator);
+
+        l0 = poili[i]*poili[i]/inertotal;
+
+        for (j = 1; j <= f1; j++) {
+            temp1 = (cooli[i][j] * cooli[i][j]); /* bug fix for Think C  */
+            temp2 = (l0 / vtab[j]);              /* need to split calculation*/
+            a1 = temp1 * temp2;
+            fprintf(summary, "%5d|", (int) (a1 * 10000));
+            fprintf(inert_out,"%d%c",(int) (a1 * 10000),pm->seperator);
+        }
+        fprintf(summary, "\n");
+        fprintf(inert_out,"\n");
+    }
+    fprintf(summary, "\n\nRelative contributions\nThis is the variation \n"
+                     "in the %s usage of each gene that is \n"
+                     "explained by each axis/factor\n"
+                     "see also %s \n",
+            (pm->coa=='a')?"amino acid":"codon",inertia_out);
+
+    /* re-open the label file so the second table starts at its first line */
+    fclose(fnam);
+    if ( (fnam = open_file("",ncout, "r",FALSE)) == NULL)
+        my_exit(6,"inertialgn");
+
+    /* ---------------- relative contributions ---------------- */
+    fprintf(summary, "----------Relative contributions----------\n");
+    fprintf(summary, "Short_gene_name|Num |");
+    for (k = 1; k <= f1; k++) {
+        fprintf(summary, "Fac%2d|", k);
+    }
+    fprintf(summary, "|Remains| Weight | Cont.|");
+    fprintf(summary, "\n");
+    fprintf(inert_out,"\n");
+
+    for (i = 1; i <= l1; i++) {
+        inertialig_read_label(fnam);
+
+        fprintf(inert_out , "%-.15s%c",pm->junk,pm->seperator);
+        fprintf(summary, "%-15.15s",pm->junk);
+
+        fprintf(summary, "|%5d|", i);
+        fprintf(inert_out,"%d%c", i,pm->seperator);
+        a2 = 0.;
+        m3 = poili[i]*poili[i]/inertotal;
+        m2 = conli[i];
+        if (m2 == 0.) m2 = 1.;      /* avoid dividing by a zero contribution */
+        for (j = 1; j <= f1; j++) {
+            a1 = cooli[i][j] * cooli[i][j] * m3 / m2;
+            a2 = a2 + a1;
+            fprintf(summary, "%5d|", (int) (a1 * 10000));
+            fprintf(inert_out,"%d%c",(int) (a1 * 10000),pm->seperator);
+        }
+        fprintf(summary, "|%5d ", (int) ((1-a2) * 10000));
+        fprintf(summary, "|%5d |%5d |\n", (int) (inertotal * m3 * 10000),
+                (int) (m2 * 10000));
+        fprintf(inert_out, "\n");
+    }
+    fprintf(summary , "\n");
+    fprintf(inert_out, "\n");
+
+    /* free memory */
+    freetab(w);
+    freevec(poili);
+    freevec(poico);
+    freetab(cooli);
+    freevec(conli);
+    freevec(vtab);
+    fileclose(&inert_out);
+    fileclose(&fnam);
+
+}                               /* End of Inertia */
+
+/************** inertiacol   ****************************************/
+/* Called when advanced correspondence analysis option has been selected */
+/* This analyses and reports the absolute and relative contributions of  */
+/* each column (codon, or amino acid when pm->coa is 'a') to the inertia */
+/* of the recorded factors.                                              */
+/*                                                                       */
+/*   inertia_out  name of the separator-delimited output file; opened in */
+/*                append mode and closed again before returning          */
+/*   summary      open stream that receives the formatted tables         */
+/*                                                                       */
+/* Inputs are the binary work files "cbfcpl", "cbfcpc", "cbfcco",        */
+/* "cbfcvp" and "cbfcta"; dimensions, axis count and total inertia come  */
+/* from pcoa. All contributions are printed in units of 1/10000.         */
+/**************************************************************************/
+void inertiacol(char *inertia_out, FILE *summary )
+{
+    int x, i, j, k, f1, l1, c1, lcmin;
+    double **cooco, **w;
+    double *vtab, *conco, *poili, *poico;
+    double l0, inertotal, a1, a2, m2, m3, s1;
+    FILE *inert_out = NULL;
+
+    if ( (inert_out = open_file( "",inertia_out,"a",FALSE)) == NULL)
+        my_exit(1,"inertia out2");
+
+    l1 = pcoa->rows;
+    c1 = pcoa->colm;
+    f1 = pcoa->axis;
+    inertotal = pcoa->inertia;
+
+    lcmin = (l1 < c1) ? l1 : c1;
+
+    taballoc(&w, l1, c1);
+    vecalloc(&poili, l1);
+    vecalloc(&poico, c1);
+    taballoc(&cooco, c1, f1);
+    vecalloc(&conco, c1);
+    vecalloc(&vtab, lcmin);
+
+    lecvec(poili, "cbfcpl");
+    sqrvec(poili);
+    lecvec(poico, "cbfcpc");
+    sqrvec(poico);
+    lecmat(cooco, "cbfcco");
+    selectcol("cbfcvp", vtab, 2);
+    lecmat(w, "cbfcta");
+
+    fprintf(summary, "\n\nColumn inertia\nNumber of genes: %d, columns: "
+                     "%d\n\n", l1, c1);
+    fprintf(summary, "This is the fraction of the total inertia that is\n"
+                     "explained for each %s by each of the recorded\n"
+                     "factors or axes\n\n\n",(pm->coa=='a')? "amino acids":"codons");
+
+    /* contribution of each column to the total inertia.                 */
+    /* NOTE(review): conco is accumulated into without being cleared     */
+    /* here, so vecalloc() is assumed to return zeroed storage - confirm */
+    for (i = 1; i <= l1; i++) {
+        a1 = poili[i];
+        for (j = 1; j <= c1; j++) {
+            s1 = w[i][j] * a1 * poico[j];
+            s1 = s1 * s1;
+            conco[j] = conco[j] + s1;
+        }
+    }
+
+    /* scale the vector by 1/inertia total */
+    scalvec(conco, 1.0/inertotal);
+
+    /* ---------------- absolute contributions ---------------- */
+    fprintf(summary, "\n\nColumn inertia\n");
+    fprintf(summary, "All contributions are in 1/10000\n\n");
+    fprintf(summary, "----------Absolute contributions----------\n");
+    fprintf(summary, "Key|Num |");
+    for (k = 1; k <= f1; k++) {
+        fprintf(summary, "Fac%2d|", k);
+    }
+    fprintf(summary, "\n");
+
+    /* x walks the amino-acid / codon tables, skipping the entries that  */
+    /* are flagged FALSE in pcoa, so that it names analysed column i     */
+    for (x = 0, i = 1; i <= c1; i++) {
+
+        if (pm->coa == 'a' ) {
+            while (pcoa->amino[++x] == FALSE);
+
+            fprintf(summary, "%s", paa->aa3[x]);
+            fprintf(inert_out,"%s%c",paa->aa3[x],pm->seperator);
+        } else {
+            while (pcoa->codons[++x] == FALSE);
+
+            fprintf(summary, "%s", paa->cod[x]);
+            fprintf(inert_out,"%s%c",paa->cod[x],pm->seperator);
+        }
+
+        fprintf(summary, "|%5d|", i);
+        fprintf(inert_out,"%d%c",i,pm->seperator);
+
+        l0 = poico[i]*poico[i]/inertotal;
+        for (j = 1; j <= f1; j++) {
+            a1 = cooco[i][j] * cooco[i][j] * l0 / vtab[j];
+            fprintf(summary, "%5d|", (int) (a1 * 10000));
+            fprintf(inert_out,"%i%c", (int) (a1 * 10000),pm->seperator );
+        }
+        fprintf(summary, "\n");
+        fprintf(inert_out,"\n");
+    }
+    fprintf(summary, "\n");
+    fprintf(inert_out,"\n");
+
+    /* ---------------- relative contributions ---------------- */
+    fprintf(summary, "----------Relative contributions----------\n");
+    fprintf(summary, "Key|Num |");
+    for (k = 1; k <= f1; k++) {
+        fprintf(summary, "Fac%2d|", k);
+    }
+    fprintf(summary, "|Remains| Weight | Cont.|");
+    fprintf(summary, "\n");
+
+    for (x = 0, i = 1; i <= c1; i++) {
+
+        if (pm->coa == 'a' ) {
+            while (pcoa->amino[++x] == FALSE);
+
+            fprintf(summary, "%s", paa->aa3[x]);
+            fprintf(inert_out,"%s%c",paa->aa3[x],pm->seperator);
+        } else {
+            while (pcoa->codons[++x] == FALSE);
+
+            fprintf(summary, "%s", paa->cod[x]);
+            fprintf(inert_out,"%s%c",paa->cod[x],pm->seperator);
+        }
+
+        fprintf(summary, "|%5d|", i);
+        fprintf(inert_out,"%d%c",i,pm->seperator);
+        a2 = 0.;
+        m3 = poico[i]*poico[i]/inertotal;
+        m2 = conco[i];
+        if (m2 == 0.) m2 = 1.;      /* avoid dividing by a zero contribution */
+        for (j = 1; j <= f1; j++) {
+            a1 = cooco[i][j] * cooco[i][j] * m3 / m2;
+            a2 = a2 + a1;
+            fprintf(summary, "%5d|", (int) (a1 * 10000));
+            fprintf(inert_out,"%d%c",(int) (a1 * 10000),pm->seperator);
+        }
+        fprintf(summary, "|%5d ", (int) ((1-a2) * 10000));
+        fprintf(summary, "|%5d |%5d |\n",
+                (int) (inertotal * m3 * 10000), (int) (m2 * 10000));
+        fprintf(inert_out,"\n");
+    }
+    fprintf(summary, "\n");
+
+    /* free memory */
+    freetab(w);
+    freetab(cooco);
+    freevec(poili);
+    freevec(poico);
+    freevec(conco);
+    freevec(vtab);
+    /* the output stream was previously left open on return */
+    fileclose(&inert_out);
+}                               /* End of Inertia */
+
+/************** selectcol    ***************************************/
+/* Extracts one column from the binary file *nfic, which is read as a    */
+/* sequence of 2-value records. The number of records read is the        */
+/* smaller of pcoa->rows and pcoa->colm; col[1..n] receives value number */
+/* numcol of each record. If numcol is larger than 2 the routine reports */
+/* the error and aborts through my_exit(99).                             */
+/*************************************************************************/
+void selectcol (char *nfic , double *col, int numcol)
+{
+    FILE *in = NULL;
+    double *record;
+    int rec;
+    const int width = 2;
+    const int nrec  = (pcoa->rows < pcoa->colm) ? pcoa->rows : pcoa->colm;
+
+    vecalloc(&record, width);
+
+    if (numcol > width) {
+        fprintf (pm->my_err,"fatal input-output error numcol>c1 (selectcol");
+        my_exit(99,"corresp");
+    }
+
+    in = open_file( "",nfic,"rb",FALSE);
+    if (in == NULL)
+        my_exit(6,"nfic4");
+
+    for (rec = 1; rec <= nrec; rec++) {
+        readvec(record, in);
+        col[rec] = record[numcol];
+    }
+
+    fileclose(&in);
+    freevec(record);
+}
+
+/************** suprow       ***************************************/
+/* This sub adds supplementary genes after the correspondence analysis   */
+/* has completed for an initial set of genes. The supplementary table is */
+/* read in and each of its rows is projected onto the axes found by the  */
+/* initial analysis.                                                     */
+/*                                                                       */
+/*   num_seq    number of supplementary genes (rows of the table)        */
+/*   nficvp     binary file of eigenvalue records (read by lecvalpro)    */
+/*   nfictasup  binary file holding the supplementary table              */
+/*   nficlisup  output file; the coordinates are appended to it          */
+/*   option     text file read one line per gene; the text before the    */
+/*              first whitespace is used as the gene label               */
+/*   summary    open stream that receives a formatted copy               */
+/*                                                                       */
+/* Column coordinates come from "cbfcco" and column weights from         */
+/* "cbfcpc".                                                             */
+/*************************************************************************/
+void suprow (int num_seq, char *nficvp, char *nfictasup, char *nficlisup,
+char*option , FILE *summary)
+{
+    int l1, c1, l2, c2, i, j, k;
+    double **compos, **tabsup;
+    double *vp, *poico;
+    double a1, a2;
+    FILE *ficlisup = NULL;
+    FILE *fnam = NULL;
+
+    l2 = num_seq;
+    c2 = pcoa->colm;
+    l1 = pcoa->rows;
+    c1 = pcoa->colm;
+
+    if ( (fnam = open_file("",option, "r",FALSE)) == NULL)
+        my_exit(6,"sup row corresp");
+
+    taballoc(&tabsup, l2, c2);
+    lecmat(tabsup, nfictasup);
+
+    taballoc(&compos, c1, pcoa->axis);
+    lecmat(compos, "cbfcco");
+
+    /* two work vectors (moy, var) used to be allocated here and freed   */
+    /* at the end without ever being read or written; they are gone      */
+
+    vecalloc(&vp, pcoa->axis);
+    lecvalpro(vp, nficvp);
+
+    vecalloc(&poico, c1);
+    lecvec(poico, "cbfcpc");
+
+    /* divide each column coordinate by the square root of its axis'     */
+    /* eigenvalue, then multiply by the column weight                    */
+    for (j = 1; j <= pcoa->axis; j++) {
+        vp[j] = sqrt((double)vp[j]);
+        a1 = vp[j];
+        for (i = 1; i <= c1; i++) {
+            compos[i][j] = compos[i][j] / a1;
+        }
+    }
+    for (i = 1; i <= c1; i++) {
+        a1 = poico[i];
+        for (j = 1; j <= pcoa->axis; j++) {
+            compos[i][j] = compos[i][j] * a1;
+        }
+    }
+
+    /* Transform genes with the initial factor: each row becomes a       */
+    /* profile (cell / row total) divided by the column weight. Rows     */
+    /* that sum to zero and columns of zero weight are left unchanged.   */
+    for (i = 1; i <= l2; i++) {
+        a1 = 0.0;
+        for (j = 1; j <= c1; j++) {
+            a1 = a1 + tabsup[i][j];
+        }
+        if (a1 != 0.) {
+            for (j = 1; j <= c1; j++) {
+                a2 = tabsup[i][j] / a1;
+                if (poico[j] != 0) { tabsup[i][j] = a2 / poico[j]; }
+            }
+        }
+    }
+
+    /* Position the supplementary genes on the original factors */
+    if ( (ficlisup = open_file("",nficlisup,"a",FALSE)) == NULL )
+        my_exit(1,"nficlisup");
+
+    fprintf(summary,"\n\nThe position of each additional gene by axis "
+                    "(see also %s )\n",option);
+
+    fprintf(summary, "Additional genes added after COA: \n");
+    /* NOTE(review): l1 is the row count of the initial analysis, not    */
+    /* the number of additional genes (l2) - confirm which is intended   */
+    fprintf(summary, "Number of genes: %d, columns: %d\n\n", l1, c1);
+
+    for (i = 1; i <= l2; i++) {
+        /* At end of file or on a read error use an empty label instead  */
+        /* of whatever an earlier read left behind in the buffer.        */
+        if (fgets(pm->junk, BUFSIZ, fnam) == NULL)
+            pm->junk[0] = '\0';
+        pm->junk[35] = '\0';
+        for ( j = 35 ; j >= 0; j--) {
+            /* isspace() requires an unsigned char value (or EOF)        */
+            if ( isspace((unsigned char) pm->junk[j]) ) pm->junk[j] = '\0';
+        }
+        fprintf(ficlisup, "%s%c",pm->junk,pm->seperator);
+        fprintf(summary , "%s%c",pm->junk,pm->seperator);
+
+        for (k = 1; k <= pcoa->axis; k++) {
+            a1 = 0.;
+            for (j = 1; j <= c1; j++) {
+                a1 = a1 + tabsup[i][j] * compos[j][k];
+            }
+            fprintf(ficlisup,"%f%c",(float)a1,pm->seperator);
+            fprintf(summary ,"%10.5f%c",(float)a1,pm->seperator);
+        }
+        fprintf(ficlisup,"\n");
+        fprintf(summary ,"\n");
+    }
+    fclose(ficlisup);
+
+    freetab (tabsup);
+    freetab (compos);
+    freevec (vp);
+    freevec(poico);
+    fileclose(&fnam);
+}
+
+/************** lecvalpro    ***************************************/
+/* Fills v1[1..count] from the binary file *nfic, where count is read    */
+/* from v1[0]. The file is consumed as pairs of 4-byte floats: the first */
+/* value of each pair is stored, the second is read and discarded.       */
+/* my_exit() is called if the file cannot be opened (code 6) or a read   */
+/* comes up short (code 5).                                              */
+/*************************************************************************/
+void lecvalpro (double *v1, char *nfic)
+{
+    FILE *in = NULL;
+    float kept, skipped;
+    int idx, count;
+
+    in = open_file("", nfic, "rb", FALSE);
+    if (in == NULL)
+        my_exit(6, "lecvalpro");
+
+    count = (int) v1[0];
+    for (idx = 1; idx <= count; idx++) {
+        if (fread(&kept, 4, 1, in) != 1) {
+            fprintf(pm->my_err, "(lecvalpro)");
+            my_exit(5, "lecvalpro");
+        }
+        v1[idx] = kept;
+        if (fread(&skipped, 4, 1, in) != 1) {
+            fprintf(pm->my_err, "(lecvalpro)");
+            my_exit(5, "lecvalpro2");
+        }
+    }
+    fileclose(&in);
+}
+
diff --git a/indices.txt b/indices.txt
new file mode 100755
index 0000000..d15ef25
--- /dev/null
+++ b/indices.txt
@@ -0,0 +1,139 @@
+Codon usage indices
+
+This document describes the indices calculated by CodonW. By default only
+the G+C content of the sequence is reported, the other indices being
+dependent on the genetic code selected. More than one index may be
+calculated at the same time.
+
+Codon Adaptation Index (CAI) (Sharp and Li 1987).
+CAI is a measurement of the relative adaptiveness of the codon usage of a
+gene towards the codon usage of highly expressed genes. The relative
+adaptiveness (w) of each codon is the ratio of the usage of each codon, to
+that of the most abundant codon for the same amino acid. The relative
+adaptiveness of codons, albeit for a limited choice of species, can be
+selected from Menu 3. The user can also input a personal choice of values.
+The CAI index is defined as the geometric mean of these relative
+adaptiveness values. Non-synonymous codons and termination codons (dependent
+on genetic code) are excluded.
+
+To prevent a codon absent from the reference set but present in other genes
+from having a relative adaptiveness value of zero, which would cause CAI to
+evaluate to zero for any genes which used that codon, it was suggested that
+absent codons should be assigned a frequency of 0.5 when estimating w (Sharp
+and Li 1987). An alternative suggestion was that w should be adjusted to
+0.01 where otherwise it would be less than this value (Bulmer 1988). CodonW
+does not adjust the w value if a non-zero input value is found; zero values
+are assigned a value of 0.01.
+
+Frequency of Optimal codons (Fop) (Ikemura 1981).
+This index is the ratio of optimal codons to synonymous codons (genetic
+code dependent). Optimal codons for several species are in-built and can be
+selected using Menu 3. By default, the optimal codons of E. coli are
+assumed. The user may also enter a personal choice of optimal codons. If
+rare synonymous codons have been identified, there is a choice of
+calculating the original Fop index or a modified Fop index. Fop values for
+the original index are always between 0 (where no optimal codons are used)
+and 1 (where only optimal codons are used). When calculating the modified
+Fop index, negative values are adjusted to zero.
+
+Codon Bias Index (CBI) (Bennetzen and Hall 1982).
+Codon bias index is another measure of directional codon bias, it measures
+the extent to which a gene uses a subset of optimal codons. CBI is similar
+to Fop as used by Ikemura, with expected usage used as a scaling factor. In a
+gene with extreme codon bias, CBI will equal 1.0, in a gene with random
+codon usage CBI will equal 0.0. Note that it is possible for the number of
+optimal codons to be less than expected by random chance. This results in a
+negative value for CBI.
+
+The effective number of codons (NC) (Wright 1990).
+This index is a simple measure of overall codon bias and is analogous to the
+effective number of alleles measure used in population genetics. Knowledge
+of the optimal codons or a reference set of highly expressed genes is
+unnecessary. Initially the homozygosity for each amino acid is estimated
+from the squared codon frequencies (see Equation 5).
+
+
+If amino acids are rare or missing, adjustments must be made. When
+there are no amino acids in a synonymous family, Nc is not calculated
+as the gene is either too short or has extremely skewed amino acid
+usage (Wright 1990). An exception to this is made for genetic codes
+where isoleucine is the only 3-fold synonymous amino acid, and is not
+used in the protein gene. The reported value of Nc is always between 20
+(when only one codon is effectively used for each amino acid) and 61
+(when codons are used randomly). If the calculated Nc is greater than
+61 (because codon usage is more evenly distributed than expected), it
+is adjusted to 61.
+
+G+C content of the gene.
+The frequency of nucleotides that are guanine or cytosine.
+
+G+C content 3rd position of synonymous codons (GC3s).
+This is the fraction of codons, that are synonymous at the third codon
+position, which have either a guanine or cytosine at that third codon
+position.
+
+Silent base compositions.
+Selection of this option calculates four separate indices, i.e. G3s, C3s,
+A3s & T3s. Although correlated with GC3s, this index is not directly
+comparable. It quantifies the usage of each base at synonymous third codon
+positions. When calculating GC3s each synonymous amino acid has at least one
+synonym with G or C in the third position. Two or three fold synonymous
+amino acids do not have an equal choice between bases in the synonymous
+third position. The index A3s is the frequency that codons have an A at their
+synonymous third position, relative to the amino acids that could have a
+synonym with A in the synonymous third codon position. The codon usage
+analysis of Caenorhabditis elegans identified a trend correlated with the
+frequency of G3s. Though it was not clear whether it reflected variation in
+base composition (or mutational biases) among regions of the C. elegans
+genome, or another factor (Stenico et al. 1994).
+
+Length silent sites (Lsil).
+Frequency of synonymous codons.
+
+Length amino acids (Laa).
+Equivalent to the number of translatable codons.
+
+Hydropathicity of protein.
+The general average hydropathicity or (GRAVY) score, for the hypothetical
+translated gene product. It is calculated as the arithmetic mean of the sum
+of the hydropathic indices of each amino acid (Kyte and Doolittle 1982).
+This index has been used to quantify the major COA trends in the amino acid
+usage of E. coli genes (Lobry and Gautier 1994).
+
+Aromaticity score
+The frequency of aromatic amino acids (Phe, Tyr, Trp) in the hypothetical
+translated gene product. The hydropathicity and aromaticity protein scores
+are indices of amino acid usage. The strongest trend in the variation in the
+amino acid composition of E. coli genes is correlated with protein
+hydropathicity, the second trend is correlated with gene expression, while
+the third is correlated with aromaticity (Lobry and Gautier 1994). The
+variation in amino acid composition can have applications for the analysis
+of codon usage. If total codon usage is analysed, a component of the
+variation will be due to differences in the amino acid composition of genes.
+
+Bennetzen, J. L., and B. D. Hall, (1982). Codon selection in yeast. Journal
+of Biological Chemistry 257: 3026-3031.
+Bulmer, M., (1988). Are codon usage patterns in unicellular organisms
+determined by selection-mutation balance. Journal of Evolutionary
+Biology 1: 15-26.
+Ikemura, T., (1981). Correlation between the abundance of Escherichia coli
+transfer RNAs and the occurrence of the respective codons in its
+protein genes: a proposal for a synonymous codon choice that is
+optimal for the E. coli system. Journal of Molecular Biology 151: 389-
+409.
+Kyte, J., and R. Doolittle, (1982). A simple method for displaying the
+hydropathic character of a protein. Journal of Molecular Biology 157:
+105-132.
+Lobry, J. R., and C. Gautier, (1994). Hydrophobicity, expressivity and
+aromaticity are the major trends of amino acid usage in 999
+Escherichia coli chromosome encoded genes. Nucleic Acids Research 22:
+3174-3180.
+Sharp, P. M., and W. H. Li, (1987). The codon adaptation index - a measure
+of directional synonymous codon usage bias, and its potential
+applications. Nucleic Acids Research 15: 1281-1295.
+Stenico, M., A. T. Lloyd and P. M. Sharp, (1994). Codon usage in
+Caenorhabditis elegans: delineation of translational selection and
+mutational biases. Nucleic Acids Research 22: 2437-2446.
+Wright, F., (1990). The effective number of codons used in a gene. Gene
+87: 23-29.
+
diff --git a/input.dat b/input.dat
new file mode 100755
index 0000000..740568c
--- /dev/null
+++ b/input.dat
@@ -0,0 +1,2835 @@
+>YCG9 Probable 1377 residues Pha 0 Code 0
+ATGAATATGCTCATTGTCGGTAGAGTTGTTGCTAGTGTTGGGGGAAGCGGACTTCAAACG
+CTTTGCTTTGTTATTGGTTGTACGATGGTTGGTGAAAGGTCACGTCCATTGGTGATTTCC
+ATCCTAAGTTGTGCATTTGCTGTAGCTGCTATCGTTGGTCCTATAATCGGAGGTGCCTTT
+ACAACCCATGTTACCTGGAGGTGGTGCTTCTATATCAATCTTCCTATCGGTGGTCTTGCC
+ATTATTATGTTTTTACTCACATATAAGGCCGAGAATAAGGGTATACTTCAACAAATTAAA
+GATGCTATAGGAACAATCTCGAGCTTTACTTTTAGTAAGTTCAGACACCAAGTTAATTTT
+AAAAGACTTATGAATGGCATAATCTTCAAGTTTGACTTCTTTGGTTTTGCCCTCTGCTCT
+GCAGGGCTGGTCCTTTTCCTACTGGGGCTAACCTTTGGTGGTAATAAATATAGTTGGAAC
+TCTGGCCAAGTCATCGCATATTTGGTTTTGGGTGTCTTACTTTTTATTTTTTCATTGGTG
+TACGATTTCTTCTTATTCGATAAATTCAACCCGGAACCTGATAATATATCCTACAGGCCT
+CTCCTTCTAAGAAGATTGGTAGCAAAACCAGCCATAATAATAATAAACATGGTAACATTT
+CTATTATGTACCGGTTACAATGGGCAAATGATATACTCTGTCCAGTTTTTCCAACTTATA
+TTTGCGTCGAGTGCATGGAAAGCCGGTCTTCACTTGATACCAATCGTTATTACCAACGTT
+ATTGCGGCCATTGCAAGTGGTGTGATTACCAAAAAGCTCGGTTTAGTTAAACCACTCTTA
+ATATTTGGAGGCGTTCTTGGGGTAATTGGAGCAGGGCTTATGACACTTATGACAAATACG
+TCCACGAAGTCAACTCAAATTGGTGTTTTGCTATTACCGGGGTTTTCCCTTGGATTTGCT
+CTACAAGCATCGCTCATGAGTGCACAGCTTCAAATTACCAAAGATCGTCCAGAAGCTGCT
+ATGGACTTTATTGAAGTAACAGCTTTCAATACATTCATGAAGTCATTAGGTACAACTCTT
+GGTGGTGTGCTTTCAACCACTGTTTTTTCCGCCTCCTTTCACAACAAAGTATCACGAGCT
+CATCTAGAGCCTTACGAAGGAAAAACGGTTGATGACATGATTTTGTATCGTCTTCAAAAC
+TACGACGGTTCTCATTCGACTATTGGAAACATTTTAAGCGACTCCATTAAGAACGTATTT
+TGGATGGATCTAGGGTTTTATGCCTTAGGATTTTTGTTTTGTAGTTTTTCATCCAATAAG
+AAATTAATCATACCAAAAAAGGACGAGACACCAGAAGATAATTTAGAAGACAAGTAG
+>YCG8 573 residues Pha 0 Code 0
+ATGAGAACGGCCGTACCGCAGTTGCTGGAAGCAACTGCCTGTGTCTCTAGAGAATGCCCC
+CTCGTCAAAAGAAGTCAGGACATAAAAAGAGCAAGAAAACGTCTACTCAGTGACTGGTAT
+AGGCTCGGCGCTGATGCAAACATGGATGCCGTATTACTAGTTGTTAACTCCGCCTGGAGG
+TTTCTGGCCGTCTGGCGACCCTTCGTAAACTCAATCCAACATGCAACTCAGGAATTGTAT
+CAAAATATCGCCCATTACCTTCTTCATGGCAACGTAAATATACAGAGGGTCACAGCACTA
+CTACAGCTCGTAATGGGACAGGACGATTTACTTTTTAGTATGGATGATGTTCTACAAGAG
+GTCTTCAGAATACAGCTCTATTTGAATAAGATGCTGCCGCACAACTCTCACAAATGGCAA
+AAGCCATCCCCCTTTGACTCCGCAAACTTACTACTTAACTTCAGAGACTGGACAACTGAC
+AATGCTCTCCTCCAAGAGTTGCTACTATCCTATCCCACAATTAATAAAAACAAACACAAA
+AATCACTCCGTCCCTCGTCTAATACAAGTTTGA
+>ALPHA2 633 residues Pha 0 Code 0
+ATGAATAAAATACCCATTAAAGACCTTTTAAATCCACAAATCACAGATGAGTTTAAATCC
+AGCATACTAGACATAAATAAAAAGCTCTTTTCTATTTGCTGTAATTTACCTAAGTTACCA
+GAGAGTGTAACAACAGAAGAAGAAGTTGAATTAAGGGATATATTAGGATTCTTATCTAGG
+GCCAACAAAAACCGTAAGATTAGTGATGAGGAGAAGAAGTTGTTGCAAACAACATCTCAA
+CTCACTACTACCATTACTGTATTACTCAAAGAAATGCGCAGCATAGAAAACGATAGAAGT
+AATTATCAACTTACACAGAAAAATAAATCGGCGGATGGGTTGGTATTTAATGTGGTAACT
+CAAGATATGATAAACAAAAGTACTAAACCTTACAGAGGACACCGGTTTACAAAAGAAAAT
+GTCCGAATACTAGAAAGTTGGTTTGCAAAGAACATCGAGAACCCATATCTAGATACCAAG
+GGCCTAGAGAATCTAATGAAGAATACCAGTTTATCTCGCATTCAAATCAAAAACTGGGTT
+TCGAATAGAAGAAGAAAAGAAAAAACAATAACAATCGCTCCAGAATTAGCGGACCTCTTG
+AGCGGTGAGCCTCTGGCAAAGAAGAAAGAATGA
+>ALPHA1 528 residues Pha 0 Code 0
+ATGTTTACTTCGAAGCCTGCTTTCAAAATTAAGAACAAAGCATCCAAATCATACAGAAAC
+ACAGCGGTTTCAAAAAAGCTGAAAGAAAAACGTCTAGCTGAGCATGTGAGGCCAAGCTGC
+TTCAATATTATTCGACCACTCAAGAAAGATATCCAGATTCCTGTTCCTTCCTCTCGATTT
+TTAAATAAAATCCAAATTCACAGGATAGCGTCTGGAAGTCAAAATACTCAGTTTCGACAG
+TTCAATAAGACATCTATAAAATCTTCAAAGAAATATTTAAACTCATTTATGGCTTTTAGA
+GCATATTACTCACAGTTTGGCTCCGGTGTAAAACAAAATGTCTTGTCTTCTCTGCTCGCT
+GAAGAATGGCACGCGGACAAAATGCAGCACGGAATATGGGACTACTTCGCGCAACAGTAT
+AATTTTATAAACCCTGGTTTTGGTTTTGTAGAGTGGTTGACGAATAATTATGCTGAAGTA
+CGTGGTGACGGATATTGGGAAGATGTGTTTGTACATTTGGCCTTATAG
+>CHA1 1083 residues Pha 0 Code 0
+ATGTCGATAGTCTACAATAAAACACCATTATTACGTCAATTCTTCCCCGGAAAGGCTTCT
+GCACAATTTTTCTTGAAATATGAATGCCTTCAACCAAGTGGCTCCTTCAAAAGTAGAGGA
+ATCGGTAATCTCATCATGAAAAGTGCCATTCGAATTCAAAAGGACGGTAAAAGATCTCCT
+CAGGTTTTCGCTAGTTCTGGCGGTAATGCCGGTTTTGCTGCTGCAACAGCATGTCAAAGA
+CTGTCTCTACCATGTACAGTCGTGGTTCCTACAGCGACAAAGAAGAGAATGGTAGATAAA
+ATCAGGAACACCGGTGCCCAGGTTATCGTGAGTGGTGCCTACTGGAAAGAAGCAGATACT
+TTTTTAAAAACAAATGTCATGAATAAAATAGACTCTCAGGTCATTGAGCCCATTTATGTT
+CATCCCTTCGATAATCCGGATATTTGGGAAGGACATTCATCTATGATAGATGAAATAGTA
+CAAGATTTGAAATCGCAACATATTTCCGTGAATAAGGTTAAAGGCATAGTATGCAGCGTT
+GGTGGAGGTGGTTTATACAATGGTATTATTCAAGGTTTGGAAAGGTATGGTTTAGCTGAT
+AGGATCCCTATTGTGGGGGTGGAAACGAATGGATGTCATGTTTTCAATACTTCTTTGAAA
+ATAGGCCAACCAGTTCAATTCAAGAAGATAACAAGTATTGCTACTTCTCTAGGAACGGCC
+GTGATCTCTAATCAAACTTTCGAATACGCTCGCAAATACAACACCAGATCCGTTGTAATA
+GAGGACAAAGATGTTATTGAACCCTGTCTTAAATATACACATCAATTCAATATGGTGATT
+GAACCGGCATGTGGCGCCGCATTGCATTTGGGTTACAACACTAAGATCCTAGAAAATGCA
+CTGGGCTCAAAATTAGCTGCGGATGACATTGTGATAATTATTGCTTGTGCGAGCTCCTCT
+AATACTATAAAGGACTTGGAAGAAGCGTTGGATAGCATGAGAAAAAAAGACACTCCTGTA
+ATAGAAGTCGCTGACAATTTCATATTTCCAGAAAAAAATATTGTGAATTTAAAAAGTGCT
+TGA
+>KRR1 951 residues Pha 0 Code 0
+ATGGTGTCTACACATAACAGAGATAAACCTTGGGATACGGATGATATTGATAAATGGAAG
+ATAGAGGAGTTTAAGGAAGAGGATAACGCATCCGGTCAACCTTTTGCTGAAGAGTCCAGT
+TTTATGACTTTGTTTCCTAAATACAGAGAAAGTTACTTGAAGACGATTTGGAATGATGTA
+ACAAGGGCTCTAGACAAACACAACATAGCGTGTGTTCTAGATTTAGTCGAAGGTTCTATG
+ACAGTAAAAACAACTAGAAAAACATACGATCCCGCTATCATTTTGAAAGCCAGAGATTTG
+ATCAAATTATTGGCGAGATCCGTTCCTTTCCCGCAAGCCGTTAAGATCCTACAAGATGAC
+ATGGCATGCGACGTTATTAAAATTGGTAATTTCGTTACTAACAAAGAAAGGTTTGTCAAG
+AGAAGACAACGTCTTGTAGGCCCTAACGGTAATACTTTAAAGGCTTTGGAACTTCTAACT
+AAATGTTACATTCTAGTACAAGGTAACACAGTAAGTGCCATGGGTCCCTTCAAGGGCTTG
+AAGGAGGTCCGTCGAGTAGTAGAAGATTGTATGAAAAATATTCACCCTATCTATCATATC
+AAGGAATTAATGATAAAAAGAGAATTGGCAAAAAGGCCAGAGTTAGCCAATGAAGATTGG
+TCAAGATTCTTGCCCATGTTTAAGAAGAGGAATGTGGCCAGAAAGAAACCCAAGAAGATC
+AGAAACGTCGAAAAGAAGGTCTATACTCCATTTCCTCCTGCCCAATTGCCTAGAAAGGTT
+GATTTGGAAATTGAAAGTGGTGAGTATTTCTTAAGCAAGAGAGAAAAGCAAATGAAGAAA
+TTAAATGAGCAAAAGGAAAAGCAAATGGAAAGAGAAATCGAAAGGCAGGAAGAGAGAGCA
+AAAGATTTCATAGCTCCGGAAGAAGAAGCATACAAGCCAAACCAAAATTAG
+>PRD1 2139 residues Pha 0 Code 0
+ATGCGATTGTTGCTGTGCAAGAATTGGTTTGCGTCACCTGTAATCTCACCACTACTGTAT
+ACCCGCTCCTTATATTCAATGGCTAACACTACTAGTTTCCCTATTGCTCCCCAGGCCCCG
+CCTAATTGGTCGTTCACTCCCAGCGATATTAGTGGGAAAACCAACGAAATCATCAACAAC
+AGCAACAATTTCTATGATTCTATGAGTAAGGTAGAGAGCCCTTCCGTGAGTAATTTTGTG
+GAGCCTTTCATGAAGTTTGAAAATGAATTGGGCCCAATAATTAACCAATTAACTTTCTTA
+CAGCATGTGTCGTCTGATAAAGAAATTAGGGACGCATCTGTGAACTCCTCAATGAAACTG
+GATGAGTTGAACATCGATCTATCTCTGCGTCACGACATCTTTTTGCAATTCGCCCGCGTC
+TGGCAGGATGTTCAATCGAAGGCAGATTCTGTGGAAAGAGAAACTTTCAAATACGTTGAG
+AAGTCTTACAAGGACTACATTCATTCTGGTTTGGAACTTGACGAGGGAAACCGATTGAAA
+ATCAAAGAGATCAAAAAGAAGATCTCCGTTAACTCTATTAATTTTTCGAAGAATCTGGGA
+GAACAAAAGGAATACATCACTTTCACCAAAGAACAATTGGAAGGTGTGCCGGATTCTATT
+TTGACGCAGTTCGAGACAATAAAATCTGACAAAGATAGCAATGAAACCTTGTATAAAGTC
+ACCTTCAAATATCCGGACATTTTTCCCGTGATGAAATTGGCATCCTCAGCTCAGACTAGA
+AAGCAGGCCTTTTTGGCCGACCAAAATAAGGTCCCTGAAAATGAAGCTATACTGTTGGAT
+ACATTGAAGCTGCGTGACGAATTGGCCTCGTTATTGGGCTATGACACGTATGCGAACTAC
+AACCTGTATGATAAAATGGCTGAAGATAGCACTACGGTAATGAACTTTTTGAATGATTTG
+AAGGACAAGCTAATTCCGCTGGGCAGAAAGGAACTACAGGTCTTGCAAGATATGAAAGCC
+GAAGATGTTAAGAAACTTAACCAGGGTGCAGATCCAAACTACTACATTTGGGACCACCGT
+TACTACGATAACAAATATTTGTTAGAAAACTTCAATGTGGACCTAGAAAAGATTTCTGAA
+TATTTTCCACTAGAGGCTACGATTACTGGTATGCTGGAAATATACGAAACATTGTTTAAT
+TTGAAGTTTATCGAGACGAAAGATTCTCAAAACAAATCTGTTTGGCATGACGACGTCAAA
+CAAATCGCCGTTTGGAATATGGATGATCCAAAGTCTCCAAACTTTGTTGGTTGGATTTAT
+TTCGATTTACATCCTCGTGATGGTAAATATGGCCACGCTGCCAATTTTGGTTTATCGTCA
+TCATTCATGATTGATGACACCACAAGATCGTATCCGGTTACTGCGTTGGTTTGCAATTTC
+TCCAAATCTACGAAGGATAAACCTTCTCTACTGAAGCATAACGAAATAGTGACCTTTTTC
+CATGAATTGGGCCATGGTATCCATGACCTGGTGGGACAAAACAAGGAATCGAGGTTTAAT
+GGCCCCGGATCTGTTCCATGGGATTTTGTGGAGGCACCTTCCCAAATGTTAGAATTTTGG
+ACTTGGAATAAGAATGAATTAATCAACCTCTCATCACATTACAAAACGGGCGAAAAAATT
+CCAGAATCTTTGATCAATTCATTGATCAAAACTAAACACGTAAATGGTGCTTTATTCACT
+CTAAGACAATTACATTTTGGGTTATTTGATATGAAAGTACATACTTGTAAAGACTTGCAA
+AACCTGTCAATTTGCGATACCTGGAACCAATTGAGACAGGATATTTCTTTGATTTCTAAT
+GGTGGTACGTTATCCAAGGGTTATGATTCATTTGGCCATATAATGTCAGACTCTTACTCT
+GCCGGTTATTACGGTTATCTATGGGCGGAAGTCTTTGCAACTGATATGTATCACACCAAA
+TTCGCTAAGGATCCGTTAAATGCCAAGAATGGGATACAATACCGTGATATTGTGTTGGCT
+CGTGGTGGCCTTTATGATATTAATGATAATCTGAAAGAATTTTTGGGTAGGGAACCTTCT
+AAGGATGCTTTCTTGAAGGAGCTGGGCTTACAGAACTAA
+>KAR4 1008 residues Pha 0 Code 0
+ATGGCATTCCAAGATCCAACTTACGACCAGAATAAAAGCAGACACATCAACAACAGTCAC
+TTGCAAGGGCCAAACCAGGAAACAATAGAAATGAAATCTAAACACGTATCATTCAAACCC
+TCTAGAGACTTCCATACAAACGATTACTCGAATAACTACATTCATGGGAAGTCGCTACCG
+CAACAGCATGTTACTAATATTGAGAATAGGGTTGATGGCTATCCAAAACTTCAGAAATTA
+TTTCAGGCGAAAGCTAAACAAATAAATCAATTTGCCACTACGCCATTTGGGTGTAAAATC
+GGAATAGATTCCATTGTTCCAACGTTGAATCACTGGATACAGAACGAAAATTTGACTTTC
+GACGTGGTGATGATTGGCTGCTTAACAGAAAATCAGTTTATTTACCCAATTTTAACCCAA
+TTGCCATTGGATAGATTGATCTCCAAACCAGGTTTCCTGTTCATCTGGGCCAATTCTCAA
+AAAATCAATGAACTTACTAAACTTTTGAATAATGAAATATGGGCTAAAAAGTTTAGAAGA
+AGTGAAGAATTGGTTTTTGTTCCTATTGACAAGAAATCACCGTTTTATCCAGGTTTAGAT
+CAGGACGATGAAACGTTGATGGAAAAAATGCAATGGCACTGTTGGATGTGTATCACAGGT
+ACAGTAAGGAGGTCTACAGATGGACATCTTATTCATTGTAACGTAGACACTGACTTGAGT
+ATCGAAACGAAGGACACCACTAATGGTGCTGTACCATCCCATTTGTATCGTATTGCAGAA
+AACTTCTCTACCGCGACTAGACGATTACATATTATTCCTGCAAGGACTGGTTACGAGACA
+CCCGTCAAAGTAAGACCTGGCTGGGTTATAGTGAGCCCAGATGTTATGTTGGATAACTTC
+TCACCCAAGAGATATAAAGAAGAGATAGCTAATTTAGGTTCGAATATCCCATTAAAAAAT
+GAGATTGAGCTGTTAAGACCAAGAAGTCCAGTACAAAAAGCACAATAA
+>PBN1 1251 residues Pha 0 Code 0
+ATGGTGACAAGACATAGAGTGACTGTACTCTACAATGCCCCTGAGGATATCGGTAATCAT
+ATGCGCCAAAATGACACTCATTTGACTGTTCGTGGAGGTTCTGGTGTGGTTTTACAACAA
+AGGTGGCTATTAGAGAGGACTGGAAGCTTGGATAAATCCTTTACGAGAATCACTTGGAGG
+CCCAGAGCGGACTTGGCTAGAAGTTTAAGCGTTATAGAAAATGAACTGAGTGCTGGCTTT
+TCAGTTTACTCAAATTCTTCGGATGTGCCGGAAAGGTTTATTACTAACCCAGTCTACAAT
+TCATTTCACAGTGAGAAGTTTGACATAGAGCAGTACTTGCCTCCCGAAGTAGATTTGAAT
+CTGTCATGGAATCCAGAAGATTTTACATATGATATATCAGTGGAGCCCACACAAATCCAA
+ATTGTTGAATATCGTCTGTTGAAACAGGGTGAAGAATTTACAATTGCAAGAGTGAAAGAT
+GAGAAACTCGAAGTAGGTGTATTCTTTGTGGATGCAAGTGATGAAAGTGATGTCGATATT
+GGTGGAATACGTTGTAATTGGAGGATGGACGATGGTAAAATGGAAAGATGTCAGAAAACA
+TCCTTATTGTATAAACAGGGCCATATCGCATACAATCACTCGACGACTACGACATCACTA
+TATCTGAATGAACCTATCGGTTTGCATCCAAAAATCATGATTGATCTCACAGATTTCGAA
+GAACGCCCTAAATGCATGTATCTAATGCACCTGCAATTGCCGTTAGAATTATTTATCGAT
+AAATTCCAATCCTCTCCCTTACTACTTTTTGGAGAAGACGACTTAGAATTACCAGAATAC
+TCTCTTCGAGATAAGGCATGGGGTTCTGAAAGTATCTTTGAATTGAAAGCCGGCACAATG
+AATGAAGTGACATTGCATACTAGATATATTGAGCCTTCTAATAATAAAGGGGATAAATTA
+GAAGTTTCATTTGATCCAGAAGTTATATTAGCCTGCGACACAGGTGACAATAAAGTTTCC
+CGTAATCCATTTTATAAAAAAGGTCTAGGATATGAATCTCTCTTTACAGACGATACTACA
+TTCCGCCATTTGAACTCGACAACTCTTCTAGTACCAATTCCAAGGCCTGACACAAAGGAT
+TATTCCAAGATCAAAAATGGTACGTTACTATGCTTACTCATCTCCATCATATACATTTTC
+TCCAAGGTATTTGGTAACAACAAGAAGAAAAGATCAGTAAAACGGGAATAA
+>LRE1 1761 residues Pha 0 Code 0
+ATGCCCAATACGCATACTCAACATGTGCAAATATCAGAGCCAAATCCTGTAAATACTTTG
+TCTACACCATCCAAAAGAGGTCACCGCCATCGCAGATCGCTAGCAATATCAGGAGATTTT
+GATTTTTTGAAACAGCCTGCAGCAATTGTGAATTTACCACCTCCACAGGCGGCTGAAAAT
+TGTCCTTCAACTGCCCCAACTGCTGTATCAAGTACATTATCGCCAATACGCTACAATAGA
+TTTCCTTGCAAAACCAATGAAGACGCTGGAACGTTAGATTTGCCTGAACCAAGATTTTAT
+CCGTTATCACCAAAGAACAATCTGCAAACACCAAGTCCACGATTTTTCATTAGTGAAGAG
+CCAAGTTTTTCATCGCCAGTTAAAGGCGTCCCAGATGCCATTATTAACCTTGACGATGCG
+TTGAAGACAAGGCCTAGGTCATTTAAATCACATAGAAGATCTGAATCCGCTCCTCCTGAT
+TTGGAGGTTATGGTAGATAAGGGCAATTGTGCAGCCGGTTCTAACTCTATGATTAAAGAA
+GAAGAGGACTCCTTAATTGAACCAGAATCGAAAAATGAATATTATGAGCAAAAGCTTCCA
+ACAGCACTATTATCCCCACTGCGGCCTTCCCTTTGTGTATCTGAACAGGCCATTGATGTA
+GATGATTCAGCTCTCAATGGGTCACCGACCCATCACAACCATGGGATGCAAAACGCCAAT
+GCACGGAATTCCAACACATTCAATTCGTTGAAGATCAAAGGCCAAAAGCAAAGATATTAT
+CATTATACGAAGCAGCTACCTTTGACCGTAGGCTGTGACTCGCAATCTCCAAAAGAACAA
+AGGTCGGCTGCTTCAATGACAATCAATCAGGCAATGACACCTTCTTCCCTGGCCTATACC
+CCTTCTAAACTAGCATCTACTCCCGCAACACCAGTATCCTTTTATGACAGCAATGCGGAC
+ATTAACTTAGAAAGTGATAATTTTCCACTAAAAGATAACCCTAGATATGCCAAGGATGGT
+TATCCTAAAAAGTGCGGCAATTCACAGCTTAATCGTGTGCTGGATAGCGATAAAAGACAG
+GATTTTAGTGGAGAATCGAGAAGAAGAAGATCGGGCAGTCCTATCTCCCACATGCAACAC
+CGCAACCTGATTGATAATATGAAAGGTAGACGAAACAGTAACACGATAAACTCAATCTTC
+AACTACAAGAGTCAACATTATGAAATGCCATATGATGATATGATGAAAAATGAAAACATT
+AATGCACAGTCCATGCCCTTTTCAGTCAACGGTGTCAACAATGAAAATAGTATCGGAGGG
+GTTATTACGAGAGCGGACGATGCACCCCTTCAACACTCTGTGGTCAAATCCTGTACGCCT
+GATGGCAAGGAAGAAATGAATAGGCTTAAAAGTAATGACAGTAATGAATATTCCAAGTCT
+GAAGGGCAGATCAGAACCAATTCGCAACTAAGTAAGGACATTCTCATGGGTGAACCAGGT
+GATATGGTTGATCTGTCCTCTTTTGTCAACACGCAGAGAAAAGCCTCAAATGAAACTGGT
+GACTTAGTCTTTAGTTTATCCCAGGATGATGACGCACTGAAAACGTTCCATGCGAGCAAT
+AGCGCAGCAACAAGCAATGAAAGCTGGTGTATTAGCGATGGTGCGTTAGGAAAGCAGGCG
+CAGGACAGTGAAGTTAGGAGGAAAGAAATCAAATTAGGACTCTTTAGACATATTTTCAAG
+GAAGTAATACAACAATATTAA
+>APA1 966 residues Pha 0 Code 0
+ATGAGTATCCCCGCTGACATTGCATCTTTAATTAGTGACAAGTACAAAAGTGCCTTCGAT
+AATGGTAACTTAAAATTTATCCAGACTGAAACAACGAAAACAAAGGACCCAAAAACCAGC
+ATGCCATACTTGATTAGCCACATGCCAAGTCTGATCGAAAAGCCAGAGCGTGGCCAAACT
+CCAGAAGGAGAGGATCCACTAGGCAAACCTGAGGAAGAATTAACGGTTATCCCAGAATTT
+GGTGGTGCCGATAACAAAGCGTATAAATTGCTATTAAACAAATTCCCTGTAATCCCTGGA
+CACACTTTATTGGTAACTAACGAATACCAACATCAAACTGATGCCTTGACCCCAACCGAT
+TTATTGACTGCTTATAAGTTGCTGTGTGCCTTGGACAATGAAGAATCCGACAAGAGACAC
+ATGGTCTTTTACAATTCTGGTCCAGCCAGTGGTTCTTCATTGGACCACAAACATTTGCAA
+ATTTTGCAAATGCCTGAAAAGTTCGTCACTTTCCAAGATAGACTATGTAATGGTAAAGAA
+CATTTCCTACCAACTTTCAATACTGAACCTTTGCAAGATGCTAAAGTCTCGTTCGCTCAT
+TTTGTCTTGCCAATGCCGGAGTCCGAAGAAACTGTTGATGAAGACCTATTAGCTATGTGT
+TACATCTCCATATTGCAAAGAGCTTTGACCTTTTTCCAGGACTGGTTGAACGAAAATCCA
+GAACTAAAGAAATCCTACAATCTTATGTTAACCAAGGAATGGATCTGTGTCGTTCCACGT
+TCGAAGGCCTTTTCTGATGAAATGAAGATAGGTTTCAACTCCACAGGTTATTGTGGTATG
+ATCTTAACCAAAAATGATGAAGTTTTCTCCAAGATTACTGAAAAACCTGAATTGATTAAC
+GATATCTTATTGGAATGTGGTTTCCCAAACACTTCTGGTCAAAAACCAAACGAATACAAC
+TATTGA
+>YCE9 939 residues Pha 0 Code 0
+ATGTTTAGTAAATACCTCGTAACTGCATCTTCCCTCTTTGTGGCTTTGACCTCTGCAGCA
+TCTACCGTTGATCTAGATGCTCTGCTTCTTCTACCAGGGGTCGAGTCCCACGACGGCGTT
+GATACTGTATTTTCGACCAAAGACTTTTATCAAGTGTCATTCGTCAAATCCATTGCTCCT
+GCTATCGTAAACAGCTCCGTAATCTTCCACGATGTTTCTCGTGGTGTGGCTATGGGCAAT
+GTCAAGAGCAGAGCAAGTATCTTCAACCCAGAGGAAACGTATTACGATTGGGAACAGTAC
+CAAGTAGTAAATAACGGAGACTGGCGAACCGAATGGGCACCTGCCTCTGACTGCATTTGG
+AGGGAGGAGAAGGATAACAGCGACGAAACACCGGACAGATTCCCCATCTCGGTGCCATAT
+AATTGGACGTCACAGTACTCAATTGTAGATTATGACACAGACGCTAACGAAGACAATTTA
+GATTTCAGGTTTATTAAATCATTGCTAGATAAGAAAAATTGGTTGAATAAAATTAACCAG
+ACTGTTTCCCAATCCAGTATTATGGTAGCACCAATGATTAAGCCATACAATGTGGTCCAG
+CTTTGGTATTCAAAATATATGGTTTGGGCAAACGTTCAAAGACAATATTGTAGCGGTGTT
+TATCCAGGAGGGACTCAATGTAGCGCTTGGTCCAGGTACTACCATGTTGATGCACCTACC
+TGCGATGAGCCTGTCGCCTCTTACATGACCAAAATGTCGGAAAATGAGGTTCAGTGTCCC
+AATGAGAGAAACGCAACTACCCTAGAGCCTCTCCGCCTGAATAAGCAGGGAGACTCTGAT
+TTTTCTTTGACTTTCGAGGAAGAGGAAGAGGAAGAGACAGGATCTAAATCTCTTTGGAGT
+ACATTGAAAAAAATTTTCTCTAAAAGAAGTATAAGTTGA
+>YCE8 1392 residues Pha 0 Code 0
+ATGAACCGTATTACTAGGAAAAGTTGTTTATTCGCGATTATATTTGCATCATTATTTGTG
+ACACATGCATTGGGTGCCGCTATTGATCCGCCAAGGCGACCACATAATGTGAAGCCTTTT
+CATAACGGTAATCTCGAACTTCAAAGAAGAGCAAATGAACCGTTTTTTGAAATAGATGTC
+AAGAGTCTGAACACAAACTCACCGATATCAGAGTTGTGTAAAAAAGATTTGCACGTCATT
+GAATCGTCTCATGATCTTTTTCATTTACAAAACCAATGTGAATTCATCTTGGGGTCATTA
+AAAGTCACAAACTATGATTCTAACATTTTGGATTTGAACAGCTTGAGGGCCATTGGTGGT
+GACCTGATTATTCAGGATTCACCTGAACTGATCAGAATCCAAGCCGGGAACTTGAATAAA
+ATCGAAGGGCTCTTCCAATTACAGGGACTAACCTCTTTGGTTTCTGTTGAAATTCCAACT
+TTGAAATTTTGTCAGTCACTGGAGTGGAAAGTTGTTCCCATCTTGAACTACGTCTCCATG
+GATTCTCAGAATATTGAGATTATAAAGGATATTGTCATATCGGATACTTCATTAGCAAAC
+ATCGAGAATTTCAACAAGGTTCAGGAAATTGATACTTTCAATATCAATAATAACAGATTT
+TTAGAAACTATTCATTCGAACGTTAAAACCATTAGGGGACAATTCAGTGTACATGCGAAC
+GCTAAGGAGCTAGAACTTGAAATGCCACACTTGAGAGAAGTGGAAAACATAACGATTAGG
+GACACATCATTGGTCTACCTTCCACAATTAACAAAAGTGAAAAGCTCTTTAGAGTTCATC
+GAAAATTACTTTTACGAATTGAACCTGAACAATTTGCAGAAGATTGGTGGAACATTAGGA
+ATTATCAACAATGTAAATTTAATAAAAGTTAATTTGGAGAACTTAACAGACATTCAAGGT
+GGCTTGATGATCGCCGATAACGAATCCCTCGAGGATATTACTTTCCTGCCAAACTTGAAG
+CAGATTGGAGGTGCTATTTTCTTTGAAGGTTCGTTCAAAGATATCATGTTCGATAGCTTG
+AAACTGGTGAAAGGTAGCGCTTTTATTAAGAGTTCATCAAACGTGTTGGATTGCAATAAA
+TGGACAAACCCATCAAATGGAAGATCAATCATCAGGGGTGGGAAATTCACTTGTATTTCT
+GGTAAGAAGGAAAATACGCTGAATGTTAAACAGGATGGTACAATCATAGAAAAAGGGTAC
+AAAGATTTAACGCAAGAAGGTGAAGACTCCAAGAAAAGAGTGATTTCAAAATACGCGAAC
+TCAGCAAATCCAAGCATGCAATTGGACCCCCTTCTTTTTGGTACATGCCTTGTTGCTATG
+TTATTGTTTTAA
+>YCE7 777 residues Pha 0 Code 0
+ATGAAGAAGACGTTCGAGCAGTTTCGAAAAAGCAATTTACTATTTCAGGTTCTCAAAGGA
+CCCCAGCATCTAGAATGTCAGAAGTTATTTGTCCTTGATTCTTCATTCAATCCACCACAT
+CTGGCCCATTTTCAACTACTATCGCAGACTATTAAAAACTTCAAATTGAAGGACACCCGT
+TCGCATGTTTTATTACTGTTAGCGGTGAATAATGCAGATAAGTTGCCTAAGCCGGCATCT
+TTTCCAACTCGTCTGGAAATGATGTGCTTATTCGCTGACTACCTTCAGGAGAAGCTCCCC
+CAATCTGTAGTATCTGTCGGGTTGACTGTTTTCTCGAAATTCATCGACAAGGACAAAATA
+TTACATGAGCAATTTGTTAAAGGATGCAGTGCAGATATAGGCTACTTAGTTGGTTTTGAT
+ACAATTGCTAGGATCTTTGATGAAAAATATTATCATCCTTTAAAAATCAGTGATGTAATG
+GAGAGCTTCATGTCGGGATCTCAATTATATTGCTTGGCGAGAGGCGATTGCCATCTCAGT
+GCTGAATCGCAACTAAGATACGCCAGTGACATCCTTGAGGGAAAATTCGAACCGGTAATA
+CCAAGAGAATGGGGCGCTAGGATTCATGTTATGCAAAATGATTATCCAGCATTAAGAAAT
+GTTTCATCATCCGAGATTAGGAACAAACTGAAGAATGGGCAAGTGGAGAGTTTGAAAGAC
+GAGTTGCCATTGTGCATATACGATTATTTGATCAATAATAAGACAATATTTGATTGA
+>YCE5 2283 residues Pha 0 Code 0
+ATGAAGATAACGTGTACAGACTTGGTGTACGTCTTCATTTTACTCTTCCTAAACACGAGT
+TGTGTCCAAGCCGTTTTTTCAGATGATGCATTTATCACTGATTGGCAACTGGCTAACTTA
+GGTCCTTGGGAGAAAGTCATCCCTGATTCTCGAGACCGCAACAGGGTTCTCATCTTATCG
+AACCCTACCGAAACTTCCTGCTTAGTTTCTTCGTTTAACGTTTCTTCCGGACAGATTCTT
+TTCAGAAACGTTTTACCCTTTACCATTGATGAGATTCAACTGGATAGTAATGACCATAAC
+GCAATGGTTTGTGTGAACTCTTCAAGCAACCATTGGCAGAAATATGATTTACACGATTGG
+TTTTTACTAGAGGAAGGCGTAGATAATGCCCCTTCTACGACCATTTTACCTCAATCCTCA
+TATTTAAACGATCAAGTATCTATTAAGAACAATGAACTACATATTCTCGATGAGCAGTCA
+AAACTGGCAGAATGGAAATTGGAGTTACCTCAAGGGTTCAATAAAGTGGAATATTTTCAT
+CGTGAAGATCCCCTGGCGTTAGTGTTGAACGTTAATGATACCCAATATATGGGATTCTCT
+GCCAATGGCACAGAATTGATCCCCGTTTGGCAAAGAGATGAATGGTTGACTAACGTGGTA
+GACTATGCTGTATTGGACGTCTTCGATTCTAGGGATGTGGAGTTGAACAAAGATATGAAA
+GCGGAACTTGATTCAAATTCGCTTTGGAATGCTTACTGGCTTAGATTGACAACTAATTGG
+AATCGCCTTATCAACTTATTGAAAGAAAACCAATTCTCACCAGGACGTGTCTTCACTAAA
+CTCCTAGCTCTAGACGCTAAGGATACCACGGTATCAGATTTGAAGTTCGGATTCGCCAAA
+ATCTTAATTGTTTTGACGCATGATGGCTTTATCGGCGGCCTTGATATGGTCAATAAGGGC
+CAACTTATCTGGAAACTCGATTTAGAAATTGATCAGGGCGTCAAAATGTTCTGGACGGAT
+AAAAACCATGACGAACTTGTTGTTTTTTCGCATGATGGGCATTATTTGACAATTGAAGTT
+ACTAAAGATCAACCGATTATCAAATCAAGATCCCCCCTATCTGAAAGGAAAACTGTTGAT
+TCCGTTATTAGGCTGAATGAACATGATCACCAGTATCTGATTAAGTTTGAGGATAAGGAT
+CATTTACTGTTCAAATTGAATCCCGGCAAGAATACGGATGTACCAATAGTTGCCAACAAC
+CATTCTAGTTCCCACATATTCGTCACAGAGCATGACACGAATGGCATTTATGGCTACATA
+ATCGAAAACGATACGGTAAAACAAACTTGGAAAAAAGCCGTAAATTCGAAAGAGAAAATG
+GTGGCATATAGCAAGAGGGAAACAACAAACCTAAACACTCTTGGTATTACACTAGGTGAC
+AAATCGGTTCTTTATAAATATTTGTACCCCAACCTAGCGGCTTATCTGATCGCTAATGAA
+GAACATCATACAATCACTTTTAACTTAATTGATACCATTACAGGAGAAATCCTCATTACC
+CAAGAGCACAAGGATTCTCCGGATTTTAGGTTTCCAATGGATATTGTTTTCGGTGAATAT
+TGGGTCGTTTATTCCTATTTCAGTTCTGAACCTGTTCCAGAACAAAAGTTAGTAGTGGTG
+GAATTATATGAGTCACTAACCCCAGATGAGCGTTTGTCTAACTCAAGCGACAATTTTTCT
+TATGATCCATTGACTGGACACATTAACAAACCTCAATTTCAAACTAAACAATTCATTTTT
+CCCGAGATTATCAAAACAATGTCCATTTCCAAGACAACGGATGATATTACCACAAAGGCA
+ATCGTTATGGAATTAGAAAATGGACAAATCACCTACATACCAAAGCTTTTATTGAATGCA
+AGAGGTAAACCAGCAGAAGAAATGGCCAAGGATAAGAAAAAAGAGTTTATGGCTACCCCA
+TACACGCCAGTTATCCCAATTAATGATAATTTCATTATCACTCATTTCAGAAATCTATTG
+CCAGGATCCGATTCGCAGTTGATCTCCATCCCAACCAATCTGGAATCCACAAGCATTATA
+TGTGATCTAGGCCTTGATGTATTTTGTACAAGGATCACACCTTCGGGCCAATTTGATTTA
+ATGAGTCCTACTTTCGAAAAGGGTAAATTGCTTATTACTATATTCGTCTTGTTGGTGATC
+ACGTATTTTATCCGTCCTTCTGTTTCAAACAAGAAGTTGAAATCCCAATGGCTAATTAAA
+TAG
+>YCE6 324 residues Pha 0 Code 0
+ATGGTAAAGGGTAAAACGTTTCTGAAAAGAATCTGTCCGGAAGAAACGTTAAACGAAGAA
+ACTAAGCAGGAAGTTTCGGTAGGGTTCGATAAGATGAGAACCCTGTTGCGGTCTCGAGAA
+TCAGGGATGACTTTCTCCCAAGGACCTAAGTTAGCCAGTTGCCAATCAGTGATAAATGCA
+TCATCTGAAAAAACGGCTTGGACACAACTCGTGTTTAGGAAGAGTAAAATGAAGACGTAC
+ACCAAGTCTGTACACGTTATCTTCATTGCTATGGGGGAAGGGGAGGATGAAAGTGTTGAT
+ATGAATGTAGGTATTAGTTATTAA
+>YCE4 1254 residues Pha 0 Code 0
+ATGGCTGTATTTACTCCTCCATCAGGTAATAGCAATTCCACCGACCATACTCACACACAA
+GATGACCACGACAAAGATGATAATGATATCAAGAAATTCTACATAAGGCCAAGTTTAGGC
+TTAAAACTGTGGGGTCCGCTCGTACCCGCTCCTGATAACCTACCGGGACTATACACTCTA
+ATCACTATCCAATCTGCAGTGGGTTTCTTTGCCCTTTGGAGACTGAGAAGGCTCTACAAA
+CTACCGCCACCGCGCCGCATTGCCACTGGCACTCACTCGGATTTATCCTTTGGCGAACTA
+CCCAGTGAAATGATTGTCAATGGCAAGACTAAAATCAAAAAGGATATTGCTGACTTTCCA
+ACTTTGAACCGCTTCTCCACCACCCATGGTGACATTGTGCTCGCCCCTCCTCCCATCATA
+CCTCGCCAATCTCGATTCGTCAGCGTCAGAAAGCTCTTATGGGGGTTGTTTGGCTCTTTG
+CTACTTTCTCAGTCACTGTTGGAGCTTACTCGCCTGAACTTTCTTAAATACGACCCCTGG
+TGCGACGAAATGAAATCCGTACGTGACAAGAAGTTTTTCAACAATATTGTCAAATATTAT
+CACGAGGGCATAGACCCCACCAAAATAAAAGTCAAGGATGCTATGAACGGTACTCCTCTC
+TCGACAAATATCCCTGAGGTCAAACAAAGCGTCGCTCTCGCTAGAGCGCAAGTTGAGGCG
+CAGAATCCCATTATTAAATGGTTCGGACCCTTGGAATACAAGCCCATGTCTTTCAACGAG
+TACCTCAATCGCATGGAATTTCACTTGGACATGTTCGAGTTTTTTCAAAATAAAAGAAAC
+ATTAGAGAAAATTCCATTGAACTCATCAATTCCATATCCCACAATCCGCAGTCTTCTTCT
+ACTGGCCTTGAAGGTCTTTCCGAGTCCAAAAAACTCCATCTACAAAATGTGGAAAAAAGA
+CTGCATTTCTTAGCATCTTCGGGAGATTCCATTTCCGCACCAGTAAAGAAGAGATCCAGC
+ACCACACTCTCCCGAGGTGTCATTTTGCCCCATGACACGAAAGGCCCGCAAGATATTGAT
+CTCGATACAATAAGATCGCTTTATGATCCATGGATGACTTTGGCCTTAGAAACTTCGCTA
+AGCATCAAATTCATACCAACTACCATGCCCTCCCATACCAAGACACCCACTAGCACGGAC
+CAGCCGTTACCAGGGCCTACCCCCAAGGCTCTCACTAATGAAAAGACACATTAG
+>PDI1 1569 residues Pha 0 Code 0
+ATGAAGTTTTCTGCTGGTGCCGTCCTGTCATGGTCCTCCCTGCTGCTCGCCTCCTCTGTT
+TTCGCCCAACAAGAGGCTGTGGCCCCTGAAGACTCCGCTGTCGTTAAGTTGGCCACCGAC
+TCCTTCAATGAGTACATTCAGTCGCACGACTTGGTGCTTGCGGAGTTTTTTGCTCCATGG
+TGTGGCCACTGTAAGAACATGGCTCCTGAATACGTTAAAGCCGCCGAGACTTTAGTTGAG
+AAAAACATTACCTTGGCCCAGATCGACTGTACTGAAAACCAGGATCTGTGTATGGAACAC
+AACATTCCAGGGTTCCCAAGCTTGAAGATTTTCAAAAACAGCGATGTTAACAACTCGATC
+GATTACGAGGGACCTAGAACTGCCGAGGCCATTGTCCAATTCATGATCAAGCAAAGCCAA
+CCGGCTGTCGCCGTTGTTGCTGATCTACCAGCTTACCTTGCTAACGAGACTTTTGTCACT
+CCAGTTATCGTCCAATCCGGTAAGATTGACGCCGACTTCAACGCCACCTTTTACTCCATG
+GCCAACAAACACTTCAACGACTACGACTTTGTCTCCGCTGAAAACGCAGACGATGATTTC
+AAGCTTTCTATTTACTTGCCCTCCGCCATGGACGAGCCTGTAGTATACAACGGTAAGAAA
+GCCGATATCGCTGACGCTGATGTTTTTGAAAAATGGTTGCAAGTGGAAGCCTTGCCCTAC
+TTTGGTGAAATCGACGGTTCCGTTTTCGCCCAATACGTCGAAAGCGGTTTGCCTTTGGGT
+TACTTATTCTACAATGACGAGGAAGAATTGGAAGAATACAAGCCTCTCTTTACCGAGTTG
+GCCAAAAAGAACAGAGGTCTAATGAACTTTGTTAGCATCGATGCCAGAAAATTCGGCAGA
+CACGCCGGCAACTTGAACATGAAGGAACAATTCCCTCTATTTGCCATCCACGACATGACT
+GAAGACTTGAAGTACGGTTTGCCTCAACTCTCTGAAGAGGCGTTTGACGAATTGAGCGAC
+AAGATCGTGTTGGAGTCTAAGGCTATTGAATCTTTGGTTAAGGACTTCTTGAAAGGTGAT
+GCCTCCCCAATCGTGAAGTCCCAAGAGATCTTCGAGAACCAAGATTCCTCTGTCTTCCAA
+TTGGTCGGTAAGAACCATGACGAAATCGTCAACGACCCAAAGAAGGACGTTCTTGTTTTG
+TACTATGCCCCATGGTGTGGTCACTGTAAGAGATTGGCCCCAACTTACCAAGAACTAGCT
+GATACCTACGCCAACGCCACATCCGACGTTTTGATTGCTAAACTAGACCACACTGAAAAC
+GATGTCAGAGGCGTCGTAATTGAAGGTTACCCAACAATCGTCTTATACCCAGGTGGTAAG
+AAGTCCGAATCTGTTGTGTACCAAGGTTCAAGATCCTTGGACTCTTTATTCGACTTCATC
+AAGGAAAACGGTCACTTCGACGTCGACGGTAAGGCCTTGTACGAAGAAGCCCAGGAAAAA
+GCTGCTGAGGAAGCCGATGCTGACGCTGAATTGGCTGACGAAGAAGATGCCATTCACGAT
+GAATTGTAA
+>GLK1 1503 residues Pha 0 Code 0
+ATGTCATTCGACGACTTACACAAAGCCACTGAGAGAGCGGTCATCCAGGCCGTGGACCAG
+ATCTGCGACGATTTCGAGGTTACCCCCGAGAAGCTGGACGAATTAACTGCTTACTTCATC
+GAACAAATGGAAAAAGGTCTAGCTCCACCAAAGGAAGGCCACACATTGGCCTCGGACAAA
+GGTCTTCCTATGATTCCGGCGTTCGTCACCGGGTCACCCAACGGGACGGAGCGCGGTGTT
+TTACTAGCCGCCGACCTGGGTGGTACCAATTTCCGTATATGTTCTGTTAACTTGCATGGA
+GATCATACTTTCTCCATGGAGCAAATGAAGTCCAAGATTCCCGATGATTTGCTAGACGAT
+GAGAACGTCACATCTGACGACCTGTTTGGGTTTCTAGCACGTCGTACACTGGCCTTTATG
+AAGAAGTATCACCCGGACGAGTTGGCCAAGGGTAAAGACGCCAAGCCCATGAAACTGGGG
+TTCACTTTCTCATACCCTGTAGACCAGACCTCTCTAAACTCCGGGACATTGATCCGTTGG
+ACCAAGGGTTTCCGCATCGCGGACACCGTCGGAAAGGATGTCGTGCAATTGTACCAGGAG
+CAATTAAGCGCTCAGGGTATGCCTATGATCAAGGTTGTTGCATTAACCAACGACACCGTC
+GGAACGTACCTATCGCATTGCTACACGTCCGATAACACGGACTCAATGACGTCCGGAGAA
+ATCTCGGAGCCGGTCATCGGATGTATTTTCGGTACCGGTACCAATGGGTGCTATATGGAG
+GAGATCAACAAGATCACGAAGTTGCCACAGGAGTTGCGTGACAAGTTGATAAAGGAGGGT
+AAGACACACATGATCATCAATGTCGAATGGGGGTCCTTCGATAATGAGCTCAAGCACTTG
+CCTACTACTAAGTATGACGTCGTAATTGACCAGAAACTGTCAACGAACCCGGGATTTCAC
+TTGTTTGAAAAACGTGTCTCAGGGATGTTCTTGGGTGAGGTGTTGCGTAACATTTTAGTG
+GACTTGCACTCGCAAGGCTTGCTTTTGCAACAGTACAGGTCCAAGGAACAACTTCCTCGC
+CACTTGACTACACCTTTCCAGTTGTCATCCGAAGTGCTGTCGCATATTGAAATTGACGAC
+TCGACAGGTCTACGTGAAACAGAGTTGTCATTATTACAGAGTCTCAGACTGCCCACCACT
+CCAACAGAGCGTGTTCAAATTCAAAAATTGGTGCGCGCGATTTCTAGGAGATCTGCGTAT
+TTAGCCGCCGTGCCGCTTGCCGCGATATTGATCAAGACAAATGCTTTGAACAAGAGATAT
+CATGGTGAAGTCGAGATCGGTTGTGATGGTTCCGTTGTGGAATACTACCCCGGTTTCAGA
+TCTATGCTGAGACACGCCTTAGCCTTGTCACCCTTGGGTGCCGAGGGTGAGAGGAAGGTG
+CACTTGAAGATTGCCAAGGATGGTTCCGGAGTGGGTGCCGCCTTGTGTGCGCTTGTAGCA
+TGA
+>YCD8 1587 residues Pha 0 Code 0
+ATGAGCTATGGAACTATAAATGATATGAATGAATCGGTAACGAACTATCGAATAAAAAAA
+GCCCAAAACAATATCAAGGGATGGTACGCTTACTCATTTTCTAGCGAACCATTTGTCGTT
+TCTGCGGTTTCAACGTATATTCCCTTACTACTGCAGCAATTTGCGAGTATAAATGGTGTA
+AAAGTTCACGATCACTCCATACCCTGCCTGTCAGAAACGGGTAGTGATTCAGATAAGTGT
+GTTCTTGGTTTGTTCAACAATCGGATCTTCGTAGATACTTCAAGTTTTGCATTATATGTC
+TTTTCCCTTAGCGTTTTATTCCAAACTATAATAGTCATTTCCGTTTCAGGGATAGTAGAT
+CTCTGGGGGAGCGTTAAATTCAAAGGCAGAATTCTGGTTTGGTTTGGTATTGTGGGCGCA
+TTGTCGACTGTTGCGATTTCAAAATTGAATGATACCCAGATTTATTCTCTGGCTGGGCTT
+TATATAGTGGCCAATGGTTGTTTTGGCGTTATCAATGTTGTTGGGAATTCTCTTCTGCCC
+ATTTTTGTCAAGGATTCTTTGAAATGTCAAAGTCAAGGAGCTTATGAACCTGATAAGGTA
+GACTCGTTAACTACTGTTATTAGCGGTAGAGGTGCATCTTTAGGTTATTCAAGTGCCCTC
+ATTGTTCAGATTGTATCTATGTTCTTAGTCGCATCTAAAAAGGGCAGTAAGCAGGATGTT
+CAAGTGGCTGTTCTTTTCGTTGGGATTTGGTGGTTTGTGTGGCAACTGCCCATGATCTGG
+TTGATTGACGATGTGACAATACCGATAAGAGTTGACGATTCTACATTAGCATCCGCCCGC
+AGTCCGTATCCCGGTGAGCAAGACGCCTTGGGTCAACTAAACTGGAAGAATTACCTTTCA
+TATGGTTGGGTTTCGCTTTTCGAATCGTTTAAACATGCCAGACTATTGAAAGATGTGATG
+ATTTTTCTTATTGCGTGGTTTATTATTAGTGATTCCATTACAACTATAAATTCTACAGCG
+GTTTTGTTCTCCAAGGCAGAACTGCACATGAGTACCCTCAATTTAATCATGATAAGTGTT
+TTGACCGTTGTAAATGCAATGCTGGGTGCCTTTATGATTCCACAATTTCTTGCCACAAAG
+TTTCGGTGGACTTCTAGTCAAACTTTGATGTACATTATCATTTGGGCAAGTTTCATACCA
+TTTTATGGTATTCTTGGATTTTTCTTCAATGCGTTCGGTTTAAAGCATAAGTTTGAAATG
+TTCTTATTGGCCATTTGGTATGGATTATCACTAGGTGGCCTGTCCGCGGTTTCAAGATCA
+GTTTTCAGTTTGATTGTACCTCCAGGAAAAGAATCCACGTTTTTTAGTATGTTCAGTATC
+ACAGATAAGGGGTCGTCCATCCTGGGACCCTTCCTTGTTGGACTGCTTACCGATAAAACG
+CATAATATTCGCTATTCGTTTTATTTCTTCTTTTTGCTTTTGATGCTATCATTGCCTGTG
+CTAAACTGTTTGGATGTCAAGAGAGGTAGAAGAGAGGCTGAAGAACTCAGTCAAGTTTTA
+CCTGAAAGTGAAAGAAGGTTGGATTAG
+>SRO9 1401 residues Pha 0 Code 0
+ATGAAGATCTTTTGGGATCCTAGATCGGTAATAGAACATCAGGATTACTCTGGACCTGCT
+AACGTGTTTCATCTTCTTTTCACTTCTCTGCCCACGATGTCTGCTGAAACCGCCGCCGCA
+AACACTGCTACTGCCCCAGTCCCAGAAGTGCAAGAACAAGAGAGCTCCAAGAGCAAGCAA
+GTCAACTTGACGCCGGCACCATTGCCCACATCTTCCCCATGGAAACTTGCTCCTACTGAG
+ATCCCTGTTTCTACTATCTCAATAGAAGACTTGGATGCCACAAGAAAGAAGAAGAACAGA
+ACACCCACTCCGAAATCATCGACTGCTACCAAGTGGGTTCCCATCAAGGCCTCCATTACC
+GTCTCTGGCACCAAAAGATCCGGTTCCAAGAATGGTGCAAGTAATGGCAACAGCAACAAG
+AGCAAAAACAACAAAACTGCAGCATCGTCGACATCGTCGAGTAATGCTAACAGGAAAAAG
+AAGCATCACCAACATAATGCTAAGAAGCAACAACAAATGAAGAAAGATGGCTTTGAATCG
+GCAGTAGGTGAGGAAGATTCAAAAGACGCTACCTCTCAAGAAAATGGTCAATCTACACAA
+CAGCAACAACCACCTCACCACCGTAATCATCACCACAGTCATCACCATAACAGCAATGGT
+CCTCAAAGGAGAAAGTTCCACAACAGTAATAACGCCGGTATGCCTCAGAACCAAGGCTTC
+CCACCACAGTTTAAACCTTACCAAGGACGCAACGCTCGTAATAACAACAACAACCGCTCT
+AAATACCACAACCACTTCCATCACAACCAACAACATCCTCAACAACCTATGGTCAAATTA
+CAGCAACAGTTTTATCCAGTCCAACCAGTGTTAATGGCCATCAACAACATTGCTAGACAA
+ATTGAATACTATTTCAGCGAAGAAAACTTGACCGTCGACAATTACTTAAGGTCCAAACTC
+TCCAAGGATGGTTTTGCTCCATTGTCTTTAATCTCTAAGTTTTACAGAGTTGTTAACATG
+TCCTTCGGAGGTGACACTAACCTGATTTTAGCCGCATTGAGAGAAATTGTCGCTAACGAA
+GCCGCTACCGTCAATGTTGCAGAAGGTACTTTGGCCGCCAAGGAAGGTGATAACGTTACC
+GGTGAAGCCAAAGAACCATCTCCATTGGATAAGTACTTCGTTCGTTCCAAGAGCTGGTCA
+AACTGGTTACCAGAAACTTTTGAAACTGAAATTAATATTGAAAAAGAACTGGTCGGCGAT
+GCATTGGACCAATTCATGATATCCCTACCACCTGTTCCTCAACAAGAAGAGGAATCATCC
+ACTGAACTCGCTTCTCAAGAACAAGAAACCAAAGAAGACTCTGCGCCGGTTGCTGCCGGT
+GAATCCGAGTCTTCCTTATAA
+>YCD6 1701 residues Pha 0 Code 0
+ATGCAGGTTCAAAAAATGGTGAGAGATAACAGTAATAACGGTAGCGATAAAAGCGTCCAT
+TGGGAGAGGAGGAATAATAACGGCGCAGGCCCCCGTTATCGTTCCAGAAGCGGTAATACC
+GGTGCTTTGGCAACAAAACTAAGTAATGGGACGCTCTCTGTCAGAGGATTAGTGAAGGAC
+CGAACAGGAAGCGGCAAGATCGCGGGCTGTGTGGAGGCGTTTCTGGATGCCAGGACCCAA
+TTGAATACGCCCTGGGACCGTGCTAAGTGCAATTGGCTGGACCAGATAGATTACTATGTA
+CAGTTGAGAAAGACCGCGTTTTCTAAGGAATTGGACCAACTAAGGAAGCCCATGATCGAT
+GCATATGTGGCGGAGATGAGGCAGAAGTTTGATGCCTCCTATGGACAATCCAGGGCGCAA
+TTGGAAGCCAAACTGGCGCAGGTGGACAGTGAATGGCATATGGTACATGGTGATGTGCAT
+GCAAAACTGGAAAAACTCGTGGAAGAACGCCGGTTTTTGAAAAGATTAAGCGACACGATC
+GTACCACCCAGGTCCAAAAGATCACAGCGGCTGTCTCCATTGACCAAAGAGGACCGAGCC
+AACTGTATCTGTCCGCAGCCCAAAGGAATGAGCGACACCGCTTGGTTCGAAGCCATTCAG
+AAGAAAATGTTAGGAATGAATGGTACCATCAAGCTCCTAGAGACAGAACAGAAACTACTG
+GCTGACGAGAAAAACAGCGTGAGGAAGACGTTCTGGCCCATGGTGGAAGCACATTCACGC
+TCGAATGAATTTGCTTATCTGGAGAAATGCATCAGGCTGATGGCCTCTCAGAGAGCAATA
+TGCTTTTGTCTTGATATAGAGGCTTTCGAAACAAACCAGAACGTAATCACCGAAATTGGG
+ATTTCAATTTATGACCCCAGGGAAAATATGGTGCCGTCAATGGTTCCAATTACAAAGAAT
+TACCACCTAATTATCGAGGAGTCCCTGGAACTTAGAAACCAAAAATGGGTCTGTGACTAC
+AAGGATTGCTACTTATTGGGAGAAAGCTATGTTTTGAGCTTGAAAGAGTGCGTGCATTTC
+ATTCAATCACTAATAAACTATTACTTGGTCCCGGTGACCGAAGAAGACAAGACATGGTCA
+AGGGCATTTGTTGGTCATCACGTGAGCGGGGATCTTAAGTGGCTGGAGACTATTGGTGTC
+AAATTCCCTGGCAGAGGGTATGAAGGCCATCTGGACCATACGCTGCTTTTGGCTGAAACT
+CCCGGTGATCTAGACGTGTTCATCTTGGACACTGAGCAGTTTTACAGGAAATCGTATGGC
+GAAAAGGGCAGCAGTCTGGGCAAGATTCTGCGGTTGTTCGAGATACCGCATGCGTTTCTA
+CACAATGCCGGTAACGATGCCTACTATACCCTGCATTTGTTCATGAAGTTTTGCGATGTT
+AATTTCAGGAAAATAAGCGGCATGGACGATGTTCTTAAAGTAATGGGCCAAGTAAAAGTT
+TGGGGAGAACGAGACGTACGAGAGCCTAAAGTGGTGCCCATGTCGTATGCCATCTCCATC
+GAGGAGGCAGTCAAAAATCGGACGTACCGCAAGGGCGTCAAGAGCAGTAGGAAGGAAAGA
+GTCTGCCAAACGGAATTCGGTGGGTTAACGTATTTCGGAACTGCTAAAGACGCCTTCACA
+AGCACTCTTCCGACACACTAA
+>YCD5 333 residues Pha 0 Code 0
+ATGGTATCTCAAGAAACTATCAAGCACGTCAAGGACCTTATTGCAGAAAACGAGATCTTC
+GTCGCATCCAAAACGTACTGTCCATACTGCCATGCAGCCCTAAACACGCTTTTTGAAAAG
+TTAAAGGTTCCCAGGTCCAAAGTTCTGGTTTTGCAATTGAATGACATGAAGGAAGGCGCA
+GACATTCAGGCTGCGTTATATGAGATTAATGGCCAAAGAACCGTGCCAAACATCTATATT
+AATGGTAAACATATTGGAGGCAACGACGACTTGCAGGAATTGAGGGAGACTGGTGAATTG
+GAGGAATTGTTAGAACCTATTCTTGCAAATTAA
+>YCD3 507 residues Pha 0 Code 0
+ATGAATAAGTGGAGCAGGCTGTACGTTATAACTGTACGCAGGACTTTTCCAGGGAGAAGA
+AACATTGTACTGACGCAGTACTGGAATAAGAGCAAGAAAATGAGTGACGAATCGAATGAC
+GTGAAGTGGAACGATGCCCTGACACCATTGCAGCTGATGGTGCTGAGAGATAAGGCCACT
+GAAAGGCCCAACACCGGTGCGTATTTACACACCAACGAGTCCGGTGTCTACCATTGTGCC
+AACTGCGACAGACCGTTGTATTCGAGCAAGGCCAAGTTCGACGCTCGTTGTGGATGGCCC
+GCATTCTACGAAGAGGTATCCCCTGGAGCCATCACATATCATCGTGACAATTCTTTAATG
+CCTGCGAGGGTGGAGATATGTTGTGCAAGGTGTGGTGGACACTTGGGACATGTGTTTGAA
+GGTGAAGGCTGGAAACAGTTGCTAAACTTGCCCAAGGACACCAGACACTGTGTGAACAGT
+GCGTCTTTAAACCTCAAGAAGGATTAA
+>STE50 1041 residues Pha 0 Code 0
+ATGGAGGACGGTAAACAGGCCATCAATGAGGGATCAAACGATGCTTCGCCGGATCTGGAC
+GTGAATGGCACAATATTGATGAATAATGAAGACTTTTCCCAGTGGTCGGTTGATGATGTG
+ATAACTTGGTGTATATCCACGCTGGAGGTGGAAGAAACCGATCCATTATGTCAGAGACTG
+CGAGAAAATGATATTGTAGGAGATCTTTTGCCGGAATTGTGCTTGCAAGATTGCCAGGAC
+TTGTGTGACGGTGATTTGAATAAGGCCATAAAATTCAAGATACTGATCAATAAGATGAGA
+GACAGCAAGTTGGAGTGGAAGGACGACAAGACTCAAGAGGACATGATAACGGTACTGAAA
+AACTTGTACACTACTACATCTGCGAAATTGCAAGAATTTCAATCGCAGTACACAAGGCTG
+AGGATGGATGTCTTGGACGTAATGAAGACCAGCTCAAGCTCTTCTCCGATTAACACACAT
+GGAGTGTCCACTACGGTACCTTCTTCAAACAACACAATTATACCCAGTAGTGACGGTGTG
+TCTCTTTCACAAACAGACTATTTCGACACAGTTCATAACCGACAATCACCGTCAAGGAGA
+GAATCCCCGGTAACGGTATTTAGGCAACCCAGTCTTTCCCACTCAAAATCTTTGCACAAG
+GATAGCAAAAACAAAGTACCCCAAATATCTACAAACCAATCTCACCCATCTGCCGTTTCA
+ACAGCGAACACACCGGGGCCATCACCTAACGAGGCGTTAAAACAGTTGCGTGCATCTAAA
+GAAGACTCCTGCGAACGGATCTTGAAAAACGCAATGAAAAGACATAACTTAGCAGATCAG
+GATTGGAGACAATATGTCTTGGTCATTTGCTATGGGGATCAAGAGAGGCTGTTAGAATTG
+AACGAAAAGCCTGTGATCATATTCAAGAACTTAAAGCAACAGGGTTTGCACCCCGCCATT
+ATGTTAAGAAGAAGAGGTGATTTCGAAGAAGTAGCAATGATGAACGGAAGTGACAATGTC
+ACCCCCGGTGGAAGACTCTAA
+>HIS4 2400 residues Pha 0 Code 0
+ATGGTTTTGCCGATTCTACCGTTAATTGATGATCTGGCCTCATGGAATAGTAAGAAGGAA
+TACGTTTCACTTGTTGGTCAGGTACTTTTGGATGGCTCGAGCCTGAGTAATGAAGAGATT
+CTCCAGTTCTCCAAAGAGGAAGAAGTTCCATTGGTGGCTTTGTCCTTGCCAAGTGGTAAA
+TTCAGCGATGATGAAATCATTGCCTTCTTGAACAACGGAGTTTCTTCTCTGTTCATTGCT
+AGCCAAGATGCTAAAACAGCCGAACACTTGGTTGAACAATTGAATGTACCAAAGGAGCGT
+GTTGTTGTGGAAGAGAACGGTGTTTTCTCCAATCAATTCATGGTAAAACAAAAATTCTCG
+CAAGATAAAATTGTGTCCATAAAGAAATTAAGCAAGGATATGTTGACCAAAGAAGTGCTT
+GGTGAAGTACGTACAGACCGTCCTGACGGTTTATATACCACCCTAGTTGTCGACCAATAT
+GAGCGTTGTCTAGGGTTGGTGTATTCTTCGAAGAAATCTATAGCAAAGGCCATCGATTTG
+GGTCGTGGCGTTTATTATTCTCGTTCTAGGAATGAAATCTGGATCAAGGGTGAAACTTCT
+GGCAATGGCCAAAAGCTTTTACAAATCTCTACTGACTGTGATTCGGATGCCTTAAAGTTT
+ATCGTTGAACAAGAAAACGTTGGATTTTGCCACTTGGAGACCATGTCTTGCTTTGGTGAA
+TTCAAGCATGGTTTGGTGGGGCTAGAATCTTTACTAAAACAAAGGCTACAGGACGCTCCA
+GAGGAATCTTATACTAGAAGACTATTCAACGACTCTGCATTGTTAGATGCCAAGATCAAG
+GAAGAAGCTGAAGAACTGACTGAGGCAAAGGGTAAGAAGGAGCTTTCTTGGGAGGCTGCC
+GATTTGTTCTACTTTGCACTGGCCAAATTAGTGGCCAACGATGTTTCATTGAAGGACGTC
+GAGAATAATCTGAATATGAAGCATCTGAAGGTTACAAGACGGAAAGGTGATGCTAAGCCA
+AAGTTTGTTGGACAACCAAAGGCTGAAGAAGAAAAACTGACCGGTCCAATTCACTTGGAC
+GTGGTGAAGGCTTCCGACAAAGTTGGTGTGCAGAAGGCTTTGAGGAGACCAATCCAAAAG
+ACTTCTGAAATTATGCATTTAGTCAATCCGATCATCGAAAATGTTAGAGACAAAGGTAAC
+TCTGCCCTTTTGGAGTACACAGAAAAGTTTGATGGTGTAAAATTATCCAATCCTGTTCTT
+AATGCTCCATTCCCAGAAGAATACTTTGAAGGTTTAACCGAGGAAATGAAGGAAGCTTTG
+GACCTTTCAATTGAAAACGTCCGCAAATTCCATGCTGCTCAATTGCCAACAGAGACTCTT
+GAAGTTGAAACCCAACCTGGTGTCTTGTGTTCCAGATTCCCTCGTCCTATTGAAAAAGTT
+GGTTTGTATATCCCTGGTGGCACTGCCATTTTACCAAGTACTGCATTAATGCTTGGTGTT
+CCAGCACAAGTTGCCCAATGTAAGGAGATTGTGTTTGCATCTCCACCAAGAAAATCTGAT
+GGTAAAGTTTCACCCGAAGTTGTTTATGTCGCAGAAAAAGTTGGCGCTTCCAAGATTGTT
+CTAGCTGGTGGTGCCCAAGCCGTTGCTGCTATGGCTTACGGGACAGAAACTATTCCTAAA
+GTGGATAAGATCTTGGGTCCAGGTAATCAATTTGTGACTGCCGCCAAAATGTATGTTCAA
+AATGACACTCAAGCTCTATGTTCCATTGATATGCCAGCTGGCCCAAGTGAAGTTTTGGTT
+ATTGCCGATGAAGATGCCGATGTGGATTTTGTTGCAAGTGATTTGCTATCGCAAGCTGAA
+CACGGTATTGACTCCCAAGTTATCCTTGTTGGTGTTAACTTGAGCGAAAAGAAAATTCAA
+GAGATTCAAGATGCTGTCCACAATCAAGCTTTACAACTGCCACGTGTGGATATTGTTCGT
+AAATGTATTGCTCACAGTACGATCGTTCTTTGTGACGGTTACGAAGAAGCCCTTGAAATG
+TCCAACCAATATGCACCAGAACATTTGATTCTACAAATCGCCAATGCTAACGATTATGTT
+AAATTGGTTGACAATGCAGGGTCCGTATTTGTGGGTGCTTACACTCCAGAATCGTGCGGT
+GACTATTCAAGTGGTACTAACCATACATTACCAACCTATGGTTACGCTAGGCAGTACAGT
+GGTGCCAACACTGCAACCTTCCAAAAGTTTATCACTGCCCAAAACATTACCCCTGAAGGT
+TTAGAAAACATCGGTAGAGCTGTTATGTGCGTTGCCAAGAAGGAGGGTCTAGACGGTCAC
+AGAAACGCTGTGAAAATCAGAATGAGTAAGCTTGGGTTGATCCCAAAGGATTTCCAGTAG
+>BIK1 1323 residues Pha 0 Code 0
+ATGGATAGATATCAAAGAAAGATAGGATGTTTCATACAAATCCCAAATTTGGGGCGCGGA
+CAACTGAAATACGTGGGTCCAGTGGACACGAAAGCTGGAATGTTTGCTGGTGTAGACTTA
+CTTGCCAACATTGGTAAGAACGATGGATCATTCATGGGGAAGAAGTATTTTCAAACAGAG
+TATCCTCAAAGTGGACTATTTATCCAGTTGCAAAAAGTCGCATCATTGATCGAGAAGGCA
+TCGATATCGCAAACCTCGAGAAGAACGACGATGGAACCGCTATCAATACCCAAAAACAGA
+TCTATTGTGAGGCTCACTAACCAGTTCTCTCCCATGGATGATCCTAAATCCCCCACACCC
+ATGAGAAGTTTCCGGATCACCAGTCGGCACAGCGGTAATCAACAGTCGATGGACCAGGAG
+GCATCGGATCACCATCAACAGCAAGAATTTGGTTACGATAACAGAGAAGACAGAATGGAG
+GTCGACTCTATCCTGTCATCAGACAGAAAGGCTAATCACAACACCACCAGCGATTGGAAA
+CCGGACAATGGCCACATGAATGACCTCAATAGCAGCGAAGTTACAATTGAATTACGAGAA
+GCCCAATTGACCATCGAAAAGCTACAAAGGAAACAACTACACTACAAAAGGCTACTCGAT
+GACCAAAGAATGGTCCTCGAAGAAGTGCAACCGACTTTTGATAGGTATGAAGCCACAATA
+CAAGAAAGAGAGAAAGAGATAGACCATCTCAAGCAACAATTGGAGCTCGAACGCAGACAG
+CAAGCCAAACAAAAGCAGTTTTTTGACGCTGAGAATGAACAGCTACTTGCTGTCGTAAGC
+CAACTACACGAAGAGATCAAAGAAAACGAAGAGAGAAATCTTTCTCATAATCAACCCACT
+GGTGCCAACGAAGATGTCGAACTCCTGAAAAAACAGCTGGAACAATTACGCAACATAGAA
+GACCAATTTGAGTTACACAAGACAAAGTGGGCTAAAGAACGCGAACAATTGAAAATGCAT
+AACGATTCGCTCAGTAAAGAATACCAAAATTTGAGCAAGGAACTATTTTTGACAAAACCA
+CAAGATTCCTCATCGGAAGAGGTGGCATCCTTAACGAAAAAACTTGAAGAGGCTAATGAA
+AAAATCAAACAGTTGGAACAGGCTCAAGCACAAACAGCCGTGGAATCGTTGCCAATTTTC
+GACCCCCCTGCACCAGTCGATACCACGGCAGGAAGACAACAGTGGTGTGAGCATTGCGAT
+ACGATGGGTCATAATACAGCAGAATGCCCCCATCACAATCCTGACAACCAGCAGTTCTTC
+TAG
+>FUS1 1539 residues Pha 0 Code 0
+ATGGTAGCAACAATAATGCAGACGACAACAACTGTGCTGACGACAGTCGCCGCAATGTCT
+ACTACCTTAGCATCAAATTACATATCTTCGCAAGCTAGTTCCTCGACGAGTGTAACAACA
+GTAACGACAATAGCGACATCAATACGCTCTACACCGTCTAATCTACTCTTTTCTAATGTG
+GCGGCTCAGCCAAAATCATCTTCAGCAAGCACAATTGGGCTTTCAATCGGACTTCCCATC
+GGAATATTCTGTTTCGGATTACTTATCCTTTTGTGTTATTTCTACCTTAAAAGGAATTCG
+GTGTCCATTTCAAATCCACCCATGTCAGCTACGATTCCAAGGGAAGAGGAATATTGTCGC
+CGCACTAATTGGTTCTCACGGTTATTTCGGCAGAGTAAGTGTGAGGATCAGAATTCATAT
+TCTAATCGTGATATTGAGAAGTATAACGACACCCAGTGGACCTCGGGTGATAACATGTCT
+TCAAAAATACAGTACAAAATTTCCAAACCCATAATACCGCAGCATATACTGACACCTAAG
+AAAACGGTGAAGAACCCATATGCTTGGTCTGGTAAAAACATTTCGTTAGACCCCAAAGTG
+AACGAAATGGAGGAAGAGAAAGTTGTGGATGCATTCCTGTATACTAAACCACCGAATATT
+GTCCATATTGAATCCAGCATGCCCTCGTATAATGATTTACCTTCTCAAAAAACGGTGTCC
+TCAAAGAAAACTGCGTTAAAAACGAGTGAGAAATGGAGTTACGAATCTCCACTATCTCGA
+TGGTTCTTGAGGGGTTCTACATACTTTAAGGATTATGGCTTATCAAAGACCTCTTTAAAG
+ACCCCAACTGGGGCTCCACAACTGAAGCAAATGAAAATGCTCTCCCGGATAAGTAAGGGT
+TACTTCAATGAGTCAGATATAATGCCTGACGAACGATCGCCCATCTTGGAGTATAATAAC
+ACGCCTCTGGATGCAAATGACAGCGTGAATAACTTGGGTAATACCACGCCAGATTCACAA
+ATCACATCTTATCGCAACAATAACATCGATCTAATCACGGCAAGACCCCATTCAGTGATA
+TACGGTACTACTGCACAACAAACTTTGGAAACCAACTTCAATGATCATCATGACTGCAAT
+AAAAGCACTGAGAAACACGAGTTGATAATACCCACCCCATCAAAACCACTAAAGAAAAGG
+AAAAAAAGAAGACAAAGTAAAATGTATCAGCATTTACAACATTTGTCACGTTCTAAACCA
+TTGCCGCTTACTCCAAACTCCAAATATAATGGAGAGGCTAGCGTCCAATTAGGGAAGACA
+TATACAGTTATTCAGGATTACGAGCCTAGATTGACAGACGAAATAAGAATCTCGCTGGGT
+GAAAAAGTTAAAATTCTGGCCACTCATACCGATGGATGGTGTCTGGTAGAGAAGTGTAAT
+ACACGAAAGGGTACTATTCACGTCAGTGTTGACGATAAAAGATACCTCAATGAAGATAGA
+GGCATTGTGCCTGGTGACTGTCTCCAAGAATACGACTGA
+>YC08 579 residues Pha 0 Code 0
+ATGTCCCCAACTGGAAACTACTTAAACGCTATTACAAACCGTCGTACCATCTACAATTTG
+AAGCCCGAATTACCACAAGGTGTCGGTTTGGATGATGTAAAGAGAACTGTACACGTTATT
+CTCAAGAATACGCCAACAGCTTTTAACTCACAAGTGAATCGCGCTGTCATTATCGTTGGT
+GATACACACAAAAGGATATGGGATGCTGTTGCGAGCGCAATGCCAACTGCTGAAGCCAAG
+AAGAGACCAGAGTCTTGCAGAGATGAGGCTTACGGTTCAGTCATTTTCTTCACTGATGAA
+GGACCAACTGAAAACTGCAAGAGATTTTCCAGCCTTGGCACCGCTTTCCCAACATGCGCC
+GCTCATACGACCGGTGCTGTGCAAATTCAGTCTTGGACTGCCCTCGAACTATTGGGATTG
+GGGGCTAATTTGCAACACTATAATGACTACGTCAAATCTGCTTTGCCTCAAGATGTTCCT
+ATTGCGTGGACTGTACAATCTCAATTGGTCTTTGGTGTTCCAACTGCCTTGCCAGAAGAA
+AAGACTTTTATCAATAACGTAATCAACGTTTATCACTGA
+>AGP1 1902 residues Pha 0 Code 0
+ATGTCGTCGTCGAAGTCTCTATACGAACTGAAAGACTTGAAAAATAGCTCCACAGAAATA
+CATGCCACGGGGCAGGATAATGAAATTGAATATTTCGAAACAGGCTCCAATGACCGTCCA
+TCCTCACAACCTCATTTAGGTTACGAACAGCATAACACTTCTGCCGTGCGTAGGTTTTTC
+GACTCCTTTAAAAGAGCGGATCAGGGTCCACAGGATGAAGTAGAAGCAACACAAATGAAC
+GATCTTACGTCGGCTATCTCACCTTCTTCTAGACAGGCTCAAGAACTAGAAAAAAATGAA
+AGTTCGGACAACATAGGCGCTAATACAGGTCATAAGTCGGACTCGCTGAAGAAAACCATT
+CAGCCTAGACATGTTCTGATGATTGCGTTGGGTACGGGTATCGGTACTGGGTTACTGGTC
+GGTAACGGTACCGCGTTGGTTCATGCGGGTCCAGCTGGACTACTTATTGGTTACGCTATT
+ATGGGTTCTATCTTGTACTGTATTATTCAAGCATGTGGTGAAATGGCGCTAGTGTATAGT
+AACTTGACTGGTGGCTACAATGCATACCCAGTTTCCTTGTGGATGATGGTTTTTGGGTTT
+GCAGTCGCTTGGGTTTATTGTTTGCAATGGCTGTGTGTGTGTCCTCTGGAATTGGTGACC
+GCATCCATGACTATCAAATATTGGACGACATCTGTGAACCCGGATGTGTTCGTCATTATT
+TTCTATGTTTTGGTGATTACTATTAATATTTTCGGTGCTCGTGGTTATGCAGAAGCTGAG
+TTCTTCTTCAACTGTTGCAAAATTTTGATGATGACTGGGTTCTTCATTCTTGGTATTATC
+ATCGATGTTGGTGGCGCTGGTAATGATGGTTTTATTGGTGGTAAATACTGGCACGATCCG
+GGCGCTTTCAATGGTAAACATGCCATTGACAGATTTAAAGGTGTTGTTGCAACATTAGTG
+ACTGCTGCTTTTGCCTTTGGTGGTTCAGAGTTTATTGCCATCACCACTGCAGAACAATCT
+AATCCAAGAAAGGCCATTCCAGGTGCGGCCAAACAAATGATCTACAGAATCTTATTCCTA
+TTCTTGGCTACCATTATTCTACTGGGTTTCTTGGTGCCATACAATTCCGATCAATTATTG
+GGTTCTACCGGTGGTGGTACTAAAGCCTCGCCATATGTCATTGCTGTTGCATCCCACGGT
+GTCCGTGTCGTCCCACACTTCATTAACGCCGTTATTCTACTTTCCGTGCTGTCCATGGCT
+AACTCCTCCTTCTACTCCAGTGCTCGTTTATTTTTAACTCTATCCGAGCAAGGTTACGCT
+CCTAAGGTTTTCTCCTACATCGACAGAGCCGGTAGACCATTGATTGCCATGGGTGTTTCT
+GCATTGTTTGCCGTTATTGCCTTCTGTGCTGCATCTCCCAAGGAAGAACAAGTTTTCACT
+TGGTTATTGGCCATTTCTGGTTTGTCTCAGCTTTTCACATGGACTGCCATTTGTTTATCC
+CATCTTAGATTTAGAAGAGCCATGAAAGTCCAAGGGAGATCTCTTGGAGAATTGGGTTTC
+AAATCTCAAACTGGTGTTTGGGGATCTGCCTACGCTTGCATTATGATGATTTTAATTCTT
+ATTGCCCAATTTTGGGTCGCTATCGCCCCCATTGGTGAAGGTAAGCTGGATGCACAAGCC
+TTTTTCGAAAACTACTTGGCTATGCCAATCTTGATTGCACTATATGTCGGCTACAAGGTC
+TGGCACAAGGATTGGAAACTGTTCATCAGGGCCGACAAGATCGACCTAGATTCTCATAGA
+CAAATCTTTGATGAAGAATTAATCAAGCAAGAAGACGAAGAATATAGGGAACGTTTGAGG
+AACGGACCTTATTGGAAAAGGGTCGTTGCCTTCTGGTGTTAA
+>LEU2 1095 residues Pha 0 Code 0
+ATGTCTGCCCCTAAGAAGATCGTCGTTTTGCCAGGTGACCACGTTGGTCAAGAAATCACA
+GCCGAAGCCATTAAGGTTCTTAAAGCTATTTCTGATGTTCGTTCCAATGTCAAGTTCGAT
+TTCGAAAATCATTTAATTGGTGGTGCTGCTATCGATGCTACAGGTGTCCCACTTCCAGAT
+GAGGCGCTGGAAGCCTCCAAGAAGGTTGATGCCGTTTTGTTAGGTGCTGTGGGTGGTCCT
+AAATGGGGTACCGGTAGTGTTAGACCTGAACAAGGTTTACTAAAAATCCGTAAAGAACTT
+CAATTGTACGCCAACTTAAGACCATGTAACTTTGCATCCGACTCTCTTTTAGACTTATCT
+CCAATCAAGCCACAATTTGCTAAAGGTACTGACTTCGTTGTTGTCAGAGAATTAGTGGGA
+GGTATTTACTTTGGTAAGAGAAAGGAAGACGATGGTGATGGTGTCGCTTGGGATAGTGAA
+CAATACACCGTTCCAGAAGTGCAAAGAATCACAAGAATGGCCGCTTTCATGGCCCTACAA
+CATGAGCCACCATTGCCTATTTGGTCCTTGGATAAAGCTAATGTTTTGGCCTCTTCAAGA
+TTATGGAGAAAAACTGTGGAGGAAACCATCAAGAACGAATTCCCTACATTGAAGGTTCAA
+CATCAATTGATTGATTCTGCCGCCATGATCCTAGTTAAGAACCCAACCCACCTAAATGGT
+ATTATAATCACCAGCAACATGTTTGGTGATATCATCTCCGATGAAGCCTCCGTTATCCCA
+GGTTCCTTGGGTTTGTTGCCATCTGCGTCCTTGGCCTCTTTGCCAGACAAGAACACCGCA
+TTTGGTTTGTACGAACCATGCCACGGTTCTGCTCCAGATTTGCCAAAGAATAAGGTCAAC
+CCTATCGCCACTATCTTGTCTGCTGCAATGATGTTGAAATTGTCATTGAACTTGCCTGAA
+GAAGGTAAGGCCATTGAAGATGCAGTTAAAAAGGTTTTGGATGCAGGTATCAGAACTGGT
+GATTTAGGTGGTTCCAACAGTACCACGGAAGTCGGTGATGCTGTCGCCGAAGAAGTTAAG
+AAAATCCTTGCTTAA
+>NFS1 1494 residues Pha 0 Code 0
+ATGTTGAAATCAACTGCTACAAGATCGATAACAAGATTATCTCAAGTTTACAACGTTCCA
+GCGGCCACATATAGGGCTTGTTTGGTAAGCAGGAGATTCTATTCCCCTCCTGCAGCAGGC
+GTGAAGTTAGACGACAACTTCTCTCTGGAAACGCATACCGATATTCAGGCTGCTGCAAAG
+GCACAGGCTAGTGCCCGTGCGAGTGCATCCGGTACCACCCCAGATGCTGTAGTAGCTTCT
+GGTAGCACTGCAATGAGCCATGCTTATCAAGAAAACACAGGTTTTGGTACTCGTCCCATA
+TATCTTGACATGCAAGCCACTACACCAACAGACCCTAGGGTTTTGGATACGATGTTGAAG
+TTTTATACGGGACTTTATGGTAATCCTCATTCCAACACTCACTCTTACGGTTGGGAAACA
+AATACTGCTGTGGAAAATGCTAGAGCTTACGTAGCAAAGATGATCAATGCCGACCCCAAG
+GAAATAATATTCACTTCGGGAGCGACCGAATCTAATAATATGGTTCTTAAGGGTGTCCCA
+AGATTTTATAAGAAGACTAAGAAACACATCATCACCACTAGAACGGAACACAAGTGTGTC
+TTGGAAGCCGCACGGGCCATGATGAAGGAGGGATTTGAAGTCACTTTCCTAAATGTGGAC
+GATCAAGGTCTTATCGATTTGAAGGAATTGGAAGATGCCATTAGACCAGATACCTGTCTC
+GTCTCTGTGATGGCTGTCAATAATGAAATCGGTGTCATTCAACCTATTAAAGAAATTGGT
+GCAATTTGTAGAAAGAATAAGATCTACTTTCATACTGACGCCGCACAAGCCTATGGTAAG
+ATTCACATTGATGTCAATGAAATGAACATTGATTTACTATCAATTTCTTCTCACAAGATT
+TACGGTCCAAAGGGAATAGGTGCCATCTATGTAAGAAGGAGACCAAGAGTTAGATTAGAA
+CCTTTACTATCCGGTGGTGGCCAAGAGAGAGGATTGAGATCTGGTACTTTGGCCCCCCCA
+TTGGTAGCGGGATTTGGTGAAGCTGCGAGATTGATGAAGAAAGAATTTGACAACGACCAA
+GCTCACATCAAAAGACTATCCGATAAATTAGTCAAAGGTCTATTATCCGCTGAACATACC
+ACGTTGAACGGATCTCCAGATCATCGTTATCCAGGGTGTGTTAACGTTTCTTTCGCCTAC
+GTGGAAGGAGAATCTTTATTGATGGCACTAAGGGATATCGCATTATCCTCGGGTTCAGCC
+TGTACATCTGCTTCCCTAGAACCTTCTTATGTTTTACATGCGCTGGGTAAGGATGATGCA
+TTAGCCCATTCTTCCATCAGATTTGGTATTGGTAGATTTAGTACTGAAGAGGAGGTCGAC
+TACGTCGTTAAGGCCGTTTCTGACAGAGTAAAATTCTTGAGGGAACTTTCACCATTATGG
+GAAATGGTTCAAGAAGGTATTGACTTAAACTCCATCAAATGGTCAGGTCATTGA
+>BUD3 4104 residues Pha 0 Code 0
+ATGGAGAAAGACCTGTCGTCTCTTTACTCTGAAAAGAAAGACAAAGAGAACGATGAAACC
+TTATTTAACATCAAACTATCCAAATCTGTTGTCGAGACCACACCGCTAAATGGTCATTCA
+TTGTTTGATGATGATAAATCACTTTCAGACTGGACGGATAATGTGTTCACTCAATCAGTA
+TTCTATCACGGGTCAGATGACTTGATATGGGGGAAGTTCTTTGTCTGCGTGTACAAGTCC
+CCCAACAGCAATAAGTTGAACGCTATAATATTCGACAAATTAGGAACATCATGCTTCGAA
+TCCGTCGATATATCTTCCAACTCGCAATACTATCCGGCCATTGAGAATTTGAGTCCAAGT
+GATCAGGAAAGCAATGTTAAGAAATGCATTGCTGTCATTCTGTTACAGCGCTATCCATTA
+CTTTCACCATCAGACTTATCACAAATATTGTCCAATAAATCGGAAAATTGCGACTATGAC
+CCCCCTTATGCTGGAGATTTGGCTAGTAGTTGCCAGTTGATAACAGCAGTTCCTCCAGAA
+GATCTGGGGAAGCGCTTCTTTACATCAGGACTTCTGCAAAATAGATTTGTCAGCTCTACC
+CTGTTAGATGTTATTTATGAAAACAATGAATCCACCATCGAACTAAATAATAGGTTGGTA
+TTCCATCTGGGTGAACAACTTGAACAACTTTTTAACCCAGTCACAGAATACTCACCGGAA
+CAGACAGAATATGGTTATAAGGCGCCAGAGGACGAATTACCCACAGAATCGGATGATGAT
+CTTGTCAAGGCCATTTGCAACGAGTTATTACAACTACAAACAAATTTTACTTTCAATTTG
+GTAGAATTTTTGCCAAAATTCCTGATCGCCTTGAGAGTCAGAGTACTCAATGAAGAAATT
+AATGGGTTATCCACAACCAAATTAAATCGACTCTTCCCACCTACAATAGATGAAGTCACA
+AGAATCAATTGTATTTTTCTAGACTCGCTAAAGACAGCAATCCCTTACGGTTCCCTCGAA
+GTACTGAAGGCATGCAGCATTACTATTCCTTATTTCTACAAAGCATATACAAGACACGAG
+GCGGCCACAAAGAACTTCAGCAAAGATATTAAATTGTTTATTAGGCATTTCAGCAATGTA
+ATTCCAGAAAGAGAGGTCTACACGGAAATGAAAATCGAGAGTATAATTAAGGGACCTCAG
+GAAAAACTACTGAAGCTAAACTTAATTATAGAGAGATTGTGGAAGTCGAAAAAATGGAGA
+CCGAAAAATCAAGAAATGGCAAAAAAATGCTACAACAATATCATTGATGTCATTGATTCG
+TTTGGAAAATTAGATTCCCCACTTCATTCTTATAGTACCAGAGTATTTACTCCATCGGGA
+AAAATCCTTACAGAATTAGCCAAATGCTGGCCCGTAGAACTGCAATACAAATGGCTGAAG
+AGAAGGGTAGTCGGTGTGTATGATGTAGTGGATTTGAATGATGAAAATAAGAGAAATTTA
+TTAGTCATATTCAGTGATTATGTGGTTTTCATCAATATACTGGAGGCAGAAAGTTACTAC
+ACTTCAGATGGATCAAACAGGCCCTTAATCTCAGATATTTTAATGAACTCATTGATCAAC
+GAAGTTCCGTTGCCCTCCAAGATCCCTAAGTTGAAAGTGGAGCGTCATTGCTATATAGAT
+GAGGTTCTAGTTTCTATATTAGACAAAAGCACTCTACGTTTTGATCGATTGAAGGGAAAA
+GATTCTTTCTCAATGGTATGTAAATTATCCTCTGCATTTATCTCTTCTTCGTCAGTTGCT
+GACTTGATTACGAAGGCTAGAATTTTGGAAAAAGACACTGCATTTCATTTATTTAAAGCT
+AGTAGAAGCCATTTTACATTATATTCTACTGCTCACGAGCTTTGCGCTTATGATTCCGAA
+AAAATAAAATCAAAATTTGCCTTATTCCTGAACATACCACCATCCAAGGAGATATTGGAG
+GTCAACAACCTTCATTTGGCTTTTTTTGCAAGATTTTGCAGTAACGATGGTAGAGATAAC
+ATCGTAATCTTAGACGTCTTAACCAAACATGACGATAAACATATAGAAGTTACATCCGAT
+AACATTGTTTTCACCATAATTAATCAATTGGCCATTGAAATACCGATATGCTTTTCTTCC
+TTAAACTCATCGATGGCCAAAGATTTACTCTGTGTAAATGAGAATTTGATAAAAAACTTA
+GAACATCAATTGGAAGAGGTCAAGCACCCTTCAACAGACGAACATAGGGCTGTTAATAGC
+AAACTTTCCGGTGCATCCGATTTCGATGCTACTCACGAGAAGAAAAGATCATACGGTACC
+ATAACAACATTTAGAAGCTATACAAGCGACTTGAAGGACAGTCCATCAGGCGATAATAGT
+AATGTCACCAAGGAAACTAAGGAAATTTTACCAGTGAAACCTACGAAAAAGTCTTCAAAA
+AAACCAAGAGAAATTCAAAAGAAGACCAAGACAAACGCCTCTAAAGCAGAGCACATAGAA
+AAGAAGAAGCCTAACAAAGGCAAAGGGTTTTTTGGCGTGTTAAAAAATGTTTTTGGAAGT
+AAAAGCAAGAGCAAGCCTTCACCAGTTCAAAGAGTGCCTAAAAAAATATCGCAGAGGCAT
+CCTAAGTCTCCAGTGAAGAAGCCAATGACCTCAGAAAAGAAATCCTCCCCTAAAAGGGCA
+GTCGTTTCATCTCCCAAAATTAAAAAGAAAAGTACTTCTTTTTCCACAAAAGAATCACAA
+ACTGCTAAATCTTCTCTTCGAGCAGTTGAATTCAAATCTGATGACTTGATCGGAAAACCA
+CCTGATGTTGGAAATGGCGCACATCCTCAAGAAAATACCAGAATATCTTCAGTAGTAAGG
+GATACAAAATATGTCTCCTACAATCCCTCTCAGCCTGTGACAGAAAATACCAGTAACGAA
+AAAAATGTCGAACCAAAAGCGGATCAATCCACAAAGCAGGATAACATTTCCAATTTTGCA
+GATGTAGAGGTATCTGCGTCTTCTTATCCTGAAAAACTTGATGCAGAAACAGATGATCAA
+ATAATTGGGAAGGCGACGAATTCGTCATCAGTTCATGGAAATAAAGAGCTGCCAGACCTT
+GCTGAGGTGACTACAGCAAATAGGGTTTCTACAACATCGGCTGGGGACCAACGTATTGAT
+ACCCAAAGCGAATTTTTACGTGCAGCTGATGTTGAAAACTTAAGTGATGACGATGAACAC
+AGACAGAATGAAAGTAGAGTTTTTAACGATGACCTCTTTGGTGATTTTATTCCTAAGCAT
+TACCGTAATAAACAGGAGAACATTAACAGCTCGAGTAATTTGTTTCCAGAGGGAAAGGTG
+CCCCAAGAAAAGGGCGTATCAAATGAAAACACTAACATATCTCTCAAAACTAATGAAGAT
+GCATCTACATTGACGCAGAAACTCTCTCCACAAGCGAGTAAAGTGCTGACAGAAAATTCT
+AATGAATTAAAAGATACCAACAATGAAGGGAAGGACGCAAAGGACATAAAATTAGGAGAT
+GATTACAGTGATAAAGAAACAGCGAAAGAAATAACTAAACCAAAAAATTTTGTTGAAGGA
+ATAACTGAACGGAAAGAAATATTCCCCACTATTCCTAGGTTAGCGCCGCCAGCTTCAAAA
+ATTAACTTTCAAAGGTCACCATCCTATATTGAGCTCTTTCAAGGAATGAGGGTGGTTTTA
+GATAAGCATGATGCCCATTATAACTGGAAACGCTTGGCTAGTCAAGTCTCCTTAAGTGAG
+GGACTAAAAGTCAATACTGAGGAAGATGCGGCAATTATAAATAAAAGTCAGGATGATGCC
+AAGGCGGAAAGAATGACTCAAATTTCTGAAGTGATTGAGTATGAAATGCAGCAACCTATC
+CCAACTTATTTGCCTAAGGCGCATCTAGATGACTCGGGTATTGAAAAAAGTGATGACAAA
+TTCTTCGAAATTGAAGAAGAACTTAAGGAAGAATTGAAGGGCAGCAAAACGGTAATGAAG
+ATGTCGGTAATAATAATCCATCCAATTCTATTCCAAAAATCGAGAAGCCCCCAGCATTCA
+AAGTTATTAGAACATCGCCTGTGA
+>GBP2 1284 residues Pha 0 Code 0
+ATGGAGAGAGAGCTAGGGATGTATGGAAATGATAGGAGTAGATCAAGATCACCTGTACGT
+CGTCGTTTGAGCGACGACAGAGACAGGTACGATGATTATAACGATAGTAGCAGTAATAAT
+GGTAATGGCAGTCGTCGTCAGAGACGCGACCGAGGCTCCCGTTTCAATGATCGGTACGAT
+CAGAGTTATGGTGGCAGCCGCTACCACGATGATAGGAACTGGCCCCCTCGCCGAGGAGGC
+CGTGGCAGAGGAGGAAGCAGATCATTCAGAGGGGGACGCGGTGGCGGTAGGGGTCGTACT
+TTAGGTCCAATTGTTGAAAGAGACTTAGAAAGGCAATTTGACGCGACCAAGAGAAATTTT
+GAAAATAGTATCTTCGTGAGAAACTTGACTTTTGATTGTACCCCTGAAGACCTTAAGGAA
+TTGTTTGGTACAGTGGGCGAAGTTGTGGAGGCTGACATTATCACATCAAAGGGCCATCAC
+CGTGGTATGGGGACTGTGGAATTTACCAAAAACGAATCTGTCCAAGATGCCATATCGAAG
+TTTGATGGTGCCCTCTTTATGGACCGGAAACTAATGGTAAGACAGGATAATCCTCCTCCT
+GAAGCTGCCAAGGAATTTTCTAAGAAAGCTACTAGGGAAGAAATAGATAATGGGTTTGAA
+GTGTTCATCATCAATTTACCGTACTCTATGAATTGGCAATCCTTAAAAGATATGTTTAAA
+GAATGTGGTCATGTCTTGCGTGCCGATGTAGAATTGGATTTCAACGGATTTTCAAGAGGA
+TTCGGTTCTGTCATTTATCCTACTGAGGATGAAATGATTAGAGCTATCGATACATTCAAC
+GGCATGGAAGTAGAAGGTAGAGTTTTGGAAGTTAGAGAAGGGCGTTTCAACAAGAGAAAG
+AACAATGATCGTTATAATCAAAGGCGTGAGGACCTTGAAGATACCAGAGGTACTGAACCA
+GGTCTTGCGCAGGATGCCGCTGTCCACATTGATGAAACTGCAGCAAAATTTACTGAAGGT
+GTCAATCCAGGAGGGGATAGAAACTGTTTCATTTATTGTAGTAATTTACCATTCTCAACA
+GCAAGAAGCGATTTATTCGACTTGTTTGGGCCTATCGGCAAAATCAATAACGCGGAATTG
+AAACCACAGGAAAATGGTCAACCAACTGGTGTTGCTGTTGTAGAATATGAAAATTTAGTA
+GATGCAGATTTTTGTATTCAAAAATTAAATAATTATAATTATGGTGGTTGTAGTTTACAG
+ATCTCTTATGCTAGACGTGATTAA
+>ILV6 930 residues Pha 0 Code 0
+ATGCTGAGATCGTTATTGCAAAGCGGCCACCGCAGGGTGGTTGCTTCTTCATGTGCTACC
+ATGGTGCGTTGCAGTTCCTCGTCGACCTCCGCGTTGGCGTACAAGCAGATGCACAGACAC
+GCAACAAGACCTCCCTTGCCCACACTAGACACTCCTTCCTGGAATGCCAACAGTGCCGTT
+TCATCCATCATTTACGAAACACCAGCGCCTTCTCGTCAACCAAGAAAACAGCATGTCTTG
+AACTGTTTGGTGCAAAACGAACCCGGTGTCTTGTCCAGAGTCTCGGGTACGTTAGCTGCC
+AGAGGCTTTAACATCGATTCGTTGGTCGTGTGCAACACCGAGGTCAAAGACCTAAGTAGA
+ATGACCATTGTTTTGCAAGGGCAAGATGGCGTAGTCGAACAAGCACGCAGACAAATCGAA
+GACTTGGTCCCCGTCTACGCCGTCCTAGACTATACCAATTCTGAGATCATCAAAAGAGAG
+CTAGTGATGGCCAGAATCTCTCTATTGGGTACTGAATACTTCGAAGACCTACTATTGCAC
+CACCACACTTCCACCAATGCTGGCGCCGCTGACTCCCAAGAATTGGTCGCCGAAATCAGA
+GAAAAGCAATTCCACCCTGCCAACTTGCCCGCCAGTGAGGTATTAAGGTTGAAGCACGAG
+CATTTGAACGATATCACCAACTTGACCAACAACTTTGGAGGTCGTGTCGTCGACATCAGC
+GAAACAAGCTGTATTGTGGAATTGTCTGCAAAACCCACACGTATCTCTGCCTTCTTGAAG
+TTGGTCGAGCCATTCGGTGTCCTAGAGTGTGCAAGAAGCGGTATGATGGCATTGCCAAGA
+ACTCCTTTGAAGACAAGCACCGAGGAAGCTGCCGACGAAGACGAAAAGATCAGCGAAATC
+GTCGACATTTCCCAACTACCACCTGGTTAG
+>CWH36 393 residues Pha 0 Code 0
+ATGGAGCTGGCAAAGGAACGTAATGGCCCACATCAAAAACATCATGGCCAATGTCAAAAT
+CACTGTACTTCTCCAAACACTGTACGACAAAACAAAACAAACAAACTCTTGTTAGTAAAA
+AAGAAAGGGAAACTAGTAATATGGAGACACATCGTAAAAAAAATGTTGCACATACGCTTG
+GTTGTTCTTTGGAGCCATTATCCAGAACAGCACGGACATGGCACTAACCACTATGAATAC
+ACCAACAACAGTATAGCTAAATTGGACGCGCAGAGAGTTAGTAGAAGAAGAAGGAAGAAA
+AGGGAAGCGGAGAGAAGAGATTATGACACATACAAACTACTCATTACTCTTTGTTCTTTA
+TTATTCGTTGGACCTTTGTTTCTTAAAGTATAG
+>PEL1 1251 residues Pha 0 Code 0
+ATGACGACTCGTTTGCTCCAACTCACTCGTCCTCATTACAGATTATTATCCCTACCTCTC
+CAGAAACCCTTCAATATAAAAAGGCAGATGTCCGCTGCGAACCCTTCTCCATTTGGCAAT
+TATTTGAACACGATCACTAAGTCCCTACAACAGAATTTACAAACATGCTTTCATTTCCAA
+GCAAAAGAAATCGATATAATCGAATCTCCATCTCAGTTTTACGATCTCTTGAAGACAAAA
+ATACTTAATTCACAAAATAGAATATTCATTGCGTCTCTGTATTTAGGCAAAAGCGAGACT
+GAGTTGGTGGACTGCATATCCCAGGCATTGACCAAGAACCCCAAGTTGAAAGTTTCTTTT
+CTACTTGATGGCCTTCGAGGAACAAGAGAATTGCCTTCCGCCTGTTCCGCCACTTTATTA
+TCGTCTTTAGTAGCCAAATATGGGTCAGAGAGAGTGGATTGCCGATTGTACAAGACGCCT
+GCTTATCATGGTTGGAAAAAAGTCTTGGTTCCCAAGAGATTTAATGAAGGTTTAGGCTTA
+CAACATATGAAAATATATGGGTTTGATAACGAGGTCATTCTTTCGGGAGCCAACCTTTCG
+AACGACTATTTCACCAACAGACAAGATAGATACTATCTCTTTAAATCTCGAAACTTCTCC
+AACTATTATTTTAAATTACATCAACTCATAAGTTCCTTCAGTTATCAGATTATAAAGCCA
+ATGGTGGATGGTAGCATCAACATCATTTGGCCAGATTCGAATCCTACTGTTGAACCGACG
+AAAAATAAAAGGCTGTTTTTAAGGGAAGCATCTCAATTACTAGATGGCTTTTTAAAGAGT
+TCTAAACAAAGCCTCCCGATTACTGCCGTGGGTCAATTCTCCACATTAGTTTACCCAATT
+TCTCAATTCACTCCACTTTTTCCCAAATATAATGACAAATCGACCGAAAAAAGAACAATA
+TTGTCATTGCTTTCCACTATAACAAGCAATGCCATTTCTTGGACGTTCACTGCAGGATAC
+TTCAATATTTTGCCAGACATCAAAGCAAAACTGCTGGCAACGCCGGTTGCTGAGGCAAAT
+GTAATAACAGCTTCCCCCTTTGCAAACGGCTTTTACCAATCAAAGGGCGTCTCATCAAAT
+TTACCTGGTGCTTACTTGTACCTGTCAAAAAAATTTCTACAAGATGTATGTAGGTACAGA
+CAAGATCATGCTATTACCATTAAGAGAATGGCAAAGAGGCGTAGTAAATAA
+>RER1 567 residues Pha 0 Code 0
+ATGGATTACGATAGCTCTGATACAATGAACGGTGGTTCAAGTAACCCCTTAATCACTAAG
+ATGAATACAATGAAATTATTATATCAACACTATTTGGATAAAGTCACTCCTCACGCTAAG
+GAGAGGTGGGCTGTATTGGGTGGTTTGTTATGTTTGTTTATGGTTCGTATTACAATGGCC
+GAAGGCTGGTATGTGATTTGTTATGGTCTAGGTCTATTTTTATTGAATCAATTTTTAGCC
+TTTTTGACCCCAAAATTCGATATGTCCTTACAGCAAGATGAAGAAAACAACGAATTGGAA
+GCTGGAGAAAAATCAGAAGAATTCCGTCCATTCATCAGAAGATTACCAGAGTTCAAATTC
+TGGTATAACAGCATTAGAGCCACTGTCATTTCCCTCTTGTTGTCGCTATTTTCAATCTTC
+GATATTCCAGTATTTTGGCCCATCTTATTGATGTATTTCATATTATTGTTTTTTTTAACT
+ATGAGAAGGCAGATTCAACATATGATAAAATATAGATATATACCCTTAGATATCGGTAAG
+AAGAAATATTCTCATTCTTCTAACTGA
+>CDC10 969 residues Pha 0 Code 0
+ATGGATCCTCTCAGCTCAGTACAGCCTGCTTCTTATGTTGGTTTTGATACCATCACGAAT
+CAGATCGAACATCGTCTGTTGAAGAAAGGTTTTCAATTTAATATAATGGTTGTTGGCCAA
+TCCGGATTGGGTAAAAGTACTCTAATAAATACGTTATTTGCCTCACATTTGATTGATTCT
+GCTACTGGTGATGATATTTCTGCCCTGCCTGTTACAAAAACAACTGAAATGAAAATTTCT
+ACTCATACTCTTGTGGAGGACCGCGTTCGCTTGAATATTAATGTTATAGATACACCTGGA
+TTTGGTGACTTTATTGACAATTCTAAAGCTTGGGAGCCTATTGTGAAGTACATTAAGGAA
+CAACATTCTCAATACTTACGTAAAGAATTGACAGCCCAACGTGAAAGGTTTATTACTGAT
+ACAAGAGTTCATGCAATTCTTTATTTCCTGCAACCAAATGGAAAGGAGTTGAGCCGCCTT
+GACGTTGAAGCCTTGAAAAGATTGACAGAAATAGCAAATGTTATACCAGTTATTGGCAAG
+TCGGATACATTGACTTTAGATGAAAGAACGGAGTTTAGGGAGCTTATTCAAAATGAATTC
+GAAAAATACAATTTCAAGATTTATCCTTATGATTCGGAAGAACTAACTGACGAGGAATTA
+GAACTAAACAGAAGTGTTAGATCTATCATTCCGTTTGCAGTGGTTGGTTCTGAGAATGAG
+ATTGAAATAAACGGTGAAACCTTCAGGGGAAGAAAAACTCGTTGGAGCGCTATTAATGTT
+GAGGATATCAACCAGTGTGATTTTGTATATTTAAGGGAATTTTTGATTCGAACTCATCTC
+CAAGACTTAATCGAAACAACTTCCTACATTCATTATGAAGGGTTCAGAGCAAGACAATTA
+ATTGCCTTGAAAGAAAATGCGAATAGTCGTTCCTCAGCTCATATGTCTAGCAACGCCATT
+CAACGTTGA
+>MRPL32 552 residues Pha 0 Code 0
+ATGAATTCTTTGATTTTTGGTAAACAATTAGCATTTCACAAAATTGTGCCTACCACTGCA
+ATTGGGTGGTTGGTACCGCTAGGAAATCCTTCACTGCAGATTCCAGGCCAAAAACAACTG
+GGATCTATCCACCGTTGGTTGAGAGAAAAGCTACAACAAGATCATAAGGACACTGAAGAT
+AAAGATTTTTTCTCTAATAATGGTATTCTACTAGCAGTTCCTAAAAAAAAAGTATCACAC
+CAAAAAAAAAGGCAAAAACTTTACGGTCCAGGTAAGAAGCAATTGAAGATGATTCACCAT
+TTGAATAAGTGCCCATCATGCGGCCATTATAAGAGAGCCAATACACTGTGTATGTATTGT
+GTTGGACAAATAAGTCATATATGGAAAACGCATACCGCTAAAGAAGAAATTAAGCCGAGA
+CAAGAGGAGGAACTTTCCGAACTAGACCAAAGAGTCCTATATCCTGGTAGAAGAGATACC
+AAATATACCAAGGATTTGAAAGATAAAGATAACTATTTGGAACGTCGCGTTCGGACTTTA
+AAAAAGGACTAG
+>YCP4 744 residues Pha 0 Code 0
+ATGGTAAAGATTGCGATAATTACTTACTCTACCTACGGGCACATAGACGTTTTAGCCCAA
+GCTGTTAAGAAAGGTGTGGAGGCAGCTGGTGGTAAAGCTGATATATACAGGGTCGAGGAA
+ACTTTACCTGATGAAGTCCTCACCAAGATGAACGCTCCTCAGAAACCTGAAGATATTCCT
+GTTGCCACTGAGAAAACGTTGCTCGAATATGACGCCTTTTTGTTCGGTGTTCCAACTAGG
+TTTGGTAATTTGCCGGCTCAATGGTCCGCCTTTTGGGATAAAACCGGTGGATTATGGGCC
+AAGGGCTCTTTGAACGGCAAAGCTGCGGGGATATTCGTTAGTACTTCCAGTTACGGAGGT
+GGTCAAGAAAGTACCGTTAAAGCCTGTTTGTCTTATTTAGCTCATCACGGAATTATCTTT
+TTACCACTGGGTTATAAGAATTCATTTGCTGAGTTAGCCAGTATAGAAGAGGTACACGGT
+GGCTCTCCATGGGGTGCTGGTACCCTTGCAGGACCTGACGGCTCAAGAACTGCGTCTCCA
+CTTGAATTGAGAATTGCTGAAATTCAAGGTAAAACATTCTACGAAACCGCCAAAAAACTT
+TTCCCTGCAAAAGAAGCCAAGCCCTCCACTGAAAAGAAGACCACTACTTCTGATGCGGCT
+AAGAGACAAACTAAACCTGCAGCAGCTACAACTGCAGAAAAGAAGGAGGACAAAGGATTA
+TTATCCTGCTGTACTGTCATGTAA
+>CIT2 1383 residues Pha 0 Code 0
+ATGACAGTTCCTTATCTAAATTCAAACAGAAATGTTGCATCATATTTACAATCAAATTCA
+AGCCAAGAAAAGACTCTAAAAGAGAGATTTAGCGAAATCTACCCCATCCATGCTCAAGAT
+GTAAGGCAATTCGTTAAAGAGCATGGCAAAACTAAAATTAGCGATGTTCTATTAGAACAG
+GTATATGGTGGTATGAGAGGTATTCCAGGGAGCGTATGGGAAGGTTCCGTTTTGGACCCA
+GAAGACGGTATTCGTTTCAGAGGTCGTACGATCGCCGACATTCAAAAGGACCTGCCCAAG
+GCAAAAGGAAGCTCACAACCACTACCAGAAGCTCTCTTTTGGTTATTGCTAACTGGCGAG
+GTTCCAACTCAAGCGCAAGTTGAAAACTTATCAGCTGATCTAATGTCAAGATCGGAACTA
+CCTAGTCATGTCGTTCAACTTTTGGATAATTTACCAAAGGACTTACACCCAATGGCTCAA
+TTCTCTATTGCTGTAACTGCCTTGGAAAGCGAGTCAAAGTTTGCTAAGGCTTATGCTCAA
+GGAATTTCCAAGCAAGATTATTGGAGTTATACTTTTGAAGATTCACTAGACTTGCTGGGT
+AAATTGCCAGTTATTGCAGCTAAAATTTATCGTAATGTATTCAAAGATGGCAAAATGGGT
+GAAGTGGACCCAAATGCCGATTATGCTAAAAATCTGGTCAACTTGATTGGTTCTAAGGAT
+GAAGATTTCGTGGACTTGATGAGACTTTATTTAACCATTCATTCGGATCACGAAGGTGGT
+AATGTATCTGCACATACATCCCATCTTGTGGGCTCAGCACTATCATCACCTTATCTGTCC
+CTTGCATCAGGTTTGAACGGGTTGGCTGGCCCACTTCATGGGCGTGCTAATCAAGAAGTA
+CTAGAATGGTTATTTGCACTTAAAGAAGAGGTAAATGATGACTACTCTAAAGATACGATC
+GAAAAATATTTATGGGATACTCTAAACTCAGGAAGAGTCATTCCCGGTTATGGTCATGCT
+GTGCTAAGGAAAACTGATCCTCGTTATATGGCTCAGCGTAAGTTTGCCATGGACCATTTT
+CCAGATTATGAATTATTCAAGTTAGTTTCATCAATATACGAGGTAGCACCTGGCGTATTG
+ACTGAACATGGTAAAACTAAAAATCCATGGCCAAATGTAGATGCTCACTCTGGTGTCTTA
+TTACAATATTATGGACTAAAAGAATCTTCTTTCTATACCGTTTTATTTGGCGTTTCAAGG
+GCATTTGGTATTCTTGCTCAATTGATCACTGATAGGGCCATCGGTGCTTCCATTGAAAGG
+CCAAAGTCCTATTCTACTGAGAAATACAAGGAATTGGTCAAAAACATTGAAAGCAAACTA
+TAG
+>YCP7 720 residues Pha 0 Code 0
+ATGCAGCCTCATTTAGACAACAACAGTAATAATGACGATGTCAAATTGGATACATTAGGG
+GAACAAAATGTGTTATCATCCGCAGAAAATATCACTTTACCTGAAGACACCTTTAAATCA
+TATATGACCTACTTGCTGTACGAGATGGCTCATTACAAACCGATGATATTTTCCTTCTTG
+GCACTTTCAGTTTCAATTTTAATAGTTGTGATCTTTCATAATGTTAAAGCTTGTGATGTC
+GTTTTTGGTTTTTCAATTTTCGTCACTTCTATTTTGTTTTTGTCTACGTTGATTCCGTTT
+AATGTGTATATCTCGGATGAGGGTTTCAGAATTAAGCTTTTGCTGGAAGTTATCACCCAC
+AGGCCAGCGGTAAAGGGAAAAGAATGGAGAGCAATCACAGACAATATGAATCAATATTTA
+CTTGATAATGGTTTATGGAGTACTCGCTATTACTTTTATAGTAGTGAAAGATGCTACAAA
+TTCTTCAGATTTCTTGTGAAAGAAAAACCCCCAGGTGTGAATGTAAATTCATCGGTAAAG
+GACGCCACAAGTACGCAGATAGATGCACCAGCAAATGAGGCTTCAAATGAGGTAATAAAA
+TGCTTTAGTTTCAGTTCTGACCCAATATTCGAAGCATACTTTGTTAAAGCAGTAGAAGTT
+GAGAAACAAGCACAACAGGAATATTGGAGAAAGCAATATCCTGACGCCGATATACCATGA
+>SAT4 1812 residues Pha 0 Code 0
+ATGACTGGTATGAATGATAATAATGCCGCTATTCCTCAGCAAACTCCAAGGAAACATGCG
+CTATCTTCTAAAGTTATGCAACTTTTTAGAAGCGGTTCAAGATCATCTAGGCAGGGAAAG
+GCCTCATCGAATATCCAGCCACCTTCTAATATAAACACAAACGTTCCATCGGCGTCTAAA
+TCAGCCAAATTTGGTTTACATACCCCAACCACTGCTACTCCTAGGGTAGTTTCTAATCCT
+TCTAATACTGCAGGTGTGAGTAAACCGGGCATGTATATGCCCGAATATTACCAGTCGGCA
+TCACCATCGCACTCTAGTTCATCCGCATCATTAAACAACCATATTGATATTAACACCTCT
+AAGTCATCATCAGCTGCTTCTTTAACTTCGTCAGTATCAGCTTTATCCTTATCACCCACA
+TCAGCCATAAATATTAGCTCCAAAAGTTTGAGCCCAAAGTTCTCTCATCATAGTAACAGC
+AATACTGCTATTACACCCGCGCCTACTCCCACTGCTTCAAATATTAATAATGTAAATAAG
+ATAACCAATACAAGTGCACCTATTTGTGGGAGGTTTCTTGTGCATAAAGATGGTACCCAT
+GAACATCACTTAAAAAATGCTAAGAGACAAGAAAAGCTAAGCACAATGATTAAAAACATG
+GTTGGTGCGAGCAAATTACGTGGTGAGGCAAAATCTGCTGTCCCTGATATAATAATGGAT
+CCAAAGACGACTTTAAAATCCAACAAGAATCCTCCTACTCTTTTTGCAGGCTTCATGAAG
+CAGGTCGTGGATATGGATGATAAATATCCAGAAGGCGCTCCCACAAGTGGCGCTTTAAAT
+TGTCCTGAAAGGGATATATACAGGTCAGATCAAAAAGATTCCAAAAATAATACGCATAAT
+ATCACTACTACTAAAAAAGATAGGCAATGTTTTGCCGAAAAGTATGGTCGCTGTCAAGAA
+GTCCTTGGTAAAGGTGCTTTTGGTGTAGTAAGAATATGTCAAAAGAAAAATGTTTCTTCT
+CAAGATGGTAATAAAAGTGAAAAGCTTTATGCAGTGAAAGAGTTCAAGCGTAGAACATCC
+GAATCAGCAGAAAAGTATTCTAAGAGGTTGACTTCTGAATTTTGCATTTCTTCTTCATTA
+CACCATACAAATATTGTTACTACACTAGATCTTTTCCAAGATGCCAAAGGCGAGTACTGT
+GAAGTAATGGAATATTGTGCAGGTGGCGATCTATTCACTTTGGTCGTTGCCGCCGGAAAA
+TTAGAATATATGGAAGCAGATTGTTTCTTCAAGCAGCTTATTAGAGGTGTTGTTTATATG
+CATGAAATGGGTGTTTGTCATAGAGATTTGAAGCCTGAGAACTTACTGCTTACGCACGAT
+GGTGTGCTAAAAATTACAGACTTTGGTAACAGCGAATGTTTCAAGATGGCATGGGAAAAA
+AATATTCACCTTAGTGGAGGCGTTTGCGGTTCATCGCCGTACATCGCCCCAGAGGAATAT
+ATCAAAGAAGAGTTTGATCCAAGACCCGTAGATATATGGGCATGTGGTGTCATTTATATG
+GCAATGAGAACTGGTAGACAATTGTGGAGTTCTGCTGAAAAAGACGATCCATTTTATATG
+AATTATTTAAAAGGACGTAAGGAAAAGGGAGGCTATGAGCCAATCGAAAGTTTAAAAAGA
+GCCAGGTGTAGGAATGTTATATATTCGATGTTAGATCCCGTTCCGTACAGAAGAATTAAC
+GGGAAACAAATTTTGAACAGTGAATGGGGAAGGGAGATAAAATGCTGCCATAATGGGCGC
+GCATTGAAATAA
+>RVS161 798 residues Pha 0 Code 0
+ATGAGTTGGGAAGGTTTTAAGAAAGCTATCAACAGAGCTGGTCACAGTGTGATAATTAAG
+AATGTCGACAAGACCATTGATAAAGAGTATGACATGGAAGAACGTCGTTATAAAGTTCTT
+CAAAGAGCAGGTGAGGCATTACAAAAGGAAGCCAAAGGTTTCTTGGACTCATTGAGAGCT
+GTGACAGCATCACAGACTACCATTGCCGAGGTCATCTCTAACCTCTATGACGATTCAAAA
+TATGTTGCTGGTGGTGGTTACAACGTTGGTAACTATTATTTGCAATGTGTTCAAGATTTT
+GATAGCGAAACTGTTAAGCAATTAGACGGGCCCTTAAGAGAAACCGTACTAGATCCAATA
+ACAAAGTTTTCGACGTATTTCAAAGAAATTGAGGAGGCCATAAAAAAGAGAGACCATAAG
+AAACAAGACTTCGATGCTGCGAAGGCAAAAGTTCGTAGATTAGTGGACAAACCTGCTAAA
+GATGCCTCTAAACTGCCAAGGGCTGAAAAAGAATTGAGCTTAGCTAAAGATATTTTCGAA
+AATCTTAATAACCAATTGAAAACTGAACTACCACAGTTAGTTTCATTAAGAGTACCTTAC
+TTTGACCCAAGTTTTGAAGCTTTAATCAAGATTCAGCTAAGGTTCTGTACTGATGGTTAC
+ACTCGTTTAGCGCAGATTCAACAATATTTGGACCAACAATCAAGAGACGACTATGCCAAT
+GGGTTATTAGACACTAAAATCGAAGAACTATTAGGACAAATGACAAGCCTAGATATTTGT
+GCGCTCGGGATAAAATAA
+>YCQ0 852 residues Pha 0 Code 0
+ATGTCTGACAAGGAACAAACGAGCGGAAACACAGATTTGGAGAATGCACCAGCAGGATAC
+TATAGTTCCCATGATAACGACGTTAATGGCGTTGCAGAAGATGAACGTCCATCTCATGAT
+TCGTTGGGCAAGATTTACACTGGAGGTGATAACAATGAATATATCTATATTGGGCGTCAA
+AAGTTTTTGAAGAGCGACTTATACCAAGCCTTTGGTGGTACCTTGAATCCAGGGTTAGCT
+CCTGCTCCAGTGCACAAATTTGCTAATCCTGCGCCCTTAGGTCTTTCAGCCTTCGCGTTG
+ACGACATTTGTGCTGTCCATGTTCAATGCGAGAGCGCAAGGGATCACTGTTCCTAATGTT
+GTCGTCGGTTGTGCTATGTTTTATGGTGGTTTGGTGCAATTGATTGCTGGTATTTGGGAG
+ATAGCTTTGGAAAATACTTTTGGTGGTACCGCATTATGTTCTTACGGTGGGTTTTGGTTG
+AGTTTCGCTGCAATTTACATTCCTTGGTTTGGTATCTTGGAAGCTTACGAAGACAATGAA
+TCTGATTTGAATAATGCTTTAGGATTTTATTTGTTGGGGTGGGCCATCTTTACGTTTGGT
+TTAACCGTTTGTACCATGAAATCCACTGTTATGTTCTTTTTGTTGTTCTTCTTACTAGCA
+TTAACTTTCCTACTGTTGTCTATTGGTCACTTTGCTAATAGACTTGGTGTCACAAGAGCT
+GGTGGTGTCCTGGGAGTTGTTGTTGCTTTCATTGCTTGGTACAACGCATATGCAGGTGTT
+GCTACAAAGCAGAATTCATATGTACTGGCTCGTCCATTCCCATTACCATCTACTGAAAGG
+GTAATCTTTTAA
+>ADP1 3150 residues Pha 0 Code 0
+ATGGGAAGTCATCGACGTTATCTCTACTATAGTATATTATCATTTCTATTATTATCCTGC
+TCAGTGGTACTTGCAAAACAAGATGAGACCCCATTCTTTGAAGGTACTTCTTCGAAAAAT
+TCGCGTCTAACTGCACAAGATAAGGGCAATGATACGTGCCCGCCATGTTTTAATTGTATG
+CTACCTATTTTTGAATGCAAACAGTTTTCTGAATGCAATTCGTACACTGGTAGATGTGAG
+TGTATAGAAGGGTTTGCAGGTGATGATTGCTCTCTGCCCCTCTGTGGCGGTCTATCACCG
+GATGAAAGCGGTAATAAGGATCGTCCCATAAGAGCACAAAATGACACCTGTCATTGTGAT
+AACGGATGGGGAGGGATCAATTGTGACGTTTGTCAAGAAGATTTTGTCTGTGATGCGTTC
+ATGCCTGATCCTAGTATTAAGGGGACATGTTATAAGAATGGTATGATTGTAGATAAAGTA
+TTTTCAGGTTGTAATGTGACCAATGAGAAAATTCTACAGATTTTGAACGGCAAAATACCA
+CAAATTACATTTGCCTGTGATAAACCTAATCAAGAATGTAATTTTCAGTTTTGGATAGAT
+CAGTTAGAAAGCTTCTATTGTGGCTTAAGTGATTGTGCCTTTGAATACGACTTGGAACAG
+AATACCTCCCATTATAAGTGTAATGACGTTCAATGCAAATGCGTTCCCGACACTGTGTTG
+TGTGGTGCTAAGGGGTCTATAGATATCTCGGATTTCCTGACAGAGACAATAAAAGGGCCA
+GGAGATTTCAGCTGTGATTTAGAAACAAGGCAATGTAAATTCAGTGAGCCTTCTATGAAT
+GATTTGATATTGACCGTGTTTGGTGACCCTTATATTACTTTGAAGTGTGAATCCGGTGAA
+TGTGTTCATTATAGTGAGATTCCAGGTTACAAATCTCCTTCAAAAGATCCAACAGTGTCA
+TGGCAAGGGAAATTGGTGTTGGCATTGACTGCTGTGATGGTCCTGGCACTTTTTACATTT
+GCTACCTTTTACATTTCTAAATCTCCGTTATTCAGAAATGGATTGGGTTCCTCAAAGTCT
+CCCATTCGTTTGCCAGATGAAGATGCGGTGAATAATTTCTTACAAAATGAAGATGACACA
+CTGGCGACATTAAGTTTTGAAAATATCACTTATAGTGTCCCCTCGATAAATTCAGATGGT
+GTTGAAGAAACTGTGCTGAATGAAATAAGTGGTATCGTGAAGCCCGGCCAAATATTAGCT
+ATCATGGGTGGATCTGGTGCGGGTAAAACTACTTTATTAGATATCCTAGCAATGAAACGG
+AAAACAGGTCACGTTTCGGGTTCCATAAAAGTTAACGGTATTAGTATGGACCGTAAATCT
+TTCTCGAAAATAATCGGGTTCGTCGATCAAGATGACTTTTTGCTGCCCACTTTGACTGTT
+TTTGAAACCGTATTAAATAGTGCGCTGTTAAGATTGCCAAAAGCATTGTCATTCGAGGCC
+AAGAAGGCAAGAGTTTATAAGGTGTTGGAAGAACTAAGAATTATTGATATCAAAGATCGT
+ATTATTGGTAATGAATTTGATCGTGGTATTAGTGGAGGTGAAAAACGCCGAGTTTCCATT
+GCATGTGAATTAGTGACATCTCCATTGGTTTTATTTTTGGATGAACCTACATCTGGTTTA
+GATGCTAGTAATGCCAATAATGTTATTGAATGTTTGGTAAGGTTATCCAGCGACTATAAC
+AGGACATTGGTGCTATCTATTCATCAGCCAAGATCAAATATATTTTATTTATTCGATAAA
+TTGGTCCTGTTAAGTAAAGGTGAGATGGTCTATTCCGGAAATGCCAAAAAAGTGTCAGAA
+TTTTTGAGAAATGAGGGATATATCTGTCCGGACAACTATAATATTGCTGATTATTTGATT
+GATATTACTTTTGAAGCCGGTCCTCAGGGGAAAAGGAGAAGAATCAGAAACATTTCCGAT
+TTAGAAGCTGGTACGGATACTAACGATATTGATAATACGATACACCAAACAACATTTACT
+AGCAGTGATGGTACAACACAGAGAGAGTGGGCTCATCTTGCAGCTCATAGAGATGAGATC
+AGATCTTTACTCAGAGATGAAGAAGATGTAGAGGGAACAGATGGAAGGCGAGGTGCTACT
+GAGATTGACTTAAATACCAAACTACTACACGATAAATATAAAGATAGCGTCTATTATGCA
+GAGCTTTCACAGGAGATCGAGGAAGTTTTAAGCGAAGGTGATGAGGAAAGTAACGTTTTG
+AATGGAGATTTACCCACAGGTCAACAATCTGCTGGTTTTCTGCAACAGTTATCGATATTG
+AATTCAAGAAGTTTTAAAAACATGTACAGAAACCCTAAACTATTATTGGGTAATTATTTA
+CTGACGATCCTATTGAGTTTATTCTTGGGAACACTATATTACAACGTCTCCAATGATATC
+AGCGGTTTTCAGAACAGAATGGGGCTGTTCTTCTTTATACTAACGTACTTCGGTTTTGTT
+ACATTCACAGGTCTCAGCTCGTTCGCTCTGGAAAGGATCATTTTCATAAAAGAAAGATCC
+AATAACTATTACTCGCCACTTGCATACTACATTAGTAAGATAATGAGCGAAGTGGTCCCG
+CTACGTGTTGTACCACCTATACTCTTGTCATTGATTGTTTACCCAATGACTGGTTTAAAC
+ATGAAAGACAATGCTTTTTTTAAATGTATTGGAATCCTTATACTGTTTAACCTTGGGATA
+TCGTTGGAAATCCTAACCATCGGCATAATTTTTGAAGACTTGAATAACTCCATAATATTA
+AGCGTGCTGGTGCTTTTGGGCTCACTACTGTTTAGCGGACTATTTATCAATACTAAGAAT
+ATTACAAACGTGGCCTTCAAGTACCTGAAAAACTTCTCTGTGTTTTACTACGCCTACGAA
+TCTTTATTGATCAATGAGGTCAAAACATTGATGCTGAAAGAGAGAAAGTACGGCTTAAAT
+ATTGAAGTTCCAGGCGCTACTATCTTGAGCACATTTGGATTTGTTGTCCAAAACCTTGTA
+TTTGACATCAAGATCCTGGCTCTGTTTAATGTGGTGTTTTTAATAATGGGGTATCTAGCC
+CTTAAGTGGATAGTTGTGGAACAAAAGTAG
+>PGK1 1251 residues Pha 0 Code 0
+ATGTCTTTATCTTCAAAGTTGTCTGTCCAAGATTTGGACTTGAAGGACAAGCGTGTCTTC
+ATCAGAGTTGACTTCAACGTCCCATTGGACGGTAAGAAGATCACTTCTAACCAAAGAATT
+GTTGCTGCTTTGCCAACCATCAAGTACGTTTTGGAACACCACCCAAGATACGTTGTCTTG
+GCTTCTCACTTGGGTAGACCAAACGGTGAAAGAAACGAAAAATACTCTTTGGCTCCAGTT
+GCTAAGGAATTGCAATCATTGTTGGGTAAGGATGTCACCTTCTTGAACGACTGTGTGCGT
+CCAGAAGTTGAAGCCGCTGTCAAGGCTTCTGCCCCAGGTTCCGTTATTTTGTTGGAAAAC
+TTGCGTTACCACATCGAAGAAGAAGGTTCCAGAAAGGTCGATGGTCAAAAGGTCAAGGCT
+TCCAAGGAAGATGTTCAAAAGTTCAGACACGAATTGAGCTCTTTGGCTGATGTTTACATC
+AACGATGCCTTCGGTACCGCTCACAGAGCTCACTCTTCTATGGTCGGTTTCGACTTGCCA
+CAACGTGCTGCCGGTTTCTTGTTGGAAAAGGAATTGAAGTACTTCGGTAAGGCTTTGGAG
+AACCCAACCAGACCATTCTTGGCCATCTTAGGTGGTGCCAAGGTTGCTGACAAGATTCAA
+TTGATTGACAACTTGTTGGACAAGGTCGACTCTATCATCATTGGTGGTGGTATGGCTTTC
+ACCTTCAAGAAGGTTTTGGAAAACACTGAAATCGGTGACTCCATCTTCGACAAGGCTGGT
+GCTGAAATCGTTCCAAAGTTGATGGAAAAGGCCAAGGCCAAGGGTGTCGAAGTCGTCTTG
+CCAGTCGACTTCATCATTGCTGATGCTTTCTCTGCTGATGCCAACACCAAGACTGTCACT
+GACAAGGAAGGTATTCCAGCTGGCTGGCAAGGGTTGGACAATGGTCCAGAATCTAGAAAG
+TTGTTTGCTGCTACTGTTGCAAAGGCTAAGACCATTGTCTGGAACGGTCCACCAGGTGTT
+TTCGAATTCGAAAAGTTCGCTGCTGGTACTAAGGCTTTGTTAGACGAAGTTGTCAAGAGC
+TCTGCTGCTGGTAACACCGTCATCATTGGTGGTGGTGACACTGCCACTGTCGCTAAGAAG
+TACGGTGTCACTGACAAGATCTCCCATGTCTCTACTGGTGGTGGTGCTTCTTTGGAATTA
+TTGGAAGGTAAGGAATTGCCAGGTGTTGCTTTCTTATCCGAAAAGAAATAA
+>POL4 1749 residues Pha 0 Code 0
+ATGTCTCTAAAGGGTAAATTTTTCGCCTTTTTACCTAATCCTAACACATCTTCCAATAAG
+TTCTTTAAGAGTATATTGGAGAAAAAGGGCGCCACAATTGTGTCAAGTATTCAAAATTGT
+CTTCAATCTAGCCGTAAGGAAGTTATCATTTTGATTGAGGACTCCTTTGTTGATTCTGAT
+ATGCATTTGACTCAGAAAGATATTTTCCAAAGGGAAGCAGGCTTAAATGATGTCGATGAA
+TTTCTTGGTAAGATTGAACAGTCAGGCATTCAATGTGTGAAAACCAGTTGCATCACAAAG
+TGGGTCCAGAATGATAAATTTGCGTTTCAAAAAGATGATTTGATTAAATTTCAACCATCC
+ATTATCGTTATATCAGATAACGCTGATGACGGACAAAGTTCTACTGATAAAGAGAGTGAG
+ATTTCAACTGACGTAGAAAGTGAAAGGAATGATGACAGCAACAATAAGGATATGATACAA
+GCTTCAAAACCTCTTAAGCGACTTTTACAGGAGGATAAAGGAAGAGCTTCCCTTGTTACT
+GACAAAACGAAGTACAAAAACAATGAATTGATTATCGGAGCGTTGAAAAGGTTAACAAAA
+AAATATGAGATCGAAGGTGAGAAATTTCGTGCAAGAAGTTATAGACTGGCTAAACAGTCG
+ATGGAAAATTGCGATTTCAATGTTCGTTCCGGTGAAGAAGCACATACTAAATTAAGGAAT
+ATCGGGCCTAGTATTGCCAAAAAAATACAAGTTATATTAGATACGGGAGTTTTACCAGGT
+TTAAATGATTCAGTGGGATTAGAAGACAAGTTAAAATACTTCAAAAATTGTTACGGCATT
+GGGTCGGAAATTGCTAAACGCTGGAATCTTCTAAATTTTGAAAGCTTTTGTGTTGCAGCT
+AAGAAGGATCCAGAGGAGTTTGTATCAGATTGGACAATTTTATTTGGTTGGTCATATTAC
+GACGATTGGTTATGCAAGATGTCTCGGAATGAATGTTTCACACATTTAAAGAAGGTTCAA
+AAAGCGCTGCGTGGCATTGATCCTGAATGCCAAGTCGAATTACAGGGAAGTTATAATAGG
+GGCTATTCCAAGTGTGGTGACATTGATCTTTTATTTTTCAAGCCGTTTTGTAATGACACG
+ACCGAGTTGGCAAAAATCATGGAAACGCTTTGTATTAAGTTGTACAAGGATGGCTATATC
+CATTGTTTTTTACAGCTAACGCCAAACTTGGAAAAGCTATTCTTAAAAAGAATAGTGGAG
+AGATTTCGTACAGCGAAGATTGTTGGGTATGGAGAAAGAAAGAGGTGGTATTCTTCTGAG
+ATAATCAAGAAATTTTTCATGGGAGTCAAATTCTCTCCAAGAGAATTAGAAGAACTGAAA
+GAAATGAAAAATGATGAAGGCACATTGTTAATTGAAGAAGAAGAAGAAGAAGAAACAAAA
+TTAAACCCGATTGACCAATATATGTCTCTGAATGCCAAGGATGGAAATTATTGCAGAAGA
+TTAGACTTTTTTTGTTGCAAGTGGGATGAGCTTGGAGCAGGAAGAATACACTATACTGGA
+TCTAAAGAGTACAATAGATGGATAAGAATATTGGCAGCGCAAAAAGGCTTCAAGCTTACA
+CAACACGGTTTATTTCGAAATAATATCCTTCTCGAAAGCTTTAACGAACGCAGAATTTTC
+GAGTTATTAAACTTAAAATACGCTGAACCCGAACATAGAAATATCGAATGGGAAAAAAAA
+ACTGCATAA
+>YCQ7 2862 residues Pha 0 Code 0
+ATGCTGATCATCAATGGGAAGATCATCCCTATAGCTCATACTATTTGCGCATTCTCCGCC
+TTCTTTGCAGCTTTGGTCACTGGTTATTCATTACATTTTCATAAAATTGTAACCAATGCA
+CATTATACGTATCCAGATGAGTGGTTTCCTAGTGTATCAGCCACTATCGGGGACCGCTAT
+CCGGAACGTTCTATTTTCCAAATCTTAATAGCTCTAACTGCTTTTCCAAGATTTTTACTG
+CTACTAGGTCACTACTACTTGAACCAATCTAAGGTATGCTTCCTTGTCGGTGTACTCCGG
+ACAGTCTCTTGCGGTGGTTGGGTATACATTACAAGTACAGATGACCACGATATTCATGAT
+ATATTTATGATCACATACATTGTTTTAACGTTACCATGGGATATAATGATTACCCGCTAT
+TCTAGTCCTTTAACTTCGAAGAACAAAGGGTTGACTGCTACAATTTTTTTTGGAACATTG
+TTCCCGATGATTTACTGGTACATTCAGCACTCCGTCCAACAGAGAGCTGGGGCATATTCT
+ATATATGCTTATTTCGAATGGTCTCTGATTCTTTTAGATATTGCATTTGATGCATTTGCT
+TACGCTGATTTCAAAAAGATAGATATTGTTCTCGCTTTTAATGAGAAACCCGGTAATACC
+AGTTTTTTCCAAATTAGAGACTCTAATCCCATAAATTATGGAGAAGAAAAAAGTTCAGAA
+TTGCAGAAAAGTGGTGAAAAGAAGGTTGAAAAGGAAAAACCCGTTGCTAGAAGCGCAACT
+GGTTCATATTTCAGGTTTGACTCTTTTTTTTACTTACTAACAAATATTTTTAACGGTTTT
+CTTTTCTGGTCGAACGTTACGTCCCTTTTATGTAGTATTTGGCATTTCCCGCTATGGTAT
+ATGGGAATCTCAGGTTATGAAGCTGCAATATTGGGTTATTTGGGACCCATTTTCTTATAT
+CTGCCGTTCGTTTCTGAAGCCTTCATGCAATATGGTGTACTTTTAGGAGGTATTATTGCC
+ATTGGTGCCTATATTGTTCAGATGCCAGAATTAAGGTTGATTTCTGTAGCTGTGGGAACT
+TCCATTACCGTTGCAACGTTTGTACAAAATCTAAGATATATCACAAATGCGGAGACTAGT
+TTCTCTTTTGCTCTAACTTGGCTGCTAGGTCTTGTTGCATCTGTGATCTTGAAAATGGGG
+TTCTATACCAACAACCCAACTTGGGTCATTTTAGATGAACGTAATGGTGGGTATAATAAG
+ACAGCTCTCGTGCTTACTGTTTTATTCGGCATGCTGTCGCCTTATGTTAATTCAATTAAT
+TTCGAAGGGAAAAGGAATGCTCAAGCAAAATCTGCTTCGTTGATCGGCAAATTATTTTTG
+GCTGTTGGTTTTGGCTCGTTGTTATTCGGAATTCATCAGTTATTGACGGATTCTTCTACT
+ACTATTTATTGGGCATGGGAAGGTTACAATGAATCACACGGTCCCTTGCCATGGCCTTGG
+GGCGCCTTAACTTGTACGGTCATGTTATTTGCTTCTTTGAGTTCTGTGAAGTTTATGGGC
+AAGCCATTAGTTCCATGTTTGTTGCTTCTCATATCCACTGCTGTACTTTCAGCTAGAAGC
+ATTACACAATGGCCTAAATATATTTTTGGTGGTTTATTGTACGCTATCGCTATGCTTTGG
+TTAGTTCCTTCGTATTTTTCTGCATTAGGCCAAGTTCAAAACATATGGGTTTATGTCCTA
+TCATTCTCCGTTTATATTATCTTTGTCCTTGCCCATGTTTGGGTCGTTGCATACGCATTT
+GTTCCAATGGGCTGGGTACTGAGGGAGAAGATTGAGACGGTTCTTGCCTTTTCTTCCACA
+TTTATCATTATTGGTGCTTTAACATGCAAAAACCTTAACGTTCAACTGGTGACTATGGGC
+AAAAAATTCTTCATTTATGTTTTCTTCTTTGCCGTGGCCCTACTATCACTAACAGCTAGG
+TTCGTGTATGATATTAGACCTACAGGAATTCCTCAGCCTTATCATCCAGATTCTCAGTTG
+ATTACAGCTGGTATTTGGACTATCCACTTTGGTCTCGATAATGATATGTGGGCATCTGAA
+GACAGAATGATCAACCTTATTAAAGATATGGAACTAGATGTGGTAGGTCTACTAGAAACA
+GATACACAAAGAATTACCATGGGGAACAGGGATCTAACTAGCAAACTAGCTCATGATTTG
+AATATGTATGCAGATTTCGGACCAGGTCCAAATAAACATACCTGGGGCTGTGTTCTTCTT
+TCTAAATTCCCTATCGTAAATTCTACGCATCATTTATTGCCCTCTCCAGTTGGGGAACTT
+GCGCCAGCCATTCATGCCACACTTCAAACGTACAATGACACTCTCGTTGACGTCTTTGTA
+TTCCATAGTGGACAAGAAGAGGATGAAGAGGATAGAAGACTGCAAAGTAACTACATGGCT
+AAGCTCATGGGCAATACGACTCGCCCAGCTATTTTATTAAGTTACTTAGTTGTTGATCCA
+GGTGAAGGCAACTACAATACGTACGTTAGTGAAACATCCGGAATGCACGACATTGATCCC
+TCTGACGATGATAGATGGTGTGAGTATATCTTGTATAAGGGCTTGAGAAGAACAGGATAT
+GCTAGAGTTGCAAGAGGAACGATAACCGATACGGAGCTACAAGTTGGTAAGTTCCAAGTT
+TTGAGTGAGCAAGCGTTAGTAGAGCACTCGGATTCTATGTATGAATACGGTCATATGAGT
+GAACCGGAATATGAGGACATGAAATTTCCAGATAAGTTTTTAGGCGAAGGTGAGAGGGGT
+CACTTCTACCATGTTTTTGATGAGCCACGTTATTACTTATAA
+>SRD1 678 residues Pha 0 Code 0
+ATGCGATATAATAATTATGACAACTCTGGAAGTTCCTTCTTAACTAGAGTAGTTAAAAAG
+TCAGATATGGAGAAAACGTTATTATTAAATAGAGAAATTGATGACTGGAAGTCAAACGAT
+AAAAAGAAGGCATATAAGGAACGCGGAAGAGTTTATGCAAGTTGCTCATTTATTGAAGTA
+TCCTTTTCTCAAATAAGGGCTGTTGATGTTGAAAAAAAAATTGAGAATGCCGAACAACTA
+AGAGATCTTACAAGAAATATTGTTAAGAACAAAACCAGCTCTTTGAACGAAATTACACCC
+TCAAAGAATCGTGTTATTAGTGCATGCAATTCCGAGAGACGTACGACTAGCCAAGAAGCA
+AACAATCTTGAAGGCTACCATAGTTGTGCACAAGGAACTAGTCGGTCTGCCAGTATTACG
+AAGAAATACAGCAAAAAGACTACTAGTCGTCCTAAAAGAGAAAAGAGACAAACAATCCTC
+CCAAATGGTGAGATAAAGGAATGCTCTAAATGTAAAGACACTTGGACAATTCAATGGCGT
+AGTGGACCCGACCAAAACAGGGAACTTTGTAGTCCCTGTGGACTCGCCTATGGAAAAAGA
+CTGAAGAAGGAGAATGAAAAAAAAAGGCAAGCGGCAGATAAAAGGATAGATTCGAAACAA
+TCCATAGTATCTATTTAA
+>MAK32 1092 residues Pha 0 Code 0
+ATGATGAATGAAGAGGATTCTACAGAAACGAAAAGCCTAGTCATAACTAATGGCATGTTT
+ATCATAGACGACATCGAGCGTAGTAAATATAATATTCACTATAAGAATGTCCCAGGAGGC
+GGAGGGACTTTTGCCATTTTGGGTGCATGCATAATATCTTCCGGCAATGTCACATCCAAA
+GGTTTGAAGTGGATAGTGGACAGAGGCTCTGACTTTCCAAAGGAAGTTATAAGGGAAATA
+GACTCATGGGGTACTGATGTGAGGTTTCGAGATGACTTTAGCAGATTAACTACCAAAGGG
+TTGAATTATTACGAGGGAAGTGATGATTTGAGAAAGTTCAAGTTTTTGACGCCGAAGAAG
+CAGATTAACGTCGATGACTGGATTTCCACATTTGGGCAGAAGATAATTGATGAAATGCAT
+GCGTTTCATTTGCTATGTTCTGGGTCTAGATGCTTAGACATAATAAACGATCTGCTACGG
+GTGAAAAGTTCAAAGGGCACAAAACCAATCGTGATTTGGGAGCCATTCCCAGATCTTTGC
+GACTTTGATCATCAAAATGACATTAAAAGTGTAATGCAGAGGAACGATGTTACGGTAATA
+TTATCTCCAAATGCCGAAGAATCAAGTCGCTTATTTGGTTTAAGTAGCAAGGAACCGACT
+AGTTTGGAAGAATGTCTAGCATTAGCGCATCGTTTCGATGATTTCATGGATGAAAACAAT
+ATGTGTATTCTACGATGCGGTGCCCTCGGAAGCATATCGGTAAGTGAGAAGTTTAAGAAC
+GGACGAACCTATGACCATTTCCCCGCCTACCATTTCAAAACTCAGTCTAAAGTACTAGAT
+CCTACTGGCGGGGGAAACTCGTTCCTTGGCGGCTTTGCAGTTTCTTATGCCCTAACGAAA
+AGCTTAGATATTGCTAGTATATGTGGGAACATCGCTGCAGGCGCAATAATTGAACAATTC
+GGAATACCGAGGTACGATCCAATTGCTAAAACCTGGAACGGAATCACATTCTTGGATAGA
+CTGAAATTTTACCTTTCACAGTCCGGTCTTCAATATAATATAAACGATCTTTACAAAAGT
+CTAACACGATGA
+>PET18 648 residues Pha 0 Code 0
+ATGAGCTGTACCACTGATAAGTTAATACAAAAGTACGACGCCCTTGTTAGGAAAACCACA
+GAACATAAATTCGCTAAGGAACTATGTGCCGGAACATTGAAGGACCGTAGTTTGTACATC
+TATTTATCACAAGATCTGCAATTTTTTGAAACTAGCTTAAGGTTGATATGTAAGACGACT
+TCTTTAGCACCAACTACTCACGCTTTAATAACCTTAGCCAAAAAGATTGGATTTTTTTCT
+AATGATGAAAACTCATACTTTCATGACTGCTTAGAATTATTGGCACCATCCCTCACCAAG
+GAAGAAAGAGATAATTTTGACAATAAAGCGATCCCCGGCGTTGATGCGTATATTAATTTC
+TTAGATGAGCTGAGAAAGGACGCCTCAATTACATGGCCATCCTTAGTAACCAGCTTATGG
+GTTGCTGAGGAACTCTATTGGAGATGGGCTCGTGATACTCCTAGAGCCCCAGGGTTGCAT
+TGGAAATATCAAAAATGGATTGATTTACATGATGGTGAGCATTTTCAAACTTGGTGTGAA
+TTTCTAAAGGCTGAAGTTGACAAGTTTCCCGTCGAAGAAGTGGAAAGCATATTTGTGAAG
+GTTTCACAGTTCGAGTTCGAATTTTTTGAATCTTGTTACAACGCCTAA
+>MAK31 267 residues Pha 0 Code 0
+ATGGACATCTTGAAACTGTCAGATTTTATTGGAAATACTTTAATAGTTTCCCTTACAGAA
+GATCGTATTTTAGTTGGAAGCTTGGTTGCTGTAGATGCCCAAATGAATTTGCTATTAGAT
+CATGTTGAGGAACGTATGGGCTCCAGTAGTAGAATGATGGGCCTAGTCAGCGTCCCTAGG
+CGTTCCGTTAAGACCATAATGATTGATAAGCCTGTTCTGCAGGAGCTTACTGCGAATAAA
+GTTGAATTGATGGCTAATATTGTTTAG
+>HSP30 999 residues Pha 0 Code 0
+ATGAACGATACGCTATCAAGCTTTTTAAATCGTAACGAGGCTTTAGGGCTTAATCCACCA
+CATGGCCTGGATATGCACATTACCAAGAGAGGTTCGGATTGGTTATGGGCAGTGTTTGCA
+GTCTTTGGCTTTATATTGCTATGCTATGTTGTGATGTTCTTCATTGCGGAGAACAAGGGC
+TCCAGATTGACTAGATATGCCTTAGCTCCTGCATTTTTGATCACTTTCTTTGAATTTTTT
+GCTTTCTTCACTTATGCTTCTGATTTAGGTTGGACTGGTGTTCAAGCTGAATTTAACCAC
+GTCAAGGTTAGCAAGTCTATCACAGGTGAAGTTCCCGGTATTAGACAAATCTTTTACTCG
+AAATATATTGCCTGGTTCTTGTCCTGGCCATGCCTTTTATTTTTAATCGAGTTAGCCGCT
+AGTACTACTGGTGAGAATGACGACATTTCCGCCTTGGATATGGTACATTCGCTGTTAATT
+CAAATCGTGGGTACCTTATTCTGGGTTGTTTCGCTATTAGTTGGTTCATTGATCAAGTCC
+ACCTACAAGTGGGGTTATTACACCATTGGTGCTGTCGCTATGTTGGTTACCCAAGGTGTG
+ATATGCCAACGTCAATTCTTCAATTTGAAAACTAGAGGGTTCAATGCACTTATGCTGTGT
+ACCTGCATGGTAATCGTTTGGTTGTACTTTATCTGTTGGGGTCTAAGTGATGGTGGTAAC
+CGTATTCAACCAGACGGTGAGGCTATCTTTTATGGTGTTTTGGATTTATGTGTATTTGCC
+ATTTATCCATGTTACTTGCTAATTGCAGTCAGCCGTGATGGCAAATTGCCAAGGCTATCT
+TTGACAGGAGGATTCTCTCATCACCATGCTACGGACGATGTGGAAGATGCGGCTCCTGAA
+ACAAAAGAAGCTGTTCCAGAGAGCCCAAGAGCATCTGGAGAGACTGCAATCCACGAACCC
+GAACCTGAAGCAGAGCAAGCTGTCGAAGATACTGCTTAG
+>YCR3 1836 residues Pha 0 Code 0
+ATGGCGCGTCAAAAGCTTACTTTCAAAGAACAAATGGATGGTTTCCCCTGGGTCCAACTT
+GTTGTTGTGTCCTTAGTTAGGTTCAGCGAACCAATTGCGTTTTCGTCACTATTTCCTTAT
+GTTTATTTCATGGTTAGAGATTTTAATATTGCTCCCAATGATGCTCAAGTGTCCAAATAT
+TCAGGTTATTTATCTTCATCATTTGCGTTATGCCAAGTCATATCTGCGTACCACTGGGGT
+AGATTCTCTGAAAAACATGGCAGAAAAATAACATTGACTTGCGGGCTTATAGGAACATCT
+GTATCATTGTTAATACTGGGATTTTCACACAATTTCTATCAGGCTTTGGTGGCAAGAAGT
+TTAATGGGATTGCTAAATGGTAACGTCGGCGTTATTAGAACCATTATTGGTGAAATAGCA
+ACTGAAAGAAAACATCAGGCTTTAGCTTTCAGTACTATGCCTTTATTATTTCAATTTGGT
+GCCGTTGTTGGGCCTATGATCGGTGGGTTTCTTGTATTTAGAGATGGAACAATGAATGAA
+GTGCCACTATGGTTTCCACATTTTGCAAAAAGAATAATTAGGTCATATCCGTACGCCTTG
+CCAAACGTGGTAGTGTGCATGTTTTTGATGTTTGGTTTAACTAATGCAACATTGTTTTTG
+GAAGAAACACATCCTGCTTTTAAAAATAGAAGAGATTACGGTTTAGAGGTCGGTGATTTT
+ATTAAGAAGAATATATTTGGTATACAGCCGAAAAGAAGACCCTGGCAAAAGCGCATTCAG
+GATGATTCGGAAAACATTCACCACCGTAATGAGAATGTGAACAGCAATCGAGGACAAGAT
+AGTGAAGAGGATGAAAATAGTCCCCTAGTGAATACTACCAATGACGATGATACTGAAAGC
+ATACAATCGATTGATCCTATTTTAACAAGAAGACAGTCTGTAGGCCTGATTAGGACATAT
+TCTCTGCATGAACCAACAGACGCTGTGCATGCCAATATAGATACAGCTCCAGACGGTTGT
+AAAGAAAGTAGTATATTTCATCACGTTTTTCATACAAAAGTATTTTACCCTATATCGGTG
+AATTTTATTATGGCTTTACATTTGATTGTATACAACGAATTTTTGCCTGTTTTTTTAGCT
+TATGATTTAGCCGTAGATCCAGAAAATCCAAAGAAGCTGGCTTCAAAATTTCCGTGGAAA
+ATATCTGGCGGTATAGGTTATGAACCAGAACAAACCGGTACTCTTTTGTCGACAACAGGT
+ATCTTTGGTTGTTTTGTGGTTATTTTCATTTTTCCCATAGTTGATCGAAATTTCGATTGT
+TTAACAATTTTCAGAACTTTAGTCAAGCTGTACCCTATTATGTACGTTATGGTTCCTTAC
+GTTGTTTTTCTACAGAATGAACGGATTCCTAGCTGGTATACTGTCGTCTACTTGTACATA
+ATCACAGGGATAAAAACATTTTGTGGCGCTTTAACGTCACCACAAATTATGTTATTAATT
+CATAATTCGAGTCCCTTGAGTTGTAGATCAGTCATCAATGGCGCCACCATTAGTATTTCT
+GCCTCTGCTCGTTTCATAGGTCCCTTAGTATGGGGCTATATTATGTCTTGGTCCCAGCAA
+AATGACGTCGCCTGGGTCAGTTGGTGGTCGTTAAGTCTTTTTTGTATGGTAGCTCTTTAT
+CAAAGTTATAAGATAGCACCAATTGATGATAACGAAAATGAGCTTCATGGACAGGGTAGT
+GAAGATGCCTACAATTCGCAGTCACAGTCTTCTGATTTAAGAATGGCTCATCGATCTAGT
+TTAAGCAGCTTAAGTAACCAACGCTGTACCACATGA
+>SYN 1479 residues Pha 0 Code 0
+ATGTTTCATGCTTTCACCTTCCTTAAAGGTGGTAGATTTTACTCTTCACTAACAGTTAAA
+TCATTGTACGAGCAGGTACACCATACTAGCCATGATCCCATTTCAATTAATGGATGGATC
+AAATCCATAAGACTATTAAAACGTATAGCGTTTTTGGATTTACAAGATGGGACTTCTGTG
+AACCCATTAAGAATAGTTATTCCACTCACAAATACTGATGAAGTACAGTTCCTAAAAATT
+CTGAAAACTGGTCAAACTTTATCTATATCTAATGCTACCTGGCAAAGCACCCCTAATAGA
+AAACAACCTTTTGAATTGCAAATCAAAAATCCTGTCAAGTCAATTAAACTTGTGGGTCCC
+GTTTCAGAAAACTATCCATTACAAAAGAAATATCAAACCTTACGTTATTTAAGGTCCTTA
+CCTACACTAAAATACAGAACCGCTTACTTAAGTGCAATTTTACGGTTAAGATCATTTGTA
+GAATTCCAGTTCATGCTATATTTCCAGAAAAACCACTTCACCAAAGTTTCACCACCAATA
+TTAACTTCAAACGATTGTGAAGGTGCCGGCGAGTTGTTTCAAGTCTCCACCAATACGTCG
+CCAACTGCATCCTCGTACTTTGGGAAGCCGACTTATTTGACTGTGTCCACTCAATTGCAC
+TTGGAAATTTTAGCGTTATCACTGTCAAGGTGTTGGACGTTATCTCCTTGCTTTAGAGCC
+GAAAAGAGTGATACTCCAAGACACCTTTCGGAGTTTTGGATGCTTGAAGTGGAAATGTGC
+TTTGTTAATAGCGTCAACGAGCTAACATCGTTTGTTGAGACTACAATAAAACACATAATT
+AAAGCTTGTATAGATAACCAACAAGAACTCTTGCCGAAGCAATTTATCTCTTCACAAGAA
+AATAATGCATCGTCAGAGCTATCAATAAATCAAGAGACACAACAAATTAAAACACGATGG
+GAAGATTTAATAAATGAAAAATGGCACAATATAACGTATACCAATGCAATAGAAATTCTC
+AAGAAACGCCACAATGAAGTTTCACACTTTAAGTATGAACCTAAATGGGGACAGCCTTTG
+CAAACTGAACATGAAAAATTTTTAGCCGGAGAGTATTTTAAGTCCCCAGTTTTCGTTACC
+GACTATCCACGTCTTTGTAAACCATTCTACATGAAACAAAATTCCACTCCTGACGATACT
+GTTGGATGCTTTGATCTACTGGTTCCTGGAATGGGTGAAATAATTGGTGGGAGTTTAAGG
+GAAGATGACTATGACAAGTTATGTAGAGAAATGAAAGCACGCGGGATGAATAGATCTGGA
+GAATTGGACTGGTATGTTTCTCTGAGAAAAGAAGGAAGTGCACCACACGGAGGCTTTGGT
+CTAGGGTTTGAGAGATTTATCTCATACTTATATGGCAACCATAATATAAAGGATGCCATA
+CCCTTTTATAGAACATCTGCAGAATCCATCGATTTTTGA
+>YCR6 2232 residues Pha 0 Code 0
+ATGGAACTTCAGAATGATTTAGAGTCGCTCGATAACGAGCTGAATGATTTTAGTGAAGAT
+CCATTTCGTGATGATTTCATAACGGATGAAGACGCTGTAAGATCGGGGTGGCGATCTGCG
+TGGACCAGGATGAAATATTGGTTTTATAAGAATAGACTGAAGTGGACAAACAATCCCATA
+GTGATTGGCGACGCGAAAGATAGTAGGGATGGTTCTAACTTTAGAAGGGGTATACCGCTA
+TATGAATTAGACGCGAATGGTCAACCCATTGATACTGAACTTGTTGATGAGAATGAACTT
+TCTTTTGGAACGGGATTTCGTTCCAAAGTGCCTTTTAAAATAATATTTCGCACATTGCTT
+GGCTCGCTGGTGTTTGCCATTTTTTTAATTCTGATGATTAACATAGCAAAACCCCATCAC
+TCCACGAGAGTGCTATCGCACTTTGGCAGTCCTGAATTTGACCCTTACGTGAAGTATTTT
+AACGGTACGCATGAATTTTTCCCCTTAACGATAGTAATTTCACTAGACGGTTTCCATCCT
+TCACTCATATCTAAGAGGAACACACCGTTTTTACATGACTTATATGAATTGAAATATGAT
+GGAGGTATGAATATCACGTCCACACCTTTTATGATACCCAGCTTCCCTACGGAGACCTTT
+CCCAACCATTGGACGTTGGTTACTGGACAATACCCAATACACCACGGTATAGTCTCTAAC
+GTATTTTGGGATCCTGATCTTAATGAAGAATTCCATCCAGGTGTATTGGACCCTCGAATA
+TGGAACAATAATGATACAGAACCAATATGGCAAACTGTTCAGTCTGCATTTGACGGTGAT
+ATACCATTCAAAGCTGCTACCCATATGTGGCCAGGTAGCGATGTGAATTATACCAAGTAT
+AAGACTGAAGAGAAACTACAACCTGAACATAAAAAGCCTATTGCTAGAGAGAGAACTCCA
+TTTTACTTCGACGAATTCAATGCTAAAGAACCACTTTCGCAAAAATTATCCAAGATTATT
+GAATATGTGGATATGAGTACACTGAACGAAAGACCACAGTTAATTCTCGGTTATGTACCG
+AACGTAGATGCCTTTGGACATAAGCATGGATATCCGTCAGAGTCGGAATACTATTATGAA
+GACTTCACTGAAACACTGGGGGAAGTAGATACATTTCTGAAGCAACTAGTGGAATCGCTG
+CAAGAAAGAAATTTAACCAGCTTTACTAATTTGGTCATTGTTAGCGATCATGGTATGAGC
+GATATCGTAGTTCCCTCAAATGTTATTATATGGGAAGACTTACTGGACGAAAAATTGAGG
+AAGGATTATGTATCGCACGCATATCTAGAGGGTCCGATGATGGCTATATCGTTGAAAGAT
+TCCGGAAACATCAATGAGGTTTACCACAATTTAAAGACTTCTATAGATGAAGACAAGTAT
+ACGGTTTACGTTAATGGAAATTTCCCCAAAGAATGGAACTTTAATGATGGAAAAAATCAT
+CACATGGCGTCAATCTGGATTGTGCCCGAGCCTGGGTATGCAGTGATGAAGAAAGAACAA
+TTGAAGAAGGTGGCAAAAGGTGATCATAAGGACAAAAACGAAGACAATGTGTTCACGATT
+GGATCACATGGATACGACAATAACGCGATCGATATGAGATCTGTATTTATTGGTATGGGG
+CCATATTTTCCACAGGGATACATTGAGCCGTTCCAAAATACCGAAATTTACAACCTTTTG
+TGCGATATTTGCGGTGTGGCAGAAAAGGACAGAAATTCCAATGATGGGACTGGGATGCTT
+ATGAACCAACTCCGCGAACCCCAGAGCAGCGAAGAAGTAGAGATTGAAGATGACTTTGAT
+TATTTGGTCAGTAAGTTTGGTGAATTCAGCACTTATAATATAATTTGGGGCGGGTACCCC
+GAAGAGACAGAACAAGACAATGTTGACAATGATAATGATGACAACGACGATGGAAACACT
+GATGAAATAGCCGCTATGCCATCTTCGTCATTAACGATAAAACTAGAAATGACAACTTCA
+ATACCATCAGCAACTGAGACTCTACCGGGCGAAACATCACCATCATCAAGAAGAAGCAGC
+AGCAGCAGCATACAAGCTAGCGCTACTGCTAGCACAGTGGGGGATTGGCTTCAAGACATA
+ATCAACGACGCAAAAGATCTCATTGACGACATAATTGACAGCATCGACGATTTAGTCGAT
+TCTGATACCTAA
+>GNS1 630 residues Pha 0 Code 0
+ATGGAATACGCCACTATGTCTTCTTCGAACTCCACACATAACTTTCAGAGAAAGATTGCT
+CTTATAGGAGCTAGAAATGTCGGCAAAACCACATTAACGGTTCGCTTCGTAGAATCGCGG
+TTCGTTGAATCCTATTATCCCACTATTGAAAATGAATTTACCAGGATAATTCCTTATAAA
+AGTCATGACTGTACTCTGGAAATTCTAGATACTGCAGGCCAAGATGAAGTTTCTCTATTA
+AACATTAAATCGTTGACGGGCGTACGAGGCATAATGCTGTGCTATAGTATAATAAATCGT
+GCTAGCTTTGATCTTATTCCCATTCTCTGGGACAAGCTGGTAGATCAGCTGGGTAAGGAT
+AACCTCCCGGTAATACTTGTGGGTACCAAAGCTGATTTGGGAAGGAGTACAAAAGGTGTA
+AAAAGGTGTGTCACGAAAGCTGAAGGAGAGAAACTAGCTTCGACAATTGGCAGTCAAGAT
+AAGAGGAACCAGGCAGCATTTATAGAATGCAGTGCCGAGTTAGATTATAATGTTGAAGAA
+ACTTTTATGCTCCTTTTGAAACAAATGGAACGTGTCGAAGGAACTCTGGGGCTTGATGCC
+GAAAATAATAATAAATGTTCTATAATGTGA
+>FEN2 1539 residues Pha 0 Code 0
+ATGATGAAGGAATCGAAATCTATCACTCAACATGAGGTTGAGAGAGAATCTGTTTCTTCC
+AAACGTGCCATTAAAAAGAGATTACTTCTGTTTAAAATAGACTTGTTTGTGCTATCATTT
+GTTTGCTTGCAATACTGGATTAATTATGTCGACCGTGTCGGTTTCACCAATGCATATATA
+TCGGGTATGAAGGAAGATCTTAAGATGGTCGGAAACGATTTGACCGTGTCTAACACAGTT
+TTCATGATTGGTTACATTGTAGGTATGGTCCCCAATAATTTAATGTTATTGTGTGTTCCA
+CCTAGGATATGGCTAAGTTTTTGTACGTTTGCCTGGGGTTTATTGACCTTGGGAATGTAC
+AAAGTTACATCGTTCAAACATATTTGCGCAATTAGATTCTTTCAAGCCTTATTTGAGAGT
+TGCACATTTTCAGGAACACATTTTGTTTTGGGTTCGTGGTATAAAGAAGACGAATTGCCC
+ATTAGAAGTGCTATTTTTACAGGTAGCGGTTTGGTGGGATCTATGTTCAGTGGATTTATG
+CAAACAAGTATCTTTACTCATTTGAATGGGCGGAATGGCTTGGCGGGTTGGAGATGGTTA
+TTCATTATTGATTTTTGTATCACATTACCCATTGCAATTTATGGGTTTATTTTCTTCCCC
+GGCCTTCCTGATCAAACAAGTGCTGTTAGCAAATTTTCTATGACGAGATACATTTTTAAT
+GAACAAGAGCTACATTATGCTAGGAGAAGGCTCCCCGCTAGGGACGAAAGCACCCGGTTA
+GACTGGTCGACTATTCCTAGAGTCCTAAAAAGGTGGCACTGGTGGATGTTCTCTCTTGTT
+TGGGTTCTGGGAGGTGAGAATTTGGGTTTCGCATCTAATTCTACATTTGCATTATGGTTA
+CAAAACCAAAAATATACGTTGGCGCAAAGAAATAATTATCCTTCGGGGATATTTGCCGTA
+GGTATAGTTTCTACGCTTTGTTCTGCTGTATATATGAGTAAGATCCCAAGAGCTAGGCAT
+TGGCATGTTTCTGTTTTCATATCATTGGTAATGGTTATTGTTGCGGTACTAATACGTGCA
+GACCCACTAAATCCAAAAGTCGTCTTTTCTGCACAGTATCTTGGAGGCGTAGCATACGCT
+GGACAAGCGGTTTTTTTTTCGTGGGCAAACATTATTTGTCATGCAGATCTTCAAGAACGT
+GCTATCGTTCTTGCTTCAATGAATATGTTTTCAGGGGCCGTTAACGCATGGTGGTCTATA
+TTATTCTTTGCTTCAGATATGGTGCCCAAGTTTGAGAGAGGTTGCTACGCCCTCTTGGCT
+ACGGCAATATCAAGCGGAATTGTCTCGGTCGTCATACGCTCACTACAGATAAAAGAGAAT
+TTGTCTAAGAAACAGGTTCCTTATATAGATGCTAATGACATGCCCGGGGAAGATGACGAT
+GACGACAACCAGGATAATGAAAATGATGGCGACGACGAGAGTATGGAAGTTGAACTTCAT
+AATGAGGAAATGGCCGAAATTTCAAATCCTTTCCGATAA
+>RIM1 444 residues Pha 0 Code 0
+ATGTTTTTACGTACTCAAGCTCGTTTCTTCCATGCTACTACCAAGAAGATGGACTTCTCG
+AAAATGTCCATCGTCGGCCGCATTGGCTCTGAATTCACTGAACATACTTCTGCTAATAAC
+AATCGTTATTTGAAATATAGTATCGCTTCGCAACCAAGAAGAGATGGCCAAACCAATTGG
+TATAATATCACCGTTTTCAATGAACCTCAAATCAATTTTTTGACAGAATATGTTAGAAAA
+GGCGCTTTGGTATATGTTGAAGCAGATGCTGCTAACTATGTCTTCGAGAGAGACGACGGT
+TCTAAGGGTACTACTTTGAGCTTAGTTCAAAAGGACATTAATTTATTGAAGAATGGGAAG
+AAATTAGAAGATGCTGAGGGCCAAGAAAATGCTGAGGGCCAAGAAAATGCTGAGGGCCAA
+GAAAATGCTGCTTCTTCAGAATAA
+>CRY1 414 residues Pha 0 Code 0
+ATGTCTAACGTTGTTCAAGCTCGTGACAATTCCCAAGTTTTTGGTGTTGCTAGAATTTAC
+GCTTCTTTCAACGATACTTTCGTTCATGTTACCGATTTATCTGGTAAGGAAACCATCGCC
+AGAGTTACTGGTGGTATGAAGGTTAAGGCTGACAGAGATGAATCTTCTCCATACGCTGCT
+ATGTTAGCTGCCCAAGATGTTGCCGCTAAGTGTAGGGAAGTCGGTATCACTGCCGTTCAC
+GTTAAGATCAGAGCTACCGGTGGTACTAGAACCAAGACTCCAGGTCCAGGTGGTCAAGCT
+GCTTTGAGAGCTTTGGCCAGATCTGGTTTGAGAATTGGCCGTATCGAAGATGTTACCCCA
+GTTCCATCTGACTCCACCAGAAAGAAGGGTGGTAGAAGAGGTAGAAGATTATGA
+>YCS2 6504 residues Pha 0 Code 0
+ATGAATTCAATTATTAATGCTGCTTCGAAAGTCTTAAGACTCCAAGACGATGTGAAGAAG
+GCTACTATAATATTAGGAGATATACTGATATTACAACCAATTAATCACGAAGTTGAACCA
+GATGTAGAAAACTTGGTACAGCATGAACTAACCAAGATAATACAAGGTTATCCCATACAG
+GATAATATGATTATTAATAGCAAAAAAGGCACAGTTGAAGATGACTTATGCGAACTCAAT
+AACTATACCTGTTTTGCACTTTCGAAAAGCTTTGATTTATGCCATGATAGCAGAAATTTC
+AACATAGCGCAGCCGAAACGATGGATACAATTATTAGAGACATTAACTGACTCAGTTAGT
+TTCGCAGTTATTGTTCAAATTATTCTCACTTTATCTAACATTTCGCTAATAAATAAACAA
+ACCTTGGGGAAGTTAAAAAAACTGAGGATTCGAATTTTCGAAATACTATCAAATAAAAAC
+GATAGTTGGAAATCTACATTACTACAGAAAAACCTTATAGAATGGTACATTTTTATGCTT
+TCCGTGGATTGCACACCTTTAGAATTGCAAAACTTATATCTCCATAAGGAGTTGAAATTC
+TGTAACGATATCTTGAATTCATTAACACTCCAAGTTTCTGATCCTCGCTCACAAAATTAC
+CTGCAATTTGAGAACACGTATAAGCTTTTTCAAATACAAAAGTCATCTAGAATTAACAAC
+TCGTTCCTTTTTTACATAGAATTCAATTCCGTTACCTCAAATAGGATAATGACCATAGAA
+AAACACATTTATTTGGAAATTAAGGAAGGCCAGTTTTGTATTTCAAATGATAACTACATA
+ATCGGTTTATTTGAAAACTTCGAATTCGAAGCGGGCACTTTGTACTTTATTGGAGTTTTA
+ATTGATCACAATAATCGAATAACTCTTTATGTTGATGGAAGTATGATCAATCAGCTCACG
+TTATTTGAAAACTCTATATGCCAATTAAGCACTTGTGAACTGGGATCCATGATTTGTTCA
+ATTAAAGTATATAGATTTTATTTGTGGGATGGATTATTAACAGAATTTGCGATAAATATA
+CTTCAAGCTATCGGCACCAATTACCAATATACATTTAGCAAGAAAAAAGAAGGGCCTGAA
+GTTTTATCGCTCTGCCAAGACTTTTTGATCGCTAAGGCTCATTTAATGGCCAGGCCTGCA
+ACAGAAATATCTTCCACAAAATACATCGATGAGATTGAACTTCTTGAAATGGAAAATATC
+ATTATTGATGTTAACCCAAATGATATTCTTCAAGATTTCACCGAATCGTCTAATTTTACG
+GTAAAATTTGAGGAAAGCACAAACTCGAAAAATATTCCGGAAGTGGGTAAGTGCTATTTC
+TATAGGAGTTCAAACTTGGTTTCAAAATTTGTGTCCATTGATTCTATACGGCTTGCGTTT
+TTAAACATGACAGAATCCGGTAGTATAGACGATCTGTTTCATCATGTATCACATCTGATG
+AATCTTTTACGAAATATTGATATTCTTAATTGGTTTAAAAAAGACTTTGGCTTCCCTTTA
+TTTGCTTATACTTTAAAACAAAAAATAACACAAGATTTATCTCAGCCTCTGAATATCCAA
+TTTTTCAATTTATTCTTAGAATTTTGCGGGTGGGATTTCAACGATATTTCCAAATCCATA
+ATTCTAGATACTGATGCCTACGAAAACATAGTCCTTAACTTGGATTTATGGTATATGAAT
+GAGGATCAAAGTTCTCTGGCGTCAGGCGGATTAGAAATTATCAGATTTCTTTTCTTCCAA
+ATTTCAAGTTTGATGGAAGCCTCTATTTATTCTAAGTTCAATTCCAATAAATTCAATGAT
+ATGAATATCCTAGAAAAACTATGTTTAAGCTATCAGGCTGTCACAAAAAGAGAAAATCAG
+AACAGTAAATTTAATGAGCTATCAAATGATTTAATTTCTGTATTTGTTACTTTATTGAAA
+AGCAATACTGATAAACGACACCTGCAGTGGTTTTTACATCTCTCATATTACTTTATTAAG
+AGAAAAGATGTACGTTCTACAGAAATTATACTTCAAGCGGTAGATCAACTTTTTTCGTTT
+TACTTAGATCAAGGTAGCGACGAAAATGCGAAGATACTTTCAGAGATTATACCACTTAAG
+CTAATGCTGATGATTATGGATCAAATAGTGGAAAATAATGAATCAAACCCTATTACGTGC
+TTGAATATCTTATTTAAGGTAGTTCTGACCAATAAACCGCTTTTCAAACAATTTTACAAA
+AATGATGGTTTGAAACTCATATTGACTATGCTTTGTAAGGTAGGGAAAAGCTATCGAGAG
+GAGATTATTTCTTTGCTTCTCACATATTCTATTGGCAATTATACCACAGCTAACGAAATA
+TTTTCAGGTGCTGAAGACATGATTGGAGGAATTTCAAACGACAAGATAACTGCAAAAGAA
+ATTATTTATTTGGCTGTCAACTTCATTGAGTGGCATGTGATTAATTCTAATGCCAGTGAT
+TCTTCTTCTGTATTGGACCTGAACAACCATATATTAAGATTCGTCGAAGATCTGAAATCG
+CTGAGCGCTGTTCCGATTAATGAATCTGTATTTGATCCTAAAAAAAGTTATGTGATGGTT
+TCATTATTAGATCTCTCGATAGCTTTGAATGAATCGGAGGACATCTCAAAGTTCAAGAGC
+TCTTCAAAAGTGATTTCAGAGCTCATTAAAGGTAATATAATGTGTGCTCTTACGAAATAT
+GCCGCTTATGATTTCGAAGTCTATATGAGCACATTTTTTTGTCACAGTACAGAATACAAA
+CTGGTTTATCCAAAAACTGTAATGAACAATTCCAGTTACTTAGAGCTATCATTTATAGTG
+ACACTCCTACCCGAAATACTTAATGACCTGATAGATAGCAATAACAATTTGAACCTGATG
+ATGTTGAAGCATCCATACACGATGTCAAATCTCCTTTATTTTCTTCGCAAATTTCGACCT
+GATACGTCACAGATAGTTATGCCTAAAGATTTTTATTTCTCAAGTTATACATGTCTCTTG
+CATTGTGTTATTCAGATTGATAAATCATCATTTTACCATTTCAAAAACGTTTCTAAGTCG
+CAACTGTTACAGGAATTCAAAATCTGCATAATGAACTTAATATATTCCAATACTCTAAAG
+CAGATAATCTGGGAGAAAGAAGAATACGAGATGTTTTCTGAGTCACTGATGGCGCATCAG
+GAAGTTTTATTTGCACATGGAGCATGTGATAATGAGACCGTTGGCTTATTGTTAATATTT
+TTTGCCAACAGATTACGTGATTGTGGATACAACAAAGCAGTCTTCAATTGTATGAAAGTG
+ATCATTAAGAACAAGGAAAGGAAACTAAAGGAGGTGGCGTGTTTTTTTGACGCAGCGAAT
+AAAAGTGAAGTACTCGAAGGTTTAAGTAATATCCTCTCATGCAATAACTCTGAAACAATG
+AACCTCATAACTGAACAATACCCATTTTTTTTCAACAATACACAACAGGTACGGTTCATA
+AACATTGTCACCAATATCTTGTTTAAGAACAACAATTTTTCTCCAATAAGCGTTAGACAG
+ATCAAAAACCAAGTTTACGAATGGAAAAATGCAAGATCAGAATACGTCACCCAAAACAAT
+AAAAAGTGCCTTATTTTATTTAGAAAAGACAACACATCCTTAGATTTTAAAATCAAAAAG
+TCCATATCAAGATACACTTACAACCTCAAAACGGATAGAGAAGAAAATGCAGTTTTCTAT
+CGAAATAATTTAAATCTTTTGATTTTTCATCTGAAACATACACTGGAGATACAATCAAAT
+CCAAATTCGTCCTGCAAGTGGTCATTGGACTTTGCAGAAGATTTTGATGGGATGAAACGG
+AGGCTTTTGCCTGCTTGGGAACCAAAATATGAACCACTCATTAACGAGGAAGATGCTAAT
+CAAGATACTATAACAGGTGGTAACAGACAAAGGAGAGAAAGTGGAAGCATTTTATCCTAC
+GAATTTATCGAACATATGGAGACTCTTGAGTCGGAGCCAGTTGGAGATTTGAATGAGAAT
+AGAAAAATTCTTAGACTTTTGAAGGATAACGATTCTATTGCAACTATTTGGAATTGCAGT
+TTGATTATTGGATTAGAAATTAAGGAGGGGATTTTAATTCATGGCAGTAATTACCTTTAC
+TTTGTAAGTGATTACTATTTTAGTTTAGAGGATAAAAAGATTCTAAAATTATCAGAAGTA
+TCGCAAGAATCACGGGATATGACGGTTAGCTTAATTAACGGCCCTGATGTTAAAAGGGTA
+TCAACTTTCCTAAAGCACGAAGTCTTTGTTTGGAAACTTCTCGATATCACTTTCGTTACC
+AAACGACCCTTTCTACTTCGGGATGTCGCCATCGAATTATTGTTCAAAGAGAGAGTTAGC
+GCTTTTTTTAGTTTTTACAACAAAAGAGTGAGAGATGACGTTTTACGGGTACTGAATAAG
+ATCCCGAAGCACCTTCCAGCAGATCCAATTTTTTCAAGCGTTTTACAAGAAATAAACGAC
+CGAGGAAATAGTATAGTGGCAAGAAATGGAATAGGAAAGGCAAGCATTGCTTCCAAATTC
+ACTAGCGTCTTCTCAGCGAACAACAGCCTAATAGATGGATTTGAGATCAGCAAAAAATGG
+GTTAGGGGAGAGATTTCTAATTTTTATTACCTGTTGAGTATCAACATCCTAGCGGGAAGG
+TCATTCAACGATTTGACCCAATATCCAGTGTTTCCGTGGGTTATTGCAGATTACGAAAGT
+AACGTACTCGATTTAGAGAATCCTAAAACTTACCGGGACCTATCGAAACCTATGGGCGCT
+CAAAGTGAGAAAAGGAAATTACAGTTTATAGAGCGTTATGAAGCTTTGGCTTCCCTGGAA
+AATGCTGATTCCGCACCATTTCATTATGGCACGCATTATTCCTCAGCTATGATAGTATCT
+TCATATCTGATAAGGCTGAAGCCCTTTGTCGAATCCTTTTTGTTATTGCAAGGCGGAAGT
+TTTGGCCCTGCAGATCGTTTATTTAGTTCGCTTGAAAGGGCCTGGAGCTCTGCTTCTTCT
+GAAAATACAACGGATGTCAGGGAATTGACACCTGAATTTTTTTTTCTACCTGAATTTTTG
+ATCAACGTTAATAGTTATGACTTTGGTACAGACCAAAGCGGTAAAAAAGTTGACGACGTC
+GTACTTCCACCCTGGGCAAATGGTGACCCAAAGGTTTTCATTCAAAAGAATAGAGAAGCT
+TTAGAAAGTCCTTATGTATCAGCACATTTACATGAATGGATTGATTTGATATTTGGTTAC
+AAACAAAAGGGGGAAATTGCTGTGAAATCTGTTAACGTATTCAACAGATTGAGTTACCCA
+GGCGCTGTAAATCTAGATAATATTGACGATGAAAATGAGCGCAGAGCTATCACAGGCATT
+ATTCACAACTTTGGTCAAACGCCTTTACAAATATTTCAGGAACCTCATCCGGAAAAAATA
+GCCTGCAATGTTCAACAGCTAACAACAGAGGTATGGCGTAAGGTTCCAATGAAGCCAATA
+TTTGAGAAGACAATCTTTAATTTGAATGAAAAGAACAGGTCTGTCGATTATGTTATACAC
+GATCCTAGTTACTTCGATTCATTATACTGGAGGGGCTTCGCTTTCCCAAACTTGTTTTTC
+AGAACGGAAGAATCGTTAGTGTCATTGAGAATTGTGCATAAAAATTGGTTAAAAATTGGA
+CTAGATATTTTTAAAAAGACGCATATGGCTCAGATTACATCGTTTGCGTACTGGAAGTTG
+GGCGAATTCATAACTGGTGATAAAAATGGGCTGATAAAAGTTTGGAAATATCGTAAAGAT
+AAGCATTCGGTTTCAGGTAACCTTGAGAACAAAAAAACAATGTTTGGGCACCTATGCGAG
+CTAAAGGAAATGCGCTGTTATCACGACTACAATACGCTTTTAACCTTAGACATCAGCGGC
+TTAGTATATGTCTGGGACATGATTAATTTCGAACTAGTGAGACAAATAACAAATGATGCG
+CAAAAGGTCGCAATATCTCAACATGCAGGGAGCATTATGGTATTGACTAAGAATAACGCC
+ATTTCGATCTTCAATCTAAATGGACAAATATATACATCAAAGAAATTCGAACCAGCTAAA
+ATTGTAAGCTCAATTGATTTTTTTGACTTCACTAAGTTAGACGCAGGTTACAGAAAGCAT
+ATCTATTGGAAAGAGATGGAAATACTACTAGTGGGCTTTGAAGATGGAACTATAGAAATT
+TACGAGCTCTTTTTGACTTTTCATAATGAATGGGCGATAAAGCTACTGAAACAGCTCTGT
+ACCGAAAGAGGGAAAGCCATAACTAGCATTAAGGGACAGGGGAAGACATACCTGTCCCAG
+AAAAGACGCAAGGATACAGCAGAGCCTCATGAGATAGAAGTGATTGCGGGAACATTAGAT
+GGCAGATTAGCTATTTGGTACTAG
+>YCS3 3681 residues Pha 0 Code 0
+ATGGGGTATCCGCCACCTACACGAAGGCTTGGAGATAAGAAAAGGTACCATTATTCCAAT
+AATCCTAACCGAAGGCATCCTTCCGCTGTTTATTCCAAGAATAGCTTTCCAAAATCAAGC
+AATAATGGATTTGTATCTTCTCCTACTGCCGATAATTCAACAAATCCGTCTGTAACTCCC
+AGTACTGCATCTGTACCTCTTCCTACAGCGGCACCTGGAAGCACGTTTGGTATCGAAGCA
+CCCAGGCCATCTCGATATGATCCGAGCTCAGTCAGTAGGCCTTCGTCATCATCTTATTCG
+TCAACAAGAAAAATTGGAAGCCGTTATAACCCAGATGTGGAAAGATCCTCTTCAACCACT
+AGTTCAACTCCGGAAAGTATGAATACGAGCACCATAACACACACCAATACGGATATCGGA
+AACTCACGCTATTCTCGAAAAACCATGAGCAGATATAATCCTCAATCTACTAGTTCTACA
+AACGTTACCCACTTTCCCTCGGCATTATCAAACGCTCCACCGTTTTATGTTGCCAACGGG
+AGTTCTCGGAGACCTCGATCAATGGATGATTATAGTCCTGATGTAACGAACAAGCTCGAA
+ACAAATAATGTTTCATCTGTTAATAATAACAGCCCTCATTCTTATTACTCTAGGAGCAAC
+AAATGGAGATCCATTGGAACGCCTTCCAGACCACCATTTGATAATCATGTCGGCAATATG
+ACGACCACCAGCAATACTAACTCGATCCATCAAAGGGAACCTTTTTGGAAAGCAAATAGT
+ACTACTATTTTAAAATCAACTCATTCACAGTCATCGCCTTCCCTTCATACTAAAAAATTT
+CACGATGCGAATAAATTGGACAAACCAGAGGCTTCAGTTAAAGTTGAAACACCCAGTAAA
+GATGAGACAAAAACCATATCGTACCATGATAACAATTTTCCACCAAGAAAATCAGTTTCT
+AAACCTAATGCACCTTTAGAACCCGATAATATCAAGGTTGGCGAAGAAGATGCATTGGGG
+AAAAAAGAAGTACATAAAAGTGGGCGTGAGATAGCAAAGGAACATCCTACTCCTGTAAAA
+ATGAAAGAGCATGATGAACTAGAAGCTCGCGCTAAAAAAGTAAATAAAATCAATATTGAT
+GGAAAGCAGGACGAAATTTGGACGACAGCAAAAACAGTGGCCAGTGCAGTCGAAGTTTCC
+AAAGAAAGTCATAAGGAACTAACACGCTCTGTTGAAAGGAAGGAAAGTCCAGAAATTAGA
+GATTATGAAAGAGCATACGATCCGAAAGCCCTGAAAACAGACGCAACAAAGTTGACAGTA
+GACGATGATAATAAAAGTTACGAAGAACCTCTTGAAAAAGTGGAAGGGTGTATTTTCCCA
+TTACCAAAAGCAGAAACGAGATTATGGGAATTGAAAAACCAGAAAAGAAACAAAATAATA
+AGTAAACAAAAGTACTTACTGAAAAAGGCAATTAGGAATTTCTCAGAGTATCCTTTTTAC
+GCACAGAACAAACTTATACATCAGCAGGCTACCGGACTTATCTTGACGAAAATTATATCA
+AAGATAAAAAAGGAGGAACATTTGAAAAAAATAAATTTAAAACATGATTATTTCGATCTC
+CAGAAGAAGTATGAAAAAGAATGCGAAATTTTGACTAAACTGAGTGAAAATTTAAGGAAG
+GAAGAAATCGAAAATAAACGTAAAGAGCACGAATTAATGGAGCAGAAAAGACGTGAAGAA
+GGTATCGAAACAGAAAAAGAAAAAAGCTTACGGCATCCATCCTCGTCTTCCTCATCTCGT
+CGCAGAAATAGGGCTGACTTCGTTGATGATGCGGAAATGGAAAATGTATTGCTACAAATC
+GACCCAAATTATAAACATTATCAGGCTGCTGCAACAATTCCTCCGCTAATTTTAGATCCA
+ATCCGCAAATACTCTTACAAATTCTGTGATGTAAATAACTTGGTTACAGACAAAAAGCTT
+TGGGCGTCTAGAATATTGAAAGACGCCTCTGACAACTTTACTGACCATGAGCACTCTTTA
+TTTTTGGAGGGTTATTTAATTCATCCTAAAAAATTCGGTAAAATTTCTCACTACATGGGC
+GGCTTAAGAAGTCCTGAAGAGTGTGTCCTACATTATTATAGAACAAAGAAAACTGTGAAT
+TATAAACAACTTCTTATCGATAAGAACAAGAAAAGAAAAATGTCAGCCGCTGCGAAGCGC
+CGCAAGAGGAAGGAAAGAAGTAATGACGAGGAAGTCGAAGTTGATGAGAGTAAAGAAGAG
+TCAACGAACACGATAGATAAGGAAGAAAAAAGTGAGAACAATGCCGAGGAAAATGTTCAG
+CCGGTTCTAGTTCAAGGTTCTGAAGTGAAAGGTGATCCATTAGGTACACCGGAAAAAGTT
+GAAAATATGATTGAAAAGAGAGGCGAAGAGTTTGCAGGTGAATTGGAAAATGCTGAGAGG
+GTAAATGACTTAAAAAGGGCGCATGATGAAATTGGAGAAGAGAGCAATAAGTCCAGTGTA
+ATAGAAACCAACAATGAGGTACAAATAATGGCTCCAAAAGGAGGTGTTCGGAATGGTTAT
+TATCCAGAGGAGACCAAAGAACTTGACTTCAGTTTAGAGAATGCGTTACAGAGAAAGAAA
+CACAAATCTGCACCAGAGCATAAAACAAGTTATTGGAGTGTTCGTGAATCTCAACTCTTT
+CCAGAATTGTTGAAGGAGTTTGGCTCTCAATGGTCTCTCATATCAGAAAAACTGGGTACC
+AAATCTACTACAATGGTAAGGAATTACTACCAAAGAAATGCAGCTCGCAATGGATGGAAA
+TTACTGGTTGATGAAACCGACTTAAAGCGAGATGGGACTAGTTCAGAATCTGTACAACAA
+TCTCAAATTTTGATACAACCAGAACGACCAAACATCAATGCCTATAGTAATATTCCTCCT
+CAACAAAGACCGGCTTTGGGTTATTTTGTTGGACAACCAACTCATGGGCATAATACATCT
+ATTTCATCTATCGATGGCTCTATAAGACCATTTGGGCCTGATTTTCATCGTGATACCTTT
+TCTAAAATTAGTGCTCCTTTAACCACTTTACCACCACCAAGACTACCATCTATTCAGTTT
+CCTCGTTCAGAAATGGCAGAACCTACAGTGACAGATTTGCGTAACAGGCCCTTAGACCAT
+ATTGACACGTTGGCTGATGCAGCTTCGTCAGTAACAAATAATCAAAACTTCAGTAATGAA
+AGGAATGCAATTGACATTGGCCGTAAATCGACGACAATCAGCAATCTATTGAATAATTCG
+GATCGAAGCATGAAATCTTCTTTCCAAAGCGCTTCAAGACACGAAGCACAGCTCGAAGAC
+ACTCCCAGCATGAACAATATTGTAGTACAAGAAATAAAACCGAATATTACTACGCCAAGA
+TCGAGTTCTATTTCTGCATTACTAAATCCTGTAAATGGGAATGGGCAATCAAACCCAGAT
+GGAAGGCCGTTGCTGCCATTTCAGCATGCTATTTCTCAAGGCACTCCTACTTTCCCTTTA
+CCGGCCCCTCGCACTAGTCCAATAAGTCGTGCGCCTCCAAAGTTCAATTTTTCGAATGAT
+CCGTTGGCAGCTTTGGCTGCGGTTGCCTCCGCGCCAGATGCAATGAGCAGTTTTTTATCT
+AAAAAGGAAAATAATAATTGA
+>GNS1 1044 residues Pha 0 Code 0
+ATGAATTCACTCGTTACTCAATATGCTGCTCCGTTGTTCGAGCGTTATCCCCAACTTCAT
+GACTATTTACCAACTTTGGAGCGACCATTTTTTAATATTTCGTTGTGGGAACATTTCGAT
+GATGTCGTCACTCGTGTAACTAACGGTAGATTTGTTCCAAGCGAATTCCAATTCATTGCA
+GGTGAATTACCATTAAGCACTTTGCCCCCTGTGCTATACGCCATCACTGCCTATTACGTT
+ATTATTTTTGGTGGCAGGTTTTTGTTAAGTAAGTCGAAACCATTTAAATTAAATGGCCTT
+TTCCAATTGCATAATTTGGTTTTAACTTCACTTTCATTGACGCTTTTATTGCTTATGGTT
+GAACAATTAGTGCCAATTATTGTTCAGCACGGGTTATACTTCGCTATCTGTAATATTGGT
+GCTTGGACTCAACCGCTCGTTACATTATATTACATGAATTACATTGTCAAGTTTATTGAA
+TTTATAGACACCTTTTTCTTGGTGCTAAAACATAAAAAATTGACATTTTTGCATACTTAT
+CACCATGGCGCTACTGCCTTATTATGTTACACCCAATTGATGGGCACCACATCTATTTCT
+TGGGTCCCTATTTCATTGAACCTTGGTGTTCACGTGGTTATGTATTGGTACTATTTCTTG
+GCTGCCAGAGGCATCAGGGTCTGGTGGAAGGAATGGGTTACCAGATTTCAAATTATCCAA
+TTTGTTTTGGATATCGGTTTCATATATTTTGCTGTCTACCAAAAAGCAGTTCACTTGTAT
+TTCCCAATTTTGCCACATTGTGGTGACTGTGTGGGTTCAACAACTGCCACCTTTGCAGGT
+TGTGCCATTATTTCTTCATATTTGGTACTATTTATTTCATTTTACATTAACGTTTATAAA
+CGTAAAGGCACCAAAACCAGTAGAGTGGTAAAGCGTGCCCACGGCGGTGTTGCCGCAAAG
+GTTAATGAGTATGTTAACGTTGACTTGAAAAACGTTCCTACTCCATCTCCATCACCAAAA
+CCTCAACACAGAAGAAAAAGGTAA
+>RBK1 1002 residues Pha 0 Code 0
+ATGGGTATTACAGTAATAGGTTCTCTAAACTATGATTTGGACACATTTACGGATAGATTA
+CCTAACGCTGGAGAAACTTTCAGGGCTAACCACTTCGAAACACATGCTGGTGGTAAGGGA
+TTGAACCAAGCTGCGGCCATTGGTAAATTAAAAAACCCCAGCAGCAGATATAGTGTTCGA
+ATGATTGGTAATGTTGGAAATGATACATTTGGTAAACAATTGAAGGACACTTTATCCGAT
+TGCGGAGTCGATATCACTCACGTCGGTACTTACGAAGGCATTAATACGGGTACCGCTACC
+ATATTAATTGAAGAGAAAGCTGGTGGCCAAAATAGGATATTGATTGTAGAAGGTGCTAAC
+AGCAAGACTATTTATGACCCGAAACAGTTGTGTGAAATTTTTCCAGAGGGCAAGGAGGAA
+GAAGAGTATGTTGTTTTTCAACACGAAATTCCTGATCCTCTTTCCATTATTAAATGGATA
+CATGCGAACAGGCCGAATTTTCAGATCGTATATAACCCCTCACCTTTCAAGACCATGCCT
+AAGAAAGATTGGGAGTTGGTAGACCTTTTGGTCGTTAATGAAATTGAGGGTCTTCAAATC
+GTGGAAAGTGTATTTGATAATGAACTTGTTGAAGAAATAAGGGAGAAGATAAAGGACGAC
+TTTTTAGGAGAATATCGTAAAATTTGTGAGCTTTTGTATGAAAAACTCATGAATCGAAAG
+AAAAGAGGAATTGTGGTTATGACTTTGGGTTCGAGAGGGGTGCTTTTCTGTTCGCACGAA
+AGCCCTGAAGTACAATTCCTTCCGGCTATTCAAAATGTTTCGGTTGTTGATACTACAGGA
+GCTGGAGATACTTTCCTGGGCGGTTTGGTTACTCAATTGTATCAAGGAGAGACCTTGTCT
+ATGGCTATAAAGTTCTCTACATTAGCTAGTTCATTGACCATTCAAAGAAAAGGTGCTGCT
+GAAAGCATGCCACTGTATAAAGATGTTCAGAAAGATGCATAA
+>PHO87 2772 residues Pha 0 Code 0
+ATGAGATTCTCACACTTTCTCAAATACAACGCTGTCCCTGAATGGCAGAATCATTACCTA
+GATTATAACGAATTGAAAAATTTGATCTACACATTACAGACAGATGAATTGAAACAAGAA
+ACGCCAACCGGTGACTTAAACGATGACGCTGACTCTCAGACTCCAGGTCCAATCGCTGAT
+ATAGAAAGCAACATAGCTGCAGGAGAACCATCTCCATCGAAAAGAAGATTTACACATAAA
+CTCAAGCGTAAGCTCTTTGGTTCTAAAACACCTTCAGGAAGCAAAAGGGGAGACTCCGAC
+GAAAAGGCCATAGATGGGAACAATATTAACGAGGAAACAATTGAGTTAGACGAGTTATCT
+CCTCAAGGGAAAACCACCTCTTTCAATAAGAATTTTATACGTAAGAAATTCTTTGAATCA
+CGCAGCTCATCTGTGAGTAGCGAGGGAAAGACGCTCTTCAGTTCTTATGATACATTCGTA
+ACTAACCTGAGCGACGAGAAATTGAAAGTAGATGATTTCTACAAAAGAATGGAAGCTAAG
+TTCTATGAAAGATTTGACCACTTGATTAATGATTTGGAGAAGGAAGGCATTGTAACAAGA
+TTGAATGAAACTTTCAATCCTGAAATTCAAGCATTGCCTCCTTTAAGAGAAATTATTTCT
+GGTACATCAGAGACACATTCATCTAATAACCCATTTGAAATACACTCTTCAAACATCGAC
+AGTGAATTGAGAAATAGGTTTGATTACAGCGAAGAAGAAATGGATGAAGATGATGACGTT
+GACGTGTTTGCTGACACTACCGACAATACCGCCCTCTTGAATTATTCGCAATTTAACATT
+AAATCTCAGAAAAAATCATTATTAAAACAGACAATAATAAATCTTTACATAGACCTTTGC
+CAGTTGAAATCTTTTATCGAATTGAACAGAATGGGTTTCAGTAAAATTACTAAGAAGTCT
+GATAAAGTATTGCACATGAACACTAGGCAAGAATTAATAGAAAGTGAAGAATTTTTCAAA
+GACACCTACATCTTCCAGCATGAAACTTTAAGCAGTTTAAACAGTAAAATTGCACAACTT
+ATTGAATTTTATGCTGTTCTCATGGGTCAGCCTGGGAACGTAGATTCATGCAAGCAAGAG
+TTAAAGTCGTACCTGCACGACCACATTGTTTGGGAAAGAAGCAACACATGGAAAGACATG
+TTGGGCCTCTCTTCGCAAAATAACGATATAATAACTATTGAAGATGAAGCTGAGAAACTT
+ATGCAAGAAAAGCTTCAAATTGAATATTTCAAGTATCCATTGCCTAAGCCAATTAATTTG
+AAGTTTACTAAAATTGAAAATTTGGCAGTTCCTAAGCTATTTTTTGGGAAAAGAGCAATG
+AAAATAGGCTTCATTATCATTGTCACAGGTGTTTTGTTGGGTGTTAAAACTTTCAATGAC
+CCTGTCGAACACCGGTGTATGGCATTGGTAGAATGCTGTGCTTTCTTATGGGCTAGTGAA
+GCCATTCCATTACACATCACAGGTTTATTGGTTCCCCTTCTAACTGTCCTTTTTAGGGTA
+CTAAAAGACGATGACGGTAAGGTAATGGGAGCAGCAGCTGCCTCTACAGAAATCTTAGGT
+ACAATGTGGTCGTCAACAATTATGATTTTATTAGCAGGTTTCACATTGGGTGAAGCCTTG
+TCGCAATATAACGTTGCGAAAGTTTTGGCATCGTGGTTATTGGCCCTTGCAGGTACCAAG
+CCAAGAAATGTCCTTTTAATGGCAATGAGTGTTGTATTCTTTCTTTCGATGTGGATTTCC
+AACGTTGCCTCCCCAGTATTGACATATTCTCTATTAACACCCTTACTAGATCCGCTGGAC
+TACACTTCACCGTTTGCTAAGGCATTAGTCATGGGTGTTGCACTTTCGGCAGATATTGGT
+GGTATGGCTTCACCTATTTCTTCGCCACAGAATATCATCTCCATGCAGTACTTAAAACCT
+TATGGAATCGGCTGGGGGCAATTTTTTGCTGTCGCTCTGCCTACAGGTATTCTATCGATG
+CTGTGCTCCTGGGCCTTGATGATACTCACCTTTAAAATAGGCAAAACTAAACTGGAAAAA
+TTTAAACCAATAAGGACCAGATTTACTATAAAGCAATATTTTATCATCATTGTAACTATT
+GCTACTATTCTTCTATGGTGTGTAGAGTCACAAATAGAAAGTGCTTTTGGATCGTCCGGT
+GAAATTGCAGTAATACCGATAGTCCTGTTTTTTGGTACAGGTCTACTATCAACAAAGGAT
+TTCAACACATTCCCTTGGTCAATTGTTGTTCTTGCTATGGGTGGTATAGCCCTTGGTAAG
+GCAGTTTCATCTTCAGGCTTGTTGGTAACTATTGCAAGAGCATTACAAAAGAAAATTCAG
+AACGATGGTGTTTTTGCTATCTTATGTATTTTCGGTATTTTAATGTTAGTTGTGGGCACT
+TTTGTCTCACATACTGTGTCAGCAATCATCATTATTCCCTTGGTGCAAGAAGTTGGTGAC
+AAATTATCCGATCCAAAGGCAGCTCCAATTCTTGTGTTCGGTTGCGCCTTGTTAGCCTCA
+TGCGGTATGGGGTTGGCTTCATCTGGATTTCCAAACGTTACTGCTATTTCTATGACCGAT
+AAAAAGGGTAATAGATGGCTAACTGTAGGCGCTTTTATCTCCAGAGGTGTTCCTGCTTCG
+TTGTTAGCGTTTGTCTGCGTAATTACTCTCGGTTATGGTATTAGTTCTTCCGTCTTAAAA
+GGTAGCACTTAA
+>BUD5 1617 residues Pha 0 Code 0
+ATGAGAACGGCCGTACCGCAGTTGCTGGAAGCAACTGCCTGTGTCTCTAGAGAATGCCCC
+CTCGTCAAAAGAAGTCAGGACATAAAAAGAGCAAGAAAACGTCTACTCAGTGACTGGTAT
+AGGCTCGGCGCTGATGCAAACATGGATGCCGTATTATTAGTTGTTAACTCCGCCTGGAGG
+TTTCTGGCCGTCTGGCGACCCTTCGTAAACTCAATCCAACATGCAACTCAGGAATTGTAT
+CAAAATATCGCCCATTACCTTCTTCATGGCAACGTAAATATACAGAGGGTCACAGCACTA
+ATACAGCTCGTAATGGGACAGGACGATTTACTTTTTAGTATGGATGATGTTCTACAAGAG
+GTCTTCAGAATACAGCTCTATTTGAATAAGATGCTGCCGCACAACTCTCACAAATGGCAA
+AAGCCATCCCCCTTTGACTCCGCAAACTTACTACTTAACTTCAGAGACTGGACAACTGAC
+AATGCTCTCCTCCAAGAGTTGCTACTATCCTATCCCACAATTAATAAAAACAAACACAAA
+AATCACTCCGTCCCTCGTCTAATACAAATCTGGGTAGAGTCTTATTGGCAAGATAGTGAG
+ACAACATTAAAAGATATCCTCAATTTTTGGTACAGTCACTTGGCTGAATATTATGAATAC
+CAAGAACTGTTTGCAGACATAGTTCAGCTGTTTATAAACAAAAAAAGAACGAGGCAATTG
+AAGATTCATTACATTGGTCTAACTGATAAGGAAATCGAAGAAAATAAACCGCCCCTGGAC
+TACGAAAACTTATTTCTCCAATACGAGATAGACAAAACGAACGCAAATGATGAATTGTGC
+GGTGCAACTGACCTCAGTGATTTACTTTTCCAATGGAAACAGGGTGAACCTCTAGAAGTC
+GAAGCCTTCGCTCTAAACGTATCTCCATGGTCACTTGCAAAGACATTGACTCTCTTAGAA
+TCTTCTCTTTACTTGGATATTGAAACAATAGAATTCACAAGACATTTCAAACACAACGAT
+ACAACAATTGACTCCGTGTTTACGCTTTCCAACCAGTTATCGTCCTACGTTCTTGAGACA
+ACTTTGCAGCAAACGCACACCATTTCCTACTGGTTACAAGTTGCACTTGCTTGTCTATAC
+TTACGAAACTTAAACTCACTTGCTTCAATCATTACATCATTGCAAAATCATTCAATAGAA
+AGACTATCTCTCCCGATAGATGTTAAATCAGACCACCTTTTTCAGCGCCTAAAAGTCGTC
+GTACATCCAAACAACAACTACAACGTTTATAGAAGAACAATTAAACATATTTTCCACAGT
+CAGCTTCCTTGTGTACCTTTTACATCACTGCTTATCAGGGACATTACCTTCATAAGAGAC
+GGAAACGATACATTCACTAAAGATGGTAATAACGTGAATATGCAAAAGTTCAACCAAATC
+ACAAAGATAGTCGCTTTTGCGCAATATTTACAACAAAAGCAATATGAAGATATACACTGT
+TCAAATACTACTGCAAGAAGCTTATTAGGGGCTATGATAAAGGTGCACACTTTATATAAC
+GACAACAAAGACAGGGCGTATCAAGTCAGTATAGCTAAGGTTCCAAGGCTTACCTAA
+>MATALPHA2 633 residues Pha 0 Code 0
+ATGAATAAAATACCCATTAAAGACCTTTTAAATCCACAAATCACAGATGAGTTTAAATCC
+AGCATACTAGACATAAATAAAAAGCTCTTTTCTATTTGCTGTAATTTACCTAAGTTACCA
+GAGAGTGTAACAACAGAAGAAGAAGTTGAATTAAGGGATATATTAGGATTCTTATCTAGG
+GCCAACAAAAACCGTAAGATTAGTGATGAGGAGAAGAAGTTGTTGCAAACAACATCTCAA
+CTCACTACTACCATTACTGTATTACTCAAAGAAATGCGCAGCATAGAAAACGATAGAAGT
+AATTATCAACTTACACAGAAAAATAAATCGGCGGATGGGTTGGTATTTAATGTGGTAACT
+CAAGATATGATAAACAAAAGTACTAAACCTTACAGAGGACACCGGTTTACAAAAGAAAAT
+GTCCGAATACTAGAAAGTTGGTTTGCAAAGAACATCGAGAACCCATATCTAGATACCAAG
+GGCCTAGAGAATCTAATGAAGAATACCAGTTTATCTCGCATTCAAATCAAAAACTGGGTT
+TCGAATAGAAGAAGAAAAGAAAAAACAATAACAATCGCTCCAGAATTAGCGGACCTCTTG
+AGCGGTGAGCCTCTGGCAAAGAAGAAAGAATGA
+>MATALPHA1 528 residues Pha 0 Code 0
+ATGTTTACTTCGAAGCCTGCTTTCAAAATTAAGAACAAAGCATCCAAATCATACAGAAAC
+ACAGCGGTTTCAAAAAAGCTGAAAGAAAAACGTCTAGCTGAGCATGTGAGGCCAAGCTGC
+TTCAATATTATTCGACCACTCAAGAAAGATATCCAGATTCCTGTTCCTTCCTCTCGATTT
+TTAAATAAAATCCAAATTCACAGGATAGCGTCTGGAAGTCAAAATACTCAGTTTCGACAG
+TTCAATAAGACATCTATAAAATCTTCAAAGAAATATTTAAACTCATTTATGGCTTTTAGA
+GCATATTACTCACAGTTTGGCTCCGGTGTAAAACAAAATGTCTTGTCTTCTCTGCTCGCT
+GAAGAATGGCACGCGGACAAAATGCAGCACGGAATATGGGACTACTTCGCGCAACAGTAT
+AATTTTATAAACCCTGGTTTTGGTTTTGTAGAGTGGTTGACGAATAATTATGCTGAAGTA
+CGTGGTGACGGATATTGGGAAGATGTGTTTGTACATTTGGCCTTATAG
+>TSM1 4224 residues Pha 0 Code 0
+ATGATGTCCTTTTCCAAAAACGCCACTCCTAGAGCCATTGTTAGTGAATCTAGCACTTTG
+CATGAGATGAAGTTTAGAAATTTTAGAGTTGCCCATGAAAAAATCTCGTTGGATATAGAT
+CTAGCTACTCACTGCATTACCGGTAGCGCTACTATAATAATCATTCCGTTGATCCAAAAC
+CTAGAATATGTAACTTTTGATTGCAAGGAAATGACTATTAAAGATGTTCTGGTCGAAAAT
+CGTCGATGTGATCAATTTATTCATGACGACCCACTTCAAACAAATTTGAATGGATTGACT
+TCACAAAATGTATTATACAGCGACAATTCCATTGAACAGTCACATTTTTTGAGATCTAAG
+TTTGCTAGCTTGAATGAATACCCAGAAACGGACTCTAAATCCCAGTTAACTATAAAAATA
+CCATCTTCCATCAAAATATCTTTGGAGGACGCCAATGCATTAAGTAATTACACTCCGATT
+ACTCCTTCAATTAAGACTACCCCTGGGTTTCAAGAATCTGTTTTCACTCCAATTACATTA
+CAAATTGAATATGAAATCAGAAACCCAAAGTCGGGTATTAAATTCGATACTGTGTATGCT
+GACAAGCCCTGGTTATGGAACGTTTACACTTCAAATGGTGAGATTTGCAGTTCTGCATCA
+TATTGGGTCCCATGTGTCGATTTGCTTGATGAAAAATCTACATGGGAGTTAGAATTCAGC
+GTACCGAGATTGGTTAAAAATATAGGTACTTCGAAATTAATCGGACAAAATGGAGAAGAG
+AGTGAAAAAGAGAAGGAGGATACGCCTGAGCACGATGAAGAGGAAGAGGGGAAGCCGGCA
+AGAGTTATCAAAGACGAAGATAAGGATTCTAACTTGAAAAATGACGAAGAAGGCAAAAAT
+AGTAAAAGCAAAGATGCACAAGATAATGATGAAGAAGAAGAGGAAGGCGAAAGTGACGAA
+GAGGAAGAGGAAGGGGAAGAGGAAAGGCGGAATATTGAGGAAAGCAACAATCCGAGTTTG
+AGGGATGTGATTGTGTGTTGTTCAGAATATTCAAATATTAAAGAACTTCCGCACCCGATT
+GATTTGACGAAAAAAAAATGCATATTTCAGATAATTAATCCTGTGGCTCCACATCACATT
+GGTTGGGCTATAGGCGCCTTTAATTCATGGTCTTTACCTTTGATATCACCTCCAAGTGTT
+GATGCCGAGGACGAAGTAGAGGAAGACAAGTTGAGAGAGAATGTTGTGGACAATGTTAAC
+GATACTATGGATGACGACATTGGTTCGGATATTATACCCATTCAAATTTTCACACTTCCG
+ACGCAGGAAACAGATGAGTTAACAGTTATAAATTCGACAGTTGTCTGCCAAAAAATTATA
+GATTTCTACTCGAAAGAATTTGGGTCTTATCCTTTCACTTGTTACTCTATGGTGTTTTTA
+CCTACCGCACCTTCTAAGCATATGGATTTTGCAGCATTAGGCATTTGTAATACCAGATTA
+TTGTACCCTCTAGAAGTTATTGATAAAGCATTCAGTACTACGAATGAGTTAGCATGGGCA
+CTTGCTAACCAATGGTCTTGTGTGAATATAACTCCTTTAGATATGAACGACTACTGGTGC
+TGTCTTGGTATTGCTGGTTATATGGTGTTTCAGGTAACCAAAAAATTAATGGGTAATAAC
+ACGTATAAATATCAATTAAAGCGTAATAGTGAGGCGATTGTGGAACAAGACTTCGAGAAA
+CCGCCTATTGGGAGCACTTTTACCGGCAGTTCTAGGCCAATATCTTGGTCTTCTAAAGAT
+TTGTCCTTTATACAATTGAAGGCACCGATGATACTACACATACTTGACAGAAGGATGACT
+AAAACAGAACGATCTTTCGGTATGTCTCGAGTATTACCTAAAATTTTCCTTCAAGCTATG
+TCTGGTGATTTACCGAATAATTCGTTGACTTCATCGCATTTTCAACATGTTTGCGAAAGA
+GTTAATAAAAGTAAATTAGAGAATTTTTTCAACGAATGGGTATATGGGTCTGGGGTACCC
+ATATTACGTGTCACCCAAAGATTTAATAGGAAGAGGATGGTTATAGAACTGGGTATAAGG
+CAAGTTCAAGATGAAGAACTTGGCCACGAAAAAGTGGTAGGGGAGGAAGGATTTTTCAAA
+AGTGCACTAGACCACTTAGAACATCCAGATTTGAACCGAACCGAATGCTTCACGGGCTCG
+ATGACTATAAGGATCCATGAACACGATGGTACTCCGTATGAGCATATTGTGGAAATCAAA
+GATACATTCACAAAAATAGATATTCAGTACAATACAAAGTACAGAAGATTAAGGAAAAGA
+GGTGGTGGTGCAAATGATGAAAATGGTGTTGAAAACAATAATGAGGAGAAGCCTATTGTT
+GTGGATGTGAATTGTCTAGGAAATGTATACATGTCGCCCGAAGAGTGTTCCCGATTCAGT
+TTGACGGAATTTAATCGTACGTCTGAGAGTAATGAATTGCTTAAGCAAAACGAAGCATTT
+GAGTGGATACGCATAGACTCTGATCTGGAATGGATTTGCCAAATGCACATTAATCAGCCG
+GATTACATGTTTTCTTCTCAGTTGAGACAAGATGGGGACATAGAGGCCCAACTAGAAGCC
+ATACGATATTATGAGGACGTCGTTGTTAATGGTGGTGTGAAATCACTTGTTTATTCAAGT
+ATTTTGTTTAGAACGGCGATCGACGAGCGTTACTTTTTTGGCATAAGACTCGCGGCGTGC
+GAAGCGCTTAGTAAATACGTATATGATCCGGATTTTACTGGCGGTGTTAAGCATTTAATT
+CAGATTTTTCAGATTTTGTTTTGCCTAGAAGACTCTAATATTCCAAAGAGTAATAACTTT
+GAGAATCCTAAGTTGTATTTCTTACAGTGTAATATTCCCAAATATTTGGCTAAAGTGAAA
+AATGAAAATGGTAAATGTCCAAAATTGGTGAAGCAATTTTTACTGGATATTCTTGTTTAT
+AATGAGAATGGTGAAAATAAATACAGTGATGATGCGTACGTCCGCAGCTTGATTGAAAAT
+GTTGTTAAAGTTGCTTTAAATGAGTATAAAGATAAAGCATATATGGAAAAAGTTAAGACT
+CAGTTATTGAGGTACGAAAATTTGGTGAATTGGCTTTCATCATACGAGTCTTTGATTAAG
+ACTACTATCATGTATGCTAAGTACAAATTGCATAAAGTGGGTGCTTATGACTTTACGGAA
+TTGACAGGAATGATAATGCATACATTAACATTAGGTATAAATAACGGAGATATTTCCAGG
+GAAAGCTTTCAGAATGAGTTTTTAATGGTTTTGAAAATCATGCTTTTAGAAGGTGGTTTA
+AAAAACAAGGATGCCCTTGTTTTGTTTACTGAAATACTTTGCTTCCATGAGGATTCTTAT
+ATTAGGGATAAAAGTGTTGATGTGCTTTCTGAATGTGTAAATCTAGTTGTTATGGATGGT
+AGTTTGGATACCATAAGTGACGATATTAAGTCCTCCGTCCAATCTGTGCACAATGAAGTT
+AAAAATATAAAAAGTGAGGATGATATTGAGTTGTTTTTAAGTGGTCATTACGTCGATGAT
+ATGAAAATAAAAATAGAAAAGATTGGCCGTCAAAATATTAGTGGGTTAATACAAATATGC
+CGAGATATGTTTAAAGGGTATAGCCCTTTGAAGATATTACTCTGGGATGTTTTGAATTTA
+CCTGTTCTTAGCTTGTACCAGAGGAAGCAAATACATGATCTTGTTAGGGTGATGTACACC
+CTAATCAACAGTTTTGTAGTTAGATTGGAAACACCAAGGGAGAGAAGACTTGTGGCGAAG
+ATGAATAGTAATGAAGAAGGTAAACTTGATATTGTTATAAAGCGTGAAAGTATCCTAAAA
+GTACATATTAAAAAGGAAGTAACCTCTACTGTGGAGGCACCCAAGAAGGCGAATAAGATA
+AAGATAAGTTTGAAAGGTGATAAACCTGTTAGAAAAGTGGAAAAACAAATTGTGAAGCCG
+AAGGTAACTAGCAAACAAAGGAAAGTCAAAAGTCATGTGAACCGCATGGGCAGTTTACCT
+TTACGGTTTGTTAAGATCCAACAACAACCTAGAGTAATGGTGCATTTGTCATCCGTCCCG
+TATAGCCAATTCGTTCAAATTACAAAAGTCACATCAAGATCGTTTATGGTTAAGATAAGA
+ACAAAGAATGATGCTAAGAATTGA
+>YCT5 1476 residues Pha 0 Code 0
+ATGAAGCCACAGTGCATACTCATCTCTTTGCTGGTCAACCTCGCATACGCAGAGGAGTAT
+TTGGTGAGGTTCAAAAATCCCACAGCATTCCAACAATTCACTTCGAATTCCAACAGGTCA
+TGGAGACAGTTCATCGACAACAAAATTGAGAAGAAATTCTCCATCGGATCCTTCCGCGGC
+GTGACCATGAACCTGTCCAAGAACTTAGTGAACAAGCTGAAGAAAAGCCCACTGGTGGCT
+GATATTGTGCCCAACTTCAGGTTCGAAGCTTTTGAAGGCGACAGTGTAAATAGCGCCGAG
+TCGAGTTATACGTTTAACGCTACCGCCAAATACTCGTACGAAGACGTCGAGGAAGAGCAA
+AATATAACGTATCAACCAGACGCACCCCGTCACTTGGCCCGGATTTCCCGCCACTACCAA
+CTCCCATTCGACGTTGGGGACAAGGACCGCTACAAAAGCTGGTTCAATTACTACTATGAA
+CACGACTATCAAGGTCAAGACGTCAACGCCTATATCATGGATACGGGTATCTTCGCGGAC
+CATCCGGAATTCGAAGACAGAGTCATCCAGGGGATTGACTTGACCAAAGAAGGGTTTGGC
+GACCAGAATGGCCACGGAACGCACGTGGCGGGACTCGTAGGTTCCAAAACGTATGGAGCG
+GCAAAGAGGGTCAATCTTGTGGAGGTCAAAGTCTTGGGCAAAGACGGGTCTGGCGAGGCC
+AGTAACGTTCTTAGTGGTCTGGAGTTCATCGTGGAACATTGCACAAAGGTCAGTCGCCCA
+CAGGGTAAAAAATGCGTGGCCAATCTAAGTCTAGGGAGTTTCAGGAGCCCCATAATCAAC
+ATGGCAGTGGAGGGGGCCATTGAAGAAGGTATTGTATTTGTTGCCGCGGCGGGGAACTTC
+AATTTAGACGCCTACTGGGCCTCACCTGCGTCTGCAGAAAACGTTATCACCGTAGGGGCC
+TTTGATGACCACATTGACACGATTGCCAAGTTCAGCAATTGGGGGCCCTGTGTAAACATC
+TTTGCCCCAGGCGTGGAAATTGAGTCGCTATCTCATCTGAACTACAACGACACTTTAATT
+TTGTCAGGTACATCTATGTCGACGCCCATTGTCACCGGAGTTGCAGCGATCCTACTCTCG
+AAGGGAATTGAGCCTGAAATGATAGCACAGGAGATTGAGTATTTGTCCACGCGTAATGTT
+TTCCATAGAAGAACGTTGTTTTTCAAGCCTTCTACGCCAAACCAGATTCTTTACAACGGC
+GTCGATAAACTGGACGATCCATATGACGACGAAACGTTCCCTCGATTGAACATAGAGGCA
+ATTGCTAAGGAACTGGAGGAGTACAATGCCACTTTACAAACTCCTATGTCTGAGAATCTT
+CAATCTGGTTCAAAACTGTGGGGTTGGAATAACGATGTCACACTACCTCTTGGTGAGATT
+CGATTGAAGAGGCGTGATTTTATGAAAAATTTGTAG
+>PETCR46 510 residues Pha 0 Code 0
+ATGTGGAGCAGGAACGTCAGATTGCTTGGATCATGGACAAGGTCCTACATGGTCCCCGCC
+ACCAAGAGAAAAACCATCCCCGTGTACCCACCTGTGCAGCGCATAGCTTCGTCGCAGATT
+ATGAAGCAGGTGGCCCTCTCAGAAATAGAGTCTCTGGATCCCGGGGCCGTTAAGAGGAAG
+CTCATCAGTAAAAAGAACAAGGACCGCTTGAAGGCAGGCGACGTGGTCCGGATTGTGTAC
+GACTCGTCCAAGTGCTCGTACGACACCTTTGTTGGCTACATCCTTTCCATAGACCGCAAA
+CAACTGGTGCAAGACGCCTCGTTGCTGTTGCGGAACCAGATAGCCAAGACGGCCGTCGAG
+ATTAGAGTGCCATTGTTTTCGCCGCTGATCGAGAGAATCGACTTGCTAACCCCCCACGTC
+TCGAGCAGACAAAGAAACAAACACTACTACATCAGAGGTACAAGGTTGGATGTCGGCGAC
+CTCGAGGCAGGTCTAAGAAGAAAGAAATAG
+>YCT7 828 residues Pha 0 Code 0
+ATGTCACGTCCTGAGGAGTTGGCACCACCGGAGATTTTCTATAATGATAGCGAAGCACAC
+AAGTACACGGGTTCGACCAGAGTGCAGCATATCCAGGCGAAGATGACGCTGAGGGCGTTG
+GAGCTTTTGAATCTGCAGCCGTGCAGTTTCATTCTGGATATCGGGTGCGGGTCCGGACTG
+TCTGGGGAGATTTTGACGCAGGAGGGAGACCATGTGTGGTGTGGTTTGGATATATCGCCC
+AGCATGCTTGCGACCGGTCTTAGTAGAGAGCTGGAGGGCGACTTGATGTTGCAGGATATG
+GGCACCGGGATACCGTTCCGGGCGGGCTCGTTTGACGCGGCTATTAGTATCAGTGCGATC
+CAATGGCTGTGCAATGCGGACACTTCATACAACGATCCTAAACAGCGGTTGATGAGGTTT
+TTCAACACATTGTATGCTGCACTGAAGAAGGGAGGGAAATTTGTGGCCCAGTTCTACCCG
+AAAAACGACGACCAGGTGGACGACATACTGCAGTCTGCCAAGGTGGCAGGGTTCAGTGGC
+GGGCTTGTGGTGGACGACCCAGAGTCTAAAAAGAATAAGAAGTACTACCTTGTGTTGAGC
+AGTGGGGCCCCACCGCAGGGGGAGGAGCAGGTGAATTTGGACGGTGTGACCATGGACGAG
+GAGAACGTCAACTTGAAGAAACAACTGCGCCAGCGCTTGAAGGGAGGCAAAGACAAGGAG
+TCTGCCAAGAGTTTCATTCTAAGAAAGAAGGAGCTCATGAAAAGACGTGGGAGGAAAGTT
+GCGAAGGACTCCAAGTTCACCGGGAGGAAAAGAAGACACAGGTTCTAG
+>YCT9 447 residues Pha 0 Code 0
+ATGGCGCTGTCCAGGAGCGTGGGGCGAGGATCAAAACTCACGTCCCCAAAAAACGACACA
+TACTTGCTAGCATCCTTTCGGTGGAACCTCGACCGAGACTTGCTCTTCAGGTGTGAAAGG
+TACTTTTGCATGTGGGCGTCCACAGGGTACTCCTCCTCCTGCTCCTGCTTCCCTGCCACA
+CGTTCCGCCTCAGTCGACTCCACTCCTTCAGTCGACTCCACTGGCTCCACCAGCGACGTG
+GTAGACGACCGTGGCGAAACCTCCATGGACTCCTGTGGCAGGATCACGTTATCGTACGTG
+ACCGAATGCCGTTTGTTGGCTTCTGCGGAATTGAGTCTGCGGATCTTAAGAAACTCTTCG
+TCTTGCAACAAATCCTTAGTCTCCGTCATTCTTGCAATCTGTTTTGGCGCTCTTGCTGCA
+AGCCGTGCTGAACAACCACCTGCGTGA
+>ARE1 1833 residues Pha 0 Code 0
+ATGACGGAGACTAAGGATTTGTTGCAAGACGAAGAGTTTCTTAAGATCCGCAGACTCAAT
+TCCGCAGAAGCCAACAAACGGCATTCGGTCACGTACGATAACGTGATCCTGCCACAGGAG
+TCCATGGAGGTTTCGCCACGGTCGTCTACCACGTCGCTGGTGGAGCCAGTGGAGTCGACT
+GAAGGAGTGGAGTCGACTGAGGCGGAACGTGTGGCAGGGAAGCAGGAGCAGGAGGAGGAG
+TACCCTGTGGACGCCCACATGCAAAAGTACCTTTCACACCTGAAGAGCAAGTCTCGGTCG
+AGGTTCCACCGAAAGGATGCTAGCAAGTATGTGTCGTTTTTTGGGGACGTGAGTTTTGAT
+CCTCGCCCCACGCTCCTGGACAGCGCCATCAACGTGCCCTTCCAGACGACTTTCAAAGGT
+CCGGTGCTGGAGAAACAGCTCAAAAATTTACAGTTGACAAAGACCAAGACCAAGGCCACG
+GTGAAGACTACGGTGAAGACTACGGAGAAAACGGACAAGGCAGATGCCCCCCCAGGAGAA
+AAACTGGAGTCGAACTTTTCAGGGATCTACGTGTTCGCATGGATGTTCTTGGGCTGGATA
+GCCATCAGGTGCTGCACAGATTACTATGCGTCGTACGGCAGTGCATGGAATAAGCTGGAA
+ATCGTGCAGTACATGACAACGGACTTGTTCACGATCGCAATGTTGGACTTGGCAATGTTC
+CTGTGCACTTTCTTCGTGGTTTTCGTGCACTGGCTGGTGAAAAAGCGGATCATCAACTGG
+AAGTGGACTGGGTTCGTTGCAGTGAGCATCTTCGAGTTGGCTTTCATCCCCGTGACGTTC
+CCCATTTACGTCTACTACTTTGATTTCAACTGGGTCACGAGAATCTTCCTGTTCCTGCAC
+TCCGTGGTGTTTGTTATGAAGAGCCACTCGTTTGCCTTTTACAACGGGTATCTTTGGGAC
+ATAAAGCAGGAACTCGAGTACTCTTCCAAACAGTTGCAAAAATACAAGGAATCTTTGTCC
+CCAGAGACCCGCGAGATTCTGCAAAAAAGTTGCGACTTTTGCCTTTTCGAATTGAACTAC
+CAGACCAAGGATAACGACTTCCCCAACAACATCAGTTGCAGCAATTTCTTCATGTTCTGT
+TTGTTCCCCGTCCTCGTGTACCAGATCAACTACCCAAGAACGTCGCGCATCAGATGGAGG
+TATGTGTTGGAGAAGGTGTGCGCCATCATTGGCACCATCTTCCTCATGATGGTCACGGCA
+CAGTTCTTCATGCACCCGGTGGCCATGCGCTGTATCCAGTTCCACAACACGCCCACCTTC
+GGCGGCTGGATCCCCGCCACGCAAGAGTGGTTCCACCTGCTCTTCGACATGATTCCGGGC
+TTCACTGTTCTGTACATGCTCACGTTTTACATGATATGGGACGCTTTATTGAATTGCGTG
+GCGGAGTTGACCAGGTTTGCGGACAGATATTTCTACGGCGACTGGTGGAATTGCGTTTCG
+TTTGAAGAGTTTAGCAGAATCTGGAACGTCCCCGTTCACAAATTTTTACTAAGACACGTG
+TACCACAGCTCCATGGGCGCATTGCATTTGAGCAAGAGCCAAGCTACATTATTTACTTTT
+TTCTTGAGTGCCGTGTTCCACGAAATGGCCATGTTCGCCATTTTCAGAAGGGTTAGAGGA
+TATCTGTTCATGTTCCAACTGTCGCAGTTTGTGTGGACTGCTTTGAGCAACACCAAGTTT
+CTACGGGCAAGACCGCAGTTGTCCAACGTTGTCTTTTCGTTTGGTGTCTGTTCAGGGCCC
+AGTATCATTATGACGTTGTACCTGACCTTATGA
+>RSC6 1452 residues Pha 0 Code 0
+ATGGTAACACAGACCAATCCGGTCCCTGTTACATATCCAACGGATGCTTATATCCCCACG
+TATCTGCCCGATGATAAGGTCTCCAATCTGGCAGATTTGAAAAAATTGATAGAAATGGAT
+TCCAGACTAGATTTGTATCTGACAAGAAGGAGGCTGGATACGTCCATCAATTTACCTACA
+AACACCAAGACCAAGGACCATCCCCCCAATAAAGAGATGCTGAGGATTTACGTCTACAAC
+ACTACGGAAAGCAGCCCTCGCAGCGATTCTGGCACCCCAGCGGACTCAGGCAAGACTACA
+TGGACACTGAGAATAGAAGGTAAGCTTCTGCACGAGTCCGCAAACGGAAAGCACCCATTT
+AGTGAGTTTTTGGAAGGTGTCGCGGTCGACTTTAAAAGACTGAAACCGCTGGGCATGGGC
+AAGAAGAGGAAACGCGATTCGTCATTGAGCCTTCCTTTGAATCTGCAACAACCCGAATAC
+AATGATCAAGATAGCACCATGGGCGATAACGACAACGGCGAGGATGAGGACAGTGCAGAG
+GCAGAATCCAGGGAGGAAATTGTAGACGCACTGGAATGGAACTACGATGAAAACAACGTT
+GTGGAGTTTGATGGTATCGACATCAAGAGGCAAGGCAAGGATAATTTGCGATGCAGTATA
+ACCATCCAGTTGAGGGGTGTCGACGGTGGAAAAGTACAGTACTCGCCCAACTTAGCTACC
+TTGATAGGTATGCAAACGGGCTCCGTTAATGACGCGGTTTATTCGATCTACAAGTACATT
+TTGATCAACAATCTGTTTGTTACGGAACAAACAGAGGCTCAAGATGGTTCCAACGATGCC
+GAAGACAGCAGTAACGAGAATAACAATAAAAACGGTGCTGGTGACGATGATGGCGTCGAG
+GGAAGTACTCCAAAGGATAAGCCCGAATTGGGTGAAGTGAAGCTAGATTCACTCTTACAA
+AAGGTATTGGATACAAACGCCGCGCACCTCCCCTTGATGAATGTTGTGCAAACCGTGAAC
+AAACTGGTATCACCCCTACCGCCCATCATCCTAGATTATACAATTGATCTTTCCAAAGAT
+ACCACCTATGGTGCTACCACCTTGGATGTAGATGTGTCGCACATTCTCCACCAGCCTCAA
+CCCCAGCCAAATTTACAAAAAGAGGAAGAAACAGATGCTGAAGACACAGCAAAACTACGT
+GAAATCACAAAGCTTGCCTTGCAGTTGAACTCTAGTGCTCAAAAATACCAGTTTTTCCAC
+GAACTGTCTTTGCATCCAAGAGAAACGCTGACTCACTACTTATGGTCTTCCAAGCAAAAC
+GAGCTTGTGCTGCAGGGCGACCAATACTTCAATGAAGATGCTGCAAGAACGAGTGACATA
+TACAGTAACAACAACAATGACAGGTCACTAATGGGCAATATCTCACTACTGTACTCCCAA
+GGAAGACTATAA
+>THR4 1545 residues Pha 0 Code 0
+ATGCCTAACGCTTCCCAAGTTTACAGATCTACCAGATCCAGCTCTCCAAAGACAATCTCT
+TTTGAAGAGGCTATCATTCAAGGTCTGGCCACTGACGGTGGTCTTTTCATTCCACCAACT
+ATTCCACAAGTGGACCAAGCCACTCTTTTCAATGATTGGTCAAAGCTCTCCTTCCAAGAC
+TTAGCCTTTGCTATCATGAGACTATACATTGCCCAAGAAGAGATTCCAGATGCTGATCTA
+AAGGACTTGATCAAGAGATCTTATTCTACTTTCCGTTCTGATGAAGTCACCCCCTTGGTG
+CAAAACGTCACTGGTGACAAGGAGAATTTGCACATTTTAGAATTATTCCACGGTCCTACC
+TACGCTTTCAAAGACGTTGCTTTACAATTTGTCGGTAATCTTTTTGAATACTTCTTACAA
+AGAACCAACGCCAATTTACCTGAAGGCGAGAAAAAGCAAATCACTGTGGTCGGTGCTACT
+TCCGGTGACACTGGTTCTGCAGCCATCTACGGTTTAAGAGGCAAAAAGGACGTTTCCGTT
+TTCATCTTATATCCAACCGGTAGAATTTCCCCAATTCAAGAAGAACAAATGACCACCGTT
+CCAGATGAAAACGTCCAGACTTTGTCTGTTACCGGTACTTTCGACAACTGTCAAGATATC
+GTCAAAGCTATTTTCGGTGACAAAGAATTCAACTCTAAACACAACGTCGGTGCTGTTAAC
+TCCATCAACTGGGCAAGAATCTTGGCCCAAATGACCTATTACTTTTATTCATTCTTCCAA
+GCCACCAACGGTAAGGACTCCAAGAAGGTCAAGTTCGTTGTGCCAAGTGGGAACTTCGGT
+GATATATTGGCCGGTTATTTTGCCAAGAAAATGGGTTTGCCTATTGAAAAACTGGCCATC
+GCTACCAATGAAAACGACATTTTGGACAGATTTTTGAAATCTGGTCTATACGAAAGATCA
+GACAAGGTTGCTGCTACTTTATCCCCAGCAATGGATATCTTAATCTCTTCTAACTTTGAA
+AGACTACTATGGTACCTAGCTCGTGAATACCTAGCTAATGGTGATGATTTGAAAGCCGGT
+GAAATCGTCAACAATTGGTTCCAGGAATTGAAGACCAACGGTAAGTTCCAAGTTGACAAA
+TCCATCATTGAAGGCGCATCAAAGGACTTTACATCAGAAAGAGTTTCCAATGAAGAAACA
+TCTGAAACAATCAAGAAGATCTACGAATCATCTGTAAATCCAAAACATTACATCTTAGAT
+CCTCACACAGCTGTCGGTGTTTGCGCCACAGAAAGATTGATTGCAAAAGATAATGACAAG
+TCCATCCAATACATTTCTCTATCTACCGCTCACCCAGCTAAATTTGCCGATGCTGTAAAC
+AATGCATTGTCTGGATTTTCCAATTATTCATTTGAAAAGGATGTTTTGCCTGAGGAATTG
+AAGAAACTATCCACATTAAAGAAGAAATTAAAATTCATCGAAAGAGCTGACGTTGAATTG
+GTCAAAAACGCTATTGAAGAAGAACTTGCTAAAATGAAATTATAA
+>CTR86 1692 residues Pha 0 Code 0
+ATGCCTATGAACAATTTTCTAGATGAATTCAATTTATTTGATTCAATCATTACCATGATG
+AAGAACGACCCATGTTGCGTCGAGGATTATGAGCCAATCGTCGAAAACCTGAACCGTATA
+TTTCAAAGGACGTTTAATGATGAAGAACATAGGAAATCAATGGCTAACTCCCAGCTTTTT
+TGGGAACGATTAAGAGACACCTTGGAAGCAATGCTGTTGCCAGCGTCGTTAAATGAGAAT
+AGCTCAATACCGTATACAAGAACAGTGAGGGGCCTTATCTTAATGATGAGAAACCTTGCC
+GCTGAAAACCAGGAAATACCCCAAAAGCTTTTACTACAAAACCTCGTAATTCGTGGTTTT
+CTGCATGCAACTAGTGAGTATGTCGTTGACACTCCGCTAATCAAACATCTATACATCGCA
+TGTTTAACGTGCCTTTTCAATATACAGCAGAACTACTCTACAGTGGATATGACTACTTTT
+CCAGCTCTTTTACAATTTCTTCAATACCCTTATGGGATCAAATTGGAAGACGGTGAAGAA
+GAAGAGCATTTCTGGCTACCATATTTATTTCTTTTCAAGACGTATCTCAACAATGATGAA
+TTTTCCAACGAATTTTTCAGGGATAATGATACACCCCAGAAAGACTATTATTGTGTTAGG
+GATAGAATATTTTTCGATATAGTGACAGCCAAATTCATCCAGGATCAAGAGAATTCCTTT
+TTAATTGAGAAGGGCAGAAACTATCTGGATGATTCAAAATTGGAAATAACTTCTATTGAC
+CTATCTGTCTTAGAATGTATTAGCAAAAGTCTTACAACTGCTTCTTTTGGTAAATACCTC
+AATGGGTTAGAAGAAAGACAGCCAGGAAAATTCACCACTTTGTTGCAGATATTGCAATTG
+GTTGTAACGAGTAAAGAAGATTGGAATACCTATGAGTTGACTGCAATTATGTCATGGTGC
+TACCCCATTCTGCAACGTCTTGCATGCAAGGATATTCCTGCCTTTTTCAATAAAAGTTGT
+AACGATTATGCTCCTTCAGTTGCCATCCAATTACACTCCACTTTACTTTCTTGCCTGGAC
+ATAATTTCTGACTTGTGCAAATTCAATCATGTTAGAAAATTCTTAATTTCGTATGACTCT
+GTGAAAATATTGGTATCTCTCTTGGATACTTTCCAAAAGAATTTGTTGAGGATTAATTTT
+TTGAAAGGAAACGGTGATACGGTGAATGAAATTAAAATCACAGATCATGAAGGTAACAAA
+ATCGAGGACCGGTTATTAATTTTCAACCGTGTTAATACCAACGAATCCTTTATTAGGGCT
+GATAATTTTCCCCATTGTAAATTAGTAATAATCGAAATATTGGCATCGTTAGTGTATGCA
+CATCCTGAAATCCAAGATCAAATAAGAGAATTAGGTGGTCTTGCATTAATTCTTTCCAAT
+TGTGTCATCGATGATAATGATCCGTTTATCAAGGAAAGATCTATTGTTTGCTTGAAGTTT
+TTGTTAAAGAATAATGCCAAGAATCAGGAATATGTCAAAAAAATGGAAGCTCAAGACGTT
+GTTCAAGACGATGCATTGAGCAAAGCTGGGTTTGAAATATCAGTTGAAAAGGGCGGGAAA
+GTTAGATTAGTATCTAAAGAAGAAGACCCTGGGAACGAGAATTCTGAGATTATTAGCATA
+GATGAAGATTAA
+>PWP2 2772 residues Pha 0 Code 0
+ATGAAATCCGATTTCAAGTTCTCTAACCTTTTAGGTACGGTCTACAGGCAAGGTAACATC
+ACCTTTTCCGATGATGGCAAGCAACTACTCTCACCGGTGGGGAATAGGGTCAGCGTGTTT
+GACTTAATCAACAACAAATCGTTCACGTTTGAATACGAGCATCGCAAAAATATTGCTGCC
+ATTGATCTGAACAAACAAGGCACATTGCTGATTTCTATTGACGAGGACGGTCGCGCCATC
+CTTGTCAATTTCAAAGCCCGTAACGTGCTTCACCATTTCAACTTCAAAGAAAAATGCTCC
+GCTGTGAAGTTCAGCCCTGATGGGAGACTCTTTGCATTAGCCTCAGGCAGGTTTTTACAG
+ATTTGGAAGACTCCAGATGTTAATAAAGACAGACAGTTTGCTCCCTTCGTCCGCCATAGG
+GTGCATGCGGGACACTTTCAAGACATAACGTCTTTGACGTGGTCACAAGATTCCAGATTT
+ATCCTTACGACTTCCAAAGACTTAAGCGCAAAAATATGGTCCGTAGATTCAGAGGAAAAG
+AACCTTGCGGCGACAACATTTAATGGGCACAGAGACTACGTTATGGGTGCGTTCTTCAGT
+CATGATCAGGAAAAAATCTACACTGTAAGCAAAGACGGTGCTGTCTTTGTCTGGGAATTT
+ACCAAGAGGCCATCCGATGACGACGACAATGAAAGTGAAGACGACGACAAGCAAGAAGAA
+GTAGATATTTCGAAATACAGCTGGAGAATCACAAAGAAACATTTTTTTTACGCAAACCAA
+GCCAAAGTAAAGTGTGTCACCTTCCATCCAGCAACAAGGCTTTTAGCTGTCGGATTTACT
+AGTGGGGAATTCCGTCTTTACGATTTGCCTGATTTCACTTTGATTCAACAGCTTTCTATG
+GGGCAAAACCCAGTCAACACCGTTAGCGTCAACCAAACCGGCGAATGGCTGGCGTTTGGT
+TCCAGCAAACTGGGCCAATTACTAGTTTACGAATGGCAATCGGAATCGTATATCTTGAAG
+CAGCAGGGCCATTTCGATTCCACAAATAGTCTTGCATACTCTCCGGATGGTTCACGTGTA
+GTGACAGCATCCGAAGATGGGAAAATCAAAGTTTGGGACATTACATCAGGGTTTTGTTTG
+GCCACTTTTGAAGAACACACCTCTTCAGTTACTGCTGTACAGTTTGCGAAAAGGGGTCAG
+GTCATGTTCTCATCATCGTTAGATGGTACGGTGAGAGCGTGGGACTTAATCAGGTATCGT
+AATTTTAGAACATTCACTGGTACTGAAAGAATCCAATTCAATTGTTTAGCGGTGGATCCA
+TCAGGTGAAGTGGTTTGTGCCGGGTCCCTGGACAATTTTGACATTCATGTTTGGTCCGTG
+CAAACTGGTCAATTATTAGATGCTTTGTCCGGACATGAAGGCCCTGTTTCGTGTCTTTCA
+TTTAGTCAAGAGAACAGTGTCTTAGCTTCTGCATCATGGGATAAAACAATTAGAATCTGG
+TCCATATTTGGTAGAAGCCAACAAGTAGAACCTATAGAAGTTTATTCCGATGTTTTAGCC
+TTATCAATGAGACCAGATGGTAAAGAAGTTGCAGTATCTACCTTAAAGGGTCAAATATCC
+ATTTTCAACATAGAAGATGCCAAGCAGGTGGGCAACATTGACTGTAGAAAGGATATAATA
+TCTGGTAGGTTTAATCAAGATAGGTTCACTGCCAAAAATTCTGAACGATCCAAATTTTTT
+ACTACAATACATTACAGTTTTGATGGTATGGCTATTGTGGCTGGTGGTAATAATAACTCC
+ATTTGTCTATATGATGTTCCAAATGAAGTCTTGTTAAAAAGATTCATTGTGTCCAGAAAC
+ATGGCTTTGAATGGTACTCTCGAATTTTTAAACAGTAAGAAAATGACTGAAGCAGGTTCA
+TTAGATTTGATTGACGATGCAGGCGAAAATTCAGATTTGGAGGATCGTATTGATAATTCT
+TTACCAGGGTCTCAAAGAGGTGGCGACCTGTCCACAAGAAAAATGAGACCAGAGGTTAGA
+GTTACTTCGGTGCAATTCTCCCCAACGGCGAATGCATTTGCCGCTGCTTCAACGGAAGGT
+TTATTGATATATTCCACCAATGACACGATATTATTTGATCCCTTTGATCTGGATGTGGAC
+GTCACCCCCCATTCTACTGTAGAGGCGCTACGAGAAAAGCAGTTTTTAAATGCATTAGTA
+ATGGCGTTCAGGTTAAATGAAGAATATTTGATCAATAAAGTCTATGAAGCCATACCTATT
+AAGGAAATCCCCTTGGTTGCAAGTAATATTCCTGCAATATATTTACCGAGGATTCTGAAG
+TTCATCGGTGATTTTGCCATTGAATCCCAACACATTGAGTTTAACCTAATTTGGATCAAA
+GCTCTATTATCTGCGAGCGGTGGTTACATAAATGAACACAAATATCTCTTCTCGACGGCT
+ATGAGGTCGATACAAAGATTTATTGTTAGAGTGGCTAAGGAAGTAGTCAATACCACTACT
+GATAACAAATACACCTATAGATTTTTGGTATCAACTGATGGGTCCATGGAAGATGGCGCG
+GCTGATGATGACGAGGTTCTATTAAAAGATGACGCAGATGAAGATAACGAAGAGAACGAA
+GAGAACGATGTAGTCATGGAATCTGACGACGAGGAAGGATGGATTGGTTTCAATGGGAAG
+GATAACAAATTACCCTTGTCTAATGAAAATGATTCCAGTGATGAAGAAGAAAATGAGAAA
+GAGCTTCCTTGA
+>YCU9 777 residues Pha 0 Code 0
+ATGGATGACGATCACGAACAGTTGGTCGAAGAACTGGAGGCCGTCGAGGCCATCTATCCG
+GATCTTCTCTCCAAGAAGCAGGAAGACGGAAGCATCATCGTTGTGAAAGTGCCGCAGCAT
+GAATACATGACACTGCAGATCTCCTTCCCGACACACTACCCCTCCGAGGAGGCTCCTAAT
+GTCATCGAAGTTGGTGTCTGCACTTCTTTGGCTAAGCGCGATCTCTACGATACCAAGTAC
+CTTCAGCATTTGTTCCAGGAAGTGATGGACTCTGTTTTCCACCGCGGATCTGTCTGTCTA
+TTTGACTTCCTCACAGAACTCGACGGTGTCTTGTACGTTGAACCAGAGGAGGAGACAGAA
+CCGGTCCAGCAGAGTGACATTCCCACAGACCCCTTCGAGGGCTGGACCGCGTCGGACCCC
+ATTACTGATAGAGGCTCGACTTTCATGGCCTTTGCAGCACATGTTACCTCCGAGGAACAA
+GCGTTTGCCATGCTAGACCTACTGAAGACCGACTCCAAGATGCGTAAGGCAAACCATGTC
+ATGAGTGCATGGCGAATCAAGCAGGATGGCTCTGCGGCAACATATCAAGATTCCGATGAT
+GACGGTGAAACGGCCGCCGGCTCCAGAATGCTGCACCTCATCACCATCATGGATGTGTGG
+AACGTCATCGTTGTGGTGGCCCGTTGGTTCGGCGGTGCCCACATAGGTCCCGACCGGTTT
+AAACACATCAATTCTACGGCAAGAGAAGCTGTTGTCAGGGCCGGCTTCGACTCGTAA
+>YCV1 1752 residues Pha 0 Code 0
+ATGGTGCGTTTTGTTTCAATTTTAAGTTTATTCGGCTGCGCGGCGACGCTTGTCACGGCC
+CATGATGACATGGACATGGACATGGATATGGACATGGATATGGACATGAATATCGATACG
+ACAACGTCTCAATCCATAGATGTCTCATCCACGGCTTCAATCGTCCCCGTGCCACATGAA
+CCAAAACATTTGCATGGCCTTCCTATACTGCAATCGCCCTCGCTTACCCCTGCGGAGAGA
+TTGTACTGGGAAAACTACAACACCACAACCTACTTTACTACACAGGCTGGGAATAGGTCT
+GCCCTTCGCTACCACATTATTACGCTGCTCTTGGTTGCATTTGTGCTCTACCCTGTGTCC
+CTGGCGCTAAGCGCCGCCCGTTCTAGGTGGTACTTACCCCTGCTGTTTGTTAATCTATGC
+ATTTGTATTTCGTCCGTAATGGCATTGTCCGTGTTCAAAAATACTTTCCCGGAAGAAGAC
+TGGTATGCGCATAATATCTATGGCACCACTTCTGTGCTACTTCTCGTTTTTATGCTTGTT
+CACTTCTTCGCTGCGGTGCTTTCTGTCCCCGTCTCATTAGCATCGAAAAAGGAGTACCGT
+CCGGTTGACACCATCCCTCTGAATGATCTTGAATCTACGCCCGTCATGGTGAATAGTGCA
+CGTGGCTCTCCAAGTCCTTCTTCCAACAGAGACACGTTGTTCTCGCTCTCTTCAGACACC
+ACGACCGCCACGGCCACCAATAATAATAAACGGAGACGCGCTGAAGGCGAAGACGAGGGT
+GATAACACCTCCAACCACGACACTTTGCGCGACGAAGACTACGATAATGATGACGACGAA
+ATTGCTTCCATTGAAGCGCCACCTCTGCTTCCTCAAGACATACCCGTTTTCCGAATCTTG
+TTTACCAACACGAAGTACCAGATGCTTGCCGCGCACCTCTCGTGCGTCGCCAACGTGGTC
+TTTCACATGCTTACCTACCCGCTATTCATGTACATCTTTGTAGACCTAATCATCGGCTTC
+GCTGTAGGTAACTTGCTCGGCAAGGGCATCCGCATCTTTAATCTCTTGGCCCACTGGATT
+AAGGGCGGCGTATTTTTTACTCTGGGCGTTGTCTCTTTAGCAAGATACTGCGGTTTCGCA
+GCTAAGTACGGCTGGGCATGGAACAACATCAGCTTCACCTCTCAACTCACACAAACGCGT
+TCCTCCAATCTTCTTTTCCGGTTTGCTCCTGCGGGGACTTTCACCATGGAATTCGTTGAA
+TCCTTCCTCATTTTCTTTTACGGGTCCACCAACATCTTCTTGGAGCACCTGGCAGGAAAC
+GGCGGCGCATGGACTGCCAAGGATTTACAGCATGTGTCGATAAATTCTCACCGGCCCCAA
+GGTGTGTGGGCTACTCACGGAGTACAAGCTCAACCATTGGCGATTCGAGCATGCCCGCAA
+ACGGCCACAGACCGATGTAGTTGCTGCCACACCGGGGTACTCTCCAAACCCGTTCCCCGC
+TTTCACCATATTTTGGACTGGGATTCTGATGTCCCAGCACGCACAGTCCTCGCAATTTTC
+TACTACCATTCACACGCAATGGGGATACTTGTTGTCCTATGGGTCCTTCTTCCGTCTGCT
+AACATTTTTGATTCTGTTTTTGGTGCCCAACACCAACAGTGCCGCATCCAAGCCTTTCAC
+GGAGTTGATCACCTCGTTCTGTCTCCTCTGTGGTGGTCTGGTATTTATGGAGTCCACGGA
+TCAGTCCATTGA
+>G10 474 residues Pha 0 Code 0
+ATGCCGCGCATAAAGACCAGAAGATCCAAGCCTGCACCTGACGGGTTCGAAAAAATCAAG
+CCAACCCTCACAGATTTCGAAATCCAACTCAGAGATGCCCAAAAGGACAAGTCGTCTAAG
+CTCGCAGCAAAGTCCAATGAGCAGCTCTGGGAGATAATGCAACTCCACCACCAGCGCTCT
+AGATACATATATACTCTGTACTACAAGAGAAAGGCCATCTCCAAAGACCTTTACGATTGG
+TTGATAAAGGAAAAGTATGCTGATAAATTGCTAATTGCCAAATGGCGCAAAACCGGGTAT
+GAAAAACTGTGCTGTCTGCGCTGCATTCAAAAGAACGAAACTAACAACGGTAGCACTTGC
+ATCTGCAGGGTGCCTCGTGCACAGTTAGAGGAAGAAGCACGCAAAAAGGGCACACAGGTG
+TCCTTCCATCAGTGCGTCCACTGCGGCTGCCGTGGATGTGCAAGCACAGACTAA
+>HCM1 1599 residues Pha 0 Code 0
+ATGATGAATGAAGACATATCCATCATTGATGGCCATAATAGTTTTTTAACGGAAAAAAGC
+ACCGTGCTATTAACCCAAGCCAAGAGAACACTAGAAGACGAAAAGGAAATGATTACTCCC
+CCGAGCTCAACTGTGAGAAAAACAATGAAGGAAGTAAATAAGAGGCCGTCGCATCCCCTC
+TCACCGGATCACTCGTCCCCAATTGCTCCATCTAAGGCCAAGCGCCAAAGATCGGACACA
+TGCGCTCGGTCCAATGGTAACCTAACCTTGGAAGAAATTCTTCAATCTTTGGAAAGAAGA
+AGAATAAATGGTGAACTCGCCAAGAAACCTCCATATTCGTATGCAACTTTGATTTGCTTG
+GCCATTTTGCAATCTCAGGAGGGAAAGCTAACGCTATCCCAGATATATCATTGGATCCAC
+GTTCACTTCCCTTATTACAAGCAGAAAGATGCTAGTTGGCAAAATTCAATAAGACATAAC
+TTGTCTTTAAATGATGCGTTCATCAAGACTGAAAAGTCCTGCGATGGTAAGGGTCATTTC
+TGGGAGGTCAGACCGGGTGCCGAAACAAAATTTTTCAAAGGTGAAAATCGTGGTTATGAA
+TTTGTAAAGGACTCCTTACAAGACATTGGGAAGTATTTTGAAATAGATTCTACACTTGAT
+GAATTAGAACAAGTTGAGAGTGGAGAAGGCAATGATGATCTTCCTGACGAGGAAGAAAGA
+GAGGAAGCAGGGAAATTCCCTTCCATTGAAATTCAATTGAACTCCTCCCCTATACTGAGA
+GTTTCCCAGTTACATCACATACCGCAATTGAAAACAGACAACAGTGTACTGAACCCTCAC
+GAAAACCTAGAATCGATGCGGAACATGATAGAAAACGATGTCAACAATATAGATTCCTTG
+GAACCTCCTTATGTCATGAAGAAATATCATACTTCTTTAGGCTTACCGTCGCTGGTGAAT
+GCCAAAGATCATTTCCAGGCGGGTGTGAAAAACAATAATATCACCCAGGCAAATAGATTT
+AATACACTCCCTATAACTAGCGCAAAGTCTCCTCAGAATTTCAGAAAATATTTCACCTCA
+TTCAATTCAAATTTTGAAGATTTATCTCCACTTCGAAGTAATGTAGGGGCTGGTTCTCTA
+CTCGACCCACTTCCGTATTCCCCATTGAAGCTGTACGATCAGAAAAATCTTGCGCTCATG
+TCGAAACCACAATCTCAGCAATCATATTCCAATTCTCAACTTCCACCTCCACCTTCCTCT
+CATGGTTCGGACTTACTTAAAACACCCAAGATGAGGCATTCCGATGGCTTAGAGAAAACC
+CCATCGCGGTTGATAAGCACACCTAAGGACGGTAACTCGATTTTGAGGAAATGGCAGACT
+CCTTCACACCTTTTTGAAGATTTGTACTGTTCTCCGCTATTTAGAGCTATAGAGACTCCA
+ATCAGGTATATCACGACGCCGGGGGGCAACTTTGGAAACCCAAATTTCACCAAGAAAGTC
+CTCTGCACCCGATGTCCTCACAAGCGCAACGAATTCCAAATTTGCTTCAAGCGGGCTGTT
+TGGCGTGGATGTTTATTCTGTTTGGAAGCGCGCAACTGA
+>RAD18 1464 residues Pha 0 Code 0
+ATGGACCACCAAATAACCACTGCAAGCGACTTCACGACTACTTCAATACCGAGCCTGTAC
+CAATTGGATACACTTTTGAGATGTCACATTTGTAAAGATTTTCTAAAAGTCCCCGTCTTA
+ACACCTTGTGGCCATACATTTTGTTCCCTTTGTATTAGAACACATTTGAATAACCAACCA
+AATTGTCCTCTCTGCCTTTTCGAGTTCAGAGAGTCCTTGCTGAGAAGTGAGTTCCTGGTC
+AGTGAAATAATTCAAAGTTATACATCCCTACGATCTTCCTTACTAGATGCACTAAGGATA
+CCGAAGCCTACCCCTGTCCCTGAGAATGAGGAAGTACCAGGTCCTGAAAATTCTTCATGG
+ATAGAACTCATATCAGAGTCTGAAAGTGACAGTGTAAATGCCGCTGATGATGACTTGCAA
+ATTGTTGCAACAAGTGAAAGAAAACTTGCCAAAAGATCCATGACTGATATATTACCACTG
+AGTTCCAAACCATCCAAAAGGAATTTTGCAATGTTCAGAAGTGAACGTATCAAGAAAAAA
+TCAAAGCCAAATGAACAAATGGCCCAGTGCCCCATATGTCAACAATTTTATCCTCTTAAA
+GCCCTTGAAAAAACACATTTGGATGAATGCCTAACTTTACAATCACTAGGCAAAAAACCA
+AAAATTTCTACCACTTTCCCTACAGAGTCAAATCCACATAACAAAAGTTCATCCAGATTC
+AAGGTACGAACTCCAGAAGTCGACAAAAGCTCATGTGGTGAGACCTCACATGTGGATAAG
+TATTTAAACTCAATGATGAGTGCAGAACACCAAAGATTGCCGAAGATCAATTTTACGTCT
+ATGACTCAATCCCAAATAAAACAAAAACTGTCATCGTTGGGACTGTCAACTAATGGTACT
+AGGCAAAACATGATTAAAAGATACAATCACTACGAAATGCTTTGGAATTCTAATTTTTGT
+GATTCTCTAGAACCTGTTGATGAAGCTGAACTAAAAAGACAGTTGTTAAGCTGGGATGTT
+TCACACAATAAAACCCCCCAAAATAGTAGCAACAAGGGTGGAATTTCTAAATTAATGATA
+ATGAAGAGTAATGGGAAATCTTCTTCATATAGGAAATTACTTGAAAATTTCAAAAACGAT
+AAATTTAATAGGAAAGGATGGATGGTTATGTTTCGGAAGGATTTTGCTAGGCTTATCAGG
+GAAGCAAAAATGAAAATAAAAACAGGTTCATCGGACAGTTCAGGTTCAGTGGGACATTCT
+AATGATGGAGATGGTGTTGAAAAAGTTCAAAGTGACCAGGGAACCGAGGATCAGCAAATG
+GAGAAGGATCAGGACACTGTTATCAACGAAGATAGAGTTGCTGGTGAAAGAAATTTGCCT
+AACGAAGATTCAACTGATGCTGACTTATCAAGAGAATTAATGGACTTGAATGAATATAGT
+AAAGACCCACCCGGTAACAATTAA
+>CYPR 957 residues Pha 0 Code 0
+ATGTGGTTGAAATCCTTGCTGCTCTGCCTGTACTCCTTAGTACTCTGCCAAGTCCACGCT
+GCACCTTCATCAGGGAAGCAGATTACCTCCAAGGATGTTGATCTTCAGAAAAAATATGAG
+CCCAGTCCCCCCGCCACACATCGTGGAATAATCACTATCGAATACTTTGATCCCGTTTCG
+AAGTCGATGAAAGAGGCGGATCTGACTTTTGAGTTGTACGGTACTGTCGTGCCCAAAACT
+GTGAACAACTTTGCTATGCTGGCCCATGGTGTTAAGGCAGTTATCGAAGGGAAAGATCCC
+AATGATATACATACTTACTCGTACCGTAAGACCAAAATCAACAAGGTTTACCCTAACAAG
+TATATCCAGGGTGGTGTGGTTGCCCCAGATGTGGGTCCTTTCACCGTCTATGGGCCCAAA
+TTTGATGACGAAAACTTTTACTTAAAACATGACAGGCCTGAAAGACTCGCAATGGCCTAT
+TTTGGACCTGATTCTAACACCTCGGAATTCATCATCACCACTAAAGCCGATGGAAATGAG
+GAATTGGATGGCAAAAGTGTCGTGTTTGGTCAAATAACTTCTGGTCTAGATCAACTAATG
+GATGCTATTCAATACACAGAAACAGACGAATATGGAAAGCCTCAGCATGAATTACGGTTC
+CTGTATTTCGTTCTAGAAATCTTAAAAATTAGTAACATCTTAGATTTGCACGCTGCGTAC
+ACAGAAAAAGTCGAGAAGTTTAGAAATGGCGATGTGTCTGTTGGCTCCACTTTGGAAAAC
+ATCTTCCGTAACGATAAAGCCTACACACCTTTAACCACCTCCACTGGAACCACCGCCTAT
+GATTTAAACCACCCAATTTCCAGAGCCTTGATGTGTTTAACTGTTCTTGGCCTTTGTTTC
+ATTGCCTACAAGGGCATGCACGAAAAGCCTCATACGGTTTCATTAAGACACAAGTAA
+>YCW1 366 residues Pha 0 Code 0
+ATGATCAGTTCGTGTGTTACTAGATGTTTTGGTAGGGGTAAATGCCTTCCAGGGCCTGCC
+ACTGCCTCGATATACCAAACGATAAGATGTATATCCACTAATTCAAATAAAGCTGCTGAG
+GCGCCAATATTTCCAAAGCTGGAAGACGTGAAGATGCATGAGCTCATAGGAAACAACAAT
+TTTGGTAAAAAGACCTACTACGTGGAGAGAAGCAGGACCGGAAATCTACCGGTGTATTCC
+GCTTATAAAAATGGAGGTAACAAGATTATCACGGAGATCAGAAAGATTGAAGGAGATGTA
+ATTCAACTAAGAAATGACTTGCAGGAGCAACTGCCTTTCATACCCAAAAAATCATGGCTG
+TGGTGA
+>YCW2 1548 residues Pha 0 Code 0
+ATGTCCACCCTGATTCCTCCACCTTCTAAGAAACAAAAGAAAGAGGCTCAACTTCCCAGA
+GAAGTAGCTATTATTCCGAAAGATTTACCCAATGTTTCAATCAAGTTCCAAGCTTTAGAT
+ACTGGTGACAATGTAGGTGGCGCCCTGAGAGTTCCCGGTGCTATCTCCGAGAAACAGTTA
+GAAGAACTTTTAAATCAATTGAACGGTACTTCAGACGATCCAGTGCCATATACCTTCAGC
+TGTACAATTCAAGGTAAGAAGGCCAGTGACCCTGTGAAGACGATTGATATAACAGATAAC
+CTATATTCTTCATTAATAAAACCAGGCTATAACAGTACAGAAGATCAGATCACGCTACTG
+TATACGCCAAGAGCAGTTTTCAAAGTCAAGCCGGTAACTAGAAGTTCATCAGCCATTGCA
+GGTCACGGTTCCACAATTTTGTGTTCTGCCTTCGCACCACATACGAGTTCTAGGATGGTA
+ACCGGTGCAGGTGATAATACTGCAAGGATTTGGGACTGTGACACCCAAACGCCAATGCAT
+ACTCTAAAGGGTCACTACAATTGGGTTCTCTGCGTTTCCTGGTCCCCCGATGGAGAAGTA
+ATTGCTACGGGATCCATGGACAATACCATAAGATTATGGGACCCAAAAAGCGGTCAGTGT
+CTAGGTGATGCTCTCAGAGGTCATTCCAAGTGGATCACTTCTTTAAGTTGGGAACCTATA
+CATCTTGTGAAGCCGGGCTCCAAACCAAGATTAGCTTCATCTTCTAAGGATGGTACTATT
+AAGATTTGGGACACTGTGAGCAGAGTTTGCCAGTATACGATGAGTGGTCACACAAATTCA
+GTGTCTTGTGTCAAATGGGGCGGCCAAGGTCTATTGTATAGTGGCTCTCACGATAGAACC
+GTACGTGTATGGGACATCAATTCGCAGGGCAGATGTATCAACATTTTGAAGTCGCATGCG
+CACTGGGTTAATCACTTATCTTTATCTACAGATTACGCATTGCGCATTGGTGCATTCGAT
+CATACAGGTAAGAAGCCTTCTACACCAGAAGAAGCCCAGAAAAAGGCATTGGAAAATTAT
+GAAAAAATCTGTAAAAAGAATGGAAATTCAGAAGAAATGATGGTTACTGCAAGCGATGAT
+TATACCATGTTTTTATGGAACCCACTAAAATCTACCAAGCCTATAGCAAGAATGACCGGT
+CACCAAAAATTAGTCAATCATGTGGCGTTCAGCCCTGATGGTAGGTATATTGTCTCAGCG
+TCTTTTGATAACTCTATCAAACTTTGGGACGGTAGAGATGGTAAGTTTATCTCCACATTT
+AGAGGGCATATAGCCAGCGTATACCAGGTTGCGTGGTCATCGGACTGCCGACTACTGGTG
+TCATGTTCCAAAGATACCACGTTGAAAGTGTGGGATGTAAGAACTAGAAAACTTTCTGTT
+GACCTCCCTGGTCATAAAGACGAAGTTTATACCGTCGACTGGAGTGTCGACGGTAAAAGA
+GTGTGTAGTGGTGGGAAAGACAAGATGGTAAGATTGTGGACGCATTGA
+>SSK22 3945 residues Pha 0 Code 0
+ATGATGATGGATATACTGAATACACAGCAACAAAAAGCGGCTGAAGGCGGGAGAGTTCTG
+GCTCCTCATACCATCTCAAGTAAGCTCGTGAAGAGATTATCAAGTCATTCCAGCCATAAA
+CTATCAAGATCTGATTTGAAAGCATTGGGTGGCTCGGAAACAATAAGCGACGGCCCCAGT
+CAGCTGACTTTTAAGGACCGATACGTTTTCAATGAATCGCTATACTTGAAAAAGCTAAAA
+AAGACCGCTTTAGATGACTACTACACGAGGGGCATAAAACTCACTAACCGCTACGAGGAA
+GACGACGGTGATGACGAAATTATTCGGTTGTCTAATGGCGACAGAATTGATGAAGACCTG
+CACTCAGGTGTCAAGTTTTTCTCCACTACACCTTATTGCAGGAAAATGAGGTCAGACAGT
+GATGAACTAGCTTGGAATGAAATTGCGACCGAACGGTTCAAATGGCAGTCAATGCTGGCC
+AGAGTGCTGAAGGGAGATATTGTTAAAGGTGAAAAGACGAGGATTGCTAACCAAGTCAAG
+AAACCAGGGTTAAATAAGGAGCTCTCAGATGAGATATGGCTCGAATTGAAGGCATGGCTG
+AATGGGAGGACCATGCAAGAGATGGAACAGTCGCTTACATATTTAAGAGATAGTTCAGAT
+TCCGTTTTTGAAGAGATAATGAAGTTTCAAATTCCACAGGGCAAGATATTGAGCCTGGAT
+GCACTGGAGGCCATCTTACAAGACCTCATGAACAGATATCACAGCGTTGTCTCTTATTGG
+CCTAACTTGAAAAAAATGTATAAGGATAAACCAATCACCAATACTGCAGAATTTACCGCT
+AGAATAGACGTAATGAATTCTTGGCTGAACTTTAAAACGAACTTAACGTTGAGGAGGCAA
+GAGTTGGACGACTGGATAAACCGTTTCTCACCGATAAGTAGTTCGGATAATTGCCAAGAG
+GATTTTGATGGTGTGCCCCAATGGAACTGCAAAATGAAGATTCTTGCAGAACAATTGATG
+AAGGAAAAGAACATCGAGTCTATATTCCAAAAAAAAATTTTCTATCCGCTATCACCTTGG
+ATGTTCAAACTGAAACTACATTTTATAGTCTACAGAGAAACTTTGACAAAGATGAACATA
+AAATATCCTTATGAAAGGTTAAGATCACTACTGGCGTTCCCCGTCTATTTAATCAAAGAA
+GTTATTTTGACTAGATTGTCATATGCACGAAAGCTTAAAAATCCAACAATGATGATGATC
+GATCAAATGATCGATGATTTTAACGCTTTTATTCGACTTTCTGTGCAATTGAAGTACACA
+CTGACAAAATATTGCTCCAATTTGCCGTTCGATGTGGATTTTGACCCGACGTTCGAAAAT
+ACTGTAATAGAAGCCATTCGTTATTTATTTTTTCTGTTGAATTTAAAGTTGATTGATTCC
+AGTAAACAAAATTTCAAAGCACCCGATCTACTCTTGAAATACTGGGATCACCTAAAAAAC
+ACCGGTCACTATATTAACGGTGCAGAAACCGTGATTCCAAATGAATTTCTCAAGTTAACT
+TTGAGACTCGTACATAAATTGCAATTCTATCTTTTGAAACAACAAAACTTCCCACCAACA
+TTTGCTAACGCTTCAGAAGCAGAAAAATGGCTAAGTTCCATTTTCGAAAATTTGGGTGCC
+ATGAAAAGAAAGCTGAACAGGTTCAGCAATATTCTAGTCAAGGCGTTCCAAAATTCTGCT
+GTTTATCAGATTAATCATAATGCACAACTTGTTAAAAAGTTAAAAGATGCTCACTATTTT
+TTGGTATACTCCGGTAACACTTTTGAGTCTAGTGGTGTATATATGTTTGCTGCTCCTGAA
+TTATTAGGTTGTGACAATGATACCATCTTAAGAATTTTGCGAAATAAATCCATTGGCTGT
+GATTTGGTCCCAAAGCTTGACATTGGAAATAATTTGAATGTGTATGATATAACAACAAAA
+GAAACAGATTTGAACATTCTAGTATCGAAAGGGGAGGATTCCAAAGGAATTCCTTACTAC
+CGAGTAGTAGCAAATTCGTCAAGTGATTTGGACAGGCATGCTCATCAGTCCAAAAAGAAG
+AATTTTTCAACAGACCCTTTTGATCAGCACCTTGATGAAAAGAACAATGAAGTTTTTGAA
+TTGGAAGTTGCTTTGAGCTCATTGGGTGCACTAGTTGTACTATATCCTGGAGAGCCAGTA
+GTTTGGGATGGACCAGTATATAAGCTTCCAGGTAACAACCTTTTTGCATCCAACGAAATG
+GATTTAGGGAAAATTGGTAACCCAAATACGTTGATTTTACTCAATCAAGGTTCTAATTAT
+GCACTGACTTATCAAATCGACAAGTTTAATCAAACGGTAGGTGATTCTGTTTCATTCATA
+GAGAAACGTTGTTCACTCAATTCAATTGAATCCTCCCTACAAAAAATCAATAAGGCATAT
+TACAAACTTACTTATACAGTATTGAACAACTACAAAGGAATTCTAGGTAGCTTTATGAAG
+CAATGTCCGGGAAATGAGTTGTTAAATTCGATATTCATGTTTGGAAGGGATTTTGGAAGA
+AGTTTCCTTAAATATAACGCCTTTAGCTCAAAGAGGAAGTACGTTATCATCTTTCTGATG
+GTTAAATTAGGAATGAACTGGTTGAAATTCCTTGTTGAAGAGTGTGATCCTACCGATCAG
+CGAACTTTCCGATGGTGCGTTCTTGCAATGGATTTTGCGATGCAGATGACTAGTGGTTAT
+AATATCCTGGCGCTGAATGTAAAGCAATTTCAAGAACTGAAGGAGAGGGTATCAGTATGT
+ATGTCATTATTAATTTCACATTTCGACGTTATGGGTGCACGAGCCACTGAAGCTGAAAAT
+GGCATGCAACAGGCAAGATTGAATATTGATACTGAAGAGAATATTGATGAAGAGGCCACC
+CTAGAAATAAACAGCAGGTTGAGACTGGAAGCTATAAAGACGTTGGAAAAGACTATGAAG
+AGGAATCCCAGGCAAATGGGTAAGGTATTGGATGCTACAGATCAGGGAAACAAATACCTA
+CTATCGCTAGCATCCTCATTATCGAATGTATCAATGAGGTGGCAAAAAAGAAGCTTCATT
+GGCGGTGGAACATTTGGACAGGTATACTCTGCAATTAATCTGGAAAACGGTGAAATCTTA
+GCTGTTAAGGAAATAAAGATACACGATACCACAACAATGAAGAAGATTTTTCCCCTGATT
+AAAGAAGAGATGACCGTATTGGAAATGTTAAACCATCCTAATATTGTCCAGTACTATGGT
+GTCGAAGTACATCGCGATAAAGTTAACATCTTCATGGAATACTGTGAGGGTGGTTCTTTA
+GCCTCGTTATTGGATCATGGAAGAATTGAAGATGAAATGGTAACACAAGTGTACACATTC
+GAACTATTAGAAGGTTTGGCATATTTGCACCAATCTGGCGTGGTGCATCGCGACATTAAA
+CCGGAGAATATCTTGCTGGATTTCAATGGAATCATAAAATATGTGGATTTTGGTACGGCA
+CGTACCGTTGTAGGATCTAGGACTAGAACTGTGCGGAACGCAGCCGTTCAAGATTTTGGA
+GTAGAAACAAAGTCCCTCAATGAAATGATGGGGACACCGATGTATATGGCTCCAGAGACT
+ATTTCAGGCTCGGCAGTTAAGGGAAAACTTGGAGCGGACGATGTATGGGCATTAGGATGT
+GTTGTGCTAGAAATGGCCACAGGTAGACGACCTTGGTCTAACTTGGATAATGAATGGGCC
+ATCATGTACCACGTTGCTGCAGGTCGAATACCGCAACTACCCAATAGAGACGAAATGACT
+GCAGCGGGAAGAGCCCTTCTTGGAAAGGTGTTTGGTTCAAGACCCCACTATGAGGGCTAC
+TGCTGTGGAACTACTGATAGACCCTTGGATGATACAAATCCGTGA
+>SOL2 948 residues Pha 0 Code 0
+ATGACTACGACGGTACCCAAGATATTCGCGTTTCACGAGTTTTCAGACGTGGCAGAGGCC
+GTAGCTGACCATGTAGTCCACGCGCAAGACGGTGCATTGGCTCCAAAGAACGAGAGGAAA
+CACTCTGTTCCCAACATCAGCATGAATGCACTGGATATGACGAGAGAGGCCTCTTGCAAA
+AGCACAGCATCTGCCGCGGAAGGGAAAAGTGGTAGCAGTGGTAGTGGCAGTGGTAGCAGT
+AAGCCCAAAAAGGAGAAACGGTTCAAGATTGCTCTCTCCGGTGGGTCATTGATCGAAGTG
+CTACACGAAGGTCTGCTAAAACGAGACGATGTACGGTGGGGAGACTGGGACATTTACTTT
+GCAGACGAGAGACTTGTACCCTTCAGCTCGAATGAAAGCAATTATGGATGCGCCAAAAGG
+AAGATTTTGGACCTGATAGACACGGCGAAGTATGGAACTCCGAAGGTGTACCACATTGAC
+GAGTCATTGATTGACGACCCGCAAGAATGCGTTGATAACTATGAAAAGGTGCTAATCCGC
+GGGTTTGCCGGTAGAGATTCCGTCAAACTTCCGATGTTCGACTTGTTCCTGCTTGGTTGT
+GCCCCCGATGGTCATATCGCATCACTCTTCCCTAACTTCCAGGACAATCTACGTGAGAAA
+CTTGCATGGGTGGTGCCCGTGGAGAACGCTCCTAGTGGGCCCTCGACCAGAATTTCGCTG
+ACTATACCTGTAATCTGCCATTCTCACAGGGTTACTTTCGTTGTCGAAGGTGCAACCAAG
+GCGCCCATCATCAAGACCATTATGGAAAGGCCTGAAAAGGGCCTACCTAGCAGTATTGTC
+AACGAAGGTGCTGCTGGTCGTGTATCATGGTTTGTTGACGACGATGCTCTTACGGACGTC
+CTCGTCACCAAAAAAAAGTATAAATTCCACCAAGGTTTGTCTATTTAA
+>ERS1 783 residues Pha 0 Code 0
+ATGGTGTCGTTAGACGATATACTAGGTATCGTGTATGTTACGTCATGGTCGATATCGATG
+TATCCACCGATAATCACCAATTGGCGCCATAAGTCAGCGAGCGCGATATCGATGGATTTT
+GTCATGTTAAATACGGCAGGTTACTCTTACCTGGTCATATCCATATTTTTGCAATTGTAC
+TGCTGGAAAATGACGGGTGATGAGTCTGACTTGGGCAGGCCCAAGTTGACGCAATTTGAT
+TTCTGGTATTGCCTGCATGGGTGCTTGATGAATGTTGTCTTATTGACCCAGGTGGTAGCT
+GGAGCGAGAATCTGGCGATTTCCAGGTAAAGGTCACCGCAAGATGAATCCATGGTACCTA
+AGGATTTTACTCGCATCACTGGCCATTTTTTCACTGCTAACCGTACAATTTATGTACTCC
+AACTACTGGTACGATTGGCATAACTCAAGAACTCTGGCGTATTGCAACAATTTGTTTTTA
+CTCAAAATATCGATGTCACTAATCAAGTACATCCCACAAGTGACGCATAACTCGACAAGA
+AAATCTATGGATTGTTTCCCCATTCAGGGTGTGTTTCTAGATGTCACTGGCGGTATCGCC
+TCGCTGCTCCAATTGATTTGGCAGTTGTCTAACGATCAAGGTTTCAGTCTGGATACGTTC
+GTGACAAATTTTGGAAAAGTGGGACTGTCAATGGTAACTTTAATATTCAACTTCATCTTT
+ATCATGCAGTGGTTTGTATATCGATCTCGAGGCCATGATCTGGCGTCAGAGTACCCGCTG
+TAG
+>PAT1 2394 residues Pha 0 Code 0
+ATGTCCTTCTTTGGGTTAGAAAATAGCGGTAATGCGCGGGATGGTCCTCTGGACTTTGAA
+GAGAGTTACAAGGGCTATGGCGAGCACGAACTTGAGGAGAACGACTATTTGAACGACGAA
+ACATTTGGTGATAATGTTCAGGTTGGTACCGACTTTGATTTTGGAAATCCTCACAGCAGC
+GGCAGCAGCGGCAACGCAATTGGTGGTAATGGCGTCGGTGCCACGGCTAGATCATATGTT
+GCAGCTACTGCAGAAGGAATTAGCGGCCCTAGGACCGATGGAACGGCAGCAGCAGGACCT
+CTAGACCTGAAGCCAATGGAATCTTTGTGGTCTACTGCACCACCTCCAGCAATGGCGCCT
+TCACCCCAAAGTACAATGGCTCCGGCTCCTGCTCCGCAGCAAATGGCCCCCCTACAGCCA
+ATCTTGTCGATGCAAGACTTGGAAAGACAACAACGTCAAATGCAGCAACAGTTTATGAAT
+TTCCACGCCATGGGTCATCCACAGGGTCTCCCACAGGGTCCGCCTCAGCAGCAATTTCCA
+ATGCAGCCTGCGTCGGGTCAACCAGGTCCCTCACAATTTGCGCCTCCACCTCCACCTCCT
+GGCGTTAATGTGAATATGAATCAAATGCCAATGGGTCCTGTACAAGTTCCAGTTCAAGCT
+TCGCCTTCACCCATCGGTATGTCCAACACTCCTTCTCCAGGCCCTGTGGTTGGCGCAACT
+AAAATGCCTCTGCAAAGTGGACGCAGATCGAAGAGAGATTTGTCGCCTGAAGAGCAAAGA
+CGTTTGCAGATTCGTCATGCCAAAGTGGAGAAAATCTTGAAATACTCAGGTTTAATGACT
+CCTCGTGATAAGGACTTCATCACCAGATATCAGTTGTCTCAAATTGTCACTGAGGACCCT
+TACAATGAGGATTTCTACTTCCAGGTCTACAAGATTATCCAAAGAGGCGGTATCACGTCC
+GAATCCAACAAAGGTTTGATTGCTAGGGCGTATTTGGAACATTCTGGACACAGACTCGGT
+GGTCGCTATAAGAGAACCGATATTGCCCTACAGAGAATGCAAAGTCAAGTAGAAAAGGCT
+GTCACTGTGGCTAAGGAAAGACCTTCTAAGTTGAAGGATCAACAAGCGGCTGCTGGTAAC
+TCTAGCCAGGATAATAAGCAAGCAAACACGGTTCTGGGCAAAATCTCTTCCACTTTGAAC
+AGCAAGAATCCAAGAAGACAACTGCAGATCCCCAGACAACAGCCTTCTTCTGACCCCGAT
+GCGCTAAAAGACGTCACTGACTCTCTGACCAACGTGGACTTGGCCTCTTCAGGGTCCTCC
+TCTACGGGCTCTTCTGCCGCTGCTGTTGCTTCTAAGCAAAGAAGAAGATCTTCATACGCG
+TTCAACAACGGTAATGGTGCCACAAATTTGAACAAATCTGGGGGCAAAAAATTCATTCTT
+GAGTTAATTGAAACAGTTTATGAAGAGATTTTAGACTTGGAAGCTAACTTGAGGAATGGC
+CAGCAAACTGACAGCACTGCAATGTGGGAGGCCCTTCACATCGACGACAGTTCATATGAC
+GTAAACCCTTTCATTTCGATGCTATCATTTGATAAAGGTATCAAGATTATGCCTAGAATT
+TTTAATTTCTTGGATAAGCAGCAAAAATTGAAAATCCTGCAAAAAATCTTCAATGAATTA
+TCACACTTGCAAATCATCATATTGAGTTCCTACAAGACTACACCAAAACCAACTTTGACA
+CAATTGAAGAAAGTCGATCTGTTCCAAATGATCATATTAAAGATCATTGTCTCGTTTTTG
+TCTAATAACTCCAATTTTATCGAAATTATGGGTCTGTTGCTACAGTTAATCAGAAACAAC
+AACGTTTCGTTCTTGACCACCTCCAAAATTGGTCTAAATTTGATCACCATTTTGATTTCT
+CGTGCCGCATTAATCAAGCAAGATTCATCAAGATCTAATATTCTTTCCTCTCCTGAAATC
+TCCACATGGAATGAGATTTATGATAAATTATTCACTTCATTGGAAAGTAAGATTCAGCTG
+ATTTTCCCTCCAAGGGAATATAACGTCCACATCATGCGTTTACAAAATGACAAGTTTATG
+GATGAAGCATACTTTGGCCAGTTCCTAGCTAGTTTAGCACTAAGTGGAAAGCTAAACCAC
+CAGAGAATCATTATTGATGAAGTACGTGATGAAATCTTTGCCACTATTAACGAGGCGGAG
+ACCTTACAAAAGAAAGAGAAAGAATTGAGTGTATTACCTCAGAGGTCTCAAGAATTAGAC
+ACAGAGTTAAAATCTATTATTTATAATAAAGAGAAACTATACCAAGATTTGAATTTGTTC
+CTAAACGTTATGGGGTTGGTGTATCGCGATGGTGAAATATCAGAACTAAAGTAA
+>SRB8 4284 residues Pha 0 Code 0
+ATGAATAACGGTTCTGGTCGATACTTGCTGACTCCCCCAGATGATCTTCACCCCTATGTG
+CCAAGCTCGAAACCTCAGGAACAAGTATACCCTGATTTCAAGCCTTGGGAGCACACTGCA
+GCAGAAGATCAAATCCTAGCAAACTTTGTGGCTAAGGGCTTTTACCATACACCAATGGTA
+AATTTCGAGTCCATATCTGCGAGATCATCTGTTCATGAATCATTAGTCACTCAATCCAAC
+ATTCTTTCCCAGCAATTCGACAAAATTATCAAGATTAGAGAAGACCACATTAATAAGATC
+CCCTCAAATTCCACGACGACATTACACGGGCCTGGTTTTCAGTTGCCTAATAGAATAACC
+CTTACTGATCATAGAAAGGAAACGTGGTTGCATGAATTGAGTTCGTCTCACACTTCGCTG
+GTCAAAATTGGCAAGTTTATACCTCACGGCTTGAAAAGAAGGCAAGTCATCGAGCAGTGC
+TATTTAAAATTTATACCATTGAAAAGGGCGATTTGGTTGATAAAGTGCTGCTATTTTATC
+GAATGGAAATCGAACCACAAAAAGAAGAGGTCAAATGCTGCTGGGGCAGATGATGCCATT
+TCCATGCACCTGCTAAAGGACTGGACGGATACCTTTGTATACATCCTGGAAAAGCTCATC
+TTTGATATGACAAATCACTATAACGATTCTCAACAACTGCGTACGTGGAAGAGGCAGATT
+TCTTATTTTTTAAAACTTTTGGGGAATTGCTACTCACTAAGATTGATCAATAAGGAAATC
+TTTCATCATTGGCTTGTAGAGTTTATAAATAAGATGGAAAACTTCGAATTTTTGCCATTA
+TCTTTACATATTTTGATGATTTTTTGGAACGACATCTGCCAAATTGATACAAATGCTCCT
+GTTGCGGCTACAATAACATCAAGTCAAAAAGAGCCCTTCTTTCTGGTAACAAAAATCACT
+GATATGCTATTGCACAAATATTATATTGTTTCCAGCAGCAAATCAATGATAAATGACGAG
+AACTACATCATCAATGATATAAAGAAAAACAACAAGATAAAGTTGAATATTCTCAAAATA
+TTATCCAGTTTAATTTTGAAAATTTTTCAAGAACAATCTTTAGAGGTGTTTATATTTCCC
+ACATCTAACTGGGAAATTTACAAGCCCTTACTTTTTGAAATAGTCTCAAACGCCGACACT
+AATCAAAATTCTGATATGAAGAAAAAATTAGAGTTAATTAGTTACAGAAACGAGTCATTG
+AAGAATAATTCTTCTATACGAAACGTAATAATGTCTGCCAGCAACGCAAATGACTTTCAA
+TTAACTATCGTCACCTGTAAACAATTTCCAAAACTATCATGCATTCAATTAAATTGTATA
+GATACTCAGTTCACCAAGCTACTGGACGATAACCCTACAGAATTCGATTGGCCCACTTAC
+GTTGACCAAAATCCCCTTACAATGCATAAAATTATTCAATTAATTCTCTGGTCCATACAT
+CCATCAAGGCAATTTGATCACTATGAATCTAATCAACTGGTAGCGAAATTATTACTATTG
+CGAATAAATTCAACAGATGAGGATTTGCACGAATTCCAGATAGAAGATGCCATTTGGTCA
+TTGGTTTTCCAATTAGCCAAAAATTTTTCGGCCCAAAAGAGGGTGGTATCATATATGATG
+CCTTCTTTGTATCGCCTGCTTAATATACTAATTACTTATGGCATCATTAAGGTCCCTACG
+TATATCAGAAAGCTAATCAGTTCCGGCCTACTTTATCTCCAAGATTCCAATGATAAGTTT
+GTGCATGTCCAGCTGTTAATTAACTTGAAAATTTCACCGTTGATGAAAAGTCAATACAAT
+ATGGTATTGAGGAACGTTATGGAATATGACGTTAAATTTTATGAAATTTTTAATTTCGAC
+CAACTCGTGGAAATCACAGAACAAATCAAAATGCGAATACTCTCCAATGATATAACTAAT
+TTGCAACTGTCGAAAACTCCTCTGAGCATTAAAATCATGGTTGCAGAATGGTACTTATCA
+CATTTATGTTCCGGTATTTTATCTAGTGTTAACCGCACAGTGTTGCTAAAAATATTCAAG
+ATTTTTTGTATCGATCTGGAGGTTTTCCACCACTTTTTTAAGTGGATCGAGTTTATTGTC
+TACCATCAATTGCTAAGTGATATAGAATCTCTGGAGGCATTGATGGACATCTTGCTATGC
+TACCAAAAATTGTTCTCACAATTCATTAATGACCATATTCTTTTTACGAAGACGTTCATA
+TTCATTTACAAGAAAGTTTTGAAAGAAAAAGACGTGCCTGCTTATAATGTGACTTCATTT
+ATGCCATTCTGGAAATTTTTTATGAAAAACTTCCCTTTTGTTTTAAAGGTGGATAACGAT
+TTAAGGATTGAGTTACAATCTGTTTACAATGATGAGAAATTGAAAACTGAGAAGCTGAAG
+AATGATAAATCAGAAGTCTTGAAGGTGTATTCCATGATCAATAATTCAAACCAAGCTGTT
+GGACAGACTTGGAATTTTCCCGAGGTGTTTCAAGTAAACATCAGGTTTCTACTACACAAC
+TCCGAGATCATTGATACAAATACAAGCAAACAGTTCCAGAAAGCACGAAACAATGTCATG
+CTTTTGATTGCCACTAACTTGAAGGAGTACAATAAATTTATGTCCATTTTCTTGAAAAGG
+AAAGACTTTACTAACAAAAATTTAATTCAATTGATCTCTCTAAAACTTCTAACTTTTGAA
+GTGACGCAGAATGTGTTGGGGCTCGAGTATATTATTCGATTATTACCAATAAACTTGGAA
+AATAATGACGGCTCATATGGTCTGTTTTTGAAGTATCATAAAGAACAATTCATAAAGTCA
+AATTTTGAGAAAATTTTACTTACATGTTATGAATTAGAAAAAAAATATCATGGCAACGAA
+TGTGAAATAAATTATTATGAGATCCTATTGAAAATTTTAATAACTTATGGGTCATCTCCC
+AAATTACTTGCAACATCTACAAAAATCATTATGTTGTTATTGAATGATAGCGTGGAAAAC
+TCATCTAATATTTTGGAGGATATTTTGTACTACTCAACTTGTCCGTCGGAAACCGATCTT
+AACGATATTCCATTGGGTAGTGGACAACCAGACAATGACACTGTTGTAACCAACGATGAT
+AAAAGTGACGATGATGATCACACAGTCGACGAAATTGATCATGTAGAATATTACGTTATG
+ATGGACTTTGCCAATCTTTGGGTTTTCCAAGCGTTTACCTGTTTCTGCATCAAAAAAATC
+ATGGAGAATAATGAGCCAGCAATGGCAATGGAAGACTTGAAGAACTTCATATTCCAAATT
+ATCGAAATAACTAATTCTAATGATTTATGTTCACAAATATTTGACCAACTGAAGGATATG
+CAGACCATTGAGATGATAACCCAAATAGTGGAGAAAGATTTCTGCACTTCTTGTTTGCAA
+AACAACAACCAAAAGATAGATGATAATTACATCGTTGTGGTGATCGAGATTATAACGTCA
+TTATCGATGAGGTTTCAAAGAGAAACTTCTGGTATGATAGTTATTTCCATGGAGAACTAT
+CATTTACTAATAAAGATCATAAGACAATTAAGTGAACTGAACGAAGGAAATTTATCTAAG
+AGAGAAATCCAAATAGATGCCGTCTTGAAAATTTTTAGCTTTCATCAGGATTCCATTTTC
+CAACGCATCATCGCTGATTTATCAGCTGATAAACCCACAAGTCCATTCATTGATAGCATA
+TGCAAGCTGTTTGATAAAATATCATTTAATTTAAGATTGAAGCTGTTCTTGTACGAAATT
+TTGTCTTCATTGAAATCATTCGCCATCTATTCATCCACAATTGATGCCCCAGCATTCCAC
+ACAAGCGGTAAGGTCGAACTACCGAAGAAATTGCTGAACTTACCACCATTCCAAGTGTCC
+TCTTTCGTTAAGGAAACAAAACTTCATAGTGGCGACTACGGGGAAGAAGAAGATGCAGAC
+CAAGAAGAATCGTTTAGTTTAAATTTAGGAATCGGCATAGTTGAAATAGCGCACGAAAAC
+GAACAGAAATGGCTCATTTATGACAAGAAAGATCATAAATATGTCTGCACATTTTCCATG
+GAGCCGTACCACTTCATCTCCAACTATAATACCAAGTACACAGATGACATGGCTACAGGC
+AGTAATGATACGACTGCGTTTAACGATTCCTGTGTAAACCTGAGTCTTTTTGATGCTCGG
+TTTGAGAGGAAAAATCCACATTGA
+>YCX3 384 residues Pha 0 Code 0
+ATGTTGTTCTATAAGCCTGTGATGAGGATGGCGGTGAGACCGCTAAAAAGCATAAGATTC
+CAGTCCTCATACACCAGTATTACTAAATTGACGAACCTAACAGAATTTAGGAATTTGATC
+AAGCAAAATGATAAACTAGTCATCGATTTTTATGCTACTTGGTGTGGCCCCTGTAAGATG
+ATGCAACCACACTTAACGAAATTAATTCAGGCTTATCCAGATGTAAGATTTGTCAAGTGC
+GACGTGGACGAATCACCAGATATTGCCAAAGAGTGTGAAGTGACGGCTATGCCCACCTTT
+GTTCTTGGCAAGGATGGCCAACTCATCGGCAAGATCATTGGAGCTAACCCTACTGCTTTA
+GAGAAGGGAATCAAAGATCTATAA
+>TUP1 2142 residues Pha 0 Code 0
+ATGACTGCCAGCGTTTCGAATACGCAGAATAAGCTGAATGAGCTTCTCGATGCCATCAGA
+CAGGAGTTTCTCCAAGTCTCACAAGAGGCAAATACCTACCGTCTTCAAAACCAAAAGGAT
+TACGATTTCAAAATGAACCAGCAGCTGGCTGAGATGCAGCAGATAAGAAACACCGTCTAC
+GAACTGGAACTAACTCACAGGAAAATGAAGGACGCGTACGAAGAAGAGATCAAGCACTTG
+AAACTAGGGCTGGAGCAAAGAGACCATCAAATTGCATCTTTGACCGTCCAGCAACAGCGG
+CAACAGCAACAGCAGCAACAGGTCCAGCAGCATTTACAACAGCAACAGCAGCAGCTAGCC
+GCTGCATCTGCATCTGTTCCAGTTGCGCAACAACCACCGGCTACTACTTCGGCCACCGCC
+ACTCCAGCAGCAAACACAACTACTGGTTCGCCATCGGCCTTCCCAGTACAAGCTAGCCGT
+CCTAATCTGGTTGGCTCACAGTTGCCTACCACCACTTTGCCTGTGGTGTCCTCAAACGCC
+CAACAACAACTACCACAACAGCAACTGCAACAGCAGCAACTTCAACAACAGCAACCACCT
+CCCCAGGTTTCCGTGGCACCATTGAGTAACACAGCCATCAACGGATCTCCTACTTCTAAA
+GAGACCACTACTTTACCCTCTGTCAAGGCACCTGAATCTACGTTGAAAGAAACTGAACCG
+GAAAATAATAATACCTCGAAGATAAATGACACCGGATCCGCCACCACGGCCACCACTACC
+ACCGCAACTGAAACTGAAATCAAACCTAAGGAGGAAGACGCCACCCCGGCTAGTTTGCAC
+CAGGATCACTACTTAGTCCCTTATAATCAAAGAGCAAACCACTCTAAACCTATCCCACCT
+TTCCTTTTGGATCTAGATTCCCAGTCTGTTCCCGATGCTCTGAAGAAGCAAACAAATGAT
+TATTATATTTTATACAACCCGGCACTACCAAGAGAAATTGACGTTGAGTTACACAAATCT
+TTGGATCATACTTCAGTTGTTTGTTGCGTGAAGTTCAGTAACGATGGTGAATACTTAGCC
+ACAGGCTGCAACAAAACTACTCAAGTGTATCGCGTTTCAGATGGTTCTCTGGTGGCCCGT
+CTATCTGACGATTCTGCTGCCAATAACCATCGAAATTCGATCACTGAAAATAACACCACC
+ACGTCCACGGATAACAATACAATGACAACCACTACTACCACCACAATTACTACCACAGCG
+ATGACTTCGGCAGCAGAATTGGCAAAAGATGTGGAAAACCTGAACACTTCGTCTTCCCCA
+TCATCCGACTTGTATATCCGTTCAGTGTGTTTTTCTCCAGATGGGAAATTTTTGGCAACA
+GGTGCTGAAGACAGACTGATTAGAATTTGGGATATTGAAAATAGAAAGATTGTTATGATT
+CTTCAAGGCCACGAACAAGATATTTATTCATTGGACTACTTTCCCTCAGGTGACAAATTA
+GTCTCCGGTTCTGGTGACCGTACCGTTCGTATTTGGGACTTACGTACAGGCCAGTGTTCA
+TTGACTTTATCCATTGAAGATGGTGTTACCACCGTCGCTGTATCACCAGGTGATGGTAAA
+TACATCGCTGCTGGTTCTCTAGATCGTGCTGTGAGAGTTTGGGATTCCGAGACCGGATTC
+TTGGTGGAAAGACTAGATTCGGAAAACGAATCCGGTACAGGCCACAAGGACTCTGTTTAT
+AGCGTTGTCTTCACTAGAGATGGACAAAGCGTTGTATCCGGCTCATTAGATAGATCTGTT
+AAGCTCTGGAATTTGCAGAATGCAAACAACAAGAGCGATTCGAAAACTCCAAATTCCGGC
+ACTTGTGAAGTTACGTATATCGGGCATAAAGACTTTGTATTGTCCGTGGCCACCACACAA
+AATGATGAGTACATCTTGTCCGGTTCCAAAGATCGTGGTGTCCTGTTTTGGGATAAGAAA
+TCCGGCAATCCGTTATTGATGTTGCAAGGTCATAGGAATTCAGTTATATCTGTGGCTGTG
+GCAAACGGGTCTCCGCTGGGTCCAGAATATAACGTTTTTGCTACTGGTAGCGGTGATTGT
+AAAGCAAGGATTTGGAAGTATAAAAAAATAGCGCCAAATTAA
+>YC16 462 residues Pha 0 Code 0
+ATGGTTACGTTCAACTGTGAGGTGTGTAATGATACTGTGCCCAAGAAGAATACCGAAAAG
+CATTATTATAGATGTCCTAACGCGTACTATACATGCATAGATTGCTCCAAGACGTTTGAA
+GATGGCGTGAGTTACAAGAATCACACGTCTTGCATCAGCGAGGACGAGAAGTACCAGAAA
+GCGTTGTACAAGGGCAACAAGAAGCAGAAGCAGAAGCAGCAGCAGAAGCAGCAGCAGAAG
+CAGCACCAGCACCAGCCAGTGGCAACTCCTGCAAAGAAAGTGGAGAAGCCTGTGATCAAG
+AAGGCAGAGAAAGTGGAAAAGACCTCGAACGGTATCGAGCTTCACAAGGGCAAGTCGTTG
+TACAAAATTTTGAAAACCATGAAGGATAAAGGGGCAAAAAAGACCTTCTTGAAAAGTCTG
+GTTGTGGATTCTGAGGGGCAAATCAGGTATGCAAAGGAATAA
+>ABP1 1779 residues Pha 0 Code 0
+ATGGCTTTGGAACCTATTGATTATACTACTCACTCGAGAGAGATCGACGCAGAGTACCTG
+AAGATTGTCAGAGGCTCCGATCCTGACACCACCTGGTTGATTATTTCACCCAATGCGAAA
+AAAGAATACGAACCTGAGTCTACCGGTTCCTCCTTTCACGATTTCTTGCAATTGTTTGAT
+GAAACCAAGGTCCAGTACGGACTGGCACGTGTGTCCCCACCAGGGTCAGACGTTGAGAAG
+ATTATTATCATTGGTTGGTGTCCTGATTCTGCGCCATTGAAGACAAGGGCCTCTTTCGCC
+GCCAATTTTGCTGCAGTTGCTAATAATCTGTTCAAGGGTTACCACGTTCAAGTTACCGCC
+AGAGACGAGGACGATCTTGACGAAAATGAACTGTTGATGAAAATCAGTAACGCGGCCGGT
+GCCCGTTATTCTATTCAGACTTCCTCCAAGCAACAGGGGAAGGCTTCCACTCCTCCCGTG
+AAGAAATCCTTCACACCTTCCAAGAGCCCTGCTCCAGTTTCTAAGAAGGAACCAGTCAAG
+ACTCCTTCCCCAGCACCTGCTGCTAAGATTTCTTCCCGTGTTAACGACAACAATGACGAC
+GACGATTGGAATGAGCCTGAATTAAAGGAACGCGACTTCGATCAGGCTCCCCTGAAACCA
+AATCAATCATCTTACAAACCAATTGGCAAAATCGACTTGCAAAAAGTGATTGCTGAAGAA
+AAGGCTAAGGAGGACCCACGTCTTGTTCAAAAGCCAACCGCTGCTGGTTCCAAGATTGAT
+CCTAGTTCTGATATCGCTAATTTAAAGAACGAATCAAAATTAAAGAGGGACTCCGAGTTT
+AACTCCTTTTTGGGCACCACTAAACCCCCCTCCATGACGGAATCTTCATTAAAGAATGAT
+GATGATAAAGTCATTAAGGGTTTTAGAAACGAGAAATCACCTGCTCAATTATGGGCCGAA
+AGAAAGGCAAAGCAAAACAGCGGCAACGCCGAAACTAAGGCTGAGGCACCAAAACCTGAA
+GTTCCAGAAGATGAGCCTGAAGGTGAACCTGACGTCAAAGATTTGAAATCAAAATTTGAA
+GGATTGGCCGCTTCAGAAAAAGAGGAGGAAGAAATGGAAAACAAATTTGCTCCTCCTCCA
+AAGAAATCAGAACCAACTATTATCTCACCAAAACCCTTCTCCAAGCCACAAGAACCTGTG
+AAAGCTGAAGAAGCCGAGCAGCCTAAGACTGATTACAAGAAGATCGGCAACCCATTACCC
+GGTATGCACATTGAAGCGGATAATGAGGAAGAACCAGAAGAGAATGATGATGACTGGGAT
+GATGATGAAGACGAGGCTGCTCAACCTCCTTTGCCTTCGAGGAATGTTGCGTCAGGAGCA
+CCAGTGCAAAAAGAAGAGCCTGAACAAGAAGAGATCGCCCCAAGCTTACCTTCTAGAAAC
+TCGATCCCAGCTCCAAAACAAGAAGAAGCACCTGAACAAGCACCTGAAGAAGAAATTGAA
+GAAGAAGCTGAGGAAGCCGCTCCACAGCTGCCATCAAGAAGCTCTGCAGCTCCTCCTCCG
+CCTCCAAGACGAGCAACTCCAGAGAAAAAGCCAAAGGAAAATCCTTGGGCCACAGCAGAA
+TATGATTACGATGCTGCAGAAGATAACGAACTGACCTTTGTGGAAAATGACAAGATTATC
+AATATTGAATTTGTCGACGATGACTGGTGGCTAGGGGAACTAGAGAAAGACGGCTCAAAA
+GGTCTCTTCCCCAGCAATTATGTGTCTTTGGGCAACTAG
+>KIN82 2181 residues Pha 0 Code 0
+ATGACTCAGCAAGAATACCGTTCCCCCTCACAACGCTTATCCAAGGGGAGGAGCATGTCG
+CTACCCAAAATATTTGCTCGTAATTTGAGATCTCTGCAAAACAATGCACCTCCTGGCAAA
+AACATCAATGTCAATTGTTTGAACGTCAATTCTTGTTCGTTGTCCGCAAGCCCAAGCTCA
+CAAATTAATATGGCTTGTAATGGAAACAAGCAAGATCTTCCCATACCGTTTCCCCTGCAT
+GTAGAATGCAACGATAGCTGGTCAAGCTCCAAACTTAACAAGTTCAAATCAATGTTTAAT
+CATAACAGATCAAAGAGCAGTGGTACTACAGATGCGTCAACTTCAGAAAAAGGTACGCAT
+AAGCGTGAACCCCGGTCGACGATACATACAGAGCTGTTACAAAGTTCCATTATCGGTGAG
+CCAAATGTCCATAGTACTACAAGTAGCACACTTATACCCAATGAGGCGATATGCTCCACA
+CCTAATGAGATCTCAGGTAGCTCTTCTCCGGACGCGGAGTTATTTACCTTTGACATGCCC
+ACAGACCCGTCATCCTTCCACACTCCTAGCTCCCCAAGTTATATAGCAAAGGACAGTAGA
+AACCTGAGTAATGGATCTTTGAATGATATTAACGAAAATGAAGAGCTCCAAAATTTCCAT
+AGAAAAATCAGCGAAAATGGCAGTGCCTCCCCCCTGGCTAACTTGTCATTATCCAATTCA
+CCAATTGATTCCCCAAGGAAAAATAGCGAAACCAGAAAGGATCAAATACCTATGAACATA
+ACACCACGTTTAAGGAGGGCCGCTTCCGAACCGTTCAATACGGCAAAGGATGGGTTAATG
+CGGGAAGATTACATTGCCTTGAAACAACCTCCAAGCTTGGGAGATATTGTAGAACCGAGG
+AGATCTCGTCGTTTAAGAACCAAGTCATTCGGTAACAAGTTCCAAGACATTACTGTCGAA
+CCTCAATCCTTCGAAAAAATTAGACTACTTGGCCAAGGTGACGTAGGTAAAGTGTATTTA
+GTGAGGGAACGCGATACCAACCAGATATTCGCCCTGAAAGTTTTGAATAAACATGAGATG
+ATCAAGAGGAAGAAAATTAAACGAGTACTCACTGAACAGGAAATTCTCGCGACAAGTGAT
+CATCCATTTATTGTGACACTGTATCATTCCTTTCAAACCAAAGACTATTTGTATCTCTGT
+ATGGAATACTGCATGGGAGGGGAATTCTTTAGAGCCTTACAAACAAGAAAAAGTAAATGC
+ATTGCAGAAGAAGATGCGAAGTTTTACGCCAGTGAAGTAGTAGCAGCTTTGGAATATTTA
+CACCTACTGGGCTTCATATACAGAGATTTGAAACCCGAAAACATATTACTGCATCAATCT
+GGTCATGTCATGCTTTCTGACTTTGATTTATCCATCCAAGCAACGGGATCAAAAAAACCC
+ACCATGAAAGACTCTACGTATTTAGATACAAAAATTTGTTCAGATGGATTCAGAACTAAT
+TCCTTTGTTGGTACTGAAGAGTATTTAGCTCCAGAAGTAATCAGAGGGAATGGCCACACT
+GCAGCAGTAGACTGGTGGACTTTAGGAATATTGATTTACGAGATGCTATTTGGCTGTACT
+CCATTTAAAGGAGATAATTCAAATGAAACATTCTCTAACATTTTAACCAAGGACGTCAAA
+TTTCCACATGATAAGGAAGTTTCGAAGAATTGTAAAGACCTGATAAAGAAACTACTAAAC
+AAAAACGAGGCAAAAAGGCTTGGTTCCAAATCAGGAGCTGCAGACATAAAGAGACATCCC
+TTCTTCAAAAAAGTTCAGTGGTCGTTCTTAAGAAACCAAGACCCCCCTCTAATACCTGCA
+TTAAATGATAACGGCTGCGAACTTCCTTTTATATTGTCTTGCAATAAACACCCGAAAAGG
+AACTCAGTGAGTGAACAGGAAACCAAAATGTTCTGTGAGAAAGTTGCAAACGATGATGAA
+ATTGATGAGGCTGATCCATTCCATGATTTTAATTCTATGAGTTTAACGAAGAAAGATCAC
+AATATCTTAACCTACTCTGAAAATTATACTACGGAAAAATTCTATACAAAGCAACTTGTA
+CAAGGCCAAGGCATAACAGCTCACATAGAAGTTTCTTTAAAGACATCATACCTGAACTAT
+AACATGTTTACAGAAAGATAA
+>MSH3 3144 residues Pha 0 Code 0
+ATGGTGATAGGTAATGAACCTAAACTGGTACTTTTGAGAGCCAAAAGCAGTGCAAATAGA
+TTTATTTTGTTGAATCTATTAACAATAATGGCGGGACAACCCACAATAAGCAGGTTTTTC
+AAGAAGGCGGTAAAATCAGAGCTGACGCATAAGCAAGAACAAGAAGTTGCGGTTGGAAAT
+GGCGCTGGTAGCGAATCCATCTGCCTTGACACTGATGAAGAGGACAATTTATCTTCTGTT
+GCAAGCACAACAGTAACTAATGATAGCTTTCCACTCAAAGGCAGTGTTTCTTCCAAGAAT
+TCGAAAAATTCAGAAAAGACTAGTGGTACTTCGACAACATTTAATGATATTGACTTTGCT
+AAGAAATTGGATAGGATTATGAAAAGACGAAGTGATGAAAATGTTGAGGCTGAAGATGAT
+GAGGAAGAGGGTGAGGAAGATTTCGTAAAAAAAAAAGCCAGAAAGTCCCCTACAGCGAAA
+CTTACTCCCTTGGACAAACAGGTGAAGGACCTGAAAATGCATCATAGAGATAAAGTGCTT
+GTTATTAGAGTAGGCTACAAGTACAAATGTTTTGCAGAGGATGCAGTAACGGTTAGCAGA
+ATACTTCACATCAAACTTGTGCCTGGAAAATTGACTATCGATGAGTCTAATCCTCAAGAT
+TGCAATCATAGGCAGTTTGCGTACTGTTCTTTCCCGGATGTCAGATTAAACGTTCACCTA
+GAGAGACTTGTGCATCATAATTTAAAGGTTGCCGTGGTAGAGCAAGCAGAAACAAGCGCT
+ATTAAGAAGCATGATCCAGGTGCCAGCAAATCAAGCGTTTTTGAAAGAAAGATTTCAAAT
+GTCTTTACCAAAGCTACATTTGGTGTTAATTCCACCTTTGTCCTTAGGGGGAAACGTATT
+CTCGGTGATACAAACAGTATATGGGCTTTGTCCCGTGACGTACATCAGGGAAAGGTGGCT
+AAATATTCCTTAATTTCTGTCAATTTAAATAACGGGGAAGTCGTGTATGATGAATTTGAA
+GAGCCTAATCTTGCTGATGAGAAACTACAGATACGAATCAAATATTTACAGCCCATAGAA
+GTACTGGTAAATACAGATGATCTTCCATTACATGTAGCGAAATTTTTCAAAGATATTTCA
+TGTCCTTTAATACACAAGCAGGAGTATGATTTGGAAGATCATGTAGTTCAGGCAATAAAA
+GTAATGAATGAGAAAATTCAACTCTCGCCGTCTCTCATACGCTTAGTTTCTAAGTTATAT
+TCGCATATGGTTGAGTACAATAATGAGCAGGTGATGTTGATTCCTTCTATCTATTCGCCC
+TTCGCATCAAAAATACATATGTTACTTGATCCTAACTCCCTGCAAAGTTTGGACATTTTT
+ACCCATGATGGTGGTAAAGGTTCTTTGTTTTGGTTATTGGACCATACAAGGACATCGTTT
+GGATTAAGAATGTTGAGAGAATGGATTCTCAAACCTTTGGTTGATGTACACCAAATTGAA
+GAGCGGCTTGATGCCATTGAGTGCATTACATCCGAAATCAACAACAGTATATTTTTTGAA
+TCGTTGAATCAAATGTTGAATCATACCCCTGACTTATTAAGAACTTTAAATCGCATAATG
+TATGGTACAACTTCTAGAAAAGAAGTCTATTTCTATTTAAAGCAAATAACTTCTTTCGTT
+GATCACTTCAAGATGCATCAATCTTACCTGTCAGAACATTTCAAGTCATCAGATGGAAGG
+ATAGGCAAACAATCTCCTTTACTTTTTAGACTATTTAGTGAATTGAATGAACTACTTTCT
+ACCACTCAGTTGCCTCATTTTTTGACCATGATCAACGTTTCTGCGGTAATGGAAAAAAAT
+TCAGATAAGCAAGTAATGGATTTTTTTAATTTAAATAACTATGATTGTTCAGAGGGTATA
+ATAAAAATTCAAAGGGAAAGCGAATCAGTACGGTCACAGTTAAAGGAAGAATTGGCAGAA
+ATACGAAAATATCTCAAACGTCCATATCTAAATTTTAGAGATGAAGTTGATTACTTAATC
+GAAGTGAAAAACTCGCAAATTAAGGACTTGCCAGATGATTGGATAAAAGTTAACAATACG
+AAGATGGTCAGTAGATTTACCACTCCCAGAACCCAGAAACTGACTCAAAAGCTAGAATAT
+TACAAGGACTTATTAATTCGGGAATCTGAACTACAGTATAAAGAATTCTTGAACAAAATT
+ACGGCAGAATATACAGAGCTCCGTAAAATTACACTCAATTTGGCGCAGTATGACTGTATT
+TTGTCGTTAGCAGCCACATCATGCAACGTAAATTATGTTAGACCAACTTTTGTGAATGGT
+CAACAAGCCATAATCGCAAAAAATGCAAGAAATCCAATTATCGAGTCGCTGGATGTTCAT
+TATGTACCAAATGATATCATGATGTCCCCAGAAAACGGTAAAATCAATATTATAACGGGG
+CCGAATATGGGTGGGAAATCATCTTATATTAGACAAGTGGCACTGCTTACTATAATGGCA
+CAGATCGGCTCATTTGTCCCCGCAGAAGAGATCAGATTAAGCATATTTGAAAACGTACTC
+ACTCGAATCGGTGCGCACGATGATATTATAAACGGTGATTCTACTTTTAAAGTGGAAATG
+CTTGATATCCTACACATCTTGAAAAATTGCAATAAACGGTCTTTACTATTATTAGACGAA
+GTGGGAAGAGGTACTGGCACGCACGATGGTATAGCAATTTCTTATGCTTTAATAAAGTAT
+TTTTCTGAGTTAAGTGACTGCCCCTTGATATTATTTACTACCCATTTTCCCATGCTGGGA
+GAAATCAAATCTCCGTTAATAAGGAATTATCATATGGATTACGTGGAAGAACAAAAAACT
+GGCGAGGACTGGATGAGTGTAATTTTTCTATATAAGTTAAAAAAGGGATTGACTTATAAT
+AGTTATGGGATGAATGTGGCGAAATTGGCACGCCTGGACAAAGATATTATAAATCGGGCA
+TTCAGTATTTCAGAAGAATTGCGGAAGGAATCCATTAACGAAGACGCGTTGAAATTATTC
+AGCTCTTTGAAAAGAATATTAAAAAGTGATAATATAACAGCAACGGATAAACTCGCGAAA
+TTACTATCATTGGATATCCACTGA
+>CDC39 6327 residues Pha 0 Code 0
+ATGCTATCGGCCACATACCGTGATTTGAACACAGCATCTAATTTAGAAACATCAAAGGAA
+AAACAGGCCGCTCAAATCGTCATTGCACAAATTAGTTTATTATTCACGACTCTTAACAAC
+GACAATTTTGAATCCGTGGAAAGAGAAATTAGACATATTTTAGACAGGTCGTCCGTAGAT
+ATTTACATAAAAGTTTGGGAACGATTATTAACCTTAAGTTCTCGGGATATTTTACAAGCG
+GGAAAATTTTTACTTCAAGAAAATCTACTACACAGACTACTATTAGAATTTGCGAAGGAT
+TTACCGAAGAAAAGCACAGACCTTATTGAGCTTTTGAAAGAACGAACCTTCAATAACCAG
+GAGTTTCAAAAACAAACAGGAATTACATTATCACTTTTCATTGATCTATTTGATAAATCT
+GCAAACAAGGACATTATAGAGTCACTTGACCGCTCCTCTCAGATTAACGATTTCAAGACA
+ATTAAGATGAATCATACAAATTATTTAAGGAATTTTTTTCTTCAAACCACACCAGAAACA
+CTAGAGTCCAATCTACGCGACTTATTGCATTCCTTGGAAGGTGAAAGTCTAAATGACTTA
+TTAGCTCTTTTACTGTCCGAAATACTTTCACCTGGGTCTCAGAATTTACAAAATGATCCC
+ACACGGAGTTGGTTGACACCTCCGATGGTTTTAGACGCAACGAACCGTGGGAACGTTATA
+GCAAGATCTATAAGTTCTCTGCAAGCCAACCAGATAAATTGGAATCGTGTGTTTAATTTA
+ATGTCAACAAAGTATTTCTTGAGCGCACCATTGATGCCTACTACAGCATCTTTGAGTTGC
+TTATTTGCAGCATTGCACGATGGTCCAGTTATTGATGAATTTTTCAGTTGCGACTGGAAA
+GTTATTTTCAAACTAGATTTGGCCATTCAACTTCATAAGTGGTCGGTACAGAATGGTTGC
+TTTGACTTATTAAATGCAGAAGGTACCAGGAAAGTTTCTGAAACCATCCCAAACACAAAG
+CAATCTTTACTCTACTTATTATCCATTGCATCATTGAATTTAGAATTGTTCCTACAAAGG
+GAGGAATTGTCTGATGGTCCTATGCTAGCTTATTTTCAAGAGTGCTTCTTTGAAGATTTC
+AACTACGCCCCTGAATATCTTATTTTAGCATTAGTCAAAGAAATGAAGCGGTTCGTTTTA
+TTGATAGAAAACAGGACAGTCATAGACGAAATACTTATTACCTTATTGATTCAAGTGCAT
+AATAAATCACCGTCATCGTTCAAGGACGTTATTTCTACAATAACCGATGATTCTAAAATC
+GTAGATGCAGCAAAAATCATAATCAACTCGGATGACGCACCTATTGCCAACTTTTTAAAA
+TCGTTGTTAGATACGGGAAGATTAGATACGGTCATTAATAAACTTCCTTTCAATGAAGCT
+TTTAAAATTTTGCCATGCGCAAGACAAATTGGTTGGGAGGGGTTCGATACTTTCTTAAAA
+ACAAAAGTTTCTCCATCTAATGTCGATGTAGTGCTGGAATCACTAGAGGTTCAAACGAAA
+ATGACTGATACAAACACTCCATTTAGGTCATTAAAGACATTTGACTTATTCGCTTTTCAT
+TCATTAATTGAAGTACTGAACAAATGCCCACTAGATGTTCTCCAATTACAAAGGTTTGAA
+TCCTTGGAATTTTCCTTATTAATTGCATTTCCTAGATTGATCAATTTTGGTTTTGGACAC
+GATGAAGCTATTTTAGCCAATGGTGACATCGCAGGGATTAATAATGATATTGAAAAGGAG
+ATGCAGAACTATTTACAGAAAATGTATAGTGGTGAGTTAGCCATTAAAGATGTAATCGAA
+CTTCTGAGAAGGTTAAGAGATAGCGACTTGCCAAGGGACCAGGAAGTCTTCACATGTATT
+ACCCATGCCGTTATAGCAGAATCGACATTCTTCCAAGATTATCCATTGGATGCATTGGCT
+ACTACATCTGTTCTTTTTGGATCCATGATTCTCTTTCAACTGTTACGTGGATTCGTATTA
+GACGTCGCATTTAGGATAATCATGAGGTTTGCCAAGGAGCCTCCAGAGTCCAAGATGTTT
+AAGTTTGCTGTACAAGCTATTTATGCATTTAGGATACGTTTGGCCGAATATCCACAGTAT
+TGTAAGGACCTCTTGAGAGATGTTCCGGCTTTGAAGTCTCAGGCTCAAGTTTACCAATCT
+ATCGTCGAAGCTGCTACCCTAGCAAATGCTCCAAAGGAAAGGTCAAGACCCGTCCAGGAA
+ATGATCCCATTAAAATTTTTTGCTGTAGATGAAGTTTCATGTCAGATCAATCAAGAAGGT
+GCTCCTAAAGATGTCGTAGAAAAAGTTCTTTTTGTTCTCAACAACGTTACTCTGGCTAAC
+TTGAATAATAAGGTTGATGAATTGAAAAAAAGTTTGACACCAAATTATTTTTCTTGGTTT
+TCCACATATTTAGTTACGCAAAGGGCTAAAACAGAACCTAACTATCATGATCTTTATAGC
+AAGGTTATAGTTGCTATGGGGTCAGGGTTGCTACATCAGTTCATGGTCAACGTTACTTTG
+AGACAATTATTTGTCCTACTATCTACAAAAGACGAGCAAGCCATCGATAAAAAGCACCTA
+AAGAATTTGGCTTCATGGTTAGGATGTATCACATTAGCTTTGAATAAACCAATTAAACAC
+AAGAATATCGCATTCAGGGAAATGTTAATCGAAGCTTATAAGGAAAATAGACTTGAAATA
+GTTGTGCCTTTTGTAACAAAGATTTTACAAAGGGCTTCTGAATCAAAAATTTTCAAGCCT
+CCAAATCCCTGGACTGTTGGCATATTAAAGCTGTTGATTGAGTTGAACGAAAAAGCAAAC
+TGGAAATTAAGTTTGACTTTCGAAGTTGAGGTTTTATTAAAATCTTTTAATTTGACCACC
+AAATCTCTCAAGCCCTCGAATTTCATCAATACTCCGGAAGTTATAGAAACTTTATCCGGT
+GCTTTGGGATCAATCACTCTGGAGCAACAACAAACAGAGCAACAAAGGCAAATTATACTA
+ATGCAACAACACCAGCAACAGATGCTAATATATCAACAGAGACAACAACAACAACAACAA
+AGGCAACAACAACAACAACATCATATTAGTGCAAATACAATCGCAGACCAACAAGCGGCA
+TTTGGCGGCGAGGGTTCAATTTCACACGACAATCCTTTTAACAACTTACTTGGTTCTACT
+ATTTTTGTAACCCACCCTGACTTGAAGAGGGTATTTCAAATGGCTTTAGCCAAGTCAGTT
+CGCGAAATTTTGTTGGAAGTAGTCGAAAAGTCATCAGGAATTGCTGTTGTTACGACGACA
+AAAATAATACTTAAAGACTTTGCCACTGAAGTTGATGAGTCTAAGTTGAAGACGGCTGCA
+ATCATTATGGTAAGGCATTTGGCACAAAGTTTAGCTCGAGCTACTTCAATTGAACCATTG
+AAAGAAGGCATACGTTCTACTATGCAATCACTAGCACCGAATTTAATGTCTCTTTCTTCT
+TCACCTGCAGAGGAGCTTGACACGGCAATAAATGAAAATATTGGCATTGCTCTAGTTTTG
+ATTGAGAAAGCATCTATGGACAAGTCTACTCAAGATTTAGCAGACCAATTGATGCAAGCG
+ATTGCTATTCGTCGTTATCACAAGGAAAGAAGGGCAGACCAACCATTTATTACGCAAAAT
+ACCAATCCATATTCACTGTCTTTACCAGAACCTCTTGGTTTGAAAAACACTGGTGTTACT
+CCTCAACAATTCAGGGTATACGAAGAATTTGGTAAGAATATTCCAAACTTGGATGTTATT
+CCGTTTGCAGGATTGCCCGCTCACGCTCCACCGATGACTCAAAATGTGGGTTCAACTCAG
+CCTCAGCAACAACAAGCGCAAATGCCTACCCAAATCCTAACCTCCGAACAAATAAGAGCT
+CAACAACAACAGCAGCAATTACAGAAAAGCCGTTTGAATCAGCCATCCCAGTCGGCTCAA
+CCTCCAGGAGTGAATGTCCCAAATCCTCAAGGTGGGATTGCTGCAGTTCAATCAGATTTG
+GAACAGAATCAACGTGTTCTCGTTCACCTCATGGACATTTTAGTTTCTCAAATTAAAGAA
+AATGCTACGAAGAATAACTTAGCTGAATTAGGCGATCAAAACCAAATTAAAACCATCATT
+TTTCAAATTTTGACATTCATTGCAAAAAGCGCACAAAAGGATCAATTAGCTTTAAAGGTA
+TCCCAAGCTGTCGTTAATAGCCTTTTTGCCACTAGTGAGAGTCCTCTCTGCAGAGAAGTT
+TTGTCCCTACTTTTGGAAAAGTTATGTTCTTTATCCCTCGTTGCTAGAAAAGACGTTGTC
+TGGTGGTTAGTTTATGCCTTGGACAGTAGGAAATTCAATGTTCCCGTTATCAGATCCCTT
+CTAGAAGTTAATTTAATTGATGCTACAGAATTAGATAACGTTTTAGTTACTGCAATGAAA
+AATAAAATGGAGAACTCAACTGAATTTGCTATGAAATTAATTCAGAATACTGTCTTGTCT
+GATGATCCAATTTTGATGAGAATGGACTTCATTAAAACCTTAGAACACTTGGCCTCTTCG
+GAAGATGAAAATGTAAAGAAATTCATCAAAGAGTTCGAAGATACTAAGATAATGCCAGTG
+AGGAAAGGTACCAAAACCACAAGAACAGAAAAGCTTTACTTAGTATTTACGGAATGGGTA
+AAATTACTTCAAAGAGTTGAGAATAACGACGTAATCACAACTGTTTTTATCAAGCAATTA
+GTCGAAAAGGGTGTTATCAGCGATACTGATAATTTACTTACATTTGTCAAAAGTTCTCTT
+GAGCTATCAGTTTCTTCATTCAAAGAAAGTGACCCGACTGATGAGGTTTTCATCGCTATT
+GATGCTCTAGGATCGCTAATTATAAAATTGTTGATTTTACAGGGTTTCAAAGATGATACA
+AGAAGAGATTACATAAATGCAATATTTTCTGTGATCGTTTTAGTGTTTGCTAAGGATCAT
+AGCCAAGAGGGTACCACATTCAATGAACGACCATATTTCAGACTATTTTCTAACATCTTA
+TACGAATGGGCTACCATCAGGACGCACAATTTTGTTAGAATATCTGATTCCAGCACTAGG
+CAGGAGCTGATCGAATTTGATTCTGTATTTTACAACACTTTCTCAGGATATTTGCACGCT
+CTGCAACCATTTGCCTTCCCTGGATTCTCATTTGCATGGGTGACACTATTATCACACAGA
+ATGTTATTACCAATTATGCTAAGATTACCCAATAAAATAGGTTGGGAAAAGTTAATGCTT
+TTGATTATCGATTTGTTTAAATTTTTGGACCAATACACAAGTAAACATGCAGTCTCTGAC
+GCTGTTTCGGTTGTTTATAAGGGAACACTGCGTGTTATTTTAGGCATTTCGAATGATATG
+CCATCCTTTTTGATTGAAAATCACTATGAATTAATGAACAATCTACCTCCAACATATTTC
+CAACTAAAGAATGTTATTTTATCTGCTATTCCTAAGAATATGACCGTTCCCAACCCATAT
+GACGTGGATCTTAATATGGAGGATATTCCAGCATGTAAAGAACTACCTGAAGTCTTCTTT
+GATCCTGTAATTGATTTACACTCATTGAAAAAGCCAGTTGACAACTACCTACGTATTCCC
+TCAAATTCATTATTAAGAACAATACTAAGCGCTATTTACAAGGATACCTATGACATAAAA
+AAGGGCGTAGGCTACGACTTTTTATCTGTTGATAGTAAATTAATTCGCGCTATTGTATTA
+CATGTGGGCATTGAAGCTGGAATAGAGTATAAGAGAACTTCTTCAAATGCGGTATTTAAT
+ACGAAGTCTTCTTATTATACTTTATTGTTCAATCTGATTCAAAATGGTAGCATCGAAATG
+AAATATCAAATTATTCTGTCTATTGTGGAACAATTGCGGTATCCAAACATCCACACCTAT
+TGGTTCAGCTTTGTGTTAATGAATATGTTCAAAAGTGACGAATGGAATGATCAAAAACTT
+GAAGTCCAAGAAATTATTTTAAGAAACTTTTTAAAAAGAATTATTGTTAACAAACCACAT
+ACCTGGGGTGTTTCAGTTTTCTTTACTCAGTTGATAAACAATAACGATATTAATCTTTTA
+GACCTGCCCTTTGTACAAAGTGTTCCCGAAATTAAACTAATTTTACAACAATTAGTAAAA
+TATTCCAAAAAATACACAACCAGTGAACAAGATGACCAATCCGCCACCATCAATAGAAGG
+CAAACCCCTCTACAATCCAACGCATAA
+>YCY4 1176 residues Pha 0 Code 0
+ATGGTTTCATTGTTCAAAAGAGGTAAGGCTCCACCGCTCACGAAAGAAGGCCCCACTTCT
+AAAAAGCCTCCTAACACAGCGTTTAGACAACAAAGGCTTAAGGCATGGCAACCAATACTG
+TCTCCTCAAAGTGTGCTTCCGTTGTTAATATTCGTTGCATGTATATTTACTCCTATTGGT
+ATTGGACTCATTGTAAGCGCTACTAAGGTACAAGATCTAACAATTGATTATAGTCATTGT
+GATACAAAAGCATCTACAACTGCTTTTGAAGATATACCAAAGAAGTACATTAAATATCAC
+TTTAAAAGTAAAGTTGAAAATAAACCACAATGGAGGCTAACCGAAAATGAAAATGGCGAA
+CAATCATGCGAACTGCAGTTCGAAATCCCAAACGATATCAAGAAATCCATTTTTATATAT
+TATAAAATAACCAATTTTTATCAAAATCATCGCAGATATGTCCAATCGTTTGACACAAAG
+CAAATATTAGGGGAGCCTATCAAAAAAGATGATCTGGATACAAGCTGTAGTCCAATAAGA
+AGTAGGGAAGACAAAATAATATATCCCTGTGGGTTGATCGCTAATTCCATGTTTAATGAT
+ACATTTTCTCAGGTGTTGAGTGGTATAGATGACACAGAAGACTATAATTTAACTAACAAG
+CATATATCATGGAGTATTGATCGTCACAGATTTAAAACCACCAAGTATAATGCTAGCGAT
+ATTGTTCCACCGCCAAACTGGATGAAGAAGTATCCCGATGGGTATACAGATGAAAATCTT
+CCTGATATCCATACTTGGGAAGAGTTCCAGGTATGGATGAGGACTGCAGCCTTTCCCAAG
+TTTTACAAGTTGACGTTGAAAAATGAATCTGCTTCTTTACCGAAGGGTAAATATCAAATG
+AACATTGAGTTGAATTATCCGATTTCACTCTTTGGTGGCACAAAATCATTTGTACTGACT
+ACAAATGGAGCTATTGGTGGTAGAAATATGTCACTAGGCGTACTGTACCTCATCGTTGCA
+GGGCTTTGCGCCTTATTTGGCATCATTTTTTTGGTTAAATTAATCTTCCAACCAAGAGCG
+ATGGGTGATCACACTTATTTGAATTTTGATGATGAAGAAAACGAGGATTATGAGGATGTA
+CACGCAGAGAATACAACATTGAGGGAAATTTTATAG
+>A2 360 residues Pha 0 Code 0
+ATGCGCAGCATAGAAAACGATAGAAGTAATTATCAACTTACACAGAAAAATAAATCGGCG
+GATGGGTTGGTATTTAATGTGGTAACTCAAGATATGATAAACAAAAGTACTAAACCTTAC
+AGAGGACACCGGTTTACAAAAGAAAATGTCCGAATACTAGAAAGTTGGTTTGCAAAGAAC
+ATCGAGAACCCATATCTAGATACCAAGGGCCTAGAGAATCTAATGAAGAATACCAGTTTA
+TCTCGCATTCAAATCAAAAACTGGGTTTCGAATAGAAGAAGAAAAGAAAAAACAATAACA
+ATCGCTCCAGAATTAGCGGACCTCTTGAGCGGTGAGCCTCTGGCAAAGAAGAAAGAATGA
+>GIT1 1557 residues Pha 0 Code 0
+ATGGAAGACAAAGATATCACATCGGTAAATGAGAAGGAAGTGAACGAGAACACTAATCCT
+AGAATAATAAAATATGATGCCGAGAGGCGTGCAACCCGTACTGAAACCTCAAAGAAAGAT
+AAATGGAAAAACATAGTTACAATCATTGCGTCCGGTTTTGCTCTGATAAGTGATGGTTAC
+GTAAATGGTTCAATGAGTATGCTAAACAAGGTTTTTGTTATGGAGTACGGTAAGAAAAAC
+TATAGCTCAAAAGTGTCGACTAGAGTTTCCAACGCAGCCCTAGTTGGTATTATTTTTGGC
+CAATTCTTTATGGGTATCGCTGCTGATTATTATAGTAGAAAATCTTGTATCCTTGTGGCC
+ACTGCTATCTTGGTTATTGGTAGTGCTCTGTGTGCTGCCTCTCACGGTACTACTGTACCT
+GGCATGTTTTGGATGTTAACAGTTATGAGAGGTTTGGTAGGTATTGGTGTTGGTGCAGAA
+TATCCTACCAGTACATTAAGTGCTAATGAGTCTGCTAATGAATATACCACTACCAAAAGA
+GGTGGTATCCTGGTTATGGTGACAAATTTGCCACTAGCCTTCGGTGGTCCATTTGCTACG
+ATCATCTTTTTAATCGTCTACAAAATCTGTTCAGGAACAAAACATTTAGAGGCGATCTGG
+AGGACTGTTTTTGCAATAGGGTGCTTCTGGCCATTGAGTGTGTTCTATTTTAGATGGAAG
+ACTGCTACTACAGAAGTCTATGAAAAAGGTAGAATCAAGAGAAATATACCATATTTCCTA
+GCATTGAAATTTTATTGGAAAAGGTTACTTGGTACATGTGGTACATGGTTTATGTATGAT
+TTTGTTACCTTCCCAAATGGTATTTTCAGTTCAACAATTATCAGTTCCGTTATCAAGGAC
+CAAAATGATTTAGTAAAAGTGGCAGAGTGGAACTTACTGTTGGGAGTTTTAGCTGTACTG
+GGTGTACCAATTGGTGCTTATCTGTCCGATCGTATTGGTCGTAAATATACGTTGATGTTT
+GGTTTCTCTGGGTACATCATCTTTGGTCTAATCATTGGATGTGCGTACGACCAATTGAAA
+AAAATCACCCCCTTGTTTATTATCTTCTACGCATTCATGAATATGTTAGGTAATGCTGGA
+CCAGGTGATATGCTTGGTGTTATTAGTAGTGAAGCGTCAGCAACCGCTGTTAGAGGTGTT
+TTCTATGGTTTATCTGCTGTGACTGGTAAAATCGGTTCTGTAGTAGGCGTCGAATGTTTC
+CAACCCATTAGGGATAATTTGGGTGCAAGATGGACTTTTATTATTGCTGCAATTTGTGGT
+CTTATTGGTATCATTATTACATATTTCTTTGTTCCACATTCTCTTGAAAGCGATTTAATG
+AAGCAAGACGTTGAATTTCACAACTATTTGGTATCCAATGGCTGGACTGGTAAGATGGGA
+TTTGATGAGACAGATGAAGAATCAATGGTTAGAACTATTGAAGTTGAAGAGAATGGTACT
+AATTGTAGTAAGAAAAACGCAGAAATAATTTCAGTCAGACAGGTCGATCAAAGTTGA
+>YCZ0 951 residues Pha 0 Code 0
+ATGTCATCTACGGACATCTGGATATCCAATGATGCATCTACTTTTCAAAAGGCACAGCTG
+CCTACTCAATTACGGCACGTCAAAGTGATTAAAATTCGTGAAGATTCTATCGGAAGGATC
+ATCCTTCTTATATCGACAGAAATCACAAATGAGGAAAATGCTGATCCAGATCTCTCAGAG
+ATTTTCATATCAGATTCGCAAGGGTTGAAATTCTCACCTGTTGAATGGACACCAAACCAT
+CAGTTTGGAAATTTTAGGCTCACTTTTCCTGATTTCTTGAAAGGGACAATATTTGGATCG
+TTTCATCCTTCCATTGACTATTCTAATCACCAAGTAAACTATACTGAAAATATAGCCGGA
+GGAGAAACCAAAATATCCGTTGATAACGGCCTCACATGGTCAAATTTGAAAGTTGTTGAT
+GAAGAAAATGCCGATTCGTTCGGCTGTGATATCACTAGGCCTGAGAGATGTTCACTTCAG
+GGTTATTTTTACAATCTAAAACTTTCAAATCCTTCTGCTGGGATCATATTAATGACAGGT
+TCTGTTGGCGATGACAATGAATTCGATCGGAAGGACCGAAAAACTTTCATTTCTAGAGAC
+GGTGGTCTAACATGGAGGGTGGCCCATAATTCTTCTGGATTATATGCTACTGGTGATCTG
+GGAAATATTATTGTATATATCCCGTCTCCTTCATATAAAGATGGTGATGTACAATCCAAA
+CTTTATTTTTCCTTGGACCAAGGTAGAACATGGAATCAATATGAGCTTGTTGACGCTTTA
+TTTTATATCCATCCATTAGAGTTGATTAATACAACGCCAGATGGATCAGGCTCAAAATTT
+ATTTTAAGCGGACATCTCATTACTACGGCTAGTCAAGAAGGAAACAACACCAACATCTCA
+TATATTGCAAGAAGTGTCCTGTATGCGATCGATTTTTCTGCTGCATTTTGA
+>YCZ1 549 residues Pha 0 Code 0
+ATGATATTACTTCATGCCATATATACTCTTTGGGTAATTATACTACTTCCGCTACTCAAT
+GCAGAGAAATTTGTCCCAAAAGTAACGGAGGCTCCTATAGAAACATCATTTAATCTAGTG
+AGTTTTGATGATTCCAACACTTCTATCAGATTAGATGGTTGGGGGGTTGTATGGATAAGT
+TTCGACGCTGGAGAAAATTGGGAAACGGTCAAAGAAATTGAAGAGCGCATTTTCAGATTT
+ACTGTTGATCCTTTCCATGGACAGGAAAGAGGTTTCGCTTTTATATGTGAATCACCCAAA
+TTCTACATTACCGACGACCGTGGGGAGTCATGGAGGGCTTTAACTATACCCTCATCAGAA
+GAATATTTAGATGGCGACTGTTTTATAACTACTCATCCTAGAAACAAAGAACTTCTTATT
+GCGAATTGCTATAGCTATATGATAGACGCAGACGTTTTATATGACCCAAGTGAAATTTAC
+TTGAGCAATGATGGGAATCCTTTTTTAAAATTAAACCTTCCTTGGAAAAGAAAAAAGACG
+ACGATATAA
+>YCZ2 1107 residues Pha 0 Code 0
+ATGAAGGCTGTCGTCATTGAAGACGGTAAAGCGGTTGTCAAAGAGGGCGTTCCCATTCCT
+GAATTGGAAGAAGGATTCGTATTGATTAAGACACTCGCTGTTGCTGGTAACCCGACTGAT
+TGGGCACACATTGACTACAAGGTCGGGCCTCAAGGATCTATTCTGGGATGTGACGCTGCC
+GGCCAAATTGTCAAATTGGGCCCAGCCGTCGATCCTAAAGACTTTTCTATTGGTGATTAT
+ATTTATGGGTTCATTCACGGATCTTCCGTAAGGTTTCCTTCCAATGGTGCTTTTGCTGAA
+TATTCTGCTATTTCAACTGTGGTTGCCTACAAATCACCCAATGAACTCAAATTTTTGGGT
+GAAGATGTTCTACCTGCCGGCCCTGTCAGGTCTTTGGAAGGGGCAGCCACTATCCCAGTG
+TCACTGACCACAGCTGGCTTGGTGTTGACCTATAACTTGGGCTTGAACCTGAAGTGGGAG
+CCATCAACCCCACAAAGAAACGGCCCCATCTTATTATGGGGCGGTGCAACTGCAGTAGGT
+CAGTCGCTCATCCAATTAGCCAATAAATTGAATGGCTTCACCAAGATCATTGTTGTGGCT
+TCTCGGAAACACGAAAAACTGTTGAAAGAATATGGTGCTGATCAACTATTTGATTACCAT
+GATATTGACGTGGTAGAACAAATTAAACACAAGTACAACAATATCTCGTATTTAGTCGAC
+TGTGTCGCGAATCAAAATACGCTTCAACAAGTGTACAAATGTGCGGCCGATAAACAGGAT
+GCTACCGTTGTCGAATTAACTAATTTGACAGAAGAAAACGTCAAAAAGGAGAATAGGAGG
+CAAAATGTCACTATTGACAGAACAAGACTGTATTCAATAGGCGGCCATGAAGTACCATTT
+GGTGGCATTACTTTCCCTGCTGACCCAGAAGCCAGGAGAGCTGCCACCGAATTCGTCAAG
+TTCATCAATCCAAAGATTAGTGATGGGCAAATTCACCATATTCCAGCAAGGGTCTATAAG
+AACGGGCTTTACGATGTTCCTCGTATCCTGGAAGACATTAAAATCGGTAAGAACTCTGGT
+GAAAAACTAGTTGCCGTATTAAACTAG
+>YCZ3 336 residues Pha 0 Code 0
+ATGGAGATGCTCTTGTTTCTGAACGAATCATACATCTTTCATAGGTTTCGTATGTGGAGT
+ATTGTTTTATGGCACTCATGTGTATTCGTATGCGCAGAATGTGGGAATGCCAATTATAGG
+GGTGCCGGGGTGCCTTGCAAAACCCTTTTACGCGCGCCTGTGAAGTTTCCGCTTTCGGTC
+AAAAAGAATATCCGAATTTTAGATTTGGACCCTCGTTCAGAAGCTTATTGTCTAAGCCTA
+AATTCAGTCTGCTTTAAACGGCTTCCGCGGAAGAAATATTTCCATCTCTTGAATTCGTAC
+AACATTAAACGTGTGTTGGGAGTCGTATACTGTTAG
+>PAU3 375 residues Pha 0 Code 0
+ATGGTCAAATTAACTTCAATCGCTGCTGGTGTTGCCGCCATCGCTGCCGGTATTGCCGCT
+GCCCCAGCCACTACCACTCTATCTCCATCTGACGAAAGGGTCAACTTGGTCGAATTGGGT
+GTTTACGTCTCCGATATCAGAGCTCATTTGGCTCAATACTACTTGTTTCAAGCAGCTCAT
+CCAACTGAGACCTACCCAGTTGAGATTGCTGAAGCTGTTTTCAACTATGGTGACTTCACC
+ACTATGTTGACTGGTATTCCAGCTGAACAAGTCACCAGAGTCATCACTGGTGTCCCATGG
+TACTCCACTAGATTGAGACCAGCCATCTCCAGTGCTCTATCTAAGGACGGTATCTACACT
+GCTATTCCAAAATAG
+>YCZ5 1086 residues Pha 0 Code 0
+ATGCTTTACCCAGAAAAATTTCAGGGCATCGGTATTTCCAACGCAAAGGATTGGAAGCAT
+CCTAAATTAGTGAGTTTTGACCCAAAACCCTTTGGCGATCATGACGTTGATGTTGAAATT
+GAAGCCTGTGGTATCTGCGGATCTGATTTTCATATAGCCGTTGGTAATTGGGGTCCAGTC
+CCAGAAAATCAAATCCTTGGACATGAAATAATTGGCCGCGTGGTGAAGGTTGGATCCAAG
+TGCCACACTGGGGTAAAAATCGGTGACCGTGTTGGTGTTGGTGCCCAAGCCTTGGCGTGT
+TTTGAGTGTGAACGTTGCAAAAGTGACAACGAGCAATACTGTACCAATGACCACGTTTTG
+ACTATGTGGACTCCTTACAAGGACGGCTACATTTCACAAGGAGGCTTTGCCTCCCACGTG
+AGGCTTCATGAACACTTTGCTATTCAAATACCAGAAAATATTCCAAGTCCGCTAGCCGCT
+CCATTATTGTGTGGTGGTATTACAGTTTTCTCTCCACTACTAAGAAATGGCTGTGGTCCA
+GGTAAGAGGGTAGGTATTGTTGGCATCGGTGGTATTGGGCATATGGGGATTCTGTTGGCT
+AAAGCTATGGGAGCCGAGGTTTATGCGTTTTCGCGAGGCCACTCCAAGCGGGAGGATTCT
+ATGAAACTCGGTGCTGATCACTATATTGCTATGTTGGAGGATAAAGGCTGGACAGAACAA
+TACTCTAACGCTTTGGACCTTCTTGTCGTTTGCTCATCATCTTTGTCGAAAGTTAATTTT
+GACAGTATCGTTAAGATTATGAAGATTGGAGGCTCCATCGTTTCAATTGCTGCTCCTGAA
+GTTAATGAAAAGCTTGTTTTAAAACCGTTGGGCCTAATGGGAGTATCAATCTCAAGCAGT
+GCTATCGGATCTAGGAAGGAAATCGAACAACTATTGAAATTAGTTTCCGAAAAGAATGTC
+AAAATATGGGTGGAAAAACTTCCGATCAGCGAAGAAGGCGTCAGCCATGCCTTTACAAGG
+ATGGAAAGCGGAGACGTCAAATACAGATTTACTTTGGTCGATTATGATAAGAAATTCCAT
+AAATAG
+>YCZ6 2499 residues Pha 0 Code 0
+ATGGATTCGATTACAGTAAAAAAACCTCGGTTAAGATTGGTTTGCCTGCAATGCAAAAAG
+ATCAAACGGAAATGTGATAAACTGCGGCCTGCTTGCTCGCGATGCCAACAAAATTCATTA
+CAGTGTGAATATGAAGAGAGAACAGATTTATCTGCCAATGTTGCAGCAAACGACTCTGAT
+GGATTCAATTCCTCTCATAAGCTCAATTTCGAACAGCAACCTGTACTTGAAAGGACTGGG
+CTTAGATATTCCTTACAAGTGCCTGAAGGTGTCGTTAATGCTACGCTGTCGATATGGAAC
+GCCGAAGATATGCTAGTTATAGTAGGATTAGTTACATTTCTGGATTATCCTTTTGCTGCG
+CATAGTCTGGCGCAACATGACCAGTATATCAGGGCACTTTGTGCTTCGTTGTACGGCATG
+GCGCTTGTTGACTTTAGCAATTATGCTAATGGTATTCCTTGTGAAGACACATCAAGAAGT
+ATACTAGGACCATTGTCATTCATAGAAAAGGCCATTTTTAGACGGATAGAACATAGTAAG
+CAATTTCGAGTTCAGTCTGCCGCCTTAGGGTTATTATACAATGCATTTTCAATGGAAGAA
+GAAAACTTCTCGACTCTTCTACCGTCACTCATCGCTGAAGTGGAAGACGTGTTGATGCAA
+AAAAAAGACTGTGAAATACTTTTGAGGTGTTTCTATCAAAATATTTATCCCTTCTATCCT
+TTTATGGACATTTCACTCTTTGAGAGCGATCTCACTAGTTTGCTTTTACAAGACGACAAT
+AATCGTTGGAAAATTAGTACTGAAGTTAAAAATGTGCGCAAAAAAATAGAAACTTTGTCA
+TTACTTACAATAGTAATGGCCATGGCCTTGATGCATTCAAAATTGGATGCAAATCTTCTT
+TCAATGGTAAAAGAAAATGCCTCCGAAAGTGCCAGGAAACTTTCTCTTTTATGTCATAAA
+CTATTATGCCTCCTGGATGTATTTCGCTATCCAAATGAGAACACTTTTACTTGCCTTTTA
+TATTTCTACGTTTCAGAGCATTTAGATCCCGAGAGTCCCGATTGTGTACTGAGCCCCACT
+AACTTGCTTACTCTGCACCATCTTTTAAATTTGTCCATGACCTTAGGTCTTCAATATGAG
+CCTTCGAAGTACAAACGTTTCAAAGATCCAGAAGTGATAAGGCAGAGACGGATATTATGG
+TTAGGAGTTCAGTCATTACTTTTTCAAATTTCTCTTGCTGAAGGTGATGCTGGTAAATCA
+AATAGTGAATATATGGAGGCATATTTAACAGACTTCGAAGAATATATTGAAGCTTCCTCA
+GAGTATGAAAAAAGTTCTGCGAGTGAATCGAACGTGCAAATGAATGATATTGTTTGGAAT
+AAGTACAAATTTCACGTCATTTTGAGTAAACTAATGTCTGATTGCACTTCAGTTATACAA
+CATCCGCAGCTTTTCCACATTTTAGGAAATATTAAAAGATCTGAAGATTTTATGGCTGAG
+AACTTTCCTACAAGTTCGATTTACCAACCCCTTCATGAAAAGGAACCAAATGCGATCAAA
+GTTGGCAAAAGTACGGTTCTCGATGTCATGGATATTCAAAAAACTGAAATATTTCTTACA
+AATATTGTGGGAAGTATGTGTTTTTTAAACATTTTTGATGTCCTATCGTTACATTTTGAA
+AAAAAATGTGTTATGCACTGGGAAGAATATGAAAAGAACTATCATTTCCTTACTTTGAAA
+AGTTTCAATGCATACTTAAAGCTAGCAGGGTTGATATCTGATTATCTCGAGAATAAGTTT
+CAAGGGAACATTTTAGAGAGTCGCGGTTATATCATAGATAAACAAATATGTTTTATGCTT
+GTAAGGATCTGGATGTTCCAATGTCGTATTTTGTTAAGGTTTTCATACAAGCAAGAAAGT
+CAGAAAAAATTGGCCTCTTCCAGTATATCCACTAACGATAATGAAAAAGAAGATGAAATG
+ATTGTCATTTTAGAAAGACTTATTAAACACATTCGTAACCAAATGGCACATTTAGTGGAT
+CTAGCAAAGGGAAAACTTCAAGATAGTTACTTTGGTGCTTACCAAACTGTTCCCATGTTT
+AGATACGTTGTGTATTTGATCGATGTTGGCGGCTTAGTATCTGTGACAAATGGGTTTTGG
+GATAAGATTTCCAGTGATGGTGAAATACCGCCAAAAGTACAACAAGCCGTGAGATTGAAA
+TGGGGATTGGACTGCAATAATTCGAGAAGAATCAAACAAAAGTTAATAAGCAGCCAGAGT
+TTGCAGAGTTTCAATCAAGTTCTGTTGTGCCAGATGGAGGATGCAGTTCTCTCCAGTTCC
+TTCGCAATAAAAGCCAATACCGCTATGTCCCAAAACACGGCTGAAGAATTTTTCAATATC
+AGCGAAGAAGAGGCTTTAAATCAACTATTGGAAAACAACAATTTTGATGCCTTCTGGGAT
+TTATTAGGTGAAAATCTGAGCGATATGCCTTCTTTGTGA
+>YCZ7 1092 residues Pha 0 Code 0
+ATGATTGGGTCCGCGTCCGACTCATCTAGCAAGTTAGGACGCCTCCGATTTCTTTCTGAA
+ACTGCCGCTATTAAAGTATCCCCGTTAATCCTAGGAGAAGTCTCATACGATGGAGCTCGT
+TCGGATTTTCTCAAATCAATGAACAAGAATCGAGCTTTTGAATTGCTTGATACTTTTTAC
+GAGGCAGGTGGAAATTTCATTGATGCCGCAAACAACTGCCAAAACGAGCAATCAGAAGAA
+TGGATTGGTGAATGGATACAGTCCAGAAGGTTACGTGATCAAATTGTCATTGCAACCAAG
+TTTATAAAAAGCGATAAAAAGTATAAAGCAGGTGAAAGTAACACTGCCAACTACTGTGGT
+AATCACAAGCGTAGTTTACATGTGAGTGTGAGGGATTCTCTCCGCAAATTGCAAACTGAT
+TGGATTGATATACTTTACGTTCACTGGTGGGATTATATGAGTTCAATCGAAGAATTTATG
+GATAGTTTGCATATTCTGGTCCAGCAGGGCAAGGTCCTCTATTTGGGTGTATCTGATACA
+CCTGCTTGGGTTGTTTCTGCGGCAAACTACTACGCTACATCTTATGGTAAAACTCCCTTT
+AGTATCTACCAAGGTAAATGGAACGTGTTGAACAGAGATTTTGAGCGTGATATTATTCCA
+ATGGCTAGGCATTTCGGTATGGCCCTCGCCCCATGGGATGTCATGGGAGGTGGAAGATTT
+CAGAGTAAAAAAGCAATGGAGGAACGGAGGAAGAATGGAGAGGGTATTCGTTCTTTCGTT
+GGCGCCTCCGAACAAACAGATGCAGAAATCAAGATTAGTGAAGCATTGGCCAAGATTGCT
+GAGGAACATGGCACTGAGTCTGTTACTGCTATTGCTATTGCCTATGTTCGCTCTAAGGCG
+AAAAATTTTTTTCCGTCGGTTGAAGGAGGAAAAATTGAGGATCTCAAAGAGAACATTAAG
+GCTCTCAGTATCGATCTAACGCCAGACAATATAAAATACTTAGAAAGTATAGTTCCTTTT
+GACATCGGATTTCCTAATAATTTTATCGTGTTAAATTCCTTGACTCAAAAATATGGTACG
+AATAATGTTTAG
diff --git a/menu.c b/menu.c
new file mode 100755
index 0000000..b96de7b
--- /dev/null
+++ b/menu.c
@@ -0,0 +1,1302 @@
+/**************************************************************************/
+/* CodonW codon usage analysis package */
+/* Copyright (C) 2005 John F. Peden */
+/* This program is free software; you can redistribute */
+/* it and/or modify it under the terms of the GNU General Public License */
+/* as published by the Free Software Foundation; version 2 of the */
+/* License, */
+/* */
+/* This program is distributed in the hope that it will be useful, but */
+/* WITHOUT ANY WARRANTY; without even the implied warranty of */
+/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
+/* GNU General Public License for more details. */
+/* You should have received a copy of the GNU General Public License along*/
+/* with this program; if not, write to the Free Software Foundation, Inc.,*/
+/* 675 Mass Ave, Cambridge, MA 02139, USA. */
+/* */
+/* */
+/* The author can be contacted by email (jfp#hanson-codonw at yahoo.com Anti-*/
+/* Spam please change the # in my email to an _) */
+/* */
+/* For the latest version and information see */
+/* http://codonw.sourceforge.net */
+/**************************************************************************/
+#include <stdlib.h>
+#ifdef _WINDOWS
+#include <process.h>
+#endif
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include "codonW.h"
+
+/************** Main menu **********************************************/
+/* Drives the menu system: runs the handler for menu number `menu` (0-9). */
+/*************************************************************************/
+void main_menu ( int menu )
+{
+ switch ( menu ) { /* 0 = initial menu, 1-8 = menu_1()..menu_8() */
+ case 0:
+ menu_initial();
+ break;
+ case 1:
+ menu_1();
+ break;
+ case 2:
+ menu_2();
+ break;
+ case 3:
+ menu_3();
+ break;
+ case 4:
+ menu_4();
+ break;
+ case 5:
+ menu_5();
+ break;
+ case 6:
+ menu_6();
+ break;
+ case 7:
+ menu_7();
+ break;
+ case 8:
+ menu_8();
+ break;
+ case 9: /* the "About" entry of the initial menu */
+ printinfo();
+ welcome();
+ pause; /* `pause` is defined outside this file chunk */
+ clearscr(pm->term_length);
+ break;
+ default: /* any other number is reported, not acted on */
+ fprintf ( stderr,"ERROR: Unrecognised menu in main_menu\n");
+ break;
+ }
+}
+
+
+/* First menu shown when CodonW runs interactively: loops until (R)un or */
+/* (Q)uit is chosen; digits 1-9 are dispatched through main_menu(). */
+void menu_initial (void)
+{
+ int loop = TRUE;
+ int c;
+
+ while (loop) { /* redisplay until Run or Quit */
+ printf (" Initial Menu \n");
+ printf (" Option\n\t (1) Load sequence file\n");
+
+/* printf ("\t (2) Check sequence file for redundancy\n"); */
+ printf ("\t ( )\n");
+ printf ("\t (3) Change defaults\n");
+ printf ("\t (4) Codon usage indices\n");
+ printf ("\t (5) Correspondence analysis\n");
+
+/* printf ("\t (6) Basic statistics\n"); */
+ printf ("\t ( ) \n");
+
+ printf ("\t (7) Teach yourself codon usage\n");
+ printf ("\t (8) Change the output written to file\n");
+ printf ("\t (9) About C-codons\n");
+ printf ("\t (R) Run C-codons \n");
+ printf ("\t (Q) Quit \n");
+ printf (" Select a menu choice, (Q)uit or (H)elp -> ");
+
+ if (gets(pm->junk) == NULL) /* EOF/read error: stale input would loop forever */
+ my_exit(2,"main menu"); /* leave the same way (Q)uit does */
+ if (isalpha((unsigned char)pm->junk[0])) { /* ctype needs unsigned char values */
+ c = toupper( (unsigned char) pm->junk[0]);
+
+ switch (c) {
+ case 'Q':
+ my_exit(2,"main menu");
+ break;
+ case 'R':
+ /* test that all the required files are opened */
+ if ( pm->inputfile && pm->outputfile && pm->tidyoutfile)
+ loop = FALSE;
+ else {
+ printf("Not all required files are open\n");
+ printf("About to open input and output files\n");
+ pause;
+ main_menu(1); /* menu 1 opens the files */
+ loop = FALSE;
+ }
+ break;
+ case 'H': /* help */
+ chelp ( "main_menu" );
+ break;
+ default:
+ fprintf( stderr, "The answer %s is not valid\n", pm->junk);
+ pause;
+ break;
+ } /* end of switch c */
+ } else if (isdigit((unsigned char) pm->junk[0])) {
+ c = atoi( pm->junk);
+ if (c > 0 && c <= 9 ) /* 0 would re-enter this menu */
+ main_menu( (int) c );
+ else
+ fprintf( stderr, "The answer %s is not valid\n", pm->junk);
+ }
+ clearscr(pm->term_length);
+ }
+ return;
+}
+
+/************************* menu_1 ******************************************/
+/* Opens the input sequence file and the two output files (.out and .blk). */
+/* If an input file name is already recorded, that file is closed and */
+/* open_file() is offered the same name again; otherwise "input.dat" is */
+/* offered. The output names are built from the root of the input name. */
+/* NOTE(review): relies on open_file() leaving the chosen name in pm->junk. */
+/***************************************************************************/
+void menu_1 (void)
+{
+ char root[MAX_FILENAME_LEN];
+ int n;
+
+ clearscr(pm->term_length);
+ printf (" Loading sequence menu (type h for help)\n");
+
+ if ( strlen(pm->curr_infilename) ) {
+ printf ( "The current active file is \"%s\"\n",pm->curr_infilename);
+ fileclose(&pm->inputfile);
+ if (!(pm->inputfile = open_file("input sequence file",
+ pm->curr_infilename, "r", FALSE)))
+ my_exit(1,"menu 1");
+ } else {
+ printf( " No sequence file is currently loaded\n");
+ if (!(pm->inputfile = open_file("input sequence file\t",
+ "input.dat", "r", FALSE)))
+ my_exit(1,"menu 1");
+ }
+ /* Record the name taken from pm->junk as the current input file, then */
+ /* copy it into root leaving room for a 4 character extension. strncpy() */
+ /* does not terminate a truncated copy, so root is terminated by hand. */
+
+ strncpy(pm->curr_infilename, pm->junk, MAX_FILENAME_LEN - 1);
+ strncpy(root, pm->curr_infilename , MAX_FILENAME_LEN - 5);
+ root[MAX_FILENAME_LEN - 5] = '\0';
+
+ /* open the .out filename: strip the extension, never crossing a directory */
+ for (n = (int) strlen(root); n && root[n] != '.' && root[n] != '/' && root[n] != '\\'; --n);
+ if (n && root[n] == '.') root[n] = '\0'; /* define root of the filename */
+
+ if ( strlen(pm->curr_outfilename)) {
+ printf( "\nThe previous output file was \"%s\"\n",
+ pm->curr_outfilename );
+ fclose( pm->outputfile);
+ }
+ if (!(pm->outputfile = open_file("output sequence file\t",
+ strcat(root, ".out"), "w", (int)pm->verbose)))
+ my_exit(1,"output menu1");
+
+ /* open the .blk filename */
+ strncpy(pm->curr_outfilename, pm->junk, MAX_FILENAME_LEN - 1);
+ strncpy(root, pm->curr_infilename , MAX_FILENAME_LEN - 5);
+ root[MAX_FILENAME_LEN - 5] = '\0'; /* strncpy() may leave root unterminated */
+
+ for (n = (int) strlen(root); n && root[n] != '.' && root[n] != '/' && root[n] != '\\'; --n);
+ if (n && root[n] == '.') root[n] = '\0'; /* find root of filename */
+
+ if ( strlen(pm->curr_tidyoutname)) {
+ printf( "\nThe previous bulk output file was \"%s\"\n",
+ pm->curr_tidyoutname );
+ fclose( pm->tidyoutfile);
+ }
+ if (!(pm->tidyoutfile = open_file("bulk output file\t",
+ strcat(root, ".blk"), "w", (int) pm->verbose)))
+ my_exit(1,"tidyout menu1");
+
+ strncpy(pm->curr_tidyoutname, pm->junk, MAX_FILENAME_LEN - 1);
+
+ clearscr(pm->term_length);
+ return;
+}
+
+/************************* menu_2 ******************************************/
+/* Placeholder menu (not implemented): only X/blank, (Q)uit and (H)elp act. */
+/***************************************************************************/
+void menu_2 (void)
+{
+ int loop = TRUE;
+ int c;
+
+ clearscr(pm->term_length);
+ while ( loop ) {
+ printf (" Menu 2 \n");
+ printf (" Purifying sequences menu\n");
+ printf ("\t ( ) Sorry currently unimplemented \n");
+ printf ("\t (X) Exit this menu\n");
+ printf (" Select a menu choice, (Q)uit or (H)elp -> ");
+ if (gets(pm->junk) == NULL) pm->junk[0] = '\0'; /* EOF acts as a blank line */
+ clearscr(pm->term_length);
+
+ if (isalpha((unsigned char)pm->junk[0]) || pm->junk[0]=='\0' ) { /* ctype needs unsigned char */
+ c = toupper( (unsigned char) pm->junk[0]);
+ switch ( c ) {
+ case 'Q':
+ my_exit(2,"menu 2");
+ break;
+ case 'X':
+ case '\0': /* blank line: back to the caller */
+ return;
+ case 'H':
+ chelp("menu_2");
+ break;
+ default:
+ fprintf( stderr, "The answer %s is not a valid\n", pm->junk);
+ pause;
+ break;
+ }
+ }
+ }
+ return;
+}
+
+/************************* menu_3 ******************************************/
+/* Runtime editor for the defaults held in *pm. Options 1-10 are listed */
+/* with their current values; X, a blank line or end of input returns to */
+/* the caller, Q quits, H shows help. Sub-menus 5, 6 and 7 accept entry 0. */
+/***************************************************************************/
+void menu_3 (void)
+{
+ int loop = TRUE;
+ int i;
+ int c;
+
+ clearscr(pm->term_length);
+ while (loop) {
+ printf (" Changing defaults\n");
+ printf (" Options\n");
+ printf (" %-40.40s", "(1) Change the ASCII delimiter in output");
+ printf ("{%s}\n",
+ (pm->seperator == ' ' ) ? "space" :
+ (pm->seperator == '\t') ? "tab" :
+ (pm->seperator == ',' ) ? "," :
+ "ERROR" );
+
+ printf (" %-40.40s", "(2) Run silently, No Warnings");
+ printf ("{%s}\n", (pm->verbose) ? "FALSE" : "TRUE");
+ printf (" %-40.40s", "(3) Log warnings/information to a file");
+ printf ("{%s}\n", (strlen(pm->curr_logfilename) > 1) ? "TRUE" :
+ "FALSE");
+ printf (" %-40.40s", "(4) Number of lines on screen");
+ printf ("{%d}\n", pm->term_length);
+ printf (" %-40.40s", "(5) Change the genetic code");
+ printf ("{%s}\n", cu[pm->code].des);
+ printf (" %-40.40s", "(6) Change the Fop/CBI values");
+ printf ("{%s}\n", fop[pm->f_type].des);
+ printf (" %-40.40s", "(7) Change the CAI values");
+ printf ("{%s}\n", cai[pm->c_type].des);
+ printf (" %-40.40s", "(8) Output Human or Computer readable");
+ printf ("{%s readable}\n", (pm->seq_format == 'M') ? "Computer" :
+ "Human");
+ printf (" %-40.40s", "(9) Concatenate or individual genes");
+ printf ("{%s genes}\n", (pm->totals == TRUE ? "concatenate":
+ "individual"));
+ printf (" %s", "(10) Correspondence analysis defaults\n");
+
+ printf (" (X) Return to previous menu\n");
+ printf ("Choices enclosed with curly brackets are the current "
+ "defaults\n");
+ printf (" Select a menu choice, (Q)uit or (H)elp -> ");
+ if (gets(pm->junk) == NULL) pm->junk[0] = '\0'; /* EOF acts as a blank line */
+ clearscr(pm->term_length);
+
+ if (isalpha((unsigned char) pm->junk[0])|| pm->junk[0]=='\0') { /* ctype needs unsigned char */
+ switch (c = toupper((unsigned char) pm->junk[0])){
+ case 'Q':
+ my_exit(2,"menu 3"); /* decided to quit program */
+ break;
+ case 'H':
+ chelp("menu_3");
+ continue; /* redisplay; do not fall into the numeric path */
+ case 'X':
+ case '\0':
+ return; /* way out of loop is X or blank line */
+ break;
+ default:
+ fprintf(stderr,"The answer %s is not a valid\n", pm->junk);
+ pause;
+ continue;
+ break;
+ }
+ }
+
+ c=0;
+ if (isdigit((unsigned char)pm->junk[0]))
+ c = atoi(pm->junk);
+ if ( c <= 0 || c > 10 ) { /* was &&, which can never be true */
+ fprintf( stderr, "The answer %s is not valid\n", pm->junk);
+ continue;
+ }
+
+ switch ((int) c) {
+ case 1:
+ clearscr(pm->term_length);
+ printf (" The current separator is \"%s\"\n",
+ (pm->seperator == ' ' ) ? "space" :
+ (pm->seperator == '\t') ? "tab" :
+ (pm->seperator == ',' ) ? "," :
+ "ERROR" );
+ printf (" Please select a new separator \t:");
+ if (gets(pm->junk) == NULL) pm->junk[0] = '\0'; /* EOF: empty answer */
+ c = pm->junk[0]; /* take first character of string */
+
+ if ( strchr ("\t, ", (int)c) == NULL || c == '\0' ) {
+ /* remember the \0 is in every string */
+ printf( "WARNING: The chosen separator %s is unsuitable\n",
+ pm->junk);
+ printf( "\tSeparator is unchanged try comma,tab "
+ "or space\n\n");
+ } else
+ pm->seperator = (char) c; /* specify the column separator */
+
+ break;
+ case 2: /* warn about overwriting files?*/
+ clearscr(pm->term_length);
+ pm->verbose = (char) ((pm->verbose) ? FALSE : TRUE);
+ pm->warn = (char) ((pm->warn ) ? FALSE : TRUE);
+ break;
+ case 3: /* redirect errors to a file */
+ if ( strlen(pm->curr_logfilename) > 1 ) {
+ strcpy(pm->curr_logfilename , "" ); /* blank logfilename */
+ pm->my_err = stderr; /* redirects errors */
+ /* to stderr */
+ fclose(pm->logfile); /* close logfile */
+ } else {
+ /* open logfile and redirect stderr */
+ if (!(pm->logfile = open_file("log filename \t",
+ "warning.log", "w", (int) pm->verbose)))
+ my_exit(1," open log file menu 3");
+ pm->my_err = pm->logfile;
+ strncpy(pm->curr_logfilename, pm->junk, MAX_FILENAME_LEN-1);
+ } /* end of if */
+ break;
+
+ case 4: /* No of line on term*/
+ printf("Please give the new height of the screen [%i] ",
+ pm->term_length);
+ if (gets(pm->junk) == NULL) pm->junk[0] = '\0'; /* EOF: keep height */
+ if ( isdigit( (unsigned char) pm->junk[0]))
+ pm->term_length = atoi(pm->junk) ;
+ break;
+
+ case 5: /*Change genetic code */
+ clearscr(pm->term_length);
+ printf(" Genetic codes currently supported are\n");
+ /* NumGeneticCodes is given in codonW.h */
+ for ( i = 0 ; i < NumGeneticCodes ; i++) {
+ (pm->code == i) ? printf ( " (%i) {%-45.45s %-17.17s}", i,
+ cu[i].des, cu[i].typ) :
+ printf ( " (%i) %-45.45s %-17.17s ", i, cu[i].des,
+ cu[i].typ) ;
+ printf("\n");
+ }
+ printf("Choice enclosed with curly brackets is "
+ "the current code\n");
+ printf("Please select a new code [no change]\n");
+ if (gets(pm->junk) == NULL) pm->junk[0] = '\0'; /* EOF: no change */
+ if ( isdigit( (unsigned char) pm->junk[0]) ) {
+ c = atoi(pm->junk); /* no char cast: it could wrap a bad number into range */
+ if ( c >= 0 && c < NumGeneticCodes && pm->code!= (char) c ){ /* entry 0 is listed, so allow it */
+ pm->code = (char) c;
+ initilize_point(pm->code,pm->f_type, pm->c_type);
+ }
+ }
+ break;
+
+ case 6: /*Change optimal codons*/
+ clearscr(pm->term_length);
+ printf(" Fop values pre-loaded are\n");
+ /* NumFopSpecies defined with the Fop_struct in codonW.h */
+ for ( i = 0 ; i < NumFopSpecies ; i++) {
+ (pm->f_type == i) ? printf (" (%i) {%-25.25s %-40.40s}",
+ i, fop[i].des, fop[i].ref) :
+ printf (" (%i) %-25.25s %-40.40s ", i, fop[i].des,
+ fop[i].ref) ;
+ printf("\n");
+ }
+ printf ("Choice enclosed with curly brackets is the current "
+ "selection\n");
+ printf ("Please select a type [no change]\n");
+ if (gets(pm->junk) == NULL) pm->junk[0] = '\0'; /* EOF: no change */
+ if ( isdigit( (unsigned char) pm->junk[0]) ) {
+ c = atoi(pm->junk); /* no char cast: it could wrap a bad number into range */
+ if ( c >= 0 && c < NumFopSpecies && pm->f_type!=(char) c) { /* entry 0 is listed, so allow it */
+ pm->f_type = (char) c;
+ initilize_point(pm->code,pm->f_type, pm->c_type);
+ }
+ }
+ break;
+
+ case 7: /*Change CAI w values */
+ clearscr(pm->term_length);
+ printf(" CAI types currently supported are\n");
+
+ /* NumCaiSpecies currently defined in codonW.h */
+ for ( i = 0 ; i < NumCaiSpecies ; i++) {
+ (pm->c_type == i) ? printf (" (%i) {%-25.25s %-40.40s}",
+ i, cai[i].des, cai[i].ref) :
+ printf (" (%i) %-25.25s %-40.40s ", i, cai[i].des,
+ cai[i].ref) ;
+ printf("\n");
+ }
+ printf ("Choice enclosed with curly brackets is the current "
+ "selection\n");
+ printf ("Please chose a new CAI [no change]\n");
+ if (gets(pm->junk) == NULL) pm->junk[0] = '\0'; /* EOF: no change */
+ if ( isdigit( (unsigned char) pm->junk[0]) ) {
+ c = atoi( pm->junk); /* no char cast: it could wrap a bad number into range */
+
+ /* if valid value (entry 0 included) and different from the current choice */
+ if ( c >= 0 && c < NumCaiSpecies && pm->c_type!=(char) c){
+ pm->c_type = (char) c;
+ initilize_point(pm->code,pm->f_type, pm->c_type);
+ }
+ }
+ break;
+ case 8: /* machine or human readable format */
+ clearscr(pm->term_length);
+ pm->seq_format =
+ (char) ( pm->seq_format == 'M' ? 'H' : 'M'); /*toggle */
+ break;
+ case 9: /* concatenate genes? */
+ clearscr(pm->term_length);
+ pm->totals = (char) (pm->totals == TRUE ? FALSE : TRUE);
+ break;
+ case 10: /* change COA default then go to menu5*/
+ clearscr(pm->term_length);
+ if( !pm->coa ) /* no COA chosen yet: pick one first */
+ menu_5();
+ else
+ menu_coa();
+ break;
+ default:
+ fprintf( stderr, "The answer %s is not a valid\n", pm->junk);
+ break;
+ }
+ }
+ return;
+}
+
+/************************* menu_4 ******************************************/
+/* Toggle which indices are calculated; 12 selects all; X/blank/EOF returns */
+/***************************************************************************/
+void menu_4 (void)
+{
+ char loop = TRUE;
+ char *choices[] = {
+ " ",
+ "Codon Adaptation Index (CAI)",
+ "Frequency of OPtimal codons (Fop)",
+ "Codon bias index (CBI)",
+ "Effective Number of Codons (ENc)",
+ "GC content of gene (G+C)",
+ "GC of silent 3rd codon posit.(GC3s)",
+ "Silent base composition",
+ "Number of synonymous codons (L_sym)",
+ "Total number of amino acids (L_aa )",
+ "Hydrophobicity of protein (Hydro)",
+ "Aromaticity of protein (Aromo)",
+ "Select all"
+ };
+ int i,NumChoices;
+ int c;
+
+
+ NumChoices = (char) 12; /* last usable index of choices[] */
+
+ clearscr(pm->term_length);
+ while (loop) {
+ printf (" Codon usage indices\n");
+ printf (" Options\n");
+
+ for (i = 1; i <= NumChoices; i++) { /* selected entries are shown in {} */
+ printf(" (%2i) ", i);
+ switch ((int) i) {
+ case 1:
+ (pm->cai) ? printf ("{%-45.45s}", choices[i]) :
+ printf (" %s ", choices[i]);
+ break;
+ case 2:
+ (pm->fop) ? printf ("{%-45.45s}", choices[i]) :
+ printf (" %s ", choices[i]);
+ break;
+ case 3:
+ (pm->cbi) ? printf ("{%-45.45s}", choices[i]) :
+ printf (" %s ", choices[i]);
+ break;
+ case 4:
+ (pm->enc) ? printf ("{%-45.45s}", choices[i]) :
+ printf (" %s ", choices[i]);
+ break;
+ case 5:
+ (pm->gc) ? printf ("{%-45.45s}", choices[i]) :
+ printf (" %s ", choices[i]);
+ break;
+ case 6:
+ (pm->gc3s)? printf ("{%-45.45s}", choices[i]) :
+ printf (" %s ", choices[i]);
+ break;
+ case 7:
+ (pm->sil_base) ? printf ("{%-45.45s}", choices[i]) :
+ printf (" %s ", choices[i]);
+ break;
+ case 8:
+ (pm->L_sym) ? printf ("{%-45.45s}", choices[i]) :
+ printf (" %s ", choices[i]);
+ break;
+ case 9:
+ (pm->L_aa)? printf ("{%-45.45s}", choices[i]) :
+ printf (" %s ", choices[i]);
+ break;
+ case 10:
+ (pm->hyd ) ? printf ("{%-45.45s}", choices[i]) :
+ printf (" %s ", choices[i]);
+ break;
+ case 11:
+ (pm->aro ) ? printf ("{%-45.45s}", choices[i]):
+ printf (" %s ", choices[i]);
+ break;
+ case 12:
+ printf (" %s ", choices[i]);
+ break;
+ default:
+ fprintf(stderr, "programming error \n");
+ my_exit(99, "menu 4");
+ break;
+ }
+ printf("\n");
+ }
+ printf (" (X) Return to previous menu\n");
+ printf ("Choices enclosed with curly brackets are the current"
+ " selections\n");
+ printf (" Select a menu choice, (Q)uit or (H)elp -> ");
+
+
+ if (gets(pm->junk) == NULL) pm->junk[0] = '\0'; /* EOF acts as a blank line */
+
+ if (isalpha( (unsigned char) pm->junk[0]) || pm->junk[0]=='\0') { /* ctype needs unsigned char */
+ switch (c = toupper( (unsigned char) pm->junk[0])){
+ case 'Q':
+ my_exit(2,"menu 4"); /* User decides to quit programme*/
+ break;
+ case 'X':
+ case '\0':
+ return; /* <-back to previous menu-> */
+ break;
+ case 'H':
+ chelp("menu_4");
+ continue;
+ break;
+ default:
+ fprintf( stderr, "The answer %s is not a valid choice\n",
+ pm->junk);
+ continue;
+ break;
+ }
+ } else if (isdigit ( (unsigned char) pm->junk[0] ) ) {
+ c = atoi(pm->junk);
+ switch ((int) c) {
+ /* User wants to calculate CAI then we explain that it is */
+ /* dependent on the choice of CAI adaptiveness values */
+ case 1:
+ pm->cai = (char) ((pm->cai) ? FALSE : TRUE);
+ if( pm->cai){
+ clearscr(pm->term_length);
+ printf("\nTo calculate CAI a reference set of highly ");
+ printf("expressed genes \nmust be selected\n\n");
+ printf("The reference set currently selected is that of "
+ "%s\n\n",cai[pm->c_type].des);
+ printf("See the menu 'Change defaults' to change this "
+ "selection\n\n");
+ printf("If you wish to use a personal choice of CAI "
+ "vaules\n");
+ printf("\tplease continue and you will be prompted for"
+ " input\n\n");
+ pause;
+ }
+ break ;
+ case 2:
+ /* User wants to calculate Fop then we explain that it is */
+ /* dependent on the choice of optimal codons */
+ pm->fop = (char) ((pm->fop) ? FALSE : TRUE);
+ if(pm->fop){
+ clearscr(pm->term_length);
+ printf("\n\nYou have chosen to calculate Fop\n\n");
+ printf("To calculate Fop a set of optimal "
+ "codons must be selected\n");
+ printf("The optimal codons of %s are the current selection"
+ "\n\n",fop[pm->f_type].des);
+ printf("See the menu 'Change defaults' to change Fop "
+ "selection\n\n");
+ printf("If you wish to use a personal choice of Fop "
+ "vaules\n");
+ printf("\tplease continue and you will be prompted for "
+ "input\n\n");
+ pause;
+ }
+ break ;
+ case 3:
+ /* User wants to calculate CBI then we remind then that it is */
+ /* dependent on the choice of optimal codons */
+ pm->cbi = (char) ((pm->cbi) ? FALSE : TRUE);
+ if(pm->cbi){
+ clearscr(pm->term_length);
+ printf("\n\nYou have chosen to calculate CBI\n\n");
+ printf("To calculate CBI a set of optimal "
+ "codons must be selected\n");
+ printf("The optimal codons of %s are the current selection"
+ "\n\n",fop[pm->f_type].des);
+ printf("See the menu 'Change defaults' to change CBI "
+ "selection\n\n");
+ printf("If you wish to use a personal choice of CBI "
+ "vaules\n");
+ printf("\tplease continue and you will be prompted for "
+ "input\n\n");
+ pause;
+ }
+ break ;
+ case 4: /* calc Nc */
+ pm->enc = (char) ( (pm->enc) ? FALSE : TRUE);
+ break ;
+ case 5: /* calc GC */
+ pm->gc = (char) ((pm->gc ) ? FALSE : TRUE);
+ break ;
+ case 6: /* calc GC3s */
+ pm->gc3s =(char) ( (pm->gc3s) ? FALSE : TRUE);
+ break ;
+ case 7: /* calc sil base */
+ pm->sil_base = (char) ((pm->sil_base) ? FALSE : TRUE);
+ break ;
+ case 8: /* No. synonyms */
+ pm->L_sym = (char) ((pm->L_sym) ? FALSE : TRUE);
+ break ;
+ case 9: /* No. AminoAcids*/
+ pm->L_aa = (char) ((pm->L_aa) ? FALSE : TRUE);
+ break ;
+ case 10: /* hydropathicity*/
+ pm->hyd =(char) ( (pm->hyd ) ? FALSE : TRUE);
+ break;
+ case 11: /* aromatic */
+ pm->aro = (char) ((pm->aro ) ? FALSE : TRUE);
+ break;
+ case 12: /* all the above */
+ pm->cai = (char) TRUE;
+ pm->fop = (char) TRUE;
+ pm->cbi = (char) TRUE;
+ pm->enc = (char) TRUE;
+ pm->gc = (char) TRUE;
+ pm->gc3s = (char) TRUE;
+ pm->sil_base
+ = (char) TRUE;
+ pm->L_sym = (char) TRUE;
+ pm->L_aa = (char) TRUE;
+ pm->hyd = (char) TRUE;
+ pm->aro = (char) TRUE;
+ break ;
+ default:
+ fprintf( stderr, "The answer %s is not a valid\n", pm->junk);
+ break;
+ }
+ } else
+ fprintf( stderr, "The answer %s is not a valid choice\n",
+ pm->junk);
+ }
+ return;
+}
+
+/************************* menu_5 ******************************************/
+/* Select what type of COA (pm->coa = 'c', 'r', 'a' or FALSE for none) */
+/***************************************************************************/
+void menu_5 (void)
+{
+ char *choices[] = {
+ "",
+ "COA on codon usage",
+ "COA on RSCU",
+ "COA on Amino Acid usage",
+ "Do not perform a COA"
+ };
+ int loop = TRUE;
+ int i,c,NumChoices;
+
+ NumChoices = 4;
+
+ clearscr(pm->term_length);
+
+ while ( loop ) {
+ printf (" Menu 5 Correspondence analysis\n");
+ printf (" Correspondence analysis (COA) \n");
+
+ for (i = 1; i <= NumChoices; i++) { /* current selection is shown in {} */
+ printf(" (%i) ", i);
+ switch ((int) i) {
+ case 1:
+ (pm->coa=='c') ? printf ("{%-45.45s}", choices[1]):
+ printf (" %s ", choices[1]);
+ break;
+ case 2:
+ (pm->coa=='r') ? printf ("{%-45.45s}", choices[2]):
+ printf (" %s ", choices[2]);
+ break;
+ case 3:
+ (pm->coa=='a') ? printf ("{%-45.45s}", choices[3]):
+
+ printf (" %s ", choices[3]);
+ break;
+ case 4:
+ (pm->coa== 0 ) ? printf ("{%-45.45s}", choices[4]):
+ printf (" %s ", choices[4]);
+ break;
+ default:
+ fprintf(stderr, "programming error \n");
+ my_exit(99,"menu 5");
+ break;
+ }
+ printf("\n");
+ }
+ printf (" (X) Exit this menu\n");
+ printf (" Select a menu choice, (Q)uit or (H)elp -> ");
+ if (gets(pm->junk) == NULL) pm->junk[0] = '\0'; /* EOF acts as a blank line */
+ clearscr(pm->term_length);
+
+ if (isalpha( (unsigned char) pm->junk[0]) || pm->junk[0]=='\0') { /* ctype needs unsigned char */
+ c = toupper( (unsigned char) pm->junk[0]);
+ switch ( c ) {
+ case 'Q':
+ my_exit(2,"menu 5");
+ break;
+ case 'X':
+ case '\0':
+ return;
+ break;
+ case 'H':
+ chelp("menu_5_coa");
+ continue;
+ break;
+ default:
+ fprintf( stderr, "The answer %s is not a valid\n", pm->junk);
+ continue; /* ask again; skip the advanced-menu prompt below */
+ }
+ } else {
+ c = atoi(pm->junk);
+ if ( c > 0 && c <= 4 ) {
+ switch ((int) c){
+ case 1:
+ pm->coa = 'c'; /* COA of CU */
+ break ;
+ case 2:
+ pm->coa = 'r'; /* COA of RSCU*/
+ break ;
+ case 3:
+ pm->coa = 'a'; /* COA of AA */
+ break ;
+ case 4:
+ pm->coa = FALSE;
+ break;
+#ifdef DEBUG
+ default:
+ fprintf(pm->my_err,"Error in switch in coa_raw_out\n");
+#endif
+ }
+ } else {
+ fprintf(stderr,"The answer %s is not a valid\n", pm->junk);
+ continue; /* was break, which silently left the whole menu */
+ }
+ }
+
+ if ( pm->coa ) {
+ printf( " Do you wish to see the advanced COA menu (Y/N) [N] ");
+ if (gets( pm->junk ) == NULL) pm->junk[0] = '\0'; /* EOF: take the default (N) */
+
+ /* Select the default codon/AAs to analyse, based on genetic code */
+ initilize_coa (pm->code);
+
+ if ( (char) toupper( (unsigned char) pm->junk[0]) == 'Y' ) menu_coa();
+ }
+
+ } /* while loop */
+ return;
+}
+
+/************************* menu_6 ******************************************/
+/* Originally designed for the calculation of correlations and */
+/* other simple stats. This code is currently implemented as a perl module */
+/* and is waiting to be ported to C hence the menu is unimplemented */
+/***************************************************************************/
+
+void menu_6 (void)
+{
+  /* Placeholder "Basic Stats" menu: nothing is implemented here, but the   */
+  /* usual keys work (X or empty line leaves, Q quits, H shows help) and a  */
+  /* number from 1 to 9 is handed on to main_menu().                        */
+  int key;
+  int number;
+
+  clearscr(pm->term_length);
+
+  for (;;) {
+    printf (" Menu 6-Basic Stats\n");
+    printf ("\n");
+    printf ("\t ( ) Sorry currently unimplemented \n");
+    printf ("\t (X) Exit this menu\n");
+    printf (" Select a menu choice, (Q)uit or (H)elp -> ");
+    gets(pm->junk);
+    clearscr(pm->term_length);
+
+    /* Anything that is neither a letter nor an empty line is read as a     */
+    /* number                                                               */
+    if ( !isalpha( (int) pm->junk[0]) && pm->junk[0] != '\0' ) {
+      number = atoi(pm->junk);
+      if ( number < 1 || number > 9 ) {
+        fprintf( stderr, "The answer %s is not a valid\n", pm->junk);
+        continue;
+      }
+      main_menu((int) number);
+      continue;
+    }
+
+    key = toupper( (int) pm->junk[0]);
+
+    if ( key == 'X' || key == '\0' )
+      return;                              /* back to the calling menu      */
+
+    if ( key == 'Q' ) {
+      my_exit(2,"menu 6");
+    } else if ( key == 'H' ) {
+      chelp("menu_6");
+    } else {
+      fprintf( stderr, "The answer %s is not a valid\n", pm->junk);
+      pause;
+    }
+  }
+}
+
+/************************* menu_7 ******************************************/
+/* This selection generates random questions about the genetic code that */
+/* has been selected. For more information see tester.c */
+/***************************************************************************/
+void menu_7 (void)
+{
+  /* "A bit of fun" menu: option 1 starts the genetic code quiz (tester()), */
+  /* X or an empty line leaves, Q quits and H shows the help for this menu. */
+  int key;
+
+  clearscr(pm->term_length);
+
+  for (;;) {
+    printf (" Menu 7 A Bit of fun \n");
+    printf ("\n");
+    printf (" (1) Test your knowledge of the genetic code \n");
+    printf (" (X) Exit this menu\n");
+    printf (" Select a menu choice, (Q)uit or (H)elp -> ");
+    gets(pm->junk);
+    clearscr(pm->term_length);
+
+    /* Numeric (non-letter, non-empty) replies: only 1 is meaningful        */
+    if ( !isalpha( (int) pm->junk[0]) && pm->junk[0] != '\0' ) {
+      if ( atoi(pm->junk) == 1 )
+        tester();
+      else
+        fprintf( stderr, "The answer %s is not a valid\n", pm->junk);
+      continue;
+    }
+
+    key = toupper( (int) pm->junk[0]);
+
+    if ( key == 'X' || key == '\0' )
+      return;                              /* back to the calling menu      */
+
+    if ( key == 'Q' ) {
+      my_exit(2,"menu 7");
+    } else if ( key == 'H' ) {
+      chelp("menu_7");
+    } else {
+      fprintf( stderr, "The answer %s is not a valid\n", pm->junk);
+      pause;
+    }
+  }
+}
+
+/************************* menu_8 ******************************************/
+/* This menu allows the selection of the output to be written to the file */
+/* .blk. Only one selection can be made at a time. However CodonW can be */
+/* rerun with the same input file but with different output options. To */
+/* make this easier each time this menu is selected the user is given the */
+/* choice of changing the output file */
+/***************************************************************************/
+
+
+void menu_8 (void)
+{
+  /* One selectable bulk-output format: the text shown in the menu and the  */
+  /* single character stored in pm->bulk when it is chosen                  */
+  struct multi {
+    char *string;
+    char  prog;
+  };
+
+  /* Entry 0 is a dummy so that the menu numbers index the table directly   */
+  struct multi options[] = {
+    { " ",                                    ' ' },
+    { "Fasta format output of DNA sequence",  'T' },
+    { "Reader format output of DNA sequence", 'R' },
+    { "Translate input file to AA sequence",  'N' },
+    { "Codon Usage",                          'C' },
+    { "Amino acid usage",                     'A' },
+    { "RSCU values",                          'S' },
+    { "Relative Amino Acid usage",            'L' },
+    { "Dinucleotide frequencies",             'D' },
+    { "Exhaustive base compostion analysis",  'B' },
+    { "No output written to file",            'X' }
+  };
+  const int last_choice = 10;              /* highest valid menu number     */
+  int key;
+  int item;
+
+  clearscr(pm->term_length);
+
+  /* Only one bulk output type can be written per run, so after an         */
+  /* analysis has been run the user is offered a new bulk output filename  */
+  if ( !pm->analysis_run ) {
+    printf("Note: No output file has been selected !\n");
+  } else {
+    printf (" The current bulk output file is %s do you "
+            "wish to change this (y/n) [n] ", pm->curr_tidyoutname);
+    gets(pm->junk);
+
+    if ( toupper( (int) pm->junk[0]) == 'Y') {
+      fileclose(&pm->tidyoutfile);
+
+      pm->tidyoutfile = open_file("codon usage output file",
+                                  pm->curr_tidyoutname, "w",(int)pm->verbose);
+      if ( !pm->tidyoutfile )
+        my_exit(1, "menu 8");
+
+      /* pm->junk is read here as the name of the file just opened         */
+      strncpy(pm->curr_tidyoutname, pm->junk, MAX_FILENAME_LEN - 1);
+    }
+  }
+
+  for (;;) {
+    printf (" Menu 8\n");
+    printf (" This output will be saved to %s\n\n", pm->curr_tidyoutname);
+
+    /* The currently selected format is shown inside curly brackets        */
+    for ( item = 1; item <= last_choice; item++) {
+      if (options[item].prog == (char) pm->bulk)
+        printf("\n\t{(%2d) %-45.45s\t\t}", item, options[item].string);
+      else
+        printf("\n\t (%2d) %s", item, options[item].string);
+    }
+
+    printf ("\n\t ( X) To return to previous menu\n");
+    printf ("Values enclosed with curly{} brackets are the current "
+            "selection\n");
+    printf (" Select a menu choice, (Q)uit or (H)elp -> ");
+    gets(pm->junk);
+    clearscr(pm->term_length);
+
+    /* Numeric (non-letter, non-empty) replies select an output format     */
+    if ( !isalpha( (int) pm->junk[0]) && pm->junk[0] != '\0' ) {
+      key = atoi(pm->junk);
+      if ( key > 0 && key <= last_choice )
+        pm->bulk = options[key].prog;
+      else
+        fprintf( stderr, "The answer %s is not a valid\n", pm->junk);
+      continue;
+    }
+
+    key = toupper( (int) pm->junk[0]);
+
+    if ( key == 'X' || key == '\0' )
+      return;                              /* <-back to previous menu->     */
+
+    if ( key == 'Q' ) {
+      my_exit(2,"menu 8");                 /* User decides to quit          */
+    } else if ( key == 'H' ) {
+      chelp("menu_8_blk");
+    } else {
+      fprintf( stderr, "The answer %s is not a valid\n", pm->junk);
+      pause;
+    }
+  }
+}
+
+
+/*********************** menu_coa ***************************************/
+/* This is the advanced correspondence menu. This menu is optional: when  */
+/* a correspondence analysis is chosen, the user is given the choice of   */
+/* entering this menu */
+/**************************************************************************/
+void menu_coa (void)
+{
+  /* Advanced correspondence analysis (COA) menu.                           */
+  /*  (1) toggle which codons/amino acids are analysed (select_coa)         */
+  /*  (2) set the number of axes recorded (max 20 for amino acid COA,       */
+  /*      59 otherwise)                                                     */
+  /*  (3) name a fasta file of genes to be added after the COA              */
+  /*  (4) toggle normal/exhaustive output                                   */
+  /*  (5) set the number (or percentage) of genes used to identify the      */
+  /*      optimal codons. A percentage is stored in pcoa->fop_gene as a     */
+  /*      negative value, an absolute gene count as a positive value.       */
+  /* X or an empty line returns to the caller, Q quits, H shows help.       */
+  int loop = TRUE;
+  char *p;
+  int c;
+  int i;
+  int max_axis;
+
+  clearscr(pm->term_length);
+  while ( loop ) {
+    printf ("Advanced Correspondence Analysis\n");
+    printf (" (1) (Un)Select %s\n", (pm->coa=='a')? "amino acids": "codons");
+    printf (" (2) Change the number of axis (factors) recorded to file\n");
+    printf (" (3) Add additional genes after COA\n");
+    printf (" (4) Toggle level of COA output [%s]\n",
+            (pcoa->level=='e')? "Exhaustive":"Normal");
+
+    if(pm->coa != 'a' )
+      printf (" (5) No. genes used to identify optimal codons [%i%s]\n",
+              (pcoa->fop_gene <0)? (pcoa->fop_gene*-1): pcoa->fop_gene,
+              (pcoa->fop_gene <0)? "%" : " genes");
+
+    printf (" (X) Exit this menu\n");
+    printf (" Select a menu choice, (Q)uit or (H)elp -> ");
+    gets(pm->junk);
+    clearscr(pm->term_length);
+
+    if (isalpha( (int) pm->junk[0]) || pm->junk[0]=='\0' ) {
+      c = toupper( (int) pm->junk[0]);
+      switch ( c ) {
+      case 'Q':
+        my_exit(2, "menu coa");
+        break;
+      case 'X' :
+      case '\0':
+        return;
+      case 'H':
+        chelp("menu_coa");
+        continue;
+      default:
+        fprintf( stderr, "The answer %s is not a valid\n", pm->junk);
+        pause;
+        break;
+      }
+    }else{
+      c = atoi(pm->junk);
+      switch ( (int) c ) {
+      case 1:
+        select_coa( pm->coa );             /* select what to analyse        */
+        break;
+      case 2:                              /* Num of axis to record         */
+        printf ( "Changing the number of axis generated from %i "
+                 "Please input new value [%i]", (int)pcoa->axis,(int)pcoa->axis);
+        gets(pm->junk);
+        if ( !strlen(pm->junk) ) break;
+        if ( isalpha( (int) pm->junk[0])) break;
+
+        /* Keep the full int value: narrowing to char before the range     */
+        /* check would let an input such as 300 wrap round to a value      */
+        /* that looks valid                                                */
+        i = atoi(pm->junk);
+        max_axis = ( pm->coa == 'a' ) ? 20 : 59;
+        if ( i < 0 || i > max_axis ) {
+          fprintf(pm->my_err,"Value is out of range adjusting to max value\n");
+          pcoa->axis = max_axis;
+        } else {
+          pcoa->axis = i;
+        }
+        break;
+      case 3:                              /* Add additional genes          */
+        printf("You have elected to add genes after the initial COA is complete\n"
+               "these will not affect the generation of axis (factors) but can\n"
+               "identify were these additional genes fall based on the trends \n"
+               "identified among the original genes\n"
+               "You must have a separate file containing sequence(s) that are\n"
+               "to be added (these genes must be DNA in fasta format)\n"
+               "Please input filename [cancel this option]: ");
+        gets(pm->junk);
+        if ( !strlen(pm->junk) ) break;
+        strncpy(pcoa->add_row,pm->junk,MAX_FILENAME_LEN-1);
+        break;
+      case 4:                              /* report analysis of inertia    */
+        pcoa->level = (char) ( (pcoa->level=='n')? 'e':'n');
+        break;
+      case 5:                              /* how to identify optimal codons*/
+        printf ("You have elected to alter the number of genes used \n"
+                "to identify the optimal codons\n"
+                "You can input either an absolute number of genes or a\n"
+                "percentage (example 10%%)\n "
+                "\tPlease input your choice []");
+        gets ( pm->junk);
+        if( !strlen(pm->junk) ) continue;
+        if( (p=strchr ( pm->junk,'%')) != NULL) {
+          *p='\0';
+          pcoa->fop_gene=atoi(pm->junk)*-1;
+
+          /* The percentage is held negated, so the valid range 0% < x <   */
+          /* 50% corresponds to -50 < fop_gene < 0. Comparing against +50  */
+          /* rejected every percentage that was entered.                   */
+          if ( pcoa->fop_gene >= 0 || pcoa->fop_gene <= -50 ) {
+            printf ( " Limits are >0%% and less than 50%%\n");
+            pcoa->fop_gene= (-10);         /* assume default                */
+          }
+        }else {
+          pcoa->fop_gene=atoi(pm->junk);   /* set No. genes                 */
+        }
+        break;
+      default :
+        fprintf(pm->my_err,"Answer out of range\n");
+        break;
+      }
+    }
+  }
+  return;
+}
+
+/*********************** select_coa ****************************************/
+/* This menu is called if the user wants to change the default codons/AA */
+/* to be analysed in the COA. It is called from menu_coa */
+/***************************************************************************/
+void select_coa ( char choice )
+{
+  /* Interactive selection of the amino acids (choice == 'a') or codons     */
+  /* (any other choice) that take part in the COA. The current table is     */
+  /* printed with excluded entries in square brackets; the user then types  */
+  /* one or more numbers, each of which toggles the matching entry in       */
+  /* pcoa->amino[] or pcoa->codons[]. X or an empty line returns, H shows   */
+  /* the help text.                                                         */
+  int loop = TRUE;
+  int last_row[4];
+  int toggle;
+  int x;
+
+  char *startpoint, *endpoint;
+
+  clearscr(pm->term_length);
+
+  while ( loop ) {
+    if ( choice == 'a' ) {                 /* if AA analysis then           */
+      for ( x = 1 ; x < 22 ; x++ ) {
+        if (!pcoa->amino[x] )
+          printf("[(%2i)_%s_%s] ", x, paa->aa3[x],paa->aa1[x] );
+        else
+          printf(" (%2i)_%s_%s ", x, paa->aa3[x],paa->aa1[x] );
+
+        if ( !(x % 4) ) printf( "\n");
+      }
+      printf( "\n");
+
+/*************** Sample of aa choice output ****************************/
+/* ( 1)_Phe_F ( 2)_Leu_L ( 3)_Ile_I ( 4)_Met_M */
+/* ( 5)_Val_V ( 6)_Ser_S ( 7)_Pro_P ( 8)_Thr_T */
+/* ( 9)_Ala_A (10)_Tyr_Y [(11)_TER_*] (12)_His_H */
+/* (13)_Gln_Q (14)_Asn_N (15)_Lys_K (16)_Asp_D */
+/* (17)_Glu_E (18)_Cys_C (19)_Trp_W (20)_Arg_R */
+/* (21)_Gly_G */
+
+    }else {
+      /* last_row[] remembers, per display column, the amino acid printed  */
+      /* on the previous row so the name is shown only when it changes.    */
+      /* It has to be reset before every redraw: it was previously read    */
+      /* uninitialised for the first four codons. -1 is used as a value    */
+      /* that pcu->ca[] is assumed never to contain.                       */
+      for ( x = 0 ; x < 4 ; x++ )
+        last_row[x] = -1;
+
+      printf ( "Using %s \n", pcu->des );
+      for ( x = 1 ; x < 65 ; x++ ) {
+
+        if ( !pcoa->codons[x] ) printf("[");
+        else printf(" ");
+
+        if (last_row[x%4] != pcu->ca[x] )
+          printf( "(%2i) %s\t%s", x,paa->aa3[pcu->ca[x]],paa->cod[x]);
+        else
+          printf( "(%2i) \t%s", x,paa->cod[x]);
+
+        if ( !pcoa->codons[x] ) printf("]");
+        else printf(" ");
+
+        last_row[x%4] = pcu->ca[x];
+
+        if ( !(x % 4) )
+          printf( "\n");
+        if ( !(x % 16))
+          printf( "\n");
+      }
+    }
+
+/*************** Sample of codon choice output ***********************/
+/* Using Universal Genetic code */
+/* ( 1) Phe UUU ( 2) Ser UCU ( 3) Tyr UAU ( 4) Cys UGU */
+/* ( 5) UUC ( 6) UCC ( 7) UAC ( 8) UGC */
+/* ( 9) Leu UUA (10) UCA [(11) TER UAA][(12) TER UGA] */
+/* (13) UUG (14) UCG [(15) UAG][(16) Trp UGG] */
+
+    printf("%s bracketed will be excluded from the COA. ",
+           (pm->coa == 'a')? "Amino Acids": "Codons" );
+    printf("Select number(s) that\nidentify the %s you wish to toggle "
+           "(X to exit, H for help) [X] ",
+           (pm->coa == 'a')? "Amino Acids": "Codons" );
+
+    gets(pm->junk);
+
+    if ( !strlen(pm->junk) || toupper( (int) pm->junk[0]) == 'X' ) {
+      loop=FALSE;
+      continue;
+    }
+
+    if ( toupper( (int) pm->junk[0]) == 'H' ) {
+      chelp("select");
+      continue;
+    }
+
+    endpoint = pm->junk;
+    startpoint = pm->junk;
+
+    /* now toggle the codons and amino acids to be analysed; parsing stops */
+    /* at the first token that is not a number (or is the number 0)        */
+
+    while ( (toggle = (int) strtol(startpoint,&endpoint,10)) != 0 ) {
+      if(endpoint == startpoint ) break;
+      startpoint = endpoint;
+
+      if (pm->coa == 'a' ) {
+        if ( toggle>21 || toggle<1 ) continue;  /* check value is valid    */
+        pcoa->amino [toggle]= (char)((pcoa->amino [toggle])?FALSE:TRUE);
+      }else{
+        if ( toggle>64 || toggle<1 ) continue;  /* check value is valid    */
+        pcoa->codons[toggle]= (char)((pcoa->codons[toggle])?FALSE:TRUE);
+      }
+    }
+  }
+  return;
+}
+
+/************************* Welcome *****************************************/
+/* Prints a Banner */
+/* the \'s are a problem as they must be escaped */
+/***************************************************************************/
+void welcome ( void )
+{
+  /* The banner, one entry per output line; every backslash that is to be   */
+  /* printed has to be doubled inside the string literals                   */
+  static const char *banner[] = {
+    "\n\n",
+    " // \\ // \\ |I \\ // \\ |I\\ I / \n",
+    " |I |I I |I I |I I |I\\\\ I \\___ \n",
+    " |I |I I |I I |I I |I \\\\ I \\ \n",
+    " |I |I I |I I |I I |I \\\\ I |\n",
+    " \\\\___/ \\\\____/ |I____/ \\\\____/ |I \\\\I \\___/\n"
+  };
+  size_t row;
+
+  for ( row = 0; row < sizeof banner / sizeof banner[0]; row++ )
+    fputs(banner[row], stdout);
+}
+
+/********************** printinfo *****************************************/
+/* Prints a summary about this programme, date, version and author of code */
+/* whether a debug version */
+/***************************************************************************/
+int printinfo(void) {
+  /* Prints the source file name, a debug marker (when compiled with        */
+  /* DEBUG), the author, the Revision/Update/Author strings, the compile    */
+  /* date and time, and a short note on the expected input format.          */
+  /* Always returns 1.                                                      */
+# if defined (__FILE__ )
+  printf("\n\tSource : %s", __FILE__);
+# endif
+# if defined (DEBUG)
+  printf("(Debug version)");
+# endif
+
+  printf("\n\tAuthor : John Peden\n");
+
+  /* A '*' precision must be passed as an int; strlen() returns size_t, so */
+  /* it is cast here exactly as for the other fields below                 */
+  printf("\tVersion : %.*s\n", (int) strlen(Revision) , Revision );
+  printf("\tRevised :%.*s %s %.*s\n",(int) strlen(Update) - 7, Update + 6,
+         (*(Update + 7) ? "\n\t by :" : ""),
+         (int) strlen(Author) - 10, Author + 9);
+
+#if defined(__DATE__ ) && defined(__TIME__)
+  printf("\n\tCompiled : %s %s\n", __DATE__, __TIME__);
+#endif
+
+  printf("\n\t-------------------------------\n\n");
+
+  printf(" All sequences must be in a single file separated by title "
+         " lines whose\n first character is either ; or > \n\t any number"
+         " or length of genes is acceptable\n\n");
+  return 1;
+}
+
+
diff --git a/open_fil.c b/open_fil.c
new file mode 100755
index 0000000..09ae020
--- /dev/null
+++ b/open_fil.c
@@ -0,0 +1,236 @@
+/**************************************************************************/
+/* CodonW codon usage analysis package */
+/* Copyright (C) 2005 John F. Peden */
+/* This program is free software; you can redistribute */
+/* it and/or modify it under the terms of the GNU General Public License */
+/* as published by the Free Software Foundation; version 2 of the */
+/* License, */
+/* */
+/* This program is distributed in the hope that it will be useful, but */
+/* WITHOUT ANY WARRANTY; without even the implied warranty of */
+/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
+/* GNU General Public License for more details. */
+/* You should have received a copy of the GNU General Public License along*/
+/* with this program; if not, write to the Free Software Foundation, Inc.,*/
+/* 675 Mass Ave, Cambridge, MA 02139, USA. */
+/* */
+/* */
+/* The author can be contacted by email (jfp#hanson-codonw at yahoo.com Anti-*/
+/* Spam please change the # in my email to an _) */
+/* */
+/* For the latest version and information see */
+/* http://codonw.sourceforge.net */
+/**************************************************************************/
+
+/* This is a general-purpose subroutine, so TRUE and FALSE are defined     */
+/* here in case no earlier definition is in effect at this point           */
+#ifndef TRUE
+#define TRUE 1
+#endif
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+/* What to do if we cannot locate the file we were asked to open:          */
+/* where a directory listing command is known for the platform it is run   */
+/* so that the user can see a choice of filenames, otherwise a message is  */
+/* printed. The platform is chosen by the _DOS, BSD, SYSV, WIN32 and _WIN  */
+/* macros. Note that each expansion already ends in a semicolon.           */
+
+#ifdef _DOS
+#define no_file_found() system("dir/w");
+#elif BSD || SYSV
+#define no_file_found() system("ls -F");
+#elif defined (WIN32) || defined (_WIN)
+#define no_file_found() system("dir/w");
+#else
+#define no_file_found() printf("This would have presented a list of files\n\tbut I do not know howto your operating system\n");
+#endif
+
+/* Include header files */
+#include <stdio.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include "codonW.h"
+
+/************** read_reply *************************************************/
+/* Reads one line from stdin into buf (at most size-1 characters), removes */
+/* the trailing newline and throws away whatever part of an over-long line */
+/* did not fit, so that it cannot be mistaken for the next answer.         */
+/* Returns 1 if a line was read and 0 at end of file or on a read error,   */
+/* in which case buf holds an empty string.                                */
+/***************************************************************************/
+static int read_reply(char *buf, size_t size)
+{
+  size_t len;
+  int ch;
+
+  if ( size == 0 )
+    return 0;
+
+  if ( fgets(buf, (int) size, stdin) == NULL ) {
+    buf[0] = '\0';
+    return 0;
+  }
+
+  len = strlen(buf);
+  if ( len > 0 && buf[len - 1] == '\n' ) {
+    buf[len - 1] = '\0';
+  } else {
+    while ( (ch = getchar()) != '\n' && ch != EOF )
+      ;                                    /* discard the rest of the line  */
+  }
+  return 1;
+}
+
+/************** open_file **************************************************/
+/* This subroutine is a front end to fopen. It takes four parameters:      */
+/* file_needed      a description of the file, used to prompt the user for */
+/*                  a filename. If it is an empty string the file is       */
+/*                  opened without a prompt, using default_filename        */
+/* default_filename the filename suggested in the prompt; an empty string  */
+/*                  (or NULL) means there is no default                    */
+/* write_perm       the fopen mode; r, r+, rb, a, a+, ab, w, w+ and wb are */
+/*                  recognised. The string itself is never modified        */
+/* verbose          if TRUE, a file opened for writing is first checked to */
+/*                  see whether it already exists                          */
+/* Returns the opened stream, or NULL if the file could not be opened, the */
+/* mode was not recognised, the user gave up, or input ran out. When a     */
+/* file open was attempted the filename used is copied into pm->junk.      */
+/***************************************************************************/
+
+FILE *open_file(char *file_needed, char *default_filename,
+char *write_perm, int verbose )
+{
+  char infile_name[MAX_FILENAME_LEN]="";
+  char reply[MAX_FILENAME_LEN]="";         /* user's answer to a prompt     */
+  char temp[4]="";
+  char *answer = pm->junk;
+  char *mode = write_perm;                 /* may be switched to append     */
+  FILE *input=NULL;
+  int got_line;
+
+  if ( default_filename == NULL )          /* treat NULL as "no default"    */
+    default_filename = "";
+
+  /**********************************************************************/
+  /* If a string has been given for file_needed it is assumed          */
+  /* that the user will have a choice of file_names to choose          */
+  /* therefore (s)he will be prompted for a name                       */
+  /* if a default filename was supplied by the calling function this   */
+  /* will be suggested as well, otherwise there is no default          */
+  /**********************************************************************/
+
+  if ( strlen(file_needed)) {
+    while (!strlen(infile_name) ) {
+      printf("\nName of %s (h for help) [%s] ",
+             file_needed,default_filename);
+      got_line = read_reply(infile_name, sizeof infile_name);
+
+      if ( WasHelpCalled ( infile_name ) ) {
+        chelp("open_file_query");          /* Help ....                     */
+        infile_name[0]='\0';
+        continue;
+      }
+
+      if ( !strlen(infile_name) ) {
+        if ( strlen(default_filename) ) {
+          strncpy(infile_name, default_filename, sizeof infile_name - 1);
+          infile_name[sizeof infile_name - 1] = '\0';
+        } else if ( !got_line ) {
+          return (NULL);                   /* no input left and no default  */
+        }
+      }
+    }                                      /* end of get filename           */
+  } else if ( strlen(default_filename) ) { /* use default filename          */
+    strncpy(infile_name, default_filename, sizeof infile_name - 1);
+    infile_name[sizeof infile_name - 1] = '\0';
+  } else {                                 /* not enough info               */
+    fprintf(stderr, "Programming error: no filename supplied\n");
+    my_exit (0,"open file");
+  }
+
+  /**********************************************************************/
+  /* At this point infile_name contains a possible filename            */
+  /* Depending on the mode (write_perm) string this is tested 3 ways   */
+  /*                                                                    */
+  /* (r or r+) Test if the file exists if not, all the files in the    */
+  /* current directory are listed and the user is prompted for         */
+  /* an alternative name or they may quit the programme                */
+  /*                                                                    */
+  /* (a, a+) Not tested, just open the file                            */
+  /*                                                                    */
+  /* (w, w+) If the variable verbose = FALSE then no test              */
+  /* If verbose == TRUE then the file is checked to see if             */
+  /* it already exists, if it does then the user is prompted           */
+  /* either for permission to overwrite this file or to                */
+  /* suggest an alternative file_name which is then tested as well     */
+  /* the user can type q to quit at any stage of this prompting process*/
+  /**********************************************************************/
+
+  if ( !strcmp(write_perm, "r") || !strcmp(write_perm, "r+")
+       ||!strcmp(write_perm, "rb") ){
+    while ( !(input = fopen (infile_name , write_perm ))) {
+      fprintf(stderr,"\nThese are the files in the current directory "
+              "I cannot find %.*s \n\n",(int) strlen(infile_name),infile_name);
+      no_file_found();
+      fprintf(stderr, "\n\nPlease enter another filename, "
+              " (Q)uit, (H)elp [%s] ",infile_name);
+
+      if ( !read_reply(reply, sizeof reply) )
+        return (NULL);                     /* no more input: stop asking    */
+
+      if (strlen (reply)==1 &&
+          ((char)toupper((unsigned char)reply[0])=='Q'))
+        my_exit(2,"open_file");
+      else if (WasHelpCalled ( reply )){   /* test the reply, not the name  */
+        chelp ("File_not_found");
+      }
+      else if (strlen (reply))
+        strcpy (infile_name, reply);       /* both are MAX_FILENAME_LEN     */
+    }                                      /* end of while loop             */
+    strcpy ( answer,infile_name);          /* allow transfer                */
+    return input;
+  }
+
+  /************************* Append ***********************************/
+  else if ( !strcmp(write_perm, "a") || !strcmp(write_perm, "a+")
+            || !strcmp(write_perm, "ab") ) {
+    input = fopen (infile_name, write_perm);
+    strcpy ( answer,infile_name);
+    return input;
+  }
+  /************************* Write **********************************/
+  else if ( !strcmp( write_perm, "w") || !strcmp(write_perm, "w+")
+            ||!strcmp( write_perm, "wb") ) {
+
+    while ( verbose == TRUE ) {
+      if ( !(input = fopen (infile_name , "r")) ) {
+        verbose = FALSE;                   /* filename is unique            */
+        continue;
+      }
+      fclose(input);                       /* close the filehandle          */
+      fprintf(stderr, "\nWarning :File %.*s "
+              "exists already \n\tDo you wish to"
+              " overwrite ? (y/n/h/q)\t [y] ",
+              (int) strlen(infile_name), infile_name);
+      read_reply(temp, sizeof temp);
+
+      switch (toupper( (unsigned char) temp[0])) {
+      case 'Y':
+      case '\0':
+      case '\n':
+        verbose = FALSE;
+        continue;
+      case 'Q':
+        my_exit(2,"open_file2");
+        continue;                          /* still here: ask again         */
+      case 'H':
+        chelp("file_exists");
+        continue;
+      default:
+        fprintf(stderr,
+                "\nYou decided not to overwrite, please enter\n"
+                " another filename, (q)uit, (a)ppend, (h)elp \n"
+                " (a/q/h/filename)\t[a] ");
+        read_reply(reply, sizeof reply);
+      }
+
+      /* if the answer is 'a' then the default file is opened          */
+      /* as appendable else if 'q' then the function gives up          */
+      /* anything else is taken as a file name                         */
+
+      if ( strlen(reply) <= 1 ) {
+        switch (toupper( (unsigned char) reply[0])) {
+        case 'Q':
+          return (NULL);
+        case 'A':
+        case '\0':
+        case '\n':
+          verbose = FALSE;                 /* leave the while loop          */
+          mode = "a+";                     /* caller's string is left alone */
+          break;
+        case 'H':
+          chelp("file_append");
+          continue;
+        default:
+          continue;
+        }                                  /* end of switch                 */
+      } else {
+        /* a new candidate name, tested on the next pass of the loop   */
+        strcpy(infile_name, reply);
+      }
+    }                                      /* match while preserve          */
+    input = fopen (infile_name,mode);      /* opens filehandle              */
+    strcpy ( answer,infile_name);
+    return input;
+  }                                        /* matches if w or w+            */
+  return (NULL);
+}
+
+/************** Main just for testing purposes ***************************/
+/* uncomment to test function as a standalone subroutine */
+/* will also need to replace my_exit with exit calls */
+/*************************************************************************/
+/* main ()
+ {
+ FILE *test=NULL;
+ if( test = open_file( "test file","","r",NULL))
+ printf( "Success\n");
+ else
+ printf( "Failed\n");
+ } */
+/*************************************************************************/
+
+
+
diff --git a/tester.c b/tester.c
new file mode 100755
index 0000000..016d673
--- /dev/null
+++ b/tester.c
@@ -0,0 +1,239 @@
+/**************************************************************************/
+/* CodonW codon usage analysis package */
+/* Copyright (C) 2005 John F. Peden */
+/* This program is free software; you can redistribute */
+/* it and/or modify it under the terms of the GNU General Public License */
+/* as published by the Free Software Foundation; version 2 of the */
+/* License, */
+/* */
+/* This program is distributed in the hope that it will be useful, but */
+/* WITHOUT ANY WARRANTY; without even the implied warranty of */
+/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
+/* GNU General Public License for more details. */
+/* You should have received a copy of the GNU General Public License along*/
+/* with this program; if not, write to the Free Software Foundation, Inc.,*/
+/* 675 Mass Ave, Cambridge, MA 02139, USA. */
+/* */
+/* */
+/* The author can be contacted by email (jfp#hanson-codonw at yahoo.com Anti-*/
+/* Spam please change the # in my email to an _) */
+/* */
+/* For the latest version and information see */
+/* http://codonw.sourceforge.net */
+/******** Tester *****************************************************/
+/* This function is used to teach the genetic code, it generates a random */
+/* series of questions about the selected genetic code. */
+/* The questions include */
+/* 1 and 3 letter amino acid names */
+/* The translation of each codon */
+/* The size of each amino acid family */
+/**************************************************************************/
+
+/* rand_num(z) gives a pseudo-random int in the range 1..z inclusive.      */
+/* The divisor is RAND_MAX + 1 and the arithmetic is done in double, so    */
+/* the scaled value is always below z, even when rand() returns RAND_MAX.  */
+#define rand_num(z) ((int)(((double)rand() / ((double)RAND_MAX + 1.0)) * (double)(z)) + 1)
+
+#ifdef _WINDOWS
+#define beeep Beep(150,150)
+#include <time.h>
+#include <conio.h>
+#else
+#define beeep printf("\007")
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <string.h>
+#include <time.h>
+#include <ctype.h>
+#include "codonW.h"
+
+/* The user's score is recorded in these three counters: tester() updates  */
+/* them and asummary() reports them                                        */
+int num_questions = 0;
+int num_cheats = 0;
+int num_wrong = 0;
+
+
+/* What the user's reply (held in pm->junk) asks for                       */
+enum { REPLY_IS_ANSWER, REPLY_ASK_AGAIN, REPLY_LEAVE };
+
+/* Upper-cases a string in place                                           */
+static void upcase ( char *s ) {
+  for ( ; *s; s++ )
+    *s = (char) toupper( (int) *s);
+}
+
+/* Upper-cases the reply in pm->junk and deals with the replies common to  */
+/* every question: QUIT/EXIT prints the summary, HELP shows the help text. */
+static int classify_reply ( void ) {
+  upcase ( pm->junk );
+
+  if ( !strcmp ( pm->junk, "QUIT" ) || !strcmp ( pm->junk, "EXIT" )) {
+    asummary();
+    return REPLY_LEAVE;
+  }
+  if ( !strcmp ( pm->junk,"HELP")) {
+    chelp("fun");
+    return REPLY_ASK_AGAIN;
+  }
+  return REPLY_IS_ANSWER;
+}
+
+/* Quiz on the currently selected genetic code. Each round picks one of    */
+/* three kinds of question at random and repeats it until it is answered   */
+/* correctly; "?" shows the answer, HELP shows help, QUIT or EXIT prints   */
+/* the score summary and ends the quiz.                                    */
+void tester ( void ) {
+  char asking;
+  char running=TRUE;
+  char tmp_AA [4];
+  char tmp_AA2[4];
+
+  srand( (unsigned)time( NULL ) );         /* initialise random num gen     */
+
+  printf(" Welcome to TESTER \n(which just tests your "
+         "knowledge of the Genetic code)\n"
+         " The genetic code used is dependant on\n what"
+         " code is selected in menu 3\n"
+         " The current code is %s %s\n"
+         "\n If you get stuck try typing ? for a hint\n"
+         " To leave type exit or quit\n", pcu->des, pcu->typ);
+
+  /******************* main loop ****************************/
+  while ( running ) {
+    int i;
+    int what;
+
+    i = rand_num(10);                      /* picks the kind of question    */
+
+    printf("Type Help for help:");
+
+    /* the ranges bias the questions so their frequency is not equal      */
+    if ( i == 1 || i == 2 ) {              /* amino acid question           */
+      i = rand_num(21);
+      asking = TRUE;
+      while ( asking ) {
+        printf("\nWhat is the three letter equivalent for the AA"
+               " %s ", paa->aa1[i]);
+        gets( pm->junk ) ;
+        strcpy ( tmp_AA, paa->aa3[i] );
+        upcase ( tmp_AA );
+
+        what = classify_reply();
+        if ( what == REPLY_LEAVE ) {
+          running = FALSE;
+          break;
+        }
+        if ( what == REPLY_ASK_AGAIN )
+          continue;
+
+        if ( !strcmp (pm->junk, "?" ) ) {
+          printf( "Cheat %s", paa->aa3[i]);
+          num_cheats++;                    /* The user cheated              */
+          continue;
+        }
+
+        if ( strcmp (pm->junk , tmp_AA ) != 0 ) {
+          num_wrong++;                     /* Wrong answer                  */
+          printf("Wrong answer (try ?)\n");
+        } else {
+          asking = FALSE;
+        }
+      }
+    } else if ( i == 3 ) {                 /* How big is this AA family     */
+      i = rand_num(21);
+      asking = TRUE;
+      while ( asking ) {
+        printf("\nHow many codons encode the Amino Acid %s ",
+               paa->aa1[i]);
+        gets( pm->junk ) ;
+
+        what = classify_reply();
+        if ( what == REPLY_LEAVE ) {
+          running = FALSE;
+          break;
+        }
+        if ( what == REPLY_ASK_AGAIN )
+          continue;
+
+        if ( !strcmp (pm->junk, "?" ) ) {
+          printf( "Cheat %i\n", *(da + i) );
+          num_cheats++;
+          continue;
+        }
+
+        if ( atoi(pm->junk) != *(da + i) ) {
+          num_wrong++;
+          printf("Wrong answer (try ?)\n");
+        } else {
+          asking = FALSE;
+        }
+      }
+    } else if ( i >= 4 && i <= 10 ) {      /* codon to aa translation       */
+      i = rand_num(64);
+      asking = TRUE;
+      while ( asking ) {
+        printf("\nName the Amino Acid encoded by the codon %s ", paa->cod[i]);
+        gets( pm->junk );
+
+        what = classify_reply();
+        if ( what == REPLY_LEAVE ) {
+          running = FALSE;
+          break;
+        }
+        if ( what == REPLY_ASK_AGAIN )
+          continue;
+
+        if ( !strcmp (pm->junk, "?" ) ) {
+          printf( "Cheat %s (%s)", paa->aa1[pcu->ca[i]]
+                  , paa->aa3[pcu->ca[i]]);
+          num_cheats++;                    /* tell me the answer            */
+          continue;
+        }
+
+        /* allow 1 or 3 letter amino acid code as the answer; both are    */
+        /* upper-cased to match the already upper-cased reply             */
+        strcpy ( tmp_AA, paa->aa1[pcu->ca[i]] );
+        strcpy ( tmp_AA2, paa->aa3[pcu->ca[i]] );
+        upcase ( tmp_AA );
+        upcase ( tmp_AA2 );
+
+        if ( strcmp(tmp_AA, pm->junk) != 0 &&
+             strcmp(tmp_AA2,pm->junk) != 0 ) {
+          printf("Wrong answer (try ?)\n");
+          num_wrong++;
+        } else {
+          asking = FALSE;
+        }
+      }
+    } else {
+      printf("mistake == %i \n", i);
+      exit(0);                             /* error catch                   */
+    }
+    num_questions++;
+
+  }                                        /* end of while                  */
+
+  return;
+}                                          /* end of tester                 */
+
+/*********** Asummary ******************************************************/
+/* Write out a summary of the users results */
+/***************************************************************************/
+void asummary (void) {
+  /* Percentage of questions without a wrong answer; 0 when none asked     */
+  float accuracy = 0;
+
+  if ( num_questions )
+    accuracy = (float)100 * (num_questions - num_wrong) /
+               (float)num_questions;
+
+  printf ( " You answered\n \t %5i questions\n", num_questions);
+  printf ( " \t %5i answers were wrong\n", num_wrong);
+  printf ( " \t %5i times you had to ask for a hint\n", num_cheats);
+  printf ( " \t %3.0f%c accuracy \n", accuracy, '%');
+  pause;
+  return;
+}
+
+
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/codonw.git
More information about the debian-med-commit
mailing list