[med-svn] r166 - in trunk/packages/clustalw/trunk: . debian tests.clustalw

Mon Dec 4 01:57:15 CET 2006

Author: charles-guest
Date: 2006-12-04 01:57:13 +0100 (Mon, 04 Dec 2006)
New Revision: 166

Added:
   trunk/packages/clustalw/trunk/LICENCE
   trunk/packages/clustalw/trunk/clustalw.1
   trunk/packages/clustalw/trunk/clustalx.1
   trunk/packages/clustalw/trunk/debian/
   trunk/packages/clustalw/trunk/debian/README.debian
   trunk/packages/clustalw/trunk/debian/changelog
   trunk/packages/clustalw/trunk/debian/clustalw.docs
   trunk/packages/clustalw/trunk/debian/clustalw.examples
   trunk/packages/clustalw/trunk/debian/clustalw.install
   trunk/packages/clustalw/trunk/debian/clustalw.manpages
   trunk/packages/clustalw/trunk/debian/clustalw.menu
   trunk/packages/clustalw/trunk/debian/clustalx.docs
   trunk/packages/clustalw/trunk/debian/clustalx.install
   trunk/packages/clustalw/trunk/debian/clustalx.manpages
   trunk/packages/clustalw/trunk/debian/clustalx.menu
   trunk/packages/clustalw/trunk/debian/compat
   trunk/packages/clustalw/trunk/debian/control
   trunk/packages/clustalw/trunk/debian/copyright
   trunk/packages/clustalw/trunk/debian/rules
   trunk/packages/clustalw/trunk/tests.clustalw/
   trunk/packages/clustalw/trunk/tests.clustalw/Makefile
   trunk/packages/clustalw/trunk/tests.clustalw/nuc.data
   trunk/packages/clustalw/trunk/tests.clustalw/nuc2.data
   trunk/packages/clustalw/trunk/tests.clustalw/seq
Modified:
   trunk/packages/clustalw/trunk/amenu.c
   trunk/packages/clustalw/trunk/clustalw.c
   trunk/packages/clustalw/trunk/clustalw.doc
   trunk/packages/clustalw/trunk/clustalw.h
   trunk/packages/clustalw/trunk/clustalw.ms
   trunk/packages/clustalw/trunk/clustalw.new
   trunk/packages/clustalw/trunk/clustalx.c
   trunk/packages/clustalw/trunk/clustalx.html
   trunk/packages/clustalw/trunk/clustalx_help
   trunk/packages/clustalw/trunk/globin.pep
   trunk/packages/clustalw/trunk/interface.c
   trunk/packages/clustalw/trunk/makefile
   trunk/packages/clustalw/trunk/sequence.c
   trunk/packages/clustalw/trunk/trees.c
   trunk/packages/clustalw/trunk/util.c
Log:
Load clustalw-1.83 into trunk/packages/clustalw/trunk.


Added: trunk/packages/clustalw/trunk/LICENCE
===================================================================

--- trunk/packages/clustalw/trunk/LICENCE	2006-12-04 00:56:12 UTC (rev 165)
+++ trunk/packages/clustalw/trunk/LICENCE	2006-12-04 00:57:13 UTC (rev 166)
@@ -0,0 +1,29 @@
+**********************
+LICENCE FOR CLUSTAL W
+**********************
+
+Clustal W (hereafter "the program") is copyright (c) 1994-1998 by Julie D.
+Thompson, Desmond G. Higgins and Toby J. Gibson.
+
+Permission is granted to copy, distribute and use the program provided no fee
+is charged for it and provided that this copyright and licence notice is not
+removed or altered.
+
+The full source code of the program is provided free. You should not
+distribute a modified version of the program without obtaining the permission
+of the authors. You must keep the original copyright and licence notice. You
+must also document clearly the modifications you have made. You must make
+clear that this is not the original version.
+
+Commercial distributors of Clustal W are requested to contact the Clustal W
+authors in order to take out a non-exclusive licence. See the README file
+included with Clustal W for a rationale.
+
+You should understand that this software is provided as-is. The authors make
+no claims towards its suitability for any purpose and accept absolutely no
+liability for any damages the program may cause. Use at your own risk.
+
+
+
+
+

Modified: trunk/packages/clustalw/trunk/amenu.c
===================================================================
--- trunk/packages/clustalw/trunk/amenu.c	2006-12-04 00:56:12 UTC (rev 165)
+++ trunk/packages/clustalw/trunk/amenu.c	2006-12-04 00:57:13 UTC (rev 166)
@@ -184,7 +184,7 @@
 		fprintf(stdout,"     H. HELP\n");
 		fprintf(stdout,"     X. EXIT (leave program)\n\n\n");
 		
-		getstr("Your choice",lin1);
+		getstr("Your choice",MAXLINE+1,lin1);
 
 		switch(toupper(*lin1)) {
 			case '1': seq_input(FALSE);
@@ -268,7 +268,7 @@
         fprintf(stdout,"    H.  HELP\n");
         fprintf(stdout,"    or press [RETURN] to go back to main menu\n\n\n");
 
-        getstr("Your choice",lin1);
+        getstr("Your choice",MAXLINE+1,lin1);
         if(*lin1 == EOS) return;
 
         switch(toupper(*lin1))
@@ -361,7 +361,7 @@
         fprintf(stdout,"    H.  HELP\n");
         fprintf(stdout,"    or press [RETURN] to go back to main menu\n\n\n");
 
-        getstr("Your choice",lin1);
+        getstr("Your choice",MAXLINE+1,lin1);
         if(*lin1 == EOS) return;
 
         switch(toupper(*lin1))
@@ -457,7 +457,7 @@
 		fprintf(stdout,"\n\n");
 		fprintf(stdout,"     H. HELP\n\n\n");
 		
-		getstr("Enter number (or [RETURN] to exit)",lin2);
+		getstr("Enter number (or [RETURN] to exit)",MAXLINE+1,lin2);
 		if( *lin2 == EOS) { 
 			return;
 		}
@@ -533,7 +533,7 @@
                 fprintf(stdout,"--\n");
 
 
-                getstr("\n\nEnter number (or [RETURN] to exit)",lin2);
+                getstr("\n\nEnter number (or [RETURN] to exit)",MAXLINE+1,lin2);
                 if(*lin2 == EOS) return(output_struct_penalties);
 
         	switch(toupper(*lin2))
@@ -602,7 +602,7 @@
         fprintf(stdout,"    H.  HELP\n");
         fprintf(stdout,"    or press [RETURN] to go back to main menu\n\n\n");
 
-        getstr("Your choice",lin1);
+        getstr("Your choice",MAXLINE+1,lin1);
         if(*lin1 == EOS) return;
 
         switch(toupper(*lin1))
@@ -677,7 +677,7 @@
 	fprintf(stdout,"\n");
 	fprintf(stdout,"     H. HELP\n\n\n");	
 	
-		getstr("Enter number (or [RETURN] to exit)",lin2);
+		getstr("Enter number (or [RETURN] to exit)",MAXLINE+1,lin2);
 		if(*lin2 == EOS) return;
 		
 		switch(toupper(*lin2)) {
@@ -766,7 +766,7 @@
 	fprintf(stdout,"\n");
 	fprintf(stdout,"     H. HELP\n\n\n");	
 	
-		getstr("Enter number (or [RETURN] to exit)",lin2);
+		getstr("Enter number (or [RETURN] to exit)",MAXLINE+1,lin2);
 		if(*lin2 == EOS) return;
 		
 		switch(toupper(*lin2)) {
@@ -907,7 +907,7 @@
 
 		fprintf(stdout,"     H. HELP\n\n\n");
 		
-		getstr("Enter number (or [RETURN] to exit)",lin2);
+		getstr("Enter number (or [RETURN] to exit)",MAXLINE+1,lin2);
 		if( *lin2 == EOS) {
                         if(dnaflag) {
                                 dna_pw_go_penalty     = pw_go_penalty;
@@ -1029,7 +1029,7 @@
                 fprintf(stdout,"     8. Protein Gap Parameters\n\n");
 		fprintf(stdout,"     H. HELP\n\n\n");		
 
-		getstr("Enter number (or [RETURN] to exit)",lin2);
+		getstr("Enter number (or [RETURN] to exit)",MAXLINE+1,lin2);
 
 		if(*lin2 == EOS) {
 			if(dnaflag) {
@@ -1122,7 +1122,7 @@
 		fprintf(stdout,"     5. Toggle End Gap Separation         :%s\n\n",(!use_endgaps) ? "OFF" : "ON");
 		fprintf(stdout,"     H. HELP\n\n\n");		
 
-		getstr("Enter number (or [RETURN] to exit)",lin2);
+		getstr("Enter number (or [RETURN] to exit)",MAXLINE+1,lin2);
 
 		if(*lin2 == EOS) return;
 		
@@ -1136,7 +1136,7 @@
 			case '3':
 				fprintf(stdout,"Hydrophilic Residues Currently: %s\n",hyd_residues);
 
-				getstr("Enter residues (or [RETURN] to quit)",lin1);
+				getstr("Enter residues (or [RETURN] to quit)",MAXLINE+1,lin1);
                                 if (*lin1 != EOS) {
                                         for (i=0;i<strlen(hyd_residues) && i<26;i++) {
                                         c = lin1[i];
@@ -1188,7 +1188,7 @@
                 fprintf(stdout,"--\n");
 
 
-                getstr("\n\nEnter number (or [RETURN] to exit)",lin2);
+                getstr("\n\nEnter number (or [RETURN] to exit)",MAXLINE+1,lin2);
                 if(*lin2 == EOS) return(matn);
 
                 i=toupper(*lin2)-'0';
@@ -1223,7 +1223,7 @@
 	fprintf(stdout,"\n%s\n",title);
 	strcpy(line,prompt);
 	strcat(line, "(y/n) ? [y]");
-	getstr(line,lin2);
+	getstr(line,MAXLINE+1,lin2);
 	if ((*lin2 != 'n') && (*lin2 != 'N'))
 		return('y');
 	else

Added: trunk/packages/clustalw/trunk/clustalw.1
===================================================================
--- trunk/packages/clustalw/trunk/clustalw.1	2006-12-04 00:56:12 UTC (rev 165)
+++ trunk/packages/clustalw/trunk/clustalw.1	2006-12-04 00:57:13 UTC (rev 166)
@@ -0,0 +1,56 @@
+.TH CLUSTALW 1 
+.\" NAME should be all caps, SECTION should be 1-8, maybe w/ subsection
+.\" other parms are allowed: see man(7), man(1)
+.SH NAME
+clustalw \- a multiple sequence alignment program
+
+.SH SYNOPSIS
+.B clustalw
+.I "[options]"
+.SH "DESCRIPTION"
+This manual page documents briefly the
+.BR clustalw
+command.
+This manual page was written for the Debian GNU/Linux distribution
+because the original program does not have a manual page.
+Instead, it has other documentation; see below.
+.PP
+.B clustalw
+is a program which performs
+simultaneous alignment of many nucleotide or amino acid sequences.
+It is typically run interactively, providing a menu and online help.
+If you prefer to use it in command-line (batch) mode, you will have
+to give several options, the minimum being -infile.
+.SH OPTIONS
+A summary of options is included below.
+For a complete description, see the last section.
+.TP
+.B \-infile=string
+Tells ClustalW to get input from the named file.
+Several formats are automatically recognized.
+.TP
+.B \-outfile=string
+Tells ClustalW to write output in the named file.
+.TP
+.B \-tree
+Calculates NJ tree.
+.TP
+.B \-profile
+Merges two alignments by profile alignment.
+.TP
+.B \-output=string
+Sets the output format. It can be GCG, GDE, PHYLIP or PIR.
+.TP
+.B \-quicktree
+Uses FAST algorithm for the alignment guide tree.
+.TP
+.B \-help
+Prints a short help. Warning: the syntax of the options is the VMS one. For
+Unix, change the / to a - and type the option name in lower case.
+.SH "SEE ALSO"
+The \-help option is a good summary. Files in /usr/share/doc/clustalw contain a lot
+of details. The Web page <http://www-igbmc.u-strasbg.fr/BioInfo/ClustalW/>
+is also helpful.
+.SH AUTHOR
+This manual page was written by Stephane Bortzmeyer <bortzmeyer at debian.org>,
+for the Debian GNU/Linux system (but may be used by others).

Modified: trunk/packages/clustalw/trunk/clustalw.c
===================================================================
--- trunk/packages/clustalw/trunk/clustalw.c	2006-12-04 00:56:12 UTC (rev 165)
+++ trunk/packages/clustalw/trunk/clustalw.c	2006-12-04 00:57:13 UTC (rev 166)
@@ -34,7 +34,7 @@
 #ifdef MSDOS
         char *help_file_name = "clustalw.hlp";
 #else
-        char *help_file_name = "clustalw_help";
+        char *help_file_name = "/usr/share/clustalw/clustalw_help";
 #endif
 
 sint max_names; /* maximum length of names in current alignment file */


Property changes on: trunk/packages/clustalw/trunk/clustalw.doc
___________________________________________________________________
Name: svn:executable
   - 
   + *

Modified: trunk/packages/clustalw/trunk/clustalw.h
===================================================================
--- trunk/packages/clustalw/trunk/clustalw.h	2006-12-04 00:56:12 UTC (rev 165)
+++ trunk/packages/clustalw/trunk/clustalw.h	2006-12-04 00:57:13 UTC (rev 166)
@@ -238,7 +238,7 @@
 char *blank_to_(char *str);
 char *upstr(char *str);
 char *lowstr(char *str);
-void getstr(char *instr, char *outstr);
+void getstr(char *instr, int n, char *outstr);
 double getreal(char *instr, double minx, double maxx, double def);
 int getint(char *instr, int minx, int maxx, int def);
 void do_system(void);


Property changes on: trunk/packages/clustalw/trunk/clustalw.ms
___________________________________________________________________
Name: svn:executable
   - 
   + *


Property changes on: trunk/packages/clustalw/trunk/clustalw.new
___________________________________________________________________
Name: svn:executable
   - 
   + *

Added: trunk/packages/clustalw/trunk/clustalx.1
===================================================================
--- trunk/packages/clustalw/trunk/clustalx.1	2006-12-04 00:56:12 UTC (rev 165)
+++ trunk/packages/clustalw/trunk/clustalx.1	2006-12-04 00:57:13 UTC (rev 166)
@@ -0,0 +1,38 @@
+.TH CLUSTALX 1 "2003 July 1" "Manual for clustalx"
+.SH NAME
+clustalx \- a windows interface for the ClustalW multiple sequence alignment program
+
+.SH SYNOPSIS
+.B clustalx
+.I "[options]"
+.SH "DESCRIPTION"
+This manual page documents briefly the
+.BR clustalx
+command.
+This manual page was written for the Debian GNU/Linux distribution
+because the original program does not have a manual page.
+Instead, it has other documentation; see below.
+.PP
+.B clustalx
+is a windows interface for the ClustalW multiple sequence alignment
+program. It provides an integrated environment for performing multiple sequence
+and profile alignments and analysing the results. The sequence alignment is
+displayed in a window on the screen. A versatile coloring scheme has been
+incorporated allowing you to highlight conserved features  in the alignment.
+The pull-down menus at the top of the window allow you to select all the
+options required for traditional multiple sequence and profile alignment.
+.P
+You can cut-and-paste sequences to change the order of the alignment; you can
+select a subset of sequences to be aligned; you can select a sub-range of the
+alignment to be realigned and inserted back into the original alignment.
+.P
+Alignment quality analysis can be performed and low-scoring segments or
+exceptional residues can be highlighted.
+
+.SH "SEE ALSO"
+Files in /usr/share/doc/clustalx contain a lot
+of details. The Web page <http://www-igbmc.u-strasbg.fr/BioInfo/ClustalW/>
+is also helpful.
+.SH AUTHOR
+This manual page was written by Andreas Tille <tille at debian.org>,
+for the Debian GNU/Linux system (but may be used by others).

Modified: trunk/packages/clustalw/trunk/clustalx.c
===================================================================
--- trunk/packages/clustalw/trunk/clustalx.c	2006-12-04 00:56:12 UTC (rev 165)
+++ trunk/packages/clustalw/trunk/clustalx.c	2006-12-04 00:57:13 UTC (rev 166)
@@ -26,7 +26,7 @@
 #ifdef MSDOS
         char *help_file_name = "clustalx.hlp";
 #else
-        char *help_file_name = "clustalx_help";
+        char *help_file_name = "/usr/share/clustalw/clustalw_help";
 #endif
 
 sint max_names; /* maximum length of names in current alignment file */

Modified: trunk/packages/clustalw/trunk/clustalx.html
===================================================================
--- trunk/packages/clustalw/trunk/clustalx.html	2006-12-04 00:56:12 UTC (rev 165)
+++ trunk/packages/clustalw/trunk/clustalx.html	2006-12-04 00:57:13 UTC (rev 166)
@@ -2029,6 +2029,2118 @@
 <P>
 Thompson,J.D., Gibson,T.J., Plewniak,F., Jeanmougin,F. and Higgins,D.G. (1997)
 The ClustalX windows interface: flexible strategies for multiple sequence 
+alignment aided by quality analysis tools. Nucleic Acids Research, 25:4876-4882.
+</P>
+<P>
+</P>
+<P>
+<STRONG>
+The ClustalW program is described in the manuscript:
+</STRONG>
+</P>
+<P>
+Thompson, J.D., Higgins, D.G. and Gibson, T.J. (1994) CLUSTAL W: improving the
+sensitivity of progressive multiple sequence alignment through sequence
+weighting, positions-specific gap penalties and weight matrix choice.  Nucleic
+Acids Research, 22:4673-4680.
+</P>
+<P>
+</P>
+<P>
+<STRONG>
+The ClustalV program is described in the manuscript:
+</STRONG>
+</P>
+<P>
+Higgins,D.G., Bleasby,A.J. and Fuchs,R. (1992) CLUSTAL V: improved software for
+multiple sequence alignment. CABIOS 8,189-191.
+</P>
+<P>
+</P>
+<P>
+<STRONG>
+The original Clustal program is described in the manuscripts:
+</STRONG>
+</P>
+<P>
+Higgins,D.G. and Sharp,P.M. (1989) Fast and sensitive multiple sequence
+alignments on a microcomputer.
+CABIOS 5,151-153.
+</P>
+<P>
+Higgins,D.G. and Sharp,P.M. (1988) CLUSTAL: a package for performing multiple
+sequence alignment on a microcomputer. Gene 73,237-244.
+</P>
+<P>
+<STRONG>
+Some tips on using Clustal X:
+</STRONG>
+</P>
+<P>
+Jeanmougin,F., Thompson,J.D., Gouy,M., Higgins,D.G. and Gibson,T.J. (1998)
+Multiple sequence alignment with Clustal X. Trends Biochem Sci, 23, 403-5.
+</P>
+<P>
+<STRONG>
+Some tips on using Clustal W:
+</STRONG>
+</P>
+<P>
+Higgins, D. G., Thompson, J. D. and Gibson, T. J. (1996) Using CLUSTAL for
+multiple sequence alignments. Methods Enzymol., 266, 383-402.
+</P>
+<P>
+<STRONG>
+You can get the latest version of the ClustalX program by anonymous ftp to:
+</STRONG>
+</P>
+<P>
+ftp-igbmc.u-strasbg.fr
+ftp.embl-heidelberg.de
+ftp.ebi.ac.uk
+</P>
+<P>
+<STRONG>
+Or, have a look at the following WWW site:
+</STRONG>
+</P>
+<P>
+http://www-igbmc.u-strasbg.fr/BioInfo/
+</P>
+<P>
+</P>
+<A HREF="#Index"> <EM>Back to Index</EM> </A>
+<HEAD>
+<TITLE>ClustalX Help</TITLE>
+</HEAD>
+<BODY BGCOLOR=white>
+<CENTER><H1>ClustalX Help</H1></CENTER>
+<P>
+You can get the latest version of the ClustalX program here:
+</P>
+<DL><DD>
+<A HREF="ftp://ftp-igbmc.u-strasbg.fr/pub/ClustalX/">
+ftp://ftp-igbmc.u-strasbg.fr/pub/ClustalX/</A>
+</DL>
+<P>For full details of usage and algorithms, please read the <A HREF="clustalw.doc"><EM>ClustalW.Doc</EM></A> file.</P>
+<PRE><EM>
+Toby  Gibson                         EMBL, Heidelberg, Germany.
+Des   Higgins                        UCC, Cork, Ireland.
+Julie Thompson/Francois Jeanmougin   IGBMC, Strasbourg, France.
+</EM></PRE>
+<CENTER><H2><A NAME="Index">Index</A></H2></CENTER>
+<OL>
+<LI><A HREF="#G">                      General help for CLUSTAL X (1.8)
+</A></LI>
+<LI><A HREF="#F">                      Input / Output Files 
+</A></LI>
+<LI><A HREF="#E">                          Editing Alignments
+</A></LI>
+<LI><A HREF="#M">                          Multiple Alignments
+</A></LI>
+<LI><A HREF="#P">                   Profile and Structure Alignments
+</A></LI>
+<LI><A HREF="#B">            Secondary Structure / Gap Penalty Masks
+</A></LI>
+<LI><A HREF="#T">                            Phylogenetic Trees
+</A></LI>
+<LI><A HREF="#C">                               Colors
+</A></LI>
+<LI><A HREF="#Q">                       Alignment Quality Analysis
+</A></LI>
+<LI><A HREF="#9">              Command Line Parameters
+</A></LI>
+<LI><A HREF="#R">                             References
+</A></LI>
+</OL>
+<CENTER><H2><A NAME="G">                      General help for CLUSTAL X (1.8)
+</A></H2></CENTER>
+<P>
+</P>
+<P>
+Clustal X is a windows interface for the ClustalW multiple sequence alignment
+program. It provides an integrated environment for performing multiple sequence
+and profile alignments and analysing the results. The sequence alignment is
+displayed in a window on the screen. A versatile coloring scheme has been
+incorporated allowing you to highlight conserved features  in the alignment.
+The pull-down menus at the top of the window allow you to select all the
+options required for traditional multiple sequence and profile alignment.
+</P>
+<P>
+You can cut-and-paste sequences to change the order of the alignment; you can
+select a subset of sequences to be aligned; you can select a sub-range of the
+alignment to be realigned and inserted back into the original alignment.
+</P>
+<P>
+Alignment quality analysis can be performed and low-scoring segments or
+exceptional residues can be highlighted.
+</P>
+<P>
+ClustalX is available for a number of different platforms including: SUN
+Solaris, IRIX5.3 on Silicon Graphics, Digital UNIX on DECStations, Microsoft
+Windows (32 bit) for PC's, Linux ELF for x86 PC's and Macintosh PowerMac. (See
+the README file for Installation instructions.)
+</P>
+<P>
+</P>
+<P>
+<H4>
+SEQUENCE INPUT
+</H4>
+</P>
+<P>
+Sequences and profiles (a term for pre-existing alignments) are input using 
+the FILE menu. Invalid options will be disabled. All sequences must be included
+into 1 file. 7 formats are automatically recognised: NBRF/PIR, EMBL/SWISSPROT,
+Pearson (Fasta), Clustal (*.aln), GCG/MSF (Pileup), GCG9 RSF and GDE flat file.
+All non-alphabetic characters (spaces, digits, punctuation marks) are ignored
+except "-" which is used to indicate a GAP ("." in MSF/RSF).  
+</P>
+<P>
+<H4>
+SEQUENCE / PROFILE ALIGNMENTS
+</H4>
+</P>
+<P>
+Clustal X has two modes which can be selected using the switch directly above
+the sequence display: MULTIPLE ALIGNMENT MODE and PROFILE ALIGNMENT MODE.
+</P>
+<P>
+To do a MULTIPLE ALIGNMENT on a set of sequences, make sure MULTIPLE ALIGNMENT
+MODE is selected. A single sequence data area is then displayed. The ALIGNMENT
+menu then allows you to either produce a guide tree for the alignment, or to do
+a multiple alignment following the guide tree, or to do a full multiple
+alignment.
+</P>
+<P>
+In PROFILE ALIGNMENT MODE, two sequence data areas are displayed, allowing you
+to align 2 alignments (termed profiles). Profiles are also used to add a new
+sequence to an old alignment, or to use secondary structure to guide the
+alignment process. GAPS in the old alignments are indicated using the "-" 
+character. PROFILES can be input in ANY of the allowed formats; just  use "-"
+(or "." for MSF/RSF) for each gap position. In Profile Alignment Mode, a button
+"Lock Scroll" is displayed which allows you to scroll the two profiles together
+using a single scroll bar. When the Lock Scroll is turned off, the two profiles
+can be scrolled independently.
+</P>
+<P>
+<H4>
+PHYLOGENETIC TREES
+</H4>
+</P>
+<P>
+Phylogenetic trees can be calculated from old alignments (read in with "-"
+characters to indicate gaps) OR after a multiple alignment while the alignment
+is still displayed.
+</P>
+<P>
+<H4>
+ALIGNMENT DISPLAY
+</H4>
+</P>
+<P>
+The alignment is displayed on the screen with the sequence names on the left
+hand side. The sequence alignment is for display only, it cannot be edited here
+(except for changing the sequence order by cutting-and-pasting on the sequence
+names). 
+</P>
+<P>
+A ruler is displayed below the sequences, starting at 1 for the first residue
+position (residue numbers in the sequence input file are ignored).
+</P>
+<P>
+A line above the alignment is used to mark strongly conserved positions. Three
+characters ('*', ':' and '.') are used:
+</P>
+<P>
+'*' indicates positions which have a single, fully conserved residue
+</P>
+<P>
+':' indicates that one of the following 'strong' groups is fully conserved:-
+<PRE>
+                 STA  
+                 NEQK  
+                 NHQK  
+                 NDEQ  
+                 QHRK  
+                 MILV  
+                 MILF  
+                 HY  
+                 FYW  
+</PRE>
+</P>
+<P>
+'.' indicates that one of the following 'weaker' groups is fully conserved:-
+<PRE>
+                 CSA  
+                 ATV  
+                 SAG  
+                 STNK  
+                 STPA  
+                 SGND  
+                 SNDEQK  
+                 NDEQHK  
+                 NEQHRK  
+                 FVLIM  
+                 HFY  
+</PRE>
+</P>
+<P>
+These are all the positively scoring groups that occur in the Gonnet Pam250
+matrix. The strong and weak groups are defined as strong score >0.5 and weak
+score =<0.5 respectively.
+</P>
+<P>
+For profile alignments, secondary structure and gap penalty masks are displayed
+above the sequences, if any data is found in the profile input file.
+</P>
+<P>
+</P>
+<P>
+</P>
+<A HREF="#Index"> <EM>Back to Index</EM> </A>
+<CENTER><H2><A NAME="F">                      Input / Output Files 
+</A></H2></CENTER>
+<P>
+</P>
+<P>
+LOAD SEQUENCES reads sequences from one of 7 file formats, replacing any
+sequences that are already loaded. All sequences must be in 1 file. The formats
+that are automatically recognised are: NBRF/PIR, EMBL/SWISSPROT, Pearson
+(Fasta), Clustal (*.aln), GCG/MSF (Pileup), GCG9/RSF and GDE flat file.  All
+non-alphabetic characters (spaces, digits, punctuation  marks) are ignored
+except "-" which is used to indicate a GAP ("." in MSF/RSF).
+</P>
+<P>
+The program tries to automatically recognise the different file formats used
+and to guess whether the sequences are amino acid or nucleotide.  This is not
+always foolproof.
+</P>
+<P>
+FASTA and NBRF/PIR formats are recognised by having a ">" as the first 
+character in the file.  
+</P>
+<P>
+EMBL/Swiss Prot formats are recognised by the letters "ID" at the start of the
+file (the token for the entry name field).  
+</P>
+<P>
+CLUSTAL format is recognised by the word CLUSTAL at the beginning of the file.
+</P>
+<P>
+GCG/MSF format is recognised by one of the following:
+<UL>
+<LI>
+       - the word PileUp at the start of the file.
+</LI><LI>
+       - the word !!AA_MULTIPLE_ALIGNMENT or !!NA_MULTIPLE_ALIGNMENT
+         at the start of the file.
+</LI><LI>
+       - the word MSF on the first line of the file, and the characters ..
+         at the end of this line.
+</LI>
+</UL>
+</P>
+<P> 
+GCG/RSF format is recognised by the word !!RICH_SEQUENCE at the beginning of
+the file.
+</P>
+<P>
+</P>
+<P>
+If 85% or more of the characters in the sequence are from A,C,G,T,U or N, the
+sequence will be assumed to be nucleotide.  This works in 97.3% of cases but
+watch out!
+</P>
+<P>
+APPEND SEQUENCES is only valid in MULTIPLE ALIGNMENT MODE. The input sequences
+do not replace those already loaded, but are appended at the end of the
+alignment.
+</P>
+<P>
+SAVE SEQUENCES AS... offers the user a choice of one of six output formats:
+CLUSTAL, NBRF/PIR, GCG/MSF, PHYLIP, NEXUS or GDE. All sequences are written
+to a single file. Options are available to save a range of the alignment, 
+switch between UPPER/LOWER case for GDE files, and to output SEQUENCE NUMBERING
+for CLUSTAL files.
+</P>
+<P>
+LOAD PROFILE 1 reads sequences in the same 7 file formats, replacing any
+sequences already loaded as Profile 1. This option will also remove any
+sequences which are loaded in Profile 2.
+</P>
+<P>
+LOAD PROFILE 2 reads sequences in the same 7 file formats, replacing any
+sequences already loaded as Profile 2.
+</P>
+<P>
+SAVE PROFILE 1 AS... is similar to the Save Sequences option except that only
+those sequences in Profile 1 will be written to the output file.
+</P>
+<P>
+SAVE PROFILE 2 AS... is similar to the Save Sequences option except that only
+those sequences in Profile 2 will be written to the output file.
+</P>
+<P>
+WRITE ALIGNMENT AS POSTSCRIPT will write the sequence display to a postscript
+format file. This will include any secondary structure / gap penalty mask 
+information and the consensus and ruler lines which are displayed on the
+screen. The Alignment Quality curve can be optionally included in the output
+file.
+</P>
+<P>
+WRITE PROFILE 1 AS POSTSCRIPT is similar to WRITE ALIGNMENT AS POSTSCRIPT
+except that only the profile 1 display will be printed.
+</P>
+<P>
+WRITE PROFILE 2 AS POSTSCRIPT is similar to WRITE ALIGNMENT AS POSTSCRIPT
+except that only the profile 2 display will be printed.
+</P>
+<P>
+</P>
+<P>
+<H4>
+POSTSCRIPT PARAMETERS
+</H4>
+</P>
+<P>
+A number of options are available to allow you to configure your postscript
+output file.
+</P>
+<P>
+PS COLORS FILE:
+</P>
+<P>
+The exact RGB values required to reproduce the colors used in the alignment
+window will vary from printer to printer. A PS colors file can be specified
+that contains the RGB values for all the colors required by each of your
+postscript printers.
+</P>
+<P>
+By default, Clustal X looks for a file called 'colprint.par' in the current
+directory (if you're running under UNIX, it then looks in your home directory,
+and finally in the directories in your PATH environment variable). If no PS
+colors file is found or a color used on the screen is not defined here, the
+screen RGB values (from the Color Parameter File) are used.
+</P>
+<P>
+The PS colors file consists of one line for each color to be defined, with the
+color name followed by the RGB values (on a scale of 0 to 1). For example,
+</P>
+<P>
+RED          0.9 0.1 0.1
+</P>
+<P>
+Blank lines and comments (lines beginning with a '#' character) are ignored.
+</P>
+<P>
+</P>
+<P>
+PAGE SIZE:  The alignment can be displayed on either A4, A3 or US Letter size
+pages.
+</P>
+<P>
+ORIENTATION: The alignment can be displayed on either a landscape or portrait
+page.
+</P>
+<P>
+PRINT HEADER: An optional header including the postscript filename, and
+creation date can be printed at the top of each page.
+</P>
+<P>
+PRINT QUALITY CURVE: The Alignment Quality curve which is displayed underneath
+the alignment on the screen can be included in the postscript output.
+</P>
+<P>
+PRINT RULER: The ruler which is displayed underneath the alignment on the 
+screen can be included in the postscript output.
+</P>
+<P>
+PRINT RESIDUE NUMBERS: Sequence residue numbers can be printed at the right
+hand side of the alignment.
+</P>
+<P>
+RESIZE TO FIT PAGE: By default, the alignment is scaled to fit the page size
+selected. This option can be turned off, in which case a font size of 10 will
+be used for the sequences.
+</P>
+<P>
+PRINT FROM POSITION/TO: A range of the alignment can be printed. The default
+is to print the full alignment. The first and last residues to be printed are
+specified here.
+</P>
+<P>
+USE BLOCK LENGTH: The alignment can be divided into blocks of residues. The
+number of residues in a block is specified here. More than one block may then
+be printed on a single page. This is useful for long alignments of a small
+number of sequences. If the block length is set to 0, the alignment will not
+be divided into blocks, but printed across a number of pages.
+</P>
+<P>
+</P>
+<A HREF="#Index"> <EM>Back to Index</EM> </A>
+<CENTER><H2><A NAME="E">                          Editing Alignments
+</A></H2></CENTER>
+<P>
+</P>
+<P>
+Clustal X allows you to change the order of the sequences in the alignment, by
+cutting-and-pasting the sequence names.
+</P>
+<P>
+To select a group of sequences to be moved, click on a sequence name and drag
+the cursor until all the required sequences are highlighted. Holding down the
+Shift key when clicking on the first name will add new sequences to those
+already selected.
+</P>
+<P>
+(Options are provided to Select All Sequences, Select Profile 1 or Select 
+Profile 2.)
+</P>
+<P>
+The selected sequences can be removed from the alignment by using the EDIT
+menu, CUT option.
+</P>
+<P>
+To add the cut sequences back into an alignment, select a sequence by clicking
+on the sequence name. The cut sequences will be added to the alignment,
+immediately following the selected sequence, by the EDIT menu, PASTE option.
+</P>
+<P>
+To add the cut sequences to an empty alignment (eg. when cutting sequences from
+Profile 1 and pasting them to Profile 2), click on the empty sequence name
+display area, and select the EDIT menu, PASTE option as before.
+</P>
+<P>
+The sequence selection and sequence range selection can be cleared using the
+EDIT menu, CLEAR SEQUENCE SELECTION and CLEAR RANGE SELECTION options
+respectively.
+</P>
+<P>
+To search for a string of residues in the sequences, select the sequences to be
+searched by clicking on the sequence names. You can then enter the string to
+search for by selecting the SEARCH FOR STRING option. If the string is found in
+any of the sequences selected, the sequence name and column number are printed
+below the sequence display.
+</P>
+<P>
+In PROFILE ALIGNMENT MODE, the two profiles can be merged (normally done after
+alignment) by selecting ADD PROFILE 2 TO PROFILE 1. The sequences currently
+displayed as Profile 2 will be appended to Profile 1. 
+</P>
+<P>
+The REMOVE ALL GAPS option will remove all gaps from the sequences currently
+selected.
+WARNING: This option removes ALL gaps, not only those introduced by ClustalX,
+but also those that were read from the input alignment file. Any secondary
+structure information associated with the alignment will NOT be automatically
+realigned.
+</P>
+<P>
+The REMOVE GAP-ONLY COLUMNS will remove those positions in the alignment which
+contain gaps in all sequences. This can occur as a result of removing divergent
+sequences from an alignment, or if an alignment has been realigned.
+</P>
+<P>
+</P>
+<A HREF="#INDEX"> <EM>Back to Index</EM> </A>
+<CENTER><H2><A NAME="M">                          Multiple Alignments
+</A></H2></CENTER>
+<P>
+</P>
+<P>
+Make sure MULTIPLE ALIGNMENT MODE is selected, using the switch directly above
+the sequence display area. Then, use the ALIGNMENT menu to do multiple
+alignments.
+</P>
+<P>
+Multiple alignments are carried out in 3 stages:
+</P>
+<P> 
+1) all sequences are compared to each other (pairwise alignments);
+</P>
+<P> 
+2) a dendrogram (like a phylogenetic tree) is constructed, describing the
+approximate groupings of the sequences by similarity (stored in a file).
+</P>
+<P> 
+3) the final multiple alignment is carried out, using the dendrogram as a guide.
+</P>
+<P>
+The 3 stages are carried out automatically by the DO COMPLETE ALIGNMENT option.
+You can skip the first stages (pairwise alignments; guide tree) by using an old
+guide tree file (DO ALIGNMENT FROM GUIDE TREE); or you can just produce the
+guide tree with no final multiple alignment (PRODUCE GUIDE TREE ONLY).
+</P>
+<P>
+</P>
+<P>
+REALIGN SELECTED SEQUENCES is used to realign badly aligned sequences in the
+alignment. Sequences can be selected by clicking on the sequence names - see
+Editing Alignments for more details. The unselected sequences are then 'fixed'
+and a profile is made including only the unselected sequences. Each of the
+selected sequences in turn is then realigned to this profile. The realigned
+sequences will be displayed as a group at the end of the alignment.
+</P>
+<P>
+</P>
+<P>
+REALIGN SELECTED SEQUENCE RANGE is used to realign a small region of the 
+alignment. A residue range can be selected by clicking on the sequence display
+area. A multiple alignment is then performed, following the 3 stages described
+above, but only using the selected residue range. Finally the new alignment of
+the range is pasted back into the full sequence alignment.
+</P>
+<P>
+By default, gap penalties are used at each end of the subrange in order to 
+penalise terminal gaps. If the REALIGN SEGMENT END GAP PENALTIES option is
+switched off, gaps can be introduced at the ends of the residue range at no
+cost.
+</P>
+<P>
+</P>
+<P>
+ALIGNMENT PARAMETERS displays a sub-menu with the following options:
+</P>
+<P>
+RESET NEW GAPS BEFORE ALIGNMENT will remove any new gaps introduced into the
+sequences during multiple alignment if you wish to change the parameters and
+try again. This only takes effect just before you do a second multiple
+alignment. You can make phylogenetic trees after alignment whether or not this
+is ON. If you turn this OFF, the new gaps are kept even if you do a second
+multiple alignment. This allows you to iterate the alignment gradually.
+Sometimes, the alignment is improved by a second or third pass.
+</P>
+<P>
+RESET ALL GAPS BEFORE ALIGNMENT will remove all gaps in the sequences including
+gaps which were read in from the sequence input file. This only takes effect
+just before you do a second multiple alignment.  You can make phylogenetic
+trees after alignment whether or not this is ON.  If you turn this OFF, all
+gaps are kept even if you do a second multiple alignment. This allows you to
+iterate the alignment gradually.  Sometimes, the alignment is improved by a
+second or third pass.
+</P>
+<P>
+</P>
+<P>
+PAIRWISE ALIGNMENT PARAMETERS control the speed/sensitivity of the initial
+alignments.
+</P>
+<P>
+MULTIPLE ALIGNMENT PARAMETERS control the gaps in the final multiple
+alignments.
+</P>
+<P>
+PROTEIN GAP PARAMETERS displays a temporary window which allows you to set
+various parameters only used in the alignment of protein sequences.
+</P>
+<P>
+(SECONDARY STRUCTURE PARAMETERS, for use with the Profile Alignment Mode only,
+allows you to set various parameters only used with gap penalty masks.)
+</P>
+<P>
+SAVE LOG FILE will write the alignment calculation scores to a file. The log
+filename is the same as the input sequence filename, with an extension .log
+appended.
+</P>
+<P>
+</P>
+<P>
+<H4>
+OUTPUT FORMAT OPTIONS
+</H4>
+</P>
+<P>
+You can choose from 6 different alignment formats (CLUSTAL, GCG, NBRF/PIR,
+PHYLIP, GDE and NEXUS).  You can choose more than one (or all 6 if you wish).  
+</P>
+<P>
+CLUSTAL format output is a self explanatory alignment format. It shows the
+sequences aligned in blocks. It can be read in again at a later date to (for
+example) calculate a phylogenetic tree or add in new sequences by profile
+alignment.
+</P>
+<P>
+GCG output can be used by any of the GCG programs that can work on multiple
+alignments (e.g. PRETTY, PROFILEMAKE, PLOTALIGN). It is the same as the GCG
+.msf format files (multiple sequence file); new in version 7 of GCG.
+</P>
+<P>
+NEXUS format is used by several phylogeny programs, including PAUP and
+MacClade.
+</P>
+<P>
+PHYLIP format output can be used for input to the PHYLIP package of Joe 
+Felsenstein.  This is a very widely used package for doing every imaginable
+form of phylogenetic analysis (MUCH more than the modest introduction
+offered by this program).
+</P>
+<P>
+NBRF/PIR: this is the same as the standard PIR format with ONE ADDITION. Gap
+characters "-" are used to indicate the positions of gaps in the multiple 
+alignment. These files can be re-used as input in any part of clustal that
+allows sequences (or alignments or profiles) to be read in.  
+</P>
+<P>
+GDE:  this format is used by the GDE package of Steven Smith and is understood
+by SEQLAB in GCG 9 or later.
+</P>
+<P>
+GDE OUTPUT CASE: sequences in GDE format may be written in either upper or
+lower case.
+</P>
+<P> 
+CLUSTALW SEQUENCE NUMBERS: residue numbers may be added to the end of the
+alignment lines in clustalw format.
+</P>
+<P>
+OUTPUT ORDER is used to control the order of the sequences in the output
+alignments. By default, it uses the order in which the sequences were aligned
+(from the guide tree/dendrogram), thus automatically grouping closely related
+sequences. It can be switched to be the same as the original input order.
+</P>
+<P>
+PARAMETER OUTPUT: This option will save all your parameter settings in a
+parameter file (suffix .par) during alignment. The file can be subsequently
+used to rerun ClustalW using the same parameters.
+</P>
+<P>
+</P>
+<P>
+<H3>
+ALIGNMENT PARAMETERS
+</H3>
+</P>
+<P>
+<STRONG>
+PAIRWISE ALIGNMENT PARAMETERS
+</STRONG>
+</P>
+<P>
+A distance is calculated between every pair of sequences and these are used to
+construct the phylogenetic tree which guides the final multiple alignment. The
+scores are calculated from separate pairwise alignments. These can be
+calculated using 2 methods: dynamic programming (slow but accurate) or by the
+method of Wilbur and Lipman (extremely fast but approximate).   
+</P>
+<P>
+You can choose between the 2 alignment methods using the PAIRWISE ALIGNMENTS
+option. The slow/accurate method is fast enough for short sequences but will be
+VERY SLOW for many (e.g. >100) long (e.g. >1000 residue) sequences.   
+</P>
+<P>
+</P>
+<P>
+<STRONG>
+SLOW-ACCURATE alignment parameters:
+</STRONG>
+</P>
+<P>
+These parameters do not have any effect on the speed of the alignments. They
+are used to give initial alignments which are then rescored to give percent
+identity scores. These % scores are the ones which are displayed on the 
+screen. The scores are converted to distances for the trees.
+</P>
+<P>
+Gap Open Penalty:      the penalty for opening a gap in the alignment.
+</P>
+<P>
+Gap Extension Penalty: the penalty for extending a gap by 1 residue.
+</P>
+<P>
+Protein Weight Matrix: the scoring table which describes the similarity of 
+each amino acid to each other.
+</P>
+<P>
+Load protein matrix: allows you to read in a comparison table from a file.
+</P>
+<P>
+DNA weight matrix: the scores assigned to matches and mismatches (including
+IUB ambiguity codes).
+</P>
+<P>
+Load DNA matrix: allows you to read in a comparison table from a file.
+</P>
+<P>
+See the Multiple alignment parameters, MATRIX option below for details of the
+matrix input format.
+</P>
+<P>
+</P>
+<P>
+<STRONG>
+FAST-APPROXIMATE alignment parameters:
+</STRONG>
+</P>
+<P>
+These similarity scores are calculated from fast, approximate, global align-
+ments, which are controlled by 4 parameters. 2 techniques are used to make
+these alignments very fast: 1) only exactly matching fragments (k-tuples) are
+considered; 2) only the 'best' diagonals (the ones with most k-tuple matches)
+are used.
+</P>
+<P>
+GAP PENALTY:   This is a penalty for each gap in the fast alignments. It has
+little effect on the speed or sensitivity except for extreme values.
+</P>
+<P>
+K-TUPLE SIZE:  This is the size of exactly matching fragment that is used. 
+INCREASE for speed (max= 2 for proteins; 4 for DNA), DECREASE for sensitivity.
+For longer sequences (e.g. >1000 residues) you may wish to increase the
+default.
+</P>
+<P>
+TOP DIAGONALS: The number of k-tuple matches on each diagonal (in an imaginary
+dot-matrix plot) is calculated. Only the best ones (with most matches) are used
+in the alignment. This parameter specifies how many. Decrease for speed;
+increase for sensitivity.
+</P>
+<P>
+WINDOW SIZE:  This is the number of diagonals around each of the 'best' 
+diagonals that will be used. Decrease for speed; increase for sensitivity.
+</P>
+<P>
+</P>
+<P>
+<STRONG>
+MULTIPLE ALIGNMENT PARAMETERS
+</STRONG>
+</P>
+<P>
+These parameters control the final multiple alignment. This is the core of the
+program and the details are complicated. To fully understand the use of the
+parameters and the scoring system, you will have to refer to the documentation.
+</P>
+<P>
+Each step in the final multiple alignment consists of aligning two alignments 
+or sequences. This is done progressively, following the branching order in the
+GUIDE TREE. The basic parameters to control this are two gap penalties and the
+scores for various identical/non-identical residues. 
+</P>
+<P>
+The GAP OPENING and EXTENSION PENALTIES can be set here. These control the 
+cost of opening up every new gap and the cost of every item in a gap.  
+Increasing the gap opening penalty will make gaps less frequent. Increasing 
+the gap extension penalty will make gaps shorter. Terminal gaps are not 
+penalised.
+</P>
+<P>
+The DELAY DIVERGENT SEQUENCES switch delays the alignment of the most distantly
+related sequences until after the most closely related sequences have  been
+aligned. The setting shows the percent identity level required to delay the
+addition of a sequence; sequences that are less identical than this level to
+any other sequences will be aligned later.
+</P>
+<P>
+The TRANSITION WEIGHT gives transitions (A<-->G or C<-->T i.e. purine-purine or
+pyrimidine-pyrimidine substitutions) a weight between 0 and 1; a weight of zero
+means that the transitions are scored as mismatches, while a weight of 1 gives
+the transitions the match score. For distantly related DNA sequences, the
+weight should be near to zero; for closely related sequences it can be useful
+to assign a higher score. The default is set to 0.5.
+</P>
+<P>
+</P>
+<P>
+The PROTEIN WEIGHT MATRIX option allows you to choose a series of weight
+matrices. For protein alignments, you use a weight matrix to determine the
+similarity of non-identical amino acids. For example, Tyr aligned with Phe is
+usually judged to be 'better' than Tyr aligned with Pro.
+</P>
+<P>
+There are three 'in-built' series of weight matrices offered. Each consists of
+several matrices which work differently at different evolutionary distances. To
+see the exact details, read the documentation. Crudely, we store several
+matrices in memory, spanning the full range of amino acid distance (from almost
+identical sequences to highly divergent ones). For very similar sequences, it
+is best to use a strict weight matrix which only gives a high score to
+identities and the most favoured conservative substitutions. For more divergent
+sequences, it is appropriate to use "softer" matrices which give a high score
+to many other frequent substitutions.
+</P>
+<P>
+1) BLOSUM (Henikoff). These matrices appear to be the best available for 
+carrying out data base similarity (homology searches). The matrices currently
+used are: Blosum 80, 62, 45 and 30. BLOSUM was the default in earlier Clustal X
+versions.
+</P>
+<P>
+2) PAM (Dayhoff). These have been extremely widely used since the late '70s. We
+currently use the PAM 20, 60, 120, 350 matrices.
+</P>
+<P>
+3) GONNET. These matrices were derived using almost the same procedure as the
+Dayhoff one (above) but are much more up to date and are based on a far larger
+data set. They appear to be more sensitive than the Dayhoff series. We
+currently use the GONNET 80, 120, 160, 250 and 350 matrices. This series is the
+default for Clustal X version 1.8.
+</P>
+<P>
+We also supply an identity matrix which gives a score of 10 to two identical 
+amino acids and a score of zero otherwise. This matrix is not very useful.
+</P>
+<P>
+Load protein matrix: allows you to read in a comparison matrix from a file.
+This can be either a single matrix or a series of matrices (see below for
+format). 
+</P>
+<P>
+</P>
+<P>
+DNA WEIGHT MATRIX option allows you to select a single matrix (not a series)
+used for aligning nucleic acid sequences. Two hard-coded matrices are available:
+</P>
+<P>
+1) IUB. This is the default scoring matrix used by BESTFIT for the comparison
+of nucleic acid sequences. X's and N's are treated as matches to any IUB
+ambiguity symbol. All matches score 1.9; all mismatches for IUB symbols score 0.
+</P>
+<P>
+2) CLUSTALW(1.6). A previous system used by ClustalW, in which matches score
+1.0 and mismatches score 0. All matches for IUB symbols also score 0.
+</P>
+<P>
+Load DNA matrix: allows you to read in a nucleic acid comparison matrix from a
+file (just one matrix, not a series).
+</P>
+<P>
+</P>
+<P>
+SINGLE MATRIX INPUT FORMAT
+The format used for a single matrix is the same as the BLAST program. The
+scores in the new weight matrix should be similarities. You can use negative as
+well as positive values if you wish, although the matrix will be automatically
+adjusted to all positive scores, unless the NEGATIVE MATRIX option is selected.
+Any lines beginning with a # character are assumed to be comments. The first
+non-comment line should contain a list of amino acids in any order, using the 1
+letter code, followed by a * character. This should be followed by a square
+matrix of scores, with one row and one column for each amino acid. The last row
+and column of the matrix (corresponding to the * character) contain the minimum
+score over the whole matrix.
+</P>
+<P>
+MATRIX SERIES INPUT FORMAT
+ClustalX uses different matrices depending on the mean percent identity of the
+sequences to be aligned. You can specify a series of matrices and the range of
+the percent identity for each matrix in a matrix series file. The file is
+automatically recognised by the word CLUSTAL_SERIES at the beginning of the
+file. Each matrix in the series is then specified on one line which should
+start with the word MATRIX. This is followed by the lower and upper limits of
+the sequence percent identities for which you want to apply the matrix. The
+final entry on the matrix line is the filename of a Blast format matrix file
+(see above for details of the single matrix file format).
+</P>
+<P>
+Example.
+</P>
+<P>
+CLUSTAL_SERIES
+</P>
+<P> 
+MATRIX 81 100 /us1/user/julie/matrices/blosum80
+MATRIX 61 80 /us1/user/julie/matrices/blosum62
+MATRIX 31 60 /us1/user/julie/matrices/blosum45
+MATRIX 0 30 /us1/user/julie/matrices/blosum30
+</P>
+<P>
+</P>
+<P>
+<STRONG>
+PROTEIN GAP PARAMETERS
+</STRONG>
+</P>
+<P>
+RESIDUE SPECIFIC PENALTIES are amino acid specific gap penalties that reduce or
+increase the gap opening penalties at each position in the alignment or 
+sequence. See the documentation for details. As an example, positions that are
+rich in glycine are more likely to have an adjacent gap than positions that are
+rich in valine.
+</P>
+<P>
+HYDROPHILIC GAP PENALTIES are used to increase the chances of a gap within a
+run (5 or more residues) of hydrophilic amino acids; these are likely to be
+loop or random coil regions where gaps are more common. The residues that are
+"considered" to be hydrophilic can be entered in HYDROPHILIC RESIDUES.
+</P>
+<P>
+GAP SEPARATION DISTANCE tries to decrease the chances of gaps being too close
+to each other. Gaps that are less than this distance apart are penalised more
+than other gaps. This does not prevent close gaps; it makes them less frequent,
+promoting a block-like appearance of the alignment.
+</P>
+<P>
+END GAP SEPARATION treats end gaps just like internal gaps for the purposes of
+avoiding gaps that are too close (set by GAP SEPARATION DISTANCE above). If you
+turn this off, end gaps will be ignored for this purpose. This is useful when
+you wish to align fragments where the end gaps are not biologically meaningful.
+</P>
+<P>
+</P>
+<P>
+</P>
+<A HREF="#INDEX"> <EM>Back to Index</EM> </A>
+<CENTER><H2><A NAME="P">                   Profile and Structure Alignments
+</A></H2></CENTER>
+<P>
+</P>
+<P>   
+By PROFILE ALIGNMENT, we mean alignment using existing alignments. Profile 
+alignments allow you to store alignments of your favourite sequences and add
+new sequences to them in small bunches at a time. A profile is simply an
+alignment of one or more sequences (e.g. an alignment output file from Clustal
+X). Each input can be a single sequence. One or both sets of input sequences
+may include secondary structure assignments or gap penalty masks to guide the
+alignment. 
+</P>
+<P>
+Make sure PROFILE ALIGNMENT MODE is selected, using the switch directly above
+the sequence display area. Then, use the ALIGNMENT menu to do profile and
+secondary structure alignments.
+</P>
+<P>
+The profiles can be in any of the allowed input formats with "-" characters
+used to specify gaps (except for GCG/MSF where "." is used).
+</P>
+<P>
+You have to load the 2 profiles by choosing FILE, LOAD PROFILE 1 and  LOAD
+PROFILE 2. Then ALIGNMENT, ALIGN PROFILE 2 TO PROFILE 1 will align the 2
+profiles to each other. Secondary structure masks in either profile can be used
+to guide the alignment. This option compares all the sequences in profile 1
+with all the sequences in profile 2 in order to build guide trees which will be
+used to calculate sequence weights, and select appropriate alignment parameters
+for the final profile alignment.
+</P>
+<P>
+You can skip the first stage (pairwise alignments; guide trees) by using old
+guide tree files (ALIGN PROFILES FROM GUIDE TREES). 
+</P>
+<P>
+The ALIGN SEQUENCES TO PROFILE 1 option will take the sequences in the second
+profile and align them to the first profile, 1 at a time.  This is useful to
+add some new sequences to an existing alignment, or to align a set of sequences
+to a known structure. In this case, the second profile set need not be
+pre-aligned.
+</P>
+<P>
+You can skip the first stage (pairwise alignments; guide tree) by using an old
+guide tree file (ALIGN SEQUENCES TO PROFILE 1 FROM TREE). 
+</P>
+<P>
+SAVE LOG FILE will write the alignment calculation scores to a file. The log
+filename is the same as the input sequence filename, with an extension .log
+appended.
+</P>
+<P>
+The alignment parameters can be set using the ALIGNMENT PARAMETERS menu,
+Pairwise Parameters, Multiple Parameters and Protein Gap Parameters options.
+These are EXACTLY the same parameters as used by the general, automatic
+multiple alignment procedure. The general multiple alignment procedure is
+simply a series of profile alignments. Carrying out a series of profile
+alignments on larger and larger groups of sequences, allows you to manually
+build up a complete alignment, if necessary editing intermediate alignments.
+</P>
+<P>
+<STRONG>
+SECONDARY STRUCTURE PARAMETERS
+</STRONG>
+</P>
+<P>
+Use this menu to set secondary structure options. If a solved structure is
+known, it can be used to guide the alignment by raising gap penalties within
+secondary structure elements, so that gaps will preferentially be inserted into
+unstructured surface loop regions. Alternatively, a user-specified gap penalty
+mask can be supplied for a similar purpose.
+</P>
+<P>
+A gap penalty mask is a series of numbers between 1 and 9, one per position in 
+the alignment. Each number specifies how much the gap opening penalty is to be 
+raised at that position (raised by multiplying the basic gap opening penalty
+by the number) i.e. a mask figure of 1 at a position means no change
+in gap opening penalty; a figure of 4 means that the gap opening penalty is
+four times greater at that position, making gaps 4 times harder to open.
+</P>
+<P>
+The format for gap penalty masks and secondary structure masks is explained in
+a separate help section.
+</P>
+<P>
+</P>
+<A HREF="#INDEX"> <EM>Back to Index</EM> </A>
+<CENTER><H2><A NAME="B">            Secondary Structure / Gap Penalty Masks
+</A></H2></CENTER>
+<P>
+</P>
+<P>
+The use of secondary structure-based penalties has been shown to improve  the
+accuracy of sequence alignment. Clustal X now allows secondary structure/ gap
+penalty masks to be supplied with the input sequences used during profile
+alignment. (NB. The secondary structure information is NOT used during multiple
+sequence alignment). The masks work by raising gap penalties in specified
+regions (typically secondary structure elements) so that gaps are
+preferentially opened in the less well conserved regions (typically surface
+loops).
+</P>
+<P>
+The USE PROFILE 1(2) SECONDARY STRUCTURE / GAP PENALTY MASK options control
+whether the input 2D-structure information or gap penalty masks will be used
+during the profile alignment.
+</P>
+<P>
+The OUTPUT options control whether the secondary structure and gap penalty
+masks should be included in the Clustal X output alignments. Showing both is
+useful for understanding how the masks work. The 2D-structure information is
+itself useful in judging the alignment quality and in seeing how residue
+conservation patterns vary with secondary structure. 
+</P>
+<P>
+The HELIX and STRAND GAP PENALTY options provide the value for raising the gap
+penalty at core Alpha Helical (A) and Beta Strand (B) residues. In CLUSTAL
+format, capital residues denote the A and B core structure notation. Basic gap
+penalties are multiplied by the amount specified.
+</P>
+<P>
+The LOOP GAP PENALTY option provides the value for the gap penalty in Loops.
+By default this penalty is not raised. In CLUSTAL format, loops are specified
+by "." in the secondary structure notation.
+</P>
+<P>
+The SECONDARY STRUCTURE TERMINAL PENALTY provides the value for setting the gap
+penalty at the ends of secondary structures. Ends of secondary structures are
+known to grow or shrink, comparing related structures. Therefore by default
+these are given intermediate values, lower than the core penalties. All
+secondary structure read in as lower case in CLUSTAL format gets the reduced
+terminal penalty.
+</P>
+<P>
+The HELIX and STRAND TERMINAL POSITIONS options specify the range of structure
+termini for the intermediate penalties. In the alignment output, these are
+indicated as lower case. For Alpha Helices, by default, the range spans the 
+end-helical turn (3 residues). For Beta Strands, the default range spans the
+end residue and the adjacent loop residue, since sequence conservation often
+extends beyond the actual H-bonded Beta Strand.
+</P>
+<P>
+Clustal X can read the masks from SWISS-PROT, CLUSTAL or GDE format input
+files. For many 3-D protein structures, secondary structure information is
+recorded in the feature tables of SWISS-PROT database entries. You should
+always check that the assignments are correct - some are quite inaccurate.
+Clustal X looks for SWISS-PROT HELIX and STRAND assignments e.g.
+</P>
+<P>
+</P>
+<P>
+<PRE>
+FT   HELIX       100    115
+FT   STRAND      118    119
+</PRE>
+</P>
+<P>
+The structure and penalty masks can also be read from CLUSTAL alignment format 
+as comment lines beginning "!SS_" or "!GM_" e.g.
+</P>
+<P>
+<PRE>
+!SS_HBA_HUMA    ..aaaAAAAAAAAAAaaa.aaaAAAAAAAAAAaaaaaaAaaa.........aaaAAAAAA
+!GM_HBA_HUMA    112224444444444222122244444444442222224222111111111222444444
+HBA_HUMA        VLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHGK
+</PRE>
+</P>
+<P>
+Note that the mask itself is a set of numbers between 1 and 9 each of which is 
+assigned to the residue(s) in the same column below. 
+</P>
+<P>
+In GDE flat file format, the masks are specified as text and the names must
+begin with "SS_" or "GM_".
+</P>
+<P>
+Either a structure or penalty mask or both may be used. If both are included
+in an alignment, the user will be asked which is to be used.
+</P>
+<P>
+</P>
+<P>
+</P>
+<A HREF="#INDEX"> <EM>Back to Index</EM> </A>
+<CENTER><H2><A NAME="T">                            Phylogenetic Trees
+</A></H2></CENTER>
+<P>
+</P>
+<P>
+Before calculating a tree, you must have an ALIGNMENT in memory. This can be
+input using the FILE menu, LOAD SEQUENCES option or you should have just
+carried out a full multiple alignment and the alignment is still in memory.
+Remember YOU MUST ALIGN THE SEQUENCES FIRST!!!!
+</P>
+<P>
+The method used is the NJ (Neighbour Joining) method of Saitou and Nei. First
+you calculate distances (percent divergence) between all pairs of sequences from
+a multiple alignment; second you apply the NJ method to the distance matrix.
+</P>
+<P>
+To calculate a tree, use the DRAW N-J TREE option. This gives an UNROOTED tree
+and all branch lengths. The root of the tree can only be inferred by using an
+outgroup (a sequence that you are certain branches at the outside of the tree
+.... certain on biological grounds) OR if you assume a degree of constancy in
+the 'molecular clock', you can place the root in the 'middle' of the tree
+(roughly equidistant from all tips).
+</P>
+<P>
+BOOTSTRAP N-J TREE uses a method for deriving confidence values for the 
+groupings in a tree (first adapted for trees by Joe Felsenstein). It involves
+making N random samples of sites from the alignment (N should be LARGE, e.g.
+500 - 1000); drawing N trees (1 from each sample) and counting how many times
+each grouping from the original tree occurs in the sample trees. You can set N
+using the NUMBER OF BOOTSTRAP TRIALS option in the BOOTSTRAP TREE window. In
+practice, you should use a large number of bootstrap replicates (1000 is
+recommended, even if it means running the program for an hour on a slow 
+computer). You can also supply a seed number for the random number generator
+here. Different runs with the same seed will give the same answer. See the
+documentation for more details.
+</P>
+<P>
+EXCLUDE POSITIONS WITH GAPS? With this option, any alignment positions where
+ANY of the sequences have a gap will be ignored. This means that 'like' will
+be compared to 'like' in all distances, which is highly desirable. It also
+automatically throws away the most ambiguous parts of the alignment, which are
+concentrated around gaps (usually). The disadvantage is that you may throw away
+much of the data if there are many gaps (which is why it is difficult for us to
+make it the default).  
+</P>
+<P>
+CORRECT FOR MULTIPLE SUBSTITUTIONS? For small divergence (say <10%) this option
+makes no difference. For greater divergence, this option corrects for the fact
+that observed distances underestimate actual evolutionary distances. This is
+because, as sequences diverge, more than one substitution will happen at many
+sites. However, you only see one difference when you look at the present day
+sequences. Therefore, this option has the effect of stretching branch lengths
+in trees (especially long branches). The corrections used here (for DNA or
+proteins) are both due to Motoo Kimura. See the documentation for details.  
+</P>
+<P>
+Where possible, this option should be used. However, for VERY divergent
+sequences, the distances cannot be reliably corrected. You will be warned if
+this happens. Even if none of the distances in a data set exceed the reliable
+threshold, if you bootstrap the data, some of the bootstrap distances may
+randomly exceed the safe limit.  
+</P>
+<P>
+SAVE LOG FILE will write the tree calculation scores to a file. The log
+filename is the same as the input sequence filename, with an extension .log
+appended.
+</P>
+<P>
+<H4>
+OUTPUT FORMAT OPTIONS
+</H4>
+</P>
+<P>
+Four different formats are allowed. None of these displays the tree visually.
+You can display the tree using the NJPLOT program distributed with Clustal X
+OR get the PHYLIP package and use the tree drawing facilities there. 
+</P>
+<P> 
+1) CLUSTAL FORMAT TREE. This format is verbose and lists all of the distances
+between the sequences and the number of alignment positions used for each. The
+tree is described at the end of the file. It lists the sequences that are
+joined at each alignment step and the branch lengths. After two sequences are
+joined, it is referred to later as a NODE. The number of a NODE is the number
+of the lowest sequence in that NODE.   
+</P>
+<P>
+2) PHYLIP FORMAT TREE. This format is the New Hampshire format, used by many
+phylogenetic analysis packages. It consists of a series of nested parentheses,
+describing the branching order, with the sequence names and branch lengths. It
+can be read by the NJPLOT program distributed with ClustalX. It can also be
+used by the RETREE, DRAWGRAM and DRAWTREE programs of the PHYLIP package to see
+the trees graphically. This is the same format used during multiple alignment
+for the guide trees. Some other packages that can read and display New
+Hampshire format are TreeTool, TreeView, and Phylowin.
+</P>
+<P>
+3) PHYLIP DISTANCE MATRIX. This format just outputs a matrix of all the
+pairwise distances in a format that can be used by the PHYLIP package. It used
+to be useful when one could not produce distances from protein sequences in the
+Phylip package but is now redundant (PROTDIST of Phylip 3.5 now does this).
+</P>
+<P>
+4) NEXUS FORMAT TREE. This format is used by several popular phylogeny programs,
+including PAUP and MacClade. The format is described fully in:
+Maddison, D. R., D. L. Swofford and W. P. Maddison.  1997.
+NEXUS: an extensible file format for systematic information.
+Systematic Biology 46:590-621.
+</P>
+<P>
+BOOTSTRAP LABELS ON: By default, the bootstrap values are correctly placed on
+the tree branches of the phylip format output tree. The toggle allows them to
+be placed on the nodes, which is incorrect, but some display packages (e.g.
+TreeTool, TreeView and Phylowin) only support node labelling but not branch
+labelling. Care should be taken to note which branches and labels go together. 
+</P>
+<P>
+</P>
+<P>
+</P>
+<A HREF="#INDEX"> <EM>Back to Index</EM> </A>
+<CENTER><H2><A NAME="C">                               Colors
+</A></H2></CENTER>
+<P>
+</P>
+<P>
+Clustal X provides a versatile coloring scheme for the sequence alignment 
+display. The sequences (or profiles) are colored automatically, when they are
+loaded. Sequences can be colored either by assigning a color to specific
+residues, or on the basis of an alignment consensus. In the latter case, the
+alignment consensus is calculated automatically, and the residues in each
+column are colored according to the consensus character assigned to that
+column. In this way, you can choose to highlight, for example, conserved
+hydrophilic or hydrophobic positions in the alignment.
+</P>
+<P>
+The 'rules' used to color the alignment are specified in a COLOR PARAMETER
+FILE. Clustal X automatically looks for a file called 'colprot.par' for protein
+sequences or 'coldna.par' for DNA, in the current directory. (If you are running
+under UNIX, it then looks in your home directory, and finally in the
+directories in your PATH environment variable).
+</P>
+<P>
+By default, if no color parameter file is found, protein sequences are colored
+by residue as follows:
+</P>
+<P>
+<PRE>
+	Color			Residue Code
+</P>
+<P>
+	ORANGE			GPST
+	RED			HKR
+	BLUE			FWY
+	GREEN			ILMV
+</PRE>
+</P>
+<P>
+In the case of DNA sequences, the default colors are as follows:
+</P>
+<P>
+<PRE>
+	Color			Residue Code
+</P>
+<P>
+	ORANGE			A
+	RED			C
+	BLUE			T
+	GREEN			G
+</PRE>
+</P>
+<P>
+</P>
+<P>
+The default BACKGROUND COLORING option shows the sequence residues using a
+black character on a colored background. It can be switched off to show
+residues as a colored character on a white background. 
+</P>
+<P>
+Either BLACK AND WHITE or DEFAULT COLOR options can be selected. The Color
+option looks first for the color parameter file (as described above) and, if no
+file is found, uses the default residue-specific colors.
+</P>
+<P>
+You can specify your own coloring scheme by using the LOAD COLOR PARAMETER FILE
+option. The format of the color parameter file is described below.
+</P>
+<P>
+<H4>
+COLOR PARAMETER FILE
+</H4>
+</P>
+<P>
+This file is divided into 3 sections:
+</P>
+<P>
+1) the names and rgb values of the colors
+2) the rules for calculating the consensus
+3) the rules for assigning colors to the residues
+</P>
+<P> 
+An example file is given here.
+</P>
+<P>
+<PRE>
+ --------------------------------------------------------------------
+@rgbindex
+RED          0.9 0.1 0.1
+BLUE         0.1 0.1 0.9
+GREEN        0.1 0.9 0.1
+YELLOW       0.9 0.9 0.0
+</P>
+<P>
+@consensus
+% = 60% w:l:v:i:m:a:f:c:y:h:p
+# = 80% w:l:v:i:m:a:f:c:y:h:p
+- = 50% e:d
++ = 60% k:r
+q = 50% q:e
+p = 50% p
+n = 50% n
+t = 50% t:s
+</P>
+<P>
+@color
+g = RED
+p = YELLOW
+t = GREEN if t:%:#
+n = GREEN if n
+w = BLUE if %:#:p
+k = RED if +
+ --------------------------------------------------------------------
+</PRE>
+</P>
+<P>
+The first section is optional and is identified by the header @rgbindex. If
+this section exists, each color used in the file must be named and the rgb
+values specified (on a scale from 0 to 1). If the rgb index section is not
+found, the following set of hard-coded colors will be used.
+</P>
+<P>
+<PRE>
+RED          0.9 0.1 0.1
+BLUE         0.1 0.1 0.9
+GREEN        0.1 0.9 0.1
+ORANGE       0.9 0.7 0.3
+CYAN         0.1 0.9 0.9
+PINK         0.9 0.5 0.5
+MAGENTA      0.9 0.1 0.9
+YELLOW       0.9 0.9 0.0
+</PRE>
+</P>
+<P>
+The second section is optional and is identified by the header @consensus. It
+defines how the consensus is calculated.
+</P>
+<P> 
+The format of each consensus parameter is:-
+</P>
+<P> 
+<PRE>
+c = n% residue_list
+</P>
+<P> 
+        where
+              c             is a character used to identify the parameter.
+              n             is an integer value used as the percentage cutoff
+                            point.
+              residue_list  is a list of residues denoted by a single
+                            character, delimited by a colon (:).
+</PRE>
+</P>
+<P> 
+For example:   # = 60% w:l:v:i
+</P>
+<P>
+will assign a consensus character # to any column in the alignment which
+contains more than 60% of the residues w,l,v and i.
+</P>
+<P>        
+</P>
+<P> 
+The third section is identified by the header @color, and defines how colors
+are assigned to each residue in the alignment.
+</P>
+<P> 
+The color parameters can take one of two formats:
+</P>
+<P>
+<PRE>
+1) r = color
+2) r = color if consensus_list
+</P>
+<P> 
+        where
+              r             is a character used to denote a residue.
+              color         is one of the colors in the GDE color lookup table.
+              consensus_list is a list of consensus characters, each denoted
+                            by a single character, delimited by a colon (:).
+</PRE>
+</P>
+<P> 
+Examples:
+1) g = ORANGE
+</P>
+<P>
+will color all glycines ORANGE, regardless of the consensus.
+</P>
+<P>
+2) w = BLUE if w:%:#
+</P>
+<P>
+will color BLUE any tryptophan which is found in a column with a consensus of
+w, % or #.
+</P>
+<P> 
+</P>
+<P>
+</P>
+<A HREF="#INDEX"> <EM>Back to Index</EM> </A>
+<CENTER><H2><A NAME="Q">                       Alignment Quality Analysis
+</A></H2></CENTER>
+<P>
+</P>
+<P>
+<H3>
+QUALITY SCORES
+</H3>
+</P>
+<P>
+Clustal X provides an indication of the quality of an alignment by plotting
+a 'conservation score' for each column of the alignment. A high score indicates
+a well-conserved column; a low score indicates low conservation. The quality
+curve is drawn below the alignment.
+</P>
+<P>
+Two methods are also provided to indicate single residues or sequence segments
+which score badly in the alignment.
+</P>
+<P> 
+Low-scoring residues are expected to occur at a moderate frequency in all the
+sequences because of their steady divergence due to the natural processes of
+evolution. The most divergent sequences are likely to have the most outliers.
+However, the highlighted residues are especially useful in pointing to
+sequence misalignments. Note that clustering of highlighted residues is a
+strong indication of misalignment. This can arise due to various reasons, for
+example:
+</P>
+<P> 
+        1. Partial or total misalignments caused by a failure in the
+        alignment algorithm. Usually only in difficult alignment cases.
+</P>
+<P> 
+        2. Partial or total misalignments because at least one of the
+        sequences in the given set is partly or completely unrelated to the
+        other sequences. It is up to the user to check that the set of
+        sequences is alignable.
+</P>
+<P>
+        3. Frameshift translation errors in a protein sequence causing local
+        mismatched regions to be heavily highlighted. These are surprisingly
+        common in database entries. If suspected, a 3-frame translation of
+        the source DNA needs to be examined.
+</P>
+<P> 
+Occasionally, highlighted residues may point to regions of some biological
+significance. This might happen for example if a protein alignment contains a
+sequence which has acquired new functions relative to the main sequence set. It
+is important to exclude other explanations, such as error or the natural
+divergence of sequences, before invoking a biological explanation.
+</P>
+<P>
+</P>
+<P>
+<H3>
+LOW-SCORING SEGMENTS
+</H3>
+</P>
+<P>
+Unreliable regions in the alignment can be highlighted using the Low-Scoring
+Segments option. A sequence-weighted profile is used to indicate any segments
+in the sequences which score badly. Because the profile calculation may take
+some time, an option is provided to calculate LOW-SCORING SEGMENTS. The 
+segment display can then be toggled on or off without having to repeat the
+time-consuming calculations.
+</P>
+<P>
+For details of the low-scoring segment calculation, see the CALCULATION section
+below.
+</P>
+<P>
+</P>
+<P>
+<H4>
+LOW-SCORING SEGMENT PARAMETERS
+</H4>
+</P>
+<P>
+MINIMUM LENGTH OF SEGMENTS: short segments (or even single residues) can be
+hidden by increasing the minimum length of segments which will be displayed.
+</P>
+<P>
+DNA MARKING SCALE is used to remove less significant segments from the 
+highlighted display. Increase the scale to display more segments; decrease the
+scale to remove the least significant.
+</P>
+<P>
+</P>
+<P>
+PROTEIN WEIGHT MATRIX: the scoring table which describes the similarity of each
+amino acid to each other. The matrix is used to calculate the sequence-
+weighted profile scores. There are four 'in-built' Log-Odds matrices offered:
+the Gonnet PAM 80, 120, 250, 350 matrices. A more stringent matrix which only
+gives a high score to identities and the most favoured conservative
+substitutions, may be more suitable when the sequences are closely related. For
+more divergent sequences, it is appropriate to use "softer" matrices which give
+a high score to many other frequent substitutions. This  option automatically
+recalculates the low-scoring segments.
+</P>
+<P>
+</P>
+<P>
+DNA WEIGHT MATRIX: Two hard-coded matrices are available:
+</P>
+<P>
+1) IUB. This is the default scoring matrix used by BESTFIT for the comparison
+of nucleic acid sequences. X's and N's are treated as matches to any IUB
+ambiguity symbol. All matches score 1.0; all mismatches for IUB symbols score
+0.9.
+</P>
+<P>
+2) CLUSTALW(1.6). The previous system used by ClustalW, in which matches score
+1.0 and mismatches score 0. All matches for IUB symbols also score 0. 
+</P>
+<P>
+A new matrix can be read from a file on disk, if the filename consists only
+of lower case characters. The values in the new weight matrix should be
+similarities and should be NEGATIVE for infrequent substitutions.
+</P>
+<P> 
+INPUT FORMAT. The format used for a new matrix is the same as the BLAST
+program. Any lines beginning with a # character are assumed to be comments. The
+first non-comment line should contain a list of amino acids in any order, using
+the 1 letter code, followed by a * character. This should be followed by a
+square matrix of scores, with one row and one column for each amino acid. The
+last row and column of the matrix (corresponding to the * character) contain
+the minimum score over the whole matrix.
+</P>
+<P>
+<H4>
+QUALITY SCORE PARAMETERS
+</H4>
+</P>
+<P>
+You can customise the column 'quality scores' plotted underneath the alignment
+display using the following options.
+</P>
+<P>
+SCORE PLOT SCALE: this is a scalar value from 1 to 10, which can be used to
+change the scale of the quality score plot. 
+</P>
+<P>
+RESIDUE EXCEPTION CUTOFF: this is a scalar value from 1 to 10, which can be
+used to change the number of residue exceptions which are highlighted in the
+alignment display. (For an explanation of this cutoff, see the CALCULATION OF
+RESIDUE EXCEPTIONS section below.)
+</P>
+<P>
+PROTEIN WEIGHT MATRIX: the scoring table which describes the similarity of 
+each amino acid to each other. 
+</P>
+<P> 
+DNA WEIGHT MATRIX: two hard-coded matrices are available: IUB and CLUSTALW(1.6).
+</P>
+<P>
+For more information about the weight matrices, see the help above for
+the Low-scoring Segments Weight Matrix.
+</P>
+<P>
+For details of the quality score calculations, see the CALCULATION section
+below.
+</P>
+<P>
+</P>
+<P>
+<STRONG>
+SHOW LOW-SCORING SEGMENTS
+</STRONG>
+</P>
+<P>                       
+The low-scoring segment display can be toggled on or off. This option does not
+recalculate the profile scores.
+</P>
+<P>
+</P>
+<P>
+<STRONG>
+SHOW EXCEPTIONAL RESIDUES
+</STRONG>
+</P>
+<P>                       
+This option highlights individual residues which score badly in the alignment
+quality calculations. Residues which score exceptionally low are highlighted by
+using a white character on a grey background.
+</P>
+<P>
+<STRONG>
+SAVE QUALITY SCORES TO FILE
+</STRONG>
+</P>
+<P>
+The quality scores that are plotted underneath the alignment display can also
+be saved in a text file. Each column in the alignment is written on one line in
+the output file, with the value of the quality score at the end of the line.
+Only the sequences currently selected in the display are written to the file.
+One use for quality scores is to color residues in a protein structure by
+sequence conservation. In this way conserved surface residues can be
+highlighted to locate functional regions such as ligand-binding sites.
+</P>
+<P>
+</P>
+<P>
+<H3>
+CALCULATION OF QUALITY SCORES
+</H3>
+</P>
+<P>
+Suppose we have an alignment of m sequences of length n. Then, the alignment
+can be written as:
+</P>
+<P>
+<PRE>
+        A11 A12 A13 .......... A1n
+        A21 A22 A23 .......... A2n
+        .
+        .
+        Am1 Am2 Am3 .......... Amn
+</PRE>
+</P>
+<P>
+We also have a residue comparison matrix of size R where C(i,j) is the score
+for aligning residue i with residue j.
+</P>
+<P>
+We want to calculate a score for the conservation of the jth position in the
+alignment.
+</P>
+<P>
+To do this, we define an R-dimensional sequence space. For the jth position in 
+the alignment, each sequence consists of a single residue which is assigned a
+point S in the space. S has R dimensions, and for sequence i, the rth dimension
+is defined as:
+</P>
+<P>
+<PRE>
+	Sr =    C(r,Aij)
+</PRE>
+</P>
+<P>
+We then calculate a consensus value for the jth position in the alignment. This
+value X also has R dimensions, and the rth dimension is defined as:
+</P>
+<P>
+<PRE>
+	Xr = (   SUM   (Fij * C(i,r)) ) / m
+               1<=i<=R
+</PRE>
+</P>
+<P>
+where Fij is the count of residues i at position j in the alignment.
+</P>
+<P>
+Now we can calculate the distance Di between each sequence i and the consensus 
+position X in the R-dimensional space.
+</P>
+<P>
+<PRE>
+	Di = SQRT   (   SUM   (Xr - Sr)(Xr - Sr) )
+                      1<=r<=R
+</P>
+<P>
+</PRE>
+</P>
+<P>
+The quality score for the jth position in the alignment is defined as the mean
+of the sequence distances Di.
+</P>
+<P>
+The score is normalised by multiplying by the percentage of sequences which
+have residues (and not gaps) at this position.
+</P>
+<P>
+<H3>
+CALCULATION OF RESIDUE EXCEPTIONS
+</H3>
+</P>
+<P>
+The jth residue of the ith sequence is considered as an exception if the
+distance Di of the sequence from the consensus value X is greater than (Upper
+Quartile + Inter Quartile Range * Cutoff). The value used as a cutoff for
+displaying exceptions can be set from the SCORE PARAMETERS menu. A high cutoff
+value will only display very significant exceptions; a low value will allow
+more, less significant, exceptions to be highlighted.
+</P>
+<P>
+(NB. Sequences which contain gaps at this position are not included in the
+exception calculation.)
+</P>
+<P>
+</P>
+<P>
+<H3>
+CALCULATION OF LOW-SCORING SEGMENTS
+</H3>
+</P>
+<P>
+Suppose we have an alignment of m sequences of length n. Then, the alignment
+can be written as:
+</P>
+<P>
+<PRE>
+        A11 A12 A13 .......... A1n
+        A21 A22 A23 .......... A2n
+        .
+        .
+        Am1 Am2 Am3 .......... Amn
+</PRE>
+</P>
+<P>
+We also have a residue comparison matrix of size R where C(i,j) is the score
+for aligning residue i with residue j.
+</P>
+<P>
+We calculate sequence weights by building a neighbour-joining tree, in which
+branch lengths are proportional to divergence. Summing the branches by branch
+ownership provides the weights. See Thompson et al., CABIOS, 10, 19 (1994) and
+Henikoff et al., JMB, 243, 574 (1994).
+</P>
+<P>
+To find the low-scoring segments in a sequence Si, we build a weighted profile
+of the remaining sequences in the alignment. Suppose we find residue r at 
+position j in the sequence; then the score for the jth position in the sequence
+is defined as
+</P>
+<P>
+<PRE>
+	Score(Si,j) = Profile(j,r)   where Profile(j,r) is the profile score
+                                       for residue r at position j in the
+                                       alignment.
+</PRE>
+</P>
+<P>
+These residue scores are summed along the sequence in both forward and backward
+directions. If the sum of the scores is positive, then it is reset to zero.
+Segments which score negatively in both directions are considered as 
+'low-scoring' and will be highlighted in the alignment display.
+</P>
+<P>
+</P>
+<P>
+</P>
+<A HREF="#INDEX"> <EM>Back to Index</EM> </A>
+<CENTER><H2><A NAME="9">              Command Line Parameters
+</A></H2></CENTER>
+<CENTER><H3>                DATA (sequences)
+</H3></CENTER>
+<CENTER><TABLE ALIGN=ABSCENTER BORDER=1 CELLSPACING=1 CELLPADDING=5>
+<TR>
+<TD><STRONG>Parameter</STRONG></TD>
+<TD><STRONG><EM>Description</EM></STRONG></TD>
+</TR>
+<TR>
+<TD><TT>-PROFILE1=file.ext  and  -PROFILE2=file.ext  </TT></TD>
+<TD><EM>profiles (aligned sequences)</EM></TD>
+</TR>
+</TABLE></CENTER>
+<CENTER><H3>                VERBS (do things)
+</H3></CENTER>
+<CENTER><TABLE ALIGN=ABSCENTER BORDER=1 CELLSPACING=1 CELLPADDING=5>
+<TR>
+<TD><STRONG>Parameter</STRONG></TD>
+<TD><STRONG><EM>Description</EM></STRONG></TD>
+</TR>
+<TR>
+<TD><TT>-HELP  or -CHECK    </TT></TD>
+<TD><EM>outline the command line parameters</EM></TD>
+</TR>
+<TR>
+<TD><TT>-ALIGN              </TT></TD>
+<TD><EM>do full multiple alignment </EM></TD>
+</TR>
+<TR>
+<TD><TT>-TREE               </TT></TD>
+<TD><EM>calculate NJ tree</EM></TD>
+</TR>
+<TR>
+<TD><TT>-BOOTSTRAP(=n)      </TT></TD>
+<TD><EM>bootstrap a NJ tree (n= number of bootstraps; def. = 1000)</EM></TD>
+</TR>
+<TR>
+<TD><TT>-CONVERT            </TT></TD>
+<TD><EM>output the input sequences in a different file format</EM></TD>
+</TR>
+</TABLE></CENTER>
+<CENTER><H3>                PARAMETERS (set things)
+</H3></CENTER>
+<CENTER><P><STRONG>***General settings:***
+</STRONG></P></CENTER>
+<CENTER><TABLE ALIGN=ABSCENTER BORDER=1 CELLSPACING=1 CELLPADDING=5>
+<TR>
+<TD><STRONG>Parameter</STRONG></TD>
+<TD><STRONG><EM>Description</EM></STRONG></TD>
+</TR>
+<TR>
+<TD><TT>-INTERACTIVE </TT></TD>
+<TD><EM>read command line, then enter normal interactive menus</EM></TD>
+</TR>
+<TR>
+<TD><TT>-QUICKTREE   </TT></TD>
+<TD><EM>use FAST algorithm for the alignment guide tree</EM></TD>
+</TR>
+<TR>
+<TD><TT>-TYPE=       </TT></TD>
+<TD><EM>PROTEIN or DNA sequences</EM></TD>
+</TR>
+<TR>
+<TD><TT>-NEGATIVE    </TT></TD>
+<TD><EM>protein alignment with negative values in matrix</EM></TD>
+</TR>
+<TR>
+<TD><TT>-OUTFILE=    </TT></TD>
+<TD><EM>sequence alignment file name</EM></TD>
+</TR>
+<TR>
+<TD><TT>-OUTPUT=     </TT></TD>
+<TD><EM>GCG, GDE, PHYLIP, PIR or NEXUS</EM></TD>
+</TR>
+<TR>
+<TD><TT>-OUTORDER=   </TT></TD>
+<TD><EM>INPUT or ALIGNED</EM></TD>
+</TR>
+<TR>
+<TD><TT>-CASE=       </TT></TD>
+<TD><EM>LOWER or UPPER (for GDE output only)</EM></TD>
+</TR>
+<TR>
+<TD><TT>-SEQNOS=     </TT></TD>
+<TD><EM>OFF or ON (for Clustal output only)</EM></TD>
+</TR>
+</TABLE></CENTER>
+<CENTER><H3>***Fast Pairwise Alignments:***
+</H3></CENTER>
+<CENTER><TABLE ALIGN=ABSCENTER BORDER=1 CELLSPACING=1 CELLPADDING=5>
+<TR>
+<TD><STRONG>Parameter</STRONG></TD>
+<TD><STRONG><EM>Description</EM></STRONG></TD>
+</TR>
+<TR>
+<TD><TT>-TOPDIAGS=n  </TT></TD>
+<TD><EM>number of best diags.</EM></TD>
+</TR>
+<TR>
+<TD><TT>-WINDOW=n    </TT></TD>
+<TD><EM>window around best diags.</EM></TD>
+</TR>
+<TR>
+<TD><TT>-PAIRGAP=n   </TT></TD>
+<TD><EM>gap penalty</EM></TD>
+</TR>
+<TR>
+<TD><TT>-SCORE=      </TT></TD>
+<TD><EM>PERCENT or ABSOLUTE</EM></TD>
+</TR>
+</TABLE></CENTER>
+<CENTER><H3>***Slow Pairwise Alignments:***
+</H3></CENTER>
+<CENTER><TABLE ALIGN=ABSCENTER BORDER=1 CELLSPACING=1 CELLPADDING=5>
+<TR>
+<TD><STRONG>Parameter</STRONG></TD>
+<TD><STRONG><EM>Description</EM></STRONG></TD>
+</TR>
+<TR>
+<TD><TT>-PWDNAMATRIX= </TT></TD>
+<TD><EM>DNA weight matrix=IUB, CLUSTALW or filename</EM></TD>
+</TR>
+<TR>
+<TD><TT>-PWGAPOPEN=f  </TT></TD>
+<TD><EM>gap opening penalty</EM></TD>
+</TR>
+<TR>
+<TD><TT>-PWGAPEXT=f  </TT></TD>
+<TD><EM>gap extension penalty</EM></TD>
+</TR>
+</TABLE></CENTER>
+<CENTER><H3>***Multiple Alignments:***
+</H3></CENTER>
+<CENTER><TABLE ALIGN=ABSCENTER BORDER=1 CELLSPACING=1 CELLPADDING=5>
+<TR>
+<TD><STRONG>Parameter</STRONG></TD>
+<TD><STRONG><EM>Description</EM></STRONG></TD>
+</TR>
+<TR>
+<TD><TT>-USETREE=    </TT></TD>
+<TD><EM>file for old guide tree</EM></TD>
+</TR>
+<TR>
+<TD><TT>-MATRIX=     </TT></TD>
+<TD><EM>Protein weight matrix=BLOSUM, PAM, GONNET, ID or filename</EM></TD>
+</TR>
+<TR>
+<TD><TT>-DNAMATRIX=  </TT></TD>
+<TD><EM>DNA weight matrix=IUB, CLUSTALW or filename</EM></TD>
+</TR>
+<TR>
+<TD><TT>-GAPOPEN=f   </TT></TD>
+<TD><EM>gap opening penalty</EM></TD>
+</TR>
+<TR>
+<TD><TT>-GAPEXT=f  </TT></TD>
+<TD><EM>gap extension penalty</EM></TD>
+</TR>
+<TR>
+<TD><TT>-ENDGAPS     </TT></TD>
+<TD><EM>no end gap separation pen.</EM></TD>
+</TR>
+<TR>
+<TD><TT>-GAPDIST=n   </TT></TD>
+<TD><EM>gap separation pen. range</EM></TD>
+</TR>
+<TR>
+<TD><TT>-NOPGAP      </TT></TD>
+<TD><EM>residue-specific gaps off</EM></TD>
+</TR>
+<TR>
+<TD><TT>-NOHGAP    </TT></TD>
+<TD><EM>hydrophilic gaps off</EM></TD>
+</TR>
+<TR>
+<TD><TT>-HGAPRESIDUES= </TT></TD>
+<TD><EM>list hydrophilic res.</EM></TD>
+</TR>
+<TR>
+<TD><TT>-MAXDIV=n    </TT></TD>
+<TD><EM>% ident. for delay</EM></TD>
+</TR>
+<TR>
+<TD><TT>-TYPE=       </TT></TD>
+<TD><EM>PROTEIN or DNA</EM></TD>
+</TR>
+<TR>
+<TD><TT>-TRANSWEIGHT=f </TT></TD>
+<TD><EM>transitions weighting</EM></TD>
+</TR>
+</TABLE></CENTER>
+<CENTER><H3>***Profile Alignments:***
+</H3></CENTER>
+<CENTER><TABLE ALIGN=ABSCENTER BORDER=1 CELLSPACING=1 CELLPADDING=5>
+<TR>
+<TD><STRONG>Parameter</STRONG></TD>
+<TD><STRONG><EM>Description</EM></STRONG></TD>
+</TR>
+<TR>
+<TD><TT>-NEWTREE1=    </TT></TD>
+<TD><EM>file for new guide tree for profile1</EM></TD>
+</TR>
+<TR>
+<TD><TT>-NEWTREE2=    </TT></TD>
+<TD><EM>file for new guide tree for profile2</EM></TD>
+</TR>
+<TR>
+<TD><TT>-USETREE1=    </TT></TD>
+<TD><EM>file for old guide tree for profile1</EM></TD>
+</TR>
+<TR>
+<TD><TT>-USETREE2=    </TT></TD>
+<TD><EM>file for old guide tree for profile2</EM></TD>
+</TR>
+</TABLE></CENTER>
+<CENTER><H3>***Sequence to Profile Alignments:***
+</H3></CENTER>
+<CENTER><TABLE ALIGN=ABSCENTER BORDER=1 CELLSPACING=1 CELLPADDING=5>
+<TR>
+<TD><STRONG>Parameter</STRONG></TD>
+<TD><STRONG><EM>Description</EM></STRONG></TD>
+</TR>
+<TR>
+<TD><TT>-NEWTREE=    </TT></TD>
+<TD><EM>file for new guide tree</EM></TD>
+</TR>
+<TR>
+<TD><TT>-USETREE=    </TT></TD>
+<TD><EM>file for old guide tree</EM></TD>
+</TR>
+</TABLE></CENTER>
+<CENTER><H3>***Structure Alignments:***
+</H3></CENTER>
+<CENTER><TABLE ALIGN=ABSCENTER BORDER=1 CELLSPACING=1 CELLPADDING=5>
+<TR>
+<TD><STRONG>Parameter</STRONG></TD>
+<TD><STRONG><EM>Description</EM></STRONG></TD>
+</TR>
+<TR>
+<TD><TT>-NOSECSTR2     </TT></TD>
+<TD><EM>do not use secondary structure/gap penalty mask for profile 2</EM></TD>
+</TR>
+<TR>
+<TD><TT>-SECSTROUT=STRUCTURE or MASK or BOTH or NONE  </TT></TD>
+<TD><EM>output in alignment file</EM></TD>
+</TR>
+<TR>
+<TD><TT>-HELIXGAP=n    </TT></TD>
+<TD><EM>gap penalty for helix core residues </EM></TD>
+</TR>
+<TR>
+<TD><TT>-STRANDGAP=n   </TT></TD>
+<TD><EM>gap penalty for strand core residues</EM></TD>
+</TR>
+<TR>
+<TD><TT>-LOOPGAP=n     </TT></TD>
+<TD><EM>gap penalty for loop regions</EM></TD>
+</TR>
+<TR>
+<TD><TT>-TERMINALGAP=n </TT></TD>
+<TD><EM>gap penalty for structure termini</EM></TD>
+</TR>
+<TR>
+<TD><TT>-HELIXENDIN=n  </TT></TD>
+<TD><EM>number of residues inside helix to be treated as terminal</EM></TD>
+</TR>
+<TR>
+<TD><TT>-HELIXENDOUT=n </TT></TD>
+<TD><EM>number of residues outside helix to be treated as terminal</EM></TD>
+</TR>
+<TR>
+<TD><TT>-STRANDENDIN=n </TT></TD>
+<TD><EM>number of residues inside strand to be treated as terminal</EM></TD>
+</TR>
+<TR>
+<TD><TT>-STRANDENDOUT=n</TT></TD>
+<TD><EM>number of residues outside strand to be treated as terminal </EM></TD>
+</TR>
+</TABLE></CENTER>
+<CENTER><H3>***Trees:***
+</H3></CENTER>
+<CENTER><TABLE ALIGN=ABSCENTER BORDER=1 CELLSPACING=1 CELLPADDING=5>
+<TR>
+<TD><STRONG>Parameter</STRONG></TD>
+<TD><STRONG><EM>Description</EM></STRONG></TD>
+</TR>
+<TR>
+<TD><TT>-SEED=n    </TT></TD>
+<TD><EM>seed number for bootstraps</EM></TD>
+</TR>
+<TR>
+<TD><TT>-KIMURA      </TT></TD>
+<TD><EM>use Kimura's correction</EM></TD>
+</TR>
+<TR>
+<TD><TT>-TOSSGAPS  </TT></TD>
+<TD><EM>ignore positions with gaps</EM></TD>
+</TR>
+<TR>
+<TD><TT>-BOOTLABELS=node OR branch </TT></TD>
+<TD><EM>position of bootstrap values in tree display</EM></TD>
+</TR>
+</TABLE></CENTER>
+</P>
+<A HREF="#INDEX"> <EM>Back to Index</EM> </A>
+<CENTER><H2><A NAME="R">                             References
+</A></H2></CENTER>
+<P>
+</P>
+<P>
+<STRONG>
+The ClustalX program is described in the manuscript:
+</STRONG>
+</P>
+<P>
+Thompson,J.D., Gibson,T.J., Plewniak,F., Jeanmougin,F. and Higgins,D.G. (1997)
+The ClustalX windows interface: flexible strategies for multiple sequence 
 alignment aided by quality analysis tools. Nucleic Acids Research, 25:4876-4882.
 </P>
 <P>

Modified: trunk/packages/clustalw/trunk/clustalx_help
===================================================================
--- trunk/packages/clustalw/trunk/clustalx_help	2006-12-04 00:56:12 UTC (rev 165)
+++ trunk/packages/clustalw/trunk/clustalx_help	2006-12-04 00:57:13 UTC (rev 166)
@@ -1,4 +1,1524 @@
 
+This is the on-line help file for Clustal X (version 1.81), using the NCBI
+Vibrant Toolkit.   
+
+It should be named or defined as: clustalx_help 
+except with MSDOS in which case it should be named ClustalX.HLP
+
+For full details of usage and algorithms, please read the CLUSTALW.DOC file.
+
+
+Toby  Gibson                         EMBL, Heidelberg, Germany.
+Des   Higgins                        UCC, Cork, Ireland.
+Julie Thompson/Francois Jeanmougin   IGBMC, Strasbourg, France.
+
+
+
+
+>>HELP G <<
+                      General help for CLUSTAL X (1.8)
+
+Clustal X is a windows interface for the ClustalW multiple sequence alignment
+program. It provides an integrated environment for performing multiple sequence
+and profile alignments and analysing the results. The sequence alignment is
+displayed in a window on the screen. A versatile coloring scheme has been
+incorporated allowing you to highlight conserved features  in the alignment.
+The pull-down menus at the top of the window allow you to select all the
+options required for traditional multiple sequence and profile alignment.
+
+You can cut-and-paste sequences to change the order of the alignment; you can
+select a subset of sequences to be aligned; you can select a sub-range of the
+alignment to be realigned and inserted back into the original alignment.
+
+Alignment quality analysis can be performed and low-scoring segments or
+exceptional residues can be highlighted.
+
+ClustalX is available for a number of different platforms including: SUN
+Solaris, IRIX5.3 on Silicon Graphics, Digital UNIX on DECStations, Microsoft
+Windows (32 bit) for PC's, Linux ELF for x86 PC's and Macintosh PowerMac. (See
+the README file for Installation instructions.)
+
+
+<H4>
+SEQUENCE INPUT
+</H4>
+
+Sequences and profiles (a term for pre-existing alignments) are input using 
+the FILE menu. Invalid options will be disabled. All sequences must be included
+in 1 file. 7 formats are automatically recognised: NBRF/PIR, EMBL/SWISSPROT,
+Pearson (Fasta), Clustal (*.aln), GCG/MSF (Pileup), GCG9 RSF and GDE flat file.
+All non-alphabetic characters (spaces, digits, punctuation marks) are ignored
+except "-" which is used to indicate a GAP ("." in MSF/RSF).  
+
+<H4>
+SEQUENCE / PROFILE ALIGNMENTS
+</H4>
+
+Clustal X has two modes which can be selected using the switch directly above
+the sequence display: MULTIPLE ALIGNMENT MODE and PROFILE ALIGNMENT MODE.
+
+To do a MULTIPLE ALIGNMENT on a set of sequences, make sure MULTIPLE ALIGNMENT
+MODE is selected. A single sequence data area is then displayed. The ALIGNMENT
+menu then allows you to either produce a guide tree for the alignment, or to do
+a multiple alignment following the guide tree, or to do a full multiple
+alignment.
+
+In PROFILE ALIGNMENT MODE, two sequence data areas are displayed, allowing you
+to align 2 alignments (termed profiles). Profiles are also used to add a new
+sequence to an old alignment, or to use secondary structure to guide the
+alignment process. GAPS in the old alignments are indicated using the "-" 
+character. PROFILES can be input in ANY of the allowed formats; just  use "-"
+(or "." for MSF/RSF) for each gap position. In Profile Alignment Mode, a button
+"Lock Scroll" is displayed which allows you to scroll the two profiles together
+using a single scroll bar. When the Lock Scroll is turned off, the two profiles
+can be scrolled independently.
+
+<H4>
+PHYLOGENETIC TREES
+</H4>
+
+Phylogenetic trees can be calculated from old alignments (read in with "-"
+characters to indicate gaps) OR after a multiple alignment while the alignment
+is still displayed.
+
+<H4>
+ALIGNMENT DISPLAY
+</H4>
+
+The alignment is displayed on the screen with the sequence names on the left
+hand side. The sequence alignment is for display only; it cannot be edited here
+(except for changing the sequence order by cutting-and-pasting on the sequence
+names). 
+
+A ruler is displayed below the sequences, starting at 1 for the first residue
+position (residue numbers in the sequence input file are ignored).
+
+A line above the alignment is used to mark strongly conserved positions. Three
+characters ('*', ':' and '.') are used:
+
+'*' indicates positions which have a single, fully conserved residue
+
+':' indicates that one of the following 'strong' groups is fully conserved:-
+<PRE>
+                 STA  
+                 NEQK  
+                 NHQK  
+                 NDEQ  
+                 QHRK  
+                 MILV  
+                 MILF  
+                 HY  
+                 FYW  
+</PRE>
+
+'.' indicates that one of the following 'weaker' groups is fully conserved:-
+<PRE>
+                 CSA  
+                 ATV  
+                 SAG  
+                 STNK  
+                 STPA  
+                 SGND  
+                 SNDEQK  
+                 NDEQHK  
+                 NEQHRK  
+                 FVLIM  
+                 HFY  
+</PRE>
+
+These are all the positively scoring groups that occur in the Gonnet Pam250
+matrix. The strong and weak groups are defined as strong score >0.5 and weak
+score <=0.5 respectively.
+
+For profile alignments, secondary structure and gap penalty masks are displayed
+above the sequences, if any data is found in the profile input file.
+
+
+>>HELP F <<
+                      Input / Output Files 
+
+LOAD SEQUENCES reads sequences from one of 7 file formats, replacing any
+sequences that are already loaded. All sequences must be in 1 file. The formats
+that are automatically recognised are: NBRF/PIR, EMBL/SWISSPROT, Pearson
+(Fasta), Clustal (*.aln), GCG/MSF (Pileup), GCG9/RSF and GDE flat file.  All
+non-alphabetic characters (spaces, digits, punctuation  marks) are ignored
+except "-" which is used to indicate a GAP ("." in MSF/RSF).
+
+The program tries to automatically recognise the different file formats used
+and to guess whether the sequences are amino acid or nucleotide.  This is not
+always foolproof.
+
+FASTA and NBRF/PIR formats are recognised by having a ">" as the first 
+character in the file.  
+
+EMBL/Swiss Prot formats are recognised by the letters "ID" at the start of the
+file (the token for the entry name field).  
+
+CLUSTAL format is recognised by the word CLUSTAL at the beginning of the file.
+
+GCG/MSF format is recognised by one of the following:
+<UL>
+<LI>
+       - the word PileUp at the start of the file.
+</LI><LI>
+       - the word !!AA_MULTIPLE_ALIGNMENT or !!NA_MULTIPLE_ALIGNMENT
+         at the start of the file.
+</LI><LI>
+       - the word MSF on the first line of the file, and the characters ..
+         at the end of this line.
+</LI>
+</UL>
+ 
+GCG/RSF format is recognised by the word !!RICH_SEQUENCE at the beginning of
+the file.
+
+
+If 85% or more of the characters in the sequence are from A,C,G,T,U or N, the
+sequence will be assumed to be nucleotide.  This works in 97.3% of cases but
+watch out!
+
+APPEND SEQUENCES is only valid in MULTIPLE ALIGNMENT MODE. The input sequences
+do not replace those already loaded, but are appended at the end of the
+alignment.
+
+SAVE SEQUENCES AS... offers the user a choice of one of six output formats:
+CLUSTAL, NBRF/PIR, GCG/MSF, PHYLIP, NEXUS or GDE. All sequences are written
+to a single file. Options are available to save a range of the alignment, 
+switch between UPPER/LOWER case for GDE files, and to output SEQUENCE NUMBERING
+for CLUSTAL files.
+
+LOAD PROFILE 1 reads sequences in the same 7 file formats, replacing any
+sequences already loaded as Profile 1. This option will also remove any
+sequences which are loaded in Profile 2.
+
+LOAD PROFILE 2 reads sequences in the same 7 file formats, replacing any
+sequences already loaded as Profile 2.
+
+SAVE PROFILE 1 AS... is similar to the Save Sequences option except that only
+those sequences in Profile 1 will be written to the output file.
+
+SAVE PROFILE 2 AS... is similar to the Save Sequences option except that only
+those sequences in Profile 2 will be written to the output file.
+
+WRITE ALIGNMENT AS POSTSCRIPT will write the sequence display to a postscript
+format file. This will include any secondary structure / gap penalty mask 
+information and the consensus and ruler lines which are displayed on the
+screen. The Alignment Quality curve can be optionally included in the output
+file.
+
+WRITE PROFILE 1 AS POSTSCRIPT is similar to WRITE ALIGNMENT AS POSTSCRIPT
+except that only the profile 1 display will be printed.
+
+WRITE PROFILE 2 AS POSTSCRIPT is similar to WRITE ALIGNMENT AS POSTSCRIPT
+except that only the profile 2 display will be printed.
+
+
+<H4>
+POSTSCRIPT PARAMETERS
+</H4>
+
+A number of options are available to allow you to configure your postscript
+output file.
+
+PS COLORS FILE:
+
+The exact RGB values required to reproduce the colors used in the alignment
+window will vary from printer to printer. A PS colors file can be specified
+that contains the RGB values for all the colors required by each of your
+postscript printers.
+
+By default, Clustal X looks for a file called 'colprint.par' in the current
+directory (if you're running under UNIX, it then looks in your home directory,
+and finally in the directories in your PATH environment variable). If no PS
+colors file is found or a color used on the screen is not defined here, the
+screen RGB values (from the Color Parameter File) are used.
+
+The PS colors file consists of one line for each color to be defined, with the
+color name followed by the RGB values (on a scale of 0 to 1). For example,
+
+RED          0.9 0.1 0.1
+
+Blank lines and comments (lines beginning with a '#' character) are ignored.
+
+
+PAGE SIZE:  The alignment can be displayed on either A4, A3 or US Letter size
+pages.
+
+ORIENTATION: The alignment can be displayed on either a landscape or portrait
+page.
+
+PRINT HEADER: An optional header including the postscript filename, and
+creation date can be printed at the top of each page.
+
+PRINT QUALITY CURVE: The Alignment Quality curve which is displayed underneath
+the alignment on the screen can be included in the postscript output.
+
+PRINT RULER: The ruler which is displayed underneath the alignment on the 
+screen can be included in the postscript output.
+
+PRINT RESIDUE NUMBERS: Sequence residue numbers can be printed at the right
+hand side of the alignment.
+
+RESIZE TO FIT PAGE: By default, the alignment is scaled to fit the page size
+selected. This option can be turned off, in which case a font size of 10 will
+be used for the sequences.
+
+PRINT FROM POSITION/TO: A range of the alignment can be printed. The default
+is to print the full alignment. The first and last residues to be printed are
+specified here.
+
+USE BLOCK LENGTH: The alignment can be divided into blocks of residues. The
+number of residues in a block is specified here. More than one block may then
+be printed on a single page. This is useful for long alignments of a small
+number of sequences. If the block length is set to 0, the alignment will not
+be divided into blocks, but printed across a number of pages.
+
+>>HELP E <<
+                          Editing Alignments
+
+Clustal X allows you to change the order of the sequences in the alignment, by
+cutting-and-pasting the sequence names.
+
+To select a group of sequences to be moved, click on a sequence name and drag
+the cursor until all the required sequences are highlighted. Holding down the
+Shift key when clicking on the first name will add new sequences to those
+already selected.
+
+(Options are provided to Select All Sequences, Select Profile 1 or Select 
+Profile 2.)
+
+The selected sequences can be removed from the alignment by using the EDIT
+menu, CUT option.
+
+To add the cut sequences back into an alignment, select a sequence by clicking
+on the sequence name. The cut sequences will be added to the alignment,
+immediately following the selected sequence, by the EDIT menu, PASTE option.
+
+To add the cut sequences to an empty alignment (e.g. when cutting sequences from
+Profile 1 and pasting them to Profile 2), click on the empty sequence name
+display area, and select the EDIT menu, PASTE option as before.
+
+The sequence selection and sequence range selection can be cleared using the
+EDIT menu, CLEAR SEQUENCE SELECTION and CLEAR RANGE SELECTION options
+respectively.
+
+To search for a string of residues in the sequences, select the sequences to be
+searched by clicking on the sequence names. You can then enter the string to
+search for by selecting the SEARCH FOR STRING option. If the string is found in
+any of the sequences selected, the sequence name and column number are printed
+below the sequence display.
+
+In PROFILE ALIGNMENT MODE, the two profiles can be merged (normally done after
+alignment) by selecting ADD PROFILE 2 TO PROFILE 1. The sequences currently
+displayed as Profile 2 will be appended to Profile 1. 
+
+The REMOVE ALL GAPS option will remove all gaps from the sequences currently
+selected.
+WARNING: This option removes ALL gaps, not only those introduced by ClustalX,
+but also those that were read from the input alignment file. Any secondary
+structure information associated with the alignment will NOT be automatically
+realigned.
+
+The REMOVE GAP-ONLY COLUMNS option will remove those positions in the alignment
+which contain gaps in all sequences. This can occur as a result of removing
+divergent sequences from an alignment, or if an alignment has been realigned.
+
+>>HELP M <<
+                          Multiple Alignments
+
+Make sure MULTIPLE ALIGNMENT MODE is selected, using the switch directly above
+the sequence display area. Then, use the ALIGNMENT menu to do multiple
+alignments.
+
+Multiple alignments are carried out in 3 stages:
+ 
+1) all sequences are compared to each other (pairwise alignments);
+ 
+2) a dendrogram (like a phylogenetic tree) is constructed, describing the
+approximate groupings of the sequences by similarity (stored in a file).
+ 
+3) the final multiple alignment is carried out, using the dendrogram as a guide.
+
+The 3 stages are carried out automatically by the DO COMPLETE ALIGNMENT option.
+You can skip the first stages (pairwise alignments; guide tree) by using an old
+guide tree file (DO ALIGNMENT FROM GUIDE TREE); or you can just produce the
+guide tree with no final multiple alignment (PRODUCE GUIDE TREE ONLY).
+
+
+REALIGN SELECTED SEQUENCES is used to realign badly aligned sequences in the
+alignment. Sequences can be selected by clicking on the sequence names - see
+Editing Alignments for more details. The unselected sequences are then 'fixed'
+and a profile is made including only the unselected sequences. Each of the
+selected sequences in turn is then realigned to this profile. The realigned
+sequences will be displayed as a group at the end of the alignment.
+
+
+REALIGN SELECTED SEQUENCE RANGE is used to realign a small region of the 
+alignment. A residue range can be selected by clicking on the sequence display
+area. A multiple alignment is then performed, following the 3 stages described
+above, but only using the selected residue range. Finally the new alignment of
+the range is pasted back into the full sequence alignment.
+
+By default, gap penalties are used at each end of the subrange in order to 
+penalise terminal gaps. If the REALIGN SEGMENT END GAP PENALTIES option is
+switched off, gaps can be introduced at the ends of the residue range at no
+cost.
+
+
+ALIGNMENT PARAMETERS displays a sub-menu with the following options:
+
+RESET NEW GAPS BEFORE ALIGNMENT will remove any new gaps introduced into the
+sequences during multiple alignment if you wish to change the parameters and
+try again. This only takes effect just before you do a second multiple
+alignment. You can make phylogenetic trees after alignment whether or not this
+is ON. If you turn this OFF, the new gaps are kept even if you do a second
+multiple alignment. This allows you to iterate the alignment gradually.
+Sometimes, the alignment is improved by a second or third pass.
+
+RESET ALL GAPS BEFORE ALIGNMENT will remove all gaps in the sequences including
+gaps which were read in from the sequence input file. This only takes effect
+just before you do a second multiple alignment.  You can make phylogenetic
+trees after alignment whether or not this is ON.  If you turn this OFF, all
+gaps are kept even if you do a second multiple alignment. This allows you to
+iterate the alignment gradually.  Sometimes, the alignment is improved by a
+second or third pass.
+
+
+PAIRWISE ALIGNMENT PARAMETERS control the speed/sensitivity of the initial
+alignments.
+
+MULTIPLE ALIGNMENT PARAMETERS control the gaps in the final multiple
+alignments.
+
+PROTEIN GAP PARAMETERS displays a temporary window which allows you to set
+various parameters only used in the alignment of protein sequences.
+
+(SECONDARY STRUCTURE PARAMETERS, for use with the Profile Alignment Mode only,
+allows you to set various parameters only used with gap penalty masks.)
+
+SAVE LOG FILE will write the alignment calculation scores to a file. The log
+filename is the same as the input sequence filename, with an extension .log
+appended.
+
+
+<H4>
+OUTPUT FORMAT OPTIONS
+</H4>
+
+You can choose from 6 different alignment formats (CLUSTAL, GCG, NBRF/PIR,
+PHYLIP, GDE and NEXUS).  You can choose more than one (or all 6 if you wish).  
+
+CLUSTAL format output is a self explanatory alignment format. It shows the
+sequences aligned in blocks. It can be read in again at a later date to (for
+example) calculate a phylogenetic tree or add in new sequences by profile
+alignment.
+
+GCG output can be used by any of the GCG programs that can work on multiple
+alignments (e.g. PRETTY, PROFILEMAKE, PLOTALIGN). It is the same as the GCG
+.msf format files (multiple sequence file); new in version 7 of GCG.
+
+NEXUS format is used by several phylogeny programs, including PAUP and
+MacClade.
+
+PHYLIP format output can be used for input to the PHYLIP package of Joe 
+Felsenstein.  This is a very widely used package for doing every imaginable
+form of phylogenetic analysis (MUCH more than the modest introduction
+offered by this program).
+
+NBRF/PIR: this is the same as the standard PIR format with ONE ADDITION. Gap
+characters "-" are used to indicate the positions of gaps in the multiple 
+alignment. These files can be re-used as input in any part of clustal that
+allows sequences (or alignments or profiles) to be read in.  
+
+GDE:  this format is used by the GDE package of Steven Smith and is understood
+by SEQLAB in GCG 9 or later.
+
+GDE OUTPUT CASE: sequences in GDE format may be written in either upper or
+lower case.
+ 
+CLUSTALW SEQUENCE NUMBERS: residue numbers may be added to the end of the
+alignment lines in clustalw format.
+
+OUTPUT ORDER is used to control the order of the sequences in the output
+alignments. By default, it uses the order in which the sequences were aligned
+(from the guide tree/dendrogram), thus automatically grouping closely related
+sequences. It can be switched to be the same as the original input order.
+
+PARAMETER OUTPUT: This option will save all your parameter settings in a
+parameter file (suffix .par) during alignment. The file can be subsequently
+used to rerun ClustalW using the same parameters.
+
+
+<H3>
+ALIGNMENT PARAMETERS
+</H3>
+--------------------
+
+<STRONG>
+PAIRWISE ALIGNMENT PARAMETERS
+</STRONG>
+
+A distance is calculated between every pair of sequences and these are used to
+construct the phylogenetic tree which guides the final multiple alignment. The
+scores are calculated from separate pairwise alignments. These can be
+calculated using 2 methods: dynamic programming (slow but accurate) or by the
+method of Wilbur and Lipman (extremely fast but approximate).   
+
+You can choose between the 2 alignment methods using the PAIRWISE ALIGNMENTS
+option. The slow/accurate method is fast enough for short sequences but will be
+VERY SLOW for many (e.g. >100) long (e.g. >1000 residue) sequences.   
+
+
+<STRONG>
+SLOW-ACCURATE alignment parameters:
+</STRONG>
+
+These parameters do not have any effect on the speed of the alignments. They
+are used to give initial alignments which are then rescored to give percent
+identity scores. These % scores are the ones which are displayed on the 
+screen. The scores are converted to distances for the trees.
+
+Gap Open Penalty:      the penalty for opening a gap in the alignment.
+
+Gap Extension Penalty: the penalty for extending a gap by 1 residue.
+
+Protein Weight Matrix: the scoring table which describes the similarity of 
+each amino acid to each other.
+
+Load protein matrix: allows you to read in a comparison table from a file.
+
+DNA weight matrix: the scores assigned to matches and mismatches (including
+IUB ambiguity codes).
+
+Load DNA matrix: allows you to read in a comparison table from a file.
+
+See the Multiple alignment parameters, MATRIX option below for details of the
+matrix input format.
+
+
+<STRONG>
+FAST-APPROXIMATE alignment parameters:
+</STRONG>
+
+These similarity scores are calculated from fast, approximate, global align-
+ments, which are controlled by 4 parameters. 2 techniques are used to make
+these alignments very fast: 1) only exactly matching fragments (k-tuples) are
+considered; 2) only the 'best' diagonals (the ones with most k-tuple matches)
+are used.
+
+GAP PENALTY:   This is a penalty for each gap in the fast alignments. It has
+little effect on the speed or sensitivity except for extreme values.
+
+K-TUPLE SIZE:  This is the size of exactly matching fragment that is used. 
+INCREASE for speed (max= 2 for proteins; 4 for DNA), DECREASE for sensitivity.
+For longer sequences (e.g. >1000 residues) you may wish to increase the
+default.
+
+TOP DIAGONALS: The number of k-tuple matches on each diagonal (in an imaginary
+dot-matrix plot) is calculated. Only the best ones (with most matches) are used
+in the alignment. This parameter specifies how many. Decrease for speed;
+increase for sensitivity.
+
+WINDOW SIZE:  This is the number of diagonals around each of the 'best' 
+diagonals that will be used. Decrease for speed; increase for sensitivity.
+
+
+<STRONG>
+MULTIPLE ALIGNMENT PARAMETERS
+</STRONG>
+
+These parameters control the final multiple alignment. This is the core of the
+program and the details are complicated. To fully understand the use of the
+parameters and the scoring system, you will have to refer to the documentation.
+
+Each step in the final multiple alignment consists of aligning two alignments 
+or sequences. This is done progressively, following the branching order in the
+GUIDE TREE. The basic parameters to control this are two gap penalties and the
+scores for various identical/non-identical residues. 
+
+The GAP OPENING and EXTENSION PENALTIES can be set here. These control the 
+cost of opening up every new gap and the cost of every item in a gap.  
+Increasing the gap opening penalty will make gaps less frequent. Increasing 
+the gap extension penalty will make gaps shorter. Terminal gaps are not 
+penalised.
+
+The DELAY DIVERGENT SEQUENCES switch delays the alignment of the most distantly
+related sequences until after the most closely related sequences have  been
+aligned. The setting shows the percent identity level required to delay the
+addition of a sequence; sequences that are less identical than this level to
+any other sequences will be aligned later.
+
+The TRANSITION WEIGHT gives transitions (A<-->G or C<-->T i.e. purine-purine or
+pyrimidine-pyrimidine substitutions) a weight between 0 and 1; a weight of zero
+means that the transitions are scored as mismatches, while a weight of 1 gives
+the transitions the match score. For distantly related DNA sequences, the
+weight should be near to zero; for closely related sequences it can be useful
+to assign a higher score. The default is set to 0.5.
+
+
+The PROTEIN WEIGHT MATRIX option allows you to choose a series of weight
+matrices. For protein alignments, you use a weight matrix to determine the
+similarity of non-identical amino acids. For example, Tyr aligned with Phe is
+usually judged to be 'better' than Tyr aligned with Pro.
+
+There are three 'in-built' series of weight matrices offered. Each consists of
+several matrices which work differently at different evolutionary distances. To
+see the exact details, read the documentation. Crudely, we store several
+matrices in memory, spanning the full range of amino acid distance (from almost
+identical sequences to highly divergent ones). For very similar sequences, it
+is best to use a strict weight matrix which only gives a high score to
+identities and the most favoured conservative substitutions. For more divergent
+sequences, it is appropriate to use "softer" matrices which give a high score
+to many other frequent substitutions.
+
+1) BLOSUM (Henikoff). These matrices appear to be the best available for 
+carrying out database similarity (homology) searches. The matrices currently
+used are: Blosum 80, 62, 45 and 30. BLOSUM was the default in earlier Clustal X
+versions.
+
+2) PAM (Dayhoff). These have been extremely widely used since the late '70s. We
+currently use the PAM 20, 60, 120, 350 matrices.
+
+3) GONNET. These matrices were derived using almost the same procedure as the
+Dayhoff one (above) but are much more up to date and are based on a far larger
+data set. They appear to be more sensitive than the Dayhoff series. We
+currently use the GONNET 80, 120, 160, 250 and 350 matrices. This series is the
+default for Clustal X version 1.8.
+
+We also supply an identity matrix which gives a score of 10 to two identical 
+amino acids and a score of zero otherwise. This matrix is not very useful.
+
+Load protein matrix: allows you to read in a comparison matrix from a file.
+This can be either a single matrix or a series of matrices (see below for
+format). 
+
+
+DNA WEIGHT MATRIX option allows you to select a single matrix (not a series)
+used for aligning nucleic acid sequences. Two hard-coded matrices are available:
+
+1) IUB. This is the default scoring matrix used by BESTFIT for the comparison
+of nucleic acid sequences. X's and N's are treated as matches to any IUB
+ambiguity symbol. All matches score 1.9; all mismatches for IUB symbols score 0.
+
+2) CLUSTALW(1.6). A previous system used by ClustalW, in which matches score
+1.0 and mismatches score 0. All matches for IUB symbols also score 0.
+
+Load DNA matrix: allows you to read in a nucleic acid comparison matrix from a
+file (just one matrix, not a series).
+
+
+SINGLE MATRIX INPUT FORMAT
+The format used for a single matrix is the same as the BLAST program. The
+scores in the new weight matrix should be similarities. You can use negative as
+well as positive values if you wish, although the matrix will be automatically
+adjusted to all positive scores, unless the NEGATIVE MATRIX option is selected.
+Any lines beginning with a # character are assumed to be comments. The first
+non-comment line should contain a list of amino acids in any order, using the 1
+letter code, followed by a * character. This should be followed by a square
+matrix of scores, with one row and one column for each amino acid. The last row
+and column of the matrix (corresponding to the * character) contain the minimum
+score over the whole matrix.
+
+MATRIX SERIES INPUT FORMAT
+ClustalX uses different matrices depending on the mean percent identity of the
+sequences to be aligned. You can specify a series of matrices and the range of
+the percent identity for each matrix in a matrix series file. The file is
+automatically recognised by the word CLUSTAL_SERIES at the beginning of the
+file. Each matrix in the series is then specified on one line which should
+start with the word MATRIX. This is followed by the lower and upper limits of
+the sequence percent identities for which you want to apply the matrix. The
+final entry on the matrix line is the filename of a Blast format matrix file
+(see above for details of the single matrix file format).
+
+Example.
+
+CLUSTAL_SERIES
+ 
+MATRIX 81 100 /us1/user/julie/matrices/blosum80
+MATRIX 61 80 /us1/user/julie/matrices/blosum62
+MATRIX 31 60 /us1/user/julie/matrices/blosum45
+MATRIX 0 30 /us1/user/julie/matrices/blosum30
+
+
+<STRONG>
+PROTEIN GAP PARAMETERS
+</STRONG>
+
+RESIDUE SPECIFIC PENALTIES are amino acid specific gap penalties that reduce or
+increase the gap opening penalties at each position in the alignment or 
+sequence. See the documentation for details. As an example, positions that are
+rich in glycine are more likely to have an adjacent gap than positions that are
+rich in valine.
+
+HYDROPHILIC GAP PENALTIES are used to increase the chances of a gap within a
+run (5 or more residues) of hydrophilic amino acids; these are likely to be
+loop or random coil regions where gaps are more common. The residues that are
+"considered" to be hydrophilic can be entered in HYDROPHILIC RESIDUES.
+
+GAP SEPARATION DISTANCE tries to decrease the chances of gaps being too close
+to each other. Gaps that are less than this distance apart are penalised more
+than other gaps. This does not prevent close gaps; it makes them less frequent,
+promoting a block-like appearance of the alignment.
+
+END GAP SEPARATION treats end gaps just like internal gaps for the purposes of
+avoiding gaps that are too close (set by GAP SEPARATION DISTANCE above). If you
+turn this off, end gaps will be ignored for this purpose. This is useful when
+you wish to align fragments where the end gaps are not biologically meaningful.
+
+
+>>HELP P <<
+                   Profile and Structure Alignments
+   
+By PROFILE ALIGNMENT, we mean alignment using existing alignments. Profile 
+alignments allow you to store alignments of your favourite sequences and add
+new sequences to them in small bunches at a time. A profile is simply an
+alignment of one or more sequences (e.g. an alignment output file from Clustal
+X). Each input can be a single sequence. One or both sets of input sequences
+may include secondary structure assignments or gap penalty masks to guide the
+alignment. 
+
+Make sure PROFILE ALIGNMENT MODE is selected, using the switch directly above
+the sequence display area. Then, use the ALIGNMENT menu to do profile and
+secondary structure alignments.
+
+The profiles can be in any of the allowed input formats with "-" characters
+used to specify gaps (except for GCG/MSF where "." is used).
+
+You have to load the 2 profiles by choosing FILE, LOAD PROFILE 1 and  LOAD
+PROFILE 2. Then ALIGNMENT, ALIGN PROFILE 2 TO PROFILE 1 will align the 2
+profiles to each other. Secondary structure masks in either profile can be used
+to guide the alignment. This option compares all the sequences in profile 1
+with all the sequences in profile 2 in order to build guide trees which will be
+used to calculate sequence weights, and select appropriate alignment parameters
+for the final profile alignment.
+
+You can skip the first stage (pairwise alignments; guide trees) by using old
+guide tree files (ALIGN PROFILES FROM GUIDE TREES). 
+
+The ALIGN SEQUENCES TO PROFILE 1 option will take the sequences in the second
+profile and align them to the first profile, 1 at a time.  This is useful to
+add some new sequences to an existing alignment, or to align a set of sequences
+to a known structure. In this case, the second profile set need not be
+pre-aligned.
+
+You can skip the first stage (pairwise alignments; guide tree) by using an old
+guide tree file (ALIGN SEQUENCES TO PROFILE 1 FROM TREE). 
+
+SAVE LOG FILE will write the alignment calculation scores to a file. The log
+filename is the same as the input sequence filename, with an extension .log
+appended.
+
+The alignment parameters can be set using the ALIGNMENT PARAMETERS menu,
+Pairwise Parameters, Multiple Parameters and Protein Gap Parameters options.
+These are EXACTLY the same parameters as used by the general, automatic
+multiple alignment procedure. The general multiple alignment procedure is
+simply a series of profile alignments. Carrying out a series of profile
+alignments on larger and larger groups of sequences, allows you to manually
+build up a complete alignment, if necessary editing intermediate alignments.
+
+<STRONG>
+SECONDARY STRUCTURE PARAMETERS
+</STRONG>
+
+Use this menu to set secondary structure options. If a solved structure is
+known, it can be used to guide the alignment by raising gap penalties within
+secondary structure elements, so that gaps will preferentially be inserted into
+unstructured surface loop regions. Alternatively, a user-specified gap penalty
+mask can be supplied for a similar purpose.
+
+A gap penalty mask is a series of numbers between 1 and 9, one per position in 
+the alignment. Each number specifies how much the gap opening penalty is to be 
+raised at that position (raised by multiplying the basic gap opening penalty
+by the number) i.e. a mask figure of 1 at a position means no change
+in gap opening penalty; a figure of 4 means that the gap opening penalty is
+four times greater at that position, making gaps 4 times harder to open.
+
+The format for gap penalty masks and secondary structure masks is explained in
+a separate help section.
+
+>>HELP B << 
+            Secondary Structure / Gap Penalty Masks
+
+The use of secondary structure-based penalties has been shown to improve  the
+accuracy of sequence alignment. Clustal X now allows secondary structure/ gap
+penalty masks to be supplied with the input sequences used during profile
+alignment. (NB. The secondary structure information is NOT used during multiple
+sequence alignment). The masks work by raising gap penalties in specified
+regions (typically secondary structure elements) so that gaps are
+preferentially opened in the less well conserved regions (typically surface
+loops).
+
+The USE PROFILE 1(2) SECONDARY STRUCTURE / GAP PENALTY MASK options control
+whether the input 2D-structure information or gap penalty masks will be used
+during the profile alignment.
+
+The OUTPUT options control whether the secondary structure and gap penalty
+masks should be included in the Clustal X output alignments. Showing both is
+useful for understanding how the masks work. The 2D-structure information is
+itself useful in judging the alignment quality and in seeing how residue
+conservation patterns vary with secondary structure. 
+
+The HELIX and STRAND GAP PENALTY options provide the value for raising the gap
+penalty at core Alpha Helical (A) and Beta Strand (B) residues. In CLUSTAL
+format, capital residues denote the A and B core structure notation. Basic gap
+penalties are multiplied by the amount specified.
+
+The LOOP GAP PENALTY option provides the value for the gap penalty in Loops.
+By default this penalty is not raised. In CLUSTAL format, loops are specified
+by "." in the secondary structure notation.
+
+The SECONDARY STRUCTURE TERMINAL PENALTY provides the value for setting the gap
+penalty at the ends of secondary structures. Ends of secondary structures are
+known to grow or shrink, comparing related structures. Therefore by default
+these are given intermediate values, lower than the core penalties. All
+secondary structure read in as lower case in CLUSTAL format gets the reduced
+terminal penalty.
+
+The HELIX and STRAND TERMINAL POSITIONS options specify the range of structure
+termini for the intermediate penalties. In the alignment output, these are
+indicated as lower case. For Alpha Helices, by default, the range spans the 
+end-helical turn (3 residues). For Beta Strands, the default range spans the
+end residue and the adjacent loop residue, since sequence conservation often
+extends beyond the actual H-bonded Beta Strand.
+
+Clustal X can read the masks from SWISS-PROT, CLUSTAL or GDE format input
+files. For many 3-D protein structures, secondary structure information is
+recorded in the feature tables of SWISS-PROT database entries. You should
+always check that the assignments are correct - some are quite inaccurate.
+Clustal X looks for SWISS-PROT HELIX and STRAND assignments e.g.
+
+
+<PRE>
+FT   HELIX       100    115
+FT   STRAND      118    119
+</PRE>
+
+The structure and penalty masks can also be read from CLUSTAL alignment format 
+as comment lines beginning "!SS_" or "!GM_" e.g.
+
+<PRE>
+!SS_HBA_HUMA    ..aaaAAAAAAAAAAaaa.aaaAAAAAAAAAAaaaaaaAaaa.........aaaAAAAAA
+!GM_HBA_HUMA    112224444444444222122244444444442222224222111111111222444444
+HBA_HUMA        VLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHGK
+</PRE>
+
+Note that the mask itself is a set of numbers between 1 and 9 each of which is 
+assigned to the residue(s) in the same column below. 
+
+In GDE flat file format, the masks are specified as text and the names must
+begin with "SS_ or "GM_.
+
+Either a structure or penalty mask or both may be used. If both are included
+in an alignment, the user will be asked which is to be used.
+
+
+>>HELP T <<
+                            Phylogenetic Trees
+
+Before calculating a tree, you must have an ALIGNMENT in memory. This can be
+input using the FILE menu, LOAD SEQUENCES option or you should have just
+carried out a full multiple alignment and the alignment is still in memory.
+Remember YOU MUST ALIGN THE SEQUENCES FIRST!!!!
+
+The method used is the NJ (Neighbour Joining) method of Saitou and Nei. First
+you calculate distances (percent divergence) between all pairs of sequences from
+a multiple alignment; second you apply the NJ method to the distance matrix.
+
+To calculate a tree, use the DRAW N-J TREE option. This gives an UNROOTED tree
+and all branch lengths. The root of the tree can only be inferred by using an
+outgroup (a sequence that you are certain branches at the outside of the tree
+.... certain on biological grounds) OR if you assume a degree of constancy in
+the 'molecular clock', you can place the root in the 'middle' of the tree
+(roughly equidistant from all tips).
+
+BOOTSTRAP N-J TREE uses a method for deriving confidence values for the 
+groupings in a tree (first adapted for trees by Joe Felsenstein). It involves
+making N random samples of sites from the alignment (N should be LARGE, e.g.
+500 - 1000); drawing N trees (1 from each sample) and counting how many times
+each grouping from the original tree occurs in the sample trees. You can set N
+using the NUMBER OF BOOTSTRAP TRIALS option in the BOOTSTRAP TREE window. In
+practice, you should use a large number of bootstrap replicates (1000 is
+recommended, even if it means running the program for an hour on a slow 
+computer). You can also supply a seed number for the random number generator
+here. Different runs with the same seed will give the same answer. See the
+documentation for more details.
+
+EXCLUDE POSITIONS WITH GAPS? With this option, any alignment positions where
+ANY of the sequences have a gap will be ignored. This means that 'like' will
+be compared to 'like' in all distances, which is highly desirable. It also
+automatically throws away the most ambiguous parts of the alignment, which are
+concentrated around gaps (usually). The disadvantage is that you may throw away
+much of the data if there are many gaps (which is why it is difficult for us to
+make it the default).  
+
+CORRECT FOR MULTIPLE SUBSTITUTIONS? For small divergence (say <10%) this option
+makes no difference. For greater divergence, this option corrects for the fact
+that observed distances underestimate actual evolutionary distances. This is
+because, as sequences diverge, more than one substitution will happen at many
+sites. However, you only see one difference when you look at the present day
+sequences. Therefore, this option has the effect of stretching branch lengths
+in trees (especially long branches). The corrections used here (for DNA or
+proteins) are both due to Motoo Kimura. See the documentation for details.  
+
+Where possible, this option should be used. However, for VERY divergent
+sequences, the distances cannot be reliably corrected. You will be warned if
+this happens. Even if none of the distances in a data set exceed the reliable
+threshold, if you bootstrap the data, some of the bootstrap distances may
+randomly exceed the safe limit.  
+
+SAVE LOG FILE will write the tree calculation scores to a file. The log
+filename is the same as the input sequence filename, with an extension .log
+appended.
+
+<H4>
+OUTPUT FORMAT OPTIONS
+</H4>
+
+Four different formats are allowed. None of these displays the tree visually.
+You can display the tree using the NJPLOT program distributed with Clustal X
+OR get the PHYLIP package and use the tree drawing facilities there. 
+ 
+1) CLUSTAL FORMAT TREE. This format is verbose and lists all of the distances
+between the sequences and the number of alignment positions used for each. The
+tree is described at the end of the file. It lists the sequences that are
+joined at each alignment step and the branch lengths. After two sequences are
+joined, they are referred to later as a NODE. The number of a NODE is the number
+of the lowest sequence in that NODE.   
+
+2) PHYLIP FORMAT TREE. This format is the New Hampshire format, used by many
+phylogenetic analysis packages. It consists of a series of nested parentheses,
+describing the branching order, with the sequence names and branch lengths. It
+can be read by the NJPLOT program distributed with ClustalX. It can also be
+used by the RETREE, DRAWGRAM and DRAWTREE programs of the PHYLIP package to see
+the trees graphically. This is the same format used during multiple alignment
+for the guide trees. Some other packages that can read and display New
+Hampshire format are TreeTool, TreeView, and Phylowin.
+
+3) PHYLIP DISTANCE MATRIX. This format just outputs a matrix of all the
+pairwise distances in a format that can be used by the PHYLIP package. It used
+to be useful when one could not produce distances from protein sequences in the
+Phylip package but is now redundant (PROTDIST of Phylip 3.5 now does this).
+
+4) NEXUS FORMAT TREE. This format is used by several popular phylogeny programs,
+including PAUP and MacClade. The format is described fully in:
+Maddison, D. R., D. L. Swofford and W. P. Maddison.  1997.
+NEXUS: an extensible file format for systematic information.
+Systematic Biology 46:590-621.
+
+BOOTSTRAP LABELS ON: By default, the bootstrap values are correctly placed on
+the tree branches of the phylip format output tree. The toggle allows them to
+be placed on the nodes, which is incorrect, but some display packages (e.g.
+TreeTool, TreeView and Phylowin) only support node labelling but not branch
+labelling. Care should be taken to note which branches and labels go together. 
+
+
+>>HELP C <<
+                               Colors
+
+Clustal X provides a versatile coloring scheme for the sequence alignment 
+display. The sequences (or profiles) are colored automatically, when they are
+loaded. Sequences can be colored either by assigning a color to specific
+residues, or on the basis of an alignment consensus. In the latter case, the
+alignment consensus is calculated automatically, and the residues in each
+column are colored according to the consensus character assigned to that
+column. In this way, you can choose to highlight, for example, conserved
+hydrophilic or hydrophobic positions in the alignment.
+
+The 'rules' used to color the alignment are specified in a COLOR PARAMETER
+FILE. Clustal X automatically looks for a file called 'colprot.par' for protein
+sequences or 'coldna.par' for DNA, in the current directory. (If you're running
+under UNIX, it then looks in your home directory, and finally in the
+directories in your PATH environment variable).
+
+By default, if no color parameter file is found, protein sequences are colored
+by residue as follows:
+
+<PRE>
+	Color			Residue Code
+
+	ORANGE			GPST
+	RED			HKR
+	BLUE			FWY
+	GREEN			ILMV
+</PRE>
+
+In the case of DNA sequences, the default colors are as follows:
+
+<PRE>
+	Color			Residue Code
+
+	ORANGE			A
+	RED			C
+	BLUE			T
+	GREEN			G
+</PRE>
+
+
+The default BACKGROUND COLORING option shows the sequence residues using a
+black character on a colored background. It can be switched off to show
+residues as a colored character on a white background. 
+
+Either BLACK AND WHITE or DEFAULT COLOR options can be selected. The Color
+option looks first for the color parameter file (as described above) and, if no
+file is found, uses the default residue-specific colors.
+
+You can specify your own coloring scheme by using the LOAD COLOR PARAMETER FILE
+option. The format of the color parameter file is described below.
+
+<H4>
+COLOR PARAMETER FILE
+</H4>
+
+This file is divided into 3 sections:
+
+1) the names and rgb values of the colors
+2) the rules for calculating the consensus
+3) the rules for assigning colors to the residues
+ 
+An example file is given here.
+
+<PRE>
+ --------------------------------------------------------------------
+@rgbindex
+RED          0.9 0.1 0.1
+BLUE         0.1 0.1 0.9
+GREEN        0.1 0.9 0.1
+YELLOW       0.9 0.9 0.0
+
+@consensus
+% = 60% w:l:v:i:m:a:f:c:y:h:p
+# = 80% w:l:v:i:m:a:f:c:y:h:p
+- = 50% e:d
++ = 60% k:r
+q = 50% q:e
+p = 50% p
+n = 50% n
+t = 50% t:s
+
+@color
+g = RED
+p = YELLOW
+t = GREEN if t:%:#
+n = GREEN if n
+w = BLUE if %:#:p
+k = RED if +
+ --------------------------------------------------------------------
+</PRE>
+
+The first section is optional and is identified by the header @rgbindex. If
+this section exists, each color used in the file must be named and the rgb
+values specified (on a scale from 0 to 1). If the rgb index section is not
+found, the following set of hard-coded colors will be used.
+
+<PRE>
+RED          0.9 0.1 0.1
+BLUE         0.1 0.1 0.9
+GREEN        0.1 0.9 0.1
+ORANGE       0.9 0.7 0.3
+CYAN         0.1 0.9 0.9
+PINK         0.9 0.5 0.5
+MAGENTA      0.9 0.1 0.9
+YELLOW       0.9 0.9 0.0
+</PRE>
+
+The second section is optional and is identified by the header @consensus. It
+defines how the consensus is calculated.
+ 
+The format of each consensus parameter is:-
+ 
+<PRE>
+c = n% residue_list
+ 
+        where
+              c             is a character used to identify the parameter.
+              n             is an integer value used as the percentage cutoff
+                            point.
+              residue_list  is a list of residues denoted by a single
+                            character, delimited by a colon (:).
+</PRE>
+ 
+For example:   # = 60% w:l:v:i
+
+will assign a consensus character # to any column in the alignment which
+contains more than 60% of the residues w,l,v and i.
+        
+ 
+The third section is identified by the header @color, and defines how colors
+are assigned to each residue in the alignment.
+ 
+The color parameters can take one of two formats:
+
+<PRE>
+1) r = color
+2) r = color if consensus_list
+ 
+        where
+              r             is a character used to denote a residue.
+              color         is one of the colors in the GDE color lookup table.
+              consensus_list is a list of consensus characters denoted by a
+                            single character, delimited by a colon (:).
+</PRE>
+ 
+Examples:
+1) g = ORANGE
+
+will color all glycines ORANGE, regardless of the consensus.
+
+2) w = BLUE if w:%:#
+
+will color BLUE any tryptophan which is found in a column with a consensus of
+w, % or #.
+ 
+
+>>HELP Q <<
+                       Alignment Quality Analysis
+
+<H3>
+QUALITY SCORES
+</H3>
+--------------
+
+Clustal X provides an indication of the quality of an alignment by plotting
+a 'conservation score' for each column of the alignment. A high score indicates
+a well-conserved column; a low score indicates low conservation. The quality
+curve is drawn below the alignment.
+
+Two methods are also provided to indicate single residues or sequence segments
+which score badly in the alignment.
+ 
+Low-scoring residues are expected to occur at a moderate frequency in all the
+sequences because of their steady divergence due to the natural processes of
+evolution. The most divergent sequences are likely to have the most outliers.
+However, the highlighted residues are especially useful in pointing to
+sequence misalignments. Note that clustering of highlighted residues is a
+strong indication of misalignment. This can arise due to various reasons, for
+example:
+ 
+        1. Partial or total misalignments caused by a failure in the
+        alignment algorithm. Usually only in difficult alignment cases.
+ 
+        2. Partial or total misalignments because at least one of the
+        sequences in the given set is partly or completely unrelated to the
+        other sequences. It is up to the user to check that the set of
+        sequences are alignable.
+
+        3. Frameshift translation errors in a protein sequence causing local
+        mismatched regions to be heavily highlighted. These are surprisingly
+        common in database entries. If suspected, a 3-frame translation of
+        the source DNA needs to be examined.
+ 
+Occasionally, highlighted residues may point to regions of some biological
+significance. This might happen for example if a protein alignment contains a
+sequence which has acquired new functions relative to the main sequence set. It
+is important to exclude other explanations, such as error or the natural
+divergence of sequences, before invoking a biological explanation.
+
+
+<H3>
+LOW-SCORING SEGMENTS
+</H3>
+--------------------
+
+Unreliable regions in the alignment can be highlighted using the Low-Scoring
+Segments option. A sequence-weighted profile is used to indicate any segments
+in the sequences which score badly. Because the profile calculation may take
+some time, an option is provided to calculate LOW-SCORING SEGMENTS. The 
+segment display can then be toggled on or off without having to repeat the
+time-consuming calculations.
+
+For details of the low-scoring segment calculation, see the CALCULATION section
+below.
+
+
+<H4>
+LOW-SCORING SEGMENT PARAMETERS
+</H4>
+------------------------------
+
+MINIMUM LENGTH OF SEGMENTS: short segments (or even single residues) can be
+hidden by increasing the minimum length of segments which will be displayed.
+
+DNA MARKING SCALE is used to remove less significant segments from the 
+highlighted display. Increase the scale to display more segments; decrease the
+scale to remove the least significant.
+
+
+PROTEIN WEIGHT MATRIX: the scoring table which describes the similarity of each
+amino acid to each other. The matrix is used to calculate the sequence-
+weighted profile scores. There are four 'in-built' Log-Odds matrices offered:
+the Gonnet PAM 80, 120, 250, 350 matrices. A more stringent matrix which only
+gives a high score to identities and the most favoured conservative
+substitutions, may be more suitable when the sequences are closely related. For
+more divergent sequences, it is appropriate to use "softer" matrices which give
+a high score to many other frequent substitutions. This  option automatically
+recalculates the low-scoring segments.
+
+
+DNA WEIGHT MATRIX: Two hard-coded matrices are available:
+
+1) IUB. This is the default scoring matrix used by BESTFIT for the comparison
+of nucleic acid sequences. X's and N's are treated as matches to any IUB
+ambiguity symbol. All matches score 1.0; all mismatches for IUB symbols score
+0.9.
+
+2) CLUSTALW(1.6). The previous system used by ClustalW, in which matches score
+1.0 and mismatches score 0. All matches for IUB symbols also score 0. 
+
+A new matrix can be read from a file on disk, if the filename consists only
+of lower case characters. The values in the new weight matrix should be
+similarities and should be NEGATIVE for infrequent substitutions.
+ 
+INPUT FORMAT. The format used for a new matrix is the same as the BLAST
+program. Any lines beginning with a # character are assumed to be comments. The
+first non-comment line should contain a list of amino acids in any order, using
+the 1 letter code, followed by a * character. This should be followed by a
+square matrix of scores, with one row and one column for each amino acid. The
+last row and column of the matrix (corresponding to the * character) contain
+the minimum score over the whole matrix.
+
+<H4>
+QUALITY SCORE PARAMETERS
+</H4>
+------------------------
+
+You can customise the column 'quality scores' plotted underneath the alignment
+display using the following options.
+
+SCORE PLOT SCALE: this is a scalar value from 1 to 10, which can be used to
+change the scale of the quality score plot. 
+
+RESIDUE EXCEPTION CUTOFF: this is a scalar value from 1 to 10, which can be
+used to change the number of residue exceptions which are highlighted in the
+alignment display. (For an explanation of this cutoff, see the CALCULATION OF
+RESIDUE EXCEPTIONS section below.)
+
+PROTEIN WEIGHT MATRIX: the scoring table which describes the similarity of 
+each amino acid to each other. 
+ 
+DNA WEIGHT MATRIX: two hard-coded matrices are available: IUB and CLUSTALW(1.6).
+
+For more information about the weight matrices, see the help above for
+the Low-scoring Segments Weight Matrix.
+
+For details of the quality score calculations, see the CALCULATION section
+below.
+
+
+<STRONG>
+SHOW LOW-SCORING SEGMENTS
+</STRONG>
+                       
+The low-scoring segment display can be toggled on or off. This option does not
+recalculate the profile scores.
+
+
+<STRONG>
+SHOW EXCEPTIONAL RESIDUES
+</STRONG>
+                       
+This option highlights individual residues which score badly in the alignment
+quality calculations. Residues which score exceptionally low are highlighted by
+using a white character on a grey background.
+
+<STRONG>
+SAVE QUALITY SCORES TO FILE
+</STRONG>
+
+The quality scores that are plotted underneath the alignment display can also
+be saved in a text file. Each column in the alignment is written on one line in
+the output file, with the value of the quality score at the end of the line.
+Only the sequences currently selected in the display are written to the file.
+One use for quality scores is to color residues in a protein structure by
+sequence conservation. In this way conserved surface residues can be
+highlighted to locate functional regions such as ligand-binding sites.
+
+
+<H3>
+CALCULATION OF QUALITY SCORES
+</H3>
+-----------------------------
+
+Suppose we have an alignment of m sequences of length n. Then, the alignment
+can be written as:
+
+<PRE>
+        A11 A12 A13 .......... A1n
+        A21 A22 A23 .......... A2n
+        .
+        .
+        Am1 Am2 Am3 .......... Amn
+</PRE>
+
+We also have a residue comparison matrix of size R where C(i,j) is the score
+for aligning residue i with residue j.
+
+We want to calculate a score for the conservation of the jth position in the
+alignment.
+
+To do this, we define an R-dimensional sequence space. For the jth position in 
+the alignment, each sequence consists of a single residue which is assigned a
+point S in the space. S has R dimensions, and for sequence i, the rth dimension
+is defined as:
+
+<PRE>
+	Sr =    C(r,Aij)
+</PRE>
+
+We then calculate a consensus value for the jth position in the alignment. This
+value X also has R dimensions, and the rth dimension is defined as:
+
+<PRE>
+	Xr = (   SUM   (Fij * C(i,r)) ) / m
+               1<=i<=R
+</PRE>
+
+where Fij is the count of residues i at position j in the alignment.
+
+Now we can calculate the distance Di between each sequence i and the consensus 
+position X in the R-dimensional space.
+
+<PRE>
+	Di = SQRT   (   SUM   (Xr - Sr)(Xr - Sr) )
+                      1<=r<=R
+
+</PRE>
+
+The quality score for the jth position in the alignment is defined as the mean
+of the sequence distances Di.
+
+The score is normalised by multiplying by the percentage of sequences which
+have residues (and not gaps) at this position.
+
+<H3>
+CALCULATION OF RESIDUE EXCEPTIONS
+</H3>
+---------------------------------
+
+The jth residue of the ith sequence is considered as an exception if the
+distance Di of the sequence from the consensus value X is greater than (Upper
+Quartile + Inter Quartile Range * Cutoff). The value used as a cutoff for
+displaying exceptions can be set from the SCORE PARAMETERS menu. A high cutoff
+value will only display very significant exceptions; a low value will allow
+more, less significant, exceptions to be highlighted.
+
+(NB. Sequences which contain gaps at this position are not included in the
+exception calculation.)
+
+
+<H3>
+CALCULATION OF LOW-SCORING SEGMENTS
+</H3>
+-----------------------------------
+
+Suppose we have an alignment of m sequences of length n. Then, the alignment
+can be written as:
+
+<PRE>
+        A11 A12 A13 .......... A1n
+        A21 A22 A23 .......... A2n
+        .
+        .
+        Am1 Am2 Am3 .......... Amn
+</PRE>
+
+We also have a residue comparison matrix of size R where C(i,j) is the score
+for aligning residue i with residue j.
+
+We calculate sequence weights by building a neighbour-joining tree, in which
+branch lengths are proportional to divergence. Summing the branches by branch
+ownership provides the weights. See Thompson et al., CABIOS, 10, 19 (1994) and
+Henikoff et al., JMB, 243, 574 (1994).
+
+To find the low-scoring segments in a sequence Si, we build a weighted profile
+of the remaining sequences in the alignment. Suppose we find residue r at 
+position j in the sequence; then the score for the jth position in the sequence
+is defined as
+
+<PRE>
+	Score(Si,j) = Profile(j,r)   where Profile(j,r) is the profile score
+                                       for residue r at position j in the
+                                       alignment.
+</PRE>
+
+These residue scores are summed along the sequence in both forward and backward
+directions. If the sum of the scores is positive, then it is reset to zero.
+Segments which score negatively in both directions are considered as 
+'low-scoring' and will be highlighted in the alignment display.
+
+
+>>HELP 9 <<
+              Command Line Parameters
+
+                DATA (sequences)
+
+-INFILE=file.ext                             :input sequences
+-PROFILE1=file.ext  and  -PROFILE2=file.ext  :profiles (aligned sequences)
+
+
+                VERBS (do things)
+
+-OPTIONS	    :list the command line parameters
+-HELP  or -CHECK    :outline the command line parameters
+-ALIGN              :do full multiple alignment 
+-TREE               :calculate NJ tree
+-BOOTSTRAP(=n)      :bootstrap a NJ tree (n= number of bootstraps; def. = 1000)
+-CONVERT            :output the input sequences in a different file format
+
+
+                PARAMETERS (set things)
+
+***General settings:****
+-INTERACTIVE :read command line, then enter normal interactive menus
+-QUICKTREE   :use FAST algorithm for the alignment guide tree
+-TYPE=       :PROTEIN or DNA sequences
+-NEGATIVE    :protein alignment with negative values in matrix
+-OUTFILE=    :sequence alignment file name
+-OUTPUT=     :GCG, GDE, PHYLIP, PIR or NEXUS
+-OUTORDER=   :INPUT or ALIGNED
+-CASE=       :LOWER or UPPER (for GDE output only)
+-SEQNOS=     :OFF or ON (for Clustal output only)
+
+
+***Fast Pairwise Alignments:***
+-KTUPLE=n      :word size
+-TOPDIAGS=n  :number of best diags.
+-WINDOW=n    :window around best diags.
+-PAIRGAP=n   :gap penalty
+-SCORE=      :PERCENT or ABSOLUTE
+
+
+***Slow Pairwise Alignments:***
+-PWMATRIX=    :Protein weight matrix=BLOSUM, PAM, GONNET, ID or filename
+-PWDNAMATRIX= :DNA weight matrix=IUB, CLUSTALW or filename
+-PWGAPOPEN=f  :gap opening penalty
+-PWGAPEXT=f  :gap extension penalty
+ 
+
+***Multiple Alignments:***
+-NEWTREE=    :file for new guide tree
+-USETREE=    :file for old guide tree
+-MATRIX=     :Protein weight matrix=BLOSUM, PAM, GONNET, ID or filename
+-DNAMATRIX=  :DNA weight matrix=IUB, CLUSTALW or filename
+-GAPOPEN=f   :gap opening penalty
+-GAPEXT=f  :gap extension penalty
+-ENDGAPS     :no end gap separation pen.
+-GAPDIST=n   :gap separation pen. range
+-NOPGAP      :residue-specific gaps off
+-NOHGAP    :hydrophilic gaps off
+-HGAPRESIDUES= :list hydrophilic res.
+-MAXDIV=n    :% ident. for delay
+-TYPE=       :PROTEIN or DNA
+-TRANSWEIGHT=f :transitions weighting
+
+
+***Profile Alignments:***
+-PROFILE      :Merge two alignments by profile alignment
+-NEWTREE1=    :file for new guide tree for profile1
+-NEWTREE2=    :file for new guide tree for profile2
+-USETREE1=    :file for old guide tree for profile1
+-USETREE2=    :file for old guide tree for profile2
+
+
+***Sequence to Profile Alignments:***
+-SEQUENCES   :Sequentially add profile2 sequences to profile1 alignment
+-NEWTREE=    :file for new guide tree
+-USETREE=    :file for old guide tree
+
+
+***Structure Alignments:***
+-NOSECSTR1     :do not use secondary structure/gap penalty mask for profile 1 
+-NOSECSTR2     :do not use secondary structure/gap penalty mask for profile 2
+-SECSTROUT=STRUCTURE or MASK or BOTH or NONE  :output in alignment file
+-HELIXGAP=n    :gap penalty for helix core residues 
+-STRANDGAP=n   :gap penalty for strand core residues
+-LOOPGAP=n     :gap penalty for loop regions
+-TERMINALGAP=n :gap penalty for structure termini
+-HELIXENDIN=n  :number of residues inside helix to be treated as terminal
+-HELIXENDOUT=n :number of residues outside helix to be treated as terminal
+-STRANDENDIN=n :number of residues inside strand to be treated as terminal
+-STRANDENDOUT=n:number of residues outside strand to be treated as terminal 
+
+
+***Trees:***
+-OUTPUTTREE=nj OR phylip OR dist OR nexus
+-SEED=n    :seed number for bootstraps
+-KIMURA      :use Kimura's correction
+-TOSSGAPS  :ignore positions with gaps
+-BOOTLABELS=node OR branch :position of bootstrap values in tree display
+
+
+>>HELP R <<
+                             References
+
+<STRONG>
+The ClustalX program is described in the manuscript:
+</STRONG>
+
+Thompson,J.D., Gibson,T.J., Plewniak,F., Jeanmougin,F. and Higgins,D.G. (1997)
+The ClustalX windows interface: flexible strategies for multiple sequence 
+alignment aided by quality analysis tools. Nucleic Acids Research, 25:4876-4882.
+
+
+<STRONG>
+The ClustalW program is described in the manuscript:
+</STRONG>
+
+Thompson, J.D., Higgins, D.G. and Gibson, T.J. (1994) CLUSTAL W: improving the
+sensitivity of progressive multiple sequence alignment through sequence
+weighting, position-specific gap penalties and weight matrix choice.  Nucleic
+Acids Research, 22:4673-4680.
+
+
+<STRONG>
+The ClustalV program is described in the manuscript:
+</STRONG>
+
+Higgins,D.G., Bleasby,A.J. and Fuchs,R. (1992) CLUSTAL V: improved software for
+multiple sequence alignment. CABIOS 8,189-191.
+
+
+<STRONG>
+The original Clustal program is described in the manuscripts:
+</STRONG>
+
+Higgins,D.G. and Sharp,P.M. (1989) Fast and sensitive multiple sequence
+alignments on a microcomputer.
+CABIOS 5,151-153.
+
+Higgins,D.G. and Sharp,P.M. (1988) CLUSTAL: a package for performing multiple
+sequence alignment on a microcomputer. Gene 73,237-244.
+
+-------------------------------------------------------------------------------
+<STRONG>
+Some tips on using Clustal X:
+</STRONG>
+
+Jeanmougin,F., Thompson,J.D., Gouy,M., Higgins,D.G. and Gibson,T.J. (1998)
+Multiple sequence alignment with Clustal X. Trends Biochem Sci, 23, 403-5.
+
+<STRONG>
+Some tips on using Clustal W:
+</STRONG>
+
+Higgins, D. G., Thompson, J. D. and Gibson, T. J. (1996) Using CLUSTAL for
+multiple sequence alignments. Methods Enzymol., 266, 383-402.
+
+-------------------------------------------------------------------------------
+<STRONG>
+You can get the latest version of the ClustalX program by anonymous ftp to:
+</STRONG>
+
+ftp-igbmc.u-strasbg.fr
+ftp.embl-heidelberg.de
+ftp.ebi.ac.uk
+
+<STRONG>
+Or, have a look at the following WWW site:
+</STRONG>
+
+http://www-igbmc.u-strasbg.fr/BioInfo/
+
+
 This is the on-line help file for Clustal X (version 1.83), using the NCBI
 Vibrant Toolkit.   
 

Added: trunk/packages/clustalw/trunk/debian/README.debian
===================================================================
--- trunk/packages/clustalw/trunk/debian/README.debian	2006-12-04 00:56:12 UTC (rev 165)
+++ trunk/packages/clustalw/trunk/debian/README.debian	2006-12-04 00:57:13 UTC (rev 166)
@@ -0,0 +1,39 @@
+ClustalW for DEBIAN
+-------------------
+
+Comments regarding the Package
+
+I decided to continue packaging the program which was orphaned because
+I needed it for packaging Arb.
+
+I removed the binary versions of njplot inside the source package because
+they just bloat the source file with unused stuff.
+Moreover I included some documentation of the clustalx GUI from version
+1.81 and built clustalx in an extra package.
+
+For further information you might read 
+ http://bioinformatics.org/software/mollin_entry.php3?name=clustalw
+
+Please report any problem with the package to the Debian bug tracking
+system.
+
+Andreas Tille <tille at debian.org>,  Mon, 29 Oct 2001 21:40:20 +0100
+
+
+I am assisting Andreas in getting a new upstream version into Debian.
+ClustalX is apparently a superset of the source of ClustalW. However,
+to our surprise it is only distributed together with a binary. The
+source of ClustalW is distributed without binaries, but does not
+contain extras for ClustalX.
+
+In violation of the Debian Policy we therefore decided to remove the
+binaries from the ClustalX distribution (we chose the binaries for Sun
+just by chance) and refer to the remaining source code as the original
+distribution.
+
+Earlier maintainers seem to already have supported the concept of such a
+"virtual" original tar file. The debian/rules provide the get-orig-source
+script for an automated creation of the tar file as used for the diff.
+
+Steffen Moeller <moeller at pzr.uni-rostock.de> Mon, 30 Jun 2003 21:08:02 +0100
+

Added: trunk/packages/clustalw/trunk/debian/changelog
===================================================================
--- trunk/packages/clustalw/trunk/debian/changelog	2006-12-04 00:56:12 UTC (rev 165)
+++ trunk/packages/clustalw/trunk/debian/changelog	2006-12-04 00:57:13 UTC (rev 166)
@@ -0,0 +1,134 @@
+clustalw (1.83-1.2) unstable; urgency=high
+
+  * Non-maintainer upload.
+  * No changes, let's try to trigger again the builders.
+
+ -- Fabio Tranchitella <kobold at debian.org>  Fri, 10 Nov 2006 09:16:45 +0100
+
+clustalw (1.83-1.1) unstable; urgency=low
+
+  * Non-maintainer upload.
+  * lesstif1 is deprecated, transition to lesstif2 (Closes: #374241)
+  * Dead link in the package description (Closes: #359712)
+  * Updated standards version, binary-indep rule required
+
+ -- Kai Hendry <hendry at iki.fi>  Mon, 10 Jul 2006 11:04:05 +0900
+
+clustalw (1.83-1) unstable; urgency=low
+
+  * New upstream version
+  * New maintainer
+  * Reapplied patches from 1.82-3
+  * Removed calls to insecure gets
+  * Cosmetics to makefile
+  * Extension of description
+  * Standards-Version: 3.5.10
+  * Increased version of debhelper: Build-Depends: debhelper (>= 4)
+  * Used debian/compat for debhelper version
+  * Replaces *.files by *.install, *.docs, *.manpages and used the
+    apropriate tools in debian/rules
+  * Moved clustalx to /usr/bin because it does not *belong* to the
+    X11 system
+  * Moved the manpage clustalx.1x to clustalx.1 accordingly.
+  * Removed debian/dirs
+  * Added menu entries
+
+ -- Steffen Moeller <moeller at pzr.uni-rostock.de>  Mon, 23 Jun 2003 19:26:29 +0100
+
+clustalw (1.82-3) unstable; urgency=low
+
+  * "My-wifes-Katrin-Birthday"-release (even if I doubt that she is very
+    interested in Debian packaging details)
+  * Fix a buffer override
+    closes: #135255
+
+ -- Andreas Tille <tille at debian.org>  Wed, 27 Feb 2002 07:26:29 +0100
+
+clustalw (1.82-2) unstable; urgency=low
+
+  * also compile GUI clustalx which is included in the sources now
+
+ -- Andreas Tille <tille at debian.org>  Sat,  3 Nov 2001 14:41:50 +0100
+
+clustalw (1.82-1) unstable; urgency=low
+
+  * New maintainer
+    closes: #100213
+  * New upstream version
+    closes: #40904
+  * Standards-Version: 3.5.6
+  * Moved help file to /usr/share/clustalw/clustalw_help because it
+    is architecture independend
+  * Added URL to the package description because I consider this as
+    "good style" to have an upstream link without installing the package
+  * Fixed a typo in the examples Makefile which was shiped with the
+    Debian package:
+     clustalw -profile -profile1=nuc.aln -profile2=nuc2.aln -outfile=profile.aln
+                                                      ^ this 2 was missing
+
+ -- Andreas Tille <tille at debian.org>  Mon, 29 Oct 2001 21:40:20 +0100
+
+clustalw (1.7-9) unstable; urgency=low
+
+  * Maintainer set to Debian QA Group <packages at qa.debian.org>.
+
+ -- Adrian Bunk <bunk at fs.tum.de>  Fri, 24 Aug 2001 21:09:56 +0200
+
+clustalw (1.7-8) unstable; urgency=low
+
+  * Adopted by new maintainer; closes: #92790 
+  * Updated to latest standards version and added Build-Depends (changed
+    makefile, control, dirs, docs, and rules); closes: #91132, #91411
+  * Deleted empty README.Debian file.
+  * Corrected doc path in manpage.
+  * Moved package to section non-free/science, because it is a tool that
+    is exclusively useful for molecular biologists.
+
+ -- Dr. Guenter Bechly <gbechly at debian.org>  Fri, 20 Apr 2001 19:08:00 +0200
+
+clustalw (1.7-7) unstable; urgency=low
+
+  * Better extended description. Closes #31475.
+  * Manual page. Closes #31382.
+
+ -- Stephane Bortzmeyer <bortzmeyer at debian.org>  Wed, 31 Mar 1999 10:50:02 +0200
+
+clustalw (1.7-6) unstable; urgency=low
+
+  * Yet another attempt to get past rejection :-) Licence is now included
+    in the copyright file.
+
+ -- Stephane Bortzmeyer <bortzmeyer at debian.org>  Sat, 19 Dec 1998 15:29:40 +0100
+
+clustalw (1.7-5) unstable; urgency=low
+
+  * A LICENCE is now included, non-free unfortunately.
+
+ -- Stephane Bortzmeyer <bortzmeyer at debian.org>  Thu, 17 Dec 1998 14:48:43 +0100
+
+clustalw (1.7-4) unstable; urgency=low
+
+  * Compiles with -mieee on the Alpha: otherwise, floating exception since 
+    ClustalW divides by zero (yuck!)
+
+ -- Stephane Bortzmeyer <bortzmeyer at debian.org>  Wed,  2 Dec 1998 16:16:02 +0100
+
+clustalw (1.7-3) unstable; urgency=low
+
+  * Tests and samples added
+  * On line help file added 
+
+ -- Stephane Bortzmeyer <bortzmeyer at debian.org>  Tue,  1 Dec 1998 15:57:50 +0100
+
+clustalw (1.7-2) unstable; urgency=low
+
+  * Switch to dh_make
+  * First public release
+
+ -- Stephane Bortzmeyer <bortzmeyer at debian.org>  Tue,  1 Dec 1998 15:25:13 +0100
+
+clustalw (1.7-1) unstable; urgency=low
+
+  * Initial Release.
+
+ -- Stephane Bortzmeyer <bortzmeyer at pasteur.fr>  Fri, 28 Aug 1998 16:09:48 +0200

Added: trunk/packages/clustalw/trunk/debian/clustalw.docs
===================================================================
--- trunk/packages/clustalw/trunk/debian/clustalw.docs	2006-12-04 00:56:12 UTC (rev 165)
+++ trunk/packages/clustalw/trunk/debian/clustalw.docs	2006-12-04 00:57:13 UTC (rev 166)
@@ -0,0 +1,4 @@
+README_W
+clustalv.doc
+clustalw.doc
+clustalw.ms

Added: trunk/packages/clustalw/trunk/debian/clustalw.examples
===================================================================
--- trunk/packages/clustalw/trunk/debian/clustalw.examples	2006-12-04 00:56:12 UTC (rev 165)
+++ trunk/packages/clustalw/trunk/debian/clustalw.examples	2006-12-04 00:57:13 UTC (rev 166)
@@ -0,0 +1 @@
+tests.clustalw

Added: trunk/packages/clustalw/trunk/debian/clustalw.install
===================================================================
--- trunk/packages/clustalw/trunk/debian/clustalw.install	2006-12-04 00:56:12 UTC (rev 165)
+++ trunk/packages/clustalw/trunk/debian/clustalw.install	2006-12-04 00:57:13 UTC (rev 166)
@@ -0,0 +1,2 @@
+clustalw usr/bin
+clustalw_help usr/share/clustalw

Added: trunk/packages/clustalw/trunk/debian/clustalw.manpages
===================================================================
--- trunk/packages/clustalw/trunk/debian/clustalw.manpages	2006-12-04 00:56:12 UTC (rev 165)
+++ trunk/packages/clustalw/trunk/debian/clustalw.manpages	2006-12-04 00:57:13 UTC (rev 166)
@@ -0,0 +1 @@
+clustalw.1

Added: trunk/packages/clustalw/trunk/debian/clustalw.menu
===================================================================
--- trunk/packages/clustalw/trunk/debian/clustalw.menu	2006-12-04 00:56:12 UTC (rev 165)
+++ trunk/packages/clustalw/trunk/debian/clustalw.menu	2006-12-04 00:57:13 UTC (rev 166)
@@ -0,0 +1,5 @@
+?package(clustalw):needs="text"\
+  section="Apps/Science"\
+  title="ClustalW"\
+  command="/usr/bin/clustalw"\
+  hints="Global multiple nucleotide or peptide sequence alignment"

Added: trunk/packages/clustalw/trunk/debian/clustalx.docs
===================================================================
--- trunk/packages/clustalw/trunk/debian/clustalx.docs	2006-12-04 00:56:12 UTC (rev 165)
+++ trunk/packages/clustalw/trunk/debian/clustalx.docs	2006-12-04 00:57:13 UTC (rev 166)
@@ -0,0 +1,3 @@
+README_X
+clustalx.html
+

Added: trunk/packages/clustalw/trunk/debian/clustalx.install
===================================================================
--- trunk/packages/clustalw/trunk/debian/clustalx.install	2006-12-04 00:56:12 UTC (rev 165)
+++ trunk/packages/clustalw/trunk/debian/clustalx.install	2006-12-04 00:57:13 UTC (rev 166)
@@ -0,0 +1,2 @@
+clustalx usr/bin
+clustalx_help usr/share/clustalw

Added: trunk/packages/clustalw/trunk/debian/clustalx.manpages
===================================================================
--- trunk/packages/clustalw/trunk/debian/clustalx.manpages	2006-12-04 00:56:12 UTC (rev 165)
+++ trunk/packages/clustalw/trunk/debian/clustalx.manpages	2006-12-04 00:57:13 UTC (rev 166)
@@ -0,0 +1 @@
+clustalx.1

Added: trunk/packages/clustalw/trunk/debian/clustalx.menu
===================================================================
--- trunk/packages/clustalw/trunk/debian/clustalx.menu	2006-12-04 00:56:12 UTC (rev 165)
+++ trunk/packages/clustalw/trunk/debian/clustalx.menu	2006-12-04 00:57:13 UTC (rev 166)
@@ -0,0 +1,5 @@
+?package(clustalx):needs="X11" \
+    section="Apps/Science" \
+    title="ClustalX" \
+    command="/usr/bin/clustalx"\
+    hints="GUI for clustalw"

Added: trunk/packages/clustalw/trunk/debian/compat
===================================================================
--- trunk/packages/clustalw/trunk/debian/compat	2006-12-04 00:56:12 UTC (rev 165)
+++ trunk/packages/clustalw/trunk/debian/compat	2006-12-04 00:57:13 UTC (rev 166)
@@ -0,0 +1 @@
+4

Added: trunk/packages/clustalw/trunk/debian/control
===================================================================
--- trunk/packages/clustalw/trunk/debian/control	2006-12-04 00:56:12 UTC (rev 165)
+++ trunk/packages/clustalw/trunk/debian/control	2006-12-04 00:57:13 UTC (rev 166)
@@ -0,0 +1,52 @@
+Source: clustalw
+Section: non-free/science
+Priority: optional
+Maintainer: Steffen Moeller <moeller at pzr.uni-rostock.de>
+Build-Depends: debhelper (>= 4), ncbi-tools6-dev, libvibrant6-dev, lesstif2-dev
+Standards-Version: 3.7.2
+
+Package: clustalw
+Architecture: any
+Depends: ${shlibs:Depends}
+Suggests: clustalx, seaview
+Description: [Biology] Global multiple nucleotide or peptide sequence alignment
+ This program performs an alignment of multiple nucleotide or amino
+ acid sequences. It recognizes the format of input sequences and whether
+ the sequences are nucleic acid (DNA/RNA) or amino acid (proteins). The
+ output format may be selected from various formats for multiple alignments
+ such as Phylip or FASTA. ClustalW is very well accepted. An alternative,
+ particularly for fragmented sequences, may be
+ T-Coffee (http://www.ch.embnet.org/software/TCoffee.html)
+ DIALIGN (http://dialign.gobics.de/)
+ or one of the other many algorithms as summarised on
+ http://www.techfak.uni-bielefeld.de/bcd/Curric/MulAli/welcome.html.
+ .
+ The output of ClustalW can be edited manually but preferably with an alignment
+ editor like seaview or within its companion clustalx.
+ When building a model from your alignment, this can be applied for improved
+ database searches. The Debian package hmmer creates such a model as an HMM.
+ .
+  URL: ftp://ftp-igbmc.u-strasbg.fr/pub/ClustalW/
+  URL: ftp://ftp.ebi.ac.uk/pub/software/unix/clustalw/
+
+Package: clustalx
+Architecture: any
+Depends: ${shlibs:Depends}, clustalw
+Description: [Biology] GUI for clustalw
+ GUI interface for the ClustalW multiple sequence alignment program. It
+ provides an integrated environment for performing multiple sequence
+ and profile alignments and analysing the results. The sequence alignment is
+ displayed in a window on the screen. A versatile coloring scheme has been
+ incorporated allowing you to highlight conserved features  in the alignment.
+ The pull-down menus at the top of the window allow you to select all the
+ options required for traditional multiple sequence and profile alignment.
+ .
+ You can cut-and-paste sequences to change the order of the alignment; you can
+ select a subset of sequences to be aligned; you can select a sub-range of the
+ alignment to be realigned and inserted back into the original alignment.
+ .
+ Alignment quality analysis can be performed and low-scoring segments or
+ exceptional residues can be highlighted.
+ .
+  URL: ftp://ftp-igbmc.u-strasbg.fr/pub/ClustalX/
+  URL: ftp://ftp.ebi.ac.uk/pub/software/unix/clustalx/

Added: trunk/packages/clustalw/trunk/debian/copyright
===================================================================
--- trunk/packages/clustalw/trunk/debian/copyright	2006-12-04 00:56:12 UTC (rev 165)
+++ trunk/packages/clustalw/trunk/debian/copyright	2006-12-04 00:57:13 UTC (rev 166)
@@ -0,0 +1,72 @@
+This package was debianized by Andreas Tille <tille at debian.org> on
+Sat, 27 Oct 2001 22:16:53 +0200
+
+It was downloaded from:
+
+       ftp://ftp-igbmc.u-strasbg.fr/pub/ClustalX/  and
+       ftp://ftp-igbmc.u-strasbg.fr/pub/ClustalW/
+
+while the source was merged to one common upstream source (see README.Debian)
+
+Authors: 
+Toby Gibson <Toby.Gibson at EMBL-Heidelberg.de>
+Julie Thompson <julie at titus.u-strasbg.fr>
+Des Higgins <d.higgins at ucc.ie>
+
+Copyright:
+
+Non-free. You cannot distribute it at will. 
+Licence included here:
+
+**********************
+LICENCE FOR CLUSTAL W
+**********************
+
+Clustal W (hereafter "the program") is copyright (c) 1994-1998 by Julie D.
+Thompson, Desmond G. Higgins and Toby J. Gibson.
+
+Permission is granted to copy, distribute and use the program provided no fee
+is charged for it and provided that this copyright and licence notice is not
+removed or altered.
+
+The full source code of the program is provided free. You should not
+distribute a modified version of the program without obtaining the permission
+of the authors. You must keep the original copyright and licence notice. You
+must also document clearly the modifications you have made. You must make
+clear that this is not the original version.
+
+Commercial distributors of Clustal W are requested to contact the Clustal W
+authors in order to take out a non-exclusive licence. See the README file
+included with Clustal W for a rationale.
+
+You should understand that this software is provided as-is. The authors make
+no claims towards its suitability for any purpose and accept absolutely no
+liability for any damages the program may cause. Use at your own risk.
+
+* End of licence
+
+
+
+Special authorization for Debian:
+
+
+
+From: "Toby Gibson" <Toby.Gibson at EMBL-Heidelberg.de>
+Date: Thu, 17 Dec 1998 14:37:02 +0100
+To: Stephane Bortzmeyer <bortzmeyer at debian.org>
+Subject: Re: Fwd: clustalw_1.7-4_i386.changes REJECTED
+
+Hi Stephane,
+
+Now that we have thought about it, I don't think we can meet your stricter free
+criterion. There are already several companies who bundle Clustal W in sequence
+analysis packages and so are effectively selling it. They have paid for
+non-exclusive licences even though anyone can get the program for free: but
+they must have a multiple alignment engine, so we might as well earn some money
+which we can put toward further development.
+
+I think the main thing is to allow the distribution at all by Debian. We seem
+to have reached this point.
+
+Please do include this licence in the Debian package and I hope the release can
+go smoothly from now on.

Added: trunk/packages/clustalw/trunk/debian/rules
===================================================================
--- trunk/packages/clustalw/trunk/debian/rules	2006-12-04 00:56:12 UTC (rev 165)
+++ trunk/packages/clustalw/trunk/debian/rules	2006-12-04 00:57:13 UTC (rev 166)
@@ -0,0 +1,62 @@
+#!/usr/bin/make -f
+
+# Uncomment this to turn on verbose mode. 
+#export DH_VERBOSE=1
+
+build: build-stamp
+build-stamp:
+	dh_testdir
+
+	$(MAKE)
+
+	touch build-stamp
+
+clean:
+	dh_testdir
+	dh_testroot
+	rm -f build-stamp 
+
+	-$(MAKE) distclean
+
+	dh_clean
+
+install:
+install: build
+	dh_testdir
+	dh_testroot
+	dh_clean -k
+	dh_installdirs
+
+	# Use dh_install instead of make install      
+	# $(MAKE) DESTDIR=`pwd`/debian/tmp install
+
+	dh_install
+
+# Build architecture-dependent files here.
+binary-arch: build install
+	dh_testdir
+	dh_testroot
+#	dh_installdebconf
+	dh_installdocs
+	dh_installexamples
+	dh_installmenu
+	dh_installman
+	dh_installchangelogs
+	dh_strip
+	dh_link
+	dh_compress
+	dh_fixperms
+#	dh_makeshlibs
+	dh_installdeb
+#	dh_perl
+	dh_shlibdeps
+	dh_gencontrol
+	dh_md5sums
+	dh_builddeb
+
+get-orig-source:
+	lynx --dump ftp://ftp.ebi.ac.uk/pub/software/unix/clustalx/clustalx1.83.sun.tar.gz | tar --delete clustalx1.83.sun/clustalx clustalx1.83.sun/clustalw  -f - | gzip -c > clustalw_1.83.orig.tar.gz
+
+binary: binary-arch
+binary-indep: # does nothing
+.PHONY: build clean binary-arch binary install


Property changes on: trunk/packages/clustalw/trunk/debian/rules
___________________________________________________________________
Name: svn:executable
   + *


Property changes on: trunk/packages/clustalw/trunk/globin.pep
___________________________________________________________________
Name: svn:executable
   - 
   + *

Modified: trunk/packages/clustalw/trunk/interface.c
===================================================================
--- trunk/packages/clustalw/trunk/interface.c	2006-12-04 00:56:12 UTC (rev 165)
+++ trunk/packages/clustalw/trunk/interface.c	2006-12-04 00:57:13 UTC (rev 166)
@@ -1223,8 +1223,7 @@
 			while(fgets(temp,MAXLINE+1,help_file)) {
 				if(strstr(temp, help_marker)){
 				  	if(usemenu) {
-						fprintf(stdout,"\n");
-				    		getstr("Press [RETURN] to continue",lin2);
+				    		getstr("\nPress [RETURN] to continue",MAXLINE+1,lin2);
 				  	}
 					fclose(help_file);
 					return;
@@ -1235,8 +1234,7 @@
 				}
 			       if(usemenu) {
 			          if(nlines >= PAGE_LEN) {
-				     	   fprintf(stdout,"\n");
-			 	  	   getstr("Press [RETURN] to continue or  X  to stop",lin2);
+			 	  	   getstr("\nPress [RETURN] to continue or  X  to stop",MAXLINE+1,lin2);
 				  	   if(toupper(*lin2) == 'X') {
 						   fclose(help_file);
 						   return;
@@ -1247,8 +1245,7 @@
 			       }
 			}
 			if(usemenu) {
-				fprintf(stdout,"\n");
-				getstr("Press [RETURN] to continue",lin2);
+				getstr("\nPress [RETURN] to continue",MAXLINE+1,lin2);
 			}
 			fclose(help_file);
 		}
@@ -1286,8 +1283,7 @@
                 fputs(temp,stdout);
                 ++nlines;
                 if(nlines >= PAGE_LEN) {
-                        fprintf(stdout,"\n");
-                        getstr("Press [RETURN] to continue or  X  to stop",lin2);
+                        getstr("\nPress [RETURN] to continue or  X  to stop",MAXLINE+1,lin2);
                         if(toupper(*lin2) == 'X') {
                                 fclose(file);
                                 return;
@@ -1297,8 +1293,7 @@
                 }
         }
         fclose(file);
-        fprintf(stdout,"\n");
-        getstr("Press [RETURN] to continue",lin2);
+        getstr("\nPress [RETURN] to continue",MAXLINE+1,lin2);
 }
 
 
@@ -1747,7 +1742,7 @@
         FILE *infile;
 
         if(usemenu)
-                getstr("Enter name of the matrix file",lin2);
+                getstr("Enter name of the matrix file",MAXLINE+1,lin2);
         else
                 strcpy(lin2,str);
 
@@ -1773,7 +1768,7 @@
         FILE *infile;
 
         if(usemenu)
-                getstr("Enter name of the matrix file",lin2);
+                getstr("Enter name of the matrix file",MAXLINE+1,lin2);
         else
                 strcpy(lin2,str);
 
@@ -2163,6 +2158,7 @@
  
 {	static char temp[FILENAMELEN+1];
 	static char local_prompt[MAXLINE];
+	static char local_prompt_tmp[MAXLINE+FILENAMELEN+1];
 	FILE * file_handle;
 
 /*	if (*file_name == EOS) {
@@ -2174,17 +2170,17 @@
 		warning("Output file name is the same as input file.");
 		if (usemenu) {
 			strcpy(local_prompt,"\n\nEnter new name to avoid overwriting ");
-			strcat(local_prompt," [%s]: ");          
-			fprintf(stdout,local_prompt,file_name);
-			gets(temp);
+			strcat(local_prompt," [%s]");          
+			sprintf(local_prompt_tmp,local_prompt,file_name);
+			getstr(local_prompt_tmp,FILENAMELEN+1,temp);
 			if(*temp != EOS) strcpy(file_name,temp);
 		}
 	}
 	else if (usemenu) {
 		strcpy(local_prompt,prompt);
-		strcat(local_prompt," [%s]: ");          
-		fprintf(stdout,local_prompt,file_name);
-		gets(temp);
+		strcat(local_prompt," [%s]");          
+		sprintf(local_prompt_tmp,local_prompt,file_name);
+		getstr(local_prompt_tmp,FILENAMELEN+1,temp);
 		if(*temp != EOS) strcpy(file_name,temp);
 	}
 
@@ -2260,7 +2256,7 @@
         	}
         	else {
                  	if((tree = open_output_file(
-                	"\nEnter name for new GUIDE TREE           file  ",path,
+                	"\nEnter name for new GUIDE TREE file ",path,
                 	phylip_name,"dnd")) == NULL) return;
         	}
 	}
@@ -2327,6 +2323,7 @@
 { 
 	char path[FILENAMELEN+1];
 	char tree_name[FILENAMELEN+1],temp[MAXLINE+1];
+	char tmp_msg[MAXLINE+1+300];
 	Boolean use_tree;
 	FILE *tree;
 	sint i,j,count;
@@ -2383,9 +2380,9 @@
         	if((tree=fopen(tree_name,"r"))!=NULL) {
 #endif
 		if (usemenu)
-            	fprintf(stdout,"\nUse the existing GUIDE TREE file,  %s  (y/n) ? [y]: ",
+            	sprintf(tmp_msg,"\nUse the existing GUIDE TREE file,  %s  (y/n) ? [y]",
                                            tree_name);
-                gets(temp);
+                getstr(tmp_msg,MAXLINE+1,temp);
                 if(*temp != 'n' && *temp != 'N') {
                     strcpy(phylip_name,tree_name);
                     use_tree = TRUE;
@@ -2584,6 +2581,7 @@
 void get_tree(char *phylip_name)
 {
 	char path[FILENAMELEN+1],temp[MAXLINE+1];
+	char tmp_msg[FILENAMELEN+300];
 	sint count;
 	
 	if(empty) {
@@ -2615,9 +2613,9 @@
        			strcpy(phylip_name,path);
        			strcat(phylip_name,"dnd");
 
-            fprintf(stdout,"\nEnter a name for the guide tree file [%s]: ",
+			sprintf(tmp_msg,"\nEnter a name for the guide tree file [%s]",
                                            phylip_name);
-                	gets(temp);
+                	getstr(tmp_msg,MAXLINE+1,temp);
                 	if(*temp != EOS)
                         	strcpy(phylip_name,temp);
         	}
@@ -2685,6 +2683,8 @@
 	char path[FILENAMELEN+1];
 	char tree_name[FILENAMELEN+1];
 	char temp[MAXLINE+1];
+	char tmp_msg[FILENAMELEN+300];
+
 	Boolean use_tree1,use_tree2;
 	FILE *tree;
 	sint count,i,j,dscore;
@@ -2717,9 +2717,9 @@
 #else
         	if((tree=fopen(tree_name,"r"))!=NULL) {
 #endif
-            	fprintf(stdout,"\nUse the existing GUIDE TREE file for Profile 1,  %s  (y/n) ? [y]: ",
+            	sprintf(tmp_msg,"\nUse the existing GUIDE TREE file for Profile 1,  %s  (y/n) ? [y]",
                                            tree_name);
-                gets(temp);
+                getstr(tmp_msg,MAXLINE+1,temp);
                 if(*temp != 'n' && *temp != 'N') {
                     strcpy(p1_tree_name,tree_name);
                     use_tree1 = TRUE;
@@ -2739,19 +2739,20 @@
 			strcpy(tree_name,path);
 			strcat(tree_name,"dnd");
 #ifdef VMS
-        	if((tree=fopen(tree_name,"r","rat=cr","rfm=var"))!=NULL) {
+			if((tree=fopen(tree_name,"r","rat=cr","rfm=var"))!=NULL)
 #else
-        	if((tree=fopen(tree_name,"r"))!=NULL) {
+			if((tree=fopen(tree_name,"r"))!=NULL)
 #endif
-            	fprintf(stdout,"\nUse the existing GUIDE TREE file for Profile 2,  %s  (y/n) ? [y]: ",
-                                           tree_name);
-                gets(temp);
-                if(*temp != 'n' && *temp != 'N') {
-                    strcpy(p2_tree_name,tree_name);
-                    use_tree2 = TRUE;
-                }
-                fclose(tree);
-        	}
+			{
+				sprintf(tmp_msg,"\nUse the existing GUIDE TREE file for Profile 2,  %s  (y/n) ? [y]",
+						   tree_name);
+				getstr(tmp_msg,MAXLINE+1,temp);
+				if(*temp != 'n' && *temp != 'N') {
+				    strcpy(p2_tree_name,tree_name);
+				    use_tree2 = TRUE;
+				}
+				fclose(tree);
+			}
 		}
 		else if (!usemenu && use_tree2_file) {
 			use_tree2 = TRUE;
@@ -4194,6 +4195,7 @@
 {
   char parname[FILENAMELEN+1], temp[FILENAMELEN+1];
   char path[FILENAMELEN+1];
+  char tmp_msg[FILENAMELEN+300];
   FILE *parout;
   
   get_path(seqname,path);
@@ -4201,9 +4203,9 @@
   strcat(parname,"par");
   
   if(usemenu) {
-    fprintf(stdout,"\nEnter a name for the parameter output file [%s]: ",
+    sprintf(tmp_msg,"\nEnter a name for the parameter output file [%s]",
 	    parname);
-    gets(temp);
+    getstr(tmp_msg,FILENAMELEN+1,temp);
     if(*temp != EOS)
       strcpy(parname,temp);
   }

Modified: trunk/packages/clustalw/trunk/makefile
===================================================================
--- trunk/packages/clustalw/trunk/makefile	2006-12-04 00:56:12 UTC (rev 165)
+++ trunk/packages/clustalw/trunk/makefile	2006-12-04 00:57:13 UTC (rev 166)
@@ -1,8 +1,16 @@
-install: clustalx clustalw
 
-clean:
-	rm *.o
+RM=/bin/rm -f
 
+BINDIR=$(DESTDIR)/usr/bin
+XBINDIR=$(DESTDIR)/usr/X11R6/bin
+DOCDIR=$(DESTDIR)/usr/share/doc/clustalw
+XDOCDIR=$(DESTDIR)/usr/share/doc/clustalx
+LIBDIR=$(DESTDIR)/usr/share/clustalw
+MANDIR=$(DESTDIR)/usr/share/man/man1
+XMANDIR=$(DESTDIR)/usr/X11R6/man/man1
+DOCS=clustalv.doc clustalw.doc clustalw.ms README_W
+XDOCS=README_X clustalx.html
+
 OBJECTS = interface.o sequence.o showpair.o malign.o \
   	util.o trees.o gcgcheck.o prfalign.o pairalign.o \
   	calcgapcoeff.o calcprf1.o calcprf2.o calctree.o \
@@ -12,26 +20,37 @@
 
 HEADERS = general.h clustalw.h
 
-CC	= cc
-CFLAGS  = -c -O
+CC	= gcc
+CFLAGS  = -c -O2
+
+MACHINE=$(shell uname -m)
+ifeq ("$(MACHINE)","alpha")
+ # -mieee is for the Alpha only: ClustalW divides by zero (yes, I know it's bad)
+ # and expects the processor to go on. -mieee tells the Alpha to comply with
+ # the IEEE standard and to shut up about divisions by zero.
+ CFLAGS  +=  -mieee
+endif
+
 LFLAGS	= -O -lm 
-NCBI_INC  = /dec/biolo/ncbi/include
-NCBI_LIB	= /dec/biolo/ncbi/lib
-CXFLAGS  = -DWIN_MOTIF -I$(NCBI_INC)
-LXFLAGS	= -L$(NCBI_LIB) -lvibrant -lncbi -lpthread -lXm -lXmu -lXt -lX11 -lm 
+NCBI_INC= /usr/include/ncbi
+NCBI_LIB= /usr/lib
+CXFLAGS	= -DWIN_MOTIF -I$(NCBI_INC)
+LXFLAGS	= -L/usr/X11R6/lib -lvibrant -lncbi -lpthread -lXm -lXmu -lXt -lX11 -lm
 
-clustalw : $(OBJECTS) amenu.o clustalw.o
-	$(CC) -o $@ $(OBJECTS) amenu.o clustalw.o $(LFLAGS)
+all: clustalx clustalw
 
-interface.o : interface.c $(HEADERS) param.h
-	$(CC) $(CFLAGS) $*.c
+machine:
+	echo $(MACHINE)
 
-amenu.o : amenu.c $(HEADERS) param.h
-	$(CC) $(CFLAGS) $*.c
+clustalw : $(OBJECTS) $(XOBJECTS) amenu.o clustalw.o
+	$(CC) -o $@ -I$(NCBI_INC) $(OBJECTS) amenu.o clustalw.o $(LFLAGS)
 
 clustalx : $(OBJECTS) $(XOBJECTS) clustalx.o
-	$(CC) -o $@ $(OBJECTS) $(XOBJECTS) clustalx.o $(LFLAGS) $(LXFLAGS)
+	$(CC) -o $@ -I$(NCBI_INC) $(OBJECTS) $(XOBJECTS) clustalx.o $(LFLAGS) $(LXFLAGS)
 
+clustalw.o : clustalw.c $(HEADERS)
+	$(CC) $(CFLAGS) $*.c
+
 clustalx.o : clustalx.c $(HEADERS)
 	$(CC) $(CFLAGS) $(CXFLAGS) $*.c
 
@@ -56,6 +75,25 @@
 trees.o : trees.c $(HEADERS) dayhoff.h
 	$(CC) $(CFLAGS) $*.c
 
-.c.o :
-	$(CC) $(CFLAGS) $?
 
+
+install: all
+	install -d $(BINDIR) $(XBINDIR) $(LIBDIR) $(DOCDIR)/examples $(MANDIR) $(XMANDIR) $(XDOCDIR)
+	install -m 0755 clustalw $(BINDIR)
+	install -m 0755 clustalx $(XBINDIR)
+	install -m 0644 clustalw_help clustalx_help $(LIBDIR)
+	install -m 0644 clustalw.1 $(MANDIR)
+	install -m 0644 clustalx.1 $(MANDIR)
+	install -m 0644 $(DOCS) $(DOCDIR)
+	install -m 0644 $(XDOCS) $(XDOCDIR)
+	cp -a -R tests.clustalw $(DOCDIR)/examples/tests
+
+.PHONY:	clean distclean
+
+clean:
+	$(RM) *.o
+
+distclean: clean
+	$(RM) clustalw clustalx
+	cd tests.clustalw; make clean
+

Modified: trunk/packages/clustalw/trunk/sequence.c
===================================================================
--- trunk/packages/clustalw/trunk/sequence.c	2006-12-04 00:56:12 UTC (rev 165)
+++ trunk/packages/clustalw/trunk/sequence.c	2006-12-04 00:57:13 UTC (rev 166)
@@ -924,7 +924,7 @@
 	static Boolean dnaflag1;
 	
 	if(usemenu)
-		getstr("Enter the name of the sequence file",line);
+		getstr("Enter the name of the sequence file",FILENAMELEN+1,line);
 	else
 		strcpy(line,seqname);
 	if(*line == EOS) return -1;

Added: trunk/packages/clustalw/trunk/tests.clustalw/Makefile
===================================================================
--- trunk/packages/clustalw/trunk/tests.clustalw/Makefile	2006-12-04 00:56:12 UTC (rev 165)
+++ trunk/packages/clustalw/trunk/tests.clustalw/Makefile	2006-12-04 00:57:13 UTC (rev 166)
@@ -0,0 +1,22 @@
+# WARNING: you should run this Makefile with the 
+# '-i' option since Clustalw's return codes are random...
+
+# Makefile, tests directory and samples by Catherine Letondal <letondal at pasteur.fr>
+
+test:	align tree profile
+
+align:	
+	clustalw -infile=nuc.data
+	clustalw -infile=nuc.data -output=PHYLIP
+	clustalw -infile=nuc.data -tree
+
+tree: align
+	clustalw -infile=nuc.data -usetree=nuc.dnd
+
+profile: align
+	clustalw -infile=nuc2.data
+	clustalw -profile -profile1=nuc.aln -profile2=nuc2.aln -outfile=profile.aln
+
+clean:
+	/bin/rm -f *.dnd *.aln *.ph *.phy
+

Added: trunk/packages/clustalw/trunk/tests.clustalw/nuc.data
===================================================================
--- trunk/packages/clustalw/trunk/tests.clustalw/nuc.data	2006-12-04 00:56:12 UTC (rev 165)
+++ trunk/packages/clustalw/trunk/tests.clustalw/nuc.data	2006-12-04 00:57:13 UTC (rev 166)
@@ -0,0 +1,22 @@
+>seq1              200 residues
+AAGCTTTATTTTTCCATCTCTTTTACATTCAATGATTCTATTCATTTGTAAGAGAAGTAA
+GCANNNNNNACTCCAACAGTGGAGAAAGTCAGACTGATTAAACAGGTNNACCAAACACGT
+AAATGTTTTTGNAGAAGGAAGGGNAACTCTGGTGAGCCCGCAGAGGTGCACAGGCTNNNT
+AGAGACAGCAGTGTCAGTCC
+>seq2              209 residues
+TATGCAGACAATTATGATTACATGGGGTAAAAATACTCCAAGATAGTATTCAGTTCTCAA
+GATGGTTCATCACATTTGCCAAGTTTTTCTACCATAAAATTCCCCACCAATCCATTTATG
+TATAGAGTTGATAGTTAACAGAGATCGGAAATGGCCATTGCATCTTTAGTCTGGATTTCC
+TGAGACCTTTGGTTTATTTCTAGGGCTGC
+>seq3                  121 residues
+GTCTACGGCCATACCACCCTGAACGCGCCCGATCTCGTCTGATCTCGGAAGCTAAGCAGG
+GTCGGGCCTGGTTAGTACTTGGATGGGAGACCGCCTGGGAATACCGGGTGCTGTAGGCTT
+T
+>seq4               120 residues
+NNNNATACTCTGGTTTCTCTTCAGATCGTATAAATCTTTCCCCTTTCANCAAAGATTTCC
+GTGGAGAGGAATAACTCTGAGTCTTAAGCTAATTTTTTGAGCCTTGCTCCGACAAGGCTA
+>seq5                21 residues
+AGCCTTGTTCCGACAAGGCTA
+>seq6                120 residues
+NNNNATACTCTGGTTTCTCTTCAGATCGCATAAATCTTTCCCCTTTCANCAAAGATTTCC
+GTGGAGAGGAACAACTCTGAGTCTTAACCCAATTTTTTGAGCCTTGCCTTGGCAAGGCTA

Added: trunk/packages/clustalw/trunk/tests.clustalw/nuc2.data
===================================================================
--- trunk/packages/clustalw/trunk/tests.clustalw/nuc2.data	2006-12-04 00:56:12 UTC (rev 165)
+++ trunk/packages/clustalw/trunk/tests.clustalw/nuc2.data	2006-12-04 00:57:13 UTC (rev 166)
@@ -0,0 +1,22 @@
+>seq10              200 residues
+AAGCTTTATTTTTCCATCTCTTTTACATTCAATGATTCTATTCATTTGTAAGAGAAGTAA
+GCANNNNNNACTCCAAGTGGTTAGAAAGTCAGACTGATTAAACAGGTNNACCAAACACGT
+AAATGTTTTTGNAGAAGGAAGTTGGNAACTCTTGAGCCCGCAGAGGTGCACAGGCTNNNT
+AGAGACAGCAGTGTCAGTCC
+>seq11              209 residues
+TATGCAGACAATTATGATTACATGGGGTAAAAATACTCCAAGATAGTATTCAGTTCTCAA
+GGGTTCATCACATTTGAAGTTTTTTTTTCTACCATAAAATTCCCCACCAATCCATTTATG
+TATAGAGTTGATAGTTAACAGAGATCGGAAATGGCCATTGCATCTTTAGTCTGGATTTCC
+TGAGACCTTTGGTTTATTTCTAGGGCTGC
+>seq12                  121 residues
+GTCTACGGCCATACCACCCTGAACGCGCCCGATCTCGTCTGATCTCGGAAGCTAAGCAGG
+GTCGGGCCTGGTTAGTACTTGGATGGGAGACCGCCTGGGAATACCGGGTGCTGTAGGCTT
+T
+>seq13               120 residues
+NNNNATACTCTGGTTTCTCTTGATCGTAAATAAATCTTTCCCCTTTCANCAAAGATTTCC
+GTGGAGAGGAATAACTCTGAGTCTTAAGCTAATTTTTTGAGCCTTGCTCCGACAAGGCTA
+>seq14                21 residues
+AGCCTTGTTCCGACAAGGCTA
+>seq15                120 residues
+NNNNATACTCTGGTTTCTCTTCAGATCGCATAAATCTTTCCCCTTTCANCAAAGATTTCC
+GTGGAGAGGAACAACTCTGAGTCTTAACCCAATTTTTTGAGCCTTGCCTTGGCAAGGCTA

Added: trunk/packages/clustalw/trunk/tests.clustalw/seq
===================================================================
--- trunk/packages/clustalw/trunk/tests.clustalw/seq	2006-12-04 00:56:12 UTC (rev 165)
+++ trunk/packages/clustalw/trunk/tests.clustalw/seq	2006-12-04 00:57:13 UTC (rev 166)
@@ -0,0 +1,3 @@
+>seq20                120 residues
+NNNNAXACTCTGGTTTCTCTTCAGATCGCATAAATCTTTCCCCTTTCANCAAAGATTTCC
+GTGGAGAGGTACAACTCTGAGTCTTAACCCAATTTTTTGAGCCTTGCCTTGGCAAGGCTA

Modified: trunk/packages/clustalw/trunk/trees.c
===================================================================
--- trunk/packages/clustalw/trunk/trees.c	2006-12-04 00:56:12 UTC (rev 165)
+++ trunk/packages/clustalw/trunk/trees.c	2006-12-04 00:57:13 UTC (rev 166)
@@ -1497,7 +1497,7 @@
 		fprintf(stdout,"\n           or 3) use the PHYLIP package.");
 		fprintf(stdout,"\n\n");
 		if (usemenu) 
-			getstr("Press [RETURN] to continue",dummy);
+			getstr("Press [RETURN] to continue",10,dummy);
 	}
 
 

Modified: trunk/packages/clustalw/trunk/util.c
===================================================================
--- trunk/packages/clustalw/trunk/util.c	2006-12-04 00:56:12 UTC (rev 165)
+++ trunk/packages/clustalw/trunk/util.c	2006-12-04 00:57:13 UTC (rev 166)
@@ -171,10 +171,18 @@
 	return str;
 }
 
-void getstr(char *instr,char *outstr)
+void getstr(char *instr, int n, char *outstr)
 {	
+	int sl;
 	fprintf(stdout,"%s: ",instr);
-	gets(outstr);
+	fgets(outstr,n,stdin);
+	/*
+	 * strip the trailing newline kept by fgets(), for compatibility with the previously used (insecure) gets()
+	 */
+	sl=strlen(outstr);
+	if(sl>0 && '\n'==outstr[sl-1]) {
+		outstr[sl-1]=0;
+	}
 }
 
 double getreal(char *instr,double minx,double maxx,double def)
@@ -185,7 +193,7 @@
 	
 	while(TRUE) {
 		fprintf(stdout,"%s (%.1f-%.1f)   [%.1f]: ",instr,minx,maxx,def);
-		gets(line);
+		fgets(line,MAXLINE,stdin);
 		status=sscanf(line,"%f",&ret);
 		if(status == EOF) return def;
 		if(ret>maxx) {
@@ -210,7 +218,7 @@
 	while(TRUE) {
 		fprintf(stdout,"%s (%d..%d)    [%d]: ",
 		instr,(pint)minx,(pint)maxx,(pint)def);
-		gets(line);
+		fgets(line,MAXLINE,stdin);
 		status=sscanf(line,"%d",&ret);
 		if(status == EOF) return def;
 		if(ret>maxx) {
@@ -230,7 +238,7 @@
 {
 	char line[MAXLINE];
 	
-	getstr("\n\nEnter system command",line);
+	getstr("\n\nEnter system command",MAXLINE,line);
 	if(*line != EOS)
 		system(line);
 	fprintf(stdout,"\n\n");