[med-svn] r3341 - in trunk/packages/clustalx/trunk/debian: . patches

Fri May 1 00:07:09 UTC 2009

Author: plessy
Date: 2009-05-01 00:07:09 +0000 (Fri, 01 May 2009)
New Revision: 3341

Added:
   trunk/packages/clustalx/trunk/debian/clustalx.manpages
Removed:
   trunk/packages/clustalx/trunk/debian/clustalx.manpages
   trunk/packages/clustalx/trunk/debian/patches/amenu.c.patch
   trunk/packages/clustalx/trunk/debian/patches/clustal-help.patch
   trunk/packages/clustalx/trunk/debian/patches/clustalw.h.patch
   trunk/packages/clustalx/trunk/debian/patches/clustalx.html.patch
   trunk/packages/clustalx/trunk/debian/patches/clustalx_help.patch
   trunk/packages/clustalx/trunk/debian/patches/interface.c.patch
   trunk/packages/clustalx/trunk/debian/patches/makefile.patch
   trunk/packages/clustalx/trunk/debian/patches/sequence.c.patch
   trunk/packages/clustalx/trunk/debian/patches/series
   trunk/packages/clustalx/trunk/debian/patches/trees.c.patch
   trunk/packages/clustalx/trunk/debian/patches/util.c.patch
   trunk/packages/clustalx/trunk/debian/patches/xmenu.c.patch
Modified:
   trunk/packages/clustalx/trunk/debian/changelog
   trunk/packages/clustalx/trunk/debian/clustalx.docs
   trunk/packages/clustalx/trunk/debian/clustalx.install
   trunk/packages/clustalx/trunk/debian/clustalx.menu
   trunk/packages/clustalx/trunk/debian/control
   trunk/packages/clustalx/trunk/debian/copyright
   trunk/packages/clustalx/trunk/debian/rules
   trunk/packages/clustalx/trunk/debian/watch
Log:
Switched again to Clustal W 2.0, which is now buildable without a Qt license.


Modified: trunk/packages/clustalx/trunk/debian/changelog
===================================================================

--- trunk/packages/clustalx/trunk/debian/changelog	2009-04-30 23:56:12 UTC (rev 3340)
+++ trunk/packages/clustalx/trunk/debian/changelog	2009-05-01 00:07:09 UTC (rev 3341)
@@ -1,8 +1,27 @@
-clustalx (1.83-5) UNRELEASED; urgency=low
+clustalx (2.0.1-1) unstable; urgency=low
 
+  [ Steffen Moeller ]
+  * New upstream release.
+  * Updated watch file.
+  * Removed LICENSE from debian/clustalx.docs
+  * rename to clustalx seems no longer required in debian/rules
+  * moved clustalx.1 into debian folder (eases working with svn-buildpackage)
+  * added German translation to desktop file
+
+  [ Charles Plessy ]
   * Updated my email address.
+  * New upstream release:
+    - Uses Qt instead of lesstif.
+    - Includes new code for UPGMA guide trees.
+    - Includes iterative alignment facility.
+  * debian/copyright made machine-readable.
+  * Association between Clustal X and .aln files:
+    - text/x-clustalw-alignment associated to clustalx in clustalx.desktop.
+    - .aln declared as text/x-clustalw-alignment in clustalx.sharedmimeinfo.
+    - text/x-clustalw-alignment associated to clustalx in clustalx.mime.
+    - debian/rules calls dh_installmime.
 
- -- Charles Plessy <plessy at debian.org>  Sun, 27 Apr 2008 17:02:48 +0900
+ -- Charles Plessy <charles-debian-nospam at plessy.org>  Thu, 17 Jan 2008 22:55:41 +0900
 
 clustalx (1.83-4) unstable; urgency=low
 

Modified: trunk/packages/clustalx/trunk/debian/clustalx.docs
===================================================================
--- trunk/packages/clustalx/trunk/debian/clustalx.docs	2009-04-30 23:56:12 UTC (rev 3340)
+++ trunk/packages/clustalx/trunk/debian/clustalx.docs	2009-05-01 00:07:09 UTC (rev 3341)
@@ -1,2 +1 @@
-README_X
-clustalx.html
+README

Modified: trunk/packages/clustalx/trunk/debian/clustalx.install
===================================================================
--- trunk/packages/clustalx/trunk/debian/clustalx.install	2009-04-30 23:56:12 UTC (rev 3340)
+++ trunk/packages/clustalx/trunk/debian/clustalx.install	2009-05-01 00:07:09 UTC (rev 3341)
@@ -1,4 +1,3 @@
-clustalx		usr/bin
-clustalx_help		usr/share/clustalw
-debian/clustalx.desktop	usr/share/applications
-
+clustalx usr/bin
+clustalx.hlp usr/share/clustalw
+debian/clustalx.desktop usr/share/applications

Deleted: trunk/packages/clustalx/trunk/debian/clustalx.manpages
===================================================================
--- trunk/packages/clustalx/trunk/debian/clustalx.manpages	2009-04-30 23:56:12 UTC (rev 3340)
+++ trunk/packages/clustalx/trunk/debian/clustalx.manpages	2009-05-01 00:07:09 UTC (rev 3341)
@@ -1 +0,0 @@
-debian/clustalx.1

Copied: trunk/packages/clustalx/trunk/debian/clustalx.manpages (from rev 1410, trunk/packages/clustalx/trunk/debian/clustalx.manpages)
===================================================================
--- trunk/packages/clustalx/trunk/debian/clustalx.manpages	                        (rev 0)
+++ trunk/packages/clustalx/trunk/debian/clustalx.manpages	2009-05-01 00:07:09 UTC (rev 3341)
@@ -0,0 +1 @@
+debian/clustalx.1

Modified: trunk/packages/clustalx/trunk/debian/clustalx.menu
===================================================================
--- trunk/packages/clustalx/trunk/debian/clustalx.menu	2009-04-30 23:56:12 UTC (rev 3340)
+++ trunk/packages/clustalx/trunk/debian/clustalx.menu	2009-05-01 00:07:09 UTC (rev 3341)
@@ -1,5 +1,5 @@
 ?package(clustalx):needs="X11" \
     section="Applications/Science/Biology" \
-    title="Clustal X" \
+    title="ClustalX" \
     command="/usr/bin/clustalx"\
     hints="GUI for clustalw"

Modified: trunk/packages/clustalx/trunk/debian/control
===================================================================
--- trunk/packages/clustalx/trunk/debian/control	2009-04-30 23:56:12 UTC (rev 3340)
+++ trunk/packages/clustalx/trunk/debian/control	2009-05-01 00:07:09 UTC (rev 3341)
@@ -3,19 +3,19 @@
 Priority: optional
 Maintainer: Debian-Med Packaging Team <debian-med-packaging at lists.alioth.debian.org>
 DM-Upload-Allowed: yes
-Uploaders: Steffen Moeller <moeller at debian.org>, Charles Plessy <plessy at debian.org>
-Build-Depends: debhelper (>= 5), libncbi6-dev, libvibrant6-dev, lesstif2-dev, quilt
+Uploaders: Steffen Moeller <moeller at debian.org>, Charles Plessy <charles-debian-nospam at plessy.org>
+Build-Depends: debhelper (>= 5), libqt4-dev, quilt
 Standards-Version: 3.7.3
 Vcs-Browser: http://svn.debian.org/wsvn/debian-med/trunk/packages/clustalx/trunk/?rev=0&sc=0
 Vcs-Svn: svn://svn.debian.org/svn/debian-med/trunk/packages/clustalx/trunk/
 XS-Autobuild: yes
-Homepage: ftp://ftp.ebi.ac.uk/pub/software/unix/clustalx/
+Homepage: http://www.ebi.ac.uk/Tools/clustalw2/
 
 Package: clustalx
 Architecture: any
 Depends: ${shlibs:Depends}
-Suggests: texshade|texlive-latex-extra, boxshade
-Description: GUI for Clustal W
+Suggests: clustalw, texshade|texlive-latex-extra, boxshade
+Description: GUI for clustalw
  This package offers a GUI interface for the Clustal W multiple sequence
  alignment program. It provides an integrated environment for performing
  multiple sequence- and profile-alignments to analyse the results.
@@ -32,3 +32,6 @@
  .
  An alignment quality analysis can be performed and low-scoring segments or
  exceptional residues can be highlighted.
+ .
+ For details and citation purposes see paper "Clustal W and Clustal X version 
+ 2.0", Larkin M., et al. Bioinformatics 2007 23(21):2947-2948

Modified: trunk/packages/clustalx/trunk/debian/copyright
===================================================================
--- trunk/packages/clustalx/trunk/debian/copyright	2009-04-30 23:56:12 UTC (rev 3340)
+++ trunk/packages/clustalx/trunk/debian/copyright	2009-05-01 00:07:09 UTC (rev 3341)
@@ -1,76 +1,75 @@
-This package was debianized by Andreas Tille <tille at debian.org> on
-Sat, 27 Oct 2001 22:16:53 +0200
+X-Format-Specification: http://wiki.debian.org/Proposals/CopyrightFormat
+X-Debianized-By: Stephane Bortzmeyer <bortzmeyer at debian.org>
+X-Debianized-Date: Fri, 28 Aug 1998 16:09:48 +0200
+X-Source-Downloaded-From: ftp://ftp.ebi.ac.uk/pub/software/clustalw2
+X-Upstream-Author: Des Higgins, Julie Thompson and Toby Gibson
 
-It was downloaded from:
+Files: debian/*
+Copyright: © 1998-1999 Stephane Bortzmeyer <bortzmeyer at debian.org>
+           © 2001 Dr. Guenter Bechly <gbechly at debian.org>
+           © 2001 Adrian Bunk <bunk at fs.tum.de>
+	   © 2001-2002 Andreas Tille <tille at debian.org>
+	   © 2003-2008 Steffen Möller <moeller at debian.org>
+	   © 2006 Kai Hendry <hendry at iki.fi>
+	   © 2007 Nelson A. de Oliveira <naoliv at debian.org>
+	   © 2007-2008 Charles Plessy <charles-debian-nospam at plessy.org>
+Licence: Unclear
+ The licence of the earlier works was never stated. Some works have been
+ obsoleted by release of version 2.0 — the lesstif migration for instance —
+ but authors are left in the list of copyright holders by courtesy. The work
+ of Charles Plessy and Steffen Möller can be treated as if it were public domain.
 
-       ftp://ftp-igbmc.u-strasbg.fr/pub/ClustalX/  and
-       ftp://ftp-igbmc.u-strasbg.fr/pub/ClustalW/
+Files: debian/clustalx.1
+Copyright: © 1998-1999 Stephane Bortzmeyer <bortzmeyer at debian.org>
+Licence: Unknown
 
-while the source was merged to one common upstream source (see README.Debian)
+Files: *
+Copyright: © Des Higgins, Julie Thompson and Toby Gibson
+Licence: Not for commercial use, No modification of the code.
+ Licensing ClustalW and ClustalX
+ .
+ Date:29 November 2007
+ .
+ The copyright for ClustalW and ClustalX is held by Des Higgins, Julie Thompson
+ and Toby Gibson
+ .
+ The binaries and source code are made available and can be distributed subject
+ to the following conditions:
+ .
+ Users are free to redistribute ClustalW or ClustalX in it's unmodified form as
+ long as it is not for commercial gain.
+ .
+ Anyone wishing to redistribute Clustal commercially should contact Toby Gibson
+ at gibson at embl.de
+ .
+ .
+ If users make changes/have ideas that they believe would be useful to the
+ broader research community they can send their suggestions to the clustal
+ development team at clustalw at ucd.ie where they will be considered for inclusion
+ in future releases.
 
-Authors: 
-Toby Gibson <Toby.Gibson at EMBL-Heidelberg.de>
-Julie Thompson <julie at titus.u-strasbg.fr>
-Des Higgins <d.higgins at ucc.ie>
-
-Copyright:
-
-Non-free. You cannot distribute it at will.
-
-Debian holds a special exemption for distributing (see below). The licence does
-not forbid Debian from using autobuilders to create binary packages.
-
-Licence included here:
-
-**********************
-LICENCE FOR CLUSTAL W
-**********************
-
-Clustal W (hereafter "the program") is copyright (c) 1994-1998 by Julie D.
-Thompson, Desmond G. Higgins and Toby J. Gibson.
-
-Permission is granted to copy, distribute and use the program provided no fee
-is charged for it and provided that this copyright and licence notice is not
-removed or altered.
-
-The full source code of the program is provided free. You should not
-distribute a modified version of the program without obtaining the permission
-of the authors. You must keep the original copyright and licence notice. You
-must also document clearly the modifications you have made. You must make
-clear that this is not the original version.
-
-Commercial distributors of Clustal W are requested to contact the Clustal W
-authors in order to take out a non-exclusive licence. See the README file
-included with Clustal W for a rationale.
-
-You should understand that this software is provided as-is. The authors make
-no claims towards its suitability for any purpose and accept absolutely no
-liability for any damages the program may cause. Use at your own risk.
-
-* End of licence
-
-
-
-Special authorization for Debian:
-
-
-
-From: "Toby Gibson" <Toby.Gibson at EMBL-Heidelberg.de>
-Date: Thu, 17 Dec 1998 14:37:02 +0100
-To: Stephane Bortzmeyer <bortzmeyer at debian.org>
-Subject: Re: Fwd: clustalw_1.7-4_i386.changes REJECTED
-
-Hi Stephane,
-
-Now that we have thought about it, I don't think we can meet your stricter free
-criterion. There are already several companies who bundle Clustal W in sequence
-analysis packages and so are effectively selling it. They have paid for
-non-exclusive licences even though anyone can get the program for free: but
-they must have a multiple alignment engine, so we might as well earn some money
-which we can put toward further development.
-
-I think the main thing is to allow the distribution at all by Debian. We seem
-to have reached this point.
-
-Please do include this licence in the Debian package and I hope the release can
-go smoothly from now on.
+X-Comment: Frequently asked questions about Clustal licensing?
+           ---------------------------------------------------
+ .
+ 1. Do I have to pay to use ClustalW or ClustalX?
+ No - unless you wish to redistribute Clustal for profit. In this case see question 4.
+ .
+ 2. Can I redistribute the Clustal binaries and source code?
+ Yes. We have always wanted to see Clustal have as wide a userbase and
+ distribution network as possible and are happy to see other sites host copies
+ of the official Clustal code.
+ .
+ 3. Can I make changes to the source code?
+ .
+ You can make changes for your own purposes but you should not redistribute the
+ changed code.
+ .
+ 4. I want to include ClustalW/ClustalX in a commercial application who should I contact?
+ Toby Gibson at gibson at embl.de (also cc des.higgins at ucd.ie)
+ .
+ 5. There is no Clustal distribution for platform X. I have changed the code in
+ order to compile Clustal on this platform. Can I redistribute it?
+ .
+ Ideally you should send us a copy of your source code changes as well as a
+ binary. We will include it in our contributed binaries section on our FTP site
+ along with an acknowledgement of your contribution.

Deleted: trunk/packages/clustalx/trunk/debian/patches/amenu.c.patch
===================================================================
--- trunk/packages/clustalx/trunk/debian/patches/amenu.c.patch	2009-04-30 23:56:12 UTC (rev 3340)
+++ trunk/packages/clustalx/trunk/debian/patches/amenu.c.patch	2009-05-01 00:07:09 UTC (rev 3341)
@@ -1,130 +0,0 @@
-Index: clustalw-1.83/amenu.c
-===================================================================
---- clustalw-1.83.orig/amenu.c
-+++ clustalw-1.83/amenu.c
-@@ -184,7 +184,7 @@
- 		fprintf(stdout,"     H. HELP\n");
- 		fprintf(stdout,"     X. EXIT (leave program)\n\n\n");
- 		
--		getstr("Your choice",lin1);
-+		getstr("Your choice",MAXLINE+1,lin1);
- 
- 		switch(toupper(*lin1)) {
- 			case '1': seq_input(FALSE);
-@@ -268,7 +268,7 @@
-         fprintf(stdout,"    H.  HELP\n");
-         fprintf(stdout,"    or press [RETURN] to go back to main menu\n\n\n");
- 
--        getstr("Your choice",lin1);
-+        getstr("Your choice",MAXLINE+1,lin1);
-         if(*lin1 == EOS) return;
- 
-         switch(toupper(*lin1))
-@@ -361,7 +361,7 @@
-         fprintf(stdout,"    H.  HELP\n");
-         fprintf(stdout,"    or press [RETURN] to go back to main menu\n\n\n");
- 
--        getstr("Your choice",lin1);
-+        getstr("Your choice",MAXLINE+1,lin1);
-         if(*lin1 == EOS) return;
- 
-         switch(toupper(*lin1))
-@@ -457,7 +457,7 @@
- 		fprintf(stdout,"\n\n");
- 		fprintf(stdout,"     H. HELP\n\n\n");
- 		
--		getstr("Enter number (or [RETURN] to exit)",lin2);
-+		getstr("Enter number (or [RETURN] to exit)",MAXLINE+1,lin2);
- 		if( *lin2 == EOS) { 
- 			return;
- 		}
-@@ -533,7 +533,7 @@
-                 fprintf(stdout,"--\n");
- 
- 
--                getstr("\n\nEnter number (or [RETURN] to exit)",lin2);
-+                getstr("\n\nEnter number (or [RETURN] to exit)",MAXLINE+1,lin2);
-                 if(*lin2 == EOS) return(output_struct_penalties);
- 
-         	switch(toupper(*lin2))
-@@ -602,7 +602,7 @@
-         fprintf(stdout,"    H.  HELP\n");
-         fprintf(stdout,"    or press [RETURN] to go back to main menu\n\n\n");
- 
--        getstr("Your choice",lin1);
-+        getstr("Your choice",MAXLINE+1,lin1);
-         if(*lin1 == EOS) return;
- 
-         switch(toupper(*lin1))
-@@ -677,7 +677,7 @@
- 	fprintf(stdout,"\n");
- 	fprintf(stdout,"     H. HELP\n\n\n");	
- 	
--		getstr("Enter number (or [RETURN] to exit)",lin2);
-+		getstr("Enter number (or [RETURN] to exit)",MAXLINE+1,lin2);
- 		if(*lin2 == EOS) return;
- 		
- 		switch(toupper(*lin2)) {
-@@ -766,7 +766,7 @@
- 	fprintf(stdout,"\n");
- 	fprintf(stdout,"     H. HELP\n\n\n");	
- 	
--		getstr("Enter number (or [RETURN] to exit)",lin2);
-+		getstr("Enter number (or [RETURN] to exit)",MAXLINE+1,lin2);
- 		if(*lin2 == EOS) return;
- 		
- 		switch(toupper(*lin2)) {
-@@ -907,7 +907,7 @@
- 
- 		fprintf(stdout,"     H. HELP\n\n\n");
- 		
--		getstr("Enter number (or [RETURN] to exit)",lin2);
-+		getstr("Enter number (or [RETURN] to exit)",MAXLINE+1,lin2);
- 		if( *lin2 == EOS) {
-                         if(dnaflag) {
-                                 dna_pw_go_penalty     = pw_go_penalty;
-@@ -1029,7 +1029,7 @@
-                 fprintf(stdout,"     8. Protein Gap Parameters\n\n");
- 		fprintf(stdout,"     H. HELP\n\n\n");		
- 
--		getstr("Enter number (or [RETURN] to exit)",lin2);
-+		getstr("Enter number (or [RETURN] to exit)",MAXLINE+1,lin2);
- 
- 		if(*lin2 == EOS) {
- 			if(dnaflag) {
-@@ -1122,7 +1122,7 @@
- 		fprintf(stdout,"     5. Toggle End Gap Separation         :%s\n\n",(!use_endgaps) ? "OFF" : "ON");
- 		fprintf(stdout,"     H. HELP\n\n\n");		
- 
--		getstr("Enter number (or [RETURN] to exit)",lin2);
-+		getstr("Enter number (or [RETURN] to exit)",MAXLINE+1,lin2);
- 
- 		if(*lin2 == EOS) return;
- 		
-@@ -1136,7 +1136,7 @@
- 			case '3':
- 				fprintf(stdout,"Hydrophilic Residues Currently: %s\n",hyd_residues);
- 
--				getstr("Enter residues (or [RETURN] to quit)",lin1);
-+				getstr("Enter residues (or [RETURN] to quit)",MAXLINE+1,lin1);
-                                 if (*lin1 != EOS) {
-                                         for (i=0;i<strlen(hyd_residues) && i<26;i++) {
-                                         c = lin1[i];
-@@ -1188,7 +1188,7 @@
-                 fprintf(stdout,"--\n");
- 
- 
--                getstr("\n\nEnter number (or [RETURN] to exit)",lin2);
-+                getstr("\n\nEnter number (or [RETURN] to exit)",MAXLINE+1,lin2);
-                 if(*lin2 == EOS) return(matn);
- 
-                 i=toupper(*lin2)-'0';
-@@ -1223,7 +1223,7 @@
- 	fprintf(stdout,"\n%s\n",title);
- 	strcpy(line,prompt);
- 	strcat(line, "(y/n) ? [y]");
--	getstr(line,lin2);
-+	getstr(line,MAXLINE+1,lin2);
- 	if ((*lin2 != 'n') && (*lin2 != 'N'))
- 		return('y');
- 	else

Deleted: trunk/packages/clustalx/trunk/debian/patches/clustal-help.patch
===================================================================
--- trunk/packages/clustalx/trunk/debian/patches/clustal-help.patch	2009-04-30 23:56:12 UTC (rev 3340)
+++ trunk/packages/clustalx/trunk/debian/patches/clustal-help.patch	2009-05-01 00:07:09 UTC (rev 3341)
@@ -1,26 +0,0 @@
-Index: clustalw-1.83/clustalw.c
-===================================================================
---- clustalw-1.83.orig/clustalw.c
-+++ clustalw-1.83/clustalw.c
-@@ -34,7 +34,7 @@
- #ifdef MSDOS
-         char *help_file_name = "clustalw.hlp";
- #else
--        char *help_file_name = "clustalw_help";
-+        char *help_file_name = "/usr/share/clustalw/clustalw_help";
- #endif
- 
- sint max_names; /* maximum length of names in current alignment file */
-Index: clustalw-1.83/clustalx.c
-===================================================================
---- clustalw-1.83.orig/clustalx.c
-+++ clustalw-1.83/clustalx.c
-@@ -26,7 +26,7 @@
- #ifdef MSDOS
-         char *help_file_name = "clustalx.hlp";
- #else
--        char *help_file_name = "clustalx_help";
-+        char *help_file_name = "/usr/share/clustalw/clustalx_help";
- #endif
- 
- sint max_names; /* maximum length of names in current alignment file */

Deleted: trunk/packages/clustalx/trunk/debian/patches/clustalw.h.patch
===================================================================
--- trunk/packages/clustalx/trunk/debian/patches/clustalw.h.patch	2009-04-30 23:56:12 UTC (rev 3340)
+++ trunk/packages/clustalx/trunk/debian/patches/clustalw.h.patch	2009-05-01 00:07:09 UTC (rev 3341)
@@ -1,13 +0,0 @@
-Index: clustalw-1.83/clustalw.h
-===================================================================
---- clustalw-1.83.orig/clustalw.h
-+++ clustalw-1.83/clustalw.h
-@@ -238,7 +238,7 @@
- char *blank_to_(char *str);
- char *upstr(char *str);
- char *lowstr(char *str);
--void getstr(char *instr, char *outstr);
-+void getstr(char *instr, int n, char *outstr);
- double getreal(char *instr, double minx, double maxx, double def);
- int getint(char *instr, int minx, int maxx, int def);
- void do_system(void);

Deleted: trunk/packages/clustalx/trunk/debian/patches/clustalx.html.patch
===================================================================
--- trunk/packages/clustalx/trunk/debian/patches/clustalx.html.patch	2009-04-30 23:56:12 UTC (rev 3340)
+++ trunk/packages/clustalx/trunk/debian/patches/clustalx.html.patch	2009-05-01 00:07:09 UTC (rev 3341)
@@ -1,2123 +0,0 @@
-Index: clustalw-1.83/clustalx.html
-===================================================================
---- clustalw-1.83.orig/clustalx.html
-+++ clustalw-1.83/clustalx.html
-@@ -2029,6 +2029,2118 @@
- <P>
- Thompson,J.D., Gibson,T.J., Plewniak,F., Jeanmougin,F. and Higgins,D.G. (1997)
- The ClustalX windows interface: flexible strategies for multiple sequence 
-+alignment aided by quality analysis tools. Nucleic Acids Research, 24:4876-4882.
-+</P>
-+<P>
-+</P>
-+<P>
-+<STRONG>
-+The ClustalW program is described in the manuscript:
-+</STRONG>
-+</P>
-+<P>
-+Thompson, J.D., Higgins, D.G. and Gibson, T.J. (1994) CLUSTAL W: improving the
-+sensitivity of progressive multiple sequence alignment through sequence
-+weighting, positions-specific gap penalties and weight matrix choice.  Nucleic
-+Acids Research, 22:4673-4680.
-+</P>
-+<P>
-+</P>
-+<P>
-+<STRONG>
-+The ClustalV program is described in the manuscript:
-+</STRONG>
-+</P>
-+<P>
-+Higgins,D.G., Bleasby,A.J. and Fuchs,R. (1992) CLUSTAL V: improved software for
-+multiple sequence alignment. CABIOS 8,189-191.
-+</P>
-+<P>
-+</P>
-+<P>
-+<STRONG>
-+The original Clustal program is described in the manuscripts:
-+</STRONG>
-+</P>
-+<P>
-+Higgins,D.G. and Sharp,P.M. (1989) Fast and sensitive multiple sequence
-+alignments on a microcomputer.
-+CABIOS 5,151-153.
-+</P>
-+<P>
-+Higgins,D.G. and Sharp,P.M. (1988) CLUSTAL: a package for performing multiple
-+sequence alignment on a microcomputer. Gene 73,237-244.
-+</P>
-+<P>
-+<STRONG>
-+Some tips on using Clustal X:
-+</STRONG>
-+</P>
-+<P>
-+Jeannmougin,F., Thompson,J.D., Gouy,M., Higgins,D.G. and Gibson,T.J. (1998)
-+Multiple sequence alignment with Clustal X. Trends Biochem Sci, 23, 403-5.
-+</P>
-+<P>
-+<STRONG>
-+Some tips on using Clustal W:
-+</STRONG>
-+</P>
-+<P>
-+Higgins, D. G., Thompson, J. D. and Gibson, T. J. (1996) Using CLUSTAL for
-+multiple sequence alignments. Methods Enzymol., 266, 383-402.
-+</P>
-+<P>
-+<STRONG>
-+You can get the latest version of the ClustalX program by anonymous ftp to:
-+</STRONG>
-+</P>
-+<P>
-+ftp-igbmc.u-strasbg.fr
-+ftp.embl-heidelberg.de
-+ftp.ebi.ac.uk
-+</P>
-+<P>
-+<STRONG>
-+Or, have a look at the following WWW site:
-+</STRONG>
-+</P>
-+<P>
-+http://www-igbmc.u-strasbg.fr/BioInfo/
-+</P>
-+<P>
-+</P>
-+<A HREF="#INDEX"> <EM>Back to Index</EM> </A>
-+<HEAD>
-+<TITLE>ClustalX Help</TITLE>
-+</HEAD>
-+<BODY BGCOLOR=white>
-+<CENTER><H1>ClustalX Help</H1></CENTER>
-+<P>
-+You can get the latest version of the ClustalX program here:
-+</P>
-+<DL><DD>
-+<A HREF="ftp://ftp-igbmc.u-strasbg.fr/pub/ClustalX/">
-+ftp://ftp-igbmc.u-strasbg.fr/pub/ClustalX/</A>
-+</DL>
-+<P>For full details of usage and algorithms, please read the <A HREF="clustalw.doc"><EM>ClustalW.Doc</EM></A> file.</P>
-+<PRE><EM>
-+Toby  Gibson                         EMBL, Heidelberg, Germany.
-+Des   Higgins                        UCC, Cork, Ireland.
-+Julie Thompson/Francois Jeanmougin   IGBMC, Strasbourg, France.
-+</EM></PRE>
-+<CENTER><H2><A NAME="Index">Index</A></H2></CENTER>
-+<OL>
-+<LI><A HREF="#G">                      General help for CLUSTAL X (1.8)
-+</A></LI>
-+<LI><A HREF="#F">                      Input / Output Files 
-+</A></LI>
-+<LI><A HREF="#E">                          Editing Alignments
-+</A></LI>
-+<LI><A HREF="#M">                          Multiple Alignments
-+</A></LI>
-+<LI><A HREF="#P">                   Profile and Structure Alignments
-+</A></LI>
-+<LI><A HREF="#B">            Secondary Structure / Gap Penalty Masks
-+</A></LI>
-+<LI><A HREF="#T">                            Phylogenetic Trees
-+</A></LI>
-+<LI><A HREF="#C">                               Colors
-+</A></LI>
-+<LI><A HREF="#Q">                       Alignment Quality Analysis
-+</A></LI>
-+<LI><A HREF="#9">              Command Line Parameters
-+</A></LI>
-+<LI><A HREF="#R">                             References
-+</A></LI>
-+</OL>
-+<CENTER><H2><A NAME="G">                      General help for CLUSTAL X (1.8)
-+</A></H2></CENTER>
-+<P>
-+</P>
-+<P>
-+Clustal X is a windows interface for the ClustalW multiple sequence alignment
-+program. It provides an integrated environment for performing multiple sequence
-+and profile alignments and analysing the results. The sequence alignment is
-+displayed in a window on the screen. A versatile coloring scheme has been
-+incorporated allowing you to highlight conserved features  in the alignment.
-+The pull-down menus at the top of the window allow you to select all the
-+options required for traditional multiple sequence and profile alignment.
-+</P>
-+<P>
-+You can cut-and-paste sequences to change the order of the alignment; you can
-+select a subset of sequences to be aligned; you can select a sub-range of the
-+alignment to be realigned and inserted back into the original alignment.
-+</P>
-+<P>
-+Alignment quality analysis can be performed and low-scoring segments or
-+exceptional residues can be highlighted.
-+</P>
-+<P>
-+ClustalX is available for a number of different platforms including: SUN
-+Solaris, IRIX5.3 on Silicon Graphics, Digital UNIX on DECStations, Microsoft
-+Windows (32 bit) for PC's, Linux ELF for x86 PC's and Macintosh PowerMac. (See
-+the README file for Installation instructions.)
-+</P>
-+<P>
-+</P>
-+<P>
-+<H4>
-+SEQUENCE INPUT
-+</H4>
-+</P>
-+<P>
-+Sequences and profiles (a term for pre-existing alignments) are input using 
-+the FILE menu. Invalid options will be disabled. All sequences must be included
-+into 1 file. 7 formats are automatically recognised: NBRF/PIR, EMBL/SWISSPROT,
-+Pearson (Fasta), Clustal (*.aln), GCG/MSF (Pileup), GCG9 RSF and GDE flat file.
-+All non-alphabetic characters (spaces, digits, punctuation marks) are ignored
-+except "-" which is used to indicate a GAP ("." in MSF/RSF).  
-+</P>
-+<P>
-+<H4>
-+SEQUENCE / PROFILE ALIGNMENTS
-+</H4>
-+</P>
-+<P>
-+Clustal X has two modes which can be selected using the switch directly above
-+the sequence display: MULTIPLE ALIGNMENT MODE and PROFILE ALIGNMENT MODE.
-+</P>
-+<P>
-+To do a MULTIPLE ALIGNMENT on a set of sequences, make sure MULTIPLE ALIGNMENT
-+MODE is selected. A single sequence data area is then displayed. The ALIGNMENT
-+menu then allows you to either produce a guide tree for the alignment, or to do
-+a multiple alignment following the guide tree, or to do a full multiple
-+alignment.
-+</P>
-+<P>
-+In PROFILE ALIGNMENT MODE, two sequence data areas are displayed, allowing you
-+to align 2 alignments (termed profiles). Profiles are also used to add a new
-+sequence to an old alignment, or to use secondary structure to guide the
-+alignment process. GAPS in the old alignments are indicated using the "-" 
-+character. PROFILES can be input in ANY of the allowed formats; just  use "-"
-+(or "." for MSF/RSF) for each gap position. In Profile Alignment Mode, a button
-+"Lock Scroll" is displayed which allows you to scroll the two profiles together
-+using a single scroll bar. When the Lock Scroll is turned off, the two profiles
-+can be scrolled independently.
-+</P>
-+<P>
-+<H4>
-+PHYLOGENETIC TREES
-+</H4>
-+</P>
-+<P>
-+Phylogenetic trees can be calculated from old alignments (read in with "-"
-+characters to indicate gaps) OR after a multiple alignment while the alignment
-+is still displayed.
-+</P>
-+<P>
-+<H4>
-+ALIGNMENT DISPLAY
-+</H4>
-+</P>
-+<P>
-+The alignment is displayed on the screen with the sequence names on the left
-+hand side. The sequence alignment is for display only, it cannot be edited here
-+(except for changing the sequence order by cutting-and-pasting on the sequence
-+names). 
-+</P>
-+<P>
-+A ruler is displayed below the sequences, starting at 1 for the first residue
-+position (residue numbers in the sequence input file are ignored).
-+</P>
-+<P>
-+A line above the alignment is used to mark strongly conserved positions. Three
-+characters ('*', ':' and '.') are used:
-+</P>
-+<P>
-+'*' indicates positions which have a single, fully conserved residue
-+</P>
-+<P>
-+':' indicates that one of the following 'strong' groups is fully conserved:-
-+<PRE>
-+                 STA  
-+                 NEQK  
-+                 NHQK  
-+                 NDEQ  
-+                 QHRK  
-+                 MILV  
-+                 MILF  
-+                 HY  
-+                 FYW  
-+</PRE>
-+</P>
-+<P>
-+'.' indicates that one of the following 'weaker' groups is fully conserved:-
-+<PRE>
-+                 CSA  
-+                 ATV  
-+                 SAG  
-+                 STNK  
-+                 STPA  
-+                 SGND  
-+                 SNDEQK  
-+                 NDEQHK  
-+                 NEQHRK  
-+                 FVLIM  
-+                 HFY  
-+</PRE>
-+</P>
-+<P>
-+These are all the positively scoring groups that occur in the Gonnet Pam250
-+matrix. The strong and weak groups are defined as strong score >0.5 and weak
-+score =<0.5 respectively.
-+</P>
-+<P>
-+For profile alignments, secondary structure and gap penalty masks are displayed
-+above the sequences, if any data is found in the profile input file.
-+</P>
-+<P>
-+</P>
-+<P>
-+</P>
-+<A HREF="#INDEX"> <EM>Back to Index</EM> </A>
-+<CENTER><H2><A NAME="F">                      Input / Output Files 
-+</A></H2></CENTER>
-+<P>
-+</P>
-+<P>
-+LOAD SEQUENCES reads sequences from one of 7 file formats, replacing any
-+sequences that are already loaded. All sequences must be in 1 file. The formats
-+that are automatically recognised are: NBRF/PIR, EMBL/SWISSPROT, Pearson
-+(Fasta), Clustal (*.aln), GCG/MSF (Pileup), GCG9/RSF and GDE flat file.  All
-+non-alphabetic characters (spaces, digits, punctuation  marks) are ignored
-+except "-" which is used to indicate a GAP ("." in MSF/RSF).
-+</P>
-+<P>
-+The program tries to automatically recognise the different file formats used
-+and to guess whether the sequences are amino acid or nucleotide.  This is not
-+always foolproof.
-+</P>
-+<P>
-+FASTA and NBRF/PIR formats are recognised by having a ">" as the first 
-+character in the file.  
-+</P>
-+<P>
-+EMBL/Swiss Prot formats are recognised by the letters "ID" at the start of the
-+file (the token for the entry name field).  
-+</P>
-+<P>
-+CLUSTAL format is recognised by the word CLUSTAL at the beginning of the file.
-+</P>
-+<P>
-+GCG/MSF format is recognised by one of the following:
-+<UL>
-+<LI>
-+       - the word PileUp at the start of the file.
-+</LI><LI>
-+       - the word !!AA_MULTIPLE_ALIGNMENT or !!NA_MULTIPLE_ALIGNMENT
-+         at the start of the file.
-+</LI><LI>
-+       - the word MSF on the first line of the file, and the characters ..
-+         at the end of this line.
-+</LI>
-+</UL>
-+</P>
-+<P> 
-+GCG/RSF format is recognised by the word !!RICH_SEQUENCE at the beginning of
-+the file.
-+</P>
-+<P>
-+</P>
-+<P>
-+If 85% or more of the characters in the sequence are from A,C,G,T,U or N, the
-+sequence will be assumed to be nucleotide.  This works in 97.3% of cases but
-+watch out!
-+</P>
-+<P>
-+APPEND SEQUENCES is only valid in MULTIPLE ALIGNMENT MODE. The input sequences
-+do not replace those already loaded, but are appended at the end of the
-+alignment.
-+</P>
-+<P>
-+SAVE SEQUENCES AS... offers the user a choice of one of six output formats:
-+CLUSTAL, NBRF/PIR, GCG/MSF, PHYLIP, NEXUS or GDE. All sequences are written
-+to a single file. Options are available to save a range of the alignment, 
-+switch between UPPER/LOWER case for GDE files, and to output SEQUENCE NUMBERING
-+for CLUSTAL files.
-+</P>
-+<P>
-+LOAD PROFILE 1 reads sequences in the same 7 file formats, replacing any
-+sequences already loaded as Profile 1. This option will also remove any
-+sequences which are loaded in Profile 2.
-+</P>
-+<P>
-+LOAD PROFILE 2 reads sequences in the same 7 file formats, replacing any
-+sequences already loaded as Profile 2.
-+</P>
-+<P>
-+SAVE PROFILE 1 AS... is similar to the Save Sequences option except that only
-+those sequences in Profile 1 will be written to the output file.
-+</P>
-+<P>
-+SAVE PROFILE 2 AS... is similar to the Save Sequences option except that only
-+those sequences in Profile 2 will be written to the output file.
-+</P>
-+<P>
-+WRITE ALIGNMENT AS POSTSCRIPT will write the sequence display to a postscript
-+format file. This will include any secondary structure / gap penalty mask 
-+information and the consensus and ruler lines which are displayed on the
-+screen. The Alignment Quality curve can be optionally included in the output
-+file.
-+</P>
-+<P>
-+WRITE PROFILE 1 AS POSTSCRIPT is similar to WRITE ALIGNMENT AS POSTSCRIPT
-+except that only the profile 1 display will be printed.
-+</P>
-+<P>
-+WRITE PROFILE 2 AS POSTSCRIPT is similar to WRITE ALIGNMENT AS POSTSCRIPT
-+except that only the profile 2 display will be printed.
-+</P>
-+<P>
-+</P>
-+<P>
-+<H4>
-+POSTSCRIPT PARAMETERS
-+</H4>
-+</P>
-+<P>
-+A number of options are available to allow you to configure your postscript
-+output file.
-+</P>
-+<P>
-+PS COLORS FILE:
-+</P>
-+<P>
-+The exact RGB values required to reproduce the colors used in the alignment
-+window will vary from printer to printer. A PS colors file can be specified
-+that contains the RGB values for all the colors required by each of your
-+postscript printers.
-+</P>
-+<P>
-+By default, Clustal X looks for a file called 'colprint.par' in the current
-+directory (if you're running under UNIX, it then looks in your home directory,
-+and finally in the directories in your PATH environment variable). If no PS
-+colors file is found or a color used on the screen is not defined here, the
-+screen RGB values (from the Color Parameter File) are used.
-+</P>
-+<P>
-+The PS colors file consists of one line for each color to be defined, with the
-+color name followed by the RGB values (on a scale of 0 to 1). For example,
-+</P>
-+<P>
-+RED          0.9 0.1 0.1
-+</P>
-+<P>
-+Blank lines and comments (lines beginning with a '#' character) are ignored.
-+</P>
-+<P>
-+</P>
-+<P>
-+PAGE SIZE:  The alignment can be displayed on either A4, A3 or US Letter size
-+pages.
-+</P>
-+<P>
-+ORIENTATION: The alignment can be displayed on either a landscape or portrait
-+page.
-+</P>
-+<P>
-+PRINT HEADER: An optional header including the postscript filename, and
-+creation date can be printed at the top of each page.
-+</P>
-+<P>
-+PRINT QUALITY CURVE: The Alignment Quality curve which is displayed underneath
-+the alignment on the screen can be included in the postscript output.
-+</P>
-+<P>
-+PRINT RULER: The ruler which is displayed underneath the alignment on the 
-+screen can be included in the postscript output.
-+</P>
-+<P>
-+PRINT RESIDUE NUMBERS: Sequence residue numbers can be printed at the right
-+hand side of the alignment.
-+</P>
-+<P>
-+RESIZE TO FIT PAGE: By default, the alignment is scaled to fit the page size
-+selected. This option can be turned off, in which case a font size of 10 will
-+be used for the sequences.
-+</P>
-+<P>
-+PRINT FROM POSITION/TO: A range of the alignment can be printed. The default
-+is to print the full alignment. The first and last residues to be printed are
-+specified here.
-+</P>
-+<P>
-+USE BLOCK LENGTH: The alignment can be divided into blocks of residues. The
-+number of residues in a block is specified here. More than one block may then
-+be printed on a single page. This is useful for long alignments of a small
-+number of sequences. If the block length is set to 0, the alignment will not
-+be divided into blocks, but printed across a number of pages.
-+</P>
-+<P>
-+</P>
-+<A HREF="#INDEX"> <EM>Back to Index</EM> </A>
-+<CENTER><H2><A NAME="E">                          Editing Alignments
-+</A></H2></CENTER>
-+<P>
-+</P>
-+<P>
-+Clustal X allows you to change the order of the sequences in the alignment, by
-+cutting-and-pasting the sequence names.
-+</P>
-+<P>
-+To select a group of sequences to be moved, click on a sequence name and drag
-+the cursor until all the required sequences are highlighted. Holding down the
-+Shift key when clicking on the first name will add new sequences to those
-+already selected.
-+</P>
-+<P>
-+(Options are provided to Select All Sequences, Select Profile 1 or Select 
-+Profile 2.)
-+</P>
-+<P>
-+The selected sequences can be removed from the alignment by using the EDIT
-+menu, CUT option.
-+</P>
-+<P>
-+To add the cut sequences back into an alignment, select a sequence by clicking
-+on the sequence name. The cut sequences will be added to the alignment,
-+immediately following the selected sequence, by the EDIT menu, PASTE option.
-+</P>
-+<P>
-+To add the cut sequences to an empty alignment (eg. when cutting sequences from
-+Profile 1 and pasting them to Profile 2), click on the empty sequence name
-+display area, and select the EDIT menu, PASTE option as before.
-+</P>
-+<P>
-+The sequence selection and sequence range selection can be cleared using the
-+EDIT menu, CLEAR SEQUENCE SELECTION and CLEAR RANGE SELECTION options
-+respectively.
-+</P>
-+<P>
-+To search for a string of residues in the sequences, select the sequences to be
-+searched by clicking on the sequence names. You can then enter the string to
-+search for by selecting the SEARCH FOR STRING option. If the string is found in
-+any of the sequences selected, the sequence name and column number are printed
-+below the sequence display.
-+</P>
-+<P>
-+In PROFILE ALIGNMENT MODE, the two profiles can be merged (normally done after
-+alignment) by selecting ADD PROFILE 2 TO PROFILE 1. The sequences currently
-+displayed as Profile 2 will be appended to Profile 1. 
-+</P>
-+<P>
-+The REMOVE ALL GAPS option will remove all gaps from the sequences currently
-+selected.
-+WARNING: This option removes ALL gaps, not only those introduced by ClustalX,
-+but also those that were read from the input alignment file. Any secondary
-+structure information associated with the alignment will NOT be automatically
-+realigned.
-+</P>
-+<P>
-+The REMOVE GAP-ONLY COLUMNS option will remove those positions in the alignment which
-+contain gaps in all sequences. This can occur as a result of removing divergent
-+sequences from an alignment, or if an alignment has been realigned.
-+</P>
-+<P>
-+</P>
-+<A HREF="#INDEX"> <EM>Back to Index</EM> </A>
-+<CENTER><H2><A NAME="M">                          Multiple Alignments
-+</A></H2></CENTER>
-+<P>
-+</P>
-+<P>
-+Make sure MULTIPLE ALIGNMENT MODE is selected, using the switch directly above
-+the sequence display area. Then, use the ALIGNMENT menu to do multiple
-+alignments.
-+</P>
-+<P>
-+Multiple alignments are carried out in 3 stages:
-+</P>
-+<P> 
-+1) all sequences are compared to each other (pairwise alignments);
-+</P>
-+<P> 
-+2) a dendrogram (like a phylogenetic tree) is constructed, describing the
-+approximate groupings of the sequences by similarity (stored in a file).
-+</P>
-+<P> 
-+3) the final multiple alignment is carried out, using the dendrogram as a guide.
-+</P>
-+<P>
-+The 3 stages are carried out automatically by the DO COMPLETE ALIGNMENT option.
-+You can skip the first stages (pairwise alignments; guide tree) by using an old
-+guide tree file (DO ALIGNMENT FROM GUIDE TREE); or you can just produce the
-+guide tree with no final multiple alignment (PRODUCE GUIDE TREE ONLY).
-+</P>
-+<P>
-+</P>
-+<P>
-+REALIGN SELECTED SEQUENCES is used to realign badly aligned sequences in the
-+alignment. Sequences can be selected by clicking on the sequence names - see
-+Editing Alignments for more details. The unselected sequences are then 'fixed'
-+and a profile is made including only the unselected sequences. Each of the
-+selected sequences in turn is then realigned to this profile. The realigned
-+sequences will be displayed as a group at the end of the alignment.
-+</P>
-+<P>
-+</P>
-+<P>
-+REALIGN SELECTED SEQUENCE RANGE is used to realign a small region of the 
-+alignment. A residue range can be selected by clicking on the sequence display
-+area. A multiple alignment is then performed, following the 3 stages described
-+above, but only using the selected residue range. Finally the new alignment of
-+the range is pasted back into the full sequence alignment.
-+</P>
-+<P>
-+By default, gap penalties are used at each end of the subrange in order to 
-+penalise terminal gaps. If the REALIGN SEGMENT END GAP PENALTIES option is
-+switched off, gaps can be introduced at the ends of the residue range at no
-+cost.
-+</P>
-+<P>
-+</P>
-+<P>
-+ALIGNMENT PARAMETERS displays a sub-menu with the following options:
-+</P>
-+<P>
-+RESET NEW GAPS BEFORE ALIGNMENT will remove any new gaps introduced into the
-+sequences during multiple alignment if you wish to change the parameters and
-+try again. This only takes effect just before you do a second multiple
-+alignment. You can make phylogenetic trees after alignment whether or not this
-+is ON. If you turn this OFF, the new gaps are kept even if you do a second
-+multiple alignment. This allows you to iterate the alignment gradually.
-+Sometimes, the alignment is improved by a second or third pass.
-+</P>
-+<P>
-+RESET ALL GAPS BEFORE ALIGNMENT will remove all gaps in the sequences including
-+gaps which were read in from the sequence input file. This only takes effect
-+just before you do a second multiple alignment.  You can make phylogenetic
-+trees after alignment whether or not this is ON.  If you turn this OFF, all
-+gaps are kept even if you do a second multiple alignment. This allows you to
-+iterate the alignment gradually.  Sometimes, the alignment is improved by a
-+second or third pass.
-+</P>
-+<P>
-+</P>
-+<P>
-+PAIRWISE ALIGNMENT PARAMETERS control the speed/sensitivity of the initial
-+alignments.
-+</P>
-+<P>
-+MULTIPLE ALIGNMENT PARAMETERS control the gaps in the final multiple
-+alignments.
-+</P>
-+<P>
-+PROTEIN GAP PARAMETERS displays a temporary window which allows you to set
-+various parameters only used in the alignment of protein sequences.
-+</P>
-+<P>
-+(SECONDARY STRUCTURE PARAMETERS, for use with the Profile Alignment Mode only,
-+allows you to set various parameters only used with gap penalty masks.)
-+</P>
-+<P>
-+SAVE LOG FILE will write the alignment calculation scores to a file. The log
-+filename is the same as the input sequence filename, with an extension .log
-+appended.
-+</P>
-+<P>
-+</P>
-+<P>
-+<H4>
-+OUTPUT FORMAT OPTIONS
-+</H4>
-+</P>
-+<P>
-+You can choose from 6 different alignment formats (CLUSTAL, GCG, NBRF/PIR,
-+PHYLIP, GDE and NEXUS).  You can choose more than one (or all 6 if you wish).  
-+</P>
-+<P>
-+CLUSTAL format output is a self explanatory alignment format. It shows the
-+sequences aligned in blocks. It can be read in again at a later date to (for
-+example) calculate a phylogenetic tree or add in new sequences by profile
-+alignment.
-+</P>
-+<P>
-+GCG output can be used by any of the GCG programs that can work on multiple
-+alignments (e.g. PRETTY, PROFILEMAKE, PLOTALIGN). It is the same as the GCG
-+.msf format files (multiple sequence file); new in version 7 of GCG.
-+</P>
-+<P>
-+NEXUS format is used by several phylogeny programs, including PAUP and
-+MacClade.
-+</P>
-+<P>
-+PHYLIP format output can be used for input to the PHYLIP package of Joe 
-+Felsenstein.  This is a very widely used package for doing every imaginable
-+form of phylogenetic analysis (MUCH more than the modest introduction
-+offered by this program).
-+</P>
-+<P>
-+NBRF/PIR: this is the same as the standard PIR format with ONE ADDITION. Gap
-+characters "-" are used to indicate the positions of gaps in the multiple 
-+alignment. These files can be re-used as input in any part of clustal that
-+allows sequences (or alignments or profiles) to be read in.  
-+</P>
-+<P>
-+GDE:  this format is used by the GDE package of Steven Smith and is understood
-+by SEQLAB in GCG 9 or later.
-+</P>
-+<P>
-+GDE OUTPUT CASE: sequences in GDE format may be written in either upper or
-+lower case.
-+</P>
-+<P> 
-+CLUSTALW SEQUENCE NUMBERS: residue numbers may be added to the end of the
-+alignment lines in clustalw format.
-+</P>
-+<P>
-+OUTPUT ORDER is used to control the order of the sequences in the output
-+alignments. By default, it uses the order in which the sequences were aligned
-+(from the guide tree/dendrogram), thus automatically grouping closely related
-+sequences. It can be switched to be the same as the original input order.
-+</P>
-+<P>
-+PARAMETER OUTPUT: This option will save all your parameter settings in a
-+parameter file (suffix .par) during alignment. The file can be subsequently
-+used to rerun ClustalW using the same parameters.
-+</P>
-+<P>
-+</P>
-+<P>
-+<H3>
-+ALIGNMENT PARAMETERS
-+</H3>
-+</P>
-+<P>
-+<STRONG>
-+PAIRWISE ALIGNMENT PARAMETERS
-+</STRONG>
-+</P>
-+<P>
-+A distance is calculated between every pair of sequences and these are used to
-+construct the phylogenetic tree which guides the final multiple alignment. The
-+scores are calculated from separate pairwise alignments. These can be
-+calculated using 2 methods: dynamic programming (slow but accurate) or by the
-+method of Wilbur and Lipman (extremely fast but approximate).   
-+</P>
-+<P>
-+You can choose between the 2 alignment methods using the PAIRWISE ALIGNMENTS
-+option. The slow/accurate method is fast enough for short sequences but will be
-+VERY SLOW for many (e.g. >100) long (e.g. >1000 residue) sequences.   
-+</P>
-+<P>
-+</P>
-+<P>
-+<STRONG>
-+SLOW-ACCURATE alignment parameters:
-+</STRONG>
-+</P>
-+<P>
-+These parameters do not have any effect on the speed of the alignments. They
-+are used to give initial alignments which are then rescored to give percent
-+identity scores. These % scores are the ones which are displayed on the 
-+screen. The scores are converted to distances for the trees.
-+</P>
-+<P>
-+Gap Open Penalty:      the penalty for opening a gap in the alignment.
-+</P>
-+<P>
-+Gap Extension Penalty: the penalty for extending a gap by 1 residue.
-+</P>
-+<P>
-+Protein Weight Matrix: the scoring table which describes the similarity of 
-+each amino acid to each other.
-+</P>
-+<P>
-+Load protein matrix: allows you to read in a comparison table from a file.
-+</P>
-+<P>
-+DNA weight matrix: the scores assigned to matches and mismatches (including
-+IUB ambiguity codes).
-+</P>
-+<P>
-+Load DNA matrix: allows you to read in a comparison table from a file.
-+</P>
-+<P>
-+See the Multiple alignment parameters, MATRIX option below for details of the
-+matrix input format.
-+</P>
-+<P>
-+</P>
-+<P>
-+<STRONG>
-+FAST-APPROXIMATE alignment parameters:
-+</STRONG>
-+</P>
-+<P>
-+These similarity scores are calculated from fast, approximate, global
-+alignments, which are controlled by 4 parameters. 2 techniques are used to make
-+these alignments very fast: 1) only exactly matching fragments (k-tuples) are
-+considered; 2) only the 'best' diagonals (the ones with most k-tuple matches)
-+are used.
-+</P>
-+<P>
-+GAP PENALTY:   This is a penalty for each gap in the fast alignments. It has
-+little effect on the speed or sensitivity except for extreme values.
-+</P>
-+<P>
-+K-TUPLE SIZE:  This is the size of exactly matching fragment that is used. 
-+INCREASE for speed (max= 2 for proteins; 4 for DNA), DECREASE for sensitivity.
-+For longer sequences (e.g. >1000 residues) you may wish to increase the
-+default.
-+</P>
-+<P>
-+TOP DIAGONALS: The number of k-tuple matches on each diagonal (in an imaginary
-+dot-matrix plot) is calculated. Only the best ones (with most matches) are used
-+in the alignment. This parameter specifies how many. Decrease for speed;
-+increase for sensitivity.
-+</P>
-+<P>
-+WINDOW SIZE:  This is the number of diagonals around each of the 'best' 
-+diagonals that will be used. Decrease for speed; increase for sensitivity.
-+</P>
-+<P>
-+</P>
-+<P>
-+<STRONG>
-+MULTIPLE ALIGNMENT PARAMETERS
-+</STRONG>
-+</P>
-+<P>
-+These parameters control the final multiple alignment. This is the core of the
-+program and the details are complicated. To fully understand the use of the
-+parameters and the scoring system, you will have to refer to the documentation.
-+</P>
-+<P>
-+Each step in the final multiple alignment consists of aligning two alignments 
-+or sequences. This is done progressively, following the branching order in the
-+GUIDE TREE. The basic parameters to control this are two gap penalties and the
-+scores for various identical/non-identical residues. 
-+</P>
-+<P>
-+The GAP OPENING and EXTENSION PENALTIES can be set here. These control the 
-+cost of opening up every new gap and the cost of every item in a gap.  
-+Increasing the gap opening penalty will make gaps less frequent. Increasing 
-+the gap extension penalty will make gaps shorter. Terminal gaps are not 
-+penalised.
-+</P>
-+<P>
-+The DELAY DIVERGENT SEQUENCES switch delays the alignment of the most distantly
-+related sequences until after the most closely related sequences have  been
-+aligned. The setting shows the percent identity level required to delay the
-+addition of a sequence; sequences that are less identical than this level to
-+any other sequences will be aligned later.
-+</P>
-+<P>
-+The TRANSITION WEIGHT gives transitions (A<-->G or C<-->T i.e. purine-purine or
-+pyrimidine-pyrimidine substitutions) a weight between 0 and 1; a weight of zero
-+means that the transitions are scored as mismatches, while a weight of 1 gives
-+the transitions the match score. For distantly related DNA sequences, the
-+weight should be near to zero; for closely related sequences it can be useful
-+to assign a higher score. The default is set to 0.5.
-+</P>
-+<P>
-+</P>
-+<P>
-+The PROTEIN WEIGHT MATRIX option allows you to choose a series of weight
-+matrices. For protein alignments, you use a weight matrix to determine the
-+similarity of non-identical amino acids. For example, Tyr aligned with Phe is
-+usually judged to be 'better' than Tyr aligned with Pro.
-+</P>
-+<P>
-+There are three 'in-built' series of weight matrices offered. Each consists of
-+several matrices which work differently at different evolutionary distances. To
-+see the exact details, read the documentation. Crudely, we store several
-+matrices in memory, spanning the full range of amino acid distance (from almost
-+identical sequences to highly divergent ones). For very similar sequences, it
-+is best to use a strict weight matrix which only gives a high score to
-+identities and the most favoured conservative substitutions. For more divergent
-+sequences, it is appropriate to use "softer" matrices which give a high score
-+to many other frequent substitutions.
-+</P>
-+<P>
-+1) BLOSUM (Henikoff). These matrices appear to be the best available for 
-+carrying out data base similarity (homology searches). The matrices currently
-+used are: Blosum 80, 62, 45 and 30. BLOSUM was the default in earlier Clustal X
-+versions.
-+</P>
-+<P>
-+2) PAM (Dayhoff). These have been extremely widely used since the late '70s. We
-+currently use the PAM 20, 60, 120, 350 matrices.
-+</P>
-+<P>
-+3) GONNET. These matrices were derived using almost the same procedure as the
-+Dayhoff one (above) but are much more up to date and are based on a far larger
-+data set. They appear to be more sensitive than the Dayhoff series. We
-+currently use the GONNET 80, 120, 160, 250 and 350 matrices. This series is the
-+default for Clustal X version 1.8.
-+</P>
-+<P>
-+We also supply an identity matrix which gives a score of 10 to two identical 
-+amino acids and a score of zero otherwise. This matrix is not very useful.
-+</P>
-+<P>
-+Load protein matrix: allows you to read in a comparison matrix from a file.
-+This can be either a single matrix or a series of matrices (see below for
-+format). 
-+</P>
-+<P>
-+</P>
-+<P>
-+DNA WEIGHT MATRIX option allows you to select a single matrix (not a series)
-+used for aligning nucleic acid sequences. Two hard-coded matrices are available:
-+</P>
-+<P>
-+1) IUB. This is the default scoring matrix used by BESTFIT for the comparison
-+of nucleic acid sequences. X's and N's are treated as matches to any IUB
-+ambiguity symbol. All matches score 1.9; all mismatches for IUB symbols score 0.
-+</P>
-+<P>
-+2) CLUSTALW(1.6). A previous system used by ClustalW, in which matches score
-+1.0 and mismatches score 0. All matches for IUB symbols also score 0.
-+</P>
-+<P>
-+Load DNA matrix: allows you to read in a nucleic acid comparison matrix from a
-+file (just one matrix, not a series).
-+</P>
-+<P>
-+</P>
-+<P>
-+SINGLE MATRIX INPUT FORMAT
-+The format used for a single matrix is the same as the BLAST program. The
-+scores in the new weight matrix should be similarities. You can use negative as
-+well as positive values if you wish, although the matrix will be automatically
-+adjusted to all positive scores, unless the NEGATIVE MATRIX option is selected.
-+Any lines beginning with a # character are assumed to be comments. The first
-+non-comment line should contain a list of amino acids in any order, using the 1
-+letter code, followed by a * character. This should be followed by a square
-+matrix of scores, with one row and one column for each amino acid. The last row
-+and column of the matrix (corresponding to the * character) contain the minimum
-+score over the whole matrix.
-+</P>
-+<P>
-+MATRIX SERIES INPUT FORMAT
-+ClustalX uses different matrices depending on the mean percent identity of the
-+sequences to be aligned. You can specify a series of matrices and the range of
-+the percent identity for each matrix in a matrix series file. The file is
-+automatically recognised by the word CLUSTAL_SERIES at the beginning of the
-+file. Each matrix in the series is then specified on one line which should
-+start with the word MATRIX. This is followed by the lower and upper limits of
-+the sequence percent identities for which you want to apply the matrix. The
-+final entry on the matrix line is the filename of a Blast format matrix file
-+(see above for details of the single matrix file format).
-+</P>
-+<P>
-+Example.
-+</P>
-+<P>
-+CLUSTAL_SERIES
-+</P>
-+<P> 
-+MATRIX 81 100 /us1/user/julie/matrices/blosum80
-+MATRIX 61 80 /us1/user/julie/matrices/blosum62
-+MATRIX 31 60 /us1/user/julie/matrices/blosum45
-+MATRIX 0 30 /us1/user/julie/matrices/blosum30
-+</P>
-+<P>
-+</P>
-+<P>
-+<STRONG>
-+PROTEIN GAP PARAMETERS
-+</STRONG>
-+</P>
-+<P>
-+RESIDUE SPECIFIC PENALTIES are amino acid specific gap penalties that reduce or
-+increase the gap opening penalties at each position in the alignment or 
-+sequence. See the documentation for details. As an example, positions that are
-+rich in glycine are more likely to have an adjacent gap than positions that are
-+rich in valine.
-+</P>
-+<P>
-+HYDROPHILIC GAP PENALTIES are used to increase the chances of a gap within a
-+run (5 or more residues) of hydrophilic amino acids; these are likely to be
-+loop or random coil regions where gaps are more common. The residues that are
-+"considered" to be hydrophilic can be entered in HYDROPHILIC RESIDUES.
-+</P>
-+<P>
-+GAP SEPARATION DISTANCE tries to decrease the chances of gaps being too close
-+to each other. Gaps that are less than this distance apart are penalised more
-+than other gaps. This does not prevent close gaps; it makes them less frequent,
-+promoting a block-like appearance of the alignment.
-+</P>
-+<P>
-+END GAP SEPARATION treats end gaps just like internal gaps for the purposes of
-+avoiding gaps that are too close (set by GAP SEPARATION DISTANCE above). If you
-+turn this off, end gaps will be ignored for this purpose. This is useful when
-+you wish to align fragments where the end gaps are not biologically meaningful.
-+</P>
-+<P>
-+</P>
-+<P>
-+</P>
-+<A HREF="#INDEX"> <EM>Back to Index</EM> </A>
-+<CENTER><H2><A NAME="P">                   Profile and Structure Alignments
-+</A></H2></CENTER>
-+<P>
-+</P>
-+<P>   
-+By PROFILE ALIGNMENT, we mean alignment using existing alignments. Profile 
-+alignments allow you to store alignments of your favourite sequences and add
-+new sequences to them in small bunches at a time. A profile is simply an
-+alignment of one or more sequences (e.g. an alignment output file from Clustal
-+X). Each input can be a single sequence. One or both sets of input sequences
-+may include secondary structure assignments or gap penalty masks to guide the
-+alignment. 
-+</P>
-+<P>
-+Make sure PROFILE ALIGNMENT MODE is selected, using the switch directly above
-+the sequence display area. Then, use the ALIGNMENT menu to do profile and
-+secondary structure alignments.
-+</P>
-+<P>
-+The profiles can be in any of the allowed input formats with "-" characters
-+used to specify gaps (except for GCG/MSF where "." is used).
-+</P>
-+<P>
-+You have to load the 2 profiles by choosing FILE, LOAD PROFILE 1 and  LOAD
-+PROFILE 2. Then ALIGNMENT, ALIGN PROFILE 2 TO PROFILE 1 will align the 2
-+profiles to each other. Secondary structure masks in either profile can be used
-+to guide the alignment. This option compares all the sequences in profile 1
-+with all the sequences in profile 2 in order to build guide trees which will be
-+used to calculate sequence weights, and select appropriate alignment parameters
-+for the final profile alignment.
-+</P>
-+<P>
-+You can skip the first stage (pairwise alignments; guide trees) by using old
-+guide tree files (ALIGN PROFILES FROM GUIDE TREES). 
-+</P>
-+<P>
-+The ALIGN SEQUENCES TO PROFILE 1 option will take the sequences in the second
-+profile and align them to the first profile, 1 at a time.  This is useful to
-+add some new sequences to an existing alignment, or to align a set of sequences
-+to a known structure. In this case, the second profile set need not be
-+pre-aligned.
-+</P>
-+<P>
-+You can skip the first stage (pairwise alignments; guide tree) by using an old
-+guide tree file (ALIGN SEQUENCES TO PROFILE 1 FROM TREE). 
-+</P>
-+<P>
-+SAVE LOG FILE will write the alignment calculation scores to a file. The log
-+filename is the same as the input sequence filename, with an extension .log
-+appended.
-+</P>
-+<P>
-+The alignment parameters can be set using the ALIGNMENT PARAMETERS menu,
-+Pairwise Parameters, Multiple Parameters and Protein Gap Parameters options.
-+These are EXACTLY the same parameters as used by the general, automatic
-+multiple alignment procedure. The general multiple alignment procedure is
-+simply a series of profile alignments. Carrying out a series of profile
-+alignments on larger and larger groups of sequences, allows you to manually
-+build up a complete alignment, if necessary editing intermediate alignments.
-+</P>
-+<P>
-+<STRONG>
-+SECONDARY STRUCTURE PARAMETERS
-+</STRONG>
-+</P>
-+<P>
-+Use this menu to set secondary structure options. If a solved structure is
-+known, it can be used to guide the alignment by raising gap penalties within
-+secondary structure elements, so that gaps will preferentially be inserted into
-+unstructured surface loop regions. Alternatively, a user-specified gap penalty
-+mask can be supplied for a similar purpose.
-+</P>
-+<P>
-+A gap penalty mask is a series of numbers between 1 and 9, one per position in 
-+the alignment. Each number specifies how much the gap opening penalty is to be 
-+raised at that position (raised by multiplying the basic gap opening penalty
-+by the number) i.e. a mask figure of 1 at a position means no change
-+in gap opening penalty; a figure of 4 means that the gap opening penalty is
-+four times greater at that position, making gaps 4 times harder to open.
-+</P>
-+<P>
-+The format for gap penalty masks and secondary structure masks is explained in
-+a separate help section.
-+</P>
-+<P>
-+</P>
-+<A HREF="#INDEX"> <EM>Back to Index</EM> </A>
-+<CENTER><H2><A NAME="B">            Secondary Structure / Gap Penalty Masks
-+</A></H2></CENTER>
-+<P>
-+</P>
-+<P>
-+The use of secondary structure-based penalties has been shown to improve  the
-+accuracy of sequence alignment. Clustal X now allows secondary structure/ gap
-+penalty masks to be supplied with the input sequences used during profile
-+alignment. (NB. The secondary structure information is NOT used during multiple
-+sequence alignment). The masks work by raising gap penalties in specified
-+regions (typically secondary structure elements) so that gaps are
-+preferentially opened in the less well conserved regions (typically surface
-+loops).
-+</P>
-+<P>
-+The USE PROFILE 1(2) SECONDARY STRUCTURE / GAP PENALTY MASK options control
-+whether the input 2D-structure information or gap penalty masks will be used
-+during the profile alignment.
-+</P>
-+<P>
-+The OUTPUT options control whether the secondary structure and gap penalty
-+masks should be included in the Clustal X output alignments. Showing both is
-+useful for understanding how the masks work. The 2D-structure information is
-+itself useful in judging the alignment quality and in seeing how residue
-+conservation patterns vary with secondary structure. 
-+</P>
-+<P>
-+The HELIX and STRAND GAP PENALTY options provide the value for raising the gap
-+penalty at core Alpha Helical (A) and Beta Strand (B) residues. In CLUSTAL
-+format, capital residues denote the A and B core structure notation. Basic gap
-+penalties are multiplied by the amount specified.
-+</P>
-+<P>
-+The LOOP GAP PENALTY option provides the value for the gap penalty in Loops.
-+By default this penalty is not raised. In CLUSTAL format, loops are specified
-+by "." in the secondary structure notation.
-+</P>
-+<P>
-+The SECONDARY STRUCTURE TERMINAL PENALTY provides the value for setting the gap
-+penalty at the ends of secondary structures. Ends of secondary structures are
-+known to grow or shrink, comparing related structures. Therefore by default
-+these are given intermediate values, lower than the core penalties. All
-+secondary structure read in as lower case in CLUSTAL format gets the reduced
-+terminal penalty.
-+</P>
-+<P>
-+The HELIX and STRAND TERMINAL POSITIONS options specify the range of structure
-+termini for the intermediate penalties. In the alignment output, these are
-+indicated as lower case. For Alpha Helices, by default, the range spans the 
-+end-helical turn (3 residues). For Beta Strands, the default range spans the
-+end residue and the adjacent loop residue, since sequence conservation often
-+extends beyond the actual H-bonded Beta Strand.
-+</P>
-+<P>
-+Clustal X can read the masks from SWISS-PROT, CLUSTAL or GDE format input
-+files. For many 3-D protein structures, secondary structure information is
-+recorded in the feature tables of SWISS-PROT database entries. You should
-+always check that the assignments are correct - some are quite inaccurate.
-+Clustal X looks for SWISS-PROT HELIX and STRAND assignments e.g.
-+</P>
-+<P>
-+</P>
-+<P>
-+<PRE>
-+FT   HELIX       100    115
-+FT   STRAND      118    119
-+</PRE>
-+</P>
-+<P>
-+The structure and penalty masks can also be read from CLUSTAL alignment format 
-+as comment lines beginning "!SS_" or "!GM_" e.g.
-+</P>
-+<P>
-+<PRE>
-+!SS_HBA_HUMA    ..aaaAAAAAAAAAAaaa.aaaAAAAAAAAAAaaaaaaAaaa.........aaaAAAAAA
-+!GM_HBA_HUMA    112224444444444222122244444444442222224222111111111222444444
-+HBA_HUMA        VLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHGK
-+</PRE>
-+</P>
-+<P>
-+Note that the mask itself is a set of numbers between 1 and 9 each of which is 
-+assigned to the residue(s) in the same column below. 
-+</P>
-+<P>
-+In GDE flat file format, the masks are specified as text and the names must
-+begin with "SS_" or "GM_".
-+</P>
-+<P>
-+Either a structure or penalty mask or both may be used. If both are included
-+in an alignment, the user will be asked which is to be used.
-+</P>
-+<P>
-+</P>
-+<P>
-+</P>
-+<A HREF="#INDEX"> <EM>Back to Index</EM> </A>
-+<CENTER><H2><A NAME="T">                            Phylogenetic Trees
-+</A></H2></CENTER>
-+<P>
-+</P>
-+<P>
-+Before calculating a tree, you must have an ALIGNMENT in memory. This can be
-+input using the FILE menu, LOAD SEQUENCES option or you should have just
-+carried out a full multiple alignment and the alignment is still in memory.
-+Remember YOU MUST ALIGN THE SEQUENCES FIRST!!!!
-+</P>
-+<P>
-+The method used is the NJ (Neighbour Joining) method of Saitou and Nei. First
-+you calculate distances (percent divergence) between all pairs of sequences from
-+a multiple alignment; second you apply the NJ method to the distance matrix.
-+</P>
-+<P>
-+To calculate a tree, use the DRAW N-J TREE option. This gives an UNROOTED tree
-+and all branch lengths. The root of the tree can only be inferred by using an
-+outgroup (a sequence that you are certain branches at the outside of the tree
-+.... certain on biological grounds) OR if you assume a degree of constancy in
-+the 'molecular clock', you can place the root in the 'middle' of the tree
-+(roughly equidistant from all tips).
-+</P>
-+<P>
-+BOOTSTRAP N-J TREE uses a method for deriving confidence values for the 
-+groupings in a tree (first adapted for trees by Joe Felsenstein). It involves
-+making N random samples of sites from the alignment (N should be LARGE, e.g.
-+500 - 1000); drawing N trees (1 from each sample) and counting how many times
-+each grouping from the original tree occurs in the sample trees. You can set N
-+using the NUMBER OF BOOTSTRAP TRIALS option in the BOOTSTRAP TREE window. In
-+practice, you should use a large number of bootstrap replicates (1000 is
-+recommended, even if it means running the program for an hour on a slow 
-+computer). You can also supply a seed number for the random number generator
-+here. Different runs with the same seed will give the same answer. See the
-+documentation for more details.
-+</P>
-+<P>
-+EXCLUDE POSITIONS WITH GAPS? With this option, any alignment positions where
-+ANY of the sequences have a gap will be ignored. This means that 'like' will
-+be compared to 'like' in all distances, which is highly desirable. It also
-+automatically throws away the most ambiguous parts of the alignment, which are
-+concentrated around gaps (usually). The disadvantage is that you may throw away
-+much of the data if there are many gaps (which is why it is difficult for us to
-+make it the default).  
-+</P>
-+<P>
-+CORRECT FOR MULTIPLE SUBSTITUTIONS? For small divergence (say <10%) this option
-+makes no difference. For greater divergence, this option corrects for the fact
-+that observed distances underestimate actual evolutionary distances. This is
-+because, as sequences diverge, more than one substitution will happen at many
-+sites. However, you only see one difference when you look at the present day
-+sequences. Therefore, this option has the effect of stretching branch lengths
-+in trees (especially long branches). The corrections used here (for DNA or
-+proteins) are both due to Motoo Kimura. See the documentation for details.  
-+</P>
-+<P>
-+Where possible, this option should be used. However, for VERY divergent
-+sequences, the distances cannot be reliably corrected. You will be warned if
-+this happens. Even if none of the distances in a data set exceed the reliable
-+threshold, if you bootstrap the data, some of the bootstrap distances may
-+randomly exceed the safe limit.  
-+</P>
-+<P>
-+SAVE LOG FILE will write the tree calculation scores to a file. The log
-+filename is the same as the input sequence filename, with an extension .log
-+appended.
-+</P>
-+<P>
-+<H4>
-+OUTPUT FORMAT OPTIONS
-+</H4>
-+</P>
-+<P>
-+Four different formats are allowed. None of these displays the tree visually.
-+You can display the tree using the NJPLOT program distributed with Clustal X
-+OR get the PHYLIP package and use the tree drawing facilities there. 
-+</P>
-+<P> 
-+1) CLUSTAL FORMAT TREE. This format is verbose and lists all of the distances
-+between the sequences and the number of alignment positions used for each. The
-+tree is described at the end of the file. It lists the sequences that are
-+joined at each alignment step and the branch lengths. After two sequences are
-+joined, it is referred to later as a NODE. The number of a NODE is the number
-+of the lowest sequence in that NODE.   
-+</P>
-+<P>
-+2) PHYLIP FORMAT TREE. This format is the New Hampshire format, used by many
-+phylogenetic analysis packages. It consists of a series of nested parentheses,
-+describing the branching order, with the sequence names and branch lengths. It
-+can be read by the NJPLOT program distributed with ClustalX. It can also be
-+used by the RETREE, DRAWGRAM and DRAWTREE programs of the PHYLIP package to see
-+the trees graphically. This is the same format used during multiple alignment
-+for the guide trees. Some other packages that can read and display New
-+Hampshire format are TreeTool, TreeView, and Phylowin.
-+</P>
-+<P>
-+3) PHYLIP DISTANCE MATRIX. This format just outputs a matrix of all the
-+pairwise distances in a format that can be used by the PHYLIP package. It used
-+to be useful when one could not produce distances from protein sequences in the
-+Phylip package but is now redundant (PROTDIST of Phylip 3.5 now does this).
-+</P>
-+<P>
-+4) NEXUS FORMAT TREE. This format is used by several popular phylogeny programs,
-+including PAUP and MacClade. The format is described fully in:
-+Maddison, D. R., D. L. Swofford and W. P. Maddison.  1997.
-+NEXUS: an extensible file format for systematic information.
-+Systematic Biology 46:590-621.
-+</P>
-+<P>
-+BOOTSTRAP LABELS ON: By default, the bootstrap values are correctly placed on
-+the tree branches of the phylip format output tree. The toggle allows them to
-+be placed on the nodes, which is incorrect, but some display packages (e.g.
-+TreeTool, TreeView and Phylowin) only support node labelling but not branch
-+labelling. Care should be taken to note which branches and labels go together. 
-+</P>
-+<P>
-+</P>
-+<P>
-+</P>
-+<A HREF="#INDEX"> <EM>Back to Index</EM> </A>
-+<CENTER><H2><A NAME="C">                               Colors
-+</A></H2></CENTER>
-+<P>
-+</P>
-+<P>
-+Clustal X provides a versatile coloring scheme for the sequence alignment 
-+display. The sequences (or profiles) are colored automatically, when they are
-+loaded. Sequences can be colored either by assigning a color to specific
-+residues, or on the basis of an alignment consensus. In the latter case, the
-+alignment consensus is calculated automatically, and the residues in each
-+column are colored according to the consensus character assigned to that
-+column. In this way, you can choose to highlight, for example, conserved
-+hydrophilic or hydrophobic positions in the alignment.
-+</P>
-+<P>
-+The 'rules' used to color the alignment are specified in a COLOR PARAMETER
-+FILE. Clustal X automatically looks for a file called 'colprot.par' for protein
-+sequences or 'coldna.par' for DNA, in the current directory. (If you're running
-+under UNIX, it then looks in your home directory, and finally in the
-+directories in your PATH environment variable).
-+</P>
-+<P>
-+By default, if no color parameter file is found, protein sequences are colored
-+by residue as follows:
-+</P>
-+<P>
-+<PRE>
-+	Color			Residue Code
-+</P>
-+<P>
-+	ORANGE			GPST
-+	RED			HKR
-+	BLUE			FWY
-+	GREEN			ILMV
-+</PRE>
-+</P>
-+<P>
-+In the case of DNA sequences, the default colors are as follows:
-+</P>
-+<P>
-+<PRE>
-+	Color			Residue Code
-+</P>
-+<P>
-+	ORANGE			A
-+	RED			C
-+	BLUE			T
-+	GREEN			G
-+</PRE>
-+</P>
-+<P>
-+</P>
-+<P>
-+The default BACKGROUND COLORING option shows the sequence residues using a
-+black character on a colored background. It can be switched off to show
-+residues as a colored character on a white background. 
-+</P>
-+<P>
-+Either BLACK AND WHITE or DEFAULT COLOR options can be selected. The Color
-+option looks first for the color parameter file (as described above) and, if no
-+file is found, uses the default residue-specific colors.
-+</P>
-+<P>
-+You can specify your own coloring scheme by using the LOAD COLOR PARAMETER FILE
-+option. The format of the color parameter file is described below.
-+</P>
-+<P>
-+<H4>
-+COLOR PARAMETER FILE
-+</H4>
-+</P>
-+<P>
-+This file is divided into 3 sections:
-+</P>
-+<P>
-+1) the names and rgb values of the colors
-+2) the rules for calculating the consensus
-+3) the rules for assigning colors to the residues
-+</P>
-+<P> 
-+An example file is given here.
-+</P>
-+<P>
-+<PRE>
-+ --------------------------------------------------------------------
-+@rgbindex
-+RED          0.9 0.1 0.1
-+BLUE         0.1 0.1 0.9
-+GREEN        0.1 0.9 0.1
-+YELLOW       0.9 0.9 0.0
-+</P>
-+<P>
-+@consensus
-+% = 60% w:l:v:i:m:a:f:c:y:h:p
-+# = 80% w:l:v:i:m:a:f:c:y:h:p
-+- = 50% e:d
-++ = 60% k:r
-+q = 50% q:e
-+p = 50% p
-+n = 50% n
-+t = 50% t:s
-+</P>
-+<P>
-+@color
-+g = RED
-+p = YELLOW
-+t = GREEN if t:%:#
-+n = GREEN if n
-+w = BLUE if %:#:p
-+k = RED if +
-+ --------------------------------------------------------------------
-+</PRE>
-+</P>
-+<P>
-+The first section is optional and is identified by the header @rgbindex. If
-+this section exists, each color used in the file must be named and the rgb
-+values specified (on a scale from 0 to 1). If the rgb index section is not
-+found, the following set of hard-coded colors will be used.
-+</P>
-+<P>
-+<PRE>
-+RED          0.9 0.1 0.1
-+BLUE         0.1 0.1 0.9
-+GREEN        0.1 0.9 0.1
-+ORANGE       0.9 0.7 0.3
-+CYAN         0.1 0.9 0.9
-+PINK         0.9 0.5 0.5
-+MAGENTA      0.9 0.1 0.9
-+YELLOW       0.9 0.9 0.0
-+</PRE>
-+</P>
-+<P>
-+The second section is optional and is identified by the header @consensus. It
-+defines how the consensus is calculated.
-+</P>
-+<P> 
-+The format of each consensus parameter is:-
-+</P>
-+<P> 
-+<PRE>
-+c = n% residue_list
-+</P>
-+<P> 
-+        where
-+              c             is a character used to identify the parameter.
-+              n             is an integer value used as the percentage cutoff
-+                            point.
-+              residue_list  is a list of residues denoted by a single
-+                            character, delimited by a colon (:).
-+</PRE>
-+</P>
-+<P> 
-+For example:   # = 60% w:l:v:i
-+</P>
-+<P>
-+will assign a consensus character # to any column in the alignment which
-+contains more than 60% of the residues w,l,v and i.
-+</P>
-+<P>        
-+</P>
-+<P> 
-+The third section is identified by the header @color, and defines how colors
-+are assigned to each residue in the alignment.
-+</P>
-+<P> 
-+The color parameters can take one of two formats:
-+</P>
-+<P>
-+<PRE>
-+1) r = color
-+2) r = color if consensus_list
-+</P>
-+<P> 
-+        where
-+              r             is a character used to denote a residue.
-+              color         is one of the colors in the GDE color lookup table.
-+              consensus_list is a list of consensus characters, each denoted by
-+                            a single character, delimited by a colon (:).
-+</PRE>
-+</P>
-+<P> 
-+Examples:
-+1) g = ORANGE
-+</P>
-+<P>
-+will color all glycines ORANGE, regardless of the consensus.
-+</P>
-+<P>
-+2) w = BLUE if w:%:#
-+</P>
-+<P>
-+will color BLUE any tryptophan which is found in a column with a consensus of
-+w, % or #.
-+</P>
-+<P> 
-+</P>
-+<P>
-+</P>
-+<A HREF="#INDEX"> <EM>Back to Index</EM> </A>
-+<CENTER><H2><A NAME="Q">                       Alignment Quality Analysis
-+</A></H2></CENTER>
-+<P>
-+</P>
-+<P>
-+<H3>
-+QUALITY SCORES
-+</H3>
-+</P>
-+<P>
-+Clustal X provides an indication of the quality of an alignment by plotting
-+a 'conservation score' for each column of the alignment. A high score indicates
-+a well-conserved column; a low score indicates low conservation. The quality
-+curve is drawn below the alignment.
-+</P>
-+<P>
-+Two methods are also provided to indicate single residues or sequence segments
-+which score badly in the alignment.
-+</P>
-+<P> 
-+Low-scoring residues are expected to occur at a moderate frequency in all the
-+sequences because of their steady divergence due to the natural processes of
-+evolution. The most divergent sequences are likely to have the most outliers.
-+However, the highlighted residues are especially useful in pointing to
-+sequence misalignments. Note that clustering of highlighted residues is a
-+strong indication of misalignment. This can arise due to various reasons, for
-+example:
-+</P>
-+<P> 
-+        1. Partial or total misalignments caused by a failure in the
-+        alignment algorithm. Usually only in difficult alignment cases.
-+</P>
-+<P> 
-+        2. Partial or total misalignments because at least one of the
-+        sequences in the given set is partly or completely unrelated to the
-+        other sequences. It is up to the user to check that the set of
-+        sequences are alignable.
-+</P>
-+<P>
-+        3. Frameshift translation errors in a protein sequence causing local
-+        mismatched regions to be heavily highlighted. These are surprisingly
-+        common in database entries. If suspected, a 3-frame translation of
-+        the source DNA needs to be examined.
-+</P>
-+<P> 
-+Occasionally, highlighted residues may point to regions of some biological
-+significance. This might happen for example if a protein alignment contains a
-+sequence which has acquired new functions relative to the main sequence set. It
-+is important to exclude other explanations, such as error or the natural
-+divergence of sequences, before invoking a biological explanation.
-+</P>
-+<P>
-+</P>
-+<P>
-+<H3>
-+LOW-SCORING SEGMENTS
-+</H3>
-+</P>
-+<P>
-+Unreliable regions in the alignment can be highlighted using the Low-Scoring
-+Segments option. A sequence-weighted profile is used to indicate any segments
-+in the sequences which score badly. Because the profile calculation may take
-+some time, an option is provided to calculate LOW-SCORING SEGMENTS. The 
-+segment display can then be toggled on or off without having to repeat the
-+time-consuming calculations.
-+</P>
-+<P>
-+For details of the low-scoring segment calculation, see the CALCULATION section
-+below.
-+</P>
-+<P>
-+</P>
-+<P>
-+<H4>
-+LOW-SCORING SEGMENT PARAMETERS
-+</H4>
-+</P>
-+<P>
-+MINIMUM LENGTH OF SEGMENTS: short segments (or even single residues) can be
-+hidden by increasing the minimum length of segments which will be displayed.
-+</P>
-+<P>
-+DNA MARKING SCALE is used to remove less significant segments from the 
-+highlighted display. Increase the scale to display more segments; decrease the
-+scale to remove the least significant.
-+</P>
-+<P>
-+</P>
-+<P>
-+PROTEIN WEIGHT MATRIX: the scoring table which describes the similarity of each
-+amino acid to each other. The matrix is used to calculate the sequence-
-+weighted profile scores. There are four 'in-built' Log-Odds matrices offered:
-+the Gonnet PAM 80, 120, 250, 350 matrices. A more stringent matrix which only
-+gives a high score to identities and the most favoured conservative
-+substitutions, may be more suitable when the sequences are closely related. For
-+more divergent sequences, it is appropriate to use "softer" matrices which give
-+a high score to many other frequent substitutions. This  option automatically
-+recalculates the low-scoring segments.
-+</P>
-+<P>
-+</P>
-+<P>
-+DNA WEIGHT MATRIX: Two hard-coded matrices are available:
-+</P>
-+<P>
-+1) IUB. This is the default scoring matrix used by BESTFIT for the comparison
-+of nucleic acid sequences. X's and N's are treated as matches to any IUB
-+ambiguity symbol. All matches score 1.0; all mismatches for IUB symbols score
-+0.9.
-+</P>
-+<P>
-+2) CLUSTALW(1.6). The previous system used by ClustalW, in which matches score
-+1.0 and mismatches score 0. All matches for IUB symbols also score 0. 
-+</P>
-+<P>
-+A new matrix can be read from a file on disk, if the filename consists only
-+of lower case characters. The values in the new weight matrix should be
-+similarities and should be NEGATIVE for infrequent substitutions.
-+</P>
-+<P> 
-+INPUT FORMAT. The format used for a new matrix is the same as the BLAST
-+program. Any lines beginning with a # character are assumed to be comments. The
-+first non-comment line should contain a list of amino acids in any order, using
-+the 1 letter code, followed by a * character. This should be followed by a
-+square matrix of scores, with one row and one column for each amino acid. The
-+last row and column of the matrix (corresponding to the * character) contain
-+the minimum score over the whole matrix.
-+</P>
-+<P>
-+<H4>
-+QUALITY SCORE PARAMETERS
-+</H4>
-+</P>
-+<P>
-+You can customise the column 'quality scores' plotted underneath the alignment
-+display using the following options.
-+</P>
-+<P>
-+SCORE PLOT SCALE: this is a scalar value from 1 to 10, which can be used to
-+change the scale of the quality score plot. 
-+</P>
-+<P>
-+RESIDUE EXCEPTION CUTOFF: this is a scalar value from 1 to 10, which can be
-+used to change the number of residue exceptions which are highlighted in the
-+alignment display. (For an explanation of this cutoff, see the CALCULATION OF
-+RESIDUE EXCEPTIONS section below.)
-+</P>
-+<P>
-+PROTEIN WEIGHT MATRIX: the scoring table which describes the similarity of 
-+each amino acid to each other. 
-+</P>
-+<P> 
-+DNA WEIGHT MATRIX: two hard-coded matrices are available: IUB and CLUSTALW(1.6).
-+</P>
-+<P>
-+For more information about the weight matrices, see the help above for
-+the Low-scoring Segments Weight Matrix.
-+</P>
-+<P>
-+For details of the quality score calculations, see the CALCULATION section
-+below.
-+</P>
-+<P>
-+</P>
-+<P>
-+<STRONG>
-+SHOW LOW-SCORING SEGMENTS
-+</STRONG>
-+</P>
-+<P>                       
-+The low-scoring segment display can be toggled on or off. This option does not
-+recalculate the profile scores.
-+</P>
-+<P>
-+</P>
-+<P>
-+<STRONG>
-+SHOW EXCEPTIONAL RESIDUES
-+</STRONG>
-+</P>
-+<P>                       
-+This option highlights individual residues which score badly in the alignment
-+quality calculations. Residues which score exceptionally low are highlighted by
-+using a white character on a grey background.
-+</P>
-+<P>
-+<STRONG>
-+SAVE QUALITY SCORES TO FILE
-+</STRONG>
-+</P>
-+<P>
-+The quality scores that are plotted underneath the alignment display can also
-+be saved in a text file. Each column in the alignment is written on one line in
-+the output file, with the value of the quality score at the end of the line.
-+Only the sequences currently selected in the display are written to the file.
-+One use for quality scores is to color residues in a protein structure by
-+sequence conservation. In this way conserved surface residues can be
-+highlighted to locate functional regions such as ligand-binding sites.
-+</P>
-+<P>
-+</P>
-+<P>
-+<H3>
-+CALCULATION OF QUALITY SCORES
-+</H3>
-+</P>
-+<P>
-+Suppose we have an alignment of m sequences of length n. Then, the alignment
-+can be written as:
-+</P>
-+<P>
-+<PRE>
-+        A11 A12 A13 .......... A1n
-+        A21 A22 A23 .......... A2n
-+        .
-+        .
-+        Am1 Am2 Am3 .......... Amn
-+</PRE>
-+</P>
-+<P>
-+We also have a residue comparison matrix of size R where C(i,j) is the score
-+for aligning residue i with residue j.
-+</P>
-+<P>
-+We want to calculate a score for the conservation of the jth position in the
-+alignment.
-+</P>
-+<P>
-+To do this, we define an R-dimensional sequence space. For the jth position in 
-+the alignment, each sequence consists of a single residue which is assigned a
-+point S in the space. S has R dimensions, and for sequence i, the rth dimension
-+is defined as:
-+</P>
-+<P>
-+<PRE>
-+	Sr =    C(r,Aij)
-+</PRE>
-+</P>
-+<P>
-+We then calculate a consensus value for the jth position in the alignment. This
-+value X also has R dimensions, and the rth dimension is defined as:
-+</P>
-+<P>
-+<PRE>
-+	Xr = (   SUM   (Fij * C(i,r)) ) / m
-+               1<=i<=R
-+</PRE>
-+</P>
-+<P>
-+where Fij is the count of residues i at position j in the alignment.
-+</P>
-+<P>
-+Now we can calculate the distance Di between each sequence i and the consensus 
-+position X in the R-dimensional space.
-+</P>
-+<P>
-+<PRE>
-+	Di = SQRT   (   SUM   (Xr - Sr)(Xr - Sr) )
-+                      1<=r<=R
-+</P>
-+<P>
-+</PRE>
-+</P>
-+<P>
-+The quality score for the jth position in the alignment is defined as the mean
-+of the sequence distances Di.
-+</P>
-+<P>
-+The score is normalised by multiplying by the percentage of sequences which
-+have residues (and not gaps) at this position.
-+</P>
-+<P>
-+<H3>
-+CALCULATION OF RESIDUE EXCEPTIONS
-+</H3>
-+</P>
-+<P>
-+The jth residue of the ith sequence is considered as an exception if the
-+distance Di of the sequence from the consensus value X is greater than (Upper
-+Quartile + Inter Quartile Range * Cutoff). The value used as a cutoff for
-+displaying exceptions can be set from the SCORE PARAMETERS menu. A high cutoff
-+value will only display very significant exceptions; a low value will allow
-+more, less significant, exceptions to be highlighted.
-+</P>
-+<P>
-+(NB. Sequences which contain gaps at this position are not included in the
-+exception calculation.)
-+</P>
-+<P>
-+</P>
-+<P>
-+<H3>
-+CALCULATION OF LOW-SCORING SEGMENTS
-+</H3>
-+</P>
-+<P>
-+Suppose we have an alignment of m sequences of length n. Then, the alignment
-+can be written as:
-+</P>
-+<P>
-+<PRE>
-+        A11 A12 A13 .......... A1n
-+        A21 A22 A23 .......... A2n
-+        .
-+        .
-+        Am1 Am2 Am3 .......... Amn
-+</PRE>
-+</P>
-+<P>
-+We also have a residue comparison matrix of size R where C(i,j) is the score
-+for aligning residue i with residue j.
-+</P>
-+<P>
-+We calculate sequence weights by building a neighbour-joining tree, in which
-+branch lengths are proportional to divergence. Summing the branches by branch
-+ownership provides the weights. See Thompson et al., CABIOS, 10, 19 (1994) and
-+Henikoff et al., JMB, 243, 574 (1994).
-+</P>
-+<P>
-+To find the low-scoring segments in a sequence Si, we build a weighted profile
-+of the remaining sequences in the alignment. Suppose we find residue r at 
-+position j in the sequence; then the score for the jth position in the sequence
-+is defined as
-+</P>
-+<P>
-+<PRE>
-+	Score(Si,j) = Profile(j,r)   where Profile(j,r) is the profile score
-+                                       for residue r at position j in the
-+                                       alignment.
-+</PRE>
-+</P>
-+<P>
-+These residue scores are summed along the sequence in both forward and backward
-+directions. If the sum of the scores is positive, then it is reset to zero.
-+Segments which score negatively in both directions are considered as 
-+'low-scoring' and will be highlighted in the alignment display.
-+</P>
-+<P>
-+</P>
-+<P>
-+</P>
-+<A HREF="#INDEX"> <EM>Back to Index</EM> </A>
-+<CENTER><H2><A NAME="9">              Command Line Parameters
-+</A></H2></CENTER>
-+<CENTER><H3>                DATA (sequences)
-+</H3></CENTER>
-+<CENTER><TABLE ALIGN=ABSCENTER BORDER=1 CELLSPACING=1 CELLPADDING=5>
-+<TR>
-+<TD><STRONG>Parameter</STRONG></TD>
-+<TD><STRONG><EM>Description</EM></STRONG></TD>
-+</TR>
-+<TR>
-+<TD><TT>-PROFILE1=file.ext  and  -PROFILE2=file.ext  </TT></TD>
-+<TD><EM>profiles (aligned sequences)</EM></TD>
-+</TR>
-+</TABLE></CENTER>
-+<CENTER><H3>                VERBS (do things)
-+</H3></CENTER>
-+<CENTER><TABLE ALIGN=ABSCENTER BORDER=1 CELLSPACING=1 CELLPADDING=5>
-+<TR>
-+<TD><STRONG>Parameter</STRONG></TD>
-+<TD><STRONG><EM>Description</EM></STRONG></TD>
-+</TR>
-+<TR>
-+<TD><TT>-HELP  or -CHECK    </TT></TD>
-+<TD><EM>outline the command line parameters</EM></TD>
-+</TR>
-+<TR>
-+<TD><TT>-ALIGN              </TT></TD>
-+<TD><EM>do full multiple alignment </EM></TD>
-+</TR>
-+<TR>
-+<TD><TT>-TREE               </TT></TD>
-+<TD><EM>calculate NJ tree</EM></TD>
-+</TR>
-+<TR>
-+<TD><TT>-BOOTSTRAP(=n)      </TT></TD>
-+<TD><EM>bootstrap a NJ tree (n= number of bootstraps; def. = 1000)</EM></TD>
-+</TR>
-+<TR>
-+<TD><TT>-CONVERT            </TT></TD>
-+<TD><EM>output the input sequences in a different file format</EM></TD>
-+</TR>
-+</TABLE></CENTER>
-+<CENTER><H3>                PARAMETERS (set things)
-+</H3></CENTER>
-+<CENTER><P><STRONG>***General settings:****
-+</STRONG></P></CENTER>
-+<CENTER><TABLE ALIGN=ABSCENTER BORDER=1 CELLSPACING=1 CELLPADDING=5>
-+<TR>
-+<TD><STRONG>Parameter</STRONG></TD>
-+<TD><STRONG><EM>Description</EM></STRONG></TD>
-+</TR>
-+<TR>
-+<TD><TT>-INTERACTIVE </TT></TD>
-+<TD><EM>read command line, then enter normal interactive menus</EM></TD>
-+</TR>
-+<TR>
-+<TD><TT>-QUICKTREE   </TT></TD>
-+<TD><EM>use FAST algorithm for the alignment guide tree</EM></TD>
-+</TR>
-+<TR>
-+<TD><TT>-TYPE=       </TT></TD>
-+<TD><EM>PROTEIN or DNA sequences</EM></TD>
-+</TR>
-+<TR>
-+<TD><TT>-NEGATIVE    </TT></TD>
-+<TD><EM>protein alignment with negative values in matrix</EM></TD>
-+</TR>
-+<TR>
-+<TD><TT>-OUTFILE=    </TT></TD>
-+<TD><EM>sequence alignment file name</EM></TD>
-+</TR>
-+<TR>
-+<TD><TT>-OUTPUT=     </TT></TD>
-+<TD><EM>GCG, GDE, PHYLIP, PIR or NEXUS</EM></TD>
-+</TR>
-+<TR>
-+<TD><TT>-OUTORDER=   </TT></TD>
-+<TD><EM>INPUT or ALIGNED</EM></TD>
-+</TR>
-+<TR>
-+<TD><TT>-CASE=       </TT></TD>
-+<TD><EM>LOWER or UPPER (for GDE output only)</EM></TD>
-+</TR>
-+<TR>
-+<TD><TT>-SEQNOS=     </TT></TD>
-+<TD><EM>OFF or ON (for Clustal output only)</EM></TD>
-+</TR>
-+</TABLE></CENTER>
-+<CENTER><H3>***Fast Pairwise Alignments:***
-+</H3></CENTER>
-+<CENTER><TABLE ALIGN=ABSCENTER BORDER=1 CELLSPACING=1 CELLPADDING=5>
-+<TR>
-+<TD><STRONG>Parameter</STRONG></TD>
-+<TD><STRONG><EM>Description</EM></STRONG></TD>
-+</TR>
-+<TR>
-+<TD><TT>-TOPDIAGS=n  </TT></TD>
-+<TD><EM>number of best diags.</EM></TD>
-+</TR>
-+<TR>
-+<TD><TT>-WINDOW=n    </TT></TD>
-+<TD><EM>window around best diags.</EM></TD>
-+</TR>
-+<TR>
-+<TD><TT>-PAIRGAP=n   </TT></TD>
-+<TD><EM>gap penalty</EM></TD>
-+</TR>
-+<TR>
-+<TD><TT>-SCORE=      </TT></TD>
-+<TD><EM>PERCENT or ABSOLUTE</EM></TD>
-+</TR>
-+</TABLE></CENTER>
-+<CENTER><H3>***Slow Pairwise Alignments:***
-+</H3></CENTER>
-+<CENTER><TABLE ALIGN=ABSCENTER BORDER=1 CELLSPACING=1 CELLPADDING=5>
-+<TR>
-+<TD><STRONG>Parameter</STRONG></TD>
-+<TD><STRONG><EM>Description</EM></STRONG></TD>
-+</TR>
-+<TR>
-+<TD><TT>-PWDNAMATRIX= </TT></TD>
-+<TD><EM>DNA weight matrix=IUB, CLUSTALW or filename</EM></TD>
-+</TR>
-+<TR>
-+<TD><TT>-PWGAPOPEN=f  </TT></TD>
-+<TD><EM>gap opening penalty</EM></TD>
-+</TR>
-+<TR>
-+<TD><TT>-PWGAPEXT=f  </TT></TD>
-+<TD><EM>gap extension penalty</EM></TD>
-+</TR>
-+</TABLE></CENTER>
-+<CENTER><H3>***Multiple Alignments:***
-+</H3></CENTER>
-+<CENTER><TABLE ALIGN=ABSCENTER BORDER=1 CELLSPACING=1 CELLPADDING=5>
-+<TR>
-+<TD><STRONG>Parameter</STRONG></TD>
-+<TD><STRONG><EM>Description</EM></STRONG></TD>
-+</TR>
-+<TR>
-+<TD><TT>-USETREE=    </TT></TD>
-+<TD><EM>file for old guide tree</EM></TD>
-+</TR>
-+<TR>
-+<TD><TT>-MATRIX=     </TT></TD>
-+<TD><EM>Protein weight matrix=BLOSUM, PAM, GONNET, ID or filename</EM></TD>
-+</TR>
-+<TR>
-+<TD><TT>-DNAMATRIX=  </TT></TD>
-+<TD><EM>DNA weight matrix=IUB, CLUSTALW or filename</EM></TD>
-+</TR>
-+<TR>
-+<TD><TT>-GAPOPEN=f   </TT></TD>
-+<TD><EM>gap opening penalty</EM></TD>
-+</TR>
-+<TR>
-+<TD><TT>-GAPEXT=f  </TT></TD>
-+<TD><EM>gap extension penalty</EM></TD>
-+</TR>
-+<TR>
-+<TD><TT>-ENDGAPS     </TT></TD>
-+<TD><EM>no end gap separation pen.</EM></TD>
-+</TR>
-+<TR>
-+<TD><TT>-GAPDIST=n   </TT></TD>
-+<TD><EM>gap separation pen. range</EM></TD>
-+</TR>
-+<TR>
-+<TD><TT>-NOPGAP      </TT></TD>
-+<TD><EM>residue-specific gaps off</EM></TD>
-+</TR>
-+<TR>
-+<TD><TT>-NOHGAP    </TT></TD>
-+<TD><EM>hydrophilic gaps off</EM></TD>
-+</TR>
-+<TR>
-+<TD><TT>-HGAPRESIDUES= </TT></TD>
-+<TD><EM>list hydrophilic res.</EM></TD>
-+</TR>
-+<TR>
-+<TD><TT>-MAXDIV=n    </TT></TD>
-+<TD><EM>% ident. for delay</EM></TD>
-+</TR>
-+<TR>
-+<TD><TT>-TYPE=       </TT></TD>
-+<TD><EM>PROTEIN or DNA</EM></TD>
-+</TR>
-+<TR>
-+<TD><TT>-TRANSWEIGHT=f </TT></TD>
-+<TD><EM>transitions weighting</EM></TD>
-+</TR>
-+</TABLE></CENTER>
-+<CENTER><H3>***Profile Alignments:***
-+</H3></CENTER>
-+<CENTER><TABLE ALIGN=ABSCENTER BORDER=1 CELLSPACING=1 CELLPADDING=5>
-+<TR>
-+<TD><STRONG>Parameter</STRONG></TD>
-+<TD><STRONG><EM>Description</EM></STRONG></TD>
-+</TR>
-+<TR>
-+<TD><TT>-NEWTREE1=    </TT></TD>
-+<TD><EM>file for new guide tree for profile1</EM></TD>
-+</TR>
-+<TR>
-+<TD><TT>-NEWTREE2=    </TT></TD>
-+<TD><EM>file for new guide tree for profile2</EM></TD>
-+</TR>
-+<TR>
-+<TD><TT>-USETREE1=    </TT></TD>
-+<TD><EM>file for old guide tree for profile1</EM></TD>
-+</TR>
-+<TR>
-+<TD><TT>-USETREE2=    </TT></TD>
-+<TD><EM>file for old guide tree for profile2</EM></TD>
-+</TR>
-+</TABLE></CENTER>
-+<CENTER><H3>***Sequence to Profile Alignments:***
-+</H3></CENTER>
-+<CENTER><TABLE ALIGN=ABSCENTER BORDER=1 CELLSPACING=1 CELLPADDING=5>
-+<TR>
-+<TD><STRONG>Parameter</STRONG></TD>
-+<TD><STRONG><EM>Description</EM></STRONG></TD>
-+</TR>
-+<TR>
-+<TD><TT>-NEWTREE=    </TT></TD>
-+<TD><EM>file for new guide tree</EM></TD>
-+</TR>
-+<TR>
-+<TD><TT>-USETREE=    </TT></TD>
-+<TD><EM>file for old guide tree</EM></TD>
-+</TR>
-+</TABLE></CENTER>
-+<CENTER><H3>***Structure Alignments:***
-+</H3></CENTER>
-+<CENTER><TABLE ALIGN=ABSCENTER BORDER=1 CELLSPACING=1 CELLPADDING=5>
-+<TR>
-+<TD><STRONG>Parameter</STRONG></TD>
-+<TD><STRONG><EM>Description</EM></STRONG></TD>
-+</TR>
-+<TR>
-+<TD><TT>-NOSECSTR2     </TT></TD>
-+<TD><EM>do not use secondary structure/gap penalty mask for profile 2</EM></TD>
-+</TR>
-+<TR>
-+<TD><TT>-SECSTROUT=STRUCTURE or MASK or BOTH or NONE  </TT></TD>
-+<TD><EM>output in alignment file</EM></TD>
-+</TR>
-+<TR>
-+<TD><TT>-HELIXGAP=n    </TT></TD>
-+<TD><EM>gap penalty for helix core residues </EM></TD>
-+</TR>
-+<TR>
-+<TD><TT>-STRANDGAP=n   </TT></TD>
-+<TD><EM>gap penalty for strand core residues</EM></TD>
-+</TR>
-+<TR>
-+<TD><TT>-LOOPGAP=n     </TT></TD>
-+<TD><EM>gap penalty for loop regions</EM></TD>
-+</TR>
-+<TR>
-+<TD><TT>-TERMINALGAP=n </TT></TD>
-+<TD><EM>gap penalty for structure termini</EM></TD>
-+</TR>
-+<TR>
-+<TD><TT>-HELIXENDIN=n  </TT></TD>
-+<TD><EM>number of residues inside helix to be treated as terminal</EM></TD>
-+</TR>
-+<TR>
-+<TD><TT>-HELIXENDOUT=n </TT></TD>
-+<TD><EM>number of residues outside helix to be treated as terminal</EM></TD>
-+</TR>
-+<TR>
-+<TD><TT>-STRANDENDIN=n </TT></TD>
-+<TD><EM>number of residues inside strand to be treated as terminal</EM></TD>
-+</TR>
-+<TR>
-+<TD><TT>-STRANDENDOUT=n</TT></TD>
-+<TD><EM>number of residues outside strand to be treated as terminal </EM></TD>
-+</TR>
-+</TABLE></CENTER>
-+<CENTER><H3>***Trees:***
-+</H3></CENTER>
-+<CENTER><TABLE ALIGN=ABSCENTER BORDER=1 CELLSPACING=1 CELLPADDING=5>
-+<TR>
-+<TD><STRONG>Parameter</STRONG></TD>
-+<TD><STRONG><EM>Description</EM></STRONG></TD>
-+</TR>
-+<TR>
-+<TD><TT>-SEED=n    </TT></TD>
-+<TD><EM>seed number for bootstraps</EM></TD>
-+</TR>
-+<TR>
-+<TD><TT>-KIMURA      </TT></TD>
-+<TD><EM>use Kimura's correction</EM></TD>
-+</TR>
-+<TR>
-+<TD><TT>-TOSSGAPS  </TT></TD>
-+<TD><EM>ignore positions with gaps</EM></TD>
-+</TR>
-+<TR>
-+<TD><TT>-BOOTLABELS=node OR branch </TT></TD>
-+<TD><EM>position of bootstrap values in tree display</EM></TD>
-+</TR>
-+</TABLE></CENTER>
-+</P>
-+<A HREF="#INDEX"> <EM>Back to Index</EM> </A>
-+<CENTER><H2><A NAME="R">                             References
-+</A></H2></CENTER>
-+<P>
-+</P>
-+<P>
-+<STRONG>
-+The ClustalX program is described in the manuscript:
-+</STRONG>
-+</P>
-+<P>
-+Thompson,J.D., Gibson,T.J., Plewniak,F., Jeanmougin,F. and Higgins,D.G. (1997)
-+The ClustalX windows interface: flexible strategies for multiple sequence 
- alignment aided by quality analysis tools. Nucleic Acids Research, 25:4876-4882.
- </P>
- <P>

Deleted: trunk/packages/clustalx/trunk/debian/patches/clustalx_help.patch
===================================================================
--- trunk/packages/clustalx/trunk/debian/patches/clustalx_help.patch	2009-04-30 23:56:12 UTC (rev 3340)
+++ trunk/packages/clustalx/trunk/debian/patches/clustalx_help.patch	2009-05-01 00:07:09 UTC (rev 3341)
@@ -1,1529 +0,0 @@
-Index: clustalw-1.83/clustalx_help
-===================================================================
---- clustalw-1.83.orig/clustalx_help
-+++ clustalw-1.83/clustalx_help
-@@ -1,4 +1,1524 @@
- 
-+This is the on-line help file for Clustal X (version 1.81), using the NCBI
-+Vibrant Toolkit.   
-+
-+It should be named or defined as: clustalx_help 
-+except with MSDOS in which case it should be named ClustalX.HLP
-+
-+For full details of usage and algorithms, please read the CLUSTALW.DOC file.
-+
-+
-+Toby  Gibson                         EMBL, Heidelberg, Germany.
-+Des   Higgins                        UCC, Cork, Ireland.
-+Julie Thompson/Francois Jeanmougin   IGBMC, Strasbourg, France.
-+
-+
-+
-+
-+>>HELP G <<
-+                      General help for CLUSTAL X (1.8)
-+
-+Clustal X is a windows interface for the ClustalW multiple sequence alignment
-+program. It provides an integrated environment for performing multiple sequence
-+and profile alignments and analysing the results. The sequence alignment is
-+displayed in a window on the screen. A versatile coloring scheme has been
-+incorporated allowing you to highlight conserved features  in the alignment.
-+The pull-down menus at the top of the window allow you to select all the
-+options required for traditional multiple sequence and profile alignment.
-+
-+You can cut-and-paste sequences to change the order of the alignment; you can
-+select a subset of sequences to be aligned; you can select a sub-range of the
-+alignment to be realigned and inserted back into the original alignment.
-+
-+Alignment quality analysis can be performed and low-scoring segments or
-+exceptional residues can be highlighted.
-+
-+ClustalX is available for a number of different platforms including: SUN
-+Solaris, IRIX5.3 on Silicon Graphics, Digital UNIX on DECStations, Microsoft
-+Windows (32 bit) for PC's, Linux ELF for x86 PC's and Macintosh PowerMac. (See
-+the README file for Installation instructions.)
-+
-+
-+<H4>
-+SEQUENCE INPUT
-+</H4>
-+
-+Sequences and profiles (a term for pre-existing alignments) are input using 
-+the FILE menu. Invalid options will be disabled. All sequences must be included
-+into 1 file. 7 formats are automatically recognised: NBRF/PIR, EMBL/SWISSPROT,
-+Pearson (Fasta), Clustal (*.aln), GCG/MSF (Pileup), GCG9 RSF and GDE flat file.
-+All non-alphabetic characters (spaces, digits, punctuation marks) are ignored
-+except "-" which is used to indicate a GAP ("." in MSF/RSF).  
-+
-+<H4>
-+SEQUENCE / PROFILE ALIGNMENTS
-+</H4>
-+
-+Clustal X has two modes which can be selected using the switch directly above
-+the sequence display: MULTIPLE ALIGNMENT MODE and PROFILE ALIGNMENT MODE.
-+
-+To do a MULTIPLE ALIGNMENT on a set of sequences, make sure MULTIPLE ALIGNMENT
-+MODE is selected. A single sequence data area is then displayed. The ALIGNMENT
-+menu then allows you to either produce a guide tree for the alignment, or to do
-+a multiple alignment following the guide tree, or to do a full multiple
-+alignment.
-+
-+In PROFILE ALIGNMENT MODE, two sequence data areas are displayed, allowing you
-+to align 2 alignments (termed profiles). Profiles are also used to add a new
-+sequence to an old alignment, or to use secondary structure to guide the
-+alignment process. GAPS in the old alignments are indicated using the "-" 
-+character. PROFILES can be input in ANY of the allowed formats; just  use "-"
-+(or "." for MSF/RSF) for each gap position. In Profile Alignment Mode, a button
-+"Lock Scroll" is displayed which allows you to scroll the two profiles together
-+using a single scroll bar. When the Lock Scroll is turned off, the two profiles
-+can be scrolled independently.
-+
-+<H4>
-+PHYLOGENETIC TREES
-+</H4>
-+
-+Phylogenetic trees can be calculated from old alignments (read in with "-"
-+characters to indicate gaps) OR after a multiple alignment while the alignment
-+is still displayed.
-+
-+<H4>
-+ALIGNMENT DISPLAY
-+</H4>
-+
-+The alignment is displayed on the screen with the sequence names on the left
-+hand side. The sequence alignment is for display only, it cannot be edited here
-+(except for changing the sequence order by cutting-and-pasting on the sequence
-+names). 
-+
-+A ruler is displayed below the sequences, starting at 1 for the first residue
-+position (residue numbers in the sequence input file are ignored).
-+
-+A line above the alignment is used to mark strongly conserved positions. Three
-+characters ('*', ':' and '.') are used:
-+
-+'*' indicates positions which have a single, fully conserved residue
-+
-+':' indicates that one of the following 'strong' groups is fully conserved:-
-+<PRE>
-+                 STA  
-+                 NEQK  
-+                 NHQK  
-+                 NDEQ  
-+                 QHRK  
-+                 MILV  
-+                 MILF  
-+                 HY  
-+                 FYW  
-+</PRE>
-+
-+'.' indicates that one of the following 'weaker' groups is fully conserved:-
-+<PRE>
-+                 CSA  
-+                 ATV  
-+                 SAG  
-+                 STNK  
-+                 STPA  
-+                 SGND  
-+                 SNDEQK  
-+                 NDEQHK  
-+                 NEQHRK  
-+                 FVLIM  
-+                 HFY  
-+</PRE>
-+
-+These are all the positively scoring groups that occur in the Gonnet Pam250
-+matrix. The strong and weak groups are defined as strong score >0.5 and weak
-+score =<0.5 respectively.
-+
-+For profile alignments, secondary structure and gap penalty masks are displayed
-+above the sequences, if any data is found in the profile input file.
-+
-+
-+>>HELP F <<
-+                      Input / Output Files 
-+
-+LOAD SEQUENCES reads sequences from one of 7 file formats, replacing any
-+sequences that are already loaded. All sequences must be in 1 file. The formats
-+that are automatically recognised are: NBRF/PIR, EMBL/SWISSPROT, Pearson
-+(Fasta), Clustal (*.aln), GCG/MSF (Pileup), GCG9/RSF and GDE flat file.  All
-+non-alphabetic characters (spaces, digits, punctuation  marks) are ignored
-+except "-" which is used to indicate a GAP ("." in MSF/RSF).
-+
-+The program tries to automatically recognise the different file formats used
-+and to guess whether the sequences are amino acid or nucleotide.  This is not
-+always foolproof.
-+
-+FASTA and NBRF/PIR formats are recognised by having a ">" as the first 
-+character in the file.  
-+
-+EMBL/Swiss Prot formats are recognised by the letters "ID" at the start of the
-+file (the token for the entry name field).  
-+
-+CLUSTAL format is recognised by the word CLUSTAL at the beginning of the file.
-+
-+GCG/MSF format is recognised by one of the following:
-+<UL>
-+<LI>
-+       - the word PileUp at the start of the file.
-+</LI><LI>
-+       - the word !!AA_MULTIPLE_ALIGNMENT or !!NA_MULTIPLE_ALIGNMENT
-+         at the start of the file.
-+</LI><LI>
-+       - the word MSF on the first line of the file, and the characters ..
-+         at the end of this line.
-+</LI>
-+</UL>
-+ 
-+GCG/RSF format is recognised by the word !!RICH_SEQUENCE at the beginning of
-+the file.
-+
-+
-+If 85% or more of the characters in the sequence are from A,C,G,T,U or N, the
-+sequence will be assumed to be nucleotide.  This works in 97.3% of cases but
-+watch out!
-+
-+APPEND SEQUENCES is only valid in MULTIPLE ALIGNMENT MODE. The input sequences
-+do not replace those already loaded, but are appended at the end of the
-+alignment.
-+
-+SAVE SEQUENCES AS... offers the user a choice of one of six output formats:
-+CLUSTAL, NBRF/PIR, GCG/MSF, PHYLIP, NEXUS or GDE. All sequences are written
-+to a single file. Options are available to save a range of the alignment, 
-+switch between UPPER/LOWER case for GDE files, and to output SEQUENCE NUMBERING
-+for CLUSTAL files.
-+
-+LOAD PROFILE 1 reads sequences in the same 7 file formats, replacing any
-+sequences already loaded as Profile 1. This option will also remove any
-+sequences which are loaded in Profile 2.
-+
-+LOAD PROFILE 2 reads sequences in the same 7 file formats, replacing any
-+sequences already loaded as Profile 2.
-+
-+SAVE PROFILE 1 AS... is similar to the Save Sequences option except that only
-+those sequences in Profile 1 will be written to the output file.
-+
-+SAVE PROFILE 2 AS... is similar to the Save Sequences option except that only
-+those sequences in Profile 2 will be written to the output file.
-+
-+WRITE ALIGNMENT AS POSTSCRIPT will write the sequence display to a postscript
-+format file. This will include any secondary structure / gap penalty mask 
-+information and the consensus and ruler lines which are displayed on the
-+screen. The Alignment Quality curve can be optionally included in the output
-+file.
-+
-+WRITE PROFILE 1 AS POSTSCRIPT is similar to WRITE ALIGNMENT AS POSTSCRIPT
-+except that only the profile 1 display will be printed.
-+
-+WRITE PROFILE 2 AS POSTSCRIPT is similar to WRITE ALIGNMENT AS POSTSCRIPT
-+except that only the profile 2 display will be printed.
-+
-+
-+<H4>
-+POSTSCRIPT PARAMETERS
-+</H4>
-+
-+A number of options are available to allow you to configure your postscript
-+output file.
-+
-+PS COLORS FILE:
-+
-+The exact RGB values required to reproduce the colors used in the alignment
-+window will vary from printer to printer. A PS colors file can be specified
-+that contains the RGB values for all the colors required by each of your
-+postscript printers.
-+
-+By default, Clustal X looks for a file called 'colprint.par' in the current
-+directory (if you're running under UNIX, it then looks in your home directory,
-+and finally in the directories in your PATH environment variable). If no PS
-+colors file is found or a color used on the screen is not defined here, the
-+screen RGB values (from the Color Parameter File) are used.
-+
-+The PS colors file consists of one line for each color to be defined, with the
-+color name followed by the RGB values (on a scale of 0 to 1). For example,
-+
-+RED          0.9 0.1 0.1
-+
-+Blank lines and comments (lines beginning with a '#' character) are ignored.
-+
-+
-+PAGE SIZE:  The alignment can be displayed on either A4, A3 or US Letter size
-+pages.
-+
-+ORIENTATION: The alignment can be displayed on either a landscape or portrait
-+page.
-+
-+PRINT HEADER: An optional header including the postscript filename, and
-+creation date can be printed at the top of each page.
-+
-+PRINT QUALITY CURVE: The Alignment Quality curve which is displayed underneath
-+the alignment on the screen can be included in the postscript output.
-+
-+PRINT RULER: The ruler which is displayed underneath the alignment on the 
-+screen can be included in the postscript output.
-+
-+PRINT RESIDUE NUMBERS: Sequence residue numbers can be printed at the right
-+hand side of the alignment.
-+
-+RESIZE TO FIT PAGE: By default, the alignment is scaled to fit the page size
-+selected. This option can be turned off, in which case a font size of 10 will
-+be used for the sequences.
-+
-+PRINT FROM POSITION/TO: A range of the alignment can be printed. The default
-+is to print the full alignment. The first and last residues to be printed are
-+specified here.
-+
-+USE BLOCK LENGTH: The alignment can be divided into blocks of residues. The
-+number of residues in a block is specified here. More than one block may then
-+be printed on a single page. This is useful for long alignments of a small
-+number of sequences. If the block length is set to 0, the alignment will not
-+be divided into blocks, but printed across a number of pages.
-+
-+>>HELP E <<
-+                          Editing Alignments
-+
-+Clustal X allows you to change the order of the sequences in the alignment, by
-+cutting-and-pasting the sequence names.
-+
-+To select a group of sequences to be moved, click on a sequence name and drag
-+the cursor until all the required sequences are highlighted. Holding down the
-+Shift key when clicking on the first name will add new sequences to those
-+already selected.
-+
-+(Options are provided to Select All Sequences, Select Profile 1 or Select 
-+Profile 2.)
-+
-+The selected sequences can be removed from the alignment by using the EDIT
-+menu, CUT option.
-+
-+To add the cut sequences back into an alignment, select a sequence by clicking
-+on the sequence name. The cut sequences will be added to the alignment,
-+immediately following the selected sequence, by the EDIT menu, PASTE option.
-+
-+To add the cut sequences to an empty alignment (eg. when cutting sequences from
-+Profile 1 and pasting them to Profile 2), click on the empty sequence name
-+display area, and select the EDIT menu, PASTE option as before.
-+
-+The sequence selection and sequence range selection can be cleared using the
-+EDIT menu, CLEAR SEQUENCE SELECTION and CLEAR RANGE SELECTION options
-+respectively.
-+
-+To search for a string of residues in the sequences, select the sequences to be
-+searched by clicking on the sequence names. You can then enter the string to
-+search for by selecting the SEARCH FOR STRING option. If the string is found in
-+any of the sequences selected, the sequence name and column number are printed
-+below the sequence display.
-+
-+In PROFILE ALIGNMENT MODE, the two profiles can be merged (normally done after
-+alignment) by selecting ADD PROFILE 2 TO PROFILE 1. The sequences currently
-+displayed as Profile 2 will be appended to Profile 1. 
-+
-+The REMOVE ALL GAPS option will remove all gaps from the sequences currently
-+selected.
-+WARNING: This option removes ALL gaps, not only those introduced by ClustalX,
-+but also those that were read from the input alignment file. Any secondary
-+structure information associated with the alignment will NOT be automatically
-+realigned.
-+
-+The REMOVE GAP-ONLY COLUMNS will remove those positions in the alignment which
-+contain gaps in all sequences. This can occur as a result of removing divergent
-+sequences from an alignment, or if an alignment has been realigned.
-+
-+>>HELP M <<
-+                          Multiple Alignments
-+
-+Make sure MULTIPLE ALIGNMENT MODE is selected, using the switch directly above
-+the sequence display area. Then, use the ALIGNMENT menu to do multiple
-+alignments.
-+
-+Multiple alignments are carried out in 3 stages:
-+ 
-+1) all sequences are compared to each other (pairwise alignments);
-+ 
-+2) a dendrogram (like a phylogenetic tree) is constructed, describing the
-+approximate groupings of the sequences by similarity (stored in a file).
-+ 
-+3) the final multiple alignment is carried out, using the dendrogram as a guide.
-+
-+The 3 stages are carried out automatically by the DO COMPLETE ALIGNMENT option.
-+You can skip the first stages (pairwise alignments; guide tree) by using an old
-+guide tree file (DO ALIGNMENT FROM GUIDE TREE); or you can just produce the
-+guide tree with no final multiple alignment (PRODUCE GUIDE TREE ONLY).
-+
-+
-+REALIGN SELECTED SEQUENCES is used to realign badly aligned sequences in the
-+alignment. Sequences can be selected by clicking on the sequence names - see
-+Editing Alignments for more details. The unselected sequences are then 'fixed'
-+and a profile is made including only the unselected sequences. Each of the
-+selected sequences in turn is then realigned to this profile. The realigned
-+sequences will be displayed as a group at the end of the alignment.
-+
-+
-+REALIGN SELECTED SEQUENCE RANGE is used to realign a small region of the 
-+alignment. A residue range can be selected by clicking on the sequence display
-+area. A multiple alignment is then performed, following the 3 stages described
-+above, but only using the selected residue range. Finally the new alignment of
-+the range is pasted back into the full sequence alignment.
-+
-+By default, gap penalties are used at each end of the subrange in order to 
-+penalise terminal gaps. If the REALIGN SEGMENT END GAP PENALTIES option is
-+switched off, gaps can be introduced at the ends of the residue range at no
-+cost.
-+
-+
-+ALIGNMENT PARAMETERS displays a sub-menu with the following options:
-+
-+RESET NEW GAPS BEFORE ALIGNMENT will remove any new gaps introduced into the
-+sequences during multiple alignment if you wish to change the parameters and
-+try again. This only takes effect just before you do a second multiple
-+alignment. You can make phylogenetic trees after alignment whether or not this
-+is ON. If you turn this OFF, the new gaps are kept even if you do a second
-+multiple alignment. This allows you to iterate the alignment gradually.
-+Sometimes, the alignment is improved by a second or third pass.
-+
-+RESET ALL GAPS BEFORE ALIGNMENT will remove all gaps in the sequences including
-+gaps which were read in from the sequence input file. This only takes effect
-+just before you do a second multiple alignment.  You can make phylogenetic
-+trees after alignment whether or not this is ON.  If you turn this OFF, all
-+gaps are kept even if you do a second multiple alignment. This allows you to
-+iterate the alignment gradually.  Sometimes, the alignment is improved by a
-+second or third pass.
-+
-+
-+PAIRWISE ALIGNMENT PARAMETERS control the speed/sensitivity of the initial
-+alignments.
-+
-+MULTIPLE ALIGNMENT PARAMETERS control the gaps in the final multiple
-+alignments.
-+
-+PROTEIN GAP PARAMETERS displays a temporary window which allows you to set
-+various parameters only used in the alignment of protein sequences.
-+
-+(SECONDARY STRUCTURE PARAMETERS, for use with the Profile Alignment Mode only,
-+allows you to set various parameters only used with gap penalty masks.)
-+
-+SAVE LOG FILE will write the alignment calculation scores to a file. The log
-+filename is the same as the input sequence filename, with an extension .log
-+appended.
-+
-+
-+<H4>
-+OUTPUT FORMAT OPTIONS
-+</H4>
-+
-+You can choose from 6 different alignment formats (CLUSTAL, GCG, NBRF/PIR,
-+PHYLIP, GDE and NEXUS).  You can choose more than one (or all 6 if you wish).  
-+
-+CLUSTAL format output is a self explanatory alignment format. It shows the
-+sequences aligned in blocks. It can be read in again at a later date to (for
-+example) calculate a phylogenetic tree or add in new sequences by profile
-+alignment.
-+
-+GCG output can be used by any of the GCG programs that can work on multiple
-+alignments (e.g. PRETTY, PROFILEMAKE, PLOTALIGN). It is the same as the GCG
-+.msf format files (multiple sequence file); new in version 7 of GCG.
-+
-+NEXUS format is used by several phylogeny programs, including PAUP and
-+MacClade.
-+
-+PHYLIP format output can be used for input to the PHYLIP package of Joe 
-+Felsenstein.  This is a very widely used package for doing every imaginable
-+form of phylogenetic analysis (MUCH more than the modest introduction
-+offered by this program).
-+
-+NBRF/PIR: this is the same as the standard PIR format with ONE ADDITION. Gap
-+characters "-" are used to indicate the positions of gaps in the multiple 
-+alignment. These files can be re-used as input in any part of clustal that
-+allows sequences (or alignments or profiles) to be read in.  
-+
-+GDE:  this format is used by the GDE package of Steven Smith and is understood
-+by SEQLAB in GCG 9 or later.
-+
-+GDE OUTPUT CASE: sequences in GDE format may be written in either upper or
-+lower case.
-+ 
-+CLUSTALW SEQUENCE NUMBERS: residue numbers may be added to the end of the
-+alignment lines in clustalw format.
-+
-+OUTPUT ORDER is used to control the order of the sequences in the output
-+alignments. By default, it uses the order in which the sequences were aligned
-+(from the guide tree/dendrogram), thus automatically grouping closely related
-+sequences. It can be switched to be the same as the original input order.
-+
-+PARAMETER OUTPUT: This option will save all your parameter settings in a
-+parameter file (suffix .par) during alignment. The file can be subsequently
-+used to rerun ClustalW using the same parameters.
-+
-+
-+<H3>
-+ALIGNMENT PARAMETERS
-+</H3>
-+--------------------
-+
-+<STRONG>
-+PAIRWISE ALIGNMENT PARAMETERS
-+</STRONG>
-+
-+A distance is calculated between every pair of sequences and these are used to
-+construct the phylogenetic tree which guides the final multiple alignment. The
-+scores are calculated from separate pairwise alignments. These can be
-+calculated using 2 methods: dynamic programming (slow but accurate) or by the
-+method of Wilbur and Lipman (extremely fast but approximate).   
-+
-+You can choose between the 2 alignment methods using the PAIRWISE ALIGNMENTS
-+option. The slow/accurate method is fast enough for short sequences but will be
-+VERY SLOW for many (e.g. >100) long (e.g. >1000 residue) sequences.   
-+
-+
-+<STRONG>
-+SLOW-ACCURATE alignment parameters:
-+</STRONG>
-+
-+These parameters do not have any effect on the speed of the alignments. They
-+are used to give initial alignments which are then rescored to give percent
-+identity scores. These % scores are the ones which are displayed on the 
-+screen. The scores are converted to distances for the trees.
-+
-+Gap Open Penalty:      the penalty for opening a gap in the alignment.
-+
-+Gap Extension Penalty: the penalty for extending a gap by 1 residue.
-+
-+Protein Weight Matrix: the scoring table which describes the similarity of 
-+each amino acid to each other.
-+
-+Load protein matrix: allows you to read in a comparison table from a file.
-+
-+DNA weight matrix: the scores assigned to matches and mismatches (including
-+IUB ambiguity codes).
-+
-+Load DNA matrix: allows you to read in a comparison table from a file.
-+
-+See the Multiple alignment parameters, MATRIX option below for details of the
-+matrix input format.
-+
-+
-+<STRONG>
-+FAST-APPROXIMATE alignment parameters:
-+</STRONG>
-+
-+These similarity scores are calculated from fast, approximate, global align-
-+ments, which are controlled by 4 parameters. 2 techniques are used to make
-+these alignments very fast: 1) only exactly matching fragments (k-tuples) are
-+considered; 2) only the 'best' diagonals (the ones with most k-tuple matches)
-+are used.
-+
-+GAP PENALTY:   This is a penalty for each gap in the fast alignments. It has
-+little effect on the speed or sensitivity except for extreme values.
-+
-+K-TUPLE SIZE:  This is the size of exactly matching fragment that is used. 
-+INCREASE for speed (max= 2 for proteins; 4 for DNA), DECREASE for sensitivity.
-+For longer sequences (e.g. >1000 residues) you may wish to increase the
-+default.
-+
-+TOP DIAGONALS: The number of k-tuple matches on each diagonal (in an imaginary
-+dot-matrix plot) is calculated. Only the best ones (with most matches) are used
-+in the alignment. This parameter specifies how many. Decrease for speed;
-+increase for sensitivity.
-+
-+WINDOW SIZE:  This is the number of diagonals around each of the 'best' 
-+diagonals that will be used. Decrease for speed; increase for sensitivity.
-+
-+
-+<STRONG>
-+MULTIPLE ALIGNMENT PARAMETERS
-+</STRONG>
-+
-+These parameters control the final multiple alignment. This is the core of the
-+program and the details are complicated. To fully understand the use of the
-+parameters and the scoring system, you will have to refer to the documentation.
-+
-+Each step in the final multiple alignment consists of aligning two alignments 
-+or sequences. This is done progressively, following the branching order in the
-+GUIDE TREE. The basic parameters to control this are two gap penalties and the
-+scores for various identical/non-identical residues. 
-+
-+The GAP OPENING and EXTENSION PENALTIES can be set here. These control the 
-+cost of opening up every new gap and the cost of every item in a gap.  
-+Increasing the gap opening penalty will make gaps less frequent. Increasing 
-+the gap extension penalty will make gaps shorter. Terminal gaps are not 
-+penalised.
-+
-+The DELAY DIVERGENT SEQUENCES switch delays the alignment of the most distantly
-+related sequences until after the most closely related sequences have  been
-+aligned. The setting shows the percent identity level required to delay the
-+addition of a sequence; sequences that are less identical than this level to
-+any other sequences will be aligned later.
-+
-+The TRANSITION WEIGHT gives transitions (A<-->G or C<-->T i.e. purine-purine or
-+pyrimidine-pyrimidine substitutions) a weight between 0 and 1; a weight of zero
-+means that the transitions are scored as mismatches, while a weight of 1 gives
-+the transitions the match score. For distantly related DNA sequences, the
-+weight should be near to zero; for closely related sequences it can be useful
-+to assign a higher score. The default is set to 0.5.
-+
-+
-+The PROTEIN WEIGHT MATRIX option allows you to choose a series of weight
-+matrices. For protein alignments, you use a weight matrix to determine the
-+similarity of non-identical amino acids. For example, Tyr aligned with Phe is
-+usually judged to be 'better' than Tyr aligned with Pro.
-+
-+There are three 'in-built' series of weight matrices offered. Each consists of
-+several matrices which work differently at different evolutionary distances. To
-+see the exact details, read the documentation. Crudely, we store several
-+matrices in memory, spanning the full range of amino acid distance (from almost
-+identical sequences to highly divergent ones). For very similar sequences, it
-+is best to use a strict weight matrix which only gives a high score to
-+identities and the most favoured conservative substitutions. For more divergent
-+sequences, it is appropriate to use "softer" matrices which give a high score
-+to many other frequent substitutions.
-+
-+1) BLOSUM (Henikoff). These matrices appear to be the best available for 
-+carrying out data base similarity (homology searches). The matrices currently
-+used are: Blosum 80, 62, 45 and 30. BLOSUM was the default in earlier Clustal X
-+versions.
-+
-+2) PAM (Dayhoff). These have been extremely widely used since the late '70s. We
-+currently use the PAM 20, 60, 120, 350 matrices.
-+
-+3) GONNET. These matrices were derived using almost the same procedure as the
-+Dayhoff one (above) but are much more up to date and are based on a far larger
-+data set. They appear to be more sensitive than the Dayhoff series. We
-+currently use the GONNET 80, 120, 160, 250 and 350 matrices. This series is the
-+default for Clustal X version 1.8.
-+
-+We also supply an identity matrix which gives a score of 10 to two identical 
-+amino acids and a score of zero otherwise. This matrix is not very useful.
-+
-+Load protein matrix: allows you to read in a comparison matrix from a file.
-+This can be either a single matrix or a series of matrices (see below for
-+format). 
-+
-+
-+DNA WEIGHT MATRIX option allows you to select a single matrix (not a series)
-+used for aligning nucleic acid sequences. Two hard-coded matrices are available:
-+
-+1) IUB. This is the default scoring matrix used by BESTFIT for the comparison
-+of nucleic acid sequences. X's and N's are treated as matches to any IUB
-+ambiguity symbol. All matches score 1.9; all mismatches for IUB symbols score 0.
-+
-+2) CLUSTALW(1.6). A previous system used by ClustalW, in which matches score
-+1.0 and mismatches score 0. All matches for IUB symbols also score 0.
-+
-+Load DNA matrix: allows you to read in a nucleic acid comparison matrix from a
-+file (just one matrix, not a series).
-+
-+
-+SINGLE MATRIX INPUT FORMAT
-+The format used for a single matrix is the same as the BLAST program. The
-+scores in the new weight matrix should be similarities. You can use negative as
-+well as positive values if you wish, although the matrix will be automatically
-+adjusted to all positive scores, unless the NEGATIVE MATRIX option is selected.
-+Any lines beginning with a # character are assumed to be comments. The first
-+non-comment line should contain a list of amino acids in any order, using the 1
-+letter code, followed by a * character. This should be followed by a square
-+matrix of scores, with one row and one column for each amino acid. The last row
-+and column of the matrix (corresponding to the * character) contain the minimum
-+score over the whole matrix.
-+
-+MATRIX SERIES INPUT FORMAT
-+ClustalX uses different matrices depending on the mean percent identity of the
-+sequences to be aligned. You can specify a series of matrices and the range of
-+the percent identity for each matrix in a matrix series file. The file is
-+automatically recognised by the word CLUSTAL_SERIES at the beginning of the
-+file. Each matrix in the series is then specified on one line which should
-+start with the word MATRIX. This is followed by the lower and upper limits of
-+the sequence percent identities for which you want to apply the matrix. The
-+final entry on the matrix line is the filename of a Blast format matrix file
-+(see above for details of the single matrix file format).
-+
-+Example.
-+
-+CLUSTAL_SERIES
-+ 
-+MATRIX 81 100 /us1/user/julie/matrices/blosum80
-+MATRIX 61 80 /us1/user/julie/matrices/blosum62
-+MATRIX 31 60 /us1/user/julie/matrices/blosum45
-+MATRIX 0 30 /us1/user/julie/matrices/blosum30
-+
-+
-+<STRONG>
-+PROTEIN GAP PARAMETERS
-+</STRONG>
-+
-+RESIDUE SPECIFIC PENALTIES are amino acid specific gap penalties that reduce or
-+increase the gap opening penalties at each position in the alignment or 
-+sequence. See the documentation for details. As an example, positions that are
-+rich in glycine are more likely to have an adjacent gap than positions that are
-+rich in valine.
-+
-+HYDROPHILIC GAP PENALTIES are used to increase the chances of a gap within a
-+run (5 or more residues) of hydrophilic amino acids; these are likely to be
-+loop or random coil regions where gaps are more common. The residues that are
-+"considered" to be hydrophilic can be entered in HYDROPHILIC RESIDUES.
-+
-+GAP SEPARATION DISTANCE tries to decrease the chances of gaps being too close
-+to each other. Gaps that are less than this distance apart are penalised more
-+than other gaps. This does not prevent close gaps; it makes them less frequent,
-+promoting a block-like appearance of the alignment.
-+
-+END GAP SEPARATION treats end gaps just like internal gaps for the purposes of
-+avoiding gaps that are too close (set by GAP SEPARATION DISTANCE above). If you
-+turn this off, end gaps will be ignored for this purpose. This is useful when
-+you wish to align fragments where the end gaps are not biologically meaningful.
-+
-+
-+>>HELP P <<
-+                   Profile and Structure Alignments
-+   
-+By PROFILE ALIGNMENT, we mean alignment using existing alignments. Profile 
-+alignments allow you to store alignments of your favourite sequences and add
-+new sequences to them in small bunches at a time. A profile is simply an
-+alignment of one or more sequences (e.g. an alignment output file from Clustal
-+X). Each input can be a single sequence. One or both sets of input sequences
-+may include secondary structure assignments or gap penalty masks to guide the
-+alignment. 
-+
-+Make sure PROFILE ALIGNMENT MODE is selected, using the switch directly above
-+the sequence display area. Then, use the ALIGNMENT menu to do profile and
-+secondary structure alignments.
-+
-+The profiles can be in any of the allowed input formats with "-" characters
-+used to specify gaps (except for GCG/MSF where "." is used).
-+
-+You have to load the 2 profiles by choosing FILE, LOAD PROFILE 1 and  LOAD
-+PROFILE 2. Then ALIGNMENT, ALIGN PROFILE 2 TO PROFILE 1 will align the 2
-+profiles to each other. Secondary structure masks in either profile can be used
-+to guide the alignment. This option compares all the sequences in profile 1
-+with all the sequences in profile 2 in order to build guide trees which will be
-+used to calculate sequence weights, and select appropriate alignment parameters
-+for the final profile alignment.
-+
-+You can skip the first stage (pairwise alignments; guide trees) by using old
-+guide tree files (ALIGN PROFILES FROM GUIDE TREES). 
-+
-+The ALIGN SEQUENCES TO PROFILE 1 option will take the sequences in the second
-+profile and align them to the first profile, 1 at a time.  This is useful to
-+add some new sequences to an existing alignment, or to align a set of sequences
-+to a known structure. In this case, the second profile set need not be
-+pre-aligned.
-+
-+You can skip the first stage (pairwise alignments; guide tree) by using an old
-+guide tree file (ALIGN SEQUENCES TO PROFILE 1 FROM TREE). 
-+
-+SAVE LOG FILE will write the alignment calculation scores to a file. The log
-+filename is the same as the input sequence filename, with an extension .log
-+appended.
-+
-+The alignment parameters can be set using the ALIGNMENT PARAMETERS menu,
-+Pairwise Parameters, Multiple Parameters and Protein Gap Parameters options.
-+These are EXACTLY the same parameters as used by the general, automatic
-+multiple alignment procedure. The general multiple alignment procedure is
-+simply a series of profile alignments. Carrying out a series of profile
-+alignments on larger and larger groups of sequences, allows you to manually
-+build up a complete alignment, if necessary editing intermediate alignments.
-+
-+<STRONG>
-+SECONDARY STRUCTURE PARAMETERS
-+</STRONG>
-+
-+Use this menu to set secondary structure options. If a solved structure is
-+known, it can be used to guide the alignment by raising gap penalties within
-+secondary structure elements, so that gaps will preferentially be inserted into
-+unstructured surface loop regions. Alternatively, a user-specified gap penalty
-+mask can be supplied for a similar purpose.
-+
-+A gap penalty mask is a series of numbers between 1 and 9, one per position in 
-+the alignment. Each number specifies how much the gap opening penalty is to be 
-+raised at that position (raised by multiplying the basic gap opening penalty
-+by the number) i.e. a mask figure of 1 at a position means no change
-+in gap opening penalty; a figure of 4 means that the gap opening penalty is
-+four times greater at that position, making gaps 4 times harder to open.
-+
-+The format for gap penalty masks and secondary structure masks is explained in
-+a separate help section.
-+
-+>>HELP B << 
-+            Secondary Structure / Gap Penalty Masks
-+
-+The use of secondary structure-based penalties has been shown to improve  the
-+accuracy of sequence alignment. Clustal X now allows secondary structure/ gap
-+penalty masks to be supplied with the input sequences used during profile
-+alignment. (NB. The secondary structure information is NOT used during multiple
-+sequence alignment). The masks work by raising gap penalties in specified
-+regions (typically secondary structure elements) so that gaps are
-+preferentially opened in the less well conserved regions (typically surface
-+loops).
-+
-+The USE PROFILE 1(2) SECONDARY STRUCTURE / GAP PENALTY MASK options control
-+whether the input 2D-structure information or gap penalty masks will be used
-+during the profile alignment.
-+
-+The OUTPUT options control whether the secondary structure and gap penalty
-+masks should be included in the Clustal X output alignments. Showing both is
-+useful for understanding how the masks work. The 2D-structure information is
-+itself useful in judging the alignment quality and in seeing how residue
-+conservation patterns vary with secondary structure. 
-+
-+The HELIX and STRAND GAP PENALTY options provide the value for raising the gap
-+penalty at core Alpha Helical (A) and Beta Strand (B) residues. In CLUSTAL
-+format, capital residues denote the A and B core structure notation. Basic gap
-+penalties are multiplied by the amount specified.
-+
-+The LOOP GAP PENALTY option provides the value for the gap penalty in Loops.
-+By default this penalty is not raised. In CLUSTAL format, loops are specified
-+by "." in the secondary structure notation.
-+
-+The SECONDARY STRUCTURE TERMINAL PENALTY provides the value for setting the gap
-+penalty at the ends of secondary structures. Ends of secondary structures are
-+known to grow or shrink, comparing related structures. Therefore by default
-+these are given intermediate values, lower than the core penalties. All
-+secondary structure read in as lower case in CLUSTAL format gets the reduced
-+terminal penalty.
-+
-+The HELIX and STRAND TERMINAL POSITIONS options specify the range of structure
-+termini for the intermediate penalties. In the alignment output, these are
-+indicated as lower case. For Alpha Helices, by default, the range spans the 
-+end-helical turn (3 residues). For Beta Strands, the default range spans the
-+end residue and the adjacent loop residue, since sequence conservation often
-+extends beyond the actual H-bonded Beta Strand.
-+
-+Clustal X can read the masks from SWISS-PROT, CLUSTAL or GDE format input
-+files. For many 3-D protein structures, secondary structure information is
-+recorded in the feature tables of SWISS-PROT database entries. You should
-+always check that the assignments are correct - some are quite inaccurate.
-+Clustal X looks for SWISS-PROT HELIX and STRAND assignments e.g.
-+
-+
-+<PRE>
-+FT   HELIX       100    115
-+FT   STRAND      118    119
-+</PRE>
-+
-+The structure and penalty masks can also be read from CLUSTAL alignment format 
-+as comment lines beginning "!SS_" or "!GM_" e.g.
-+
-+<PRE>
-+!SS_HBA_HUMA    ..aaaAAAAAAAAAAaaa.aaaAAAAAAAAAAaaaaaaAaaa.........aaaAAAAAA
-+!GM_HBA_HUMA    112224444444444222122244444444442222224222111111111222444444
-+HBA_HUMA        VLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHGK
-+</PRE>
-+
-+Note that the mask itself is a set of numbers between 1 and 9 each of which is 
-+assigned to the residue(s) in the same column below. 
-+
-+In GDE flat file format, the masks are specified as text and the names must
-+begin with "SS_" or "GM_".
-+
-+Either a structure or penalty mask or both may be used. If both are included
-+in an alignment, the user will be asked which is to be used.
-+
-+
-+>>HELP T <<
-+                            Phylogenetic Trees
-+
-+Before calculating a tree, you must have an ALIGNMENT in memory. This can be
-+input using the FILE menu, LOAD SEQUENCES option or you should have just
-+carried out a full multiple alignment and the alignment is still in memory.
-+Remember YOU MUST ALIGN THE SEQUENCES FIRST!!!!
-+
-+The method used is the NJ (Neighbour Joining) method of Saitou and Nei. First
-+you calculate distances (percent divergence) between all pairs of sequences from
-+a multiple alignment; second you apply the NJ method to the distance matrix.
-+
-+To calculate a tree, use the DRAW N-J TREE option. This gives an UNROOTED tree
-+and all branch lengths. The root of the tree can only be inferred by using an
-+outgroup (a sequence that you are certain branches at the outside of the tree
-+.... certain on biological grounds) OR if you assume a degree of constancy in
-+the 'molecular clock', you can place the root in the 'middle' of the tree
-+(roughly equidistant from all tips).
-+
-+BOOTSTRAP N-J TREE uses a method for deriving confidence values for the 
-+groupings in a tree (first adapted for trees by Joe Felsenstein). It involves
-+making N random samples of sites from the alignment (N should be LARGE, e.g.
-+500 - 1000); drawing N trees (1 from each sample) and counting how many times
-+each grouping from the original tree occurs in the sample trees. You can set N
-+using the NUMBER OF BOOTSTRAP TRIALS option in the BOOTSTRAP TREE window. In
-+practice, you should use a large number of bootstrap replicates (1000 is
-+recommended, even if it means running the program for an hour on a slow 
-+computer). You can also supply a seed number for the random number generator
-+here. Different runs with the same seed will give the same answer. See the
-+documentation for more details.
-+
-+EXCLUDE POSITIONS WITH GAPS? With this option, any alignment positions where
-+ANY of the sequences have a gap will be ignored. This means that 'like' will
-+be compared to 'like' in all distances, which is highly desirable. It also
-+automatically throws away the most ambiguous parts of the alignment, which are
-+concentrated around gaps (usually). The disadvantage is that you may throw away
-+much of the data if there are many gaps (which is why it is difficult for us to
-+make it the default).  
-+
-+CORRECT FOR MULTIPLE SUBSTITUTIONS? For small divergence (say <10%) this option
-+makes no difference. For greater divergence, this option corrects for the fact
-+that observed distances underestimate actual evolutionary distances. This is
-+because, as sequences diverge, more than one substitution will happen at many
-+sites. However, you only see one difference when you look at the present day
-+sequences. Therefore, this option has the effect of stretching branch lengths
-+in trees (especially long branches). The corrections used here (for DNA or
-+proteins) are both due to Motoo Kimura. See the documentation for details.  
-+
-+Where possible, this option should be used. However, for VERY divergent
-+sequences, the distances cannot be reliably corrected. You will be warned if
-+this happens. Even if none of the distances in a data set exceed the reliable
-+threshold, if you bootstrap the data, some of the bootstrap distances may
-+randomly exceed the safe limit.  
-+
-+SAVE LOG FILE will write the tree calculation scores to a file. The log
-+filename is the same as the input sequence filename, with an extension .log
-+appended.
-+
-+<H4>
-+OUTPUT FORMAT OPTIONS
-+</H4>
-+
-+Four different formats are allowed. None of these displays the tree visually.
-+You can display the tree using the NJPLOT program distributed with Clustal X
-+OR get the PHYLIP package and use the tree drawing facilities there. 
-+ 
-+1) CLUSTAL FORMAT TREE. This format is verbose and lists all of the distances
-+between the sequences and the number of alignment positions used for each. The
-+tree is described at the end of the file. It lists the sequences that are
-+joined at each alignment step and the branch lengths. After two sequences are
-+joined, it is referred to later as a NODE. The number of a NODE is the number
-+of the lowest sequence in that NODE.   
-+
-+2) PHYLIP FORMAT TREE. This format is the New Hampshire format, used by many
-+phylogenetic analysis packages. It consists of a series of nested parentheses,
-+describing the branching order, with the sequence names and branch lengths. It
-+can be read by the NJPLOT program distributed with ClustalX. It can also be
-+used by the RETREE, DRAWGRAM and DRAWTREE programs of the PHYLIP package to see
-+the trees graphically. This is the same format used during multiple alignment
-+for the guide trees. Some other packages that can read and display New
-+Hampshire format are TreeTool, TreeView, and Phylowin.
-+
-+3) PHYLIP DISTANCE MATRIX. This format just outputs a matrix of all the
-+pairwise distances in a format that can be used by the PHYLIP package. It used
-+to be useful when one could not produce distances from protein sequences in the
-+Phylip package but is now redundant (PROTDIST of Phylip 3.5 now does this).
-+
-+4) NEXUS FORMAT TREE. This format is used by several popular phylogeny programs,
-+including PAUP and MacClade. The format is described fully in:
-+Maddison, D. R., D. L. Swofford and W. P. Maddison.  1997.
-+NEXUS: an extensible file format for systematic information.
-+Systematic Biology 46:590-621.
-+
-+BOOTSTRAP LABELS ON: By default, the bootstrap values are correctly placed on
-+the tree branches of the phylip format output tree. The toggle allows them to
-+be placed on the nodes, which is incorrect, but some display packages (e.g.
-+TreeTool, TreeView and Phylowin) only support node labelling but not branch
-+labelling. Care should be taken to note which branches and labels go together. 
-+
-+
-+>>HELP C <<
-+                               Colors
-+
-+Clustal X provides a versatile coloring scheme for the sequence alignment 
-+display. The sequences (or profiles) are colored automatically, when they are
-+loaded. Sequences can be colored either by assigning a color to specific
-+residues, or on the basis of an alignment consensus. In the latter case, the
-+alignment consensus is calculated automatically, and the residues in each
-+column are colored according to the consensus character assigned to that
-+column. In this way, you can choose to highlight, for example, conserved
-+hydrophilic or hydrophobic positions in the alignment.
-+
-+The 'rules' used to color the alignment are specified in a COLOR PARAMETER
-+FILE. Clustal X automatically looks for a file called 'colprot.par' for protein
-+sequences or 'coldna.par' for DNA, in the current directory. (If you're running
-+under UNIX, it then looks in your home directory, and finally in the
-+directories in your PATH environment variable).
-+
-+By default, if no color parameter file is found, protein sequences are colored
-+by residue as follows:
-+
-+<PRE>
-+	Color			Residue Code
-+
-+	ORANGE			GPST
-+	RED			HKR
-+	BLUE			FWY
-+	GREEN			ILMV
-+</PRE>
-+
-+In the case of DNA sequences, the default colors are as follows:
-+
-+<PRE>
-+	Color			Residue Code
-+
-+	ORANGE			A
-+	RED			C
-+	BLUE			T
-+	GREEN			G
-+</PRE>
-+
-+
-+The default BACKGROUND COLORING option shows the sequence residues using a
-+black character on a colored background. It can be switched off to show
-+residues as a colored character on a white background. 
-+
-+Either BLACK AND WHITE or DEFAULT COLOR options can be selected. The Color
-+option looks first for the color parameter file (as described above) and, if no
-+file is found, uses the default residue-specific colors.
-+
-+You can specify your own coloring scheme by using the LOAD COLOR PARAMETER FILE
-+option. The format of the color parameter file is described below.
-+
-+<H4>
-+COLOR PARAMETER FILE
-+</H4>
-+
-+This file is divided into 3 sections:
-+
-+1) the names and rgb values of the colors
-+2) the rules for calculating the consensus
-+3) the rules for assigning colors to the residues
-+ 
-+An example file is given here.
-+
-+<PRE>
-+ --------------------------------------------------------------------
-+@rgbindex
-+RED          0.9 0.1 0.1
-+BLUE         0.1 0.1 0.9
-+GREEN        0.1 0.9 0.1
-+YELLOW       0.9 0.9 0.0
-+
-+@consensus
-+% = 60% w:l:v:i:m:a:f:c:y:h:p
-+# = 80% w:l:v:i:m:a:f:c:y:h:p
-+- = 50% e:d
-++ = 60% k:r
-+q = 50% q:e
-+p = 50% p
-+n = 50% n
-+t = 50% t:s
-+
-+@color
-+g = RED
-+p = YELLOW
-+t = GREEN if t:%:#
-+n = GREEN if n
-+w = BLUE if %:#:p
-+k = RED if +
-+ --------------------------------------------------------------------
-+</PRE>
-+
-+The first section is optional and is identified by the header @rgbindex. If
-+this section exists, each color used in the file must be named and the rgb
-+values specified (on a scale from 0 to 1). If the rgb index section is not
-+found, the following set of hard-coded colors will be used.
-+
-+<PRE>
-+RED          0.9 0.1 0.1
-+BLUE         0.1 0.1 0.9
-+GREEN        0.1 0.9 0.1
-+ORANGE       0.9 0.7 0.3
-+CYAN         0.1 0.9 0.9
-+PINK         0.9 0.5 0.5
-+MAGENTA      0.9 0.1 0.9
-+YELLOW       0.9 0.9 0.0
-+</PRE>
-+
-+The second section is optional and is identified by the header @consensus. It
-+defines how the consensus is calculated.
-+ 
-+The format of each consensus parameter is:-
-+ 
-+<PRE>
-+c = n% residue_list
-+ 
-+        where
-+              c             is a character used to identify the parameter.
-+              n             is an integer value used as the percentage cutoff
-+                            point.
-+              residue_list  is a list of residues denoted by a single
-+                            character, delimited by a colon (:).
-+</PRE>
-+ 
-+For example:   # = 60% w:l:v:i
-+
-+will assign a consensus character # to any column in the alignment which
-+contains more than 60% of the residues w,l,v and i.
-+        
-+ 
-+The third section is identified by the header @color, and defines how colors
-+are assigned to each residue in the alignment.
-+ 
-+The color parameters can take one of two formats:
-+
-+<PRE>
-+1) r = color
-+2) r = color if consensus_list
-+ 
-+        where
-+              r             is a character used to denote a residue.
-+              color         is one of the colors in the GDE color lookup table.
-+              consensus_list is a list of consensus characters, each a single
-+                            character, delimited by a colon (:).
-+</PRE>
-+ 
-+Examples:
-+1) g = ORANGE
-+
-+will color all glycines ORANGE, regardless of the consensus.
-+
-+2) w = BLUE if w:%:#
-+
-+will color BLUE any tryptophan which is found in a column with a consensus of
-+w, % or #.
-+ 
-+
-+>>HELP Q <<
-+                       Alignment Quality Analysis
-+
-+<H3>
-+QUALITY SCORES
-+</H3>
-+--------------
-+
-+Clustal X provides an indication of the quality of an alignment by plotting
-+a 'conservation score' for each column of the alignment. A high score indicates
-+a well-conserved column; a low score indicates low conservation. The quality
-+curve is drawn below the alignment.
-+
-+Two methods are also provided to indicate single residues or sequence segments
-+which score badly in the alignment.
-+ 
-+Low-scoring residues are expected to occur at a moderate frequency in all the
-+sequences because of their steady divergence due to the natural processes of
-+evolution. The most divergent sequences are likely to have the most outliers.
-+However, the highlighted residues are especially useful in pointing to
-+sequence misalignments. Note that clustering of highlighted residues is a
-+strong indication of misalignment. This can arise due to various reasons, for
-+example:
-+ 
-+        1. Partial or total misalignments caused by a failure in the
-+        alignment algorithm. Usually only in difficult alignment cases.
-+ 
-+        2. Partial or total misalignments because at least one of the
-+        sequences in the given set is partly or completely unrelated to the
-+        other sequences. It is up to the user to check that the set of
-+        sequences are alignable.
-+
-+        3. Frameshift translation errors in a protein sequence causing local
-+        mismatched regions to be heavily highlighted. These are surprisingly
-+        common in database entries. If suspected, a 3-frame translation of
-+        the source DNA needs to be examined.
-+ 
-+Occasionally, highlighted residues may point to regions of some biological
-+significance. This might happen for example if a protein alignment contains a
-+sequence which has acquired new functions relative to the main sequence set. It
-+is important to exclude other explanations, such as error or the natural
-+divergence of sequences, before invoking a biological explanation.
-+
-+
-+<H3>
-+LOW-SCORING SEGMENTS
-+</H3>
-+--------------------
-+
-+Unreliable regions in the alignment can be highlighted using the Low-Scoring
-+Segments option. A sequence-weighted profile is used to indicate any segments
-+in the sequences which score badly. Because the profile calculation may take
-+some time, an option is provided to calculate LOW-SCORING SEGMENTS. The 
-+segment display can then be toggled on or off without having to repeat the
-+time-consuming calculations.
-+
-+For details of the low-scoring segment calculation, see the CALCULATION section
-+below.
-+
-+
-+<H4>
-+LOW-SCORING SEGMENT PARAMETERS
-+</H4>
-+------------------------------
-+
-+MINIMUM LENGTH OF SEGMENTS: short segments (or even single residues) can be
-+hidden by increasing the minimum length of segments which will be displayed.
-+
-+DNA MARKING SCALE is used to remove less significant segments from the 
-+highlighted display. Increase the scale to display more segments; decrease the
-+scale to remove the least significant.
-+
-+
-+PROTEIN WEIGHT MATRIX: the scoring table which describes the similarity of each
-+amino acid to each other. The matrix is used to calculate the sequence-
-+weighted profile scores. There are four 'in-built' Log-Odds matrices offered:
-+the Gonnet PAM 80, 120, 250, 350 matrices. A more stringent matrix which only
-+gives a high score to identities and the most favoured conservative
-+substitutions, may be more suitable when the sequences are closely related. For
-+more divergent sequences, it is appropriate to use "softer" matrices which give
-+a high score to many other frequent substitutions. This  option automatically
-+recalculates the low-scoring segments.
-+
-+
-+DNA WEIGHT MATRIX: Two hard-coded matrices are available:
-+
-+1) IUB. This is the default scoring matrix used by BESTFIT for the comparison
-+of nucleic acid sequences. X's and N's are treated as matches to any IUB
-+ambiguity symbol. All matches score 1.0; all mismatches for IUB symbols score
-+0.9.
-+
-+2) CLUSTALW(1.6). The previous system used by ClustalW, in which matches score
-+1.0 and mismatches score 0. All matches for IUB symbols also score 0. 
-+
-+A new matrix can be read from a file on disk, if the filename consists only
-+of lower case characters. The values in the new weight matrix should be
-+similarities and should be NEGATIVE for infrequent substitutions.
-+ 
-+INPUT FORMAT. The format used for a new matrix is the same as the BLAST
-+program. Any lines beginning with a # character are assumed to be comments. The
-+first non-comment line should contain a list of amino acids in any order, using
-+the 1 letter code, followed by a * character. This should be followed by a
-+square matrix of scores, with one row and one column for each amino acid. The
-+last row and column of the matrix (corresponding to the * character) contain
-+the minimum score over the whole matrix.
-+
-+<H4>
-+QUALITY SCORE PARAMETERS
-+</H4>
-+------------------------
-+
-+You can customise the column 'quality scores' plotted underneath the alignment
-+display using the following options.
-+
-+SCORE PLOT SCALE: this is a scalar value from 1 to 10, which can be used to
-+change the scale of the quality score plot. 
-+
-+RESIDUE EXCEPTION CUTOFF: this is a scalar value from 1 to 10, which can be
-+used to change the number of residue exceptions which are highlighted in the
-+alignment display. (For an explanation of this cutoff, see the CALCULATION OF
-+RESIDUE EXCEPTIONS section below.)
-+
-+PROTEIN WEIGHT MATRIX: the scoring table which describes the similarity of 
-+each amino acid to each other. 
-+ 
-+DNA WEIGHT MATRIX: two hard-coded matrices are available: IUB and CLUSTALW(1.6).
-+
-+For more information about the weight matrices, see the help above for
-+the Low-scoring Segments Weight Matrix.
-+
-+For details of the quality score calculations, see the CALCULATION section
-+below.
-+
-+
-+<STRONG>
-+SHOW LOW-SCORING SEGMENTS
-+</STRONG>
-+                       
-+The low-scoring segment display can be toggled on or off. This option does not
-+recalculate the profile scores.
-+
-+
-+<STRONG>
-+SHOW EXCEPTIONAL RESIDUES
-+</STRONG>
-+                       
-+This option highlights individual residues which score badly in the alignment
-+quality calculations. Residues which score exceptionally low are highlighted by
-+using a white character on a grey background.
-+
-+<STRONG>
-+SAVE QUALITY SCORES TO FILE
-+</STRONG>
-+
-+The quality scores that are plotted underneath the alignment display can also
-+be saved in a text file. Each column in the alignment is written on one line in
-+the output file, with the value of the quality score at the end of the line.
-+Only the sequences currently selected in the display are written to the file.
-+One use for quality scores is to color residues in a protein structure by
-+sequence conservation. In this way conserved surface residues can be
-+highlighted to locate functional regions such as ligand-binding sites.
-+
-+
-+<H3>
-+CALCULATION OF QUALITY SCORES
-+</H3>
-+-----------------------------
-+
-+Suppose we have an alignment of m sequences of length n. Then, the alignment
-+can be written as:
-+
-+<PRE>
-+        A11 A12 A13 .......... A1n
-+        A21 A22 A23 .......... A2n
-+        .
-+        .
-+        Am1 Am2 Am3 .......... Amn
-+</PRE>
-+
-+We also have a residue comparison matrix of size R where C(i,j) is the score
-+for aligning residue i with residue j.
-+
-+We want to calculate a score for the conservation of the jth position in the
-+alignment.
-+
-+To do this, we define an R-dimensional sequence space. For the jth position in 
-+the alignment, each sequence consists of a single residue which is assigned a
-+point S in the space. S has R dimensions, and for sequence i, the rth dimension
-+is defined as:
-+
-+<PRE>
-+	Sr =    C(r,Aij)
-+</PRE>
-+
-+We then calculate a consensus value for the jth position in the alignment. This
-+value X also has R dimensions, and the rth dimension is defined as:
-+
-+<PRE>
-+	Xr = (   SUM   (Fij * C(i,r)) ) / m
-+               1<=i<=R
-+</PRE>
-+
-+where Fij is the count of residues i at position j in the alignment.
-+
-+Now we can calculate the distance Di between each sequence i and the consensus 
-+position X in the R-dimensional space.
-+
-+<PRE>
-+	Di = SQRT   (   SUM   (Xr - Sr)(Xr - Sr) )
-+                      1<=r<=R
-+
-+</PRE>
-+
-+The quality score for the jth position in the alignment is defined as the mean
-+of the sequence distances Di.
-+
-+The score is normalised by multiplying by the percentage of sequences which
-+have residues (and not gaps) at this position.
-+
-+<H3>
-+CALCULATION OF RESIDUE EXCEPTIONS
-+</H3>
-+---------------------------------
-+
-+The jth residue of the ith sequence is considered as an exception if the
-+distance Di of the sequence from the consensus value X is greater than (Upper
-+Quartile + Inter Quartile Range * Cutoff). The value used as a cutoff for
-+displaying exceptions can be set from the SCORE PARAMETERS menu. A high cutoff
-+value will only display very significant exceptions; a low value will allow
-+more, less significant, exceptions to be highlighted.
-+
-+(NB. Sequences which contain gaps at this position are not included in the
-+exception calculation.)
-+
-+
-+<H3>
-+CALCULATION OF LOW-SCORING SEGMENTS
-+</H3>
-+-----------------------------------
-+
-+Suppose we have an alignment of m sequences of length n. Then, the alignment
-+can be written as:
-+
-+<PRE>
-+        A11 A12 A13 .......... A1n
-+        A21 A22 A23 .......... A2n
-+        .
-+        .
-+        Am1 Am2 Am3 .......... Amn
-+</PRE>
-+
-+We also have a residue comparison matrix of size R where C(i,j) is the score
-+for aligning residue i with residue j.
-+
-+We calculate sequence weights by building a neighbour-joining tree, in which
-+branch lengths are proportional to divergence. Summing the branches by branch
-+ownership provides the weights. See Thompson et al., CABIOS, 10, 19 (1994) and
-+Henikoff et al., JMB, 243, 574 (1994).
-+
-+To find the low-scoring segments in a sequence Si, we build a weighted profile
-+of the remaining sequences in the alignment. Suppose we find residue r at 
-+position j in the sequence; then the score for the jth position in the sequence
-+is defined as
-+
-+<PRE>
-+	Score(Si,j) = Profile(j,r)   where Profile(j,r) is the profile score
-+                                       for residue r at position j in the
-+                                       alignment.
-+</PRE>
-+
-+These residue scores are summed along the sequence in both forward and backward
-+directions. If the sum of the scores is positive, then it is reset to zero.
-+Segments which score negatively in both directions are considered as 
-+'low-scoring' and will be highlighted in the alignment display.
-+
-+
-+>>HELP 9 <<
-+              Command Line Parameters
-+
-+                DATA (sequences)
-+
-+-INFILE=file.ext                             :input sequences
-+-PROFILE1=file.ext  and  -PROFILE2=file.ext  :profiles (aligned sequences)
-+
-+
-+                VERBS (do things)
-+
-+-OPTIONS	    :list the command line parameters
-+-HELP  or -CHECK    :outline the command line parameters
-+-ALIGN              :do full multiple alignment 
-+-TREE               :calculate NJ tree
-+-BOOTSTRAP(=n)      :bootstrap a NJ tree (n= number of bootstraps; def. = 1000)
-+-CONVERT            :output the input sequences in a different file format
-+
-+
-+                PARAMETERS (set things)
-+
-+***General settings:****
-+-INTERACTIVE :read command line, then enter normal interactive menus
-+-QUICKTREE   :use FAST algorithm for the alignment guide tree
-+-TYPE=       :PROTEIN or DNA sequences
-+-NEGATIVE    :protein alignment with negative values in matrix
-+-OUTFILE=    :sequence alignment file name
-+-OUTPUT=     :GCG, GDE, PHYLIP, PIR or NEXUS
-+-OUTORDER=   :INPUT or ALIGNED
-+-CASE=       :LOWER or UPPER (for GDE output only)
-+-SEQNOS=     :OFF or ON (for Clustal output only)
-+
-+
-+***Fast Pairwise Alignments:***
-+-KTUPLE=n      :word size
-+-TOPDIAGS=n  :number of best diags.
-+-WINDOW=n    :window around best diags.
-+-PAIRGAP=n   :gap penalty
-+-SCORE=      :PERCENT or ABSOLUTE
-+
-+
-+***Slow Pairwise Alignments:***
-+-PWMATRIX=    :Protein weight matrix=BLOSUM, PAM, GONNET, ID or filename
-+-PWDNAMATRIX= :DNA weight matrix=IUB, CLUSTALW or filename
-+-PWGAPOPEN=f  :gap opening penalty
-+-PWGAPEXT=f  :gap extension penalty
-+ 
-+
-+***Multiple Alignments:***
-+-NEWTREE=    :file for new guide tree
-+-USETREE=    :file for old guide tree
-+-MATRIX=     :Protein weight matrix=BLOSUM, PAM, GONNET, ID or filename
-+-DNAMATRIX=  :DNA weight matrix=IUB, CLUSTALW or filename
-+-GAPOPEN=f   :gap opening penalty
-+-GAPEXT=f  :gap extension penalty
-+-ENDGAPS     :no end gap separation pen.
-+-GAPDIST=n   :gap separation pen. range
-+-NOPGAP      :residue-specific gaps off
-+-NOHGAP    :hydrophilic gaps off
-+-HGAPRESIDUES= :list hydrophilic res.
-+-MAXDIV=n    :% ident. for delay
-+-TYPE=       :PROTEIN or DNA
-+-TRANSWEIGHT=f :transitions weighting
-+
-+
-+***Profile Alignments:***
-+-PROFILE      :Merge two alignments by profile alignment
-+-NEWTREE1=    :file for new guide tree for profile1
-+-NEWTREE2=    :file for new guide tree for profile2
-+-USETREE1=    :file for old guide tree for profile1
-+-USETREE2=    :file for old guide tree for profile2
-+
-+
-+***Sequence to Profile Alignments:***
-+-SEQUENCES   :Sequentially add profile2 sequences to profile1 alignment
-+-NEWTREE=    :file for new guide tree
-+-USETREE=    :file for old guide tree
-+
-+
-+***Structure Alignments:***
-+-NOSECSTR1     :do not use secondary structure/gap penalty mask for profile 1 
-+-NOSECSTR2     :do not use secondary structure/gap penalty mask for profile 2
-+-SECSTROUT=STRUCTURE or MASK or BOTH or NONE  :output in alignment file
-+-HELIXGAP=n    :gap penalty for helix core residues 
-+-STRANDGAP=n   :gap penalty for strand core residues
-+-LOOPGAP=n     :gap penalty for loop regions
-+-TERMINALGAP=n :gap penalty for structure termini
-+-HELIXENDIN=n  :number of residues inside helix to be treated as terminal
-+-HELIXENDOUT=n :number of residues outside helix to be treated as terminal
-+-STRANDENDIN=n :number of residues inside strand to be treated as terminal
-+-STRANDENDOUT=n:number of residues outside strand to be treated as terminal 
-+
-+
-+***Trees:***
-+-OUTPUTTREE=nj OR phylip OR dist OR nexus
-+-SEED=n    :seed number for bootstraps
-+-KIMURA      :use Kimura's correction
-+-TOSSGAPS  :ignore positions with gaps
-+-BOOTLABELS=node OR branch :position of bootstrap values in tree display
-+
-+
-+>>HELP R <<
-+                             References
-+
-+<STRONG>
-+The ClustalX program is described in the manuscript:
-+</STRONG>
-+
-+Thompson,J.D., Gibson,T.J., Plewniak,F., Jeanmougin,F. and Higgins,D.G. (1997)
-+The ClustalX windows interface: flexible strategies for multiple sequence 
-+alignment aided by quality analysis tools. Nucleic Acids Research, 25:4876-4882.
-+
-+
-+<STRONG>
-+The ClustalW program is described in the manuscript:
-+</STRONG>
-+
-+Thompson, J.D., Higgins, D.G. and Gibson, T.J. (1994) CLUSTAL W: improving the
-+sensitivity of progressive multiple sequence alignment through sequence
-+weighting, positions-specific gap penalties and weight matrix choice.  Nucleic
-+Acids Research, 22:4673-4680.
-+
-+
-+<STRONG>
-+The ClustalV program is described in the manuscript:
-+</STRONG>
-+
-+Higgins,D.G., Bleasby,A.J. and Fuchs,R. (1992) CLUSTAL V: improved software for
-+multiple sequence alignment. CABIOS 8,189-191.
-+
-+
-+<STRONG>
-+The original Clustal program is described in the manuscripts:
-+</STRONG>
-+
-+Higgins,D.G. and Sharp,P.M. (1989) Fast and sensitive multiple sequence
-+alignments on a microcomputer.
-+CABIOS 5,151-153.
-+
-+Higgins,D.G. and Sharp,P.M. (1988) CLUSTAL: a package for performing multiple
-+sequence alignment on a microcomputer. Gene 73,237-244.
-+
-+-------------------------------------------------------------------------------
-+<STRONG>
-+Some tips on using Clustal X:
-+</STRONG>
-+
-+Jeanmougin,F., Thompson,J.D., Gouy,M., Higgins,D.G. and Gibson,T.J. (1998)
-+Multiple sequence alignment with Clustal X. Trends Biochem Sci, 23, 403-5.
-+
-+<STRONG>
-+Some tips on using Clustal W:
-+</STRONG>
-+
-+Higgins, D. G., Thompson, J. D. and Gibson, T. J. (1996) Using CLUSTAL for
-+multiple sequence alignments. Methods Enzymol., 266, 383-402.
-+
-+-------------------------------------------------------------------------------
-+<STRONG>
-+You can get the latest version of the ClustalX program by anonymous ftp to:
-+</STRONG>
-+
-+ftp-igbmc.u-strasbg.fr
-+ftp.embl-heidelberg.de
-+ftp.ebi.ac.uk
-+
-+<STRONG>
-+Or, have a look at the following WWW site:
-+</STRONG>
-+
-+http://www-igbmc.u-strasbg.fr/BioInfo/
-+
-+
- This is the on-line help file for Clustal X (version 1.83), using the NCBI
- Vibrant Toolkit.   
- 

Deleted: trunk/packages/clustalx/trunk/debian/patches/interface.c.patch
===================================================================
--- trunk/packages/clustalx/trunk/debian/patches/interface.c.patch	2009-04-30 23:56:12 UTC (rev 3340)
+++ trunk/packages/clustalx/trunk/debian/patches/interface.c.patch	2009-05-01 00:07:09 UTC (rev 3341)
@@ -1,226 +0,0 @@
-Index: clustalw-1.83/interface.c
-===================================================================
---- clustalw-1.83.orig/interface.c
-+++ clustalw-1.83/interface.c
-@@ -1223,8 +1223,7 @@
- 			while(fgets(temp,MAXLINE+1,help_file)) {
- 				if(strstr(temp, help_marker)){
- 				  	if(usemenu) {
--						fprintf(stdout,"\n");
--				    		getstr("Press [RETURN] to continue",lin2);
-+				    		getstr("\nPress [RETURN] to continue",MAXLINE+1,lin2);
- 				  	}
- 					fclose(help_file);
- 					return;
-@@ -1235,8 +1234,7 @@
- 				}
- 			       if(usemenu) {
- 			          if(nlines >= PAGE_LEN) {
--				     	   fprintf(stdout,"\n");
--			 	  	   getstr("Press [RETURN] to continue or  X  to stop",lin2);
-+			 	  	   getstr("\nPress [RETURN] to continue or  X  to stop",MAXLINE+1,lin2);
- 				  	   if(toupper(*lin2) == 'X') {
- 						   fclose(help_file);
- 						   return;
-@@ -1247,8 +1245,7 @@
- 			       }
- 			}
- 			if(usemenu) {
--				fprintf(stdout,"\n");
--				getstr("Press [RETURN] to continue",lin2);
-+				getstr("\nPress [RETURN] to continue",MAXLINE+1,lin2);
- 			}
- 			fclose(help_file);
- 		}
-@@ -1286,8 +1283,7 @@
-                 fputs(temp,stdout);
-                 ++nlines;
-                 if(nlines >= PAGE_LEN) {
--                        fprintf(stdout,"\n");
--                        getstr("Press [RETURN] to continue or  X  to stop",lin2);
-+                        getstr("\nPress [RETURN] to continue or  X  to stop",MAXLINE+1,lin2);
-                         if(toupper(*lin2) == 'X') {
-                                 fclose(file);
-                                 return;
-@@ -1297,8 +1293,7 @@
-                 }
-         }
-         fclose(file);
--        fprintf(stdout,"\n");
--        getstr("Press [RETURN] to continue",lin2);
-+        getstr("\nPress [RETURN] to continue",MAXLINE+1,lin2);
- }
- 
- 
-@@ -1747,7 +1742,7 @@
-         FILE *infile;
- 
-         if(usemenu)
--                getstr("Enter name of the matrix file",lin2);
-+                getstr("Enter name of the matrix file",MAXLINE+1,lin2);
-         else
-                 strcpy(lin2,str);
- 
-@@ -1773,7 +1768,7 @@
-         FILE *infile;
- 
-         if(usemenu)
--                getstr("Enter name of the matrix file",lin2);
-+                getstr("Enter name of the matrix file",MAXLINE+1,lin2);
-         else
-                 strcpy(lin2,str);
- 
-@@ -2163,6 +2158,7 @@
-  
- {	static char temp[FILENAMELEN+1];
- 	static char local_prompt[MAXLINE];
-+	static char local_prompt_tmp[MAXLINE+FILENAMELEN+1];
- 	FILE * file_handle;
- 
- /*	if (*file_name == EOS) {
-@@ -2174,17 +2170,17 @@
- 		warning("Output file name is the same as input file.");
- 		if (usemenu) {
- 			strcpy(local_prompt,"\n\nEnter new name to avoid overwriting ");
--			strcat(local_prompt," [%s]: ");          
--			fprintf(stdout,local_prompt,file_name);
--			gets(temp);
-+			strcat(local_prompt," [%s]");          
-+			sprintf(local_prompt_tmp,local_prompt,file_name);
-+			getstr(local_prompt_tmp,FILENAMELEN+1,temp);
- 			if(*temp != EOS) strcpy(file_name,temp);
- 		}
- 	}
- 	else if (usemenu) {
- 		strcpy(local_prompt,prompt);
--		strcat(local_prompt," [%s]: ");          
--		fprintf(stdout,local_prompt,file_name);
--		gets(temp);
-+		strcat(local_prompt," [%s]");          
-+		sprintf(local_prompt_tmp,local_prompt,file_name);
-+		getstr(local_prompt_tmp,FILENAMELEN+1,temp);
- 		if(*temp != EOS) strcpy(file_name,temp);
- 	}
- 
-@@ -2260,7 +2256,7 @@
-         	}
-         	else {
-                  	if((tree = open_output_file(
--                	"\nEnter name for new GUIDE TREE           file  ",path,
-+                	"\nEnter name for new GUIDE TREE file ",path,
-                 	phylip_name,"dnd")) == NULL) return;
-         	}
- 	}
-@@ -2327,6 +2323,7 @@
- { 
- 	char path[FILENAMELEN+1];
- 	char tree_name[FILENAMELEN+1],temp[MAXLINE+1];
-+	char tmp_msg[MAXLINE+1+300];
- 	Boolean use_tree;
- 	FILE *tree;
- 	sint i,j,count;
-@@ -2383,9 +2380,9 @@
-         	if((tree=fopen(tree_name,"r"))!=NULL) {
- #endif
- 		if (usemenu)
--            	fprintf(stdout,"\nUse the existing GUIDE TREE file,  %s  (y/n) ? [y]: ",
-+            	sprintf(tmp_msg,"\nUse the existing GUIDE TREE file,  %s  (y/n) ? [y]",
-                                            tree_name);
--                gets(temp);
-+                getstr(tmp_msg,MAXLINE+1,temp);
-                 if(*temp != 'n' && *temp != 'N') {
-                     strcpy(phylip_name,tree_name);
-                     use_tree = TRUE;
-@@ -2584,6 +2581,7 @@
- void get_tree(char *phylip_name)
- {
- 	char path[FILENAMELEN+1],temp[MAXLINE+1];
-+	char tmp_msg[FILENAMELEN+300];
- 	sint count;
- 	
- 	if(empty) {
-@@ -2615,9 +2613,9 @@
-        			strcpy(phylip_name,path);
-        			strcat(phylip_name,"dnd");
- 
--            fprintf(stdout,"\nEnter a name for the guide tree file [%s]: ",
-+			sprintf(tmp_msg,"\nEnter a name for the guide tree file [%s]",
-                                            phylip_name);
--                	gets(temp);
-+                	getstr(tmp_msg,MAXLINE+1,temp);
-                 	if(*temp != EOS)
-                         	strcpy(phylip_name,temp);
-         	}
-@@ -2685,6 +2683,8 @@
- 	char path[FILENAMELEN+1];
- 	char tree_name[FILENAMELEN+1];
- 	char temp[MAXLINE+1];
-+	char tmp_msg[FILENAMELEN+300];
-+
- 	Boolean use_tree1,use_tree2;
- 	FILE *tree;
- 	sint count,i,j,dscore;
-@@ -2717,9 +2717,9 @@
- #else
-         	if((tree=fopen(tree_name,"r"))!=NULL) {
- #endif
--            	fprintf(stdout,"\nUse the existing GUIDE TREE file for Profile 1,  %s  (y/n) ? [y]: ",
-+            	sprintf(tmp_msg,"\nUse the existing GUIDE TREE file for Profile 1,  %s  (y/n) ? [y]",
-                                            tree_name);
--                gets(temp);
-+                getstr(tmp_msg,MAXLINE+1,temp);
-                 if(*temp != 'n' && *temp != 'N') {
-                     strcpy(p1_tree_name,tree_name);
-                     use_tree1 = TRUE;
-@@ -2739,19 +2739,20 @@
- 			strcpy(tree_name,path);
- 			strcat(tree_name,"dnd");
- #ifdef VMS
--        	if((tree=fopen(tree_name,"r","rat=cr","rfm=var"))!=NULL) {
-+			if((tree=fopen(tree_name,"r","rat=cr","rfm=var"))!=NULL)
- #else
--        	if((tree=fopen(tree_name,"r"))!=NULL) {
-+			if((tree=fopen(tree_name,"r"))!=NULL)
- #endif
--            	fprintf(stdout,"\nUse the existing GUIDE TREE file for Profile 2,  %s  (y/n) ? [y]: ",
--                                           tree_name);
--                gets(temp);
--                if(*temp != 'n' && *temp != 'N') {
--                    strcpy(p2_tree_name,tree_name);
--                    use_tree2 = TRUE;
--                }
--                fclose(tree);
--        	}
-+			{
-+				sprintf(tmp_msg,"\nUse the existing GUIDE TREE file for Profile 2,  %s  (y/n) ? [y]",
-+						   tree_name);
-+				getstr(tmp_msg,MAXLINE+1,temp);
-+				if(*temp != 'n' && *temp != 'N') {
-+				    strcpy(p2_tree_name,tree_name);
-+				    use_tree2 = TRUE;
-+				}
-+				fclose(tree);
-+			}
- 		}
- 		else if (!usemenu && use_tree2_file) {
- 			use_tree2 = TRUE;
-@@ -4194,6 +4195,7 @@
- {
-   char parname[FILENAMELEN+1], temp[FILENAMELEN+1];
-   char path[FILENAMELEN+1];
-+  char tmp_msg[FILENAMELEN+300];
-   FILE *parout;
-   
-   get_path(seqname,path);
-@@ -4201,9 +4203,9 @@
-   strcat(parname,"par");
-   
-   if(usemenu) {
--    fprintf(stdout,"\nEnter a name for the parameter output file [%s]: ",
-+    sprintf(tmp_msg,"\nEnter a name for the parameter output file [%s]",
- 	    parname);
--    gets(temp);
-+    getstr(tmp_msg,FILENAMELEN+1,temp);
-     if(*temp != EOS)
-       strcpy(parname,temp);
-   }

Deleted: trunk/packages/clustalx/trunk/debian/patches/makefile.patch
===================================================================
--- trunk/packages/clustalx/trunk/debian/patches/makefile.patch	2009-04-30 23:56:12 UTC (rev 3340)
+++ trunk/packages/clustalx/trunk/debian/patches/makefile.patch	2009-05-01 00:07:09 UTC (rev 3341)
@@ -1,101 +0,0 @@
-Index: clustalw-1.83/makefile
-===================================================================
---- clustalw-1.83.orig/makefile
-+++ clustalw-1.83/makefile
-@@ -1,7 +1,15 @@
--install: clustalx clustalw
- 
--clean:
--	rm *.o
-+RM=/bin/rm -f
-+
-+BINDIR=$(DESTDIR)/usr/bin
-+XBINDIR=$(DESTDIR)/usr/X11R6/bin
-+DOCDIR=$(DESTDIR)/usr/share/doc/clustalw
-+XDOCDIR=$(DESTDIR)/usr/share/doc/clustalx
-+LIBDIR=$(DESTDIR)/usr/share/clustalw
-+MANDIR=$(DESTDIR)/usr/share/man/man1
-+XMANDIR=$(DESTDIR)/usr/X11R6/man/man1
-+DOCS=clustalv.doc clustalw.doc clustalw.ms README_W
-+XDOCS=README_X clustalx.html
- 
- OBJECTS = interface.o sequence.o showpair.o malign.o \
-   	util.o trees.o gcgcheck.o prfalign.o pairalign.o \
-@@ -12,25 +20,36 @@
- 
- HEADERS = general.h clustalw.h
- 
--CC	= cc
--CFLAGS  = -c -O
-+CC	= gcc
-+CFLAGS  = -c -O2
-+
-+MACHINE=$(shell uname -m)
-+ifeq ("$(MACHINE)","alpha")
-+ # -mieee is for the Alpha only: ClustalW divides by zero (yes, I know it's bad)
-+ # and expect the processor to goes on. -mieee tells the Alpha to comply with
-+ # the IEEE standard and to shut up about divisions by zero.
-+ CFLAGS  +=  -mieee
-+endif
-+
- LFLAGS	= -O -lm 
--NCBI_INC  = /dec/biolo/ncbi/include
--NCBI_LIB	= /dec/biolo/ncbi/lib
--CXFLAGS  = -DWIN_MOTIF -I$(NCBI_INC)
--LXFLAGS	= -L$(NCBI_LIB) -lvibrant -lncbi -lpthread -lXm -lXmu -lXt -lX11 -lm 
-+NCBI_INC= /usr/include/ncbi
-+NCBI_LIB= /usr/lib
-+CXFLAGS	= -DWIN_MOTIF -I$(NCBI_INC)
-+LXFLAGS	= -L/usr/X11R6/lib -lvibrant -lncbi -lpthread -lXm -lXmu -lXt -lX11 -lm
- 
--clustalw : $(OBJECTS) amenu.o clustalw.o
--	$(CC) -o $@ $(OBJECTS) amenu.o clustalw.o $(LFLAGS)
-+all: clustalx clustalw
- 
--interface.o : interface.c $(HEADERS) param.h
--	$(CC) $(CFLAGS) $*.c
-+machine:
-+	echo $(MACHINE)
- 
--amenu.o : amenu.c $(HEADERS) param.h
--	$(CC) $(CFLAGS) $*.c
-+clustalw : $(OBJECTS) $(XOBJECTS) amenu.o clustalw.o
-+	$(CC) -o $@ -I$(NCBI_INC) $(OBJECTS) amenu.o clustalw.o $(LFLAGS)
- 
- clustalx : $(OBJECTS) $(XOBJECTS) clustalx.o
--	$(CC) -o $@ $(OBJECTS) $(XOBJECTS) clustalx.o $(LFLAGS) $(LXFLAGS)
-+	$(CC) -o $@ -I$(NCBI_INC) $(OBJECTS) $(XOBJECTS) clustalx.o $(LFLAGS) $(LXFLAGS)
-+
-+clustalw.o : clustalw.c $(HEADERS)
-+	$(CC) $(CFLAGS) $*.c
- 
- clustalx.o : clustalx.c $(HEADERS)
- 	$(CC) $(CFLAGS) $(CXFLAGS) $*.c
-@@ -56,6 +75,25 @@
- trees.o : trees.c $(HEADERS) dayhoff.h
- 	$(CC) $(CFLAGS) $*.c
- 
--.c.o :
--	$(CC) $(CFLAGS) $?
-+
-+
-+install: all
-+	install -d $(BINDIR) $(XBINDIR) $(LIBDIR) $(DOCDIR)/examples $(MANDIR) $(XMANDIR) $(XDOCDIR)
-+	install -m 0755 clustalw $(BINDIR)
-+	install -m 0755 clustalx $(XBINDIR)
-+	install -m 0644 clustalw_help clustalx_help $(LIBDIR)
-+	install -m 0644 clustalw.1 $(MANDIR)
-+	install -m 0644 clustalx.1 $(MANDIR)
-+	install -m 0644 $(DOCS) $(DOCDIR)
-+	install -m 0644 $(XDOCS) $(XDOCDIR)
-+	cp -a -R tests.clustalw $(DOCDIR)/examples/tests
-+
-+.PHONY:	clean distclean
-+
-+clean:
-+	$(RM) *.o
-+
-+distclean: clean
-+	$(RM) clustalw clustalx
-+	cd tests.clustalw; make clean
- 

Deleted: trunk/packages/clustalx/trunk/debian/patches/sequence.c.patch
===================================================================
--- trunk/packages/clustalx/trunk/debian/patches/sequence.c.patch	2009-04-30 23:56:12 UTC (rev 3340)
+++ trunk/packages/clustalx/trunk/debian/patches/sequence.c.patch	2009-05-01 00:07:09 UTC (rev 3341)
@@ -1,13 +0,0 @@
-Index: clustalw-1.83/sequence.c
-===================================================================
---- clustalw-1.83.orig/sequence.c
-+++ clustalw-1.83/sequence.c
-@@ -924,7 +924,7 @@
- 	static Boolean dnaflag1;
- 	
- 	if(usemenu)
--		getstr("Enter the name of the sequence file",line);
-+		getstr("Enter the name of the sequence file",FILENAMELEN+1,line);
- 	else
- 		strcpy(line,seqname);
- 	if(*line == EOS) return -1;

Deleted: trunk/packages/clustalx/trunk/debian/patches/series
===================================================================
--- trunk/packages/clustalx/trunk/debian/patches/series	2009-04-30 23:56:12 UTC (rev 3340)
+++ trunk/packages/clustalx/trunk/debian/patches/series	2009-05-01 00:07:09 UTC (rev 3341)
@@ -1,11 +0,0 @@
-amenu.c.patch
-clustal-help.patch
-clustalw.h.patch
-clustalx.html.patch
-interface.c.patch
-sequence.c.patch
-trees.c.patch
-util.c.patch
-makefile.patch
-clustalx_help.patch
-xmenu.c.patch

Deleted: trunk/packages/clustalx/trunk/debian/patches/trees.c.patch
===================================================================
--- trunk/packages/clustalx/trunk/debian/patches/trees.c.patch	2009-04-30 23:56:12 UTC (rev 3340)
+++ trunk/packages/clustalx/trunk/debian/patches/trees.c.patch	2009-05-01 00:07:09 UTC (rev 3341)
@@ -1,13 +0,0 @@
-Index: clustalw-1.83/trees.c
-===================================================================
---- clustalw-1.83.orig/trees.c
-+++ clustalw-1.83/trees.c
-@@ -1497,7 +1497,7 @@
- 		fprintf(stdout,"\n           or 3) use the PHYLIP package.");
- 		fprintf(stdout,"\n\n");
- 		if (usemenu) 
--			getstr("Press [RETURN] to continue",dummy);
-+			getstr("Press [RETURN] to continue",10,dummy);
- 	}
- 
- 

Deleted: trunk/packages/clustalx/trunk/debian/patches/util.c.patch
===================================================================
--- trunk/packages/clustalx/trunk/debian/patches/util.c.patch	2009-04-30 23:56:12 UTC (rev 3340)
+++ trunk/packages/clustalx/trunk/debian/patches/util.c.patch	2009-05-01 00:07:09 UTC (rev 3341)
@@ -1,52 +0,0 @@
-Index: clustalw-1.83/util.c
-===================================================================
---- clustalw-1.83.orig/util.c
-+++ clustalw-1.83/util.c
-@@ -171,10 +171,18 @@
- 	return str;
- }
- 
--void getstr(char *instr,char *outstr)
-+void getstr(char *instr, int n, char *outstr)
- {	
-+	int sl;
- 	fprintf(stdout,"%s: ",instr);
--	gets(outstr);
-+	fgets(outstr,n,stdin);
-+	/*
-+	 * modify outstr for compatibility with prior used (insecure) gets()
-+	 */
-+	sl=strlen(outstr);
-+	if(sl>0 && '\n'==outstr[sl-1]) {
-+		outstr[sl-1]=0;
-+	}
- }
- 
- double getreal(char *instr,double minx,double maxx,double def)
-@@ -185,7 +193,7 @@
- 	
- 	while(TRUE) {
- 		fprintf(stdout,"%s (%.1f-%.1f)   [%.1f]: ",instr,minx,maxx,def);
--		gets(line);
-+		fgets(line,MAXLINE,stdin);
- 		status=sscanf(line,"%f",&ret);
- 		if(status == EOF) return def;
- 		if(ret>maxx) {
-@@ -210,7 +218,7 @@
- 	while(TRUE) {
- 		fprintf(stdout,"%s (%d..%d)    [%d]: ",
- 		instr,(pint)minx,(pint)maxx,(pint)def);
--		gets(line);
-+		fgets(line,MAXLINE,stdin);
- 		status=sscanf(line,"%d",&ret);
- 		if(status == EOF) return def;
- 		if(ret>maxx) {
-@@ -230,7 +238,7 @@
- {
- 	char line[MAXLINE];
- 	
--	getstr("\n\nEnter system command",line);
-+	getstr("\n\nEnter system command",MAXLINE,line);
- 	if(*line != EOS)
- 		system(line);
- 	fprintf(stdout,"\n\n");

Deleted: trunk/packages/clustalx/trunk/debian/patches/xmenu.c.patch
===================================================================
--- trunk/packages/clustalx/trunk/debian/patches/xmenu.c.patch	2009-04-30 23:56:12 UTC (rev 3340)
+++ trunk/packages/clustalx/trunk/debian/patches/xmenu.c.patch	2009-05-01 00:07:09 UTC (rev 3341)
@@ -1,13 +0,0 @@
-Index: xmenu.c
-===================================================================
---- ./xmenu.c	(révision 173)
-+++ ./xmenu.c	(révision 174)
-@@ -4411,7 +4411,7 @@
- 	while(TRUE) {
- 		if(fgets(temp,MAXLINE+1,fd) == NULL) {
- 			if(!found_help)
--				error("No help found in help file");
-+				error("No help found in help file [%s]",help_file);
- 			fclose(fd);
- 			return;
- 		}

Modified: trunk/packages/clustalx/trunk/debian/rules
===================================================================
--- trunk/packages/clustalx/trunk/debian/rules	2009-04-30 23:56:12 UTC (rev 3340)
+++ trunk/packages/clustalx/trunk/debian/rules	2009-05-01 00:07:09 UTC (rev 3341)
@@ -2,13 +2,16 @@
 
 include /usr/share/quilt/quilt.make
 
-build: patch build-stamp
+build: build-stamp
 build-stamp:
 	dh_testdir
+	qmake-qt4
 	$(MAKE)
+	# no longer required?
+	#mv clustalQtcurrent clustalx
 	touch build-stamp
 
-clean: unpatch
+clean:
 	dh_testdir
 	dh_testroot
 	[ ! -f Makefile ] || $(MAKE) distclean
@@ -22,6 +25,7 @@
 	dh_installdirs
 	dh_install
 
+# Build architecture-dependent files here.
 binary-arch: build install
 	dh_testdir
 	dh_testroot
@@ -42,10 +46,6 @@
 	dh_md5sums
 	dh_builddeb
 
-get-orig-source:
-	destdir=.;if [ -d debian ]; then destdir=..; fi; \
-	lynx --dump ftp://ftp.ebi.ac.uk/pub/software/unix/clustalx/clustalx1.83.sun.tar.gz | tar --delete clustalx1.83.sun/clustalx clustalx1.83.sun/clustalw  -f - | gzip -c > $$destdir/clustalw_1.83.orig.tar.gz
-
 binary: binary-arch
 binary-indep: # does nothing
 .PHONY: build clean binary-arch binary install

Modified: trunk/packages/clustalx/trunk/debian/watch
===================================================================
--- trunk/packages/clustalx/trunk/debian/watch	2009-04-30 23:56:12 UTC (rev 3340)
+++ trunk/packages/clustalx/trunk/debian/watch	2009-05-01 00:07:09 UTC (rev 3341)
@@ -1,2 +1,2 @@
 version=3
-ftp://ftp-igbmc.u-strasbg.fr/pub/ClustalX/ clustalx(.*)\.src.tar.gz
+ftp://ftp.ebi.ac.uk/pub/software/clustalw2/clustalx-(.*)-src\.tar\.gz