[med-svn] r1458 - in trunk/packages/clustalx/trunk: . debian debian/patches

Thu Feb 21 13:59:38 UTC 2008

Author: charles-guest
Date: 2008-02-21 13:59:37 +0000 (Thu, 21 Feb 2008)
New Revision: 1458

Added:
   trunk/packages/clustalx/trunk/debian/clustalx.manpages
   trunk/packages/clustalx/trunk/debian/patches/amenu.c.patch
   trunk/packages/clustalx/trunk/debian/patches/clustal-help.patch
   trunk/packages/clustalx/trunk/debian/patches/clustalw.h.patch
   trunk/packages/clustalx/trunk/debian/patches/clustalx.html.patch
   trunk/packages/clustalx/trunk/debian/patches/clustalx_help.patch
   trunk/packages/clustalx/trunk/debian/patches/interface.c.patch
   trunk/packages/clustalx/trunk/debian/patches/makefile.patch
   trunk/packages/clustalx/trunk/debian/patches/sequence.c.patch
   trunk/packages/clustalx/trunk/debian/patches/series
   trunk/packages/clustalx/trunk/debian/patches/trees.c.patch
   trunk/packages/clustalx/trunk/debian/patches/util.c.patch
   trunk/packages/clustalx/trunk/debian/patches/xmenu.c.patch
Removed:
   trunk/packages/clustalx/trunk/LICENSE
   trunk/packages/clustalx/trunk/README
   trunk/packages/clustalx/trunk/debian/clustalx.manpages
Modified:
   trunk/packages/clustalx/trunk/debian/changelog
   trunk/packages/clustalx/trunk/debian/clustalx.docs
   trunk/packages/clustalx/trunk/debian/clustalx.install
   trunk/packages/clustalx/trunk/debian/clustalx.menu
   trunk/packages/clustalx/trunk/debian/control
   trunk/packages/clustalx/trunk/debian/copyright
   trunk/packages/clustalx/trunk/debian/rules
Log:
Transient Clustal X 1.83 package

Deleted: trunk/packages/clustalx/trunk/LICENSE
===================================================================

--- trunk/packages/clustalx/trunk/LICENSE	2008-02-20 21:14:58 UTC (rev 1457)
+++ trunk/packages/clustalx/trunk/LICENSE	2008-02-21 13:59:37 UTC (rev 1458)
@@ -1,34 +0,0 @@
-Licensing ClustalW and ClustalX
-
-Date:29 November 2007
-
-The copyright for ClustalW and ClustalX is held by Des Higgins, Julie Thompson and Toby Gibson
-
-The binaries and source code are made available and can be distributed subject to the following conditions:
-
-Users are free to redistribute ClustalW or ClustalX in it's unmodified form as long as it is not for commercial gain.
-
-Anyone wishing to redistribute Clustal commercially should contact Toby Gibson at gibson at embl.de
-
-
-If users make changes/have ideas that they believe would be useful to the broader research community they can send their suggestions to the clustal development team at clustalw at ucd.ie where they will be considered for inclusion in future releases.
-
-
-
-Frequently asked questions about Clustal licensing?
----------------------------------------------------
-
-1. Do I have to pay to use ClustalW or ClustalX?
-No - unless you wish to redistribute Clustal for profit. In this case see question 4.
-
-2. Can I redistribute the Clustal binaries and source code?
-Yes. We have always wanted to see Clustal have as wide a userbase and distribution network as possible and are happy to see other sites host copies of the official Clustal code.
-
-3. Can I make changes to the source code?
-You can make changes for your own purposes but you should not redistribute the changed code.
-
-4. I want to include ClustalW/ClustalX in a commercial application who should I contact?
-Toby Gibson at gibson at embl.de (also cc des.higgins at ucd.ie)
-
-5. There is no Clustal distribution for platform X. I have changed the code in order to compile Clustal on this platform. Can I redistribute it?
-Ideally you should send us a copy of your source code changes as well as a binary. We will include it in our contributed binaries section on our FTP site along with an acknowledgement of your contribution.

Deleted: trunk/packages/clustalx/trunk/README
===================================================================
--- trunk/packages/clustalx/trunk/README	2008-02-20 21:14:58 UTC (rev 1457)
+++ trunk/packages/clustalx/trunk/README	2008-02-21 13:59:37 UTC (rev 1458)
@@ -1,72 +0,0 @@
-CLUSTAL W and CLUSTAL X Multiple Sequence Alignment Programs
-                (version 2.0, 26 November 2007)
-
-Contact email address: clustalw at ucd.ie
-
-For details and citation purposes see paper "Clustal W and Clustal X version 2.0", Larkin M.,  et al. Bioinformatics 2007 23(21):2947-2948
-http://bioinformatics.oxfordjournals.org/cgi/content/full/23/21/2947
-
-Clustal X provides a window-based user interface to the ClustalW multiple
-alignment program.
-
-This directory contains the files:
-
-clustalw-linux-i386-2.0.tar.gz - a compressed tar file with a linux binary for ClustalW (Debian/Ubuntu)
-
-clustalw-linux-src-2.0.tar.gz - a compressed tar file with source code for Clustalw for linux
-
-clustalw-mac-universal-2.0.tgz - a compressed tar file with a Mac universal binary
-
-clustalw-windows-xp-2.0.zip - a compressed Windows XP archive
-
-clustalw-windows-xp-src.tar.gz - a compressed tar file containing the source code for ClustalW for windows
-
-clustalx-linux-i386-2.0.tar.gz - a compressed tar file with the linux binary for ClustalX (Debian/Ubuntu)
-
-clustalx-linux-rhel3-libcppstatic-i386-2.0.tgz - a compressed tar file with the linux binary for ClustalX (Red Hat Enterprise edition 3 using the XFree X-windows libraries)
-
-clustalx-mac-universal-2.0.dmg - a disk image for the Mac universal binary for ClustalX
-
-clustalx-windows-xp-2.0.zip - a compressed Windows XP archive with the PC binary for ClustalX
-
-clustalx-src-2.0.zip - a compressed tar file with the source code for ClustalX (All platforms - requires Qt GUI framework from Trolltech to compile)
-
-
-Supported systems
-----------------------------------------------------------
-Windows XP
-
-Mac OSX 10.3.9 or higher
-
-Linux (debian and red hat binaries)
-
-We have successfully run the binaries on:
-Ubuntu 6.10 Edge Eft
-Red Hat Enterprise Edition 3.0
-Debian 4.0 testing
-
-
-Contributed binaries
-----------------------------------------------------------
-For operating systems that we cannot support ourselves we
-try to maintain a collection of binaries that have been
-compiled by other users. These can be found in the sub-directory "contributed"
-
-Acknowledgements
-----------------------------------------------------------
-Science Foundation Ireland who provided the funding which enabled the development of Clustal2.0
-
-Bug Tracking
-----------------------------------------------------------
-We track bugs in Clustal in a publicly viewable bugzilla instance. You can find it at: http://bioinformatics.ucd.ie/bugzilla
-If users find a bug or have issues running clustal2.0 they should contact the development team at clustalw at ucd.ie
-
-Known issues
-----------------------------------------------------------
-1. ClustalX2.0 doesn't run on Mac OSX 10.3.8 or lower
-Resolution: upgrade Mac OS to OSX 10.3.9 or better
-
-
-Next Release
-----------------------------------------------------------
-The next release 2.0.1 is scheduled for 29th January 2008

Modified: trunk/packages/clustalx/trunk/debian/changelog
===================================================================
--- trunk/packages/clustalx/trunk/debian/changelog	2008-02-20 21:14:58 UTC (rev 1457)
+++ trunk/packages/clustalx/trunk/debian/changelog	2008-02-21 13:59:37 UTC (rev 1458)
@@ -1,30 +1,27 @@
-clustalx (2.0.1-1) unstable; urgency=low
+clustalx (1.83-1) unstable; urgency=low
 
-  [ Steffen Moeller ]
-  * New upstream release.
-  * Updated watch file.
-  * Removed LICENSE from debian/clustalx.docs
-  * rename to clustalx seems no longer required in debian/rules
-  * moved clustalx.1 into debian folder (eases working with svn-buildpackage)
-  * added German translation to desktop file
-
- -- Steffen Moeller <moeller at debian.org>  Sun, 17 Feb 2008 13:16:09 +0100
-
-clustalx (2.0-1) UNRELEASED; urgency=low
-
-  * New upstream release:
-    - Uses Qt instead of lesstif.
-    - Includes new code for UPGMA guide trees.
-    - Includes iterative alignment facility.
-  * debian/copyright made machine-readable.
+  [ Charles Plessy ]
+  * Transient package while Clustal X version > 2.0 is being relicenced.
+    This package is the same as clustalw-1.83-3 except that it only builds
+    the clustalx binary package. In parallel, the Debian clustalw source
+    package will stop building the clustalx binary package.
   * Association between Clustal X and .aln files:
     - text/x-clustalw-alignment associated to clustalx in clustalx.desktop.
     - .aln declared as text/x-clustalw-alignment in clustalx.sharedmimeinfo.
     - text/x-clustalw-alignment associated to clustalx in clustalx.mime.
     - debian/rules calls dh_installmime.
+  * debian/control:
+    - Allowed upload by Debian Maintainers.
+    - Added Homepage: field.
+    - Removed clustalw.
+    - Checked conformance with Policy 3.7.3.
 
- -- Charles Plessy <charles-debian-nospam at plessy.org>  Thu, 17 Jan 2008 22:55:41 +0900
+  [ Steffen Moeller ]
+  * Updated watch file.
+  * added German translation to desktop file
 
+ -- Charles Plessy <charles-debian-nospam at plessy.org>  Thu, 21 Feb 2008 13:40:20 +0900
+
 clustalw (1.83-3) unstable; urgency=low
 
   [ Nelson A. de Oliveira ]

Modified: trunk/packages/clustalx/trunk/debian/clustalx.docs
===================================================================
--- trunk/packages/clustalx/trunk/debian/clustalx.docs	2008-02-20 21:14:58 UTC (rev 1457)
+++ trunk/packages/clustalx/trunk/debian/clustalx.docs	2008-02-21 13:59:37 UTC (rev 1458)
@@ -1 +1,2 @@
-README
+README_X
+clustalx.html

Modified: trunk/packages/clustalx/trunk/debian/clustalx.install
===================================================================
--- trunk/packages/clustalx/trunk/debian/clustalx.install	2008-02-20 21:14:58 UTC (rev 1457)
+++ trunk/packages/clustalx/trunk/debian/clustalx.install	2008-02-21 13:59:37 UTC (rev 1458)
@@ -1,3 +1,4 @@
-clustalx usr/bin
-clustalx.hlp usr/share/clustalw
-debian/clustalx.desktop usr/share/applications
+clustalx		usr/bin
+clustalx_help		usr/share/clustalw
+debian/clustalx.desktop	usr/share/applications
+

Deleted: trunk/packages/clustalx/trunk/debian/clustalx.manpages
===================================================================
--- trunk/packages/clustalx/trunk/debian/clustalx.manpages	2008-02-20 21:14:58 UTC (rev 1457)
+++ trunk/packages/clustalx/trunk/debian/clustalx.manpages	2008-02-21 13:59:37 UTC (rev 1458)
@@ -1 +0,0 @@
-debian/clustalx.1

Added: trunk/packages/clustalx/trunk/debian/clustalx.manpages
===================================================================
--- trunk/packages/clustalx/trunk/debian/clustalx.manpages	                        (rev 0)
+++ trunk/packages/clustalx/trunk/debian/clustalx.manpages	2008-02-21 13:59:37 UTC (rev 1458)
@@ -0,0 +1 @@
+debian/clustalx.1

Modified: trunk/packages/clustalx/trunk/debian/clustalx.menu
===================================================================
--- trunk/packages/clustalx/trunk/debian/clustalx.menu	2008-02-20 21:14:58 UTC (rev 1457)
+++ trunk/packages/clustalx/trunk/debian/clustalx.menu	2008-02-21 13:59:37 UTC (rev 1458)
@@ -1,5 +1,5 @@
 ?package(clustalx):needs="X11" \
     section="Applications/Science/Biology" \
-    title="ClustalX" \
+    title="Clustal X" \
     command="/usr/bin/clustalx"\
     hints="GUI for clustalw"

Modified: trunk/packages/clustalx/trunk/debian/control
===================================================================
--- trunk/packages/clustalx/trunk/debian/control	2008-02-20 21:14:58 UTC (rev 1457)
+++ trunk/packages/clustalx/trunk/debian/control	2008-02-21 13:59:37 UTC (rev 1458)
@@ -4,18 +4,18 @@
 Maintainer: Debian-Med Packaging Team <debian-med-packaging at lists.alioth.debian.org>
 DM-Upload-Allowed: yes
 Uploaders: Steffen Moeller <moeller at debian.org>, Charles Plessy <charles-debian-nospam at plessy.org>
-Build-Depends: debhelper (>= 5), libqt4-dev, quilt
+Build-Depends: debhelper (>= 5), libncbi6-dev, libvibrant6-dev, lesstif2-dev, quilt
 Standards-Version: 3.7.3
 Vcs-Browser: http://svn.debian.org/wsvn/debian-med/trunk/packages/clustalx/trunk/?rev=0&sc=0
 Vcs-Svn: svn://svn.debian.org/svn/debian-med/trunk/packages/clustalx/trunk/
 XS-Autobuild: yes
-Homepage: http://www.ebi.ac.uk/Tools/clustalw2/
+Homepage: ftp://ftp.ebi.ac.uk/pub/software/unix/clustalx/
 
 Package: clustalx
 Architecture: any
 Depends: ${shlibs:Depends}
-Suggests: clustalw, texshade|texlive-latex-extra, boxshade
-Description: GUI for clustalw
+Suggests: texshade|texlive-latex-extra, boxshade
+Description: GUI for Clustal W
  This package offers a GUI interface for the Clustal W multiple sequence
  alignment program. It provides an integrated environment for performing
  multiple sequence- and profile-alignments to analyse the results.
@@ -32,6 +32,3 @@
  .
  An alignment quality analysis can be performed and low-scoring segments or
  exceptional residues can be highlighted.
- .
- For details and citation purposes see paper "Clustal W and Clustal X version 
- 2.0", Larkin M., et al. Bioinformatics 2007 23(21):2947-2948

Modified: trunk/packages/clustalx/trunk/debian/copyright
===================================================================
--- trunk/packages/clustalx/trunk/debian/copyright	2008-02-20 21:14:58 UTC (rev 1457)
+++ trunk/packages/clustalx/trunk/debian/copyright	2008-02-21 13:59:37 UTC (rev 1458)
@@ -1,75 +1,76 @@
-X-Format-Specification: http://wiki.debian.org/Proposals/CopyrightFormat
-X-Debianized-By: Stephane Bortzmeyer <bortzmeyer at debian.org>
-X-Debianized-Date: Fri, 28 Aug 1998 16:09:48 +0200
-X-Source-Downloaded-From: ftp://ftp.ebi.ac.uk/pub/software/clustalw2
-X-Upstream-Author: Des Higgins, Julie Thompson and Toby Gibson
+This package was debianized by Andreas Tille <tille at debian.org> on
+Sat, 27 Oct 2001 22:16:53 +0200
 
-Files: debian/*
-Copyright: © 1998-1999 Stephane Bortzmeyer <bortzmeyer at debian.org>
-           © 2001 Dr. Guenter Bechly <gbechly at debian.org>
-           © 2001 Adrian Bunk <bunk at fs.tum.de>
-	   © 2001-2002 Andreas Tille <tille at debian.org>
-	   © 2003-2008 Steffen Möller <moeller at debian.org>
-	   © 2006 Kai Hendry <hendry at iki.fi>
-	   © 2007 Nelson A. de Oliveira <naoliv at debian.org>
-	   © 2007-2008 Charles Plessy <charles-debian-nospam at plessy.org>
-Licence: Unclear
- The licence of the earlier works was never stated. Some works have been
- obsoleted by release of version 2.0 — the lesstif migration for instance —
- but authors are left in the list of copyright holders by courtesy. The work
- of Charles Plessy and Steffen Möller can be treated as if it were public domain.
+It was downloaded from:
 
-Files: debian/clustalx.1
-Copyright: © 1998-1999 Stephane Bortzmeyer <bortzmeyer at debian.org>
-Licence: Unknown
+       ftp://ftp-igbmc.u-strasbg.fr/pub/ClustalX/  and
+       ftp://ftp-igbmc.u-strasbg.fr/pub/ClustalW/
 
-Files: *
-Copyright: © Des Higgins, Julie Thompson and Toby Gibson
-Licence: Not for commercial use, No modification of the code.
- Licensing ClustalW and ClustalX
- .
- Date:29 November 2007
- .
- The copyright for ClustalW and ClustalX is held by Des Higgins, Julie Thompson
- and Toby Gibson
- .
- The binaries and source code are made available and can be distributed subject
- to the following conditions:
- .
- Users are free to redistribute ClustalW or ClustalX in it's unmodified form as
- long as it is not for commercial gain.
- .
- Anyone wishing to redistribute Clustal commercially should contact Toby Gibson
- at gibson at embl.de
- .
- .
- If users make changes/have ideas that they believe would be useful to the
- broader research community they can send their suggestions to the clustal
- development team at clustalw at ucd.ie where they will be considered for inclusion
- in future releases.
+while the source was merged to one common upstream source (see README.Debian)
 
-X-Comment: Frequently asked questions about Clustal licensing?
-           ---------------------------------------------------
- .
- 1. Do I have to pay to use ClustalW or ClustalX?
- No - unless you wish to redistribute Clustal for profit. In this case see question 4.
- .
- 2. Can I redistribute the Clustal binaries and source code?
- Yes. We have always wanted to see Clustal have as wide a userbase and
- distribution network as possible and are happy to see other sites host copies
- of the official Clustal code.
- .
- 3. Can I make changes to the source code?
- .
- You can make changes for your own purposes but you should not redistribute the
- changed code.
- .
- 4. I want to include ClustalW/ClustalX in a commercial application who should I contact?
- Toby Gibson at gibson at embl.de (also cc des.higgins at ucd.ie)
- .
- 5. There is no Clustal distribution for platform X. I have changed the code in
- order to compile Clustal on this platform. Can I redistribute it?
- .
- Ideally you should send us a copy of your source code changes as well as a
- binary. We will include it in our contributed binaries section on our FTP site
- along with an acknowledgement of your contribution.
+Authors: 
+Toby Gibson <Toby.Gibson at EMBL-Heidelberg.de>
+Julie Thompson <julie at titus.u-strasbg.fr>
+Des Higgins <d.higgins at ucc.ie>
+
+Copyright:
+
+Non-free. You cannot distribute it at will.
+
+Debian holds a special exemption for distributing (see below). The licence does
+not forbid Debian from using autobuilders to create binary packages.
+
+Licence included here:
+
+**********************
+LICENCE FOR CLUSTAL W
+**********************
+
+Clustal W (hereafter "the program") is copyright (c) 1994-1998 by Julie D.
+Thompson, Desmond G. Higgins and Toby J. Gibson.
+
+Permission is granted to copy, distribute and use the program provided no fee
+is charged for it and provided that this copyright and licence notice is not
+removed or altered.
+
+The full source code of the program is provided free. You should not
+distribute a modified version of the program without obtaining the permission
+of the authors. You must keep the original copyright and licence notice. You
+must also document clearly the modifications you have made. You must make
+clear that this is not the original version.
+
+Commercial distributors of Clustal W are requested to contact the Clustal W
+authors in order to take out a non-exclusive licence. See the README file
+included with Clustal W for a rationale.
+
+You should understand that this software is provided as-is. The authors make
+no claims towards its suitability for any purpose and accept absolutely no
+liability for any damages the program may cause. Use at your own risk.
+
+* End of licence
+
+
+
+Special authorization for Debian:
+
+
+
+From: "Toby Gibson" <Toby.Gibson at EMBL-Heidelberg.de>
+Date: Thu, 17 Dec 1998 14:37:02 +0100
+To: Stephane Bortzmeyer <bortzmeyer at debian.org>
+Subject: Re: Fwd: clustalw_1.7-4_i386.changes REJECTED
+
+Hi Stephane,
+
+Now that we have thought about it, I don't think we can meet your stricter free
+criterion. There are already several companies who bundle Clustal W in sequence
+analysis packages and so are effectively selling it. They have paid for
+non-exclusive licences even though anyone can get the program for free: but
+they must have a multiple alignment engine, so we might as well earn some money
+which we can put toward further development.
+
+I think the main thing is to allow the distribution at all by Debian. We seem
+to have reached this point.
+
+Please do include this licence in the Debian package and I hope the release can
+go smoothly from now on.

Added: trunk/packages/clustalx/trunk/debian/patches/amenu.c.patch
===================================================================
--- trunk/packages/clustalx/trunk/debian/patches/amenu.c.patch	                        (rev 0)
+++ trunk/packages/clustalx/trunk/debian/patches/amenu.c.patch	2008-02-21 13:59:37 UTC (rev 1458)
@@ -0,0 +1,130 @@
+Index: clustalw-1.83/amenu.c
+===================================================================
+--- clustalw-1.83.orig/amenu.c
++++ clustalw-1.83/amenu.c
+@@ -184,7 +184,7 @@
+ 		fprintf(stdout,"     H. HELP\n");
+ 		fprintf(stdout,"     X. EXIT (leave program)\n\n\n");
+ 		
+-		getstr("Your choice",lin1);
++		getstr("Your choice",MAXLINE+1,lin1);
+ 
+ 		switch(toupper(*lin1)) {
+ 			case '1': seq_input(FALSE);
+@@ -268,7 +268,7 @@
+         fprintf(stdout,"    H.  HELP\n");
+         fprintf(stdout,"    or press [RETURN] to go back to main menu\n\n\n");
+ 
+-        getstr("Your choice",lin1);
++        getstr("Your choice",MAXLINE+1,lin1);
+         if(*lin1 == EOS) return;
+ 
+         switch(toupper(*lin1))
+@@ -361,7 +361,7 @@
+         fprintf(stdout,"    H.  HELP\n");
+         fprintf(stdout,"    or press [RETURN] to go back to main menu\n\n\n");
+ 
+-        getstr("Your choice",lin1);
++        getstr("Your choice",MAXLINE+1,lin1);
+         if(*lin1 == EOS) return;
+ 
+         switch(toupper(*lin1))
+@@ -457,7 +457,7 @@
+ 		fprintf(stdout,"\n\n");
+ 		fprintf(stdout,"     H. HELP\n\n\n");
+ 		
+-		getstr("Enter number (or [RETURN] to exit)",lin2);
++		getstr("Enter number (or [RETURN] to exit)",MAXLINE+1,lin2);
+ 		if( *lin2 == EOS) { 
+ 			return;
+ 		}
+@@ -533,7 +533,7 @@
+                 fprintf(stdout,"--\n");
+ 
+ 
+-                getstr("\n\nEnter number (or [RETURN] to exit)",lin2);
++                getstr("\n\nEnter number (or [RETURN] to exit)",MAXLINE+1,lin2);
+                 if(*lin2 == EOS) return(output_struct_penalties);
+ 
+         	switch(toupper(*lin2))
+@@ -602,7 +602,7 @@
+         fprintf(stdout,"    H.  HELP\n");
+         fprintf(stdout,"    or press [RETURN] to go back to main menu\n\n\n");
+ 
+-        getstr("Your choice",lin1);
++        getstr("Your choice",MAXLINE+1,lin1);
+         if(*lin1 == EOS) return;
+ 
+         switch(toupper(*lin1))
+@@ -677,7 +677,7 @@
+ 	fprintf(stdout,"\n");
+ 	fprintf(stdout,"     H. HELP\n\n\n");	
+ 	
+-		getstr("Enter number (or [RETURN] to exit)",lin2);
++		getstr("Enter number (or [RETURN] to exit)",MAXLINE+1,lin2);
+ 		if(*lin2 == EOS) return;
+ 		
+ 		switch(toupper(*lin2)) {
+@@ -766,7 +766,7 @@
+ 	fprintf(stdout,"\n");
+ 	fprintf(stdout,"     H. HELP\n\n\n");	
+ 	
+-		getstr("Enter number (or [RETURN] to exit)",lin2);
++		getstr("Enter number (or [RETURN] to exit)",MAXLINE+1,lin2);
+ 		if(*lin2 == EOS) return;
+ 		
+ 		switch(toupper(*lin2)) {
+@@ -907,7 +907,7 @@
+ 
+ 		fprintf(stdout,"     H. HELP\n\n\n");
+ 		
+-		getstr("Enter number (or [RETURN] to exit)",lin2);
++		getstr("Enter number (or [RETURN] to exit)",MAXLINE+1,lin2);
+ 		if( *lin2 == EOS) {
+                         if(dnaflag) {
+                                 dna_pw_go_penalty     = pw_go_penalty;
+@@ -1029,7 +1029,7 @@
+                 fprintf(stdout,"     8. Protein Gap Parameters\n\n");
+ 		fprintf(stdout,"     H. HELP\n\n\n");		
+ 
+-		getstr("Enter number (or [RETURN] to exit)",lin2);
++		getstr("Enter number (or [RETURN] to exit)",MAXLINE+1,lin2);
+ 
+ 		if(*lin2 == EOS) {
+ 			if(dnaflag) {
+@@ -1122,7 +1122,7 @@
+ 		fprintf(stdout,"     5. Toggle End Gap Separation         :%s\n\n",(!use_endgaps) ? "OFF" : "ON");
+ 		fprintf(stdout,"     H. HELP\n\n\n");		
+ 
+-		getstr("Enter number (or [RETURN] to exit)",lin2);
++		getstr("Enter number (or [RETURN] to exit)",MAXLINE+1,lin2);
+ 
+ 		if(*lin2 == EOS) return;
+ 		
+@@ -1136,7 +1136,7 @@
+ 			case '3':
+ 				fprintf(stdout,"Hydrophilic Residues Currently: %s\n",hyd_residues);
+ 
+-				getstr("Enter residues (or [RETURN] to quit)",lin1);
++				getstr("Enter residues (or [RETURN] to quit)",MAXLINE+1,lin1);
+                                 if (*lin1 != EOS) {
+                                         for (i=0;i<strlen(hyd_residues) && i<26;i++) {
+                                         c = lin1[i];
+@@ -1188,7 +1188,7 @@
+                 fprintf(stdout,"--\n");
+ 
+ 
+-                getstr("\n\nEnter number (or [RETURN] to exit)",lin2);
++                getstr("\n\nEnter number (or [RETURN] to exit)",MAXLINE+1,lin2);
+                 if(*lin2 == EOS) return(matn);
+ 
+                 i=toupper(*lin2)-'0';
+@@ -1223,7 +1223,7 @@
+ 	fprintf(stdout,"\n%s\n",title);
+ 	strcpy(line,prompt);
+ 	strcat(line, "(y/n) ? [y]");
+-	getstr(line,lin2);
++	getstr(line,MAXLINE+1,lin2);
+ 	if ((*lin2 != 'n') && (*lin2 != 'N'))
+ 		return('y');
+ 	else

Added: trunk/packages/clustalx/trunk/debian/patches/clustal-help.patch
===================================================================
--- trunk/packages/clustalx/trunk/debian/patches/clustal-help.patch	                        (rev 0)
+++ trunk/packages/clustalx/trunk/debian/patches/clustal-help.patch	2008-02-21 13:59:37 UTC (rev 1458)
@@ -0,0 +1,26 @@
+Index: clustalw-1.83/clustalw.c
+===================================================================
+--- clustalw-1.83.orig/clustalw.c
++++ clustalw-1.83/clustalw.c
+@@ -34,7 +34,7 @@
+ #ifdef MSDOS
+         char *help_file_name = "clustalw.hlp";
+ #else
+-        char *help_file_name = "clustalw_help";
++        char *help_file_name = "/usr/share/clustalw/clustalw_help";
+ #endif
+ 
+ sint max_names; /* maximum length of names in current alignment file */
+Index: clustalw-1.83/clustalx.c
+===================================================================
+--- clustalw-1.83.orig/clustalx.c
++++ clustalw-1.83/clustalx.c
+@@ -26,7 +26,7 @@
+ #ifdef MSDOS
+         char *help_file_name = "clustalx.hlp";
+ #else
+-        char *help_file_name = "clustalx_help";
++        char *help_file_name = "/usr/share/clustalw/clustalx_help";
+ #endif
+ 
+ sint max_names; /* maximum length of names in current alignment file */

Added: trunk/packages/clustalx/trunk/debian/patches/clustalw.h.patch
===================================================================
--- trunk/packages/clustalx/trunk/debian/patches/clustalw.h.patch	                        (rev 0)
+++ trunk/packages/clustalx/trunk/debian/patches/clustalw.h.patch	2008-02-21 13:59:37 UTC (rev 1458)
@@ -0,0 +1,13 @@
+Index: clustalw-1.83/clustalw.h
+===================================================================
+--- clustalw-1.83.orig/clustalw.h
++++ clustalw-1.83/clustalw.h
+@@ -238,7 +238,7 @@
+ char *blank_to_(char *str);
+ char *upstr(char *str);
+ char *lowstr(char *str);
+-void getstr(char *instr, char *outstr);
++void getstr(char *instr, int n, char *outstr);
+ double getreal(char *instr, double minx, double maxx, double def);
+ int getint(char *instr, int minx, int maxx, int def);
+ void do_system(void);

Added: trunk/packages/clustalx/trunk/debian/patches/clustalx.html.patch
===================================================================
--- trunk/packages/clustalx/trunk/debian/patches/clustalx.html.patch	                        (rev 0)
+++ trunk/packages/clustalx/trunk/debian/patches/clustalx.html.patch	2008-02-21 13:59:37 UTC (rev 1458)
@@ -0,0 +1,2123 @@
+Index: clustalw-1.83/clustalx.html
+===================================================================
+--- clustalw-1.83.orig/clustalx.html
++++ clustalw-1.83/clustalx.html
+@@ -2029,6 +2029,2118 @@
+ <P>
+ Thompson,J.D., Gibson,T.J., Plewniak,F., Jeanmougin,F. and Higgins,D.G. (1997)
+ The ClustalX windows interface: flexible strategies for multiple sequence 
++alignment aided by quality analysis tools. Nucleic Acids Research, 25:4876-4882.
++</P>
++<P>
++</P>
++<P>
++<STRONG>
++The ClustalW program is described in the manuscript:
++</STRONG>
++</P>
++<P>
++Thompson, J.D., Higgins, D.G. and Gibson, T.J. (1994) CLUSTAL W: improving the
++sensitivity of progressive multiple sequence alignment through sequence
++weighting, positions-specific gap penalties and weight matrix choice.  Nucleic
++Acids Research, 22:4673-4680.
++</P>
++<P>
++</P>
++<P>
++<STRONG>
++The ClustalV program is described in the manuscript:
++</STRONG>
++</P>
++<P>
++Higgins,D.G., Bleasby,A.J. and Fuchs,R. (1992) CLUSTAL V: improved software for
++multiple sequence alignment. CABIOS 8,189-191.
++</P>
++<P>
++</P>
++<P>
++<STRONG>
++The original Clustal program is described in the manuscripts:
++</STRONG>
++</P>
++<P>
++Higgins,D.G. and Sharp,P.M. (1989) Fast and sensitive multiple sequence
++alignments on a microcomputer.
++CABIOS 5,151-153.
++</P>
++<P>
++Higgins,D.G. and Sharp,P.M. (1988) CLUSTAL: a package for performing multiple
++sequence alignment on a microcomputer. Gene 73,237-244.
++</P>
++<P>
++<STRONG>
++Some tips on using Clustal X:
++</STRONG>
++</P>
++<P>
++Jeanmougin,F., Thompson,J.D., Gouy,M., Higgins,D.G. and Gibson,T.J. (1998)
++Multiple sequence alignment with Clustal X. Trends Biochem Sci, 23, 403-5.
++</P>
++<P>
++<STRONG>
++Some tips on using Clustal W:
++</STRONG>
++</P>
++<P>
++Higgins, D. G., Thompson, J. D. and Gibson, T. J. (1996) Using CLUSTAL for
++multiple sequence alignments. Methods Enzymol., 266, 383-402.
++</P>
++<P>
++<STRONG>
++You can get the latest version of the ClustalX program by anonymous ftp to:
++</STRONG>
++</P>
++<P>
++ftp-igbmc.u-strasbg.fr
++ftp.embl-heidelberg.de
++ftp.ebi.ac.uk
++</P>
++<P>
++<STRONG>
++Or, have a look at the following WWW site:
++</STRONG>
++</P>
++<P>
++http://www-igbmc.u-strasbg.fr/BioInfo/
++</P>
++<P>
++</P>
++<A HREF="#Index"> <EM>Back to Index</EM> </A>
++<HEAD>
++<TITLE>ClustalX Help</TITLE>
++</HEAD>
++<BODY BGCOLOR=white>
++<CENTER><H1>ClustalX Help</H1></CENTER>
++<P>
++You can get the latest version of the ClustalX program here:
++</P>
++<DL><DD>
++<A HREF="ftp://ftp-igbmc.u-strasbg.fr/pub/ClustalX/">
++ftp://ftp-igbmc.u-strasbg.fr/pub/ClustalX/</A>
++</DL>
++<P>For full details of usage and algorithms, please read the <A HREF="clustalw.doc"><EM>ClustalW.Doc</EM></A> file.</P>
++<PRE><EM>
++Toby  Gibson                         EMBL, Heidelberg, Germany.
++Des   Higgins                        UCC, Cork, Ireland.
++Julie Thompson/Francois Jeanmougin   IGBMC, Strasbourg, France.
++</EM></PRE>
++<CENTER><H2><A NAME="Index">Index</A></H2></CENTER>
++<OL>
++<LI><A HREF="#G">                      General help for CLUSTAL X (1.8)
++</A></LI>
++<LI><A HREF="#F">                      Input / Output Files 
++</A></LI>
++<LI><A HREF="#E">                          Editing Alignments
++</A></LI>
++<LI><A HREF="#M">                          Multiple Alignments
++</A></LI>
++<LI><A HREF="#P">                   Profile and Structure Alignments
++</A></LI>
++<LI><A HREF="#B">            Secondary Structure / Gap Penalty Masks
++</A></LI>
++<LI><A HREF="#T">                            Phylogenetic Trees
++</A></LI>
++<LI><A HREF="#C">                               Colors
++</A></LI>
++<LI><A HREF="#Q">                       Alignment Quality Analysis
++</A></LI>
++<LI><A HREF="#9">              Command Line Parameters
++</A></LI>
++<LI><A HREF="#R">                             References
++</A></LI>
++</OL>
++<CENTER><H2><A NAME="G">                      General help for CLUSTAL X (1.8)
++</A></H2></CENTER>
++<P>
++</P>
++<P>
++Clustal X is a windows interface for the ClustalW multiple sequence alignment
++program. It provides an integrated environment for performing multiple sequence
++and profile alignments and analysing the results. The sequence alignment is
++displayed in a window on the screen. A versatile coloring scheme has been
++incorporated allowing you to highlight conserved features  in the alignment.
++The pull-down menus at the top of the window allow you to select all the
++options required for traditional multiple sequence and profile alignment.
++</P>
++<P>
++You can cut-and-paste sequences to change the order of the alignment; you can
++select a subset of sequences to be aligned; you can select a sub-range of the
++alignment to be realigned and inserted back into the original alignment.
++</P>
++<P>
++Alignment quality analysis can be performed and low-scoring segments or
++exceptional residues can be highlighted.
++</P>
++<P>
++ClustalX is available for a number of different platforms including: SUN
++Solaris, IRIX5.3 on Silicon Graphics, Digital UNIX on DECStations, Microsoft
++Windows (32 bit) for PC's, Linux ELF for x86 PC's and Macintosh PowerMac. (See
++the README file for Installation instructions.)
++</P>
++<P>
++</P>
++<P>
++<H4>
++SEQUENCE INPUT
++</H4>
++</P>
++<P>
++Sequences and profiles (a term for pre-existing alignments) are input using 
++the FILE menu. Invalid options will be disabled. All sequences must be included
++into 1 file. 7 formats are automatically recognised: NBRF/PIR, EMBL/SWISSPROT,
++Pearson (Fasta), Clustal (*.aln), GCG/MSF (Pileup), GCG9 RSF and GDE flat file.
++All non-alphabetic characters (spaces, digits, punctuation marks) are ignored
++except "-" which is used to indicate a GAP ("." in MSF/RSF).  
++</P>
++<P>
++<H4>
++SEQUENCE / PROFILE ALIGNMENTS
++</H4>
++</P>
++<P>
++Clustal X has two modes which can be selected using the switch directly above
++the sequence display: MULTIPLE ALIGNMENT MODE and PROFILE ALIGNMENT MODE.
++</P>
++<P>
++To do a MULTIPLE ALIGNMENT on a set of sequences, make sure MULTIPLE ALIGNMENT
++MODE is selected. A single sequence data area is then displayed. The ALIGNMENT
++menu then allows you to either produce a guide tree for the alignment, or to do
++a multiple alignment following the guide tree, or to do a full multiple
++alignment.
++</P>
++<P>
++In PROFILE ALIGNMENT MODE, two sequence data areas are displayed, allowing you
++to align 2 alignments (termed profiles). Profiles are also used to add a new
++sequence to an old alignment, or to use secondary structure to guide the
++alignment process. GAPS in the old alignments are indicated using the "-" 
++character. PROFILES can be input in ANY of the allowed formats; just  use "-"
++(or "." for MSF/RSF) for each gap position. In Profile Alignment Mode, a button
++"Lock Scroll" is displayed which allows you to scroll the two profiles together
++using a single scroll bar. When the Lock Scroll is turned off, the two profiles
++can be scrolled independently.
++</P>
++<P>
++<H4>
++PHYLOGENETIC TREES
++</H4>
++</P>
++<P>
++Phylogenetic trees can be calculated from old alignments (read in with "-"
++characters to indicate gaps) OR after a multiple alignment while the alignment
++is still displayed.
++</P>
++<P>
++<H4>
++ALIGNMENT DISPLAY
++</H4>
++</P>
++<P>
++The alignment is displayed on the screen with the sequence names on the left
++hand side. The sequence alignment is for display only, it cannot be edited here
++(except for changing the sequence order by cutting-and-pasting on the sequence
++names). 
++</P>
++<P>
++A ruler is displayed below the sequences, starting at 1 for the first residue
++position (residue numbers in the sequence input file are ignored).
++</P>
++<P>
++A line above the alignment is used to mark strongly conserved positions. Three
++characters ('*', ':' and '.') are used:
++</P>
++<P>
++'*' indicates positions which have a single, fully conserved residue
++</P>
++<P>
++':' indicates that one of the following 'strong' groups is fully conserved:-
++<PRE>
++                 STA  
++                 NEQK  
++                 NHQK  
++                 NDEQ  
++                 QHRK  
++                 MILV  
++                 MILF  
++                 HY  
++                 FYW  
++</PRE>
++</P>
++<P>
++'.' indicates that one of the following 'weaker' groups is fully conserved:-
++<PRE>
++                 CSA  
++                 ATV  
++                 SAG  
++                 STNK  
++                 STPA  
++                 SGND  
++                 SNDEQK  
++                 NDEQHK  
++                 NEQHRK  
++                 FVLIM  
++                 HFY  
++</PRE>
++</P>
++<P>
++These are all the positively scoring groups that occur in the Gonnet Pam250
++matrix. The strong and weak groups are defined as strong score >0.5 and weak
++score =<0.5 respectively.
++</P>
++<P>
++For profile alignments, secondary structure and gap penalty masks are displayed
++above the sequences, if any data is found in the profile input file.
++</P>
++<P>
++</P>
++<P>
++</P>
++<A HREF="#Index"> <EM>Back to Index</EM> </A>
++<CENTER><H2><A NAME="F">                      Input / Output Files 
++</A></H2></CENTER>
++<P>
++</P>
++<P>
++LOAD SEQUENCES reads sequences from one of 7 file formats, replacing any
++sequences that are already loaded. All sequences must be in 1 file. The formats
++that are automatically recognised are: NBRF/PIR, EMBL/SWISSPROT, Pearson
++(Fasta), Clustal (*.aln), GCG/MSF (Pileup), GCG9/RSF and GDE flat file.  All
++non-alphabetic characters (spaces, digits, punctuation  marks) are ignored
++except "-" which is used to indicate a GAP ("." in MSF/RSF).
++</P>
++<P>
++The program tries to automatically recognise the different file formats used
++and to guess whether the sequences are amino acid or nucleotide.  This is not
++always foolproof.
++</P>
++<P>
++FASTA and NBRF/PIR formats are recognised by having a ">" as the first 
++character in the file.  
++</P>
++<P>
++EMBL/Swiss Prot formats are recognised by the letters "ID" at the start of the
++file (the token for the entry name field).  
++</P>
++<P>
++CLUSTAL format is recognised by the word CLUSTAL at the beginning of the file.
++</P>
++<P>
++GCG/MSF format is recognised by one of the following:
++<UL>
++<LI>
++       - the word PileUp at the start of the file.
++</LI><LI>
++       - the word !!AA_MULTIPLE_ALIGNMENT or !!NA_MULTIPLE_ALIGNMENT
++         at the start of the file.
++</LI><LI>
++       - the word MSF on the first line of the file, and the characters ..
++         at the end of this line.
++</LI>
++</UL>
++</P>
++<P> 
++GCG/RSF format is recognised by the word !!RICH_SEQUENCE at the beginning of
++the file.
++</P>
++<P>
++</P>
++<P>
++If 85% or more of the characters in the sequence are from A,C,G,T,U or N, the
++sequence will be assumed to be nucleotide.  This works in 97.3% of cases but
++watch out!
++</P>
++<P>
++APPEND SEQUENCES is only valid in MULTIPLE ALIGNMENT MODE. The input sequences
++do not replace those already loaded, but are appended at the end of the
++alignment.
++</P>
++<P>
++SAVE SEQUENCES AS... offers the user a choice of one of six output formats:
++CLUSTAL, NBRF/PIR, GCG/MSF, PHYLIP, NEXUS or GDE. All sequences are written
++to a single file. Options are available to save a range of the alignment, 
++switch between UPPER/LOWER case for GDE files, and to output SEQUENCE NUMBERING
++for CLUSTAL files.
++</P>
++<P>
++LOAD PROFILE 1 reads sequences in the same 7 file formats, replacing any
++sequences already loaded as Profile 1. This option will also remove any
++sequences which are loaded in Profile 2.
++</P>
++<P>
++LOAD PROFILE 2 reads sequences in the same 7 file formats, replacing any
++sequences already loaded as Profile 2.
++</P>
++<P>
++SAVE PROFILE 1 AS... is similar to the Save Sequences option except that only
++those sequences in Profile 1 will be written to the output file.
++</P>
++<P>
++SAVE PROFILE 2 AS... is similar to the Save Sequences option except that only
++those sequences in Profile 2 will be written to the output file.
++</P>
++<P>
++WRITE ALIGNMENT AS POSTSCRIPT will write the sequence display to a postscript
++format file. This will include any secondary structure / gap penalty mask 
++information and the consensus and ruler lines which are displayed on the
++screen. The Alignment Quality curve can be optionally included in the output
++file.
++</P>
++<P>
++WRITE PROFILE 1 AS POSTSCRIPT is similar to WRITE ALIGNMENT AS POSTSCRIPT
++except that only the profile 1 display will be printed.
++</P>
++<P>
++WRITE PROFILE 2 AS POSTSCRIPT is similar to WRITE ALIGNMENT AS POSTSCRIPT
++except that only the profile 2 display will be printed.
++</P>
++<P>
++</P>
++<P>
++<H4>
++POSTSCRIPT PARAMETERS
++</H4>
++</P>
++<P>
++A number of options are available to allow you to configure your postscript
++output file.
++</P>
++<P>
++PS COLORS FILE:
++</P>
++<P>
++The exact RGB values required to reproduce the colors used in the alignment
++window will vary from printer to printer. A PS colors file can be specified
++that contains the RGB values for all the colors required by each of your
++postscript printers.
++</P>
++<P>
++By default, Clustal X looks for a file called 'colprint.par' in the current
++directory (if you're running under UNIX, it then looks in your home directory,
++and finally in the directories in your PATH environment variable). If no PS
++colors file is found or a color used on the screen is not defined here, the
++screen RGB values (from the Color Parameter File) are used.
++</P>
++<P>
++The PS colors file consists of one line for each color to be defined, with the
++color name followed by the RGB values (on a scale of 0 to 1). For example,
++</P>
++<P>
++RED          0.9 0.1 0.1
++</P>
++<P>
++Blank lines and comments (lines beginning with a '#' character) are ignored.
++</P>
++<P>
++</P>
++<P>
++PAGE SIZE:  The alignment can be displayed on either A4, A3 or US Letter size
++pages.
++</P>
++<P>
++ORIENTATION: The alignment can be displayed on either a landscape or portrait
++page.
++</P>
++<P>
++PRINT HEADER: An optional header including the postscript filename, and
++creation date can be printed at the top of each page.
++</P>
++<P>
++PRINT QUALITY CURVE: The Alignment Quality curve which is displayed underneath
++the alignment on the screen can be included in the postscript output.
++</P>
++<P>
++PRINT RULER: The ruler which is displayed underneath the alignment on the 
++screen can be included in the postscript output.
++</P>
++<P>
++PRINT RESIDUE NUMBERS: Sequence residue numbers can be printed at the right
++hand side of the alignment.
++</P>
++<P>
++RESIZE TO FIT PAGE: By default, the alignment is scaled to fit the page size
++selected. This option can be turned off, in which case a font size of 10 will
++be used for the sequences.
++</P>
++<P>
++PRINT FROM POSITION/TO: A range of the alignment can be printed. The default
++is to print the full alignment. The first and last residues to be printed are
++specified here.
++</P>
++<P>
++USE BLOCK LENGTH: The alignment can be divided into blocks of residues. The
++number of residues in a block is specified here. More than one block may then
++be printed on a single page. This is useful for long alignments of a small
++number of sequences. If the block length is set to 0, the alignment will not
++be divided into blocks, but printed across a number of pages.
++</P>
++<P>
++</P>
++<A HREF="#Index"> <EM>Back to Index</EM> </A>
++<CENTER><H2><A NAME="E">                          Editing Alignments
++</A></H2></CENTER>
++<P>
++</P>
++<P>
++Clustal X allows you to change the order of the sequences in the alignment, by
++cutting-and-pasting the sequence names.
++</P>
++<P>
++To select a group of sequences to be moved, click on a sequence name and drag
++the cursor until all the required sequences are highlighted. Holding down the
++Shift key when clicking on the first name will add new sequences to those
++already selected.
++</P>
++<P>
++(Options are provided to Select All Sequences, Select Profile 1 or Select 
++Profile 2.)
++</P>
++<P>
++The selected sequences can be removed from the alignment by using the EDIT
++menu, CUT option.
++</P>
++<P>
++To add the cut sequences back into an alignment, select a sequence by clicking
++on the sequence name. The cut sequences will be added to the alignment,
++immediately following the selected sequence, by the EDIT menu, PASTE option.
++</P>
++<P>
++To add the cut sequences to an empty alignment (e.g. when cutting sequences from
++Profile 1 and pasting them to Profile 2), click on the empty sequence name
++display area, and select the EDIT menu, PASTE option as before.
++</P>
++<P>
++The sequence selection and sequence range selection can be cleared using the
++EDIT menu, CLEAR SEQUENCE SELECTION and CLEAR RANGE SELECTION options
++respectively.
++</P>
++<P>
++To search for a string of residues in the sequences, select the sequences to be
++searched by clicking on the sequence names. You can then enter the string to
++search for by selecting the SEARCH FOR STRING option. If the string is found in
++any of the sequences selected, the sequence name and column number are printed
++below the sequence display.
++</P>
++<P>
++In PROFILE ALIGNMENT MODE, the two profiles can be merged (normally done after
++alignment) by selecting ADD PROFILE 2 TO PROFILE 1. The sequences currently
++displayed as Profile 2 will be appended to Profile 1. 
++</P>
++<P>
++The REMOVE ALL GAPS option will remove all gaps from the sequences currently
++selected.
++WARNING: This option removes ALL gaps, not only those introduced by ClustalX,
++but also those that were read from the input alignment file. Any secondary
++structure information associated with the alignment will NOT be automatically
++realigned.
++</P>
++<P>
++The REMOVE GAP-ONLY COLUMNS will remove those positions in the alignment which
++contain gaps in all sequences. This can occur as a result of removing divergent
++sequences from an alignment, or if an alignment has been realigned.
++</P>
++<P>
++</P>
++<A HREF="#Index"> <EM>Back to Index</EM> </A>
++<CENTER><H2><A NAME="M">                          Multiple Alignments
++</A></H2></CENTER>
++<P>
++</P>
++<P>
++Make sure MULTIPLE ALIGNMENT MODE is selected, using the switch directly above
++the sequence display area. Then, use the ALIGNMENT menu to do multiple
++alignments.
++</P>
++<P>
++Multiple alignments are carried out in 3 stages:
++</P>
++<P> 
++1) all sequences are compared to each other (pairwise alignments);
++</P>
++<P> 
++2) a dendrogram (like a phylogenetic tree) is constructed, describing the
++approximate groupings of the sequences by similarity (stored in a file).
++</P>
++<P> 
++3) the final multiple alignment is carried out, using the dendrogram as a guide.
++</P>
++<P>
++The 3 stages are carried out automatically by the DO COMPLETE ALIGNMENT option.
++You can skip the first stages (pairwise alignments; guide tree) by using an old
++guide tree file (DO ALIGNMENT FROM GUIDE TREE); or you can just produce the
++guide tree with no final multiple alignment (PRODUCE GUIDE TREE ONLY).
++</P>
++<P>
++</P>
++<P>
++REALIGN SELECTED SEQUENCES is used to realign badly aligned sequences in the
++alignment. Sequences can be selected by clicking on the sequence names - see
++Editing Alignments for more details. The unselected sequences are then 'fixed'
++and a profile is made including only the unselected sequences. Each of the
++selected sequences in turn is then realigned to this profile. The realigned
++sequences will be displayed as a group at the end of the alignment.
++</P>
++<P>
++</P>
++<P>
++REALIGN SELECTED SEQUENCE RANGE is used to realign a small region of the 
++alignment. A residue range can be selected by clicking on the sequence display
++area. A multiple alignment is then performed, following the 3 stages described
++above, but only using the selected residue range. Finally the new alignment of
++the range is pasted back into the full sequence alignment.
++</P>
++<P>
++By default, gap penalties are used at each end of the subrange in order to 
++penalise terminal gaps. If the REALIGN SEGMENT END GAP PENALTIES option is
++switched off, gaps can be introduced at the ends of the residue range at no
++cost.
++</P>
++<P>
++</P>
++<P>
++ALIGNMENT PARAMETERS displays a sub-menu with the following options:
++</P>
++<P>
++RESET NEW GAPS BEFORE ALIGNMENT will remove any new gaps introduced into the
++sequences during multiple alignment if you wish to change the parameters and
++try again. This only takes effect just before you do a second multiple
++alignment. You can make phylogenetic trees after alignment whether or not this
++is ON. If you turn this OFF, the new gaps are kept even if you do a second
++multiple alignment. This allows you to iterate the alignment gradually.
++Sometimes, the alignment is improved by a second or third pass.
++</P>
++<P>
++RESET ALL GAPS BEFORE ALIGNMENT will remove all gaps in the sequences including
++gaps which were read in from the sequence input file. This only takes effect
++just before you do a second multiple alignment.  You can make phylogenetic
++trees after alignment whether or not this is ON.  If you turn this OFF, all
++gaps are kept even if you do a second multiple alignment. This allows you to
++iterate the alignment gradually.  Sometimes, the alignment is improved by a
++second or third pass.
++</P>
++<P>
++</P>
++<P>
++PAIRWISE ALIGNMENT PARAMETERS control the speed/sensitivity of the initial
++alignments.
++</P>
++<P>
++MULTIPLE ALIGNMENT PARAMETERS control the gaps in the final multiple
++alignments.
++</P>
++<P>
++PROTEIN GAP PARAMETERS displays a temporary window which allows you to set
++various parameters only used in the alignment of protein sequences.
++</P>
++<P>
++(SECONDARY STRUCTURE PARAMETERS, for use with the Profile Alignment Mode only,
++allows you to set various parameters only used with gap penalty masks.)
++</P>
++<P>
++SAVE LOG FILE will write the alignment calculation scores to a file. The log
++filename is the same as the input sequence filename, with an extension .log
++appended.
++</P>
++<P>
++</P>
++<P>
++<H4>
++OUTPUT FORMAT OPTIONS
++</H4>
++</P>
++<P>
++You can choose from 6 different alignment formats (CLUSTAL, GCG, NBRF/PIR,
++PHYLIP, GDE and NEXUS).  You can choose more than one (or all 6 if you wish).  
++</P>
++<P>
++CLUSTAL format output is a self explanatory alignment format. It shows the
++sequences aligned in blocks. It can be read in again at a later date to (for
++example) calculate a phylogenetic tree or add in new sequences by profile
++alignment.
++</P>
++<P>
++GCG output can be used by any of the GCG programs that can work on multiple
++alignments (e.g. PRETTY, PROFILEMAKE, PLOTALIGN). It is the same as the GCG
++.msf format files (multiple sequence file); new in version 7 of GCG.
++</P>
++<P>
++NEXUS format is used by several phylogeny programs, including PAUP and
++MacClade.
++</P>
++<P>
++PHYLIP format output can be used for input to the PHYLIP package of Joe 
++Felsenstein.  This is a very widely used package for doing every imaginable
++form of phylogenetic analysis (MUCH more than the modest introduction
++offered by this program).
++</P>
++<P>
++NBRF/PIR: this is the same as the standard PIR format with ONE ADDITION. Gap
++characters "-" are used to indicate the positions of gaps in the multiple 
++alignment. These files can be re-used as input in any part of clustal that
++allows sequences (or alignments or profiles) to be read in.  
++</P>
++<P>
++GDE:  this format is used by the GDE package of Steven Smith and is understood
++by SEQLAB in GCG 9 or later.
++</P>
++<P>
++GDE OUTPUT CASE: sequences in GDE format may be written in either upper or
++lower case.
++</P>
++<P> 
++CLUSTALW SEQUENCE NUMBERS: residue numbers may be added to the end of the
++alignment lines in clustalw format.
++</P>
++<P>
++OUTPUT ORDER is used to control the order of the sequences in the output
++alignments. By default, it uses the order in which the sequences were aligned
++(from the guide tree/dendrogram), thus automatically grouping closely related
++sequences. It can be switched to be the same as the original input order.
++</P>
++<P>
++PARAMETER OUTPUT: This option will save all your parameter settings in a
++parameter file (suffix .par) during alignment. The file can be subsequently
++used to rerun ClustalW using the same parameters.
++</P>
++<P>
++</P>
++<P>
++<H3>
++ALIGNMENT PARAMETERS
++</H3>
++</P>
++<P>
++<STRONG>
++PAIRWISE ALIGNMENT PARAMETERS
++</STRONG>
++</P>
++<P>
++A distance is calculated between every pair of sequences and these are used to
++construct the phylogenetic tree which guides the final multiple alignment. The
++scores are calculated from separate pairwise alignments. These can be
++calculated using 2 methods: dynamic programming (slow but accurate) or by the
++method of Wilbur and Lipman (extremely fast but approximate).   
++</P>
++<P>
++You can choose between the 2 alignment methods using the PAIRWISE ALIGNMENTS
++option. The slow/accurate method is fast enough for short sequences but will be
++VERY SLOW for many (e.g. >100) long (e.g. >1000 residue) sequences.   
++</P>
++<P>
++</P>
++<P>
++<STRONG>
++SLOW-ACCURATE alignment parameters:
++</STRONG>
++</P>
++<P>
++These parameters do not have any effect on the speed of the alignments. They
++are used to give initial alignments which are then rescored to give percent
++identity scores. These % scores are the ones which are displayed on the 
++screen. The scores are converted to distances for the trees.
++</P>
++<P>
++Gap Open Penalty:      the penalty for opening a gap in the alignment.
++</P>
++<P>
++Gap Extension Penalty: the penalty for extending a gap by 1 residue.
++</P>
++<P>
++Protein Weight Matrix: the scoring table which describes the similarity of 
++each amino acid to each other.
++</P>
++<P>
++Load protein matrix: allows you to read in a comparison table from a file.
++</P>
++<P>
++DNA weight matrix: the scores assigned to matches and mismatches (including
++IUB ambiguity codes).
++</P>
++<P>
++Load DNA matrix: allows you to read in a comparison table from a file.
++</P>
++<P>
++See the Multiple alignment parameters, MATRIX option below for details of the
++matrix input format.
++</P>
++<P>
++</P>
++<P>
++<STRONG>
++FAST-APPROXIMATE alignment parameters:
++</STRONG>
++</P>
++<P>
++These similarity scores are calculated from fast, approximate, global
++alignments, which are controlled by 4 parameters. 2 techniques are used to make
++these alignments very fast: 1) only exactly matching fragments (k-tuples) are
++considered; 2) only the 'best' diagonals (the ones with most k-tuple matches)
++are used.
++</P>
++<P>
++GAP PENALTY:   This is a penalty for each gap in the fast alignments. It has
++little effect on the speed or sensitivity except for extreme values.
++</P>
++<P>
++K-TUPLE SIZE:  This is the size of exactly matching fragment that is used. 
++INCREASE for speed (max= 2 for proteins; 4 for DNA), DECREASE for sensitivity.
++For longer sequences (e.g. >1000 residues) you may wish to increase the
++default.
++</P>
++<P>
++TOP DIAGONALS: The number of k-tuple matches on each diagonal (in an imaginary
++dot-matrix plot) is calculated. Only the best ones (with most matches) are used
++in the alignment. This parameter specifies how many. Decrease for speed;
++increase for sensitivity.
++</P>
++<P>
++WINDOW SIZE:  This is the number of diagonals around each of the 'best' 
++diagonals that will be used. Decrease for speed; increase for sensitivity.
++</P>
++<P>
++</P>
++<P>
++<STRONG>
++MULTIPLE ALIGNMENT PARAMETERS
++</STRONG>
++</P>
++<P>
++These parameters control the final multiple alignment. This is the core of the
++program and the details are complicated. To fully understand the use of the
++parameters and the scoring system, you will have to refer to the documentation.
++</P>
++<P>
++Each step in the final multiple alignment consists of aligning two alignments 
++or sequences. This is done progressively, following the branching order in the
++GUIDE TREE. The basic parameters to control this are two gap penalties and the
++scores for various identical/non-identical residues. 
++</P>
++<P>
++The GAP OPENING and EXTENSION PENALTIES can be set here. These control the 
++cost of opening up every new gap and the cost of every item in a gap.  
++Increasing the gap opening penalty will make gaps less frequent. Increasing 
++the gap extension penalty will make gaps shorter. Terminal gaps are not 
++penalised.
++</P>
++<P>
++The DELAY DIVERGENT SEQUENCES switch delays the alignment of the most distantly
++related sequences until after the most closely related sequences have  been
++aligned. The setting shows the percent identity level required to delay the
++addition of a sequence; sequences that are less identical than this level to
++any other sequences will be aligned later.
++</P>
++<P>
++The TRANSITION WEIGHT gives transitions (A<-->G or C<-->T i.e. purine-purine or
++pyrimidine-pyrimidine substitutions) a weight between 0 and 1; a weight of zero
++means that the transitions are scored as mismatches, while a weight of 1 gives
++the transitions the match score. For distantly related DNA sequences, the
++weight should be near to zero; for closely related sequences it can be useful
++to assign a higher score. The default is set to 0.5.
++</P>
++<P>
++</P>
++<P>
++The PROTEIN WEIGHT MATRIX option allows you to choose a series of weight
++matrices. For protein alignments, you use a weight matrix to determine the
++similarity of non-identical amino acids. For example, Tyr aligned with Phe is
++usually judged to be 'better' than Tyr aligned with Pro.
++</P>
++<P>
++There are three 'in-built' series of weight matrices offered. Each consists of
++several matrices which work differently at different evolutionary distances. To
++see the exact details, read the documentation. Crudely, we store several
++matrices in memory, spanning the full range of amino acid distance (from almost
++identical sequences to highly divergent ones). For very similar sequences, it
++is best to use a strict weight matrix which only gives a high score to
++identities and the most favoured conservative substitutions. For more divergent
++sequences, it is appropriate to use "softer" matrices which give a high score
++to many other frequent substitutions.
++</P>
++<P>
++1) BLOSUM (Henikoff). These matrices appear to be the best available for 
++carrying out data base similarity (homology searches). The matrices currently
++used are: Blosum 80, 62, 45 and 30. BLOSUM was the default in earlier Clustal X
++versions.
++</P>
++<P>
++2) PAM (Dayhoff). These have been extremely widely used since the late '70s. We
++currently use the PAM 20, 60, 120, 350 matrices.
++</P>
++<P>
++3) GONNET. These matrices were derived using almost the same procedure as the
++Dayhoff one (above) but are much more up to date and are based on a far larger
++data set. They appear to be more sensitive than the Dayhoff series. We
++currently use the GONNET 80, 120, 160, 250 and 350 matrices. This series is the
++default for Clustal X version 1.8.
++</P>
++<P>
++We also supply an identity matrix which gives a score of 10 to two identical 
++amino acids and a score of zero otherwise. This matrix is not very useful.
++</P>
++<P>
++Load protein matrix: allows you to read in a comparison matrix from a file.
++This can be either a single matrix or a series of matrices (see below for
++format). 
++</P>
++<P>
++</P>
++<P>
++DNA WEIGHT MATRIX option allows you to select a single matrix (not a series)
++used for aligning nucleic acid sequences. Two hard-coded matrices are available:
++</P>
++<P>
++1) IUB. This is the default scoring matrix used by BESTFIT for the comparison
++of nucleic acid sequences. X's and N's are treated as matches to any IUB
++ambiguity symbol. All matches score 1.9; all mismatches for IUB symbols score 0.
++</P>
++<P>
++2) CLUSTALW(1.6). A previous system used by ClustalW, in which matches score
++1.0 and mismatches score 0. All matches for IUB symbols also score 0.
++</P>
++<P>
++Load DNA matrix: allows you to read in a nucleic acid comparison matrix from a
++file (just one matrix, not a series).
++</P>
++<P>
++</P>
++<P>
++SINGLE MATRIX INPUT FORMAT
++The format used for a single matrix is the same as the BLAST program. The
++scores in the new weight matrix should be similarities. You can use negative as
++well as positive values if you wish, although the matrix will be automatically
++adjusted to all positive scores, unless the NEGATIVE MATRIX option is selected.
++Any lines beginning with a # character are assumed to be comments. The first
++non-comment line should contain a list of amino acids in any order, using the 1
++letter code, followed by a * character. This should be followed by a square
++matrix of scores, with one row and one column for each amino acid. The last row
++and column of the matrix (corresponding to the * character) contain the minimum
++score over the whole matrix.
++</P>
++<P>
++MATRIX SERIES INPUT FORMAT
++ClustalX uses different matrices depending on the mean percent identity of the
++sequences to be aligned. You can specify a series of matrices and the range of
++the percent identity for each matrix in a matrix series file. The file is
++automatically recognised by the word CLUSTAL_SERIES at the beginning of the
++file. Each matrix in the series is then specified on one line which should
++start with the word MATRIX. This is followed by the lower and upper limits of
++the sequence percent identities for which you want to apply the matrix. The
++final entry on the matrix line is the filename of a Blast format matrix file
++(see above for details of the single matrix file format).
++</P>
++<P>
++Example.
++</P>
++<P>
++CLUSTAL_SERIES
++</P>
++<P> 
++MATRIX 81 100 /us1/user/julie/matrices/blosum80
++MATRIX 61 80 /us1/user/julie/matrices/blosum62
++MATRIX 31 60 /us1/user/julie/matrices/blosum45
++MATRIX 0 30 /us1/user/julie/matrices/blosum30
++</P>
++<P>
++</P>
++<P>
++<STRONG>
++PROTEIN GAP PARAMETERS
++</STRONG>
++</P>
++<P>
++RESIDUE SPECIFIC PENALTIES are amino acid specific gap penalties that reduce or
++increase the gap opening penalties at each position in the alignment or 
++sequence. See the documentation for details. As an example, positions that are
++rich in glycine are more likely to have an adjacent gap than positions that are
++rich in valine.
++</P>
++<P>
++HYDROPHILIC GAP PENALTIES are used to increase the chances of a gap within a
++run (5 or more residues) of hydrophilic amino acids; these are likely to be
++loop or random coil regions where gaps are more common. The residues that are
++"considered" to be hydrophilic can be entered in HYDROPHILIC RESIDUES.
++</P>
++<P>
++GAP SEPARATION DISTANCE tries to decrease the chances of gaps being too close
++to each other. Gaps that are less than this distance apart are penalised more
++than other gaps. This does not prevent close gaps; it makes them less frequent,
++promoting a block-like appearance of the alignment.
++</P>
++<P>
++END GAP SEPARATION treats end gaps just like internal gaps for the purposes of
++avoiding gaps that are too close (set by GAP SEPARATION DISTANCE above). If you
++turn this off, end gaps will be ignored for this purpose. This is useful when
++you wish to align fragments where the end gaps are not biologically meaningful.
++</P>
++<P>
++</P>
++<P>
++</P>
++<A HREF="#INDEX"> <EM>Back to Index</EM> </A>
++<CENTER><H2><A NAME="P">                   Profile and Structure Alignments
++</A></H2></CENTER>
++<P>
++</P>
++<P>   
++By PROFILE ALIGNMENT, we mean alignment using existing alignments. Profile 
++alignments allow you to store alignments of your favourite sequences and add
++new sequences to them in small bunches at a time. A profile is simply an
++alignment of one or more sequences (e.g. an alignment output file from Clustal
++X). Each input can be a single sequence. One or both sets of input sequences
++may include secondary structure assignments or gap penalty masks to guide the
++alignment. 
++</P>
++<P>
++Make sure PROFILE ALIGNMENT MODE is selected, using the switch directly above
++the sequence display area. Then, use the ALIGNMENT menu to do profile and
++secondary structure alignments.
++</P>
++<P>
++The profiles can be in any of the allowed input formats with "-" characters
++used to specify gaps (except for GCG/MSF where "." is used).
++</P>
++<P>
++You have to load the 2 profiles by choosing FILE, LOAD PROFILE 1 and  LOAD
++PROFILE 2. Then ALIGNMENT, ALIGN PROFILE 2 TO PROFILE 1 will align the 2
++profiles to each other. Secondary structure masks in either profile can be used
++to guide the alignment. This option compares all the sequences in profile 1
++with all the sequences in profile 2 in order to build guide trees which will be
++used to calculate sequence weights, and select appropriate alignment parameters
++for the final profile alignment.
++</P>
++<P>
++You can skip the first stage (pairwise alignments; guide trees) by using old
++guide tree files (ALIGN PROFILES FROM GUIDE TREES). 
++</P>
++<P>
++The ALIGN SEQUENCES TO PROFILE 1 option will take the sequences in the second
++profile and align them to the first profile, 1 at a time.  This is useful to
++add some new sequences to an existing alignment, or to align a set of sequences
++to a known structure. In this case, the second profile set need not be
++pre-aligned.
++</P>
++<P>
++You can skip the first stage (pairwise alignments; guide tree) by using an old
++guide tree file (ALIGN SEQUENCES TO PROFILE 1 FROM TREE). 
++</P>
++<P>
++SAVE LOG FILE will write the alignment calculation scores to a file. The log
++filename is the same as the input sequence filename, with an extension .log
++appended.
++</P>
++<P>
++The alignment parameters can be set using the ALIGNMENT PARAMETERS menu,
++Pairwise Parameters, Multiple Parameters and Protein Gap Parameters options.
++These are EXACTLY the same parameters as used by the general, automatic
++multiple alignment procedure. The general multiple alignment procedure is
++simply a series of profile alignments. Carrying out a series of profile
++alignments on larger and larger groups of sequences, allows you to manually
++build up a complete alignment, if necessary editing intermediate alignments.
++</P>
++<P>
++<STRONG>
++SECONDARY STRUCTURE PARAMETERS
++</STRONG>
++</P>
++<P>
++Use this menu to set secondary structure options. If a solved structure is
++known, it can be used to guide the alignment by raising gap penalties within
++secondary structure elements, so that gaps will preferentially be inserted into
++unstructured surface loop regions. Alternatively, a user-specified gap penalty
++mask can be supplied for a similar purpose.
++</P>
++<P>
++A gap penalty mask is a series of numbers between 1 and 9, one per position in 
++the alignment. Each number specifies how much the gap opening penalty is to be 
++raised at that position (raised by multiplying the basic gap opening penalty
++by the number) i.e. a mask figure of 1 at a position means no change
++in gap opening penalty; a figure of 4 means that the gap opening penalty is
++four times greater at that position, making gaps 4 times harder to open.
++</P>
++<P>
++The format for gap penalty masks and secondary structure masks is explained in
++a separate help section.
++</P>
++<P>
++</P>
++<A HREF="#INDEX"> <EM>Back to Index</EM> </A>
++<CENTER><H2><A NAME="B">            Secondary Structure / Gap Penalty Masks
++</A></H2></CENTER>
++<P>
++</P>
++<P>
++The use of secondary structure-based penalties has been shown to improve  the
++accuracy of sequence alignment. Clustal X now allows secondary structure/ gap
++penalty masks to be supplied with the input sequences used during profile
++alignment. (NB. The secondary structure information is NOT used during multiple
++sequence alignment). The masks work by raising gap penalties in specified
++regions (typically secondary structure elements) so that gaps are
++preferentially opened in the less well conserved regions (typically surface
++loops).
++</P>
++<P>
++The USE PROFILE 1(2) SECONDARY STRUCTURE / GAP PENALTY MASK options control
++whether the input 2D-structure information or gap penalty masks will be used
++during the profile alignment.
++</P>
++<P>
++The OUTPUT options control whether the secondary structure and gap penalty
++masks should be included in the Clustal X output alignments. Showing both is
++useful for understanding how the masks work. The 2D-structure information is
++itself useful in judging the alignment quality and in seeing how residue
++conservation patterns vary with secondary structure. 
++</P>
++<P>
++The HELIX and STRAND GAP PENALTY options provide the value for raising the gap
++penalty at core Alpha Helical (A) and Beta Strand (B) residues. In CLUSTAL
++format, capital residues denote the A and B core structure notation. Basic gap
++penalties are multiplied by the amount specified.
++</P>
++<P>
++The LOOP GAP PENALTY option provides the value for the gap penalty in Loops.
++By default this penalty is not raised. In CLUSTAL format, loops are specified
++by "." in the secondary structure notation.
++</P>
++<P>
++The SECONDARY STRUCTURE TERMINAL PENALTY provides the value for setting the gap
++penalty at the ends of secondary structures. Ends of secondary structures are
++known to grow or shrink, comparing related structures. Therefore by default
++these are given intermediate values, lower than the core penalties. All
++secondary structure read in as lower case in CLUSTAL format gets the reduced
++terminal penalty.
++</P>
++<P>
++The HELIX and STRAND TERMINAL POSITIONS options specify the range of structure
++termini for the intermediate penalties. In the alignment output, these are
++indicated as lower case. For Alpha Helices, by default, the range spans the 
++end-helical turn (3 residues). For Beta Strands, the default range spans the
++end residue and the adjacent loop residue, since sequence conservation often
++extends beyond the actual H-bonded Beta Strand.
++</P>
++<P>
++Clustal X can read the masks from SWISS-PROT, CLUSTAL or GDE format input
++files. For many 3-D protein structures, secondary structure information is
++recorded in the feature tables of SWISS-PROT database entries. You should
++always check that the assignments are correct - some are quite inaccurate.
++Clustal X looks for SWISS-PROT HELIX and STRAND assignments e.g.
++</P>
++<P>
++</P>
++<P>
++<PRE>
++FT   HELIX       100    115
++FT   STRAND      118    119
++</PRE>
++</P>
++<P>
++The structure and penalty masks can also be read from CLUSTAL alignment format 
++as comment lines beginning "!SS_" or "!GM_" e.g.
++</P>
++<P>
++<PRE>
++!SS_HBA_HUMA    ..aaaAAAAAAAAAAaaa.aaaAAAAAAAAAAaaaaaaAaaa.........aaaAAAAAA
++!GM_HBA_HUMA    112224444444444222122244444444442222224222111111111222444444
++HBA_HUMA        VLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHGK
++</PRE>
++</P>
++<P>
++Note that the mask itself is a set of numbers between 1 and 9 each of which is 
++assigned to the residue(s) in the same column below. 
++</P>
++<P>
++In GDE flat file format, the masks are specified as text and the names must
++begin with "SS_ or "GM_.
++</P>
++<P>
++Either a structure or penalty mask or both may be used. If both are included
++in an alignment, the user will be asked which is to be used.
++</P>
++<P>
++</P>
++<P>
++</P>
++<A HREF="#INDEX"> <EM>Back to Index</EM> </A>
++<CENTER><H2><A NAME="T">                            Phylogenetic Trees
++</A></H2></CENTER>
++<P>
++</P>
++<P>
++Before calculating a tree, you must have an ALIGNMENT in memory. This can be
++input using the FILE menu, LOAD SEQUENCES option or you should have just
++carried out a full multiple alignment and the alignment is still in memory.
++Remember YOU MUST ALIGN THE SEQUENCES FIRST!!!!
++</P>
++<P>
++The method used is the NJ (Neighbour Joining) method of Saitou and Nei. First
++you calculate distances (percent divergence) between all pairs of sequences from
++a multiple alignment; second you apply the NJ method to the distance matrix.
++</P>
++<P>
++To calculate a tree, use the DRAW N-J TREE option. This gives an UNROOTED tree
++and all branch lengths. The root of the tree can only be inferred by using an
++outgroup (a sequence that you are certain branches at the outside of the tree
++.... certain on biological grounds) OR if you assume a degree of constancy in
++the 'molecular clock', you can place the root in the 'middle' of the tree
++(roughly equidistant from all tips).
++</P>
++<P>
++BOOTSTRAP N-J TREE uses a method for deriving confidence values for the 
++groupings in a tree (first adapted for trees by Joe Felsenstein). It involves
++making N random samples of sites from the alignment (N should be LARGE, e.g.
++500 - 1000); drawing N trees (1 from each sample) and counting how many times
++each grouping from the original tree occurs in the sample trees. You can set N
++using the NUMBER OF BOOTSTRAP TRIALS option in the BOOTSTRAP TREE window. In
++practice, you should use a large number of bootstrap replicates (1000 is
++recommended, even if it means running the program for an hour on a slow 
++computer). You can also supply a seed number for the random number generator
++here. Different runs with the same seed will give the same answer. See the
++documentation for more details.
++</P>
++<P>
++EXCLUDE POSITIONS WITH GAPS? With this option, any alignment positions where
++ANY of the sequences have a gap will be ignored. This means that 'like' will
++be compared to 'like' in all distances, which is highly desirable. It also
++automatically throws away the most ambiguous parts of the alignment, which are
++concentrated around gaps (usually). The disadvantage is that you may throw away
++much of the data if there are many gaps (which is why it is difficult for us to
++make it the default).  
++</P>
++<P>
++CORRECT FOR MULTIPLE SUBSTITUTIONS? For small divergence (say <10%) this option
++makes no difference. For greater divergence, this option corrects for the fact
++that observed distances underestimate actual evolutionary distances. This is
++because, as sequences diverge, more than one substitution will happen at many
++sites. However, you only see one difference when you look at the present day
++sequences. Therefore, this option has the effect of stretching branch lengths
++in trees (especially long branches). The corrections used here (for DNA or
++proteins) are both due to Motoo Kimura. See the documentation for details.  
++</P>
++<P>
++Where possible, this option should be used. However, for VERY divergent
++sequences, the distances cannot be reliably corrected. You will be warned if
++this happens. Even if none of the distances in a data set exceed the reliable
++threshold, if you bootstrap the data, some of the bootstrap distances may
++randomly exceed the safe limit.  
++</P>
++<P>
++SAVE LOG FILE will write the tree calculation scores to a file. The log
++filename is the same as the input sequence filename, with an extension .log
++appended.
++</P>
++<P>
++<H4>
++OUTPUT FORMAT OPTIONS
++</H4>
++</P>
++<P>
++Four different formats are allowed. None of these displays the tree visually.
++You can display the tree using the NJPLOT program distributed with Clustal X
++OR get the PHYLIP package and use the tree drawing facilities there. 
++</P>
++<P> 
++1) CLUSTAL FORMAT TREE. This format is verbose and lists all of the distances
++between the sequences and the number of alignment positions used for each. The
++tree is described at the end of the file. It lists the sequences that are
++joined at each alignment step and the branch lengths. After two sequences are
++joined, it is referred to later as a NODE. The number of a NODE is the number
++of the lowest sequence in that NODE.   
++</P>
++<P>
++2) PHYLIP FORMAT TREE. This format is the New Hampshire format, used by many
++phylogenetic analysis packages. It consists of a series of nested parentheses,
++describing the branching order, with the sequence names and branch lengths. It
++can be read by the NJPLOT program distributed with ClustalX. It can also be
++used by the RETREE, DRAWGRAM and DRAWTREE programs of the PHYLIP package to see
++the trees graphically. This is the same format used during multiple alignment
++for the guide trees. Some other packages that can read and display New
++Hampshire format are TreeTool, TreeView, and Phylowin.
++</P>
++<P>
++3) PHYLIP DISTANCE MATRIX. This format just outputs a matrix of all the
++pairwise distances in a format that can be used by the PHYLIP package. It used
++to be useful when one could not produce distances from protein sequences in the
++Phylip package but is now redundant (PROTDIST of Phylip 3.5 now does this).
++</P>
++<P>
++4) NEXUS FORMAT TREE. This format is used by several popular phylogeny programs,
++including PAUP and MacClade. The format is described fully in:
++Maddison, D. R., D. L. Swofford and W. P. Maddison.  1997.
++NEXUS: an extensible file format for systematic information.
++Systematic Biology 46:590-621.
++</P>
++<P>
++BOOTSTRAP LABELS ON: By default, the bootstrap values are correctly placed on
++the tree branches of the phylip format output tree. The toggle allows them to
++be placed on the nodes, which is incorrect, but some display packages (e.g.
++TreeTool, TreeView and Phylowin) only support node labelling but not branch
++labelling. Care should be taken to note which branches and labels go together. 
++</P>
++<P>
++</P>
++<P>
++</P>
++<A HREF="#INDEX"> <EM>Back to Index</EM> </A>
++<CENTER><H2><A NAME="C">                               Colors
++</A></H2></CENTER>
++<P>
++</P>
++<P>
++Clustal X provides a versatile coloring scheme for the sequence alignment 
++display. The sequences (or profiles) are colored automatically, when they are
++loaded. Sequences can be colored either by assigning a color to specific
++residues, or on the basis of an alignment consensus. In the latter case, the
++alignment consensus is calculated automatically, and the residues in each
++column are colored according to the consensus character assigned to that
++column. In this way, you can choose to highlight, for example, conserved
++hydrophilic or hydrophobic positions in the alignment.
++</P>
++<P>
++The 'rules' used to color the alignment are specified in a COLOR PARAMETER
++FILE. Clustal X automatically looks for a file called 'colprot.par' for protein
++sequences or 'coldna.par' for DNA, in the current directory. (If you are running
++under UNIX, it then looks in your home directory, and finally in the
++directories in your PATH environment variable).
++</P>
++<P>
++By default, if no color parameter file is found, protein sequences are colored
++by residue as follows:
++</P>
++<P>
++<PRE>
++	Color			Residue Code
++</P>
++<P>
++	ORANGE			GPST
++	RED			HKR
++	BLUE			FWY
++	GREEN			ILMV
++</PRE>
++</P>
++<P>
++In the case of DNA sequences, the default colors are as follows:
++</P>
++<P>
++<PRE>
++	Color			Residue Code
++</P>
++<P>
++	ORANGE			A
++	RED			C
++	BLUE			T
++	GREEN			G
++</PRE>
++</P>
++<P>
++</P>
++<P>
++The default BACKGROUND COLORING option shows the sequence residues using a
++black character on a colored background. It can be switched off to show
++residues as a colored character on a white background. 
++</P>
++<P>
++Either BLACK AND WHITE or DEFAULT COLOR options can be selected. The Color
++option looks first for the color parameter file (as described above) and, if no
++file is found, uses the default residue-specific colors.
++</P>
++<P>
++You can specify your own coloring scheme by using the LOAD COLOR PARAMETER FILE
++option. The format of the color parameter file is described below.
++</P>
++<P>
++<H4>
++COLOR PARAMETER FILE
++</H4>
++</P>
++<P>
++This file is divided into 3 sections:
++</P>
++<P>
++1) the names and rgb values of the colors
++2) the rules for calculating the consensus
++3) the rules for assigning colors to the residues
++</P>
++<P> 
++An example file is given here.
++</P>
++<P>
++<PRE>
++ --------------------------------------------------------------------
++@rgbindex
++RED          0.9 0.1 0.1
++BLUE         0.1 0.1 0.9
++GREEN        0.1 0.9 0.1
++YELLOW       0.9 0.9 0.0
++</P>
++<P>
++@consensus
++% = 60% w:l:v:i:m:a:f:c:y:h:p
++# = 80% w:l:v:i:m:a:f:c:y:h:p
++- = 50% e:d
+++ = 60% k:r
++q = 50% q:e
++p = 50% p
++n = 50% n
++t = 50% t:s
++</P>
++<P>
++@color
++g = RED
++p = YELLOW
++t = GREEN if t:%:#
++n = GREEN if n
++w = BLUE if %:#:p
++k = RED if +
++ --------------------------------------------------------------------
++</PRE>
++</P>
++<P>
++The first section is optional and is identified by the header @rgbindex. If
++this section exists, each color used in the file must be named and the rgb
++values specified (on a scale from 0 to 1). If the rgb index section is not
++found, the following set of hard-coded colors will be used.
++</P>
++<P>
++<PRE>
++RED          0.9 0.1 0.1
++BLUE         0.1 0.1 0.9
++GREEN        0.1 0.9 0.1
++ORANGE       0.9 0.7 0.3
++CYAN         0.1 0.9 0.9
++PINK         0.9 0.5 0.5
++MAGENTA      0.9 0.1 0.9
++YELLOW       0.9 0.9 0.0
++</PRE>
++</P>
++<P>
++The second section is optional and is identified by the header @consensus. It
++defines how the consensus is calculated.
++</P>
++<P> 
++The format of each consensus parameter is:-
++</P>
++<P> 
++<PRE>
++c = n% residue_list
++</P>
++<P> 
++        where
++              c             is a character used to identify the parameter.
++              n             is an integer value used as the percentage cutoff
++                            point.
++              residue_list  is a list of residues denoted by a single
++                            character, delimited by a colon (:).
++</PRE>
++</P>
++<P> 
++For example:   # = 60% w:l:v:i
++</P>
++<P>
++will assign a consensus character # to any column in the alignment which
++contains more than 60% of the residues w,l,v and i.
++</P>
++<P>        
++</P>
++<P> 
++The third section is identified by the header @color, and defines how colors
++are assigned to each residue in the alignment.
++</P>
++<P> 
++The color parameters can take one of two formats:
++</P>
++<P>
++<PRE>
++1) r = color
++2) r = color if consensus_list
++</P>
++<P> 
++        where
++              r             is a character used to denote a residue.
++              color         is one of the colors in the GDE color lookup table.
++              consensus_list is a list of consensus characters, each denoted
++                            by a single character, delimited by a colon (:).
++</PRE>
++</P>
++<P> 
++Examples:
++1) g = ORANGE
++</P>
++<P>
++will color all glycines ORANGE, regardless of the consensus.
++</P>
++<P>
++2) w = BLUE if w:%:#
++</P>
++<P>
++will color BLUE any tryptophan which is found in a column with a consensus of
++w, % or #.
++</P>
++<P> 
++</P>
++<P>
++</P>
++<A HREF="#INDEX"> <EM>Back to Index</EM> </A>
++<CENTER><H2><A NAME="Q">                       Alignment Quality Analysis
++</A></H2></CENTER>
++<P>
++</P>
++<P>
++<H3>
++QUALITY SCORES
++</H3>
++</P>
++<P>
++Clustal X provides an indication of the quality of an alignment by plotting
++a 'conservation score' for each column of the alignment. A high score indicates
++a well-conserved column; a low score indicates low conservation. The quality
++curve is drawn below the alignment.
++</P>
++<P>
++Two methods are also provided to indicate single residues or sequence segments
++which score badly in the alignment.
++</P>
++<P> 
++Low-scoring residues are expected to occur at a moderate frequency in all the
++sequences because of their steady divergence due to the natural processes of
++evolution. The most divergent sequences are likely to have the most outliers.
++However, the highlighted residues are especially useful in pointing to
++sequence misalignments. Note that clustering of highlighted residues is a
++strong indication of misalignment. This can arise due to various reasons, for
++example:
++</P>
++<P> 
++        1. Partial or total misalignments caused by a failure in the
++        alignment algorithm. Usually only in difficult alignment cases.
++</P>
++<P> 
++        2. Partial or total misalignments because at least one of the
++        sequences in the given set is partly or completely unrelated to the
++        other sequences. It is up to the user to check that the set of
++        sequences are alignable.
++</P>
++<P>
++        3. Frameshift translation errors in a protein sequence causing local
++        mismatched regions to be heavily highlighted. These are surprisingly
++        common in database entries. If suspected, a 3-frame translation of
++        the source DNA needs to be examined.
++</P>
++<P> 
++Occasionally, highlighted residues may point to regions of some biological
++significance. This might happen for example if a protein alignment contains a
++sequence which has acquired new functions relative to the main sequence set. It
++is important to exclude other explanations, such as error or the natural
++divergence of sequences, before invoking a biological explanation.
++</P>
++<P>
++</P>
++<P>
++<H3>
++LOW-SCORING SEGMENTS
++</H3>
++</P>
++<P>
++Unreliable regions in the alignment can be highlighted using the Low-Scoring
++Segments option. A sequence-weighted profile is used to indicate any segments
++in the sequences which score badly. Because the profile calculation may take
++some time, an option is provided to calculate LOW-SCORING SEGMENTS. The 
++segment display can then be toggled on or off without having to repeat the
++time-consuming calculations.
++</P>
++<P>
++For details of the low-scoring segment calculation, see the CALCULATION section
++below.
++</P>
++<P>
++</P>
++<P>
++<H4>
++LOW-SCORING SEGMENT PARAMETERS
++</H4>
++</P>
++<P>
++MINIMUM LENGTH OF SEGMENTS: short segments (or even single residues) can be
++hidden by increasing the minimum length of segments which will be displayed.
++</P>
++<P>
++DNA MARKING SCALE is used to remove less significant segments from the 
++highlighted display. Increase the scale to display more segments; decrease the
++scale to remove the least significant.
++</P>
++<P>
++</P>
++<P>
++PROTEIN WEIGHT MATRIX: the scoring table which describes the similarity of each
++amino acid to each other. The matrix is used to calculate the sequence-
++weighted profile scores. There are four 'in-built' Log-Odds matrices offered:
++the Gonnet PAM 80, 120, 250, 350 matrices. A more stringent matrix which only
++gives a high score to identities and the most favoured conservative
++substitutions, may be more suitable when the sequences are closely related. For
++more divergent sequences, it is appropriate to use "softer" matrices which give
++a high score to many other frequent substitutions. This  option automatically
++recalculates the low-scoring segments.
++</P>
++<P>
++</P>
++<P>
++DNA WEIGHT MATRIX: Two hard-coded matrices are available:
++</P>
++<P>
++1) IUB. This is the default scoring matrix used by BESTFIT for the comparison
++of nucleic acid sequences. X's and N's are treated as matches to any IUB
++ambiguity symbol. All matches score 1.0; all mismatches for IUB symbols score
++0.9.
++</P>
++<P>
++2) CLUSTALW(1.6). The previous system used by ClustalW, in which matches score
++1.0 and mismatches score 0. All matches for IUB symbols also score 0. 
++</P>
++<P>
++A new matrix can be read from a file on disk, if the filename consists only
++of lower case characters. The values in the new weight matrix should be
++similarities and should be NEGATIVE for infrequent substitutions.
++</P>
++<P> 
++INPUT FORMAT. The format used for a new matrix is the same as the BLAST
++program. Any lines beginning with a # character are assumed to be comments. The
++first non-comment line should contain a list of amino acids in any order, using
++the 1 letter code, followed by a * character. This should be followed by a
++square matrix of scores, with one row and one column for each amino acid. The
++last row and column of the matrix (corresponding to the * character) contain
++the minimum score over the whole matrix.
++</P>
++<P>
++<H4>
++QUALITY SCORE PARAMETERS
++</H4>
++</P>
++<P>
++You can customise the column 'quality scores' plotted underneath the alignment
++display using the following options.
++</P>
++<P>
++SCORE PLOT SCALE: this is a scalar value from 1 to 10, which can be used to
++change the scale of the quality score plot. 
++</P>
++<P>
++RESIDUE EXCEPTION CUTOFF: this is a scalar value from 1 to 10, which can be
++used to change the number of residue exceptions which are highlighted in the
++alignment display. (For an explanation of this cutoff, see the CALCULATION OF
++RESIDUE EXCEPTIONS section below.)
++</P>
++<P>
++PROTEIN WEIGHT MATRIX: the scoring table which describes the similarity of 
++each amino acid to each other. 
++</P>
++<P> 
++DNA WEIGHT MATRIX: two hard-coded matrices are available: IUB and CLUSTALW(1.6).
++</P>
++<P>
++For more information about the weight matrices, see the help above for
++the Low-scoring Segments Weight Matrix.
++</P>
++<P>
++For details of the quality score calculations, see the CALCULATION section
++below.
++</P>
++<P>
++</P>
++<P>
++<STRONG>
++SHOW LOW-SCORING SEGMENTS
++</STRONG>
++</P>
++<P>                       
++The low-scoring segment display can be toggled on or off. This option does not
++recalculate the profile scores.
++</P>
++<P>
++</P>
++<P>
++<STRONG>
++SHOW EXCEPTIONAL RESIDUES
++</STRONG>
++</P>
++<P>                       
++This option highlights individual residues which score badly in the alignment
++quality calculations. Residues which score exceptionally low are highlighted by
++using a white character on a grey background.
++</P>
++<P>
++<STRONG>
++SAVE QUALITY SCORES TO FILE
++</STRONG>
++</P>
++<P>
++The quality scores that are plotted underneath the alignment display can also
++be saved in a text file. Each column in the alignment is written on one line in
++the output file, with the value of the quality score at the end of the line.
++Only the sequences currently selected in the display are written to the file.
++One use for quality scores is to color residues in a protein structure by
++sequence conservation. In this way conserved surface residues can be
++highlighted to locate functional regions such as ligand-binding sites.
++</P>
++<P>
++</P>
++<P>
++<H3>
++CALCULATION OF QUALITY SCORES
++</H3>
++</P>
++<P>
++Suppose we have an alignment of m sequences of length n. Then, the alignment
++can be written as:
++</P>
++<P>
++<PRE>
++        A11 A12 A13 .......... A1n
++        A21 A22 A23 .......... A2n
++        .
++        .
++        Am1 Am2 Am3 .......... Amn
++</PRE>
++</P>
++<P>
++We also have a residue comparison matrix of size R where C(i,j) is the score
++for aligning residue i with residue j.
++</P>
++<P>
++We want to calculate a score for the conservation of the jth position in the
++alignment.
++</P>
++<P>
++To do this, we define an R-dimensional sequence space. For the jth position in 
++the alignment, each sequence consists of a single residue which is assigned a
++point S in the space. S has R dimensions, and for sequence i, the rth dimension
++is defined as:
++</P>
++<P>
++<PRE>
++	Sr =    C(r,Aij)
++</PRE>
++</P>
++<P>
++We then calculate a consensus value for the jth position in the alignment. This
++value X also has R dimensions, and the rth dimension is defined as:
++</P>
++<P>
++<PRE>
++	Xr = (   SUM   (Fij * C(i,r)) ) / m
++               1<=i<=R
++</PRE>
++</P>
++<P>
++where Fij is the count of residues i at position j in the alignment.
++</P>
++<P>
++Now we can calculate the distance Di between each sequence i and the consensus 
++position X in the R-dimensional space.
++</P>
++<P>
++<PRE>
++	Di = SQRT   (   SUM   (Xr - Sr)(Xr - Sr) )
++                      1<=r<=R
++</PRE>
++</P>
++<P>
++</P>
++<P>
++The quality score for the jth position in the alignment is defined as the mean
++of the sequence distances Di.
++</P>
++<P>
++The score is normalised by multiplying by the percentage of sequences which
++have residues (and not gaps) at this position.
++</P>
++<P>
++<H3>
++CALCULATION OF RESIDUE EXCEPTIONS
++</H3>
++</P>
++<P>
++The jth residue of the ith sequence is considered as an exception if the
++distance Di of the sequence from the consensus value X is greater than (Upper
++Quartile + Inter Quartile Range * Cutoff). The value used as a cutoff for
++displaying exceptions can be set from the SCORE PARAMETERS menu. A high cutoff
++value will only display very significant exceptions; a low value will allow
++more, less significant, exceptions to be highlighted.
++</P>
++<P>
++(NB. Sequences which contain gaps at this position are not included in the
++exception calculation.)
++</P>
++<P>
++</P>
++<P>
++<H3>
++CALCULATION OF LOW-SCORING SEGMENTS
++</H3>
++</P>
++<P>
++Suppose we have an alignment of m sequences of length n. Then, the alignment
++can be written as:
++</P>
++<P>
++<PRE>
++        A11 A12 A13 .......... A1n
++        A21 A22 A23 .......... A2n
++        .
++        .
++        Am1 Am2 Am3 .......... Amn
++</PRE>
++</P>
++<P>
++We also have a residue comparison matrix of size R where C(i,j) is the score
++for aligning residue i with residue j.
++</P>
++<P>
++We calculate sequence weights by building a neighbour-joining tree, in which
++branch lengths are proportional to divergence. Summing the branches by branch
++ownership provides the weights. See Thompson et al., CABIOS, 10, 19 (1994) and
++Henikoff et al., JMB, 243, 574 (1994).
++</P>
++<P>
++To find the low-scoring segments in a sequence Si, we build a weighted profile
++of the remaining sequences in the alignment. Suppose we find residue r at 
++position j in the sequence; then the score for the jth position in the sequence
++is defined as
++</P>
++<P>
++<PRE>
++	Score(Si,j) = Profile(j,r)   where Profile(j,r) is the profile score
++                                       for residue r at position j in the
++                                       alignment.
++</PRE>
++</P>
++<P>
++These residue scores are summed along the sequence in both forward and backward
++directions. If the sum of the scores is positive, then it is reset to zero.
++Segments which score negatively in both directions are considered as 
++'low-scoring' and will be highlighted in the alignment display.
++</P>
++<P>
++</P>
++<P>
++</P>
++<A HREF="#INDEX"> <EM>Back to Index</EM> </A>
++<CENTER><H2><A NAME="9">              Command Line Parameters
++</A></H2></CENTER>
++<CENTER><H3>                DATA (sequences)
++</H3></CENTER>
++<CENTER><TABLE ALIGN=ABSCENTER BORDER=1 CELLSPACING=1 CELLPADDING=5>
++<TR>
++<TD><STRONG>Parameter</STRONG></TD>
++<TD><STRONG><EM>Description</EM></STRONG></TD>
++</TR>
++<TR>
++<TD><TT>-PROFILE1=file.ext  and  -PROFILE2=file.ext  </TT></TD>
++<TD><EM>profiles (aligned sequences)</EM></TD>
++</TR>
++</TABLE></CENTER>
++<CENTER><H3>                VERBS (do things)
++</H3></CENTER>
++<CENTER><TABLE ALIGN=ABSCENTER BORDER=1 CELLSPACING=1 CELLPADDING=5>
++<TR>
++<TD><STRONG>Parameter</STRONG></TD>
++<TD><STRONG><EM>Description</EM></STRONG></TD>
++</TR>
++<TR>
++<TD><TT>-HELP  or -CHECK    </TT></TD>
++<TD><EM>outline the command line parameters</EM></TD>
++</TR>
++<TR>
++<TD><TT>-ALIGN              </TT></TD>
++<TD><EM>do full multiple alignment </EM></TD>
++</TR>
++<TR>
++<TD><TT>-TREE               </TT></TD>
++<TD><EM>calculate NJ tree</EM></TD>
++</TR>
++<TR>
++<TD><TT>-BOOTSTRAP(=n)      </TT></TD>
++<TD><EM>bootstrap a NJ tree (n= number of bootstraps; def. = 1000)</EM></TD>
++</TR>
++<TR>
++<TD><TT>-CONVERT            </TT></TD>
++<TD><EM>output the input sequences in a different file format</EM></TD>
++</TR>
++</TABLE></CENTER>
++<CENTER><H3>                PARAMETERS (set things)
++</H3></CENTER>
++<CENTER><P><STRONG>***General settings:***
++</STRONG></P></CENTER>
++<CENTER><TABLE ALIGN=ABSCENTER BORDER=1 CELLSPACING=1 CELLPADDING=5>
++<TR>
++<TD><STRONG>Parameter</STRONG></TD>
++<TD><STRONG><EM>Description</EM></STRONG></TD>
++</TR>
++<TR>
++<TD><TT>-INTERACTIVE </TT></TD>
++<TD><EM>read command line, then enter normal interactive menus</EM></TD>
++</TR>
++<TR>
++<TD><TT>-QUICKTREE   </TT></TD>
++<TD><EM>use FAST algorithm for the alignment guide tree</EM></TD>
++</TR>
++<TR>
++<TD><TT>-TYPE=       </TT></TD>
++<TD><EM>PROTEIN or DNA sequences</EM></TD>
++</TR>
++<TR>
++<TD><TT>-NEGATIVE    </TT></TD>
++<TD><EM>protein alignment with negative values in matrix</EM></TD>
++</TR>
++<TR>
++<TD><TT>-OUTFILE=    </TT></TD>
++<TD><EM>sequence alignment file name</EM></TD>
++</TR>
++<TR>
++<TD><TT>-OUTPUT=     </TT></TD>
++<TD><EM>GCG, GDE, PHYLIP, PIR or NEXUS</EM></TD>
++</TR>
++<TR>
++<TD><TT>-OUTORDER=   </TT></TD>
++<TD><EM>INPUT or ALIGNED</EM></TD>
++</TR>
++<TR>
++<TD><TT>-CASE=       </TT></TD>
++<TD><EM>LOWER or UPPER (for GDE output only)</EM></TD>
++</TR>
++<TR>
++<TD><TT>-SEQNOS=     </TT></TD>
++<TD><EM>OFF or ON (for Clustal output only)</EM></TD>
++</TR>
++</TABLE></CENTER>
++<CENTER><H3>***Fast Pairwise Alignments:***
++</H3></CENTER>
++<CENTER><TABLE ALIGN=ABSCENTER BORDER=1 CELLSPACING=1 CELLPADDING=5>
++<TR>
++<TD><STRONG>Parameter</STRONG></TD>
++<TD><STRONG><EM>Description</EM></STRONG></TD>
++</TR>
++<TR>
++<TD><TT>-TOPDIAGS=n  </TT></TD>
++<TD><EM>number of best diags.</EM></TD>
++</TR>
++<TR>
++<TD><TT>-WINDOW=n    </TT></TD>
++<TD><EM>window around best diags.</EM></TD>
++</TR>
++<TR>
++<TD><TT>-PAIRGAP=n   </TT></TD>
++<TD><EM>gap penalty</EM></TD>
++</TR>
++<TR>
++<TD><TT>-SCORE=      </TT></TD>
++<TD><EM>PERCENT or ABSOLUTE</EM></TD>
++</TR>
++</TABLE></CENTER>
++<CENTER><H3>***Slow Pairwise Alignments:***
++</H3></CENTER>
++<CENTER><TABLE ALIGN=ABSCENTER BORDER=1 CELLSPACING=1 CELLPADDING=5>
++<TR>
++<TD><STRONG>Parameter</STRONG></TD>
++<TD><STRONG><EM>Description</EM></STRONG></TD>
++</TR>
++<TR>
++<TD><TT>-PWDNAMATRIX= </TT></TD>
++<TD><EM>DNA weight matrix=IUB, CLUSTALW or filename</EM></TD>
++</TR>
++<TR>
++<TD><TT>-PWGAPOPEN=f  </TT></TD>
++<TD><EM>gap opening penalty</EM></TD>
++</TR>
++<TR>
++<TD><TT>-PWGAPEXT=f  </TT></TD>
++<TD><EM>gap extension penalty</EM></TD>
++</TR>
++</TABLE></CENTER>
++<CENTER><H3>***Multiple Alignments:***
++</H3></CENTER>
++<CENTER><TABLE ALIGN=ABSCENTER BORDER=1 CELLSPACING=1 CELLPADDING=5>
++<TR>
++<TD><STRONG>Parameter</STRONG></TD>
++<TD><STRONG><EM>Description</EM></STRONG></TD>
++</TR>
++<TR>
++<TD><TT>-USETREE=    </TT></TD>
++<TD><EM>file for old guide tree</EM></TD>
++</TR>
++<TR>
++<TD><TT>-MATRIX=     </TT></TD>
++<TD><EM>Protein weight matrix=BLOSUM, PAM, GONNET, ID or filename</EM></TD>
++</TR>
++<TR>
++<TD><TT>-DNAMATRIX=  </TT></TD>
++<TD><EM>DNA weight matrix=IUB, CLUSTALW or filename</EM></TD>
++</TR>
++<TR>
++<TD><TT>-GAPOPEN=f   </TT></TD>
++<TD><EM>gap opening penalty</EM></TD>
++</TR>
++<TR>
++<TD><TT>-GAPEXT=f  </TT></TD>
++<TD><EM>gap extension penalty</EM></TD>
++</TR>
++<TR>
++<TD><TT>-ENDGAPS     </TT></TD>
++<TD><EM>no end gap separation pen.</EM></TD>
++</TR>
++<TR>
++<TD><TT>-GAPDIST=n   </TT></TD>
++<TD><EM>gap separation pen. range</EM></TD>
++</TR>
++<TR>
++<TD><TT>-NOPGAP      </TT></TD>
++<TD><EM>residue-specific gaps off</EM></TD>
++</TR>
++<TR>
++<TD><TT>-NOHGAP    </TT></TD>
++<TD><EM>hydrophilic gaps off</EM></TD>
++</TR>
++<TR>
++<TD><TT>-HGAPRESIDUES= </TT></TD>
++<TD><EM>list hydrophilic res.</EM></TD>
++</TR>
++<TR>
++<TD><TT>-MAXDIV=n    </TT></TD>
++<TD><EM>% ident. for delay</EM></TD>
++</TR>
++<TR>
++<TD><TT>-TYPE=       </TT></TD>
++<TD><EM>PROTEIN or DNA</EM></TD>
++</TR>
++<TR>
++<TD><TT>-TRANSWEIGHT=f </TT></TD>
++<TD><EM>transitions weighting</EM></TD>
++</TR>
++</TABLE></CENTER>
++<CENTER><H3>***Profile Alignments:***
++</H3></CENTER>
++<CENTER><TABLE ALIGN=ABSCENTER BORDER=1 CELLSPACING=1 CELLPADDING=5>
++<TR>
++<TD><STRONG>Parameter</STRONG></TD>
++<TD><STRONG><EM>Description</EM></STRONG></TD>
++</TR>
++<TR>
++<TD><TT>-NEWTREE1=    </TT></TD>
++<TD><EM>file for new guide tree for profile1</EM></TD>
++</TR>
++<TR>
++<TD><TT>-NEWTREE2=    </TT></TD>
++<TD><EM>file for new guide tree for profile2</EM></TD>
++</TR>
++<TR>
++<TD><TT>-USETREE1=    </TT></TD>
++<TD><EM>file for old guide tree for profile1</EM></TD>
++</TR>
++<TR>
++<TD><TT>-USETREE2=    </TT></TD>
++<TD><EM>file for old guide tree for profile2</EM></TD>
++</TR>
++</TABLE></CENTER>
++<CENTER><H3>***Sequence to Profile Alignments:***
++</H3></CENTER>
++<CENTER><TABLE ALIGN=ABSCENTER BORDER=1 CELLSPACING=1 CELLPADDING=5>
++<TR>
++<TD><STRONG>Parameter</STRONG></TD>
++<TD><STRONG><EM>Description</EM></STRONG></TD>
++</TR>
++<TR>
++<TD><TT>-NEWTREE=    </TT></TD>
++<TD><EM>file for new guide tree</EM></TD>
++</TR>
++<TR>
++<TD><TT>-USETREE=    </TT></TD>
++<TD><EM>file for old guide tree</EM></TD>
++</TR>
++</TABLE></CENTER>
++<CENTER><H3>***Structure Alignments:***
++</H3></CENTER>
++<CENTER><TABLE ALIGN=ABSCENTER BORDER=1 CELLSPACING=1 CELLPADDING=5>
++<TR>
++<TD><STRONG>Parameter</STRONG></TD>
++<TD><STRONG><EM>Description</EM></STRONG></TD>
++</TR>
++<TR>
++<TD><TT>-NOSECSTR2     </TT></TD>
++<TD><EM>do not use secondary structure/gap penalty mask for profile 2</EM></TD>
++</TR>
++<TR>
++<TD><TT>-SECSTROUT=STRUCTURE or MASK or BOTH or NONE  </TT></TD>
++<TD><EM>output in alignment file</EM></TD>
++</TR>
++<TR>
++<TD><TT>-HELIXGAP=n    </TT></TD>
++<TD><EM>gap penalty for helix core residues </EM></TD>
++</TR>
++<TR>
++<TD><TT>-STRANDGAP=n   </TT></TD>
++<TD><EM>gap penalty for strand core residues</EM></TD>
++</TR>
++<TR>
++<TD><TT>-LOOPGAP=n     </TT></TD>
++<TD><EM>gap penalty for loop regions</EM></TD>
++</TR>
++<TR>
++<TD><TT>-TERMINALGAP=n </TT></TD>
++<TD><EM>gap penalty for structure termini</EM></TD>
++</TR>
++<TR>
++<TD><TT>-HELIXENDIN=n  </TT></TD>
++<TD><EM>number of residues inside helix to be treated as terminal</EM></TD>
++</TR>
++<TR>
++<TD><TT>-HELIXENDOUT=n </TT></TD>
++<TD><EM>number of residues outside helix to be treated as terminal</EM></TD>
++</TR>
++<TR>
++<TD><TT>-STRANDENDIN=n </TT></TD>
++<TD><EM>number of residues inside strand to be treated as terminal</EM></TD>
++</TR>
++<TR>
++<TD><TT>-STRANDENDOUT=n</TT></TD>
++<TD><EM>number of residues outside strand to be treated as terminal </EM></TD>
++</TR>
++</TABLE></CENTER>
++<CENTER><H3>***Trees:***
++</H3></CENTER>
++<CENTER><TABLE ALIGN=ABSCENTER BORDER=1 CELLSPACING=1 CELLPADDING=5>
++<TR>
++<TD><STRONG>Parameter</STRONG></TD>
++<TD><STRONG><EM>Description</EM></STRONG></TD>
++</TR>
++<TR>
++<TD><TT>-SEED=n    </TT></TD>
++<TD><EM>seed number for bootstraps</EM></TD>
++</TR>
++<TR>
++<TD><TT>-KIMURA      </TT></TD>
++<TD><EM>use Kimura's correction</EM></TD>
++</TR>
++<TR>
++<TD><TT>-TOSSGAPS  </TT></TD>
++<TD><EM>ignore positions with gaps</EM></TD>
++</TR>
++<TR>
++<TD><TT>-BOOTLABELS=node OR branch </TT></TD>
++<TD><EM>position of bootstrap values in tree display</EM></TD>
++</TR>
++</TABLE></CENTER>
++</P>
++<A HREF="#INDEX"> <EM>Back to Index</EM> </A>
++<CENTER><H2><A NAME="R">                             References
++</A></H2></CENTER>
++<P>
++</P>
++<P>
++<STRONG>
++The ClustalX program is described in the manuscript:
++</STRONG>
++</P>
++<P>
++Thompson,J.D., Gibson,T.J., Plewniak,F., Jeanmougin,F. and Higgins,D.G. (1997)
++The ClustalX windows interface: flexible strategies for multiple sequence 
+ alignment aided by quality analysis tools. Nucleic Acids Research, 25:4876-4882.
+ </P>
+ <P>

Added: trunk/packages/clustalx/trunk/debian/patches/clustalx_help.patch
===================================================================
--- trunk/packages/clustalx/trunk/debian/patches/clustalx_help.patch	                        (rev 0)
+++ trunk/packages/clustalx/trunk/debian/patches/clustalx_help.patch	2008-02-21 13:59:37 UTC (rev 1458)
@@ -0,0 +1,1529 @@
+Index: clustalw-1.83/clustalx_help
+===================================================================
+--- clustalw-1.83.orig/clustalx_help
++++ clustalw-1.83/clustalx_help
+@@ -1,4 +1,1524 @@
+ 
++This is the on-line help file for Clustal X (version 1.81), using the NCBI
++Vibrant Toolkit.   
++
++It should be named or defined as: clustalx_help 
++except with MSDOS in which case it should be named ClustalX.HLP
++
++For full details of usage and algorithms, please read the CLUSTALW.DOC file.
++
++
++Toby  Gibson                         EMBL, Heidelberg, Germany.
++Des   Higgins                        UCC, Cork, Ireland.
++Julie Thompson/Francois Jeanmougin   IGBMC, Strasbourg, France.
++
++
++
++
++>>HELP G <<
++                      General help for CLUSTAL X (1.8)
++
++Clustal X is a windows interface for the ClustalW multiple sequence alignment
++program. It provides an integrated environment for performing multiple sequence
++and profile alignments and analysing the results. The sequence alignment is
++displayed in a window on the screen. A versatile coloring scheme has been
++incorporated allowing you to highlight conserved features  in the alignment.
++The pull-down menus at the top of the window allow you to select all the
++options required for traditional multiple sequence and profile alignment.
++
++You can cut-and-paste sequences to change the order of the alignment; you can
++select a subset of sequences to be aligned; you can select a sub-range of the
++alignment to be realigned and inserted back into the original alignment.
++
++Alignment quality analysis can be performed and low-scoring segments or
++exceptional residues can be highlighted.
++
++ClustalX is available for a number of different platforms including: SUN
++Solaris, IRIX5.3 on Silicon Graphics, Digital UNIX on DECStations, Microsoft
++Windows (32 bit) for PC's, Linux ELF for x86 PC's and Macintosh PowerMac. (See
++the README file for Installation instructions.)
++
++
++<H4>
++SEQUENCE INPUT
++</H4>
++
++Sequences and profiles (a term for pre-existing alignments) are input using 
++the FILE menu. Invalid options will be disabled. All sequences must be included
++into 1 file. 7 formats are automatically recognised: NBRF/PIR, EMBL/SWISSPROT,
++Pearson (Fasta), Clustal (*.aln), GCG/MSF (Pileup), GCG9 RSF and GDE flat file.
++All non-alphabetic characters (spaces, digits, punctuation marks) are ignored
++except "-" which is used to indicate a GAP ("." in MSF/RSF).  
++
++<H4>
++SEQUENCE / PROFILE ALIGNMENTS
++</H4>
++
++Clustal X has two modes which can be selected using the switch directly above
++the sequence display: MULTIPLE ALIGNMENT MODE and PROFILE ALIGNMENT MODE.
++
++To do a MULTIPLE ALIGNMENT on a set of sequences, make sure MULTIPLE ALIGNMENT
++MODE is selected. A single sequence data area is then displayed. The ALIGNMENT
++menu then allows you to either produce a guide tree for the alignment, or to do
++a multiple alignment following the guide tree, or to do a full multiple
++alignment.
++
++In PROFILE ALIGNMENT MODE, two sequence data areas are displayed, allowing you
++to align 2 alignments (termed profiles). Profiles are also used to add a new
++sequence to an old alignment, or to use secondary structure to guide the
++alignment process. GAPS in the old alignments are indicated using the "-" 
++character. PROFILES can be input in ANY of the allowed formats; just  use "-"
++(or "." for MSF/RSF) for each gap position. In Profile Alignment Mode, a button
++"Lock Scroll" is displayed which allows you to scroll the two profiles together
++using a single scroll bar. When the Lock Scroll is turned off, the two profiles
++can be scrolled independently.
++
++<H4>
++PHYLOGENETIC TREES
++</H4>
++
++Phylogenetic trees can be calculated from old alignments (read in with "-"
++characters to indicate gaps) OR after a multiple alignment while the alignment
++is still displayed.
++
++<H4>
++ALIGNMENT DISPLAY
++</H4>
++
++The alignment is displayed on the screen with the sequence names on the left
++hand side. The sequence alignment is for display only, it cannot be edited here
++(except for changing the sequence order by cutting-and-pasting on the sequence
++names). 
++
++A ruler is displayed below the sequences, starting at 1 for the first residue
++position (residue numbers in the sequence input file are ignored).
++
++A line above the alignment is used to mark strongly conserved positions. Three
++characters ('*', ':' and '.') are used:
++
++'*' indicates positions which have a single, fully conserved residue
++
++':' indicates that one of the following 'strong' groups is fully conserved:-
++<PRE>
++                 STA  
++                 NEQK  
++                 NHQK  
++                 NDEQ  
++                 QHRK  
++                 MILV  
++                 MILF  
++                 HY  
++                 FYW  
++</PRE>
++
++'.' indicates that one of the following 'weaker' groups is fully conserved:-
++<PRE>
++                 CSA  
++                 ATV  
++                 SAG  
++                 STNK  
++                 STPA  
++                 SGND  
++                 SNDEQK  
++                 NDEQHK  
++                 NEQHRK  
++                 FVLIM  
++                 HFY  
++</PRE>
++
++These are all the positively scoring groups that occur in the Gonnet Pam250
++matrix. The strong and weak groups are defined as strong score >0.5 and weak
++score =<0.5 respectively.
++
++For profile alignments, secondary structure and gap penalty masks are displayed
++above the sequences, if any data is found in the profile input file.
++
++
++>>HELP F <<
++                      Input / Output Files 
++
++LOAD SEQUENCES reads sequences from one of 7 file formats, replacing any
++sequences that are already loaded. All sequences must be in 1 file. The formats
++that are automatically recognised are: NBRF/PIR, EMBL/SWISSPROT, Pearson
++(Fasta), Clustal (*.aln), GCG/MSF (Pileup), GCG9/RSF and GDE flat file.  All
++non-alphabetic characters (spaces, digits, punctuation  marks) are ignored
++except "-" which is used to indicate a GAP ("." in MSF/RSF).
++
++The program tries to automatically recognise the different file formats used
++and to guess whether the sequences are amino acid or nucleotide.  This is not
++always foolproof.
++
++FASTA and NBRF/PIR formats are recognised by having a ">" as the first 
++character in the file.  
++
++EMBL/Swiss Prot formats are recognised by the letters "ID" at the start of the
++file (the token for the entry name field).  
++
++CLUSTAL format is recognised by the word CLUSTAL at the beginning of the file.
++
++GCG/MSF format is recognised by one of the following:
++<UL>
++<LI>
++       - the word PileUp at the start of the file.
++</LI><LI>
++       - the word !!AA_MULTIPLE_ALIGNMENT or !!NA_MULTIPLE_ALIGNMENT
++         at the start of the file.
++</LI><LI>
++       - the word MSF on the first line of the file, and the characters ..
++         at the end of this line.
++</LI>
++</UL>
++ 
++GCG/RSF format is recognised by the word !!RICH_SEQUENCE at the beginning of
++the file.
++
++
++If 85% or more of the characters in the sequence are from A,C,G,T,U or N, the
++sequence will be assumed to be nucleotide.  This works in 97.3% of cases but
++watch out!
++
++APPEND SEQUENCES is only valid in MULTIPLE ALIGNMENT MODE. The input sequences
++do not replace those already loaded, but are appended at the end of the
++alignment.
++
++SAVE SEQUENCES AS... offers the user a choice of one of six output formats:
++CLUSTAL, NBRF/PIR, GCG/MSF, PHYLIP, NEXUS or GDE. All sequences are written
++to a single file. Options are available to save a range of the alignment, 
++switch between UPPER/LOWER case for GDE files, and to output SEQUENCE NUMBERING
++for CLUSTAL files.
++
++LOAD PROFILE 1 reads sequences in the same 7 file formats, replacing any
++sequences already loaded as Profile 1. This option will also remove any
++sequences which are loaded in Profile 2.
++
++LOAD PROFILE 2 reads sequences in the same 7 file formats, replacing any
++sequences already loaded as Profile 2.
++
++SAVE PROFILE 1 AS... is similar to the Save Sequences option except that only
++those sequences in Profile 1 will be written to the output file.
++
++SAVE PROFILE 2 AS... is similar to the Save Sequences option except that only
++those sequences in Profile 2 will be written to the output file.
++
++WRITE ALIGNMENT AS POSTSCRIPT will write the sequence display to a postscript
++format file. This will include any secondary structure / gap penalty mask 
++information and the consensus and ruler lines which are displayed on the
++screen. The Alignment Quality curve can be optionally included in the output
++file.
++
++WRITE PROFILE 1 AS POSTSCRIPT is similar to WRITE ALIGNMENT AS POSTSCRIPT
++except that only the profile 1 display will be printed.
++
++WRITE PROFILE 2 AS POSTSCRIPT is similar to WRITE ALIGNMENT AS POSTSCRIPT
++except that only the profile 2 display will be printed.
++
++
++<H4>
++POSTSCRIPT PARAMETERS
++</H4>
++
++A number of options are available to allow you to configure your postscript
++output file.
++
++PS COLORS FILE:
++
++The exact RGB values required to reproduce the colors used in the alignment
++window will vary from printer to printer. A PS colors file can be specified
++that contains the RGB values for all the colors required by each of your
++postscript printers.
++
++By default, Clustal X looks for a file called 'colprint.par' in the current
++directory (if you're running under UNIX, it then looks in your home directory,
++and finally in the directories in your PATH environment variable). If no PS
++colors file is found or a color used on the screen is not defined here, the
++screen RGB values (from the Color Parameter File) are used.
++
++The PS colors file consists of one line for each color to be defined, with the
++color name followed by the RGB values (on a scale of 0 to 1). For example,
++
++RED          0.9 0.1 0.1
++
++Blank lines and comments (lines beginning with a '#' character) are ignored.
++
++
++PAGE SIZE:  The alignment can be displayed on either A4, A3 or US Letter size
++pages.
++
++ORIENTATION: The alignment can be displayed on either a landscape or portrait
++page.
++
++PRINT HEADER: An optional header including the postscript filename, and
++creation date can be printed at the top of each page.
++
++PRINT QUALITY CURVE: The Alignment Quality curve which is displayed underneath
++the alignment on the screen can be included in the postscript output.
++
++PRINT RULER: The ruler which is displayed underneath the alignment on the 
++screen can be included in the postscript output.
++
++PRINT RESIDUE NUMBERS: Sequence residue numbers can be printed at the right
++hand side of the alignment.
++
++RESIZE TO FIT PAGE: By default, the alignment is scaled to fit the page size
++selected. This option can be turned off, in which case a font size of 10 will
++be used for the sequences.
++
++PRINT FROM POSITION/TO: A range of the alignment can be printed. The default
++is to print the full alignment. The first and last residues to be printed are
++specified here.
++
++USE BLOCK LENGTH: The alignment can be divided into blocks of residues. The
++number of residues in a block is specified here. More than one block may then
++be printed on a single page. This is useful for long alignments of a small
++number of sequences. If the block length is set to 0, the alignment will not
++be divided into blocks, but printed across a number of pages.
++
++>>HELP E <<
++                          Editing Alignments
++
++Clustal X allows you to change the order of the sequences in the alignment, by
++cutting-and-pasting the sequence names.
++
++To select a group of sequences to be moved, click on a sequence name and drag
++the cursor until all the required sequences are highlighted. Holding down the
++Shift key when clicking on the first name will add new sequences to those
++already selected.
++
++(Options are provided to Select All Sequences, Select Profile 1 or Select 
++Profile 2.)
++
++The selected sequences can be removed from the alignment by using the EDIT
++menu, CUT option.
++
++To add the cut sequences back into an alignment, select a sequence by clicking
++on the sequence name. The cut sequences will be added to the alignment,
++immediately following the selected sequence, by the EDIT menu, PASTE option.
++
++To add the cut sequences to an empty alignment (eg. when cutting sequences from
++Profile 1 and pasting them to Profile 2), click on the empty sequence name
++display area, and select the EDIT menu, PASTE option as before.
++
++The sequence selection and sequence range selection can be cleared using the
++EDIT menu, CLEAR SEQUENCE SELECTION and CLEAR RANGE SELECTION options
++respectively.
++
++To search for a string of residues in the sequences, select the sequences to be
++searched by clicking on the sequence names. You can then enter the string to
++search for by selecting the SEARCH FOR STRING option. If the string is found in
++any of the sequences selected, the sequence name and column number are printed
++below the sequence display.
++
++In PROFILE ALIGNMENT MODE, the two profiles can be merged (normally done after
++alignment) by selecting ADD PROFILE 2 TO PROFILE 1. The sequences currently
++displayed as Profile 2 will be appended to Profile 1. 
++
++The REMOVE ALL GAPS option will remove all gaps from the sequences currently
++selected.
++WARNING: This option removes ALL gaps, not only those introduced by ClustalX,
++but also those that were read from the input alignment file. Any secondary
++structure information associated with the alignment will NOT be automatically
++realigned.
++
++The REMOVE GAP-ONLY COLUMNS will remove those positions in the alignment which
++contain gaps in all sequences. This can occur as a result of removing divergent
++sequences from an alignment, or if an alignment has been realigned.
++
++>>HELP M <<
++                          Multiple Alignments
++
++Make sure MULTIPLE ALIGNMENT MODE is selected, using the switch directly above
++the sequence display area. Then, use the ALIGNMENT menu to do multiple
++alignments.
++
++Multiple alignments are carried out in 3 stages:
++ 
++1) all sequences are compared to each other (pairwise alignments);
++ 
++2) a dendrogram (like a phylogenetic tree) is constructed, describing the
++approximate groupings of the sequences by similarity (stored in a file).
++ 
++3) the final multiple alignment is carried out, using the dendrogram as a guide.
++
++The 3 stages are carried out automatically by the DO COMPLETE ALIGNMENT option.
++You can skip the first stages (pairwise alignments; guide tree) by using an old
++guide tree file (DO ALIGNMENT FROM GUIDE TREE); or you can just produce the
++guide tree with no final multiple alignment (PRODUCE GUIDE TREE ONLY).
++
++
++REALIGN SELECTED SEQUENCES is used to realign badly aligned sequences in the
++alignment. Sequences can be selected by clicking on the sequence names - see
++Editing Alignments for more details. The unselected sequences are then 'fixed'
++and a profile is made including only the unselected sequences. Each of the
++selected sequences in turn is then realigned to this profile. The realigned
++sequences will be displayed as a group at the end of the alignment.
++
++
++REALIGN SELECTED SEQUENCE RANGE is used to realign a small region of the 
++alignment. A residue range can be selected by clicking on the sequence display
++area. A multiple alignment is then performed, following the 3 stages described
++above, but only using the selected residue range. Finally the new alignment of
++the range is pasted back into the full sequence alignment.
++
++By default, gap penalties are used at each end of the subrange in order to 
++penalise terminal gaps. If the REALIGN SEGMENT END GAP PENALTIES option is
++switched off, gaps can be introduced at the ends of the residue range at no
++cost.
++
++
++ALIGNMENT PARAMETERS displays a sub-menu with the following options:
++
++RESET NEW GAPS BEFORE ALIGNMENT will remove any new gaps introduced into the
++sequences during multiple alignment if you wish to change the parameters and
++try again. This only takes effect just before you do a second multiple
++alignment. You can make phylogenetic trees after alignment whether or not this
++is ON. If you turn this OFF, the new gaps are kept even if you do a second
++multiple alignment. This allows you to iterate the alignment gradually.
++Sometimes, the alignment is improved by a second or third pass.
++
++RESET ALL GAPS BEFORE ALIGNMENT will remove all gaps in the sequences including
++gaps which were read in from the sequence input file. This only takes effect
++just before you do a second multiple alignment.  You can make phylogenetic
++trees after alignment whether or not this is ON.  If you turn this OFF, all
++gaps are kept even if you do a second multiple alignment. This allows you to
++iterate the alignment gradually.  Sometimes, the alignment is improved by a
++second or third pass.
++
++
++PAIRWISE ALIGNMENT PARAMETERS control the speed/sensitivity of the initial
++alignments.
++
++MULTIPLE ALIGNMENT PARAMETERS control the gaps in the final multiple
++alignments.
++
++PROTEIN GAP PARAMETERS displays a temporary window which allows you to set
++various parameters only used in the alignment of protein sequences.
++
++(SECONDARY STRUCTURE PARAMETERS, for use with the Profile Alignment Mode only,
++allows you to set various parameters only used with gap penalty masks.)
++
++SAVE LOG FILE will write the alignment calculation scores to a file. The log
++filename is the same as the input sequence filename, with an extension .log
++appended.
++
++
++<H4>
++OUTPUT FORMAT OPTIONS
++</H4>
++
++You can choose from 6 different alignment formats (CLUSTAL, GCG, NBRF/PIR,
++PHYLIP, GDE and NEXUS).  You can choose more than one (or all 6 if you wish).  
++
++CLUSTAL format output is a self explanatory alignment format. It shows the
++sequences aligned in blocks. It can be read in again at a later date to (for
++example) calculate a phylogenetic tree or add in new sequences by profile
++alignment.
++
++GCG output can be used by any of the GCG programs that can work on multiple
++alignments (e.g. PRETTY, PROFILEMAKE, PLOTALIGN). It is the same as the GCG
++.msf format files (multiple sequence file); new in version 7 of GCG.
++
++NEXUS format is used by several phylogeny programs, including PAUP and
++MacClade.
++
++PHYLIP format output can be used for input to the PHYLIP package of Joe 
++Felsenstein.  This is a very widely used package for doing every imaginable
++form of phylogenetic analysis (MUCH more than the modest introduction
++offered by this program).
++
++NBRF/PIR: this is the same as the standard PIR format with ONE ADDITION. Gap
++characters "-" are used to indicate the positions of gaps in the multiple 
++alignment. These files can be re-used as input in any part of clustal that
++allows sequences (or alignments or profiles) to be read in.  
++
++GDE:  this format is used by the GDE package of Steven Smith and is understood
++by SEQLAB in GCG 9 or later.
++
++GDE OUTPUT CASE: sequences in GDE format may be written in either upper or
++lower case.
++ 
++CLUSTALW SEQUENCE NUMBERS: residue numbers may be added to the end of the
++alignment lines in clustalw format.
++
++OUTPUT ORDER is used to control the order of the sequences in the output
++alignments. By default, it uses the order in which the sequences were aligned
++(from the guide tree/dendrogram), thus automatically grouping closely related
++sequences. It can be switched to be the same as the original input order.
++
++PARAMETER OUTPUT: This option will save all your parameter settings in a
++parameter file (suffix .par) during alignment. The file can be subsequently
++used to rerun ClustalW using the same parameters.
++
++
++<H3>
++ALIGNMENT PARAMETERS
++</H3>
++--------------------
++
++<STRONG>
++PAIRWISE ALIGNMENT PARAMETERS
++</STRONG>
++
++A distance is calculated between every pair of sequences and these are used to
++construct the phylogenetic tree which guides the final multiple alignment. The
++scores are calculated from separate pairwise alignments. These can be
++calculated using 2 methods: dynamic programming (slow but accurate) or by the
++method of Wilbur and Lipman (extremely fast but approximate).   
++
++You can choose between the 2 alignment methods using the PAIRWISE ALIGNMENTS
++option. The slow/accurate method is fast enough for short sequences but will be
++VERY SLOW for many (e.g. >100) long (e.g. >1000 residue) sequences.   
++
++
++<STRONG>
++SLOW-ACCURATE alignment parameters:
++</STRONG>
++
++These parameters do not have any effect on the speed of the alignments. They
++are used to give initial alignments which are then rescored to give percent
++identity scores. These % scores are the ones which are displayed on the 
++screen. The scores are converted to distances for the trees.
++
++Gap Open Penalty:      the penalty for opening a gap in the alignment.
++
++Gap Extension Penalty: the penalty for extending a gap by 1 residue.
++
++Protein Weight Matrix: the scoring table which describes the similarity of 
++each amino acid to each other.
++
++Load protein matrix: allows you to read in a comparison table from a file.
++
++DNA weight matrix: the scores assigned to matches and mismatches (including
++IUB ambiguity codes).
++
++Load DNA matrix: allows you to read in a comparison table from a file.
++
++See the Multiple alignment parameters, MATRIX option below for details of the
++matrix input format.
++
++
++<STRONG>
++FAST-APPROXIMATE alignment parameters:
++</STRONG>
++
++These similarity scores are calculated from fast, approximate, global align-
++ments, which are controlled by 4 parameters. 2 techniques are used to make
++these alignments very fast: 1) only exactly matching fragments (k-tuples) are
++considered; 2) only the 'best' diagonals (the ones with most k-tuple matches)
++are used.
++
++GAP PENALTY:   This is a penalty for each gap in the fast alignments. It has
++little effect on the speed or sensitivity except for extreme values.
++
++K-TUPLE SIZE:  This is the size of exactly matching fragment that is used. 
++INCREASE for speed (max= 2 for proteins; 4 for DNA), DECREASE for sensitivity.
++For longer sequences (e.g. >1000 residues) you may wish to increase the
++default.
++
++TOP DIAGONALS: The number of k-tuple matches on each diagonal (in an imaginary
++dot-matrix plot) is calculated. Only the best ones (with most matches) are used
++in the alignment. This parameter specifies how many. Decrease for speed;
++increase for sensitivity.
++
++WINDOW SIZE:  This is the number of diagonals around each of the 'best' 
++diagonals that will be used. Decrease for speed; increase for sensitivity.
++
++
++<STRONG>
++MULTIPLE ALIGNMENT PARAMETERS
++</STRONG>
++
++These parameters control the final multiple alignment. This is the core of the
++program and the details are complicated. To fully understand the use of the
++parameters and the scoring system, you will have to refer to the documentation.
++
++Each step in the final multiple alignment consists of aligning two alignments 
++or sequences. This is done progressively, following the branching order in the
++GUIDE TREE. The basic parameters to control this are two gap penalties and the
++scores for various identical/non-identical residues. 
++
++The GAP OPENING and EXTENSION PENALTIES can be set here. These control the 
++cost of opening up every new gap and the cost of every item in a gap.  
++Increasing the gap opening penalty will make gaps less frequent. Increasing 
++the gap extension penalty will make gaps shorter. Terminal gaps are not 
++penalised.
++
++The DELAY DIVERGENT SEQUENCES switch delays the alignment of the most distantly
++related sequences until after the most closely related sequences have  been
++aligned. The setting shows the percent identity level required to delay the
++addition of a sequence; sequences that are less identical than this level to
++any other sequences will be aligned later.
++
++The TRANSITION WEIGHT gives transitions (A<-->G or C<-->T i.e. purine-purine or
++pyrimidine-pyrimidine substitutions) a weight between 0 and 1; a weight of zero
++means that the transitions are scored as mismatches, while a weight of 1 gives
++the transitions the match score. For distantly related DNA sequences, the
++weight should be near to zero; for closely related sequences it can be useful
++to assign a higher score. The default is set to 0.5.
++
++
++The PROTEIN WEIGHT MATRIX option allows you to choose a series of weight
++matrices. For protein alignments, you use a weight matrix to determine the
++similarity of non-identical amino acids. For example, Tyr aligned with Phe is
++usually judged to be 'better' than Tyr aligned with Pro.
++
++There are three 'in-built' series of weight matrices offered. Each consists of
++several matrices which work differently at different evolutionary distances. To
++see the exact details, read the documentation. Crudely, we store several
++matrices in memory, spanning the full range of amino acid distance (from almost
++identical sequences to highly divergent ones). For very similar sequences, it
++is best to use a strict weight matrix which only gives a high score to
++identities and the most favoured conservative substitutions. For more divergent
++sequences, it is appropriate to use "softer" matrices which give a high score
++to many other frequent substitutions.
++
++1) BLOSUM (Henikoff). These matrices appear to be the best available for 
++carrying out data base similarity (homology searches). The matrices currently
++used are: Blosum 80, 62, 45 and 30. BLOSUM was the default in earlier Clustal X
++versions.
++
++2) PAM (Dayhoff). These have been extremely widely used since the late '70s. We
++currently use the PAM 20, 60, 120, 350 matrices.
++
++3) GONNET. These matrices were derived using almost the same procedure as the
++Dayhoff one (above) but are much more up to date and are based on a far larger
++data set. They appear to be more sensitive than the Dayhoff series. We
++currently use the GONNET 80, 120, 160, 250 and 350 matrices. This series is the
++default for Clustal X version 1.8.
++
++We also supply an identity matrix which gives a score of 10 to two identical 
++amino acids and a score of zero otherwise. This matrix is not very useful.
++
++Load protein matrix: allows you to read in a comparison matrix from a file.
++This can be either a single matrix or a series of matrices (see below for
++format). 
++
++
++DNA WEIGHT MATRIX option allows you to select a single matrix (not a series)
++used for aligning nucleic acid sequences. Two hard-coded matrices are available:
++
++1) IUB. This is the default scoring matrix used by BESTFIT for the comparison
++of nucleic acid sequences. X's and N's are treated as matches to any IUB
++ambiguity symbol. All matches score 1.9; all mismatches for IUB symbols score 0.
++
++2) CLUSTALW(1.6). A previous system used by ClustalW, in which matches score
++1.0 and mismatches score 0. All matches for IUB symbols also score 0.
++
++Load DNA matrix: allows you to read in a nucleic acid comparison matrix from a
++file (just one matrix, not a series).
++
++
++SINGLE MATRIX INPUT FORMAT
++The format used for a single matrix is the same as the BLAST program. The
++scores in the new weight matrix should be similarities. You can use negative as
++well as positive values if you wish, although the matrix will be automatically
++adjusted to all positive scores, unless the NEGATIVE MATRIX option is selected.
++Any lines beginning with a # character are assumed to be comments. The first
++non-comment line should contain a list of amino acids in any order, using the 1
++letter code, followed by a * character. This should be followed by a square
++matrix of scores, with one row and one column for each amino acid. The last row
++and column of the matrix (corresponding to the * character) contain the minimum
++score over the whole matrix.
++
++MATRIX SERIES INPUT FORMAT
++ClustalX uses different matrices depending on the mean percent identity of the
++sequences to be aligned. You can specify a series of matrices and the range of
++the percent identity for each matrix in a matrix series file. The file is
++automatically recognised by the word CLUSTAL_SERIES at the beginning of the
++file. Each matrix in the series is then specified on one line which should
++start with the word MATRIX. This is followed by the lower and upper limits of
++the sequence percent identities for which you want to apply the matrix. The
++final entry on the matrix line is the filename of a Blast format matrix file
++(see above for details of the single matrix file format).
++
++Example.
++
++CLUSTAL_SERIES
++ 
++MATRIX 81 100 /us1/user/julie/matrices/blosum80
++MATRIX 61 80 /us1/user/julie/matrices/blosum62
++MATRIX 31 60 /us1/user/julie/matrices/blosum45
++MATRIX 0 30 /us1/user/julie/matrices/blosum30
++
++
++<STRONG>
++PROTEIN GAP PARAMETERS
++</STRONG>
++
++RESIDUE SPECIFIC PENALTIES are amino acid specific gap penalties that reduce or
++increase the gap opening penalties at each position in the alignment or 
++sequence. See the documentation for details. As an example, positions that are
++rich in glycine are more likely to have an adjacent gap than positions that are
++rich in valine.
++
++HYDROPHILIC GAP PENALTIES are used to increase the chances of a gap within a
++run (5 or more residues) of hydrophilic amino acids; these are likely to be
++loop or random coil regions where gaps are more common. The residues that are
++"considered" to be hydrophilic can be entered in HYDROPHILIC RESIDUES.
++
++GAP SEPARATION DISTANCE tries to decrease the chances of gaps being too close
++to each other. Gaps that are less than this distance apart are penalised more
++than other gaps. This does not prevent close gaps; it makes them less frequent,
++promoting a block-like appearance of the alignment.
++
++END GAP SEPARATION treats end gaps just like internal gaps for the purposes of
++avoiding gaps that are too close (set by GAP SEPARATION DISTANCE above). If you
++turn this off, end gaps will be ignored for this purpose. This is useful when
++you wish to align fragments where the end gaps are not biologically meaningful.
++
++
++>>HELP P <<
++                   Profile and Structure Alignments
++   
++By PROFILE ALIGNMENT, we mean alignment using existing alignments. Profile 
++alignments allow you to store alignments of your favourite sequences and add
++new sequences to them in small bunches at a time. A profile is simply an
++alignment of one or more sequences (e.g. an alignment output file from Clustal
++X). Each input can be a single sequence. One or both sets of input sequences
++may include secondary structure assignments or gap penalty masks to guide the
++alignment. 
++
++Make sure PROFILE ALIGNMENT MODE is selected, using the switch directly above
++the sequence display area. Then, use the ALIGNMENT menu to do profile and
++secondary structure alignments.
++
++The profiles can be in any of the allowed input formats with "-" characters
++used to specify gaps (except for GCG/MSF where "." is used).
++
++You have to load the 2 profiles by choosing FILE, LOAD PROFILE 1 and  LOAD
++PROFILE 2. Then ALIGNMENT, ALIGN PROFILE 2 TO PROFILE 1 will align the 2
++profiles to each other. Secondary structure masks in either profile can be used
++to guide the alignment. This option compares all the sequences in profile 1
++with all the sequences in profile 2 in order to build guide trees which will be
++used to calculate sequence weights, and select appropriate alignment parameters
++for the final profile alignment.
++
++You can skip the first stage (pairwise alignments; guide trees) by using old
++guide tree files (ALIGN PROFILES FROM GUIDE TREES). 
++
++The ALIGN SEQUENCES TO PROFILE 1 option will take the sequences in the second
++profile and align them to the first profile, 1 at a time.  This is useful to
++add some new sequences to an existing alignment, or to align a set of sequences
++to a known structure. In this case, the second profile set need not be
++pre-aligned.
++
++You can skip the first stage (pairwise alignments; guide tree) by using an old
++guide tree file (ALIGN SEQUENCES TO PROFILE 1 FROM TREE). 
++
++SAVE LOG FILE will write the alignment calculation scores to a file. The log
++filename is the same as the input sequence filename, with an extension .log
++appended.
++
++The alignment parameters can be set using the ALIGNMENT PARAMETERS menu,
++Pairwise Parameters, Multiple Parameters and Protein Gap Parameters options.
++These are EXACTLY the same parameters as used by the general, automatic
++multiple alignment procedure. The general multiple alignment procedure is
++simply a series of profile alignments. Carrying out a series of profile
++alignments on larger and larger groups of sequences, allows you to manually
++build up a complete alignment, if necessary editing intermediate alignments.
++
++<STRONG>
++SECONDARY STRUCTURE PARAMETERS
++</STRONG>
++
++Use this menu to set secondary structure options. If a solved structure is
++known, it can be used to guide the alignment by raising gap penalties within
++secondary structure elements, so that gaps will preferentially be inserted into
++unstructured surface loop regions. Alternatively, a user-specified gap penalty
++mask can be supplied for a similar purpose.
++
++A gap penalty mask is a series of numbers between 1 and 9, one per position in 
++the alignment. Each number specifies how much the gap opening penalty is to be 
++raised at that position (raised by multiplying the basic gap opening penalty
++by the number) i.e. a mask figure of 1 at a position means no change
++in gap opening penalty; a figure of 4 means that the gap opening penalty is
++four times greater at that position, making gaps 4 times harder to open.
++
++The format for gap penalty masks and secondary structure masks is explained in
++a separate help section.
++
++>>HELP B << 
++            Secondary Structure / Gap Penalty Masks
++
++The use of secondary structure-based penalties has been shown to improve  the
++accuracy of sequence alignment. Clustal X now allows secondary structure/ gap
++penalty masks to be supplied with the input sequences used during profile
++alignment. (NB. The secondary structure information is NOT used during multiple
++sequence alignment). The masks work by raising gap penalties in specified
++regions (typically secondary structure elements) so that gaps are
++preferentially opened in the less well conserved regions (typically surface
++loops).
++
++The USE PROFILE 1(2) SECONDARY STRUCTURE / GAP PENALTY MASK options control
++whether the input 2D-structure information or gap penalty masks will be used
++during the profile alignment.
++
++The OUTPUT options control whether the secondary structure and gap penalty
++masks should be included in the Clustal X output alignments. Showing both is
++useful for understanding how the masks work. The 2D-structure information is
++itself useful in judging the alignment quality and in seeing how residue
++conservation patterns vary with secondary structure. 
++
++The HELIX and STRAND GAP PENALTY options provide the value for raising the gap
++penalty at core Alpha Helical (A) and Beta Strand (B) residues. In CLUSTAL
++format, capital residues denote the A and B core structure notation. Basic gap
++penalties are multiplied by the amount specified.
++
++The LOOP GAP PENALTY option provides the value for the gap penalty in Loops.
++By default this penalty is not raised. In CLUSTAL format, loops are specified
++by "." in the secondary structure notation.
++
++The SECONDARY STRUCTURE TERMINAL PENALTY provides the value for setting the gap
++penalty at the ends of secondary structures. Ends of secondary structures are
++known to grow or shrink, comparing related structures. Therefore by default
++these are given intermediate values, lower than the core penalties. All
++secondary structure read in as lower case in CLUSTAL format gets the reduced
++terminal penalty.
++
++The HELIX and STRAND TERMINAL POSITIONS options specify the range of structure
++termini for the intermediate penalties. In the alignment output, these are
++indicated as lower case. For Alpha Helices, by default, the range spans the 
++end-helical turn (3 residues). For Beta Strands, the default range spans the
++end residue and the adjacent loop residue, since sequence conservation often
++extends beyond the actual H-bonded Beta Strand.
++
++Clustal X can read the masks from SWISS-PROT, CLUSTAL or GDE format input
++files. For many 3-D protein structures, secondary structure information is
++recorded in the feature tables of SWISS-PROT database entries. You should
++always check that the assignments are correct - some are quite inaccurate.
++Clustal X looks for SWISS-PROT HELIX and STRAND assignments e.g.
++
++
++<PRE>
++FT   HELIX       100    115
++FT   STRAND      118    119
++</PRE>
++
++The structure and penalty masks can also be read from CLUSTAL alignment format 
++as comment lines beginning "!SS_" or "!GM_" e.g.
++
++<PRE>
++!SS_HBA_HUMA    ..aaaAAAAAAAAAAaaa.aaaAAAAAAAAAAaaaaaaAaaa.........aaaAAAAAA
++!GM_HBA_HUMA    112224444444444222122244444444442222224222111111111222444444
++HBA_HUMA        VLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHGK
++</PRE>
++
++Note that the mask itself is a set of numbers between 1 and 9 each of which is 
++assigned to the residue(s) in the same column below. 
++
++In GDE flat file format, the masks are specified as text and the names must
++begin with "SS_" or "GM_".
++
++Either a structure or penalty mask or both may be used. If both are included
++in an alignment, the user will be asked which is to be used.
++
++
++>>HELP T <<
++                            Phylogenetic Trees
++
++Before calculating a tree, you must have an ALIGNMENT in memory. This can be
++input using the FILE menu, LOAD SEQUENCES option or you should have just
++carried out a full multiple alignment and the alignment is still in memory.
++Remember YOU MUST ALIGN THE SEQUENCES FIRST!!!!
++
++The method used is the NJ (Neighbour Joining) method of Saitou and Nei. First
++you calculate distances (percent divergence) between all pairs of sequences from
++a multiple alignment; second you apply the NJ method to the distance matrix.
++
++To calculate a tree, use the DRAW N-J TREE option. This gives an UNROOTED tree
++and all branch lengths. The root of the tree can only be inferred by using an
++outgroup (a sequence that you are certain branches at the outside of the tree
++.... certain on biological grounds) OR if you assume a degree of constancy in
++the 'molecular clock', you can place the root in the 'middle' of the tree
++(roughly equidistant from all tips).
++
++BOOTSTRAP N-J TREE uses a method for deriving confidence values for the 
++groupings in a tree (first adapted for trees by Joe Felsenstein). It involves
++making N random samples of sites from the alignment (N should be LARGE, e.g.
++500 - 1000); drawing N trees (1 from each sample) and counting how many times
++each grouping from the original tree occurs in the sample trees. You can set N
++using the NUMBER OF BOOTSTRAP TRIALS option in the BOOTSTRAP TREE window. In
++practice, you should use a large number of bootstrap replicates (1000 is
++recommended, even if it means running the program for an hour on a slow 
++computer). You can also supply a seed number for the random number generator
++here. Different runs with the same seed will give the same answer. See the
++documentation for more details.
++
++EXCLUDE POSITIONS WITH GAPS? With this option, any alignment positions where
++ANY of the sequences have a gap will be ignored. This means that 'like' will
++be compared to 'like' in all distances, which is highly desirable. It also
++automatically throws away the most ambiguous parts of the alignment, which are
++concentrated around gaps (usually). The disadvantage is that you may throw away
++much of the data if there are many gaps (which is why it is difficult for us to
++make it the default).  
++
++CORRECT FOR MULTIPLE SUBSTITUTIONS? For small divergence (say <10%) this option
++makes no difference. For greater divergence, this option corrects for the fact
++that observed distances underestimate actual evolutionary distances. This is
++because, as sequences diverge, more than one substitution will happen at many
++sites. However, you only see one difference when you look at the present day
++sequences. Therefore, this option has the effect of stretching branch lengths
++in trees (especially long branches). The corrections used here (for DNA or
++proteins) are both due to Motoo Kimura. See the documentation for details.  
++
++Where possible, this option should be used. However, for VERY divergent
++sequences, the distances cannot be reliably corrected. You will be warned if
++this happens. Even if none of the distances in a data set exceed the reliable
++threshold, if you bootstrap the data, some of the bootstrap distances may
++randomly exceed the safe limit.  
++
++SAVE LOG FILE will write the tree calculation scores to a file. The log
++filename is the same as the input sequence filename, with an extension .log
++appended.
++
++<H4>
++OUTPUT FORMAT OPTIONS
++</H4>
++
++Four different formats are allowed. None of these displays the tree visually.
++You can display the tree using the NJPLOT program distributed with Clustal X
++OR get the PHYLIP package and use the tree drawing facilities there. 
++ 
++1) CLUSTAL FORMAT TREE. This format is verbose and lists all of the distances
++between the sequences and the number of alignment positions used for each. The
++tree is described at the end of the file. It lists the sequences that are
++joined at each alignment step and the branch lengths. After two sequences are
++joined, it is referred to later as a NODE. The number of a NODE is the number
++of the lowest sequence in that NODE.   
++
++2) PHYLIP FORMAT TREE. This format is the New Hampshire format, used by many
++phylogenetic analysis packages. It consists of a series of nested parentheses,
++describing the branching order, with the sequence names and branch lengths. It
++can be read by the NJPLOT program distributed with ClustalX. It can also be
++used by the RETREE, DRAWGRAM and DRAWTREE programs of the PHYLIP package to see
++the trees graphically. This is the same format used during multiple alignment
++for the guide trees. Some other packages that can read and display New
++Hampshire format are TreeTool, TreeView, and Phylowin.
++
++3) PHYLIP DISTANCE MATRIX. This format just outputs a matrix of all the
++pairwise distances in a format that can be used by the PHYLIP package. It used
++to be useful when one could not produce distances from protein sequences in the
++Phylip package but is now redundant (PROTDIST of Phylip 3.5 now does this).
++
++4) NEXUS FORMAT TREE. This format is used by several popular phylogeny programs,
++including PAUP and MacClade. The format is described fully in:
++Maddison, D. R., D. L. Swofford and W. P. Maddison.  1997.
++NEXUS: an extensible file format for systematic information.
++Systematic Biology 46:590-621.
++
++BOOTSTRAP LABELS ON: By default, the bootstrap values are correctly placed on
++the tree branches of the phylip format output tree. The toggle allows them to
++be placed on the nodes, which is incorrect, but some display packages (e.g.
++TreeTool, TreeView and Phylowin) only support node labelling but not branch
++labelling. Care should be taken to note which branches and labels go together. 
++
++
++>>HELP C <<
++                               Colors
++
++Clustal X provides a versatile coloring scheme for the sequence alignment 
++display. The sequences (or profiles) are colored automatically, when they are
++loaded. Sequences can be colored either by assigning a color to specific
++residues, or on the basis of an alignment consensus. In the latter case, the
++alignment consensus is calculated automatically, and the residues in each
++column are colored according to the consensus character assigned to that
++column. In this way, you can choose to highlight, for example, conserved
++hydrophilic or hydrophobic positions in the alignment.
++
++The 'rules' used to color the alignment are specified in a COLOR PARAMETER
++FILE. Clustal X automatically looks for a file called 'colprot.par' for protein
++sequences or 'coldna.par' for DNA, in the current directory. (If you are running
++under UNIX, it then looks in your home directory, and finally in the
++directories in your PATH environment variable).
++
++By default, if no color parameter file is found, protein sequences are colored
++by residue as follows:
++
++<PRE>
++	Color			Residue Code
++
++	ORANGE			GPST
++	RED			HKR
++	BLUE			FWY
++	GREEN			ILMV
++</PRE>
++
++In the case of DNA sequences, the default colors are as follows:
++
++<PRE>
++	Color			Residue Code
++
++	ORANGE			A
++	RED			C
++	BLUE			T
++	GREEN			G
++</PRE>
++
++
++The default BACKGROUND COLORING option shows the sequence residues using a
++black character on a colored background. It can be switched off to show
++residues as a colored character on a white background. 
++
++Either BLACK AND WHITE or DEFAULT COLOR options can be selected. The Color
++option looks first for the color parameter file (as described above) and, if no
++file is found, uses the default residue-specific colors.
++
++You can specify your own coloring scheme by using the LOAD COLOR PARAMETER FILE
++option. The format of the color parameter file is described below.
++
++<H4>
++COLOR PARAMETER FILE
++</H4>
++
++This file is divided into 3 sections:
++
++1) the names and rgb values of the colors
++2) the rules for calculating the consensus
++3) the rules for assigning colors to the residues
++ 
++An example file is given here.
++
++<PRE>
++ --------------------------------------------------------------------
++@rgbindex
++RED          0.9 0.1 0.1
++BLUE         0.1 0.1 0.9
++GREEN        0.1 0.9 0.1
++YELLOW       0.9 0.9 0.0
++
++@consensus
++% = 60% w:l:v:i:m:a:f:c:y:h:p
++# = 80% w:l:v:i:m:a:f:c:y:h:p
++- = 50% e:d
+++ = 60% k:r
++q = 50% q:e
++p = 50% p
++n = 50% n
++t = 50% t:s
++
++@color
++g = RED
++p = YELLOW
++t = GREEN if t:%:#
++n = GREEN if n
++w = BLUE if %:#:p
++k = RED if +
++ --------------------------------------------------------------------
++</PRE>
++
++The first section is optional and is identified by the header @rgbindex. If
++this section exists, each color used in the file must be named and the rgb
++values specified (on a scale from 0 to 1). If the rgb index section is not
++found, the following set of hard-coded colors will be used.
++
++<PRE>
++RED          0.9 0.1 0.1
++BLUE         0.1 0.1 0.9
++GREEN        0.1 0.9 0.1
++ORANGE       0.9 0.7 0.3
++CYAN         0.1 0.9 0.9
++PINK         0.9 0.5 0.5
++MAGENTA      0.9 0.1 0.9
++YELLOW       0.9 0.9 0.0
++</PRE>
++
++The second section is optional and is identified by the header @consensus. It
++defines how the consensus is calculated.
++ 
++The format of each consensus parameter is:-
++ 
++<PRE>
++c = n% residue_list
++ 
++        where
++              c             is a character used to identify the parameter.
++              n             is an integer value used as the percentage cutoff
++                            point.
++              residue_list  is a list of residues denoted by a single
++                            character, delimited by a colon (:).
++</PRE>
++ 
++For example:   # = 60% w:l:v:i
++
++will assign a consensus character # to any column in the alignment which
++contains more than 60% of the residues w,l,v and i.
++        
++ 
++The third section is identified by the header @color, and defines how colors
++are assigned to each residue in the alignment.
++ 
++The color parameters can take one of two formats:
++
++<PRE>
++1) r = color
++2) r = color if consensus_list
++ 
++        where
++              r             is a character used to denote a residue.
++              color         is one of the colors in the GDE color lookup table.
++              consensus_list is a list of consensus characters, each a single
++                            character, delimited by a colon (:).
++</PRE>
++ 
++Examples:
++1) g = ORANGE
++
++will color all glycines ORANGE, regardless of the consensus.
++
++2) w = BLUE if w:%:#
++
++will color BLUE any tryptophan which is found in a column with a consensus of
++w, % or #.
++ 
++
++>>HELP Q <<
++                       Alignment Quality Analysis
++
++<H3>
++QUALITY SCORES
++</H3>
++--------------
++
++Clustal X provides an indication of the quality of an alignment by plotting
++a 'conservation score' for each column of the alignment. A high score indicates
++a well-conserved column; a low score indicates low conservation. The quality
++curve is drawn below the alignment.
++
++Two methods are also provided to indicate single residues or sequence segments
++which score badly in the alignment.
++ 
++Low-scoring residues are expected to occur at a moderate frequency in all the
++sequences because of their steady divergence due to the natural processes of
++evolution. The most divergent sequences are likely to have the most outliers.
++However, the highlighted residues are especially useful in pointing to
++sequence misalignments. Note that clustering of highlighted residues is a
++strong indication of misalignment. This can arise due to various reasons, for
++example:
++ 
++        1. Partial or total misalignments caused by a failure in the
++        alignment algorithm. Usually only in difficult alignment cases.
++ 
++        2. Partial or total misalignments because at least one of the
++        sequences in the given set is partly or completely unrelated to the
++        other sequences. It is up to the user to check that the set of
++        sequences are alignable.
++
++        3. Frameshift translation errors in a protein sequence causing local
++        mismatched regions to be heavily highlighted. These are surprisingly
++        common in database entries. If suspected, a 3-frame translation of
++        the source DNA needs to be examined.
++ 
++Occasionally, highlighted residues may point to regions of some biological
++significance. This might happen for example if a protein alignment contains a
++sequence which has acquired new functions relative to the main sequence set. It
++is important to exclude other explanations, such as error or the natural
++divergence of sequences, before invoking a biological explanation.
++
++
++<H3>
++LOW-SCORING SEGMENTS
++</H3>
++--------------------
++
++Unreliable regions in the alignment can be highlighted using the Low-Scoring
++Segments option. A sequence-weighted profile is used to indicate any segments
++in the sequences which score badly. Because the profile calculation may take
++some time, an option is provided to calculate LOW-SCORING SEGMENTS. The 
++segment display can then be toggled on or off without having to repeat the
++time-consuming calculations.
++
++For details of the low-scoring segment calculation, see the CALCULATION section
++below.
++
++
++<H4>
++LOW-SCORING SEGMENT PARAMETERS
++</H4>
++------------------------------
++
++MINIMUM LENGTH OF SEGMENTS: short segments (or even single residues) can be
++hidden by increasing the minimum length of segments which will be displayed.
++
++DNA MARKING SCALE is used to remove less significant segments from the 
++highlighted display. Increase the scale to display more segments; decrease the
++scale to remove the least significant.
++
++
++PROTEIN WEIGHT MATRIX: the scoring table which describes the similarity of each
++amino acid to each other. The matrix is used to calculate the sequence-
++weighted profile scores. There are four 'in-built' Log-Odds matrices offered:
++the Gonnet PAM 80, 120, 250, 350 matrices. A more stringent matrix which only
++gives a high score to identities and the most favoured conservative
++substitutions, may be more suitable when the sequences are closely related. For
++more divergent sequences, it is appropriate to use "softer" matrices which give
++a high score to many other frequent substitutions. This  option automatically
++recalculates the low-scoring segments.
++
++
++DNA WEIGHT MATRIX: Two hard-coded matrices are available:
++
++1) IUB. This is the default scoring matrix used by BESTFIT for the comparison
++of nucleic acid sequences. X's and N's are treated as matches to any IUB
++ambiguity symbol. All matches score 1.0; all mismatches for IUB symbols score
++0.9.
++
++2) CLUSTALW(1.6). The previous system used by ClustalW, in which matches score
++1.0 and mismatches score 0. All matches for IUB symbols also score 0. 
++
++A new matrix can be read from a file on disk, if the filename consists only
++of lower case characters. The values in the new weight matrix should be
++similarities and should be NEGATIVE for infrequent substitutions.
++ 
++INPUT FORMAT. The format used for a new matrix is the same as the BLAST
++program. Any lines beginning with a # character are assumed to be comments. The
++first non-comment line should contain a list of amino acids in any order, using
++the 1 letter code, followed by a * character. This should be followed by a
++square matrix of scores, with one row and one column for each amino acid. The
++last row and column of the matrix (corresponding to the * character) contain
++the minimum score over the whole matrix.
++
++<H4>
++QUALITY SCORE PARAMETERS
++</H4>
++------------------------
++
++You can customise the column 'quality scores' plotted underneath the alignment
++display using the following options.
++
++SCORE PLOT SCALE: this is a scalar value from 1 to 10, which can be used to
++change the scale of the quality score plot. 
++
++RESIDUE EXCEPTION CUTOFF: this is a scalar value from 1 to 10, which can be
++used to change the number of residue exceptions which are highlighted in the
++alignment display. (For an explanation of this cutoff, see the CALCULATION OF
++RESIDUE EXCEPTIONS section below.)
++
++PROTEIN WEIGHT MATRIX: the scoring table which describes the similarity of 
++each amino acid to each other. 
++ 
++DNA WEIGHT MATRIX: two hard-coded matrices are available: IUB and CLUSTALW(1.6).
++
++For more information about the weight matrices, see the help above for
++the Low-scoring Segments Weight Matrix.
++
++For details of the quality score calculations, see the CALCULATION section
++below.
++
++
++<STRONG>
++SHOW LOW-SCORING SEGMENTS
++</STRONG>
++                       
++The low-scoring segment display can be toggled on or off. This option does not
++recalculate the profile scores.
++
++
++<STRONG>
++SHOW EXCEPTIONAL RESIDUES
++</STRONG>
++                       
++This option highlights individual residues which score badly in the alignment
++quality calculations. Residues which score exceptionally low are highlighted by
++using a white character on a grey background.
++
++<STRONG>
++SAVE QUALITY SCORES TO FILE
++</STRONG>
++
++The quality scores that are plotted underneath the alignment display can also
++be saved in a text file. Each column in the alignment is written on one line in
++the output file, with the value of the quality score at the end of the line.
++Only the sequences currently selected in the display are written to the file.
++One use for quality scores is to color residues in a protein structure by
++sequence conservation. In this way conserved surface residues can be
++highlighted to locate functional regions such as ligand-binding sites.
++
++
++<H3>
++CALCULATION OF QUALITY SCORES
++</H3>
++-----------------------------
++
++Suppose we have an alignment of m sequences of length n. Then, the alignment
++can be written as:
++
++<PRE>
++        A11 A12 A13 .......... A1n
++        A21 A22 A23 .......... A2n
++        .
++        .
++        Am1 Am2 Am3 .......... Amn
++</PRE>
++
++We also have a residue comparison matrix of size R where C(i,j) is the score
++for aligning residue i with residue j.
++
++We want to calculate a score for the conservation of the jth position in the
++alignment.
++
++To do this, we define an R-dimensional sequence space. For the jth position in 
++the alignment, each sequence consists of a single residue which is assigned a
++point S in the space. S has R dimensions, and for sequence i, the rth dimension
++is defined as:
++
++<PRE>
++	Sr =    C(r,Aij)
++</PRE>
++
++We then calculate a consensus value for the jth position in the alignment. This
++value X also has R dimensions, and the rth dimension is defined as:
++
++<PRE>
++	Xr = (   SUM   (Fij * C(i,r)) ) / m
++               1<=i<=R
++</PRE>
++
++where Fij is the count of residues i at position j in the alignment.
++
++Now we can calculate the distance Di between each sequence i and the consensus 
++position X in the R-dimensional space.
++
++<PRE>
++	Di = SQRT   (   SUM   (Xr - Sr)(Xr - Sr) )
++                      1<=r<=R
++
++</PRE>
++
++The quality score for the jth position in the alignment is defined as the mean
++of the sequence distances Di.
++
++The score is normalised by multiplying by the percentage of sequences which
++have residues (and not gaps) at this position.
++
++<H3>
++CALCULATION OF RESIDUE EXCEPTIONS
++</H3>
++---------------------------------
++
++The jth residue of the ith sequence is considered as an exception if the
++distance Di of the sequence from the consensus value X is greater than (Upper
++Quartile + Inter Quartile Range * Cutoff). The value used as a cutoff for
++displaying exceptions can be set from the SCORE PARAMETERS menu. A high cutoff
++value will only display very significant exceptions; a low value will allow
++more, less significant, exceptions to be highlighted.
++
++(NB. Sequences which contain gaps at this position are not included in the
++exception calculation.)
++
++
++<H3>
++CALCULATION OF LOW-SCORING SEGMENTS
++</H3>
++-----------------------------------
++
++Suppose we have an alignment of m sequences of length n. Then, the alignment
++can be written as:
++
++<PRE>
++        A11 A12 A13 .......... A1n
++        A21 A22 A23 .......... A2n
++        .
++        .
++        Am1 Am2 Am3 .......... Amn
++</PRE>
++
++We also have a residue comparison matrix of size R where C(i,j) is the score
++for aligning residue i with residue j.
++
++We calculate sequence weights by building a neighbour-joining tree, in which
++branch lengths are proportional to divergence. Summing the branches by branch
++ownership provides the weights. See Thompson et al., CABIOS, 10, 19 (1994) and
++Henikoff et al., JMB, 243, 574 (1994).
++
++To find the low-scoring segments in a sequence Si, we build a weighted profile
++of the remaining sequences in the alignment. Suppose we find residue r at 
++position j in the sequence; then the score for the jth position in the sequence
++is defined as
++
++<PRE>
++	Score(Si,j) = Profile(j,r)   where Profile(j,r) is the profile score
++                                       for residue r at position j in the
++                                       alignment.
++</PRE>
++
++These residue scores are summed along the sequence in both forward and backward
++directions. If the sum of the scores is positive, then it is reset to zero.
++Segments which score negatively in both directions are considered as 
++'low-scoring' and will be highlighted in the alignment display.
++
++
++>>HELP 9 <<
++              Command Line Parameters
++
++                DATA (sequences)
++
++-INFILE=file.ext                             :input sequences
++-PROFILE1=file.ext  and  -PROFILE2=file.ext  :profiles (aligned sequences)
++
++
++                VERBS (do things)
++
++-OPTIONS	    :list the command line parameters
++-HELP  or -CHECK    :outline the command line parameters
++-ALIGN              :do full multiple alignment 
++-TREE               :calculate NJ tree
++-BOOTSTRAP(=n)      :bootstrap a NJ tree (n= number of bootstraps; def. = 1000)
++-CONVERT            :output the input sequences in a different file format
++
++
++                PARAMETERS (set things)
++
++***General settings:****
++-INTERACTIVE :read command line, then enter normal interactive menus
++-QUICKTREE   :use FAST algorithm for the alignment guide tree
++-TYPE=       :PROTEIN or DNA sequences
++-NEGATIVE    :protein alignment with negative values in matrix
++-OUTFILE=    :sequence alignment file name
++-OUTPUT=     :GCG, GDE, PHYLIP, PIR or NEXUS
++-OUTORDER=   :INPUT or ALIGNED
++-CASE=       :LOWER or UPPER (for GDE output only)
++-SEQNOS=     :OFF or ON (for Clustal output only)
++
++
++***Fast Pairwise Alignments:***
++-KTUPLE=n      :word size
++-TOPDIAGS=n  :number of best diags.
++-WINDOW=n    :window around best diags.
++-PAIRGAP=n   :gap penalty
++-SCORE=      :PERCENT or ABSOLUTE
++
++
++***Slow Pairwise Alignments:***
++-PWMATRIX=    :Protein weight matrix=BLOSUM, PAM, GONNET, ID or filename
++-PWDNAMATRIX= :DNA weight matrix=IUB, CLUSTALW or filename
++-PWGAPOPEN=f  :gap opening penalty
++-PWGAPEXT=f  :gap extension penalty
++ 
++
++***Multiple Alignments:***
++-NEWTREE=    :file for new guide tree
++-USETREE=    :file for old guide tree
++-MATRIX=     :Protein weight matrix=BLOSUM, PAM, GONNET, ID or filename
++-DNAMATRIX=  :DNA weight matrix=IUB, CLUSTALW or filename
++-GAPOPEN=f   :gap opening penalty
++-GAPEXT=f  :gap extension penalty
++-ENDGAPS     :no end gap separation pen.
++-GAPDIST=n   :gap separation pen. range
++-NOPGAP      :residue-specific gaps off
++-NOHGAP    :hydrophilic gaps off
++-HGAPRESIDUES= :list hydrophilic res.
++-MAXDIV=n    :% ident. for delay
++-TYPE=       :PROTEIN or DNA
++-TRANSWEIGHT=f :transitions weighting
++
++
++***Profile Alignments:***
++-PROFILE      :Merge two alignments by profile alignment
++-NEWTREE1=    :file for new guide tree for profile1
++-NEWTREE2=    :file for new guide tree for profile2
++-USETREE1=    :file for old guide tree for profile1
++-USETREE2=    :file for old guide tree for profile2
++
++
++***Sequence to Profile Alignments:***
++-SEQUENCES   :Sequentially add profile2 sequences to profile1 alignment
++-NEWTREE=    :file for new guide tree
++-USETREE=    :file for old guide tree
++
++
++***Structure Alignments:***
++-NOSECSTR1     :do not use secondary structure/gap penalty mask for profile 1 
++-NOSECSTR2     :do not use secondary structure/gap penalty mask for profile 2
++-SECSTROUT=STRUCTURE or MASK or BOTH or NONE  :output in alignment file
++-HELIXGAP=n    :gap penalty for helix core residues 
++-STRANDGAP=n   :gap penalty for strand core residues
++-LOOPGAP=n     :gap penalty for loop regions
++-TERMINALGAP=n :gap penalty for structure termini
++-HELIXENDIN=n  :number of residues inside helix to be treated as terminal
++-HELIXENDOUT=n :number of residues outside helix to be treated as terminal
++-STRANDENDIN=n :number of residues inside strand to be treated as terminal
++-STRANDENDOUT=n:number of residues outside strand to be treated as terminal 
++
++
++***Trees:***
++-OUTPUTTREE=nj OR phylip OR dist OR nexus
++-SEED=n    :seed number for bootstraps
++-KIMURA      :use Kimura's correction
++-TOSSGAPS  :ignore positions with gaps
++-BOOTLABELS=node OR branch :position of bootstrap values in tree display
++
++
++>>HELP R <<
++                             References
++
++<STRONG>
++The ClustalX program is described in the manuscript:
++</STRONG>
++
++Thompson,J.D., Gibson,T.J., Plewniak,F., Jeanmougin,F. and Higgins,D.G. (1997)
++The ClustalX windows interface: flexible strategies for multiple sequence 
++alignment aided by quality analysis tools. Nucleic Acids Research, 25:4876-4882.
++
++
++<STRONG>
++The ClustalW program is described in the manuscript:
++</STRONG>
++
++Thompson, J.D., Higgins, D.G. and Gibson, T.J. (1994) CLUSTAL W: improving the
++sensitivity of progressive multiple sequence alignment through sequence
++weighting, position-specific gap penalties and weight matrix choice.  Nucleic
++Acids Research, 22:4673-4680.
++
++
++<STRONG>
++The ClustalV program is described in the manuscript:
++</STRONG>
++
++Higgins,D.G., Bleasby,A.J. and Fuchs,R. (1992) CLUSTAL V: improved software for
++multiple sequence alignment. CABIOS 8,189-191.
++
++
++<STRONG>
++The original Clustal program is described in the manuscripts:
++</STRONG>
++
++Higgins,D.G. and Sharp,P.M. (1989) Fast and sensitive multiple sequence
++alignments on a microcomputer.
++CABIOS 5,151-153.
++
++Higgins,D.G. and Sharp,P.M. (1988) CLUSTAL: a package for performing multiple
++sequence alignment on a microcomputer. Gene 73,237-244.
++
++-------------------------------------------------------------------------------
++<STRONG>
++Some tips on using Clustal X:
++</STRONG>
++
++Jeanmougin,F., Thompson,J.D., Gouy,M., Higgins,D.G. and Gibson,T.J. (1998)
++Multiple sequence alignment with Clustal X. Trends Biochem Sci, 23, 403-5.
++
++<STRONG>
++Some tips on using Clustal W:
++</STRONG>
++
++Higgins, D. G., Thompson, J. D. and Gibson, T. J. (1996) Using CLUSTAL for
++multiple sequence alignments. Methods Enzymol., 266, 383-402.
++
++-------------------------------------------------------------------------------
++<STRONG>
++You can get the latest version of the ClustalX program by anonymous ftp to:
++</STRONG>
++
++ftp-igbmc.u-strasbg.fr
++ftp.embl-heidelberg.de
++ftp.ebi.ac.uk
++
++<STRONG>
++Or, have a look at the following WWW site:
++</STRONG>
++
++http://www-igbmc.u-strasbg.fr/BioInfo/
++
++
+ This is the on-line help file for Clustal X (version 1.83), using the NCBI
+ Vibrant Toolkit.   
+ 

Added: trunk/packages/clustalx/trunk/debian/patches/interface.c.patch
===================================================================
--- trunk/packages/clustalx/trunk/debian/patches/interface.c.patch	                        (rev 0)
+++ trunk/packages/clustalx/trunk/debian/patches/interface.c.patch	2008-02-21 13:59:37 UTC (rev 1458)
@@ -0,0 +1,226 @@
+Index: clustalw-1.83/interface.c
+===================================================================
+--- clustalw-1.83.orig/interface.c
++++ clustalw-1.83/interface.c
+@@ -1223,8 +1223,7 @@
+ 			while(fgets(temp,MAXLINE+1,help_file)) {
+ 				if(strstr(temp, help_marker)){
+ 				  	if(usemenu) {
+-						fprintf(stdout,"\n");
+-				    		getstr("Press [RETURN] to continue",lin2);
++				    		getstr("\nPress [RETURN] to continue",MAXLINE+1,lin2);
+ 				  	}
+ 					fclose(help_file);
+ 					return;
+@@ -1235,8 +1234,7 @@
+ 				}
+ 			       if(usemenu) {
+ 			          if(nlines >= PAGE_LEN) {
+-				     	   fprintf(stdout,"\n");
+-			 	  	   getstr("Press [RETURN] to continue or  X  to stop",lin2);
++			 	  	   getstr("\nPress [RETURN] to continue or  X  to stop",MAXLINE+1,lin2);
+ 				  	   if(toupper(*lin2) == 'X') {
+ 						   fclose(help_file);
+ 						   return;
+@@ -1247,8 +1245,7 @@
+ 			       }
+ 			}
+ 			if(usemenu) {
+-				fprintf(stdout,"\n");
+-				getstr("Press [RETURN] to continue",lin2);
++				getstr("\nPress [RETURN] to continue",MAXLINE+1,lin2);
+ 			}
+ 			fclose(help_file);
+ 		}
+@@ -1286,8 +1283,7 @@
+                 fputs(temp,stdout);
+                 ++nlines;
+                 if(nlines >= PAGE_LEN) {
+-                        fprintf(stdout,"\n");
+-                        getstr("Press [RETURN] to continue or  X  to stop",lin2);
++                        getstr("\nPress [RETURN] to continue or  X  to stop",MAXLINE+1,lin2);
+                         if(toupper(*lin2) == 'X') {
+                                 fclose(file);
+                                 return;
+@@ -1297,8 +1293,7 @@
+                 }
+         }
+         fclose(file);
+-        fprintf(stdout,"\n");
+-        getstr("Press [RETURN] to continue",lin2);
++        getstr("\nPress [RETURN] to continue",MAXLINE+1,lin2);
+ }
+ 
+ 
+@@ -1747,7 +1742,7 @@
+         FILE *infile;
+ 
+         if(usemenu)
+-                getstr("Enter name of the matrix file",lin2);
++                getstr("Enter name of the matrix file",MAXLINE+1,lin2);
+         else
+                 strcpy(lin2,str);
+ 
+@@ -1773,7 +1768,7 @@
+         FILE *infile;
+ 
+         if(usemenu)
+-                getstr("Enter name of the matrix file",lin2);
++                getstr("Enter name of the matrix file",MAXLINE+1,lin2);
+         else
+                 strcpy(lin2,str);
+ 
+@@ -2163,6 +2158,7 @@
+  
+ {	static char temp[FILENAMELEN+1];
+ 	static char local_prompt[MAXLINE];
++	static char local_prompt_tmp[MAXLINE+FILENAMELEN+1];
+ 	FILE * file_handle;
+ 
+ /*	if (*file_name == EOS) {
+@@ -2174,17 +2170,17 @@
+ 		warning("Output file name is the same as input file.");
+ 		if (usemenu) {
+ 			strcpy(local_prompt,"\n\nEnter new name to avoid overwriting ");
+-			strcat(local_prompt," [%s]: ");          
+-			fprintf(stdout,local_prompt,file_name);
+-			gets(temp);
++			strcat(local_prompt," [%s]");          
++			sprintf(local_prompt_tmp,local_prompt,file_name);
++			getstr(local_prompt_tmp,FILENAMELEN+1,temp);
+ 			if(*temp != EOS) strcpy(file_name,temp);
+ 		}
+ 	}
+ 	else if (usemenu) {
+ 		strcpy(local_prompt,prompt);
+-		strcat(local_prompt," [%s]: ");          
+-		fprintf(stdout,local_prompt,file_name);
+-		gets(temp);
++		strcat(local_prompt," [%s]");          
++		sprintf(local_prompt_tmp,local_prompt,file_name);
++		getstr(local_prompt_tmp,FILENAMELEN+1,temp);
+ 		if(*temp != EOS) strcpy(file_name,temp);
+ 	}
+ 
+@@ -2260,7 +2256,7 @@
+         	}
+         	else {
+                  	if((tree = open_output_file(
+-                	"\nEnter name for new GUIDE TREE           file  ",path,
++                	"\nEnter name for new GUIDE TREE file ",path,
+                 	phylip_name,"dnd")) == NULL) return;
+         	}
+ 	}
+@@ -2327,6 +2323,7 @@
+ { 
+ 	char path[FILENAMELEN+1];
+ 	char tree_name[FILENAMELEN+1],temp[MAXLINE+1];
++	char tmp_msg[MAXLINE+1+300];
+ 	Boolean use_tree;
+ 	FILE *tree;
+ 	sint i,j,count;
+@@ -2383,9 +2380,9 @@
+         	if((tree=fopen(tree_name,"r"))!=NULL) {
+ #endif
+ 		if (usemenu)
+-            	fprintf(stdout,"\nUse the existing GUIDE TREE file,  %s  (y/n) ? [y]: ",
++            	sprintf(tmp_msg,"\nUse the existing GUIDE TREE file,  %s  (y/n) ? [y]",
+                                            tree_name);
+-                gets(temp);
++                getstr(tmp_msg,MAXLINE+1,temp);
+                 if(*temp != 'n' && *temp != 'N') {
+                     strcpy(phylip_name,tree_name);
+                     use_tree = TRUE;
+@@ -2584,6 +2581,7 @@
+ void get_tree(char *phylip_name)
+ {
+ 	char path[FILENAMELEN+1],temp[MAXLINE+1];
++	char tmp_msg[FILENAMELEN+300];
+ 	sint count;
+ 	
+ 	if(empty) {
+@@ -2615,9 +2613,9 @@
+        			strcpy(phylip_name,path);
+        			strcat(phylip_name,"dnd");
+ 
+-            fprintf(stdout,"\nEnter a name for the guide tree file [%s]: ",
++			sprintf(tmp_msg,"\nEnter a name for the guide tree file [%s]",
+                                            phylip_name);
+-                	gets(temp);
++                	getstr(tmp_msg,MAXLINE+1,temp);
+                 	if(*temp != EOS)
+                         	strcpy(phylip_name,temp);
+         	}
+@@ -2685,6 +2683,8 @@
+ 	char path[FILENAMELEN+1];
+ 	char tree_name[FILENAMELEN+1];
+ 	char temp[MAXLINE+1];
++	char tmp_msg[FILENAMELEN+300];
++
+ 	Boolean use_tree1,use_tree2;
+ 	FILE *tree;
+ 	sint count,i,j,dscore;
+@@ -2717,9 +2717,9 @@
+ #else
+         	if((tree=fopen(tree_name,"r"))!=NULL) {
+ #endif
+-            	fprintf(stdout,"\nUse the existing GUIDE TREE file for Profile 1,  %s  (y/n) ? [y]: ",
++            	sprintf(tmp_msg,"\nUse the existing GUIDE TREE file for Profile 1,  %s  (y/n) ? [y]",
+                                            tree_name);
+-                gets(temp);
++                getstr(tmp_msg,MAXLINE+1,temp);
+                 if(*temp != 'n' && *temp != 'N') {
+                     strcpy(p1_tree_name,tree_name);
+                     use_tree1 = TRUE;
+@@ -2739,19 +2739,20 @@
+ 			strcpy(tree_name,path);
+ 			strcat(tree_name,"dnd");
+ #ifdef VMS
+-        	if((tree=fopen(tree_name,"r","rat=cr","rfm=var"))!=NULL) {
++			if((tree=fopen(tree_name,"r","rat=cr","rfm=var"))!=NULL)
+ #else
+-        	if((tree=fopen(tree_name,"r"))!=NULL) {
++			if((tree=fopen(tree_name,"r"))!=NULL)
+ #endif
+-            	fprintf(stdout,"\nUse the existing GUIDE TREE file for Profile 2,  %s  (y/n) ? [y]: ",
+-                                           tree_name);
+-                gets(temp);
+-                if(*temp != 'n' && *temp != 'N') {
+-                    strcpy(p2_tree_name,tree_name);
+-                    use_tree2 = TRUE;
+-                }
+-                fclose(tree);
+-        	}
++			{
++				sprintf(tmp_msg,"\nUse the existing GUIDE TREE file for Profile 2,  %s  (y/n) ? [y]",
++						   tree_name);
++				getstr(tmp_msg,MAXLINE+1,temp);
++				if(*temp != 'n' && *temp != 'N') {
++				    strcpy(p2_tree_name,tree_name);
++				    use_tree2 = TRUE;
++				}
++				fclose(tree);
++			}
+ 		}
+ 		else if (!usemenu && use_tree2_file) {
+ 			use_tree2 = TRUE;
+@@ -4194,6 +4195,7 @@
+ {
+   char parname[FILENAMELEN+1], temp[FILENAMELEN+1];
+   char path[FILENAMELEN+1];
++  char tmp_msg[FILENAMELEN+300];
+   FILE *parout;
+   
+   get_path(seqname,path);
+@@ -4201,9 +4203,9 @@
+   strcat(parname,"par");
+   
+   if(usemenu) {
+-    fprintf(stdout,"\nEnter a name for the parameter output file [%s]: ",
++    sprintf(tmp_msg,"\nEnter a name for the parameter output file [%s]",
+ 	    parname);
+-    gets(temp);
++    getstr(tmp_msg,FILENAMELEN+1,temp);
+     if(*temp != EOS)
+       strcpy(parname,temp);
+   }

Added: trunk/packages/clustalx/trunk/debian/patches/makefile.patch
===================================================================
--- trunk/packages/clustalx/trunk/debian/patches/makefile.patch	                        (rev 0)
+++ trunk/packages/clustalx/trunk/debian/patches/makefile.patch	2008-02-21 13:59:37 UTC (rev 1458)
@@ -0,0 +1,101 @@
+Index: clustalw-1.83/makefile
+===================================================================
+--- clustalw-1.83.orig/makefile
++++ clustalw-1.83/makefile
+@@ -1,7 +1,15 @@
+-install: clustalx clustalw
+ 
+-clean:
+-	rm *.o
++RM=/bin/rm -f
++
++BINDIR=$(DESTDIR)/usr/bin
++XBINDIR=$(DESTDIR)/usr/X11R6/bin
++DOCDIR=$(DESTDIR)/usr/share/doc/clustalw
++XDOCDIR=$(DESTDIR)/usr/share/doc/clustalx
++LIBDIR=$(DESTDIR)/usr/share/clustalw
++MANDIR=$(DESTDIR)/usr/share/man/man1
++XMANDIR=$(DESTDIR)/usr/X11R6/man/man1
++DOCS=clustalv.doc clustalw.doc clustalw.ms README_W
++XDOCS=README_X clustalx.html
+ 
+ OBJECTS = interface.o sequence.o showpair.o malign.o \
+   	util.o trees.o gcgcheck.o prfalign.o pairalign.o \
+@@ -12,25 +20,36 @@
+ 
+ HEADERS = general.h clustalw.h
+ 
+-CC	= cc
+-CFLAGS  = -c -O
++CC	= gcc
++CFLAGS  = -c -O2
++
++MACHINE=$(shell uname -m)
++ifeq ("$(MACHINE)","alpha")
++ # -mieee is for the Alpha only: ClustalW divides by zero (yes, I know it's bad)
++ # and expect the processor to goes on. -mieee tells the Alpha to comply with
++ # the IEEE standard and to shut up about divisions by zero.
++ CFLAGS  +=  -mieee
++endif
++
+ LFLAGS	= -O -lm 
+-NCBI_INC  = /dec/biolo/ncbi/include
+-NCBI_LIB	= /dec/biolo/ncbi/lib
+-CXFLAGS  = -DWIN_MOTIF -I$(NCBI_INC)
+-LXFLAGS	= -L$(NCBI_LIB) -lvibrant -lncbi -lpthread -lXm -lXmu -lXt -lX11 -lm 
++NCBI_INC= /usr/include/ncbi
++NCBI_LIB= /usr/lib
++CXFLAGS	= -DWIN_MOTIF -I$(NCBI_INC)
++LXFLAGS	= -L/usr/X11R6/lib -lvibrant -lncbi -lpthread -lXm -lXmu -lXt -lX11 -lm
+ 
+-clustalw : $(OBJECTS) amenu.o clustalw.o
+-	$(CC) -o $@ $(OBJECTS) amenu.o clustalw.o $(LFLAGS)
++all: clustalx clustalw
+ 
+-interface.o : interface.c $(HEADERS) param.h
+-	$(CC) $(CFLAGS) $*.c
++machine:
++	echo $(MACHINE)
+ 
+-amenu.o : amenu.c $(HEADERS) param.h
+-	$(CC) $(CFLAGS) $*.c
++clustalw : $(OBJECTS) $(XOBJECTS) amenu.o clustalw.o
++	$(CC) -o $@ -I$(NCBI_INC) $(OBJECTS) amenu.o clustalw.o $(LFLAGS)
+ 
+ clustalx : $(OBJECTS) $(XOBJECTS) clustalx.o
+-	$(CC) -o $@ $(OBJECTS) $(XOBJECTS) clustalx.o $(LFLAGS) $(LXFLAGS)
++	$(CC) -o $@ -I$(NCBI_INC) $(OBJECTS) $(XOBJECTS) clustalx.o $(LFLAGS) $(LXFLAGS)
++
++clustalw.o : clustalw.c $(HEADERS)
++	$(CC) $(CFLAGS) $*.c
+ 
+ clustalx.o : clustalx.c $(HEADERS)
+ 	$(CC) $(CFLAGS) $(CXFLAGS) $*.c
+@@ -56,6 +75,25 @@
+ trees.o : trees.c $(HEADERS) dayhoff.h
+ 	$(CC) $(CFLAGS) $*.c
+ 
+-.c.o :
+-	$(CC) $(CFLAGS) $?
++
++
++install: all
++	install -d $(BINDIR) $(XBINDIR) $(LIBDIR) $(DOCDIR)/examples $(MANDIR) $(XMANDIR) $(XDOCDIR)
++	install -m 0755 clustalw $(BINDIR)
++	install -m 0755 clustalx $(XBINDIR)
++	install -m 0644 clustalw_help clustalx_help $(LIBDIR)
++	install -m 0644 clustalw.1 $(MANDIR)
++	install -m 0644 clustalx.1 $(MANDIR)
++	install -m 0644 $(DOCS) $(DOCDIR)
++	install -m 0644 $(XDOCS) $(XDOCDIR)
++	cp -a -R tests.clustalw $(DOCDIR)/examples/tests
++
++.PHONY:	clean distclean
++
++clean:
++	$(RM) *.o
++
++distclean: clean
++	$(RM) clustalw clustalx
++	cd tests.clustalw; make clean
+ 

Added: trunk/packages/clustalx/trunk/debian/patches/sequence.c.patch
===================================================================
--- trunk/packages/clustalx/trunk/debian/patches/sequence.c.patch	                        (rev 0)
+++ trunk/packages/clustalx/trunk/debian/patches/sequence.c.patch	2008-02-21 13:59:37 UTC (rev 1458)
@@ -0,0 +1,13 @@
+Index: clustalw-1.83/sequence.c
+===================================================================
+--- clustalw-1.83.orig/sequence.c
++++ clustalw-1.83/sequence.c
+@@ -924,7 +924,7 @@
+ 	static Boolean dnaflag1;
+ 	
+ 	if(usemenu)
+-		getstr("Enter the name of the sequence file",line);
++		getstr("Enter the name of the sequence file",FILENAMELEN+1,line);
+ 	else
+ 		strcpy(line,seqname);
+ 	if(*line == EOS) return -1;

Added: trunk/packages/clustalx/trunk/debian/patches/series
===================================================================
--- trunk/packages/clustalx/trunk/debian/patches/series	                        (rev 0)
+++ trunk/packages/clustalx/trunk/debian/patches/series	2008-02-21 13:59:37 UTC (rev 1458)
@@ -0,0 +1,11 @@
+amenu.c.patch
+clustal-help.patch
+clustalw.h.patch
+clustalx.html.patch
+interface.c.patch
+sequence.c.patch
+trees.c.patch
+util.c.patch
+makefile.patch
+clustalx_help.patch
+xmenu.c.patch

Added: trunk/packages/clustalx/trunk/debian/patches/trees.c.patch
===================================================================
--- trunk/packages/clustalx/trunk/debian/patches/trees.c.patch	                        (rev 0)
+++ trunk/packages/clustalx/trunk/debian/patches/trees.c.patch	2008-02-21 13:59:37 UTC (rev 1458)
@@ -0,0 +1,13 @@
+Index: clustalw-1.83/trees.c
+===================================================================
+--- clustalw-1.83.orig/trees.c
++++ clustalw-1.83/trees.c
+@@ -1497,7 +1497,7 @@
+ 		fprintf(stdout,"\n           or 3) use the PHYLIP package.");
+ 		fprintf(stdout,"\n\n");
+ 		if (usemenu) 
+-			getstr("Press [RETURN] to continue",dummy);
++			getstr("Press [RETURN] to continue",10,dummy);
+ 	}
+ 
+ 

Added: trunk/packages/clustalx/trunk/debian/patches/util.c.patch
===================================================================
--- trunk/packages/clustalx/trunk/debian/patches/util.c.patch	                        (rev 0)
+++ trunk/packages/clustalx/trunk/debian/patches/util.c.patch	2008-02-21 13:59:37 UTC (rev 1458)
@@ -0,0 +1,52 @@
+Index: clustalw-1.83/util.c
+===================================================================
+--- clustalw-1.83.orig/util.c
++++ clustalw-1.83/util.c
+@@ -171,10 +171,18 @@
+ 	return str;
+ }
+ 
+-void getstr(char *instr,char *outstr)
++void getstr(char *instr, int n, char *outstr)
+ {	
++	int sl;
+ 	fprintf(stdout,"%s: ",instr);
+-	gets(outstr);
++	fgets(outstr,n,stdin);
++	/*
++	 * modify outstr for compatibility with prior used (insecure) gets()
++	 */
++	sl=strlen(outstr);
++	if(sl>0 && '\n'==outstr[sl-1]) {
++		outstr[sl-1]=0;
++	}
+ }
+ 
+ double getreal(char *instr,double minx,double maxx,double def)
+@@ -185,7 +193,7 @@
+ 	
+ 	while(TRUE) {
+ 		fprintf(stdout,"%s (%.1f-%.1f)   [%.1f]: ",instr,minx,maxx,def);
+-		gets(line);
++		fgets(line,MAXLINE,stdin);
+ 		status=sscanf(line,"%f",&ret);
+ 		if(status == EOF) return def;
+ 		if(ret>maxx) {
+@@ -210,7 +218,7 @@
+ 	while(TRUE) {
+ 		fprintf(stdout,"%s (%d..%d)    [%d]: ",
+ 		instr,(pint)minx,(pint)maxx,(pint)def);
+-		gets(line);
++		fgets(line,MAXLINE,stdin);
+ 		status=sscanf(line,"%d",&ret);
+ 		if(status == EOF) return def;
+ 		if(ret>maxx) {
+@@ -230,7 +238,7 @@
+ {
+ 	char line[MAXLINE];
+ 	
+-	getstr("\n\nEnter system command",line);
++	getstr("\n\nEnter system command",MAXLINE,line);
+ 	if(*line != EOS)
+ 		system(line);
+ 	fprintf(stdout,"\n\n");

Added: trunk/packages/clustalx/trunk/debian/patches/xmenu.c.patch
===================================================================
--- trunk/packages/clustalx/trunk/debian/patches/xmenu.c.patch	                        (rev 0)
+++ trunk/packages/clustalx/trunk/debian/patches/xmenu.c.patch	2008-02-21 13:59:37 UTC (rev 1458)
@@ -0,0 +1,13 @@
+Index: xmenu.c
+===================================================================
+--- ./xmenu.c	(révision 173)
++++ ./xmenu.c	(révision 174)
+@@ -4411,7 +4411,7 @@
+ 	while(TRUE) {
+ 		if(fgets(temp,MAXLINE+1,fd) == NULL) {
+ 			if(!found_help)
+-				error("No help found in help file");
++				error("No help found in help file [%s]",help_file);
+ 			fclose(fd);
+ 			return;
+ 		}

Modified: trunk/packages/clustalx/trunk/debian/rules
===================================================================
--- trunk/packages/clustalx/trunk/debian/rules	2008-02-20 21:14:58 UTC (rev 1457)
+++ trunk/packages/clustalx/trunk/debian/rules	2008-02-21 13:59:37 UTC (rev 1458)
@@ -2,16 +2,13 @@
 
 include /usr/share/quilt/quilt.make
 
-build: build-stamp
+build: patch build-stamp
 build-stamp:
 	dh_testdir
-	qmake-qt4
 	$(MAKE)
-	# no longer required?
-	#mv clustalQtcurrent clustalx
 	touch build-stamp
 
-clean:
+clean: unpatch
 	dh_testdir
 	dh_testroot
 	[ ! -f Makefile ] || $(MAKE) distclean
@@ -25,7 +22,6 @@
 	dh_installdirs
 	dh_install
 
-# Build architecture-dependent files here.
 binary-arch: build install
 	dh_testdir
 	dh_testroot
@@ -46,6 +42,10 @@
 	dh_md5sums
 	dh_builddeb
 
+get-orig-source:
+	destdir=.;if [ -d debian ]; then destdir=..; fi; \
+	lynx --dump ftp://ftp.ebi.ac.uk/pub/software/unix/clustalx/clustalx1.83.sun.tar.gz | tar --delete clustalx1.83.sun/clustalx clustalx1.83.sun/clustalw  -f - | gzip -c > $$destdir/clustalw_1.83.orig.tar.gz
+
 binary: binary-arch
 binary-indep: # does nothing
 .PHONY: build clean binary-arch binary install